o
    Zh                     @  sR   d dl mZ d dlZd dlZd dlmZmZmZmZ d dl	m
Z
 G dd dZdS )    )annotationsN)AnyDictListOptional)Documentc                      s   e Zd ZdZ	d1d2 fdd	Zed3ddZed4ddZd5ddZ		d6d7dd Z		!d8d9d%d&Z
	!	'd:d;d)d*Z	!	'	d<d=d/d0Z  ZS )>RecursiveJsonSplittera  Splits JSON data into smaller, structured chunks while preserving hierarchy.

    This class provides methods to split JSON data into smaller dictionaries or
    JSON-formatted strings based on configurable maximum and minimum chunk sizes.
    It supports nested JSON structures, optionally converts lists into dictionaries
    for better chunking, and allows the creation of document objects for further use.

    Attributes:
        max_chunk_size (int): The maximum size for each chunk. Defaults to 2000.
        min_chunk_size (int): The minimum size for each chunk, derived from
            `max_chunk_size` if not explicitly provided.
      Nmax_chunk_sizeintmin_chunk_sizeOptional[int]c                   s6   t    || _|dur|| _dS t|d d| _dS )a!  Initialize the chunk size configuration for text processing.

        This constructor sets up the maximum and minimum chunk sizes, ensuring that
        the `min_chunk_size` defaults to a value slightly smaller than the
        `max_chunk_size` if not explicitly provided.

        Args:
            max_chunk_size (int): The maximum size for a chunk. Defaults to 2000.
            min_chunk_size (Optional[int]): The minimum size for a chunk. If None,
                defaults to the maximum chunk size minus 200, with a lower bound of 50.

        Attributes:
            max_chunk_size (int): The configured maximum size for each chunk.
            min_chunk_size (int): The configured minimum size for each chunk, derived
                from `max_chunk_size` if not explicitly provided.
        N   2   )super__init__r
   maxr   )selfr
   r   	__class__ T/var/www/html/lang_env/lib/python3.10/site-packages/langchain_text_splitters/json.pyr      s   
zRecursiveJsonSplitter.__init__datar   returnc                 C  s   t t| S )z1Calculate the size of the serialized JSON object.)lenjsondumps)r   r   r   r   
_json_size3   s   z RecursiveJsonSplitter._json_sizedpath	List[str]valuer   Nonec                 C  s.   |dd D ]}|  |i } q|| |d < dS )z;Set a value in a nested dictionary based on the given path.N)
setdefault)r   r   r!   keyr   r   r   _set_nested_dict8   s   z&RecursiveJsonSplitter._set_nested_dictc                   sD   t |tr fdd| D S t |tr  fddt|D S |S )Nc                   s   i | ]
\}}|  |qS r   )_list_to_dict_preprocessing).0kvr   r   r   
<dictcomp>B   s    zERecursiveJsonSplitter._list_to_dict_preprocessing.<locals>.<dictcomp>c                   s    i | ]\}}t | |qS r   )strr'   )r(   iitemr+   r   r   r,   E   s    )
isinstancedictitemslist	enumerate)r   r   r   r+   r   r'   ?   s   


z1RecursiveJsonSplitter._list_to_dict_preprocessingDict[str, Any]current_pathOptional[List[str]]chunksOptional[List[Dict]]
List[Dict]c           
      C  s   |pg }|dur
|ni g}t |trT| D ];\}}||g }| |d }| ||i}| j| }	||	k r@| |d || q|| jkrJ|i  | ||| q|S | |d || |S )zESplit json into maximum size dictionaries while preserving structure.Nr#   )	r0   r1   r2   r   r
   r&   r   append_json_split)
r   r   r6   r8   r%   r!   new_path
chunk_sizesize	remainingr   r   r   r<   M   s    




z!RecursiveJsonSplitter._json_splitF	json_dataconvert_listsboolc                 C  s4   |r|  | |}n|  |}|d s|  |S )z'Splits JSON into a list of JSON chunks.r#   )r<   r'   pop)r   rA   rB   r8   r   r   r   
split_jsonl   s   
z RecursiveJsonSplitter.split_jsonTensure_asciic                   s    | j ||d} fdd|D S )z2Splits JSON into a list of JSON formatted strings.)rA   rB   c                   s   g | ]	}t j| d qS )rF   )r   r   )r(   chunkrG   r   r   
<listcomp>   s    z4RecursiveJsonSplitter.split_text.<locals>.<listcomp>)rE   )r   rA   rB   rF   r8   r   rG   r   
split_text|   s   z RecursiveJsonSplitter.split_texttexts	metadatasOptional[List[dict]]List[Document]c                 C  sf   |pi gt | }g }t|D ]!\}}| j|||dD ]}	t|| }
t|	|
d}|| qq|S )z4Create documents from a list of json objects (Dict).)rA   rB   rF   )Zpage_contentmetadata)r   r4   rJ   copydeepcopyr   r;   )r   rK   rB   rF   rL   Z
_metadatasZ	documentsr.   textrH   rO   Znew_docr   r   r   create_documents   s   
z&RecursiveJsonSplitter.create_documents)r	   N)r
   r   r   r   )r   r   r   r   )r   r   r   r    r!   r   r   r"   )r   r   r   r   )NN)r   r5   r6   r7   r8   r9   r   r:   )F)rA   r5   rB   rC   r   r:   )FT)rA   r5   rB   rC   rF   rC   r   r    )FTN)
rK   r:   rB   rC   rF   rC   rL   rM   r   rN   )__name__
__module____qualname____doc__r   staticmethodr   r&   r'   r<   rE   rJ   rS   __classcell__r   r   r   r   r   
   s*    
"r   )
__future__r   rP   r   typingr   r   r   r   Zlangchain_core.documentsr   r   r   r   r   r   <module>   s    