o
    ZhD"                     @   sl   d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ G dd deZdS )	    N)PathLike)Path)AnyCallableDictIteratorOptionalUnion)Document)
BaseLoaderc                   @   s   e Zd ZdZ					ddeeef dedee dee d	ee	e
e
ge
f  d
edefddZdee fddZdededee fddZdedefddZde
eef dede
eef fddZdeddfddZdS )
JSONLoadera  
    Load a `JSON` file using a `jq` schema.

    Setup:
        .. code-block:: bash

            pip install -U jq

    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import JSONLoader
            import json
            from pathlib import Path

            file_path='./sample_quiz.json'
            data = json.loads(Path(file_path).read_text())
            loader = JSONLoader(
                     file_path=file_path,
                     jq_schema='.quiz',
                     text_content=False)

    Load:
        .. code-block:: python

            docs = loader.load()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            {"sport": {"q1": {"question": "Which one is correct team name in
            NBA?", "options": ["New York Bulls"
            {'source': '/sample_quiz
            .json', 'seq_num': 1}

    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            {"sport": {"q1": {"question": "Which one is correct team name in
            NBA?", "options": ["New York Bulls"
            {'source': '/sample_quizg
            .json', 'seq_num': 1}

    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            {"sport": {"q1": {"question": "Which one is correct team name in
            NBA?", "options": ["New York Bulls"
            {'source': '/sample_quiz
            .json', 'seq_num': 1}
    NFT	file_path	jq_schemacontent_keyis_content_key_jq_parsablemetadata_functext_content
json_linesc           	      C   sf   z	ddl }|| _ W n ty   tdw t| | _||| _|| _|| _|| _	|| _
|| _dS )a~  Initialize the JSONLoader.

        Args:
            file_path (Union[str, PathLike]): The path to the JSON or JSON Lines file.
            jq_schema (str): The jq schema to use to extract the data or text from
                the JSON.
            content_key (str): The key to use to extract the content from
                the JSON if the jq_schema results to a list of objects (dict).
                If is_content_key_jq_parsable is True, this has to be a jq compatible
                schema. If is_content_key_jq_parsable is False, this should be a simple
                string key.
            is_content_key_jq_parsable (bool): A flag to determine if
                content_key is parsable by jq or not. If True, content_key is
                treated as a jq schema and compiled accordingly. If False or if
                content_key is None, content_key is used as a simple string.
                Default is False.
            metadata_func (Callable[Dict, Dict]): A function that takes in the JSON
                object extracted by the jq_schema and the default metadata and returns
                a dict of the updated metadata.
            text_content (bool): Boolean flag to indicate whether the content is in
                string format, default to True.
            json_lines (bool): Boolean flag to indicate whether the input is in
                JSON Lines format.
        r   Nz=jq package not found, please install it with `pip install jq`)jqImportErrorr   resolver   compile
_jq_schema_is_content_key_jq_parsable_content_key_metadata_func_text_content_json_lines)	selfr   r   r   r   r   r   r   r    r   g/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/json_loader.py__init__T   s   "

zJSONLoader.__init__returnc                 c   s    d}| j r;| jjdd$}|D ]}| }|r(| ||D ]	}|V  |d7 }qqW d   dS 1 s4w   Y  dS | | jjdd|D ]	}|V  |d7 }qFdS )z-Load and return documents from the JSON file.r   zutf-8)encoding   N)r   r   openstrip_parse	read_text)r   indexflinedocr   r   r    	lazy_load   s"   
"
zJSONLoader.lazy_loadcontentr)   c                 c   st    | j t|}| jdur| | t||d D ]\}}| j|d}| j|t	| j
|d}t||dV  qdS )z#Convert given content to documents.Nr$   )sample)r/   sourceZseq_num)Zpage_contentmetadata)r   inputjsonloadsr   _validate_content_key	enumerate	_get_text_get_metadatastrr   r
   )r   r.   r)   datair/   textr1   r   r   r    r'      s   

zJSONLoader._parser/   c                 C   s   | j dur| jr| j| j }|| }n|| j  }n|}| jr5t|ts5|dur5t	dt
| dt|tr<|S t|ttfrL|rJt|S dS |durTt|S dS )zConvert sample to string formatNz%Expected page_content is string, got z instead.                     Set `text_content=False` if the desired input for                     `page_content` is not a string )r   r   r   r   r2   firstr   
isinstancer9   
ValueErrortypedictlistr3   dumps)r   r/   Zcompiled_content_keyr.   r   r   r    r7      s   

zJSONLoader._get_textadditional_fieldsc                 K   s<   | j dur|  ||}t|tstdt| d|S |S )z
        Return a metadata dictionary base on the existence of metadata_func
        :param sample: single data payload
        :param additional_fields: key-word arguments to be added as metadata values
        :return:
        NzUExpected the metadata_func to return a dict but got                                 ``)r   r?   rB   r@   rA   )r   r/   rE   resultr   r   r    r8      s   
	
zJSONLoader._get_metadatar:   c                 C   s   |  }t|tstdt| d| js'|| jdu r'td| j d| jrA| j	| j
| du rCtd| j ddS dS )zCheck if a content key is validztExpected the jq schema to result in a list of objects (dict),                     so sample must be a dict but got `rF   Nz_Expected the jq schema to result in a list of objects (dict)                     with the key `z ` which should be parsable by jq)r>   r?   rB   r@   rA   r   getr   r   r   r2   r<   )r   r:   r/   r   r   r    r5      s2   
z JSONLoader._validate_content_key)NFNTF)__name__
__module____qualname____doc__r	   r9   r   r   boolr   r   r!   r   r
   r-   intr'   r   r7   r8   r5   r   r   r   r    r      sB    L

3


r   )r3   osr   pathlibr   typingr   r   r   r   r   r	   Zlangchain_core.documentsr
   Z)langchain_community.document_loaders.baser   r   r   r   r   r    <module>   s     