o
    Zhm                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d dlmZmZ G d	d
 d
eZG dd deZdS )    N)TextIOWrapper)Path)AnyDictIteratorListOptionalSequenceUnion)Document)
BaseLoader)detect_file_encodings)UnstructuredFileLoadervalidate_unstructured_versionc                   @   s   e Zd ZdZ					ddddeeef dee dee d	ee	 d
ee de
dee fddZdee fddZdedee fddZdS )	CSVLoadera  Load a `CSV` file into a list of Documents.

    Each document represents one row of the CSV file. Every row is converted
    into a key/value pair and outputted to a new line in the document's
    page_content.

    The source for each document loaded from csv is set to the value of the
    `file_path` argument for all documents by default.
    You can override this by setting the `source_column` argument to the
    name of a column in the CSV file.
    The source of each document will then be set to the value of the column
    with the name specified in `source_column`.

    Output Example:
        .. code-block:: txt

            column1: value1
            column2: value2
            column3: value3

    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import CSVLoader

            loader = CSVLoader(file_path='./hw_200.csv',
                csv_args={
                'delimiter': ',',
                'quotechar': '"',
                'fieldnames': ['Index', 'Height', 'Weight']
            })

    Load:
        .. code-block:: python

            docs = loader.load()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Index: Index
            Height: Height(Inches)"
            Weight: "Weight(Pounds)"
            {'source': './hw_200.csv', 'row': 0}

    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Index: Index
            Height: Height(Inches)"
            Weight: "Weight(Pounds)"
            {'source': './hw_200.csv', 'row': 0}

    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Index: Index
            Height: Height(Inches)"
            Weight: "Weight(Pounds)"
            {'source': './hw_200.csv', 'row': 0}
    N F)content_columns	file_pathsource_columnmetadata_columnscsv_argsencodingautodetect_encodingr   c                C   s2   || _ || _|| _|| _|pi | _|| _|| _dS )a  

        Args:
            file_path: The path to the CSV file.
            source_column: The name of the column in the CSV file to use as the source.
              Optional. Defaults to None.
            metadata_columns: A sequence of column names to use as metadata. Optional.
            csv_args: A dictionary of arguments to pass to the csv.DictReader.
              Optional. Defaults to None.
            encoding: The encoding of the CSV file. Optional. Defaults to None.
            autodetect_encoding: Whether to try to autodetect the file encoding.
            content_columns: A sequence of column names to use for the document content.
                If not present, use all columns that are not part of the metadata.
        N)r   r   r   r   r   r   r   )selfr   r   r   r   r   r   r   r   r   f/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/csv_loader.py__init__c   s   

zCSVLoader.__init__returnc                 c   s8   z&t | jd| jd}| |E d H  W d    W d S 1 s w   Y  W d S  ty } zT| jrlt| j}|D ]1}z&t | jd|jd}| |E d H  	 W d    W  n$1 s[w   Y  W q9 tyj   Y q9w n	td| j |W Y d }~d S W Y d }~d S d }~w ty } z	td| j |d }~ww )N )newliner   zError loading )	openr   r   _CSVLoader__read_fileUnicodeDecodeErrorr   r   RuntimeError	Exception)r   csvfileeZdetected_encodingsr   r   r   r   	lazy_load   s:   &

&
	zCSVLoader.lazy_loadr$   c           	   
   #   s    t j|fi  j}t|D ]]\}}z jd ur| j nt j}W n ty4   td j dw d	 fdd|
 D }||d} jD ]}z|| ||< W qK tyd   td| dw t||dV  qd S )	NzSource column 'z' not found in CSV file.
c                 3   s|    | ]9\}} j r| j v r;n| jvr|d ur| n| dt|tr(| nt|tr6dttj|n| V  qd S )Nz: ,)r   r   strip
isinstancestrlistjoinmap).0kvr   r   r   	<genexpr>   s     


z(CSVLoader.__read_file.<locals>.<genexpr>)sourcerowzMetadata column ')Zpage_contentmetadata)csv
DictReaderr   	enumerater   r+   r   KeyError
ValueErrorr-   itemsr   r   )	r   r$   Z
csv_readerir5   r4   contentr6   colr   r2   r   Z__read_file   s2   



zCSVLoader.__read_file)Nr   NNF)__name__
__module____qualname____doc__r
   r+   r   r   r	   r   boolr   r   r   r&   r   r    r   r   r   r   r      s4    U	
	
!r   c                       s@   e Zd ZdZ	ddededef fddZdefd	d
Z  Z	S )UnstructuredCSVLoadera|  Load `CSV` files using `Unstructured`.

    Like other
    Unstructured loaders, UnstructuredCSVLoader can be used in both
    "single" and "elements" mode. If you use the loader in "elements"
    mode, the CSV file will be a single Unstructured Table element.
    If you use the loader in "elements" mode, an HTML representation
    of the table will be available in the "text_as_html" key in the
    document metadata.

    Examples
    --------
    from langchain_community.document_loaders.csv_loader import UnstructuredCSVLoader

    loader = UnstructuredCSVLoader("stanley-cups.csv", mode="elements")
    docs = loader.load()
    singler   modeunstructured_kwargsc                    s&   t dd t jd||d| dS )a  

        Args:
            file_path: The path to the CSV file.
            mode: The mode to use when loading the CSV file.
              Optional. Defaults to "single".
            **unstructured_kwargs: Keyword arguments to pass to unstructured.
        z0.6.8)Zmin_unstructured_version)r   rG   Nr   )r   superr   )r   r   rG   rH   	__class__r   r   r      s   
zUnstructuredCSVLoader.__init__r   c                 C   s"   ddl m} |dd| ji| jS )Nr   )partition_csvfilenamer   )Zunstructured.partition.csvrL   r   rH   )r   rL   r   r   r   _get_elements   s   z#UnstructuredCSVLoader._get_elements)rF   )
r@   rA   rB   rC   r+   r   r   r   rN   __classcell__r   r   rJ   r   rE      s    rE   )r7   ior   pathlibr   typingr   r   r   r   r   r	   r
   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   Z,langchain_community.document_loaders.helpersr   Z1langchain_community.document_loaders.unstructuredr   r   r   rE   r   r   r   r   <module>   s    $ /