o
    Zhi                     @  sp   d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ G dd deZdS )    )annotationsN)AnyDictIteratorListOptionalTuple)Document)
BaseLoaderc                   @  s\   e Zd ZdZ		d"d#ddZd$ddZd%ddZd%ddZd&ddZd'ddZ	d(d d!Z
dS ))AthenaLoaderaY  Load documents from `AWS Athena`.

    Each document represents one row of the result.
    - By default, all columns are written into the `page_content` of the document
    and none into the `metadata` of the document.
    - If `metadata_columns` are provided then these columns are written
    into the `metadata` of the document while the rest of the columns
    are written into the `page_content` of the document.

    To authenticate, the AWS client uses this method to automatically load credentials:
    https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html

    If a specific credential profile should be used, you must pass
    the name of the profile from the ~/.aws/credentials file that is to be used.

    Make sure the credentials / roles used have the required policies to
    access the Amazon Textract service.
    Nquerystrdatabases3_output_uriprofile_nameOptional[str]metadata_columnsOptional[List[str]]c           	   
   C  s   || _ || _|| _|dur|ng | _zddl}W n ty#   tdw z|dur/|j|dn| }W n tyE } ztd|d}~ww |	d| _
|	d| _dS )ag  Initialize Athena document loader.

        Args:
            query: The query to run in Athena.
            database: Athena database.
            s3_output_uri: Athena output path.
            profile_name: Optional. AWS credential profile, if profiles are being used.
            metadata_columns: Optional. Columns written to Document `metadata`.
        Nr   zRCould not import boto3 python package. Please install it with `pip install boto3`.)r   zCould not load credentials to authenticate with AWS client. Please check that credentials in the specified profile name are valid.ZathenaZs3)r   r   r   r   boto3ImportErrorSession	Exception
ValueErrorclientathena_client	s3_client)	selfr   r   r   r   r   r   sessione r   b/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/athena.py__init__!   s4   zAthenaLoader.__init__returnList[Dict[str, Any]]c                 C  s   | j j| jd| jid| jid}|d }	 | j j|d}|d d d	 }|d
kr)n%|dkr@|d d }|d }d| }t||dkrHtdtd q| 	|}t
|jddS )NZDatabaseZOutputLocation)ZQueryStringZQueryExecutionContextZResultConfigurationQueryExecutionIdT)r$   ZQueryExecutionStatusStateZ	SUCCEEDEDZFAILEDZStateChangeReasonzQuery Failed: 	CANCELLEDz Query was cancelled by the user.   records)Zorient)r   Zstart_query_executionr   r   r   Zget_query_executionr   timesleep_get_result_setjsonloadsto_json)r   responsequery_execution_idstateZresp_statusZstate_change_reasonerrZ
result_setr   r   r    _execute_queryO   s0   


zAthenaLoader._execute_queryinput_stringsuffixc                 C  s$   |r| |r|d t|  S |S N)endswithlenr   r5   r6   r   r   r    _remove_suffixi   s   zAthenaLoader._remove_suffixc                 C  s"   |r| |r|t|d  S |S r7   )
startswithr9   r:   r   r   r    _remove_prefixn   s   zAthenaLoader._remove_prefixr1   r   c           	      C  s   zdd l }W n ty   tdw | j}| | |ddd}|d }d|dd  |g d }| jj||d}|j	t
|d  d	d
}|S )Nr   zTCould not import pandas python package. Please install it with `pip install pandas`./zs3://r(   z.csv)ZBucketKeyZBodyutf8)encoding)Zpandasr   r   r=   r;   splitjoinr   Z
get_objectZread_csvioBytesIOread)	r   r1   pdZ
output_uritokensZbucketkeyobjdfr   r   r    r,   s   s$   zAthenaLoader._get_result_setquery_resultTuple[List[str], List[str]]c                 C  sJ   g }g }t |d  }|D ]}|| jv r|| q|| q||fS )Nr   )listkeysr   append)r   rL   content_columnsr   Zall_columnsrI   r   r   r    _get_columns   s   
zAthenaLoader._get_columnsIterator[Document]c                 #  sj    |   }| |\ |D ]$}d fdd| D }fdd| D }t||d}|V  qd S )N
c                 3  s*    | ]\}}| v r| d | V  qdS )z: Nr   .0kv)rQ   r   r    	<genexpr>   s     z)AthenaLoader.lazy_load.<locals>.<genexpr>c                   s&   i | ]\}}| v r|d ur||qS r7   r   rU   )r   r   r    
<dictcomp>   s     z*AthenaLoader.lazy_load.<locals>.<dictcomp>)page_contentmetadata)r4   rR   rC   itemsr	   )r   rL   rowr[   r\   docr   )rQ   r   r    	lazy_load   s   
zAthenaLoader.lazy_load)NN)
r   r   r   r   r   r   r   r   r   r   )r"   r#   )r5   r   r6   r   r"   r   )r1   r   r"   r   )rL   r#   r"   rM   )r"   rS   )__name__
__module____qualname____doc__r!   r4   r;   r=   r,   rR   r`   r   r   r   r    r      s    
.



r   )
__future__r   rD   r-   r*   typingr   r   r   r   r   r   Zlangchain_core.documentsr	   Z)langchain_community.document_loaders.baser
   r   r   r   r   r    <module>   s     