o
    Zh                     @   s\   d dl Z d dlZd dlmZmZmZ d dlmZ d dlm	Z	 d dl
mZ G dd deZdS )    N)CallableListOptional)
BaseLoader)Document)get_client_infoc                   @   sN   e Zd ZdZ	ddedededeeegef  fddZd	e	e
 fd
dZdS )GCSFileLoaderzLoad from GCS file.Nproject_namebucketblobloader_funcc                 C   s<   || _ || _|| _dtdtfdd}|r|| _dS || _dS )a9  Initialize with bucket and key name.

        Args:
            project_name: The name of the project to load
            bucket: The name of the GCS bucket.
            blob: The name of the GCS blob to load.
            loader_func: A loader function that instantiates a loader based on a
                file_path argument. If nothing is provided, the
                UnstructuredFileLoader is used.

        Examples:
            To use an alternative PDF loader:
            >> from from langchain_community.document_loaders import PyPDFLoader
            >> loader = GCSFileLoader(..., loader_func=PyPDFLoader)

            To use UnstructuredFileLoader with additional arguments:
            >> loader = GCSFileLoader(...,
            >>      loader_func=lambda x: UnstructuredFileLoader(x, mode="elements"))

        	file_pathreturnc                 S   s<   zddl m} W || S  ty   d}t| Y || S w )Nr   )UnstructuredFileLoaderzUnstructuredFileLoader loader not found! Either provide a custom loader with loader_func argument, or install `pip install langchain-google-community`)Z1langchain_community.document_loaders.unstructuredr   ImportErrorprint)r   r   message r   Z/var/www/html/lang_env/lib/python3.10/site-packages/langchain_google_community/gcs_file.pydefault_loader_func-   s   

z3GCSFileLoader.__init__.<locals>.default_loader_funcN)r
   r   r	   strr   _loader_func)selfr	   r
   r   r   r   r   r   r   __init__   s
   zGCSFileLoader.__init__r   c                 C   s  zddl m} W n ty   tdw |j| jtdd}|| j}|| j}|	| jj
}t J}| d| j }tjtj|dd || | |}| }	|	D ]}
d	|
j
v rmd
| j d| j |
j
d	< |ru|
j
| qY|	W  d   S 1 sw   Y  dS )zLoad documents.r   )storagezCould not import google-cloud-storage python package. Please, install gcs dependency group: `pip install langchain-google-community[gcs]`zgoogle-cloud-storage)Zclient_info/T)exist_oksourcezgs://N)Zgoogle.cloudr   r   ZClientr	   r   Z
get_bucketr
   r   Zget_blobmetadatatempfileTemporaryDirectoryosmakedirspathdirnameZdownload_to_filenamer   loadupdate)r   r   Zstorage_clientr
   r   r   temp_dirr   loaderdocsdocr   r   r   r%   =   s6   




$zGCSFileLoader.load)N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r%   r   r   r   r   r      s    
/r   )r!   r   typingr   r   r   Zlangchain_core.document_loadersr   Zlangchain_core.documentsr   Z!langchain_google_community._utilsr   r   r   r   r   r   <module>   s    