o
    Zh^                     @   s~   d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZmZmZ G dd deeZdS )	    N)Path)AnyClassVarDictListOptionalSequenceTupleUnion)
BaseLoader)Document)	BaseModelfield_validatormodel_validatorc                   @   s  e Zd ZU dZdZeeedf  ed< e	
 d d Ze	ed< 	 e	
 d d Ze	ed	< 	 e	
 d d
 Ze	ed< 	 dZeed< 	 dZee ed< 	 dZeee  ed< 	 dZeee  ed< 	 dZeed< 	 dZeee  ed< 	 dZeed< 	 dZeed< 	 i Zedef ed< 	 dZeed< 	 dZeed< 	 dgZee ed< 	 dedefddZ dedefd d!Z!dedefd"d#Z"dedee fd$d%Z#e$d&d'e%d(eeef defd)d*Z&e'd	d+ed,edefd-d.Z(e'dd+ee dee fd/d0Z)defd1d2Z*dedee+ fd3d4Z,dede+fd5d6Z-dd7dedeee  dee+ fd8d9Z.d:ededeeee/eee f f  fd;d<Z0dee+ fd=d>Z1dedee+ fd?d@Z2dee+ fdAdBZ3dee+ fdCdDZ4dS )EGoogleDriveLoaderz%Load Google Docs from `Google Drive`.)*https://www.googleapis.com/auth/drive.filez.https://www.googleapis.com/auth/drive.readonlyz3https://www.googleapis.com/auth/drive.meet.readonlyz7https://www.googleapis.com/auth/drive.metadata.readonlyz.https://www.googleapis.com/auth/drive.metadata.VALID_SCOPESz.credentialsz	keys.jsonservice_account_keyzcredentials.jsoncredentials_pathz
token.json
token_pathNcredentials	folder_iddocument_idsfile_idsF	recursive
file_typesload_trashed_filesfile_loader_clsstrfile_loader_kwargs	load_authload_extended_metadatar   scopesidreturnc              
   C   s   zddl }ddlm} W n ty } ztd|d}~ww |  }|dd|d}z| j|dd	 }|d W S  |jj	yL   t
d
|  Y dS  tyl } zt
d|  t
d|  W Y d}~dS d}~ww )zFetch the size of the file.r   Nbuild_You must run `pip install --upgrade google-api-python-client` to load authorization identities.drivev3r   sizefileIdfieldszinsufficientFilePermissions: The user does not have sufficient                 permissions to retrieve size for the file with fileId: unknownzAError occurred while fetching the size for the file with fileId: Error: googleapiclient.errorsgoogleapiclient.discoveryr&   ImportError_load_credentialsfilesgetexecuteerrors	HttpErrorprint	Exceptionselfr#   googleapiclientr&   exccredsservicefile rD   W/var/www/html/lang_env/lib/python3.10/site-packages/langchain_google_community/drive.py_get_file_size_from_idB   s>   
z(GoogleDriveLoader._get_file_size_from_idc              
   C   s   zddl }ddlm} W n ty } ztd|d}~ww |  }|dd|d}z| j|dd	 }|d d d
W S  |jj	yQ   t
d|  Y dS  tym } zt
d| d|  W Y d}~dS d}~ww )zFetch the owner of the file.r   Nr%   r'   r(   r)   r*   Zownersr,   emailAddresszinsufficientFilePermissions: The user does not have sufficient                 permissions to retrieve owner for the file with fileId: r/   zRError occurred while fetching the owner for the file with fileId:                 z with error: r1   r=   rD   rD   rE   _get_owner_metadata_from_ida   sD   z-GoogleDriveLoader._get_owner_metadata_from_idc           
   
   C   s   zddl }ddlm} W n ty } ztd|d}~ww |  }|dd|d}g }|}	 z!| j|d	d
 }	||	d  d|	v rL|	d d }nW nW n |j	j
ya   td|  Y nw q.|  d|S )z7Fetch the full path of the file starting from the root.r   Nr%   r'   r(   r)   r*   Tzname, parentsr,   nameparentszinsufficientFilePermissions: The user does not have sufficient                    permissions to retrieve path for the file with fileId: /)r2   r3   r&   r4   r5   r6   r7   r8   appendr9   r:   r;   reversejoin)
r>   r#   r?   r&   r@   rA   rB   pathZ
current_idrC   rD   rD   rE   _get_file_path_from_id   sL   
z(GoogleDriveLoader._get_file_path_from_idc              
   C   s$  zddl }ddlm} W n ty } ztd|d}~ww g }|  }|dd|d}z| j|d }W n5 |jj	yK   t
d	|  | Y S  tyl } zt
d
|  t
d|  |W  Y d}~S d}~ww |di D ]}	| j||	dddd d}
|
r||
 qs|S )z2Fetch the list of people having access to ID file.r   Nr%   r'   r(   r)   r*   r-   zinsufficientFilePermissions: The user does not have sufficient                 permissions to retrieve permission for the file with fileId: zXError occurred while fetching the permissions for the file with                 fileId: r0   permissionsr#    rG   )r-   ZpermissionIdr.   )r2   r3   r&   r4   r5   rR   listr8   r9   r:   r;   r<   r7   rL   )r>   r#   r?   r&   r@   authorized_identitiesrA   rB   rR   permZemail_idrD   rD   rE   _get_identity_metadata_from_id   sX   
z0GoogleDriveLoader._get_identity_metadata_from_idbefore)modevaluesc                    s(  | dr| ds| drtd| ds&| ds&| ds&td| d}|r| ds7| dr;tddd	d
ddt t  }ddd  D }ddd  D }|D ]}||vr{td| d| d| qhdtdtffdd  fdd|D |d< |S )zDValidate that either folder_id or document_ids is set, but not both.r   r   r   zICannot specify both folder_id and document_ids nor folder_id and file_idsz8Must specify either folder_id, document_ids, or file_idsr   zdfile_types can only be given when folder_id is given, (not when document_ids or file_ids are given).$application/vnd.google-apps.document'application/vnd.google-apps.spreadsheetapplication/pdf(application/vnd.google-apps.presentation)ZdocumentsheetZpdfZpresentation, c                 S      g | ]}d | d qS 'rD   .0xrD   rD   rE   
<listcomp>       z5GoogleDriveLoader.validate_inputs.<locals>.<listcomp>c                 S   ra   rb   rD   rd   rD   rD   rE   rg      rh   zGiven file type z) is not supported. Supported values are: z; and their full-form names: rf   r$   c                    s   |  v r |  S | S )NrD   )rf   )type_mappingrD   rE   	full_form   s   z4GoogleDriveLoader.validate_inputs.<locals>.full_formc                    s   g | ]} |qS rD   rD   )re   	file_type)rj   rD   rE   rg      s    )r7   
ValueErrorrT   keysrZ   rN   r   )clsrZ   r   Zallowed_typesZshort_namesZ
full_namesrk   rD   )rj   ri   rE   validate_inputs   sT   

z!GoogleDriveLoader.validate_inputsvkwargsc                 K   s   |  std| d|S )z&Validate that credentials_path exists.zcredentials_path z does not exist)existsrl   )rn   rp   rq   rD   rD   rE   validate_credentials_path  s   z+GoogleDriveLoader.validate_credentials_pathc                    sH   |st d fdd|D }|r"t dd| dd j |S )z^Validate that the provided scopes are not empty and
        are valid Google Drive API scopes.z#At least one scope must be providedc                    s   g | ]	}| j vr|qS rD   )r   )re   scopern   rD   rE   rg     s    z5GoogleDriveLoader.validate_scopes.<locals>.<listcomp>z#Invalid Google Drive API scope(s): r`   z. Valid scopes are: )rl   rN   r   )rn   rp   Zinvalid_scopesrD   ru   rE   validate_scopes
  s   
z!GoogleDriveLoader.validate_scopesc           
      C   s^  z ddl m} ddlm} ddlm} ddlm} ddlm	} W n t
y+   t
dw d}| j r?|jjt| j| jd	S | j rN|t| j| j}| jrV| j}|S |r[|js|rj|jrj|jrj||  n%d
tjvr| \}}|| j}|r~|S n|t| j| j}|jdd}t| jd}	|	|  W d   |S 1 sw   Y  |S )zLoad credentials.r   )default)Request)service_account)Credentials)InstalledAppFlowzxCould execute GoogleDriveLoader. Please, install drive dependency group: `pip install langchain-google-community[drive]`N)r"   ZGOOGLE_APPLICATION_CREDENTIALS)portw) Zgoogle.authrw   Zgoogle.auth.transport.requestsrx   Zgoogle.oauth2ry   Zgoogle.oauth2.credentialsrz   Zgoogle_auth_oauthlib.flowr{   r4   r   rr   Zfrom_service_account_filer   r"   r   Zfrom_authorized_user_filer   ZvalidZexpiredZrefresh_tokenrefreshosenvironZwith_scopesZfrom_client_secrets_filer   Zrun_local_serveropenwriteto_json)
r>   rw   rx   ry   rz   r{   rA   projectZflowtokenrD   rD   rE   r5     sT   





z#GoogleDriveLoader._load_credentialsc              	   C   s  ddl m} |  }|dd|d}| j|d }|dg }| jr)| |}| jr;| 	|}| 
|}	| |}
g }|D ]}|d d }|  j||d	 }|d
g }|s]q?|d }t|dd ddD ]f\}}d| d|d d  |d d  d| |d}| jr||d< | jr||d< |	|d< |
|d< g }t|D ]\}}t||kr||  nd}|| d|   qd|}|t||d qkq?|S )z%Load a sheet and all tabs from an ID.r   r%   sheetsZv4r*   )spreadsheetId
propertiestitle)r   rangerZ      N)startz'https://docs.google.com/spreadsheets/d/z
/edit?gid=ZsheetIdz - )sourcer   rowrU   ownerr+   	full_pathrS   z: 
page_contentmetadata)r3   r&   r5   Zspreadsheetsr7   r8   r    rW   r!   rH   rF   rP   rZ   	enumeratelenstriprL   rN   r   )r>   r#   r&   rA   Zsheets_serviceZspreadsheetr   rU   r   r+   r   Z	documentsr_   Z
sheet_nameresultrZ   headerir   r   contentjrp   r   r   rD   rD   rE   _load_sheet_from_idO  sZ   





z%GoogleDriveLoader._load_sheet_from_idc              
   C   s  ddl m} ddlm} ddlm} ddlm} |  }|dd|d}| j	r+| 
|}| jr=| |}	| |}
| |}| j|d	d
d }| j|dd}| }|||}d}z|du rk| \}}|du saW n' |y } z|jjdkrtd| ntd| W Y d}~nd}~ww | d}|d |d |d d}| j	r||d< | jr|	|d< |
|d< ||d< t||dS )zLoad a document from an ID.r   BytesIOr%   )r:   MediaIoBaseDownloadr(   r)   r*   TzmodifiedTime,name,webViewLink)r-   supportsAllDrivesr.   z
text/plain)r-   mimeTypeFi  zFile not found: {}zAn error occurred: {}Nzutf-8ZwebViewLinkrI   ZmodifiedTime)r   r   whenrU   r   r+   r   r   )ior   r3   r&   r2   r:   googleapiclient.httpr   r5   r    rW   r!   rH   rF   rP   r6   r7   r8   Zexport_media
next_chunkrespstatusr;   formatgetvaluedecoder   )r>   r#   r   r&   r:   r   rA   rB   rU   r   r+   r   rC   requestfh
downloaderdoner   etextr   rD   rD   rE   _load_document_from_id  s^   



	



z(GoogleDriveLoader._load_document_from_idr   c          
         s   ddl m} |  }|dd|d}| ||} r# fdd|D }n|}g }|D ]C}	|	d r3| js3q)|	d	 d
v rD|| |	d  q)|	d	 dkrU|| |	d  q)|	d	 dks`| j	durk|| 
|	d  q)	 q)|S )zLoad documents from a folder.r   r%   r(   r)   r*   c                    s   g | ]
}|d   v r|qS )r   rD   )re   fr   rD   rE   rg     s    zAGoogleDriveLoader._load_documents_from_folder.<locals>.<listcomp>Ztrashedr   )r[   r^   r#   r\   r]   N)r3   r&   r5   _fetch_files_recursiver   rL   r   extendr   r   _load_file_from_id)
r>   r   r   r&   rA   rB   r6   _filesreturnsrC   rD   r   rE   _load_documents_from_folder  s(   
z-GoogleDriveLoader._load_documents_from_folderrB   c                 C   sv   |  jd| dddddd }|dg }g }|D ]}|d d	kr3| jr2|| ||d
  q|| q|S )z+Fetch all files and subfolders recursively.rc   z' in parentsi  Tz:nextPageToken, files(id, name, mimeType, parents, trashed))qZpageSizeZincludeItemsFromAllDrivesr   r.   r6   r   z"application/vnd.google-apps.folderr#   )r6   rT   r8   r7   r   r   r   rL   )r>   rB   r   resultsr6   r   rC   rD   rD   rE   r     s&   
z(GoogleDriveLoader._fetch_files_recursivec                    s"    j std fdd j D S )z"Load documents from a list of IDs.zdocument_ids must be setc                    s   g | ]}  |qS rD   )r   )re   Zdoc_idr>   rD   rE   rg     s    z>GoogleDriveLoader._load_documents_from_ids.<locals>.<listcomp>)r   rl   r   rD   r   rE   _load_documents_from_ids  s   z*GoogleDriveLoader._load_documents_from_idsc                 C   s   ddl m} ddlm} ddlm} |  }|dd|d}| jr%| |}| j	r7| 
|}| |}	| |}
| j|dd	 }| j|d
}| }|||}d}|du rb| \}}|du sX| jdur|d | jdd|i| j}| }|D ]3}d| d|jd< d|jvr|d |jd< | jr||jd< | j	r||jd< |	|jd< |
|jd< q}|S ddlm} | }|||}g }t|jD ]3\}}d| d|d |d}| jr||d< | j	r||d< |	|d< |
|d< |t| |d q|S )zLoad a file from an ID.r   r   r%   r   r(   r)   r*   T)r-   r   rQ   FNrC   z https://drive.google.com/file/d/z/viewr   r   rI   rU   r   r+   r   )	PdfReader)r   r   pager   rD   )r   r   r3   r&   r   r   r5   r    rW   r!   rH   rF   rP   r6   r7   r8   Z	get_mediar   r   seekr   loadr   ZPyPDF2r   r   r   ZpagesrL   r   Zextract_text)r>   r#   r   r&   r   rA   rB   rU   r   r+   r   rC   r   r   r   r   r   loaderdocsdocr   r   Z
pdf_readerr   r   r   rD   rD   rE   r      sr   













z$GoogleDriveLoader._load_file_from_idc                 C   s2   | j stdg }| j D ]
}|| | q|S )zLoad files from a list of IDs.zfile_ids must be set)r   rl   r   r   )r>   r   Zfile_idrD   rD   rE   _load_file_from_idsD  s   
z%GoogleDriveLoader._load_file_from_idsc                 C   s.   | j r| j| j | jdS | jr|  S |  S )zLoad documents.r   )r   r   r   r   r   r   r   rD   rD   rE   r   M  s   zGoogleDriveLoader.load)5__name__
__module____qualname____doc__r   r   r	   r   __annotations__r   homer   r   r   r   r   r   r   r   r   r   r   boolr   r   r   r   r   r   r    r!   r"   rF   rH   rP   rW   r   classmethodro   r   rs   rv   r5   r   r   r   r   r
   r   r   r   r   r   rD   rD   rD   rE   r      s   
 &,/577

#
D	r   )r   pathlibr   typingr   r   r   r   r   r   r	   r
   Zlangchain_core.document_loadersr   Zlangchain_core.documentsr   Zpydanticr   r   r   r   rD   rD   rD   rE   <module>   s   	(