o
    Zhy4                     @  st  d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZ ddlmZmZmZmZmZ dd	lmZm Z  dd
l!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ erddl,m-Z- ddl.m/Z/m0Z0 e1e2Z3dZ4G dd deZ5G dd deZ6d!ddZ7d"ddZ8G dd  d e#eZ9dS )#1Base class for all loaders that uses O365 Package    )annotationsN)abstractmethod)datetime)PathPurePath)TYPE_CHECKINGAnyDictIterableListOptionalSequenceUnion)	BaseModelFieldFilePathPrivateAttr	SecretStr)BaseSettingsSettingsConfigDict)BaseBlobParser
BaseLoader)FileSystemBlobLoader)Blob)MimeTypeBasedParser)
get_parser)Account)DriveFolderi  P c                   @  sF   e Zd ZU edddZded< edddZded< ed	d
dddZdS )_O365Settings.ZO365_CLIENT_ID)aliasstr	client_idZO365_CLIENT_SECRETr   client_secretFz.env ignore)case_sensitiveZenv_fileZ
env_prefixextraN)	__name__
__module____qualname__r   r#   __annotations__r$   r   Zmodel_config r-   r-   e/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/base_o365.pyr    *   s   
 
r    c                   @  s&   e Zd ZU e d d Zded< dS )_O365TokenStorage.credentialszo365_token.txtr   
token_pathN)r)   r*   r+   r   homer1   r,   r-   r-   r-   r.   r/   3   s   
 r/   
file_typesSequence[str]returnDict[str, str]c                 C  s@   i }| D ]}t d| \}}|r|||< qtd| |S )2Fetch the mime types for the specified file types.zfile.zUnknown mimetype of extension )	mimetypes
guess_type
ValueError)r3   mime_types_mappingext	mime_type_r-   r-   r.   fetch_mime_types7   s   
r?   
mime_typesc                 C  s>   i }| D ]}t |}|r|||dd < qtd| |S )r7      NzUnknown mimetype )r8   guess_extensionr:   )r@   r;   r=   r<   r-   r-   r.   fetch_extensionsC   s   
rC   c                      s   e Zd ZU dZeedZded< 	 dZded< 	 e	Z
ded	< 	 dZded
< 	 dZded< 	 i Zded< 	 e Zded< e Zded< e Zded< d. fddZed/ddZeed0dd Zd1d$d%Zd2d)d*Zd3d,d-Z  ZS )4O365BaseLoaderr   )default_factoryr    settingsFboolauth_with_tokenzUnion[int, str]
chunk_size	recursiveNzOptional[datetime]modified_sincezOptional[Dict[str, Any]]handlersr   _blob_parserr4   _file_typesr6   _mime_typeskwargsr	   r5   Nonec                   s  t  jdi |  jrdt j }zt| _tt| _ fdd j	 D }W n- t
yZ   zt| _tt j  _ j}W n t
yW   t
d| dw Y nw t|d d _d S td _t jtsytdt j tt jj  _d S )	Nc                   s   i | ]
\}} j | |qS r-   rO   ).0	extensionhandlerselfr-   r.   
<dictcomp>   s    
z+O365BaseLoader.__init__.<locals>.<dictcomp>z=`handlers` keys must be either file extensions or mimetypes.
zj could not be interpreted as either.
File extensions and mimetypes cannot mix. Use either one or the other)rL   Zfallback_parserdefaultzLget_parser("default) was supposed to return MimeTypeBasedParser.It returned r-   )super__init__rL   listkeysr?   rO   setrN   itemsr:   rC   r   rM   r   
isinstance	TypeErrortype)rW   rP   Zhandler_keysZmime_handlers	__class__rV   r.   r[      sD   





zO365BaseLoader.__init__c                 C  s   | j S )zBReturn a dict of supported file types to corresponding mime types.rR   rV   r-   r-   r.   _fetch_mime_types   s   z O365BaseLoader._fetch_mime_types	List[str]c                 C  s   dS )zReturn required scopes.Nr-   rV   r-   r-   r.   _scopes   s    zO365BaseLoader._scopesfolderr   Iterable[Blob]c                 c  s   | j }| }i }t }tjtj|dd |D ]U}|jrq|j	t
| v rq| jr3|j| jkrq|j}td|jrJ|jjd tj|j }|j|| jd ||j	t|jt|jt|jt|j|jt|jd||j< qt|d}| D ]"}	t |	jt!st"d|	jr|#t|	jji }
|	j$%|
 |	V  q{W d	   n1 sw   Y  | j&r|' D ]}| (|E d	H  qd	S d	S )
a  Lazily load all files from a specified folder of the configured MIME type.

        Args:
            folder: The Folder instance from which the files are to be loaded. This
                Folder instance should represent a directory in a file system where the
                files are stored.

        Yields:
            An iterator that yields Blob instances, which are binary representations of
                the files loaded from the folder.
        T)exist_ok"Doc.aspx\?sourcedoc=.*file=([^&]+)/Zto_pathrI   sourcer=   createdmodified
created_bymodified_bydescriptionidpath#Expected blob path to be a PurePathN))re   Z	get_itemstempfileTemporaryDirectoryosmakedirsrw   dirnameis_filer=   r\   valuesrK   rq   web_urlresearch_parenturllibparsequotenamedownloadrI   r"   rp   rr   rs   rt   	object_idr   yield_blobsr`   r   NotImplementedErrorgetmetadataupdaterJ   Zget_child_folders_load_from_folder)rW   rh   file_mime_typesr_   metadata_dicttemp_dirfilero   loaderblobfile_metadata_Z	subfolderr-   r-   r.   r      s^   

%z O365BaseLoader._load_from_folderdriver   
object_idsc                 c  sP   | j }i }t }|D ]\}||}|s#td| d| d q|jri|jt|	 v ri|j
}td|j
rF|jj
d tj|j }|j|| jd ||j|j|jt|jt|j|jt|jd||j< qt|d}	|	 D ]"}
t|
jtst d	|
jr|!t|
jji }|
j"#| |
V  qsW d
   d
S 1 sw   Y  d
S )a  Lazily load files specified by their object_ids from a drive.

        Load files into the system as binary large objects (Blobs) and return Iterable.

        Args:
            drive: The Drive instance from which the files are to be loaded. This Drive
                instance should represent a cloud storage service or similar storage
                system where the files are stored.
            object_ids: A list of object_id strings. Each object_id represents a unique
                identifier for a file in the drive.

        Yields:
            An iterator that yields Blob instances, which are binary representations of
            the files loaded from the drive using the specified object_ids.
        z!There isn't a file withobject_id z
 in drive .rk   rl   rm   rn   rv   rx   N)$re   ry   rz   Zget_itemloggingwarningr~   r=   r\   r   r   r   r   r   r   r   r   r   r   rI   rp   rq   r"   rr   rs   rt   r   r   r   r`   rw   r   r   r   r   r   )rW   r   r   r   r   r   r   r   ro   r   r   r   r-   r-   r.   _load_from_object_ids   s`   


"z$O365BaseLoader._load_from_object_idsr   c                 C  s   z
ddl m}m} W n ty   tdw | jr?t }|j}||j|jd}|d| j	j
| j	j f| j|dddi}|S |t d d	}|d| j	j
| j	j f| j|dddi}|  |S )znAuthenticates the OneDrive API client

        Returns:
            The authenticated Account object.
        r   )r   FileSystemTokenBackendzAO365 package not found, please install it with `pip install o365`)r1   Ztoken_filename)credentialsZscopestoken_backendZraise_http_errorsFr0   )r1   Nr-   )O365r   r   ImportErrorrH   r/   r1   parentr   rF   r#   r$   Zget_secret_valuerg   r   r2   Zauthenticate)rW   r   r   Ztoken_storager1   r   accountr-   r-   r.   _auth$  sJ   



zO365BaseLoader._auth)rP   r	   r5   rQ   )r5   r6   )r5   rf   )rh   r   r5   ri   )r   r   r   rf   r5   ri   )r5   r   )r)   r*   r+   __doc__r   r    rF   r,   rH   
CHUNK_SIZErI   rJ   rK   rL   r   rM   rN   rO   r[   propertyre   r   rg   r   r   r   __classcell__r-   r-   rc   r.   rD   O   s4   
 &

8=rD   )r3   r4   r5   r6   )r@   r4   r5   r6   ):r   
__future__r   r   r8   r{   r   ry   r   abcr   r   pathlibr   r   typingr   r	   r
   r   r   r   r   r   Zpydanticr   r   r   r   r   Zpydantic_settingsr   r   Z)langchain_community.document_loaders.baser   r   Z=langchain_community.document_loaders.blob_loaders.file_systemr   Z8langchain_community.document_loaders.blob_loaders.schemar   Z4langchain_community.document_loaders.parsers.genericr   Z5langchain_community.document_loaders.parsers.registryr   r   r   Z
O365.driver   r   	getLoggerr)   loggerr   r    r/   r?   rC   rD   r-   r-   r-   r.   <module>   s:    (
	

