o
    ®©ZhY$  ã                   @  sŽ   d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
mZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ G d
d„ deeƒZdS )z7Loader that loads data from Sharepoint Document Libraryé    )ÚannotationsN)ÚPath)ÚAnyÚDictÚIteratorÚListÚOptional)Ú
BaseLoader)ÚDocument)ÚField)ÚO365BaseLoaderc                   @  sº   e Zd ZU dZedƒZded< 	 dZded< 	 dZded	< 	 dZ	ded
< 	 dZ
ded< 	 e ¡ d d Zded< 	 dZded< 	 ed$dd„ƒZd%dd„Zd&dd„Zd'dd „Zd(d"d#„ZdS ))ÚSharePointLoaderzLoad  from `SharePoint`..ÚstrÚdocument_library_idNzOptional[str]Úfolder_pathzOptional[List[str]]Ú
object_idsÚ	folder_idFzOptional[bool]Ú	load_authz.credentialszo365_token.txtr   Ú
token_pathÚload_extended_metadataÚreturnú	List[str]c                 C  s   ddgS )zcReturn required scopes.
        Returns:
            List[str]: A list of required scopes.
        Z
sharepointÚbasic© )Úselfr   r   úf/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/sharepoint.pyÚ_scopes%   s   zSharePointLoader._scopesúIterator[Document]c                 c  sŠ   z
ddl m}m} W n ty   tdƒ‚w |  ¡  ¡  | j¡}t||ƒs/t	d| j› dƒ‚| j
r“| | j
¡}t||ƒsFt	d| j
› dƒ‚|  |¡D ]G}t|j d¡ƒ}| jdu r_|  |¡}| jdu rq|  |¡}| d	|ji¡ | j |¡D ]}	| jdu rƒ||	jd
< | jdu rŽ|	j |¡ |	V  qwqK| jr÷| | j¡}t||ƒsªt	d| j
› dƒ‚|  |¡D ]G}t|j d¡ƒ}| jdu rÃ|  |¡}| jdu rÕ|  |¡}| d	|ji¡ | j |¡D ]}	| jdu rç||	jd
< | jdu rò|	j |¡ |	V  qÛq¯| jrH|  || j¡D ]E}t|j d¡ƒ}| jdu r|  |¡}| jdu r"|  |¡}| j |¡D ]}	| jdu r5||	jd
< | jdu rA|	j |¡ |	V  q(q| j
s½| js¿| jsÁ| ¡ }t||ƒsbt	dƒ‚|  |¡D ][}t|j d¡ƒ}| jdu r||  |¡}| jdu r‡|  |¡}| j |¡D ]-}
|
j |j¡ | jdu r¡||
jd
< | jdu r¶|
j |¡ |
j d	|ji¡ |
V  qqgdS dS dS dS )z¦
        Load documents lazily. Use this when working at a large scale.
        Yields:
            Document: A document object representing the parsed blob.
        r   )ÚDriveÚFolderzAO365 package not found, please install it with `pip install o365`zThere isn't a Drive with id Ú.zThere isn't a folder with path ÚidTZsource_full_urlÚauthorized_identitieszUnable to fetch root folderN)Z
O365.driver   r   ÚImportErrorZ_authZstorageZ	get_driver   Ú
isinstanceÚ
ValueErrorr   Zget_item_by_pathZ_load_from_folderr   ÚmetadataÚgetr   r"   r   Úget_extended_metadataÚupdateZweb_urlZ_blob_parserZ
lazy_parser   Zget_itemr   Z_load_from_object_idsZget_root_folder)r   r   r   ÚdriveZtarget_folderZblobÚfile_idZauth_identitiesZextended_metadataZparsed_blobZ	blob_partr   r   r   Ú	lazy_load-   s¨   €ÿÿ








û







û



û


ÿ
÷özSharePointLoader.lazy_loadr+   r   c                 C  sº   |   ¡ }| d¡}d| j› d|› d}dd|› i}tjd||d}| ¡ }g }| d	¡D ]-}	|	 d
¡rZ|	 d
¡ d¡pK|	 d
¡ d¡pK|	 d
¡ d¡}
|
rZ|
 d¡}|rZ| |¡ q-|S )a  
        Retrieve the access identities (user/group emails) for a given file.
        Args:
            file_id (str): The ID of the file.
        Returns:
            List: A list of group names (email addresses) that have
                  access to the file.
        Úaccess_tokenú(https://graph.microsoft.com/v1.0/drives/ú/items/z/permissionsÚAuthorizationúBearer ÚGET©ÚheadersÚvalueZgrantedToV2ZsiteUserÚuserÚgroupÚemail)Ú_fetch_access_tokenr'   r   ÚrequestsÚrequestÚjsonÚappend)r   r+   Údatar-   Úurlr4   ÚresponseZaccess_listZgroup_namesZaccess_dataZ	site_datar8   r   r   r   r"      s2   	
ÿÿÿ
ÿý

€z&SharePointLoader.authorized_identitiesr   c                 C  sD   t | jdd}| ¡ }W d  ƒ n1 sw   Y  t |¡}|S )z|
        Fetch the access token from the token file.
        Returns:
            The access token as a dictionary.
        zutf-8)ÚencodingN)Úopenr   Úreadr<   Úloads)r   ÚfÚsr>   r   r   r   r9   £   s
   
ÿ
z$SharePointLoader._fetch_access_tokenr   c           	      C  s¦   |   ¡ }| d¡}d| j› d|› d}dd|› i}tjd||d}| ¡ }| d	d
¡| di ¡ di ¡ dd¡| di ¡ dd¡ d¡d d | dd¡ dœ}|S )a  
        Retrieve extended metadata for a file in SharePoint.
        As of today, following fields are supported in the extended metadata:
        - size: size of the source file.
        - owner: display name of the owner of the source file.
        - full_path: pretty human readable path of the source file.
        Args:
            file_id (str): The ID of the file.
        Returns:
            dict: A dictionary containing the extended metadata of the file,
                  including size, owner, and full path.
        r-   r.   r/   z,?$select=size,createdBy,parentReference,namer0   r1   r2   r3   Úsizer   Z	createdByr6   ZdisplayNameÚ ZparentReferenceÚpathú:éÿÿÿÿú/Úname)rG   ÚownerÚ	full_path)r9   r'   r   r:   r;   r<   Úsplit)	r   r+   r>   r-   r?   r4   r@   r&   Zstaged_metadatar   r   r   r(   ®   s2   
ÿÿÿ


þý
üûz&SharePointLoader.get_extended_metadata)r   r   )r   r   )r+   r   r   r   )r   r   )r+   r   r   r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   Ú__annotations__r   r   r   r   r   Úhomer   r   Úpropertyr   r,   r"   r9   r(   r   r   r   r   r      s,   
 

T
"r   )rT   Ú
__future__r   r<   Úpathlibr   Útypingr   r   r   r   r   r:   Zlangchain_core.document_loadersr	   Zlangchain_core.documentsr
   Zpydanticr   Z.langchain_community.document_loaders.base_o365r   r   r   r   r   r   Ú<module>   s    