o
    Zh#                     @   s~   d dl Z d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
mZ d dlmZ d dlmZ e eZdZG dd	 d	eZdS )
    N)BytesIO)ListOptionalSequence)ElementTree)Document)
BaseLoader@   c                   @   s  e Zd ZdZ	d(dddededee defd	d
Z						d)dee	e  dee	e  dee dededede	e
 fddZdedede	e ddfddZdee dedede	e
 fddZdedededee
 fddZd edefd!d"Zdedefd#d$Zed%edefd&d'ZdS )*
QuipLoaderz_Load `Quip` pages.

    Port of https://github.com/quip/quip-api/tree/master/samples/baqup
    <   F)allow_dangerous_xml_parsingapi_urlaccess_tokenrequest_timeoutr   c                C   sH   zddl m} W n ty   tdw ||||d| _|s"tddS )a  
        Args:
            api_url: https://platform.quip.com
            access_token: token of access quip API. Please refer:
                https://quip.com/dev/automation/documentation/current#section/Authentication/Get-Access-to-Quip's-APIs
            request_timeout: timeout of request, default 60s.
            allow_dangerous_xml_parsing: Allow dangerous XML parsing, defaults to False
        r   )
QuipClientz?`quip_api` package not found, please run `pip install quip_api`)r   base_urlr   ac  The quip client uses the built-in XML parser which may causesecurity issues when parsing XML data in some cases. Please see https://docs.python.org/3/library/xml.html#xml-vulnerabilities For more information, set `allow_dangerous_xml_parsing` as True if you are sure that your distribution of the standard library is not vulnerable to XML vulnerabilities.N)quip_api.quipr   ImportErrorquip_client
ValueError)selfr   r   r   r   r    r   `/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/quip.py__init__   s   zQuipLoader.__init__N  
folder_ids
thread_idsmax_docsinclude_all_foldersinclude_commentsinclude_imagesreturnc           	      C   s   |s
|s
|s
t d|pg }|r|D ]	}| |d| q|r=| j }d|v r0| |d d| d|v r=| |d d| tt|d| }| |||S )aA  
        Args:
            :param folder_ids: List of specific folder IDs to load, defaults to None
            :param thread_ids: List of specific thread IDs to load, defaults to None
            :param max_docs: Maximum number of docs to retrieve in total, defaults 1000
            :param include_all_folders: Include all folders that your access_token
                   can access, but doesn't include your private folder
            :param include_comments: Include comments, defaults to False
            :param include_images: Include images, defaults to False
        z_Must specify at least one among `folder_ids`, `thread_ids` or set `include_all`_folders as Truer   Zgroup_folder_idsZshared_folder_idsN)r   get_thread_ids_by_folder_idr   Zget_authenticated_userlistsetprocess_threads)	r   r   r   r   r   r   r    	folder_iduserr   r   r   load=   s(   


zQuipLoader.loadr&   depthc           
      C   sB  ddl m}m} z| j|}W nW |yF } z*|jdkr,td| d| d|  ntd| d| d|j  W Y d	}~d	S d	}~w |yg } ztd| d| d
|j  W Y d	}~d	S d	}~ww |d dd| }t	d| d|  |d D ]}	d|	v r| 
|	d |d | qd|	v r||	d  qd	S )z4Get thread ids by folder id and update in thread_idsr   )	HTTPError	QuipErrori  zdepth z!, Skipped over restricted folder z, z, Skipped over folder z due to unknown error Nz due to HTTP error foldertitlez	Folder %sz, Processing folder childrenr&      	thread_id)r   r*   r+   r   Z
get_foldercodeloggingwarninggetinfor"   append)
r   r&   r)   r   r*   r+   r,   er-   childr   r   r   r"   j   sF   
z&QuipLoader.get_thread_ids_by_folder_idinclude_messagesc                 C   s2   g }|D ]}|  |||}|dur|| q|S )z2Process a list of thread into a list of documents.N)process_threadr6   )r   r   r    r9   docsr0   docr   r   r   r%      s   
zQuipLoader.process_threadsr0   c                 C   s  | j |}|d d }|d d }|d d }|d d }t|}td| d| d| d	|  d
|v rz
| j |d
 }	W n$ tjj	j
yf }
 ztd| d| d|
  W Y d }
~
d S d }
~
ww ||||d}d}|rw| |	}|r|d | | }t|d
 | |dS d S )Nthreadidr-   linkZupdated_useczprocessing thread z title z link z update_ts htmlzError parsing thread  z, skipping, )r-   	update_tsr>   source z/n)Zpage_contentmetadata)r   Z
get_threadr
   _sanitize_titleloggerr5   Zparse_document_htmlxmletreeZcElementTreeZ
ParseErrorerrorprocess_thread_imagesprocess_thread_messagesr   )r   r0   r    r9   r=   r-   r?   rB   sanitized_titletreer7   rE   textr   r   r   r:      sH   


zQuipLoader.process_threadrN   c                 C   s   d}zddl m} ddlm} W n ty   tdw |dD ]I}|d}|r/|ds0q!|d	\}}}}	| j	||	}
z|
t|
 }|d
 || }W q! tyj } z
td|  |d }~ww |S )NrD   r   )Image)pytesseractzg`Pillow or pytesseract` package not found, please run `pip install Pillow` or `pip install pytesseract`imgsrcz/blob/
z!failed to convert image to text, )ZPILrP   rQ   r   iterr4   
startswithsplitr   Zget_blobopenr   readZimage_to_stringOSErrorrG   rJ   )r   rN   rO   rP   rQ   rR   rS   _r0   Zblob_idZblob_responseimager7   r   r   r   rK      s0   
z QuipLoader.process_thread_imagesc                 C   s`   d }g }	 | j j||dd}|| |r|d d d }nnq|  dd |D }d	|S )
NTd   )max_created_useccountZcreated_usecr/   c                 S   s   g | ]}|d  qS )rO   r   ).0messager   r   r   
<listcomp>   s    z6QuipLoader.process_thread_messages.<locals>.<listcomp>rU   )r   Zget_messagesextendreversejoin)r   r0   r_   messageschunkZtextsr   r   r   rL      s   
	
z"QuipLoader.process_thread_messagesr-   c                 C   s8   t dd| }t dd|}t|tkr|d t }|S )Nz\srA   z(?u)[^- \w.]rD   )resublen_MAXIMUM_TITLE_LENGTH)r-   rM   r   r   r   rF      s
   zQuipLoader._sanitize_title)r   )NNr   FFF)__name__
__module____qualname____doc__strr   intboolr   r   r   r(   r"   r   r%   r:   r   rK   rL   staticmethodrF   r   r   r   r   r
      s    	
(


-
%

,r
   )r2   rj   Zxml.etree.cElementTreerH   ior   typingr   r   r   Zxml.etree.ElementTreer   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   	getLoggerrn   rG   rm   r
   r   r   r   r   <module>   s    
