o
    Zh                     @   s   d dl Z d dlmZmZmZmZ d dlZd dlmZ d dl	m
Z
 dZed Zed Zed Ze je jd	 e eZG d
d de
ZdS )    N)AnyDictListOptional)Document)
BaseLoaderzhttps://api.notion.com/v1z/databases/{database_id}/queryz/pages/{page_id}z/blocks/{block_id}/children)levelc                   @   s  e Zd ZdZ	d#dddededee deeeef  d	df
d
dZ	d	e
e fddZddifdeeef d	e
eeef  fddZdeeef d	efddZd$deded	efddZdi fdddededeeef deeeef  d	ef
ddZd e
eeef  d	efd!d"ZdS )%NotionDBLoaderaY  Load from `Notion DB`.

    Reads content from pages within a Notion Database.
    Args:
        integration_token (str): Notion integration token.
        database_id (str): Notion database id.
        request_timeout_sec (int): Timeout for Notion requests in seconds.
            Defaults to 10.
        filter_object (Dict[str, Any]): Filter object used to limit returned
            entries based on specified criteria.
            E.g.: {
                "timestamp": "last_edited_time",
                "last_edited_time": {
                    "on_or_after": "2024-02-07"
                }
            } -> will only return entries that were last edited
                on or after 2024-02-07
            Notion docs: https://developers.notion.com/reference/post-database-query-filter
            Defaults to None, which will return ALL entries.
    
   N)filter_objectintegration_tokendatabase_idrequest_timeout_secr   returnc                C   sL   |st d|st d|| _|| _d| j ddd| _|| _|p"i | _dS )zInitialize with parameters.z"integration_token must be providedzdatabase_id must be providedzBearer zapplication/jsonz
2022-06-28)AuthorizationzContent-TypezNotion-VersionN)
ValueErrortokenr   headersr   r   )selfr   r   r   r    r   d/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/notiondb.py__init__)   s   	zNotionDBLoader.__init__c                    s      }t fdd|D S )zqLoad documents from the Notion database.
        Returns:
            List[Document]: List of documents.
        c                 3   s    | ]}  |V  qd S )N)	load_page).0page_summaryr   r   r   	<genexpr>G   s    z&NotionDBLoader.load.<locals>.<genexpr>)_retrieve_page_summarieslist)r   Zpage_summariesr   r   r   loadA   s   zNotionDBLoader.loadZ	page_sized   
query_dictc                 C   sT   g }	 | j tj| jdd|| jd}||d |ds"	 |S |d|d< q)	zi
        Get all the pages from a Notion database
        OR filter based on specified criteria.
        T)r   POST)methodr!   r   resultsZhas_morenext_cursorZstart_cursor)_requestDATABASE_URLformatr   r   extendget)r   r!   Zpagesdatar   r   r   r   I   s   
z'NotionDBLoader._retrieve_page_summariesr   c           
      C   s  |d }i }|d   D ]\}}|d }|dkr | |d }n|dkr,| |d }n|dkr@|d r=dd |d D ng }n|d	krI|d	 }n|d
krc|d
 r`|d
 d  d|d
 d  nd}n|dkrt|d rq|d d nd}n|dkrg }|d r|d D ]}|d}	|	std|  ||	 qn[|dkr|d r|d nd}nL|dkr|d r|d nd}n=|dkr|d r|d nd}n.|dkr|d }n%|dkr|d }n|dkr|d }n|dkr|d r|d d nd}nd}||| < q||d< t| ||dS )z\Read a page.

        Args:
            page_summary: Page summary from Notion API.
        id
propertiestype	rich_texttitleZmulti_selectc                 S   s   g | ]}|d  qS )namer   r   itemr   r   r   
<listcomp>y   s    z,NotionDBLoader.load_page.<locals>.<listcomp>url	unique_idprefix-numberNstatusr1   Zpeoplez-Missing 'name' in 'people' property for page dateZlast_edited_timeZcreated_timeZcheckboxemailselect)Zpage_contentmetadata)	items_concatenate_rich_textr*   loggerwarningappendlowerr   _load_blocks)
r   r   Zpage_idr>   Z	prop_nameZ	prop_dataZ	prop_typevaluer3   r1   r   r   r   r   c   sp   
 




zNotionDBLoader.load_pager   block_idnum_tabsc                 C   s   g }|}|r^|  tj|d}|d D ]C}||d  }d|vr qg }|d D ]}	d|	v r9|d| |	d d   q&|d rN| j|d	 |d
 d}
||
 |d| q|d}|sd|S )zRead a block and its children.)rG   r$   r.   r/   text	contentZhas_childrenr,      )rH   
r%   )r&   	BLOCK_URLr(   rC   rE   joinr*   )r   rG   rH   Zresult_lines_arrZcur_block_idr+   resultZ
result_objZcur_result_text_arrr/   Zchildren_textr   r   r   rE      s0   


zNotionDBLoader._load_blocksGETr5   r#   c                C   s<   |  }|r
||d< tj||| j|| jd}|  | S )Nfilter)r   jsontimeout)copyrequestsrequestr   r   raise_for_statusrS   )r   r5   r#   r!   r   Zjson_payloadresr   r   r   r&      s   zNotionDBLoader._requestrich_text_arrayc                 C   s   d dd |D S )z4Concatenate all text content from a rich_text array. c                 s   s    | ]}|d  V  qdS )
plain_textNr   r2   r   r   r   r      s    z8NotionDBLoader._concatenate_rich_text.<locals>.<genexpr>)rO   )r   rZ   r   r   r   r@      s   z%NotionDBLoader._concatenate_rich_text)r
   )r   )__name__
__module____qualname____doc__strr   intr   r   r   r   r   r   r   r   rE   r&   r@   r   r   r   r   r	      sR    
	

J%

"r	   )loggingtypingr   r   r   r   rV   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   ZNOTION_BASE_URLr'   ZPAGE_URLrN   basicConfigWARNING	getLoggerr]   rA   r	   r   r   r   r   <module>   s    
