o
    Zh                     @   sl   d dl Z d dlmZmZmZmZmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ G dd deZdS )	    N)AnyDictIteratorListOptionalUnion)Document)get_from_dict_or_env)PyPDFLoader)
BaseLoaderc                   @   s   e Zd ZdZ	ddedee dee fddZeded	efd
dZ	d	e
fddZde
de
deeef d	efddZd	ee fddZd	ee fddZd	ee fddZd	ee fddZdS )RSpaceLoadera'  Load content from RSpace notebooks, folders, documents or PDF Gallery files.

    Map RSpace document <-> Langchain Document in 1-1. PDFs are imported using PyPDF.

    Requirements are rspace_client (`pip install rspace_client`) and PyPDF if importing
     PDF docs (`pip install pypdf`).

    N	global_idapi_keyurlc                 C   s8   |||d}t |}|d | _|d | _|d | _dS )a  api_key: RSpace API key - can also be supplied as environment variable
        'RSPACE_API_KEY'
        url: str
        The URL of your RSpace instance - can also be supplied as environment
        variable 'RSPACE_URL'
        global_id: str
         The global ID of the resource to load,
        e.g. 'SD12344' (a single document); 'GL12345'(A PDF file in the gallery);
        'NB4567' (a notebook); 'FL12244' (a folder)
        )r   r   r   r   r   r   N)r   validate_environmentr   r   r   )selfr   r   r   argsZverified_args r   b/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/rspace.py__init__   s   


zRSpaceLoader.__init__valuesreturnc                 C   s@   t |dd|d< t |dd|d< d|vs|d du rtd|S )z3Validate that API key and URL exist in environment.r   ZRSPACE_API_KEYr   Z
RSPACE_URLr   NzBNo value supplied for global_id. Please supply an RSpace global ID)r	   
ValueError)clsr   r   r   r   r   ,   s   z!RSpaceLoader.validate_environmentc                 C   st   z
ddl m}m} W n ty   tdw z|| j| j}|  W n ty4   td| j dw ||j	fS )zCreate a RSpace client.r   )elnfield_contentz(You must run `pip install rspace_client`z%Unable to initialize client - is url z or api key  correct?)
Zrspace_client.elnr   r   ImportErrorZ	ELNClientr   r   Z
get_status	ExceptionZFieldContent)r   r   r   r   r   r   _create_rspace_client7   s   
z"RSpaceLoader._create_rspace_clientclir   d_idc                 C   s   d}| |}|d|d  d7 }|d D ]}||d  d7 }||d }|| 7 }|d7 }qtdd	|d  d
|d  i|dS )N z<h2>namez<h2/>fields
contentsourcezrspace: -ZglobalId)metadataZpage_content)Zget_documentZget_textr   )r   r   r   r    r%   docfZfcr   r   r   _get_docJ   s   

zRSpaceLoader._get_docc                 c   s$    |   \}}| ||| jV  d S )N)r   r+   r   )r   r   r   r   r   r   _load_structured_docX   s   z!RSpaceLoader._load_structured_docc                 c   s^    |   \}}| jr|j| jdd  dgd}dd |d D }|D ]
}| |||V  q"d S )N   Zdocument)Z	folder_idZtypesToIncludec                 S   s   g | ]}|d  qS )idr   ).0dr   r   r   
<listcomp>b   s    z2RSpaceLoader._load_folder_tree.<locals>.<listcomp>records)r   r   Zlist_folder_treer+   )r   r   r   Zdocs_in_folderZdoc_idsZdoc_idr   r   r   _load_folder_tree\   s   zRSpaceLoader._load_folder_treec           	      c   s    |   \}}|| j}tj|d \}}| dkr>| j d}|| j| t|}|	 D ]}| j|j
d< |V  q2d S d S )Nr"   z.pdfZ
rspace_src)r   Zget_file_infor   ospathsplitextlowerZdownload_filer
   	lazy_loadr(   )	r   r   r   	file_info_extoutfileZ
pdf_loaderZpdfr   r   r   	_load_pdff   s   zRSpaceLoader._load_pdfc                 c   s    | j rd| j v r|  D ]}|V  qd S | j r)d| j v r)|  D ]}|V  q!d S | j rA| j dd dv rA|  D ]}|V  q9d S td)NZGLZSDr   r-   )ZFLZNBzUnknown global ID type)r   r=   r,   r3   r   )r   r0   r   r   r   r8   r   s   zRSpaceLoader.lazy_load)NN)__name__
__module____qualname____doc__strr   r   classmethodr   r   r   r   r   intr   r+   r   r,   r3   r=   r8   r   r   r   r   r      s$    


"
r   )r4   typingr   r   r   r   r   r   Zlangchain_core.documentsr   Zlangchain_core.utilsr	   Z$langchain_community.document_loadersr
   Z)langchain_community.document_loaders.baser   r   r   r   r   r   <module>   s     