o
    Zh)                     @   sL   d dl mZmZmZ d dlmZ d dlmZ d dlm	Z	 G dd deZ
dS )    )IteratorLiteralOptional)
BaseLoader)Document)get_from_envc                
   @   sR   e Zd ZdZdddddedee ded d	ee fd
dZde	e
 fddZdS )SpiderLoaderzLoad web pages as Documents using Spider AI.

    Must have the Python package `spider-client` installed and a Spider API key.
    See https://spider.cloud for more.
    Nscrape)api_keymodeparamsurlr
   r   r	   crawlr   c                C   s   |du r	ddd}zddl m} W n ty   tdw |dvr)td	| d
|p/tdd}||d| _ || _|| _|| _dS )a  Initialize with API key and URL.

        Args:
            url: The URL to be processed.
            api_key: The Spider API key. If not specified, will be read from env
            var `SPIDER_API_KEY`.
            mode: The mode to run the loader in. Default is "scrape".
                 Options include "scrape" (single page) and "crawl" (with deeper
                 crawling following subpages).
            params: Additional parameters for the Spider API.
        NmarkdownT)Zreturn_formatmetadatar   )SpiderzB`spider` package not found, please run `pip install spider-client`r   zUnrecognized mode 'z%'. Expected one of 'scrape', 'crawl'.r
   ZSPIDER_API_KEY)r
   )spiderr   ImportError
ValueErrorr   r   r   r   )selfr   r
   r   r   r    r   b/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/spider.py__init__   s(   

zSpiderLoader.__init__returnc                 c   s    g }| j dkr| jj| j| jd}|r|| n| j dkr0| jj| j| jd}|r0|| |D ]>}| j dkrT|d dd}|d di }|durTt	||d	V  | j dkrp|dd}|di }|durpt	||d	V  q2dS )
z+Load documents based on the specified mode.r	   )r   r   r   content r   N)page_contentr   )
r   r   Z
scrape_urlr   r   appendZ	crawl_urlextendgetr   )r   Zspider_docsresponsedocr   r   r   r   r   	lazy_load:   s6   





zSpiderLoader.lazy_load)__name__
__module____qualname____doc__strr   r   dictr   r   r   r#   r   r   r   r   r      s    

+r   N)typingr   r   r   Zlangchain_core.document_loadersr   Zlangchain_core.documentsr   Zlangchain_core.utilsr   r   r   r   r   r   <module>   s
    