o
    Zh	                     @   sf   d Z ddlZddlmZmZmZmZ ddlmZ ddl	m
Z
 ddlmZ eeZG dd deZdS )	zScrapfly Web Reader.    N)IteratorListLiteralOptional)
BaseLoader)Document)get_from_envc                   @   s`   e Zd ZdZddddddee dee ded	 d
ee de	ddfddZ
dee fddZdS )ScrapflyLoaderzTurn a url to llm accessible markdown with `Scrapfly.io`.

    For further details, visit: https://scrapfly.io/docs/sdk/python
    NmarkdownT)api_keyscrape_formatscrape_configcontinue_on_failureurlsr   r   )r
   textr   r   returnc                C   sj   zddl m} W n ty   tdw |std|p tdd}||d| _ || _|| _|| _|| _dS )	a  Initialize client.

        Args:
            urls: List of urls to scrape.
            api_key: The Scrapfly API key. If not specified must have env var
                SCRAPFLY_API_KEY set.
            scrape_format: Scrape result format, one or "markdown" or "text".
            scrape_config: Dictionary of ScrapFly scrape config object.
            continue_on_failure: Whether to continue if scraping a url fails.
        r   )ScrapflyClientzC`scrapfly` package not found, please run `pip install scrapfly-sdk`zURLs must be provided.r   ZSCRAPFLY_API_KEY)keyN)	scrapflyr   ImportError
ValueErrorr   r   r   r   r   )selfr   r   r   r   r   r    r   d/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/scrapfly.py__init__   s   
zScrapflyLoader.__init__c                 c   s    ddl m} | jd ur| jni }| jD ]B}z| j ||fd| ji|}t|jd d|idV  W q tyV } z| j	rJt
d| d|  n|W Y d }~qd }~ww d S )	Nr   )ScrapeConfigformatcontenturl)Zpage_contentmetadatazError fetching data from z, exception: )r   r   r   r   Zscraper   r   Zscrape_result	Exceptionr   loggererror)r   r   r   r   responseer   r   r   	lazy_load5   s(   
zScrapflyLoader.lazy_load)__name__
__module____qualname____doc__r   strr   r   dictboolr   r   r   r%   r   r   r   r   r	      s(    	
"r	   )r)   loggingtypingr   r   r   r   Zlangchain_core.document_loadersr   Zlangchain_core.documentsr   Zlangchain_core.utilsr   	getLogger__file__r!   r	   r   r   r   r   <module>   s    
