o
    Zh	                     @   sb   d Z ddlZddlmZmZmZ ddlmZ ddlm	Z	 ddl
mZ eeZG dd deZdS )	zScrapingAnt Web Extractor.    N)IteratorListOptional)
BaseLoader)Document)get_from_envc                   @   sV   e Zd ZdZdddddee dee dee ded	df
d
dZ	d	e
e fddZdS )ScrapingAntLoaderzTurn an url to LLM accessible markdown with `ScrapingAnt`.

    For further details, visit: https://docs.scrapingant.com/python-client
    NT)api_keyscrape_configcontinue_on_failureurlsr	   r
   r   returnc                C   sd   zddl m} W n ty   tdw |std|p tdd}||d| _|| _|| _|| _dS )	at  Initialize client.

        Args:
            urls: List of urls to scrape.
            api_key: The ScrapingAnt API key. If not specified must have env var
                SCRAPINGANT_API_KEY set.
            scrape_config: The scraping config from ScrapingAntClient.markdown_request
            continue_on_failure: Whether to continue if scraping an url fails.
        r   )ScrapingAntClientzL`scrapingant-client` package not found, run `pip install scrapingant-client`zURLs must be provided.r	   ZSCRAPINGANT_API_KEY)tokenN)	Zscrapingant_clientr   ImportError
ValueErrorr   clientr   r
   r   )selfr   r	   r
   r   r    r   g/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/scrapingant.py__init__   s   
zScrapingAntLoader.__init__c                 c   s    | j dur	| j ni }| jD ]=}z| jjdd|i|}t|jd|jidV  W q tyK } z| jr?t	
d| d|  n|W Y d}~qd}~ww dS )zFetch data from ScrapingAnt.Nurl)Zpage_contentmetadatazError fetching data from z, exception: r   )r
   r   r   Zmarkdown_requestr   markdownr   	Exceptionr   loggererror)r   r
   r   resulter   r   r   	lazy_load3   s"   
zScrapingAntLoader.lazy_load)__name__
__module____qualname____doc__r   strr   dictboolr   r   r   r   r   r   r   r   r      s"    	
 r   )r#   loggingtypingr   r   r   Zlangchain_core.document_loadersr   Zlangchain_core.documentsr   Zlangchain_core.utilsr   	getLogger__file__r   r   r   r   r   r   <module>   s    
