o
    Zh                     @   sV   d dl Z d dlmZmZ d dlZd dlmZ d dlmZ e 	e
ZG dd deZdS )    N)AnyList)Document)
BaseLoaderc                   @   sd   e Zd ZdZ	ddedee defddZded	efd
dZded	e	fddZ
d	ee fddZdS )DiffbotLoaderzLoad `Diffbot` json file.T	api_tokenurlscontinue_on_failurec                 C   s   || _ || _|| _dS )a	  Initialize with API token, ids, and key.

        Args:
            api_token: Diffbot API token.
            urls: List of URLs to load.
            continue_on_failure: Whether to continue loading other URLs if one fails.
               Defaults to True.
        N)r   r   r	   )selfr   r   r	    r   c/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/diffbot.py__init__   s   
zDiffbotLoader.__init__diffbot_apireturnc                 C   s
   d| S )Nzhttps://api.diffbot.com/v3/r   )r
   r   r   r   r   _diffbot_api_url   s   
zDiffbotLoader._diffbot_api_urlurlc                 C   s8   |  d}| j|d}tj||dd}|jr| S i S )z'Get Diffbot file from Diffbot REST API.Zarticle)tokenr   
   )paramstimeout)r   r   requestsgetokjson)r
   r   Zdiffbot_urlr   responser   r   r   _get_diffbot_data!   s   
zDiffbotLoader._get_diffbot_datac                 C   s   t  }| jD ]G}z"| |}d|v r|d d d nd}d|i}|t||d W q tyM } z| jrAtd| d|  n|W Y d	}~qd	}~ww |S )
z>Extract text from Diffbot on all the URLs and return Documentsobjectsr   text source)Zpage_contentmetadatazError fetching or processing z, exception: N)	listr   r   appendr   	Exceptionr	   loggererror)r
   docsr   datar   r    er   r   r   load.   s   

zDiffbotLoader.loadN)T)__name__
__module____qualname____doc__strr   boolr   r   r   r   r   r)   r   r   r   r   r      s    
r   )loggingtypingr   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   	getLoggerr*   r$   r   r   r   r   r   <module>   s    
