o
    Zh                     @   sf   d dl Z d dlmZmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZ e eZG dd de
ZdS )    N)AnyIteratorListOptionalSequence)Document)
BaseLoader)NewsURLLoaderc                   @   s   e Zd ZdZ				ddeee  dee deded	ed
dfddZ	d
e
e fddZed
ee fddZd
ee fddZdS )RSSFeedLoaderaQ  Load news articles from `RSS` feeds using `Unstructured`.

    Args:
        urls: URLs for RSS feeds to load. Each articles in the feed is loaded into its own document.
        opml: OPML file to load feed urls from. Only one of urls or opml should be provided.  The value
        can be a URL string, or OPML markup contents as byte or string.
        continue_on_failure: If True, continue loading documents even if
            loading fails for a particular URL.
        show_progress_bar: If True, use tqdm to show a loading progress bar. Requires
            tqdm to be installed, ``pip install tqdm``.
        **newsloader_kwargs: Any additional named arguments to pass to
            NewsURLLoader.

    Example:
        .. code-block:: python

            from langchain_community.document_loaders import RSSFeedLoader

            loader = RSSFeedLoader(
                urls=["<url-1>", "<url-2>"],
            )
            docs = loader.load()

    The loader uses feedparser to parse RSS feeds.  The feedparser library is not installed by default so you should
    install it if using this loader:
    https://pythonhosted.org/feedparser/

    If you use OPML, you should also install listparser:
    https://pythonhosted.org/listparser/

    Finally, newspaper is used to process each article:
    https://newspaper.readthedocs.io/en/latest/
    NTFurlsopmlcontinue_on_failureshow_progress_barnewsloader_kwargsreturnc                 K   s:   |du |du krt d|| _|| _|| _|| _|| _dS )zInitialize with urls or OPML.Nz;Provide either the urls or the opml argument, but not both.)
ValueErrorr   r   r   r   r   )selfr   r   r   r   r    r   _/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/rss.py__init__/   s   	
zRSSFeedLoader.__init__c              
   C   sR   |   }| jr%zddlm} W n ty  } ztd|d }~ww ||}t|S )Nr   )tqdmzPackage tqdm must be installed if show_progress_bar=True. Please install with 'pip install tqdm' or set show_progress_bar=False.)	lazy_loadr   r   ImportErrorlist)r   iterr   er   r   r   loadD   s   zRSSFeedLoader.loadc              
   C   sX   | j r| j S zdd l}W n ty } ztd|d }~ww || j}dd |jD S )Nr   zPackage listparser must be installed if the opml arg is used. Please install with 'pip install listparser' or use the urls arg instead.c                 S   s   g | ]}|j qS r   )url).0feedr   r   r   
<listcomp>_   s    z+RSSFeedLoader._get_urls.<locals>.<listcomp>)r   
listparserr   parser   Zfeeds)r   r!   r   Zrssr   r   r   	_get_urlsR   s   zRSSFeedLoader._get_urlsc                 c   s6   zdd l }W n ty   tdw | jD ]}z||}t|ddr/td| d|j W n" tyR } z| jrLt	
d| d|  W Y d }~q|d }~ww z"|jD ]}td
d|jgi| j}| d }||jd< |V  qWW q ty } z| jrt	
d	|j d|  W Y d }~q|d }~ww d S )Nr   zMfeedparser package not found, please install it with `pip install feedparser`ZbozoFzError fetching z, exception: r   r   zError processing entry r   )
feedparserr   r#   r"   getattrr   Zbozo_exception	Exceptionr   loggererrorentriesr	   linkr   r   metadata)r   r$   r   r   r   entryloaderZarticler   r   r   r   a   sT   



zRSSFeedLoader.lazy_load)NNTF)__name__
__module____qualname____doc__r   r   strboolr   r   r   r   r   propertyr#   r   r   r   r   r   r   r
      s.    $

r
   )loggingtypingr   r   r   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   Z)langchain_community.document_loaders.newsr	   	getLoggerr.   r'   r
   r   r   r   r   <module>   s    
