o
    Zhk                     @   sz   d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZ d dl	m
Z
 edZedZedZG dd	 d	e
ZdS )
    N)ListTuple)Document)
BaseLoaderzBV\w+zav[0-9]+zp=(\d+)c                	   @   sb   e Zd ZdZ			ddee dededefddZd	ee fd
dZded	e	ee
f fddZdS )BiliBiliLoaderz9
    Load fetching transcripts from BiliBili videos.
     
video_urlssessdatabili_jctbuvid3c                 C   sb   || _ d| _zddlm} W n ty   tdw |r+|r-|r/|j|||d| _dS dS dS dS )a  
        Initialize the loader with BiliBili video URLs and authentication cookies.
        if no authentication cookies are provided, the loader can't get transcripts
        and will only fetch videos info.

        Args:
            video_urls (List[str]): List of BiliBili video URLs.
            sessdata (str): SESSDATA cookie value for authentication.
            bili_jct (str): BILI_JCT cookie value for authentication.
            buvid3 (str): BUVI3 cookie value for authentication.
        Nr   )videoTrequests package not found, please install it with `pip install bilibili-api-python`)r	   r
   r   )r   
credentialbilibili_apir   ImportErrorZ
Credential)selfr   r	   r
   r   r    r   d/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/bilibili.py__init__   s   zBiliBiliLoader.__init__returnc                 C   s8   g }| j D ]}| |\}}t||d}|| q|S )z
        Load and return a list of documents containing video transcripts.

        Returns:
            List[Document]: List of Document objects transcripts and metadata.
        )Zpage_contentmetadata)r   _get_bilibili_subs_and_infor   append)r   resultsurlZ
transcript
video_infodocr   r   r   load6   s   
zBiliBiliLoader.loadr   c                 C   s  t |}z
ddlm}m} W n ty   tdw |r(|j| | jd}nt	|}|r@|jt
| dd | jd}ntd| || }|d	|i | js[d
|fS d}t|}	|	rt|d t
|	dd  d }n|d }|||}
|
dg }|r|d dd
}|dsd| }t|}|jdkrt|jdg }ddd |D }d|d  d|d  d| }||fS td| d|j  d
|fS td| d d
|fS ) zU
        Retrieve video information and transcript for a given BiliBili URL.
        r   )syncr   r   )bvidr      N)aidr   z(Unable to find a valid video ID in URL: r   r   Zpages   cidZ	subtitlesZsubtitle_urlhttpzhttps:   body c                 S   s   g | ]}|d  qS )contentr   ).0cr   r   r   
<listcomp>u   s    z>BiliBiliLoader._get_bilibili_subs_and_info.<locals>.<listcomp>zVideo Title: titlez, description: Zdescz

Transcript: zFailed to fetch subtitles for z. HTTP Status Code: zNo subtitles found for video: z. Returning empty transcript.)
BV_PATTERNsearchr   r   r   r   ZVideogroupr   
AV_PATTERNint
ValueErrorget_infoupdatePAGE_INDEX_PATTERNZget_subtitleget
startswithrequestsstatus_codejsonloadsr(   joinwarningswarn)r   r   r   r   r   vr!   r   r#   Z
page_matchsubZsub_listZsub_urlresponseZraw_sub_titlesZraw_transcriptZraw_transcript_with_meta_infor   r   r   r   E   sh   

"





z*BiliBiliLoader._get_bilibili_subs_and_infoN)r   r   r   )__name__
__module____qualname____doc__r   strr   r   r   r   dictr   r   r   r   r   r      s     
 r   )r:   rer=   typingr   r   r8   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   compiler-   r0   r5   r   r   r   r   r   <module>   s    


