o
    Zh'                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZmZmZ eeZG dd deZdS )	    N)Path)IteratorListUnion)BaseChatLoader)ChatSession)	AIMessageBaseMessageHumanMessagec                   @   s~   e Zd ZdZdeeef fddZedede	fddZ
edede	fd	d
Zededee fddZdee	 fddZdS )TelegramChatLoadera  Load `telegram` conversations to LangChain chat messages.

    To export, use the Telegram Desktop app from
    https://desktop.telegram.org/, select a conversation, click the three dots
    in the top right corner, and select "Export chat history". Then select
    "Machine-readable JSON" (preferred) to export. Note: the 'lite' versions of
    the desktop app (like "Telegram for MacOS") do not support exporting chat
    history.
    pathc                 C   s"   t |tr
|| _dS t|| _dS )zInitialize the TelegramChatLoader.

        Args:
            path (Union[str, Path]): Path to the exported Telegram chat zip,
                 directory, json, or HTML file.
        N)
isinstancestrr   )selfr    r   `/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/chat_loaders/telegram.py__init__   s   "
zTelegramChatLoader.__init__	file_pathreturnc                 C   s   zddl m} W n ty   tdw t| ddd}||d}W d   n1 s+w   Y  g }d}|d	D ]A}|d
d }|d}|du rU|du rUtd q9|du r\|}	n|j	 }	|dj	 }
|
t|
|	d|igdd |	}q9t|dS )zLoad a single chat session from an HTML file.

        Args:
            file_path (str): Path to the HTML file.

        Returns:
            ChatSession: The loaded chat session.
        r   )BeautifulSoupzPlease install the 'beautifulsoup4' package to load Telegram HTML files. You can do this by running'pip install beautifulsoup4' in your terminal.rutf-8encodingzhtml.parserNz.message.defaultz.pull_right.date.detailstitlez
.from_namezfrom_name not found in messagez.textmessage_timeZsendereventscontentZadditional_kwargsmessages)Zbs4r   ImportErroropenselectZ
select_oneloggerdebugtextstripappendr
   r   )r   r   fileZsoupresultsZprevious_sendermessage	timestampZfrom_name_element	from_namer'   r   r   r   _load_single_chat_session_html'   s@   



	
z1TelegramChatLoader._load_single_chat_session_htmlc           	      C   s   t | ddd}t|}W d   n1 sw   Y  |dg }g }|D ]#}|dd}|dd}|d	d}|t||d
|igdd q&t|dS )zLoad a single chat session from a JSON file.

        Args:
            file_path (str): Path to the JSON file.

        Returns:
            ChatSession: The loaded chat session.
        r   r   r   Nr!   r'    datefromr   r   r   r    )r#   jsonloadgetr)   r
   r   )	r   r*   datar!   r+   r,   r'   r-   r.   r   r   r   _load_single_chat_session_jsonV   s$   


z1TelegramChatLoader._load_single_chat_session_jsonc              
   c   s    t j| r| dr| V  dS t j| r6t | D ]\}}}|D ]}|dr2t j||V  q#qdS t| r{t	| 1}|
 D ]#}|drht }|j||dV  W d   n1 scw   Y  qEW d   dS 1 stw   Y  dS dS )zIterate over files in a directory or zip file.

        Args:
            path (str): Path to the directory or zip file.

        Yields:
            str: Path to each file.
        ).html.json)r   N)osr   isfileendswithisdirwalkjoinzipfile
is_zipfileZipFilenamelisttempfileTemporaryDirectoryextract)r   root_filesr*   zip_filetemp_dirr   r   r   _iterate_filesv   s,   





"z!TelegramChatLoader._iterate_filesc                 c   sF    |  | jD ]}|dr| |V  q|dr | |V  qdS )zLazy load the messages from the chat file and yield them
        in as chat sessions.

        Yields:
            ChatSession: The loaded chat session.
        r8   r9   N)rL   r   r<   r/   r7   )r   r   r   r   r   	lazy_load   s   

zTelegramChatLoader.lazy_loadN)__name__
__module____qualname____doc__r   r   r   r   staticmethodr   r/   r7   r   rL   rM   r   r   r   r   r      s    


.r   )r3   loggingr:   rD   r@   pathlibr   typingr   r   r   Zlangchain_core.chat_loadersr   Zlangchain_core.chat_sessionsr   Zlangchain_core.messagesr   r	   r
   	getLoggerrN   r%   r   r   r   r   r   <module>   s    
