o
    Zhw#                     @  s   d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
mZ d dlmZ d dlmZ er:d dlZd dlmZ dddZG dd deZdddZG dd deZeZdS )    )annotationsN)Path)TYPE_CHECKINGDictListOptionalUnionDocument)
BaseLoader)
EntityLikerowdictreturnstrc                 C  s.   | d }| d }| d }| d| d| dS )zBCombine message information in a readable format ready to be used.datefromtextz on z: 

 )r   r   Zsenderr   r   r   d/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/telegram.pyconcatenate_rows   s   r   c                   @  s$   e Zd ZdZdddZddd	Zd
S )TelegramChatFileLoaderzLoad from `Telegram chat` dump.pathUnion[str, Path]c                 C  s
   || _ dS )zInitialize with a path.N)	file_path)selfr   r   r   r   __init__   s   
zTelegramChatFileLoader.__init__r   List[Document]c                 C  sr   t | j}t|dd}t|}W d   n1 sw   Y  ddd |d D }dt|i}t||d	gS )
Load documents.utf8encodingN c                 s  s2    | ]}|d  dkrt |d trt|V  qdS )typemessager   N)
isinstancer   r   ).0r%   r   r   r   	<genexpr>'   s    z.TelegramChatFileLoader.load.<locals>.<genexpr>messagessourcepage_contentmetadata)r   r   openjsonloadjoinr   r
   )r   pfdr   r-   r   r   r   r0       s   

zTelegramChatFileLoader.loadN)r   r   r   r   )__name__
__module____qualname____doc__r   r0   r   r   r   r   r      s    
r   r   Union[str, List[str]]r   c           	      C  s   ddl m} |dg ddd}t| tr| g} dd | D }t|D ]\}}|d	 |jd
< q"g }|D ]2}||j}t|D ]%\}}t||jd
 |dd}|jd
  d|jd  |jd< |	| q>q2|S )zIConvert a string or list of strings to a list of Documents with metadata.r   )RecursiveCharacterTextSplitteri   )r   
.!?, r#      )
chunk_size
separatorsZchunk_overlapc                 S  s   g | ]}t |d qS ))r,   r	   )r'   pager   r   r   
<listcomp>>   s    z text_to_docs.<locals>.<listcomp>   rE   )rE   chunkr+   -rH   r*   )
Zlangchain_text_splittersr;   r&   r   	enumerater-   Z
split_textr,   r
   append)	r   r;   Ztext_splitterZ	page_docsidocZ
doc_chunkschunksrH   r   r   r   text_to_docs1   s,   
 rO   c                   @  sN   e Zd ZdZ					dd ddZd!ddZd"ddZd#ddZd$ddZdS )%TelegramChatApiLoaderz)Load `Telegram` chat json directory dump.Ntelegram_data.jsonchat_entityOptional[EntityLike]api_idOptional[int]api_hashOptional[str]usernamer   r   c                 C  s"   || _ || _|| _|| _|| _dS )aI  Initialize with API parameters.

        Args:
            chat_entity: The chat entity to fetch data from.
            api_id: The API ID.
            api_hash: The API hash.
            username: The username.
            file_path: The file path to save the data to. Defaults to
                 "telegram_data.json".
        N)rR   rT   rV   rX   r   )r   rR   rT   rV   rX   r   r   r   r   r   V   s
   
zTelegramChatApiLoader.__init__r   Nonec                   s   ddl m} g }|| j| j| j4 I dH 8}|| j2 z%3 dH W }|jdu}|r.|jjnd}|	|j
|j|j |j||d q6 W d  I dH  n1 I dH sTw   Y  t| jddd}tj||dd	d
 W d   dS 1 svw   Y  dS )z8Fetch data from Telegram API and save it as a JSON file.r   )TelegramClientN)	sender_idr   r   
message.idis_replyreply_to_idwzutf-8r!   F   )ensure_asciiindent)Ztelethon.syncrZ   rX   rT   rV   Ziter_messagesrR   Zreply_toZreply_to_msg_idrK   r[   r   r   	isoformatidr.   r   r/   dump)r   rZ   dataclientr%   r]   r^   r3   r   r   r   fetch_data_from_telegramn   s*   
("z.TelegramChatApiLoader.fetch_data_from_telegramrf   pd.DataFramer   c                   s`   d fdd ||d	   }||d	  j d
gdd
 td
<  fdd|d D }|S )a
  Create a dictionary of message threads from the given data.

        Args:
            data (pd.DataFrame): A DataFrame containing the conversation                 data with columns:
                - message.sender_id
                - text
                - date
                - message.id
                - is_reply
                - reply_to_id

        Returns:
            dict: A dictionary where the key is the parent message ID and                 the value is a list of message IDs in ascending order.
        	parent_idint
reply_datari   r   	List[int]c                   s>   ||d | k d   }g }|D ]}||g || 7 }q|S )a^  
            Recursively find all replies to a given parent message ID.

            Args:
                parent_id (int): The parent message ID.
                reply_data (pd.DataFrame): A DataFrame containing reply messages.

            Returns:
                list: A list of message IDs that are replies to the parent message ID.
            r^   r\   )tolist)rj   rl   Zdirect_repliesZall_repliesZreply_id)find_repliesr   r   ro      s   z@TelegramChatApiLoader._get_message_threads.<locals>.find_repliesr]   r^   )Zsubsetc                   s   i | ]}||g | qS r   r   )r'   rj   ro   Zreply_messagesr   r   
<dictcomp>   s    z>TelegramChatApiLoader._get_message_threads.<locals>.<dictcomp>r\   N)rj   rk   rl   ri   r   rm   )ZdropnaZastyperk   )r   rf   Zparent_messagesmessage_threadsr   rp   r   _get_message_threads   s   z*TelegramChatApiLoader._get_message_threadsrr   Dict[int, List[int]]c                 C  s`   d}|  D ]%\}}||d | jddd  }dd |D }|d|d	 7 }q| S )
aw  
        Combine the message texts for each parent message ID based             on the list of message threads.

        Args:
            message_threads (dict): A dictionary where the key is the parent message                 ID and the value is a list of message IDs in ascending order.
            data (pd.DataFrame): A DataFrame containing the conversation data:
                - message.sender_id
                - text
                - date
                - message.id
                - is_reply
                - reply_to_id

        Returns:
            str: A combined string of message texts sorted by date.
        r#   r\   r   )Zbyr   c                 S  s   g | ]}t |qS r   )r   )r'   elemr   r   r   rF      s    z@TelegramChatApiLoader._combine_message_texts.<locals>.<listcomp>rA   z.
)itemsisinZsort_valuesrn   r1   strip)r   rr   rf   Zcombined_textrj   Zmessage_idsZmessage_textsr   r   r   _combine_message_texts   s   z,TelegramChatApiLoader._combine_message_textsr   c           
      C  s   | j dur"zddl}|  t|   W n ty!   tdw t| j}t	|dd}t
|}W d   n1 s=w   Y  zddl}W n tyS   tdw ||}||}| |}| ||}	t|	S )r   Nr   zy`nest_asyncio` package not found.
                    please install with `pip install nest_asyncio`
                    r    r!   zf`pandas` package not found. 
                please install with `pip install pandas`
                )rR   nest_asyncioapplyasynciorunrh   ImportErrorr   r   r.   r/   r0   pandasZjson_normalizeZ	DataFramers   ry   rO   )
r   rz   r2   r3   r4   pdZnormalized_messagesdfrr   Zcombined_textsr   r   r   r0      s4   




zTelegramChatApiLoader.load)NNNNrQ   )
rR   rS   rT   rU   rV   rW   rX   rW   r   r   )r   rY   )rf   ri   r   r   )rr   rt   rf   ri   r   r   r5   )	r6   r7   r8   r9   r   rh   rs   ry   r0   r   r   r   r   rP   S   s    


;&rP   )r   r   r   r   )r   r:   r   r   )
__future__r   r|   r/   pathlibr   typingr   r   r   r   r   Zlangchain_core.documentsr
   Z)langchain_community.document_loaders.baser   r   r   Ztelethon.hintsr   r   r   rO   rP   ZTelegramChatLoaderr   r   r   r   <module>   s     

" :