o
    Zh.                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZmZ d dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dl m!Z! d dl"m#Z# ej$rud dl%mZ& d dl'm(Z( ej)ej*e+ ej,ej- f Z.e/e0Z1G dd deZG dd de2eZ3G dd deZ4G dd deZ5G dd de2eZ6e6j7fd1dd Z8e8Z9				d2d3d-d.Z:eG d/d0 d0e5Z;dS )4    )annotationsN)ABCabstractmethod)	dataclassfield)Enum)TextSplitter)Document)Field)BaseRagasEmbeddings)ExceptionInRunner)Executor)	RunConfig)rng)	Extractorc                   @  sj   e Zd ZU edd dZded< edddZd	ed
< edd Ze	dddZ
e	dddZdddZdS )r	   c                   C  s   t t S N)struuiduuid4 r   r   M/var/www/html/lang_env/lib/python3.10/site-packages/ragas/testset/docstore.py<lambda>!   s    zDocument.<lambda>default_factoryr   doc_idNFdefaultreprzt.Optional[t.List[float]]	embeddingc                 C  s:   | j d}|d ur| j d }|S td| j | j}|S )Nfilenamez9Document [ID: %s] has no filename, using `doc_id` instead)metadatagetloggerinfor   )selfr   r   r   r   r   $   s   
zDocument.filenamedoc
LCDocumentc                 C     t t }| |j|j|dS N)page_contentr    r   )r   r   r   r)   r    clsr%   r   r   r   r   from_langchain_document1      z Document.from_langchain_documentLlamaindexDocumentc                 C  r'   r(   )r   r   r   textr    r*   r   r   r   from_llamaindex_document:   r-   z!Document.from_llamaindex_documentreturnboolc                 C  s   | j |j kS r   r   )r$   otherr   r   r   __eq__C   s   zDocument.__eq__)r%   r&   )r%   r.   )r1   r2   )__name__
__module____qualname__r
   r   __annotations__r   propertyr   classmethodr,   r0   r5   r   r   r   r   r	       s   
 
r	   c                   @  s    e Zd ZdZdZdZdZdZdS )	Directionz0
    Direction for getting adjascent nodes.
    nextprevupZdownN)r6   r7   r8   __doc__NEXTPREVZUPZDOWNr   r   r   r   r<   H   s    r<   c                   @  sn   e Zd ZU eeddZded< eeddZded< edddZ	d	ed
< dZ
ded< edd Zedd ZdS )NodeF)r   r   zt.List[str]
keyphraseszt.Dict[Direction, t.Any]relationshipsNr   t.Optional[float]doc_similarityr   intwinsc                 C     | j tjS r   )rE   r!   r<   rA   r$   r   r   r   r=   Y      z	Node.nextc                 C  rJ   r   )rE   r!   r<   rB   rK   r   r   r   r>   ]   rL   z	Node.prev)r6   r7   r8   r
   listrD   r9   dictrE   rG   rI   r:   r=   r>   r   r   r   r   rC   S   s   
 
rC   c                   @  sn   e Zd Zdd Zed%d&ddZed%d'd
dZed(ddZed)d*ddZe	d+d,ddZ	d-d"d#Z
d$S ).DocumentStorec                 C  s
   i | _ d S r   )Z	documentsrK   r   r   r   __init__c      
zDocumentStore.__init__Tdocst.Sequence[Document]c                 C     d S r   r   )r$   rR   show_progressr   r   r   add_documentsf      zDocumentStore.add_documentsnodest.Sequence[Node]c                 C  rT   r   r   )r$   rX   rU   r   r   r   	add_nodesj   rW   zDocumentStore.add_nodesnode_idr   r1   rC   c                 C  rT   r   r   r$   r[   r   r   r   get_noden   rW   zDocumentStore.get_node   t.List[Node]c                 C  rT   r   r   )r$   kr   r   r   get_random_nodesr   rW   zDocumentStore.get_random_nodesffffff?   node	thresholdfloattop_krH   't.Union[t.List[Document], t.List[Node]]c                 C  rT   r   r   )r$   rd   re   rg   r   r   r   get_similarv   s   zDocumentStore.get_similar
run_configr   c                 C  rT   r   r   r$   rj   r   r   r   set_run_config|      zDocumentStore.set_run_configNTrR   rS   rX   rY   r[   r   r1   rC   )r^   r1   r_   rb   rc   rd   rC   re   rf   rg   rH   r1   rh   rj   r   )r6   r7   r8   rP   r   rV   rZ   r]   ra   ri   rl   r   r   r   r   rO   b   s    rO   c                   @  s   e Zd ZdZdZdZdZdS )SimilarityModezModes for similarity/distance.ZcosineZdot_productZ	euclideanN)r6   r7   r8   r@   DEFAULTDOT_PRODUCT	EUCLIDEANr   r   r   r   rv      s
    rv   
embedding1	Embedding
embedding2moder1   rf   c                 C  sn   |t jkrttjt| t|  S |t jkr!t| |S t| |}tj| tj| }|| S )zGet embedding similarity.)	rv   ry   rf   npZlinalgnormarrayrx   dot)rz   r|   r}   productr   r   r   r   
similarity   s   
"
r   query_embedding
embeddingst.List[Embedding]similarity_fn"t.Optional[t.Callable[..., float]]similarity_top_kt.Optional[int]embedding_idst.Optional[t.List]similarity_cutoffrF   t.Tuple[t.List[float], t.List]c                 C  s   |du rt tt|}|pt}t|}t| }g }t|D ](\}	}
|||
}|du s1||krHt||||	 f |rHt||krHt	| q t
|dd dd}dd |D }dd |D }||fS )	zm
    Get top nodes by similarity to the query.
    returns the scores and the embedding_ids of the nodes
    Nc                 S  s   | d S )Nr   r   )xr   r   r   r      s    z&get_top_k_embeddings.<locals>.<lambda>T)keyreversec                 S  s   g | ]\}}|qS r   r   ).0s_r   r   r   
<listcomp>       z(get_top_k_embeddings.<locals>.<listcomp>c                 S  s   g | ]\}}|qS r   r   )r   r   nr   r   r   r      r   )rM   rangelendefault_similarity_fnsr~   r   	enumerateheapqheappushheappopsorted)r   r   r   r   r   r   Zembeddings_npZquery_embedding_npZsimilarity_heapiZembr   Zresult_tupsZresult_similaritiesZ
result_idsr   r   r   get_top_k_embeddings   s"   



r   c                   @  s   e Zd ZU ded< edddZded< edddZded	< eed
Zded< eed
Z	ded< ee
d
Zded< dZded< d=ddZd>d?ddZd>d@ddZdd  Zd!d" ZdAd'd(ZdBd*d+ZdCdDd.d/Z	1dEdFd8d9ZdGd;d<ZdS )HInMemoryDocumentStorer   splitterNFr   zt.Optional[Extractor]	extractorzt.Optional[BaseRagasEmbeddings]r   r   r_   rX   r   node_embeddings_listzt.Dict[str, Node]node_mapzt.Optional[RunConfig]rj   items/t.Union[t.Sequence[Document], t.Sequence[Node]]c                 C  rT   r   r   )r$   r   r   r   r   _embed_items   rm   z"InMemoryDocumentStore._embed_itemsTrR   rS   c                 C  s:   | j dus	J ddd | j|D }| j||d dS )z.
        Add documents in batch mode.
        NEmbeddings must be setc                 S  s   g | ]}t |qS r   )rC   r,   )r   dr   r   r   r      s    z7InMemoryDocumentStore.add_documents.<locals>.<listcomp>)rU   )r   r   Ztransform_documentsrZ   )r$   rR   rU   rX   r   r   r   rV      s
   
z#InMemoryDocumentStore.add_documentsrY   c                 C  s  | j d us	J d| jd usJ di }i }tddd| jd}d}t|D ]?\}}|jd u rH|||i |j| j j|j	d| d	d
 |d7 }|j
sd|||i |j| jj|d| d	d
 |d7 }q%| }	|	snt t|D ]J\}}|| v r|	||  |_|| v r|	||  }
|
|_
|jd ur|j
g kr| j| || j|j< t|jttjfsJ d| j|j qr|   |   d S )Nr   zExtractor must be setzembedding nodesFT)ZdescZkeep_progress_barZraise_exceptionsrj   r   zembed_node_task[])namer^   zkeyphrase-extraction[z$Embedding must be list or np.ndarray)r   r   r   rj   r   r   updateZsubmitZ
embed_textr)   rD   extractresultsr   keysrX   appendr   r   
isinstancerM   r~   Zndarrayr   calculate_nodes_docs_similarityset_node_relataionships)r$   rX   rU   Znodes_to_embedZnodes_to_extractexecutorZ
result_idxr   r   r   rD   r   r   r   rZ      sd   


zInMemoryDocumentStore.add_nodesc                 C  s   t | jD ]=\}}|dkr3| j|d  }|j|jkr'||jtj< ||jtj< nd |jtj< d |jtj< |t| jd krBd |jtj< qd S )Nr   r^   )r   rX   r   rE   r<   rB   rA   r   )r$   r   rd   Z	prev_noder   r   r   r     s   z-InMemoryDocumentStore.set_node_relataionshipsc                   s   i }t dd | jD }t dd | jD }t|t|kr.td | jD ]}d|_q&d S |D ]  t fdd| jD }|t|d}tj	|dd	| < q0| jD ]}|j
d us_J d
t|j
||j |_qTd S )Nc                 S  s   g | ]
}|j d ur|j qS r   )r   r   rd   r   r   r   r   "      zIInMemoryDocumentStore.calculate_nodes_docs_similarity.<locals>.<listcomp>c                 S     g | ]}|j qS r   r3   r   r   r   r   r   $      z/Filename and doc_id are the same for all nodes.g      ?c                   s   g | ]
}|j  kr|jqS r   )r   r   r   Zfile_idr   r   r   .  r   r   )ZaxiszEmbedding cannot be None)setrX   r   r"   warningrG   r~   r   Zreshapemeanr   r   r   )r$   Zdoc_embeddingsZfilename_idsZnode_idsrd   Znodes_embeddingr   r   r   r     s,   


z5InMemoryDocumentStore.calculate_nodes_docs_similarityr[   r   r1   rC   c                 C  s
   | j | S r   )r   r\   r   r   r   r]   9  rQ   zInMemoryDocumentStore.get_noder   c                 C  s   t r   )NotImplementedError)r$   r   r   r   r   get_document<  rm   z"InMemoryDocumentStore.get_documentr^   皙?c           	        s   dd   fdd| j D }dd | j D }t|t| }|t| }tjt| j ||d }|D ]}| j |}| j |  jd7  _q8|S )Nc                 S  s   t | |  S r   )r~   exp)rI   alphar   r   r   adjustment_factor@  s   zAInMemoryDocumentStore.get_random_nodes.<locals>.adjustment_factorc                   s   g | ]} |j qS r   )rI   r   r   r   r   r   r   C  s    z:InMemoryDocumentStore.get_random_nodes.<locals>.<listcomp>c                 S  r   r   )rG   r   r   r   r   r   D  r   )sizepr^   )	rX   r~   r   sumr   choicetolistindexrI   )	r$   r`   r   scoresZsimilarity_scoresZprobrX   rd   idxr   r   r   ra   ?  s   z&InMemoryDocumentStore.get_random_nodesrb   rc   rd   re   rf   rg   rH   rh   c                   sd   |}|j d u rtdt|j  jt||d d\}}|dd  |dd  }} fdd|D }|S )NzDocument has no embedding.r^   )r   r   r   r   r   c                   s   g | ]} j | qS r   )rX   )r   r   rK   r   r   r   `  s    z5InMemoryDocumentStore.get_similar.<locals>.<listcomp>)r   
ValueErrorr   r   r   )r$   rd   re   rg   r%   r   Zdoc_idsr   r   rK   r   ri   P  s   

	z!InMemoryDocumentStore.get_similarr   c                 C  s   | j r	| j | || _d S r   )r   rl   rj   rk   r   r   r   rl   c  s   
z$InMemoryDocumentStore.set_run_config)r   r   rn   ro   rp   rq   )r   r   r1   rC   )r^   r   rr   rs   rt   ru   )r6   r7   r8   r9   r   r   r   rM   rX   r   rN   r   rj   r   rV   rZ   r   r   r]   r   ra   ri   rl   r   r   r   r   r      s&   
 
9

r   )rz   r{   r|   r{   r}   rv   r1   rf   )NNNN)r   r{   r   r   r   r   r   r   r   r   r   rF   r1   r   )<
__future__r   r   loggingtypingtr   abcr   r   dataclassesr   r   enumr   numpyr~   Znumpy.typingZnptZlangchain.text_splitterr   Zlangchain_core.documentsr	   r&   Zlangchain_core.pydantic_v1r
   Zragas.embeddings.baser   Zragas.exceptionsr   Zragas.executorr   Zragas.run_configr   Zragas.testset.utilsr   TYPE_CHECKINGZllama_index.core.schemar.   Zragas.testset.extractorr   UnionListrf   ZNDArrayZfloat64r{   	getLoggerr6   r"   r   r<   rC   rO   rv   rw   r   r   r   r   r   r   r   r   <module>   sL    
(#