o
    Zh F                     @  s   d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlm Z  e!g dZ"dZ#dddZ$G dd deZ%dS )    )annotationsN)ConfigParser)Path)AnyCallableDictIterableListOptionalTuple)Document)
Embeddingsguard_import)VectorStore)Docstore)InMemoryDocstore)maximal_marginal_relevance)angularZ	euclideanZ	manhattanZhammingdotr   returnr   c                   C  s   t dS )z1Import annoy if available, otherwise raise error.annoyr    r   r   ]/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/vectorstores/annoy.pydependable_annoy_import   s   r   c                   @  s   e Zd ZdZdYddZedZddZ	d[d\ddZd]d d!Z	#d^d_d(d)Z		#d^d`d+d,Z
	#d^dad.d/Z	#d^dbd1d2Z	#d^dcd3d4Z	#d^ddd5d6Z	"	7	8dedfd<d=Z	"	7	8dedgd>d?Zeded@d#fdhdFdGZeded@d#fdidHdIZeded@d#fdjdLdMZdkdldSdTZedNdUdmdWdXZdS )nAnnoya  `Annoy` vector store.

    To use, you should have the ``annoy`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Annoy
            db = Annoy(embedding_function, index, docstore, index_to_docstore_id)

    embedding_functionr   indexr   metricstrdocstorer   index_to_docstore_idDict[int, str]c                 C  s"   || _ || _|| _|| _|| _dS )z%Initialize with necessary components.N)r   r   r   r    r!   )selfr   r   r   r    r!   r   r   r   __init__*   s
   	
zAnnoy.__init__r   Optional[Embeddings]c                 C  s   d S Nr   r#   r   r   r   
embeddings9   s   zAnnoy.embeddingsNtextsIterable[str]	metadatasOptional[List[dict]]kwargs	List[str]c                 K  s   t d)Nz=Annoy does not allow to add new data once the index is build.)NotImplementedError)r#   r)   r+   r-   r   r   r   	add_texts>   s   zAnnoy.add_textsidxs	List[int]distsList[float]List[Tuple[Document, float]]c                 C  s^   g }t ||D ]%\}}| j| }| j|}t|ts%td| d| |||f q|S )a  Turns annoy results into a list of documents and scores.

        Args:
            idxs: List of indices of the documents in the index.
            dists: List of distances of the documents in the index.
        Returns:
            List of Documents and scores.
        Could not find document for id , got )zipr!   r    search
isinstancer   
ValueErrorappend)r#   r1   r3   docsidxdist_iddocr   r   r   process_index_resultsH   s   

zAnnoy.process_index_results   	embeddingkintsearch_kc                 C  $   | j j|||dd\}}| ||S a}  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided
        Returns:
            List of Documents most similar to the query and score for each
        TrH   Zinclude_distances)r   get_nns_by_vectorrB   )r#   rE   rF   rH   r1   r3   r   r   r   &similarity_search_with_score_by_vector\      
z,Annoy.similarity_search_with_score_by_vectordocstore_indexc                 C  rI   rJ   )r   Zget_nns_by_itemrB   )r#   rO   rF   rH   r1   r3   r   r   r   %similarity_search_with_score_by_indexn   rN   z+Annoy.similarity_search_with_score_by_indexqueryc                 C  s   |  |}| |||}|S )a~  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the query and score for each
        )r   rM   )r#   rQ   rF   rH   rE   r=   r   r   r   similarity_search_with_score   s   
z"Annoy.similarity_search_with_scoreList[Document]c                 K     |  |||}dd |D S )a  Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the embedding.
        c                 S     g | ]\}}|qS r   r   .0rA   _r   r   r   
<listcomp>       z5Annoy.similarity_search_by_vector.<locals>.<listcomp>)rM   )r#   rE   rF   rH   r-   docs_and_scoresr   r   r   similarity_search_by_vector      z!Annoy.similarity_search_by_vectorc                 K  rT   )az  Return docs most similar to docstore_index.

        Args:
            docstore_index: Index of document in docstore
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the embedding.
        c                 S  rU   r   r   rV   r   r   r   rY      rZ   z4Annoy.similarity_search_by_index.<locals>.<listcomp>)rP   )r#   rO   rF   rH   r-   r[   r   r   r   similarity_search_by_index   r]   z Annoy.similarity_search_by_indexc                 K  rT   )al  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            search_k: inspect up to search_k nodes which defaults
                to n_trees * n if not provided

        Returns:
            List of Documents most similar to the query.
        c                 S  rU   r   r   rV   r   r   r   rY      rZ   z+Annoy.similarity_search.<locals>.<listcomp>)rR   )r#   rQ   rF   rH   r-   r[   r   r   r   similarity_search   s   zAnnoy.similarity_search         ?fetch_klambda_multfloatc                   s   j j||ddd fdd D }ttj|gtjd|||d} fdd|D }g }	|D ]!}
j|
 }j|}t	|t
sKtd	| d
| |	| q/|	S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            k: Number of Documents to return. Defaults to 4.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.

        Returns:
            List of Documents selected by maximal marginal relevance.
        rD   FrK   c                   s   g | ]} j |qS r   )r   Zget_item_vectorrW   ir'   r   r   rY      s    zAAnnoy.max_marginal_relevance_search_by_vector.<locals>.<listcomp>)Zdtype)rF   rc   c                   s   g | ]
}|d kr | qS )rD   r   re   )r1   r   r   rY      s    r6   r7   )r   rL   r   nparrayZfloat32r!   r    r9   r:   r   r;   r<   )r#   rE   rF   rb   rc   r-   r(   Zmmr_selectedZselected_indicesr=   rf   r@   rA   r   )r1   r#   r   'max_marginal_relevance_search_by_vector   s&   

z-Annoy.max_marginal_relevance_search_by_vectorc                 K  s    |  |}| j||||d}|S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        )rc   )r   ri   )r#   rQ   rF   rb   rc   r-   rE   r=   r   r   r   max_marginal_relevance_search   s
   
z#Annoy.max_marginal_relevance_searchd   r(   List[List[float]]r   treesn_jobsc                   s   |t vrtd| dtt  td}	|stdt|d }
|	j|
|d}t|D ]
\}}||| q+|j||d g }t|D ]\}}|rM|| ni }|	t
||d qCd	d
 tt|D  t fdd
t|D }| |j||| S )NzUnsupported distance metric: z. Expected one of r   z/embeddings must be provided to build AnnoyIndexr   r   )rn   )Zpage_contentmetadatac                 S  s   i | ]	}|t t qS r   )r   uuiduuid4re   r   r   r   
<dictcomp>6      z Annoy.__from.<locals>.<dictcomp>c                   s   i | ]	\}} | |qS r   r   )rW   rf   rA   Zindex_to_idr   r   rs   8  rt   )INDEX_METRICSr;   listr   len
AnnoyIndex	enumerateZadd_itembuildr<   r   ranger   embed_query)clsr)   r(   rE   r+   r   rm   rn   r-   r   fr   rf   ZembZ	documentstextrp   r    r   ru   r   Z__from  s.   zAnnoy.__fromc           	      K  s(   | |}| j|||||||fi |S )a  Construct Annoy wrapper from raw documents.

        Args:
            texts: List of documents to index.
            embedding: Embedding function to use.
            metadatas: List of metadata dictionaries to associate with documents.
            metric: Metric to use for indexing. Defaults to "angular".
            trees: Number of trees to use for indexing. Defaults to 100.
            n_jobs: Number of jobs to use for indexing. Defaults to -1.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the Annoy database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import Annoy
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                index = Annoy.from_texts(texts, embeddings)
        )Zembed_documents_Annoy__from)	r~   r)   rE   r+   r   rm   rn   r-   r(   r   r   r   
from_texts<  s   
$zAnnoy.from_textstext_embeddingsList[Tuple[str, List[float]]]c           
      K  s:   dd |D }dd |D }	| j ||	|||||fi |S )a  Construct Annoy wrapper from embeddings.

        Args:
            text_embeddings: List of tuples of (text, embedding)
            embedding: Embedding function to use.
            metadatas: List of metadata dictionaries to associate with documents.
            metric: Metric to use for indexing. Defaults to "angular".
            trees: Number of trees to use for indexing. Defaults to 100.
            n_jobs: Number of jobs to use for indexing. Defaults to -1

        This is a user friendly interface that:
            1. Creates an in memory docstore with provided embeddings
            2. Initializes the Annoy database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import Annoy
                from langchain_community.embeddings import OpenAIEmbeddings
                embeddings = OpenAIEmbeddings()
                text_embeddings = embeddings.embed_documents(texts)
                text_embedding_pairs = list(zip(texts, text_embeddings))
                db = Annoy.from_embeddings(text_embedding_pairs, embeddings)
        c                 S     g | ]}|d  qS )r   r   rW   tr   r   r   rY     rZ   z)Annoy.from_embeddings.<locals>.<listcomp>c                 S  r   )   r   r   r   r   r   rY     rZ   )r   )
r~   r   rE   r+   r   rm   rn   r-   r)   r(   r   r   r   from_embeddingse  s   %zAnnoy.from_embeddingsFfolder_pathprefaultboolNonec                 C  s   t |}tj|dd t }| jj| jd|d< | jjt|d |d t	|d d}t
| j| j|f| W d	   d	S 1 sBw   Y  d	S )
a  Save Annoy index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to save index, docstore,
                and index_to_docstore_id to.
            prefault: Whether to pre-load the index into memory.
        T)exist_ok)r   r   ANNOYindex.annoy)r   	index.pklwbN)r   osmakedirsr   r   r   r   saver   openpickledumpr    r!   )r#   r   r   pathconfig_objectfiler   r   r   
save_local  s   
"zAnnoy.save_local)allow_dangerous_deserializationr   c                C  s   |st dt|}td}t|d d}t|\}}}	W d   n1 s(w   Y  t|	d d }
|	d d }|j|
|d	}|t|d
  | |j	||||S )aR  Load Annoy index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to load index, docstore,
                and index_to_docstore_id from.
            embeddings: Embeddings to use when generating queries.
            allow_dangerous_deserialization: whether to allow deserialization
                of the data which involves loading a pickle file.
                Pickle files can be modified by malicious actors to deliver a
                malicious payload that results in execution of
                arbitrary code on your machine.
        aB  The de-serialization relies loading a pickle file. Pickle files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to enable deserialization. If you do this, make sure that you trust the source of the data. For example, if you are loading a file that you created, and know that no one else has modified the file, then this is safe to do. Do not set this to `True` if you are loading a file from an untrusted source (e.g., some random site on the internet.).r   r   rbNr   r   r   ro   r   )
r;   r   r   r   r   loadrG   ry   r   r}   )r~   r   r(   r   r   r   r   r    r!   r   r   r   r   r   r   r   
load_local  s,   
zAnnoy.load_local)
r   r   r   r   r   r   r    r   r!   r"   )r   r%   r&   )r)   r*   r+   r,   r-   r   r   r.   )r1   r2   r3   r4   r   r5   )rC   rD   )rE   r4   rF   rG   rH   rG   r   r5   )rO   rG   rF   rG   rH   rG   r   r5   )rQ   r   rF   rG   rH   rG   r   r5   )
rE   r4   rF   rG   rH   rG   r-   r   r   rS   )
rO   rG   rF   rG   rH   rG   r-   r   r   rS   )
rQ   r   rF   rG   rH   rG   r-   r   r   rS   )rC   r`   ra   )rE   r4   rF   rG   rb   rG   rc   rd   r-   r   r   rS   )rQ   r   rF   rG   rb   rG   rc   rd   r-   r   r   rS   )r)   r.   r(   rl   rE   r   r+   r,   r   r   rm   rG   rn   rG   r-   r   r   r   )r)   r.   rE   r   r+   r,   r   r   rm   rG   rn   rG   r-   r   r   r   )r   r   rE   r   r+   r,   r   r   rm   rG   rn   rG   r-   r   r   r   )F)r   r   r   r   r   r   )r   r   r(   r   r   r   r   r   )__name__
__module____qualname____doc__r$   propertyr(   r0   rB   rM   rP   rR   r\   r^   r_   ri   rj   classmethodDEFAULT_METRICr   r   r   r   r   r   r   r   r   r      sd    


2%(+r   )r   r   )&
__future__r   r   r   rq   configparserr   pathlibr   typingr   r   r   r   r	   r
   r   numpyrg   Zlangchain_core.documentsr   Zlangchain_core.embeddingsr   Zlangchain_core.utilsr   Zlangchain_core.vectorstoresr   Z!langchain_community.docstore.baser   Z&langchain_community.docstore.in_memoryr   Z&langchain_community.vectorstores.utilsr   	frozensetrv   r   r   r   r   r   r   r   <module>   s&    $
