o
    ZhR                     @  s   d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZ erVd dlmZ d d	lmZmZ ee Z!dddZ"edddZ#G dd deZ$dS )    )annotationsN)	TYPE_CHECKINGAnyCallableIterableListOptionalTupleTypeVarUnion)Document)
Embeddings)VectorStore)DistanceStrategymaximal_marginal_relevanceClient)NeighborVectorDistanceMetricreturnr   c               
   C  s6   z	ddl m}  W | S  ty } ztd|d }~ww )Nr   r   zoCould not import aerospike_vector_search python package. Please install it with `pip install aerospike_vector`.)aerospike_vector_searchr   ImportError)r   e r   a/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/vectorstores/aerospike.py_import_aerospike#   s   r   AVST	Aerospike)boundc                   @  s,  e Zd ZdZdddddejfd]ddZed^ddZd_ddZ	d`d!d"Z
edad%d&Z				'		(dbdcd4d5Z		ddded7d8Z	9		dfdgd>d?Z	9		dfdhd@dAZ	9		dfdidCdDZ	9		dfdjdEdFZdkdHdIZedldLdMZ	9	N	O		dmdndRdSZ	9	N	O		dmdodTdUZe			V			'	dpdqd[d\ZdS )rr   zu`Aerospike` vector store.

    To use, you should have the ``aerospike_vector_search`` python package installed.
    NZ_vector_textZ_idclientr   	embeddingUnion[Embeddings, Callable]	namespacestr
index_nameOptional[str]
vector_keytext_keyid_keyset_namedistance_strategy7Optional[Union[DistanceStrategy, VectorDistanceMetric]]c
                 C  sv   t  }
t|tstd t||
stdt| || _|| _|| _	|| _
|| _|| _|| _|| _| |	| _dS )a  Initialize with Aerospike client.

        Args:
            client: Aerospike client.
            embedding: Embeddings object or Callable (deprecated) to embed text.
            namespace: Namespace to use for storing vectors. This should match
            index_name: Name of the index previously created in Aerospike. This
            vector_key: Key to use for vector in metadata. This should match the
                key used during index creation.
            text_key: Key to use for text in metadata.
            id_key: Key to use for id in metadata.
            set_name: Default set name to use for storing vectors.
            distance_strategy: Distance strategy to use for similarity search
                This should match the distance strategy used during index creation.
        z`Passing in `embedding` as a Callable is deprecated. Please pass in an Embeddings object instead.zDclient should be an instance of aerospike_vector_search.Client, got N)r   
isinstancer   warningswarn
ValueErrortype_client
_embedding	_text_key_vector_key_id_key_index_name
_namespace	_set_nameconvert_distance_strategy_distance_strategy)selfr    r!   r#   r%   r'   r(   r)   r*   r+   	aerospiker   r   r   __init__7   s(   

zAerospike.__init__r   Optional[Embeddings]c                 C  s   t | jtr	| jS dS )z/Access the query embedding object if available.N)r-   r3   r   r<   r   r   r   
embeddingsm   s   zAerospike.embeddingstextsIterable[str]List[List[float]]c                   s.   t  jtr jt|S  fdd|D S )zEmbed search docs.c                   s   g | ]}  |qS r   )r3   ).0tr@   r   r   
<listcomp>x   s    z.Aerospike._embed_documents.<locals>.<listcomp>)r-   r3   r   Zembed_documentslist)r<   rB   r   r@   r   _embed_documentst   s   zAerospike._embed_documentstextList[float]c                 C  s"   t | jtr| j|S | |S )zEmbed query text.)r-   r3   r   Zembed_query)r<   rJ   r   r   r   _embed_queryz   s   
zAerospike._embed_query-Union[VectorDistanceMetric, DistanceStrategy]r   c                 C  sR   ddl m} t| tr| S | |jkrtjS | |jkrtjS | |jkr%tjS td)z
        Convert Aerospikes distance strategy to langchains DistanceStrategy
        enum. This is a convenience method to allow users to pass in the same
        distance metric used to create the index.
        r   )r   DUnknown distance strategy, must be cosine, dot_product, or euclidean)	aerospike_vector_search.typesr   r-   r   COSINEDOT_PRODUCTZSQUARED_EUCLIDEANEUCLIDEAN_DISTANCEr0   )r+   r   r   r   r   r:      s   	



z#Aerospike.convert_distance_strategy  T	metadatasOptional[List[dict]]idsOptional[List[str]]embedding_chunk_sizeintwait_for_indexboolkwargsr   	List[str]c              	   K  sB  |du r| j }|du r| j}|r|du rtdt|}|p$dd |D }|r/dd |D }n	|p7dd |D }tdt||D ]S}	||	|	|  }
||	|	|  }||	|	|  }| |
}t|||
D ]\}}}||| j< ||| j	< qet||D ]\}}||| j
< | jjd
| j|||d| qzq@|r| jj| j|d	 |S )a  Run more texts through the embeddings and add to the vectorstore.


        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadata associated with the texts.
            ids: Optional list of ids to associate with the texts.
            set_name: Optional aerospike set name to add the texts to.
            batch_size: Batch size to use when adding the texts to the vectorstore.
            embedding_chunk_size: Chunk size to use when embedding the texts.
            index_name: Optional aerospike index name used for waiting for index
                completion. If not provided, the default index_name will be used.
            wait_for_index: If True, wait for the all the texts to be indexed
                before returning. Requires index_name to be provided. Defaults
                to True.
            kwargs: Additional keyword arguments to pass to the client upsert call.

        Returns:
            List of ids from adding the texts into the vectorstore.

        Nz6if wait_for_index is True, index_name must be providedc                 S  s   g | ]}t t qS r   )r$   uuiduuid4rE   _r   r   r   rG          z'Aerospike.add_texts.<locals>.<listcomp>c                 S  s   g | ]}|  qS r   )copy)rE   mr   r   r   rG          c                 S  s   g | ]}i qS r   r   r`   r   r   r   rG      s    r   )r#   keyr*   Zrecord_data)r#   namer   )r9   r7   r0   rH   rangelenrI   zipr5   r4   r6   r2   Zupsertr8   Zwait_for_index_completion)r<   rB   rT   rV   r*   rX   r%   rZ   r\   iZchunk_textsZ	chunk_idsZchunk_metadatasrA   metadatar!   rJ   idr   r   r   	add_texts   sL    



zAerospike.add_textsOptional[bool]c              	   K  sT   ddl m} |r(|D ]}z| jjd| j||d| W q
 |y'   Y  dS w dS )a7  Delete by vector ID or other criteria.

        Args:
            ids: List of ids to delete.
            **kwargs: Other keyword arguments to pass to client delete call.

        Returns:
            Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
        r   )AVSServerError)r#   rf   r*   FTNr   )r   rp   r2   deleter8   )r<   rV   r*   r\   rp   rm   r   r   r   rq      s   
zAerospike.delete   querykmetadata_keysList[Tuple[Document, float]]c                 K  s    | j | |f|||d|S )a  Return aerospike documents most similar to query, along with scores.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            metadata_keys: List of metadata keys to return with the documents.
                If None, all metadata keys will be returned. Defaults to None.
            index_name: Name of the index to search. Overrides the default
                index_name.
            kwargs: Additional keyword arguments to pass to the search method.

        Returns:
            List of Documents most similar to the query and associated scores.
        rt   ru   r%   )&similarity_search_by_vector_with_scorerL   )r<   rs   rt   ru   r%   r\   r   r   r   similarity_search_with_score  s   z&Aerospike.similarity_search_with_scorec                 K  s   g }|r| j |vr| j g| }|du r| j}|du rtd| jjd|| j|||d|}|D ])}|j}	| j |	v rO|	| j }
|j}|	t
|
|	d|f q0td| j  d q0|S )a  Return aerospike documents most similar to embedding, along with scores.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            metadata_keys: List of metadata keys to return with the documents.
                If None, all metadata keys will be returned. Defaults to None.
            index_name: Name of the index to search. Overrides the default
                index_name.
            kwargs: Additional keyword arguments to pass to the client
                vector_search method.

        Returns:
            List of Documents most similar to the query and associated scores.

        Nzindex_name must be provided)r%   r#   rs   limitfield_names)Zpage_contentrl   zFound document with no `z` key. Skipping.r   )r4   r7   r0   r2   Zvector_searchr8   fieldspopZdistanceappendr   loggerwarning)r<   r!   rt   ru   r%   r\   docsresultsresultrl   rJ   scorer   r   r   rx   *  s6   	
z0Aerospike.similarity_search_by_vector_with_scoreList[Document]c                 K  s$   dd | j |f|||d|D S )ak  Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            metadata_keys: List of metadata keys to return with the documents.
                If None, all metadata keys will be returned. Defaults to None.
            index_name: Name of the index to search. Overrides the default
                index_name.
            kwargs: Additional keyword arguments to pass to the search method.


        Returns:
            List of Documents most similar to the query vector.
        c                 S     g | ]\}}|qS r   r   rE   docra   r   r   r   rG   }  s    z9Aerospike.similarity_search_by_vector.<locals>.<listcomp>rw   )rx   )r<   r!   rt   ru   r%   r\   r   r   r   similarity_search_by_vectorf  s   z%Aerospike.similarity_search_by_vectorc                 K  s(   | j |f|||d|}dd |D S )a*  Return aerospike documents most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            metadata_keys: List of metadata keys to return with the documents.
                If None, all metadata keys will be returned. Defaults to None.
            index_name: Optional name of the index to search. Overrides the
                default index_name.

        Returns:
            List of Documents most similar to the query and score for each
        rw   c                 S  r   r   r   r   r   r   r   rG     re   z/Aerospike.similarity_search.<locals>.<listcomp>)ry   )r<   rs   rt   ru   r%   r\   Zdocs_and_scoresr   r   r   similarity_search  s   zAerospike.similarity_searchCallable[[float], float]c                 C  s>   | j tjkr	| jS | j tjkr| jS | j tjkr| jS td)a  
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.

        0 is dissimilar, 1 is similar.

        Aerospike's relevance_fn assume euclidean and dot product embeddings are
        normalized to unit norm.
        rN   )	r;   r   rP   _cosine_relevance_score_fnrQ   Z%_max_inner_product_relevance_score_fnrR   Z_euclidean_relevance_score_fnr0   r@   r   r   r   _select_relevance_score_fn  s   z$Aerospike._select_relevance_score_fnr   floatc                 C  s   d| d  S )zgAerospike returns cosine distance scores between [0,2]

        0 is dissimilar, 1 is similar.
              r   )r   r   r   r   r     s   z$Aerospike._cosine_relevance_score_fn         ?fetch_klambda_multc           
        s   |rj |vrj g| }j|f|||d| ttj|gtjdfdd D ||d}|rEj |v rE|D ]}	 |	 jj  q9 fdd|D S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree of
                diversity among the results with 0 corresponding to maximum
                diversity and 1 to minimum diversity. Defaults to 0.5.
            metadata_keys: List of metadata keys to return with the documents.
                If None, all metadata keys will be returned. Defaults to None.
            index_name: Optional name of the index to search. Overrides the
                default index_name.
        Returns:
            List of Documents selected by maximal marginal relevance.
        rw   )Zdtypec                   s   g | ]}|j  j qS r   )rl   r5   )rE   r   r@   r   r   rG     rb   zEAerospike.max_marginal_relevance_search_by_vector.<locals>.<listcomp>)rt   r   c                   s   g | ]} | qS r   r   )rE   rk   )r   r   r   rG     re   )r5   r   r   nparrayZfloat32rl   r}   )
r<   r!   rt   r   r   ru   r%   r\   Zmmr_selectedrk   r   )r   r<   r   'max_marginal_relevance_search_by_vector  s*   z1Aerospike.max_marginal_relevance_search_by_vectorc           	      K  s(   |  |}| j||||f||d|S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            index_name: Name of the index to search.
        Returns:
            List of Documents selected by maximal marginal relevance.
        )ru   r%   )rL   r   )	r<   rs   rt   r   r   ru   r%   r\   r!   r   r   r   max_marginal_relevance_search  s   
z'Aerospike.max_marginal_relevance_searchtestr   embeddings_chunk_sizeclient_kwargsOptional[dict]c
                 K  s8   | |||fi |
}|j |f||||d|	pi  |S )a  
        This is a user friendly interface that:
            1. Embeds text.
            2. Converts the texts into documents.
            3. Adds the documents to a provided Aerospike index

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import Aerospike
                from langchain_openai import OpenAIEmbeddings
                from aerospike_vector_search import Client, HostPort

                client = Client(seeds=HostPort(host="localhost", port=5000))
                aerospike = Aerospike.from_texts(
                    ["foo", "bar", "baz"],
                    embedder,
                    client,
                    "namespace",
                    index_name="index",
                    vector_key="vector",
                    distance_strategy=MODEL_DISTANCE_CALC,
                )
        )rT   rV   r%   rX   )rn   )clsrB   r!   rT   r    r#   r%   rV   r   r   r\   r=   r   r   r   
from_texts  s$   (zAerospike.from_texts)r    r   r!   r"   r#   r$   r%   r&   r'   r$   r(   r$   r)   r$   r*   r&   r+   r,   )r   r?   )rB   rC   r   rD   )rJ   r$   r   rK   )r+   rM   r   r   )NNNrS   NT)rB   rC   rT   rU   rV   rW   r*   r&   rX   rY   r%   r&   rZ   r[   r\   r   r   r]   )NN)rV   rW   r*   r&   r\   r   r   ro   )rr   NN)rs   r$   rt   rY   ru   rW   r%   r&   r\   r   r   rv   )r!   rK   rt   rY   ru   rW   r%   r&   r\   r   r   rv   )r!   rK   rt   rY   ru   rW   r%   r&   r\   r   r   r   )rs   r$   rt   rY   ru   rW   r%   r&   r\   r   r   r   )r   r   )r   r   r   r   )rr   r   r   NN)r!   rK   rt   rY   r   rY   r   r   ru   rW   r%   r&   r\   r   r   r   )rs   r$   rt   rY   r   rY   r   r   ru   rW   r%   r&   r\   r   r   r   )NNr   NNrS   N)rB   r]   r!   r   rT   rU   r    r   r#   r$   r%   r&   rV   rW   r   rY   r   r   r\   r   r   r   )__name__
__module____qualname____doc__r   rR   r>   propertyrA   rI   rL   staticmethodr:   rn   rq   ry   rx   r   r   r   r   r   r   classmethodr   r   r   r   r   r   1   s    
6

R#"?%

8&)r   r   )%
__future__r   loggingr^   r.   typingr   r   r   r   r   r   r	   r
   r   numpyr   Zlangchain_core.documentsr   Zlangchain_core.embeddingsr   Zlangchain_core.vectorstoresr   Z&langchain_community.vectorstores.utilsr   r   r   r   rO   r   r   	getLoggerr   r   r   r   r   r   r   r   r   <module>   s"    ,

