o
    Zh                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZmZmZmZmZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lm Z m!Z! d d
l"m#Z# d dl$m%Z%m&Z& e'e(Z)ddddZ*dddZ+G dd deZ,dS )    )annotationsN)Path)
AnyCallableDictIterableListOptionalSequenceSizedTupleUnionDocument)
Embeddings)run_in_executor)VectorStore)AddableMixinDocstore)InMemoryDocstore)DistanceStrategymaximal_marginal_relevanceno_avx2Optional[bool]returnr   c                 C  s\   | du rdt jv rtt d} z| rddlm} W |S ddl}W |S  ty-   tdw )aM  
    Import faiss if available, otherwise raise error.
    If FAISS_NO_AVX2 environment variable is set, it will be considered
    to load FAISS with no AVX2 optimization.

    Args:
        no_avx2: Load FAISS strictly with no AVX2 optimization
            so that the vectorstore is portable and compatible with other devices.
    NZFAISS_NO_AVX2r   )	swigfaisszCould not import faiss python package. Please install it with `pip install faiss-gpu` (for CUDA supported GPU) or `pip install faiss-cpu` (depending on Python version).)osenvironboolgetenvfaissr   ImportError)r   r     r"   ]/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/vectorstores/faiss.pydependable_faiss_import'   s   
	
r$   xyx_namestry_nameNonec                 C  sZ   t | tr+t |tr+t| t|kr+t| d| d| dt|  d| dt| d S )Nz and z% expected to be equal length but len(z)=z	 and len()
isinstancer   len
ValueError)r%   r&   r'   r)   r"   r"   r#   _len_check_if_sizedB   s   $r.   c                   @  s  e Zd ZdZddejfdddZedddZdddZ	dddZ
dd"d#Zdd$d%Z		ddd-d.Z		ddd1d2Z		ddd3d4Z		ddd7d8Z	9		:dddBdCZ	9		:dddDdEZ	9		:dddGdHZ	9		:dddIdJZ	9		:dddMdNZ	9		:dddOdPZ	9		:dddQdRZ	9		:dddSdTZd9d:dUddVddYdZZd9d:dUddVdd[d\Z	9	:	U	ddd]d^Z	9	:	U	ddd_d`Z	9	:	U	dddadbZ	9	:	U	dddcddZdddfdgZddjdkZ e!dddejfddmdnZ"e!		dddodpZ#e!		dddrdsZ$e!		dddtduZ%e!		dddvdwZ&dddzd{Z'e!	ddd|dd~dZ(dddZ)e!dd|dddZ*dddZ+	9		:ddddZ,	9		:ddddZ-e.dddZ/dddZ0dS )FAISSu  FAISS vector store integration.

    See [The FAISS Library](https://arxiv.org/pdf/2401.08281) paper.

    Setup:
        Install ``langchain_community`` and ``faiss-cpu`` python packages.

        .. code-block:: bash

            pip install -qU langchain_community faiss-cpu

    Key init args — indexing params:
        embedding_function: Embeddings
            Embedding function to use.

    Key init args — client params:
        index: Any
            FAISS index to use.
        docstore: Docstore
            Docstore to use.
        index_to_docstore_id: Dict[int, str]
            Mapping of index to docstore id.

    Instantiate:
        .. code-block:: python

            import faiss
            from langchain_community.vectorstores import FAISS
            from langchain_community.docstore.in_memory import InMemoryDocstore
            from langchain_openai import OpenAIEmbeddings

            index = faiss.IndexFlatL2(len(OpenAIEmbeddings().embed_query("hello world")))

            vector_store = FAISS(
                embedding_function=OpenAIEmbeddings(),
                index=index,
                docstore= InMemoryDocstore(),
                index_to_docstore_id={}
            )

    Add Documents:
        .. code-block:: python

            from langchain_core.documents import Document

            document_1 = Document(page_content="foo", metadata={"baz": "bar"})
            document_2 = Document(page_content="thud", metadata={"bar": "baz"})
            document_3 = Document(page_content="i will be deleted :(")

            documents = [document_1, document_2, document_3]
            ids = ["1", "2", "3"]
            vector_store.add_documents(documents=documents, ids=ids)

    Delete Documents:
        .. code-block:: python

            vector_store.delete(ids=["3"])

    Search:
        .. code-block:: python

            results = vector_store.similarity_search(query="thud",k=1)
            for doc in results:
                print(f"* {doc.page_content} [{doc.metadata}]")

        .. code-block:: python

            * thud [{'bar': 'baz'}]

    Search with filter:
        .. code-block:: python

            results = vector_store.similarity_search(query="thud",k=1,filter={"bar": "baz"})
            for doc in results:
                print(f"* {doc.page_content} [{doc.metadata}]")

        .. code-block:: python

            * thud [{'bar': 'baz'}]

    Search with score:
        .. code-block:: python

            results = vector_store.similarity_search_with_score(query="qux",k=1)
            for doc, score in results:
                print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")

        .. code-block:: python

            * [SIM=0.335304] foo [{'baz': 'bar'}]

    Async:
        .. code-block:: python

            # add documents
            # await vector_store.aadd_documents(documents=documents, ids=ids)

            # delete documents
            # await vector_store.adelete(ids=["3"])

            # search
            # results = vector_store.asimilarity_search(query="thud",k=1)

            # search with score
            results = await vector_store.asimilarity_search_with_score(query="qux",k=1)
            for doc,score in results:
                print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")

        .. code-block:: python

            * [SIM=0.335304] foo [{'baz': 'bar'}]

    Use as Retriever:
        .. code-block:: python

            retriever = vector_store.as_retriever(
                search_type="mmr",
                search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5},
            )
            retriever.invoke("thud")

        .. code-block:: python

            [Document(metadata={'bar': 'baz'}, page_content='thud')]

    NFembedding_function/Union[Callable[[str], List[float]], Embeddings]indexr   docstorer   index_to_docstore_idDict[int, str]relevance_score_fn"Optional[Callable[[float], float]]normalize_L2r   distance_strategyr   c                 C  sn   t |ts
td || _|| _|| _|| _|| _|| _	|| _
| jtjkr3| j
r5td| j  dS dS dS )z%Initialize with necessary components.t`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.z2Normalizing L2 is not applicable for metric type: N)r+   r   loggerwarningr0   r2   r3   r4   r9   override_relevance_score_fn_normalize_L2r   EUCLIDEAN_DISTANCEwarningswarn)selfr0   r2   r3   r4   r6   r8   r9   r"   r"   r#   __init__   s(   
zFAISS.__init__r   Optional[Embeddings]c                 C  s   t | jtr	| jS d S N)r+   r0   r   rB   r"   r"   r#   
embeddings   s
   
zFAISS.embeddingstexts	List[str]List[List[float]]c                   s*   t  jtr j|S  fdd|D S )Nc                   s   g | ]}  |qS r"   )r0   ).0textrF   r"   r#   
<listcomp>       z*FAISS._embed_documents.<locals>.<listcomp>)r+   r0   r   embed_documentsrB   rH   r"   rF   r#   _embed_documents   s   zFAISS._embed_documentsc                   (   t | jtr| j|I d H S tdNr:   )r+   r0   r   aembed_documents	ExceptionrP   r"   r"   r#   _aembed_documents   s   zFAISS._aembed_documentsrL   r(   List[float]c                 C  s"   t | jtr| j|S | |S rE   )r+   r0   r   Zembed_queryrB   rL   r"   r"   r#   _embed_query  s   
zFAISS._embed_queryc                   rR   rS   )r+   r0   r   Zaembed_queryrU   rX   r"   r"   r#   _aembed_query  s   zFAISS._aembed_queryIterable[str]rG   Iterable[List[float]]	metadatasOptional[Iterable[dict]]idsOptional[List[str]]c           
        s   t  }t| jtstd| j dt||dd |p!dd |D }t||dd |p1dd	 |D }d
d t|||D }t||dd |rTt|tt|krTtdt	j
|t	jd}| jrd|| | j| | jdd t||D  t| j  fddt|D }	| j|	 |S )NzSIf trying to add texts, the underlying docstore should support adding items, which z	 does notrH   r]   c                 S  s   g | ]}t t qS r"   )r(   uuiduuid4rK   _r"   r"   r#   rM   (      zFAISS.__add.<locals>.<listcomp>r_   c                 s  s    | ]}i V  qd S rE   r"   rc   r"   r"   r#   	<genexpr>+  s    zFAISS.__add.<locals>.<genexpr>c                 S  s    g | ]\}}}t |||d qS ))idZpage_contentmetadatar   )rK   id_tmr"   r"   r#   rM   ,  s    	documentsrG   z$Duplicate ids found in the ids list.Zdtypec                 S     i | ]\}}||qS r"   r"   )rK   ri   docr"   r"   r#   
<dictcomp><  rN   zFAISS.__add.<locals>.<dictcomp>c                   s   i | ]	\}} | |qS r"   r"   )rK   jri   starting_lenr"   r#   rp   >      )r$   r+   r3   r   r-   r.   zipr,   setnparrayfloat32r>   r8   r2   addr4   	enumerateupdate)
rB   rH   rG   r]   r_   r    Z
_metadatasrl   vectorindex_to_idr"   rr   r#   Z__add  s4   


zFAISS.__addOptional[List[dict]]kwargsc                 K  s$   t |}| |}| j||||dS )al  Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs.

        Returns:
            List of ids from adding the texts into the vectorstore.
        r]   r_   )listrQ   _FAISS__addrB   rH   r]   r_   r   rG   r"   r"   r#   	add_textsB  s   
zFAISS.add_textsc                   s,   t |}| |I dH }| j||||dS )a  Run more texts through the embeddings and add to the vectorstore
            asynchronously.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs.

        Returns:
            List of ids from adding the texts into the vectorstore.
        Nr   )r   rV   r   r   r"   r"   r#   
aadd_textsW  s   zFAISS.aadd_textstext_embeddings!Iterable[Tuple[str, List[float]]]c                 K  s   t | \}}| j||||dS )a  Add the given texts and embeddings to the vectorstore.

        Args:
            text_embeddings: Iterable pairs of string and embedding to
                add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs.

        Returns:
            List of ids from adding the texts into the vectorstore.
        r   )ru   r   )rB   r   r]   r_   r   rH   rG   r"   r"   r#   add_embeddingsm  s   zFAISS.add_embeddings      	embeddingkintfilter)Optional[Union[Callable, Dict[str, Any]]]fetch_kList[Tuple[Document, float]]c                   s@  t  }tj|gtjd}| jr|| | j||du r|n|\}}	g }
|dur.| |}t	|	d D ]C\}}|dkr=q4| j
| }| j|}t|tsWtd| d| |durl||jrk|
||d | f q4|
||d | f q4|ddur| jtjtjfv rtjntj  fdd	|
D }
|
d| S )
a  Return docs most similar to query.

        Args:
            embedding: Embedding vector to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Union[Callable, Dict[str, Any]]]): Filter by metadata.
                Defaults to None. If a callable, it must take as input the
                metadata dict of Document and return a bool.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.
            **kwargs: kwargs to be passed to similarity search. Can include:
                score_threshold: Optional, a floating point value between 0 to 1 to
                    filter the resulting set of retrieved docs

        Returns:
            List of documents most similar to the query text and L2 distance
            in float for each. Lower score represents more similarity.
        rm   Nr   Could not find document for id , got score_thresholdc                   s"   g | ]\}} |r||fqS r"   r"   )rK   ro   Z
similaritycmpr   r"   r#   rM     s    z@FAISS.similarity_search_with_score_by_vector.<locals>.<listcomp>)r$   rw   rx   ry   r>   r8   r2   search_create_filter_funcr{   r4   r3   r+   r   r-   rh   appendgetr9   r   MAX_INNER_PRODUCTZJACCARDoperatorgele)rB   r   r   r   r   r   r    r}   scoresindicesdocsfilter_funcrq   i_idro   r"   r   r#   &similarity_search_with_score_by_vector  s@   






z,FAISS.similarity_search_with_score_by_vectorc                   s&   t d| j|f|||d|I dH S )a  Return docs most similar to query asynchronously.

        Args:
            embedding: Embedding vector to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, Any]]): Filter by metadata.
                Defaults to None. If a callable, it must take as input the
                metadata dict of Document and return a bool.

            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.
            **kwargs: kwargs to be passed to similarity search. Can include:
                score_threshold: Optional, a floating point value between 0 to 1 to
                    filter the resulting set of retrieved docs

        Returns:
            List of documents most similar to the query text and L2 distance
            in float for each. Lower score represents more similarity.
        Nr   r   r   )r   r   )rB   r   r   r   r   r   r"   r"   r#   'asimilarity_search_with_score_by_vector  s   z-FAISS.asimilarity_search_with_score_by_vectorqueryc                 K  s(   |  |}| j||f||d|}|S )a  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata.
                Defaults to None. If a callable, it must take as input the
                metadata dict of Document and return a bool.

            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of documents most similar to the query text with
            L2 distance in float. Lower score represents more similarity.
        r   r   )rY   r   rB   r   r   r   r   r   r   r   r"   r"   r#   similarity_search_with_score  s   
z"FAISS.similarity_search_with_scorec                   s6   |  |I dH }| j||f||d|I dH }|S )a  Return docs most similar to query asynchronously.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata.
                Defaults to None. If a callable, it must take as input the
                metadata dict of Document and return a bool.

            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of documents most similar to the query text with
            L2 distance in float. Lower score represents more similarity.
        Nr   )rZ   r   r   r"   r"   r#   asimilarity_search_with_score  s   z#FAISS.asimilarity_search_with_scoreOptional[Dict[str, Any]]List[Document]c                 K  (   | j ||f||d|}dd |D S )aY  Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata.
                Defaults to None. If a callable, it must take as input the
                metadata dict of Document and return a bool.

            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the embedding.
        r   c                 S     g | ]\}}|qS r"   r"   rK   ro   rd   r"   r"   r#   rM   M      z5FAISS.similarity_search_by_vector.<locals>.<listcomp>)r   rB   r   r   r   r   r   docs_and_scoresr"   r"   r#   similarity_search_by_vector/  s   z!FAISS.similarity_search_by_vectorc                   0   | j ||f||d|I dH }dd |D S )ah  Return docs most similar to embedding vector asynchronously.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter (Optional[Dict[str, str]]): Filter by metadata.
                Defaults to None. If a callable, it must take as input the
                metadata dict of Document and return a bool.

            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the embedding.
        r   Nc                 S  r   r"   r"   r   r"   r"   r#   rM   m  r   z6FAISS.asimilarity_search_by_vector.<locals>.<listcomp>)r   r   r"   r"   r#   asimilarity_search_by_vectorO  s   z"FAISS.asimilarity_search_by_vectorc                 K  r   )a  Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the query.
        r   c                 S  r   r"   r"   r   r"   r"   r#   rM     r   z+FAISS.similarity_search.<locals>.<listcomp>)r   rB   r   r   r   r   r   r   r"   r"   r#   similarity_searcho  s   zFAISS.similarity_searchc                   r   )a  Return docs most similar to query asynchronously.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
            fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                      Defaults to 20.

        Returns:
            List of Documents most similar to the query.
        r   Nc                 S  r   r"   r"   r   r"   r"   r#   rM     r   z,FAISS.asimilarity_search.<locals>.<listcomp>)r   r   r"   r"   r#   asimilarity_search  s   zFAISS.asimilarity_search      ?r   r   lambda_multr   r   floatc                  s\   j tj|gtjd|du r|n|d \}}|durY |}g }	|d D ]+}
|
dkr.q' j|
 } j|}t|t	sHt
d| d| ||jrR|	|
 q't|	g} fdd	|d D }ttj|gtjd|||d
}g }|D ]4}
|d |
 dkrqw j|d |
  } j|}t|t	st
d| d| |||d |
 f qw|S )az  Return docs and their similarity scores selected using the maximal marginal
            relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents and similarity scores selected by maximal marginal
                relevance and score for each.
        rm   N   r   r   r   r   c                   s$   g | ]}|d kr j t|qS )r   )r2   Zreconstructr   )rK   r   rF   r"   r#   rM     s   $ zLFAISS.max_marginal_relevance_search_with_score_by_vector.<locals>.<listcomp>)r   r   )r2   r   rw   rx   ry   r   r4   r3   r+   r   r-   rh   r   r   )rB   r   r   r   r   r   r   r   r   Zfiltered_indicesr   r   ro   rG   Zmmr_selectedr   r"   rF   r#   2max_marginal_relevance_search_with_score_by_vector  sF   





z8FAISS.max_marginal_relevance_search_with_score_by_vectorc             	     s    t d| j|||||dI dH S )a  Return docs and their similarity scores selected using the maximal marginal
            relevance asynchronously.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents and similarity scores selected by maximal marginal
                relevance and score for each.
        Nr   )r   r   )rB   r   r   r   r   r   r"   r"   r#   3amax_marginal_relevance_search_with_score_by_vector  s   z9FAISS.amax_marginal_relevance_search_with_score_by_vectorc                 K  s"   | j |||||d}dd |D S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        r   c                 S  r   r"   r"   r   r"   r"   r#   rM   )  r   zAFAISS.max_marginal_relevance_search_by_vector.<locals>.<listcomp>)r   rB   r   r   r   r   r   r   r   r"   r"   r#   'max_marginal_relevance_search_by_vector  s   
z-FAISS.max_marginal_relevance_search_by_vectorc                   s*   | j |||||dI dH }dd |D S )a(  Return docs selected using the maximal marginal relevance asynchronously.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        r   Nc                 S  r   r"   r"   r   r"   r"   r#   rM   J  r   zBFAISS.amax_marginal_relevance_search_by_vector.<locals>.<listcomp>)r   r   r"   r"   r#   (amax_marginal_relevance_search_by_vector+  s   

z.FAISS.amax_marginal_relevance_search_by_vectorc           	      K  s*   |  |}| j|f||||d|}|S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering (if needed) to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        r   )rY   r   	rB   r   r   r   r   r   r   r   r   r"   r"   r#   max_marginal_relevance_searchL  s   
z#FAISS.max_marginal_relevance_searchc           	        s8   |  |I dH }| j|f||||d|I dH }|S )a+  Return docs selected using the maximal marginal relevance asynchronously.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering (if needed) to
                     pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        Nr   )rZ   r   r   r"   r"   r#   amax_marginal_relevance_searchq  s   z$FAISS.amax_marginal_relevance_searchr   c                   s   |du rt dt|| j }|rt d| dd | j D fdd|D  | jtj	 tj
d | j|  fd	d
t| j D }dd t|D | _dS )zDelete by ID. These are the IDs in the vectorstore.

        Args:
            ids: List of ids to delete.

        Returns:
            Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
        NzNo ids provided to delete.zESome specified ids do not exist in the current store. Ids not found: c                 S  s   i | ]\}}||qS r"   r"   )rK   idxri   r"   r"   r#   rp     rN   z FAISS.delete.<locals>.<dictcomp>c                   s   h | ]} | qS r"   r"   rK   ri   )reversed_indexr"   r#   	<setcomp>  r   zFAISS.delete.<locals>.<setcomp>rm   c                   s   g | ]
\}}| vr|qS r"   r"   rK   r   ri   )index_to_deleter"   r#   rM     s
    z FAISS.delete.<locals>.<listcomp>c                 S  rn   r"   r"   r   r"   r"   r#   rp     rN   T)r-   rv   
differencer4   valuesitemsr2   Z
remove_idsrw   ZfromiterZint64r3   deletesortedr{   )rB   r_   r   Zmissing_idsZremaining_idsr"   )r   r   r#   r     s$   

zFAISS.deletetargetr*   c                 C  s   t | jts
tdt| j}| j|j g }|j D ]\}}|j	|}t |t
s0td||| ||f q| jdd |D  dd |D }| j| dS )zMerge another FAISS object with the current one.

        Add the target FAISS to the current one.

        Args:
            target: FAISS object you wish to merge into the current one

        Returns:
            None.
        z'Cannot merge with this type of docstorezDocument should be returnedc                 S  s   i | ]\}}}||qS r"   r"   )rK   rd   r   ro   r"   r"   r#   rp     re   z$FAISS.merge_from.<locals>.<dictcomp>c                 S  s   i | ]\}}}||qS r"   r"   )rK   r2   r   rd   r"   r"   r#   rp     re   N)r+   r3   r   r-   r,   r4   r2   
merge_fromr   r   r   r   rz   r|   )rB   r   rs   Z	full_infor   Z	target_idro   r~   r"   r"   r#   r     s   

zFAISS.merge_fromr   c                 K  s   t  }	|tjkr|	t|d }
n	|	t|d }
|dt }|di }| ||
||f||d|}|j||||d |S )Nr   r3   r4   )r8   r9   r   )	r$   r   r   ZIndexFlatIPr,   ZIndexFlatL2popr   r   )clsrH   rG   r   r]   r_   r8   r9   r   r    r2   r3   r4   Zvecstorer"   r"   r#   Z__from  s&   
	zFAISS.__fromc                 K  s&   | |}| j|||f||d|S )aO  Construct FAISS wrapper from raw documents.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the FAISS database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import FAISS
                from langchain_community.embeddings import OpenAIEmbeddings

                embeddings = OpenAIEmbeddings()
                faiss = FAISS.from_texts(texts, embeddings)
        r   )rO   _FAISS__fromr   rH   r   r]   r_   r   rG   r"   r"   r#   
from_texts  s   
zFAISS.from_texts	list[str]c                   s.   | |I dH }| j|||f||d|S )ae  Construct FAISS wrapper from raw documents asynchronously.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the FAISS database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import FAISS
                from langchain_community.embeddings import OpenAIEmbeddings

                embeddings = OpenAIEmbeddings()
                faiss = await FAISS.afrom_texts(texts, embeddings)
        Nr   )rT   r   r   r"   r"   r#   afrom_texts  s   zFAISS.afrom_textsc                 K  s0   t | \}}| jt|t||f||d|S )a  Construct FAISS wrapper from raw documents.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the FAISS database

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import FAISS
                from langchain_community.embeddings import OpenAIEmbeddings

                embeddings = OpenAIEmbeddings()
                text_embeddings = embeddings.embed_documents(texts)
                text_embedding_pairs = zip(texts, text_embeddings)
                faiss = FAISS.from_embeddings(text_embedding_pairs, embeddings)
        r   )ru   r   r   )r   r   r   r]   r_   r   rH   rG   r"   r"   r#   from_embeddingsB  s   zFAISS.from_embeddingsc                   s   | j ||f||d|S )z:Construct FAISS wrapper from raw documents asynchronously.r   )r   )r   r   r   r]   r_   r   r"   r"   r#   afrom_embeddingsi  s   
zFAISS.afrom_embeddingsfolder_path
index_namec                 C  s   t |}|jddd t }|| jt|| d  t|| d d}t| j	| j
f| W d   dS 1 s<w   Y  dS )a  Save FAISS index, docstore, and index_to_docstore_id to disk.

        Args:
            folder_path: folder path to save index, docstore,
                and index_to_docstore_id to.
            index_name: for saving with a specific index file name
        T)exist_okparents.faiss.pklwbN)r   mkdirr$   Zwrite_indexr2   r(   openpickledumpr3   r4   )rB   r   r   pathr    fr"   r"   r#   
save_local{  s   "zFAISS.save_local)allow_dangerous_deserializationr   c                K  s   |st dt|}t }|t|| d }t|| d d}	t|	\}
}W d   n1 s5w   Y  | |||
|fi |S )a  Load FAISS index, docstore, and index_to_docstore_id from disk.

        Args:
            folder_path: folder path to load index, docstore,
                and index_to_docstore_id from.
            embeddings: Embeddings to use when generating queries
            index_name: for saving with a specific index file name
            allow_dangerous_deserialization: whether to allow deserialization
                of the data which involves loading a pickle file.
                Pickle files can be modified by malicious actors to deliver a
                malicious payload that results in execution of
                arbitrary code on your machine.
        B  The de-serialization relies loading a pickle file. Pickle files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to enable deserialization. If you do this, make sure that you trust the source of the data. For example, if you are loading a file that you created, and know that no one else has modified the file, then this is safe to do. Do not set this to `True` if you are loading a file from an untrusted source (e.g., some random site on the internet.).r   r   rbN)r-   r   r$   Z
read_indexr(   r   r   load)r   r   rG   r   r   r   r   r    r2   r   r3   r4   r"   r"   r#   
load_local  s    zFAISS.load_localbytesc                 C  s   t | j| j| jfS )zCSerialize FAISS index, docstore, and index_to_docstore_id to bytes.)r   dumpsr2   r3   r4   rF   r"   r"   r#   serialize_to_bytes  s   zFAISS.serialize_to_bytes
serializedc                K  s2   |st dt|\}}}| ||||fi |S )zGDeserialize FAISS index, docstore, and index_to_docstore_id from bytes.r   )r-   r   loads)r   r   rG   r   r   r2   r3   r4   r"   r"   r#   deserialize_from_bytes  s   
zFAISS.deserialize_from_bytesCallable[[float], float]c                 C  sN   | j dur| j S | jtjkr| jS | jtjkr| jS | jtjkr#| jS t	d)a8  
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.
        NzJUnknown distance strategy, must be cosine, max_inner_product, or euclidean)
r=   r9   r   r   Z%_max_inner_product_relevance_score_fnr?   Z_euclidean_relevance_score_fnZCOSINEZ_cosine_relevance_score_fnr-   rF   r"   r"   r#   _select_relevance_score_fn  s   
	z FAISS._select_relevance_score_fnc                   sH   |     du rtd| j|f|||d|} fdd|D }|S )?Return docs and their similarity scores on a scale from 0 to 1.NLrelevance_score_fn must be provided to FAISS constructor to normalize scoresr   c                      g | ]
\}}| |fqS r"   r"   rK   ro   Zscorer6   r"   r#   rM         zBFAISS._similarity_search_with_relevance_scores.<locals>.<listcomp>)r   r-   r   rB   r   r   r   r   r   r   Zdocs_and_rel_scoresr"   r   r#   (_similarity_search_with_relevance_scores  s$   
z.FAISS._similarity_search_with_relevance_scoresc                   sP   |     du rtd| j|f|||d|I dH } fdd|D }|S )r   Nr   r   c                   r   r"   r"   r   r   r"   r#   rM   6  r   zCFAISS._asimilarity_search_with_relevance_scores.<locals>.<listcomp>)r   r-   r   r  r"   r   r#   )_asimilarity_search_with_relevance_scores  s&   
z/FAISS._asimilarity_search_with_relevance_scores Callable[[Dict[str, Any]], bool]c                   s   t | r| S t| tstdt|  ddlm}m}m}m	}m
}m} ||||||d}dd dd d}||B  tt g d	 }	d
| D ]}
|
r\|
dr\|
|	vr\td|
 qHd fdddfdd| S )a  
        Create a filter function based on the provided filter.

        Args:
            filter: A callable or a dictionary representing the filter
            conditions for documents.

        Returns:
            A function that takes Document's metadata and returns True if it
            satisfies the filter conditions, otherwise False.

        Raises:
            ValueError: If the filter is invalid or contains unsuported operators.
        z5filter must be a dict of metadata or a callable, not r   )eqr   gtr   ltne)z$eqz$neqz$gtz$ltz$gtez$ltec                 S  s   | |v S rE   r"   abr"   r"   r#   <lambda>`      z+FAISS._create_filter_func.<locals>.<lambda>c                 S  s   | |vS rE   r"   r	  r"   r"   r#   r  a  r  )z$inz$nin)$and$or$not
   $&filter contains unsupported operator: fieldr(   	condition%Union[Dict[str, Any], List[Any], Any]r   r  c                   s   t  tr.g   D ]\}}|vrtd| | |f qdfdd}|S t  trKt krDt fdd	S  fd
d	S  fdd	S )a  
            Creates a filter function based on field and condition.

            Args:
                field: The document field to filter on
                condition: Filter condition (dict for operators, list for in,
                           or direct value for equality)

            Returns:
                A filter function that takes a document and returns boolean
            r  ro   Dict[str, Any]r   r   c                   s    |   t fddD S )aW  
                    Evaluates a document against a set of predefined operators
                    and their values. This function applies multiple
                    comparison/sequence operators to a specific field value
                    from the document. All conditions must be satisfied for the
                    function to return True.

                    Args:
                        doc (Dict[str, Any]): The document to evaluate, containing
                        key-value pairs where keys are field names and values
                        are the field values. The document must contain the field
                        being filtered.

                    Returns:
                        bool: True if the document's field value satisfies all
                            operator conditions, False otherwise.
                    c                 3  s    | ]
\}}| |V  qd S rE   r"   )rK   opvalueZ	doc_valuer"   r#   rf     s    zYFAISS._create_filter_func.<locals>.filter_func_cond.<locals>.filter_fn.<locals>.<genexpr>)r   allro   )r  	operatorsr  r#   	filter_fn  s   
zFFAISS._create_filter_func.<locals>.filter_func_cond.<locals>.filter_fnc                      |   v S rE   r   r  )condition_setr  r"   r#   r        zEFAISS._create_filter_func.<locals>.filter_func_cond.<locals>.<lambda>c                   r  rE   r   r  r  r  r"   r#   r    r"  c                   s   |   kS rE   r   r  r#  r"   r#   r    r"  N)ro   r  r   r   )r+   dictr   r-   r   r   r,   	frozenset)r  r  r  r  r  )
OPERATIONSSET_CONVERT_THRESHOLD)r  r!  r  r  r#   filter_func_condl  s   

z3FAISS._create_filter_func.<locals>.filter_func_condr   r  c                   s   d| v rfdd| d D fddS d| v r*fdd| d D fddS d	| v r:| d	   fd
dS fdd|   D fddS )a  
            Creates a filter function that evaluates documents against specified
            filter conditions.

            This function processes a dictionary of filter conditions and returns
            a callable that can evaluate documents against these conditions. It
            supports logical operators ($and, $or, $not) and field-level filtering.

            Args:
                filter (Dict[str, Any]): A dictionary containing filter conditions.
                Can include:
                    - Logical operators ($and, $or, $not) with lists of sub-filters
                    - Field-level conditions with comparison or sequence operators
                    - Direct field-value mappings for equality comparison

            Returns:
                Callable[[Dict[str, Any]], bool]: A function that takes a document
                (as a dictionary) and returns True if the document matches all
                filter conditions, False otherwise.
            r  c                      g | ]} |qS r"   r"   rK   Z
sub_filterr   r"   r#   rM     r   zBFAISS._create_filter_func.<locals>.filter_func.<locals>.<listcomp>c                      t  fddD S )Nc                 3      | ]}| V  qd S rE   r"   rK   r   r  r"   r#   rf         SFAISS._create_filter_func.<locals>.filter_func.<locals>.<lambda>.<locals>.<genexpr>r  r  filtersr  r#   r    rN   z@FAISS._create_filter_func.<locals>.filter_func.<locals>.<lambda>r  c                   r)  r"   r"   r*  r+  r"   r#   rM     r   c                   r,  )Nc                 3  r-  rE   r"   r.  r  r"   r#   rf     r/  r0  )anyr  r2  r  r#   r    rN   r  c                   s
    |  S rE   r"   r  )condr"   r#   r    s   
 c                   s   g | ]	\}} ||qS r"   r"   )rK   r  r  )r(  r"   r#   rM     s    c                   r,  )Nc                 3  r-  rE   r"   )rK   r  r  r"   r#   rf     r/  r0  r1  r  )
conditionsr  r#   r    rN   )r   )r   )r   r(  )r5  r6  r3  r#   r     s   
z.FAISS._create_filter_func.<locals>.filter_funcN)r  r(   r  r  r   r  )r   r  r   r  )callabler+   r$  r-   typer   r  r   r  r   r  r  r%  r   
startswith)r   r  r   r  r   r  r  ZCOMPARISON_OPERATORSZSEQUENCE_OPERATORSZVALID_OPERATORSr  r"   )r&  r'  r   r(  r#   r   ;  s6   
 	4'zFAISS._create_filter_funcSequence[str]list[Document]c                  s     fdd|D }dd |D S )Nc                   s   g | ]} j |qS r"   )r3   r   r   rF   r"   r#   rM     re   z$FAISS.get_by_ids.<locals>.<listcomp>c                 S  s   g | ]	}t |tr|qS r"   )r+   r   )rK   ro   r"   r"   r#   rM     rt   r"   )rB   r_   r   r"   rF   r#   
get_by_ids  s   zFAISS.get_by_ids)r0   r1   r2   r   r3   r   r4   r5   r6   r7   r8   r   r9   r   )r   rD   )rH   rI   r   rJ   )rL   r(   r   rW   )NN)
rH   r[   rG   r\   r]   r^   r_   r`   r   rI   )
rH   r[   r]   r   r_   r`   r   r   r   rI   )
r   r   r]   r   r_   r`   r   r   r   rI   )r   Nr   )r   rW   r   r   r   r   r   r   r   r   r   r   )r   r(   r   r   r   r   r   r   r   r   r   r   )r   rW   r   r   r   r   r   r   r   r   r   r   )r   rW   r   r   r   r   r   r   r   r   r   r   )r   r(   r   r   r   r   r   r   r   r   r   r   )r   rW   r   r   r   r   r   r   r   r   r   r   )r   r   r   N)r   rW   r   r   r   r   r   r   r   r   r   r   r   r   )r   r(   r   r   r   r   r   r   r   r   r   r   r   r   rE   )r_   r`   r   r   r   r   )r   r/   r   r*   )rH   r[   rG   rJ   r   r   r]   r^   r_   r`   r8   r   r9   r   r   r   r   r/   )rH   rI   r   r   r]   r   r_   r`   r   r   r   r/   )rH   r   r   r   r]   r   r_   r`   r   r   r   r/   )r   r   r   r   r]   r^   r_   r`   r   r   r   r/   )r2   )r   r(   r   r(   r   r*   )r   r(   rG   r   r   r(   r   r   r   r   r   r/   )r   r   )
r   r   rG   r   r   r   r   r   r   r/   )r   r   )r   r   r   r  )r_   r:  r   r;  )1__name__
__module____qualname____doc__r   r?   rC   propertyrG   rQ   rV   rY   rZ   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   classmethodr   r   r   r   r   r   r   r   r   r   r  r  staticmethodr   r<  r"   r"   r"   r#   r/   K   s    	#



-D*%%##H*"$(%
" $$&
3
  r/   rE   )r   r   r   r   )
r%   r   r&   r   r'   r(   r)   r(   r   r*   )-
__future__r   loggingr   r   r   ra   r@   pathlibr   typingr   r   r   r   r   r	   r
   r   r   r   numpyrw   Zlangchain_core.documentsr   Zlangchain_core.embeddingsr   Zlangchain_core.runnables.configr   Zlangchain_core.vectorstoresr   Z!langchain_community.docstore.baser   r   Z&langchain_community.docstore.in_memoryr   Z&langchain_community.vectorstores.utilsr   r   	getLoggerr=  r;   r$   r.   r/   r"   r"   r"   r#   <module>   s*    0

	