o
    Zhm0                     @  s   d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ er@d dlmZ d dlmZ G d	d
 d
eeZedeeef dZeeZdZG dd deZdS )    )annotationsN)Enum)	TYPE_CHECKINGAnyDict	GeneratorIterableListOptionalTypeVarUnion)Document)VectorStore)
Embeddings)
Collectionc                   @  s    e Zd ZdZdZ	 dZ	 dZdS )DocumentDBSimilarityTypez)DocumentDB Similarity Type as enumerator.ZcosineZ
dotProductZ	euclideanN)__name__
__module____qualname____doc__COSDOTZEUC r   r   b/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/vectorstores/documentdb.pyr      s    r   DocumentDBDocumentType)bound   c                   @  s   e Zd ZdZdddddUddZedVddZdWddZedXddZ	dYddZ
dZdd Zd!ejd"d#fd[d+d,Z	-d\d]d3d4Zd^d7d8Ze	-	-d_d`d;d<Zd\dad@dAZd\dbdDdEZ	F	G	-dcdddOdPZ	F	Gded-dQdfdSdTZd-S )gDocumentDBVectorSearcha  `Amazon DocumentDB (with MongoDB compatibility)` vector store.
    Please refer to the official Vector Search documentation for more details:
    https://docs.aws.amazon.com/documentdb/latest/developerguide/vector-search.html

    To use, you should have both:
    - the ``pymongo`` python package installed
    - a connection string and credentials associated with a DocumentDB cluster

    Example:
        . code-block:: python

            from langchain_community.vectorstores import DocumentDBVectorSearch
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            from pymongo import MongoClient

            mongo_client = MongoClient("<YOUR-CONNECTION-STRING>")
            collection = mongo_client["<db_name>"]["<collection_name>"]
            embeddings = OpenAIEmbeddings()
            vectorstore = DocumentDBVectorSearch(collection, embeddings)
    ZvectorSearchIndexZtextContentZvectorContent)
index_nametext_keyembedding_key
collection"Collection[DocumentDBDocumentType]	embeddingr   r   strr   r    c                C  s*   || _ || _|| _|| _|| _tj| _dS )a  Constructor for DocumentDBVectorSearch

        Args:
            collection: MongoDB collection to add the texts to.
            embedding: Text embedding model to use.
            index_name: Name of the Vector Search index.
            text_key: MongoDB field that will contain the text
                for each document.
            embedding_key: MongoDB field that will contain the embedding
                for each document.
        N)_collection
_embedding_index_name	_text_key_embedding_keyr   r   _similarity_type)selfr!   r#   r   r   r    r   r   r   __init__B   s   zDocumentDBVectorSearch.__init__returnc                 C     | j S N)r&   r+   r   r   r   
embeddings]   s   z!DocumentDBVectorSearch.embeddingsc                 C  r.   )zUReturns the index name

        Returns:
            Returns the index name

        )r'   r0   r   r   r   get_index_namea   s   z%DocumentDBVectorSearch.get_index_nameconnection_string	namespacekwargsr   c           
      K  s\   zddl m} W n ty   tdw ||}|d\}}|| | }	| |	|fi |S )a  Creates an Instance of DocumentDBVectorSearch from a Connection String

        Args:
            connection_string: The DocumentDB cluster endpoint connection string
            namespace: The namespace (database.collection)
            embedding: The embedding utility
            **kwargs: Dynamic keyword arguments

        Returns:
            an instance of the vector store

        r   )MongoClientzGCould not import pymongo, please install it with `pip install pymongo`..)Zpymongor6   ImportErrorsplit)
clsr3   r4   r#   r5   r6   clientZdb_nameZcollection_namer!   r   r   r   from_connection_stringj   s   z-DocumentDBVectorSearch.from_connection_stringboolc                 C  s6   | j  }| j}|D ]}|d}||kr dS q
dS )zVerifies if the specified index name during instance
            construction exists on the collection

        Returns:
          Returns True on success and False if no such index exists
            on the collection
        nameTF)r%   Zlist_indexesr'   pop)r+   cursorr   resZcurrent_index_namer   r   r   index_exists   s   

z#DocumentDBVectorSearch.index_existsNonec                 C  s   |   r| j| j dS dS )zEDeletes the index specified during instance construction if it existsN)rB   r%   Z
drop_indexr'   r0   r   r   r   delete_index   s   z#DocumentDBVectorSearch.delete_indexi      @   
dimensionsint
similarityr   mef_constructiondict[str, Any]c              	   C  sH   || _ | jj| j| jdid||||ddgd}| jj}||}|S )a  Creates an index using the index name specified at
            instance construction

        Args:
            dimensions: Number of dimensions for vector similarity.
                The maximum number of supported dimensions is 2000

            similarity: Similarity algorithm to use with the HNSW index.
                 Possible options are:
                    - DocumentDBSimilarityType.COS (cosine distance),
                    - DocumentDBSimilarityType.EUC (Euclidean distance), and
                    - DocumentDBSimilarityType.DOT (dot product).

            m: Specifies the max number of connections for an HNSW index.
                Large impact on memory consumption.

            ef_construction: Specifies the size of the dynamic candidate list
                for constructing the graph for HNSW index. Higher values lead
                to more accurate results but slower indexing speed.


        Returns:
            An object describing the created index

        vectorZhnsw)typerI   rG   rJ   ZefConstruction)r>   keyZvectorOptions)ZcreateIndexesZindexes)r*   r%   r>   r'   r)   Zdatabasecommand)r+   rG   rI   rJ   rK   Zcreate_index_commandsZcurrent_databaseZcreate_index_responsesr   r   r   create_index   s$    z#DocumentDBVectorSearch.create_indexNtextsIterable[str]	metadatasOptional[List[Dict[str, Any]]]r	   c                 K  s   | dt}|pdd |D }g }g }g }tt||D ]%\}	\}
}||
 || |	d | dkrA|| || g }g }q|rM|| || |S )N
batch_sizec                 s  s    | ]}i V  qd S r/   r   ).0_r   r   r   	<genexpr>   s    z3DocumentDBVectorSearch.add_texts.<locals>.<genexpr>   r   )getDEFAULT_INSERT_BATCH_SIZE	enumeratezipappendextend_insert_texts)r+   rR   rT   r5   rV   Z
_metadatasZtexts_batchZmetadatas_batchZ
result_idsitextmetadatar   r   r   	add_texts   s    

z DocumentDBVectorSearch.add_texts	List[str]List[Dict[str, Any]]c                   s@   |sg S  j |} fddt|||D } j|}|jS )zUsed to Load Documents into the collection

        Args:
            texts: The list of documents strings to load
            metadatas: The list of metadata objects associated with each document

        Returns:

        c                   s&   g | ]\}}} j | j|i|qS r   )r(   r)   )rW   trJ   r#   r0   r   r   
<listcomp>  s    z8DocumentDBVectorSearch._insert_texts.<locals>.<listcomp>)r&   Zembed_documentsr^   r%   Zinsert_manyZinserted_ids)r+   rR   rT   r1   Z	to_insertZinsert_resultr   r0   r   ra      s   

z$DocumentDBVectorSearch._insert_textsOptional[List[dict]],Optional[Collection[DocumentDBDocumentType]]c                 K  s4   |d u rt d| ||fi |}|j||d |S )Nz*Must provide 'collection' named parameter.)rT   )
ValueErrorre   )r:   rR   r#   rT   r!   r5   Zvectorstorer   r   r   
from_texts  s
   	z!DocumentDBVectorSearch.from_textsidsOptional[List[str]]Optional[bool]c                 K  s(   |d u rt d|D ]}| | q
dS )Nz#No document ids provided to delete.T)rl   delete_document_by_id)r+   rn   r5   document_idr   r   r   delete  s
   zDocumentDBVectorSearch.deleterr   Optional[str]c              
   C  s\   zddl m} W n ty } ztd|d}~ww |du r"td| jd||i dS )zjRemoves a Specific Document by Id

        Args:
            document_id: The document identifier
        r   )ObjectIdz>Unable to import bson, please install with `pip install bson`.Nz"No document id provided to delete.Z_id)Zbson.objectidru   r8   rl   r%   Z
delete_one)r+   rr   ru   er   r   r   rq   &  s   z,DocumentDBVectorSearch.delete_document_by_id   (   r1   List[float]k	ef_searchfilterOptional[Dict[str, Any]]List[Document]c           
   	   C  sh   |si }d|idd|| j | j||diig}| j|}g }|D ]}|| j}	|t|	|d q |S )a   Returns a list of documents.

        Args:
            embeddings: The query vector
            k: the number of documents to return
            ef_search: Specifies the size of the dynamic candidate list
                that HNSW index uses during search. A higher value of
                efSearch provides better recall at cost of speed.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
        Returns:
            A list of documents closest to the query vector
        z$matchz$searchZvectorSearch)rM   pathrI   rz   ZefSearch)Zpage_contentrd   )r)   r*   r%   Z	aggregater?   r(   r_   r   )
r+   r1   rz   r{   r|   Zpipeliner@   docsrA   rc   r   r   r    _similarity_search_without_score7  s(   z7DocumentDBVectorSearch._similarity_search_without_score)r|   queryc                K  s,   | j |}| j||||d}dd |D S )N)r1   rz   r{   r|   c                 S  s   g | ]}|qS r   r   )rW   docr   r   r   ri   t  s    z<DocumentDBVectorSearch.similarity_search.<locals>.<listcomp>)r&   Zembed_queryr   )r+   r   rz   r{   r|   r5   r1   r   r   r   r   similarity_searchg  s
   	z(DocumentDBVectorSearch.similarity_search)
r!   r"   r#   r   r   r$   r   r$   r    r$   )r-   r   )r-   r$   )
r3   r$   r4   r$   r#   r   r5   r   r-   r   )r-   r=   )r-   rC   )
rG   rH   rI   r   rJ   rH   rK   rH   r-   rL   r/   )rR   rS   rT   rU   r5   r   r-   r	   )rR   rf   rT   rg   r-   r	   )NN)rR   rf   r#   r   rT   rj   r!   rk   r5   r   r-   r   )rn   ro   r5   r   r-   rp   )rr   rt   r-   rC   )rw   rx   N)
r1   ry   rz   rH   r{   rH   r|   r}   r-   r~   )rw   rx   )r   r$   rz   rH   r{   rH   r|   r}   r5   r   r-   r~   )r   r   r   r   r,   propertyr1   r2   classmethodr<   rB   rD   r   r   rQ   re   ra   rm   rs   rq   r   r   r   r   r   r   r   ,   sH    
	

	A
3r   )
__future__r   loggingenumr   typingr   r   r   r   r   r	   r
   r   r   Zlangchain_core.documentsr   Zlangchain_core.vectorstoresr   Zlangchain_core.embeddingsr   Zpymongo.collectionr   r$   r   r   	getLoggerr   loggerr\   r   r   r   r   r   <module>   s    ,
