o
    Zh;                     @  s   d dl mZ d dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ eeZG dd deZdS )    )annotationsN)deepcopy)Enum)AnyIterableListOptionalTuple)Document)
Embeddings)run_in_executor)VectorStore)maximal_marginal_relevancec                   @  s  e Zd ZdZ	dPdQddZedRddZ			dSdTddZe							dUdVd!d"Z	G d#d$ d$e
Zd%ejdfdWd,d-Zd%ejdfdXd/d0Zd%ejdfdYd2d3Zd%ejdfdZd4d5Z	%	6	7d[dd8d\d<d=Z	%		>d]d^dBdCZd_dFdGZd`dIdJZdadbdLdMZ	dadbdNdOZdS )cRockseta  `Rockset` vector store.

    To use, you should have the `rockset` python package installed. Note that to use
    this, the collection being used must already exist in your Rockset instance.
    You must also ensure you use a Rockset ingest transformation to apply
    `VECTOR_ENFORCE` on the column being used to store `embedding_key` in the
    collection.
    See: https://rockset.com/blog/introducing-vector-search-on-rockset/ for more details

    Everything below assumes `commons` Rockset workspace.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Rockset
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            import rockset

            # Make sure you use the right host (region) for your Rockset instance
            # and APIKEY has both read-write access to your collection.

            rs = rockset.RocksetClient(host=rockset.Regions.use1a1, api_key="***")
            collection_name = "langchain_demo"
            embeddings = OpenAIEmbeddings()
            vectorstore = Rockset(rs, collection_name, embeddings,
                "description", "description_embedding")

    commonsclientr   
embeddingsr   collection_namestrtext_keyembedding_key	workspacec                 C  s   zddl m} W n ty   tdw t||s"tdt| || _|| _|| _|| _	|| _
|| _z	| jd W dS  tyG   Y dS w )aN  Initialize with Rockset client.
        Args:
            client: Rockset client object
            collection: Rockset collection to insert docs / query
            embeddings: Langchain Embeddings object to use to generate
                        embedding for given text.
            text_key: column in Rockset collection to use to store the text
            embedding_key: column in Rockset collection to use to store the embedding.
                           Note: We must apply `VECTOR_ENFORCE()` on this column via
                           Rockset ingest transformation.

        r   )RocksetClient]Could not import rockset client python package. Please install it with `pip install rockset`.z;client should be an instance of rockset.RocksetClient, got Z	langchainN)rocksetr   ImportError
isinstance
ValueErrortype_client_collection_name_embeddings	_text_key_embedding_key
_workspaceZset_applicationAttributeError)selfr   r   r   r   r   r   r    r'   a/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/vectorstores/rocksetdb.py__init__1   s0   
zRockset.__init__returnc                 C  s   | j S N)r!   r&   r'   r'   r(   r   a   s   zRockset.embeddingsN    textsIterable[str]	metadatasOptional[List[dict]]idsOptional[List[str]]
batch_sizeintkwargs	List[str]c                 K  s   g }g }t |D ]D\}}	t||kr|| |7 }g }i }
|r+t||kr+t|| }
|r9t||kr9|| |
d< |	|
| j< | j|	|
| j< ||
 qt|dkr\|| |7 }g }|S )a  Run more texts through the embeddings and add to the vectorstore

                Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids to associate with the texts.
            batch_size: Send documents in batches to rockset.

        Returns:
            List of ids from adding the texts into the vectorstore.

        _idr   )		enumeratelen_write_documents_to_rocksetr   r"   r!   embed_queryr#   append)r&   r.   r0   r2   r4   r6   batchZ
stored_idsitextdocr'   r'   r(   	add_textse   s$   
zRockset.add_texts 	embeddingc
                 K  sX   |dusJ d|sJ d|sJ d|sJ d| |||||}| ||||	 |S )znCreate Rockset wrapper with existing texts.
        This is intended as a quicker way to get started.
        NzRockset Client cannot be NonezCollection name cannot be emptyzText key name cannot be emptyzEmbedding key cannot be empty)rB   )clsr.   rD   r0   r   r   r   r   r2   r4   r6   r   r'   r'   r(   
from_texts   s   zRockset.from_textsc                   @  s"   e Zd ZdZdZdZd	ddZdS )
zRockset.DistanceFunction
COSINE_SIMEUCLIDEAN_DISTDOT_PRODUCTr*   r   c                 C  s   | j dkrdS dS )NrH   ASCZDESC)valuer,   r'   r'   r(   order_by   s   
z!Rockset.DistanceFunction.order_byN)r*   r   )__name__
__module____qualname__rG   rH   rI   rL   r'   r'   r'   r(   DistanceFunction   s
    rP      querykdistance_func	where_strOptional[str]List[Tuple[Document, float]]c                 K      | j | j||||fi |S )a  Perform a similarity search with Rockset

        Args:
            query (str): Text to look up documents similar to.
            distance_func (DistanceFunction): how to compute distance between two
                vectors in Rockset.
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): Metadata filters supplied as a
                SQL `where` condition string. Defaults to None.
                eg. "price<=70.0 AND brand='Nintendo'"

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection.

        Returns:
            List[Tuple[Document, float]]: List of documents with their relevance score
        )1similarity_search_by_vector_with_relevance_scoresr!   r<   r&   rR   rS   rT   rU   r6   r'   r'   r(   'similarity_search_with_relevance_scores   s   
z/Rockset.similarity_search_with_relevance_scoresList[Document]c                 K  rX   )zaSame as `similarity_search_with_relevance_scores` but
        doesn't return the scores.
        )similarity_search_by_vectorr!   r<   rZ   r'   r'   r(   similarity_search   s   
zRockset.similarity_searchList[float]c                 K  s&   | j ||||fi |}dd |D S )zZAccepts a query_embedding (vector), and returns documents with
        similar embeddings.c                 S  s   g | ]\}}|qS r'   r'   ).0rA   _r'   r'   r(   
<listcomp>       z7Rockset.similarity_search_by_vector.<locals>.<listcomp>)rY   )r&   rD   rS   rT   rU   r6   Zdocs_and_scoresr'   r'   r(   r]      s   z#Rockset.similarity_search_by_vectorc              
   K  s:  d}d|v r
|d }|  |||||}z| jjjd|id}W n ty9 }	 ztd|	 g W  Y d}	~	S d}	~	ww g }
|jD ][}i }t|t	sQJ d
t|| D ]9\}}|| jkrqt|tsnJ d
| jt||}qU|d	krt|tsJ d

t||}qU|dvr|||< qU|
t||d|f q?|
S )z|Accepts a query_embedding (vector), and returns documents with
        similar embeddings along with their relevance scores.Texclude_embeddingsrR   )sqlz$Exception when querying Rockset: %s
Nz;document should be of type `dict[str,Any]`. But found: `{}`zIpage content stored in column `{}` must be of type `str`. But found: `{}`distzDComputed distance between vectors must of type `float`. But found {})r8   Z_event_time_meta)page_contentmetadata)_build_query_sqlr   ZQueriesrR   	Exceptionloggererrorresultsr   dictformatr   itemsr"   r   floatr=   r
   )r&   rD   rS   rT   rU   r6   rd   Zq_strZquery_responseeZfinalResultZdocumentri   vrh   Zscorer'   r'   r(   rY      sV   




z9Rockset.similarity_search_by_vector_with_relevance_scores         ?)rU   fetch_klambda_multrr   c                  sx   j |}j|f||dd| fdd D }tt||||d}	|	D ]	}
 |
 jj= q) fdd|	D S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            distance_func (DistanceFunction): how to compute distance between two
                vectors in Rockset.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            where_str: where clause for the sql query
        Returns:
            List of Documents selected by maximal marginal relevance.
        F)rS   rU   rd   c                   s   g | ]}|j  j qS r'   )ri   r#   )r`   rA   r,   r'   r(   rb   V  s    z9Rockset.max_marginal_relevance_search.<locals>.<listcomp>)rx   rS   c                   s   g | ]} | qS r'   r'   r`   r?   )initial_docsr'   r(   rb   d  rc   )r!   r<   r]   r   nparrayri   r#   )r&   rR   rS   rw   rx   rU   r6   query_embeddingr   Zselected_indicesr?   r'   )rz   r&   r(   max_marginal_relevance_search0  s(   	z%Rockset.max_marginal_relevance_searchTr}   rd   boolc           	      C  s   d tt|}|j d| j d| d}|rd| dnd}|r(d| j d	nd}d
| d| d| j d| j d| d|  dt| dS )zABuilds Rockset SQL query to query similar vectors to query_vector,(z, [z
]) as distzWHERE 
rC   z EXCEPT(z),zSELECT * z
FROM .zORDER BY dist z
LIMIT )joinmapr   rK   r#   r$   r    rL   )	r&   r}   rT   rS   rU   rd   Zq_embedding_strZdistance_strZselect_embeddingr'   r'   r(   rj   h  s,   
zRockset._build_query_sqlr>   
List[dict]c                 C  s(   | j jj| j|| jd}dd |jD S )NZ
collectiondatar   c                 S  s   g | ]}|j qS r'   )r8   )r`   Z
doc_statusr'   r'   r(   rb     s    z7Rockset._write_documents_to_rockset.<locals>.<listcomp>)r   	DocumentsZadd_documentsr    r$   r   )r&   r>   Zadd_doc_resr'   r'   r(   r;     s   
z#Rockset._write_documents_to_rocksetNonec                   sR   zddl m  W n ty   tdw | jjj| j fdd|D | jd dS )z1Delete a list of docs from the Rockset collectionr   DeleteDocumentsRequestDatar   c                   s   g | ]} |d qS ))idr'   ry   r   r'   r(   rb     s    z(Rockset.delete_texts.<locals>.<listcomp>r   N)Zrockset.modelsr   r   r   r   Zdelete_documentsr    r$   )r&   r2   r'   r   r(   delete_texts  s   
zRockset.delete_textsOptional[bool]c              
   K  sP   z|d u rg }|  | W dS  ty' } ztd| W Y d }~dS d }~ww )Nz.Exception when deleting docs from Rockset: %s
FT)r   rk   rl   rm   )r&   r2   r6   rs   r'   r'   r(   delete  s   zRockset.deletec                   s   t d | j|fi |I d H S r+   )r   r   )r&   r2   r6   r'   r'   r(   adelete  s   zRockset.adelete)r   )r   r   r   r   r   r   r   r   r   r   r   r   )r*   r   )NNr-   )r.   r/   r0   r1   r2   r3   r4   r5   r6   r   r*   r7   )NNrC   rC   rC   Nr-   )r.   r7   rD   r   r0   r1   r   r   r   r   r   r   r   r   r2   r3   r4   r5   r6   r   r*   r   )rR   r   rS   r5   rT   rP   rU   rV   r6   r   r*   rW   )rR   r   rS   r5   rT   rP   rU   rV   r6   r   r*   r\   )rD   r_   rS   r5   rT   rP   rU   rV   r6   r   r*   r\   )rD   r_   rS   r5   rT   rP   rU   rV   r6   r   r*   rW   )rQ   ru   rv   )rR   r   rS   r5   rw   r5   rx   rr   rU   rV   r6   r   r*   r\   )rQ   NT)r}   r_   rT   rP   rS   r5   rU   rV   rd   r   r*   r   )r>   r   r*   r7   )r2   r7   r*   r   r+   )r2   r3   r6   r   r*   r   )rM   rN   rO   __doc__r)   propertyr   rB   classmethodrF   r   rP   rG   r[   r^   r]   rY   r~   rj   r;   r   r   r   r'   r'   r'   r(   r      sf    $0($:<

r   )
__future__r   loggingcopyr   enumr   typingr   r   r   r   r	   numpyr{   Zlangchain_core.documentsr
   Zlangchain_core.embeddingsr   Zlangchain_core.runnablesr   Zlangchain_core.vectorstoresr   Z&langchain_community.vectorstores.utilsr   	getLoggerrM   rl   r   r'   r'   r'   r(   <module>   s    
