o
    Zh7                     @  s   d Z ddlmZ ddlZddlZddlmZmZmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ er:ddlmZ e ZG d	d
 d
eZdS )z'Wrapper around Epsilla vector database.    )annotationsN)TYPE_CHECKINGAnyIterableListOptionalTypeDocument)
Embeddings)VectorStore)vectordbc                   @  s   e Zd ZU dZdZded< dZded< dZded< eefdBddZe	dCddZ
dDddZdEdDddZ	dFdGd!d"Z	dHdId'd(Z			)dJdKd1d2Z	dLdMd8d9Zeddeeed)fdNd=d>Zedeeed)fdOd@dAZdS )PEpsillaar  
    Wrapper around Epsilla vector database.

    As a prerequisite, you need to install ``pyepsilla`` package
    and have a running Epsilla vector database (for example, through our docker image)
    See the following documentation for how to run an Epsilla vector database:
    https://epsilla-inc.gitbook.io/epsilladb/quick-start

    Args:
        client (Any): Epsilla client to connect to.
        embeddings (Embeddings): Function used to embed the texts.
        db_path (Optional[str]): The path where the database will be persisted.
                                 Defaults to "/tmp/langchain-epsilla".
        db_name (Optional[str]): Give a name to the loaded database.
                                 Defaults to "langchain_store".
    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Epsilla
            from pyepsilla import vectordb

            client = vectordb.Client()
            embeddings = OpenAIEmbeddings()
            db_path = "/tmp/vectorstore"
            db_name = "langchain_store"
            epsilla = Epsilla(client, embeddings, db_path, db_name)
    Zlangchain_storestr_LANGCHAIN_DEFAULT_DB_NAMEz/tmp/langchain-epsilla_LANGCHAIN_DEFAULT_DB_PATHZlangchain_collection_LANGCHAIN_DEFAULT_TABLE_NAMEclientr   
embeddingsr   db_pathOptional[str]db_namec              
   C  s   zddl }W n ty } ztd|d}~ww t||jj|jjjfs-tdt	| || _
|| _|| _tj| _| j
j||d | j
j|d dS )z%Initialize with necessary components.r   NziCould not import pyepsilla python package. Please install pyepsilla package with `pip install pyepsilla`.zbclient should be an instance of pyepsilla.vectordb.Client or pyepsilla.cloud.client.Vectordb, got )r   r   r   )	pyepsillaImportError
isinstancer   ZClientcloudr   ZVectordb	TypeErrortype_clientZ_db_name_embeddingsr   r   _collection_nameZload_dbZuse_db)selfr   r   r   r   r   e r$   _/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/vectorstores/epsilla.py__init__4   s0   zEpsilla.__init__returnOptional[Embeddings]c                 C  s   | j S N)r    r"   r$   r$   r%   r   S   s   zEpsilla.embeddingscollection_nameNonec                 C  s
   || _ dS )z~
        Set default collection to use.

        Args:
            collection_name (str): The name of the collection.
        N)r!   r"   r+   r$   r$   r%   use_collectionW   s   
zEpsilla.use_collection c                 C  s   |s| j }| j| dS )z
        Clear data in a collection.

        Args:
            collection_name (Optional[str]): The name of the collection.
                If not provided, the default collection will be used.
        N)r!   r   Z
drop_tabler-   r$   r$   r%   
clear_data`   s   zEpsilla.clear_dataNresponse_fieldsOptional[List[str]]
List[dict]c                 C  sT   |s| j }| jj||d\}}|dkr&td|d   td|d |d S )a  Get the collection.

        Args:
            collection_name (Optional[str]): The name of the collection
                to retrieve data from.
                If not provided, the default collection will be used.
            response_fields (Optional[List[str]]): List of field names in the result.
                If not specified, all available fields will be responded.

        Returns:
            A list of the retrieved data.
        )
table_namer1      zFailed to get records: message
Error: {}.result)r!   r   getloggererror	Exceptionformat)r"   r+   r1   status_coderesponser$   r$   r%   r9   l   s   
zEpsilla.getr4   list	metadatasOptional[list[dict]]c                 C  sF  |st dt|d }dddddddd	|d
g}|d urndd |D }|D ]E}| D ]>\}}	||v r7q.t|	tr?d}
n t|	trGd}
nt|	trOd}
nt|	trWd}
nt d| d|||
d || q.q(| j	j
||d\}}|dkr|dkrtd| d d S td| d|d   td|d d S )NzEmbeddings list is empty.r   idINT)namedataTypetextSTRINGr   ZVECTOR_FLOAT)rE   rF   
dimensionsc                 S  s   g | ]}|d  qS )rE   r$   ).0fieldr$   r$   r%   
<listcomp>       z.Epsilla._create_collection.<locals>.<listcomp>FLOATZBOOLzUnsupported data type for .)Ztable_fieldsr5   i  z#Continuing with the existing table zFailed to create collection : r6   r7   )
ValueErrorlenitemsr   r   intfloatboolappendr   Zcreate_tabler:   infor;   r<   r=   )r"   r4   r   rA   dimfieldsfield_namesmetadatakeyvalueZd_typer>   r?   r$   r$   r%   _create_collection   sH   





zEpsilla._create_collectionFtextsIterable[str]Optional[List[dict]]drop_oldOptional[bool]kwargs	List[str]c                   sN  |s j }n| _ |r jj|d t|}z j|}W n ty0    fdd|D }Y nw t|dkr>t	d g S  j
|||d dd |D }g }t|D ](\}	}
|
||	 ||	 d}|d	urv||	  }|D ]\}}|||< qm|| qS jj||d
\}}|dkrtd| d|d   td|d dd |D S )a  
        Embed texts and add them to the database.

        Args:
            texts (Iterable[str]): The texts to embed.
            metadatas (Optional[List[dict]]): Metadata dicts
                        attached to each of the texts. Defaults to None.
            collection_name (Optional[str]): Which collection to use.
                        Defaults to "langchain_collection".
                        If provided, default collection name will be set as well.
            drop_old (Optional[bool]): Whether to drop the previous collection
                        and create a new one. Defaults to False.

        Returns:
            List of ids of the added texts.
        r   c                   s   g | ]} j |qS r$   )r    embed_query)rJ   xr*   r$   r%   rL          z%Epsilla.add_texts.<locals>.<listcomp>r   zNothing to insert, skipping.)r4   r   rA   c                 S  s   g | ]}t t qS r$   )hashuuiduuid4)rJ   _r$   r$   r%   rL      ri   rC   rG   r   N)r4   recordsr5   zFailed to add records to rP   r6   r7   c                 S  s   g | ]}t |qS r$   )r   )rJ   rC   r$   r$   r%   rL      rM   )r!   r   Zdrop_dbr@   r    Zembed_documentsNotImplementedErrorrR   r:   debugr_   	enumeraterS   rW   insertr;   r<   r=   )r"   r`   rA   r+   rc   re   r   Zidsro   indexrC   recordr\   r]   r^   r>   r?   r$   r*   r%   	add_texts   sL   


zEpsilla.add_texts   querykrT   List[Document]c                   s   |s| j }| j|}| jj|d||d\}}|dkr/td|d  d td|d g d t	t
 fd	d
|d S )a  
        Return the documents that are semantically most relevant to the query.

        Args:
            query (str): String to query the vectorstore with.
            k (Optional[int]): Number of documents to return. Defaults to 4.
            collection_name (Optional[str]): Collection to use.
                Defaults to "langchain_store" or the one provided before.
        Returns:
            List of documents that are semantically most relevant to the query
        r   )r4   Zquery_fieldquery_vectorlimitr5   zSearch failed: r6   rO   r7   rn   c                   s    t  d  fdd D dS )NrG   c                   s   i | ]}| vr|| qS r$   r$   )rJ   r]   )exclude_keysitemr$   r%   
<dictcomp>  s    z?Epsilla.similarity_search.<locals>.<lambda>.<locals>.<dictcomp>)page_contentr\   r	   r~   r}   r   r%   <lambda>  s    z+Epsilla.similarity_search.<locals>.<lambda>r8   )r!   r    rg   r   rx   r:   r;   r<   r=   r@   map)r"   rx   ry   r+   re   r{   r>   r?   r$   r   r%   similarity_search   s&   

zEpsilla.similarity_searchclsType[Epsilla]	embeddingc	                 K  s.   t ||||d}
|
j|f|||d|	 |
S )a  Create an Epsilla vectorstore from raw documents.

        Args:
            texts (List[str]): List of text data to be inserted.
            embeddings (Embeddings): Embedding function.
            client (pyepsilla.vectordb.Client): Epsilla client to connect to.
            metadatas (Optional[List[dict]]): Metadata for each text.
                    Defaults to None.
            db_path (Optional[str]): The path where the database will be persisted.
                    Defaults to "/tmp/langchain-epsilla".
            db_name (Optional[str]): Give a name to the loaded database.
                    Defaults to "langchain_store".
            collection_name (Optional[str]): Which collection to use.
                    Defaults to "langchain_collection".
                    If provided, default collection name will be set as well.
            drop_old (Optional[bool]): Whether to drop the previous collection
                    and create a new one. Defaults to False.

        Returns:
            Epsilla: Epsilla vector store.
        )r   r   )rA   r+   rc   )r   rv   )r   r`   r   rA   r   r   r   r+   rc   re   instancer$   r$   r%   
from_texts  s   "zEpsilla.from_texts	documentsc              	   K  s>   dd |D }	dd |D }
| j |	|f|
|||||d|S )a"  Create an Epsilla vectorstore from a list of documents.

        Args:
            texts (List[str]): List of text data to be inserted.
            embeddings (Embeddings): Embedding function.
            client (pyepsilla.vectordb.Client): Epsilla client to connect to.
            metadatas (Optional[List[dict]]): Metadata for each text.
                    Defaults to None.
            db_path (Optional[str]): The path where the database will be persisted.
                    Defaults to "/tmp/langchain-epsilla".
            db_name (Optional[str]): Give a name to the loaded database.
                    Defaults to "langchain_store".
            collection_name (Optional[str]): Which collection to use.
                    Defaults to "langchain_collection".
                    If provided, default collection name will be set as well.
            drop_old (Optional[bool]): Whether to drop the previous collection
                    and create a new one. Defaults to False.

        Returns:
            Epsilla: Epsilla vector store.
        c                 S     g | ]}|j qS r$   )r   rJ   docr$   r$   r%   rL   m      z*Epsilla.from_documents.<locals>.<listcomp>c                 S  r   r$   )r\   r   r$   r$   r%   rL   n  r   )rA   r   r   r   r+   rc   )r   )r   r   r   r   r   r   r+   rc   re   r`   rA   r$   r$   r%   from_documentsL  s   !	zEpsilla.from_documents)r   r   r   r   r   r   r   r   )r'   r(   )r+   r   r'   r,   )r/   )r/   N)r+   r   r1   r2   r'   r3   r)   )r4   r   r   r@   rA   rB   r'   r,   )Nr/   F)r`   ra   rA   rb   r+   r   rc   rd   re   r   r'   rf   )rw   r/   )
rx   r   ry   rT   r+   r   re   r   r'   rz   )r   r   r`   rf   r   r   rA   rb   r   r   r   r   r   r   r+   r   rc   rd   re   r   r'   r   )r   r   r   rz   r   r   r   r   r   r   r   r   r+   r   rc   rd   re   r   r'   r   )__name__
__module____qualname____doc__r   __annotations__r   r   r&   propertyr   r.   r0   r9   r_   rv   r   classmethodr   r   r$   r$   r$   r%   r      sJ   
 
	/G(,r   )r   
__future__r   loggingrk   typingr   r   r   r   r   r   Zlangchain_core.documentsr
   Zlangchain_core.embeddingsr   Zlangchain_core.vectorstoresr   r   r   	getLoggerr:   r   r$   r$   r$   r%   <module>   s     