o
    Zhda                     @  s   d dl mZ d dlZd dlZd dlZd dlZd dlmZmZm	Z	m
Z
mZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d	ZdddZdddZG dd deZdS )    )annotationsN)AnyCallableDictIterableListOptionalType)Document)
Embeddingsguard_import)VectorStore)maximal_marginal_relevance   returnr   c                   C  s   t dS )zImport lancedb package.lancedbr    r   r   _/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/vectorstores/lancedb.pyimport_lancedb   s   r   filterDict[str, str]strc                 C  s   d dd |  D S )z2Converts a dict filter to a LanceDB filter string.z AND c                 S  s    g | ]\}}| d | dqS )z = ''r   ).0kvr   r   r   
<listcomp>   s     z#to_lance_filter.<locals>.<listcomp>)joinitems)r   r   r   r   to_lance_filter   s   r    c                   @  sp  e Zd ZdZdddddddddddd	ddefdrddZdsdtd&d'Zedud(d)Z		dvdwd2d3Z		 dxdyd7d8Z
			9	:		;	dzd{dDdEZd|dGdHZ		dvd}dJdKZ			d~ddOdPZddRdSZ			d~ddVdWZ			d~ddXdYZ		dvddZd[Z				 ddd^d_Z		`	a	dddedfZ		`	a	dddgdhZe													dddldmZ					dddpdqZdS )LanceDBay  `LanceDB` vector store.

    To use, you should have ``lancedb`` python package installed.
    You can install it with ``pip install lancedb``.

    Args:
        connection: LanceDB connection to use. If not provided, a new connection
                    will be created.
        embedding: Embedding to use for the vectorstore.
        vector_key: Key to use for the vector in the database. Defaults to ``vector``.
        id_key: Key to use for the id in the database. Defaults to ``id``.
        text_key: Key to use for the text in the database. Defaults to ``text``.
        table_name: Name of the table to use. Defaults to ``vectorstore``.
        api_key: API key to use for LanceDB cloud database.
        region: Region to use for LanceDB cloud database.
        mode: Mode to use for adding data to the table. Valid values are
              ``append`` and ``overwrite``. Defaults to ``overwrite``.



    Example:
        .. code-block:: python
            vectorstore = LanceDB(uri='/lancedb', embedding_function)
            vectorstore.add_texts(['text1', 'text2'])
            result = vectorstore.similarity_search('text1')
    Nz/tmp/lancedbvectoridtextvectorstore	overwritel2
connectionOptional[Any]	embeddingOptional[Embeddings]uriOptional[str]
vector_keyid_keytext_key
table_nameapi_keyregionmodetabledistancererankerrelevance_score_fn"Optional[Callable[[float], float]]limitintc                 C  s  t d}t d|j_|| _|| _|| _|| _|dkr!|p tdnd| _	|	| _
|
| _|| _|| _|| _d| _t||jjrA|| _n|du rId| _ntdt|tr`| j	du r`|dr`td| jdu ritd	t||jjrt|| _n:t|t|jjfrtd
| j	du r||| _n!t|tr|dr|j|| j	| j
d| _n||| _td |durzt||jj|jjjfsJ || _t |dr|j!nd| _"W dS  t#y   tdw | j$|dd| _dS )z$Initialize with Lance DB vectorstorer   zlancedb.remote.table ZLANCE_API_KEYNz9`reranker` has to be a lancedb.rerankers.Reranker object.zdb://z&API key is required for LanceDB cloud.z#embedding object should be providedzs`connection` has to be a lancedb.db.LanceDBConnection object.                `lancedb.db.LanceTable` is deprecated.)r2   r3   z[api key provided with local uri.                            The data will be stored locallynameZremote_tablezj`table` has to be a lancedb.db.LanceTable or 
                    lancedb.remote.table.RemoteTable object.T)set_default)%r   remoter5   
_embedding_vector_key_id_key	_text_keyosgetenvr2   r3   r4   r6   override_relevance_score_fnr:   
_fts_index
isinstanceZ	rerankersZReranker	_reranker
ValueErrorr   
startswithdbZLanceDBConnection_connectionZ
LanceTableconnectwarningswarnZRemoteTable_tablehasattrr=   _table_nameAssertionError	get_table)selfr(   r*   r,   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r:   r   r   r   r   __init__:   st    







zLanceDB.__init__Fresultsr   scoreboolr   c                   s   j j}d|v rdn	d|v rdnd d|v  d u s|s- fddttD S rA|rC fddttD S d S d S )NZ	_distanceZ_relevance_scoremetadatac                   s:   g | ]}t j |   rd  |  ni dqS r[   )Zpage_contentr[   r
   rC   Zas_pyr   idx)has_metadatarX   rV   r   r   r      s    z+LanceDB.results_to_docs.<locals>.<listcomp>c                   sJ   g | ]!}t j |   rd  |  ni d |  fqS r\   r]   r^   r`   rX   Z	score_colrV   r   r   r      s    
)Zschemanamesrangelen)rV   rX   rY   columnsr   ra   r   results_to_docs   s    


zLanceDB.results_to_docsc                 C  s   | j S )N)r@   rV   r   r   r   
embeddings   s   zLanceDB.embeddingstextsIterable[str]	metadatasOptional[List[dict]]idsOptional[List[str]]kwargs	List[str]c                 K  s   g }|p
dd |D }| j t|}t|D ]&\}}|| }	|r%|| nd|| i}
|| j|	| j|| | j|d|
i q|  }|du rS| j	j
| j|d}|| _n| jdu ra|j|| jd n|| d| _|S )a  Turn texts into embedding and add it to the database

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids to associate with the texts.
            ids: Optional list of ids to associate with the texts.

        Returns:
            List of ids of the added texts.
        c                 S     g | ]}t t qS r   r   uuiduuid4r   _r   r   r   r          z%LanceDB.add_texts.<locals>.<listcomp>r#   r[   Ndata)r4   )r@   Zembed_documentslist	enumerateappendrA   rB   rC   rU   rM   create_tablerS   rQ   r2   addr4   rG   )rV   ri   rk   rm   ro   docsrh   r_   r$   r*   r[   tblr   r   r   	add_texts   s,   
	

zLanceDB.add_textsr=   r>   Optional[bool]c                 C  sJ   |dur|r|| _ | j }n|}n| j }z| j|W S  ty$   Y dS w )a  
        Fetches a table object from the database.

        Args:
            name (str, optional): The name of the table to fetch. Defaults to None
                                    and fetches current table object.
            set_default (bool, optional): Sets fetched table as the default table.
                                        Defaults to False.

        Returns:
            Any: The fetched table object.

        Raises:
            ValueError: If the specified table is not found in the database.

        N)rS   rM   Z
open_table	Exception)rV   r=   r>   _namer   r   r   rU      s   zLanceDB.get_table   `   L2col_name
vector_colnum_partitionsOptional[int]num_sub_vectorsindex_cache_sizemetricNonec           	      C  s@   |  |}|r|j|||||d dS |r|| dS td)aO  
        Create a scalar(for non-vector cols) or a vector index on a table.
        Make sure your vector column has enough data before creating an index on it.

        Args:
            vector_col: Provide if you want to create index on a vector column.
            col_name: Provide if you want to create index on a non-vector column.
            metric: Provide the metric to use for vector index. Defaults to 'L2'
                    choice of metrics: 'L2', 'dot', 'cosine'
            num_partitions: Number of partitions to use for the index. Defaults to 256.
            num_sub_vectors: Number of sub-vectors to use for the index. Defaults to 96.
            index_cache_size: Size of the index cache. Defaults to None.
            name: Name of the table to create index on. Defaults to None.

        Returns:
            None
        )r   vector_column_namer   r   r   z%Provide either vector_col or col_nameN)rU   create_indexZcreate_scalar_indexrJ   )	rV   r   r   r   r   r   r   r=   r   r   r   r   r   	  s   

zLanceDB.create_indexr   c                 C  sB   t |d}t| dW  d   S 1 sw   Y  dS )z!Get base64 string from image URI.rbzutf-8N)openbase64	b64encodereaddecode)rV   r,   Z
image_filer   r   r   encode_image3  s   $zLanceDB.encode_imageurisc                   s      } fdd|D }|du rdd |D }d} jdur-t jdr- jj|d}ntdg }t|D ]$\}	}
|rA||	 nd||	 i}| j|
 j||	  j	||	 d	|i q7|du rn j
j j|d
}| _|S || |S )as  Run more images through the embeddings and add to the vectorstore.

        Args:
            uris List[str]: File path to the image.
            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
            ids (Optional[List[str]], optional): Optional list of IDs.

        Returns:
            List[str]: List of IDs of the added images.
        c                   s   g | ]} j |d qS ))r,   )r   )r   r,   rg   r   r   r   L  rw   z&LanceDB.add_images.<locals>.<listcomp>Nc                 S  rq   r   rr   ru   r   r   r   r   O  rw   embed_image)r   zEembedding object should be provided and must have embed_image method.r#   r[   rx   )rU   r@   rR   r   rJ   r{   r|   rA   rB   rC   rM   r}   rS   rQ   r~   )rV   r   rk   rm   ro   r   Z	b64_textsrh   ry   r_   Zembr[   r   rg   r   
add_images8  s4   


zLanceDB.add_imagesqueryr   r   c                 K  s   |d u r| j }| |}t|trt|}|dd}|dd}|d }	r<|j|| jd ||	j	||d}
n|j|| jd |j	||d}
|dkr\| j
d ur\|
j| j
d	 |
 }t|d
krktd |S )N	prefilterF
query_typer"   metrics)r   r   )r   hybrid)r7   r   zNo results found for the query.)r:   rU   rH   dictr    getsearchrA   r   whererI   ZrerankZto_arrowrd   rO   rP   )rV   r   r   r   r=   ro   r   r   r   r   Zlance_queryr   r   r   r   _queryl  s2   


zLanceDB._queryCallable[[float], float]c                 C  sN   | j r| j S | jdkr| jS | jdkr| jS | jdkr| jS td| j d)a8  
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.
        Zcosiner'   ipzANo supported normalization function for distance metric of type: z=.Consider providing relevance_score_fn to Chroma constructor.)rF   r6   Z_cosine_relevance_score_fnZ_euclidean_relevance_score_fnZ%_max_inner_product_relevance_score_fnrJ   rg   r   r   r   _select_relevance_score_fn  s   	


z"LanceDB._select_relevance_score_fnList[float]Optional[Dict[str, str]]c                 K  s>   |du r| j }| j||f||d|}| j||dddS )zD
        Return documents most similar to the query vector.
        Nr   r=   rY   FrY   )r:   r   rf   pop)rV   r*   r   r   r=   ro   resr   r   r   similarity_search_by_vector  s   z#LanceDB.similarity_search_by_vectorc                   s@   |du r| j }|   | j||fddi|} fdd|D S )zZ
        Return documents most similar to the query vector with relevance scores.
        NrY   Tc                   s    g | ]\}}| t |fqS r   )float)r   docrY   r8   r   r   r     s    zMLanceDB.similarity_search_by_vector_with_relevance_scores.<locals>.<listcomp>)r:   r   r   )rV   r*   r   r   r=   ro   Zdocs_and_scoresr   r   r   1similarity_search_by_vector_with_relevance_scores  s   
z9LanceDB.similarity_search_by_vector_with_relevance_scoresc                 K  s  |du r| j }|dd}|dd}|dd}| jdu r"td|dks*|d	krk| jdu rg| jdu rg| |}|j| jdd
| _|d	krQ| j	|}	|	|f}
n|}
| j
|
|f||d|}| j||dS td| j	|}	| j
|	|fd|i|}| j||dS )zAReturn documents most similar to the query with relevance scores.NrY   Tr=   r   r"   z4search needs an emmbedding function to be specified.ftsr   )replacer   r   z?Full text/ Hybrid search is not supported in LanceDB Cloud yet.r   )r:   r   r@   rJ   r2   rG   rU   Zcreate_fts_indexrC   embed_queryr   rf   NotImplementedError)rV   r   r   r   ro   rY   r=   r   r   r*   r   r   r   r   r   similarity_search_with_score  s.   


z$LanceDB.similarity_search_with_scorer   List[Document]c              	   K  s"   | j d|||||dd|}|S )ap  Return documents most similar to the query

        Args:
            query: String to query the vectorstore with.
            k: Number of documents to return.
            filter (Optional[Dict]): Optional filter arguments
                sql_filter(Optional[string]): SQL filter to apply to the query.
                prefilter(Optional[bool]): Whether to apply the filter prior
                                             to the vector search.
        Raises:
            ValueError: If the specified table is not found in the database.

        Returns:
            List of documents most similar to the query.
        F)r   r   r=   r   r   rY   Nr   )r   )rV   r   r   r=   r   r   ro   r   r   r   r   similarity_search  s   zLanceDB.similarity_search         ?fetch_klambda_multr   c           	      K  sD   |du r| j }| jdu rtd| j|}| j|||||d}|S )a?  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List of Documents selected by maximal marginal relevance.
        NzBFor MMR search, you must specify an embedding function oncreation.)r   r   )r:   r@   rJ   r   'max_marginal_relevance_search_by_vector)	rV   r   r   r   r   r   ro   r*   r   r   r   r   max_marginal_relevance_search  s   
z%LanceDB.max_marginal_relevance_searchc           
        sf   | j d|||d|}ttj|tjd|d  |p| j|d | |} fddt|D }	|	S )	aH  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

        Returns:
            List of Documents selected by maximal marginal relevance.
        )r   r   r   )Zdtyper"   )r   r   c                   s   g | ]
\}}| v r|qS r   r   )r   irZmmr_selectedr   r   r   m  s    zCLanceDB.max_marginal_relevance_search_by_vector.<locals>.<listcomp>Nr   )	r   r   nparrayZfloat32Z	to_pylistr:   rf   r{   )
rV   r*   r   r   r   r   ro   rX   
candidatesZselected_resultsr   r   r   r   C  s    

z/LanceDB.max_marginal_relevance_search_by_vectorclsType[LanceDB]r   c                 K  s:   t d|||||||	|
||||d|}|j||d |S )N)r(   r*   r.   r/   r0   r1   r2   r3   r4   r6   r7   r8   )rk   r   )r!   r   )r   ri   r*   rk   r(   r.   r/   r0   r1   r2   r3   r4   r6   r7   r8   ro   instancer   r   r   
from_textsp  s$   zLanceDB.from_texts
delete_alldrop_columnsc                 K  s   |  |}|r|| dS |r!|| j dd| dS |r3| jdur,td|| dS |r<|d dS td)a  
        Allows deleting rows by filtering, by ids or drop columns from the table.

        Args:
            filter: Provide a string SQL expression -  "{col} {operation} {value}".
            ids: Provide list of ids to delete from the table.
            drop_columns: Provide list of columns to drop from the table.
            delete_all: If True, delete all rows from the table.
        z
 in ('{}'),Nz;Column operations currently not supported in LanceDB Cloud.truez6Provide either filter, ids, drop_columns or delete_all)	rU   deleterB   formatr   r2   r   r   rJ   )rV   rm   r   r   r   r=   ro   r   r   r   r   r     s   
"
zLanceDB.delete)r(   r)   r*   r+   r,   r-   r.   r-   r/   r-   r0   r-   r1   r-   r2   r-   r3   r-   r4   r-   r5   r)   r6   r-   r7   r)   r8   r9   r:   r;   )F)rX   r   rY   rZ   r   r   )r   r+   )NN)
ri   rj   rk   rl   rm   rn   ro   r   r   rp   )NF)r=   r-   r>   r   r   r   )NNr   r   Nr   N)r   r-   r   r-   r   r   r   r   r   r   r   r-   r=   r-   r   r   )r,   r   r   r   )
r   rp   rk   rl   rm   rn   ro   r   r   rp   )NNN)r   r   r   r   r   r)   r=   r-   ro   r   r   r   )r   r   )r*   r   r   r   r   r   r=   r-   ro   r   r   r   )
r   r   r   r   r   r   ro   r   r   r   )NNNF)r   r   r   r   r=   r-   r   r)   r   r   ro   r   r   r   )Nr   r   N)r   r   r   r   r   r;   r   r   r   r   ro   r   r   r   )r*   r   r   r   r   r;   r   r   r   r   ro   r   r   r   )NNr"   r#   r$   r%   NNr&   r'   NN)"r   r   ri   rp   r*   r   rk   rl   r(   r)   r.   r-   r/   r-   r0   r-   r1   r-   r2   r-   r3   r-   r4   r-   r6   r-   r7   r)   r8   r9   ro   r   r   r!   )NNNNN)rm   rn   r   r   r   r-   r   rn   r=   r-   ro   r   r   r   )__name__
__module____qualname____doc__	DEFAULT_KrW   rf   propertyrh   r   rU   r   r   r   r   r   r   r   r   r   r   r   classmethodr   r   r   r   r   r   r!      s    X"1#
*7
&+ /-'r!   )r   r   )r   r   r   r   )
__future__r   r   rD   rs   rO   typingr   r   r   r   r   r   r	   numpyr   Zlangchain_core.documentsr
   Zlangchain_core.embeddingsr   Zlangchain_core.utilsr   Zlangchain_core.vectorstoresr   Z&langchain_community.vectorstores.utilsr   r   r   r    r!   r   r   r   r   <module>   s     $

