o
    Zhh                     @  s   d dl mZ d dlZd dlmZmZmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ er6d dlmZ dd	d
d
dZeeZG dd deZdS )    )annotationsN)TYPE_CHECKINGAnyDictIterableListOptionalTuple)Document)
Embeddings)VectorStoreHippoClient	localhostZ7788Zadmin)hostportusernamepasswordc                   @  s   e Zd ZdZ							dNdOddZdPddZ	dQdRd!d"Z	dSdTd$d%ZdUd&d'ZdVd(d)Z	dUd*d+Z
			,dWdXd5d6Z	7			dYdZd>d?Z	7			dYd[dAdBZ	7			dYd\dEdFZedddedddfd]dLdMZdS )^Hippoa  `Hippo` vector store.

    You need to install `hippo-api` and run Hippo.

    Please visit our official website for how to run a Hippo instance:
    https://www.transwarp.cn/starwarp

    Args:
        embedding_function (Embeddings): Function used to embed the text.
        table_name (str): Which Hippo table to use. Defaults to
            "test".
        database_name (str): Which Hippo database to use. Defaults to
            "default".
        number_of_shards (int): The number of shards for the Hippo table.Defaults to
            1.
        number_of_replicas (int): The number of replicas for the Hippo table.Defaults to
            1.
        connection_args (Optional[dict[str, any]]): The connection args used for
            this class comes in the form of a dict.
        index_params (Optional[dict]): Which index params to use. Defaults to
            IVF_FLAT.
        drop_old (Optional[bool]): Whether to drop the current collection. Defaults
            to False.
        primary_field (str): Name of the primary key field. Defaults to "pk".
        text_field (str): Name of the text field. Defaults to "text".
        vector_field (str): Name of the vector field. Defaults to "vector".

    The connection args used for this class comes in the form of a dict,
    here are a few of the options:
        host (str): The host of Hippo instance. Default at "localhost".
        port (str/int): The port of Hippo instance. Default at 7788.
        user (str): Use which user to connect to Hippo instance. If user and
            password are provided, we will add related header in every RPC call.
        password (str): Required when user is provided. The password
            corresponding to the user.

    Example:
        .. code-block:: python

        from langchain_community.vectorstores import Hippo
        from langchain_community.embeddings import OpenAIEmbeddings

        embedding = OpenAIEmbeddings()
        # Connect to a hippo instance on localhost
        vector_store = Hippo.from_documents(
            docs,
            embedding=embeddings,
            table_name="langchain_test",
            connection_args=HIPPO_CONNECTION
        )

    Raises:
        ValueError: If the hippo-api python package is not installed.
    testdefault   NFembedding_functionr   
table_namestrdatabase_namenumber_of_shardsintnumber_of_replicasconnection_argsOptional[Dict[str, Any]]index_paramsOptional[dict]drop_oldOptional[bool]c	           
   
   C  s$  || _ || _|| _|| _|| _|| _d| _d| _d| _g | _	|d u r$t
}| || _d | _z| j| j| jrB|rB| j| j| j W n ty\ }	 ztd| j d|	   d }	~	ww z| j| j| jrq| j| j| j| _W n ty }	 ztd| j d|	   d }	~	ww |   d S )NpktextZvectorz+An error occurred while deleting the table z: z*An error occurred while getting the table )r   r   embedding_funcr   r   r!   _primary_field_text_field_vector_fieldfieldsDEFAULT_HIPPO_CONNECTION_create_connection_aliashccolZcheck_table_existsZdelete_table	Exceptionloggingerror	get_table_get_env)
selfr   r   r   r   r   r   r!   r#   e r7   ]/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/vectorstores/hippo.py__init__P   sP   zHippo.__init__dictreturnr   c           	   
     s
  zddl m} W n ty } ztd|d}~ww |dd}|dd |dd}|d	d}|dur[ dur[d
|v rP|d
}d
 fdd|D }nt|d t  }ntdzt	d| d ||g||dW S  t
y } ztd |d}~ww )z*Create the connection to the Hippo server.r   r   zQUnable to import transwarp_hipp_api, please install with `pip install hippo-api`.Nr   r   r   Zshivar   ,c                   s   g | ]	}| d   qS ):r7   ).0hr   r7   r8   
<listcomp>   s    z2Hippo._create_connection_alias.<locals>.<listcomp>r=   z/Missing standard address type for reuse attemptzcreate HippoClient[])r   pwdzFailed to create new connection) transwarp_hippo_api.hippo_clientr   ImportErrorgetsplitjoinr   
ValueErrorloggerinfor0   r2   )	r5   r   r   r6   r   r   r   hostsZgiven_addressr7   r@   r8   r-      s8   

zHippo._create_connection_alias
embeddingsOptional[list]	metadatasOptional[List[dict]]Nonec                 C  s<   t d |d urt d | || |   |   d S )Nzinit ...zcreate collection)rJ   rK   _create_collection_extract_fields_create_index)r5   rM   rO   r7   r7   r8   r4      s   

zHippo._get_envlistc           
   	   C  sD  ddl m} ddlm} t|d }td|  g }||| jd|j	 ||| j
d|j	 ||| jd|jd|id |rp|d  D ]&\}}t|tret|}	|||d|jd|	id qI|||d|j	 qItd	|  | jj| jd|| j| j| jd
 | j| j| j| _td| j d| j d d S )Nr   )
HippoField)	HippoTypez[_create_collection] dim: TF	dimension)Ztype_paramsz[_create_collection] fields: )nameZauto_idr+   r   r   r   z$[_create_collection] : create table z in z successfully)rD   rV   transwarp_hippo_api.hippo_typerW   lenrJ   debugappendr(   STRINGr)   r*   ZFLOAT_VECTORitems
isinstancerU   r.   Zcreate_tabler   r   r   r   r3   r/   rK   )
r5   rM   rO   rV   rW   dimr+   keyvalueZ	value_dimr7   r7   r8   rR      sZ   
	zHippo._create_collectionc                 C  sb   ddl m} t| j|r/| jj}td|  |D ]	}| j|j	 qtd| j  dS dS )z,Grab the existing fields from the Collectionr   
HippoTablez[_extract_fields] schema:z04 [_extract_fields] fields:N)
rD   re   r`   r/   schemarJ   r\   r+   r]   rY   )r5   re   rf   xr7   r7   r8   rS      s   zHippo._extract_fieldsc                 C  s   ddl m} t| j|rJ| j| j| j| ji }|dd}|du r&dS | j| j| j| j d D ]}t	
d|  |d | jkrI|  S q4dS )z0Return the vector index information if it existsr   rd   embedding_indexesNz[_get_index] embedding_indexes column)rD   re   r`   r/   r.   Zget_table_infor   r   rF   rJ   r\   r*   )r5   re   Z
table_inforh   rg   r7   r7   r8   
_get_index  s(   
zHippo._get_indexc                 C  s  ddl m} ddlm}m} t| j|r|  du r|  du r| jdu r]d|j	|j
dd| _| jj| j| jd | jd	 | jd
 | jd d t| j| jd  td dS |j
|j|j|j|jd}|j|j|j	|j	d}|| jd
  | jd
< | jd	 dkr|| jd	  | jd	< | j| j| jd | jd	 | jd
  t| j| jd  dS | jd	 dks| jd	 dkr|| jd	  | jd	< | jj| j| jd | jd	 | jd
 | jdd| jddd t| j| jd  dS | jd	 dkrB|| jd	  | jd	< | jj| j| jd | jd	 | jd
 | jdd| jdd| jdd| jdd t| j| jd  dS | jd	 dkr|| jd	  | jd	< | jj| j| jd | jd	 | jd
 | jd| jd| jdd t| j| jd  dS tddS dS dS ) z Create a index on the collectionr   rd   )	IndexType
MetricTypeNZlangchain_auto_create
   )
index_namemetric_type
index_typenlistrn   rp   ro   rq   )rq   zcreate index successfully)IVF_FLATFLATIVF_SQIVF_PQHNSW)ipIPl2L2rs   rr   rt   nprobe)rq   r{   ru   nbits   m)rq   r{   r|   r~   rv   Mef_construction	ef_search)r   r   r   zeIndex name does not match, please enter the correct index name. (FLAT, IVF_FLAT, IVF_PQ,IVF_SQ, HNSW))rD   re   rZ   rk   rl   r`   r/   rj   r!   rz   rr   Zcreate_indexr*   rJ   r\   Zactivate_indexrK   rs   rt   ru   rv   rx   rF   rI   )r5   re   rk   rl   Z
index_dictZmetric_dictr7   r7   r8   rT     s   
	









	zHippo._create_index  textsIterable[str]timeoutOptional[int]
batch_sizekwargsr   	List[str]c                   s  ddl m} |rtdd |D rtd g S t|}td|  zj|}W n ty>   fdd|D }Y nw t	|dkrLtd g S td	t	|  t
j|sb|| j|j|itd
|  tdj  |dur|D ]}| D ]\}	}
|	jv r|	g |
 qqtj  j }t	|}djv rjd td|  td||D ]:t| |  fddjD }zj|}td|  W q ty } z	td| |d}~ww dgS )a  
        Add text to the collection.

        Args:
            texts: An iterable that contains the text to be added.
            metadatas: An optional list of dictionaries,
            each dictionary contains the metadata associated with a text.
            timeout: Optional timeout, in seconds.
            batch_size: The number of texts inserted in each batch, defaults to 1000.
            **kwargs: Other optional parameters.

        Returns:
            A list of strings, containing the unique identifiers of the inserted texts.

        Note:
            If the collection has not yet been created,
            this method will create a new collection.
        r   rd   c                 s  s    | ]}|d kV  qdS ) Nr7   )r>   tr7   r7   r8   	<genexpr>  s    z"Hippo.add_texts.<locals>.<genexpr>zNothing to insert, skipping.z[add_texts] texts: c                   s   g | ]} j |qS r7   )r'   embed_queryr>   rg   )r5   r7   r8   rA         z#Hippo.add_texts.<locals>.<listcomp>z[add_texts] len_embeddings:z[add_texts] metadatas:z[add_texts] fields:Nr%   z[add_texts] total_count:c                   s   g | ]
}|   qS r7   r7   r   )endiinsert_dictr7   r8   rA     s    z05 [add_texts] insert z0Failed to insert batch starting at entity: %s/%sr   )rD   re   allrJ   r\   rU   r'   Zembed_documentsNotImplementedErrorr[   r`   r/   r4   r)   r*   r+   r_   
setdefaultr]   removerangeminZinsert_rowsrK   r0   r2   )r5   r   rO   r   r   r   re   rM   drb   rc   vectorsZtotal_countZinsert_listresr6   r7   )r   r   r   r5   r8   	add_texts  sb   




zHippo.add_texts   querykparamexprOptional[str]List[Document]c                 K  sB   | j du rtd g S | jd|||||d|}dd |D S )a  
        Perform a similarity search on the query string.

        Args:
            query (str): The text to search for.
            k (int, optional): The number of results to return. Default is 4.
            param (dict, optional): Specifies the search parameters for the index.
            Defaults to None.
            expr (str, optional): Filtering expression. Defaults to None.
            timeout (int, optional): Time to wait before a timeout error.
            Defaults to None.
            kwargs: Keyword arguments for Collection.search().

        Returns:
            List[Document]: The document results of the search.
        N!No existing collection to search.)r   r   r   r   r   c                 S  s   g | ]\}}|qS r7   r7   )r>   doc_r7   r7   r8   rA     s    z+Hippo.similarity_search.<locals>.<listcomp>r7   )r/   rJ   r\   similarity_search_with_score)r5   r   r   r   r   r   r   r   r7   r7   r8   similarity_search  s   


zHippo.similarity_searchList[Tuple[Document, float]]c           	      K  sD   | j du rtd g S | j|}| jd|||||d|}|S )a  
        Performs a search on the query string and returns results with scores.

        Args:
            query (str): The text being searched.
            k (int, optional): The number of results to return.
            Default is 4.
            param (dict): Specifies the search parameters for the index.
            Default is None.
            expr (str, optional): Filtering expression. Default is None.
            timeout (int, optional): The waiting time before a timeout error.
            Default is None.
            kwargs: Keyword arguments for Collection.search().

        Returns:
            List[float], List[Tuple[Document, any, any]]:
        Nr   )	embeddingr   r   r   r   r7   )r/   rJ   r\   r'   r   &similarity_search_with_score_by_vector)	r5   r   r   r   r   r   r   r   retr7   r7   r8   r     s   


z"Hippo.similarity_search_with_scorer   List[float]c                   sH  | j du rtd g S | jdd }|| j td| j  td|g  td|  td|  td|  | j j| j|g|||d td	   | jd
 }g }	d}
t fdd|D  D ]5}dd t||D }t	|
| j|d}td d |    d | |
 }|
d7 }
|	||f ql|	S )a  
        Performs a search on the query string and returns results with scores.

        Args:
            embedding (List[float]): The embedding vector being searched.
            k (int, optional): The number of results to return.
            Default is 4.
            param (dict): Specifies the search parameters for the index.
            Default is None.
            expr (str, optional): Filtering expression. Default is None.
            timeout (int, optional): The waiting time before a timeout error.
            Default is None.
            kwargs: Keyword arguments for Collection.search().

        Returns:
            List[Tuple[Document, float]]: Resulting documents and scores.
        Nr   zsearch_field:zvectors:zoutput_fields:ztopk:zdsl:)Zsearch_fieldr   output_fieldsZtopkZdslz-[similarity_search_with_score_by_vector] res:z%scoresr   c                   s   g | ]} d  | qS )r   r7   )r>   fieldr   r7   r8   rA   f  r   z@Hippo.similarity_search_with_score_by_vector.<locals>.<listcomp>c                 S  s   i | ]\}}||qS r7   r7   )r>   r   rc   r7   r7   r8   
<dictcomp>g  s    z@Hippo.similarity_search_with_score_by_vector.<locals>.<dictcomp>)Zpage_contentmetadataz;[similarity_search_with_score_by_vector] res[0][score_col]:r   )r/   rJ   r\   r+   r   r*   r   r)   zipr
   popr]   )r5   r   r   r   r   r   r   r   Z	score_colr   countr_   metar   Zscorer7   r   r8   r   .  sB   



z,Hippo.similarity_search_with_score_by_vectorDict[str, Any]Optional[Dict[Any, Any]]search_paramsbool'Hippo'c
              	   K  sd   |du ri }t d | d||||||	d|
}t d|  t d|  |j||d |S )a  
        Creates an instance of the VST class from the given texts.

        Args:
            texts (List[str]): List of texts to be added.
            embedding (Embeddings): Embedding model for the texts.
            metadatas (List[dict], optional):
            List of metadata dictionaries for each text.Defaults to None.
            table_name (str): Name of the table. Defaults to "test".
            database_name (str): Name of the database. Defaults to "default".
            connection_args (dict[str, Any]): Connection parameters.
            Defaults to DEFAULT_HIPPO_CONNECTION.
            index_params (dict): Indexing parameters. Defaults to None.
            search_params (dict): Search parameters. Defaults to an empty dictionary.
            drop_old (bool): Whether to drop the old collection. Defaults to False.
            kwargs: Other arguments.

        Returns:
            Hippo: An instance of the VST class.
        Nz'00 [from_texts] init the class of Hippo)r   r   r   r   r!   r#   z[from_texts] texts:z[from_texts] metadatas:)r   rO   r7   )rJ   rK   r\   r   )clsr   r   rO   r   r   r   r!   r   r#   r   Z	vector_dbr7   r7   r8   
from_textss  s"   #
	zHippo.from_texts)r   r   r   r   NNF)r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   )r   r:   r;   r   )NN)rM   rN   rO   rP   r;   rQ   )N)rM   rU   rO   rP   r;   rQ   )r;   rQ   )r;   r    )NNr   )r   r   rO   rP   r   r   r   r   r   r   r;   r   )r   NNN)r   r   r   r   r   r"   r   r   r   r   r   r   r;   r   )r   r   r   r   r   r"   r   r   r   r   r   r   r;   r   )r   r   r   r   r   r"   r   r   r   r   r   r   r;   r   )r   r   r   r   rO   rP   r   r   r   r   r   r   r!   r   r   r    r#   r   r   r   r;   r   )__name__
__module____qualname____doc__r9   r-   r4   rR   rS   rj   rT   r   r   r   r   classmethodr,   r   r7   r7   r7   r8   r      s\    :
9"
A

w[%*Er   )
__future__r   r1   typingr   r   r   r   r   r   r	   Zlangchain_core.documentsr
   Zlangchain_core.embeddingsr   Zlangchain_core.vectorstoresr   rD   r   r,   	getLoggerr   rJ   r   r7   r7   r7   r8   <module>   s    $
