o
    ZhY                     @  s   d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ e ZdddZG dd deZG dd deZG dd deZdS )    )annotationsN)sha1)Thread)AnyDictIterableListOptionalTuple)Document)
Embeddings)VectorStore)BaseSettingsSettingsConfigDictsstrargsr   returnboolc                 G  s   |D ]	}|| vr dS qdS )z
    Check if a string contains multiple substrings.
    Args:
        s: string to check.
        *args: substrings to check.

    Returns:
        True if all substrings are in the string, False otherwise.
    FT )r   r   ar   r   _/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/vectorstores/myscale.pyhas_mul_sub_str   s
   
r   c                   @  s   e Zd ZU dZdZded< dZded< dZd	ed
< dZd	ed< dZ	ded< dZ
ded< dddddZded< dZded< dZded< dZded< d'd d!Zed"d#d$d%d&ZdS )(MyScaleSettingsa  MyScale client configuration.

    Attribute:
        myscale_host (str) : An URL to connect to MyScale backend.
                             Defaults to 'localhost'.
        myscale_port (int) : URL port to connect with HTTP. Defaults to 8443.
        username (str) : Username to login. Defaults to None.
        password (str) : Password to login. Defaults to None.
        index_type (str): index type string.
        index_param (dict): index build parameter.
        database (str) : Database name to find the table. Defaults to 'default'.
        table (str) : Table name to operate on.
                      Defaults to 'vector_table'.
        metric (str) : Metric to compute distance,
                       supported are ('L2', 'Cosine', 'IP'). Defaults to 'Cosine'.
        column_map (Dict) : Column type map to project column name onto langchain
                            semantics. Must have keys: `text`, `id`, `vector`,
                            must be same size to number of columns. For example:
                            .. code-block:: python

                                {
                                    'id': 'text_id',
                                    'vector': 'text_embedding',
                                    'text': 'text_plain',
                                    'metadata': 'metadata_dictionary_in_json',
                                }

                            Defaults to identity map.

    	localhostr   hosti   intportNOptional[str]usernamepasswordZMSTG
index_typezOptional[Dict[str, str]]index_paramidtextvectormetadata)r#   r$   r%   r&   zDict[str, str]
column_mapdefaultdatabaseZ	langchaintableZCosinemetricitemr   r   c                 C  s
   t | |S N)getattr)selfr,   r   r   r   __getitem__U   s   
zMyScaleSettings.__getitem__z.envutf-8Zmyscale_ignore)Zenv_fileZenv_file_encodingZ
env_prefixextra)r,   r   r   r   )__name__
__module____qualname____doc__r   __annotations__r   r   r    r!   r"   r'   r)   r*   r+   r0   r   Zmodel_configr   r   r   r   r   !   s.   
 

r   c                      s   e Zd ZdZ	dFdG fddZedHddZdIddZdJddZdKddZ				dLdMd$d%Z
e				dNdOd(d)ZdPd*d+Z	dFdQd1d2Z	dRdSd7d8Z	3	dRdTd9d:Z	dRdUd<d=ZdVd>d?Z		dWdXdBdCZedPdDdEZ  ZS )YMyScalea  `MyScale` vector store.

    You need a `clickhouse-connect` python package, and a valid account
    to connect to MyScale.

    MyScale can not only search with simple vector indexes.
    It also supports a complex query with multiple conditions,
    constraints and even sub-queries.

    For more information, please visit
        [myscale official site](https://docs.myscale.com/en/overview/)
    N	embeddingr   configOptional[MyScaleSettings]kwargsr   r   Nonec                   s  zddl m} W n ty   tdw zddlm} || _W n ty-   dd | _Y nw t   |dur;|| _nt | _| jsDJ | jj	rL| jj
sNJ | jjr^| jjr^| jjr^| jjs`J dD ]
}|| jjv slJ qb| jj d	v swJ | jjd
v rtd t|d}| jjrdddd | jj D  nd}d| jj d| jj d| jjd  d| jjd  d| jjd  d| jjd  d| jjd  d| d| jjd  d| jj d| jj d | d!| jjd  d"}	|| _d#| _d$| _|| _| jj d%v rd&nd'| _|d-| jj	| jj
| jj| jjd(|| _z| j d) W n t!yB }
 zt"d*| jj# d+ W Y d}
~
nd}
~
ww | j d, | j |	 dS ).zMyScale Wrapper to LangChain

        embedding (Embeddings):
        config (MyScaleSettings): Configuration to MyScale Client
        Other keyword arguments will pass into
            [clickhouse-connect](https://docs.myscale.com/)
        r   )
get_clientzlCould not import clickhouse connect python package. Please install it with `pip install clickhouse-connect`.)tqdmc                 S  s   | S r-   r   )xr   r   r   <lambda>   s    z"MyScale.__init__.<locals>.<lambda>N)r#   r%   r$   r&   )ZIPCOSINEL2)ipZcosinel2z_Lower case metric types will be deprecated the future. Please use one of ('IP', 'Cosine', 'L2')ztry this out, ,c                 S  s"   g | ]\}}d | d| d qS )'=r   ).0kvr   r   r   
<listcomp>      " z$MyScale.__init__.<locals>.<listcomp> z(
            CREATE TABLE IF NOT EXISTS .z(
                r#   z String,
                r$   r%   z! Array(Float32),
                r&   zP JSON,
                CONSTRAINT cons_vec_len CHECK length(                    z) = z$,
                VECTOR INDEX vidx z                     TYPE z&(                        'metric_type=rI   z,)
            ) ENGINE = MergeTree ORDER BY z	
        \)rR   rI   )rC   rD   ASCZDESC)r   r   r   r    z"SET allow_experimental_json_type=1zClickhouse version=z6 - There is no allow_experimental_json_type parameter.z$SET allow_experimental_object_type=1r   )$Zclickhouse_connectr?   ImportErrorr@   pgbarsuper__init__r;   r   r   r   r'   r)   r*   r+   upperloggerwarninglenembed_queryr"   joinitemsr!   dimBSmust_escape_embeddings
dist_orderr   r    clientcommand	ExceptiondebugZserver_version)r/   r:   r;   r=   r?   r@   rL   r_   Zindex_paramsZschema__	__class__r   r   rW   n   s   


"





	


zMyScale.__init__c                 C  s   | j S r-   )rb   r/   r   r   r   
embeddings   s   zMyScale.embeddingsvaluer   c                   s   d  fdd|D S )NrP   c                 3  s,    | ]}| j v r j | n|V  qd S r-   )ra   r`   )rK   crk   r   r   	<genexpr>   s   * z%MyScale.escape_str.<locals>.<genexpr>)r]   )r/   rm   r   rk   r   
escape_str   s   zMyScale.escape_strtransacr   column_namesIterable[str]c              
     sr   d |}g }|D ]}d  fdd|D }|d| d q	d jj d jj d| dd | d		}|S )
NrH   c                   s"   g | ]}d   t| d qS rI   )rp   r   )rK   Z_nrk   r   r   rN      rO   z'MyScale._build_istr.<locals>.<listcomp>()z8
                INSERT INTO TABLE 
                    rQ   z))
                VALUES
                z
                )r]   appendr;   r)   r*   )r/   rq   rr   ks_datanZi_strr   rk   r   _build_istr   s   
zMyScale._build_istrc                 C  s   |  ||}| j| d S r-   )r{   rd   re   )r/   rq   rr   Z_i_strr   r   r   _insert   s   zMyScale._insert    texts	metadatasOptional[List[dict]]
batch_sizer   idsOptional[Iterable[str]]	List[str]c              
   K  s  |pdd |D }| j j}g }|d ||d ||d t| jj|i}|p*dd |D }ttj|||d < tt|t| dksCJ t	|
  \}	}
z]d	}| jt	|
 d
t|dD ]4}t||	| j jd  | jksnJ || t||kr|r|  t| j||	gd}|  g }qZt|dkr|r|  | ||	 dd |D W S  ty } ztdt| dt| d g W  Y d	}~S d	}~ww )a  Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            ids: Optional list of ids to associate with the texts.
            batch_size: Batch size of insertion
            metadata: Optional column data to be inserted

        Returns:
            List of ids from adding the texts into the vectorstore.

        c                 S  s   g | ]}t |d  qS )r1   )r   encode	hexdigest)rK   tr   r   r   rN      s    z%MyScale.add_texts.<locals>.<listcomp>r#   r$   r%   c                 S  s   g | ]}i qS r   r   )rK   rh   r   r   r   rN         r&   r   NzInserting data...)Zdesctotal)targetr   c                 S  s   g | ]}|qS r   r   )rK   ir   r   r   rN     r   	[91m[1m
[0m [95m[0m)r;   r'   maprb   r\   jsondumpsr[   setzipr^   rU   indexr_   rw   r]   r   r|   startrf   rY   errortyper   )r/   r~   r   r   r   r=   Zcolmap_rq   rr   keysvaluesr   rM   er   r   r   	add_texts   sF   
$
 zMyScale.add_textsOptional[List[Dict[Any, Any]]]text_idsc           	      K  s(   | ||fi |}|j ||||d |S )aZ  Create Myscale wrapper with existing texts

        Args:
            texts (Iterable[str]): List or tuple of strings to be added
            embedding (Embeddings): Function to extract text embedding
            config (MyScaleSettings, Optional): Myscale configuration
            text_ids (Optional[Iterable], optional): IDs for the texts.
                                                     Defaults to None.
            batch_size (int, optional): Batchsize when transmitting data to MyScale.
                                        Defaults to 32.
            metadata (List[dict], optional): metadata to texts. Defaults to None.
            Other keyword arguments will pass into
                [clickhouse-connect](https://clickhouse.com/docs/en/integrations/python#clickhouse-connect-driver-api)
        Returns:
            MyScale Index
        )r   r   r   )r   )	clsr~   r:   r   r;   r   r   r=   ctxr   r   r   
from_texts  s   zMyScale.from_textsc                 C  s   d| j j d| j j d}|| j j d| j j d7 }|d| j j d7 }|d7 }| jd	| j j d| j j  D ]}|d
|d dd|d dd7 }q;|d7 }|S )zText representation for myscale, prints backends, username and schemas.
            Easy to use with `str(Myscale())`

        Returns:
            repr: string to show connection info and data schema
        z	[92m[1mrQ   z @ :z[0m

z[1musername: z[0m

Table Schema:
z4---------------------------------------------------
zDESC z|[94mnameZ24sz
[0m|[96mr   z[0m|
)	r;   r)   r*   r   r   r   rd   querynamed_results)r/   _reprrr   r   r   __repr__>  s   zMyScale.__repr__q_embList[float]topk	where_strr   c                 C  s   d tt|}|rd| }nd}d| jjd  d| jjd  d| jj d	| jj d
| d| jjd  d| d| j d| d
}|S )NrH   	PREWHERE rP   
            SELECT r$   z, 
                r&   z, dist
            FROM rQ   
            
            ORDER BY distance(r%   , []) 
                AS dist 
            LIMIT )r]   r   r   r;   r'   r)   r*   rc   r/   r   r   r   Z	q_emb_strq_strr   r   r   _build_qstrR  0   


	zMyScale._build_qstr   r   rL   List[Document]c                 K  s   | j | j|||fi |S )a  Perform a similarity search with MyScale

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): where condition string.
                                                 Defaults to None.

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection. When dealing with metadatas, remember to
                  use `{self.metadata_column}.attribute` instead of `attribute`
                  alone. The default name for it is `metadata`.

        Returns:
            List[Document]: List of Documents
        )similarity_search_by_vectorrb   r\   )r/   r   rL   r   r=   r   r   r   similarity_searchf  s
   zMyScale.similarity_searchc              
     x     |||}z fdd j| D W S  ty; } ztdt| dt| d g W  Y d}~S d}~ww )  Perform a similarity search with MyScale by vectors

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): where condition string.
                                                 Defaults to None.

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection. When dealing with metadatas, remember to
                  use `{self.metadata_column}.attribute` instead of `attribute`
                  alone. The default name for it is `metadata`.

        Returns:
            List[Document]: List of (Document, similarity)
        c                   s0   g | ]}t | jjd   | jjd  dqS )r$   r&   Zpage_contentr&   r   r;   r'   rK   r   rk   r   r   rN     s    z7MyScale.similarity_search_by_vector.<locals>.<listcomp>r   r   r   N	r   rd   r   r   rf   rY   r   r   r   r/   r:   rL   r   r=   r   r   r   rk   r   r   }     
 z#MyScale.similarity_search_by_vectorList[Tuple[Document, float]]c              
           j|||}z fdd j| D W S  ty? } ztdt	| dt
| d g W  Y d}~S d}~ww )/  Perform a similarity search with MyScale

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
            where_str (Optional[str], optional): where condition string.
                                                 Defaults to None.

            NOTE: Please do not let end-user to fill this and always be aware
                  of SQL injection. When dealing with metadatas, remember to
                  use `{self.metadata_column}.attribute` instead of `attribute`
                  alone. The default name for it is `metadata`.

        Returns:
            List[Document]: List of documents most similar to the query text
            and cosine distance in float for each.
            Lower score represents more similarity.
        c                   s8   g | ]}t | jjd   | jjd  d|d fqS )r$   r&   r   distr   r   rk   r   r   rN     s    zCMyScale.similarity_search_with_relevance_scores.<locals>.<listcomp>r   r   r   Nr   rb   r\   rd   r   r   rf   rY   r   r   r   r/   r   rL   r   r=   r   r   r   rk   r   'similarity_search_with_relevance_scores     

 z/MyScale.similarity_search_with_relevance_scoresc                 C  s$   | j d| jj d| jj  dS )z,
        Helper function: Drop data
        zDROP TABLE IF EXISTS rQ   N)rd   re   r;   r)   r*   rk   r   r   r   drop  s   zMyScale.dropOptional[List[str]]Optional[bool]c           	   
   K  s   |du r|du rJ dg }|r/t |dkr/ddd |D }|| jjd  d| d	 |r6|| t |dks>J d
|}d| jj d| jj d| }z	| j| W dS  t	yu } zt
t| W Y d}~dS d}~ww )a3  Delete by vector ID or other criteria.

        Args:
            ids: List of ids to delete.
            **kwargs: Other keyword arguments that subclasses might use.

        Returns:
            Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
        NzIYou need to specify where to be deleted! Either with `ids` or `where_str`r   rG   c                 S  s   g | ]}d | d qS rt   r   )rK   r#   r   r   r   rN     s    z"MyScale.delete.<locals>.<listcomp>r#   z IN (rv   z AND zDELETE FROM rQ   z WHERE TF)r[   r]   rw   r;   r'   r)   r*   rd   re   rf   rY   r   r   )	r/   r   r   r=   ZcondsZid_listZwhere_str_finalZqstrr   r   r   r   delete  s.   

zMyScale.deletec                 C  s   | j jd S )Nr&   )r;   r'   rk   r   r   r   metadata_column  s   zMyScale.metadata_columnr-   )r:   r   r;   r<   r=   r   r   r>   )r   r   )rm   r   r   r   )rq   r   rr   rs   r   r   )rq   r   rr   rs   r   r>   )Nr}   N)r~   rs   r   r   r   r   r   r   r=   r   r   r   )NNNr}   )r~   rs   r:   r   r   r   r;   r<   r   r   r   r   r=   r   r   r9   r   r   r   r   r   r   r   r   r   r   r   N)
r   r   rL   r   r   r   r=   r   r   r   
r:   r   rL   r   r   r   r=   r   r   r   
r   r   rL   r   r   r   r=   r   r   r   )r   r>   )NN)r   r   r   r   r=   r   r   r   )r4   r5   r6   r7   rW   propertyrl   rp   r{   r|   r   classmethodr   r   r   r   r   r   r   r   r   __classcell__r   r   ri   r   r9   `   sF    `


8
%
%
&r9   c                      sb   e Zd ZdZdg fd# fddZ	d$d%ddZ		d&d'ddZ	d&d(dd Zed)d!d"Z	  Z
S )*MyScaleWithoutJSONzsMyScale vector store without metadata column

    This is super handy if you are working to a SQL-native table
    Nr:   r   r;   r<   must_have_colsr   r=   r   r   r>   c                   s    t  j||fi | || _dS )ag  Building a myscale vector store without metadata column

        embedding (Embeddings): embedding model
        config (MyScaleSettings): Configuration to MyScale Client
        must_have_cols (List[str]): column names to be included in query
        Other keyword arguments will pass into
            [clickhouse-connect](https://docs.myscale.com/)
        N)rV   rW   r   )r/   r:   r;   r   r=   ri   r   r   rW     s   
zMyScaleWithoutJSON.__init__r   r   r   r   r   r   r   c                 C  s   d tt|}|rd| }nd}d| jjd  dd | j d| jj d| jj d	| d
| jjd  d| d| j d| d	}|S )NrH   r   rP   r   r$   z, dist, 
                z
            FROM rQ   r   r   r%   r   r   r   )	r]   r   r   r;   r'   r   r)   r*   rc   r   r   r   r   r     r   zMyScaleWithoutJSON._build_qstrr   rL   r   c              
     r   )r   c                   s4   g | ] t  jjd    fddjD dqS )r$   c                      i | ]}| | qS r   r   rK   rL   r   r   r   
<dictcomp>A      zMMyScaleWithoutJSON.similarity_search_by_vector.<locals>.<listcomp>.<dictcomp>r   r   r;   r'   r   rK   rk   r   r   rN   >  s    zBMyScaleWithoutJSON.similarity_search_by_vector.<locals>.<listcomp>r   r   r   Nr   r   r   rk   r   r   %  r   z.MyScaleWithoutJSON.similarity_search_by_vectorr   r   c              
     r   )r   c                   s<   g | ] t  jjd    fddjD d d fqS )r$   c                   r   r   r   r   r   r   r   r   d  r   zYMyScaleWithoutJSON.similarity_search_with_relevance_scores.<locals>.<listcomp>.<dictcomp>r   r   r   r   rk   r   r   rN   `  s    zNMyScaleWithoutJSON.similarity_search_with_relevance_scores.<locals>.<listcomp>r   r   r   Nr   r   r   rk   r   r   I  r   z:MyScaleWithoutJSON.similarity_search_with_relevance_scoresc                 C  s   dS )NrP   r   rk   r   r   r   r   n  s   z"MyScaleWithoutJSON.metadata_column)
r:   r   r;   r<   r   r   r=   r   r   r>   r-   r   r   r   r   r   )r4   r5   r6   r7   rW   r   r   r   r   r   r   r   r   ri   r   r     s    %%r   )r   r   r   r   r   r   )
__future__r   r   logginghashlibr   	threadingr   typingr   r   r   r   r	   r
   Zlangchain_core.documentsr   Zlangchain_core.embeddingsr   Zlangchain_core.vectorstoresr   Zpydantic_settingsr   r   	getLoggerrY   r   r   r9   r   r   r   r   r   <module>   s$     
?   