o
    Zh                     @  s   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ er`d d	lmZ d d
lmZ  G dd deZ!dS )    )annotationsN)contextmanager)StringIO)	TYPE_CHECKINGAnyDict	GeneratorIterableListOptionalTupleType)
Embeddings)VectorStore)Document)
connection)cursorc                   @  s  e Zd ZdZG dd deejZG dd dZdddddqddZ	G dd dZ
drddZdrddZ		dsdtd!d"Z	dudvd#d$Zdwd%d&Z	dudxd.d/Zdyd3d4Ze		5	6	7	dzd{d:d;Z		dsd|d@dAZ	7d}d~dCdDZddHdIZ	JdddNdOZ	JdddRdSZ	JdddTdUZ	JdddVdWZ	duddZd[Zdd^d_ZddadbZdrdcddZdrdedfZddidjZ ddkdlZ!ddmdnZ"ddodpZ#dS )YellowbrickzYellowbrick as a vector database.
    Example:
        .. code-block:: python
            from langchain_community.vectorstores import Yellowbrick
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            ...
    c                   @  s   e Zd ZdZdZdZdS )zYellowbrick.IndexTypez<Enumerator for the supported Index types within Yellowbrick.noneZlshN)__name__
__module____qualname____doc__NONELSH r   r   c/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/vectorstores/yellowbrick.py	IndexType*   s    r   c                   @  s,   e Zd ZdZ		ddddZddddZdS )Yellowbrick.IndexParamsz/Parameters for configuring a Yellowbrick index.N
index_type!Optional['Yellowbrick.IndexType']paramsOptional[Dict[str, Any]]c                 C  s$   |d u rt jj}|| _|pi | _d S N)r   r   r   r   r!   )selfr   r!   r   r   r   __init__3   s   z Yellowbrick.IndexParams.__init__keystrdefaultr   returnc                 C  s   | j ||S r#   )r!   get)r$   r&   r(   r   r   r   	get_param=   s   z!Yellowbrick.IndexParams.get_paramNN)r   r    r!   r"   r#   )r&   r'   r(   r   r)   r   )r   r   r   r   r%   r+   r   r   r   r   IndexParams0   s    
r-   NF)schemaloggerdrop	embeddingr   connection_stringr'   tabler.   Optional[str]r/   Optional[logging.Logger]r0   boolr)   Nonec                C  s^  ddl m} |  |r|| _n'tt| _| jtj t	 }|tj
 td}	||	 | j| t|tsD| jd dS d| _d| _d| _|| _t|| j| _t| jj || _|| _|| _d| _|   | j  1}
|r| j!| j| j|
d	 | j!| j| j | j|
d	 | "|
 | #|
 | $|
 W d   dS 1 sw   Y  dS )
zInitialize with yellowbrick client.
        Args:
            embedding: Embedding operator
            connection_string: Format 'postgres://username:password@host:port/database'
            table: Table used to store / retrieve embeddings from
        r   )extrasz)%(asctime)s - %(levelname)s - %(message)sz+embeddings input must be Embeddings object.NZ
_lsh_indexZ_lsh_hyperplane_content)r3   r.   r   )%psycopg2r8   Zregister_uuidr/   logging	getLoggerr   setLevelERRORStreamHandlerDEBUG	FormattersetFormatter
addHandler
isinstancer   errorLSH_INDEX_TABLELSH_HYPERPLANE_TABLECONTENT_TABLEr2   r   DatabaseConnectionr   atexitregisterclose_connection_schema_table
_embeddingZ_max_embedding_len_check_database_utf8
get_cursorr0   _drop_lsh_index_tables_create_schema_create_table)r$   r1   r2   r3   r.   r/   r0   r8   handler	formatterr   r   r   r   r%   @   sJ   





"zYellowbrick.__init__c                      sp   e Zd ZU dZded< dZded< ded< d fddZdddZdddZe	dddZ
e	dddZ  ZS )zYellowbrick.DatabaseConnectionNr'   _connection_stringzOptional['PgConnection']_connectionlogging.Logger_loggerr2   r/   r)    'Yellowbrick.DatabaseConnection'c                   s.   | j d u rt | | _ || j _|| j _| j S r#   )	_instancesuper__new__rW   rZ   )clsr2   r/   	__class__r   r   r^      s
   
z&Yellowbrick.DatabaseConnection.__new__r7   c                 C  s*   | j r| j js| j   d | _ d S d S d S r#   )rX   closedclose)r$   r   r   r   rL      s   

z/Yellowbrick.DatabaseConnection.close_connection'PgConnection'c                 C  s2   dd l }| jr| jjr|| j| _d| j_| jS )Nr   F)r:   rX   rb   connectrW   Z
autocommit)r$   r:   r   r   r   get_connection   s
   z-Yellowbrick.DatabaseConnection.get_connection%Generator['PgConnection', None, None]c              
   c  sh    ddl m} |  }z|V  W n |y- } z|  | jjddd td|d }~ww |  d S )Nr   )DatabaseErrorz2Database error occurred, rolling back transaction.T)exc_infozDatabase transaction failed.)r:   rh   rf   rollbackrZ   rE   RuntimeErrorcommit)r$   rh   conner   r   r   get_managed_connection   s   

z5Yellowbrick.DatabaseConnection.get_managed_connection!Generator['PgCursor', None, None]c              	   c  sV    |   }| }z	|V  W |  n|  w W d    d S 1 s$w   Y  d S r#   )ro   r   rc   )r$   rm   r   r   r   r   rQ      s   
"z)Yellowbrick.DatabaseConnection.get_cursor)r2   r'   r/   rY   r)   r[   r)   r7   )r)   rd   )r)   rg   )r)   rp   )r   r   r   r\   __annotations__rX   r^   rL   rf   r   ro   rQ   __classcell__r   r   r`   r   rI   ~   s   
 
	
	rI   r   
'PgCursor'c                 C  s:   ddl m} | jr||dj|| jd dS dS )z>
        Helper function: create schema if not exists
        r   sqlzE
                    CREATE SCHEMA IF NOT EXISTS {s}
                )sN)r:   rv   rM   executeSQLformat
Identifier)r$   r   rv   r   r   r   rS      s   
zYellowbrick._create_schemac           
      C  s   ddl m} | jr| jfnd}|jg || j| j R  }|| j| j d }||dj||d | jr<| jfnd}|jg || jR  }|jg || j| j R  }|| j| j d }|| j| j d }	||d	j||||	d
 dS )z=
        Helper function: create table if not exists
        r   ru   r   
_pk_doc_ida0  
                CREATE TABLE IF NOT EXISTS {t} (
                doc_id UUID NOT NULL,
                text VARCHAR(60000) NOT NULL,
                metadata VARCHAR(1024) NOT NULL,
                CONSTRAINT {c} PRIMARY KEY (doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            tcZ_pk_doc_id_embedding_id
_fk_doc_ida  
                CREATE TABLE IF NOT EXISTS {t1} (
                doc_id UUID NOT NULL,
                embedding_id SMALLINT NOT NULL,
                embedding FLOAT NOT NULL,
                CONSTRAINT {c1} PRIMARY KEY (doc_id, embedding_id),
                CONSTRAINT {c2} FOREIGN KEY (doc_id) REFERENCES {t2}(doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            t1t2c1c2N)	r:   rv   rM   r{   rN   rH   rx   ry   rz   )
r$   r   rv   schema_prefixr~   r   r   r   r   r   r   r   r   rT      s>   	
zYellowbrick._create_tableOptional['PgCursor']c                 C  s\   |du r$| j  }| j|||d W d   dS 1 sw   Y  dS | j|||d dS )z
        Helper function: Drop data. If a cursor is provided, use it;
        otherwise, obtain a new cursor for the operation.
        N)r.   )r   rQ   _drop_table)r$   r3   r.   r   r   r   r   r0      s
   
"zYellowbrick.dropc                 C  sF   ddl m} |r|||}n||}|d|}|| dS )zI
        Executes the drop table command using the given cursor.
        r   ru   z1
        DROP TABLE IF EXISTS {} CASCADE
        N)r:   rv   r{   ry   rz   rx   )r$   r   r3   r.   rv   
table_nameZdrop_table_queryr   r   r   r     s   	
zYellowbrick._drop_tablec                 C  sh   | j  }d}|| | d }W d   n1 sw   Y  | dks.| dkr0dS td)zE
        Helper function: Test the database is UTF-8 encoded
        z
                SELECT pg_encoding_to_char(encoding)
                FROM pg_database
                WHERE datname = current_database();
            r   Nutf8zutf-8TzDatabase encoding is not UTF-8)r   rQ   rx   fetchonelower	Exception)r$   r   queryencodingr   r   r   rP     s   
	z Yellowbrick._check_database_utf8textsIterable[str]	metadatasOptional[List[dict]]kwargsr   	List[str]c              
   K  s  d}t |}| jt |}g }|sdd |D }|dp!t }| j }t }	t }
t	j
|	ddt	jd}t	j
|
ddt	jd}d}t|D ]T\}}tt }|| |||t|| g t|| D ]\}}||||g qj|d	7 }||kr| ||	|
 |	d |	d |
d |
d d}qH|dkr| ||	|
 W d    n1 sw   Y  |jtjjkr| |t| |S )
Ni'  c                 S  s   g | ]}i qS r   r   ).0_r   r   r   
<listcomp>?  s    z)Yellowbrick.add_texts.<locals>.<listcomp>index_params	")	delimiter	quotecharquotingr      )listrO   Zembed_documentsr*   r   r-   r   rQ   r   csvwriterQUOTE_MINIMAL	enumerater'   uuiduuid4appendwriterowjsondumps_copy_to_dbseektruncater   r   r   _update_indexUUID)r$   r   r   r   Z
batch_size
embeddingsresultsr   r   
content_ioembeddings_ioZcontent_writerZembeddings_writerZcurrent_batch_sizeitextZdoc_uuidembedding_idr1   r   r   r   	add_texts3  sN   






"zYellowbrick.add_textsr   r   r   c           	      C  s   | d | d ddlm} | jr| jfnd}|jg || j| j R  }|dj|d}|	|| | jr=| jfnd}|jg || jR  }|dj|d}|	|| d S )Nr   ru   r   z
            COPY {table} (doc_id, text, metadata) FROM 
            STDIN WITH (FORMAT CSV, DELIMITER E'\t', QUOTE '"')
        )r3   z
            COPY {table} (doc_id, embedding_id, embedding) FROM 
            STDIN WITH (FORMAT CSV, DELIMITER E'\t', QUOTE '"')
        )
r   r:   rv   rM   r{   rN   rH   ry   rz   Zcopy_expert)	r$   r   r   r   rv   r   r3   Zcontent_copy_queryZembeddings_copy_queryr   r   r   r   j  s&   

zYellowbrick._copy_to_db 	langchainpublicr_   Type[Yellowbrick]c           
      K  s,   | |||||d}	|	j d||d| |	S )a  Add texts to the vectorstore index.
        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            connection_string: URI to Yellowbrick instance
            embedding: Embedding function
            table: table to store embeddings
            kwargs: vectorstore specific parameters
        )r1   r2   r3   r.   r0   )r   r   Nr   )r   )
r_   r   r1   r   r2   r3   r.   r0   r   Zvssr   r   r   
from_texts  s   zYellowbrick.from_textsidsOptional[List[str]]
delete_allOptional[bool]c                   s|  ddl m  |r d}n%|dur/t fdd|D } d|} dj|d	}ntd
| jr:| jfnd}| j	 s} j
g || j| j R  }	 dj|	|d}
||
  j
g || jR  }	 dj|	|d}
||
 | j|| j| j g|R  r j
g || j| j R  }	 dj|	|d}
||
 W d   dS W d   dS 1 sw   Y  dS )zxDelete vectors by uuids.

        Args:
            ids: List of ids to delete, where each id is a uuid string.
        r   ru   z'
                WHERE 1=1
            Nc                 3  s    | ]}  |V  qd S r#   )Literal)r   idru   r   r   	<genexpr>  s    z%Yellowbrick.delete.<locals>.<genexpr>z, z5
                WHERE doc_id IN ({ids})
            )r   z*Either ids or delete_all must be provided.r   zDELETE FROM {table} {where_sql})r3   	where_sql)r:   rv   ry   tuplejoinrz   
ValueErrorrM   r   rQ   r{   rN   rH   rx   _table_existsrF   )r$   r   r   r   r   ZuuidsZids_formattedr   r   Ztable_identifierr   r   ru   r   delete  sh   








zYellowbrick.deleter   c                 C  sJ   ddl m} ||}||}||dj||d | d dkS )z>
        Checks if a table exists in the given schema
        r   ru   z
                SELECT COUNT(*)
                FROM sys.table t INNER JOIN sys.schema s ON t.schema_id = s.schema_id
                WHERE s.name = {schema} AND t.name = {table_name}
            )r.   r   )r:   rv   r   rx   ry   rz   r   )r$   r   r   r.   rv   r   r   r   r     s   

zYellowbrick._table_existsvectorList[float]	uuid.UUIDc                 C  sF   dd l }dtt|}|| }| }tj|d d d}|S )Nr   ,   )bytes)	hashlibr   mapr'   sha1encodedigestr   r   )r$   r   r   Z
vector_strZhash_objectZhash_digestZvector_uuidr   r   r   _generate_vector_uuid  s   z!Yellowbrick._generate_vector_uuid   kintList[Tuple[Document, float]]c                   s   ddl m} ddlm} |dpt }| j }d| j	 }| 
| |d||}	||	  fddt|D }
|d	||}||||
 ||}| jr]| jfnd
}|jg || j	R  }|jg || j	| j R  }|jtjjkr| j	d }| ||| | jr| jfnd
}|jg || j	| j R  }||}|dj|||||||ddd}|||f | }n|dj|||d}|||f | }W d   n1 sw   Y  g }|D ]}t|d pi }t|d |d}|||d f q|S )a  Perform a similarity search with Yellowbrick with vector

        Args:
            embedding (List[float]): query embedding
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document, float]: List of Documents and scores
        r   ru   )execute_valuesr   Ztmp_z 
                CREATE TEMPORARY TABLE {} (
                doc_id UUID,
                embedding_id SMALLINT,
                embedding FLOAT)
                ON COMMIT DROP
                DISTRIBUTE REPLICATE
            c                   s   g | ]\}}t  ||fqS r   )r'   )r   r   Zembedding_valueZ
tmp_doc_idr   r   r   %  s    zFYellowbrick.similarity_search_with_score_by_vector.<locals>.<listcomp>z:INSERT INTO {} (doc_id, embedding_id, embedding) VALUES %sr   Z	_tmp_hasha/  
                    WITH index_docs AS (
                    SELECT
                        t1.doc_id,
                        SUM(ABS(t1.hash-t2.hash)) as hamming_distance
                    FROM
                        {lsh_index} t1
                    INNER JOIN
                        {input_hash_table} t2
                    ON t1.hash_index = t2.hash_index
                    GROUP BY t1.doc_id
                    HAVING hamming_distance <= {hamming_distance}
                    )
                    SELECT
                        text,
                        metadata,
                       SUM(v1.embedding * v2.embedding) /
                        (SQRT(SUM(v1.embedding * v1.embedding)) *
                       SQRT(SUM(v2.embedding * v2.embedding))) AS score
                    FROM
                        {v1} v1
                    INNER JOIN
                        {v2} v2
                    ON v1.embedding_id = v2.embedding_id
                    INNER JOIN
                        {v3} v3
                    ON v2.doc_id = v3.doc_id
                    INNER JOIN
                        index_docs v4
                    ON v2.doc_id = v4.doc_id
                    GROUP BY v3.doc_id, v3.text, v3.metadata
                    ORDER BY score DESC
                    LIMIT %s
                hamming_distance)v1v2v3	lsh_indexinput_hash_tabler   a  
                    SELECT 
                        text,
                        metadata,
                        score
                    FROM
                        (SELECT
                            v2.doc_id doc_id,
                            SUM(v1.embedding * v2.embedding) /
                            (SQRT(SUM(v1.embedding * v1.embedding)) *
                            SQRT(SUM(v2.embedding * v2.embedding))) AS score
                        FROM
                            {v1} v1
                        INNER JOIN
                            {v2} v2
                        ON v1.embedding_id = v2.embedding_id
                        GROUP BY v2.doc_id
                        ORDER BY score DESC LIMIT %s
                        ) v4
                    INNER JOIN
                        {v3} v3
                    ON v4.doc_id = v3.doc_id
                    ORDER BY score DESC
                )r   r   r   Nr   )Zpage_contentmetadata   )r:   rv   Zpsycopg2.extrasr   r*   r   r-   r   rQ   rN   r   ry   rz   r{   rx   r   rM   rH   r   r   r   _generate_tmp_lsh_hashesrF   r   r+   Zfetchallr   loadsr   r   )r$   r1   r   r   rv   r   r   r   Ztmp_embeddings_tableZcreate_table_queryZ
data_inputinsert_queryr   r   r   r   tmp_hash_tabler   r   Z	sql_queryr   	documentsresultr   docr   r   r   &similarity_search_with_score_by_vector  s   

	






#
-
|z2Yellowbrick.similarity_search_with_score_by_vectorr   List[Document]c                 K  s0   | j |}| jd||d|}dd |D S )ae  Perform a similarity search with Yellowbrick

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document]: List of Documents
        r1   r   c                 S     g | ]\}}|qS r   r   r   r   r   r   r   r   r         z1Yellowbrick.similarity_search.<locals>.<listcomp>Nr   rO   Zembed_queryr   r$   r   r   r   r1   r   r   r   r   similarity_search  s   zYellowbrick.similarity_searchc                 K  s&   | j |}| jd||d|}|S )ar  Perform a similarity search with Yellowbrick

        Args:
            query (str): query string
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document]: List of (Document, similarity)
        r   Nr   r   r   r   r   r   similarity_search_with_score  s   z(Yellowbrick.similarity_search_with_scorec                 K  s$   | j d||d|}dd |D S )a  Perform a similarity search with Yellowbrick by vectors

        Args:
            embedding (List[float]): query embedding
            k (int, optional): Top K neighbors to retrieve. Defaults to 4.

            NOTE: Please do not let end-user fill this and always be aware
                  of SQL injection.

        Returns:
            List[Document]: List of documents
        r   c                 S  r   r   r   r   r   r   r   r     r   z;Yellowbrick.similarity_search_by_vector.<locals>.<listcomp>Nr   )r   )r$   r1   r   r   r   r   r   r   similarity_search_by_vector  s   z'Yellowbrick.similarity_search_by_vectordoc_idOptional[uuid.UUID]c                 C  s   ddl m} | jr| jfnd}|jg || j| j R  }|jg || j| j R  }|jg || jR  }|d|}|rN|dj|	t
|dn|d}	|d}
|d	j||||	|
d
}|| dS )zAdd hashes to LSH indexr   ru   r   zINSERT INTO {}zWHERE e.doc_id = {doc_id})r   r   zGROUP BY 1, 2av  
            {query_prefix}
            SELECT
                e.doc_id as doc_id,
                h.id as hash_index,
                CASE WHEN SUM(e.embedding * h.hyperplane) > 0 THEN 1 ELSE 0 END as hash
            FROM {embedding_table} e
            INNER JOIN {hyperplanes} h ON e.embedding_id = h.hyperplane_id
            {condition}
            {group_by}
        )query_prefixembedding_tablehyperplanes	conditiongroup_byN)r:   rv   rM   r{   rN   rG   rF   ry   rz   r   r'   rx   )r$   r   r   rv   r   lsh_hyperplane_tableZlsh_index_table_idZembedding_table_idZquery_prefix_idr   r   input_queryr   r   r   _update_lsh_hashes  s<   


zYellowbrick._update_lsh_hashestmp_embedding_tabler   c                 C  s   ddl m} | jr| jfnd}|jg || j| j R  }||}||}|d|}	|d}
|dj|	|||
d}|| dS )	zGenerate temp LSHr   ru   r   z+CREATE TEMPORARY TABLE {} ON COMMIT DROP ASz
GROUP BY 1a[  
            {query_prefix}
            SELECT
                h.id as hash_index,
                CASE WHEN SUM(e.embedding * h.hyperplane) > 0 THEN 1 ELSE 0 END as hash
            FROM {embedding_table} e
            INNER JOIN {hyperplanes} h ON e.embedding_id = h.hyperplane_id
            {group_by}
            DISTRIBUTE REPLICATE
        )r   r   r   r   N)	r:   rv   rM   r{   rN   rG   ry   rz   rx   )r$   r   r   r   rv   r   r   Ztmp_embedding_table_idZtmp_hash_table_idr   r   r   r   r   r   r     s.   




z$Yellowbrick._generate_tmp_lsh_hashesnum_hyperplanesc           	      C  s   ddl m} | jr| jfnd}|jg || j| j R  }||dj|d |	 d dkr3dS |jg || jR  }||dj|d |	 d }|d7 }|d	j|
||
||d
}|| dS )z4Generate random hyperplanes and store in Yellowbrickr   ru   r   zSELECT COUNT(*) FROM {t})r~   Nz!SELECT MAX(embedding_id) FROM {t}r   a2  
            WITH parameters AS (
                SELECT {num_hyperplanes} AS num_hyperplanes,
                    {dims_per_hyperplane} AS dims_per_hyperplane
            )
            INSERT INTO {hyperplanes_table} (id, hyperplane_id, hyperplane)
                SELECT id, hyperplane_id, (random() * 2 - 1) AS hyperplane
                FROM
                (SELECT range-1 id FROM sys.rowgenerator
                    WHERE range BETWEEN 1 AND
                    (SELECT num_hyperplanes FROM parameters) AND
                    worker_lid = 0 AND thread_id = 0) a,
                (SELECT range-1 hyperplane_id FROM sys.rowgenerator
                    WHERE range BETWEEN 1 AND
                    (SELECT dims_per_hyperplane FROM parameters) AND
                    worker_lid = 0 AND thread_id = 0) b
        )r  Zdims_per_hyperplanehyperplanes_table)r:   rv   rM   r{   rN   rG   rx   ry   rz   r   r   )	r$   r   r  rv   r   r  r~   Znum_dimensionsr   r   r   r   _populate_hyperplanes*  s.   
z!Yellowbrick._populate_hyperplanesc           
      C  s  ddl m} | jr| jfnd}|jg || j| j R  }|jg || j| j R  }|| j| j d }|| j| j d }||dj	||||d | jrW| jfnd}|jg || j| j
 R  }|| j| j
 d }	||d	j	||	d
 dS )z&Create LSH index and hyperplane tablesr   ru   r   r|   r   a  
                CREATE TABLE IF NOT EXISTS {t1} (
                doc_id UUID NOT NULL,
                hash_index SMALLINT NOT NULL,
                hash SMALLINT NOT NULL,
                CONSTRAINT {c1} PRIMARY KEY (doc_id, hash_index),
                CONSTRAINT {c2} FOREIGN KEY (doc_id) REFERENCES {t2}(doc_id))
                DISTRIBUTE ON (doc_id) SORT ON (doc_id)
            r   Z_pk_id_hp_ida2  
                CREATE TABLE IF NOT EXISTS {t} (
                id SMALLINT NOT NULL,
                hyperplane_id SMALLINT NOT NULL,
                hyperplane FLOAT NOT NULL,
                CONSTRAINT {c} PRIMARY KEY (id, hyperplane_id))
                DISTRIBUTE REPLICATE SORT ON (id)
            r}   N)r:   rv   rM   r{   rN   rF   rH   rx   ry   rz   rG   )
r$   r   rv   r   r   r   r   r   r~   r   r   r   r   _create_lsh_index_tablesT  s:   
	z$Yellowbrick._create_lsh_index_tablesc                 C  s8   | j | j| j| j |d | j | j| j| j |d dS )zDrop LSH index tables)r.   r3   r   N)r0   rM   rN   rF   rG   )r$   r   r   r   r   rR     s   

z"Yellowbrick._drop_lsh_index_tablesr   r   c                 C  st   |j tjjkr8| j "}| | | | | ||	dd | 
| W d   dS 1 s1w   Y  dS dS )z"Create index from existing vectorsr     N)r   r   r   r   r   rQ   rR   r  r  r+   r   r$   r   r   r   r   r   create_index  s   

"zYellowbrick.create_indexc                 C  sL   |j tjjkr$| j }| | W d   dS 1 sw   Y  dS dS )zDrop an indexN)r   r   r   r   r   rQ   rR   r  r   r   r   
drop_index  s
   "zYellowbrick.drop_indexc                 C  sN   |j tjjkr%| j }| || W d   dS 1 sw   Y  dS dS )zHUpdate an index with a new or modified embedding in the embeddings tableN)r   r   r   r   r   rQ   r   )r$   r   r   r   r   r   r   r     s
   "zYellowbrick._update_indexc              
   C  s,  ddl m} z{| j k}| jr| jfnd}|jg || jR  }|jg || jd R  }|jg || j| j R  }|dj	||d}|
| | | |dj	||d}|
| |dj	||d}	|
|	 W d    W d S 1 szw   Y  W d S  ty }
 ztd	|
 |
d }
~
ww )
Nr   ru   r   Z_v1zALTER TABLE {t1} RENAME TO {t2})r   r   z
                    INSERT INTO {t1} (doc_id, embedding_id, embedding) 
                    SELECT id, embedding_id, embedding FROM {t2}
                z
                    INSERT INTO {t1} (doc_id, text, metadata) 
                    SELECT DISTINCT id, text, metadata FROM {t2}
                zFailed to migrate schema: )r:   rv   r   rQ   rM   r{   rN   rH   ry   rz   rx   rT   r   rk   )r$   rv   r   r   r   Zold_embeddingscontentZalter_table_queryr   Zinsert_content_queryrn   r   r   r   migrate_schema_v1_to_v2  sH   




	&!z#Yellowbrick.migrate_schema_v1_to_v2)r1   r   r2   r'   r3   r'   r.   r4   r/   r5   r0   r6   r)   r7   )r   rt   r)   r7   r,   )r3   r'   r.   r4   r   r   r)   r7   r#   )r   rt   r3   r'   r.   r4   r)   r7   )r)   r6   )r   r   r   r   r   r   r)   r   )r   rt   r   r   r   r   r)   r7   )Nr   r   r   F)r_   r   r   r   r1   r   r   r   r2   r'   r3   r'   r.   r'   r0   r6   r   r   r)   r   )r   r   r   r   r   r   r)   r7   )r   )r   rt   r   r'   r.   r'   r)   r6   )r   r   r)   r   )r   )r1   r   r   r   r   r   r)   r   )r   r'   r   r   r   r   r)   r   )r   r'   r   r   r   r   r)   r   )r1   r   r   r   r   r   r)   r   )r   rt   r   r   r)   r7   )r   rt   r   r'   r   r'   r)   r7   )r   rt   r  r   r)   r7   )r   r   r)   r7   )r   r   r   r   r)   r7   rq   )$r   r   r   r   r'   enumEnumr   r-   r%   rI   rS   rT   r0   r   rP   r   r   classmethodr   r   r   r   r   r   r   r   r   r   r  r  rR   r  r  r   r
  r   r   r   r   r   !   sh    >
6
6

7 >

 
-
$
*
/


r   )"
__future__r   rJ   r   r  r   r;   r   
contextlibr   ior   typingr   r   r   r   r	   r
   r   r   r   Zlangchain_core.embeddingsr   Zlangchain_core.vectorstoresr   Z%langchain_community.docstore.documentr   Zpsycopg2.extensionsr   ZPgConnectionr   ZPgCursorr   r   r   r   r   <module>   s"    ,