o
    Zh=0                     @   s*  U d Z ddlZddlZddlZddlmZmZ ddlmZm	Z	m
Z
mZmZmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ dZdZG dd deZG dd deZG dd deZ G dd deZ!ee e!dZ"e	e#ee f e$d< G dd de%Z&G dd deZ'dS )zWrapper around scikit-learn NearestNeighbors implementation.

The vector store can be persisted in json, bson or parquet format.
    N)ABCabstractmethod)AnyDictIterableListLiteralOptionalTupleType)uuid4)Document)
Embeddings)guard_import)VectorStore)maximal_marginal_relevance      c                   @   s`   e Zd ZdZdeddfddZeedefddZed	e	ddfd
dZ
ede	fddZdS )BaseSerializerz Base class for serializing data.persist_pathreturnNc                 C   s
   || _ d S Nr   selfr    r   _/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/vectorstores/sklearn.py__init__   s   
zBaseSerializer.__init__c                 C      dS )z>The file extension suggested by this serializer (without dot).Nr   clsr   r   r   	extension       zBaseSerializer.extensiondatac                 C   r   )z"Saves the data to the persist_pathNr   r   r#   r   r   r   save#   r"   zBaseSerializer.savec                 C   r   )z$Loads the data from the persist_pathNr   r   r   r   r   load'   r"   zBaseSerializer.load)__name__
__module____qualname____doc__strr   classmethodr   r!   r   r%   r'   r   r   r   r   r      s    r   c                   @   sB   e Zd ZdZedefddZdeddfddZdefd	d
Z	dS )JsonSerializerzKSerialize data in JSON using the json package from python standard library.r   c                 C   r   )Njsonr   r   r   r   r   r!   /      zJsonSerializer.extensionr#   Nc                 C   s>   t | jd}t|| W d    d S 1 sw   Y  d S )Nw)openr   r/   dumpr   r#   fpr   r   r   r%   3   s   "zJsonSerializer.savec                 C   s:   t | jd}t|W  d    S 1 sw   Y  d S )Nr)r2   r   r/   r'   r   r5   r   r   r   r'   7   s   $zJsonSerializer.load)
r(   r)   r*   r+   r-   r,   r!   r   r%   r'   r   r   r   r   r.   ,   s    r.   c                       \   e Zd ZdZdeddf fddZedefddZd	eddfd
dZ	defddZ
  ZS )BsonSerializerz>Serialize data in Binary JSON using the `bson` python package.r   r   Nc                    s   t  | td| _d S Nbson)superr   r   r;   r   	__class__r   r   r   ?   s   zBsonSerializer.__init__c                 C   r   r:   r   r   r   r   r   r!   C   r0   zBsonSerializer.extensionr#   c                 C   sD   t | jd}|| j| W d    d S 1 sw   Y  d S )Nwb)r2   r   writer;   dumpsr4   r   r   r   r%   G   s   "zBsonSerializer.savec                 C   s@   t | jd}| j| W  d    S 1 sw   Y  d S )Nrb)r2   r   r;   loadsreadr7   r   r   r   r'   K   s   $zBsonSerializer.loadr(   r)   r*   r+   r,   r   r-   r!   r   r%   r'   __classcell__r   r   r=   r   r9   <   s    r9   c                       r8   )ParquetSerializerzFSerialize data in `Apache Parquet` format using the `pyarrow` package.r   r   Nc                    s.   t  | td| _td| _td| _d S )NZpandasZpyarrowzpyarrow.parquet)r<   r   r   pdpapqr   r=   r   r   r   S   s   

zParquetSerializer.__init__c                 C   r   )Nparquetr   r   r   r   r   r!   Y   r0   zParquetSerializer.extensionr#   c              
   C   s   | j |}| jj|}tj| jrIt	| jd }t
| j| z
| j|| j W n tyA } z	t
|| j |d }~ww t| d S | j|| j d S )Nz-backup)rH   Z	DataFramerI   TableZfrom_pandasospathexistsr   r,   renamerJ   Zwrite_table	Exceptionremove)r   r#   dftableZbackup_pathexcr   r   r   r%   ]   s   zParquetSerializer.savec                 C   s(   | j | j}| }dd | D S )Nc                 S   s   i | ]	\}}||  qS r   )tolist).0colZseriesr   r   r   
<dictcomp>p       z*ParquetSerializer.load.<locals>.<dictcomp>)rJ   Z
read_tabler   Z	to_pandasitems)r   rT   rS   r   r   r   r'   m   s   zParquetSerializer.loadrE   r   r   r=   r   rG   P   s    rG   r/   r;   rK   SERIALIZER_MAPc                   @   s   e Zd ZdZdS )SKLearnVectorStoreExceptionz'Exception raised by SKLearnVectorStore.N)r(   r)   r*   r+   r   r   r   r   r^   z   s    r^   c                   @   s  e Zd ZdZdddddedee ded	 d
ededdfddZ	e
defddZd1ddZd1ddZ		d2dee deee  deee  dedee f
ddZd1ddZeddee dededeeeef  fdd Zedd!edededeeeef  fd"d#Zefd!edededee fd$d%Zefd!edededeeeef  fd&d'Zeed(fdee ded)ed*ededee fd+d,Zeed(fd!eded)ed*ededee fd-d.Ze			d3dee dedeee  deee  dee dedd fd/d0Z dS )4SKLearnVectorStorezYSimple in-memory vector store based on the `scikit-learn` library
    `NearestNeighbors`.Nr/   Zcosine)r   
serializermetric	embeddingr   r`   r\   ra   kwargsr   c          	      K   s   t d}t ddd}|| _|jdd|i|| _d| _|| _|| _d | _| jd ur4t| }|| jd| _g | _	g | _
g | _g | _|g | _| jd urXtj| jrZ|   d S d S d S )	Nnumpyzsklearn.neighborszscikit-learn)Zpip_namera   Fr   r   )r   _npZNearestNeighbors
_neighbors_neighbors_fitted_embedding_functionZ_persist_path_serializerr]   _embeddings_texts
_metadatas_idsasarray_embeddings_nprM   rN   isfile_load)	r   rb   r   r`   ra   rc   npZsklearn_neighborsZserializer_clsr   r   r   r      s&   	
zSKLearnVectorStore.__init__c                 C   s   | j S r   )rh   r&   r   r   r   
embeddings   s   zSKLearnVectorStore.embeddingsc                 C   s8   | j d u r	td| j| j| j| jd}| j | d S )NzFYou must specify a persist_path on creation to persist the collection.)idstexts	metadatasrs   )ri   r^   rm   rk   rl   rj   r%   r$   r   r   r   persist   s   
zSKLearnVectorStore.persistc                 C   sP   | j d u r	td| j  }|d | _|d | _|d | _|d | _|   d S )NzCYou must specify a persist_path on creation to load the collection.rs   ru   rv   rt   )ri   r^   r'   rj   rk   rl   rm   _update_neighborsr$   r   r   r   rq      s   





zSKLearnVectorStore._loadru   rv   rt   c                 K   sl   t |}|pdd |D }| j| | j| j| | j|p(i gt|  | j| | 	  |S )Nc                 S   s   g | ]}t t qS r   )r,   r   )rW   _r   r   r   
<listcomp>   s    z0SKLearnVectorStore.add_texts.<locals>.<listcomp>)
listrk   extendrj   rh   Zembed_documentsrl   lenrm   rx   )r   ru   rv   rt   rc   rk   rm   r   r   r   	add_texts   s   zSKLearnVectorStore.add_textsc                 C   s>   t | jdkrtd| j| j| _| j| j d| _d S )Nr   (No data was added to SKLearnVectorStore.T)	r}   rj   r^   re   rn   ro   rf   fitrg   r&   r   r   r   rx      s   
z$SKLearnVectorStore._update_neighbors)kquery_embeddingr   c                K   s:   | j std| jj|g|d\}}tt|d |d S )zgSearch k embeddings similar to the query embedding. Returns a list of
        (index, distance) tuples.r   )Zn_neighborsr   )rg   r^   rf   Z
kneighborsr{   zip)r   r   r   rc   Zneigh_distsZ
neigh_idxsr   r   r   #_similarity_index_search_with_score   s   
z6SKLearnVectorStore._similarity_index_search_with_scorequeryc                   s4    j |} j|fd|i|} fdd|D S )Nr   c                    s:   g | ]\}}t  j| d  j| i j| d|fqS id)Zpage_contentmetadatar   rk   rm   rl   )rW   idxdistr&   r   r   rz      s    zCSKLearnVectorStore.similarity_search_with_score.<locals>.<listcomp>)rh   embed_queryr   )r   r   r   rc   r   indices_distsr   r&   r   similarity_search_with_score   s   
z/SKLearnVectorStore.similarity_search_with_scorec                 K   s$   | j |fd|i|}dd |D S )Nr   c                 S   s   g | ]\}}|qS r   r   )rW   docry   r   r   r   rz         z8SKLearnVectorStore.similarity_search.<locals>.<listcomp>)r   )r   r   r   rc   Zdocs_scoresr   r   r   similarity_search   s   z$SKLearnVectorStore.similarity_searchc                 K   sB   | j |fd|i|}t| \}}dd |D }ttt||S )Nr   c                 S   s   g | ]	}d t | qS )   )mathexp)rW   r   r   r   r   rz     rZ   zOSKLearnVectorStore._similarity_search_with_relevance_scores.<locals>.<listcomp>)r   r   r{   )r   r   r   rc   Z
docs_distsdocsdistsZscoresr   r   r   (_similarity_search_with_relevance_scores  s   z;SKLearnVectorStore._similarity_search_with_relevance_scoresg      ?fetch_klambda_multc                    sr   j |fd|i|}t| \ }j f }tjj|jjd|||d}	 fdd|	D }
fdd|
D S )a  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.
        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        r   )Zdtype)r   r   c                    s   g | ]} | qS r   r   )rW   i)indicesr   r   rz   ,  r   zNSKLearnVectorStore.max_marginal_relevance_search_by_vector.<locals>.<listcomp>c                    s2   g | ]}t  j| d  j| i j| dqS r   r   )rW   r   r&   r   r   rz   -  s    )r   r   ro   r   re   arrayZfloat32)r   rb   r   r   r   rc   r   ry   Zresult_embeddingsZmmr_selectedZmmr_indicesr   )r   r   r   'max_marginal_relevance_search_by_vector  s&   
z:SKLearnVectorStore.max_marginal_relevance_search_by_vectorc                 K   s4   | j du r	td| j |}| j||||d}|S )a  Return docs selected using the maximal marginal relevance.
        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.
        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
        Returns:
            List of Documents selected by maximal marginal relevance.
        NzCFor MMR search, you must specify an embedding function on creation.)Z
lambda_mul)rh   
ValueErrorr   r   )r   r   r   r   r   rc   rb   r   r   r   r   max_marginal_relevance_search5  s   
z0SKLearnVectorStore.max_marginal_relevance_searchc                 K   s(   t |fd|i|}|j|||d |S )Nr   )rv   rt   )r_   r~   )r    ru   rb   rv   rt   r   rc   vsr   r   r   
from_textsV  s   
zSKLearnVectorStore.from_texts)r   N)NN)NNN)!r(   r)   r*   r+   r   r	   r,   r   r   r   propertyrs   rw   rq   r   r   dictr~   rx   	DEFAULT_Kfloatintr
   r   r   r   r   r   DEFAULT_FETCH_Kr   r   r-   r   r   r   r   r   r_      s    
#











-
!

r_   )(r+   r/   r   rM   abcr   r   typingr   r   r   r   r   r	   r
   r   uuidr   Zlangchain_core.documentsr   Zlangchain_core.embeddingsr   Zlangchain_core.utilsr   Zlangchain_core.vectorstoresr   Z&langchain_community.vectorstores.utilsr   r   r   r   r.   r9   rG   r]   r,   __annotations__RuntimeErrorr^   r_   r   r   r   r   <module>   s0    ($