o
    Zh|                     @   s   d Z ddlZddlmZmZmZmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ d
edefddZ				ddee dedededeee  deee  dee deddfddZG dd deZdS )z7Taken from: https://docs.pinecone.io/docs/hybrid-search    N)AnyDictListOptional)CallbackManagerForRetrieverRun)Document)
Embeddings)BaseRetriever)pre_init)
ConfigDicttextreturnc                 C   s   t t| d S )zhHash a text using SHA256.

    Args:
        text: Text to hash.

    Returns:
        Hashed text.
    zutf-8)strhashlibsha256encode	hexdigest)r    r   l/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/retrievers/pinecone_hybrid_search.py	hash_text   s   	r   contextcontextsindex
embeddingssparse_encoderids	metadatas	namespacetext_keyc              	      s:  d}t dt| |}	zddlm}
 |
|	}	W n	 ty   Y nw |du r+dd | D }|	D ]m}t|| t| }| || }||| }|rL||| ndd |D } fddt||D }||}||}|D ]}d	d |d
 D |d
< qkg }t||||D ]\}}}}|	||||d q|j
||d q-dS )a  Create an index from a list of contexts.

    It modifies the index argument in-place!

    Args:
        contexts: List of contexts to embed.
        index: Index to use.
        embeddings: Embeddings model to use.
        sparse_encoder: Sparse encoder to use.
        ids: List of ids to use for the documents.
        metadatas: List of metadata to use for the documents.
        namespace: Namespace value for index partition.
        r   )tqdmNc                 S      g | ]}t |qS r   )r   ).0r   r   r   r   
<listcomp><       z create_index.<locals>.<listcomp>c                 S   s   g | ]}i qS r   r   )r"   _r   r   r   r#   E   s    c                    s   g | ]
\}} |i|qS r   r   )r"   r   metadatar   r   r   r#   H   s    
c                 S   r!   r   floatr"   s1r   r   r   r#   R   r$   values)idZsparse_valuesr,   r&   )r   )rangelenZ	tqdm.autor    ImportErrorminzipZembed_documentsZencode_documentsappendZupsert)r   r   r   r   r   r   r   r   Z
batch_sizeZ	_iteratorr    iZi_endZcontext_batchZ	batch_idsZmetadata_batchmetaZdense_embedsZsparse_embedssZvectorsZdoc_idsparseZdenser&   r   r'   r   create_index   sH   



r8   c                   @   s   e Zd ZU dZeed< 	 	 dZeed< 	 dZeed< 	 dZ	e
ed< 	 dZeed	< 	 dZee ed
< 	 dZeed< edddZ			ddee deee  deee  d
ee ddf
ddZededefddZdedededee fddZdS )PineconeHybridSearchRetrieverz#`Pinecone Hybrid Search` retriever.r   Nr   r      top_kg      ?alphar   r   r   TZforbid)Zarbitrary_types_allowedextratextsr   r   r   c              
   C   s$   t || j| j| j|||| jd d S )N)r   r   r   r   )r8   r   r   r   r   )selfr>   r   r   r   r   r   r   	add_texts|   s   
z'PineconeHybridSearchRetriever.add_textsr,   c                 C   s6   zddl m} ddlm} W |S  ty   tdw )z?Validate that api key and python package exists in environment.r   hybrid_convex_scale)BaseSparseEncoderzbCould not import pinecone_text python package. Please install it with `pip install pinecone_text`.)pinecone_text.hybridrB   Z(pinecone_text.sparse.base_sparse_encoderrC   r0   )clsr,   rB   rC   r   r   r   validate_environment   s   z2PineconeHybridSearchRetriever.validate_environmentqueryrun_managerkwargsc                K   s   ddl m} | j|}| j|}|||| j\}}dd |d D |d< | jjd||| j	d| j
d|}g }|d D ]%}	|	d	 | j}
|	d	 }d
|vrYd
|	v rY|	d
 |d
< |t|
|d q=|S )Nr   rA   c                 S   r!   r   r(   r*   r   r   r   r#      r$   zIPineconeHybridSearchRetriever._get_relevant_documents.<locals>.<listcomp>r,   T)ZvectorZsparse_vectorr;   Zinclude_metadatar   matchesr&   Zscore)Zpage_contentr&   r   )rD   rB   r   Zencode_queriesr   Zembed_queryr<   r   rG   r;   r   popr   r3   r   )r?   rG   rH   rI   rB   Z
sparse_vecZ	dense_vecresultZfinal_resultresr   r&   r   r   r   _get_relevant_documents   s,   z5PineconeHybridSearchRetriever._get_relevant_documents)NNN)__name__
__module____qualname____doc__r   __annotations__r   r   r   r;   intr<   r)   r   r   r   r   r   Zmodel_configr   dictr@   r
   r   rF   r   r   rN   r   r   r   r   r9   f   sZ   
 


r9   )NNNr   )rR   r   typingr   r   r   r   Zlangchain_core.callbacksr   Zlangchain_core.documentsr   Zlangchain_core.embeddingsr   Zlangchain_core.retrieversr	   Zlangchain_core.utilsr
   Zpydanticr   r   r   rU   r8   r9   r   r   r   r   <module>   sD    

	
L