o
    Zh$                     @  s   d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZmZ G d
d deZdS )    )annotationsN)Path)AnyDictListOptionalTupleUnion)CallbackManagerForRetrieverRun)Document)BaseRetriever)convert_to_secret_strget_from_dict_or_envpre_init)
ConfigDict	SecretStrc                   @  s   e Zd ZU dZded< 	 dZded< 	 eddZed@dAddZ	e
	d@dBddZe
	d@dCddZedDddZ		dEdFd!d"ZdGd$d%ZdHd*d+ZdId.d/ZdJd2d3ZdKd6d7ZdLd;d<ZdMd>d?ZdS )NNeuralDBRetrieverz0Document retriever that uses ThirdAI's NeuralDB.r   thirdai_keyNr   dbZforbid)extraOptional[str]returnNonec                 C  sJ   zddl m} tjd || ptd W d S  ty$   tdw )Nr   )	licensingzthirdai.neural_dbTHIRDAI_KEYz{Could not import thirdai python package and neuraldb dependencies. Please install it with `pip install thirdai[neural_db]`.)	thirdair   	importlibutil	find_specactivateosgetenvImportError)r   r    r#   f/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/retrievers/thirdai_neuraldb.py_verify_thirdai_library   s   z)NeuralDBRetriever._verify_thirdai_librarymodel_kwargsdictc                 K  s.   t | ddlm} | ||jdi |dS )a  
        Create a NeuralDBRetriever from scratch.

        To use, set the ``THIRDAI_KEY`` environment variable with your ThirdAI
        API key, or pass ``thirdai_key`` as a named parameter.

        Example:
            .. code-block:: python

                from langchain_community.retrievers import NeuralDBRetriever

                retriever = NeuralDBRetriever.from_scratch(
                    thirdai_key="your-thirdai-key",
                )

                retriever.insert([
                    "/path/to/doc.pdf",
                    "/path/to/doc.docx",
                    "/path/to/doc.csv",
                ])

                documents = retriever.invoke("AI-driven music therapy")
        r   	neural_dbr   r   Nr#   )r   r%   r   r)   NeuralDB)clsr   r&   ndbr#   r#   r$   from_scratch*   s   
zNeuralDBRetriever.from_scratch
checkpointUnion[str, Path]c                 C  s*   t | ddlm} | ||j|dS )a!  
        Create a NeuralDBRetriever with a base model from a saved checkpoint

        To use, set the ``THIRDAI_KEY`` environment variable with your ThirdAI
        API key, or pass ``thirdai_key`` as a named parameter.

        Example:
            .. code-block:: python

                from langchain_community.retrievers import NeuralDBRetriever

                retriever = NeuralDBRetriever.from_checkpoint(
                    checkpoint="/path/to/checkpoint.ndb",
                    thirdai_key="your-thirdai-key",
                )

                retriever.insert([
                    "/path/to/doc.pdf",
                    "/path/to/doc.docx",
                    "/path/to/doc.csv",
                ])

                documents = retriever.invoke("AI-driven music therapy")
        r   r(   r*   )r   r%   r   r)   r+   from_checkpoint)r,   r/   r   r-   r#   r#   r$   r1   L   s   
z!NeuralDBRetriever.from_checkpointvaluesr   c                 C  s   t t|dd|d< |S )z'Validate ThirdAI environment variables.r   r   )r   r   )r,   r2   r#   r#   r$   validate_environmentso   s   z'NeuralDBRetriever.validate_environmentsTsources	List[Any]trainbool	fast_modekwargsc                 K  s(   |  |}| jjd|||d| dS )as  Inserts files / document sources into the retriever.

        Args:
            train: When True this means that the underlying model in the
            NeuralDB will undergo unsupervised pretraining on the inserted files.
            Defaults to True.
            fast_mode: Much faster insertion with a slight drop in performance.
            Defaults to True.
        )r4   r6   Zfast_approximationNr#   )_preprocess_sourcesr   insert)selfr4   r6   r8   r9   r#   r#   r$   r;   {   s   

zNeuralDBRetriever.insertlistc                 C  s   ddl m} |s
|S g }|D ]D}t|ts|| q| dr+||| q| dr;||| q| drK||	| qt
d| d|S )zChecks if the provided sources are string paths. If they are, convert
        to NeuralDB document objects.

        Args:
            sources: list of either string paths to PDF, DOCX or CSV files, or
            NeuralDB document objects.
        r   r(   z.pdfz.docxz.csvzCould not automatically load z. Only files with .pdf, .docx, or .csv extensions can be loaded automatically. For other formats, please use the appropriate document object from the ThirdAI library.)r   r)   
isinstancestrappendlowerendswithZPDFZDOCXZCSVRuntimeError)r<   r4   r-   Zpreprocessed_sourcesdocr#   r#   r$   r:      s"   

z%NeuralDBRetriever._preprocess_sourcesqueryr?   document_idintc                 C     | j || dS )a!  The retriever upweights the score of a document for a specific query.
        This is useful for fine-tuning the retriever to user behavior.

        Args:
            query: text to associate with `document_id`
            document_id: id of the document to associate query with.
        N)r   Ztext_to_result)r<   rE   rF   r#   r#   r$   upvote   s   zNeuralDBRetriever.upvotequery_id_pairsList[Tuple[str, int]]c                 C     | j | dS )a  Given a batch of (query, document id) pairs, the retriever upweights
        the scores of the document for the corresponding queries.
        This is useful for fine-tuning the retriever to user behavior.

        Args:
            query_id_pairs: list of (query, document id) pairs. For each pair in
            this list, the model will upweight the document id for the query.
        N)r   Ztext_to_result_batch)r<   rJ   r#   r#   r$   upvote_batch   s   	zNeuralDBRetriever.upvote_batchsourcetargetc                 C  rH   )a=  The retriever associates a source phrase with a target phrase.
        When the retriever sees the source phrase, it will also consider results
        that are relevant to the target phrase.

        Args:
            source: text to associate to `target`.
            target: text to associate `source` to.
        N)r   	associate)r<   rN   rO   r#   r#   r$   rP      s   	zNeuralDBRetriever.associate
text_pairsList[Tuple[str, str]]c                 C  rL   )a.  Given a batch of (source, target) pairs, the retriever associates
        each source phrase with the corresponding target phrase.

        Args:
            text_pairs: list of (source, target) text pairs. For each pair in
            this list, the source will be associated with the target.
        N)r   associate_batch)r<   rQ   r#   r#   r$   rS      s   z!NeuralDBRetriever.associate_batchrun_managerr
   List[Document]c              
   K  s`   zd|vr	d|d< | j jdd|i|}dd |D W S  ty/ } ztd| |d}~ww )	zRetrieve {top_k} contexts with your retriever for a given query

        Args:
            query: Query to submit to the model
            top_k: The max number of context results to retrieve. Defaults to 10.
        Ztop_k
   rE   c                 S  s8   g | ]}t |j|j|j|j|j|j|d ddqS )   )id
upvote_idsrN   metadatascorecontext)Zpage_contentrZ   )r   textrX   rY   rN   rZ   r[   r\   ).0refr#   r#   r$   
<listcomp>   s    z=NeuralDBRetriever._get_relevant_documents.<locals>.<listcomp>z"Error while retrieving documents: Nr#   )r   search	Exception
ValueError)r<   rE   rT   r9   Z
referenceser#   r#   r$   _get_relevant_documents   s   	z)NeuralDBRetriever._get_relevant_documentspathc                 C  rL   )zSaves a NeuralDB instance to disk. Can be loaded into memory by
        calling NeuralDB.from_checkpoint(path)

        Args:
            path: path on disk to save the NeuralDB instance to.
        N)r   save)r<   rf   r#   r#   r$   rg      s   zNeuralDBRetriever.save)N)r   r   r   r   )r   r   r&   r'   r   r   )r/   r0   r   r   r   r   )r2   r   r   r   )TT)
r4   r5   r6   r7   r8   r7   r9   r'   r   r   )r4   r=   r   r=   )rE   r?   rF   rG   r   r   )rJ   rK   r   r   )rN   r?   rO   r?   r   r   )rQ   rR   r   r   )rE   r?   rT   r
   r9   r   r   rU   )rf   r?   r   r   )__name__
__module____qualname____doc____annotations__r   r   Zmodel_configstaticmethodr%   classmethodr.   r1   r   r3   r;   r:   rI   rM   rP   rS   re   rg   r#   r#   r#   r$   r      s:   
 !"

 





r   )
__future__r   r   r    pathlibr   typingr   r   r   r   r   r	   Zlangchain_core.callbacksr
   Zlangchain_core.documentsr   Zlangchain_core.retrieversr   Zlangchain_core.utilsr   r   r   Zpydanticr   r   r   r#   r#   r#   r$   <module>   s     