o
    Zh                     @  s   d Z ddlmZ ddlZddlmZmZmZmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ eeZG dd deeZ dS )zDHypothetical Document Embeddings.

https://arxiv.org/abs/2212.10496
    )annotationsN)AnyDictListOptional)CallbackManagerForChainRun)
Embeddings)BaseLanguageModel)StrOutputParser)BasePromptTemplate)Runnable)
ConfigDict)Chain)
PROMPT_MAP)LLMChainc                   @  s   e Zd ZU dZded< ded< edddZed/ddZed/ddZ	d0ddZ
d1ddZd2ddZ	d3d4d!d"Ze		d5d6d+d,Zed7d-d.ZdS )8HypotheticalDocumentEmbedderzrGenerate hypothetical document for query, and then embed that.

    Based on https://arxiv.org/abs/2212.10496
    r   base_embeddingsr   	llm_chainTZforbid)Zarbitrary_types_allowedextrareturn	List[str]c                 C  s   | j j d S )z Input keys for Hyde's LLM chain.required)r   Zinput_schemaZmodel_json_schemaself r   Q/var/www/html/lang_env/lib/python3.10/site-packages/langchain/chains/hyde/base.py
input_keys(   s   z'HypotheticalDocumentEmbedder.input_keysc                 C  s   t | jtr
| jjS dgS )z!Output keys for Hyde's LLM chain.text)
isinstancer   r   output_keysr   r   r   r   r   -   s   z(HypotheticalDocumentEmbedder.output_keystextsList[List[float]]c                 C  s   | j |S )zCall the base embeddings.)r   embed_documents)r   r    r   r   r   r"   5   s   z,HypotheticalDocumentEmbedder.embed_documents
embeddingsList[float]c                   sh   zddl }t||jddW S  ty3   td |s"g  Y S t|  fddt| D  Y S w )z)Combine embeddings into final embeddings.r   N)Zaxisa*  NumPy not found in the current Python environment. HypotheticalDocumentEmbedder will use a pure Python implementation for internal calculations, which may significantly impact performance, especially for large datasets. For optimal speed and efficiency, consider installing NumPy: pip install numpyc                   s   g | ]}t |  qS r   )sum).0Z
dim_valuesZnum_vectorsr   r   
<listcomp>J   s    zCHypotheticalDocumentEmbedder.combine_embeddings.<locals>.<listcomp>)	numpylistarraymeanImportErrorloggerwarninglenzip)r   r#   npr   r'   r   combine_embeddings9   s   z/HypotheticalDocumentEmbedder.combine_embeddingsr   strc                 C  sR   | j d }| j||i}t| jtr|| jd  g}n|g}| |}| |S )z1Generate a hypothetical document and embedded it.r   )r   r   invoker   r   r   r"   r3   )r   r   var_nameresultZ	documentsr#   r   r   r   embed_queryL   s   


z(HypotheticalDocumentEmbedder.embed_queryNinputsDict[str, Any]run_manager$Optional[CallbackManagerForChainRun]Dict[str, str]c                 C  s$   |pt  }| jj|d| idS )zCall the internal llm chain.	callbacks)config)r   Zget_noop_managerr   r5   Z	get_child)r   r9   r;   Z_run_managerr   r   r   _callW   s   z"HypotheticalDocumentEmbedder._callllmr	   
prompt_keyOptional[str]custom_promptOptional[BasePromptTemplate]kwargsr   c                 K  sb   |dur|}n|dur|t v rt | }ntdtt   d||B t B }| d||d|S )zILoad and use LLMChain with either a specific prompt key or custom prompt.NzHMust specify prompt_key if custom_prompt not provided. Should be one of .)r   r   r   )r   
ValueErrorr*   keysr
   )clsrA   r   rB   rD   rF   promptr   r   r   r   from_llmb   s   


z%HypotheticalDocumentEmbedder.from_llmc                 C  s   dS )NZ
hyde_chainr   r   r   r   r   _chain_typey   s   z(HypotheticalDocumentEmbedder._chain_type)r   r   )r    r   r   r!   )r#   r!   r   r$   )r   r4   r   r$   )N)r9   r:   r;   r<   r   r=   )NN)rA   r	   r   r   rB   rC   rD   rE   rF   r   r   r   )r   r4   )__name__
__module____qualname____doc____annotations__r   Zmodel_configpropertyr   r   r"   r3   r8   r@   classmethodrL   rM   r   r   r   r   r      s.   
 


r   )!rQ   
__future__r   loggingtypingr   r   r   r   Zlangchain_core.callbacksr   Zlangchain_core.embeddingsr   Zlangchain_core.language_modelsr	   Zlangchain_core.output_parsersr
   Zlangchain_core.promptsr   Zlangchain_core.runnablesr   Zpydanticr   Zlangchain.chains.baser   Zlangchain.chains.hyde.promptsr   Zlangchain.chains.llmr   	getLoggerrN   r.   r   r   r   r   r   <module>   s     
