o
    Zh J                     @   s2  d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
mZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZ ddlmZ defddZee Z!ej"ddde#fddZ$defddZ%G dd de&eZ'G dd deZ(G dd de(eZ)G dd de(eZ*dS )z@A chain for comparing the output of two models using embeddings.    N)Enum)util)AnyDictListOptional)AsyncCallbackManagerForChainRunCallbackManagerForChainRun	Callbacks)
Embeddings)pre_init)
ConfigDictField)Chain)PairwiseStringEvaluatorStringEvaluatorRUN_KEYreturnc               
   C   s2   zdd l } | W S  ty } ztd|d }~ww )Nr   z@Could not import numpy, please install with `pip install numpy`.)numpyImportError)npe r   c/var/www/html/lang_env/lib/python3.10/site-packages/langchain/evaluation/embedding_distance/base.py_import_numpy   s   r      )maxsizec                   C   s    t tdr	dS td dS )Nr   Ta  NumPy not found in the current Python environment. langchain will use a pure Python implementation for embedding distance operations, which may significantly impact performance, especially for large datasets. For optimal speed and efficiency, consider installing NumPy: pip install numpyF)boolr   	find_specloggerwarningr   r   r   r   _check_numpy%   s   r"   c                  C   sR   z
ddl m}  W |  S  ty(   zddlm}  W Y |  S  ty'   tdw w )zaCreate an Embeddings object.
    Returns:
        Embeddings: The created Embeddings object.
    r   OpenAIEmbeddingstCould not import OpenAIEmbeddings. Please install the OpenAIEmbeddings package using `pip install langchain-openai`.)langchain_openair$   r   %langchain_community.embeddings.openair#   r   r   r   _embedding_factory3   s   r(   c                   @   s$   e Zd ZdZdZdZdZdZdZdS )EmbeddingDistancea  Embedding Distance Metric.

    Attributes:
        COSINE: Cosine distance metric.
        EUCLIDEAN: Euclidean distance metric.
        MANHATTAN: Manhattan distance metric.
        CHEBYSHEV: Chebyshev distance metric.
        HAMMING: Hamming distance metric.
    ZcosineZ	euclideanZ	manhattanZ	chebyshevZhammingN)	__name__
__module____qualname____doc__COSINE	EUCLIDEAN	MANHATTAN	CHEBYSHEVHAMMINGr   r   r   r   r)   J   s    
r)   c                   @   s6  e Zd ZU dZeedZeed< ee	j
dZe	ed< edeeef deeef fdd	Zed
dZedee fddZdedefddZde	defddZedededefddZedededefddZedededefddZedededefddZedededefddZd edefd!d"Z d#S )$_EmbeddingDistanceChainMixina0  Shared functionality for embedding distance evaluators.

    Attributes:
        embeddings (Embeddings): The embedding objects to vectorize the outputs.
        distance_metric (EmbeddingDistance): The distance metric to use
                                            for comparing the embeddings.
    )default_factory
embeddings)defaultdistance_metricvaluesr   c                 C   s   | d}g }zddlm} || W n	 ty   Y nw zddlm} || W n	 ty4   Y nw |s;tdt|t|rUzddl}W |S  tyT   tdw |S )zValidate that the TikTok library is installed.

        Args:
            values (Dict[str, Any]): The values to validate.

        Returns:
            Dict[str, Any]: The validated values.
        r5   r   r#   r%   NzThe tiktoken library is required to use the default OpenAI embeddings with embedding distance evaluators. Please either manually select a different Embeddings object or install tiktoken using `pip install tiktoken`.)	getr&   r$   appendr   r'   
isinstancetupletiktoken)clsr8   r5   types_r$   r=   r   r   r   _validate_tiktoken_installedh   s8   


z9_EmbeddingDistanceChainMixin._validate_tiktoken_installedT)Zarbitrary_types_allowedc                 C   s   dgS )zgReturn the output keys of the chain.

        Returns:
            List[str]: The output keys.
        scorer   selfr   r   r   output_keys   s   z(_EmbeddingDistanceChainMixin.output_keysresultc                 C   s$   d|d i}t |v r|t  |t < |S )NrA   r   )rC   rE   parsedr   r   r   _prepare_output   s   z,_EmbeddingDistanceChainMixin._prepare_outputmetricc              
   C   sJ   t j| jt j| jt j| jt j| jt j	| j
i}||v r|| S td| )zGet the metric function for the given metric name.

        Args:
            metric (EmbeddingDistance): The metric name.

        Returns:
            Any: The metric function.
        zInvalid metric: )r)   r.   _cosine_distancer/   _euclidean_distancer0   _manhattan_distancer1   _chebyshev_distancer2   _hamming_distance
ValueError)rC   rH   Zmetricsr   r   r   _get_metric   s   
z(_EmbeddingDistanceChainMixin._get_metricabc                 C   s6   zddl m} W n ty   tdw d|| | S )zCompute the cosine distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.ndarray: The cosine distance.
        r   )cosine_similarityzThe cosine_similarity function is required to compute cosine distance. Please install the langchain-community package using `pip install langchain-community`.g      ?)Zlangchain_community.utils.mathrR   r   )rP   rQ   rR   r   r   r   rI      s   z-_EmbeddingDistanceChainMixin._cosine_distancec                 C   s:   t  rddl}|j| | S tdd t| |D d S )zCompute the Euclidean distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Euclidean distance.
        r   Nc                 s   s$    | ]\}}|| ||  V  qd S Nr   .0xyr   r   r   	<genexpr>   s   " zC_EmbeddingDistanceChainMixin._euclidean_distance.<locals>.<genexpr>g      ?)r"   r   ZlinalgZnormsumziprP   rQ   r   r   r   r   rJ      s   z0_EmbeddingDistanceChainMixin._euclidean_distancec                 C   8   t  rt }||| | S tdd t| |D S )zCompute the Manhattan distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Manhattan distance.
        c                 s        | ]\}}t || V  qd S rS   absrT   r   r   r   rX          zC_EmbeddingDistanceChainMixin._manhattan_distance.<locals>.<genexpr>)r"   r   rY   r_   rZ   r[   r   r   r   rK         z0_EmbeddingDistanceChainMixin._manhattan_distancec                 C   r\   )zCompute the Chebyshev distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Chebyshev distance.
        c                 s   r]   rS   r^   rT   r   r   r   rX     r`   zC_EmbeddingDistanceChainMixin._chebyshev_distance.<locals>.<genexpr>)r"   r   maxr_   rZ   r[   r   r   r   rL      ra   z0_EmbeddingDistanceChainMixin._chebyshev_distancec                 C   s:   t  rt }|| |kS tdd t| |D t|  S )zCompute the Hamming distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Hamming distance.
        c                 s   s     | ]\}}||krd V  qdS )r   Nr   rT   r   r   r   rX     r`   zA_EmbeddingDistanceChainMixin._hamming_distance.<locals>.<genexpr>)r"   r   meanrY   rZ   lenr[   r   r   r   rM     s    z._EmbeddingDistanceChainMixin._hamming_distancevectorsc                 C   sh   |  | j}t r't|t jr'||d dd|d dd }t|S ||d |d }t|S )zCompute the score based on the distance metric.

        Args:
            vectors (np.ndarray): The input vectors.

        Returns:
            float: The computed score.
        r   r   )	rO   r7   r"   r;   r   ZndarrayZreshapeitemfloat)rC   re   rH   rA   r   r   r   _compute_score  s   	&z+_EmbeddingDistanceChainMixin._compute_scoreN)!r*   r+   r,   r-   r   r(   r5   r   __annotations__r)   r.   r7   r   r   strr   r@   r   Zmodel_configpropertyr   rD   dictrG   rO   staticmethodrI   rJ   rK   rL   rM   rh   ri   r   r   r   r   r3   \   s0   
 $-r3   c                   @   sF  e Zd ZdZedefddZedefddZede	e fddZ
		dd
eeef dee deeef fddZ		dd
eeef dee deeef fddZd	d	d	d	dddedee dedee	e  deeeef  dededefddZd	d	d	d	dddedee dedee	e  deeeef  dededefddZd	S )EmbeddingDistanceEvalChaina"  Use embedding distances to score semantic difference between
    a prediction and reference.

    Examples:
        >>> chain = EmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_strings(prediction="Hello", reference="Hi")
        >>> print(result)
        {'score': 0.5}
    r   c                 C   s   dS )zReturn whether the chain requires a reference.

        Returns:
            bool: True if a reference is required, False otherwise.
        Tr   rB   r   r   r   requires_reference4  s   z-EmbeddingDistanceEvalChain.requires_referencec                 C      d| j j dS )NZ
embedding_	_distancer7   valuerB   r   r   r   evaluation_name=     z*EmbeddingDistanceEvalChain.evaluation_namec                 C      ddgS )eReturn the input keys of the chain.

        Returns:
            List[str]: The input keys.
        
prediction	referencer   rB   r   r   r   
input_keysA     z%EmbeddingDistanceEvalChain.input_keysNinputsrun_managerc                 C   @   | j |d |d g}t rt }||}| |}d|iS )a0  Compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (Optional[CallbackManagerForChainRun], optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        ry   rz   rA   r5   Zembed_documentsr"   r   arrayri   rC   r}   r~   re   r   rA   r   r   r   _callJ  s   

z EmbeddingDistanceEvalChain._callc                    H   | j |d |d gI dH }t rt }||}| |}d|iS )a:  Asynchronously compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        ry   rz   NrA   r5   Zaembed_documentsr"   r   r   ri   r   r   r   r   _acallb     


z!EmbeddingDistanceEvalChain._acallF)rz   	callbackstagsmetadatainclude_run_infory   rz   r   r   r   r   kwargsc          	      K   "   | ||d||||d}|  |S )a  Evaluate the embedding distance between a prediction and
        reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The reference string (required)
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        ry   rz   r}   r   r   r   r   rG   	rC   ry   rz   r   r   r   r   r   rE   r   r   r   _evaluate_strings}  s   
z,EmbeddingDistanceEvalChain._evaluate_stringsc          	         ,   | j ||d||||dI dH }| |S )a  Asynchronously evaluate the embedding distance between
        a prediction and reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   NZacallrG   r   r   r   r   _aevaluate_strings  s   
z-EmbeddingDistanceEvalChain._aevaluate_stringsrS   )r*   r+   r,   r-   rl   r   rp   rk   ru   r   r{   r   r   r   r	   r   r   r   r
   rm   r   r   r   r   r   r   ro   )  s    







	

&
	
ro   c                   @   s(  e Zd ZdZedee fddZedefddZ	dde	ee
f d	ee de	ee
f fd
dZ	dde	ee
f d	ee de	ee
f fddZddddddedededeee  dee	ee
f  dede
defddZddddddedededeee  dee	ee
f  dede
defddZdS )"PairwiseEmbeddingDistanceEvalChaina  Use embedding distances to score semantic difference between two predictions.

    Examples:
    >>> chain = PairwiseEmbeddingDistanceEvalChain()
    >>> result = chain.evaluate_string_pairs(prediction="Hello", prediction_b="Hi")
    >>> print(result)
    {'score': 0.5}
    r   c                 C   rw   )rx   ry   prediction_br   rB   r   r   r   r{     r|   z-PairwiseEmbeddingDistanceEvalChain.input_keysc                 C   rq   )NZpairwise_embedding_rr   rs   rB   r   r   r   ru     rv   z2PairwiseEmbeddingDistanceEvalChain.evaluation_nameNr}   r~   c                 C   r   )a  Compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (CallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        ry   r   rA   r   r   r   r   r   r     s   

z(PairwiseEmbeddingDistanceEvalChain._callc                    r   )a/  Asynchronously compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        ry   r   NrA   r   r   r   r   r   r     r   z)PairwiseEmbeddingDistanceEvalChain._acallF)r   r   r   r   ry   r   r   r   r   r   r   c          	      K   r   )a  Evaluate the embedding distance between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        ry   r   r   r   	rC   ry   r   r   r   r   r   r   rE   r   r   r   _evaluate_string_pairs  s   
z9PairwiseEmbeddingDistanceEvalChain._evaluate_string_pairsc          	         r   )a  Asynchronously evaluate the embedding distance

        between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to traces
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   Nr   r   r   r   r   _aevaluate_string_pairs4  s   
z:PairwiseEmbeddingDistanceEvalChain._aevaluate_string_pairsrS   )r*   r+   r,   r-   rl   r   rk   r{   ru   r   r   r   r	   r   r   r   r
   r   rm   r   r   r   r   r   r   r     s    	





 
	

(
	
r   )+r-   	functoolsloggingenumr   	importlibr   typingr   r   r   r   Z langchain_core.callbacks.managerr   r	   r
   Zlangchain_core.embeddingsr   Zlangchain_core.utilsr   Zpydanticr   r   Zlangchain.chains.baser   Zlangchain.evaluation.schemar   r   Zlangchain.schemar   r   	getLoggerr*   r    	lru_cacher   r"   r(   rk   r)   r3   ro   r   r   r   r   r   <module>   s4    

 N 
