o
    ZhG                     @  s   d Z ddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ eeZG dd deeZG dd deZG dd dZG dd deeZ G dd deeZ!G dd deeZ"dS )z3Interfaces to be implemented by general evaluators.    )annotationsN)ABCabstractmethod)Enum)AnyOptionalSequenceTupleUnion)warn)AgentAction)BaseLanguageModel)run_in_executor)Chainc                   @  s   e Zd ZdZdZ	 dZ	 dZ	 dZ	 dZ	 dZ		 dZ
	 d	Z	 d
Z	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZdS )EvaluatorTypezThe types of the evaluators.ZqaZcot_qaZ
context_qaZpairwise_stringZscore_stringZlabeled_pairwise_stringZlabeled_score_stringZ
trajectorycriteriaZlabeled_criteriaZstring_distanceZexact_matchZregex_matchZpairwise_string_distanceZembedding_distanceZpairwise_embedding_distanceZjson_validityZjson_equalityZjson_edit_distanceZjson_schema_validationN)__name__
__module____qualname____doc__ZQAZCOT_QAZ
CONTEXT_QAZPAIRWISE_STRINGZSCORE_STRINGZLABELED_PAIRWISE_STRINGZLABELED_SCORE_STRINGZAGENT_TRAJECTORYZCRITERIAZLABELED_CRITERIAZSTRING_DISTANCEZEXACT_MATCHZREGEX_MATCHZPAIRWISE_STRING_DISTANCEZEMBEDDING_DISTANCEZPAIRWISE_EMBEDDING_DISTANCEZJSON_VALIDITYZJSON_EQUALITYZJSON_EDIT_DISTANCEZJSON_SCHEMA_VALIDATION r   r   R/var/www/html/lang_env/lib/python3.10/site-packages/langchain/evaluation/schema.pyr      sT    r   c                   @  s"   e Zd ZdZeed
ddZd	S )LLMEvalChainz,A base class for evaluators that use an LLM.llmr   kwargsr   returnc                 K     dS )z#Create a new evaluator from an LLM.Nr   )clsr   r   r   r   r   from_llmM       zLLMEvalChain.from_llmN)r   r   r   r   r   r   )r   r   r   r   classmethodr   r   r   r   r   r   r   J   s
    r   c                   @  sX   e Zd ZdZedddZedddZedd	d
ZedddZ		ddddZ	dS )_EvalArgsMixinz(Mixin for checking evaluation arguments.r   boolc                 C  r   z2Whether this evaluator requires a reference label.Fr   selfr   r   r   requires_referenceV      z!_EvalArgsMixin.requires_referencec                 C  r   )0Whether this evaluator requires an input string.Fr   r$   r   r   r   requires_input[   r'   z_EvalArgsMixin.requires_inputstrc                 C     d| j j dS )z&Warning to show when input is ignored.zIgnoring input in , as it is not expected.	__class__r   r$   r   r   r   _skip_input_warning`   s   z"_EvalArgsMixin._skip_input_warningc                 C  r+   )z*Warning to show when reference is ignored.zIgnoring reference in r,   r-   r$   r   r   r   _skip_reference_warninge   s   z&_EvalArgsMixin._skip_reference_warningN	referenceOptional[str]inputNonec                 C  s|   | j r|du rt| jj d|dur| j st| j | jr,|du r,t| jj d|dur:| js<t| j dS dS dS )a  Check if the evaluation arguments are valid.

        Args:
            reference (Optional[str], optional): The reference label.
            input (Optional[str], optional): The input string.
        Raises:
            ValueError: If the evaluator requires an input string but none is provided,
                or if the evaluator requires a reference label but none is provided.
        Nz requires an input string.z requires a reference string.)r)   
ValueErrorr.   r   r   r/   r&   r0   )r%   r1   r3   r   r   r   _check_evaluation_argsl   s   
z%_EvalArgsMixin._check_evaluation_argsr   r"   r   r*   )NN)r1   r2   r3   r2   r   r4   )
r   r   r   r   propertyr&   r)   r/   r0   r6   r   r   r   r   r!   S   s    r!   c                   @  sx   e Zd ZdZedddZedddZed	d	d
dddZd	d	d
dddZ	d	d	d
dddZ
d	d	d
dddZd	S ) StringEvaluatorzcGrade, tag, or otherwise evaluate predictions relative to their inputs
    and/or reference labels.r   r*   c                 C  s   | j jS )zThe name of the evaluation.r-   r$   r   r   r   evaluation_name   s   zStringEvaluator.evaluation_namer"   c                 C  r   r#   r   r$   r   r   r   r&      r'   z"StringEvaluator.requires_referenceNr1   r3   
predictionUnion[str, Any]r1   Optional[Union[str, Any]]r3   r   r   dictc                K  r   )a:  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nr   r%   r=   r1   r3   r   r   r   r   _evaluate_strings   r   z!StringEvaluator._evaluate_stringsc                  s$   t d| jf|||d|I dH S )aI  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nr=   r1   r3   )r   rB   rA   r   r   r   _aevaluate_strings   s   z"StringEvaluator._aevaluate_stringsr2   c                K  s&   | j ||d | jd|||d|S )a  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        r<   rC   Nr   )r6   rB   rA   r   r   r   evaluate_strings   s   z StringEvaluator.evaluate_stringsc                  s.   | j ||d | jd|||d|I dH S )a	  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        r<   rC   Nr   )r6   rD   rA   r   r   r   aevaluate_strings   s   z!StringEvaluator.aevaluate_stringsr8   r7   )
r=   r>   r1   r?   r3   r?   r   r   r   r@   )
r=   r*   r1   r2   r3   r2   r   r   r   r@   )r   r   r   r   r9   r;   r&   r   rB   rD   rE   rF   r   r   r   r   r:      s&    #r:   c                   @  s\   e Zd ZdZeddddddZddddddZddddddZddddddZdS )PairwiseStringEvaluatorzDCompare the output of two models (or two outputs of the same model).Nr<   r=   r*   prediction_br1   r2   r3   r   r   r   r@   c                K  r   )1  Evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nr   r%   r=   rH   r1   r3   r   r   r   r   _evaluate_string_pairs   r   z.PairwiseStringEvaluator._evaluate_string_pairsc                  s&   t d| jf||||d|I dH S )@  Asynchronously evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nr=   rH   r1   r3   )r   rK   rJ   r   r   r   _aevaluate_string_pairs  s   z/PairwiseStringEvaluator._aevaluate_string_pairsc                K  s(   | j ||d | jd||||d|S )rI   r<   rM   Nr   )r6   rK   rJ   r   r   r   evaluate_string_pairs/  s   z-PairwiseStringEvaluator.evaluate_string_pairsc                  s0   | j ||d | jd||||d|I dH S )rL   r<   rM   Nr   )r6   rN   rJ   r   r   r   aevaluate_string_pairsL  s   z.PairwiseStringEvaluator.aevaluate_string_pairs)r=   r*   rH   r*   r1   r2   r3   r2   r   r   r   r@   )	r   r   r   r   r   rK   rN   rO   rP   r   r   r   r   rG      s    #"rG   c                   @  sb   e Zd ZdZedddZedddddZdddddZdddddZ	dddddZ
dS )AgentTrajectoryEvaluatorz,Interface for evaluating agent trajectories.r   r"   c                 C  r   )r(   Tr   r$   r   r   r   r)   m  r'   z'AgentTrajectoryEvaluator.requires_inputN)r1   r=   r*   agent_trajectory!Sequence[Tuple[AgentAction, str]]r3   r1   r2   r   r   r@   c                K  r   )  Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        Nr   r%   r=   rR   r3   r1   r   r   r   r   _evaluate_agent_trajectoryr  r   z3AgentTrajectoryEvaluator._evaluate_agent_trajectoryc                  s&   t d| jf||||d|I dH S )  Asynchronously evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        N)r=   rR   r1   r3   )r   rV   rU   r   r   r   _aevaluate_agent_trajectory  s   z4AgentTrajectoryEvaluator._aevaluate_agent_trajectoryc                K  s(   | j ||d | jd||||d|S )rT   r<   r=   r3   rR   r1   Nr   )r6   rV   rU   r   r   r   evaluate_agent_trajectory  s   z2AgentTrajectoryEvaluator.evaluate_agent_trajectoryc                  s0   | j ||d | jd||||d|I dH S )rW   r<   rY   Nr   )r6   rX   rU   r   r   r   aevaluate_agent_trajectory  s   z3AgentTrajectoryEvaluator.aevaluate_agent_trajectoryr7   )r=   r*   rR   rS   r3   r*   r1   r2   r   r   r   r@   )r   r   r   r   r9   r)   r   rV   rX   rZ   r[   r   r   r   r   rQ   j  s    %$rQ   )#r   
__future__r   loggingabcr   r   enumr   typingr   r   r   r	   r
   warningsr   Zlangchain_core.agentsr   Zlangchain_core.language_modelsr   Zlangchain_core.runnables.configr   Zlangchain.chains.baser   	getLoggerr   loggerr*   r   r   r!   r:   rG   rQ   r   r   r   r   <module>   s$    
6	1tr