from __future__ import annotations

from typing import TYPE_CHECKING, Any, Callable, Optional, TypedDict, Union

from langsmith.evaluation.evaluator import DynamicRunEvaluator
from langsmith.run_helpers import traceable
from langsmith.schemas import Example, Run

if TYPE_CHECKING:
    from langchain.evaluation.schema import StringEvaluator
    from langsmith.evaluation.evaluator import RunEvaluator


class SingleEvaluatorInput(TypedDict):
    """The input to a `StringEvaluator`."""

    prediction: str
    reference: Optional[Any]
    input: Optional[str]


class LangChainStringEvaluator:
    """A class for wrapping a LangChain StringEvaluator.

    Requires the `langchain` package to be installed.

    Attributes:
        evaluator (StringEvaluator): The underlying StringEvaluator OR the name
            of the evaluator to load.

    Methods:
        as_run_evaluator() -> RunEvaluator:
            Convert the LangChainStringEvaluator to a RunEvaluator.

    Examples:
        Creating a simple LangChainStringEvaluator:

        >>> evaluator = LangChainStringEvaluator("exact_match")

        Converting a LangChainStringEvaluator to a RunEvaluator:

        >>> from langsmith.evaluation import LangChainStringEvaluator
        >>> from langchain_openai import ChatOpenAI
        >>> evaluator = LangChainStringEvaluator(
        ...     "criteria",
        ...     config={
        ...         "criteria": {
        ...             "usefulness": "The prediction is useful if"
        ...             " it is correct and/or asks a useful followup question."
        ...         },
        ...         "llm": ChatOpenAI(model="gpt-4o"),
        ...     },
        ... )
        >>> run_evaluator = evaluator.as_run_evaluator()
        >>> run_evaluator  # doctest: +ELLIPSIS
        <DynamicRunEvaluator ...>

        Customizing the LLM model used by the evaluator:

        >>> from langsmith.evaluation import LangChainStringEvaluator
        >>> from langchain_anthropic import ChatAnthropic
        >>> evaluator = LangChainStringEvaluator(
        ...     "criteria",
        ...     config={
        ...         "criteria": {
        ...             "usefulness": "The prediction is useful if"
        ...             " it is correct and/or asks a useful followup question."
        ...         },
        ...         "llm": ChatAnthropic(model="claude-3-opus-20240229"),
        ...     },
        ... )
        >>> run_evaluator = evaluator.as_run_evaluator()
        >>> run_evaluator  # doctest: +ELLIPSIS
        <DynamicRunEvaluator ...>

        Using the `evaluate` API with different evaluators:
        >>> def prepare_data(run: Run, example: Example):
        ...     # Convert the evaluation data into the format expected by the evaluator
        ...     # Only required for datasets with multiple inputs/output keys
        ...     return {
        ...         "prediction": run.outputs["prediction"],
        ...         "reference": example.outputs["answer"],
        ...         "input": str(example.inputs),
        ...     }
        >>> import re
        >>> from langchain_anthropic import ChatAnthropic
        >>> import langsmith
        >>> from langsmith.evaluation import LangChainStringEvaluator, evaluate
        >>> criteria_evaluator = LangChainStringEvaluator(
        ...     "criteria",
        ...     config={
        ...         "criteria": {
        ...             "usefulness": "The prediction is useful if it is correct"
        ...             " and/or asks a useful followup question."
        ...         },
        ...         "llm": ChatAnthropic(model="claude-3-opus-20240229"),
        ...     },
        ...     prepare_data=prepare_data,
        ... )
        >>> embedding_evaluator = LangChainStringEvaluator("embedding_distance")
        >>> exact_match_evaluator = LangChainStringEvaluator("exact_match")
        >>> regex_match_evaluator = LangChainStringEvaluator(
        ...     "regex_match", config={"flags": re.IGNORECASE}, prepare_data=prepare_data
        ... )
        >>> scoring_evaluator = LangChainStringEvaluator(
        ...     "labeled_score_string",
        ...     config={
        ...         "criteria": {
        ...             "accuracy": "Score 1: Completely inaccurate\nScore 5: Somewhat accurate\nScore 10: Completely accurate"
        ...         },
        ...         "normalize_by": 10,
        ...         "llm": ChatAnthropic(model="claude-3-opus-20240229"),
        ...     },
        ...     prepare_data=prepare_data,
        ... )
        >>> string_distance_evaluator = LangChainStringEvaluator(
        ...     "string_distance",
        ...     config={"distance_metric": "levenshtein"},
        ...     prepare_data=prepare_data,
        ... )
        >>> from langsmith import Client
        >>> client = Client()
        >>> results = evaluate(
        ...     lambda inputs: {"prediction": "foo"},
        ...     data=client.list_examples(dataset_name="Evaluate Examples", limit=1),
        ...     evaluators=[
        ...         embedding_evaluator,
        ...         criteria_evaluator,
        ...         exact_match_evaluator,
        ...         regex_match_evaluator,
        ...         scoring_evaluator,
        ...         string_distance_evaluator,
        ...     ],
        ... )  # doctest: +ELLIPSIS
        View the evaluation results for experiment:...
    """

    def __init__(
        self,
        evaluator: Union[StringEvaluator, str],
        *,
        config: Optional[dict] = None,
        prepare_data: Optional[
            Callable[[Run, Optional[Example]], SingleEvaluatorInput]
        ] = None,
    ):
        """Initialize a LangChainStringEvaluator.

        See: https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.schema.StringEvaluator.html#langchain-evaluation-schema-stringevaluator

        Args:
            evaluator (StringEvaluator): The underlying StringEvaluator.
        """
        from langchain.evaluation.schema import StringEvaluator

        if isinstance(evaluator, StringEvaluator):
            self.evaluator = evaluator
        elif isinstance(evaluator, str):
            from langchain.evaluation import load_evaluator

            self.evaluator = load_evaluator(evaluator, **(config or {}))
        else:
            raise NotImplementedError(f"Unsupported evaluator type: {type(evaluator)}")
        self._prepare_data = prepare_data

    def as_run_evaluator(self) -> RunEvaluator:
        """Convert the LangChainStringEvaluator to a RunEvaluator.

        This is the object used in the LangSmith `evaluate` API.

        Returns:
            RunEvaluator: The converted RunEvaluator.
        """
        # Build a hint showing how to write a custom prepare_data function,
        # including only the keys this evaluator actually requires.
        input_str = (
            "\n       \"input\": example.inputs['input'],"
            if self.evaluator.requires_input
            else ""
        )
        reference_str = (
            "\n       \"reference\": example.outputs['expected']"
            if self.evaluator.requires_reference
            else ""
        )
        customization_error_str = f"""
def prepare_data(run, example):
    return {{
        "prediction": run.outputs['my_output'],{reference_str}{input_str}
    }}
evaluator = LangChainStringEvaluator(..., prepare_data=prepare_data)
"""

        def prepare_evaluator_inputs(
            run: Run, example: Optional[Example] = None
        ) -> SingleEvaluatorInput:
            if run.outputs and len(run.outputs) > 1:
                raise ValueError(
                    f"Evaluator {self.evaluator} only supports a single prediction "
                    "key. Please ensure that the run has a single output."
                    " Or initialize with a prepare_data:\n"
                    f"{customization_error_str}"
                )
            if (
                self.evaluator.requires_reference
                and example
                and example.outputs
                and len(example.outputs) > 1
            ):
                raise ValueError(
                    f"Evaluator {self.evaluator} only supports a single reference "
                    "key. Please ensure that the example has a single output."
                    " Or create a custom evaluator yourself:\n"
                    f"{customization_error_str}"
                )
            if (
                self.evaluator.requires_input
                and example
                and example.inputs
                and len(example.inputs) > 1
            ):
                raise ValueError(
                    f"Evaluator {self.evaluator} only supports a single input "
                    "key. Please ensure that the example has a single input."
                    " Or initialize with a prepare_data:\n"
                    f"{customization_error_str}"
                )
            return SingleEvaluatorInput(
                prediction=str(next(iter(run.outputs.values()))),
                reference=(
                    str(next(iter(example.outputs.values())))
                    if self.evaluator.requires_reference
                    and example
                    and example.outputs
                    else None
                ),
                input=(
                    str(next(iter(example.inputs.values())))
                    if self.evaluator.requires_input and example and example.inputs
                    else None
                ),
            )

        @traceable(name=self.evaluator.evaluation_name)
        def evaluate(run: Run, example: Optional[Example] = None) -> dict:
            eval_inputs = (
                prepare_evaluator_inputs(run, example)
                if self._prepare_data is None
                else self._prepare_data(run, example)
            )
            results = self.evaluator.evaluate_strings(**eval_inputs)
            return {"key": self.evaluator.evaluation_name, **results}

        @traceable(name=self.evaluator.evaluation_name)
        async def aevaluate(run: Run, example: Optional[Example] = None) -> dict:
            eval_inputs = (
                prepare_evaluator_inputs(run, example)
                if self._prepare_data is None
                else self._prepare_data(run, example)
            )
            results = await self.evaluator.aevaluate_strings(**eval_inputs)
            return {"key": self.evaluator.evaluation_name, **results}

        return DynamicRunEvaluator(evaluate, aevaluate)