"""LLM Chains for evaluating question answering."""

from __future__ import annotations

import re
import string
from typing import Any, List, Optional, Sequence, Tuple

from langchain_core.callbacks.manager import Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import PromptTemplate
from pydantic import ConfigDict

from langchain.chains.llm import LLMChain
from langchain.evaluation.qa.eval_prompt import CONTEXT_PROMPT, COT_PROMPT, PROMPT
from langchain.evaluation.schema import LLMEvalChain, StringEvaluator
from langchain.schema import RUN_KEY

def _get_score(text: str) -> Optional[Tuple[str, int]]:
    """Extract a CORRECT/INCORRECT verdict and a 1/0 score from grader output."""
    match = re.search(r"grade:\s*(correct|incorrect)", text.strip(), re.IGNORECASE)
    if match:
        if match.group(1).upper() == "CORRECT":
            return "CORRECT", 1
        elif match.group(1).upper() == "INCORRECT":
            return "INCORRECT", 0
    try:
        # Fall back to checking the first and last words, stripped of punctuation.
        first_word = (
            text.strip()
            .split()[0]
            .translate(str.maketrans("", "", string.punctuation))
        )
        if first_word.upper() == "CORRECT":
            return "CORRECT", 1
        elif first_word.upper() == "INCORRECT":
            return "INCORRECT", 0
        last_word = (
            text.strip()
            .split()[-1]
            .translate(str.maketrans("", "", string.punctuation))
        )
        if last_word.upper() == "CORRECT":
            return "CORRECT", 1
        elif last_word.upper() == "INCORRECT":
            return "INCORRECT", 0
    except IndexError:
        return None
    return None

def _parse_string_eval_output(text: str) -> dict:
    """Parse the output text.

    Args:
        text (str): The output text to parse.

    Returns:
        dict: The parsed output, with "reasoning", "value", and "score" keys.
    """
    reasoning = text.strip()
    parsed_scores = _get_score(reasoning)
    if parsed_scores is None:
        value, score = None, None
    else:
        value, score = parsed_scores
    return {
        "reasoning": reasoning,
        "value": value,
        "score": score,
    }
r1   c                   @  s   e Zd ZU dZdZded< eddZed7d	d
Z	e
d8ddZe
d7ddZe
d7ddZe	d9d:ddZ			d;ddd<d'd(Zd=d*d+Zdddd,d-d>d3d4Zdddd,d-d>d5d6ZdS )?QAEvalChainz,LLM Chain for evaluating question answering.resultsr   
output_keyignoreextrar   boolc                 C     dS NFr*   clsr*   r*   r+   is_lc_serializableP      zQAEvalChain.is_lc_serializablec                 C  r9   )NZcorrectnessr*   selfr*   r*   r+   evaluation_nameT   r>   zQAEvalChain.evaluation_namec                 C  r9   NTr*   r?   r*   r*   r+   requires_referenceX   r>   zQAEvalChain.requires_referencec                 C  r9   rB   r*   r?   r*   r*   r+   requires_input\   r>   zQAEvalChain.requires_inputNllmr	   promptOptional[PromptTemplate]kwargsr   c                 K  sH   |pt }h d}|t|jkrtd| d|j | d||d|S )a  Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the input_variables:
            'input', 'answer' and 'result' that will be used as the prompt
            for evaluation.
            Defaults to PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            QAEvalChain: the loaded QA eval chain.
        >   queryanswerresultInput variables should be 
, but got rE   rF   Nr*   )r   setinput_variables
ValueError)r<   rE   rF   rH   expected_input_varsr*   r*   r+   from_llm`   s   zQAEvalChain.from_llmrI   rJ   rK   	callbacksexamplesSequence[dict]predictionsquestion_key
answer_keyprediction_keyrU   r   
List[dict]c                  *    fddt |D }| j||dS )5Evaluate question answering examples and predictions.c                   ,   g | ]\}}| |  |  d qS )rI   rJ   rK   r*   .0iZexamplerZ   r[   rX   rY   r*   r+   
<listcomp>       
z(QAEvalChain.evaluate.<locals>.<listcomp>rT   	enumerateapply)r@   rV   rX   rY   rZ   r[   rU   inputsr*   rd   r+   evaluate      	zQAEvalChain.evaluater-   c                 C  &   t || j }t|v r|t |t< |S Nr1   r4   r   r@   rK   parsed_resultr*   r*   r+   _prepare_output      zQAEvalChain._prepare_outputF	referenceinputrU   include_run_info
predictionru   Optional[str]rv   rw   c                K      | |||d||d}|  |S )a  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): the LLM or chain prediction to evaluate.
            reference (Optional[str], optional): the reference label
                to evaluate against.
            input (Optional[str], optional): the input to consider during evaluation
            callbacks (Callbacks, optional): the callbacks to use for tracing.
            include_run_info (bool, optional): whether to include run info in the
                returned results.
            **kwargs: additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        r`   rU   rw   rr   r@   rx   ru   rv   rU   rw   rH   rK   r*   r*   r+   _evaluate_strings   s   
	zQAEvalChain._evaluate_stringsc                  *   | j |||d||dI d H }| |S )Nr`   rj   rU   rw   Zacallrr   r}   r*   r*   r+   _aevaluate_strings      


zQAEvalChain._aevaluate_stringsr   r8   r   r   rn   )rE   r	   rF   rG   rH   r   r   r2   r`   )rV   rW   rX   rW   rY   r   rZ   r   r[   r   rU   r   r   r\   rK   r-   r   r-   rx   r   ru   ry   rv   ry   rU   r   rw   r8   rH   r   r   r-   )__name__
__module____qualname____doc__r4   __annotations__r   model_configclassmethodr=   propertyrA   rC   rD   rS   rk   rr   r~   r   r*   r*   r*   r+   r2   G   sD   
 "

'r2   c                   @  s   e Zd ZdZed8ddZed8ddZed8dd	Ze	d
dZ
ed9ddZed:ddZe	d;d<ddZ			d=ddd>d(d)Zd?d+d,Zdddd-d.d@d4d5Zdddd-d.d@d6d7ZdS )AContextQAEvalChainz3LLM Chain for evaluating QA w/o GT based on contextr   r8   c                 C  r9   r:   r*   r;   r*   r*   r+   r=      r>   z%ContextQAEvalChain.is_lc_serializablec                 C  r9   )z.Whether the chain requires a reference string.Tr*   r?   r*   r*   r+   rC         z%ContextQAEvalChain.requires_referencec                 C  r9   )z+Whether the chain requires an input string.Tr*   r?   r*   r*   r+   rD      r   z!ContextQAEvalChain.requires_inputr5   r6   rF   r
   Nonec                 C  s0   h d}|t |jkrtd| d|j d S )N>   rI   contextrK   rL   rM   )rO   rP   rQ   )r<   rF   rR   r*   r*   r+   _validate_input_vars   s   z'ContextQAEvalChain._validate_input_varsr   c                 C  r9   )NzContextual Accuracyr*   r?   r*   r*   r+   rA      r>   z"ContextQAEvalChain.evaluation_nameNrE   r	   rG   rH   r   c                 K  &   |pt }| | | d||d|S )a  Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the input_variables:
            'query', 'context' and 'result' that will be used as the prompt
            for evaluation.
            Defaults to PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            ContextQAEvalChain: the loaded QA eval chain.
        rN   Nr*   )r   r   r<   rE   rF   rH   r*   r*   r+   rS      s   
zContextQAEvalChain.from_llmrI   r   rK   rT   rV   r\   rX   rY   context_keyr[   rU   r   c                  r]   )r^   c                   r_   )rI   r   rK   r*   ra   r   r[   rX   rY   r*   r+   re     rf   z/ContextQAEvalChain.evaluate.<locals>.<listcomp>rT   rg   )r@   rV   rX   rY   r   r[   rU   rj   r*   r   r+   rk     rl   zContextQAEvalChain.evaluater-   c                 C  rm   rn   ro   rp   r*   r*   r+   rr   "  rs   z"ContextQAEvalChain._prepare_outputFrt   rx   ru   ry   rv   rw   c                K  rz   )Nr   r{   r|   r}   r*   r*   r+   r~   (  s   

	z$ContextQAEvalChain._evaluate_stringsc                  r   )Nr   r   r   r}   r*   r*   r+   r   =  r   z%ContextQAEvalChain._aevaluate_stringsr   )rF   r
   r   r   r   rn   )rE   r	   rF   rG   rH   r   r   r   r   )rV   r\   rX   r\   rY   r   r   r   r[   r   rU   r   r   r\   r   r   )r   r   r   r   r   r=   r   rC   rD   r   r   r   rA   rS   rk   rr   r~   r   r*   r*   r*   r+   r      sF    

r   c                   @  s>   e Zd ZdZedddZedddZe		ddddZd	S )CotQAEvalChainz=LLM Chain for evaluating QA using chain of thought reasoning.r   r8   c                 C  r9   r:   r*   r;   r*   r*   r+   r=   R  r>   z!CotQAEvalChain.is_lc_serializabler   c                 C  r9   )NzCOT Contextual Accuracyr*   r?   r*   r*   r+   rA   V  r>   zCotQAEvalChain.evaluation_nameNrE   r	   rF   rG   rH   r   c                 K  r   )zLoad QA Eval Chain from LLM.rN   Nr*   )r   r   r   r*   r*   r+   rS   Z  s   
zCotQAEvalChain.from_llmr   r   rn   )rE   r	   rF   rG   rH   r   r   r   )	r   r   r   r   r   r=   r   rA   rS   r*   r*   r*   r+   r   O  s    r   )r   r   r   r   )r   r   r   r-   )#r   