"""Beta utility functions to assist in common eval workflows.

These functions may change in the future.
"""

import collections
import datetime
import itertools
import uuid
from collections.abc import Sequence
from typing import Optional, TypeVar

import langsmith.run_trees as rt
import langsmith.schemas as ls_schemas
from langsmith import evaluation as ls_eval
from langsmith._internal._beta_decorator import warn_beta
from langsmith.client import Client


def _convert_ids(run_dict: dict, id_map: dict) -> dict:
    """Convert the IDs in the run dictionary using the provided ID map.

    Parameters:
    - run_dict (dict): The dictionary representing a run.
    - id_map (dict): The dictionary mapping old IDs to new IDs.

    Returns:
    - dict: The updated run dictionary.
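
    Example (hypothetical, abbreviated IDs):
        >>> old, new = "11111111", "22222222"
        >>> run = {"dotted_order": f"20240101T000000000000Z{old}", "extra": None}
        >>> _convert_ids(run, {old: new})
        {'dotted_order': '20240101T000000000000Z22222222', 'extra': {}}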
    """
    do = run_dict["dotted_order"]
    for k, v in id_map.items():
        do = do.replace(str(k), str(v))
    run_dict["dotted_order"] = do
    if run_dict.get("parent_run_id"):
        run_dict["parent_run_id"] = id_map[run_dict["parent_run_id"]]
    if not run_dict.get("extra"):
        run_dict["extra"] = {}
    return run_dict


def _convert_root_run(root: ls_schemas.Run, run_to_example_map: dict) -> list[dict]:
    """Convert the root run and its child runs to a list of dictionaries.

    Parameters:
    - root (ls_schemas.Run): The root run to convert.
    - run_to_example_map (dict): The dictionary mapping run IDs to example IDs.

    Returns:
    - List[dict]: The list of converted run dictionaries.
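
    Example output shape (hypothetical, abbreviated):
        [{"id": <new-root-id>, "trace_id": <new-root-id>,
          "reference_example_id": <example-id>, ...},
         {"id": <new-child-id>, "parent_run_id": <new-root-id>, ...}]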
    """
    runs_ = [root]
    id_map = {}
    results = []
    while runs_:
        src = runs_.pop()
        src_dict = src.dict(exclude={"child_run_ids", "session_id", "parent_run_ids"})
        # Assign each run a fresh UUID so the copy cannot collide with the
        # original prod trace.
        id_map[src_dict["id"]] = uuid.uuid4()
        src_dict["id"] = id_map[src_dict["id"]]
        src_dict["trace_id"] = id_map[src_dict["trace_id"]]
        if src.child_runs:
            runs_.extend(src.child_runs)
        results.append(src_dict)
    result = [_convert_ids(r, id_map) for r in results]
    result[0]["reference_example_id"] = run_to_example_map[root.id]
    return result


@warn_beta
def convert_runs_to_test(
    runs: Sequence[ls_schemas.Run],
    dataset_name: str,
    *,
    test_project_name: Optional[str] = None,
    client: Optional[Client] = None,
    load_child_runs: bool = False,
    include_outputs: bool = False,
) -> ls_schemas.TracerSession:
    """Convert the following runs to a dataset + test.

    This makes it easy to sample prod runs into a new regression testing
    workflow and compare against a candidate system.

    Internally, this function does the following:
        1. Create a dataset from the provided production run inputs.
        2. Create a new test project.
        3. Clone the production runs and re-upload against the dataset.

    Parameters:
    - runs (Sequence[ls_schemas.Run]): A sequence of runs to be executed as a test.
    - dataset_name (str): The name of the dataset to associate with the test runs.
    - client (Optional[Client]): An optional LangSmith client instance. If not provided,
        a new client will be created.
    - load_child_runs (bool): Whether to load child runs when copying runs.
        Defaults to False.
    - test_project_name (Optional[str]): The name of the test project to create.
        Defaults to "prod-baseline-" plus a random hex suffix.
    - include_outputs (bool): Whether to copy the runs' outputs into the dataset
        examples as reference outputs. Defaults to False.

    Returns:
    - ls_schemas.TracerSession: The project containing the cloned runs.

    Examples:
    --------
    .. code-block:: python

        import langsmith
        import random

        client = langsmith.Client()

        # Randomly sample 100 runs from a prod project
        runs = list(client.list_runs(project_name="My Project", execution_order=1))
        sampled_runs = random.sample(runs, min(len(runs), 100))

        convert_runs_to_test(sampled_runs, dataset_name="Random Runs")

        # Select runs named "extractor" whose root traces received good feedback
        runs = client.list_runs(
            project_name="<your_project>",
            filter='eq(name, "extractor")',
            trace_filter='and(eq(feedback_key, "user_score"), eq(feedback_score, 1))',
        )
        runs_list = list(runs)
        convert_runs_to_test(runs_list, dataset_name="Extraction Good")
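
        # Illustrative follow-up (flags from the signature above): also copy
        # child runs and store the recorded outputs as reference outputs.
        convert_runs_to_test(
            runs_list,
            dataset_name="Extraction Good (Full Traces)",
            load_child_runs=True,
            include_outputs=True,
        )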
    """
    if not runs:
        raise ValueError(f"Expected a non-empty sequence of runs. Received: {runs}")
    client = client or rt.get_cached_client()
    ds = client.create_dataset(dataset_name)
    outputs = [r.outputs for r in runs] if include_outputs else None
    client.create_examples(
        inputs=[r.inputs for r in runs],
        outputs=outputs,
        source_run_ids=[r.id for r in runs],
        dataset_id=ds.id,
    )
    if not load_child_runs:
        runs_to_copy = runs
    else:
        runs_to_copy = [
            client.read_run(r.id, load_child_runs=load_child_runs) for r in runs
        ]
    test_project_name = test_project_name or f"prod-baseline-{uuid.uuid4().hex[:6]}"
    examples = list(client.list_examples(dataset_name=dataset_name))
    run_to_example_map = {e.source_run_id: e.id for e in examples}
    dataset_version = (
        examples[0].modified_at if examples[0].modified_at else examples[0].created_at
    )
    to_create = [
        run
        for root_run in runs_to_copy
        for run in _convert_root_run(root_run, run_to_example_map)
    ]
    project = client.create_project(
        project_name=test_project_name,
        reference_dataset_id=ds.id,
        metadata={
            "which": "prod-baseline",
            "dataset_version": dataset_version.isoformat(),
        },
    )
    for new_run in to_create:
        # Preserve each cloned run's original latency while shifting its
        # timestamps to now, so the copies read as a fresh test run.
        latency = new_run["end_time"] - new_run["start_time"]
        new_run["start_time"] = datetime.datetime.now(tz=datetime.timezone.utc)
        new_run["end_time"] = new_run["start_time"] + latency
        client.create_run(**new_run, project_name=test_project_name)
    project = client.update_project(
        project.id, end_time=datetime.datetime.now(tz=datetime.timezone.utc)
    )
    return project


def _load_nested_traces(project_name: str, client: Client) -> list[ls_schemas.Run]:
    runs = client.list_runs(project_name=project_name)
    treemap = collections.defaultdict(list)
    results = []
    all_runs = {}
    for run in runs:
        if run.parent_run_id is not None:
            treemap[run.parent_run_id].append(run)
        else:
            results.append(run)
        all_runs[run.id] = run
    for run_id, child_runs in treemap.items():
        all_runs[run_id].child_runs = sorted(child_runs, key=lambda x: x.dotted_order)
    return results


T = TypeVar("T")
U = TypeVar("U")


def _outer_product(list1: list[T], list2: list[U]) -> list[tuple[T, U]]:
    # Pair every trace with every evaluator, e.g.
    #   _outer_product([t1, t2], [e1]) -> [(t1, e1), (t2, e1)]
    # compute_test_metrics unzips these pairs into the parallel argument
    # lists expected by executor.map(client.evaluate_run, ...).
    return list(itertools.product(list1, list2))


@warn_beta
def compute_test_metrics(
    project_name: str,
    *,
    evaluators: list,
    max_concurrency: Optional[int] = 10,
    client: Optional[Client] = None,
) -> None:
    """Compute test metrics for a given test name using a list of evaluators.

    Args:
        project_name (str): The name of the test project to evaluate.
        evaluators (list): A list of evaluators to compute metrics with.
        max_concurrency (Optional[int], optional): The maximum number of concurrent
            evaluations. Defaults to 10.
        client (Optional[Client], optional): The client to use for evaluations.
            Defaults to None.

    Returns:
        None: This function does not return any value.
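
    Example:
        .. code-block:: python

            # A plain callable is accepted; it is wrapped with
            # ls_eval.run_evaluator internally. The evaluator and the project
            # name below are hypothetical.
            def exact_match(run, example):
                score = int(run.outputs == example.outputs)
                return {"key": "exact_match", "score": score}

            compute_test_metrics("prod-baseline-1a2b3c", evaluators=[exact_match])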
    """
    from langsmith import ContextThreadPoolExecutor

    evaluators_ = []
    for func in evaluators:
        if isinstance(func, ls_eval.RunEvaluator):
            evaluators_.append(func)
        elif callable(func):
            evaluators_.append(ls_eval.run_evaluator(func))
        else:
            raise NotImplementedError(
                f"Evaluation not yet implemented for evaluator of type {type(func)}"
            )
    client = client or rt.get_cached_client()
    traces = _load_nested_traces(project_name, client)
    with ContextThreadPoolExecutor(max_workers=max_concurrency) as executor:
        results = executor.map(
            client.evaluate_run, *zip(*_outer_product(traces, evaluators_))
        )
    # Consume the map generator so every evaluation is actually executed.
    for _ in results:
        pass
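

# Minimal end-to-end sketch of the intended workflow (project and evaluator
# names are hypothetical; see the docstrings above for details):
#
#   client = Client()
#   prod_runs = list(client.list_runs(project_name="My Project", execution_order=1))
#   project = convert_runs_to_test(prod_runs, dataset_name="prod-sample", client=client)
#   compute_test_metrics(project.name, evaluators=[my_evaluator], client=client)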