o
    +ifm                     @   sB  d Z ddlZddlZddlZddlmZmZmZmZm	Z	m
Z
 ddlZddlZddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddl m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z,m-Z- e*e.Z/G dd deZ0dd Z1G dd dZ2G dd de2Z3dS )zMetrics base class.    N)AnyDictListOptionalTupleUnion)BaseFileLockTimeout   )config)Dataset)ArrowReader)ArrowWriter)DownloadConfig)DownloadManager)Features)DatasetInfo
MetricInfo)camelcase_to_snakecase)FileLock)
deprecated)
get_logger)copyfunc	temp_seedc                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )FileFreeLockz-Thread lock until a file **cannot** be lockedc                    s,   t || _t j| jjg|R i | d S N)r   filelocksuper__init__	lock_file)selfr   argskwargs	__class__ H/var/www/html/corbot_env/lib/python3.10/site-packages/datasets/metric.pyr   0   s   
"zFileFreeLock.__init__c                 C   sL   z
| j jddd W n ty   | j j| j_Y d S w | j   d | j_d S )Ng{Gz?g{Gz?)timeoutpoll_intervall)r   acquirer	   r   _contextlock_file_fdreleaser    r%   r%   r&   _acquire4   s   
zFileFreeLock._acquirec                 C   s   d | j _d S r   )r*   r+   r-   r%   r%   r&   _release?   s   zFileFreeLock._release)__name__
__module____qualname____doc__r   r.   r/   __classcell__r%   r%   r#   r&   r   -   s
    r   c                 C   sP   t | tkrt| dkr|  S dd }d|| d d  d|| dd   dS )	N   c                 S   s   d dd | D S )Nz, c                 s   s    | ]}t |V  qd S r   )repr).0xr%   r%   r&   	<genexpr>J       z?summarize_if_long_list.<locals>.format_chunk.<locals>.<genexpr>)join)chunkr%   r%   r&   format_chunkI   s   z,summarize_if_long_list.<locals>.format_chunk[   z, ..., ])typelistlen)objr=   r%   r%   r&   summarize_if_long_listE   s   *rF   c                   @   s  e Zd ZdZdefddZedd Zedefdd	Z	ede
e fd
dZedefddZedefddZedefddZedefddZede
e fddZedefddZede
ee  fddZede
ee  fddZedefddZede
e fddZd S )!MetricInfoMixinu   This base class exposes some attributes of MetricInfo
    at the base level of the Metric for easy access.

    <Deprecated version="2.5.0">

    Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate

    </Deprecated>

    infoc                 C   s
   || _ d S r   _metric_info)r    rH   r%   r%   r&   r   [   s   
zMetricInfoMixin.__init__c                 C   s   | j S )zN:class:`datasets.MetricInfo` object containing all the metadata in the metric.rI   r-   r%   r%   r&   rH   ^   s   zMetricInfoMixin.inforeturnc                 C      | j jS r   )rJ   metric_namer-   r%   r%   r&   namec      zMetricInfoMixin.namec                 C   rL   r   )rJ   experiment_idr-   r%   r%   r&   rP   g   rO   zMetricInfoMixin.experiment_idc                 C   rL   r   )rJ   descriptionr-   r%   r%   r&   rQ   k   rO   zMetricInfoMixin.descriptionc                 C   rL   r   )rJ   citationr-   r%   r%   r&   rR   o   rO   zMetricInfoMixin.citationc                 C   rL   r   )rJ   featuresr-   r%   r%   r&   rS   s   rO   zMetricInfoMixin.featuresc                 C   rL   r   )rJ   inputs_descriptionr-   r%   r%   r&   rT   w   rO   z"MetricInfoMixin.inputs_descriptionc                 C   rL   r   )rJ   homepager-   r%   r%   r&   rU   {   rO   zMetricInfoMixin.homepagec                 C   rL   r   )rJ   licenser-   r%   r%   r&   rV      rO   zMetricInfoMixin.licensec                 C   rL   r   )rJ   codebase_urlsr-   r%   r%   r&   rW      rO   zMetricInfoMixin.codebase_urlsc                 C   rL   r   )rJ   reference_urlsr-   r%   r%   r&   rX      rO   zMetricInfoMixin.reference_urlsc                 C   rL   r   )rJ   
streamabler-   r%   r%   r&   rY      rO   zMetricInfoMixin.streamablec                 C   rL   r   )rJ   formatr-   r%   r%   r&   rZ      rO   zMetricInfoMixin.formatN)r0   r1   r2   r3   r   r   propertyrH   strrN   r   rP   rQ   rR   r   rS   rT   rU   rV   r   rW   rX   boolrY   rZ   r%   r%   r%   r&   rG   O   s:    
rG   c                   @   sx  e Zd ZdZed									d;d	ee d
edee dededee dee dede	ee
f fddZdd Zdd Zdd Zd<deeef fddZdeee ee f fddZdd  Zd!d" Zd#d$ Zddd%dee fd&d'Zddd%d(d)Zddd*d+d,Zd<d-d.Zdefd/d0Z		d=d1ee d2ee fd3d4Z d5d6 Z!ddd%de"ee#f fd7d8Z$d9d: Z%dS )>Metricu  A Metric is the base class and common API for all metrics.

    <Deprecated version="2.5.0">

    Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        config_name (``str``): This is used to define a hash specific to a metrics computation script and prevents the metric's data
            to be overridden when the metric loading script is modified.
        keep_in_memory (:obj:`bool`): keep all predictions and references in memory. Not possible in distributed settings.
        cache_dir (``str``): Path to a directory in which temporary prediction/references data will be stored.
            The data directory should be located on a shared file-system in distributed setups.
        num_process (``int``): specify the total number of nodes in a distributed settings.
            This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
        process_id (``int``): specify the id of the current process in a distributed setup (between 0 and num_process-1)
            This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
        seed (:obj:`int`, optional): If specified, this will temporarily set numpy's random seed when :func:`datasets.Metric.compute` is run.
        experiment_id (``str``): A specific experiment id. This is used if several distributed evaluations share the same file system.
            This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
        max_concurrent_cache_files (``int``): Max number of concurrent metrics cache files (default 10000).
        timeout (``Union[int, float]``): Timeout in second for distributed setting synchronization.
    uO   Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluateNFr
   r   '  d   config_namekeep_in_memory	cache_dirnum_process
process_idseedrP   max_concurrent_cache_filesr'   c
                 K   s  |pd| _ |  }t| jj|_| j |_ |pd|_t| | t	|t
r(|dk r,tdt	|t
r5||kr9td|rC|dkrCtd|| _|| _|| _|| _tj|pVtj| _|  | _|d u rytj ^}}}}|dk rs|| n|d | _n|| _|	| _tt| j | | _ tt| j!| | _!tt| j"| | _"| j j# j$| j%j&7  _$| j!j# j$| j%j&7  _$| j"j# j$| j%j&7  _$d | _'d | _(d | _)d | _*d | _+d | _,d | _-d | _.d | _/d S )	Ndefaultdefault_experimentr   z.'process_id' should be a number greater than 0z8'num_process' should be a number greater than process_idr
   zPUsing 'keep_in_memory' is not possible in distributed setting (num_process > 1).ip  )0ra   _infor   r$   r0   rM   rP   rG   r   
isinstanceint
ValueErrorrd   re   rg   rb   ospath
expanduserr   HF_METRICS_CACHE_data_dir_root_build_data_dirdata_dirnprandom	get_staterf   r'   types
MethodTyper   compute	add_batchadd__func__r3   rH   rT   
buf_writerwriterwriter_batch_sizedatacache_file_namer   rendez_vous_lock
file_paths	filelocks)r    ra   rb   rc   rd   re   rf   rP   rg   r'   r"   rH   _posr%   r%   r&   r      sL   



zMetric.__init__c                 C   s   | j du rdS t| j S )zReturn the number of examples (predictions or predictions/references pair)
        currently stored in the metric's cache.
        Nr   )r   rD   r-   r%   r%   r&   __len__   s   zMetric.__len__c              	   C   s(   d| j  d| j d| j dt|  d	S )NzMetric(name: "z", features: z, usage: """z""", stored examples: ))rN   rS   rT   rD   r-   r%   r%   r&   __repr__   s   zMetric.__repr__c                 C   s,   | j }tj|| j| j}tj|dd |S )a  Path of this metric in cache_dir:
        Will be:
            self._data_dir_root/self.name/self.config_name/self.hash (if not none)/
        If any of these element is missing or if ``with_version=False`` the corresponding subfolders are dropped.
        T)exist_ok)rr   rn   ro   r;   rN   ra   makedirs)r    builder_data_dirr%   r%   r&   rs      s   zMetric._build_data_dirrK   c                 C   s   t j| j| j d| j d| j d}d}t| jD ]^}t	|d }z|j
|d W nI tyt   | jdkrDtd| d| j d	d|| jd krUtd
| j ddtt }t j| j| j d| d| j d| j d}Y qw  ||fS ||fS )zTCreate a new cache file. If the default cache file is used, we generated a new hash.-.arrowN.lockr'   r
   z^Error in _create_cache_file: another metric instance is already using the local cache file at .. Please specify an experiment_id (currently: :) to avoid collision between distributed metric instances.zCannot acquire lock, too many metric instance are operating concurrently on this file system.You should set a larger value of max_concurrent_cache_files when creating the metric (current value is z).)rn   ro   r;   rt   rP   rd   re   rangerg   r   r)   r	   rm   r\   uuiduuid4)r    r'   	file_pathr   i	file_uuidr%   r%   r&   _create_cache_file  s<   (
$zMetric._create_cache_filec              
      s    j dkr jdu rtd jg}n fddt j D }g }t|D ]7\}}|dkr4| j q%t|d }z	|j j	d W n t
yV   td	| d
| ddw || q%||fS )zGet a lock on all the cache files in a distributed setup.
        We wait for timeout second to let all the distributed node finish their tasks (default is 100 seconds).
        r
   Nz|Metric cache file doesn't exist. Please make sure that you call `add` or `add_batch` at least once before calling `compute`.c                    2   g | ]}t j j j d  j d | dqS )r   r   rn   ro   r;   rt   rP   rd   r7   re   r-   r%   r&   
<listcomp>8      $z/Metric._get_all_cache_files.<locals>.<listcomp>r   r   r   z#Cannot acquire lock on cached file z for process .)rd   r   rm   r   	enumerateappendr   r   r)   r'   r	   )r    r   r   re   r   r   r%   r-   r&   _get_all_cache_files,  s2   



zMetric._get_all_cache_filesc              
      sr    fddt  jD }|D ](}t|}z	|j jd W n ty1   td| d j dd w |  qd S )Nc                    r   )r   z.arrow.lockr   r   r-   r%   r&   r   P  r   z5Metric._check_all_processes_locks.<locals>.<listcomp>r   Expected to find locked file  from process  but it doesn't exist.)	r   rd   r   r)   r'   r	   rm   re   r,   )r    expected_lock_file_namesexpected_lock_file_name
nofilelockr%   r-   r&   _check_all_processes_locksO  s    

z!Metric._check_all_processes_locksc              	   C   s   t j| j| j d| j d}t|}z	|j| jd W n t	y1   t
d| d| j dd w |  t j| j| j d| j d}t|}z	|j| jd W n t	yg   t
d| d| j d	d w |  d S )
Nr   z-0.arrow.lockr   r   r   r   	-rdv.lockzCouldn't acquire lock on r   )rn   ro   r;   rt   rP   rd   r   r)   r'   r	   rm   re   r,   r   )r    r   r   lock_file_namer   r%   r%   r&   _check_rendez_vous_  s(     zMetric._check_rendez_vousc                 C   s   | j dur
| j   d| _ | jdur| jdkr| j  | jr5t| jt| j	dd}t
| j | _dS | jdkrq|  \}}ztdt| j	dd}t
d	i |dd |D | _W n tyh   tddw || _|| _dS dS )
zClose all the writing process and load/gather the data
        from all the nodes if main node or all_process is True.
        Nr   rS   )ro   rH    c                 S   s   g | ]}d |iqS )filenamer%   )r7   fr%   r%   r&   r     s    z$Metric._finalize.<locals>.<listcomp>zError in finalize: another metric instance is already using the local cache file. Please specify an experiment_id to avoid collision between distributed metric instances.r%   )r   finalizer   re   r,   rb   r   rt   r   rS   r   from_bufferr~   getvaluer   r   
read_filesFileNotFoundErrorrm   r   r   )r    readerr   r   r%   r%   r&   	_finalizes  s.   



$
zMetric._finalizepredictions
referencesc                   s  ||d |du r|du r fddj D } | n fddj D }|r8td| dtj   fd	dj D }fd
dD }tdd | D r_jdi |   d_d_	j
dkr܈jjjjd fddj D }tj jdi ||}W d   n1 sw   Y  jdurd_`d_|S tttjjD ]\}	}
td|
  `d_`d_t|
 |	  q|S dS )ai  Compute the metrics.

        Usage of positional arguments is not allowed to prevent mistakes.

        Args:
            predictions (list/array/tensor, optional): Predictions.
            references (list/array/tensor, optional): References.
            **kwargs (optional): Keyword arguments that will be forwarded to the metrics :meth:`_compute`
                method (see details in the docstring).

        Return:
            dict or None

            - Dictionary with the metrics if this metric is run on the main process (``process_id == 0``).
            - None if the metric is not run on the main process (``process_id != 0``).

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> accuracy = metric.compute(predictions=model_prediction, references=labels)
        ```
        r   Nc                    s   i | ]	}| vr|d qS r   r%   r7   k
all_kwargsr%   r&   
<dictcomp>      z"Metric.compute.<locals>.<dictcomp>c                    s   g | ]}| vr|qS r%   r%   r   r   r%   r&   r         z"Metric.compute.<locals>.<listcomp>zMetric inputs are missing: . All required inputs are c                       i | ]}| | qS r%   r%   r7   
input_namer   r%   r&   r         c                    s    i | ]}|j vr| | qS r%   r   r   )r"   r    r%   r&   r     s     c                 s   s    | ]}|d uV  qd S r   r%   )r7   vr%   r%   r&   r9     r:   z!Metric.compute.<locals>.<genexpr>r   )rB   c                    s   i | ]}| j | qS r%   )r   r   r-   r%   r&   r     r   z	Removing r%   )rS   updaterm   rC   anyvaluesr{   r   r   r   re   r   
set_formatrH   rZ   r   rf   _computer~   reversedzipr   r   loggerr   rn   remover,   )r    r   r   r"   missing_kwargsmissing_inputsinputscompute_kwargsoutputr   r   r%   )r   r"   r    r&   rz     sJ   



zMetric.computec                   sn  fdd|D }|rt d| dtj ||d|  fddjD  jj  jdu r9  z	j  W dS  tj	y   t
 fd	d
 D rtt  fdd D d }d dt   d| dt |  d	}n2tjddgkrdj d}d fdd
jD }||7 }ndj dt| dt| }t |dw )a  Add a batch of predictions and references for the metric's stack.

        Args:
            predictions (list/array/tensor, optional): Predictions.
            references (list/array/tensor, optional): References.

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> metric.add_batch(predictions=model_prediction, references=labels)
        ```
        c                       g | ]	}| j vr|qS r%   r   r   r-   r%   r&   r     r   z$Metric.add_batch.<locals>.<listcomp>Bad inputs for metric: r   r   c                    r   r%   r%   r7   intput_namebatchr%   r&   r     r   z$Metric.add_batch.<locals>.<dictcomp>Nc                 3   s0    | ]}t  | t tt  kV  qd S r   )rD   nextiterr   r7   cr   r%   r&   r9     s   . z#Metric.add_batch.<locals>.<genexpr>c                    s(   g | ]}t  | t   kr|qS r%   )rD   r   )r   col0r%   r&   r     s   ( r   zMismatch in the number of z (z) and r   r   r   @Metric inputs don't match the expected format.
Expected format: ,
c                 3   (    | ]}d | dt  |  V  qdS zInput z: NrF   r   r   r%   r&   r9         
zPPredictions and/or references don't match the expected format.
Expected format: z,
Input predictions: z,
Input references: )rm   rC   rS   rH   encode_batchr   _init_writerwrite_batchpaArrowInvalidr   r   r   rD   sortedr;   rF   )r    r   r   r"   
bad_inputsbad_col	error_msgerror_msg_inputsr%   )r   r   r    r&   r{     s@   
,

zMetric.add_batch)
prediction	referencec                   s   fdd|D }|rt d| dtj ||d|  fddjD  jj  jdu r9  z	j  W dS  tj	yg   d	j d
}d

 fddjD }||7 }t |dw )a  Add one prediction and reference for the metric's stack.

        Args:
            prediction (list/array/tensor, optional): Predictions.
            reference (list/array/tensor, optional): References.

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> metric.add(predictions=model_predictions, references=labels)
        ```
        c                    r   r%   r   r   r-   r%   r&   r     r   zMetric.add.<locals>.<listcomp>r   r   r   c                    r   r%   r%   r   exampler%   r&   r     r   zMetric.add.<locals>.<dictcomp>Nr   r   c                 3   r   r   r   r   r   r%   r&   r9   &  r   zMetric.add.<locals>.<genexpr>)rm   rC   rS   rH   encode_exampler   r   writer   r   r;   )r    r   r   r"   r   r   r   r%   )r   r    r&   r|     s$   

z
Metric.addc              	   C   s(  | j dkr=| jdkr=tj| j| j d| j  d}t|| _z	| jj	|d W n t
y<   td| d| j dd w | jrRt | _t| jj| j| jd	| _n%d | _| jd u s_| jd u rk|  \}}|| _|| _t| jj| j| jd
| _| j dkr| jdkr|   | j  d S |   d S d S )Nr
   r   r   r   r   zXError in _init_writer: another metric instance is already using the local cache file at r   r   )rS   streamr   )rS   ro   r   )rd   re   rn   ro   r;   rt   rP   r   r   r)   TimeoutErrorrm   rb   r   BufferOutputStreamr~   r   rH   rS   r   r   r   r   r   r   r,   r   )r    r'   r   r   r   r%   r%   r&   r   ,  sD   

 




zMetric._init_writerc                 C      t )a  Construct the MetricInfo object. See `MetricInfo` for details.

        Warning: This function is only called once and the result is cached for all
        following .info() calls.

        Returns:
            info: (MetricInfo) The metrics information
        NotImplementedErrorr-   r%   r%   r&   rj   S     	zMetric._infodownload_config
dl_managerc                 C   sN   |du r |du rt  }tj| jd|_d|_t| j|| jd}| 	| dS )a  Downloads and prepares dataset for reading.

        Args:
            download_config (:class:`DownloadConfig`, optional): Specific download configuration parameters.
            dl_manager (:class:`DownloadManager`, optional): Specific download manager to use.
        N	downloadsF)dataset_namer   rt   )
r   rn   ro   r;   rt   rc   force_downloadr   rN   _download_and_prepare)r    r   r   r%   r%   r&   download_and_prepare^  s   
zMetric.download_and_preparec                 C   s   dS )aY  Downloads and prepares resources for the metric.

        This is the internal implementation to overwrite called when user calls
        `download_and_prepare`. It should download all required resources for the metric.

        Args:
            dl_manager (:class:`DownloadManager`): `DownloadManager` used to download and cache data.
        Nr%   )r    r   r%   r%   r&   r  u  r   zMetric._download_and_preparec                K   r   )zEThis method defines the common API for all the metrics in the libraryr   )r    r   r   r"   r%   r%   r&   r     s   zMetric._computec                 C   s`   t | dr| jd ur| j  t | dr| jd ur| j  t | dr%| `t | dr.| `d S d S )Nr   r   r   r   )hasattrr   r,   r   r   r   r-   r%   r%   r&   __del__  s   



zMetric.__del__)	NFNr
   r   NNr_   r`   )r
   )NN)&r0   r1   r2   r3   r   r   r\   r]   rl   r   floatr   r   r   rs   r   r   r   r   r   r   r   r   dictrz   r{   r|   r   r   rj   r   r   r  r  r   r   r   r  r%   r%   r%   r&   r^      sl    	

D!#"G/
!'
r^   )4r3   rn   rx   r   typingr   r   r   r   r   r   numpyru   pyarrowr   r   r   r	   r   r   arrow_datasetr   arrow_readerr   arrow_writerr   download.download_configr   download.download_managerr   rS   r   rH   r   r   namingr   utils._filelockr   utils.deprecation_utilsr   utils.loggingr   utils.py_utilsr   r   r0   r   r   rF   rG   r^   r%   r%   r%   r&   <module>   s4    
E