o
    TZhm                     @   sB  d Z ddlZddlZddlZddlmZmZmZmZm	Z	m
Z
 ddlZddlZddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddl m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z,m-Z- e*e.Z/G dd deZ0dd Z1G dd dZ2G dd de2Z3dS )zMetrics base class.    N)AnyDictListOptionalTupleUnion)BaseFileLockTimeout   )config)Dataset)ArrowReader)ArrowWriter)DownloadConfig)DownloadManager)Features)DatasetInfo
MetricInfo)camelcase_to_snakecase)FileLock)
deprecated)
get_logger)copyfunc	temp_seedc                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )FileFreeLockz-Thread lock until a file **cannot** be lockedc                    s,   t || _t j| jjg|R i | d S N)r   filelocksuper__init__	lock_file)selfr   argskwargs	__class__ F/var/www/html/lang_env/lib/python3.10/site-packages/datasets/metric.pyr   0   s   
"zFileFreeLock.__init__c                 C   sL   z
| j jddd W n ty   | j j| j_Y d S w | j   d | j_d S )Ng{Gz?g{Gz?)timeoutZpoll_intervall)r   acquirer	   r   _contextlock_file_fdreleaser    r%   r%   r&   _acquire4   s   
zFileFreeLock._acquirec                 C   s   d | j _d S r   )r)   r*   r,   r%   r%   r&   _release?   s   zFileFreeLock._release)__name__
__module____qualname____doc__r   r-   r.   __classcell__r%   r%   r#   r&   r   -   s
    r   c                 C   sP   t | tkrt| dkr|  S dd }d|| d d  d|| dd   dS )	N   c                 S   s   d dd | D S )Nz, c                 s   s    | ]}t |V  qd S r   )repr).0xr%   r%   r&   	<genexpr>J       z?summarize_if_long_list.<locals>.format_chunk.<locals>.<genexpr>)join)chunkr%   r%   r&   format_chunkI   s   z,summarize_if_long_list.<locals>.format_chunk[   z, ..., ])typelistlen)objr<   r%   r%   r&   summarize_if_long_listE   s   *rE   c                   @   s  e Zd ZdZdefddZedd Zedefdd	Z	ede
e fd
dZedefddZedefddZedefddZedefddZede
e fddZedefddZede
ee  fddZede
ee  fddZedefddZede
e fddZd S )!MetricInfoMixinu   This base class exposes some attributes of MetricInfo
    at the base level of the Metric for easy access.

    <Deprecated version="2.5.0">

    Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate

    </Deprecated>

    infoc                 C   s
   || _ d S r   _metric_info)r    rG   r%   r%   r&   r   [   s   
zMetricInfoMixin.__init__c                 C   s   | j S )zN:class:`datasets.MetricInfo` object containing all the metadata in the metric.rH   r,   r%   r%   r&   rG   ^   s   zMetricInfoMixin.inforeturnc                 C      | j jS r   )rI   metric_namer,   r%   r%   r&   namec      zMetricInfoMixin.namec                 C   rK   r   )rI   experiment_idr,   r%   r%   r&   rO   g   rN   zMetricInfoMixin.experiment_idc                 C   rK   r   )rI   descriptionr,   r%   r%   r&   rP   k   rN   zMetricInfoMixin.descriptionc                 C   rK   r   )rI   citationr,   r%   r%   r&   rQ   o   rN   zMetricInfoMixin.citationc                 C   rK   r   )rI   featuresr,   r%   r%   r&   rR   s   rN   zMetricInfoMixin.featuresc                 C   rK   r   )rI   inputs_descriptionr,   r%   r%   r&   rS   w   rN   z"MetricInfoMixin.inputs_descriptionc                 C   rK   r   )rI   homepager,   r%   r%   r&   rT   {   rN   zMetricInfoMixin.homepagec                 C   rK   r   )rI   licenser,   r%   r%   r&   rU      rN   zMetricInfoMixin.licensec                 C   rK   r   )rI   codebase_urlsr,   r%   r%   r&   rV      rN   zMetricInfoMixin.codebase_urlsc                 C   rK   r   )rI   reference_urlsr,   r%   r%   r&   rW      rN   zMetricInfoMixin.reference_urlsc                 C   rK   r   )rI   
streamabler,   r%   r%   r&   rX      rN   zMetricInfoMixin.streamablec                 C   rK   r   )rI   formatr,   r%   r%   r&   rY      rN   zMetricInfoMixin.formatN)r/   r0   r1   r2   r   r   propertyrG   strrM   r   rO   rP   rQ   r   rR   rS   rT   rU   r   rV   rW   boolrX   rY   r%   r%   r%   r&   rF   O   s:    
rF   c                   @   sx  e Zd ZdZed									d;d	ee d
edee dededee dee dede	ee
f fddZdd Zdd Zdd Zd<deeef fddZdeee ee f fddZdd  Zd!d" Zd#d$ Zddd%dee fd&d'Zddd%d(d)Zddd*d+d,Zd<d-d.Zdefd/d0Z		d=d1ee d2ee fd3d4Z d5d6 Z!ddd%de"ee#f fd7d8Z$d9d: Z%dS )>Metricu  A Metric is the base class and common API for all metrics.

    <Deprecated version="2.5.0">

    Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        config_name (``str``): This is used to define a hash specific to a metrics computation script and prevents the metric's data
            to be overridden when the metric loading script is modified.
        keep_in_memory (:obj:`bool`): keep all predictions and references in memory. Not possible in distributed settings.
        cache_dir (``str``): Path to a directory in which temporary prediction/references data will be stored.
            The data directory should be located on a shared file-system in distributed setups.
        num_process (``int``): specify the total number of nodes in a distributed settings.
            This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
        process_id (``int``): specify the id of the current process in a distributed setup (between 0 and num_process-1)
            This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
        seed (:obj:`int`, optional): If specified, this will temporarily set numpy's random seed when :func:`datasets.Metric.compute` is run.
        experiment_id (``str``): A specific experiment id. This is used if several distributed evaluations share the same file system.
            This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
        max_concurrent_cache_files (``int``): Max number of concurrent metrics cache files (default 10000).
        timeout (``Union[int, float]``): Timeout in second for distributed setting synchronization.
    uO   Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluateNFr
   r   '  d   config_namekeep_in_memory	cache_dirnum_process
process_idseedrO   max_concurrent_cache_filesr'   c
                 K   s  |pd| _ |  }t| jj|_| j |_ |pd|_t| | t	|t
r(|dk r,tdt	|t
r5||kr9td|rC|dkrCtd|| _|| _|| _|| _tj|pVtj| _|  | _|d u rytj ^}}}}|dk rs|| n|d | _n|| _|	| _tt| j | | _ tt| j!| | _!tt| j"| | _"| j j# j$| j%j&7  _$| j!j# j$| j%j&7  _$| j"j# j$| j%j&7  _$d | _'d | _(d | _)d | _*d | _+d | _,d | _-d | _.d | _/d S )	NdefaultZdefault_experimentr   z.'process_id' should be a number greater than 0z8'num_process' should be a number greater than process_idr
   zPUsing 'keep_in_memory' is not possible in distributed setting (num_process > 1).ip  )0r`   _infor   r$   r/   rL   rO   rF   r   
isinstanceint
ValueErrorrc   rd   rf   ra   ospath
expanduserr   ZHF_METRICS_CACHE_data_dir_root_build_data_dirdata_dirnprandomZ	get_statere   r'   types
MethodTyper   compute	add_batchadd__func__r2   rG   rS   
buf_writerwriterwriter_batch_sizedatacache_file_namer   rendez_vous_lock
file_paths	filelocks)r    r`   ra   rb   rc   rd   re   rO   rf   r'   r"   rG   _posr%   r%   r&   r      sL   



zMetric.__init__c                 C   s   | j du rdS t| j S )zReturn the number of examples (predictions or predictions/references pair)
        currently stored in the metric's cache.
        Nr   )r{   rC   r,   r%   r%   r&   __len__   s   zMetric.__len__c              	   C   s(   d| j  d| j d| j dt|  d	S )NzMetric(name: "z", features: z, usage: """z""", stored examples: ))rM   rR   rS   rC   r,   r%   r%   r&   __repr__   s   zMetric.__repr__c                 C   s,   | j }tj|| j| j}tj|dd |S )a  Path of this metric in cache_dir:
        Will be:
            self._data_dir_root/self.name/self.config_name/self.hash (if not none)/
        If any of these element is missing or if ``with_version=False`` the corresponding subfolders are dropped.
        T)exist_ok)ro   rl   rm   r:   rM   r`   makedirs)r    Zbuilder_data_dirr%   r%   r&   rp      s   zMetric._build_data_dirrJ   c                 C   s   t j| j| j d| j d| j d}d}t| jD ]^}t	|d }z|j
|d W nI tyt   | jdkrDtd| d| j d	d|| jd krUtd
| j ddtt }t j| j| j d| d| j d| j d}Y qw  ||fS ||fS )zTCreate a new cache file. If the default cache file is used, we generated a new hash.-.arrowN.lockr'   r
   z^Error in _create_cache_file: another metric instance is already using the local cache file at .. Please specify an experiment_id (currently: :) to avoid collision between distributed metric instances.zCannot acquire lock, too many metric instance are operating concurrently on this file system.You should set a larger value of max_concurrent_cache_files when creating the metric (current value is z).)rl   rm   r:   rq   rO   rc   rd   rangerf   r   r(   r	   rk   r[   uuiduuid4)r    r'   	file_pathr   iZ	file_uuidr%   r%   r&   _create_cache_file  s<   (
$zMetric._create_cache_filec              
      s    j dkr jdu rtd jg}n fddt j D }g }t|D ]7\}}|dkr4| j q%t|d }z	|j j	d W n t
yV   td	| d
| ddw || q%||fS )zGet a lock on all the cache files in a distributed setup.
        We wait for timeout second to let all the distributed node finish their tasks (default is 100 seconds).
        r
   Nz|Metric cache file doesn't exist. Please make sure that you call `add` or `add_batch` at least once before calling `compute`.c                    2   g | ]}t j j j d  j d | dqS )r   r   rl   rm   r:   rq   rO   rc   r6   rd   r,   r%   r&   
<listcomp>8      $z/Metric._get_all_cache_files.<locals>.<listcomp>r   r   r   z#Cannot acquire lock on cached file z for process .)rc   r~   rk   r   	enumerateappendr   r   r(   r'   r	   )r    r   r   rd   r   r   r%   r,   r&   _get_all_cache_files,  s2   



zMetric._get_all_cache_filesc              
      sr    fddt  jD }|D ](}t|}z	|j jd W n ty1   td| d j dd w |  qd S )Nc                    r   )r   z.arrow.lockr   r   r,   r%   r&   r   P  r   z5Metric._check_all_processes_locks.<locals>.<listcomp>r   Expected to find locked file  from process  but it doesn't exist.)	r   rc   r   r(   r'   r	   rk   rd   r+   )r    Zexpected_lock_file_namesexpected_lock_file_name
nofilelockr%   r,   r&   _check_all_processes_locksO  s    

z!Metric._check_all_processes_locksc              	   C   s   t j| j| j d| j d}t|}z	|j| jd W n t	y1   t
d| d| j dd w |  t j| j| j d| j d}t|}z	|j| jd W n t	yg   t
d| d| j d	d w |  d S )
Nr   z-0.arrow.lockr   r   r   r   	-rdv.lockzCouldn't acquire lock on r   )rl   rm   r:   rq   rO   rc   r   r(   r'   r	   rk   rd   r+   r   )r    r   r   Zlock_file_namer   r%   r%   r&   _check_rendez_vous_  s(     zMetric._check_rendez_vousc                 C   s   | j dur
| j   d| _ | jdur| jdkr| j  | jr5t| jt| j	dd}t
| j | _dS | jdkrq|  \}}ztdt| j	dd}t
d	i |dd |D | _W n tyh   tddw || _|| _dS dS )
zClose all the writing process and load/gather the data
        from all the nodes if main node or all_process is True.
        Nr   rR   )rm   rG    c                 S   s   g | ]}d |iqS )filenamer%   )r6   fr%   r%   r&   r     s    z$Metric._finalize.<locals>.<listcomp>zError in finalize: another metric instance is already using the local cache file. Please specify an experiment_id to avoid collision between distributed metric instances.r%   )r{   finalizer   rd   r+   ra   r   rq   r   rR   r   Zfrom_bufferrz   getvaluer}   r   Z
read_filesFileNotFoundErrorrk   r   r   )r    readerr   r   r%   r%   r&   	_finalizes  s.   



$
zMetric._finalizepredictions
referencesc                   s  ||d |du r|du r fddj D } | n fddj D }|r8td| dtj   fd	dj D }fd
dD }tdd | D r_jdi |   d_d_	j
dkr܈jjjjd fddj D }tj jdi ||}W d   n1 sw   Y  jdurd_`d_|S tttjjD ]\}	}
td|
  `d_`d_t|
 |	  q|S dS )ai  Compute the metrics.

        Usage of positional arguments is not allowed to prevent mistakes.

        Args:
            predictions (list/array/tensor, optional): Predictions.
            references (list/array/tensor, optional): References.
            **kwargs (optional): Keyword arguments that will be forwarded to the metrics :meth:`_compute`
                method (see details in the docstring).

        Return:
            dict or None

            - Dictionary with the metrics if this metric is run on the main process (``process_id == 0``).
            - None if the metric is not run on the main process (``process_id != 0``).

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> accuracy = metric.compute(predictions=model_prediction, references=labels)
        ```
        r   Nc                    s   i | ]	}| vr|d qS r   r%   r6   k
all_kwargsr%   r&   
<dictcomp>      z"Metric.compute.<locals>.<dictcomp>c                    s   g | ]}| vr|qS r%   r%   r   r   r%   r&   r         z"Metric.compute.<locals>.<listcomp>zMetric inputs are missing: . All required inputs are c                       i | ]}| | qS r%   r%   r6   Z
input_namer   r%   r&   r         c                    s    i | ]}|j vr| | qS r%   r   r   )r"   r    r%   r&   r     s     c                 s   s    | ]}|d uV  qd S r   r%   )r6   vr%   r%   r&   r8     r9   z!Metric.compute.<locals>.<genexpr>r   )rA   c                    s   i | ]}| j | qS r%   )r}   r   r,   r%   r&   r     r   z	Removing r%   )rR   updaterk   rB   anyvaluesrw   r   r~   r   rd   r}   Z
set_formatrG   rY   r   re   _computerz   reversedzipr   r   loggerr{   rl   remover+   )r    r   r   r"   Zmissing_kwargsZmissing_inputsZinputsZcompute_kwargsoutputr   r   r%   )r   r"   r    r&   rv     sJ   



zMetric.computec                   sn  fdd|D }|rt d| dtj ||d|  fddjD  jj  jdu r9  z	j  W dS  tj	y   t
 fd	d
 D rtt  fdd D d }d dt   d| dt |  d	}n2tjddgkrdj d}d fdd
jD }||7 }ndj dt| dt| }t |dw )a  Add a batch of predictions and references for the metric's stack.

        Args:
            predictions (list/array/tensor, optional): Predictions.
            references (list/array/tensor, optional): References.

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> metric.add_batch(predictions=model_prediction, references=labels)
        ```
        c                       g | ]	}| j vr|qS r%   r   r   r,   r%   r&   r     r   z$Metric.add_batch.<locals>.<listcomp>Bad inputs for metric: r   r   c                    r   r%   r%   r6   Zintput_namebatchr%   r&   r     r   z$Metric.add_batch.<locals>.<dictcomp>Nc                 3   s0    | ]}t  | t tt  kV  qd S r   )rC   nextiterr   r6   cr   r%   r&   r8     s   . z#Metric.add_batch.<locals>.<genexpr>c                    s(   g | ]}t  | t   kr|qS r%   )rC   r   )r   col0r%   r&   r     s   ( r   zMismatch in the number of z (z) and r   r   r   @Metric inputs don't match the expected format.
Expected format: ,
c                 3   (    | ]}d | dt  |  V  qdS zInput z: NrE   r   r   r%   r&   r8         
zPPredictions and/or references don't match the expected format.
Expected format: z,
Input predictions: z,
Input references: )rk   rB   rR   rG   Zencode_batchr{   _init_writerZwrite_batchpaArrowInvalidr   r   r   rC   sortedr:   rE   )r    r   r   r"   
bad_inputsZbad_col	error_msgerror_msg_inputsr%   )r   r   r    r&   rw     s@   
,

zMetric.add_batch)
prediction	referencec                   s   fdd|D }|rt d| dtj ||d|  fddjD  jj  jdu r9  z	j  W dS  tj	yg   d	j d
}d

 fddjD }||7 }t |dw )a  Add one prediction and reference for the metric's stack.

        Args:
            prediction (list/array/tensor, optional): Predictions.
            reference (list/array/tensor, optional): References.

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> metric.add(predictions=model_predictions, references=labels)
        ```
        c                    r   r%   r   r   r,   r%   r&   r     r   zMetric.add.<locals>.<listcomp>r   r   r   c                    r   r%   r%   r   exampler%   r&   r     r   zMetric.add.<locals>.<dictcomp>Nr   r   c                 3   r   r   r   r   r   r%   r&   r8   &  r   zMetric.add.<locals>.<genexpr>)rk   rB   rR   rG   Zencode_exampler{   r   writer   r   r:   )r    r   r   r"   r   r   r   r%   )r   r    r&   rx     s$   

z
Metric.addc              	   C   s(  | j dkr=| jdkr=tj| j| j d| j  d}t|| _z	| jj	|d W n t
y<   td| d| j dd w | jrRt | _t| jj| j| jd	| _n%d | _| jd u s_| jd u rk|  \}}|| _|| _t| jj| j| jd
| _| j dkr| jdkr|   | j  d S |   d S d S )Nr
   r   r   r   r   zXError in _init_writer: another metric instance is already using the local cache file at r   r   )rR   streamr|   )rR   rm   r|   )rc   rd   rl   rm   r:   rq   rO   r   r   r(   TimeoutErrorrk   ra   r   ZBufferOutputStreamrz   r   rG   rR   r|   r{   r~   r   r   r   r+   r   )r    r'   r   r~   r   r%   r%   r&   r   ,  sD   

 




zMetric._init_writerc                 C      t )a  Construct the MetricInfo object. See `MetricInfo` for details.

        Warning: This function is only called once and the result is cached for all
        following .info() calls.

        Returns:
            info: (MetricInfo) The metrics information
        NotImplementedErrorr,   r%   r%   r&   rh   S     	zMetric._infodownload_config
dl_managerc                 C   sN   |du r |du rt  }tj| jd|_d|_t| j|| jd}| 	| dS )a  Downloads and prepares dataset for reading.

        Args:
            download_config (:class:`DownloadConfig`, optional): Specific download configuration parameters.
            dl_manager (:class:`DownloadManager`, optional): Specific download manager to use.
        NZ	downloadsF)Zdataset_namer   rq   )
r   rl   rm   r:   rq   rb   Zforce_downloadr   rM   _download_and_prepare)r    r   r   r%   r%   r&   download_and_prepare^  s   
zMetric.download_and_preparec                 C   s   dS )aY  Downloads and prepares resources for the metric.

        This is the internal implementation to overwrite called when user calls
        `download_and_prepare`. It should download all required resources for the metric.

        Args:
            dl_manager (:class:`DownloadManager`): `DownloadManager` used to download and cache data.
        Nr%   )r    r   r%   r%   r&   r   u  r   zMetric._download_and_preparec                K   r   )zEThis method defines the common API for all the metrics in the libraryr   )r    r   r   r"   r%   r%   r&   r     s   zMetric._computec                 C   s`   t | dr| jd ur| j  t | dr| jd ur| j  t | dr%| `t | dr.| `d S d S )Nr   r   r{   r}   )hasattrr   r+   r   r{   r}   r,   r%   r%   r&   __del__  s   



zMetric.__del__)	NFNr
   r   NNr^   r_   )r
   )NN)&r/   r0   r1   r2   r   r   r[   r\   rj   r   floatr   r   r   rp   r   r   r   r   r   r   r   r   dictrv   rw   rx   r   r   rh   r   r   r   r   r   r   r   r   r%   r%   r%   r&   r]      sl    	

D!#"G/
!'
r]   )4r2   rl   rt   r   typingr   r   r   r   r   r   numpyrr   Zpyarrowr   r   r   r	   r   r   Zarrow_datasetr   Zarrow_readerr   Zarrow_writerr   Zdownload.download_configr   Zdownload.download_managerr   rR   r   rG   r   r   Znamingr   Zutils._filelockr   Zutils.deprecation_utilsr   Zutils.loggingr   Zutils.py_utilsr   r   r/   r   r   rE   rF   r]   r%   r%   r%   r&   <module>   s4    
E