o
    TZh                     @   s,  d dl Z d dlZd dlmZ d dlmZ ddlmZ ddlm	Z	 e	e
ZG dd	 d	e jZG d
d deZG dd deZG dd deZG dd deZd*dee defddZG dd deZG dd deZG dd deZG dd deZdee defd d!Zd+d#ed$ed%efd&d'Zd(d) ZdS ),    N)Optional)insecure_hashlib   )config   )
get_loggerc                   @   s   e Zd ZdZdZdZdZdS )VerificationModea  `Enum` that specifies which verification checks to run.

    The default mode is `BASIC_CHECKS`, which will perform only rudimentary checks to avoid slowdowns
    when generating/downloading a dataset for the first time.

    The verification modes:

    |                           | Verification checks                                                           |
    |---------------------------|------------------------------------------------------------------------------ |
    | `ALL_CHECKS`              | Split checks, uniqueness of the keys yielded in case of the GeneratorBuilder  |
    |                           | and the validity (number of files, checksums, etc.) of downloaded files       |
    | `BASIC_CHECKS` (default)  | Same as `ALL_CHECKS` but without checking downloaded files                    |
    | `NO_CHECKS`               | None                                                                          |

    Z
all_checksZbasic_checksZ	no_checksN)__name__
__module____qualname____doc__Z
ALL_CHECKSZBASIC_CHECKSZ	NO_CHECKS r   r   P/var/www/html/lang_env/lib/python3.10/site-packages/datasets/utils/info_utils.pyr      s
    r   c                   @      e Zd ZdZdS )ChecksumVerificationExceptionz>Exceptions during checksums verifications of downloaded files.Nr	   r
   r   r   r   r   r   r   r   $       r   c                   @   r   )UnexpectedDownloadedFilez(Some downloaded files were not expected.Nr   r   r   r   r   r   (   r   r   c                   @   r   )ExpectedMoreDownloadedFilesz7Some files were supposed to be downloaded but were not.Nr   r   r   r   r   r   ,   r   r   c                   @   r   )NonMatchingChecksumErrorz?The downloaded file checksum don't match the expected checksum.Nr   r   r   r   r   r   0   r   r   expected_checksumsrecorded_checksumsc                    s    d u rt d d S tt t dkr#ttt t ttt  dkr;tttt   fdd D }|d urMd| nd}t|dkr`td| d| d	t d
|  d S )NzUnable to verify checksums.r   c                    s    g | ]} | | kr|qS r   r   ).0urlr   r   r   r   
<listcomp><   s     z$verify_checksums.<locals>.<listcomp>z for  zChecksums didn't matchz:
zY
Set `verification_mode='no_checks'` to skip checksums verification and ignore this errorz&All the checksums matched successfully)loggerinfolensetr   strr   r   )r   r   Zverification_nameZbad_urlsZfor_verification_namer   r   r   verify_checksums4   s    
r"   c                   @   r   )SplitsVerificationExceptionz%Exceptions during splis verificationsNr   r   r   r   r   r#   G   r   r#   c                   @   r   )UnexpectedSplitsz6The expected splits of the downloaded file is missing.Nr   r   r   r   r   r$   K   r   r$   c                   @   r   )ExpectedMoreSplitsz!Some recorded splits are missing.Nr   r   r   r   r   r%   O   r   r%   c                   @   r   )NonMatchingSplitsSizesErrorz7The splits sizes don't match the expected splits sizes.Nr   r   r   r   r   r&   S   r   r&   expected_splitsrecorded_splitsc                    s    d u rt d d S tt t dkr#ttt t ttt  dkr;tttt   fdd D }t|dkrQtt|t d d S )NzUnable to verify splits sizes.r   c                    s2   g | ]} | j | j kr | | d qS ))expectedZrecorded)Znum_examples)r   namer'   r(   r   r   r   _   s
    z!verify_splits.<locals>.<listcomp>z$All the splits matched successfully.)r   r   r   r    r%   r!   r$   r&   )r'   r(   Z
bad_splitsr   r+   r   verify_splitsW   s   
r,   Tpathrecord_checksumreturnc                    sx   |r1t  }t| d t fdddD ]}|| q| }W d   n1 s+w   Y  nd}tj| |dS )z7Compute the file size and the sha256 checksum of a filerbc                      s
     dS )Ni   )readr   fr   r   <lambda>n   s   
 z(get_size_checksum_dict.<locals>.<lambda>    N)	num_byteschecksum)	r   sha256openiterupdate	hexdigestosr-   getsize)r-   r.   mchunkr7   r   r2   r   get_size_checksum_dicti   s   
rA   c                 C   s   | r
t jr
| t jk S dS )zCheck if `dataset_size` is smaller than `config.IN_MEMORY_MAX_SIZE`.

    Args:
        dataset_size (int): Dataset size in bytes.

    Returns:
        bool: Whether `dataset_size` is smaller than `config.IN_MEMORY_MAX_SIZE`.
    F)r   ZIN_MEMORY_MAX_SIZE)Zdataset_sizer   r   r   is_small_datasetv   s   
	
rB   )N)T)enumr=   typingr   Zhuggingface_hub.utilsr   r   r   loggingr   r	   r   Enumr   	Exceptionr   r   r   r   dictr"   r#   r$   r%   r&   r,   r!   boolrA   rB   r   r   r   r   <module>   s(    