o
    TZh                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZ d dlZd dlZd dlZd dlZd dlmZ ejjeZdd Zdede	e d	e	e d
e
eeef fddZG dd dejZdS )    N)Path)ListOptionalTupleUnion)filenames_for_dataset_splitc                 C   s   t |  jS N)r   statst_mtime)cached_directory_path r   \/var/www/html/lang_env/lib/python3.10/site-packages/datasets/packaged_modules/cache/cache.py_get_modification_time   s   r   dataset_nameconfig_name	cache_dirreturnc              
   C   s  t jt|p	tjj}t j|| dd}dd t		t j||p#dddD }|sf|d urAdd t		t j|dddD }t
dd |D }td	|  |rWd
| dnd |rbd|  d tt
|tdd }|jdd  \}}dd t		t j|d||D }	|st|	dkrtd|  dd|	 d|  d|	d  d	|jd }d| d| dtt| d}
t|
 |||fS )N/___c                 S      g | ]
}t j|r|qS r   ospathisdir.0r   r   r   r   
<listcomp>       
z'_find_hash_in_cache.<locals>.<listcomp>*c                 S   r   r   r   r   r   r   r   r   &   r   c                 S   s   h | ]	}t |jd  qS )r   partsr   r   r   r   	<setcomp>.   s    z&_find_hash_in_cache.<locals>.<setcomp>zCouldn't find cache for z for config '' z!
Available configs in the cache: )keyc                 S   s&   g | ]}t j|rt|jd  qS r   )r   r   r   r   r!   r   r   r   r   r   8   s    
   zThere are multiple 'z' configurations in the cache: z, zR
Please specify which configuration to reload from the cache, e.g.
	load_dataset('z', 'r   z')r    z/Found the latest cached dataset configuration 'z' at z (last modified on z).)r   r   
expanduserstrdatasetsconfigZHF_DATASETS_CACHEjoinreplaceglobsorted
ValueErrorr   r   r!   lentimectimeloggerwarning)r   r   r   Z#cached_datasets_directory_path_rootZcached_directory_pathsZavailable_configsr   versionhashZother_configsZwarning_msgr   r   r   _find_hash_in_cache   s^   


r9   c                       s   e Zd Z																ddee dee dee dee dee d	ee d
eej deej deee	ef  dee deeee
eejjf  dee dee dee f fddZdejfddZddee fddZdd Zdd Z  ZS ) CacheN0.0.0
deprecatedr   r   r   r7   r8   	base_pathinfofeaturestokenrepo_id
data_filesdata_dirstorage_optionswriter_batch_sizec                    s   |
dkrt d|
 dt |
}	|dkrt jdtd |}|d u r)|d u r)td|d ur1||d< |d ur9||d< |d	krZ|d	krZ| |pFd
j||d}t|pQ|||d\}}}n|d	ksb|d	krftdt j	||||||||	|||d d S )Nr<   z'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'token=z
' instead.z\Parameter 'name' was renamed to 'config_name' in version 2.3.0 and will be removed in 3.0.0.)categoryzArepo_id or dataset_name is required for the Cache dataset builderrB   rC   autodefault)config_kwargsZcustom_features)r   r   r   z0Pass both hash='auto' and version='auto' instead)r   r   r   r7   r8   r=   r>   r@   rA   rD   rE   )
warningswarnFutureWarningr1   ZBUILDER_CONFIG_CLASSZcreate_config_idr9   NotImplementedErrorsuper__init__)selfr   r   r   r7   r8   r=   r>   r?   r@   Zuse_auth_tokenrA   rB   rC   rD   rE   namerI   Z	config_id	__class__r   r   rO   M   sX   
zCache.__init__r   c                 C   s   t  S r   )r+   DatasetInforP   r   r   r   _info   s   zCache._info
output_dirc                 O   sR   t j| jstd| j d| j |d ur%|| jkr't| j| d S d S d S )NzCache directory for z doesn't exist at )r   r   existsr   r1   r   shutilcopytree)rP   rW   argskwargsr   r   r   download_and_prepare   s
   zCache.download_and_preparec                    sL   t  jjtjrt jj }ntd j d j	  fdd|D S )NzMissing splits info for z in cache directory c                    s6   g | ]}t j|jd t j j|jd|jdidqS )filesarrow)r   splitZfiletype_suffixshard_lengths)rQ   Z
gen_kwargs)r+   ZSplitGeneratorrQ   r   r   r   ra   )r   Z
split_inforU   r   r   r      s    z+Cache._split_generators.<locals>.<listcomp>)

isinstancer>   Zsplitsr+   Z	SplitDictlistvaluesr1   r   r   )rP   Z
dl_managerZsplit_infosr   rU   r   _split_generators   s   
zCache._split_generatorsc           	      c   s    t |D ]W\}}t|dF}z t tj|D ]\}}tj|g}| d| |fV  qW n tyL } zt	d| dt
| d|   d }~ww W d    n1 sWw   Y  qd S )Nrb_zFailed to read file 'z' with error z: )	enumerateopenpaZipcZopen_streamTableZfrom_batchesr1   r5   errortype)	rP   r^   Zfile_idxfilefZ	batch_idxZrecord_batchZpa_tableer   r   r   _generate_tables   s"    zCache._generate_tables)NNNr;   NNNNNr<   NNNNNr<   r   )__name__
__module____qualname__r   r*   r+   rT   ZFeaturesr   boolrc   dictrB   ZDataFilesDictintrO   rV   r]   re   rq   __classcell__r   r   rR   r   r:   L   sd    	
Br:   )r/   r   rY   r3   rJ   pathlibr   typingr   r   r   r   Zpyarrowrj   r+   Zdatasets.configZdatasets.data_filesZdatasets.namingr   utilsloggingZ
get_loggerrr   r5   r   r*   r9   ZArrowBasedBuilderr:   r   r   r   r   <module>   s0    
4