o
    TZh                     @   sz   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 dZ
dZg dZg dZd	efd
dZG dd deZdS )    N)ArgumentParser	Namespace)BaseDatasetsCLICommand)
get_loggerz><<<<<<< This should probably be modified because it mentions: z=======
>>>>>>>
)ZTextEncoderConfigZByteTextEncoderZSubwordTextEncoderZencoder_configZmaybe_build_from_corpusZ
manual_dir))z
tfds\.coreZdatasets)ztf\.io\.gfile\.GFileopen)ztf\.([\w\d]+)zdatasets.Value('\1'))ztfds\.features\.Text\(\)zdatasets.Value('string'))ztfds\.features\.Text\(zdatasets.Value('string'),)z+features\s*=\s*tfds.features.FeaturesDict\(zfeatures=datasets.Features()ztfds\.features\.FeaturesDict\(zdict()zThe TensorFlow Datasets AuthorszDThe TensorFlow Datasets Authors and the HuggingFace Datasets Authors)ztfds\.z	datasets.)zdl_manager\.manual_dirzself.config.data_dir)zself\.builder_configzself.configargsc                 C   s   t | j| jS )zz
    Factory function used to convert a model TF 1.0 checkpoint in a PyTorch checkpoint.

    Returns: ConvertCommand
    )ConvertCommand	tfds_pathdatasets_directory)r    r   P/var/www/html/lang_env/lib/python3.10/site-packages/datasets/commands/convert.pyconvert_command_factory*   s   r   c                   @   s8   e Zd ZedefddZdedefddZdd	 Zd
S )r   parserc                 C   sB   | j ddd}|jdtddd |jdtdd	d |jtd
 dS )z
        Register this command to argparse so it's available for the datasets-cli

        Args:
            parser: Root parser to register command-specific arguments
        convertzHConvert a TensorFlow Datasets dataset to a HuggingFace Datasets dataset.)helpz--tfds_pathTzQPath to a TensorFlow Datasets folder to convert or a single tfds file to convert.)typerequiredr   z--datasets_directoryz(Path to the HuggingFace Datasets folder.)funcN)
add_parseradd_argumentstrset_defaultsr   )r   Ztrain_parserr   r   r   register_subcommand4   s   z"ConvertCommand.register_subcommandr	   r
   c                 G   s   t d| _|| _|| _d S )Nzdatasets-cli/converting)r   _logger
_tfds_path_datasets_directory)selfr	   r
   r   r   r   r   __init__K   s   

zConvertCommand.__init__c              	      sN  t j| jrt j| j}nt j| jrt j| j}ntdt j| j}| j	
d| d|  g }g }i }t j| jrHt |}nt j| jg}|D ]}| j	
d|  t j||}t j||}	t j|r~d|v s~d|v s~d|vr| j	
d qRt|d	d
}
|
 }W d    n1 sw   Y  g }d}d}g }|D ]}| d v rqd v rqd v rqd v rd nWd v rd qd v rd nId v r܈ dd n>t fddtD rd}tt fddt}|tt| d  |  |t qtD ]\}}t||  qd v r<td }|dd |d d!D  d"|d   d# v sKd$ v sKd v rTtd%    d& v s^d' v r`d}|  q|snd(|v r|dd}t j||t j|}	t j!dd) | j	
d*  |"fd+d,|D  n||	 |r||	 t|	d-d	d
}
|
#| W d    n	1 sw   Y  | j	
d.|	  qR|D ]:}z#t j|}||dd }| j	
d/| d|  t$%|| W q t&y   | j	'd0| d1 Y qw |r#|D ]}| j	(d2| d3 qd S d S )4NzA--tfds_path is neither a directory nor a file. Please check path.zConverting datasets from z to zLooking at file r   _testz.pyzSkipping filezutf-8)encodingFz!import tensorflow.compat.v2 as tfz
@tfds.corezbuilder=selfz-import tensorflow_datasets.public_api as tfdszimport datasets
zimport tensorflow zfrom absl import loggingzfrom datasets import logging
	getLoggerr   c                 3   s    | ]}| v V  qd S Nr   ).0Z
expressionout_liner   r   	<genexpr>       z%ConvertCommand.run.<locals>.<genexpr>Tc                    s   |  v S r"   r   )er$   r   r   <lambda>   s    z$ConvertCommand.run.<locals>.<lambda>
Ztensorflow_datasetsz/from\stensorflow_datasets.*import\s([^\.\r\n]+)c                 s   s    | ]}|  V  qd S r"   )stripr#   impr   r   r   r&      r'      ,zfrom . import ztf.ztfds.zError converting ZGeneratorBasedBuilderZBeamBasedBuilderZwmt)exist_okzAdding directory c                    s   i | ]}| qS r   r   r,   )
output_dirr   r   
<dictcomp>   s    z&ConvertCommand.run.<locals>.<dictcomp>wzConverted in zMoving z#Cannot find destination folder for z. Please copy manually.z!You need to manually update file z4 to remove configurations using 'TextEncoderConfig'.))ospathisdirr   abspathisfiledirname
ValueErrorr   r   infolistdirbasenamejoinr   	readlinesreplaceanyTO_HIGHLIGHTlistfilterappendHIGHLIGHT_MESSAGE_PREr   HIGHLIGHT_MESSAGE_POST
TO_CONVERTresubmatchextendgroupsplitr+   makedirsupdate
writelinesshutilcopyKeyErrorerrorwarning)r   Zabs_tfds_pathZabs_datasets_pathZutils_filesZwith_manual_updateZimports_to_builder_mapZ
file_namesf_nameZ
input_fileZoutput_fileflinesZ	out_linesZ
is_builderZneeds_manual_updateZtfds_importsline	to_removepatternreplacementrK   dir_nameZ
utils_fileZdest_folder	file_pathr   )r%   r1   r   runQ   s   
$



 


zConvertCommand.runN)	__name__
__module____qualname__staticmethodr   r   r   r   r`   r   r   r   r   r   3   s
    r   )r4   rI   rR   argparser   r   Zdatasets.commandsr   Zdatasets.utils.loggingr   rF   rG   rB   rH   r   r   r   r   r   r   <module>   s    		