o
    +if                     @   sz   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 dZ
dZg dZg dZd	efd
dZG dd deZdS )    N)ArgumentParser	Namespace)BaseDatasetsCLICommand)
get_loggerz><<<<<<< This should probably be modified because it mentions: z=======
>>>>>>>
)TextEncoderConfigByteTextEncoderSubwordTextEncoderencoder_configmaybe_build_from_corpus
manual_dir))z
tfds\.coredatasets)ztf\.io\.gfile\.GFileopen)ztf\.([\w\d]+)zdatasets.Value('\1'))ztfds\.features\.Text\(\)zdatasets.Value('string'))ztfds\.features\.Text\(zdatasets.Value('string'),)z+features\s*=\s*tfds.features.FeaturesDict\(zfeatures=datasets.Features()ztfds\.features\.FeaturesDict\(zdict()zThe TensorFlow Datasets AuthorszDThe TensorFlow Datasets Authors and the HuggingFace Datasets Authors)ztfds\.z	datasets.)zdl_manager\.manual_dirzself.config.data_dir)zself\.builder_configzself.configargsc                 C   s   t | j| jS )zz
    Factory function used to convert a model TF 1.0 checkpoint in a PyTorch checkpoint.

    Returns: ConvertCommand
    )ConvertCommand	tfds_pathdatasets_directory)r    r   R/var/www/html/corbot_env/lib/python3.10/site-packages/datasets/commands/convert.pyconvert_command_factory*   s   r   c                   @   s8   e Zd ZedefddZdedefddZdd	 Zd
S )r   parserc                 C   sB   | j ddd}|jdtddd |jdtdd	d |jtd
 dS )z
        Register this command to argparse so it's available for the datasets-cli

        Args:
            parser: Root parser to register command-specific arguments
        convertzHConvert a TensorFlow Datasets dataset to a HuggingFace Datasets dataset.)helpz--tfds_pathTzQPath to a TensorFlow Datasets folder to convert or a single tfds file to convert.)typerequiredr   z--datasets_directoryz(Path to the HuggingFace Datasets folder.)funcN)
add_parseradd_argumentstrset_defaultsr   )r   train_parserr   r   r   register_subcommand4   s   z"ConvertCommand.register_subcommandr   r   c                 G   s   t d| _|| _|| _d S )Nzdatasets-cli/converting)r   _logger
_tfds_path_datasets_directory)selfr   r   r   r   r   r   __init__K   s   

zConvertCommand.__init__c              	      sN  t j| jrt j| j}nt j| jrt j| j}ntdt j| j}| j	
d| d|  g }g }i }t j| jrHt |}nt j| jg}|D ]}| j	
d|  t j||}t j||}	t j|r~d|v s~d|v s~d|vr| j	
d qRt|d	d
}
|
 }W d    n1 sw   Y  g }d}d}g }|D ]}| d v rqd v rqd v rqd v rd nWd v rd qd v rd nId v r܈ dd n>t fddtD rd}tt fddt}|tt| d  |  |t qtD ]\}}t||  qd v r<td }|dd |d d!D  d"|d   d# v sKd$ v sKd v rTtd%    d& v s^d' v r`d}|  q|snd(|v r|dd}t j||t j|}	t j!dd) | j	
d*  |"fd+d,|D  n||	 |r||	 t|	d-d	d
}
|
#| W d    n	1 sw   Y  | j	
d.|	  qR|D ]:}z#t j|}||dd }| j	
d/| d|  t$%|| W q t&y   | j	'd0| d1 Y qw |r#|D ]}| j	(d2| d3 qd S d S )4NzA--tfds_path is neither a directory nor a file. Please check path.zConverting datasets from z to zLooking at file r%   _testz.pyzSkipping filezutf-8)encodingFz!import tensorflow.compat.v2 as tfz
@tfds.corezbuilder=selfz-import tensorflow_datasets.public_api as tfdszimport datasets
zimport tensorflow zfrom absl import loggingzfrom datasets import logging
	getLoggerr   c                 3   s    | ]}| v V  qd S Nr   ).0
expressionout_liner   r   	<genexpr>       z%ConvertCommand.run.<locals>.<genexpr>Tc                    s   |  v S r*   r   )er-   r   r   <lambda>   s    z$ConvertCommand.run.<locals>.<lambda>
tensorflow_datasetsz/from\stensorflow_datasets.*import\s([^\.\r\n]+)c                 s   s    | ]}|  V  qd S r*   )stripr+   impr   r   r   r/      r0      ,zfrom . import ztf.ztfds.zError converting GeneratorBasedBuilderBeamBasedBuilderwmt)exist_okzAdding directory c                    s   i | ]}| qS r   r   r6   )
output_dirr   r   
<dictcomp>   s    z&ConvertCommand.run.<locals>.<dictcomp>wzConverted in zMoving z#Cannot find destination folder for z. Please copy manually.z!You need to manually update file z4 to remove configurations using 'TextEncoderConfig'.))ospathisdirr"   abspathisfiledirname
ValueErrorr#   r!   infolistdirbasenamejoinr   	readlinesreplaceanyTO_HIGHLIGHTlistfilterappendHIGHLIGHT_MESSAGE_PREr   HIGHLIGHT_MESSAGE_POST
TO_CONVERTresubmatchextendgroupsplitr5   makedirsupdate
writelinesshutilcopyKeyErrorerrorwarning)r$   abs_tfds_pathabs_datasets_pathutils_fileswith_manual_updateimports_to_builder_map
file_namesf_name
input_fileoutput_fileflines	out_lines
is_builderneeds_manual_updatetfds_importsline	to_removepatternreplacementrX   dir_name
utils_filedest_folder	file_pathr   )r.   r>   r   runQ   s   
$



 


zConvertCommand.runN)	__name__
__module____qualname__staticmethodr   r    r   r%   r{   r   r   r   r   r   3   s
    r   )rA   rV   r_   argparser   r   datasets.commandsr   datasets.utils.loggingr   rS   rT   rO   rU   r   r   r   r   r   r   <module>   s    		