o
    TZh                     @   s   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ dd ZG dd deZdS )    N)ArgumentParser)Path)copyfile)List)config)DatasetBuilder)BaseDatasetsCLICommand)DownloadConfig)DownloadMode)dataset_module_factoryimport_main_class)VerificationModec              
   K   s8   t | j| j| j| j| j| j| jp| j| j	| j
f	i |S N)RunBeamCommanddatasetname	cache_dirbeam_pipeline_optionsdata_dirall_configsZ	save_info
save_infosignore_verificationsforce_redownload)argskwargs r   Q/var/www/html/lang_env/lib/python3.10/site-packages/datasets/commands/run_beam.pyrun_beam_command_factory   s   

r   c                   @   sT   e Zd ZedefddZdededededed	ed
ededefddZdd Z	dS )r   parserc                 C   s   | j ddd}|jdtdd |jdtd dd	 |jd
td dd	 |jdtddd	 |jdtd dd	 |jdddd |jdddd |jdddd |jdddd |jdddd |jtd d S )NZrun_beamz&Run a Beam dataset processing pipeline)helpr   zName of the dataset to download)typer   z--namezDataset config name)r    defaultr   z--cache_dirz-Cache directory where the datasets are storedz--beam_pipeline_options zrBeam pipeline options, separated by commas. Example:: `--beam_pipeline_options=job_name=my-job,project=my-project`z
--data_dirz?Can be used to specify a manual directory to get the files fromz--all_configs
store_truezTest all dataset configurations)actionr   z--save_infozSave the dataset infos filez--ignore_verificationsz0Run the test without checksums and splits checksz--force_redownloadzForce dataset redownloadz--save_infoszalias for save_info)func)
add_parseradd_argumentstrset_defaultsr   )r   Zrun_beam_parserr   r   r   register_subcommand    s:   z"RunBeamCommand.register_subcommandr   r   r   r   r   r   r   r   r   c
                 K   s@   || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	d S r   )
_dataset_name
_cache_dir_beam_pipeline_options	_data_dir_all_configs_save_infos_ignore_verifications_force_redownload_config_kwargs)selfr   r   r   r   r   r   r   r   r   Zconfig_kwargsr   r   r   __init__A   s   
zRunBeamCommand.__init__c                 C   s  dd l }| jd ur| jrtd td | j| j}}t|}t|j}g }| j	r;|j
jjdd | j	dD d}nd }| jrct|jdkrc|jD ]}|||j| j|j|| j|jdd	 qJn||d|| j|| j|jdd
| j |D ]$}	|	j| jstjntjttjd| j rt!j"nt!j#dd | j$r|	$  q|td | j$rt%j&'|( tj)}
t*|jd }t%j&'||}t%j&+|rt%j&,|}nt%j&+|r|}ntd|
  td t%j&'|tj)}t-|
| td|  d S d S )Nr   z?Both parameters `name` and `all_configs` can't be used at once.   c                 S   s   g | ]}|rd |   qS )z--)strip).0optr   r   r   
<listcomp>e   s    z&RunBeamCommand.run.<locals>.<listcomp>,)flags	base_path)config_namer   hashbeam_optionsr   r>   )r?   r   rA   r   r>   )r   F)Zdownload_modeZdownload_configZverification_modeZtry_from_hf_gcszApache beam run successful.z.pyzDataset Infos file saved at r   ).Zapache_beamr,   r0   printexitr+   r   r   module_pathr.   optionsZpipeline_optionsZPipelineOptionssplitlenZBUILDER_CONFIGSappendr   r/   r@   r-   Zbuilder_kwargsgetr4   Zdownload_and_preparer3   r
   ZREUSE_CACHE_IF_EXISTSZFORCE_REDOWNLOADr	   r   ZDOWNLOADED_DATASETS_PATHr2   r   Z	NO_CHECKSZ
ALL_CHECKSr1   ospathjoinZget_imported_module_dirZDATASETDICT_INFOS_FILENAMEr   isfiledirnamer   )r5   ZbeamrK   r?   Zdataset_moduleZbuilder_clsZbuildersrA   Zbuilder_configbuilderZdataset_infos_pathr   Zcombined_pathZdataset_dirZuser_dataset_infos_pathr   r   r   runY   s   






zRunBeamCommand.runN)
__name__
__module____qualname__staticmethodr   r*   r(   boolr6   rP   r   r   r   r   r      s.     	

r   )rJ   argparser   pathlibr   shutilr   typingr   Zdatasetsr   Zdatasets.builderr   Zdatasets.commandsr   Z!datasets.download.download_configr	   Z"datasets.download.download_managerr
   Zdatasets.loadr   r   Zdatasets.utils.info_utilsr   r   r   r   r   r   r   <module>   s    