o
    +if g                     @   s  d Z ddlZddlZddlZddlZddlmZmZ ddlm	Z	m
Z
mZmZmZmZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' e%e(Z)G dd de*Z+e!dd5ddZ,e!dd5ddZ-e!dd6de.de.dee fddZ/e!d d6de.de.dee fd!d"Z0						#d7de.d$eee	e
e.f  dee d%eeee.f  d&eee.e'f  d'eee1e.f  fd(d)Z2					d8de.d&eee.e'f  dee d%eeee.f  d*ee. d$eee	e
e.f  fd+d,Z3					d8de.d&eee.e'f  dee d%eeee.f  d*ee. d$eee	e
e.f  d-ee. fd.d/Z4							#d9de.d0ee. d$eee.ee. ee.ee.ee. f f f  dee d%eeee.f  d&eee.e'f  d'eee1e.f  d-efd1d2Z5							#d9de.d0ee. d$eee.ee. ee.ee.ee. f f f  dee d%eeee.f  d&eee.e'f  d'eee1e.f  fd3d4Z6dS ):zList and inspect datasets.    N)PathPurePath)DictListMappingOptionalSequenceUnion   )config)DownloadConfig)DownloadMode)StreamingDownloadManager)DatasetInfo)dataset_module_factoryget_dataset_builder_classimport_main_classload_dataset_buildermetric_module_factory)
deprecated)relative_to_absolute_path)
get_logger)Versionc                   @   s   e Zd ZdS )SplitsNotFoundErrorN)__name__
__module____qualname__ r   r   I/var/www/html/corbot_env/lib/python3.10/site-packages/datasets/inspect.pyr   0   s    r   z,Use 'huggingface_hub.list_datasets' instead.TFc                 C   s8   t j|d}| sdd |D }|sdd |D }t|S )a]  List all the datasets scripts available on the Hugging Face Hub.

    Args:
        with_community_datasets (`bool`, *optional*, defaults to `True`):
            Include the community provided datasets.
        with_details (`bool`, *optional*, defaults to `False`):
            Return the full details on the datasets instead of only the short name.

    Example:

    ```py
    >>> from datasets import list_datasets
    >>> list_datasets()
    ['acronym_identification',
     'ade_corpus_v2',
     'adversarial_qa',
     'aeslc',
     'afrikaans_ner_corpus',
     'ag_news',
     ...
    ]
    ```
    )fullc                 S      g | ]	}d |j vr|qS /id.0datasetr   r   r   
<listcomp>O       z!list_datasets.<locals>.<listcomp>c                 S      g | ]}|j qS r   r#   r%   r   r   r   r(   Q       )huggingface_hublist_datasetslist)with_community_datasetswith_detailsdatasetsr   r   r   r-   4   s   r-   ux   Use 'evaluate.list_evaluation_modules' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluatec                 C   s0   t  }| sdd |D }|sdd |D }|S )u  List all the metrics script available on the Hugging Face Hub.

    <Deprecated version="2.5.0">

    Use `evaluate.list_evaluation_modules` instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        with_community_metrics (:obj:`bool`, optional, default ``True``): Include the community provided metrics.
        with_details (:obj:`bool`, optional, default ``False``): Return the full details on the metrics instead of only the short name.

    Example:

    ```py
    >>> from datasets import list_metrics
    >>> list_metrics()
    ['accuracy',
     'bertscore',
     'bleu',
     'bleurt',
     'cer',
     'chrf',
     ...
    ]
    ```
    c                 S   r    r!   r#   r&   metricr   r   r   r(   v   r)   z list_metrics.<locals>.<listcomp>c                 S   r*   r   r#   r2   r   r   r   r(   x   r+   )r,   list_metrics)with_community_metricsr0   metricsr   r   r   r4   U   s   r4   z?Clone the dataset repository from the Hugging Face Hub instead.path
local_pathdownload_configc                 K   s   |du rt di |}tj| rtt| j} tj| r'tj	| |dd nt
jtj|jdj| d||jd td|  d| d	t|  d
 dS )a  
    Allow inspection/modification of a dataset script by copying on local drive at local_path.

    Args:
        path (`str`): Path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name
                as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`.
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`.
        local_path (`str`):
            Path to the local folder to copy the dataset script to.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        **download_kwargs (additional keyword arguments):
            Optional arguments for [`DownloadConfig`] which will override
            the attributes of `download_config` if supplied.
    NT)dirs_exist_ok)endpointtokenr'   )repo_id	repo_type	local_dirforce_downloadzThe dataset  can be inspected at z\. You can modify this loading script  if it has one and use it with `datasets.load_dataset("")`.r   )r   osr7   isfilestrr   parentisdirshutilcopytreer,   HfApir   HF_ENDPOINTr<   snapshot_downloadr@   printr   as_posix)r7   r8   r9   download_kwargsr   r   r   inspect_dataset|   s   

rP   uz   Use 'evaluate.inspect_evaluation_module' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluatec              
   K   s   t | fd|i|}t|jdd}t|}tj|}t|D ]>\}}	}
tj	|tj
||}tj|dd dd |	D |	dd< |
D ]}ttj	||tj	|| qFt|| q!t|}td	|  d
| d| dt|  d	 dS )u  
    Allow inspection/modification of a metric script by copying it on local drive at local_path.

    <Deprecated version="2.5.0">

    Use `evaluate.inspect_evaluation_module` instead, from the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        path (``str``): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. ``'./dataset/squad'`` or ``'./dataset/squad/squad.py'``
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with ``datasets.list_datasets()``)
                e.g. ``'squad'``, ``'glue'`` or ``'openai/webtext'``
        local_path (``str``): path to the local folder to copy the datset script to.
        download_config (Optional ``datasets.DownloadConfig``): specific download configuration parameters.
        **download_kwargs (additional keyword arguments): optional attributes for DownloadConfig() which will override the attributes in download_config if supplied.
    r9   F)r'   T)exist_okc                 S   s   g | ]	}| d s|qS )).__)
startswith)r&   dirnamer   r   r   r(      r)   z"inspect_metric.<locals>.<listcomp>Nz"The processing scripts for metric rA   z. The main class is in zP. You can modify this processing scripts and use it with `datasets.load_metric("rB   )r   r   module_pathinspectgetsourcefilerC   r7   rU   walkjoinrelpathmakedirsrH   copy2copystatr   rM   r   rN   )r7   r8   r9   rO   metric_module
metric_clsmodule_source_pathmodule_source_dirpathdirpathdirnames	filenamesdst_dirpathfilenamer   r   r   inspect_metric   s&   
"
rh   r   
data_filesdownload_moderevisionr<   c           	         sJ   |dkrt dt |td} fdd|D S )a!
  Get the meta information about a dataset, returned as a dict mapping config name to DatasetInfoDict.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        use_auth_token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.

            <Deprecated version="2.14.0">

            `use_auth_token` was deprecated in favor of `token` in version 2.14.0 and will be removed in 3.0.0.

            </Deprecated>

        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_infos
    >>> get_dataset_infos('rotten_tomatoes')
    {'default': DatasetInfo(description="Movie Review Dataset.
This is a dataset of containing 5,331 positive and 5,331 negative processed
sentences from Rotten Tomatoes movie reviews...), ...}
    ```
    r   'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'token=<use_auth_token>' instead.)r7   rk   r9   rj   ri   r<   c                    s,   i | ]}|t d|d  qS ))r7   config_nameri   r9   rj   rk   r<   r   )get_dataset_config_info)r&   rm   config_kwargsri   r9   rj   r7   rk   r<   r   r   
<dictcomp>  s    z%get_dataset_infos.<locals>.<dictcomp>)warningswarnFutureWarningget_dataset_config_names)	r7   ri   r9   rj   rk   r<   use_auth_tokenrp   config_namesr   ro   r   get_dataset_infos   s"   7rx   dynamic_modules_pathc           	      K   sT   t | f|||||d|}t|tj| d}t|j p)|j	d|j
p'dgS )a  Get the list of available config names for a particular dataset.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        dynamic_modules_path (`str`, defaults to `~/.cache/huggingface/modules/datasets_modules`):
            Optional path to the directory in which the dynamic modules are saved. It must have been initialized with `init_dynamic_modules`.
            By default the datasets and metrics are stored inside the `datasets_modules` module.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `token`.

    Example:

    ```py
    >>> from datasets import get_dataset_config_names
    >>> get_dataset_config_names("glue")
    ['cola',
     'sst2',
     'mrpc',
     'qqp',
     'stsb',
     'mnli',
     'mnli_mismatched',
     'mnli_matched',
     'qnli',
     'rte',
     'wnli',
     'ax']
    ```
    rk   r9   rj   ry   ri   dataset_namerm   default)r   r   rC   r7   basenamer.   builder_configskeysbuilder_kwargsgetDEFAULT_CONFIG_NAME)	r7   rk   r9   rj   ry   ri   rO   dataset_modulebuilder_clsr   r   r   ru   #  s   8	ru   returnc                 K   sj   t | f|||||d|}t|tj| d}t|j }	|	r.t|	dkr+|	d nd}
nd}
|j	p4|
S )a	  Get the default config name for a particular dataset.
    Can return None only if the dataset has multiple configurations and no default configuration.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        dynamic_modules_path (`str`, defaults to `~/.cache/huggingface/modules/datasets_modules`):
            Optional path to the directory in which the dynamic modules are saved. It must have been initialized with `init_dynamic_modules`.
            By default the datasets and metrics are stored inside the `datasets_modules` module.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `token`.

    Returns:
        Optional[str]: the default config name if there is one

    Example:

    ```py
    >>> from datasets import get_dataset_default_config_name
    >>> get_dataset_default_config_name("openbookqa")
    'main'
    ```
    rz   r{   r
   r   Nr}   )
r   r   rC   r7   r~   r.   r   r   lenr   )r7   rk   r9   rj   ry   ri   rO   r   r   r   default_config_namer   r   r   get_dataset_default_config_namej  s"   1	
r   rm   c              
      s   |dkrt dt |}t f||||||d|}	|	j}
|
jdu rd|r)| nt }|dur3||_|		t
|	j|d z fdd|	t
|	j|dD |
_W |
S  tyc } ztd|d}~ww |
S )	a  Get the meta information (DatasetInfo) about a dataset for a particular config

    Args:
        path (``str``): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. ``'./dataset/squad'`` or ``'./dataset/squad/squad.py'``
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with ``datasets.list_datasets()``)
                e.g. ``'squad'``, ``'glue'`` or ``'openai/webtext'``
        config_name (:obj:`str`, optional): Defining the name of the dataset configuration.
        data_files (:obj:`str` or :obj:`Sequence` or :obj:`Mapping`, optional): Path(s) to source data file(s).
        download_config (:class:`~download.DownloadConfig`, optional): Specific download configuration parameters.
        download_mode (:class:`DownloadMode` or :obj:`str`, default ``REUSE_DATASET_IF_EXISTS``): Download/generate mode.
        revision (:class:`~utils.Version` or :obj:`str`, optional): Version of the dataset script to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.
        use_auth_token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.

            <Deprecated version="2.14.0">

            `use_auth_token` was deprecated in favor of `token` in version 2.14.0 and will be removed in 3.0.0.

            </Deprecated>

        **config_kwargs (additional keyword arguments): optional attributes for builder class which will override the attributes if supplied.

    r   rl   )nameri   r9   rj   rk   r<   N)	base_pathr9   c                    s   i | ]
}|j |j  d qS ))r   r|   )r   )r&   split_generatorr7   r   r   rq     s    z+get_dataset_config_info.<locals>.<dictcomp>z<The split names could not be parsed from the dataset config.)rr   rs   rt   r   infosplitscopyr   r<   _check_manual_downloadr   r   _split_generators	Exceptionr   )r7   rm   ri   r9   rj   rk   r<   rv   rp   builderr   errr   r   r   rn     sL   )




rn   c           
   	   K   sD   |dkrt dt |}t| f||||||d|}	t|	j S )aW	  Get the list of available splits for a particular config and dataset.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        config_name (`str`, *optional*):
            Defining the name of the dataset configuration.
        data_files (`str` or `Sequence` or `Mapping`, *optional*):
            Path(s) to source data file(s).
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        revision ([`Version`] or `str`, *optional*):
            Version of the dataset script to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        use_auth_token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.

            <Deprecated version="2.14.0">

            `use_auth_token` was deprecated in favor of `token` in version 2.14.0 and will be removed in 3.0.0.

            </Deprecated>

        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_split_names
    >>> get_dataset_split_names('rotten_tomatoes')
    ['train', 'validation', 'test']
    ```
    r   rl   )rm   ri   r9   rj   rk   r<   )rr   rs   rt   rn   r.   r   r   )
r7   rm   ri   r9   rj   rk   r<   rv   rp   r   r   r   r   get_dataset_split_names  s&   8
r   )TF)N)NNNNNr   )NNNNN)NNNNNNr   )7__doc__rW   rC   rH   rr   pathlibr   r   typingr   r   r   r   r   r	   r,    r   download.download_configr   download.download_managerr   #download.streaming_download_managerr   r   r   loadr   r   r   r   r   utils.deprecation_utilsr   utils.file_utilsr   utils.loggingr   utils.versionr   r   logger
ValueErrorr   r-   r4   rE   rP   rh   boolrx   ru   r   rn   r   r   r   r   r   <module>   s    $$+
X
I
E(

Q(