o
    +if                     @   s   d dl Z d dlZd dlmZ d dlmZmZ ddlmZ ddl	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z  ddl!m"Z" ddl#m$Z$ dd	l%m&Z&m'Z' e"e(Z)erndd
l*m+Z+ ddee fddZ,dddZ-dS )    N)wraps)TYPE_CHECKINGOptional   )DownloadConfig)	xbasenamexdirname	xet_parsexexistsxgetsizexglob
xgzip_openxisdirxisfilexjoinxlistdirxnumpy_loadxopenxpandas_read_csvxpandas_read_excelxPathxpyarrow_parquet_read_tablexrelpathxsio_loadmatxsplit	xsplitextxwalkxxml_dom_minidom_parse)
get_logger)patch_submodule)get_importslock_importable_fileDatasetBuilderdownload_configc                    s$  t | }t|dr|jrt|jtr j|j_ j|j_dS  fdd}t|d|t	
  t|d|t
  t|d|t
  t|d|t
  t|d	t
  t|d
t
  t|dt
  t|dt
  t|dt
  t|dt
  t|d|t
  t|d|t
  t|d|t
  t|d|t
  t|dt
  t|d|t
  t|d|t
  t|d|tdgd
  t|d|tdgd
  t|d|tdgd
  t|d|t
  t|d|t
  |j !dst|d|t"
   |_dS )aE  Extend the module to support streaming.

    We patch some functions in the module to use `fsspec` to support data streaming:
    - We use `fsspec.open` to open and read remote files. We patch the module function:
      - `open`
    - We use the "::" hop separator to join paths and navigate remote compressed/archive files. We patch the module
      functions:
      - `os.path.join`
      - `pathlib.Path.joinpath` and `pathlib.Path.__truediv__` (called when using the "/" operator)

    The patched functions are replaced with custom functions defined to work with the
    :class:`~download.streaming_download_manager.StreamingDownloadManager`.

    Args:
        module_path: Path to the module to be extended.
        download_config : mainly use use_auth_token or storage_options to support different platforms and auth types.
    _patched_for_streamingNc                    s    t   fdd}d|_|S )Nc                     s   | d i|S )Nr$    )argskwargs)r$   functionr&   K/var/www/html/corbot_env/lib/python3.10/site-packages/datasets/streaming.pywrapperI   s   z?extend_module_for_streaming.<locals>.wrap_auth.<locals>.wrapper	wrap_auth)r   _decorator_name_)r)   r+   r$   )r)   r*   r,   H   s   z.extend_module_for_streaming.<locals>.wrap_authopenz
os.listdirzos.walkz	glob.globzos.path.joinzos.path.dirnamezos.path.basenamezos.path.relpathzos.path.splitzos.path.splitextzos.path.existszos.path.isdirzos.path.isfilezos.path.getsizezpathlib.Pathz	gzip.openz
numpy.loadzpandas.read_csv__version__)attrszpandas.read_excelzscipy.io.loadmatzxml.etree.ElementTree.parsezxml.dom.minidom.parsezdatasets.packaged_modules.zpyarrow.parquet.read_table)#	importlibimport_modulehasattrr%   
isinstancer   tokenstorage_optionsr   r   startr   r   r   r   r   r   r   r   r   r
   r   r   r   r   r   r   r   r   r   r	   r   __name__
startswithr   )module_pathr$   moduler,   r&   r.   r*   extend_module_for_streaming,   s@   


	
r=   builderr#   c                    s   t | j| jd}t| j|d | jdsVt| j}t	|/ t
|D ]"}|d dkrF|d }d| jddd	 |g }t||d q$W d   n1 sQw   Y  dd
lm   fddt| jdd D }|D ]}t||d qndS )zExtend the dataset builder module and the modules imported by it to support streaming.

    Args:
        builder (:class:`DatasetBuilder`): Dataset builder instance.
    )r7   r6   r.   z	datasets.r   internalr   .Nr"   c                    s(   g | ]}t | r|j jkr|jqS r&   )
issubclass
__module__).0clsr"   r&   r*   
<listcomp>   s    z8extend_dataset_builder_for_streaming.<locals>.<listcomp>)r   r7   r6   r=   rC   r:   inspectgetfile	__class__r!   r    joinsplitr>   r#   type__mro__)r>   r$   importable_fileimportsinternal_import_nameinternal_module_nameparent_builder_modulesr<   r&   r"   r*   $extend_dataset_builder_for_streamingp   s(   
 

rS   )N)r>   r#   ).r2   rG   	functoolsr   typingr   r   download.download_configr   utils.file_utilsr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   utils.loggingr   utils.patchingr   utils.py_utilsr    r!   r9   loggerr>   r#   r=   rS   r&   r&   r&   r*   <module>   s    dD