o
    TZht                     @   s   d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dl	Z
d dlZd dlmZ d dlmZ ejjeZeG dd dejZG d	d
 d
ejZdS )    N)InitVar	dataclass)StringIO)Optionalrequire_storage_cast)
table_castc                   @   s   e Zd ZU dZdZeej ed< dZ	e
ed< dZeee
  ed< dZee
 ed< d	Zeed
< dZeed< dZe
ed< dd ZdS )
TextConfigzBuilderConfig for text files.Nfeatureszutf-8encoding
deprecatederrorsencoding_errorsi   	chunksizeFkeep_linebreaksline	sample_byc                 C   s*   |dkrt d| dt || _d S d S )Nr   z'errors' was deprecated in favor of 'encoding_errors' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'encoding_errors=z
' instead.)warningswarnFutureWarningr   )selfr    r   Z/var/www/html/lang_env/lib/python3.10/site-packages/datasets/packaged_modules/text/text.py__post_init__   s   
zTextConfig.__post_init__)__name__
__module____qualname____doc__r
   r   datasetsZFeatures__annotations__r   strr   r   r   r   intr   boolr   r   r   r   r   r   r	      s   
 r	   c                   @   s>   e Zd ZeZdd Zdd ZdejdejfddZ	d	d
 Z
dS )Textc                 C   s   t j| jjdS )N)r
   )r   ZDatasetInfoconfigr
   )r   r   r   r   _info*   s   z
Text._infoc                    s   | j jstd| j j  | j j}t|tttfr;|}t|tr&|g} fdd|D }tj	tj
jd|idgS g }| D ]!\}}t|trM|g} fdd|D }|tj	|d|id qA|S )a  The `data_files` kwarg in load_dataset() can be a str, List[str], Dict[str,str], or Dict[str,List[str]].

        If str or List[str], then the dataset returns only the 'train' split.
        If dict, then keys should be from the `datasets.Split` enum.
        z=At least one data file must be specified, but got data_files=c                       g | ]}  |qS r   Z
iter_files.0file
dl_managerr   r   
<listcomp>:       z*Text._split_generators.<locals>.<listcomp>files)nameZ
gen_kwargsc                    r&   r   r'   r(   r+   r   r   r-   @   r.   )r$   
data_files
ValueErrorZdownload_and_extract
isinstancer    listtupler   ZSplitGeneratorZSplitZTRAINitemsappend)r   r,   r1   r/   ZsplitsZ
split_namer   r+   r   _split_generators-   s    

zText._split_generatorspa_tablereturnc                 C   sd   | j jd ur&| j jj}tdd | j j D r||}|S t||}|S |tdt	 iS )Nc                 s   s    | ]}t | V  qd S )Nr   )r)   featurer   r   r   	<genexpr>G   s    z#Text._cast_table.<locals>.<genexpr>text)
r$   r
   Zarrow_schemaallvaluescastr   paschemastring)r   r9   rB   r   r   r   _cast_tableD   s   


zText._cast_tablec              	   c   s
   | j jd urt| j jndg}ttj|D ]\}}t|| j j| j j	d}| j j
dkrnd}	 || j j}|s;n2|| 7 }t| }| j jsRdd |D }tjjt|g|d}||f| |fV  |d	7 }q1n| j j
d
krd}d}	 || j j}	|	sn7||	7 }|| 7 }|d}tjjtdd |d d D g|d}||f| |fV  |d	7 }|d }qy|rtjjt|gg|d}||f| |fV  n| j j
dkr| }
tjjt|
gg|d}|| |fV  W d    n1 sw   Y  qd S )Nr=   )r   r   r   r   Tc                 S   s   g | ]}| d qS )
)rstrip)r)   r   r   r   r   r-   `   r.   z)Text._generate_tables.<locals>.<listcomp>)names   Z	paragraph z

c                 S   s   g | ]}|r|qS r   r   )r)   Zexampler   r   r   r-   r   s    Zdocument)r$   r
   r4   	enumerate	itertoolschainfrom_iterableopenr   r   r   readr   readliner   	readlinesr   rA   TableZfrom_arraysarrayrD   split)r   r/   Zpa_table_namesZfile_idxr*   fZ	batch_idxbatchr9   Z	new_batchr=   r   r   r   _generate_tablesQ   s`   
zText._generate_tablesN)r   r   r   r	   ZBUILDER_CONFIG_CLASSr%   r8   rA   rS   rD   rX   r   r   r   r   r#   '   s    r#   )rL   r   dataclassesr   r   ior   typingr   ZpyarrowrA   r   Zdatasets.features.featuresr   Zdatasets.tabler   utilsloggingZ
get_loggerr   loggerZBuilderConfigr	   ZArrowBasedBuilderr#   r   r   r   r   <module>   s    