o
    TZh                     @   s   d dl Z d dlmZ d dlmZmZ d dlZd dlm	Z
 d dlZd dlmZ ejjeZeG dd dejZG dd dejZdS )	    N)	dataclass)ListOptional)
table_castc                   @   sH   e Zd ZU dZdZee ed< dZee	e
  ed< dZeej ed< dS )ParquetConfigzBuilderConfig for Parquet.N
batch_sizecolumnsfeatures)__name__
__module____qualname____doc__r   r   int__annotations__r   r   strr	   datasetsFeatures r   r   `/var/www/html/lang_env/lib/python3.10/site-packages/datasets/packaged_modules/parquet/parquet.pyr      s
   
 r   c                   @   s>   e Zd ZeZdd Zdd ZdejdejfddZ	d	d
 Z
dS )Parquetc                 C   s\   | j jd ur&| j jd ur&t| j jt| j jkr&td| j j d| j j tj| j jdS )NzIThe columns and features argument must contain the same columns, but got z and )r	   )configr   r	   set
ValueErrorr   ZDatasetInfoselfr   r   r   _info   s   zParquet._infoc              
      sp  j jstdj j  j j}t|tttfr;|}t|tr&|g} fdd|D }tj	tj
jd|idgS g }| D ]P\}}t|trM|g} fdd|D }jjdu rtj|D ]"}t|d}tjt|j_W d   n1 sw   Y   |tj	|d|id qAj jdurtj jtjjkrtfd	d
jj D j_|S )z-We handle string, list and dicts in datafilesz=At least one data file must be specified, but got data_files=c                       g | ]}  |qS r   Z
iter_files.0file
dl_managerr   r   
<listcomp>1       z-Parquet._split_generators.<locals>.<listcomp>files)nameZ
gen_kwargsc                    r   r   r   r   r!   r   r   r#   8   r$   Nrbc                    s"   i | ]\}}| j jv r||qS r   )r   r   )r   colZfeatr   r   r   
<dictcomp>B   s   " z-Parquet._split_generators.<locals>.<dictcomp>)r   
data_filesr   Zdownload_and_extract
isinstancer   listtupler   ZSplitGeneratorZSplitZTRAINitemsinfor	   	itertoolschainfrom_iterableopenr   Zfrom_arrow_schemapqZread_schemaappendr   r   )r   r"   r*   r%   ZsplitsZ
split_namer    fr   )r"   r   r   _split_generators'   s4   

$zParquet._split_generatorspa_tablereturnc                 C   s    | j jd urt|| j jj}|S N)r/   r	   r   arrow_schema)r   r8   r   r   r   _cast_tableF   s   zParquet._cast_tablec                 c   sX   | j jd ur.| j jd ur.tdd | jjjD t| j jkr.td| j j d| jj dttj	
|D ]s\}}t|db}t|}|jjdkr| j jpU|jdj}z&t|j|| j jdD ]\}}tj|g}	| d	| | |	fV  qbW n ty }
 ztd
| dt|
 d|
   d }
~
ww W d    n1 sw   Y  q6d S )Nc                 s   s    | ]}|j V  qd S r:   )r&   )r   fieldr   r   r   	<genexpr>O   s    z+Parquet._generate_tables.<locals>.<genexpr>z)Tried to load parquet data with columns 'z' with mismatching features ''r'   r   )r   r   _zFailed to read file 'z' with error z: )r   r	   r   sortedr/   r;   r   	enumerater0   r1   r2   r3   r4   ZParquetFilemetadataZnum_row_groupsr   Z	row_groupZnum_rowsZiter_batchespaTableZfrom_batchesr<   loggererrortype)r   r%   Zfile_idxr    r6   Zparquet_filer   Z	batch_idxZrecord_batchr8   er   r   r   _generate_tablesM   s6   $
 zParquet._generate_tablesN)r
   r   r   r   ZBUILDER_CONFIG_CLASSr   r7   rD   rE   r<   rJ   r   r   r   r   r      s    r   )r0   dataclassesr   typingr   r   ZpyarrowrD   Zpyarrow.parquetZparquetr4   r   Zdatasets.tabler   utilsloggingZ
get_loggerr
   rF   ZBuilderConfigr   ZArrowBasedBuilderr   r   r   r   r   <module>   s    