o
    Zh>                     @   sN  d dl mZ d dlZd dlZd dlmZ d dlZd dlZd dl	Z
d dl	mZ d dlmZmZ d dlmZ d dlmZmZmZ zd dlmZ d dlmZmZ W n eyY   dZY nw zd dlZd dlmZ d d	l m!Z! d d
lm"Z" W n ey   d ZZY nw ej#jZ$dd Z%dd Z&ej#jdd Z'ej#jdd Z(ej#jdd Z)ej#jdd Z*ej#jdd Z+dd Z,dd Z-ej#j.dd Z/ej#jdd  Z0ej#jd!d" Z1d#d$ Z2ej#jd%d& Z3d'd( Z4ej#5d)de e6 gej#5d*d+d,d- Z7d.d/ Z8d0d1 Z9d2d3 Z:d4d5 Z;d6d7 Z<d8d9 Z=d:d; Z>ej#jd<d= Z?ej#jd>d? Z@ej#jd@dA ZAdBdC ZBdDdE ZCej#jdFdG ZDdHdI ZEej#jej#jFej#GdJej#GdKdLdM ZHej#5dNdOdP dQdP dRdP dSdP gej#5dTdUdVgdWdX ZIdYdZ ZJd[d\ ZKd]d^ ZLd_d` ZMdadb ZNdcdd ZOej#jPdedf ZQej#jPdgdh ZRdS )i    )OrderedDictN)copytree)fs)LocalFileSystem
FileSystem)util)_check_roundtrip_roundtrip_table_test_dataframe)_read_table_write_table)dataframe_with_lists)alltypes_samplec                 C   s   t dg di}tjtdd t|| d dd W d    n1 s$w   Y  tjtdd t|| d dd	 W d    d S 1 sEw   Y  d S )
Na         z"Unsupported Parquet format versionmatchztest_version.parquetz2.2versionz%Unsupported Parquet data page version)data_page_version)patablepytestraises
ValueErrorr   )tempdirr    r   W/var/www/html/lang_env/lib/python3.10/site-packages/pyarrow/tests/parquet/test_basic.pytest_parquet_invalid_version7   s   
"r!   c                  C   sH   t g dd } t jj| gdgd}ddg}|D ]}t||d qd S )Nr   i f0namesi   i   )data_page_size)r   arrayTablefrom_arraysr   )arrtZ
page_sizesZtarget_page_sizer   r   r    test_set_data_page_sizeA   s   r+   c                  C   s,   t d} tjj| dd}t|dddd d S )Nd   FZpreserve_index
   r   2.4)r%   Zwrite_batch_sizer   )r
   r   r'   from_pandasr   dfr   r   r   r    test_set_write_batch_sizeK   s
   
r3   c                  C   sh   t d} tjj| dd}t|dddd tt t|dddd W d    d S 1 s-w   Y  d S )	Nr,   Fr-   r   r.   r/   )Zdictionary_pagesize_limitr%   r   r   )r
   r   r'   r0   r   r   r   	TypeErrorr1   r   r   r    "test_set_dictionary_pagesize_limitU   s   "r5   c               	   C   s   g } t jtdd}| t j|gd  t \}}t j|}| t j|gd  dD ]}dD ]}| D ]
}t|d||d q8q4q0d S )Nr.   sizer   )z1.0z2.0)TF2.6)r   r   use_dictionary)	r   RecordBatchr0   r   appendr'   from_batchesr   r   )Ztablesbatchr2   _r   r9   r   r   r   r    test_chunked_table_writeb   s"   
r?   c                 C   s   t dd}tj|}t|ddidd t| d }t|d}t||dd	 W d    n1 s1w   Y  tj	|dd
}|
|sDJ d S )Nr.   r6   
memory_mapTr8   Zread_table_kwargsr   tmp_filewbr   )r@   r   r   r'   r0   r   stropenr   pqread_pandasequalsr   r2   r   filenamef
table_readr   r   r    test_memory_mapu      

rN   c                 C   s   t dd}tj|}t|ddidd t| d }t|d}t||dd	 W d    n1 s1w   Y  tj	|d
d}|
|sDJ d S )Nr.   r6   buffer_sizei  r8   rA   rB   rC   r   i   )rP   rD   rJ   r   r   r    test_enable_buffered_stream   rO   rQ   c                 C   sj   t jt dggdg}d}| | }| rJ t|t| | s&J tt|}||s3J d S )N*   intsz	foo # bar)	r   r'   r(   r&   existsr   rE   r   rI   )r   r   rK   pathrM   r   r   r    test_special_chars_filename   s   rV   c                   C   sv   t jtdd td  W d    n1 sw   Y  t jtdd td  W d    d S 1 s4w   Y  d S )NNoner   )r   r   r4   rG   
read_tableParquetFiler   r   r   r    test_invalid_source   s   "rZ   c                  C   s*   t jttdgdgd} t| dd d S )Ni@  r"   r#   r   )Zrow_group_size)r   r   listranger   )r*   r   r   r    (test_file_with_over_int16_max_row_groups   s   r]   c                  C   s   t dd} tj| }tjjdd | D |jjd}|jdj	t
 ks)J |jdj	tt
 ks9J t|dd	 d S )
Nr.   r6   c                 S   s   g | ]}| d dd  qS )r   N)chunk).0colr   r   r    
<listcomp>   s    z.test_empty_table_roundtrip.<locals>.<listcomp>r#   nullZ	null_listr8   r   )r   r   r'   r0   r(   Zitercolumnsschemar$   fieldtyperb   list_r   r1   r   r   r    test_empty_table_roundtrip   s   
 
rg   c                  C   s$   t  } tjj| dd}t| d S )NFr-   )pd	DataFramer   r'   r0   r   )r2   emptyr   r   r    test_empty_table_no_columns   s   rk   c                     sp   t t tt d g t dddgg}  fdd| D } fdd|D }tj|t }t| d S )N)int32Zlist_stringr   )Gc                    s$   g | ]}t j|t  d  qS )re   )r   r&   structflattenr_   r=   colsr   r    ra      s    zEtest_write_nested_zero_length_array_chunk_failure.<locals>.<listcomp>c                    s"   g | ]}t jj|t  d qS ))rc   )r   r:   r(   rc   rq   rr   r   r    ra      s    )	r   r   rl   rf   stringr'   r<   rc   r   )dataZ	my_arraysZ
my_batchesZtblr   rr   r    1test_write_nested_zero_length_array_chunk_failure   s   

rv   c                 C   s   | d }t dtjdtjdi}t|| t|}| }t	|| t
| d }t dtjdtjdi}t|| t|}| }t	|| d S )Nzzzz.parquetxr.   Zdtype)rh   ri   nparangeZint64r   r   	to_pandastmassert_frame_equalrE   )r   rU   r2   rM   Zdf_readr   r   r    test_multiple_path_types   s   

r~   c                 C   s   | d }t dg di}t|| t|}t|}||s"J tt	 t|t
 d W d    d S 1 s:w   Y  d S )Ntest.parquetr   r   
filesystem)r   r   r   r   ZFSProtocolClassr   rI   r   r   r4   r   )r   rU   r   Zfs_protocol_objresultr   r   r    test_fspath   s   

"r   r   name)data.parquetu   例.parquetc                 C   s   t dg di}| | }t|t| t|  tj||d}W d    n1 s,w   Y  ||s8J |	  |
 rBJ t|  tj|||d W d    n1 sZw   Y  t|}||skJ d S )Nr   r   r   )r   r   rG   write_tablerE   r   Z
change_cwdrX   rI   unlinkrT   )r   r   r   r   rU   r   r   r   r    test_relative_paths	  s   
r   c                   C   s:   t t td W d    d S 1 sw   Y  d S )Nzi-am-not-existing.parquet)r   r   FileNotFoundErrorrG   rX   r   r   r   r    test_read_non_existing_file"  s   "r   c                  C   sT   G dd dt j} tjtdd t| d W d    d S 1 s#w   Y  d S )Nc                   @   s   e Zd Zdd Zdd ZdS )z3test_file_error_python_exception.<locals>.BogusFilec                 W      t dNzorglubZeroDivisionErrorselfargsr   r   r    read*     z8test_file_error_python_exception.<locals>.BogusFile.readc                 W   r   r   r   r   r   r   r    seek-  r   z8test_file_error_python_exception.<locals>.BogusFile.seekN)__name__
__module____qualname__r   r   r   r   r   r    	BogusFile)  s    r   r   r       )ioBytesIOr   r   r   rG   rX   )r   r   r   r     test_file_error_python_exception(  s   "r   c                 C   s   t dg di}t|t| d  tt| d d}t|}W d    n1 s,w   Y  ||s8J tt| d d}tt |}W d    n1 sTw   Y  ||s`J d S )Nr   r   r   rb)	r   r   rG   r   rE   rF   rX   rI   Z
PythonFile)r   r   rL   r   r   r   r    test_parquet_read_from_buffer5  s   r   c                  C   s  t ttttd} t ttttd}| | g}t jj|ddgd}t	||dddd t	||ddgdgd t	||dddgddgd t jj| |gddgd}t	||dgdgd	 t jj|gd
gd}t
t t	||ddd W d    d S 1 sw   Y  d S )Nr,   r   br#   gzipFT)expectedcompressionr9   use_byte_stream_split)r   r9   r   tmp)r   r   r9   )r   r&   r[   mapfloatr\   intr'   r(   r   r   r   IOError)	arr_floatarr_intZ
data_floatr   mixed_tabler   r   r    test_byte_stream_splitC  s8   "r   c               
   C   sz  t ttttd} t ttttd}t jdd tdD t  d}t jdd tdD t dd}t g dd }t jj	| ||||gg d	d
}t
||dddddd t
||ddd t
||dddddd t
||dddddd t
||ddddddd t
||dddid tjtdd t
||dddddd W d    n1 sw   Y  tt t
||dddddd W d    n1 sw   Y  tt t
||ddd W d    n1 sw   Y  tt t
||dddid W d    n	1 sw   Y  tt t
||dgddid W d    n	1 s1w   Y  tt t
||ddid W d    n	1 sPw   Y  tt t
||ddgddddd W d    n	1 stw   Y  tt t
||ddddddd W d    n	1 sw   Y  tt t
||ddd W d    d S 1 sw   Y  d S )Nr,   c                 S   s   g | ]}t |qS r   )rE   r_   rw   r   r   r    ra   k  s    z(test_column_encoding.<locals>.<listcomp>rn   c                 S   s   g | ]	}t |d qS )r.   )rE   zfillr   r   r   r    ra   m  s    r.   )FTFF   )r   r   cder#   FZBYTE_STREAM_SPLITZPLAIN)r   r   r   )r   r9   column_encodingZDELTA_BINARY_PACKEDZDELTA_LENGTH_BYTE_ARRAYZDELTA_BYTE_ARRAY)r   r   r   r   r   ZRLEz0BYTE_STREAM_SPLIT only supports FLOAT and DOUBLEr   ZRLE_DICTIONARYr   ZMADE_UP_ENCODINGr   )r   r   )r   r9   r   r   T)r   r&   r[   r   r   r\   r   binaryr'   r(   r   r   r   r   OSErrorr   r4   )r   r   Zarr_binZarr_flbaZarr_boolr   r   r   r    test_column_encodingh  s    


$r   c               	   C   s   t ttttd} | | g}t jj|ddgd}t||ddd t||ddd t||dd	d
d t||dddd
d t||ddd t||ddd g d}t	
 }|D ]#\}}tttf t||||d W d    n1 sww   Y  qYd S )N  r   r   r#   r   r   )r   r   compression_level   snappy)r   r   )r   r   r   r   Zlz4	   ))r      )r   i)rW   i  )Zlzo   )r   r   )r   r&   r[   r   r   r\   r'   r(   r   r   r   r   r   r   r   r   )r)   ru   r   Zinvalid_combinationsbufcodeclevelr   r   r    test_compression_level  s>   	r   c                  C   sP   t g d} d}t j| g|g}t|ddid}d}|jd j|ks&J d S )N)r   r   r   r   r   zprohib; ,	{}ZflavorZspark)Zwrite_table_kwargsZprohib______r   )r   r&   r'   r(   r	   rc   r   )Za0r   r   r   Zexpected_namer   r   r     test_sanitized_spark_field_names  s   r   c                  C   sl   t dd} tj| }t }t||ddd |d t|dd}|d t|d	d}|	|s4J d S )
Ni'  r6   ZSNAPPYr8   )r   r   r   T)use_threadsF)
r   r   r'   r0   r   r   r   r   r   rI   )r2   r   r   Ztable1table2r   r   r    test_multithreaded_read  s   


r   c                  C   s   t jtdgg dd} tj|  }t	 }t
||dd |d t|}||s0J tt t
||dd W d    d S 1 sHw   Y  d S )Nr   )ABCD)columns)
chunk_sizer   )rh   ri   ry   rz   r   r'   r0   Zreset_indexr   r   r   r   r   rI   r   r   r   )ru   r   r   r   r   r   r    test_min_chunksize/  s   
"r   c                 C   s   t tdttddtdddtjddd	d
g dt tdt jdddt jddddt jddddd	}t	j
|}| d }z	t||dd W n
 t	jyX   Y nw | r_J d S )Nabcr   r   r      u1      @      @float64rx   TFT20130101periodsz
US/Eastern)r   tzns)r   freq)	r   r   r   r   r   rL   ghirB   r/   r   )rh   ri   r[   r\   ry   rz   astypeCategorical
date_ranger   r'   r0   r   ZArrowExceptionrT   )r   r2   ZpdfrK   r   r   r    (test_write_error_deletes_incomplete_file@  s(   
r   c              
   C   sN   d}zt | W d S  ty& } z||jd v sJ W Y d }~d S d }~ww )Nznonexistent-file.parquetr   )rG   rX   	Exceptionr   )r   rU   r   r   r   r    test_read_non_existent_file[  s    r   c                 C   sH   t   t jdd t| d  W d    d S 1 sw   Y  d S )Nerror)actionzv0.7.1.parquet)warningscatch_warningssimplefilterrG   rX   )Zdatadirr   r   r    test_read_table_doesnt_warnc  s   
"r   c                  C   s`   t jt ddggdg} t }tj| |dd |d t	|}t
| |   d S )Nr   defZsome_colr   r   r   )r   r'   r(   r&   r   r   rG   r   r   rX   r|   r}   r{   )r   rL   Z	roundtripr   r   r    test_zlib_compression_bugi  s   

r   c              	   C   s   t | d }tjtjtfdd" t|d}W d    n1 s!w   Y  t| W d    n1 s5w   Y  tjtjtfdd( t|d}|	d W d    n1 sZw   Y  t| W d    d S 1 sow   Y  d S )Nr   zsize is 0 bytesr   rC   zsize is 4 bytess   ffff)
rE   r   r   r   ZArrowInvalidr   rF   rG   rX   write)r   rU   rL   r   r   r    test_parquet_file_too_smallu  s"   "r   zignore:RangeIndex:FutureWarningz.ignore:tostring:DeprecationWarning:fastparquetc           	      C   s   t d}ttdttddtjddddg d	tjd
ddt	g dd}t
|}t| d }tj||d d ||}| }t|| t| d }||| t|}|d t|d< t| | d S )Nfastparquetr   r   r   r   r   r   rx   r   r   r   r   )r   r   r   )r   r   r   r   r   rL   zcross_compat_arrow.parquetr   z cross_compat_fastparquet.parquetrL   )r   Zimportorskiprh   ri   r[   r\   ry   rz   r   r   r   r   rE   rG   r   rY   r{   r|   r}   r   rH   r   object)	r   fpr2   r   Z
file_arrowZfp_fileZdf_fpZfile_fastparquetZtable_fpr   r   r    $test_fastparquet_cross_compatibility  s*   



r   array_factoryc                   C      t dd gd S Nr   r.   r   r&   r   r   r   r    <lambda>      r   c                   C      t dd gd  S r   r   r&   Zdictionary_encoder   r   r   r    r         c                   C   r   N r.   r   r   r   r   r    r     r   c                   C   r   r   r   r   r   r   r    r     r   read_dictionaryFTc                 C   s   t jd|  i}t }tj||dd |d |rdgnd }tj|d|d}|j	D ]}|j
\}| d }| |jd ksCJ q,d S )	Nr`   T)r9   r   F)r   r   r       )r   r'   Zfrom_pydictr   r   rG   r   r   rX   r   chunksbuffersZ
to_pybytesr7   )r   r   Z
orig_tableZbior   r`   r^   r   r   r   r    test_buffer_contents  s   

r  c                 C   sP   t jt tdgdgd}| d }tj||dd t|}||s&J d S )Nr   rS   r#   zarrow-10480.pyarrow.gzZGZIPr   )r   r   r&   r\   rG   r   rX   rI   )r   r   rU   r   r   r   r    "test_parquet_compression_roundtrip  s
   
r  c                 C   s   t jt jg ddgdg}| d }d}t||j}t|D ]}|| q W d    n1 s2w   Y  t	|}|j
j|ksDJ t|D ]}|||sTJ qHd S )Nrl   rn   r"   zempty_row_groups.parquetr   )r   r'   r(   r&   rG   ZParquetWriterrc   r\   r   rY   metadataZnum_row_groupsZread_row_grouprI   )r   r   rU   Z
num_groupswriterr   readerr   r   r    test_empty_row_groups  s   
r  c                 C   sV   d gd }| dg tj|gdg}| d }t|| t|}||ks)J d S )Ni   r   columnzarrow-11607.parquet)r;   r   r'   r(   rG   r   rX   )r   ru   r   rU   r   r   r   r    test_reads_over_batch  s   

r
  c                 C   s   | d }|j dd tjg dg dgddgd}t||d	  tjg d
g dgddgd}t||d  tt|}tjg dg dgddgd}||ksTJ d S )NZ dataset_column_order_permutationT)exist_okr   )皙?皙?333333?r   r   r#   zdata1.parquet)皙?      ?333333?)r   r   r   zdata2.parquet)r   r   r   r   r   r   )r  r  r  r  r  r  )mkdirr   r   rG   r   rX   rE   )r   caseZdata1Zdata2r   r   r   r   r     test_permutation_of_column_order  s   
r  c                 C   s  | d }t ttd}d}t j|g| dd t|D d}t|| tjt	dd tj
|d	| d
 W d    n1 sAw   Y  tjt	dd tj
||d W d    n1 s_w   Y  tj
|d| d
}||kssJ tj
|d| d}||ksJ t
|}||ksJ d S )Nzlargethrift.parquetr.   r   c                 S   s   g | ]}d | qS )Zsome_long_column_name_r   )r_   r   r   r   r    ra     r   z+test_thrift_size_limits.<locals>.<listcomp>r#   z1Couldn't deserialize thrift:.*Exceeded size limitr   2   )Zthrift_string_size_limit)Zthrift_container_size_limitr,   r   )r   r&   r[   r\   r   rG   r   r   r   r   rX   )r   rU   r&   Znum_colsr   gotr   r   r    test_thrift_size_limits  s4   
r  c           
      C   s  | d }t dg di}tj||dd tj|dd}||ks"J t| }|d |d ks2J |d |d |d< |d< | d	 }|| tj|d
d}||ksUJ |t dg diksbJ tj	t
dd tj|dd}W d   n1 s{w   Y  tj|d
d}| }	|	|ksJ |	t dg diksJ tj|dd}tj	t
dd | }W d   dS 1 sw   Y  dS )zUCheck that checksum verification works for datasets created with
    pq.write_table()zcorrect.parquetr   r   r   r   r   TZwrite_page_checksumZpage_checksum_verification   $   zcorrupted.parquetFr   r   r   r   CRC checksum verificationr   N)r   r   rG   r   rX   	bytearray
read_byteswrite_bytesr   r   r   rY   r   )
r   original_path
table_origtable_checkbin_dataZcorrupted_pathtable_corruptr>   Zcorrupted_pq_fileZtable_corrupt2r   r   r    +test_page_checksum_verification_write_table"  s<   

"r'  c                 C   s>  t dg di}| d }tj||dd t| }t|dks#J |d }tj|dd}||ks4J t|	 }|d	 |d
 ksDJ |d
 |d	 |d	< |d
< | d }t
|| ||j }|| tj|dd}	|	|ksqJ |	t dg diks~J tjtdd tj|dd}
W d   dS 1 sw   Y  dS )zXCheck that checksum verification works for datasets created with
    pq.write_to_datasetr   r  Zcorrect_dirTr  r   r   r  r  r  Zcorrupted_dirFr  r  r   N)r   r   rG   write_to_datasetr[   iterdirlenrX   r  r   r   r   r!  r   r   r   )r   r#  Zoriginal_dir_pathZoriginal_file_path_listr"  r$  r%  Zcorrupted_dir_pathZcorrupted_file_pathr&  r>   r   r   r    test_checksum_write_to_dataset\  s4   


"r+  c                 C   s   t dg di}| d }d}tjt|d tj||dd W d    n1 s)w   Y  t|| tjt|d tj|dd W d    n1 sMw   Y  tjt|d tj|dd W d    d S 1 slw   Y  d S )Nr   r   Zdeprecate_legacyzPassing 'use_legacy_dataset'r   F)Zuse_legacy_dataset)	r   r   r   ZwarnsFutureWarningrG   r(  rX   ZParquetDataset)r   r   rU   msgr   r   r    "test_deprecated_use_legacy_dataset  s   "r.  )Scollectionsr   r   r   shutilr   numpyry   r   Zpyarrowr   r   Zpyarrow.filesystemr   r   Zpyarrow.testsr   Zpyarrow.tests.parquet.commonr   r	   r
   Zpyarrow.parquetZparquetrG   r   r   ImportErrorZpandasrh   Zpandas.testingZtestingr|   Zpyarrow.tests.pandas_examplesr   r   markZ
pytestmarkr!   r+   r3   r5   r?   rN   rQ   rV   rZ   Zslowr]   rg   rk   rv   r~   r   ZparametrizeZ_get_instancer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   filterwarningsr   r  r  r  r
  r  r  r'  Zdatasetr+  r.  r   r   r   r    <module>   s   


	









%}-





$:
4