o
    Zhc                     @   s  d dl Z d dlZd dlmZ d dlZd dlZd dlZd dlZ	d dl
mZmZ d dlmZ d dlmZ zd dlmZ d dl
mZ W n eyK   dZY nw zd dlZd dlmZ d dl
mZ W n eyk   d ZZY nw ejjZejjdd	 Zd
d Zejjej dg de	! ddddddfg de	" ddddddfg de	# ddddddfg de	$ ddddddfg de	% ddddddfg de	& ddddddfg de	' ddddddfg de	( ddddddfg de	) ddddddfg de	* ddddddfdde+dddge	, dde+d-ddddfg d e	. d!d"d#d d$dfg d%e	, dd&d'dddfgd(d) Z/d*d+ Z0ejjd,d- Z1d.d/ Z2d0d1 Z3d2d3 Z4d4d5 Z5d6d7 Z6d8d9 Z7d:d; Z8ejjd<d= Z9ej:d>d?d@ Z;dAdB Z<ejjdCdD Z=ejjdEdF Z>dGdH Z?ejj@ejjAdIdJ ZBdKdL ZCdMdN ZDej dOdPeEdQidPeEdQidfdPeEdQidReEdQidSfeEdQeEdQdTdUeEdQidVffdWdX ZFejjGdYdZ ZHdS )[    N)OrderedDict)_check_roundtripmake_sample_file)LocalFileSystem)util)_write_table)alltypes_samplec            	      C   s  t dd} | jt| jd} tjjddt| d| _t	| }t| j}|j
}t| |jt| ks3J |j|d ks<J |jdksCJ |jdksJJ d|jv sQJ t|jtsYJ t|j
tsaJ |j}|j|u skJ t||d ksuJ t| |d }t| |j| jd ksJ |jdksJ |jdksJ |jdksJ |jd	ksJ |jd
ksJ tt ||d   W d    n1 sw   Y  tt |d  W d    n1 sw   Y  t|jD ]-}||}t|t j!sJ t| t|jD ]}|"|}t|t j#sJ t| qqtt |d W d    n	1 s+w   Y  tt ||jd  W d    n	1 sIw   Y  |d}|jt| ks]J |j|d ksgJ |j$dksoJ tt |"d}W d    n	1 sw   Y  tt |"|d }W d    n	1 sw   Y  |"d}|j%dksJ |j&dksJ |jd	ksJ |j'dksJ |j(dksJ |j)du sJ t|j*t j+sJ |j,dksJ t-|j.ddhksJ |j/du sJ |j0d u s
J |j1dksJ |j2dksJ |j3dks"J tt4 |j5 W d    n	1 s6w   Y  tt4 |j6 W d    d S 1 sPw   Y  d S )N'  sizecolumnsr   i@B    2.6zparquet-cppBOOLEANNONE    boolTZSNAPPYZPLAINZRLEF)7r   Zreindexsortedr   nprandomrandintlenindexr   metadatareprnum_rowsnum_columnsnum_row_groupsformat_versionZ
created_by
isinstanceserialized_sizeintdictschemanameZmax_definition_levelZmax_repetition_levelphysical_typeZconverted_typepytestraises
IndexErrorrange	row_grouppqZRowGroupMetaDatacolumnZColumnChunkMetaDataZtotal_byte_sizefile_offset	file_path
num_valuesZpath_in_schemais_stats_set
statisticsZ
Statisticscompressionset	encodingsZhas_dictionary_pageZdictionary_page_offsetZdata_page_offsetZtotal_compressed_sizeZtotal_uncompressed_sizeNotImplementedErrorZhas_index_pageZindex_page_offset)	dffilehZncolsmetar&   colZrgrg_metacol_meta r?   Z/var/www/html/lang_env/lib/python3.10/site-packages/pyarrow/tests/parquet/test_metadata.pytest_parquet_metadata_api4   s   







$rA   c                 C   sH   t dg di}t|| d  t| d }|jddj d S )Nar   r      ztest_metadata_segfault.parquetr   )	patabler.   write_tableParquetFiler   r-   r/   r4   )tempdirrF   Zparquet_filer?   r?   r@   test_parquet_metadata_lifetime   s   rJ   )datatyper(   	min_value	max_value
null_countr2   distinct_count)r   r   r   N   ZINT32r   rQ   ZINT64)r   r   r   NrQ   r   )皙g@gffffff@N皙@FLOATrR   rS   DOUBLEr   b  ZaaaZ
BYTE_ARRAY    zutf-8)TFFTTr   FT   )       bs   12Ns   aaarZ   r[   c                 C   s   t d| i}ttd|g}	tjj||	dd}
t|
}|j}|	d}|
d}|j}|js3J t||j|s<J t||j|sEJ |j|ksLJ |j|ksSJ |j|ksZJ |j|ksaJ d S )NrK   F)r&   safer   )pd	DataFramerE   r&   fieldTablefrom_pandasr   r   r-   r/   r4   has_min_max_closeminmaxrO   r2   rP   r(   )rK   rL   r(   rM   rN   rO   r2   rP   r9   r&   rF   r:   r;   r=   r>   statr?   r?   r@   "test_parquet_column_statistics_api   s   .


rg   c                 C   s@   | t  krt|| dk S | t  krt|| dk S ||kS )NgHz>gvIh%<=)rE   float32absfloat64)rL   leftrightr?   r?   r@   rc      s
   rc   c                  C   sf   t dt jt jgddi} ttj| j}|	d
djjr#J |	d
djjd u s1J d S )Ntzdatetime64[ns])Zdtyper   )r]   r^   ZSeriesZNaTr   rE   r`   ra   r   r-   r/   r4   rb   re   )r9   r;   r?   r?   r@   &test_parquet_raise_on_unset_statistics   s    rn   c           
      C   s  ddt  fddt  fddt  ftddddtd	dddt d
ftddddtd	dddt dftdddddddtdddddddt d
ftdddddddtdddddddt dft	dddt	dddt 
 ftdtdt ddfg	}t|D ]E\}\}}}t jt j||g|dgdg}t| d| }tj||dd t|}|jddj}	|	j|ksJ |	j|ksJ qd S )N
   l   5f|~W	 l    u   ähnlichu   öffentlich   r   rW      msusi           z20.123z20.124   rY   rL   r<   zexample{}.parquetr   version)rE   uint64uint32utf8datetimetimeZtime32Ztime64	timestampdateZdate32decimalDecimalZ
decimal128	enumerater`   Zfrom_arraysarraystrformatr.   rG   rH   r   r-   r/   r4   rd   re   )
rI   casesiZmin_valZmax_valtyprm   pathpfstatsr?   r?   r@   %test_statistics_convert_logical_types   sJ   

r   c              	   C   sV  t jtdt g dfdt g dfg}t|| d  t| d }dD ]}|d	|}|j
du s9J |jd us@J q(t|| d	 d
d t| d	 }dD ]}|d	|}|j
d
u sdJ |jd u skJ qSt|| d dgd t| d }|d	d}|d	d}|j
du sJ |j
d
u sJ |jd usJ |jd u sJ d S )NrB   rC   rV   rB   rV   cdata.parquet)r   r   r   Tzdata2.parquetF)Zwrite_statisticszdata3.parquetr   )rE   r`   Zfrom_pydictr   r   r   r.   read_metadatar-   r/   r3   r4   )rI   rF   r;   r<   ccZcc_aZcc_br?   r?   r@   %test_parquet_write_disable_statistics  s4   r   c                  C   s  t d} | jdksJ | jdu sJ | jdu sJ t jdddd} | jdks)J | jdu s0J | jdu s7J tdt fdt fg}t jddd	t jddd	f}t j||\}}|d
kscJ |dksiJ t j	|||}||kswJ t jj	|ddd}t jddddt jddddf}||ksJ t j	|d}|dksJ t j|ddksJ t
t t j	|d W d    n1 sw   Y  t
jtdd t jdddt jdddf}t j|| W d    d S 1 sw   Y  d S )Nro   Fr   T)
descendingnulls_firstrB   rV   r   r   )rV   r   )rB   	ascendingat_end)rB   r   Zat_start)null_placementr?   )r?   r   )rB   znot a valid sort orderzinconsistent null placementmatch)r   )r.   SortingColumncolumn_indexr   r   rE   r&   int64to_orderingZfrom_orderingr)   r*   
ValueError)Zsorting_colr&   Zsorting_cols
sort_orderr   Zsorting_cols_roundtrippedexpectedZempty_sorting_colsr?   r?   r@   test_parquet_sorting_column0  sL   
"r   c                  C   s   t t dt  fdt  fgt  d} tjdddtjdddg}tj| |\}}|d	ks4J t|dks<J |d d
ksDJ |d dksLJ d S )NxyrB   rV   r   Tr   r   Fr   )za.xr   r   )rV   r   )rE   r&   structr   r.   r   r   r   )r&   sorting_columnsr   r   r?   r?   r@   "test_parquet_sorting_column_nesteda  s   r   c                  C   s   t g dg dd} tjddddtjddd	f}t  }t| ||d
 t | }t|}|j	dks9J ||
djksCJ d S )NrC   r   r   r   T)r   r   r   r   F)r   r   )r   )rE   rF   r.   r   BufferOutputStreamr   BufferReadergetvaluer   r    r-   r   )rF   r   writerreaderr   r?   r?   r@   !test_parquet_file_sorting_columnss  s   
r   c               
   C   s"  d} t jdt  | did}t jdt |g| did}t jdt  dd	| d
idt jdt t jdt  | did| didt jdt |g| didt dt  t jdt  | didt jdt  | didg}dd |D }t j|t |d}t  }t	|| |
 }tt |}|j}	|	d j|  d
ksJ |	d jd d	ksJ |	d }
|
j|  dksJ |
jj}|j|  dksJ |	d }|j|  dksJ |jd }|j|  dksJ |jd }|j|  dksJ |	d jd u sJ |	d j|  dksJ |	d j|  dksJ d S )Ns   PARQUET:field_idinners   100r   middles   101basics   others   abc   1listz
list-inners   10s   11r   s   102zno-metadataznon-integral-field-ids   xyzznegative-field-ids   -1000c                 S   s   g | ]}g qS r?   r?   ).0_r?   r?   r@   
<listcomp>  s    z*test_field_id_metadata.<locals>.<listcomp>r&   r   r   r   rD   rQ   rY   )rE   r_   int32r   list_rF   r&   r   r.   rG   r   rH   r   Zschema_arrowr   rL   Zvalue_field)Zfield_idr   r   fieldsZarrsrF   Zbiocontentsr   r&   Z
list_fieldZlist_item_fieldZstruct_fieldZstruct_middle_fieldZstruct_inner_fieldr?   r?   r@   test_field_id_metadata  s`   


r   c                  C   sz   dD ]8} t dg di}t  }t||| d t | }t|}|d	d}|j
| u s3J |j| u s:J qd S )N)FTrB   rC   )write_page_indexr   )rE   rF   r   r   r   r   r.   r   r-   r/   Zhas_offset_indexZhas_column_index)r   rF   r   r   r   r   r?   r?   r@   test_parquet_file_page_index  s   
r   c                 C   sl  ddg}t | d }tg dg dddgddgdd	ggd
}tj|}d }|D ]'}g }tj|t | | |d |d | |d u rJ|d }q*|	|d  q*t
|d}|| W d    n1 sgw   Y  t|}| }	| }
|
D ]}|dkr|
| |	| ksJ q{|
d dksJ |
d dksJ |
d dksJ |
d dksJ |	d dksJ d S )NzARROW-1983-dataset.0zARROW-1983-dataset.1	_metadatarC   )r   r   r   rD   rQ   )onetwothreeZmetadata_collectorr   wbr#   r   r   rt   r    )r   r]   r^   rE   r`   ra   r.   rG   Zset_file_pathappend_row_groupsopenwrite_metadata_filer   to_dict)rI   	filenamesZmetapathr9   rF   _metafilenamer;   fmdZ_mdkeyr?   r?   r@   test_multi_dataset_metadata  sB   

r   z#ignore:Parquet format:FutureWarningc           
      C   s^  t | d }tddg}t|| t|}|j }||s$J |jr.d|jvs.J dD ]}tj|||d t|}|dkrEdnd}|j	|ksNJ q0tj
d	d
gddgd|d}t|| d  tt | d }tj||||gd t|}|jd
ksJ d}	tjt|	d tjtddg|||gd W d    d S 1 sw   Y  d S )Nr   )rB   r   )rV   rj   s   ARROW:schema)1.0z2.0z2.4r   ry   r   r   r   r   g?g?r   r   r   r   zLAppendRowGroups requires equal schemas.
The two columns with index 0 differ.r   )rB   r   )rV   null)r   rE   r&   r.   write_metadatar   Zto_arrow_schemaequalsr   r!   rF   rG   r    r)   r*   RuntimeError)
rI   r   r&   Zparquet_metaZschema_as_arrowrz   Zexpected_versionrF   Zparquet_meta_multmsgr?   r?   r@   test_write_metadata  s8   




"r   c                  C   sB   t jt ddgddd id} t jtdg| d}t| d S )	NZf0doubleZlarger   i r   ro   r   )rE   r&   r_   rF   r   aranger   )Z	my_schemarF   r?   r?   r@   test_table_large_metadata"  s
   
r   c                  C   sH  t dd} t| }t| }t| | jd d d  }t|jtjs"J |j|js+J |j|jks3J |j|js<J |j|jksDJ |jdksKJ |j|jrTJ |j|jks\J t|jd tjsgJ |jd |jd stJ |jd |jd ksJ |jd |jd rJ |jd |jd ksJ |jd dksJ d S )Nr	   r
   r   zarbitrary objectr   r   )	r   r   r   r"   r&   r.   ZParquetSchemar   ZColumnSchema)r9   r:   Zfileh2Zfileh3r?   r?   r@   test_compare_schemas+  s$   
r   c                 C   s   d}t jt|tj|dddgd}| d }tj|}t	|| t
|}t
j|dd}|j|s8J |j|s@J |jjd	 |jd	 ksMJ d S )
Nd   )r   valuesr   r   r   ztest.parquetT)Z
memory_maps   pandas)r]   r^   r   r   r   randnrE   r`   ra   r   r.   read_schemar&   r   r   )rI   Nr9   Z	data_pathrF   read1Zread2r?   r?   r@   test_read_schemaF  s   


r   c                 C   s   t dt jg ddi}t|| d  t| d }| }t|d dks)J t|d d d dks7J |d d d d d	 d u sGJ d S )
NrB   r   rx   r   Z
row_groupsr   r   r   r4   )rE   rF   r   r.   rG   r   r   r   )rI   rF   r   metadata_dictr?   r?   r@   #test_parquet_metadata_empty_to_dict[  s   $r   c                  C   s   d} d}t dd t| D }t  }t|| | }W d    n1 s)w   Y  tt |}tt |}t|D ]}|	| qBt  }|
| | }W d    n1 sbw   Y  tt |}d S )NrW   i  c                 S   s   i | ]}t |tjd qS )ro   )r   r   r   r   )r   r   r?   r?   r@   
<dictcomp>o  s    z6test_metadata_exceeds_message_size.<locals>.<dictcomp>)rE   rF   r,   r   r.   rG   r   r   r   r   r   )ZNCOLSZNREPEATSrF   outbuforiginal_metadatar   r   r?   r?   r@   "test_metadata_exceeds_message_sizeg  s    




r   c                 C   sF  t dg di}d}t| | }d| }t|| t| | }|j}t||s/J tj|t d|s<J tj|d|  d|sKJ t	||sUJ tj	|t d|sbJ tj	|d|  d|sqJ t
| # tj|t d|sJ tj	|t d|sJ W d    d S 1 sw   Y  d S )NrB   rC   r   zfile:///
filesystem)rE   rF   r   r.   rG   r   r&   r   r   r   r   Z
change_cwd)rI   rF   fnamer1   Zfile_urir   r&   r?   r?   r@   test_metadata_schema_filesystem  sR   

"r   c                  C   s   t dg di} t  }t| | | }W d    n1 s"w   Y  tt |}d}tj	t
|d |d  W d    d S 1 sIw   Y  d S )NrB   rC   z#Argument 'other' has incorrect typer   )rE   rF   r   r.   rG   r   r   r   r)   r*   	TypeErrorr   )rF   r   r   r   r   r?   r?   r@   test_metadata_equals  s   

"r   zt1,t2,expected_errorcol1ro   col2z$The two columns with index 0 differ.)r   r   Zcol3z&This schema has 2 columns, other has 1c           
      C   s   t | }t |}t }t }t|| t|| |d |d t|j}t|j}|rYd}	t	j
t|	| d || W d    d S 1 sRw   Y  d S || d S )Nr   z(AppendRowGroups requires equal schemas.
r   )rE   rF   ioBytesIOr.   rG   seekrH   r   r)   r*   r   r   )
t1t2Zexpected_errorZtable1Ztable2Zbuf1Zbuf2meta1meta2prefixr?   r?   r@   $test_metadata_append_row_groups_diff  s    



"r   c                 C   s  |\}}| d }| d }| d }| d }| d}t dtdi}	t|	j|g  tj|	j|g t d t|	j| g  |d	}
t|	j|
g  W d    n1 sWw   Y  tj|	j|g |d |	 |	   kr|	   kr|	   kr||
 ksJ  J d S )
Nr   r   meta3meta4z/meta5r<   rY   r   zwb+)rE   rF   r,   r.   r   r&   r   as_urir   
read_bytesread)rI   Zs3_example_s3fsZs3_fsZs3_pathr   r   r   r  Zmeta5rF   Zmeta4_streamr?   r?   r@   (test_write_metadata_fs_file_combinations  s*   
r  )Ir~   r   collectionsr   r   numpyr   r)   ZpyarrowrE   Zpyarrow.tests.parquet.commonr   r   Z
pyarrow.fsr   Zpyarrow.testsr   Zpyarrow.parquetZparquetr.   r   ImportErrorZpandasr]   Zpandas.testingZtestingtmr   markZ
pytestmarkrA   rJ   ZparametrizeZuint8Zuint16r|   r{   Zint8Zint16r   r   rh   rj   chrbinaryencodeZbool_rg   rc   rn   r   r   r   r   r   r   r   r   filterwarningsr   r   r   r   r   ZslowZlarge_memoryr   r   r   r,   r   Zs3r  r?   r?   r?   r@   <module>   s   
Z*

#18

*
+	

#
