o
    ZhX                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZ d dlmZ zd dlmZ d dlmZmZmZ W n eyE   dZY nw zd dlZd dlmZ d dlmZmZ W n eyg   d ZZY nw ejjZejjdd Zejjd	d
 Z ejjdd Z!ejjdd Z"ejjdd Z#ejjdd Z$ejjdd Z%ejjdd Z&ejjdd Z'ejjdd Z(ejjej)ddd Z*ejjdd Z+ejjd d! Z,ejjd"d# Z-ejjd$d% Z.ejjd&d' Z/ejjd(d) Z0ejjd*d+ Z1ejjd,d- Z2ejjd.d/ Z3ejjd0d1 Z4ejjd2d3 Z5ejjd4d5 Z6ejjd6d7 Z7ejjej8d8g d9ej8d:d;d<gd=d> Z9ejjd?d@ Z:ejjdAdB Z;dS )C    N)LocalFileSystemSubTreeFileSystem)guid)Version)_read_table_test_dataframe_write_table)_roundtrip_pandas_dataframealltypes_samplec                 C   s   t dd}| d }tj|}d|jjv sJ t|| t|j}d|v s(J t	
|d d}|d dd ddd	d
gksAJ d S )N'  sizepandas_roundtrip.parquets   pandasutf8index_columnsranger      )kindnamestartstopstep)r
   paTablefrom_pandasschemametadatar   pqread_metadatajsonloadsdecode)tempdirdffilenamearrow_tabler   js r'   X/var/www/html/lang_env/lib/python3.10/site-packages/pyarrow/tests/parquet/test_pandas.py#test_pandas_parquet_custom_metadata4   s   

r)   c              	   C   s   t t dt  t dt  t dt  g}ttj	dtj
dtj	dtjdg dd}tdd	gd
d gd d gd}t jj||dd}t jj||dd}|jj|jddr]J |j|jsfJ tj| d |d}|| || d S )Nintfloatstring   dtype)ZABBAZEDDAZACDC)r*   r+   r,         g?F)r   preserve_indexT)Zcheck_metadatazmerged.parquet)r   )r   r   fieldint16float32r,   pd	DataFramenparangeuint8r   r   equalsr   ZParquetWriterwrite_table)r"   r   df1df2Ztable1Ztable2writerr'   r'   r(   :test_merging_parquet_tables_with_different_pandas_metadataH   s,   
r@   c                 C   s   t dd}tjjtt|j|jd d d ddgd|_| d }tj	|}|j
jd us.J t|| t|}| }t|| d S )N
   r   Zlevel_1Zlevel_2namesr   )r
   r6   
MultiIndexfrom_tupleslistzipcolumnsr   r   r   r   pandas_metadatar   r   read_pandas	to_pandastmassert_frame_equal)r"   r#   r$   r%   
table_readdf_readr'   r'   r(   %test_pandas_parquet_column_multiindexe   s   


rQ   c                 C   s   t dd}| d }tjj|dd}|jj}|d rJ |d s!J t|| t|}|jj}|d r5J |jj	}|jj	|ksAJ |
 }t|| d S )Nr   r   r   Fr2   r   rI   )r
   r   r   r   r   rJ   r   r   rK   r   rL   rM   rN   )r"   r#   r$   r%   r&   rO   r   rP   r'   r'   r(   >test_pandas_parquet_2_0_roundtrip_read_pandas_no_index_writtenx   s   


rS   c                  C   X   t d} tj| }t }t||dd | }t|}t|	 }t
| | d S )Nr   2.6versionr   r   r   r   BufferOutputStreamr   getvalueBufferReaderr   rL   rM   rN   r#   r%   imosbufreaderrP   r'   r'   r(   )test_pandas_parquet_native_file_roundtrip      
r`   c                  C   sj   t d} tj| }t }t||dd | }t|}tj	|ddgd
 }t| ddg | d S )Nr   rU   rV   stringsr:   rI   )r   r   r   r   rY   r   rZ   r[   r   rK   rL   rM   rN   r\   r'   r'   r(   test_read_pandas_column_subset   s   
rd   c                  C   rT   )Nr   rU   rV   rX   r\   r'   r'   r(   #test_pandas_parquet_empty_roundtrip   ra   re   c                  C   sJ   ddiddiddigdd} t j| d}tj|}t }t|| d S )	NZ	page_typer   Zrecord_typeZnon_consecutive_homer   Z1001)Zagg_colZ	uid_first)data)r6   r7   r   r   r   rY   r   )rf   r#   r%   r]   r'   r'   r(   !test_pandas_can_write_nested_data   s   rg   c           	      C   s   | d }d}t tj|tjdtj|tjdtj|tjdtj|dkg dd}t	j
|}|d}t||dd	 W d    n1 sHw   Y  t| }t|}| }t|| d S )
Nzpandas_pyfile_roundtrip.parquetr1   r.   r   )foobarNbazZqux)int64r5   float64boolrb   wbrU   rV   )r6   r7   r8   r9   rk   r5   rl   randomrandnr   r   r   openr   ioBytesIO
read_bytesr   rL   rM   rN   )	r"   r$   r   r#   r%   frf   rO   rP   r'   r'   r(   $test_pandas_parquet_pyfile_roundtrip   s"   rv   c           
      C   s  d}t jd tt j|t jdt j|t jdt j|t jdt j|t j	dt j|t j
dt j|t j
dt j|t jdt j|t jdt j|t jdt j|t jdt j|dkd}| d }tj|}dD ]}t||d|d t|}| }t|| qgdD ]}t||d|d	 t|}| }t|| qd
D ]$}	|	dkrtjj|	sqt||d|	d t|}| }t|| qd S )Nr   r   r.   )r:   uint16uint32uint64Zint8r4   int32rk   r5   rl   rm   r   )TFrU   )rW   use_dictionary)rW   write_statistics)NONEZSNAPPYZGZIPZLZ4ZZSTDr}   )rW   compression)r8   ro   seedr6   r7   r9   r:   rw   rx   ry   r4   rz   rk   r5   rl   rp   r   r   r   r   r   rL   rM   rN   libCodecZis_available)
r"   r   r#   r$   r%   r{   rO   rP   r|   r~   r'   r'   r(   )test_pandas_parquet_configuration_options   sV   r   z)ignore:Parquet format '2.0':FutureWarningc                  C   sJ   t dd} tddt|  d| _d| j_t| ddd}t||  d S )	Nd   r   r   rA   rh   z2.0Zspark)rW   Zflavor)	r   r8   r9   lenindexr   r	   rM   rN   )r#   resultr'   r'   r(   +test_spark_flavor_preserves_pandas_metadata  s   
r   c                 C   s   t ddt ddit dt dt dt did}t| d }t j|ddjdd	d
}tj|}t|| t	|}|
 }t|| d S )Nz2017-06-30 01:31:00g*_c@z2017-06-30 01:32:00)closetimedata.parquetzdatetime64[us]r.   r   FZdrop)r6   	Timestampstrr7   	set_indexr   r   r   r   r   rL   rM   rN   )r"   rf   pathZdfxZtdfxr%   	result_dfr'   r'   r(    test_index_column_name_duplicate  s$   


r   c           	      C   s   d}t t|}tjjg d|gddgd}tjd|i|d}tj|}| d }t	|| t
|}||s9J | }t|| d S )	Nr-   )rh   rh   ri   ZfoobarZsome_numbersrC   numbers)r   zdup_multi_index_levels.parquet)rG   r   r6   rE   from_arraysr7   r   r   r   r   r   r;   rL   rM   rN   )	r"   Znum_rowsr   r   r#   tabler$   Zresult_tabler   r'   r'   r(    test_multiindex_duplicate_values7  s   

r   c                 C   sB   d}t jt|dd ddd}t| d }| }t|| d S )N  carat        cut  color  clarity  depth  table  price     x     y     z
 0.23      Ideal      E      SI2   61.5   55.0    326  3.95  3.98  2.43
 0.21    Premium      E      SI1   59.8   61.0    326  3.89  3.84  2.31
 0.23       Good      E      VS1   56.9   65.0    327  4.05  4.07  2.31
 0.29    Premium      I      VS2   62.4   58.0    334  4.20  4.23  2.63
 0.31       Good      J      SI2   63.3   58.0    335  4.34  4.35  2.75
 0.24  Very Good      J     VVS2   62.8   57.0    336  3.94  3.96  2.48
 0.24  Very Good      I     VVS1   62.3   57.0    336  3.95  3.98  2.47
 0.26  Very Good      H      SI1   61.9   55.0    337  4.07  4.11  2.53
 0.22       Fair      E      VS2   65.1   61.0    337  3.87  3.78  2.49
 0.23  Very Good      H      VS1   59.4   61.0    338  4.00  4.05  2.39\s{2,}r   pythonsepZ	index_colheaderZenginezv0.7.1.parquet)r6   read_csvrr   rs   r   rL   rM   rN   datadirZexpected_stringexpectedr   r   r'   r'   r(   &test_backwards_compatible_index_namingM  s   r   c                 C   sJ   d}t jt|dg dddd }t| d }| }t|| d S )Nr   r   cutcolorclarityr   r   r   zv0.7.1.all-named-index.parquet)	r6   r   rr   rs   
sort_indexr   rL   rM   rN   r   r'   r'   r(   1test_backwards_compatible_index_multi_level_namedb  s   
r   c                 C   s\   d}t jt|dg dddd }|jg d|_t| d }| }t	
|| d S )	Nr   r   r   r   r   r   )r   Nr   zv0.7.1.some-named-index.parquet)r6   r   rr   rs   r   r   Z	set_namesr   rL   rM   rN   r   r'   r'   r(   6test_backwards_compatible_index_multi_level_some_named{  s   r   c              	   C   s   t dt tjkrtd tg dg dtjddddd	}tjjg d	tjddddgd
d gd|_	| d }t
|}| }t|| t
|dgd}| }t||dg jdd d S )Nz2.2.0zRegression in pandas 2.2.0r      r-   )g?g?g333333?z
2017-01-01r-   zEurope/Brussels)Zperiodstzabcr   rC   z'v0.7.1.column-metadata-handling.parquetr   rc   Tr   )r   r6   __version__pytestskipr7   Z
date_rangerE   r   r   r   rL   rM   rN   Zreset_index)r   r   r   r   r   r'   r'   r(   2test_backwards_compatible_column_metadata_handling  s,   
r   c                  C   s   t jddgddggddgd} | d d| d< | dg} tj| }t }t	|| t
|  }t|jt js@J |j| jsIJ d S )	Nr   r   r   dc1c2rc   category)r6   r7   astyper   r   r   r   rY   r   r<   rK   rZ   rL   
isinstancer   ZCategoricalIndexr;   )r#   r   bosZref_dfr'   r'   r(   )test_categorical_index_survives_roundtrip  s   r   c                  C   sh   t dt jg dg dddi} tj| }t }t|| |	 }t
| }t||  d S )Nr   )r   r   r   r   )r   r   r   T)
categoriesordered)r6   r7   Categoricalr   r   r   rY   r   r<   rZ   rK   rL   rM   rN   )r#   r   r   contentsr   r'   r'   r(   )test_categorical_order_survives_roundtrip  s   

r   c                  C   s   t d gd dgd d} | ddd}tj| }tj|}t }tj||ddd t	|
 }|d |d sAJ |d	 |d	 sLJ d S )
Nr   g      ?)colr*   r   rU   rA   )rW   
chunk_sizer   r   )r6   r7   r   r   r   r   rY   r   r<   
read_tablerZ   r;   )r#   Zdf_categoryr   Z	table_catr^   r   r'   r'   r(   *test_pandas_categorical_na_type_row_groups  s   r   c                  C   s   t jg ddd} g d}tdtjj| |di}t }t	t
|| t|  }|jjdks8J |jjj|k sCJ t|| d S )N)r   r   r   r   r   rB   r   rz   r.   )rh   ri   rj   x)r   r   )r8   arrayr6   r7   r   Z
from_codesr   rY   r   r<   r   r   rZ   rL   r   r/   catr   allrM   rN   )codesr   r#   r^   r   r'   r'   r(   !test_pandas_categorical_roundtrip  s   
r   c                 C   s   t tjt dk rtd tjdg didd}|d}tdg di}|d}t|d 	 t|d 	 ks@J t|d j
jj	 t|d j
jj	 ksZJ t| d }tt|| t| }t|| d S )	Nz1.3.0z:PyArrow backed string data type introduced in pandas 1.3.0r   )rh   ri   rh   zstring[pyarrow]r.   r   zcat.parquet)r   r6   r   r   r   r7   r   r   r   Z	to_pylistr   r   valuesr   r   r<   r   r   rL   rM   rN   )r"   r=   r>   r   r   r'   r'   r(   )test_categories_with_string_pyarrow_dtype  s    


(r   c                 C   s   t dg dd}|d d|d< t|}tj|t| d dgd tt| d 	 }t
|dg |dg  t|t| d	  tt| d	 	 }t
|dg |dg  t|t| d
  tt| d
 	 }t
|dg |dg  d S )Nr   r   partr   r   ZInt64case1r   Zpartition_colscase2r   )r6   r7   r   r   r   r   write_to_datasetr   r   rL   rM   rN   r<   )r"   r#   r   r   r'   r'   r(   5test_write_to_dataset_pandas_preserve_extensiondtypes
  s   
r   c                 C   s  t g dg dd}t jg ddd|_t|}|ddg  }|d d	|d< tj	|t
| d
 dgd tt
| d
  }t|| t	|t
| d  tt
| d  }t|| t|t
| d  tt
| d  }t|| d S )N)r   r   r   r   r   r   idxr   r   r   r   r   r   r   r   )r6   r7   Indexr   r   r   copyr   r   r   r   r   rL   rM   rN   r<   )r"   r#   r   Zdf_catr   r'   r'   r(   +test_write_to_dataset_pandas_preserve_index  s    
r   r2   )TFNmetadata_fname	_metadataZ_common_metadatac                    sL  d}d}| t   }|  g }g }g }t|D ]L}	t||	d}
tjtj|	| |	d | dddd|
_|d	|	 }t
jj|
|d	}|d }|jjd u sOJ t|| || ||
 || qt
jj|
|d	}t|j||  t|}d
dg |j d }t fdd|D }|dur|
jjnd |j_t|| d S )Nr1   )r   r   rk   r.   r   r   z
{}.parquetrR   r:   rb   rc   c                    s   g | ]}|  qS r'   r'   ).0r   rc   r'   r(   
<listcomp>d  s    z<test_dataset_read_pandas_common_metadata.<locals>.<listcomp>F)r   mkdirr   r   r6   r   r8   r9   r   formatr   r   r   Zreplace_schema_metadatar   r   r   appendr   Zwrite_metadataZParquetDatasetrK   rL   concatr   rM   rN   )r"   r2   r   Znfilesr   dirpathZ	test_dataframespathsir#   r   r   Ztable_for_metadataZdatasetr   r   r'   rc   r(   (test_dataset_read_pandas_common_metadata8  s>   





r   c                 C   sV   t dg di}| d }t|| tjdtt| t d}|t	
|s)J d S )Nr   r   r   )
filesystem)r6   r7   r   r   rK   r   r   r   r;   r   r   )r"   r#   r$   r   r'   r'   r(   %test_read_pandas_passthrough_keywordsj  s   
r   c                 C   s   t t ddgddggt ddgd}| d }tt t }ttd	|td
t g}tj	||}t
|| t| }t|| d S )N)idZ	something)Zvalue2else)r   Z
something2)valueZelse2rh   ri   )col1col2r   r   r   )r6   r7   ZSeriesr   Zmap_r,   r   r3   r   r   r   r   rK   rL   rM   rN   )r"   r#   r$   Zudtr   r%   r   r'   r'   r(   test_read_pandas_map_fieldsz  s   "
r   )<rr   r   numpyr8   r   Zpyarrowr   Z
pyarrow.fsr   r   Zpyarrow.utilr   Zpyarrow.vendored.versionr   Zpyarrow.parquetZparquetr   Zpyarrow.tests.parquet.commonr   r   r   ImportErrorZpandasr6   Zpandas.testingZtestingrM   r	   r
   markZ
pytestmarkr)   r@   rQ   rS   r`   rd   re   rg   rv   r   filterwarningsr   r   r   r   r   r   r   r   r   r   r   r   r   r   Zparametrizer   r   r   r'   r'   r'   r(   <module>   s   









,














/
