o
    թZh                     @   s  d Z ddlZddlmZ ddlmZ ddlZddlZddlZ	ddl
Z
ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlZddlmZ dd	lmZ dd
lmZmZmZm Z m!Z! zddl"Z"dZ#W n e$yo   dZ#Y nw zddl%Z%dZ&W n e$y   dZ&Y nw e
j'(de
j'(dgZ)e
j*e
j+de
j'j,e& peddddkddde
j+de
j'j,e# dddgddd Z-e
j*dd Z.e
j*dd Z/e
j*dd  Z0e
j*d!d" Z1e
j*d#d$ Z2e
j*ej3ej4j5ej3ej4j6ej3ej4j7ej8d%d&ej8d'd&ej8d(d&ej8d)d&gdd*d+ Z9									,dKd-d.Z:d/d0 Z;d1d2 Z<d3d4 Z=d5d6 Z>d7d8 Z?d9d: Z@d;d< ZAd=d> ZBd?d@ ZCdAdB ZDG dCdD dDZEG dEdF dFeEZFG dGdH dHeEZGG dIdJ dJeEZHdS )Lz test parquet compat     N)Decimal)BytesIO)using_copy_on_write)_get_option)is_platform_windows)pa_version_under11p0pa_version_under13p0pa_version_under15p0)Version)FastParquetImplPyArrowImpl
get_engineread_parquet
to_parquetTFz2ignore:DataFrame._data is deprecated:FutureWarningz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningfastparquetmode.data_managerZsilentarrayz4fastparquet is not installed or ArrayManager is usedreason)Zmarkspyarrowpyarrow is not installed)paramsc                 C      | j S Nparamrequest r   S/var/www/html/lang_env/lib/python3.10/site-packages/pandas/tests/io/test_parquet.pyengine;   s   r!   c                   C   s   t std dS )Nr   r   )_HAVE_PYARROWpytestskipr   r   r   r    paQ   s   
r%   c                   C   s0   t s	td dS tddddkrtd dS )Nzfastparquet is not installedr   Tr   r   z.ArrayManager is not supported with fastparquetr   )_HAVE_FASTPARQUETr#   r$   r   r   r   r   r    fpX   s   

r'   c                   C   s   t g dddS )N         fooAB)pd	DataFramer   r   r   r    	df_compata   s   r2   c               
   C   sB   t tdttddtjddddg dt jd	d
dd} | S )Nabcr)            @      @float64dtypeTFT20130101r+   periods)abdef)r0   r1   listrangenparange
date_range)dfr   r   r    df_cross_compatf   s   rI   c                   C   s   t tddtjdgg dg dg dttddtd	d
dtjdddddtjdgg dt jdd	dt 	dt j
t 	dgdS )Nr3   r>   cr>   NrJ   )   foo   bars   bazr,   barbazr)   r4   r+      u1r5   r6   r7   r8          @      @r:   r;   r<   Z20130103)stringstring_with_nanZstring_with_nonebytesunicodeintZuintfloatZfloat_with_nanbooldatetimedatetime_with_nat)r0   r1   rC   rE   nanrD   rF   astyperG   	TimestampZNaTr   r   r   r    df_fullx   s$   

ra   z2019-01-04T16:41:24+0200z%Y-%m-%dT%H:%M:%S%zz2019-01-04T16:41:24+0215z2019-01-04T16:41:24-0200z2019-01-04T16:41:24-0215c                 C   r   r   r   r   r   r   r    timezone_aware_date_list   s   rb   r*   c
                    s   pddip	i du r|r|d< |d<  fdd}
du rFt  |
|	 W d   dS 1 s?w   Y  dS |
|	 dS )a  Verify parquet serializer and deserializer produce the same results.

    Performs a pandas to disk and disk to pandas round trip,
    then compares the 2 resulting DataFrames to verify equality.

    Parameters
    ----------
    df: Dataframe
    engine: str, optional
        'pyarrow' or 'fastparquet'
    path: str, optional
    write_kwargs: dict of str:str, optional
    read_kwargs: dict of str:str, optional
    expected: DataFrame, optional
        Expected deserialization result, otherwise will be equal to `df`
    check_names: list of str, optional
        Closed set of column names to be compared
    check_like: bool, optional
        If True, ignore the order of index & columns.
    repeat: int, optional
        How many times to repeat the test
    compressionNr!   c                    sZ   t | D ]&}jfi  tfi }dv r d jd< tj| d qd S )NrV   r)   rV   )check_names
check_likecheck_dtype)rD   r   r   loctmassert_frame_equal)repeat_actualrg   rf   re   rH   expectedpathread_kwargswrite_kwargsr   r    compare   s   
z!check_round_trip.<locals>.compare)ri   ensure_clean)rH   r!   rp   rr   rq   ro   re   rf   rg   rk   rs   r   rn   r    check_round_trip   s   "

"ru   c                 C   s0   ddl m} |j| dd}|jjj|ksJ dS )zCheck partitions of a parquet file are as expected.

    Parameters
    ----------
    path: str
        Path of the dataset.
    expected: iterable of str
        Expected partition names.
    r   NZhive)partitioning)Zpyarrow.datasetdatasetrv   schemanames)rp   ro   Zdsrw   r   r   r    check_partition_names   s   
rz   c                 C   sD   d}t jt|d t| dd W d    d S 1 sw   Y  d S )Nz.engine must be one of 'pyarrow', 'fastparquet'matchr,   rO   )r#   raises
ValueErrorru   )r2   msgr   r   r    test_invalid_engine   s   "r   c                 C   :   t dd t|  W d    d S 1 sw   Y  d S )Nio.parquet.enginer   r0   option_contextru   )r2   r%   r   r   r    test_options_py      
"r   c                 C   r   )Nr   r   r   )r2   r'   r   r   r    test_options_fp   r   r   c                 C   r   )Nr   autor   )r2   r'   r%   r   r   r    test_options_auto  r   r   c                 C   sP  t tdts	J t tdtsJ tdd# t tdts"J t tdts+J t tdts4J W d    n1 s>w   Y  tdd# t tdtsSJ t tdts\J t tdtseJ W d    n1 sow   Y  tdd$ t tdtsJ t tdtsJ t tdtsJ W d    d S 1 sw   Y  d S )Nr   r   r   r   )
isinstancer   r   r   r0   r   )r'   r%   r   r   r    test_options_get_engine  s"   "r   c                  C   s  ddl m}  | d}| d}tsdnttjt|k }ts!dnttjt|k }to.| }to3| }|s|s|r\d| d}t	j
t|d td	 W d    n1 sVw   Y  nd
}t	j
t|d td	 W d    n1 stw   Y  |rd| d}t	j
t|d td	 W d    d S 1 sw   Y  d S d}t	j
t|d td	 W d    d S 1 sw   Y  d S d S d S )Nr   )VERSIONSr   r   FzPandas requires version .z. or newer of .pyarrow.r{   r   z%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.)Zpandas.compat._optionalr   getr"   r
   r   __version__r&   r   r#   r}   ImportErrorr   )r   Z
pa_min_verZ
fp_min_verZhave_pa_bad_versionZhave_fp_bad_versionZhave_usable_paZhave_usable_fpr|   r   r   r    "test_get_engine_auto_error_message   sD   






"
"r   c                 C   s   | }t  0}|j||d d t||d}t || t||ddgd}t ||ddg  W d    d S 1 s9w   Y  d S N)r!   rc   r!   r>   r@   )r!   columnsri   rt   r   r   rj   rI   r%   r'   rH   rp   resultr   r   r    test_cross_engine_pa_fpK  s   
"r   c                 C   s   | }t  0}|j||d d t||d}t || t||ddgd}t ||ddg  W d    d S 1 s9w   Y  d S r   r   r   r   r   r    test_cross_engine_fp_paY  s   
"r   c              	   C   s   t dg di}d}t 0}tjt|ddd |||  W d    n1 s*w   Y  W d    d S W d    d S 1 sBw   Y  d S )Nr>   r(   zqStarting with pandas version 3.0 all arguments of to_parquet except for the argument 'path' will be keyword-only.F)r|   Zcheck_stacklevelZraise_on_extra_warnings)r0   r1   ri   rt   assert_produces_warningFutureWarningr   )r!   rH   r   rp   r   r   r    !test_parquet_pos_args_deprecationf  s   
"r   c                   @   s4   e Zd Zdd Zdd Zejjejjdd Z	dS )Basec              	   C   s|   t  0}tj||d t|||d d W d    n1 sw   Y  W d    d S W d    d S 1 s7w   Y  d S )Nr{   rc   )ri   rt   r#   r}   r   )selfrH   r!   excerr_msgrp   r   r   r    check_error_on_writex  s   
"zBase.check_error_on_writec              	   C   sx   t  .}t | t|||d d W d    n1 sw   Y  W d    d S W d    d S 1 s5w   Y  d S )Nr   )ri   rt   external_error_raisedr   )r   rH   r!   r   rp   r   r   r    check_external_error_on_write~  s   
"z"Base.check_external_error_on_writec                 C   sr   |dkr	t | t|dddddd}|j| d t|j}W d    n1 s,w   Y  t|| d S )	Nr   iodataparquetzsimple.parquetrb)mode)content)	r#   importorskipopenZserve_contentreadr   urlri   rj   )r   Z
httpserverdatapathr2   r!   rB   rH   r   r   r    test_parquet_read_from_url  s   
zBase.test_parquet_read_from_urlN)
__name__
__module____qualname__r   r   r#   marknetwork
single_cpur   r   r   r   r    r   w  s    r   c                   @   s   e Zd Zdd Zdd Zejdg ddd Zd	d
 Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zejd!g d"d#d$ Zd%S )&	TestBasicc                 C   sF   t g dddt dtg dfD ]}d}| ||t| qd S )Nr(   r)   r,   r;   z+to_parquet only supports IO with DataFrames)r0   Seriesr`   rE   r   r   r~   )r   r!   objr   r   r   r    
test_error  s   zTestBasic.test_errorc                 C   s6   t tdttddd}ddg|_t|| d S )Nr3   r)   r4   rU   rY   r,   rO   )r0   r1   rC   rD   r   ru   )r   r!   rH   r   r   r    test_columns_dtypes  s   
zTestBasic.test_columns_dtypesrc   )NgzipZsnappybrotlic                 C   s(   t dg di}t||d|id d S )Nr.   r(   rc   rr   r0   r1   ru   )r   r!   rc   rH   r   r   r    test_compression  s   zTestBasic.test_compressionc                 C   sJ   t tdttddd}t dtdi}t|||ddgid d S )Nr3   r)   r4   r   rU   r   ro   rq   r0   r1   rC   rD   ru   )r   r!   rH   ro   r   r   r    test_read_columns  s
   
zTestBasic.test_read_columnsc              	   C   sX   t ttdtdd}t dddgi}t||||ddgid	gdgd
dd d S )Nr4   Zaabb)rY   partrY   r   r)   partition_colsr   )r   ==r>   )filtersr   )rp   ro   rr   rq   rk   r   )r   r!   tmp_pathrH   ro   r   r   r    test_read_filters  s   

zTestBasic.test_read_filtersc                 C   s   |dk}|r|dkr| tjjdd tdg di}t|| g dtjddd	td
g dg}|D ]}||_	t
|tjrG|j	d |_	t|||d q5g d|_	d|j	_t|| d S )Nr   zfastparquet write into indexr   r.   r(   )r*   r+   r4   r;   r+   r<   r3   )r)   r+   r4   )re   )r   r)   r*   r,   )applymarkerr#   r   xfailr0   r1   ru   rG   rC   indexr   ZDatetimeIndex
_with_freqname)r   r!   r   r   re   rH   Zindexesr   r   r   r    test_write_index  s(   

zTestBasic.test_write_indexc                 C   s:   |}t dg di}t jg d}||_t|| d S )Nr.   r(   )r>   r)   )r>   r*   )r?   r)   )r0   r1   
MultiIndexfrom_tuplesr   ru   )r   r%   r!   rH   r   r   r   r    test_write_multiindex  s
   zTestBasic.test_write_multiindexc                 C   s   |}t jdddd}t jtjddt| dftdd}t j	j
d	d
g|gddgd}|jd d}||fD ]}||_t|| t||dddgi|ddg d q8d S )Nz01-Jan-2018z01-Dec-2018ZMS)freqr*   r+   ABCr   Level1Level2leveldate)ry   r   r.   r/   rq   ro   )r0   rG   r1   rE   randomdefault_rngstandard_normallenrC   r   Zfrom_productcopyr   ru   )r   r%   r!   datesrH   Zindex1index2r   r   r   r    test_multiindex_with_columns  s"   
z&TestBasic.test_multiindex_with_columnsc                 C   s   t g dg dd}d dd}|jdd}t||||d t jg dg ddg d	d
}t||||d g dg dg}t jttddd tdD d|d
}|jdd}t||||d d S )Nr(   )qrs)r>   r?   F)rc   r   T)droprr   ro   )ZzyxZwvuZtsrr   rO   rO   rP   rP   r,   r,   quxr   onetwor   r   r   r   r   r      c                 S   s   g | ]}| qS r   r   ).0ir   r   r    
<listcomp>  s    z7TestBasic.test_write_ignoring_index.<locals>.<listcomp>)r   r   )r0   r1   Zreset_indexru   rC   rD   )r   r!   rH   rr   ro   arraysr   r   r    test_write_ignoring_index  s    
 z#TestBasic.test_write_ignoring_indexc                 C   sb   t jg d}t jtjdd|d}|dkr$| ||t	d d S |dkr/t
|| d S d S )Nr   r*   )r4   r+   r   r   Column name must be a stringr   )r0   r   r   r1   rE   r   r   r   r   	TypeErrorru   )r   r!   Z
mi_columnsrH   r   r   r    test_write_column_multiindex  s   z&TestBasic.test_write_column_multiindexc                 C   sn   g dg dg}t jtjdd|d}ddg|j_|dkr*| ||t	d	 d S |d
kr5t
|| d S d S )Nr   )r)   r*   r)   r*   r)   r*   r)   r*   r*   r   r   r   r   r   r   zColumn namer   )r0   r1   rE   r   r   r   r   ry   r   r~   ru   r   r!   r   rH   r   r   r    &test_write_column_multiindex_nonstring)  s   z0TestBasic.test_write_column_multiindex_nonstringc                 C   sJ   |}g dg dg}t jtjdd|d}ddg|j_t|| d S )Nr   r   r*   r   r   Z	ColLevel1Z	ColLevel2)	r0   r1   rE   r   r   r   r   ry   ru   r   r%   r!   r   rH   r   r   r    #test_write_column_multiindex_string:  s   z-TestBasic.test_write_column_multiindex_stringc                 C   s>   |}g d}t jtjdd|d}d|j_t|| d S )N)rO   rP   r,   r   r*   r   r4   r   Z	StringCol)	r0   r1   rE   r   r   r   r   r   ru   r   r   r   r    test_write_column_index_stringK  s   z(TestBasic.test_write_column_index_stringc                 C   sV   g d}t jtjdd|d}d|j_|dkr$| ||t	d d S t
|| d S )Nr)   r*   r+   r4   r*   r   r   ZNonStringColr   r   )r0   r1   rE   r   r   r   r   r   r   r   ru   r   r   r   r    !test_write_column_index_nonstringY  s   z+TestBasic.test_write_column_index_nonstringc           
      C   s  t d}|dkrt jjdd}|| ttg ddtg ddtg dtg d	tg d
dtg ddtg ddd}t	 }|
|| t||d}t||dd}W d    n1 slw   Y  |d jtdks}J ttjg dddtjg dddtjg dddtjg d	ddtjg d
ddtjg dddtjg dddd}	|dkr|jddd}|	jddd}	t||	 d S )Nzpyarrow.parquetr   z.Fastparquet nullable dtype support is disabledr   r)   r*   r+   NZint64Zuint8)r>   r?   rJ   N)TFTNr   )      ?rS   rT   NZfloat32r7   )r>   r?   rJ   r@   rA   rB   gr   numpy_nullabler!   dtype_backendr>   Int64r8   UInt8rU   booleanZFloat32Float64rJ   r)   )Zaxis)r#   r   r   r   r   r   tabler   ri   rt   write_tabler   r9   rE   r0   r1   r   rj   )
r   r!   r   pqr   r  rp   Zresult1Zresult2ro   r   r   r    test_dtype_backendi  sH   


zTestBasic.test_dtype_backendr9   )	r  r  r	  objectzdatetime64[ns, UTC]rZ   z	period[D]r
  rU   c                 C   sT   t dt jg |di}d }|dkrt dt jg ddi}t||ddi|d d S )Nvaluer8   rZ   r
  r  r  r   )r0   r1   r   ru   )r   r%   r9   rH   ro   r   r   r    test_read_empty_array  s   
zTestBasic.test_read_empty_arrayN)r   r   r   r   r   r#   r   parametrizer   r   r   r   r   r   r   r   r   r   r   r   r  r  r   r   r   r    r     s,    
		 1r   c                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jje dde
jje dde
jdeejgdd Zdd Ze
jjdd Ze
jjdd Ze
jje
jddgg gdd Zdd  Zd!d" Zd#d$ Zd%d& Ze
jjded'd( gd)d*gd+d,d- Zd.d/ Zd0d1 Z d2d3 Z!d4d5 Z"d6d7 Z#d8d9 Z$d:d; Z%d<d= Z&d>d? Z'd@dA Z(dBdC Z)dDdE Z*dFdG Z+dHdI Z,dJdK Z-e
jje.dLddMdN Z/dOdP Z0dQS )RTestParquetPyArrowc                 C   s@   |}t jdddd}|d }||d< g d|d< t|| d S )Nr;   r+   Europe/Brusselsr=   tzdatetime_tzTNTbool_with_none)r0   rG   r   ru   )r   r%   ra   rH   dtir   r   r    
test_basic  s   
zTestParquetPyArrow.test_basicc                 C   s<   |}t jdddd|d< t|||ddg dddgid	 d S )
Nr;   r+   r  r  r  rU   rY   r   r   )r0   rG   ru   )r   r%   ra   rH   r   r   r    test_basic_subset_columns  s   


z,TestParquetPyArrow.test_basic_subset_columnsc                 C   sL   |j |d}t|tsJ t|}t|}| }d |jd< t|| d S )Nr   rd   )	r   r   rW   r   r   r   rh   ri   rj   )r   r%   ra   Z	buf_bytesZ
buf_streamresro   r   r   r    *test_to_bytes_without_path_or_buf_provided  s   
z=TestParquetPyArrow.test_to_bytes_without_path_or_buf_providedc                 C   s8   t jtdddtdd }| ||td d S )N   r4   r+   aaar   zDuplicate column names found	r0   r1   rE   rF   ZreshaperC   r   r   r~   r   r%   rH   r   r   r    test_duplicate_columns  s   $z)TestParquetPyArrow.test_duplicate_columnsc                 C   s&   t dt jdddi}t|| d S )Nr>   1 dayr+   r<   )r0   r1   timedelta_rangeru   r"  r   r   r    test_timedelta     z!TestParquetPyArrow.test_timedeltac                 C   s&   t dg di}| ||tj d S )Nr>   r>   r)   rS   )r0   r1   r   r   ArrowExceptionr"  r   r   r    test_unsupported  s   z#TestParquetPyArrow.test_unsupportedc                 C   sH   t jddt jd}tj|dgd}tr| ||tj d S t	|| d S )Nr*   
   r8   fp16r   r   )
rE   rF   float16r0   r1   r	   r   r   r)  ru   )r   r%   r   rH   r   r   r    test_unsupported_float16  s
   z+TestParquetPyArrow.test_unsupported_float16zqPyArrow does not cleanup of partial files dumps when unsupported dtypes are passed to_parquet function in windowsr   zfloat16 works on 15	path_typec              	   C   s   t jddt jd}tj|dgd}t 2}||}ttj	 |j
||d W d    n1 s2w   Y  tj|r?J W d    d S 1 sJw   Y  d S )Nr*   r+  r8   r,  r-  )rp   r!   )rE   rF   r.  r0   r1   ri   rt   r   r   r)  r   osrp   isfile)r   r%   r0  r   rH   Zpath_strrp   r   r   r     test_unsupported_float16_cleanup  s   
"z3TestParquetPyArrow.test_unsupported_float16_cleanupc                 C   sd   t  }t td|d< t jg dt g dd|d< t jg dg dd	d
|d< t|| d S )NZabcdefr>   )rO   r,   r,   rO   NrO   rN   r8   r?   )r>   r?   rJ   r>   rJ   r?   )r?   rJ   r@   T)
categoriesZorderedrJ   )r0   r1   CategoricalrC   ZCategoricalDtyperu   r"  r   r   r    test_categorical  s   

z#TestParquetPyArrow.test_categoricalc                 C   s@   t d}|jdi |}d|i}t|||j d||d d S )Ns3fs
filesystem/pyarrow.parquetrp   rq   rr   r   )r#   r   ZS3FileSystemru   r   )r   r2   s3_public_bucketr%   s3sor7  Zs3kwr   r   r    test_s3_roundtrip_explicit_fs$  s   


z0TestParquetPyArrow.test_s3_roundtrip_explicit_fsc                 C   s(   d|i}t ||d|j d||d d S )Nstorage_optionss3://r9  r:  ru   r   )r   r2   r;  r%   r<  r   r   r    test_s3_roundtrip1  s   
z$TestParquetPyArrow.test_s3_roundtrippartition_colr.   c              
   C   sr   t d | }|r |t|tj}d}|| |||< t|||d|j	 dd|i|d |dddd	 d S )
Nr7  categoryr@  z/parquet_dirr?  )r   rc   r?  Tr)   )ro   rp   rq   rr   rf   rk   )
r#   r   r   r_   dictfromkeysrE   Zint32ru   r   )r   r2   r;  r%   rC  r<  Zexpected_dfZpartition_col_typer   r   r    test_s3_roundtrip_for_dir=  s*   

z,TestParquetPyArrow.test_s3_roundtrip_for_dirc                 C   s2   t d t }|| t|}t|| d S )Nr   )r#   r   r   r   r   ri   rj   )r   r2   bufferZdf_from_bufr   r   r    test_read_file_like_obj_supportd  s
   

z2TestParquetPyArrow.test_read_file_like_obj_supportc                 C   s   t d |dd |dd t jtdd td W d    n1 s'w   Y  t jtdd |d W d    d S 1 sDw   Y  d S )Nr   HOMEZTestingUserUSERPROFILEz.*TestingUser.*r{   z~/file.parquet)r#   r   Zsetenvr}   OSErrorr   r   )r   r2   Zmonkeypatchr   r   r    test_expand_userk  s   

"z#TestParquetPyArrow.test_expand_userc                 C   s>   ddg}|}|j ||d d t|| t|j|jksJ d S )Nr[   rY   r   rc   r   rz   r   shape)r   r   r%   ra   r   rH   r   r   r    test_partition_cols_supportedt  s
   
z0TestParquetPyArrow.test_partition_cols_supportedc                 C   s@   d}|g}|}|j ||d d t|| t|j|jksJ d S )Nr[   rN  rO  )r   r   r%   ra   r   partition_cols_listrH   r   r   r    test_partition_cols_string|  s   
z-TestParquetPyArrow.test_partition_cols_stringc                 C   s   | S r   r   )xr   r   r    <lambda>  s    zTestParquetPyArrow.<lambda>rU   zpathlib.Path)Zidsc           	      C   s<   d}|g}|}||}|j ||d t|j|jksJ d S )Nr/   )r   )r   r   rP  )	r   r   r%   r2   r0  r   rR  rH   rp   r   r   r    test_partition_cols_pathlib  s   z.TestParquetPyArrow.test_partition_cols_pathlibc                 C   s   t jg g d}t|| d S )N)r   r   r   r"  r   r   r    test_empty_dataframe  s   z'TestParquetPyArrow.test_empty_dataframec                 C   sV   dd l }tdddgi}||jd| dg}|t}t||d|i|d d S )Nr   rT  r)   )typerx   r   )	r   r0   r1   rx   fieldZbool_r_   r[   ru   )r   r%   r   rH   rx   Zout_dfr   r   r    test_write_with_schema  s
   
z)TestParquetPyArrow.test_write_with_schemac                 C   sz   t d ttjg dddtjg dddtjg dddd}t|| td	tjg d
ddi}t|| d S )Nr   r(   r  r8   ZUInt32rK   rU   r>   r?   rJ   r>   r  )r#   r   r0   r1   r   ru   r"  r   r   r     test_additional_extension_arrays  s   

z3TestParquetPyArrow.test_additional_extension_arraysc              	   C   st   t d tdtjg dddi}td| t|||d| dd	 W d    d S 1 s3w   Y  d S )
Nr   r>   rK   string[pyarrow]r8   string_storagezstring[]ro   )r#   r   r0   r1   r   r   ru   r_   )r   r%   r^  rH   r   r   r     test_pyarrow_backed_string_array  s
   
"z3TestParquetPyArrow.test_pyarrow_backed_string_arrayc                 C   sV   t d ttjg dtjddddtjtjddddd}t	|| d S )	Nr   ))r   r)   )r)   r*   )r+   r4   z
2012-01-01r+   D)r=   r   r4   )rJ   r@   rA   )
r#   r   r0   r1   ZIntervalIndexr   period_rangeZfrom_breaksrG   ru   r"  r   r   r    test_additional_extension_types  s   

z2TestParquetPyArrow.test_additional_extension_typesc                 C   s4   d}t dt jddddi}t||d|id d S )	Nz2.6r>   z
2017-01-01Z1nsr+  r   r=   versionr   )r0   r1   rG   ru   )r   r%   verrH   r   r   r    test_timestamp_nanoseconds  s   z-TestParquetPyArrow.test_timestamp_nanosecondsc                 C   sP   |j tjjkr|tjjdd d|g }tj	|d|id}t
||dd d S )Nzitemporary skip this test until it is properly resolved: https://github.com/pandas-dev/pandas/issues/37286r      index_as_colr   r   F)rg   )tzinfor\   timezoneutcr   r#   r   r   r0   r1   ru   )r   r   r%   rb   idxrH   r   r   r    test_timezone_aware_index  s   
z,TestParquetPyArrow.test_timezone_aware_indexc                 C   sz   t d tdttdi}t }|j||d t	||dgd}W d    n1 s.w   Y  t
|dks;J d S )Nr   r>   r+   r   r>   r   r   r   r)   )r#   r   r0   r1   rC   rD   ri   rt   r   r   r   )r   r%   rH   rp   r   r   r   r    test_filter_row_groups  s   

z)TestParquetPyArrow.test_filter_row_groupsc                 C   s   t jtjddg dd}t }|j||d t	||}W d    n1 s+w   Y  |r?t
|jt jjjs=J d S t
|jt jjjsJJ d S )Nr*   )r+  r+   )r.   r/   Cr   r   )r0   r1   rE   r   r   r   ri   rt   r   r   r   Z_mgrcoreZ	internalsZArrayManagerZBlockManager)r   r%   Zusing_array_managerrH   rp   r   r   r   r    test_read_parquet_manager  s   
z,TestParquetPyArrow.test_read_parquet_managerc                 C   s   dd l }|}tjdddd}|d }||d< g d|d< |j|}|jtjd	}trO|d
 	d|d
< |d 	d|d< |d 	t|j
ddd|d< t||ddi|d d S )Nr   r;   r+   r  r  r  r  r  )Ztypes_mapperr\   ztimestamp[us][pyarrow]r]   us)unitr  r  r   r!   rq   ro   )r   r0   rG   r   TableZfrom_pandasZ	to_pandasZ
ArrowDtyper   r_   	timestampru   )r   r%   ra   r   rH   r  Zpa_tablero   r   r   r    &test_read_dtype_backend_pyarrow_config  s,   

z9TestParquetPyArrow.test_read_dtype_backend_pyarrow_configc                 C   sn   t jdddgit jddgdddd	}| }d
d l}t|jtdkr+|jd|_t	||ddi|d d S )Nr>   r)   r*   r+   r4   testr   zint64[pyarrow])r   r9   r   z11.0.0r  r   ry  )
r0   r1   Indexr   r   r
   r   r   r_   ru   )r   r%   rH   ro   r   r   r   r    ,test_read_dtype_backend_pyarrow_config_index  s   
z?TestParquetPyArrow.test_read_dtype_backend_pyarrow_config_indexc                 C   s   t tdttddd}ddg|_t|| ddg|_tjtdd	 t|| W d    n1 s5w   Y  t		d
ddddt		d
ddddg|_t|| d S )Nr3   r)   r4   r   r   rL   rM   z|S3r{     )
r0   r1   rC   rD   r   ru   r#   r}   NotImplementedErrorr\   r"  r   r   r    test_columns_dtypes_not_invalid*  s   


z2TestParquetPyArrow.test_columns_dtypes_not_invalidc                 C   s(   t jt jg dddd}t|| d S )Nr[  custom namer~  r   r0   r1   r  ru   r"  r   r   r    test_empty_columns>  s   z%TestParquetPyArrow.test_empty_columnsc                 C   sR   |d }t jddgid}ddi|_|j||d t||d}|j|jks'J d S )Nztest_df_metadata.pr)   r   Ztest_attributer   )r0   r1   attrsr   r   )r   r   r%   rp   rH   Znew_dfr   r   r    test_df_attrs_persistenceC  s   
z,TestParquetPyArrow.test_df_attrs_persistencec                 C   s   |d }t jdddgiddgd}|j|dd t d	d
 t|dd}W d    n1 s/w   Y  t jdddgidt jddgddd}t|| d S )Nztest_string_inference.pr>   rT  yr?   )r   r   r   r   future.infer_stringTstring[pyarrow_numpy]r8   )r   r9   r   )r0   r1   r   r   r   r  ri   rj   r   r   r%   rp   rH   r   ro   r   r   r    test_string_inferenceK  s   
z(TestParquetPyArrow.test_string_inferenceznot supported before 11.0c                 C   st   dd l }|d }tjdtdgidd}|j||d|dfgd t|}tjdd	gid
d}t	|| d S )Nr   z	decimal.pr>   z123.00r]  r8   ri  )rx   Z123zstring[python])
r   r0   r1   r   r   rx   Z
decimal128r   ri   rj   r  r   r   r    test_roundtrip_decimalY  s    z)TestParquetPyArrow.test_roundtrip_decimalc                 C   s   dd l }dd lm} |d }|d|g d| i}||| tdd t	|}W d    n1 s8w   Y  tj
dg didtjdgddd	}t|| d S )
Nr   zlarge_string.pr>   )Nr?   rJ   r  Tr  r8   )r   r9   r   )r   Zpyarrow.parquetr   r  r   Zlarge_stringr  r0   r   r   r1   r  ri   rj   )r   r   r%   r  rp   r  r   ro   r   r   r    #test_infer_string_large_string_typee  s   

z6TestParquetPyArrow.test_infer_string_large_string_typeN)1r   r   r   r  r  r  r#  r&  r*  r/  r#   r   r   r   skipifr	   r  strpathlibPathr3  r6  r   r>  rB  rG  rI  rM  rQ  rS  rV  rW  rZ  r\  ra  rd  rh  rp  rs  rv  r|  r  r  r  r  r  r   r  r  r   r   r   r    r    sl    


		
	
r  c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jjdd Zdd Zdd Zdd Zdd Ze
jje dddd Ze
jje dddd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Ze
jje ddd-d. Zd/S )0TestParquetFastParquetc                 C   sF   |}t jdddd}|d }||d< t jddd|d< t|| d S )	Nr;   r+   z
US/Easternr  r  r$  r<   	timedelta)r0   rG   r   r%  ru   )r   r'   ra   rH   r  r   r   r    r    s   
z!TestParquetFastParquet.test_basicc                 C   s   t tdttddd}t}d}ddg|_| |||| ddg|_| |||| td	ddddtd	ddddg|_| |||| d S )
Nr3   r)   r4   r   r   r   rL   rM   r  )r0   r1   rC   rD   r   r   r   r\   )r   r'   rH   errr   r   r   r    test_columns_dtypes_invalid  s   

z2TestParquetFastParquet.test_columns_dtypes_invalidc                 C   s<   t jtdddtdd }d}| ||t| d S )Nr  r4   r+   r   r   z9Cannot create parquet dataset with duplicate column namesr!  r   r'   rH   r   r   r   r    r#    s   $z-TestParquetFastParquet.test_duplicate_columnsc                 C   s@   t dg di}t jddtjdgidd}t|||dd d S )	Nr>   )TNFr  g        r.  r8   F)ro   rg   )r0   r1   rE   r^   ru   r   r'   rH   ro   r   r   r    test_bool_with_none  s   z*TestParquetFastParquet.test_bool_with_nonec                 C   sT   t dt jddddi}| ||td  t dg di}d}| ||t| d S )Nr>   Z2013Mr+   re  r(  z"Can't infer object conversion type)r0   r1   rc  r   r~   r  r   r   r    r*    s
   z'TestParquetFastParquet.test_unsupportedc                 C   s&   t dt tdi}t|| d S )Nr>   r3   )r0   r1   r5  rC   ru   )r   r'   rH   r   r   r    r6    r'  z'TestParquetFastParquet.test_categoricalc                 C   sx   dt tdi}t|}t }|j||d dd t||dgd}W d    n1 s-w   Y  t|dks:J d S )Nr>   r+   r)   )r!   rc   Zrow_group_offsetsrq  rr  )	rC   rD   r0   r1   ri   rt   r   r   r   )r   r'   r@   rH   rp   r   r   r   r    rs    s   

z-TestParquetFastParquet.test_filter_row_groupsc                 C   s*   t ||d|j dd|id |dd d S )Nr@  z/fastparquet.parquetr?  )rc   r?  r:  rA  )r   r2   r;  r'   r<  r   r   r    rB    s   
z(TestParquetFastParquet.test_s3_roundtripc                 C   s\   ddg}|}|j |d|d d tj|sJ dd l}|t|dj}t|dks,J d S )Nr[   rY   r   r!   r   rc   r   Fr*   	r   r1  rp   existsr   ZParquetFiler  Zcatsr   r   r   r'   ra   r   rH   r   Zactual_partition_colsr   r   r    rQ       z4TestParquetFastParquet.test_partition_cols_supportedc                 C   sX   d}|}|j |d|d d tj|sJ dd l}|t|dj}t|dks*J d S )Nr[   r   r  r   Fr)   r  r  r   r   r    rS    s   z1TestParquetFastParquet.test_partition_cols_stringc                 C   s\   ddg}|}|j |dd |d tj|sJ dd l}|t|dj}t|dks,J d S )Nr[   rY   r   )r!   rc   partition_onr   Fr*   r  r  r   r   r    test_partition_on_supported  r  z2TestParquetFastParquet.test_partition_on_supportedc                 C   sX   ddg}|}d}t jt|d |j|dd ||d W d    d S 1 s%w   Y  d S )Nr[   rY   zYCannot use both partition_on and partition_cols. Use partition_cols for partitioning datar{   r   )r!   rc   r  r   )r#   r}   r~   r   )r   r   r'   ra   r   rH   r   r   r   r    3test_error_on_using_partition_cols_and_partition_on  s   "zJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_onzfastparquet writes into Indexr   c                 C   s"   t  }| }t|||d d S )Nr`  )r0   r1   r   ru   r  r   r   r    rW    s   z+TestParquetFastParquet.test_empty_dataframec                 C   s>   d|g }t j|d|id}| }d|j_t|||d d S )Nri  rj  rk  r   r`  )r0   r1   r   r   r   ru   )r   r'   rb   ro  rH   ro   r   r   r    rp    s
   
z0TestParquetFastParquet.test_timezone_aware_indexc              
   C   s   t dddgi}t g}|| tjtdd$ tt	 t
|ddd W d    n1 s2w   Y  W d    n1 sAw   Y  tjtdd t
|dd	d
 W d    n1 s_w   Y  W d    d S W d    d S 1 sww   Y  d S )Nr>   r)   r*   z!not supported for the fastparquetr{   r   T)r!   Zuse_nullable_dtypesr   r  )r0   r1   ri   rt   r   r#   r}   r~   r   r   r   )r   r'   rH   rp   r   r   r    &test_use_nullable_dtypes_not_supported'  s   

"z=TestParquetFastParquet.test_use_nullable_dtypes_not_supportedc              	   C   s   t d7}t|d tjtdd t|dd W d    n1 s&w   Y  t|j	dd W d    d S 1 s?w   Y  d S )	Ntest.parquets   breakit r{   r   r   F)
missing_ok)
ri   rt   r  r  write_bytesr#   r}   	Exceptionr   unlink)r   rp   r   r   r    $test_close_file_handle_on_read_error2  s   "z;TestParquetFastParquet.test_close_file_handle_on_read_errorc              	   C   s   t jddgddgdd}td*}t| d}|| W d    n1 s*w   Y  t||d}W d    n1 s?w   Y  t|| d S )Nr   r)   r-   r  r  wbr   )	r0   r1   ri   rt   r   encoder   r   rj   )r   r!   rH   rp   rB   r   r   r   r    test_bytes_file_name:  s   z+TestParquetFastParquet.test_bytes_file_namec              	   C     t d tjddgddgdd}t '}t jtdd |j|ddd	 W d    n1 s1w   Y  W d    n1 s@w   Y  t 7}t	
|d
 t jtdd t|ddd	 W d    n1 skw   Y  W d    d S W d    d S 1 sw   Y  d S )Nr   r   r)   r-   r  zfilesystem is not implementedr{   r,   r!   r8  rL   )r#   r   r0   r1   ri   rt   r}   r  r   r  r  r  r   r   rH   rp   r   r   r    test_filesystem_notimplementedD  (   


"z5TestParquetFastParquet.test_filesystem_notimplementedc              	   C   r  )Nr   r   r)   r-   r  z1filesystem must be a pyarrow or fsspec FileSystemr{   r,   r  rL   )r#   r   r0   r1   ri   rt   r}   r~   r   r  r  r  r   r  r   r   r    test_invalid_filesystemT  r  z.TestParquetFastParquet.test_invalid_filesystemc              	   C   s(  t d}tjddgddgdd}t ,}t jtdd |j|d|	 d	d
id W d    n1 s6w   Y  W d    n1 sEw   Y  t <}t
|d t jtdd t|d|	 d	d
id W d    n1 suw   Y  W d    d S W d    d S 1 sw   Y  d S )Nz
pyarrow.fsr   r)   r-   r  z8storage_options not supported with a pyarrow FileSystem.r{   r   r,   rO   )r!   r8  r?  rL   )r#   r   r0   r1   ri   rt   r}   r  r   ZLocalFileSystemr  r  r  r   )r   Zpa_fsrH   rp   r   r   r    .test_unsupported_pa_filesystem_storage_optionsd  s@   


"zETestParquetFastParquet.test_unsupported_pa_filesystem_storage_optionsc              	   C   s   d}t dttddi}td3}|| tjt	|d t
|dd W d    n1 s1w   Y  W d    d S W d    d S 1 sIw   Y  d S )	NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.rY   r)   r4   ztmp.parquetr{   numpy)r  )r0   r1   rC   rD   ri   rt   r   r#   r}   r~   r   )r   r!   r   rH   rp   r   r   r    test_invalid_dtype_backend  s   
"z1TestParquetFastParquet.test_invalid_dtype_backendc                 C   sF   t jt jg dddd}t jt jg dddd}t|||d d S )Nr[  r  r~  r   r`  r  r  r   r   r    r    s   z)TestParquetFastParquet.test_empty_columnsN)r   r   r   r  r  r#  r  r*  r6  rs  r#   r   r   rB  rQ  rS  r  r  r  r   rW  rp  r  r  r  r  r  r  r  r  r   r   r   r    r    s6    	



	
r  )	NNNNNTFTr*   )I__doc__r\   decimalr   r   r   r1  r  r  rE   r#   Zpandas._configr   Zpandas._config.configr   Zpandas.compatr   Zpandas.compat.pyarrowr   r   r	   Zpandasr0   Zpandas._testingZ_testingri   Zpandas.util.versionr
   Zpandas.io.parquetr   r   r   r   r   r   r"   r   r   r&   r   filterwarningsZ
pytestmarkZfixturer   r  r!   r%   r'   r2   rI   ra   nowrm  rn  minmaxstrptimerb   ru   rz   r   r   r   r   r   r   r   r   r   r   r   r  r  r   r   r   r    <module>   s    
	







B+  /   L