o
    Zh                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	Z
d dlmZ d dl	mZ d dlmZ d dlmZ d dlmZ d dlmZ zd dlmZ d dlmZmZmZ W n eyc   dZY nw zd dlZ d dl!m"Z# W n ey}   d Z Z#Y nw ej$jej$j%gZ&dd	 Z'ej$jd
d Z(ej$jdd Z)ej$jdd Z*ej$jdd Z+ej$j,e-e.fddej$jdd Z/ej$jdd Z0ej$jdd Z1ej$jdd Z2ej$jdd Z3ej$jdd Z4ej$jej$5d d!gd!gge6d"d#k e6d$d%d#k e6d$d&7e
8 d#k fej$5d'd(d)d* Z9ej$jd+d, Z:ej$j;d-d. Z<ej$j;d/d0 Z=ej$jd1d2 Z>ej$jej$j;d3d4 Z?ej$jej$j;d5d6 Z@d7d8 ZAd9d: ZBd;d< ZCej$jd=d> ZDej$jd?d@ ZEej$jdAdB ZFej$jdCdD ZGej$jdEdF ZHej$jdGdH ZIddKdLZJdMdN ZKej$jej$5dOdPdQgdRdS ZLej$jdTdU ZMej$jdVdW ZNej$jej$5dOdPdQgdXdY ZOdZd[ ZPd\d] ZQ			dd^d_ZR	dd`daZSej$jdbdc ZTej$jddde ZUej$jdfdg ZVej$jdhdi ZWej$jdjdk ZXej$jej$j;dldm ZYej$jej$j;dndo ZZej$jej$j;dpdq Z[ej$jdrds Z\ddudvZ]ej$jdwdx Z^ej$jdydz Z_d{d| Z`d}d~ Zadd Zbdd Zcdd Zddd Zedd Zfej$5dddd Zgej$jdd ZhdS )    N)fs)LocalFileSystem)util)guid)Version)_read_table_test_dataframe_write_tablec                 C   s   t dg di}| d }|  |d }t|t| tj|t d}|	|s-J tjdt
| d}|	|s>J d S )Na         data_dirdata.parquet
filesystemzdata_dir/data.parquet)patablemkdirpqwrite_tablestr
read_tabler   r   equalsr   Z_filesystem_uri)tempdirr   	directorypathresult r   Y/var/www/html/lang_env/lib/python3.10/site-packages/pyarrow/tests/parquet/test_dataset.pytest_filesystem_uri8   s   
r!   c                 C   s   t  }t||  d S N)r   _get_instance_partition_test_for_filesystem)r   r   r   r   r    test_read_partitioned_directoryK   s   r%   c                 C   sB   t  }| }t|| t|}|jdgd}|jdgksJ d S )Nvaluescolumns)r   r#   r$   r   ParquetDatasetreadcolumn_names)r   r   	base_pathdatasetr   r   r   r    'test_read_partitioned_columns_selectionQ   s   

r.   c                 C   s  t  }| }ddg}g d}ddg}d|gd|gd|gg}tjtj|d	d
dtttj|td
ddtttj|dd
dddg dd}t	|||| t
j||g dd}| }	|	 jdd}
d|
d jvsrJ d|
d jvs{J d|
d jvsJ g dddgg}t
j|||d}| }	|	 jdd}
|
d dk|
d dk@ |
d dk@ }t|
d dk|
d dk@ }| dksJ | dksJ |
jd | |  ksJ dggdggfD ]}t
j|||d}| jdksJ qd S )Nr   r   r
   bcTFintegerstringbooleani4Zdtype      r   boolr   r2   r3   r4   r'   ))r2   =r   )r3   !=r0   )r4   ==Truer   filtersdropr0   )r2   r;   r   )r4   r=   Falser>   rC   )r3   r=   s   1 a)r3   r=   z1 a)r   r#   pd	DataFramenparrayrepeattileobject_generate_partition_directoriesr   r)   r*   	to_pandasreset_indexr&   sumshapenum_rows)r   r   r,   integer_keysstring_keysboolean_keyspartition_specdfr-   r   	result_dfr@   Zdf_filter_1Zdf_filter_2r   r   r    test_filters_equivalency^   sn   



rW   c                 C      t  }| }g d}d|gg}d}tjt|tj|dddddgd}t|||| tj	||d	d
gd}|
 }| jddjdd}	dd tt|	d jD }
|
ddgks[J d S )Nr   r   r   r      integersr8   r5   r6   indexr[   r]   r'   )r[   <rZ   )r[   >r   r?   ZbyTrA   c                 S   s   g | ]}|qS r   r   .0xr   r   r    
<listcomp>   s    z9test_filters_cutoff_exclusive_integer.<locals>.<listcomp>r   r   r   r#   rD   rE   rF   arangerG   rK   r   r)   r*   rL   sort_valuesrM   mapintr&   r   r   r,   rQ   rT   NrU   r-   r   rV   Zresult_listr   r   r    %test_filters_cutoff_exclusive_integer   6   rl   z5Loss of type information in creation of categoricals.)raisesreasonc              	   C   s  t  }| }tdddtdddtdddtdddtdddg}d|gg}d	}tjt|tj|d
ddddgd}t	|||| t
j||ddgd}| }| jddjdd}	tjtjtdddgd
dtj|d
dd}
|	d j|
ksJ d S )Ni  rZ   	   
            datesr8   Z
datetime64r6   )r]   ru   r]   r'   )ru   r^   z
2018-04-12)ru   r_   z
2018-04-10r?   r`   TrA   
categories)r   r#   datetimedaterD   rE   rF   rf   rG   rK   r   r)   r*   rL   rg   rM   Categoricalr&   )r   r   r,   Z	date_keysrT   rk   rU   r-   r   rV   expectedr   r   r    &test_filters_cutoff_exclusive_datetime   sF   r|   c              	   C   sp   | d }t t jddddtddj|dd tj|d	d
tdddfgd}|d	 g dks6J d S )Nztimestamps.parquetz
2020-01-01rq   D)Zperiodsfreq)ru   idT)Zuse_deprecated_int96_timestampsru   <=i  r   r8   r@   r   rY   )
rD   rE   Z
date_rangerangeZ
to_parquetr   r   rx   column	to_pylist)r   r   r   r   r   r    test_filters_inclusive_datetime   s   r   c                 C   rX   )NrY   r[   r8   r5   r6   r\   r]   r'   )r[   r   r   )r[   z>=r   r?   r`   TrA   c                 S   s   g | ]}t |qS r   )ri   ra   r   r   r    rd   '      z2test_filters_inclusive_integer.<locals>.<listcomp>r   r   re   rj   r   r   r    test_filters_inclusive_integer	  rm   r   c                 C   s~  t  }| }ddg}g d}ddg}d|gd|gd|gg}tjtj|d	d
dtttj|td
ddtttj|dd
dddg dd}t	|||| t
j||dgd}| }	|	 jdd}
d|
d jv sqJ d|
d jv szJ d|
d jvsJ t
j||dddgfddddhfgd}| }	|	 jdd}
d|
d jvsJ d|
d jvsJ d|
d jvsJ d S )Nr   r   r/   TFr2   r3   r4   r5   r6   r7   r8   r   r9   r   r:   r'   )r3   inabr?   rA   r
   r0   r1   r   )r3   r   r
   r0   znot inrC   )r   r#   rD   rE   rF   rG   rH   rI   rJ   rK   r   r)   r*   rL   rM   r&   )r   r   r,   rQ   rR   rS   rT   rU   r-   r   rV   r   r   r    test_filters_inclusive_set+  sN   
r   c                 C   sX  t  }| }g d}d|gg}d}tjt|tj|dddddgd}t|||| t	t
 tj||d	gd
 W d    n1 sEw   Y  t	t tj||dgd
 W d    n1 scw   Y  tj||ddt fgd
}| jdks~J tj||dddhfgd
}t	t | jdksJ W d    d S 1 sw   Y  d S )NrY   r[   r8   r5   r6   r\   r]   r'   )r[   r   r   r?   )r[   z=<r   r   r   r<   r   )r   r#   rD   rE   rF   rf   rG   rK   pytestrn   	TypeErrorr   r)   
ValueErrorsetr*   rP   NotImplementedError)r   r   r,   rQ   rT   rk   rU   r-   r   r   r    test_filters_invalid_pred_opZ  sJ   "r   c                 C   s   t  }| }g d}d|gg}d}tjt|tj|dddddgd}t|||| d	}tj	t
|d
 tj||dgd  W d    d S 1 sLw   Y  d S )NrY   r[   r8   r5   r6   r\   r]   r'   z1No match for FieldRef.Name\(non_existent_column\)match)Znon_existent_columnr^   r   r?   )r   r#   rD   rE   rF   rf   rG   rK   r   rn   r   r   r)   r*   )r   r   r,   rQ   rT   rk   rU   msgr   r   r    test_filters_invalid_column  s&   
"r   r@   )r[   r^   r   r[   r   nestedr
   r0   read_method)r   read_pandasc              	   C   s   t t|}t }| }g d}d|gg}t|}tt|tj	|ddt	dd t
|D d}	t||||	 t||d}
||fi |
}|jd	ksOJ d S )
NrY   r[   r5   r6   c                 S   s   g | ]	}|t |d qS )r   )r   rb   ir   r   r    rd         z+test_filters_read_table.<locals>.<listcomp>)r]   r[   r   r?   r   )getattrr   r   r#   lenrD   rE   rF   rf   rG   r   rK   dictrP   )r   r@   r   r*   r   r,   rQ   rT   rk   rU   kwargsr   r   r   r    test_filters_read_table  s    
	r   c           	      C   s   t  }| }ddg}d|gg}d}tjt|tj|dddddgd	}t|||| t	|}|
 }|d |ks@J d S )
NZ2019_2Z2019_3	year_weekr   rJ   r6   )r]   r   r]   r'   )r   r#   rD   rE   rF   rf   rG   rK   r   r)   r*   r   r   )	r   r   r,   rR   rT   rk   rU   r-   r   r   r   r    $test_partition_keys_with_underscores  s    
r   c                 C   sN   | \}}|d }t dg di}t|||d t||d}||s%J d S Nz/test.parquetr
   r   r   r   r   r	   r   r   )s3_example_s3fsr   r   r   r   r   r   r    test_read_s3fs     r   c                 C   sN   | \}}|d }t dg di}t|||d t||d}||s%J d S r   r   )r   r   r   r   r   r   r   r   r    test_read_directory_s3fs  r   r   c                 C   sJ   t | d }tdg di}t|| t|g }||s#J d S )Nr   r
   r   )r   r   r   r	   r   r)   r*   r   )r   Z	data_pathr   r   r   r   r    test_read_single_file_list  s
   
r   c                 C   s   dd l }ddlm} t|jtdkrtd | \}}tt ||}W d    n1 s0w   Y  t	|| t
j||d}|  d S )Nr   )S3FSWrapperz0.5z+S3FSWrapper no longer working for s3fs 0.5+r   )s3fspyarrow.filesystemr   r   __version__r   skipZwarnsFutureWarningr$   r   r)   r*   )r   r   r   r   r   wrapperr-   r   r   r    ,test_read_partitioned_directory_s3fs_wrapper  s   


r   c                 C   s   | \}}t || d S r"   )r$   r   r   r   r   r   r    $test_read_partitioned_directory_s3fs
  s   r   c                 C   s  ddg}g d}d|gd|gg}d}t jt|tj|ddd	tttj|tdd
dtj	|dg dd}t
| ||| tj|| d}| }| jddjdd}	|jddjddj|	jd}
|
d d|
d< |
d d|
d< |	jg dk sJ t|	|
 d S )Nr   r   r/   foobar   r5   r6   r7   r8   r   )r]   r   r   r&   r'   r   r]   r`   TrA   category)r]   r&   r   r   )rD   rE   rF   rf   rG   rH   rI   rJ   randomrandnrK   r   r)   r*   rL   rg   rM   reindexr(   astypealltmassert_frame_equal)r   r,   Zfoo_keysZbar_keysrT   rk   rU   r-   r   rV   Zexpected_dfr   r   r    r$     s>   

r$   c                    sB   t  tdtdd fdd|dg  d S )Npathsepsep/c              	      sH  | \}}|D ]}|||fg } t| d||g}| | d kr| |t g}t|}	tj|	}
	|d}t
|
| W d    n1 sRw   Y  |s^J  |dg}	|d}W d    n1 svw   Y  q||d |  |dg}	|d}W d    n1 sw   Y  qd S )Nz{}={}r   wbZ_SUCCESS)joinr   formatr   r   _filter_partitionr   Tablefrom_pandasopenr	   exists)base_dirlevel	part_keysnamer&   valueZthis_part_keysZ	level_dir	file_pathZfiltered_dfZ
part_tablefZfile_successZDEPTH_visit_levelrU   r   rT   r   r   r    r   @  s8   


z5_generate_partition_directories.<locals>._visit_levelr   )r   r   )r   r   rT   rU   r   r   r    rK   8  s   rK   c                 C   sl   t jt| td}g }|D ]\}}|| t|tjtjfr$t	|}|| | |kM }q| | j
|ddS )Nr6   r   )Zaxis)rF   Zonesr   r9   append
isinstancerx   ry   rD   	TimestamprB   )rU   r   	predicateZto_dropr   r   r   r   r    r   `  s   

r   c                 C   s   | d }|   tjtdg di}t||d  | d }|   tjtdg di}t||d  tj| dggd}|	d
tg dgsSJ d S )	NzA=0Br   r   zA=1r/   )Ar=   r   r   )r   r   r   r   rD   rE   r   r   r   r   r   Zchunked_array)r   Zdir1Ztable1Zdir2Ztable2r   r   r   r    "test_filter_before_validate_schemap  s   $r   c                    sx  d}d}| t   }|  g }g }t|D ].}t||d}|d tj|d< |d| }tj	
|}	t|	| ||	 || q|d   ddd	}
|
| t|} |s_J d
dd jd g} fdd|D }tj||d}tj	j fdd|D | jjd}||sJ tj|dd t||djd d d df }| dt   }tj	
|}t|| d S )Nrq   r8   seedZuint32
{}.parquetz_SUCCESS.crcTc                 [   s    t j| fi |}|j||dS )N)r(   use_threads)r   r)   r*   )pathsr(   r   r   r-   r   r   r    read_multiple_files  s   z5test_read_multiple_files.<locals>.read_multiple_filesr   r      r   c                    s   g | ]}  |jqS r   )fieldr   r   r   r   r    rd         z,test_read_multiple_files.<locals>.<listcomp>r'   c                    s   g | ]}  |qS r   )r   r   r   r   r    rd         )namesmetadata)r   rZ   )NT)r   r   r   r   r   rF   int64r   r   r   r   r	   r   touchZconcat_tablesr   num_columnsr   r   Zfrom_arraysschemar   Ziloc)r   nfilessizedirpath	test_datar   r   rU   r   r   r   r{   Zto_readZ	col_namesoutZ	bad_appleZbad_apple_pathtr   r   r    test_read_multiple_files  s@   




r   c                    s(  d}d}| t   }|  g }g }g }t|D ]:}t||d}t|| |d | |_d|j_|d| }	t	j
|}
t|
|	 ||
 || ||	 qt|}ddg |j d }t fd	d
|D }t|| |jt d }|j|jksJ t|j|jd| d S )Nr8   r   r   r]   r   uint8stringsr'   c                    s   g | ]}|  qS r   r   ra   r'   r   r    rd     r   z,test_dataset_read_pandas.<locals>.<listcomp>)r   r   r   r   rF   rf   r]   r   r   r   r   r   r	   r   r   r)   r   rL   rD   concatr   r   r   rO   r   r(   )r   r   r   r   r   framesr   r   rU   r   r   r-   r   r{   r   r'   r    test_dataset_read_pandas  s2   




r   c                 C   sj   | t   }|  tddd}|dd }tj|}t||dd tj	|dd}|
 |s3J d S )	Nrq   r   r   r   2.6versionT)Z
memory_map)r   r   r   r   r   r   r   r	   r   r)   r*   r   )r   r   rU   r   r   r-   r   r   r    test_dataset_memory_map  s   
r   c                 C   s   | t   }|  tddd}|dd }tj|}t||dd t	t
 tj|dd W d    n1 s:w   Y  d	D ]}tj||d}| |sSJ qAd S )
Nrq   r   r   r   r   r   i)buffer_size)   i   )r   r   r   r   r   r   r   r	   r   rn   r   r   r)   r*   r   )r   r   rU   r   r   r   r-   r   r   r    #test_dataset_enable_buffered_stream  s"   
r   c                 C   s   | t   }|  tddd}|dd }tj|}t||dd dD ] }tj	||d}|
 |s7J tj||d}||sEJ q%d S )	Nrq   r   r   r   r   r   )TF)
pre_buffer)r   r   r   r   r   r   r   r	   r   r)   r*   r   r   )r   r   rU   r   r   r   r-   actualr   r   r    test_dataset_enable_pre_buffer  s   
r   rq   r8   c                 C   sN   g }g }t |D ]}t||d}| d| }|t|| || q|S )Nr   r   )r   r   r   r   r	   )r,   r   
file_nrowsr   r   r   rU   r   r   r   r    _make_example_multifile_dataset*  s   r  c                 C   s(   dd |D }t |t | jksJ d S )Nc                 S   s   g | ]}t | qS r   )r   as_posix)rb   r   r   r   r    rd   7  r   z)_assert_dataset_paths.<locals>.<listcomp>)r   files)r-   r   r   r   r    _assert_dataset_paths6  s   r  
dir_prefix_.c                 C   sJ   | t   }|  t|ddd}|d|   t|}t|| d S )Nrq   r8   r   r  z	{}staging)r   r   r  r   r   r)   r  r   r  r   r   r-   r   r   r    test_ignore_private_directories;  s   

r  c                 C      | t   }|  t|ddd}|d d}|d W d    n1 s'w   Y  |d d}|d W d    n1 sCw   Y  t|}t|| d S )Nrq   r8   r	  z	.DS_Storer   s	   gibberishz.privater   r   r  r   writer   r)   r  r   r   r   r   r-   r   r   r    test_ignore_hidden_files_dotL     

r  c                 C   r  )Nrq   r8   r	  Z_committed_123r   s   abcdZ_started_321r  r  r   r   r    #test_ignore_hidden_files_underscore_  r  r  c                 C   sZ   | d | t  }|jdd t|ddd}t|}t|| t|}t|| d S )Nz{0}dataTparentsrq   r8   r	  )r   r   r   r  r   r)   r  r
  r   r   r    /test_ignore_no_private_directories_in_base_pathr  s   


r  c                 C   s   dgd dgd  }t jt tt|t | gddgd}tj|t| dgd | d }|	  tj|t|dgd tj
| d	gd
}||sNJ d S )NZxxxr   Zyyyr]   Z_partr   partition_colsZ_private_duplicateZ_private)Zignore_prefixes)r   r   rG   r   r   dictionary_encoder   write_to_datasetr   r   r   r   )r   partr   Zprivate_duplicater*   r   r   r    test_ignore_custom_prefixes  s"   r  c                 C   sB   | d }|   t|}| }|jdksJ |jdksJ d S )Nr-   r   )r   r   r)   r*   rP   r   )r   Z	empty_dirr-   r   r   r   r    test_empty_directory  s   
r  c                 C   s  dd l }dd lm} dd lm} |tdtdttdtj	gd tj
ddddd	d
}|j }ddg}	tjj||ddd}
|j|
| |	|d tjt| d}|d urw||d}||
j| W d    n1 sqw   Y  n||
j| |j| |d}t|jj}|t|
jjksJ | }| }|j }|	|dt|	 d  ksJ || }|	D ]}|| d||< q|r|dj ! }|d ||d< |"|| d S )Nr   
aaabbbbccc
eefeffgeeerq   
2017-01-01
2017-01-11datetime64[D]r6   datetime64[ns])group1group2numnanry   r$  r%  F)r   safeZpreserve_indexr   Z_common_metadatar   r   ry   )#pandaspandas.testingtestingpyarrow.parquetparquetrE   listr   rF   r'  rf   r   r(   tolistr   r   r   r  osr   r   r   r   write_metadatar   r)   r   r   r*   rL   r   r   typeZto_pandas_dtyper   )r,   r   r   
index_namerD   r   r   	output_dfcolsZpartition_byoutput_tablemetadata_pathr   r-   Zdataset_colsinput_tableinput_dfZinput_df_colscolZexpected_date_typer   r   r    &_test_write_to_dataset_with_partitions  sV   




r<  c              
   C   s   dd l }dd lm} |tdtdttdtjddddd	d
}|j	
 }tj|}|d u r8t }d}t|D ]
}|j|| |d q>dd |t| D }	t|	|ks]J |j| |d }
|
 }| }|| }t|| d S )Nr   r  r  rq   r   r!  r"  r6   r#  )r$  r%  r&  ry   r8   r   c                 S   s   g | ]	}| d r|qS )z.parquet)endswith)rb   filer   r   r    rd     s    
z8_test_write_to_dataset_no_partitions.<locals>.<listcomp>)r*  r-  r.  rE   r/  r   rF   rf   r   r(   r0  r   r   r   r   r#   r  Zlsr   r   r)   r*   rL   Zdrop_duplicatesr   r   )r,   r   rD   r   r5  r6  r7  nr   Zoutput_filesr9  r:  r   r   r    $_test_write_to_dataset_no_partitions  s<   

r@  c                 C      t t|  d S r"   r<  r   r   r   r   r    %test_write_to_dataset_with_partitions
     rD  c                 C   sr   t t jdt  dt jdt  dt jdt  dt jdt  dt jdt jdddg}tt| |d	 d S )
Nr$  )r3  r%  r&  r'  ry   us)unitr   )	r   r   r   r3   r   int32	timestampr<  r   )r   r   r   r   r    0test_write_to_dataset_with_partitions_and_schema  s   
rK  c                 C   s   t t| dd d S )Nr4  )r4  rB  rC  r   r   r    4test_write_to_dataset_with_partitions_and_index_name  s   
rL  c                 C   rA  r"   )r@  r   rC  r   r   r    #test_write_to_dataset_no_partitions   rE  rM  c                 C   s   t | d  t| d  d S )Ntest1test2)r<  r@  rC  r   r   r    test_write_to_dataset_pathlib%  s   rP  c                 C   s   |\}}t jtdd t| d |d W d    n1 sw   Y  t jtdd t| d |d W d    d S 1 s>w   Y  d S )Nz"path-like objects are only allowedr   rN  r   rO  )r   rn   r   r<  r@  )r   r   r   r  r   r   r    &test_write_to_dataset_pathlib_nonlocal+  s   "rQ  c                 C      | \}}t ||d d S Nr   )r<  r   r   r   r    *test_write_to_dataset_with_partitions_s3fs:     
rT  c                 C   rR  rS  )r@  r   r   r   r    (test_write_to_dataset_no_partitions_s3fsC  rU  rV  c                 C   sV   t dg di}tj|}t| }tj||t	 d t
|}||s)J d S )Nr   r   r   )rD   rE   r   r   r   r   r   r  r   r   r   r   )r   rU   r   r   r   r   r   r     test_write_to_dataset_filesystemL  s   
rW  d   c                 C   s   | d }t  }tjt|tj|dddgd}tj	
|}d}t||j}t|D ]}|| q/W d    n1 sAw   Y  t|}	|	jj|ksSJ | d }
||
d}t|j| W d    n1 sow   Y  tj| |d	}|S )
Nr   )r]   r&   r]   r&   r'   r   	_metadatar   r   )r   r#   rD   rE   rF   rf   r   r   r   r   r   r   ZParquetWriterr   r   r   ZParquetFiler   Znum_row_groupsr   r2  r)   )r   rk   r   r   rU   r   Z
num_groupswriterr   readerr8  r   r-   r   r   r    _make_dataset_for_picklingW  s2   

r\  c                    s$    fdd}t | }||sJ d S )Nc                    s   |    | kS r"   )loadsdumps)objpickle_moduler   r    is_pickleableu  s   z*test_pickle_dataset.<locals>.is_pickleable)r\  )r   ra  rb  r-   r   r`  r    test_pickle_datasets  s   rc  c                 C   sl   | d }t g dg dg dd}tj|}tj|t|ddgd t|	 }t
||d	  d S )
Nz
ARROW-3208)r)  rq   g      @rX    r   g333333=@)r)  rq   r   rX  rd  r   rr   )r   r   r   r   r   r   r   )onetwothreere  rf  )	root_pathr  zoutput.parquet)rD   rE   r   r   r   r   r  r   r)   r*   r   )r   r   rU   r   r   r   r    test_partitioned_dataset|  s   ri  c                 C   s(  | d }t jdd tdD d gdgd}t jdd tdD d gdgd}tj|t|d	 tj|t|d	 tj|dgd
 }|d d	 |d d	 g}|d j
dks_J |d d|d d}}||d r||d s~J d S ||d sJ ||d sJ d S )NzARROW-3325-datasetc                 S      g | ]}t d qS rq   r   Zrandsr   r   r   r    rd     r   z0test_dataset_read_dictionary.<locals>.<listcomp>r8   rq   Zf0r  c                 S   rj  rk  rl  r   r   r   r    rd     r   )rh  )Zread_dictionaryr   r   r   )r   r   r   r   r  r   r)   r*   chunkr  Z
num_chunksr   )r   r   t1t2r   Z	ex_chunksZc0c1r   r   r    test_dataset_read_dictionary  s&   $$rq  c                 C   s   t dt g dt  i}t|| d  t|| d  t dg}tj| d |d}t jdg di|d}||s@J tj| |d}t jdg di|d}||sYJ tj	| |d}t jdg di|d}|
 |stJ d S )Nr
   r   zdata1.parquetzdata2.parquet)r
   r   rH  )r   r   r   r   r   r   )r   r   rG   rI  r   r   r   r   r   r)   r*   )r   r   r   r   r{   r   r   r    test_read_table_schema  s   rr  c                 C   s   t t g dt  t g dt  d}t|| d  tj| d ddgd}t ddg}|j	ddgks;J |j|ksBJ d S )Nr   r   r   r
   r'   )r
   rI  )
r   r   rG   rI  r   r   r   r   r   r+   )r   r   r   Zexpected_schemar   r   r    *test_read_table_duplicate_column_selection  s   rs  c                 C   s   dd l m} | d }|d d d jdd tdg d	i}t|t|d d d d
  |jg dd}tj	t||d}|j
g dksIJ tjt||d }|j
g dks]J d S )Nr   Ztest_partitioningZ201210Z01Tr  r
   r   r   )yearmonthday)field_names)partitioning)r
   ru  rv  rw  )pyarrow.datasetr-   r   r   r   r   r   r   ry  r   r+   r)   r*   )r   dsrh  r   r  r   r   r   r    test_dataset_partitioning  s$   r|  c                 C   s^   t dg di}t|| d  tt| t }tjd|d}|	 }|
|s-J d S )Nr
   r   r   r  r   )r   r   r   r   r   ZSubTreeFileSystemr   r   r)   r*   r   )r   r   r   r-   r   r   r   r    #test_parquet_dataset_new_filesystem  s   r}  c                 C   st   t d}|d}tdg di}t|| d  t| dd}tj	||d}|d	 }|j
d
 j|ks8J d S )Nfsspecr>  r
   r   r   \r   r   z/data.parquetr   )r   Zimportorskipr   r   r   r   r   r   replacer)   	fragmentsr   )r   r~  r   r   r   r-   r{   r   r   r    6test_parquet_dataset_partitions_piece_path_with_fsspec  s   

r  c                    s   t dg di}| d }g   fdd}d}tj||dg||d |d d	 |d
 d	 |d d	 h}tttj }||ksAJ d S )Nr
   r   ry  c                    s     | j d S r"   )r   r   )Zwritten_fileZpaths_writtenr   r    file_visitor  s   zDtest_parquet_write_to_dataset_exposed_keywords.<locals>.file_visitorzpart-{i}.parquet)ry  r  basename_template1zpart-0.parquet23)r   r   r   r  r   rh   pathlibPath)r   r   r   r  r  Zexpected_pathsZpaths_written_setr   r  r    .test_parquet_write_to_dataset_exposed_keywords  s   


r  write_dataset_kwarg))
create_dirT)r  Fc                 C   s   ddl m} tdg di}| d }t|j}|\}}|ttjj	vs(J ||j	v s/J t
jj|ddd%}tj||fi ||i |jd \}	}
}|| |ksUJ W d   dS 1 s`w   Y  dS )	zEVerify kwargs in pq.write_to_dataset are passed onto ds.write_datasetr   Nr
   r   zout.parquetwrite_datasetT)Zautospec)rz  r-   r   r   inspect	signaturer  r   r  
parametersmockpatchrJ   Z
mock_calls)r   r  r{  r   r   r  keyargZmock_write_dataset_name_argsr   r   r   r    #test_write_to_dataset_kwargs_passed  s   "r  c                 C   s   t t jg dg ddg dd}t|}| d }tj|| d dgd d	d
 | D }t|dks8J d|vs>J d S )N)r
   r0   r
   r/   rv   r   )catr;  r-   r  r  c                 S   s   g | ]	}|  r|jqS r   )is_dirr   )rb   r   r   r   r    rd   :  r   z;test_write_to_dataset_category_observed.<locals>.<listcomp>r   zcat=c)	rD   rE   rz   r   r   r   r  iterdirr   )r   rU   r   r   subdirsr   r   r    'test_write_to_dataset_category_observed,  s   
r  )rq   r8   )NNNr"   )rX  )irx   r  r1  r  numpyrF   r   Zunittest.mockr  Zpyarrowr   Zpyarrow.computeZcomputeZpcr   r   r   Zpyarrow.testsr   Zpyarrow.utilr   Zpyarrow.vendored.versionr   r-  r.  r   Zpyarrow.tests.parquet.commonr   r   r	   ImportErrorr*  rD   r+  r,  r   markr-   Z
pytestmarkr!   r%   r.   rW   rl   Zxfailr   AssertionErrorr|   r   r   r   r   r   Zparametrizer   castr   r   r   Zs3r   r   r   r   r   r$   rK   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r<  r@  rD  rK  rL  rM  rP  rQ  rT  rV  rW  r\  rc  ri  rq  rr  rs  r|  r}  r  r  r  r  r   r   r   r    <module>   s*  


F
!*

!
.
(







'(

G
%






>
(












