o
    թZh%R                     @   sp  d Z ddlmZ ddlmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ ejdZejdZejd	Zed
d Zdd Zejdg dddggdd Zejdddgdd Zedd Zdd Zedd Zejdddd gid!fdd"gdd gd#d$fdd"gdd gd%d&fgd'd( Zed)d*d+gZ eejd,ddd"gid-g d.d/d-e d0d1e d0d2e d0d3e d4d5e d6d7e d6d8gd/gd9d: Z!eejd,ddd"gid;g d.d/d;e d0d1e d0d2e d0d3e d4d5e d6d7e d6d8gd/gd<d= Z"eejd,ddd"gid;g d.d/d;e d0d1e d0d2e d0d3e d4d5e d6d7e d6d8gd/gd>d? Z#ed@dA Z$edBdC Z%edDdE Z&edFdG Z'ejdHdIdJgdKdL Z(eejd,i ddigdMdN Z)ejdOi g dPfdQg dRig dRfgdSdT Z*ejdd0d4gdUgdVdW Z+edXdY Z,eejdZd[eg d\ge-g d]d^fd_eg d\ge-g d`d^fdaeg dbge-g dcd^fgddde Z.eejdddggejdfddgdhgdhdggdhdiggdjdk Z/edldm Z0edndo Z1edpdq Z2edrds Z3edtdu Z4dvdw Z5edxdy Z6edzd{ Z7ed|d} Z8d~d Z9dS )zx
Tests that the file header is properly handled or inferred
during parsing for all of the parsers defined in parsers.py
    )
namedtuple)StringION)ParserError)	DataFrameIndex
MultiIndexz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningZpyarrow_xfailZpyarrow_skipc                 C   sT   | }d}t jt|d td}|j|dgd W d    d S 1 s#w   Y  d S )Nzbut only \d+ lines in filematchz,,
   header)pytestraises
ValueErrorr   read_csv)all_parsersparsermsgs r   Y/var/www/html/lang_env/lib/python3.10/site-packages/pandas/tests/io/parser/test_header.pytest_read_with_bad_header   s   "r   c                 C   sN   | }d}t jtdd |jt|dd W d    d S 1 s w   Y  d S )N$1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
zUPassing negative integer to header is invalid. For no header, use header=None insteadr   r   r   r   r   r   r   r   r   datar   r   r   test_negative_header'   s   "r   r   )r         c                 C   sN   | }d}t jtdd |jt||d W d    d S 1 s w   Y  d S )Nz<1,2,3,4,5
        6,7,8,9,10
        11,12,13,14,15
        z8cannot specify multi-index header with negative integersr   r   r   )r   r   r   r   r   r   r    test_negative_multi_index_header6   s   "r!   TFc                 C   sR   | }d}d}t jt|d |jt||d W d    d S 1 s"w   Y  d S )NzMyColumn
a
b
a
bz#Passing a bool to header is invalidr   r   )r   r   	TypeErrorr   r   )r   r   r   r   r   r   r   r   test_bool_header_argD   s   "r#   c                 C   sZ   | }d}g d}|j t||d}tg dg dg dgg dg dd}t|| d S )	Nzfoo,1,2,3
bar,4,5,6
baz,7,8,9
ABCnames   r      )r         )      	   )foobarbazindexcolumnsr   r   r   tmassert_frame_equal)r   r   r   r)   resultexpectedr   r   r   test_header_with_index_colS   s   r=   c                 C   sD   | }d}d}|j t|ddd}|j t|ddd}t|| d S )Nzggot,to,ignore,this,line
got,to,ignore,this,line
index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
z7index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
r   r   r   	index_colr   r   r9   r:   )r   r   r   Zdata2r;   r<   r   r   r   test_header_not_first_linee   s   rA   c                    s   | }d}|j t|g dddgd}dd   fdd	td
D }tjdd	 td
D dd	 td
D gddgd}tjdd	 tdD dd	 tdD dd	 tdD dd	 tdD gg dd}t|||d}t|| d S )N  C0,,C_l0_g0,C_l0_g1,C_l0_g2

C1,,C_l1_g0,C_l1_g1,C_l1_g2
C2,,C_l2_g0,C_l2_g1,C_l2_g2
C3,,C_l3_g0,C_l3_g1,C_l3_g2
R0,R1,,,
R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
r   r+   r   r,   r   r+   r>   c                 S   s   d|  d| S )NRr'   r   )rcr   r   r   <lambda>   s    z)test_header_multi_index.<locals>.<lambda>c                    s$   g | ]  fd dt dD qS )c                    s   g | ]} |qS r   r   ).0rF   )
data_gen_frE   r   r   
<listcomp>       z6test_header_multi_index.<locals>.<listcomp>.<listcomp>r,   )range)rH   rI   )rE   r   rJ      s   $ z+test_header_multi_index.<locals>.<listcomp>r-   c                 S      g | ]}d | qS )ZR_l0_gr   rH   ir   r   r   rJ      rK   c                 S   rN   )ZR_l1_gr   rO   r   r   r   rJ      rK   ZR0ZR1r(   c                 S   rN   )ZC_l0_gr   rO   r   r   r   rJ      rK   r,   c                 S   rN   )ZC_l1_gr   rO   r   r   r   rJ      rK   c                 S   rN   )ZC_l2_gr   rO   r   r   r   rJ      rK   c                 S   rN   )ZC_l3_gr   rO   r   r   r   rJ      rK   )ZC0ZC1ZC2ZC3)r7   r6   )r   r   rL   r   Zfrom_arraysr   r9   r:   )r   r   r   r;   r6   r7   r<   r   rM   r   test_header_multi_indexy   s&   "	rQ   z
kwargs,msgr?   r2   r3   zLindex_col must only contain row numbers when specifying a multi-index headerr+   )r?   r)   z9cannot specify names when specifying a multi-index header)r?   usecolsz;cannot specify usecols when specifying a multi-index headerc                 C   sZ   d}| }t jt|d |jt|fdg di| W d    d S 1 s&w   Y  d S )NrB   r   r   rC   r   )r   kwargsr   r   r   r   r   r   test_header_multi_index_invalid   s
    "rT   
_TestTuplefirstsecondrS   r,   )aq)rY   rE   )rY   r   )bt)rF   u)rF   v)Zskiprowsr)   rY   rZ   rE   r   r[   r\   rF   r]   r^   c                 C   Z   | }t g dg dgddgtg dd}d}|jt|fdd	i|}t|| d S )
Nr+   r   r,   r   r-   r.   r/   r0   r1   r
         onetworX   r5   zC,a,a,a,b,c,c
,q,r,s,t,u,v
,,,,,,
one,1,2,3,4,5,6
two,7,8,9,10,11,12r?   r   r   r   from_tuplesr   r   r9   r:   r   rS   r   r<   r   r;   r   r   r   &test_header_multi_index_common_format1   s   ri   r   c                 C   r_   )
Nr`   ra   rd   re   rX   r5   z<,a,a,a,b,c,c
,q,r,s,t,u,v
one,1,2,3,4,5,6
two,7,8,9,10,11,12r?   r   rf   rh   r   r   r   &test_header_multi_index_common_format2   s   rj   c                 C   sf   | }t g dg dgddgtg dd}|jdd}d	}|jt|fd
d i|}t|| d S )Nr`   ra   rd   re   rX   r5   T)Zdrop2a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12r?   )r   r   rg   Zreset_indexr   r   r9   r:   rh   r   r   r   &test_header_multi_index_common_format3*  s   rl   c                 C   s   | }t tjg dg dgddtddgtg dg dgg d	g d
gddgdd}d}|jt|ddgdd}t|| d S )Nr   r,   r   r-   r.   r0   r1   r
   rb   rc   int64dtyper+   r/   rY   r[   rF   rE   r   r\   r]   r^   r   r   r+   r   r   r   r+   r   r,   r   rY   rZ   levelscodesr)   r5   rk   r   r>   	r   nparrayr   r   r   r   r9   r:   r   r   r<   r   r;   r   r   r   0test_header_multi_index_common_format_malformed1Z  s   
	r}   c                 C   s   | }t tjg dg dgddtddgtg dg dgg d	g d
gd dgdd}d}|jt|ddgdd}t|| d S )Nrm   rn   ro   rp   r+   r/   rr   rs   rt   ru   rZ   rv   r5   1,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12r   r>   ry   r|   r   r   r   0test_header_multi_index_common_format_malformed2o  s   

r   c                 C   s   | }t tjg dg dgddtddgddggd	dgd	dggd
tg dg dgg dg dgd dgdd}d}|jt|d	dgd	dgd}t|| d S )N)r,   r   r-   r.   )r1   r
   rb   rc   ro   rp   r+   r/   r   r0   r   )rw   rx   rr   )r   r\   r]   r^   )r   r+   r   r   rC   rZ   rv   r5   r~   r>   )r   rz   r{   r   r   r   r9   r:   r|   r   r   r   0test_header_multi_index_common_format_malformed3  s   "	r   c                 C   s^   | }d d gddgddgg}t ddg}t||d}d}|jt|d	dgd
}t|| d S )Nr+   r   r,   r   )rY   r%   )r[   r&   r7   za,b
A,B
,
1,2
3,4r   r   )r   rg   r   r   r   r9   r:   )r   r   r   r7   r<   r;   r   r   r   "test_header_multi_index_blank_line  s   r   zdata,header)1,2,3
4,5,6N)zfoo,bar,baz
1,2,3
4,5,6r   c                 C   sl   | }|j dkr|d urtjjdd}|| |jtdg dd}|jt|g d|d}t|| d S )NpyarrowzDataFrame.columns are different)reasonr   rr   r(   r)   r   )	enginer   markZxfailZapplymarkerr   r   r9   r:   )r   r   r   requestr   r   r<   r;   r   r   r   !test_header_names_backward_compat  s   
r   c                 C   s8   | }t g dd}|jtdfi |}t|| d S )Nrr   r   za,b,cr   r   r   r9   r:   )r   rS   r   r<   r;   r   r   r   test_read_only_header_no_rows  s   r   zkwargs,namesru   r)   )r2   r3   r4   ZquuxZpandac                 C   sP   | }d}t g dg dg dg|d}|jt|fdd i|}t|| d S )Nr   )r+   r   r,   r   r-   )r.   r/   r0   r1   r
   )rb   rc            r   r   r   )r   rS   r)   r   r   r<   r;   r   r   r   test_no_header  s   r   Zstring_headerc                 C   sR   d}d}| }t jt|d |jt||d W d    d S 1 s"w   Y  d S )Nz*header must be integer or list of integersz1,2
3,4r   r   r   )r   r   r   r   r   r   r   r   test_non_int_header  s   "r   c                 C   sH   d}| }t ddgddgddgd}|jt|dgd}t|| d S )Nza,b,c
0,1,2
1,2,3r   r+   r   r,   rr   r   r   )r   r   r   r<   r;   r   r   r   test_singleton_header  s
   r   zdata,expectedz#A,A,A,B
one,one,one,two
0,40,34,0.1)r   (   "   皙?)r%   rd   r%   zone.1)r%   zone.2r&   re   r   z%A,A,A,B
one,one,one.1,two
0,40,34,0.1)r   r   r%   zone.1.1r   z/A,A,A,B,B
one,one,one.1,two,two
0,40,34,0.1,0.1)r   r   r   r   r   )r   r   r   r   )r&   ztwo.1c                 C   s*   | }|j t|ddgd}t|| d S )Nr   r+   r   r@   )r   r   r<   r   r;   r   r   r   test_mangles_multi_index  s   )r   r7    ZUnnamedZ
NotUnnamedc                 C   s   | }ddg}|d u rd |pddgd }nd dg|pddg d }|jt|||d}g }|d u r8g d}t|D ]\}}	|	sPd	|d u rI|n|d  d
}	||	 q<tt|ddg}tddgddgg|d}
t	
||
 d S )Nr   r+   ,r   z
0,1
2,3
4,5
z
,0,1
0,2,3
1,4,5
r>   )r   r   r   z	Unnamed: Z_level_001r   r,   r   r-   r   )joinr   r   	enumerateappendr   rg   zipr   r9   r:   )r   r?   r7   r   r   r   r;   Zexp_columnsrP   colr<   r   r   r   test_multi_index_unnamed  s    r   c                 C   sL   | }d}|j t|dg dd}tddgddgd	d
gd}t|| d S )Nza, b
1,2,3
5,6,4
r   r$   )r   r)   r+   r-   r   r.   r,   r   r8   r   r   r   r;   r<   r   r   r   6test_names_longer_than_header_but_equal_with_data_rowsE  s
   r   c                 C   s   | }d}d}t g d}tg dg dg|d}|jt|ddgd	}t||jd d  |jt|ddgd	}t|| d S )
NzFMale, Male, Male, Female, Female
R, R, L, R, R
.86, .67, .88, .78, .81z^Male, Male, Male, Female, Female
R, R, L, R, R
.86, .67, .88, .78, .81
.86, .67, .88, .78, .82))ZMalerD   ) Male R)r   z L) Femaler   )r   z R.1)Q?q=
ףp?)\(?(\?gQ?)r   r   r   r   g=
ףp=?r   r   r+   r   )r   rg   r   r   r   r9   r:   Ziloc)r   r   s1s2mir<   Zdf1Zdf2r   r   r    test_read_csv_multiindex_columnsR  s   	r   c                 C   sR   | }d}t jtdd |jt|ddgd W d    d S 1 s"w   Y  d S )Nz1row11,row12,row13
row21,row22, row23
row31,row32
z1Header rows must have an equal number of columns.r   r   r   r   r   r   r   r   r   )r   r   caser   r   r   'test_read_csv_multi_header_length_checkr  s   "r   c                 C   sT   | }d}|j t|ddgd d}tg ddtjtjgdg dd	}t|| d S )
Nzx,1,5
y,2
z,3
rY   r[   r   r*   r-   rY   r[   )xyz)r6   )r   r   r   rz   nanr9   r:   r   r   r   r   #test_header_none_and_implicit_index  s   r   c                 C   sT   | }d}t jtdd |jt|ddgd d W d    d S 1 s#w   Y  d S )Nx,1
y,2,5
z,3
z"Expected 2 fields in line 2, saw 3r   rY   r[   r   r   r   r   r   r   1test_header_none_and_implicit_index_in_second_row  s
   "r   c                 C   sH   | }d}|j t|ddgd dd}tddgdd	gd
}t|| d S )Nr   rY   r[   skip)r)   r   Zon_bad_linesr   r   r+   r,   r   r8   r   r   r   r   &test_header_none_and_on_bad_lines_skip  s   r   c                 C   sV   | }d}d}t jt|d |jt|g dd W d    d S 1 s$w   Y  d S )Nza,b
1,2
z;Passed header=\[0,1,2\], len of 3, but only 2 lines in filer   )r   r+   r   r   r   )r   r   r   r   r   r   r   test_header_missing_rows  s   "r   c                 C   s<   | }d}|j t|dd}tdgddd}t|| d S )Nz1aa    bb(1,1)   cc(1,1)
                0  2  3.5z\s+)sepr   r   g      @)Zaazbb(1,1)zcc(1,1)r8   r   r   r   r    test_header_multiple_whitespaces  s
   r   c                 C   sn   | }d}d}t jt|dd |jt|dd}W d    n1 s"w   Y  tddd	gi}t || d S )
Nza,b
1,2
3,4
    z;The 'delim_whitespace' keyword in pd.read_csv is deprecatedF)r	   Zcheck_stacklevelT)Zdelim_whitespaceza,bz1,2z3,4)r9   Zassert_produces_warningFutureWarningr   r   r   r:   )r   r   r   Zdepr_msgr;   r<   r   r   r   test_header_delim_whitespace  s   r   c                 C   sN   | }d}|j t|d ddgdddd}tddgd	d
ggdd}t|| d S )Nz
a,i,x
b,j,y
r   r+   zstring[pyarrow]r   )r   rR   rq   Zdtype_backendr   rY   rP   r[   jrp   r8   )Zpyarrow_parser_onlyr   r   r;   r<   r   r   r   test_usecols_no_header_pyarrow  s   r   ):__doc__collectionsr   ior   numpyrz   r   Zpandas.errorsr   Zpandasr   r   r   Zpandas._testingZ_testingr9   r   filterwarningsZ
pytestmarkZusefixturesZxfail_pyarrowZskip_pyarrowr   r   Zparametrizer!   r#   r=   rA   rQ   rT   rU   ri   rj   rl   r}   r   r   r   r   r   r   r   r   rg   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sf   
	



%
	








	









&"







