o
    թZh*                     @   s  d Z ddlmZmZ ddlZddlZddlZddlZddl	Z	ddl
mZmZ ddlmZ e	jdZe	jdZdd Zed	d
 Zee	jdddge	jdg ddd Zdd Zdd Ze	jddi eddgifdddieddgifdddgieddd gifd!dgd"d#eddgifd!dgd$d#edejdgifgd%d& Zd'd( Ze	jd)g d*d+d, Ze	jd-d"d$gd.d/ Zd0d1 Z e	jdg d2d3d4 Z!e	jdg d5d6d7 Z"d8d9 Z#d:d; Z$e	jd<e	jd=d>d?gd@dA Z%dS )BzZ
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
    )BytesIOTextIOWrapperN)	DataFrameread_csvz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningZpyarrow_skipc                 C   sL   d}| }t d|}|j|d|d}tddggddgd	}t|| d S )
Ncp1255u   שלום:1234
562:123:)sepencodingi2  {   u   שלום1234columnsr   encoder   r   tmassert_frame_equal)all_parsersr	   parserdataresultexpected r   [/var/www/html/lang_env/lib/python3.10/site-packages/pandas/tests/io/parser/test_encoding.pytest_bytes_io_input   s   r   c                 C   s@   | }t d }|j|ddd d}tddgg}t|| d S )Nu   Łaski, Jan;1;utf-8)r   r	   headeru   Łaski, Jan   r   )r   r   r   r   r   r   r   r   test_read_csv_unicode(   s
   r   r   ,	r	   )utf-16zutf-16lezutf-16bec              	   C   s
  | }d d|}dt  d}|dd}d}t|_}||}t|d}	|	| W d    n1 s7w   Y  tt	|||d	}
|j
|fd
|i|}|j
|
fd
|i|}W d    n1 shw   Y  t|| W d    d S 1 s~w   Y  d S )Nz)skip this
skip this too
A,B,C
1,2,3
4,5,6r   __z__.csv   )r   Zskiprowsr   wbr	   r	   )replaceuuiduuid4r   ensure_cleanr   openwriter   r   r   r   )r   r   r	   r   r   pathkwargsutf8
bytes_datafZbytes_bufferr   r   r   r   r   test_utf16_bom_skiprows2   s(   

"r1   c                 C   s6   t j|d}| }|j|ddd}t|dksJ d S )Nzutf16_ex.txtr!   r    )r	   r   2   )osr,   joinr   len)r   csv_dir_pathr,   r   r   r   r   r   test_utf16_exampleO   s   r7   c                 C   sL   t j|d}| }|j|d dd}|d}|d d }d}||ks$J d S )Nunicode_series.csvlatin-1)r   r	   r   r   i`  u$   Á köldum klaka (Cold Fever) (1994))r3   r,   r4   r   Z	set_index)r   r6   r,   r   r   gotr   r   r   r   test_unicode_encodingV   s   
r;   zdata,kwargs,expectedza
1ar   z"a"
1	quotechar"zb
1namesb1
1T)r?   skip_blank_linesFc                    sn   | }d d fdd}|j dkr"|dkr"|ddr"tjd	d
 |j||fdi|}t|| d S )Nu   ﻿r   c                    s    |   }t|S )N)r   r   )_dataZbom_databomr.   r   r   _encode_data_with_bom{   s   z,test_utf8_bom.<locals>._encode_data_with_bompyarrowrB   rC   Tz,https://github.com/apache/arrow/issues/38676)reasonr	   )enginegetpytestskipr   r   r   )r   r   r-   r   requestr   rG   r   r   rE   r   test_utf8_bomb   s   

rO   c                 C   sL   t dgdgd}| }||}d|}|jt||d}t|| d S )Ng333333@test)Zmb_numZ	multibytezmb_num,multibyte
4.8,testr%   )r   formatr   r   r   r   r   )r   	utf_valueencoding_fmtr   r   r	   r   r   r   r   r   test_read_csv_utf_aliases   s   

rT   zfile_path,encoding)))ior   csvz	test1.csvr   ))rU   r   r   r8   r9   ))rU   r   r   zsauron.SHIFT_JIS.csvshiftjisc           
      C   s  | }|| }|j ||d}t||d}| |}|jrJ W d    n1 s(w   Y  t|| t|dd}	|j |	|d}|	jrFJ W d    n1 sPw   Y  t|| t|ddd}	|j |	|d}|	jroJ W d    n1 syw   Y  t|| d S )Nr%   rb)moder   )rY   	buffering)r   r*   closedr   r   )
r   	file_pathr	   datapathr   Zfpathr   far   Zfbr   r   r   test_binary_mode_file_buffers   s$   
r_   pass_encodingc           	      C   s   | }| |}|jdkr|du r|dv rtd tddgi}tjd|dd$}|d	 |d
 |j	||r:|nd d}t
|| W d    d S 1 sOw   Y  d S )NrH   T)       zThese cases freezeZfoobarzw+)rY   r	   Zreturn_filelikezfoo
barr   r%   )rQ   rJ   rL   rM   r   r   r)   r+   seekr   r   )	r   rR   rS   r`   r   r	   r   r0   r   r   r   r   test_encoding_temp_file   s   



"re   c                 C   s   | }d}d}d}t ||gi}t -}|| d| | |d |j||d}t|| |j	r8J W d    d S 1 sCw   Y  d S )Nz	shift-jisu	   てすとu   こむ
r   r%   )
r   tempfileNamedTemporaryFiler+   r   rd   r   r   r   r[   )r   r   r	   titler   r   r0   r   r   r   r   test_encoding_named_temp_file   s   

"rj   )r   r!   z	utf-16-bez	utf-16-lezutf-32c                 C   sR   d}t || }t|d| d}tddgddgdd	ggd
dgd}t|| d S )Nu   a	b
：foo	0
bar	1
baz	2r    )	delimiterr	   u   ：foor   rc   r   Zbazr#   r<   r@   )r   r   r   )r	   r   Zencoded_datar   r   r   r   r   %test_parse_encoded_special_characters   s   rl   )r   Nr!   r   r9   c              	   C   s   | }t g dg dg dd}t G}|j|d|d |jdkrLd}tjt|d	 |j||d
d W d    n1 s>w   Y  	 W d    d S |j||d
d}W d    n1 s^w   Y  t	|| d S )N)ZRaphaelZ	DonatellozMiguel AngelZLeonardo)redpurpleZorangeblue)Zsaizbo staffZnunchunkZkatana)namemaskZweaponF)indexr	   rH   BThe 'memory_map' option is not supported with the 'pyarrow' enginematchT)r	   
memory_map)
r   r   r)   to_csvrJ   rL   raises
ValueErrorr   r   )r   r	   r   r   filemsgdfr   r   r   test_encoding_memory_map   s&   

	
r}   c              	   C   s   | }t dgd d}d|jd< tdH}|j|dddd	 |jd
krMd}tjt|d |j	|ddd W d   n1 s?w   Y  	 W d   dS |j	|ddd}W d   n1 s_w   Y  t
|| dS )zO
    Chunk splits a multibyte character with memory_map=True

    GH 43540
    Zaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaai   )r   u   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaąi  zbug-gh43540.csvFr   rr   r   r	   rH   rs   rt   NT)r   rv   )r   Zilocr   r)   rw   rJ   rL   rx   ry   r   r   )r   r   r|   fnamer{   dfrr   r   r    test_chunk_splits_multibyte_char  s   

	
r   c              	   C   sB  g }d}d}d}t t|t||D ])}ddd t ||d D d }z|d W n	 ty5   Y qw || q| }t|}td	J}	|j	|	d
d
dd |j
dkrd}
tjt|
d |j|	dddd W d   n1 ssw   Y  	 W d   dS |j|	dddd}W d   n1 sw   Y  t|| dS )zg
    GH 43787

    Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8
        u   𐂀 c                 S   s   g | ]}t |qS r   )chr).0cr   r   r   
<listcomp>/  s    z,test_readcsv_memmap_utf8.<locals>.<listcomp>rf   r   zutf8test.csvFr~   rH   rs   rt   NT)r   rv   r	   )rangeordr4   r   UnicodeEncodeErrorappendr   r   r)   rw   rJ   rL   rx   ry   r   r   )r   linesline_lengthZ
start_charZend_charlnumliner   r|   r   r{   r   r   r   r   test_readcsv_memmap_utf8"  s4   "
	
r   Zpyarrow_xfailrY   zw+bzw+tc                 C   s~   | }d}d|v r
d}t j|dd}|| |d ||}W d    n1 s+w   Y  tg dgd}t|| d S )Ns   abcdtabcdr   )rY   r	   r   r   )rg   SpooledTemporaryFiler+   rd   r   r   r   r   )r   rY   r   contenthandler|   r   r   r   r   test_not_readableD  s   

r   )&__doc__rU   r   r   r3   rg   r'   numpynprL   Zpandasr   r   Zpandas._testingZ_testingr   markfilterwarningsZ
pytestmarkZusefixturesZskip_pyarrowr   r   Zparametrizer1   r7   r;   nanrO   rT   r_   re   rj   rl   r}   r   r   r   r   r   r   r   <module>   sn    
	






"