o
    թZh,                     @  s   d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlZd dlmZ d dlmZmZ d dlmZ er\d dlmZ G dd deZdS )    )annotations)TYPE_CHECKINGN)using_pyarrow_string_dtype)lib)import_optional_dependency)ParserErrorParserWarning)find_stack_level)pandas_dtype)
is_integer)	DataFrame)_arrow_dtype_mappingarrow_string_types_mapper)
ParserBase)
ReadBufferc                      sT   e Zd ZdZd fddZddd	Zdd
dZdddZdddZdddZ	  Z
S )ArrowParserWrapperz7
    Wrapper for the pyarrow engine for read_csv()
    srcReadBuffer[bytes]returnNonec                   s$   t  | || _|| _|   d S N)super__init__kwdsr   _parse_kwds)selfr   r   	__class__ ]/var/www/html/lang_env/lib/python3.10/site-packages/pandas/io/parsers/arrow_parser_wrapper.pyr   %   s   zArrowParserWrapper.__init__c                 C  sN   | j d}|du rdn|| _| j d }t|trtdt| j d | _dS )z?
        Validates keywords before passing to pyarrow.
        encodingNzutf-8	na_valuesz?The pyarrow engine doesn't support passing a dict for na_values)r   getr    
isinstancedict
ValueErrorlistr!   )r   r    r!   r   r   r   r   ,   s   

zArrowParserWrapper._parse_kwdsc                 C  s  ddddddd}|  D ]\}}|| jv r'| j|dur'| j|| j|< q| j}t|tr4|g}nd}|| jd	< d
d | j  D | _| jd}|durt|rZ|| jd< n*|t	j
jkrfd| jd< n|t	j
jkrwddd}|| jd< n|t	j
jkrdd | jd< dd | j  D | _d| jd v | jd< | jdu rd| jv rdd | jd D | jd< | jdu | jdur| jn| jd | jd| _dS )z:
        Rename some arguments to pass to pyarrow
        include_columnsnull_valuesescape_charignore_empty_linesdecimal_point
quote_char)usecolsr!   
escapecharZskip_blank_linesdecimal	quotecharNtimestamp_parsersc                 S  &   i | ]\}}|d ur|dv r||qS )N)	delimiterr,   r)   r*   r   .0Zoption_nameZoption_valuer   r   r   
<dictcomp>Y       z;ArrowParserWrapper._get_pyarrow_options.<locals>.<dictcomp>on_bad_linesZinvalid_row_handlerr   strc                 S  s.   t jd| j d| j d| j tt d dS )Nz	Expected z columns, but found z: )
stacklevelskip)warningswarnZexpected_columnsZactual_columnstextr   r	   )Zinvalid_rowr   r   r   handle_warningk   s   
z?ArrowParserWrapper._get_pyarrow_options.<locals>.handle_warningc                 S  s   dS )Nr;   r   )_r   r   r   <lambda>v   s    z9ArrowParserWrapper._get_pyarrow_options.<locals>.<lambda>c                 S  r2   )N)r'   r(   Ztrue_valuesZfalse_valuesr+   r1   r   r4   r   r   r   r6   x   r7    Zstrings_can_be_nullc                 S  s   g | ]}d | qS )fr   )r5   nr   r   r   
<listcomp>   s    
z;ArrowParserWrapper._get_pyarrow_options.<locals>.<listcomp>Zskiprows)Zautogenerate_column_namesZ	skip_rowsr    )r   r9   )itemsr   r"   popdate_formatr#   r9   parse_optionscallabler   ZBadLineHandleMethodERRORWARNZSKIPconvert_optionsheaderr    read_options)r   mappingZpandas_nameZpyarrow_namerH   r8   r?   r   r   r   _get_pyarrow_options:   s^   


	
z'ArrowParserWrapper._get_pyarrow_optionsframer   c              
     s  t  j}d}| jdu r7| jdu r| jdu rt|| _t | j|kr3tt|t | j | j | _d}| j _|  j \} | jdur| j }t	| jD ]S\}}t
|r_ j| ||< n| jvrltd| d| jdur| j|dur|| j|fn j| | j j| f\}}	|	dur | |	 |< | j|= qO j|ddd | jdu r|sdgt  jj  j_| jdurt| jtrև fdd| j D | _nt| j| _z	 | j W  S  ty }
 zt|
d}
~
ww  S )	z
        Processes data read in based on kwargs.

        Parameters
        ----------
        frame: DataFrame
            The DataFrame to process.

        Returns
        -------
        DataFrame
            The processed DataFrame.
        TNFzIndex z invalid)ZdropZinplacec                   s$   i | ]\}}| j v r|t|qS r   )columnsr
   )r5   kvrR   r   r   r6      s
    
z>ArrowParserWrapper._finalize_pandas_output.<locals>.<dictcomp>)lenrS   rN   namesranger&   Z_do_date_conversionsZ	index_colcopy	enumerater   r%   Zdtyper"   ZastypeZ	set_indexindexr#   r$   rF   r
   	TypeError)r   rR   Znum_colsZmulti_index_namedr@   Zindex_to_setiitemkeyZ	new_dtypeer   rV   r   _finalize_pandas_output   sX   











z*ArrowParserWrapper._finalize_pandas_outputc                 C  s8   t |rtdd |D stdt|rtdd S )Nc                 s      | ]}t |tV  qd S r   r#   r9   r5   xr   r   r   	<genexpr>   s    z7ArrowParserWrapper._validate_usecols.<locals>.<genexpr>zwThe pyarrow engine does not allow 'usecols' to be integer column positions. Pass a list of string column names instead.z=The pyarrow engine does not allow 'usecols' to be a callable.)r   is_list_likeallr%   rJ   )r   r-   r   r   r   _validate_usecols   s   z$ArrowParserWrapper._validate_usecolsc              
   C  s  t d}t d}|   z|jdi | j}W n2 tyI   | jdd}|dur.| | | jdt }t	|rDt
dd |D sHtd w z|j| j|jdi | j|jdi | j|d	}W n |jyv } zt||d}~ww | jd
 }|tju r|j}	| }
t|jjD ]\}}|j|r|	||	||
}	q||	}|dkr|jtjd}n&|dkrt  }t! ||" < |j|jd}nt# r|jt$ d}n| }| %|S )z
        Reads the contents of a CSV file into a DataFrame and
        processes it according to the kwargs passed in the
        constructor.

        Returns
        -------
        DataFrame
            The DataFrame created from the CSV file.
        Zpyarrowzpyarrow.csvr'   Nr(   c                 s  rc   r   rd   re   r   r   r   rg      s    

z*ArrowParserWrapper.read.<locals>.<genexpr>z9The 'pyarrow' engine requires all na_values to be strings)rO   rI   rM   dtype_backend)Ztypes_mapperZnumpy_nullabler   )&r   rQ   ZConvertOptionsrM   r]   r"   rj   setr   rh   ri   Zread_csvr   ZReadOptionsrO   ZParseOptionsrI   ZArrowInvalidr   r   Z
no_defaultZschemaZfloat64r[   typesZis_nullfieldZ	with_typecastZ	to_pandaspdZ
ArrowDtyper   Z
Int64Dtypenullr   r   rb   )r   paZpyarrow_csvrM   includeZnullstablera   rk   Z
new_schemanew_typer^   Z
arrow_typerR   Zdtype_mappingr   r   r   read   sd   






zArrowParserWrapper.read)r   r   r   r   )r   r   )rR   r   r   r   )r   r   )__name__
__module____qualname____doc__r   r   rQ   rb   rj   rv   __classcell__r   r   r   r   r       s    


[
Ir   )
__future__r   typingr   r<   Zpandas._configr   Zpandas._libsr   Zpandas.compat._optionalr   Zpandas.errorsr   r   Zpandas.util._exceptionsr	   Zpandas.core.dtypes.commonr
   Zpandas.core.dtypes.inferencer   Zpandasrp   r   Zpandas.io._utilr   r   Zpandas.io.parsers.base_parserr   Zpandas._typingr   r   r   r   r   r   <module>   s"    