o
    թZhw7                     @  s(  d dl mZ d dlmZ d dlmZ d dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZm Z m!Z! erd dl"m#Z#m$Z$m%Z% d dl&m'Z'm(Z(m)Z)m*Z* d dl+m,Z,m-Z- G dd deZ.dddZ/d ddZ0dS )!    )annotations)defaultdict)TYPE_CHECKINGN)libparsers)import_optional_dependency)DtypeWarning)find_stack_levelpandas_dtype)concat_compatunion_categoricals)CategoricalDtype)ensure_index_from_sequences)dedup_namesis_potential_multi_index)
ParserBaseParserErroris_index_col)HashableMappingSequence)	ArrayLikeDtypeArgDtypeObjReadCsvBuffer)Index
MultiIndexc                      sh   e Zd ZU ded< ded< d fd	d
Zd ddZd ddZ	d!d"ddZd#ddZd$d%ddZ	  Z
S )&CParserWrapperbool
low_memoryzparsers.TextReader_readersrcReadCsvBuffer[str]returnNonec                   s  t  | || _| }|dd| _| jdu|d< | j|d< | jj	|d< dD ]}||d  q(t
|dd |d< d|vsF|d tju rJd	|d< |d d
krTtd
 tj|fi || _| jj| _| jd u }| jjd u rrd | _n| | jj| j|\| _| _| _}| jd u rtt| jj| _| jd d  | _| jr| | j| j | jd usJ | jdkrt | js|   | j t!| jt! krՇ fddt"| jD | _t!| jt! k r|   | j | #| j | $  | j| _| j%s8| jj&dkrt'| jrd| _(| )| j| j\}| _| _| jd u r|| _| jjd u r8|s8| jd us/J d gt!| j | _| jj&dk| _*d S )Nr    FZallow_leading_colsusecolson_bad_lines)Zstorage_optionsencodingZ
memory_mapcompressiondtypeZdtype_backendnumpyZpyarrowstringc                   $   g | ]\}}| v s| v r|qS  r.   ).0inr&   r.   Y/var/www/html/lang_env/lib/python3.10/site-packages/pandas/io/parsers/c_parser_wrapper.py
<listcomp>   s
    z+CParserWrapper.__init__.<locals>.<listcomp>r   T)+super__init__kwdscopypopr    	index_colr&   r'   valueensure_dtype_objsgetr   Z
no_defaultr   r   Z
TextReaderr!   Zunnamed_colsnamesheaderZ_extract_multi_indexer_columnsindex_names	col_nameslistrangetable_width
orig_names_evaluate_usecolsZusecols_dtypesetissubsetZ_validate_usecols_nameslen	enumerateZ_validate_parse_dates_presence_set_noconvert_columns_has_complex_date_colleading_colsr   Z_name_processedZ_clean_index_namesZ_implicit_index)selfr"   r7   keyZpassed_namesr@   	__class__r2   r3   r6   <   s   


	

zCParserWrapper.__init__c                 C  s&   z| j   W d S  ty   Y d S w N)r!   close
ValueError)rN   r.   r.   r3   rS      s
   zCParserWrapper.closec                   s^   | j dusJ dd t| j D   fdd| jD }| || j}|D ]}| j| q$dS )z
        Set the columns that should not undergo dtype conversions.

        Currently, any column that is involved with date parsing will not
        undergo such conversions.
        Nc                 S  s   i | ]\}}||qS r.   r.   )r/   r0   xr.   r.   r3   
<dictcomp>       z9CParserWrapper._set_noconvert_columns.<locals>.<dictcomp>c                   s   g | ]} | qS r.   r.   r/   rU   Z
names_dictr.   r3   r4          z9CParserWrapper._set_noconvert_columns.<locals>.<listcomp>)rE   rJ   r>   Z_set_noconvert_dtype_columnsr!   Zset_noconvert)rN   Zcol_indicesZnoconvert_columnscolr.   rY   r3   rK      s   z%CParserWrapper._set_noconvert_columnsNnrows
int | None_tuple[Index | MultiIndex | None, Sequence[Hashable] | MultiIndex, Mapping[Hashable, ArrayLike]]c                   s  z| j r| j|}t|}n| j|}W nK tya   | jr\d| _t| jt	| j| j
}| j|| jd\} }|  | j | jd urJ|    fdd| D }| |f Y S |    w d| _| j}| jjr| jrstdg }| j
r| jjt| j
krtdt| j
 d| jj dt| jjD ]"}| j
d u r||}	n|| j
| }	| j|	|d	d
}	||	 qt|}| jd ur| |}t|t	|| j
}t| }
dd t||
D }|  ||\}}| || j}nTt| }
| jd usJ t!| j}t|t	|| j
}| jd ur| |}dd |
D }| jd u r*| "|| dd t||
D }|  ||\}}| #|||\}}|||fS )NFr*   c                   s   i | ]\}}| v r||qS r.   r.   )r/   kvcolumnsr.   r3   rV      s    z'CParserWrapper.read.<locals>.<dictcomp>z file structure not yet supportedz,Could not construct index. Requested to use z number of columns, but z left to parse.T)try_parse_datesc                 S     i | ]	\}\}}||qS r.   r.   r/   r`   r0   ra   r.   r.   r3   rV   .      c                 S  s   g | ]}|d  qS )   r.   rX   r.   r.   r3   r4   F  rZ   z'CParserWrapper.read.<locals>.<listcomp>c                 S  re   r.   r.   rf   r.   r.   r3   rV   J  rg   )$r    r!   Zread_low_memory_concatenate_chunksreadStopIterationZ_first_chunkr   rE   r   r:   Z_get_empty_metar*   Z_maybe_make_multi_index_columnsrA   r&   _filter_usecolsitemsrS   r>   rM   rL   NotImplementedErrorrI   r   rC   r9   _maybe_parse_datesappendr   sortedzipZ_do_date_conversionsrB   Z_check_data_lengthZ_make_index)rN   r\   chunksdatar>   indexZcol_dictZarraysr0   valuesZ	data_tupsZcolumn_namesZ	date_dataZalldatar.   rb   r3   rj      s   









zCParserWrapper.readr>   Sequence[Hashable]c                   s@   |  | j|  d urt|t kr fddt|D }|S )Nc                   r-   r.   r.   )r/   r0   namer2   r.   r3   r4   U  s    z2CParserWrapper._filter_usecols.<locals>.<listcomp>)rF   r&   rI   rJ   )rN   r>   r.   r2   r3   rl   Q  s   
zCParserWrapper._filter_usecolsTru   intrd   c                 C  s4   |r|  |r| j|| jd ur| j| nd d}|S )N)r[   )Z_should_parse_datesZ
_date_convr@   )rN   rv   ru   rd   r.   r.   r3   ro   Z  s   z!CParserWrapper._maybe_parse_dates)r"   r#   r$   r%   )r$   r%   rR   )r\   r]   r$   r^   )r>   rw   r$   rw   )T)ru   ry   rd   r   )__name__
__module____qualname____annotations__r6   rS   rK   rj   rl   ro   __classcell__r.   r.   rP   r3   r   8   s   
  


s	r   rs   list[dict[int, ArrayLike]]r$   dictc           
        s   t | d  }g }i }|D ]H  fdd| D }dd |D }dd |D }| }t|tr9t|dd| < qt|| < t|d	krV|  jt	t
krV|t  q|rqd
|}dd| dg}	tj|	tt d |S )z
    Concatenate chunks of data read with low_memory=True.

    The tricky part is handling Categoricals, where different chunks
    may have different inferred categories.
    r   c                   s   g | ]}|  qS r.   )r9   )r/   chunkrx   r.   r3   r4   o  rW   z'_concatenate_chunks.<locals>.<listcomp>c                 S  s   h | ]}|j qS r.   r_   )r/   ar.   r.   r3   	<setcomp>q  s    z&_concatenate_chunks.<locals>.<setcomp>c                 S  s   h | ]	}t |ts|qS r.   )
isinstancer   rX   r.   r.   r3   r   r  rg   F)Zsort_categoriesrh   , z	Columns (zK) have mixed types. Specify dtype option on import or set low_memory=False.)
stacklevel)rB   keysr9   r   r   r   r   rI   r*   npobjectrp   strjoinwarningswarnr   r	   )
rs   r>   Zwarning_columnsresultZarrsZdtypesZnon_cat_dtypesr*   Zwarning_namesZwarning_messager.   r   r3   ri   c  s,   
 

ri   r*   *DtypeArg | dict[Hashable, DtypeArg] | None*DtypeObj | dict[Hashable, DtypeObj] | Nonec                   sx   t tr$t  t fdd} D ]
}t| ||< q|S t tr2fddD S dur:tS S )zc
    Ensure we have either None, a dtype object, or a dictionary mapping to
    dtype objects.
    c                     s    S rR   r.   r.   )default_dtyper.   r3   <lambda>  s    z#ensure_dtype_objs.<locals>.<lambda>c                   s   i | ]	}|t  | qS r.   r
   )r/   r`   r_   r.   r3   rV     rg   z%ensure_dtype_objs.<locals>.<dictcomp>N)r   r   r   default_factoryr   r   )r*   Zdtype_convertedrO   r.   )r   r*   r3   r<     s   

r<   )rs   r   r$   r   )r*   r   r$   r   )1
__future__r   collectionsr   typingr   r   r+   r   Zpandas._libsr   r   Zpandas.compat._optionalr   Zpandas.errorsr   Zpandas.util._exceptionsr	   Zpandas.core.dtypes.commonr   Zpandas.core.dtypes.concatr   r   Zpandas.core.dtypes.dtypesr   Zpandas.core.indexes.apir   Zpandas.io.commonr   r   Zpandas.io.parsers.base_parserr   r   r   collections.abcr   r   r   Zpandas._typingr   r   r   r   Zpandasr   r   r   ri   r<   r.   r.   r.   r3   <module>   s0      
-%