""" parquet compat """
from __future__ import annotations

import io
import json
import os
from typing import (
    TYPE_CHECKING,
    Any,
    Literal,
)
import warnings
from warnings import catch_warnings

from pandas._config import using_pyarrow_string_dtype
from pandas._config.config import _get_option

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

import pandas as pd
from pandas import (
    DataFrame,
    get_option,
)
from pandas.core.shared_docs import _shared_docs
from pandas.io._util import arrow_string_types_mapper

from pandas.io.common import (
    IOHandles,
    get_handle,
    is_fsspec_url,
    is_url,
    stringify_path,
)

if TYPE_CHECKING:
    from pandas._typing import (
        DtypeBackend,
        FilePath,
        ReadBuffer,
        StorageOptions,
        WriteBuffer,
    )
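
# Module layout: get_engine() resolves the user-facing engine name ("auto",
# "pyarrow", "fastparquet") to a concrete BaseImpl subclass, and the public
# to_parquet()/read_parquet() entry points below talk only to that shared
# interface, so the two backends stay interchangeable.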
zreturn our implementationautozio.parquet.engine z
 - NzUnable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:pyarrowfastparquetz.engine must be one of 'pyarrow', 'fastparquet')r   PyArrowImplFastParquetImplImportErrorr   
ValueError)r   Zengine_classes
error_msgsZengine_classerr r,   H/var/www/html/lang_env/lib/python3.10/site-packages/pandas/io/parquet.py
get_engine3   s,   
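
# A minimal doctest-style sketch of the resolution order above (illustrative
# comment only, not executed; the 'PyArrowImpl' result assumes pyarrow is
# installed, otherwise fastparquet is tried and then ImportError is raised):
#
#   >>> impl = get_engine("auto")   # honours the "io.parquet.engine" option
#   >>> type(impl).__name__
#   'PyArrowImpl'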

def _get_path_or_handle(
    path: FilePath | ReadBuffer[bytes] | WriteBuffer[bytes],
    fs: Any,
    storage_options: StorageOptions | None = None,
    mode: str = "rb",
    is_dir: bool = False,
) -> tuple[
    FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], IOHandles[bytes] | None, Any
]:
    """File handling for PyArrow."""
    path_or_handle = stringify_path(path)
    if fs is not None:
        pa_fs = import_optional_dependency("pyarrow.fs", errors="ignore")
        fsspec = import_optional_dependency("fsspec", errors="ignore")
        if pa_fs is not None and isinstance(fs, pa_fs.FileSystem):
            if storage_options:
                raise NotImplementedError(
                    "storage_options not supported with a pyarrow FileSystem."
                )
        elif fsspec is not None and isinstance(fs, fsspec.spec.AbstractFileSystem):
            pass
        else:
            raise ValueError(
                f"filesystem must be a pyarrow or fsspec FileSystem, "
                f"not a {type(fs).__name__}"
            )

    if is_fsspec_url(path_or_handle) and fs is None:
        if storage_options is None:
            pa = import_optional_dependency("pyarrow")
            pa_fs = import_optional_dependency("pyarrow.fs")

            try:
                fs, path_or_handle = pa_fs.FileSystem.from_uri(path)
            except (TypeError, pa.lib.ArrowInvalid):
                pass
        if fs is None:
            fsspec = import_optional_dependency("fsspec")
            fs, path_or_handle = fsspec.core.url_to_fs(
                path_or_handle, **(storage_options or {})
            )
    elif storage_options and (not is_url(path_or_handle) or mode != "rb"):
        # can't write to a remote url
        # without making use of fsspec at the moment
        raise ValueError("storage_options passed with buffer, or non-supported URL")

    handles = None
    if (
        not fs
        and not is_dir
        and isinstance(path_or_handle, str)
        and not os.path.isdir(path_or_handle)
    ):
        # use get_handle only when we are very certain that it is not a directory
        # fsspec resources can also point to directories
        # this branch is used for example when reading from non-fsspec URLs
        handles = get_handle(
            path_or_handle, mode, is_text=False, storage_options=storage_options
        )
        fs = None
        path_or_handle = handles.handle
    return path_or_handle, handles, fs


class BaseImpl:
    @staticmethod
    def validate_dataframe(df: DataFrame) -> None:
        if not isinstance(df, DataFrame):
            raise ValueError("to_parquet only supports IO with DataFrames")

    def write(self, df: DataFrame, path, compression, **kwargs):
        raise AbstractMethodError(self)

    def read(self, path, columns=None, **kwargs) -> DataFrame:
        raise AbstractMethodError(self)

class PyArrowImpl(BaseImpl):
    def __init__(self) -> None:
        import_optional_dependency(
            "pyarrow", extra="pyarrow is required for parquet support."
        )
        import pyarrow.parquet

        # import utils to register the pyarrow extension types
        import pandas.core.arrays.arrow.extension_types  # noqa: F401

        self.api = pyarrow

    def write(
        self,
        df: DataFrame,
        path: FilePath | WriteBuffer[bytes],
        compression: str | None = "snappy",
        index: bool | None = None,
        storage_options: StorageOptions | None = None,
        partition_cols: list[str] | None = None,
        filesystem=None,
        **kwargs,
    ) -> None:
        self.validate_dataframe(df)

        from_pandas_kwargs: dict[str, Any] = {"schema": kwargs.pop("schema", None)}
        if index is not None:
            from_pandas_kwargs["preserve_index"] = index

        table = self.api.Table.from_pandas(df, **from_pandas_kwargs)

        if df.attrs:
            df_metadata = {"PANDAS_ATTRS": json.dumps(df.attrs)}
            existing_metadata = table.schema.metadata
            merged_metadata = {**existing_metadata, **df_metadata}
            table = table.replace_schema_metadata(merged_metadata)

        path_or_handle, handles, filesystem = _get_path_or_handle(
            path,
            filesystem,
            storage_options=storage_options,
            mode="wb",
            is_dir=partition_cols is not None,
        )
        if (
            isinstance(path_or_handle, io.BufferedWriter)
            and hasattr(path_or_handle, "name")
            and isinstance(path_or_handle.name, (str, bytes))
        ):
            if isinstance(path_or_handle.name, bytes):
                path_or_handle = path_or_handle.name.decode()
            else:
                path_or_handle = path_or_handle.name

        try:
            if partition_cols is not None:
                # writes to multiple files under the given path
                self.api.parquet.write_to_dataset(
                    table,
                    path_or_handle,
                    compression=compression,
                    partition_cols=partition_cols,
                    filesystem=filesystem,
                    **kwargs,
                )
            else:
                # write to a single output file
                self.api.parquet.write_table(
                    table,
                    path_or_handle,
                    compression=compression,
                    filesystem=filesystem,
                    **kwargs,
                )
        finally:
            if handles is not None:
                handles.close()

    def read(
        self,
        path,
        columns=None,
        filters=None,
        use_nullable_dtypes: bool = False,
        dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
        storage_options: StorageOptions | None = None,
        filesystem=None,
        **kwargs,
    ) -> DataFrame:
        kwargs["use_pandas_metadata"] = True

        to_pandas_kwargs = {}
        if dtype_backend == "numpy_nullable":
            from pandas.io._util import _arrow_dtype_mapping

            mapping = _arrow_dtype_mapping()
            to_pandas_kwargs["types_mapper"] = mapping.get
        elif dtype_backend == "pyarrow":
            to_pandas_kwargs["types_mapper"] = pd.ArrowDtype
        elif using_pyarrow_string_dtype():
            to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()

        manager = _get_option("mode.data_manager", silent=True)
        if manager == "array":
            to_pandas_kwargs["split_blocks"] = True

        path_or_handle, handles, filesystem = _get_path_or_handle(
            path,
            filesystem,
            storage_options=storage_options,
            mode="rb",
        )
        try:
            pa_table = self.api.parquet.read_table(
                path_or_handle,
                columns=columns,
                filesystem=filesystem,
                filters=filters,
                **kwargs,
            )
            result = pa_table.to_pandas(**to_pandas_kwargs)

            if manager == "array":
                result = result._as_manager("array", copy=False)

            if pa_table.schema.metadata:
                if b"PANDAS_ATTRS" in pa_table.schema.metadata:
                    df_metadata = pa_table.schema.metadata[b"PANDAS_ATTRS"]
                    result.attrs = json.loads(df_metadata)
            return result
        finally:
            if handles is not None:
                handles.close()
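
# Illustrative round-trip for the ``DataFrame.attrs`` handling in
# PyArrowImpl.write/read above (a sketch assuming pyarrow is installed):
# attrs are JSON-encoded under the ``PANDAS_ATTRS`` schema-metadata key on
# write and decoded again on read.
#
#   >>> df = pd.DataFrame({"a": [1]})
#   >>> df.attrs = {"source": "sensor-7"}
#   >>> buf = io.BytesIO()
#   >>> PyArrowImpl().write(df, buf)
#   >>> _ = buf.seek(0)
#   >>> PyArrowImpl().read(buf).attrs
#   {'source': 'sensor-7'}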
Er&   c                   @  s@   e Zd ZdddZ					ddddZ				ddddZdS )r'   r    rL   c                 C  s   t ddd}|| _d S )Nr%   z,fastparquet is required for parquet support.rZ   )r
   r\   )rP   r%   r,   r,   r-   r^   )  s   
zFastParquetImpl.__init__r_   NrK   r   rQ   *Literal['snappy', 'gzip', 'brotli'] | Noner3   r4   c           	        s   |  | d|v r|d urtdd|v r|d}|d ur"d|d< |d ur*tdt|}t|r@td  fdd|d	< nrFtd
tdd | jj	||f|||d| W d    d S 1 sfw   Y  d S )Npartition_onzYCannot use both partition_on and partition_cols. Use partition_cols for partitioning dataZhiveZfile_scheme9filesystem is not implemented for the fastparquet engine.r;   c                   s    j | dfi p	i   S )Nrg   )open)r0   _r;   r3   r,   r-   <lambda>T  s    z'FastParquetImpl.write.<locals>.<lambda>Z	open_withz?storage_options passed with file object or non-fsspec file pathT)record)rQ   Zwrite_indexr   )
rM   r)   rj   r>   r   r   r
   r   r\   rS   )	rP   rK   r0   rQ   rb   rd   r3   ri   rR   r,   r   r-   rS   1  sB   

"zFastParquetImpl.writec                 K  s  i }| dd}| dtj}	d|d< |rtd|	tjur"td|d ur*tdt|}d }
t|rHtd}|j|d	fi |pAi j	|d
< nt
|tr^tj|s^t|d	d|d}
|
j}z| jj|fi |}|jd||d|W |
d ur}|
  S S |
d ur|
  w w )Nry   Frz   Zpandas_nullszNThe 'use_nullable_dtypes' argument is not supported for the fastparquet enginezHThe 'dtype_backend' argument is not supported for the fastparquet enginer   r;   r/   r2   r<   )rU   r~   r,   )rj   r	   r   r)   r>   r   r   r
   r   r2   r=   r   rD   r0   rE   r   rF   r\   ZParquetFiler   rv   )rP   r0   rU   r~   r3   ri   rR   Zparquet_kwargsry   rz   rI   r;   Zparquet_filer,   r,   r-   rV   f  sD   	
 


zFastParquetImpl.readr   r   )rK   r   rQ   r   r3   r4   r    rL   )NNNN)r3   r4   r    r   )rA   rW   rX   r^   rS   rV   r,   r,   r,   r-   r'   (  s    

@doc(storage_options=_shared_docs["storage_options"])
def to_parquet(
    df: DataFrame,
    path: FilePath | WriteBuffer[bytes] | None = None,
    engine: str = "auto",
    compression: str | None = "snappy",
    index: bool | None = None,
    storage_options: StorageOptions | None = None,
    partition_cols: list[str] | None = None,
    filesystem: Any = None,
    **kwargs,
) -> bytes | None:
    """
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : str, path object, file-like object, or None, default None
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``write()`` function. If None, the result is
        returned as bytes. If a string, it will be used as Root Directory path
        when writing a partitioned dataset. The engine fastparquet does not
        accept file-like objects.
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.

        When using the ``'pyarrow'`` engine and no storage options are provided
        and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
        (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
        Use the filesystem keyword with an instantiated fsspec filesystem
        if you wish to use its implementation.
    compression : {{'snappy', 'gzip', 'brotli', 'lz4', 'zstd', None}},
        default 'snappy'. Name of the compression to use. Use ``None``
        for no compression.
    index : bool, default None
        If ``True``, include the dataframe's index(es) in the file output. If
        ``False``, they will not be written to the file.
        If ``None``, similar to ``True`` the dataframe's index(es)
        will be saved. However, instead of being saved as values,
        the RangeIndex will be stored as a range in the metadata so it
        doesn't require much space and is faster. Other indexes will
        be included as columns in the file output.
    partition_cols : str or list, optional, default None
        Column names by which to partition the dataset.
        Columns are partitioned in the order they are given.
        Must be None if path is not a string.
    {storage_options}

    filesystem : fsspec or pyarrow filesystem, default None
        Filesystem object to use when reading the parquet file. Only implemented
        for ``engine="pyarrow"``.

        .. versionadded:: 2.1.0

    kwargs
        Additional keyword arguments passed to the engine

    Returns
    -------
    bytes if no path argument is provided else None
    """
    if isinstance(partition_cols, str):
        partition_cols = [partition_cols]
    impl = get_engine(engine)

    path_or_buf: FilePath | WriteBuffer[bytes] = io.BytesIO() if path is None else path

    impl.write(
        df,
        path_or_buf,
        compression=compression,
        index=index,
        partition_cols=partition_cols,
        storage_options=storage_options,
        filesystem=filesystem,
    )

    if path is None:
        assert isinstance(path_or_buf, io.BytesIO)
        return path_or_buf.getvalue()
    else:
        return None


@doc(storage_options=_shared_docs["storage_options"])
def read_parquet(
    path: FilePath | ReadBuffer[bytes],
    engine: str = "auto",
    columns: list[str] | None = None,
    storage_options: StorageOptions | None = None,
    use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
    filesystem: Any = None,
    filters: list[tuple] | list[list[tuple]] | None = None,
    **kwargs,
) -> DataFrame:
    """
    Load a parquet object from the file path, returning a DataFrame.

    Parameters
    ----------
    path : str, path object or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function.
        The string could be a URL. Valid URL schemes include http, ftp, s3,
        gs, and file. For file URLs, a host is expected. A local file could be:
        ``file://localhost/path/to/table.parquet``.
        A file URL can also be a path to a directory that contains multiple
        partitioned parquet files. Both pyarrow and fastparquet support
        paths to directories as well as file URLs. A directory path could be:
        ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``.
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.

        When using the ``'pyarrow'`` engine and no storage options are provided
        and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
        (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
        Use the filesystem keyword with an instantiated fsspec filesystem
        if you wish to use its implementation.
    columns : list, default=None
        If not None, only these columns will be read from the file.
    {storage_options}

        .. versionadded:: 1.3.0

    use_nullable_dtypes : bool, default False
        If True, use dtypes that use ``pd.NA`` as missing value indicator
        for the resulting DataFrame. (only applicable for the ``pyarrow``
        engine)
        As new dtypes are added that support ``pd.NA`` in the future, the
        output with this option will change to use those dtypes.
        Note: this is an experimental option, and behaviour (e.g. additional
        support dtypes) may change without notice.

        .. deprecated:: 2.0

    dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    filesystem : fsspec or pyarrow filesystem, default None
        Filesystem object to use when reading the parquet file. Only implemented
        for ``engine="pyarrow"``.

        .. versionadded:: 2.1.0

    filters : List[Tuple] or List[List[Tuple]], default None
        To filter out data.
        Filter syntax: [[(column, op, val), ...],...]
        where op is [==, =, >, >=, <, <=, !=, in, not in]
        The innermost tuples are transposed into a set of filters applied
        through an `AND` operation.
        The outer list combines these sets of filters through an `OR`
        operation.
        A single list of tuples can also be used, meaning that no `OR`
        operation between set of filters is to be conducted.

        Using this argument will NOT result in row-wise filtering of the final
        partitions unless ``engine="pyarrow"`` is also specified.  For
        other engines, filtering is only performed at the partition level, that is,
        to prevent the loading of some row-groups and/or files.

        .. versionadded:: 2.1.0

    **kwargs
        Any additional kwargs are passed to the engine.

    Returns
    -------
    DataFrame

    See Also
    --------
    DataFrame.to_parquet : Create a parquet object that serializes a DataFrame.

    Examples
    --------
    >>> original_df = pd.DataFrame(
    ...     {{"foo": range(5), "bar": range(5, 10)}}
    ...    )
    >>> original_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> df_parquet_bytes = original_df.to_parquet()
    >>> from io import BytesIO
    >>> restored_df = pd.read_parquet(BytesIO(df_parquet_bytes))
    >>> restored_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> restored_df.equals(original_df)
    True
    >>> restored_bar = pd.read_parquet(BytesIO(df_parquet_bytes), columns=["bar"])
    >>> restored_bar
        bar
    0    5
    1    6
    2    7
    3    8
    4    9
    >>> restored_bar.equals(original_df[['bar']])
    True

    The function uses `kwargs` that are passed directly to the engine.
    In the following example, we use the `filters` argument of the pyarrow
    engine to filter the rows of the DataFrame.

    Since `pyarrow` is the default engine, we can omit the `engine` argument.
    Note that the `filters` argument is implemented by the `pyarrow` engine,
    which can benefit from multithreading and also potentially be more
    economical in terms of memory.

    >>> sel = [("foo", ">", 2)]
    >>> restored_part = pd.read_parquet(BytesIO(df_parquet_bytes), filters=sel)
    >>> restored_part
        foo  bar
    0    3    8
    1    4    9
    """
    impl = get_engine(engine)

    if use_nullable_dtypes is not lib.no_default:
        msg = (
            "The argument 'use_nullable_dtypes' is deprecated and will be removed "
            "in a future version."
        )
        if use_nullable_dtypes is True:
            msg += (
                "Use dtype_backend='numpy_nullable' instead of "
                "use_nullable_dtype=True."
            )
        warnings.warn(
            msg,
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    else:
        use_nullable_dtypes = False
    check_dtype_backend(dtype_backend)

    return impl.read(
        path,
        columns=columns,
        filters=filters,
        storage_options=storage_options,
        use_nullable_dtypes=use_nullable_dtypes,
        dtype_backend=dtype_backend,
        filesystem=filesystem,
        **kwargs,
    )