o
    Zh&                     @   s   d dl Z d dlmZ d dlmZmZmZmZ d dlmZ	 d dl
mZ d dlmZ G dd dZdd	 Zh d
Z		dddZ		dddZdddZdS )    N)_pandas_api)CodecTableconcat_tablesschema)_feather)FeatherErrorc                   @   s6   e Zd ZdZdddZdddZdd	 Zdd
dZdS )FeatherDataseta  
    Encapsulates details of reading a list of Feather files.

    Parameters
    ----------
    path_or_paths : List[str]
        A list of file names
    validate_schema : bool, default True
        Check that individual file schemas are all the same / compatible
    Tc                 C   s   || _ || _d S N)pathsvalidate_schema)selfZpath_or_pathsr    r   F/var/www/html/lang_env/lib/python3.10/site-packages/pyarrow/feather.py__init__)   s   
zFeatherDataset.__init__Nc                 C   sj   t | jd |d}|g| _|j| _| jdd D ]}t ||d}| jr)| || | j| qt| jS )a,  
        Read multiple feather files as a single pyarrow.Table

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file

        Returns
        -------
        pyarrow.Table
            Content of the file as a table (of columns)
        r   columns   N)
read_tabler   Z_tablesr   r   validate_schemasappendr   )r   r   Z_filpathtabler   r   r   r   -   s   
zFeatherDataset.read_tablec                 C   s(   | j |j std|| j |j d S )Nz-Schema in {!s} was different. 
{!s}

vs

{!s})r   equals
ValueErrorformat)r   Zpiecer   r   r   r   r   F   s   zFeatherDataset.validate_schemasc                 C   s   | j |dj|dS )a  
        Read multiple Parquet files as a single pandas DataFrame

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file
        use_threads : bool, default True
            Use multiple threads when converting to pandas

        Returns
        -------
        pandas.DataFrame
            Content of the file as a pandas DataFrame (of columns)
        r   )use_threadsr   Z	to_pandas)r   r   r   r   r   r   read_pandasM   s   zFeatherDataset.read_pandas)Tr
   )NT)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r	      s    

r	   c                 C   sH   |j dkrd S |jt t fv rtd| td| t|j)Nr   zqColumn '{}' exceeds 2GB maximum capacity of a Feather binary column. This restriction may be lifted in the futurezkColumn '{}' of type {} was chunked on conversion to Arrow and cannot be currently written to Feather format)Z
num_chunkstypeextbinarystringr   r   str)namecolr   r   r   check_chunked_overflowa   s   
r*   >   lz4ZzstdZuncompressed   c                 C   sr  t jrt jrt| t jjr|  } t | rH|dkrd}n|dkr$d}ntdt	j
| |d}|dkrGt|jjD ]\}}	|| }
t|	|
 q9n| }|dkrot|jtt|jkr^td|durftd|durntd	n|du r{td
r{d}n|dur|tvrtd|tztj||||||d W dS  ty   t|trzt| W   tjy   Y  w  w )a  
    Write a pandas.DataFrame to Feather format.

    Parameters
    ----------
    df : pandas.DataFrame or pyarrow.Table
        Data to write out as Feather format.
    dest : str
        Local destination path.
    compression : string, default None
        Can be one of {"zstd", "lz4", "uncompressed"}. The default of None uses
        LZ4 for V2 files if it is available, otherwise uncompressed.
    compression_level : int, default None
        Use a compression level particular to the chosen compressor. If None
        use the default compression level
    chunksize : int, default None
        For V2 files, the internal maximum size of Arrow RecordBatch chunks
        when writing the Arrow IPC file format. None means use the default,
        which is currently 64K
    version : int, default 2
        Feather file version. Version 2 is the current. Version 1 is the more
        limited legacy format
    r   Fr,   Nz%Version value should either be 1 or 2)preserve_indexz'cannot serialize duplicate column namesz2Feather V1 files do not support compression optionz0Feather V1 files do not support chunksize optionZ	lz4_framer+   z1compression="{}" not supported, must be one of {})compressioncompression_level	chunksizeversion)r   Zhave_pandasZ
has_sparse
isinstancepdZSparseDataFrameZto_denseZis_data_framer   r   Zfrom_pandas	enumerater   namesr*   lenZcolumn_namessetr   Zis_available_FEATHER_SUPPORTED_CODECSr   r   write_feather	Exceptionr'   osremoveerror)dfdestr.   r/   r0   r1   r-   r   ir(   r)   r   r   r   r9   s   sf   


r9   TFc                 K   s    t | |||djdd|i|S )a  
    Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
    feather.read_table.

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads. If false the
        restriction is used in the conversion to Pandas as well as in the
        reading from Feather format.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str.
    **kwargs
        Additional keyword arguments passed on to `pyarrow.Table.to_pandas`.

    Returns
    -------
    df : pandas.DataFrame
        The contents of the Feather file as a pandas.DataFrame
    )r   
memory_mapr   r   Nr   r   )sourcer   r   rA   kwargsr   r   r   read_feather   s   rD   c                 C   s   t j| ||d}|du r| S dd |D }ttdd |r&||}nttdd |r5||}ndd |D }td	|||j	d
k rK|S t
t||krU|S ||S )a  
    Read a pyarrow.Table from Feather format

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.

    Returns
    -------
    table : pyarrow.Table
        The contents of the Feather file as a pyarrow.Table
    )Zuse_memory_mapr   Nc                 S   s   g | ]}t |qS r   )r#   ).0columnr   r   r   
<listcomp>  s    zread_table.<locals>.<listcomp>c                 S      | t kS r
   )inttr   r   r   <lambda>      zread_table.<locals>.<lambda>c                 S   rH   r
   )r'   rJ   r   r   r   rL     rM   c                 S   s   g | ]}|j qS r   )r   )rE   rK   r   r   r   rG     s    z<Columns must be indices or names. Got columns {} of types {}   )r   ZFeatherReaderreadallmapZread_indicesZ
read_names	TypeErrorr   r1   sortedr7   select)rB   r   rA   r   readerZcolumn_typesr   Zcolumn_type_namesr   r   r   r      s&   

r   )NNNr,   )NTF)NFT)r;   Zpyarrow.pandas_compatr   Zpyarrow.libr   r   r   r   libr$   Zpyarrowr   Zpyarrow._featherr   r	   r*   r8   r9   rD   r   r   r   r   r   <module>   s    D
T
 