o
    #ifJ1                     @   s   d dl mZ d dlZd dlmZ d dlmZ d dlmZ G dd dZ	dZ
G dd	 d	Zdd
dZde_ddddddddddddddZde
e_dS )    )IntegralN)Table)_resolve_filesystem_and_pathc                   @   s  e Zd ZdZdd Zedd Zedd Zedd	 Zed
d Z	edd Z
edd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zed d! Zed"d# Zed$d% Zd-d'd(Zd-d)d*Zd-d+d,Zd&S ).ORCFilea  
    Reader interface for a single ORC file

    Parameters
    ----------
    source : str or pyarrow.NativeFile
        Readable source. For passing Python file objects or byte buffers,
        see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
    c                 C   s   t  | _| j| d S N)_orc	ORCReaderreaderopen)selfsource r   D/var/www/html/corbot_env/lib/python3.10/site-packages/pyarrow/orc.py__init__&   s   
zORCFile.__init__c                 C   
   | j  S )z/The file metadata, as an arrow KeyValueMetadata)r	   metadatar   r   r   r   r   *      
zORCFile.metadatac                 C   r   )z#The file schema, as an arrow schema)r	   schemar   r   r   r   r   /   r   zORCFile.schemac                 C   r   )zThe number of rows in the file)r	   nrowsr   r   r   r   r   4   r   zORCFile.nrowsc                 C   r   )z!The number of stripes in the file)r	   nstripesr   r   r   r   r   9   r   zORCFile.nstripesc                 C   r   )z4Format version of the ORC file, must be 0.11 or 0.12)r	   file_versionr   r   r   r   r   >   r   zORCFile.file_versionc                 C   r   )z2Software instance and version that wrote this file)r	   software_versionr   r   r   r   r   C   r   zORCFile.software_versionc                 C   r   )zCompression codec of the file)r	   compressionr   r   r   r   r   H   r   zORCFile.compressionc                 C   r   )z?Number of bytes to buffer for the compression codec in the file)r	   compression_sizer   r   r   r   r   M   r   zORCFile.compression_sizec                 C   r   )z{Name of the writer that wrote this file.
        If the writer is unknown then its Writer ID
        (a number) is returned)r	   writerr   r   r   r   r   R   s   
zORCFile.writerc                 C   r   )zVersion of the writer)r	   writer_versionr   r   r   r   r   Y   r   zORCFile.writer_versionc                 C   r   )zRNumber of rows per an entry in the row index or 0
        if there is no row index)r	   row_index_strider   r   r   r   r   ^   s   
zORCFile.row_index_stridec                 C   r   )zNumber of stripe statistics)r	   nstripe_statisticsr   r   r   r   r   d   r   zORCFile.nstripe_statisticsc                 C   r   )z/Length of the data stripes in the file in bytes)r	   content_lengthr   r   r   r   r   i   r   zORCFile.content_lengthc                 C   r   )z<The number of compressed bytes in the file stripe statistics)r	   stripe_statistics_lengthr   r   r   r   r    n   r   z ORCFile.stripe_statistics_lengthc                 C   r   )z1The number of compressed bytes in the file footer)r	   file_footer_lengthr   r   r   r   r!   s   r   zORCFile.file_footer_lengthc                 C   r   )z*The number of bytes in the file postscript)r	   file_postscript_lengthr   r   r   r   r"   x   r   zORCFile.file_postscript_lengthc                 C   r   )zThe number of bytes in the file)r	   file_lengthr   r   r   r   r#   }   r   zORCFile.file_lengthNc                 C   s   |d u rd S | j }g }|D ]1}t|tr;t|}d|  kr$t|k r1n n|| j}|| qtdt||f |  S |S )Nr   z/Column indices must be in 0 <= ind < %d, got %d)r   
isinstancer   intlennameappend
ValueError)r   columnsr   namescolr   r   r   _select_names   s   


zORCFile._select_namesc                 C   s   |  |}| jj||dS )a  Read a single stripe from the file.

        Parameters
        ----------
        n : int
            The stripe index
        columns : list
            If not None, only these columns will be read from the stripe. A
            column name may be a prefix of a nested field, e.g. 'a' will select
            'a.b', 'a.c', and 'a.d.e'

        Returns
        -------
        pyarrow.RecordBatch
            Content of the stripe as a RecordBatch.
        r*   )r-   r	   read_stripe)r   nr*   r   r   r   r/      s   
zORCFile.read_stripec                 C   s   |  |}| jj|dS )a  Read the whole file.

        Parameters
        ----------
        columns : list
            If not None, only these columns will be read from the file. A
            column name may be a prefix of a nested field, e.g. 'a' will select
            'a.b', 'a.c', and 'a.d.e'. Output always follows the
            ordering of the file and not the `columns` list.

        Returns
        -------
        pyarrow.Table
            Content of the file as a Table.
        r.   )r-   r	   read)r   r*   r   r   r   r1      s   
zORCFile.readr   )__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r-   r/   r1   r   r   r   r   r      sP    



















r   a  file_version : {"0.11", "0.12"}, default "0.12"
    Determine which ORC file version to use.
    `Hive 0.11 / ORC v0 <https://orc.apache.org/specification/ORCv0/>`_
    is the older version
    while `Hive 0.12 / ORC v1 <https://orc.apache.org/specification/ORCv1/>`_
    is the newer one.
batch_size : int, default 1024
    Number of rows the ORC writer writes at a time.
stripe_size : int, default 64 * 1024 * 1024
    Size of each ORC stripe in bytes.
compression : string, default 'uncompressed'
    The compression codec.
    Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'}
    Note that LZ0 is currently not supported.
compression_block_size : int, default 64 * 1024
    Size of each compression block in bytes.
compression_strategy : string, default 'speed'
    The compression strategy i.e. speed vs size reduction.
    Valid values: {'SPEED', 'COMPRESSION'}
row_index_stride : int, default 10000
    The row index stride i.e. the number of rows per
    an entry in the row index.
padding_tolerance : double, default 0.0
    The padding tolerance.
dictionary_key_size_threshold : double, default 0.0
    The dictionary key size threshold. 0 to disable dictionary encoding.
    1 to always enable dictionary encoding.
bloom_filter_columns : None, set-like or list-like, default None
    Columns that use the bloom filter.
bloom_filter_fpp : double, default 0.05
    Upper limit of the false-positive rate of the bloom filter.
c                   @   sd   e Zd ZdeZdZddddddd	d
d
dddddZdd Zdd Z	dd Z
dd Zdd ZdS )	ORCWritera  
Writer interface for a single ORC file

Parameters
----------
where : str or pyarrow.io.NativeFile
    Writable target. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
    or pyarrow.io.FixedSizeBufferWriter.
{}
F0.12      uncompressed   speed'          N皙?r   
batch_sizestripe_sizer   compression_block_sizecompression_strategyr   padding_tolerancedictionary_key_size_thresholdbloom_filter_columnsbloom_filter_fppc                C   s8   t  | _| jj|||||||||	|
||d d| _d S )NrA   T)r   r7   r   r
   is_open)r   wherer   rB   rC   r   rD   rE   r   rF   rG   rH   rI   r   r   r   r      s    

zORCWriter.__init__c                 C      |    d S r   closer   r   r   r   __del__     zORCWriter.__del__c                 C   s   | S r   r   r   r   r   r   	__enter__  s   zORCWriter.__enter__c                 O   rL   r   rM   )r   argskwargsr   r   r   __exit__  rP   zORCWriter.__exit__c                 C   s   | j sJ | j| dS )a
  
        Write the table into an ORC file. The schema of the table must
        be equal to the schema used when opening the ORC file.

        Parameters
        ----------
        table : pyarrow.Table
            The table to be written into the ORC file
        N)rJ   r   write)r   tabler   r   r   rU     s   

zORCWriter.writec                 C   s   | j r| j  d| _ dS dS )z$
        Close the ORC file
        FN)rJ   r   rN   r   r   r   r   rN   #  s   

zORCWriter.close)r2   r3   r4   format_orc_writer_args_docsr5   rJ   r   rO   rQ   rT   rU   rN   r   r   r   r   r7      s,    
r7   c                 C   s^   t | |\}}|d ur||} |d ur%t|dkr%t|  |}|S t| j|d}|S )Nr   r.   )r   open_input_filer&   r   r1   select)r   r*   
filesystempathresultr   r   r   
read_table,  s   
r^   a  
Read a Table from an ORC file.

Parameters
----------
source : str, pyarrow.NativeFile, or file-like object
    If a string passed, can be a single file name. For file-like objects,
    only read a single file. Use pyarrow.BufferReader to read a file
    contained in a bytes or buffer-like object.
columns : list
    If not None, only these columns will be read from the file. A column
    name may be a prefix of a nested field, e.g. 'a' will select 'a.b',
    'a.c', and 'a.d.e'. Output always follows the ordering of the file and
    not the `columns` list. If empty, no columns will be read. Note
    that the table will still have the correct num_rows set despite having
    no columns.
filesystem : FileSystem, default None
    If nothing passed, will be inferred based on path.
    Path will try to be found in the local on-disk filesystem otherwise
    it will be parsed as an URI to determine the filesystem.
r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   c                C   st   t |trtjdtdd || } }t|||||||||	|
||d}||  W d    d S 1 s3w   Y  d S )NzThe order of the arguments has changed. Pass as 'write_table(table, where)' instead. The old order will raise an error in the future.   )
stacklevelrA   )r$   r   warningswarnFutureWarningr7   rU   )rV   rK   r   rB   rC   r   rD   rE   r   rF   rG   rH   rI   r   r   r   r   write_tableP  s.   

"rd   a]  
Write a table into an ORC file.

Parameters
----------
table : pyarrow.lib.Table
    The table to be written into the ORC file
where : str or pyarrow.io.NativeFile
    Writable target. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
    or pyarrow.io.FixedSizeBufferWriter.
{}
)NN)numbersr   ra   pyarrow.libr   pyarrow._orcr   
pyarrow.fsr   r   rX   r7   r^   r5   rd   rW   r   r   r   r   <module>   s4    $"
L$