o
    ZhJ1                     @   s   d dl mZ d dlZd dlmZ d dlmZ d dlmZ G dd dZ	dZ
G dd	 d	Zdd
dZde_ddddddddddddddZde
e_dS )    )IntegralN)Table)_resolve_filesystem_and_pathc                   @   s  e Zd ZdZdd Zedd Zedd Zedd	 Zed
d Z	edd Z
edd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zed d! Zed"d# Zed$d% Zd-d'd(Zd-d)d*Zd-d+d,Zd&S ).ORCFilea  
    Reader interface for a single ORC file

    Parameters
    ----------
    source : str or pyarrow.NativeFile
        Readable source. For passing Python file objects or byte buffers,
        see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
    c                 C   s   t  | _| j| d S N)_orcZ	ORCReaderreaderopen)selfsource r   B/var/www/html/lang_env/lib/python3.10/site-packages/pyarrow/orc.py__init__&   s   
zORCFile.__init__c                 C   
   | j  S )z/The file metadata, as an arrow KeyValueMetadata)r   metadatar
   r   r   r   r   *      
zORCFile.metadatac                 C   r   )z#The file schema, as an arrow schema)r   schemar   r   r   r   r   /   r   zORCFile.schemac                 C   r   )zThe number of rows in the file)r   nrowsr   r   r   r   r   4   r   zORCFile.nrowsc                 C   r   )z!The number of stripes in the file)r   nstripesr   r   r   r   r   9   r   zORCFile.nstripesc                 C   r   )z4Format version of the ORC file, must be 0.11 or 0.12)r   file_versionr   r   r   r   r   >   r   zORCFile.file_versionc                 C   r   )z2Software instance and version that wrote this file)r   software_versionr   r   r   r   r   C   r   zORCFile.software_versionc                 C   r   )zCompression codec of the file)r   compressionr   r   r   r   r   H   r   zORCFile.compressionc                 C   r   )z?Number of bytes to buffer for the compression codec in the file)r   compression_sizer   r   r   r   r   M   r   zORCFile.compression_sizec                 C   r   )z{Name of the writer that wrote this file.
        If the writer is unknown then its Writer ID
        (a number) is returned)r   writerr   r   r   r   r   R   s   
zORCFile.writerc                 C   r   )zVersion of the writer)r   writer_versionr   r   r   r   r   Y   r   zORCFile.writer_versionc                 C   r   )zRNumber of rows per an entry in the row index or 0
        if there is no row index)r   row_index_strider   r   r   r   r   ^   s   
zORCFile.row_index_stridec                 C   r   )zNumber of stripe statistics)r   nstripe_statisticsr   r   r   r   r   d   r   zORCFile.nstripe_statisticsc                 C   r   )z/Length of the data stripes in the file in bytes)r   content_lengthr   r   r   r   r   i   r   zORCFile.content_lengthc                 C   r   )z<The number of compressed bytes in the file stripe statistics)r   stripe_statistics_lengthr   r   r   r   r   n   r   z ORCFile.stripe_statistics_lengthc                 C   r   )z1The number of compressed bytes in the file footer)r   file_footer_lengthr   r   r   r   r    s   r   zORCFile.file_footer_lengthc                 C   r   )z*The number of bytes in the file postscript)r   file_postscript_lengthr   r   r   r   r!   x   r   zORCFile.file_postscript_lengthc                 C   r   )zThe number of bytes in the file)r   file_lengthr   r   r   r   r"   }   r   zORCFile.file_lengthNc                 C   s   |d u rd S | j }g }|D ]1}t|tr;t|}d|  kr$t|k r1n n|| j}|| qtdt||f |  S |S )Nr   z/Column indices must be in 0 <= ind < %d, got %d)r   
isinstancer   intlennameappend
ValueError)r
   columnsr   namescolr   r   r   _select_names   s   


zORCFile._select_namesc                 C   s   |  |}| jj||dS )a  Read a single stripe from the file.

        Parameters
        ----------
        n : int
            The stripe index
        columns : list
            If not None, only these columns will be read from the stripe. A
            column name may be a prefix of a nested field, e.g. 'a' will select
            'a.b', 'a.c', and 'a.d.e'

        Returns
        -------
        pyarrow.RecordBatch
            Content of the stripe as a RecordBatch.
        r)   )r,   r   read_stripe)r
   nr)   r   r   r   r.      s   
zORCFile.read_stripec                 C   s   |  |}| jj|dS )a  Read the whole file.

        Parameters
        ----------
        columns : list
            If not None, only these columns will be read from the file. A
            column name may be a prefix of a nested field, e.g. 'a' will select
            'a.b', 'a.c', and 'a.d.e'. Output always follows the
            ordering of the file and not the `columns` list.

        Returns
        -------
        pyarrow.Table
            Content of the file as a Table.
        r-   )r,   r   read)r
   r)   r   r   r   r0      s   
zORCFile.readr   )__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r,   r.   r0   r   r   r   r   r      sP    



















r   a  file_version : {"0.11", "0.12"}, default "0.12"
    Determine which ORC file version to use.
    `Hive 0.11 / ORC v0 <https://orc.apache.org/specification/ORCv0/>`_
    is the older version
    while `Hive 0.12 / ORC v1 <https://orc.apache.org/specification/ORCv1/>`_
    is the newer one.
batch_size : int, default 1024
    Number of rows the ORC writer writes at a time.
stripe_size : int, default 64 * 1024 * 1024
    Size of each ORC stripe in bytes.
compression : string, default 'uncompressed'
    The compression codec.
    Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'}
    Note that LZ0 is currently not supported.
compression_block_size : int, default 64 * 1024
    Size of each compression block in bytes.
compression_strategy : string, default 'speed'
    The compression strategy i.e. speed vs size reduction.
    Valid values: {'SPEED', 'COMPRESSION'}
row_index_stride : int, default 10000
    The row index stride i.e. the number of rows per
    an entry in the row index.
padding_tolerance : double, default 0.0
    The padding tolerance.
dictionary_key_size_threshold : double, default 0.0
    The dictionary key size threshold. 0 to disable dictionary encoding.
    1 to always enable dictionary encoding.
bloom_filter_columns : None, set-like or list-like, default None
    Columns that use the bloom filter.
bloom_filter_fpp : double, default 0.05
    Upper limit of the false-positive rate of the bloom filter.
c                   @   sd   e Zd ZdeZdZddddddd	d
d
dddddZdd Zdd Z	dd Z
dd Zdd ZdS )	ORCWritera  
Writer interface for a single ORC file

Parameters
----------
where : str or pyarrow.io.NativeFile
    Writable target. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
    or pyarrow.io.FixedSizeBufferWriter.
{}
F0.12      uncompressed   speed'          N皙?r   
batch_sizestripe_sizer   compression_block_sizecompression_strategyr   padding_tolerancedictionary_key_size_thresholdbloom_filter_columnsbloom_filter_fppc                C   s8   t  | _| jj|||||||||	|
||d d| _d S )Nr@   T)r   r6   r   r	   is_open)r
   wherer   rA   rB   r   rC   rD   r   rE   rF   rG   rH   r   r   r   r      s    

zORCWriter.__init__c                 C      |    d S r   closer   r   r   r   __del__     zORCWriter.__del__c                 C   s   | S r   r   r   r   r   r   	__enter__  s   zORCWriter.__enter__c                 O   rK   r   rL   )r
   argskwargsr   r   r   __exit__  rO   zORCWriter.__exit__c                 C   s   | j sJ | j| dS )a
  
        Write the table into an ORC file. The schema of the table must
        be equal to the schema used when opening the ORC file.

        Parameters
        ----------
        table : pyarrow.Table
            The table to be written into the ORC file
        N)rI   r   write)r
   tabler   r   r   rT     s   

zORCWriter.writec                 C   s   | j r| j  d| _ dS dS )z$
        Close the ORC file
        FN)rI   r   rM   r   r   r   r   rM   #  s   

zORCWriter.close)r1   r2   r3   format_orc_writer_args_docsr4   rI   r   rN   rP   rS   rT   rM   r   r   r   r   r6      s,    
r6   c                 C   s^   t | |\}}|d ur||} |d ur%t|dkr%t|  |}|S t| j|d}|S )Nr   r-   )r   Zopen_input_filer%   r   r0   select)r   r)   
filesystempathresultr   r   r   
read_table,  s   
r\   a  
Read a Table from an ORC file.

Parameters
----------
source : str, pyarrow.NativeFile, or file-like object
    If a string passed, can be a single file name. For file-like objects,
    only read a single file. Use pyarrow.BufferReader to read a file
    contained in a bytes or buffer-like object.
columns : list
    If not None, only these columns will be read from the file. A column
    name may be a prefix of a nested field, e.g. 'a' will select 'a.b',
    'a.c', and 'a.d.e'. Output always follows the ordering of the file and
    not the `columns` list. If empty, no columns will be read. Note
    that the table will still have the correct num_rows set despite having
    no columns.
filesystem : FileSystem, default None
    If nothing passed, will be inferred based on path.
    Path will try to be found in the local on-disk filesystem otherwise
    it will be parsed as an URI to determine the filesystem.
r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   c                C   st   t |trtjdtdd || } }t|||||||||	|
||d}||  W d    d S 1 s3w   Y  d S )NzThe order of the arguments has changed. Pass as 'write_table(table, where)' instead. The old order will raise an error in the future.   )
stacklevelr@   )r#   r   warningswarnFutureWarningr6   rT   )rU   rJ   r   rA   rB   r   rC   rD   r   rE   rF   rG   rH   r   r   r   r   write_tableP  s.   

"rb   a]  
Write a table into an ORC file.

Parameters
----------
table : pyarrow.lib.Table
    The table to be written into the ORC file
where : str or pyarrow.io.NativeFile
    Writable target. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
    or pyarrow.io.FixedSizeBufferWriter.
{}
)NN)numbersr   r_   Zpyarrow.libr   Zpyarrow._orcr   Z
pyarrow.fsr   r   rW   r6   r\   r4   rb   rV   r   r   r   r   <module>   s4    $"
L$