o
    TZhU!                     @   s@  d Z ddlZddlZddlZdd Zdd Zd Zdd	 e	d
D Zdd eD Zdd e
 D Zdd Zdd Zdd Zd:ddZd;ddZdd Zd:ddZd<ddZd ZeeZeed!Zd=d#d$Zd%d& Zd'd( Zd:d)d*Zd<d+d,Zd-d. Zd/d0 Zd:d1d2Zej dfd3d4Z!ddd5d6d7Z"d>d8d9Z#dS )?a  
Binary tensor encodings for PyTorch and NumPy.

This defines efficient binary encodings for tensors. The format is 8 byte
aligned and can be used directly for computations when transmitted, say,
via RDMA. The format is supported by WebDataset with the `.ten` filename
extension. It is also used by Tensorcom, Tensorcom RDMA, and can be used
for fast tensor storage with LMDB and in disk files (which can be memory
mapped)

Data is encoded as a series of chunks:

- magic number (int64)
- length in bytes (int64)
- bytes (multiple of 64 bytes long)

Arrays are a header chunk followed by a data chunk.
Header chunks have the following structure:

- dtype (int64)
- 8 byte array name
- ndim (int64)
- dim[0]
- dim[1]
- ...
    Nc                 C   s0   t | dr| jS t| ttfrt| S t| d)z#Determine the length of a in bytes.nbyteszcannot determine nbytes)hasattrr   
isinstance	bytearraybyteslen
ValueErrora r   c/var/www/html/lang_env/lib/python3.10/site-packages/datasets/packaged_modules/webdataset/_tenbin.pybytelen(   s
   

r   c                 C   s.   t | tttfr
| S t| dr| jS t| d)z)Return a the raw data corresponding to a.datazcannot return bytedata)r   r   r   
memoryviewr   r   r   r	   r   r   r   bytedata2   s
   

r   zl
float16 f2
float32 f4
float64 f8
int8 i1
int16 i2
int32 i4
int64 i8
uint8 u1
uint16 u2
unit32 u4
uint64 u8
c                 C   s   g | ]}|  qS r   )split.0xr   r   r   
<listcomp>K   s    r   
c                 C   s   i | ]	}|d  |d qS )r      r   r   r   r   r   
<dictcomp>L   s    r   c                 C   s   i | ]\}}||qS r   r   )r   kvr   r   r   r   M   s    c                 C   s:   | D ]}|j jtvrtd|s|j jdvrtdqdS )zCheck that the data has an acceptable type for tensor encoding.

    :param data: array
    :param allow64: allow 64 bit types
    zunsupported dataypte)Zfloat64Zint64Zuint64z664 bit datatypes not allowed unless explicitly enabledN)dtypenamelong_to_shortr   )r   Zallow64r
   r   r   r   check_acceptable_input_typeP   s   r   c                 C   s.   | ddt |    } | d} td| d S )zConvert a string to an int64.    ascii@qr   )r   encodestructunpack)sr   r   r   str64]   s   
r'   c                 C   s   t d| }|ddS )zConvert an int64 to a string.r"   r!   r   )r$   packdecodestrip)ibr   r   r   unstr64d   s   r-   c                 C   sd   |du s|du r
| S |du r| |fS t |ttfstdt||D ]\}}td| d| | S )zVerify the info strings.FNTz$required_infos must be tuple or listzactual info z doesn't match required info )r   tuplelistr   zip)r   infosZrequired_infosrequiredactualr   r   r   check_infosj   s   r4    c                 C   s   | j dkr	td| jt| j| j krtd| jjt	vr#tdt
t	| jj t
|t| jgt| j }ttj|ddS )z'Encode an array header as a byte array.
   ztoo many dimensionszmismatch between size and shapeunsupported array typei8r   )ndimr   r   npprodshapeitemsizer   r   r   r'   r   r/   r   array)r
   infoheaderr   r   r   encode_headerw   s   
*rB   c                 C   sr   t j| dd} t| d tvrtdt tt| d  }t| d }t| d }t| dd|  }|||fS )z)Decode a byte array into an array header.r8   r9   r   r7   r         )r;   
frombufferr-   short_to_longr   r   intr.   )hr   r@   Zrankr=   r   r   r   decode_header   s   
rI   c                 C   st   |du rdg}nt | t |krtd|  d| g }t| D ]\}}t|||t |  }||t|g7 }q |S )z?Given a list of arrays, encode them into a list of byte arrays.Nr5   zlength of list z must muatch length of infos )r   r   	enumeraterB   r   )lr1   resultr+   r
   rA   r   r   r   encode_list   s   rM   Fc           
      C   sz   g }g }t | ddd | ddd D ]#\}}t|\}}}tj||t|dj| }	||	g7 }||g7 }qt|||S )z5Given a list of byte arrays, decode them into arrays.NrC   r   )r   count)r0   rI   r;   rE   r<   Zreshaper4   )
rK   r1   rL   Zinfos0rA   r   r=   r   r@   r
   r   r   r   decode_list   s   &
rO   z~TenBin~r!   @   c                 C   s   || | d |  S )z$Round up to the next multiple of 64.r   r   )nr   r   r   r   roundup   s   rR   c                 C   s   t dd | D }t|}d}| D ]1}t|||d < |d7 }td|j|||d < |d7 }||||t| < |tt|7 }q|S )zKEncode a list of chunks into a single byte array, with lengths and magics..c                 s   s    | ]
}d t |j V  qdS )   N)rR   r   )r   r,   r   r   r   	<genexpr>   s    z encode_chunks.<locals>.<genexpr>r   r    r"   )sumr   magic_bytesr$   r(   r   r   rR   )rK   sizerL   offsetr,   r   r   r   encode_chunks   s   rY   c                 C   s   g }d}t | }||k rGt| ||d  krtd|d7 }td| ||d  d }|d7 }| |||  }|t|7 }|| ||k s|S )z*Decode a byte array into a list of chunks.r   r    zmagic bytes mismatchr"   )r   rV   r   r$   r%   rR   append)bufrL   rX   totalr   r,   r   r   r   decode_chunks   s   
	r]   c                 C   s"   t | ts	tdtt| |dS )z1Encode a list of arrays into a single byte array.zrequires listr1   )r   r/   r   rY   rM   )rK   r1   r   r   r   encode_buffer   s   
r_   c                 C   s   t t| |dS )z*Decode a byte array into a list of arrays.r^   )rO   r]   )r[   r1   r   r   r   decode_buffer   s   r`   c                 C   s\   t |}| t | td| | t| t|| }|dkr,| d|  dS dS )zBWrite a byte chunk to the stream with magics, length, and padding.r"   r       N)r   writerV   r$   r(   r   rR   )streamr[   r   paddingr   r   r   write_chunk   s   
re   c                 C   s|   |  d}|dkrdS |tkrtd|  d}td|d }|dk r(td|  |}t|| }|dkr<|  | |S )zARead a byte chunk from a stream with magics, length, and padding.r        Nzmagic number does not matchr"   r   znegative nbytes)readrV   r   r$   r%   rR   )rc   magicr   r   rd   r   r   r   
read_chunk   s   



ri   c                 C   s    t ||dD ]}t| | qdS )zEWrite a list of arrays to a stream, with magics, length, and padding.r^   N)rM   re   )rc   rK   r1   chunkr   r   r   rb      s   rb   c                 C   sV   g }t |D ]}t| }|du r nt| }|du rtd|||g7 }qt||dS )zFRead a list of arrays from a stream, with magics, length, and padding.Nzpremature EOFr^   )rangeri   r   rO   )rc   rQ   r1   chunks_rA   r   r   r   r   rg     s   rg   )r1   nocheckc                G   sT   |s|  dstdt| d}t|||d W d   dS 1 s#w   Y  dS )zBSave a list of arrays to a file, with magics, length, and padding..tenfile name should end in .tenwbr^   N)endswithr   openrb   )fnamer1   rn   argsrc   r   r   r   save  s
   "rv   c                 C   sP   |s|  dstdt| d}t||dW  d   S 1 s!w   Y  dS )zDRead a list of arrays from a file, with magics, length, and padding.ro   rp   rbr^   N)rr   r   rs   rg   )rt   r1   rn   rc   r   r   r   load  s
   
$rx   )N)r5   )F)rP   )FF)$__doc__r$   sysnumpyr;   r   r   r*   r   r   itemsrF   r   r'   r-   r4   rB   rI   rM   rO   Z	magic_strrh   r#   rV   rR   rY   r]   r_   r`   re   ri   rb   maxsizerg   rv   rx   r   r   r   r   <module>   sD   








