o
    (ifL                     @   s  d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZ dd	lmZ dd
lmZmZmZ ddlm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' ddl(m)Z)m*Z* er|ddl+m,Z, e%-e.Z/e0dZ1dZ2dddZ3e	G dd dZ4e'			dFdee4 dee5 de5de5dee5 dee5 deee5e5f  deee6 ee6 f fddZ7G dd  d eZ8G d!d" d"eZ9			dFd#d$d%edee5 deee5e5f  dee5 ddfd&d'Z:d(e6fd)d*Z;d+e6fd,d-Z<d.e6fd/d0Z=d#d$d1e5ddfd2d3Z>d#d$d4ed5e?d1e5ddf
d6d7Z@d4ed8e4d5e?dee5 fd9d:ZAd;ee d<e5de9fd=d>ZBd#d$d?ee5 d5e?dee fd@dAZCd#d$d?ee5 d5e?dee fdBdCZDG dDdE dEeZEdS )Gz.Git LFS related type definitions and utilities    N)AbstractContextManager)	dataclass)ceil)getsize)Path)TYPE_CHECKINGBinaryIODictIterableListOptionalTuple	TypedDict)unquote)ENDPOINTHF_HUB_ENABLE_HF_TRANSFERREPO_TYPES_URL_PREFIXES   )build_hf_headersfix_hf_endpoint_in_urlget_sessionhf_raise_for_statushttp_backoffloggingtqdmvalidate_hf_hub_args)sha256sha_fileobj)CommitOperationAddz^[0-9a-f]{40}$zlfs-multipart-uploadzapplication/vnd.git-lfs+json)AcceptzContent-Typec                   @   s`   e Zd ZU dZeed< eed< eed< edefddZ	edefd	d
Z
edefddZdS )
UploadInfoad  
    Dataclass holding required information to determine whether a blob
    should be uploaded to the hub using the LFS protocol or the regular protocol

    Args:
        sha256 (`bytes`):
            SHA256 hash of the blob
        size (`int`):
            Size in bytes of the blob
        sample (`bytes`):
            First 512 bytes of the blob
    r   sizesamplepathc                 C   s\   t |}t|d}|dd d }t|}W d    n1 s"w   Y  | |||dS )Nrb   r!   r   r"   )r   ioopenpeekr   )clsr#   r!   filer"   sha r-   L/var/www/html/corbot_env/lib/python3.10/site-packages/huggingface_hub/lfs.py	from_pathO   s   
zUploadInfo.from_pathdatac                 C   s&   t | }| t||d d |dS )Nr%   )r!   r"   r   )r   digestlen)r*   r0   r,   r-   r-   r.   
from_bytesW   s   zUploadInfo.from_bytesfileobjc                 C   sD   | d}|dtj t|}| }|dtj | |||dS )Nr%   r   r&   )readseekr'   SEEK_SETr   tell)r*   r4   r"   r,   r!   r-   r-   r.   from_fileobj\   s   
zUploadInfo.from_fileobjN)__name__
__module____qualname____doc__bytes__annotations__intclassmethodstrr/   r3   r   r9   r-   r-   r-   r.   r    <   s   
 r    upload_infostoken	repo_typerepo_idrevisionendpointheadersreturnc                 C   s   |dur|nt }d}|tv rt| }| d| | d}dddgdd	 | D d
d}	|dur6dt|i|	d< i tt|d|pAi }t j|||	d}
t|
 |
 }|	dd}t
|tsctddd	 |D dd	 |D fS )aA  
    Requests the LFS batch endpoint to retrieve upload instructions

    Learn more: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md

    Args:
        upload_infos (`Iterable` of `UploadInfo`):
            `UploadInfo` for the files that are being uploaded, typically obtained
            from `CommitOperationAdd.upload_info`
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        revision (`str`, *optional*):
            The git revision to upload to.
        headers (`dict`, *optional*):
            Additional headers to include in the request

    Returns:
        `LfsBatchInfo`: 2-tuple:
            - First element is the list of upload instructions from the server
            - Second element is an list of errors, if any

    Raises:
        `ValueError`: If an argument is invalid or the server response is malformed

        `HTTPError`: If the server returned an error
    N /z.git/info/lfs/objects/batchuploadbasic	multipartc                 S   s   g | ]}|j  |jd qS )oidr!   )r   hexr!   ).0rM   r-   r-   r.   
<listcomp>   s    z'post_lfs_batch_info.<locals>.<listcomp>r   )	operation	transfersobjects	hash_algonameref)rD   rI   jsonrW   zMalformed response from serverc                 S   s   g | ]
}d |vrt |qS error)_validate_batch_actionsrS   objr-   r-   r.   rT          c                 S   s   g | ]
}d |v rt |qS r]   )_validate_batch_errorr`   r-   r-   r.   rT      rb   )r   r   r   LFS_HEADERSr   r   postr   r\   get
isinstancelist
ValueError)rC   rD   rE   rF   rG   rH   rI   
url_prefix	batch_urlpayloadresp
batch_inforW   r-   r-   r.   post_lfs_batch_infof   s<   '
ro   c                   @   s   e Zd ZU eed< eed< dS )PayloadPartT
partNumberetagN)r:   r;   r<   r@   r?   rB   r-   r-   r-   r.   rp      s   
 rp   c                   @   s&   e Zd ZU dZeed< ee ed< dS )CompletionPayloadTz?Payload that will be sent to the Hub when uploading multi-part.rQ   partsN)r:   r;   r<   r=   rB   r?   r   rp   r-   r-   r-   r.   rs      s   
 rs   rU   r   lfs_batch_actionc              	   C   sH  t | |d}|du rtd| j d dS |d d }t| |d d}|dur2t| |di }|d}	t|d	 |d
}
|	durjzt|	}	W n tt	fy`   td|	 dw t
| ||	|
d nt| |
d |durt| t|d	 |}t j|t||d| jj | jjdd}t| t| j d dS )a  
    Handles uploading a given object to the Hub with the LFS protocol.

    Can be a No-op if the content of the file is already present on the hub large file storage.

    Args:
        operation (`CommitOperationAdd`):
            The add operation triggering this upload.
        lfs_batch_action (`dict`):
            Upload instructions from the LFS batch endpoint for this object. See [`~utils.lfs.post_lfs_batch_info`] for
            more details.
        headers (`dict`, *optional*):
            Headers to include in the request, including authentication and user agent headers.

    Raises:
        - `ValueError` if `lfs_batch_action` is improperly formatted
        - `HTTPError` if the upload resulted in an error
    actionsNzContent of file z. is already present upstream - skipping uploadrM   verifyheader
chunk_sizehref)rH   zTMalformed response from LFS batch endpoint: `chunk_size` should be an integer. Got 'z'.)rU   rx   ry   
upload_url)rU   r{   )rD   rI   rP   r[   z: Upload successful)r_   rf   loggerdebugpath_in_repo_validate_lfs_actionr   r@   ri   	TypeError_upload_multi_part_upload_single_partr   re   r   upload_infor   rR   r!   r   )rU   ru   rD   rI   rH   rv   upload_actionverify_actionrx   ry   r{   
verify_urlverify_respr-   r-   r.   
lfs_upload   sB   



r   
lfs_actionc                 C   s:   t | dtr| ddu st | dtstd| S ).validates response from the LFS batch endpointrz   rx   Nz"lfs_action is improperly formatted)rg   rf   rB   dictri   )r   r-   r-   r.   r     s
   r   lfs_batch_actionsc                 C   sp   t | dtrt | dtstd| di d}| di d}|dur.t| |dur6t| | S )r   rQ   r!   z)lfs_batch_actions is improperly formattedrv   rM   rw   N)rg   rf   rB   r@   ri   r   )r   r   r   r-   r-   r.   r_     s    r_   lfs_batch_errorc                 C   sh   t | dtrt | dtstd| d}t |tr.t |dtr.t |dts2td| S )r   rQ   r!   z'lfs_batch_error is improperly formattedr^   messagecode)rg   rf   rB   r@   ri   r   )r   
error_infor-   r-   r.   rc     s    
rc   r{   c                 C   sJ   | j dd}td||dd}t| W d   dS 1 sw   Y  dS )aZ  
    Uploads `fileobj` as a single PUT HTTP request (basic LFS transfer protocol)

    Args:
        upload_url (`str`):
            The URL to PUT the file to.
        fileobj:
            The file-like object holding the data to upload.

    Returns: `requests.Response`

    Raises: `requests.HTTPError` if the upload resulted in an error
    T	with_tqdmPUTi  i  i  i  r0   retry_on_status_codesN)as_filer   r   )rU   r{   r4   responser-   r-   r.   r   *  s   
"r   rx   ry   c                 C   s   t || j|d}t}trt| jtst| jtstd d}|r(t	| ||dnt
| ||d}t j|t|| jj td}t| dS )z@
    Uploads file using HF multipart LFS transfer protocol.
    )rx   r   ry   zlhf_transfer is enabled but does not support uploading from bytes or BinaryIO, falling back to regular uploadF)rU   sorted_parts_urlsry   )r\   rI   N)_get_sorted_parts_urlsr   r   rg   path_or_fileobjrB   r   warningswarn_upload_parts_hf_transfer_upload_parts_iterativelyr   re   _get_completion_payloadr   rR   rd   r   )rU   rx   ry   r{   r   use_hf_transferresponse_headerscompletion_resr-   r-   r.   r   >  s,   

r   r   c                 C   sN   dd t dd |  D dd dD }t|}|t|j| kr%td|S )Nc                 S   s   g | ]\}}|qS r-   r-   )rS   _r{   r-   r-   r.   rT   b  s    z*_get_sorted_parts_urls.<locals>.<listcomp>c                 S   s2   g | ]\}}|  rt|d krt|d|fqS )r   
   )isdigitr2   r@   )rS   part_numr{   r-   r-   r.   rT   e  s    c                 S   s   | d S Nr   r-   )tr-   r-   r.   <lambda>j  s    z(_get_sorted_parts_urls.<locals>.<lambda>)keyz0Invalid server response to upload large LFS file)sorteditemsr2   r   r!   ri   )rx   r   ry   sorted_part_upload_urls	num_partsr-   r-   r.   r   a  s   r   r   rQ   c                 C   sf   g }t | D ]'\}}|d}|d u s|dkr#td| d|d  ||d |d q||dS )Nrr   rK   zInvalid etag (`z`) returned for part r   )rq   rr   )rQ   rt   )	enumeraterf   ri   append)r   rQ   rt   part_numberrx   rr   r-   r-   r.   r   s  s   

r   r   c           	   
   C   s   g }| j dd=}t|D ]/\}}t||| |d}td||dd}t| ||j W d    n1 s7w   Y  qW d    |S 1 sHw   Y  |S )NTr   )	seek_from
read_limitr   r   r   )r   r   SliceFileObjr   r   r   rI   )	rU   r   ry   rI   r4   part_idxpart_upload_urlfileobj_slicepart_upload_resr-   r-   r.   r     s*   
r   c                 C   s(  zddl m} W n ty   tdw dt|jv }|s#td | j	j
}| j}t|dkr9d|dd   }t tjkrBd	nd }td
d	|d||dd;}z|d| j||dddd|rcd|jini }	W n tyy }
 ztd|
d }
~
ww |s|| |	W  d    S 1 sw   Y  d S )Nr   )multipart_uploadzFast uploading using 'hf_transfer' is enabled (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is not available in your environment. Try `pip install hf_transfer`.callbackzYou are using an outdated version of `hf_transfer`. Consider upgrading to latest version to enable progress bars using `pip install -U hf_transfer`.(   u   (…)iTBzhuggingface_hub.lfs_upload)unit
unit_scaletotalinitialdescdisablerY            )	file_path
parts_urlsry   	max_filesparallel_failuresmax_retriesz~An error occurred while uploading using `hf_transfer`. Consider disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling.r-   )hf_transferr   ImportErrorri   inspect	signature
parametersr   r   r   r!   r~   r2   r|   getEffectiveLevelr   NOTSETr   r   update	ExceptionRuntimeError)rU   r   ry   r   supports_callbackr   r   r   progressoutputer-   r-   r.   r     sd   
	
$r   c                   @   sx   e Zd ZdZdededefddZdd Zd	d
 ZddefddZ	defddZ
ejfdededefddZdd ZdS )r   a  
    Utility context manager to read a *slice* of a seekable file-like object as a seekable, file-like object.

    This is NOT thread safe

    Inspired by stackoverflow.com/a/29838711/593036

    Credits to @julien-c

    Args:
        fileobj (`BinaryIO`):
            A file-like object to slice. MUST implement `tell()` and `seek()` (and `read()` of course).
            `fileobj` will be reset to its original position when exiting the context manager.
        seek_from (`int`):
            The start of the slice (offset from position 0 in bytes).
        read_limit (`int`):
            The maximum number of bytes to read from the slice.

    Attributes:
        previous_position (`int`):
            The previous position

    Examples:

    Reading 200 bytes with an offset of 128 bytes from a file (ie bytes 128 to 327):
    ```python
    >>> with open("path/to/file", "rb") as file:
    ...     with SliceFileObj(file, seek_from=128, read_limit=200) as fslice:
    ...         fslice.read(...)
    ```

    Reading a file in chunks of 512 bytes
    ```python
    >>> import os
    >>> chunk_size = 512
    >>> file_size = os.getsize("path/to/file")
    >>> with open("path/to/file", "rb") as file:
    ...     for chunk_idx in range(ceil(file_size / chunk_size)):
    ...         with SliceFileObj(file, seek_from=chunk_idx * chunk_size, read_limit=chunk_size) as fslice:
    ...             chunk = fslice.read(...)

    ```
    r4   r   r   c                 C   s   || _ || _|| _d S N)r4   r   r   )selfr4   r   r   r-   r-   r.   __init__  s   
zSliceFileObj.__init__c                 C   sF   | j  | _| j dtj}t| j|| j | _	| j | jt
j | S r   )r4   r8   _previous_positionr6   osSEEK_ENDminr   r   _lenr'   r7   )r   end_of_streamr-   r-   r.   	__enter__   s
   zSliceFileObj.__enter__c                 C   s   | j | jtj d S r   )r4   r6   r   r'   r7   )r   exc_type	exc_value	tracebackr-   r-   r.   __exit__  s   zSliceFileObj.__exit__nc                 C   sH   |   }|| jkrdS | j| }| j|dk r|}|S t||}|S )N    r   )r8   r   r4   r5   r   )r   r   posremaining_amountr0   r-   r-   r.   r5     s   

zSliceFileObj.readrJ   c                 C   s   | j  | j S r   )r4   r8   r   r   r-   r-   r.   r8     s   zSliceFileObj.telloffsetwhencec                 C   s   | j }|| j }|tjtjfv r)|tjkr|| n|| }t|t||}tj}n|tjkr@| j	 }t|| t||| }nt
d| d| j||| j  S )Nzwhence value z is not supported)r   r   r   r7   r   maxr   SEEK_CURr4   r8   ri   r6   )r   r   r   startendcur_posr-   r-   r.   r6     s   


zSliceFileObj.seekc                 c   s    | j ddV  d S )Ni  @ )r   )r5   r   r-   r-   r.   __iter__$  s   zSliceFileObj.__iter__N)r   )r:   r;   r<   r=   r   r@   r   r   r   r5   r8   r   r7   r6   r   r-   r-   r-   r.   r     s    ,r   )NNN)Fr=   r   r'   r   rer   
contextlibr   dataclassesr   mathr   os.pathr   pathlibr   typingr   r   r	   r
   r   r   r   r   urllib.parser   huggingface_hub.constantsr   r   r   utilsr   r   r   r   r   r   r   r   	utils.shar   r   _commit_apir   
get_loggerr:   r|   compile	OID_REGEXLFS_MULTIPART_UPLOAD_COMMANDrd   r    rB   r   ro   rp   rs   r   r   r_   rc   r   r@   r   r   r   r   r   r   r-   r-   r-   r.   <module>   s   ((


)M

D
#

8