o
    'ZhJ                     @  sz   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZdddZ	dd	d
Z
dddZdddZdddZdS )    )annotationsNblobpathstrreturnbytesc              
   C  s   |  ds<|  ds<zdd l}W n ty! } ztd|d }~ww || d}| W  d    S 1 s7w   Y  t| }|  |jS )Nzhttp://zhttps://r   Oblobfile is not installed. Please install it by running `pip install blobfile`.rb)	
startswithblobfileImportErrorBlobFilereadrequestsgetraise_for_statuscontent)r   r
   efresp r   D/var/www/html/lang_env/lib/python3.10/site-packages/tiktoken/load.py	read_file   s"    
r   c                 C  sD  d}dt jv rt jd }ndt jv rt jd }nt jt d}d}|dkr+t| S t| 	 
 }t j||}t j|rZt|d}| W  d    S 1 sUw   Y  t| }z6t j|dd |d	 tt  d
 }t|d}|| W d    n1 sw   Y  t || W |S  ty   |r Y |S w )NTZTIKTOKEN_CACHE_DIRZDATA_GYM_CACHE_DIRzdata-gym-cacheF r   )exist_ok.z.tmpwb)osenvironpathjointempfile
gettempdirr   hashlibsha1encode	hexdigestexistsopenr   makedirsr   uuiduuid4writerenameOSError)r   Zuser_specified_cache	cache_dir	cache_key
cache_pathr   contentsZtmp_filenamer   r   r   read_file_cached   s<   

 r2   vocab_bpe_fileencoder_json_filedict[bytes, int]c                   s2  dd t dD }dd |D  d}t dD ]}||vr-|| | td| < |d7 }qt|dks6J t|  }dd |d	dd
 D }d fdddd t|D }t|}|D ]\}}	||||	 < |d7 }q`t	t|}
fdd|

 D }|dd  |dd  ||ksJ |S )Nc                 S  s(   g | ]}t | rt |d kr|qS ) )chrisprintable.0br   r   r   
<listcomp>F   s   ( z3data_gym_to_mergeable_bpe_ranks.<locals>.<listcomp>   c                 S  s   i | ]}t ||qS r   )r7   r9   r   r   r   
<dictcomp>H   s    z3data_gym_to_mergeable_bpe_ranks.<locals>.<dictcomp>r      c                 S  s   g | ]}t | qS r   )tuplesplit)r:   Z	merge_strr   r   r   r<   S   s    
valuer   r   r   c                   s   t  fdd| D S )Nc                 3  s    | ]} | V  qd S Nr   r9   data_gym_byte_to_byter   r   	<genexpr>V   s    zKdata_gym_to_mergeable_bpe_ranks.<locals>.decode_data_gym.<locals>.<genexpr>r   )rD   rF   r   r   decode_data_gymU   s   z8data_gym_to_mergeable_bpe_ranks.<locals>.decode_data_gymc                 S  s   i | ]
\}}t |g|qS r   rI   )r:   ir;   r   r   r   r>   Y   s    c                   s   i | ]	\}} ||qS r   r   )r:   kv)rJ   r   r   r>   d   s    s   <|endoftext|>s   <|startoftext|>)rD   r   r   r   )rangeappendr7   lenr2   decoderA   	enumeratejsonloadsitemspop)r3   r4   Zrank_to_intbytenr;   Zvocab_bpe_contentsZ
bpe_merges	bpe_ranksfirstsecondZencoder_jsonZencoder_json_loadedr   )rG   rJ   r   data_gym_to_mergeable_bpe_ranksB   s0   

r[   rX   tiktoken_bpe_fileNonec              
   C  s   zdd l }W n ty } ztd|d }~ww ||d*}t|  dd dD ]\}}|t|d t|	  d  q)W d    d S 1 sKw   Y  d S )	Nr   r   r   c                 S  s   | d S )Nr?   r   )xr   r   r   <lambda>u   s    z#dump_tiktoken_bpe.<locals>.<lambda>)key       
)
r
   r   r   sortedrU   r+   base64	b64encoder   r$   )rX   r\   r
   r   r   tokenrankr   r   r   dump_tiktoken_bpem   s   &"rh   c                 C  s$   t | }dd dd | D D S )Nc                 S  s    i | ]\}}t |t|qS r   )rd   	b64decodeint)r:   rf   rg   r   r   r   r>   |   s    z%load_tiktoken_bpe.<locals>.<dictcomp>c                 s  s    | ]	}|r|  V  qd S rE   )rA   )r:   liner   r   r   rH   ~   s    z$load_tiktoken_bpe.<locals>.<genexpr>)r2   
splitlines)r\   r1   r   r   r   load_tiktoken_bpey   s   rm   )r   r   r   r   )r3   r   r4   r   r   r5   )rX   r5   r\   r   r   r]   )r\   r   r   r5   )
__future__r   rd   r"   rS   r   r    r)   r   r   r2   r[   rh   rm   r   r   r   r   <module>   s    


%
+