from typing import Any, Callable, Dict, Iterator, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.utils import pre_init
from pydantic import Field


class ExLlamaV2(LLM):
    """ExllamaV2 API.

    - working only with GPTQ models for now.
    - Lora models are not supported yet.

    To use, you should have the exllamav2 library installed, and provide the
    path to the Llama model as a named parameter to the constructor.
    Check out:

    Example:
        .. code-block:: python

            from langchain_community.llms import Exllamav2

            llm = Exllamav2(model_path="/path/to/llama/model")

    #TODO:
    - Add loras support
    - Add support for custom settings
    - Add support for custom stop sequences
    Nclient
model_pathexllama_cacheconfig	generator	tokenizersettingslogfuncstop_sequences   max_new_tokensT	streamingverbosedisallowed_tokensvaluesreturnc              
   C   s  zdd l }W n ty } ztd|d }~ww |j s!tdzddlm}m}m}m	} ddl
m}m}	 W n tyB   tdw |d }
|
sOdd	 |d
< |d
 }|d ra|d }||j ntd| }|d |_|  ||}||dd}|| ||}|d r|	|||}n||||}dd |d D |d< t|d|d  |d|d   |d}|r||| ||d< ||d< ||d< ||d< ||d< |S )Nr   z@Unable to import torch, please install with `pip install torch`.z/CUDA is not available. ExllamaV2 requires CUDA.)r   ExLlamaV2CacheExLlamaV2ConfigExLlamaV2Tokenizer)ExLlamaV2BaseGeneratorExLlamaV2StreamingGeneratorzCould not import exllamav2 library. Please install the exllamav2 library with (cuda 12.1 is required)example : !python -m pip install https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whlr   c                  _   s   d S )N )argskwargsr#   r#   Y/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/llms/exllamav2.py<lambda>_   s    z0ExLlamaV2.validate_environment.<locals>.<lambda>r   r   z<settings is required. Custom settings are not supported yet.r   T)Zlazyr   c                 S   s   g | ]}|   qS r#   )striplower).0xr#   r#   r&   
<listcomp>z   s    z2ExLlamaV2.validate_environment.<locals>.<listcomp>r   zstop_sequences r   r   r   r   r   r   )torchImportErrorcudaZis_availableEnvironmentErrorZ	exllamav2r   r   r   r    Zexllamav2.generatorr!   r"   __dict__NotImplementedErrorZ	model_dirprepareZload_autosplitsetattrgetZdisallow_tokens)clsr   r-   er   r   r   r    r!   r"   r   r   r   r   modelr   r   r   Z
disallowedr#   r#   r&   validate_environment>   sh   
	


zExLlamaV2.validate_environmentc                 C   s   dS )zReturn type of llm.r   r#   )selfr#   r#   r&   	_llm_type   s   zExLlamaV2._llm_typetextc                 C   s   | j j|S )z-Get the number of tokens present in the text.)r   r   
num_tokens)r:   r<   r#   r#   r&   get_num_tokens   s   zExLlamaV2.get_num_tokenspromptstoprun_managerr%   c           	      K   s`   | j }| jrd}| j||||dD ]}|t|7 }q|S |j|| j| jd}|t|d  }|S )N )r?   r@   rA   r%   )r?   Zgen_settingsr=   )r   r   _streamstrZgenerate_simpler   r   len)	r:   r?   r@   rA   r%   r   Zcombined_text_outputchunkoutputr#   r#   r&   _call   s   
zExLlamaV2._callc           
      k   s    | j |}| j  | jg  | j|| j d}	 | j \}}}	|d7 }|r3|j|| j	d |V  |s=|| j
kr@	 d S q)Nr   T   )tokenr   )r   encoder   ZwarmupZset_stop_conditionsZbegin_streamr   streamZon_llm_new_tokenr   r   )
r:   r?   r@   rA   r%   Z	input_idsZgenerated_tokensrF   Zeos_r#   r#   r&   rC      s&   
zExLlamaV2._stream)NN)%__name__
__module____qualname____doc__r   r   __annotations__rD   r   r   r   r   r   printr   r   r   r   r   r   intr   boolr   r   r   r   r   r9   propertyr;   r>   r   rH   r   r
   rC   r#   r#   r#   r&   r   
   sf   
 $K


r   N)typingr   r   r   r   r   r   Zlangchain_core.callbacksr   Zlangchain_core.language_modelsr	   Zlangchain_core.outputsr
   Zlangchain_core.utilsr   Zpydanticr   r   r#   r#   r#   r&   <module>   s     