o
    Zh"                     @   sp   d dl Z d dlmZmZmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ dZdZd	ZG d
d de	ZdS )    N)AnyListMappingOptional)CallbackManagerForLLMRun)LLM)
ConfigDict)enforce_stop_tokenszgoogle/flan-t5-largetext2text-generation)r
   text-generationsummarizationc                   @   s  e Zd ZU dZdZeed< eZe	ed< 	 dZ
ee ed< 	 dZee ed< 	 eddZe						
	
	dde	de	dee dee	 dee dee dee dee dee dedefddZedee	ef fddZede	fddZ		dde	deee	  dee dede	f
ddZdS ) WeightOnlyQuantPipelinea  Weight only quantized model.

    To use, you should have the `intel-extension-for-transformers` packabge and
        `transformers` package installed.
    intel-extension-for-transformers:
        https://github.com/intel/intel-extension-for-transformers

    Example using from_model_id:
        .. code-block:: python

            from langchain_community.llms import WeightOnlyQuantPipeline
            from intel_extension_for_transformers.transformers import (
                WeightOnlyQuantConfig
            )
            config = WeightOnlyQuantConfig
            hf = WeightOnlyQuantPipeline.from_model_id(
                model_id="google/flan-t5-large",
                task="text2text-generation"
                pipeline_kwargs={"max_new_tokens": 10},
                quantization_config=config,
            )
    Example passing pipeline in directly:
        .. code-block:: python

            from langchain_community.llms import WeightOnlyQuantPipeline
            from intel_extension_for_transformers.transformers import (
                AutoModelForSeq2SeqLM
            )
            from intel_extension_for_transformers.transformers import (
                WeightOnlyQuantConfig
            )
            from transformers import AutoTokenizer, pipeline

            model_id = "google/flan-t5-large"
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            config = WeightOnlyQuantConfig
            model = AutoModelForSeq2SeqLM.from_pretrained(
                model_id,
                quantization_config=config,
            )
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=10,
            )
            hf = WeightOnlyQuantPipeline(pipeline=pipe)
    Npipelinemodel_idmodel_kwargspipeline_kwargsZallow)extraFtaskdevice
device_mapload_in_4bitload_in_8bitquantization_configkwargsreturnc
              
   K   s  |durt |tr|dkrtdtjddu rtdzddlm}m} ddl	m
} dd	lm} dd
lm} W n tyD   tdw t |tr\|dkr\| sUtddt| }nt |trg|dk rgd}|du rq|du rqd}|pti }|j|fi |}z5|dkr|j|f|||	d|d|}n|dv r|j|f|||	d|d|}ntd| dt dW n ty } z	td| d|d}~ww d|v rdd | D }|pi }|d|||||d|}|jtvrtd|j dt d| d||||d|
S )z5Construct the pipeline object from model_id and task.Nr   z7`Device` and `device_map` cannot be set simultaneously!Ztorchz;Weight only quantization pipeline only support PyTorch now!r   )AutoModelForCausalLMAutoModelForSeq2SeqLM)is_ipex_available)AutoTokenizer)r   zCould not import transformers python package. Please install it with `pip install transformers` and `pip install intel-extension-for-transformers`.z)Don't find out Intel GPU on this machine!zxpu:cpur   F)r   r   r   Zuse_llm_runtimer   )r
   r   Got invalid task , currently only  are supportedzCould not load the z# model due to missing dependencies.trust_remote_codec                 S   s   i | ]\}}|d kr||qS )r$    ).0kvr%   r%   h/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/llms/weight_only_quantization.py
<dictcomp>   s    z9WeightOnlyQuantPipeline.from_model_id.<locals>.<dictcomp>)r   model	tokenizerr   r   )r   r   r   r   r%   )
isinstanceint
ValueError	importlibutil	find_specZ-intel_extension_for_transformers.transformersr   r   Z,intel_extension_for_transformers.utils.utilsr   Ztransformersr   r   ImportErrorstrZfrom_pretrainedVALID_TASKSitemsr   )clsr   r   r   r   r   r   r   r   r   r   r   r   r   r   Zhf_pipelineZ_model_kwargsr,   r+   eZ_pipeline_kwargsr   r%   r%   r)   from_model_idO   s   	



z%WeightOnlyQuantPipeline.from_model_idc                 C   s   | j | j| jdS )zGet the identifying parameters.r   r   r   r:   selfr%   r%   r)   _identifying_params   s   z+WeightOnlyQuantPipeline._identifying_paramsc                 C   s   dS )zReturn type of llm.Zweight_only_quantizationr%   r;   r%   r%   r)   	_llm_type   s   z!WeightOnlyQuantPipeline._llm_typepromptstoprun_managerc                 K   s   |  |}| j jdkr|d d t|d }n'| j jdkr%|d d }n| j jdkr2|d d }ntd| j j d	t d
|rFt||}|S )ab  Call the HuggingFace model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.

        Returns:
            The generated text.

        Example:
            .. code-block:: python

                from langchain_community.llms import WeightOnlyQuantPipeline
                llm = WeightOnlyQuantPipeline.from_model_id(
                    model_id="google/flan-t5-large",
                    task="text2text-generation",
                )
                llm.invoke("This is a prompt.")
        r   r   Zgenerated_textNr
   r   Zsummary_textr!   r"   r#   )r   r   lenr/   r5   r	   )r<   r?   r@   rA   r   responsetextr%   r%   r)   _call   s   

zWeightOnlyQuantPipeline._call)r   NNNFFN)NN)__name__
__module____qualname____doc__r   r   __annotations__DEFAULT_MODEL_IDr   r4   r   r   dictr   r   Zmodel_configclassmethodr.   boolr   r9   propertyr   r=   r>   r   r   rE   r%   r%   r%   r)   r      sx   
 1	
j
r   )r0   typingr   r   r   r   Z langchain_core.callbacks.managerr   Z#langchain_core.language_models.llmsr   Zpydanticr   Zlangchain_community.llms.utilsr	   rK   ZDEFAULT_TASKr5   r   r%   r%   r%   r)   <module>   s    