o
    Zhv"                     @  s   d dl mZ d dlZd dlmZmZmZmZmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ dZeeZG d	d
 d
eZdS )    )annotationsN)AnyCallableIteratorListMappingOptional)CallbackManagerForLLMRun)LLM)GenerationChunk)
ConfigDictz mlx-community/quantized-gemma-2bc                   @  s   e Zd ZU dZeZded< 	 dZded< 	 dZded< 	 dZ	ded	< 	 dZ
d
ed< 	 dZded< 	 dZded< 	 eddZe				d%d&ddZed'ddZed(ddZ		d)d*d d!Z		d)d+d#d$ZdS ),MLXPipelinea  MLX Pipeline API.

    To use, you should have the ``mlx-lm`` python package installed.

    Example using from_model_id:
        .. code-block:: python

            from langchain_community.llms import MLXPipeline
            pipe = MLXPipeline.from_model_id(
                model_id="mlx-community/quantized-gemma-2b",
                pipeline_kwargs={"max_tokens": 10, "temp": 0.7},
            )
    Example passing model and tokenizer in directly:
        .. code-block:: python

            from langchain_community.llms import MLXPipeline
            from mlx_lm import load
            model_id="mlx-community/quantized-gemma-2b"
            model, tokenizer = load(model_id)
            pipe = MLXPipeline(model=model, tokenizer=tokenizer)
    strmodel_idNr   model	tokenizerOptional[dict]tokenizer_configOptional[str]adapter_fileFboollazypipeline_kwargsZforbid)extrakwargsreturnc              
   K  s   zddl m} W n ty   tdw |pi }|r%|||||d\}}	n	||||d\}}	|p1i }
| d|||	||||
d|S )	z5Construct the pipeline object from model_id and task.r   )loadTCould not import mlx_lm python package. Please install it with `pip install mlx_lm`.)Zadapter_pathr   )r   )r   r   r   r   r   r   r   N )mlx_lmr   ImportError)clsr   r   r   r   r   r   r   r   r   Z_pipeline_kwargsr   r   \/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/llms/mlx_pipeline.pyfrom_model_idR   s2   zMLXPipeline.from_model_idMapping[str, Any]c                 C  s   | j | j| j| j| jdS )zGet the identifying parameters.r   r   r   r   r   r%   selfr   r   r"   _identifying_paramsz   s   zMLXPipeline._identifying_paramsc                 C  s   dS )NZmlx_pipeliner   r&   r   r   r"   	_llm_type   s   zMLXPipeline._llm_typepromptstopOptional[List[str]]run_manager"Optional[CallbackManagerForLLMRun]c              
   K  s   zddl m} ddlm}m} W n ty   tdw |d| j}|dd}	|dd	}
|d
d}|dd }|dd }|dd }|dd}|dd}|dd}||	|||}|d ||}|| j| j	||
||||dS )Nr   )generatemake_logits_processorsmake_samplerr   r   temp        
max_tokensd   verboseF	formatterrepetition_penaltyrepetition_context_sizetop_p      ?min_pmin_tokens_to_keep   )r   r   r*   r5   r7   r8   samplerlogits_processors)
r   r/   mlx_lm.sample_utilsr1   r2   r    getr   r   r   )r'   r*   r+   r-   r   r/   r1   r2   r   r3   r5   r7   r8   r9   r:   r;   r=   r>   r@   rA   r   r   r"   _call   sH   zMLXPipeline._callIterator[GenerationChunk]c                 k  s   zdd l m} ddlm}m} ddlm} W n ty"   tdw |d| j	}	|	dd}
|	dd	}|	d
d }|	dd }|	dd}|	dd}|	dd}| j
j|dd}||d }| j
j}| j
j}|  ||
psd|||}|d ||}t||| j||dt|D ]5\\}}}d }|| |  |j}|rt|d}|r||j |V  ||ks|d ur||v r d S qd S )Nr   r0   )generate_stepr   r   r3   r4   r5   r6   r9   r:   r;   r<   r=   r>   r?   np)Zreturn_tensors)r*   r   r@   rA   )text)Zmlx.corecorerB   r1   r2   Zmlx_lm.utilsrF   r    rC   r   r   encodearrayeos_token_iddetokenizerresetzipr   rangeZ	add_tokenfinalizelast_segmentr   Zon_llm_new_tokenrH   )r'   r*   r+   r-   r   Zmxr1   r2   rF   r   r3   Zmax_new_tokensr9   r:   r;   r=   r>   Zprompt_tokensrL   rM   r@   rA   tokenZprobnrH   chunkr   r   r"   _stream   sj   


zMLXPipeline._stream)NNFN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r$   )r   r   )NN)
r*   r   r+   r,   r-   r.   r   r   r   r   )
r*   r   r+   r,   r-   r.   r   r   r   rE   )__name__
__module____qualname____doc__DEFAULT_MODEL_IDr   __annotations__r   r   r   r   r   r   r   Zmodel_configclassmethodr#   propertyr(   r)   rD   rV   r   r   r   r"   r      sF   
 '
4r   )
__future__r   loggingtypingr   r   r   r   r   r   Zlangchain_core.callbacksr	   Z#langchain_core.language_models.llmsr
   Zlangchain_core.outputsr   Zpydanticr   r[   	getLoggerrW   loggerr   r   r   r   r"   <module>   s     
