from __future__ import annotations

import json
from io import StringIO
from typing import Any, Dict, Iterator, List, Optional

import requests
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.utils import get_pydantic_field_names
from pydantic import ConfigDict


class Llamafile(LLM):
    """Llamafile lets you distribute and run large language models with a
    single file.

    To get started, see: https://github.com/Mozilla-Ocho/llamafile

    To use this class, you will need to first:

    1. Download a llamafile.
    2. Make the downloaded file executable: `chmod +x path/to/model.llamafile`
    3. Start the llamafile in server mode:

        `./path/to/model.llamafile --server --nobrowser`

    Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile()
            llm.invoke("Tell me a joke.")
    zhttp://localhost:8080strbase_urlNzOptional[int]request_timeoutFbool	streamingintseedg?floattemperature(   top_kgffffff?top_pg?min_p	n_predictr   n_keepg      ?tfs_z	typical_pg?repeat_penalty@   repeat_last_nTpenalize_nlg        presence_penaltyfrequency_penaltymirostatg      @mirostat_taug?mirostat_etaZforbid)extrareturnc                 C  s   dS )NZ	llamafile )selfr,   r,   Y/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/llms/llamafile.py	_llm_typex   s   zLlamafile._llm_type	List[str]c                   s$   g d  fddt | jD }|S )N)r   cacheZcallback_manager	callbacksmetadatanamer   r   tagsverboseZcustom_get_token_idsc                   s   g | ]}| vr|qS r,   r,   ).0kZignore_keysr,   r.   
<listcomp>   s    z/Llamafile._param_fieldnames.<locals>.<listcomp>)r   	__class__)r-   attrsr,   r9   r.   _param_fieldnames|   s
   
zLlamafile._param_fieldnamesDict[str, Any]c                 C  s"   i }| j D ]	}t| |||< q|S N)r=   getattr)r-   params	fieldnamer,   r,   r.   _default_params   s   
zLlamafile._default_paramsstopOptional[List[str]]kwargsr   c                 K  sV   | j }| D ]\}}||v r|||< q|d ur"t|dkr"||d< | jr)d|d< |S )Nr   rD   Tstream)rC   itemslenr   )r-   rD   rF   rA   r8   vr,   r,   r.   _get_parameters   s   zLlamafile._get_parameterspromptrun_manager"Optional[CallbackManagerForLLMRun]c                 K  s   | j r2t "}| j|f||d|D ]}||j q| }W d   |S 1 s+w   Y  |S | jdd|i|}d|i|}	ztj| j	 dddi|	d| j
d	}
W n tjjyi   tjd
| j	 dw |
  d|
_|
 d }|S )a  Request prompt completion from the llamafile server and return the
        output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.
            run_manager:
            **kwargs: Any additional options to pass as part of the
            generation request.

        Returns:
            The string generated by the model.

        """
        if self.streaming:
            with StringIO() as buff:
                for chunk in self._stream(
                    prompt, stop=stop, run_manager=run_manager, **kwargs
                ):
                    buff.write(chunk.text)

                text = buff.getvalue()

            return text
        else:
            params = self._get_parameters(stop=stop, **kwargs)
            payload = {"prompt": prompt, **params}

            try:
                response = requests.post(
                    url=f"{self.base_url}/completion",
                    headers={"Content-Type": "application/json"},
                    json=payload,
                    stream=False,
                    timeout=self.request_timeout,
                )
            except requests.exceptions.ConnectionError:
                raise requests.exceptions.ConnectionError(
                    f"Could not connect to Llamafile server. Please make sure "
                    f"that a server is running at {self.base_url}."
                )

            response.raise_for_status()
            response.encoding = "utf-8"

            text = response.json()["content"]

            return text

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Yields results objects as they are generated in real time.

        It also calls the callback manager's on_llm_new_token event with
        similar parameters to the OpenAI LLM class method of the same name.

        Args:
            prompt: The prompts to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager:
            **kwargs: Any additional options to pass as part of the
            generation request.

        Returns:
            A generator representing the stream of tokens being generated.

        Yields:
            Dictionary-like objects each containing a token

        Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile(
                temperature = 0.0
            )
            for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
                    stop=["'","\n"]):
                result = chunk["choices"][0]
                print(result["text"], end='', flush=True)

        """
        params = self._get_parameters(stop=stop, **kwargs)
        if "stream" not in params:
            params["stream"] = True

        payload = {"prompt": prompt, **params}

        try:
            response = requests.post(
                url=f"{self.base_url}/completion",
                headers={"Content-Type": "application/json"},
                json=payload,
                stream=True,
                timeout=self.request_timeout,
            )
        except requests.exceptions.ConnectionError:
            raise requests.exceptions.ConnectionError(
                f"Could not connect to Llamafile server. Please make sure "
                f"that a server is running at {self.base_url}."
            )

        response.encoding = "utf8"

        for raw_chunk in response.iter_lines(decode_unicode=True):
            content = self._get_chunk_content(raw_chunk)
            chunk = GenerationChunk(text=content)

            if run_manager:
                run_manager.on_llm_new_token(token=chunk.text)
            yield chunk

    def _get_chunk_content(self, chunk: str) -> str:
        """When streaming is turned on, llamafile server returns lines like:

        'data: {"content":" They","multimodal":true,"slot_id":0,"stop":false}'

        Here, we convert this to a dict and return the value of the 'content'
        field
        zdata:zdata: rY   )
startswithlstriprU   loads)r-   rd   cleaneddatar,   r,   r.   rm   1  s
   
	

zLlamafile._get_chunk_content)r+   r   )r+   r0   )r+   r>   r?   )rD   rE   rF   r   r+   r>   )NN)
rL   r   rD   rE   rM   rN   rF   r   r+   r   )
rL   r   rD   rE   rM   rN   rF   r   r+   rh   )rd   r   r+   r   )#__name__
__module____qualname____doc__r   __annotations__r   r   r   r   r   r   r   r   r   r   r    r!   r#   r$   r%   r&   r'   r(   r)   r   Zmodel_configpropertyr/   r=   rC   rK   rg   rZ   rm   r,   r,   r,   r.   r      sx   
 ?Fr   )
__future__r   rU   ior   typingr   r   r   r   r   r^   Z langchain_core.callbacks.managerr	   Z#langchain_core.language_models.llmsr
   Zlangchain_core.outputsr   Zlangchain_core.utilsr   Zpydanticr   r   r,   r,   r,   r.   <module>   s    