from __future__ import annotations

import json
from typing import (
    TYPE_CHECKING,
    Any,
    AsyncIterator,
    Dict,
    Generator,
    Iterator,
    List,
    Mapping,
    Optional,
    Union,
)

import aiohttp
import requests
from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk

if TYPE_CHECKING:
    from xinference.client import RESTfulChatModelHandle, RESTfulGenerateModelHandle
    from xinference.model.llm.core import LlamaCppGenerateConfig


class Xinference(LLM):
    """`Xinference` large-scale model inference service.

    To use, you should have the xinference library installed:

    .. code-block:: bash

       pip install "xinference[all]"

    If you are only using the services provided by Xinference, you can install the
    lighter xinference_client package instead:

    .. code-block:: bash

        pip install xinference_client

    Check out: https://github.com/xorbitsai/inference
    To run, you need to start a Xinference supervisor on one server and Xinference
    workers on the other servers.

    Example:
        To start a local instance of Xinference, run

        .. code-block:: bash

           $ xinference

        You can also deploy Xinference in a distributed cluster. Here are the steps:

        Starting the supervisor:

        .. code-block:: bash

           $ xinference-supervisor

        Starting the worker:

        .. code-block:: bash

           $ xinference-worker

    Then, launch a model using command line interface (CLI).

    Example:

    .. code-block:: bash

       $ xinference launch -n orca -s 3 -q q4_0

    It will return a model UID. Then, you can use Xinference with LangChain.

    Example:

    .. code-block:: python

        from langchain_community.llms import Xinference

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace model_uid with the model UID returned from launching the model
        )

        llm.invoke(
            prompt="Q: where can we visit in the capital of France? A:",
            generate_config={"max_tokens": 1024, "stream": True},
        )

    Example:

    .. code-block:: python

        from langchain_community.llms import Xinference
        from langchain.prompts import PromptTemplate

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace model_uid with the model UID returned from launching the model
            stream=True
        )
        prompt = PromptTemplate(
            input_variables=['country'],
            template="Q: where can we visit in the capital of {country}? A:"
        )
        chain = prompt | llm
        chain.stream(input={'country': 'France'})
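
    You can also consume the model asynchronously through LangChain's generic
    ``astream`` interface (the following is a minimal sketch; the server URL and
    model UID placeholders are the same as in the examples above):

    .. code-block:: python

        import asyncio

        from langchain_community.llms import Xinference

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace with the UID returned from launching the model
        )

        async def main() -> None:
            # Tokens are printed as the server streams them back.
            async for token in llm.astream(
                "Q: where can we visit in the capital of France? A:"
            ):
                print(token, end="", flush=True)

        asyncio.run(main())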


    To view all the supported builtin models, run:

    .. code-block:: bash

        $ xinference list --all

    NzOptional[Any]clientOptional[str]
server_url	model_uidzDict[str, Any]model_kwargsapi_keyr   c                   s   zddl m} W n# ty+   zddlm} W n ty( } ztd|d }~ww Y nw |p/i }t jd
i |||d | jd u rFtd| jd u rOtdi | _	d| _
|   |d urh| j
rhd| | j	d	< |||| _d S )Nr   )RESTfulClientzCould not import RESTfulClient from xinference. Please install it with `pip install xinference` or `pip install xinference_client`.r   r   r   zPlease provide server URLzPlease provide the model UIDFzBearer Authorization )xinference.clientr   ImportErrorZxinference_clientsuper__init__r   
ValueErrorr   _headers_cluster_authed_check_cluster_authenticatedr   )selfr   r   r   r   r   e	__class__r   Z/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/llms/xinference.pyr"      s@   	


zXinference.__init__returnstrc                 C  s   dS )zReturn type of llm.Z
xinferencer   r'   r   r   r+   	_llm_type   s   zXinference._llm_typeMapping[str, Any]c                 C  s"   i d| j id| jid| jiS )zGet the identifying parameters.r   r   r   r   r.   r   r   r+   _identifying_params   s   zXinference._identifying_paramsNonec                 C  sd   | j  d}t|}|jdkrd| _d S |jdkr%td| d  | }t|d | _d S )Nz/v1/cluster/auth  F   z+Failed to get cluster information, detail: detailauth)r   requestsgetstatus_coder%   RuntimeErrorjsonbool)r'   urlresponseZresponse_datar   r   r+   r&      s   




z'Xinference._check_cluster_authenticatedpromptstopOptional[List[str]]run_manager"Optional[CallbackManagerForLLMRun]kwargsc           
      K  s   | j du r	td| j | j}|di }i | j|}|r#||d< |r>|dr>d}| j||||dD ]}||7 }q5|S |j||d}	|	d	 d
 d S )aq  Call the xinference model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: Optional list of stop words to use when generating.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Returns:
            The generated string by the model.
        NClient is not initialized!generate_configr@   stream )modelr?   rB   rF   r?   rF   choicesr   text)r   r#   	get_modelr   r8   r   _stream_generategenerate)
r'   r?   r@   rB   rD   rI   rF   Zcombined_text_outputtoken
completionr   r   r+   _call   s&   


zXinference._callrI   =Union['RESTfulGenerateModelHandle', 'RESTfulChatModelHandle']rF   "Optional['LlamaCppGenerateConfig']Generator[str, None, None]c                 c  s|    |j ||d}|D ]1}t|tr;|dg }|r;|d }t|tr;|dd}	|d}
|r8|j|	| j|
d |	V  q
dS )	a^  
        Args:
            prompt: The prompt to use for generation.
            model: The model used for generation.
            stop: Optional list of stop words to use when generating.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Yields:
            A string token.
        """
        streaming_response = model.generate(
            prompt=prompt, generate_config=generate_config
        )
        for chunk in streaming_response:
            if isinstance(chunk, dict):
                choices = chunk.get("choices", [])
                if choices:
                    choice = choices[0]
                    if isinstance(choice, dict):
                        token = choice.get("text", "")
                        log_probs = choice.get("logprobs")
                        if run_manager:
                            run_manager.on_llm_new_token(
                                token=token, verbose=self.verbose, log_probs=log_probs
                            )
                        yield token

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        generate_config = kwargs.get("generate_config", {})
        generate_config = {**self.model_kwargs, **generate_config}
        if stop:
            generate_config["stop"] = stop
        for stream_resp in self._create_generate_stream(prompt, generate_config):
            if stream_resp:
                chunk = self._stream_response_to_generation_chunk(stream_resp)
                if run_manager:
                    run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk

    def _create_generate_stream(
        self,
        prompt: str,
        generate_config: Optional[Dict[str, List[str]]] = None,
    ) -> Iterator[str]:
        if self.client is None:
            raise ValueError("Client is not initialized!")
        model = self.client.get_model(self.model_uid)
        yield from model.generate(prompt=prompt, generate_config=generate_config)

    @staticmethod
    def _stream_response_to_generation_chunk(
        stream_response: str,
    ) -> GenerationChunk:
        """Convert a stream response to a generation chunk."""
        token = ""
        if isinstance(stream_response, dict):
            choices = stream_response.get("choices", [])
            if choices:
                choice = choices[0]
                if isinstance(choice, dict):
                    token = choice.get("text", "")
                    return GenerationChunk(
                        text=token,
                        generation_info=dict(
                            finish_reason=choice.get("finish_reason", None),
                            logprobs=choice.get("logprobs", None),
                        ),
                    )
                else:
                    raise TypeError("choice type error!")
            else:
                return GenerationChunk(text=token)
        else:
            raise TypeError("stream_response type error!")

    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        generate_config = kwargs.get("generate_config", {})
        generate_config = {**self.model_kwargs, **generate_config}
        if stop:
            generate_config["stop"] = stop
        async for stream_resp in self._acreate_generate_stream(
            prompt, generate_config
        ):
            if stream_resp:
                chunk = self._stream_response_to_generation_chunk(stream_resp)
                if run_manager:
                    await run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk

    async def _acreate_generate_stream(
        self, prompt: str, generate_config: Optional[Dict[str, List[str]]] = None
    ) -> AsyncIterator[str]:
        request_body: Dict[str, Any] = {"model": self.model_uid, "prompt": prompt}
        if generate_config is not None:
            for key, value in generate_config.items():
                request_body[key] = value

        stream = bool(generate_config and generate_config.get("stream"))
        async with aiohttp.ClientSession() as session:
            async with session.post(
                url=f"{self.server_url}/v1/completions",
                json=request_body,
            ) as response:
                if response.status != 200:
                    if response.status == 404:
                        raise FileNotFoundError(
                            "astream call failed with status code 404."
                        )
                    optional_detail = await response.text()
                    raise ValueError(
                        f"astream call failed with status code {response.status}."
                        f" Details: {optional_detail}"
                    )

                async for line in response.content:
                    if not stream:
                        yield json.loads(line)
                    else:
                        # Streamed completions arrive as SSE lines prefixed with
                        # "data:"; strip the prefix before decoding the JSON chunk.
                        json_str = line.decode("utf-8")
                        if line.startswith(b"data:"):
                            json_str = json_str[len("data:") :].strip()
                            if not json_str:
                                continue
                            yield json.loads(json_str)