from enum import Enum
from typing import Any, Iterator, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from pydantic import BaseModel, ConfigDict

from langchain_community.llms.utils import enforce_stop_tokens


class Device(str, Enum):
    """The device to use for inference, cuda or cpu"""

    cuda = "cuda"
    cpu = "cpu"


class ReaderConfig(BaseModel):
    """Configuration for the reader to be deployed in Titan Takeoff API."""

    model_config = ConfigDict(
        protected_namespaces=(),
    )

    # Name of the model the reader should serve.
    model_name: str

    # Device to run inference on, cuda or cpu.
    device: Device = Device.cuda

    # Consumer group to place the reader into.
    consumer_group: str = "primary"

    # Number of GPUs to split the model across, if any.
    tensor_parallel: Optional[int] = None

    # Maximum sequence length to use for inference.
    max_seq_length: int = 512

    # Maximum batch size for continuous batching of requests.
    max_batch_size: int = 4


class TitanTakeoff(LLM):
    """Titan Takeoff API LLMs.

    Titan Takeoff is a wrapper to interface with Takeoff Inference API for
    generative text to text language models.

    You can use this wrapper to send requests to a generative language model
    and to deploy readers with Takeoff.

    Examples:
        This is an example of how to deploy a generative language model and send
        requests.

        .. code-block:: python

            # Import the TitanTakeoff class from community package
            import time
            from langchain_community.llms import TitanTakeoff

            # Specify the generative language model reader you'd like to deploy
            reader_1 = {
                "model_name": "TheBloke/Llama-2-7b-Chat-AWQ",
                "device": "cuda",
                "tensor_parallel": 1,
                "consumer_group": "llama"
            }

            # For every reader you pass in via the models arg, Takeoff will spin
            # up a reader according to the specs you provide. If you don't
            # specify the arg, no models are spun up and it is assumed you have
            # already done this separately.
            llm = TitanTakeoff(models=[reader_1])

            # Wait for the reader to be deployed; the time needed depends on the
            # model size and your internet speed
            time.sleep(60)

            # Query the `llama` consumer group, where we just spun up the
            # Llama 7B model; the call returns the generated string
            print(llm.invoke(
                "Where can I see football?", consumer_group="llama"
            ))

            # You can also send generation parameters to the model, any of the
            # following can be passed in as kwargs:
            # https://docs.titanml.co/docs/next/apis/Takeoff%20inference_REST_API/generate#request
            # for instance:
            print(llm.invoke(
                "Where can I see football?", consumer_group="llama", max_new_tokens=100
            ))
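
            # As a sketch, the reader can equally be declared with the
            # ReaderConfig model defined in this module rather than a dict;
            # the fields mirror the dictionary keys shown above.
            from langchain_community.llms.titan_takeoff import ReaderConfig

            reader_config = ReaderConfig(
                model_name="TheBloke/Llama-2-7b-Chat-AWQ",
                device="cuda",
                tensor_parallel=1,
                consumer_group="llama",
            )
            llm = TitanTakeoff(models=[reader_config])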
    zhttp://localhostbase_urli  porti  	mgmt_portF	streamingNclientmodelsc                    sl   t  j||||d zddlm} W n ty   tdw || j| j| jd| _|D ]}| j	| q+dS )a  Initialize the Titan Takeoff language wrapper.

        Args:
            base_url (str, optional): The base URL where the Takeoff
                Inference Server is listening. Defaults to `http://localhost`.
            port (int, optional): The port the Takeoff Inference API is
                listening on. Defaults to 3000.
            mgmt_port (int, optional): The port the Takeoff Management API is
                listening on. Defaults to 3001.
            streaming (bool, optional): Whether to use the generate_stream
                endpoint rather than generate by default, streaming responses.
                Defaults to False. In reality, this is not significantly different
                as the streamed response is buffered and returned similar to the
                non-streamed response, but the run manager is applied per token
                generated.
            models (List[ReaderConfig], optional): Any readers you'd like to
                spin up when the wrapper is initialized. Defaults to [].

        Raises:
            ImportError: If you haven't installed takeoff-client, you will
            get an ImportError. To remedy run `pip install 'takeoff-client==0.4.0'`
        """
        super().__init__(
            base_url=base_url, port=port, mgmt_port=mgmt_port, streaming=streaming
        )
        try:
            from takeoff_client import TakeoffClient
        except ImportError:
            raise ImportError(
                "takeoff-client is required for TitanTakeoff. "
                "Please install it with `pip install 'takeoff-client>=0.4.0'`."
            )
        self.client = TakeoffClient(
            self.base_url, port=self.port, mgmt_port=self.mgmt_port
        )
        for model in models:
            self.client.create_reader(model)

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "titan_takeoff"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out to Titan Takeoff (Pro) generate endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager to use when streaming.

        Returns:
            The string generated by the model.

        Example:
            .. code-block:: python

                model = TitanTakeoff()

                prompt = "What is the capital of the United Kingdom?"

                # Use of model(prompt), ie `__call__` was deprecated in LangChain 0.1.7,
                # use model.invoke(prompt) instead.
                response = model.invoke(prompt)
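
                # Stop sequences can also be supplied; the generated text is
                # truncated at the first match (a sketch, the stop list here
                # is only illustrative):
                response = model.invoke(prompt, stop=["</s>"])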

        """
        if self.streaming:
            text_output = ""
            for chunk in self._stream(
                prompt=prompt,
                stop=stop,
                run_manager=run_manager,
            ):
                text_output += chunk.text
            return text_output

        response = self.client.generate(prompt, **kwargs)
        text = response["text"]

        if stop is not None:
            text = enforce_stop_tokens(text, stop)
        return text

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Call out to Titan Takeoff (Pro) stream endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager to use when streaming.

        Yields:
            GenerationChunk objects, each containing a string token.

        Example:
            .. code-block:: python

                model = TitanTakeoff()

                prompt = "What is the capital of the United Kingdom?"
                response = model.stream(prompt)

                # OR

                model = TitanTakeoff(streaming=True)

                response = model.invoke(prompt)
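
                # Tokens can also be consumed incrementally as they are
                # generated (a sketch; stream() yields the text chunk by chunk):
                for token in model.stream(prompt):
                    print(token, end="", flush=True)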

        """
        response = self.client.generate_stream([prompt], **kwargs)
        buffer = ""
        for text in response:
            buffer += text.data
            if "data:" in buffer:
                # Remove the first instance of "data:" from the buffer.
                if buffer.startswith("data:"):
                    buffer = ""
                # Trim the buffer to only keep content after the "data:" part.
                if len(buffer.split("data:", 1)) == 2:
                    content, _ = buffer.split("data:", 1)
                    buffer = content.rstrip("\n")
                if buffer:  # Ensure there is content to emit.
                    chunk = GenerationChunk(text=buffer)
                    buffer = ""  # Reset the buffer for the next set of data.
                    if run_manager:
                        run_manager.on_llm_new_token(token=chunk.text)
                    yield chunk

        # Yield any remaining content left in the buffer.
        if buffer:
            chunk = GenerationChunk(text=buffer.replace("</s>", ""))
            if run_manager:
                run_manager.on_llm_new_token(token=chunk.text)
            yield chunk