from __future__ import annotations

import logging
import os
import warnings
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Literal,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
    Union,
    cast,
)

import numpy as np
from langchain_core._api.deprecation import deprecated
from langchain_core.embeddings import Embeddings
from langchain_core.utils import (
    get_from_dict_or_env,
    get_pydantic_field_names,
    pre_init,
)
from pydantic import BaseModel, ConfigDict, Field, model_validator
from tenacity import (
    AsyncRetrying,
    before_sleep_log,
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)

from langchain_community.utils.openai import is_openai_v1

logger = logging.getLogger(__name__)


def _create_retry_decorator(embeddings: OpenAIEmbeddings) -> Callable[[Any], Any]:
    import openai

    # Exponential backoff between the configured min/max wait, retrying only on
    # transient OpenAI errors, and logging a warning before each sleep.
    return retry(
        reraise=True,
        stop=stop_after_attempt(embeddings.max_retries),
        wait=wait_exponential(
            multiplier=1,
            min=embeddings.retry_min_seconds,
            max=embeddings.retry_max_seconds,
        ),
        retry=(
            retry_if_exception_type(openai.error.Timeout)
            | retry_if_exception_type(openai.error.APIError)
            | retry_if_exception_type(openai.error.APIConnectionError)
            | retry_if_exception_type(openai.error.RateLimitError)
            | retry_if_exception_type(openai.error.ServiceUnavailableError)
        ),
        before_sleep=before_sleep_log(logger, logging.WARNING),
    )


def _async_retry_decorator(embeddings: OpenAIEmbeddings) -> Any:
    import openai

    async_retrying = AsyncRetrying(
        reraise=True,
        stop=stop_after_attempt(embeddings.max_retries),
        wait=wait_exponential(
            multiplier=1,
            min=embeddings.retry_min_seconds,
            max=embeddings.retry_max_seconds,
        ),
        retry=(
            retry_if_exception_type(openai.error.Timeout)
            | retry_if_exception_type(openai.error.APIError)
            | retry_if_exception_type(openai.error.APIConnectionError)
            | retry_if_exception_type(openai.error.RateLimitError)
            | retry_if_exception_type(openai.error.ServiceUnavailableError)
        ),
        before_sleep=before_sleep_log(logger, logging.WARNING),
    )

    def wrap(func: Callable) -> Callable:
        async def wrapped_f(*args: Any, **kwargs: Any) -> Any:
            async for _ in async_retrying:
                return await func(*args, **kwargs)
            raise AssertionError("this is unreachable")

        return wrapped_f

    return wrap


def _check_response(response: dict, skip_empty: bool = False) -> dict:
    if any(len(d["embedding"]) == 1 for d in response["data"]) and not skip_empty:
        import openai

        raise openai.error.APIError("OpenAI API returned an empty embedding")
    return response


def embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) -> Any:
    """Use tenacity to retry the embedding call."""
    if is_openai_v1():
        return embeddings.client.create(**kwargs)
    retry_decorator = _create_retry_decorator(embeddings)

    @retry_decorator
    def _embed_with_retry(**kwargs: Any) -> Any:
        response = embeddings.client.create(**kwargs)
        return _check_response(response, skip_empty=embeddings.skip_empty)

    return _embed_with_retry(**kwargs)


async def async_embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) -> Any:
    """Use tenacity to retry the embedding call."""
    if is_openai_v1():
        return await embeddings.async_client.create(**kwargs)

    @_async_retry_decorator(embeddings)
    async def _async_embed_with_retry(**kwargs: Any) -> Any:
        response = await embeddings.client.acreate(**kwargs)
        return _check_response(response, skip_empty=embeddings.skip_empty)

    return await _async_embed_with_retry(**kwargs)


@deprecated(
    since="0.0.9",
    removal="1.0",
    alternative_import="langchain_openai.OpenAIEmbeddings",
)
class OpenAIEmbeddings(BaseModel, Embeddings):
    """OpenAI embedding models.

    To use, you should have the ``openai`` python package installed, and the
    environment variable ``OPENAI_API_KEY`` set with your API key or pass it
    as a named parameter to the constructor.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import OpenAIEmbeddings
            openai = OpenAIEmbeddings(openai_api_key="my-api-key")

    In order to use the library with Microsoft Azure endpoints, you need to set
    the OPENAI_API_TYPE, OPENAI_API_BASE, OPENAI_API_KEY and OPENAI_API_VERSION.
    The OPENAI_API_TYPE must be set to 'azure' and the others correspond to
    the properties of your endpoint.
    In addition, the deployment name must be passed as the model parameter.

    Example:
        .. code-block:: python

            import os

            os.environ["OPENAI_API_TYPE"] = "azure"
            os.environ["OPENAI_API_BASE"] = "https://<your-endpoint>.openai.azure.com/"
            os.environ["OPENAI_API_KEY"] = "your AzureOpenAI key"
            os.environ["OPENAI_API_VERSION"] = "2023-05-15"
            os.environ["OPENAI_PROXY"] = "http://your-corporate-proxy:8080"

            from langchain_community.embeddings.openai import OpenAIEmbeddings
            embeddings = OpenAIEmbeddings(
                deployment="your-embeddings-deployment-name",
                model="your-embeddings-model-name",
                openai_api_base="https://your-endpoint.openai.azure.com/",
                openai_api_type="azure",
            )
            text = "This is a test query."
            query_result = embeddings.embed_query(text)
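
    The async API can be used in the same way. A minimal sketch (it assumes
    ``OPENAI_API_KEY`` is set in the environment):

        .. code-block:: python

            import asyncio

            from langchain_community.embeddings import OpenAIEmbeddings

            embeddings = OpenAIEmbeddings()
            doc_vectors = asyncio.run(
                embeddings.aembed_documents(["first document", "second document"])
            )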

    NT)defaultexcluder   rU   r]   ztext-embedding-ada-002strmodelzOptional[str]
deploymentapi_version)r_   aliasopenai_api_versionbase_urlopenai_api_baseopenai_api_typeopenai_proxyi  intembedding_ctx_lengthapi_keyopenai_api_keyorganizationopenai_organizationzUnion[Literal['all'], Set[str]]allowed_specialallz.Union[Literal['all'], Set[str], Sequence[str]]disallowed_speciali  
chunk_size   r.   timeoutz0Optional[Union[float, Tuple[float, float], Any]]request_timeoutheadersrJ   tiktoken_enabledtiktoken_model_nameFshow_progress_bar)default_factoryDict[str, Any]model_kwargsrI   zUnion[Mapping[str, str], None]default_headersz!Union[Mapping[str, object], None]default_query   r/      r0   zUnion[Any, None]http_clientZforbidr:   )Zpopulate_by_nameextraZprotected_namespacesbefore)modevaluesr!   c              
   C  s   t | }|di }t|D ](}||v rtd| d||vr6td| d| d| d ||||< q|| }|rHtd| d	||d< |S )
z>Build extra kwargs from additional params that were passed in.r~   zFound z supplied twice.z	WARNING! z/ is not default parameter.
                    zJ was transferred to model_kwargs.
                    Please confirm that z is what you intended.zParameters za should be specified explicitly. Instead they were passed in as part of `model_kwargs` parameter.)	r   getlist
ValueErrorwarningswarnpopintersectionkeys)clsr   Zall_required_field_namesr   
field_nameZinvalid_model_kwargsr:   r:   r;   build_extra  s,   
zOpenAIEmbeddings.build_extrar   c              	   C  s  t |dd|d< |d ptd|d< t |dddd|d< t |d	d
dd|d	< |d dv r9d}t|d d|d< nd}t |dd|d|d< |d pRtdpRtd|d< zddl}W n tyf   tdw t r|d dv rutd |d |d |d |d |d |d |d |d d}|	ds|j
d i |j|d< |	ds|jd i |j|d< |S |	ds|j|d< |S 	 |S )!z?Validate that api key and python package exists in environment.rn   ZOPENAI_API_KEYrh   ZOPENAI_API_BASEri   ZOPENAI_API_TYPE )r_   rj   ZOPENAI_PROXYZazureZazure_adZazureadz
2023-05-15rt   i   rf   ZOPENAI_API_VERSIONrp   ZOPENAI_ORG_IDZOPENAI_ORGANIZATIONr   NTCould not import openai python package. Please install it with `pip install openai`.zfIf you have openai>=1.0.0 installed and are using Azure, please use the `AzureOpenAIEmbeddings` class.rw   r.   r   r   r   )rm   ro   rg   rv   r.   r   r   r   rU   r]   r:   )r   osgetenvr&   r-   ImportErrorr   r   r   r   ZOpenAIr   ZAsyncOpenAIZ	Embedding)r   r   Zdefault_api_versionr-   Zclient_paramsr:   r:   r;   validate_environment   s   







z%OpenAIEmbeddings.validate_environmentc              	   C  s   t  rd| ji| j}|S | j| j| j| j| j| j| j| j	d| j}| jdv r-| j
|d< | jrJzdd l}W n tyA   tdw | j| jd|_|S )Nrb   )rb   rw   rx   rm   ro   Zapi_baseZapi_typerd   r   enginer   r   )httphttps)r   rb   r~   rw   rx   rn   rp   rh   ri   rf   rc   rj   r-   r   proxy)selfZopenai_argsr-   r:   r:   r;   _invocation_paramsm  s8   	

z#OpenAIEmbeddings._invocation_params)rt   texts	List[str]r   Optional[int]List[List[float]]c                C  s  g }g }| j p	| j}|p| j}| jsazddlm} W n ty%   tdw |j|d}	t|D ]/\}
}|	j	|dd}t
dt|| jD ]}|||| j  }|	|}|| ||
 qDq0nkzddl}W n tyr   tdw z||}W n ty   td	 d
}||}Y nw t|D ]7\}
}| jdr|dd}|j	|| j| jd}t
dt|| jD ]}||||| j   ||
 qq| jrzddlm} |t
dt||}W n ty   t
dt||}Y n	w t
dt||}g }|D ])}
t| fd||
|
|  i| j}t|ts| }| dd |d D  qdd t
t|D }dd t
t|D }t
t|D ]*}
| j!rVt||
 dkrVqE|||
  ||
  |||
  t||
  qEdd t
t|D }t
t|D ]B}
||
 }t|dkrt| fddi| j}t|ts| }|d d d }n
t"j#|d||
 d}|t"j$%| & ||
< q|S )al  
        Generate length-safe embeddings for a list of texts.

        This method handles tokenization and embedding generation, respecting the
        set embedding context length and chunk size. It supports both tiktoken
        and HuggingFace tokenizer based on the tiktoken_enabled flag.

        Args:
            texts (List[str]): A list of texts to embed.
            engine (str): The engine or model to use for embeddings.
            chunk_size (Optional[int]): The size of chunks for processing embeddings.

        Returns:
            List[List[float]]: A list of embeddings for each input text.
        """
        tokens = []
        indices = []
        model_name = self.tiktoken_model_name or self.model
        _chunk_size = chunk_size or self.chunk_size

        if not self.tiktoken_enabled:
            try:
                from transformers import AutoTokenizer
            except ImportError:
                raise ValueError(
                    "Could not import transformers python package. "
                    "This is needed in order to use OpenAIEmbeddings without "
                    "`tiktoken`. Please install it with `pip install transformers`."
                )

            tokenizer = AutoTokenizer.from_pretrained(
                pretrained_model_name_or_path=model_name
            )
            for i, text in enumerate(texts):
                # Tokenize the text using the HuggingFace tokenizer.
                tokenized = tokenizer.encode(text, add_special_tokens=False)

                # Split tokens into chunks respecting the embedding_ctx_length.
                for j in range(0, len(tokenized), self.embedding_ctx_length):
                    token_chunk = tokenized[j : j + self.embedding_ctx_length]

                    # Convert token IDs back to a string.
                    chunk_text = tokenizer.decode(token_chunk)
                    tokens.append(chunk_text)
                    indices.append(i)
        else:
            try:
                import tiktoken
            except ImportError:
                raise ImportError(
                    "Could not import tiktoken python package. "
                    "This is needed in order to use OpenAIEmbeddings. "
                    "Please install it with `pip install tiktoken`."
                )

            try:
                encoding = tiktoken.encoding_for_model(model_name)
            except KeyError:
                logger.warning("Warning: model not found. Using cl100k_base encoding.")
                model = "cl100k_base"
                encoding = tiktoken.get_encoding(model)
            for i, text in enumerate(texts):
                if self.model.endswith("001"):
                    # Replace newlines, which can negatively affect performance
                    # on the legacy *-001 embedding models.
                    text = text.replace("\n", " ")

                token = encoding.encode(
                    text,
                    allowed_special=self.allowed_special,
                    disallowed_special=self.disallowed_special,
                )

                # Split tokens into chunks respecting the embedding_ctx_length.
                for j in range(0, len(token), self.embedding_ctx_length):
                    tokens.append(token[j : j + self.embedding_ctx_length])
                    indices.append(i)

        if self.show_progress_bar:
            try:
                from tqdm.auto import tqdm

                _iter = tqdm(range(0, len(tokens), _chunk_size))
            except ImportError:
                _iter = range(0, len(tokens), _chunk_size)
        else:
            _iter = range(0, len(tokens), _chunk_size)

        batched_embeddings: List[List[float]] = []
        for i in _iter:
            response = embed_with_retry(
                self,
                input=tokens[i : i + _chunk_size],
                **self._invocation_params,
            )
            if not isinstance(response, dict):
                response = response.dict()
            batched_embeddings.extend(r["embedding"] for r in response["data"])

        results: List[List[List[float]]] = [[] for _ in range(len(texts))]
        num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]
        for i in range(len(indices)):
            if self.skip_empty and len(batched_embeddings[i]) == 1:
                continue
            results[indices[i]].append(batched_embeddings[i])
            num_tokens_in_batch[indices[i]].append(len(tokens[i]))

        embeddings: List[List[float]] = [[] for _ in range(len(texts))]
        for i in range(len(texts)):
            _result = results[i]
            if len(_result) == 0:
                # If a text produced no chunks, fall back to embedding the empty
                # string to obtain a vector of the right dimensionality.
                average_embedded = embed_with_retry(
                    self,
                    input="",
                    **self._invocation_params,
                )
                if not isinstance(average_embedded, dict):
                    average_embedded = average_embedded.dict()
                average = average_embedded["data"][0]["embedding"]
            else:
                # Token-weighted average of the chunk embeddings, then L2-normalize.
                average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
            embeddings[i] = (average / np.linalg.norm(average)).tolist()

        return embeddings

    async def _aget_len_safe_embeddings(
        self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None
    ) -> List[List[float]]:
        """
        Asynchronously generate length-safe embeddings for a list of texts.

        This method handles tokenization and asynchronous embedding generation,
        respecting the set embedding context length and chunk size. It supports both
        `tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag.

        Args:
            texts (List[str]): A list of texts to embed.
            engine (str): The engine or model to use for embeddings.
            chunk_size (Optional[int]): The size of chunks for processing embeddings.

        Returns:
            List[List[float]]: A list of embeddings for each input text.
        """
        tokens = []
        indices = []
        model_name = self.tiktoken_model_name or self.model
        _chunk_size = chunk_size or self.chunk_size

        if not self.tiktoken_enabled:
            try:
                from transformers import AutoTokenizer
            except ImportError:
                raise ValueError(
                    "Could not import transformers python package. "
                    "This is needed in order to use OpenAIEmbeddings without "
                    "`tiktoken`. Please install it with `pip install transformers`."
                )

            tokenizer = AutoTokenizer.from_pretrained(
                pretrained_model_name_or_path=model_name
            )
            for i, text in enumerate(texts):
                # Tokenize the text using the HuggingFace tokenizer.
                tokenized = tokenizer.encode(text, add_special_tokens=False)

                # Split tokens into chunks respecting the embedding_ctx_length.
                for j in range(0, len(tokenized), self.embedding_ctx_length):
                    token_chunk = tokenized[j : j + self.embedding_ctx_length]

                    # Convert token IDs back to a string.
                    chunk_text = tokenizer.decode(token_chunk)
                    tokens.append(chunk_text)
                    indices.append(i)
        else:
            try:
                import tiktoken
            except ImportError:
                raise ImportError(
                    "Could not import tiktoken python package. "
                    "This is needed in order to use OpenAIEmbeddings. "
                    "Please install it with `pip install tiktoken`."
                )

            try:
                encoding = tiktoken.encoding_for_model(model_name)
            except KeyError:
                logger.warning("Warning: model not found. Using cl100k_base encoding.")
                model = "cl100k_base"
                encoding = tiktoken.get_encoding(model)
            for i, text in enumerate(texts):
                if self.model.endswith("001"):
                    # Replace newlines, which can negatively affect performance
                    # on the legacy *-001 embedding models.
                    text = text.replace("\n", " ")

                token = encoding.encode(
                    text,
                    allowed_special=self.allowed_special,
                    disallowed_special=self.disallowed_special,
                )

                # Split tokens into chunks respecting the embedding_ctx_length.
                for j in range(0, len(token), self.embedding_ctx_length):
                    tokens.append(token[j : j + self.embedding_ctx_length])
                    indices.append(i)

        batched_embeddings: List[List[float]] = []
        for i in range(0, len(tokens), _chunk_size):
            response = await async_embed_with_retry(
                self,
                input=tokens[i : i + _chunk_size],
                **self._invocation_params,
            )
            if not isinstance(response, dict):
                response = response.dict()
            batched_embeddings.extend(r["embedding"] for r in response["data"])

        results: List[List[List[float]]] = [[] for _ in range(len(texts))]
        num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]
        for i in range(len(indices)):
            results[indices[i]].append(batched_embeddings[i])
            num_tokens_in_batch[indices[i]].append(len(tokens[i]))

        embeddings: List[List[float]] = [[] for _ in range(len(texts))]
        for i in range(len(texts)):
            _result = results[i]
            if len(_result) == 0:
                # If a text produced no chunks, fall back to embedding the empty
                # string to obtain a vector of the right dimensionality.
                average_embedded = await async_embed_with_retry(
                    self,
                    input="",
                    **self._invocation_params,
                )
                if not isinstance(average_embedded, dict):
                    average_embedded = average_embedded.dict()
                average = average_embedded["data"][0]["embedding"]
            else:
                # Token-weighted average of the chunk embeddings, then L2-normalize.
                average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
            embeddings[i] = (average / np.linalg.norm(average)).tolist()

        return embeddings

    def embed_documents(
        self, texts: List[str], chunk_size: Optional[int] = 0
    ) -> List[List[float]]:
        """Call out to OpenAI's embedding endpoint for embedding search docs.

        Args:
            texts: The list of texts to embed.
            chunk_size: The chunk size of embeddings. If None, will use the chunk size
                specified by the class.

        Returns:
            List of embeddings, one for each text.
        """
        # NOTE: to keep things simple, we assume the list may contain texts longer
        #       than the maximum context and use the length-safe embedding function.
        engine = cast(str, self.deployment)
        return self._get_len_safe_embeddings(texts, engine=engine)

    async def aembed_documents(
        self, texts: List[str], chunk_size: Optional[int] = 0
    ) -> List[List[float]]:
        """Call out to OpenAI's embedding endpoint async for embedding search docs.

        Args:
            texts: The list of texts to embed.
            chunk_size: The chunk size of embeddings. If None, will use the chunk size
                specified by the class.

        Returns:
            List of embeddings, one for each text.
        """
        # NOTE: to keep things simple, we assume the list may contain texts longer
        #       than the maximum context and use the length-safe embedding function.
        engine = cast(str, self.deployment)
        return await self._aget_len_safe_embeddings(texts, engine=engine)

    def embed_query(self, text: str) -> List[float]:
        """Call out to OpenAI's embedding endpoint for embedding query text.

        Args:
            text: The text to embed.

        Returns:
            Embedding for the text.
        """
        return self.embed_documents([text])[0]

    async def aembed_query(self, text: str) -> List[float]:
        """Call out to OpenAI's embedding endpoint async for embedding query text.

        Args:
            text: The text to embed.

        Returns:
            Embedding for the text.
        """
        embeddings = await self.aembed_documents([text])
        return embeddings[0]
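

# A minimal usage sketch (illustrative only; it assumes the ``openai`` and
# ``tiktoken`` packages are installed and ``OPENAI_API_KEY`` is set in the
# environment).
if __name__ == "__main__":
    embedder = OpenAIEmbeddings(model="text-embedding-ada-002")

    # Batch-embed documents: each text is chunked to the context length, and the
    # chunk embeddings are token-weighted averaged and L2-normalized.
    doc_vectors = embedder.embed_documents(["hello world", "goodbye world"])

    # Embed a single query string.
    query_vector = embedder.embed_query("hello world")

    print(len(doc_vectors), len(doc_vectors[0]), len(query_vector))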