o
    Zh'                     @   s   d dl Z d dlZd dlmZmZmZmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d d	lmZ d
ZdZG dd deeZdS )    N)AnyDictListOptional)BatchEmbedContentsRequestEmbedContentRequest)
Embeddings)secret_from_env)	BaseModelField	SecretStrmodel_validator)Self)GoogleGenerativeAIErrorget_client_info)build_generative_servicei N  d   c                   @   s  e Zd ZU dZdZeed< edddZe	ed< eddd	Z
ee	 ed
< eedddddZee ed< eddddZeed< eddd	Zee ed< eddd	Zee	 ed< eddd	Zee ed< edddefddZede	dee	 fd d!Zed"ee	 d#edeee	  fd$d%Z			d0de	d
ee	 d&ee	 d'ee def
d(d)Zedddd*d"ee	 d#ed
ee	 d+eee	  d'ee deee  fd,d-Z 			d0de	d
ee	 d&ee	 d'ee dee f
d.d/Z!dS )1GoogleGenerativeAIEmbeddingsa  `Google Generative AI Embeddings`.

    To use, you must have either:

        1. The ``GOOGLE_API_KEY`` environment variable set with your API key, or
        2. Pass your API key using the google_api_key kwarg
        to the GoogleGenerativeAIEmbeddings constructor.

    Example:
        .. code-block:: python

            from langchain_google_genai import GoogleGenerativeAIEmbeddings

            embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
            embeddings.embed_query("What's our Q1 revenue?")
    Nclient.zEThe name of the embedding model to use. Example: models/embedding-001)descriptionmodelzThe task type. Valid options include: task_type_unspecified, retrieval_query, retrieval_document, semantic_similarity, classification, and clustering)defaultr   	task_typeZGOOGLE_API_KEY)r   zaThe Google API key to use. If not provided, the GOOGLE_API_KEY environment variable will be used.)default_factoryr   google_api_keyTzThe default custom credentials (google.auth.credentials.Credentials) to use when making API calls. If not provided, credentials will be ascertained from the GOOGLE_API_KEY envvar)r   excluder   credentialszXA dictionary of client options to pass to the Google API client, such as `api_endpoint`.client_optionsz3A string, one of: [`rest`, `grpc`, `grpc_asyncio`].	transportz[A dictionary of request options to pass to the Google API client.Example: `{'timeout': 10}`request_optionsafter)modereturnc                 C   s@   t | jtr| j }n| j}td}t| j||| jd| _| S )z@Validates params and passes them to google-generativeai package.r   )r   Zapi_keyclient_infor   )	
isinstancer   r   Zget_secret_valuer   r   r   r   r   )selfr   r#    r&   X/var/www/html/lang_env/lib/python3.10/site-packages/langchain_google_genai/embeddings.pyvalidate_environmentV   s   z1GoogleGenerativeAIEmbeddings.validate_environmenttextc                 C   s,   t jd }d| d}dd t|| D S )z9Splits a string by punctuation and whitespace characters.z	
 z([z])c                 S   s   g | ]}|r|qS r&   r&   ).0segmentr&   r&   r'   
<listcomp>m   s    zFGoogleGenerativeAIEmbeddings._split_by_punctuation.<locals>.<listcomp>)stringpunctuationresplit)r)   Zsplit_bypatternr&   r&   r'   _split_by_punctuationg   s   
z2GoogleGenerativeAIEmbeddings._split_by_punctuationtexts
batch_sizec           
      C   s   d}t | }d}g }g }|dkrg S ||k rq| | }t t|d }d}	|tkr>t |dkr4|| |g}|d7 }d}	n$|| tksJt ||krMd}	n||d krUd}	||7 }|| |d7 }|	rm|| g }d}||k s|S )zlSplits texts in batches based on current maximum batch size
        and maximum tokens per request.
        r      F   T)lenr   r2   _MAX_TOKENS_PER_BATCHappend)
r3   r4   Z
text_indexZ	texts_lenZbatch_token_lenZbatchesZcurrent_batchZcurrent_textZcurrent_text_token_cntZend_of_batchr&   r&   r'   _prepare_batcheso   sD   


'z-GoogleGenerativeAIEmbeddings._prepare_batchestitleoutput_dimensionalityc                 C   s4   | j p|pd}tdd|igi| j| ||d}|S )NZRETRIEVAL_DOCUMENTpartsr)   )contentr   r   r;   r<   )r   r   r   upper)r%   r)   r   r;   r<   requestr&   r&   r'   _prepare_request   s   z-GoogleGenerativeAIEmbeddings._prepare_request)r4   r   titlesr<   rB   c                   s   g }d}t ||D ]W}|r|||t|  }	|t|7 }ndgt| }	 fddt||	D }
zjt|
jd}W n tyU } zt	d| |d}~ww |
dd |jD  q
|S )a  Embed a list of strings. Google Generative AI currently
        sets a max batch size of 100 strings.

        Args:
            texts: List[str] The list of strings to embed.
            batch_size: [int] The batch size of embeddings to send to the model
            task_type: task_type (https://ai.google.dev/api/rest/v1/TaskType)
            titles: An optional list of titles for texts provided.
            Only applicable when TaskType is RETRIEVAL_DOCUMENT.
            output_dimensionality: Optional reduced dimension for the output embedding.
            https://ai.google.dev/api/rest/v1/models/batchEmbedContents#EmbedContentRequest
        Returns:
            List of embeddings, one for each text.
        r   Nc                    s"   g | ]\}}j || d qS ))r)   r   r;   r<   )rA   )r*   r)   r;   r<   r%   r   r&   r'   r,      s    z@GoogleGenerativeAIEmbeddings.embed_documents.<locals>.<listcomp>)requestsr   zError embedding content: c                 S   s   g | ]}t |jqS r&   )listvalues)r*   er&   r&   r'   r,      s    )r   r:   r7   zipr   Zbatch_embed_contentsr   r   	Exceptionr   extend
embeddings)r%   r3   r4   r   rB   r<   rK   Zbatch_start_indexbatchZtitles_batchrD   resultrG   r&   rC   r'   embed_documents   s,   
z,GoogleGenerativeAIEmbeddings.embed_documentsc                 C   s,   | j pd}| j|g||r|gnd|dd S )a  Embed a text.

        Args:
            text: The text to embed.
            task_type: task_type (https://ai.google.dev/api/rest/v1/TaskType)
            title: An optional title for the text.
            Only applicable when TaskType is RETRIEVAL_DOCUMENT.
            output_dimensionality: Optional reduced dimension for the output embedding.
            https://ai.google.dev/api/rest/v1/models/batchEmbedContents#EmbedContentRequest

        Returns:
            Embedding for the text.
        ZRETRIEVAL_QUERYN)r   rB   r<   r   )r   rN   )r%   r)   r   r;   r<   r&   r&   r'   embed_query   s   
z(GoogleGenerativeAIEmbeddings.embed_query)NNN)"__name__
__module____qualname____doc__r   r   __annotations__r   r   strr   r   r	   r   r   r   r   r   r   r   r   r   r(   staticmethodr   r2   intr:   r   rA   _DEFAULT_BATCH_SIZEfloatrN   rO   r&   r&   r&   r'   r      s   
 
$7



8r   )r/   r-   typingr   r   r   r   Z)google.ai.generativelanguage_v1beta.typesr   r   Zlangchain_core.embeddingsr   Zlangchain_core.utilsr	   Zpydanticr
   r   r   r   Ztyping_extensionsr   Zlangchain_google_genai._commonr   r   Z'langchain_google_genai._genai_extensionr   r8   rX   r   r&   r&   r&   r'   <module>   s    