import warnings
from typing import Any, Dict, List, Optional

import requests
from langchain_core._api import deprecated, warn_deprecated
from langchain_core.embeddings import Embeddings
from pydantic import BaseModel, ConfigDict, Field, SecretStr

DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large"
DEFAULT_BGE_MODEL = "BAAI/bge-large-en"
DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: "
DEFAULT_QUERY_INSTRUCTION = (
    "Represent the question for retrieving supporting documents: "
)
DEFAULT_QUERY_BGE_INSTRUCTION_EN = (
    "Represent this question for searching relevant passages: "
)
DEFAULT_QUERY_BGE_INSTRUCTION_ZH = "为这个句子生成表示以用于检索相关文章："


@deprecated(
    since="0.2.2",
    removal="1.0",
    alternative_import="langchain_huggingface.HuggingFaceEmbeddings",
)
class HuggingFaceEmbeddings(BaseModel, Embeddings):
    """HuggingFace sentence_transformers embedding models.

    To use, you should have the ``sentence_transformers`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import HuggingFaceEmbeddings

            model_name = "sentence-transformers/all-mpnet-base-v2"
            model_kwargs = {'device': 'cpu'}
            encode_kwargs = {'normalize_embeddings': False}
            hf = HuggingFaceEmbeddings(
                model_name=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )
    """

    client: Any = None  #: :meta private:
    model_name: str = DEFAULT_MODEL_NAME
    """Model name to use."""
    cache_folder: Optional[str] = None
    """Path of the cache folder."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass to the model."""
    encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass when calling the `encode` method of the model."""
    multi_process: bool = False
    """Run encode() on multiple GPUs."""
    show_progress: bool = False
    """Whether to show a progress bar."""

    def __init__(self, **kwargs: Any):
        """Initialize the sentence_transformer."""
        super().__init__(**kwargs)
        if "model_name" not in kwargs:
            since = "0.2.16"
            removal = "0.4.0"
            warn_deprecated(
                since=since,
                removal=removal,
                message=(
                    f"Default values for {self.__class__.__name__}.model_name"
                    f" were deprecated in LangChain {since} and will be removed in"
                    f" {removal}. Explicitly pass a model_name to the"
                    f" {self.__class__.__name__} constructor instead."
                ),
            )
        try:
            import sentence_transformers
        except ImportError as exc:
            raise ImportError(
                "Could not import sentence_transformers python package. "
                "Please install it with `pip install sentence-transformers`."
            ) from exc

        self.client = sentence_transformers.SentenceTransformer(
            self.model_name, cache_folder=self.cache_folder, **self.model_kwargs
        )

    model_config = ConfigDict(extra="forbid", protected_namespaces=())

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a HuggingFace transformer model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        import sentence_transformers

        texts = list(map(lambda x: x.replace("\n", " "), texts))
        if self.multi_process:
            pool = self.client.start_multi_process_pool()
            embeddings = self.client.encode_multi_process(texts, pool)
            sentence_transformers.SentenceTransformer.stop_multi_process_pool(pool)
        else:
            embeddings = self.client.encode(
                texts, show_progress_bar=self.show_progress, **self.encode_kwargs
            )

        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a HuggingFace transformer model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self.embed_documents([text])[0]
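# Illustrative sketch (not part of the original module): how the multi_process and
# show_progress flags above change the encode path. The model name and the sample
# documents are assumptions chosen for demonstration; nothing here runs at import time.
def _example_multi_process_embedding() -> List[List[float]]:
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        multi_process=True,  # fan encoding out across all visible devices
        show_progress=True,  # only passed to encode() on the single-process path
    )
    return embedder.embed_documents(["first document", "second document"])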
class HuggingFaceInstructEmbeddings(BaseModel, Embeddings):
    """Wrapper around sentence_transformers embedding models.

    To use, you should have the ``sentence_transformers``
    and ``InstructorEmbedding`` python packages installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import HuggingFaceInstructEmbeddings

            model_name = "hkunlp/instructor-large"
            model_kwargs = {'device': 'cpu'}
            encode_kwargs = {'normalize_embeddings': True}
            hf = HuggingFaceInstructEmbeddings(
                model_name=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )
    """

    client: Any = None  #: :meta private:
    model_name: str = DEFAULT_INSTRUCT_MODEL
    """Model name to use."""
    cache_folder: Optional[str] = None
    """Path of the cache folder."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass to the model."""
    encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass when calling the `encode` method of the model."""
    embed_instruction: str = DEFAULT_EMBED_INSTRUCTION
    """Instruction to use for embedding documents."""
    query_instruction: str = DEFAULT_QUERY_INSTRUCTION
    """Instruction to use for embedding query."""
    show_progress: bool = False
    """Whether to show a progress bar."""

    def __init__(self, **kwargs: Any):
        """Initialize the sentence_transformer."""
        super().__init__(**kwargs)
        if "model_name" not in kwargs:
            since = "0.2.16"
            removal = "0.4.0"
            warn_deprecated(
                since=since,
                removal=removal,
                message=(
                    f"Default values for {self.__class__.__name__}.model_name"
                    f" were deprecated in LangChain {since} and will be removed in"
                    f" {removal}. Explicitly pass a model_name to the"
                    f" {self.__class__.__name__} constructor instead."
                ),
            )
        try:
            from InstructorEmbedding import INSTRUCTOR

            self.client = INSTRUCTOR(
                self.model_name, cache_folder=self.cache_folder, **self.model_kwargs
            )
        except ImportError as e:
            raise ImportError("Dependencies for InstructorEmbedding not found.") from e

        if "show_progress_bar" in self.encode_kwargs:
            warn_deprecated(
                since="0.2.5",
                removal="0.4.0",
                name="encode_kwargs['show_progress_bar']",
                alternative=f"the show_progress method on {self.__class__.__name__}",
            )
            if self.show_progress:
                warnings.warn(
                    "Both encode_kwargs['show_progress_bar'] and show_progress are"
                    " set; encode_kwargs['show_progress_bar'] takes precedence"
                )
            self.show_progress = self.encode_kwargs.pop("show_progress_bar")

    model_config = ConfigDict(extra="forbid", protected_namespaces=())

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a HuggingFace instruct model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        instruction_pairs = [[self.embed_instruction, text] for text in texts]
        embeddings = self.client.encode(
            instruction_pairs,
            show_progress_bar=self.show_progress,
            **self.encode_kwargs,
        )
        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a HuggingFace instruct model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        instruction_pair = [self.query_instruction, text]
        embedding = self.client.encode(
            [instruction_pair],
            show_progress_bar=self.show_progress,
            **self.encode_kwargs,
        )[0]
        return embedding.tolist()
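# Illustrative sketch (not part of the original module): INSTRUCTOR-style models are
# fed [instruction, text] pairs rather than bare strings, which is why the methods
# above wrap every input before calling encode(). The helper name and sample inputs
# are assumptions for demonstration only.
def _example_instructor_inputs(query: str, docs: List[str]) -> Dict[str, Any]:
    return {
        # shape handed to encode() by embed_query
        "query_input": [[DEFAULT_QUERY_INSTRUCTION, query]],
        # shape handed to encode() by embed_documents
        "document_inputs": [[DEFAULT_EMBED_INSTRUCTION, d] for d in docs],
    }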
@deprecated(
    since="0.2.2",
    removal="1.0",
    alternative_import="langchain_huggingface.HuggingFaceEmbeddings",
)
class HuggingFaceBgeEmbeddings(BaseModel, Embeddings):
    """HuggingFace sentence_transformers embedding models.

    To use, you should have the ``sentence_transformers`` python package installed.
    To use Nomic, make sure the version of ``sentence_transformers`` >= 2.3.0.

    Bge Example:
        .. code-block:: python

            from langchain_community.embeddings import HuggingFaceBgeEmbeddings

            model_name = "BAAI/bge-large-en-v1.5"
            model_kwargs = {'device': 'cpu'}
            encode_kwargs = {'normalize_embeddings': True}
            hf = HuggingFaceBgeEmbeddings(
                model_name=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )

    Nomic Example:
        .. code-block:: python

            from langchain_community.embeddings import HuggingFaceBgeEmbeddings

            model_name = "nomic-ai/nomic-embed-text-v1"
            model_kwargs = {
                'device': 'cpu',
                'trust_remote_code': True
            }
            encode_kwargs = {'normalize_embeddings': True}
            hf = HuggingFaceBgeEmbeddings(
                model_name=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs,
                query_instruction="search_query:",
                embed_instruction="search_document:"
            )
    """

    client: Any = None  #: :meta private:
    model_name: str = DEFAULT_BGE_MODEL
    """Model name to use."""
    cache_folder: Optional[str] = None
    """Path of the cache folder."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass to the model."""
    encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass when calling the `encode` method of the model."""
    query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
    """Instruction to use for embedding query."""
    embed_instruction: str = ""
    """Instruction to use for embedding document."""
    show_progress: bool = False
    """Whether to show a progress bar."""

    def __init__(self, **kwargs: Any):
        """Initialize the sentence_transformer."""
        super().__init__(**kwargs)
        if "model_name" not in kwargs:
            since = "0.2.16"
            removal = "0.4.0"
            warn_deprecated(
                since=since,
                removal=removal,
                message=(
                    f"Default values for {self.__class__.__name__}.model_name"
                    f" were deprecated in LangChain {since} and will be removed in"
                    f" {removal}. Explicitly pass a model_name to the"
                    f" {self.__class__.__name__} constructor instead."
                ),
            )
        try:
            import sentence_transformers
        except ImportError as exc:
            raise ImportError(
                "Could not import sentence_transformers python package. "
                "Please install it with `pip install sentence-transformers`."
            ) from exc

        extra_model_kwargs = [
            "torch_dtype",
            "attn_implementation",
            "provider",
            "file_name",
            "export",
        ]
        extra_model_kwargs_dict = {
            k: self.model_kwargs.pop(k)
            for k in extra_model_kwargs
            if k in self.model_kwargs
        }

        self.client = sentence_transformers.SentenceTransformer(
            self.model_name,
            cache_folder=self.cache_folder,
            model_kwargs=extra_model_kwargs_dict,
            **self.model_kwargs,
        )

        if "-zh" in self.model_name:
            self.query_instruction = DEFAULT_QUERY_BGE_INSTRUCTION_ZH

        if "show_progress_bar" in self.encode_kwargs:
            warn_deprecated(
                since="0.2.5",
                removal="0.4.0",
                name="encode_kwargs['show_progress_bar']",
                alternative=f"the show_progress method on {self.__class__.__name__}",
            )
            if self.show_progress:
                warnings.warn(
                    "Both encode_kwargs['show_progress_bar'] and show_progress are"
                    " set; encode_kwargs['show_progress_bar'] takes precedence"
                )
            self.show_progress = self.encode_kwargs.pop("show_progress_bar")

    model_config = ConfigDict(extra="forbid", protected_namespaces=())

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a HuggingFace transformer model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        texts = [self.embed_instruction + t.replace("\n", " ") for t in texts]
        embeddings = self.client.encode(
            texts, show_progress_bar=self.show_progress, **self.encode_kwargs
        )
        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a HuggingFace transformer model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        text = text.replace("\n", " ")
        embedding = self.client.encode(
            self.query_instruction + text,
            show_progress_bar=self.show_progress,
            **self.encode_kwargs,
        )
        return embedding.tolist()
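# Illustrative sketch (not part of the original module): unlike the instruct class,
# HuggingFaceBgeEmbeddings prepends plain string instructions, and documents and
# queries get different prefixes. The helper name and sample strings are assumptions;
# for a "*-zh" checkpoint the query prefix switches to the Chinese default above.
def _example_bge_encoded_strings(query: str, docs: List[str]) -> Dict[str, Any]:
    embed_instruction = ""  # class default for documents
    query_instruction = DEFAULT_QUERY_BGE_INSTRUCTION_EN
    return {
        "documents": [embed_instruction + d.replace("\n", " ") for d in docs],
        "query": query_instruction + query.replace("\n", " "),
    }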
class HuggingFaceInferenceAPIEmbeddings(BaseModel, Embeddings):
    """Embed texts using the HuggingFace API.

    Requires a HuggingFace Inference API key and a model name.
    """

    api_key: SecretStr
    """Your API key for the HuggingFace Inference API."""
    model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
    """The name of the model to use for text embeddings."""
    api_url: Optional[str] = None
    """Custom inference endpoint url. None for using default public url."""
    additional_headers: Dict[str, str] = {}
    """Pass additional headers to the requests library if needed."""

    model_config = ConfigDict(extra="forbid", protected_namespaces=())

    @property
    def _api_url(self) -> str:
        return self.api_url or self._default_api_url

    @property
    def _default_api_url(self) -> str:
        return (
            "https://api-inference.huggingface.co/pipeline/feature-extraction/"
            f"{self.model_name}"
        )

    @property
    def _headers(self) -> dict:
        return {
            "Authorization": f"Bearer {self.api_key.get_secret_value()}",
            **self.additional_headers,
        }

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Get the embeddings for a list of texts.

        Args:
            texts (Documents): A list of texts to get embeddings for.

        Returns:
            Embedded texts as List[List[float]], where each inner List[float]
                corresponds to a single input text.

        Example:
            .. code-block:: python

                from langchain_community.embeddings import (
                    HuggingFaceInferenceAPIEmbeddings,
                )

                hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
                    api_key="your_api_key",
                    model_name="sentence-transformers/all-MiniLM-l6-v2"
                )
                texts = ["Hello, world!", "How are you?"]
                hf_embeddings.embed_documents(texts)
        """
        response = requests.post(
            self._api_url,
            headers=self._headers,
            json={
                "inputs": texts,
                "options": {"wait_for_model": True, "use_cache": True},
            },
        )
        return response.json()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a HuggingFace transformer model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self.embed_documents([text])[0]
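if __name__ == "__main__":
    # Minimal smoke test (illustrative sketch, not part of the original module).
    # It exercises HuggingFaceInferenceAPIEmbeddings against the public
    # feature-extraction pipeline. The HUGGINGFACEHUB_API_TOKEN variable and the
    # sample sentence are assumptions, and nothing runs unless a token is set.
    import os

    token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if token:
        api_embedder = HuggingFaceInferenceAPIEmbeddings(api_key=SecretStr(token))
        vector = api_embedder.embed_query("Hello, world!")
        print(f"query embedding dimension: {len(vector)}")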