o
    .if_                     @  s   d Z ddlmZ ddlZddlZddlZddlmZ ddlm	Z	m
Z
mZmZmZ ddlmZ ddlmZmZ ddlmZ ejd	d
Zd#ddZd$ddZd%ddZd&ddZd'dd ZG d!d" d"eZdS )(a!  Module contains code for a cache backed embedder.

The cache backed embedder is a wrapper around an embedder that caches
embeddings in a key-value store. The cache is used to avoid recomputing
embeddings for the same text.

The text is hashed and the hash is used as the key in the cache.
    )annotationsN)partial)CallableListSequenceUnioncast)
Embeddings)	BaseStore	ByteStore)EncoderBackedStorei  )intinput_stringstrreturn	uuid.UUIDc                 C  s    t | d }tt|S )z1Hash a string and returns the corresponding UUID.zutf-8)hashlibsha1encode	hexdigestuuiduuid5NAMESPACE_UUID)r   
hash_value r   S/var/www/html/corbot_env/lib/python3.10/site-packages/langchain/embeddings/cache.py_hash_string_to_uuid   s   r   key	namespacec                 C  s   |t t|  S )zEncode a key.)r   r   )r   r   r   r   r   _key_encoder   s   r   Callable[[str], str]c                 C  s   t t| dS )zCreate an encoder for a key.r   )r   r   r!   r   r   r   _create_key_encoder$   s   r"   valueSequence[float]bytesc                 C  s   t |  S )zSerialize a value.)jsondumpsr   )r#   r   r   r   _value_serializer)   s   r(   serialized_valueList[float]c                 C  s   t tt t|  S )zDeserialize a value.)r   r   floatr&   loadsdecode)r)   r   r   r   _value_deserializer.   s   r.   c                      sJ   e Zd ZdZd fdd	ZdddZdddZedddddZ  Z	S )CacheBackedEmbeddingsav  Interface for caching results from embedding models.

    The interface allows works with any store that implements
    the abstract store interface accepting keys of type str and values of list of
    floats.

    If need be, the interface can be extended to accept other implementations
    of the value serializer and deserializer, as well as the key encoder.

    Examples:

        .. code-block: python

            from langchain.embeddings import CacheBackedEmbeddings
            from langchain.storage import LocalFileStore
            from langchain_community.embeddings import OpenAIEmbeddings

            store = LocalFileStore('./my_cache')

            underlying_embedder = OpenAIEmbeddings()
            embedder = CacheBackedEmbeddings.from_bytes_store(
                underlying_embedder, store, namespace=underlying_embedder.model
            )

            # Embedding is computed and cached
            embeddings = embedder.embed_documents(["hello", "goodbye"])

            # Embeddings are retrieved from the cache, no computation is done
            embeddings = embedder.embed_documents(["hello", "goodbye"])
    underlying_embeddingsr	   document_embedding_storeBaseStore[str, List[float]]r   Nonec                   s   t    || _|| _dS )zInitialize the embedder.

        Args:
            underlying_embeddings: the embedder to use for computing embeddings.
            document_embedding_store: The store to use for caching document embeddings.
        N)super__init__r1   r0   )selfr0   r1   	__class__r   r   r5   S   s   

zCacheBackedEmbeddings.__init__texts	List[str]List[List[float]]c                   s   | j  }dd t|D } fdd|D }|r9| j|}| j tt|| t||D ]\}}|||< q0tt	t	t
  |S )aw  Embed a list of texts.

        The method first checks the cache for the embeddings.
        If the embeddings are not found, the method uses the underlying embedder
        to embed the documents and stores the results in the cache.

        Args:
            texts: A list of texts to embed.

        Returns:
            A list of embeddings for the given texts.
        c                 S  s   g | ]
\}}|d u r|qS )Nr   ).0ivectorr   r   r   
<listcomp>r   s    z9CacheBackedEmbeddings.embed_documents.<locals>.<listcomp>c                   s   g | ]} | qS r   r   )r<   r=   r9   r   r   r?   u   s    )r1   mget	enumerater0   embed_documentsmsetlistzipr   r   r+   )r6   r9   vectorsmissing_indicesmissing_textsmissing_vectorsindexupdated_vectorr   r@   r   rC   b   s"   
z%CacheBackedEmbeddings.embed_documentstextr   r*   c                 C  s   | j |S )a<  Embed query text.

        This method does not support caching at the moment.

        Support for caching queries is easily to implement, but might make
        sense to hold off to see the most common patterns.

        If the cache has an eviction policy, we may need to be a bit more careful
        about sharing the cache between documents and queries. Generally,
        one is OK evicting query caches, but document caches should be kept.

        Args:
            text: The text to embed.

        Returns:
            The embedding for the given text.
        )r0   embed_query)r6   rM   r   r   r   rN      s   z!CacheBackedEmbeddings.embed_query r!   document_embedding_cacher   r   c                C  s0   |}t |}tttt f ||tt}| ||S )a  On-ramp that adds the necessary serialization and encoding to the store.

        Args:
            underlying_embeddings: The embedder to use for embedding.
            document_embedding_cache: The cache to use for storing document embeddings.
            *,
            namespace: The namespace to use for document cache.
                       This namespace is used to avoid collisions with other caches.
                       For example, set it to the name of the embedding model used.
        )r"   r   r   r   r+   r(   r.   )clsr0   rP   r   key_encoderencoder_backed_storer   r   r   from_bytes_store   s   
z&CacheBackedEmbeddings.from_bytes_store)r0   r	   r1   r2   r   r3   )r9   r:   r   r;   )rM   r   r   r*   )r0   r	   rP   r   r   r   r   r/   )
__name__
__module____qualname____doc__r5   rC   rN   classmethodrT   __classcell__r   r   r7   r   r/   3   s    

!r/   )r   r   r   r   )r   r   r   r   r   r   )r   r   r   r    )r#   r$   r   r%   )r)   r%   r   r*   )rX   
__future__r   r   r&   r   	functoolsr   typingr   r   r   r   r   langchain_core.embeddingsr	   langchain_core.storesr
   r    langchain.storage.encoder_backedr   UUIDr   r   r   r"   r(   r.   r/   r   r   r   r   <module>   s"    




