o
    ZhO                     @   s~   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZ eeZG dd deZdS )    N)Path)AnyDictIteratorPatternUnion)Document)
BaseLoaderc                   @   s`  e Zd ZU dZedejZee	d< edejZ
ee	d< edZee	d< edejZee	d	< ed
ejZee	d< edejZee	d< 		d,deeef dedefddZdeeef dejdefddZdedeeef defddZdedefddZd edefd!d"Zdedefd#d$Zdedefd%d&Zdedefd'd(Z de!e" fd)d*Z#d+S )-ObsidianLoaderz%Load `Obsidian` files from directory.z^---\n(.*?)\n---\nFRONT_MATTER_REGEXz	{{(.*?)}}TEMPLATE_VARIABLE_REGEXz[^\S\/]#([a-zA-Z_]+[-_/\w]*)	TAG_REGEXz^\s*(\w+)::\s*(.*)$DATAVIEW_LINE_REGEXz\[(\w+)::\s*(.*)\]DATAVIEW_INLINE_BRACKET_REGEXz\((\w+)::\s*(.*)\)DATAVIEW_INLINE_PAREN_REGEXUTF-8Tpathencodingcollect_metadatac                 C   s   || _ || _|| _dS )a%  Initialize with a path.

        Args:
            path: Path to the directory containing the Obsidian files.
            encoding: Charset encoding, defaults to "UTF-8"
            collect_metadata: Whether to collect metadata from the front matter.
                Defaults to True.
        N)	file_pathr   r   )selfr   r   r    r   d/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/obsidian.py__init__   s   
zObsidianLoader.__init__placeholdersmatchreturnc                 C   s"   dt | d}|d||< |S )z/Replace a template variable with a placeholder.Z__TEMPLATE_VAR___   )lengroup)r   r   r   placeholderr   r   r   _replace_template_var/   s   z$ObsidianLoader._replace_template_varobjc                 C   s   t |tr| D ]\}}||d| d}q	|S t |tr2| D ]\}}| ||||< q#|S t |trHt|D ]\}}| ||||< q;|S )zIRestore template variables replaced with placeholders to original values.z{{z}})
isinstancestritemsreplacedict_restore_template_varslist	enumerate)r   r#   r   r!   valuekeyiitemr   r   r   r)   7   s   


z%ObsidianLoader._restore_template_varscontentc                 C   s   | j si S | j|}|si S i }t| j|}| j||d}z"t	
|}| ||}d|v rBt|d trB|d d|d< |W S  t	jjyV   td i  Y S w )zEParse front matter metadata from the content and return it as a dict.r   tagsz, z Encountered non-yaml frontmatter)r   r   search	functoolspartialr"   r   subr    yamlZ	safe_loadr)   r$   r%   splitparserZParserErrorloggerwarning)r   r0   r   r   Zreplace_template_varZfront_matter_textfront_matterr   r   r   _parse_front_matterD   s,   


z"ObsidianLoader._parse_front_mattermetadatac                 C   sB   i }|  D ]\}}t|ttthv r|||< qt|||< q|S )z4Convert a dictionary to a compatible with langchain.)r&   typer%   intfloat)r   r=   resultr-   r,   r   r   r   !_to_langchain_compatible_metadatab   s   
z0ObsidianLoader._to_langchain_compatible_metadatac                 C   s0   | j st S | j|}|st S dd |D S )z0Return a set of all tags in within the document.c                 S   s   h | ]}|qS r   r   ).0tagr   r   r   	<setcomp>u   s    z6ObsidianLoader._parse_document_tags.<locals>.<setcomp>)r   setr   findall)r   r0   r   r   r   r   _parse_document_tagsl   s   z#ObsidianLoader._parse_document_tagsc                 C   sP   | j si S i dd | j|D dd | j|D dd | j|D S )zWParse obsidian dataview plugin fields from the content and return it
        as a dict.c                 S      i | ]	}|d  |d qS r   r   r   rC   r   r   r   r   
<dictcomp>~       z9ObsidianLoader._parse_dataview_fields.<locals>.<dictcomp>c                 S   rI   rJ   r   rK   r   r   r   rL      rM   c                 S   rI   rJ   r   rK   r   r   r   rL      rM   )r   r   rG   r   r   r   r0   r   r   r   _parse_dataview_fieldsw   s   

	
z%ObsidianLoader._parse_dataview_fieldsc                 C   s   | j s|S | jd|S )z4Remove front matter metadata from the given content. )r   r   r5   rN   r   r   r   _remove_front_matter   s   z#ObsidianLoader._remove_front_matterc           	   	   c   s    t t| jd}|D ]m}t|| jd}| }W d    n1 s%w   Y  | |}| |}| 	|}| 
|}t|jt|| j| j| jd| ||}|sb|drsd|t|dg pmg B |d< t||dV  qd S )Nz**/*.md)r   )sourcer   createdZlast_modifiedZlast_accessedr1   ,)Zpage_contentr=   )r*   r   r   globopenr   readr<   rH   rO   rQ   r%   namestatst_ctimest_mtimest_atimerB   getjoinrF   r   )	r   pathsr   ftextr;   r1   Zdataview_fieldsr=   r   r   r   	lazy_load   s4   





zObsidianLoader.lazy_loadN)r   T)$__name__
__module____qualname____doc__recompileDOTALLr   r   __annotations__r   r   	MULTILINEr   r   r   r   r%   r   boolr   r   Matchr"   r   r)   r(   r<   rB   rF   rH   rO   rQ   r   r   rb   r   r   r   r   r
      sF   
 




r
   )r3   loggingrg   pathlibr   typingr   r   r   r   r   r6   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser	   	getLoggerrc   r9   r
   r   r   r   r   <module>   s    
