o
    Zh                     @   sT   d dl Z d dlmZ d dlmZmZmZ d dlmZ d dl	m
Z
 G dd de
ZdS )    N)Path)IteratorPatternUnion)Document)
BaseLoaderc                   @   s   e Zd ZU dZedejejB Ze	e
d< 	 		ddeeef dedefd	d
ZdedefddZdedefddZdedefddZdee fddZdS )AcreomLoaderz%Load `acreom` vault from a directory.z^---\n(.*?)\n---\nFRONT_MATTER_REGEXUTF-8Tpathencodingcollect_metadatac                 C   s   || _ 	 || _	 || _dS )zInitialize the loader.N)	file_pathr   r   )selfr   r   r    r   b/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/acreom.py__init__   s   zAcreomLoader.__init__contentreturnc                 C   sf   | j si S | j|}i }|r1|dd}|D ]}d|v r0|dd\}}| || < qq|S )zEParse front matter metadata from the content and return it as a dict.   
:)r   r	   searchgroupsplitstrip)r   r   matchfront_matterlineslinekeyvaluer   r   r   _parse_front_matter    s   z AcreomLoader._parse_front_matterc                 C   s   | j s|S | jd|S )z4Remove front matter metadata from the given content. )r   r	   subr   r   r   r   r   _remove_front_matter1   s   z!AcreomLoader._remove_front_matterc                 C   s.   t dd|}t dd|}t dd|}|S )Nz\s*-\s\[\s\]\s.*|\s*\[\s\]\s.*r#   #z\[\[.*?\]\])rer$   r%   r   r   r   _process_acreom_content7   s   z$AcreomLoader._process_acreom_contentc              	   c   s    t t| jd}|D ]?}t|| jd}| }W d    n1 s%w   Y  | |}| |}| 	|}t
|jt
|d|}t||dV  qd S )Nz**/*.md)r   )sourcer   )Zpage_contentmetadata)listr   r   globopenr   readr"   r&   r)   strnamer   )r   Zpspftextr   r+   r   r   r   	lazy_load?   s    



zAcreomLoader.lazy_loadN)r
   T)__name__
__module____qualname____doc__r(   compile	MULTILINEDOTALLr	   r   __annotations__r   r0   r   boolr   dictr"   r&   r)   r   r   r5   r   r   r   r   r   
   s&   
 

r   )r(   pathlibr   typingr   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   r   r   r   r   r   <module>   s    