o
    Zh                     @   sH   d dl Z d dlmZmZmZmZmZ d dlmZm	Z	 G dd deZ
dS )    N)AnyListOptionalSequenceUnion)BaseDocumentTransformerDocumentc                   @   sz   e Zd ZdZ				ddeeeee f  deeeee f  deded	e	d
dfddZ
dee d	e	d
ee fddZdS )MarkdownifyTransformeram  Converts HTML documents to Markdown format with customizable options for handling
    links, images, other tags and heading styles using the markdownify library.

    Arguments:
        strip: A list of tags to strip. This option can't be used with the convert option.
        convert: A list of tags to convert. This option can't be used with the strip option.
        autolinks: A boolean indicating whether the "automatic link" style should be used when a a tag's contents match its href. Defaults to True.
        heading_style: Defines how headings should be converted. Accepted values are ATX, ATX_CLOSED, SETEXT, and UNDERLINED (which is an alias for SETEXT). Defaults to ATX.
        kwargs: Additional options to pass to markdownify.

    Example:
        .. code-block:: python
            from langchain_community.document_transformers import MarkdownifyTransformer
            markdownify = MarkdownifyTransformer()
            docs_transform = markdownify.transform_documents(docs)

    More configuration options can be found at the markdownify GitHub page:
    https://github.com/matthewwithanm/python-markdownify
    NTATXstripconvert	autolinksheading_stylekwargsreturnc                 K   sB   t |tr|gn|| _t |tr|gn|| _|| _|| _|| _d S )N)
isinstancestrr   r   r   r   additional_options)selfr   r   r   r   r    r   l/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_transformers/markdownify.py__init__   s
   
zMarkdownifyTransformer.__init__	documentsc              	   K   s   zddl m } W n ty   tdw g }|D ],}|d
|j| j| j| j| jd| jdd }t	
dd|}|t||jd	 q|S )Nr   )markdownifyz`markdownify package not found, please 
                install it with `pip install markdownify`)htmlr   r   r   r        z\n\s*\nz

)metadatar   )r   ImportErrorZpage_contentr   r   r   r   r   replaceresubappendr   r   )r   r   r   r   Zconverted_documentsdocZmarkdown_contentZcleaned_markdownr   r   r   transform_documents*   s4   z*MarkdownifyTransformer.transform_documents)NNTr
   )__name__
__module____qualname____doc__r   r   r   r   boolr   r   r   r   r$   r   r   r   r   r	      s4    
r	   )r    typingr   r   r   r   r   Zlangchain_core.documentsr   r   r	   r   r   r   r   <module>   s    