o
    ifV	                     @   sT   d dl Z d dlZG dd deZG dd deZG dd deZG dd	 d	eZdS )
    Nc                   @   s   e Zd Zdd Zdd ZdS )Rulec                 C   s   || _ || _d S N)patternreplacement)selfr   r    r   D/var/www/html/corbot_env/lib/python3.10/site-packages/pysbd/utils.py__init__   s   
zRule.__init__c                 C   s   d | jj| j| jS )Nz&<{} pattern="{}" and replacement="{}">)format	__class____name__r   r   r   r   r   r   __repr__   s   zRule.__repr__N)r   
__module____qualname__r	   r   r   r   r   r   r      s    r   c                   @   s   e Zd ZdZdd ZdS )Textax  Extending str functionality to apply regex rules

    https://stackoverflow.com/questions/4698493/can-i-add-custom-methods-attributes-to-built-in-python-types

    Parameters
    ----------
    str : str
        string content

    Returns
    -------
    str
        input as it is if rule pattern doesnt match
        else replacing found pattern with replacement chars
    c                 G   s    |D ]}t |j|j| } q| S r   )resubr   r   )r   ruleseach_rr   r   r   apply!   s   z
Text.applyN)r   r   r   __doc__r   r   r   r   r   r      s    r   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )TextSpanc                 C   s   || _ || _|| _dS )aV  
        Sentence text and its start & end character offsets within original text

        Parameters
        ----------
        sent : str
            Sentence text
        start : int
            start character offset of a sentence in original text
        end : int
            end character offset of a sentence in original text
        N)sentstartend)r   r   r   r   r   r   r   r	   )   s   
zTextSpan.__init__c                 C   s   d | jjt| j| j| jS )Nz!{0}(sent={1}, start={2}, end={3}))r
   r   r   reprr   r   r   r   r   r   r   r   :   s   zTextSpan.__repr__c                 C   s4   t | |jr| j|jko| j|jko| j|jkS d S r   )
isinstancer   r   r   r   )r   otherr   r   r   __eq__>   s   $zTextSpan.__eq__N)r   r   r   r	   r   r   r   r   r   r   r   '   s    r   c                   @   s"   e Zd ZdZdddZdd ZdS )	PySBDFactoryz.pysbd as a spacy component through entrypointsenc                 C   s   || _ tj|ddd| _d S )NFT)languageclean	char_span)nlppysbd	Segmenterseg)r   r%   r"   r   r   r   r	   F   s   zPySBDFactory.__init__c                 C   s>   | j |j}dd |D }|D ]}|j|v rdnd|_q|S )Nc                 S   s   g | ]}|j qS r   )r   ).0r   r   r   r   
<listcomp>M   s    z)PySBDFactory.__call__.<locals>.<listcomp>TF)r(   segmenttext_with_wsidxis_sent_start)r   docsents_char_spansstart_token_idstokenr   r   r   __call__K   s   zPySBDFactory.__call__N)r!   )r   r   r   r   r	   r3   r   r   r   r   r    C   s    
r    )r   r&   objectr   strr   r   r    r   r   r   r   <module>   s   