o
    if                     @   s8   d dl Z d dlmZ dd Zdd ZG dd deZdS )	    N)Textc                 C   2   d|  } t dj| dd| } | dd  } | S )N z(?<=\s{abbr})\.(?=(\s\d|\s+\())abbr   ∯   resubformatstriptxtr    r   T/var/www/html/corbot_env/lib/python3.10/site-packages/pysbd/abbreviation_replacer.pyreplace_pre_number_abbr      r   c                 C   r   )Nr   z(?<=\s{abbr})\.(?=(\s|:\d+))r   r   r   r	   r   r   r   r   replace_prepositive_abbr   r   r   c                   @   sD   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dS )AbbreviationReplacerc                 C   s   || _ || _d S )N)textlang)selfr   r   r   r   r   __init__   s   
zAbbreviationReplacer.__init__c                 C   s   t | jj| jj| jjg| jjjR  | _d}| jdD ]	}|| 	|7 }q|| _| 
  t | jj| jjj | _|  | _| jS )N T)r   r   applyr   PossessiveAbbreviationRuleKommanditgesellschaftRuleSingleLetterAbbreviationRulesAll
splitlines"search_for_abbreviations_in_string"replace_multi_period_abbreviations	AmPmRules)replace_abbreviation_as_sentence_boundary)r   abbr_handled_textliner   r   r   replace   s   


zAbbreviationReplacer.replacec                 C   s8   d dd | jD }d|}t|d| j| _| jS )N|c                 s   s    | ]}d  |V  qdS )z
(?=\s{}\s)N)r   ).0wordr   r   r   	<genexpr>-   s    zQAbbreviationReplacer.replace_abbreviation_as_sentence_boundary.<locals>.<genexpr>u@   (U∯S|U\.S|U∯K|E∯U|E\.U|U∯S∯A|U\.S\.A|I|i.v|I.V)∯({})z\1.)joinSENTENCE_STARTERSr   r
   r   r   )r   sent_startersregexr   r   r   r$   ,   s   
z>AbbreviationReplacer.replace_abbreviation_as_sentence_boundaryc                 C   s(   dd }t j| jj|| jt jd| _d S )Nc                 S   s    |   } ttdd| } | S )N.r   )groupr
   r   escape)matchr   r   r   mpa_replace3   s   zLAbbreviationReplacer.replace_multi_period_abbreviations.<locals>.mpa_replaceflags)r
   r   r   MULTI_PERIOD_ABBREVIATION_REGEXr   
IGNORECASE)r   r4   r   r   r   r"   2   s   z7AbbreviationReplacer.replace_multi_period_abbreviationsc                 C   s8   d| }t djt | dd|}|dd  }|S )Nr   zB(?<=\s{abbr})\.(?=((\.|\:|-|\?|,)|(\s([a-z]|I\s|I'm|I'll|\d|\())))r   r   r   )r
   r   r   r2   r   )r   r   r   r   r   r   replace_period_of_abbr?   s   z+AbbreviationReplacer.replace_period_of_abbrc           
      C   s   |  }| jjjD ]<}| }||vrq	tjd||tjd}|s#q	dt	t
| d }t||}t|D ]\}}	| ||	||}q8q	|S )Nz(?:^|\s|\r|\n){}r5   z(?<={z} ).{1})lowerr   AbbreviationABBREVIATIONSr   r
   findallr   r8   strr2   	enumeratescan_for_replacements)
r   r   loweredr   strippedabbrev_matchnext_word_start
char_arrayindr3   r   r   r   r!   N   s$   z7AbbreviationReplacer.search_for_abbreviations_in_stringc           	      C   s   z|| }W n t y   d}Y nw | jjj}| jjj}t| }|r,|  |v rP|  |v r;t	||}|S |  |v rJt
||}|S | ||}|S )Nr   )
IndexErrorr   r;   PREPOSITIVE_ABBREVIATIONSNUMBER_ABBREVIATIONSr>   isupperr   r:   r   r   r9   )	r   r   amrF   rE   charprepositivenumber_abbrupperr   r   r   r@   a   s"   



z*AbbreviationReplacer.scan_for_replacementsN)
__name__
__module____qualname__r   r'   r$   r"   r9   r!   r@   r   r   r   r   r      s    r   )r
   pysbd.utilsr   r   r   objectr   r   r   r   r   <module>   s
   		