o
    Zh(                     @   s@   d dl Z d dlZd dlmZmZ d dlmZ G dd deZdS )    N)RuleText)partialc                   @   s$  e Zd ZddZeejZdZ	dZ
eddZeddZed	d
Zedd
Zedd
ZdZdZdZdZdZdZdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Z	#d?d$d%Zd?d&d'Z d?d(d)Z!d*d+ Z"d,d- Z#d.d/ Z$d0d1 Z%d2d3 Z&d4d5 Z'd6d7 Z(d8d9 Z)d:d; Z*d@d<d=Z+d>S )AListItemReplacerzUi ii iii iv v vi vii viii ix x xi xii xiii xiv x xi xii xiii xv xvi xvii xviii xix xx z7(?<=^)[a-z](?=\.)|(?<=\A)[a-z](?=\.)|(?<=\s)[a-z](?=\.)zN(?<=\()[a-z]+(?=\))|(?<=^)[a-z]+(?=\))|(?<=\A)[a-z]+(?=\))|(?<=\s)[a-z]+(?=\))   ♨u   ∯   ☝ u   (?<=\S\S)\s(?=\S\s*\d+♨)u   (?<=\S\S)\s(?=\d{1,2}♨)u   (?<=\S\S)\s(?=\d{1,2}☝)u  \s\d{1,2}(?=\.\s)|^\d{1,2}(?=\.\s)|\s\d{1,2}(?=\.\))|^\d{1,2}(?=\.\))|(?<=\s\-)\d{1,2}(?=\.\s)|(?<=^\-)\d{1,2}(?=\.\s)|(?<=\s\⁃)\d{1,2}(?=\.\s)|(?<=^\⁃)\d{1,2}(?=\.\s)|(?<=s\-)\d{1,2}(?=\.\))|(?<=^\-)\d{1,2}(?=\.\))|(?<=\s\⁃)\d{1,2}(?=\.\))|(?<=^\⁃)\d{1,2}(?=\.\))u  (?<=\s)\d{1,2}\.(?=\s)|^\d{1,2}\.(?=\s)|(?<=\s)\d{1,2}\.(?=\))|^\d{1,2}\.(?=\))|(?<=\s\-)\d{1,2}\.(?=\s)|(?<=^\-)\d{1,2}\.(?=\s)|(?<=\s\⁃)\d{1,2}\.(?=\s)|(?<=^\⁃)\d{1,2}\.(?=\s)|(?<=\s\-)\d{1,2}\.(?=\))|(?<=^\-)\d{1,2}\.(?=\))|(?<=\s\⁃)\d{1,2}\.(?=\))|(?<=^\⁃)\d{1,2}\.(?=\))z\d{1,2}(?=\)\s)zI\([a-z]+(?=\))|(?<=^)[a-z]+(?=\))|(?<=\A)[a-z]+(?=\))|(?<=\s)[a-z]+(?=\))z+(?<=^)[a-z]\.|(?<=\A)[a-z]\.|(?<=\s)[a-z]\.zD\(((?=[mdclxvi])m*(c[md]|d?c*)(x[cl]|l?x*)(i[xv]|v?i*))\)(?=\s[A-Z])c                 C   s
   || _ d S N)textselfr    r   P/var/www/html/lang_env/lib/python3.10/site-packages/pysbd/lists_item_replacer.py__init__7   s   
zListItemReplacer.__init__c                 C   s&   |    |   |   |   | jS r   )format_alphabetical_listsformat_roman_numeral_lists!format_numbered_list_with_periods format_numbered_list_with_parensr   r   r   r   r   add_line_break:   s
   zListItemReplacer.add_line_breakc                 C   s   t | jd| j}|S )Nu   &✂&\1&⌬&)resubROMAN_NUMERALS_IN_PARENTHESESr   r   r   r   r   replace_parensA   s   zListItemReplacer.replace_parensc                 C   (   |    |   t| j| j| _d S r   )replace_parens_in_numbered_list-add_line_breaks_for_numbered_list_with_parensr   r   applyListMarkerRuler   r   r   r   r   F      z1ListItemReplacer.format_numbered_list_with_parensc                 C   s   | j | j| jddd d S )Nr   T)strip)
scan_listsNUMBERED_LIST_REGEX_1NUMBERED_LIST_REGEX_2r   r   r   r    replace_periods_in_numbered_listK   s   
z1ListItemReplacer.replace_periods_in_numbered_listc                 C   r   r   )r&   .add_line_breaks_for_numbered_list_with_periodsr   r   r   SubstituteListPeriodRuler   r   r   r   r   O   r!   z2ListItemReplacer.format_numbered_list_with_periodsc                 C   "   | j dd| _| jdd| _| jS )NFroman_numeral2add_line_breaks_for_alphabetical_list_with_periodstxt1add_line_breaks_for_alphabetical_list_with_parensr   r   r   r   r   T      z*ListItemReplacer.format_alphabetical_listsc                 C   r)   )NTr*   r,   r   r   r   r   r   [   r0   z+ListItemReplacer.format_roman_numeral_listsFc                 C   s   | j | j|d}|S )Nr*   )iterate_alphabet_arrayALPHABETICAL_LIST_WITH_PERIODSr   r+   r.   r   r   r   r-   b   s
   zCListItemReplacer.add_line_breaks_for_alphabetical_list_with_periodsc                 C   s   | j | jd|d}|S )NT)parensr+   )r1   ALPHABETICAL_LIST_WITH_PARENSr3   r   r   r   r/   i   s   zBListItemReplacer.add_line_breaks_for_alphabetical_list_with_parensc                 C   s   t || j}ttt|}t|D ]M\}}|t|d k r1|d ||d  kr1| |||| q|dkr_|d ||d  ksW|dkrK||d  dksW|dkr_||d  dkr_| |||| qd S )N   r   	   )	r   findallr   listmapint	enumeratelensubstitute_found_list_items)r   Zregex1Zregex2replacementr"   
list_arrayinditemr   r   r   r#   p   s   $zListItemReplacer.scan_listsc              	      s2   d fdd	}t |t| |d| j| _d S )NFr   c                    sR   |   } |rt|  } t| dkr| n| d}t |kr%d S t| S )Nr6   z.])z{}{})groupstrr"   r=   format)matchvalr"   replZchomped_matcheachr?   r   r   replace_item   s   zBListItemReplacer.substitute_found_list_items.<locals>.replace_item)rG   r"   rH   )NFr   )r   r   r   r   )r   regexrJ   r"   r?   rK   r   rI   r   r>   ~   s   

z,ListItemReplacer.substitute_found_list_itemsc                 C   sN   d| j v r!td| j s#td| j s%t| j | j| j| _ d S d S d S d S )Nr   u   ♨.+(
|).+♨u   for\s\d{1,2}♨\s[a-z])r   r   searchr   r   SpaceBetweenListItemsFirstRuleSpaceBetweenListItemsSecondRuler   r   r   r   r'      s   
z?ListItemReplacer.add_line_breaks_for_numbered_list_with_periodsc                 C   s(   |  | j| jd |  | j| jd d S )Nr   )r#   NUMBERED_LIST_PARENS_REGEXr   r   r   r   r      s   
z0ListItemReplacer.replace_parens_in_numbered_listc                 C   s8   d| j v rtd| j st| j | j| _ d S d S d S )Nr   u   ☝.+
.+☝|☝.+.+☝)r   r   rM   r   r   SpaceBetweenListItemsThirdRuler   r   r   r   r      s
   

z>ListItemReplacer.add_line_breaks_for_numbered_list_with_parensc                 C   .   ddd}t j| jt||d| jt jd}|S )uc   
        Input: 'a. ffegnog b. fgegkl c.'
        Output: a∯ ffegnog b∯ fgegkl c∯
        Nc                 S   s(   |   } | d}||krd|S | S )N.u   {}∯rC   r"   rE   )rF   rG   Zmatch_wo_periodr   r   r   replace_letter_period   s
   

zEListItemReplacer.replace_alphabet_list.<locals>.replace_letter_periodrG   flagsr   )r   r   +ALPHABETICAL_LIST_LETTERS_AND_PERIODS_REGEXr   r   
IGNORECASE)r   arU   r.   r   r   r   replace_alphabet_list   s   

z&ListItemReplacer.replace_alphabet_listc                 C   rR   )ue   
        Input: "a) ffegnog (b) fgegkl c)"
        Output: "a) ffegnog &✂&b) fgegkl c)"
        Nc                 S   sF   |   } d| v r| d}||krd|S | S | |kr!d| S | S )N(u   &✂&{}z{}rT   )rF   rG   Zmatch_wo_parenr   r   r   replace_alphabet_paren   s   


zMListItemReplacer.replace_alphabet_list_parens.<locals>.replace_alphabet_parenrV   rW   r   )r   r   'EXTRACT_ALPHABETICAL_LIST_LETTERS_REGEXr   r   rZ   )r   r[   r^   r.   r   r   r   replace_alphabet_list_parens   s   

z-ListItemReplacer.replace_alphabet_list_parensc                 C   s    |r	|  |}|S | |}|S r   )r`   r\   )r   r[   r4   r   r   r   replace_correct_alphabet_list   s
   

z.ListItemReplacer.replace_correct_alphabet_listc                 C   sp   t |dkt |dk@ s||d  |vs||vr| jS t|||d  || dkr0| jS | ||}|S Nr   r6   )r=   r   absindexra   r   r[   ialphabetr@   r4   resultr   r   r   last_array_item_replacement   s   $z,ListItemReplacer.last_array_item_replacementc                 C   s   t |dkt |dk@ s ||d  |vs ||vs ||d  |vr#| jS |||d  || dkrHt|||d  || dkrH| jS | ||}|S rb   )r=   r   rd   rc   ra   re   r   r   r   other_items_replacement   s    $z(ListItemReplacer.other_items_replacementc                    s   t || j}|r| jn| j  fdd|D }t|D ]!\}}|t|d kr3| || ||| _q| || ||| _q| jS )Nc                    s   g | ]}| v r|qS r   r   ).0rf   rg   r   r   
<listcomp>   s    z;ListItemReplacer.iterate_alphabet_array.<locals>.<listcomp>r6   )	r   r8   r   ROMAN_NUMERALSLATIN_NUMERALSr<   r=   ri   rj   )r   rL   r4   r+   r@   rA   rJ   r   rl   r   r1      s   
z'ListItemReplacer.iterate_alphabet_arrayN)F)FF),__name__
__module____qualname__splitrn   r9   stringascii_lowercasero   r2   r5   r   r(   r    rN   rO   rQ   r$   r%   rP   r_   rY   r   r   r   r   r   r&   r   r   r   r-   r/   r#   r>   r'   r   r   r\   r`   ra   ri   rj   r1   r   r   r   r   r      sL    









	r   )	rt   r   Zpysbd.utilsr   r   	functoolsr   objectr   r   r   r   r   <module>   s
   