o
    ifC                     @   sn   d dl Z d dlmZ d dlmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlmZmZ G dd	 d	eeZdS )
    N)AbbreviationReplacer)BetweenPunctuation)CommonStandard)replace_punctuation)	Processor)TextRulec                   @   sd   e Zd ZdZG dd dejZG dd deZG dd dejZG dd	 d	e	Z	G d
d de
Z
dS )Deutschdec                   @   s0   e Zd ZeddZeddZejjeeg ZdS )zDeutsch.Numbersz%(?<=\s\d)\.(?=\s)|(?<=\s\d\d)\.(?=\s)   ∯z#(?<=-\d)\.(?=\s)|(?<=-\d\d)\.(?=\s)N)	__name__
__module____qualname__r	   NumberPeriodSpaceRuleNegativeNumberPeriodSpaceRuler   NumbersAll r   r   K/var/www/html/corbot_env/lib/python3.10/site-packages/pysbd/lang/deutsch.pyr      s    

r   c                       s.   e Zd Zd fdd	Zdd Zdd Z  ZS )	zDeutsch.ProcessorFc                    s   t  ||| d S Nsuper__init__)selftextlang	char_span	__class__r   r   r      s   zDeutsch.Processor.__init__c                 C   s&   t | jj| jjj | _|   | jS r   )r   r   applyr   r   r   replace_period_in_deutsch_dates)r   r   r   r   replace_numbers   s   z!Deutsch.Processor.replace_numbersc                 C   s0   g d}|D ]}t dj|dd| j| _qd S )N)JanuarFebruaru   MärzAprilMaiJuniJuliAugust	SeptemberOktoberNovemberDezemberz(?<=\d)\.(?=\s*{month}))monthr   )resubformatr   )r   MONTHSr.   r   r   r   r!   "   s   z1Deutsch.Processor.replace_period_in_deutsch_dates)F)r   r   r   r   r"   r!   __classcell__r   r   r   r   r      s    r   c                   @   s    e Zd Zg dZg Zg dZdS )zDeutsch.Abbreviation)   Ä   äadjadmadvartasstzb.azb.sbartbldgbrigbrosbsebuchstbzglbzwu   c.-à-dcacaptchrcmdrcocolcomdrconcorpcplzd.hzd.jdergldgldkrzdr ensetczev evtlffzg.g.azg.ugenggfgovhonhospzi.fzi.h.viiiiiinspivixjunzk.ozkath lfdltltdzm.emajmedmessrsmiomllemmmmemrmrdmrsmsmsgrmwstnonosnru   o.äopordpfcphppprofpvtreprepsresrevrtzs.p.asasensenssfcsgtsogsogensppsrststdzstr  suptsurgzu.a  zu.ezu.s.wzu.uu   u.äusfuswvvglviviiviiivsxxixiixiiixivxixxvxvixviixviiixxzz.bzz.tzz.zzz.ztztzztz
univ.-profzo.univ.-profzao.univ.profzass.profzhon.profz	univ.-dozzuniv.asszstud.ass
projektassassdiz	dipl.-ingmag)r9   rC   ro   rp   rq   rv   N)r   r   r   ABBREVIATIONSPREPOSITIVE_ABBREVIATIONSNUMBER_ABBREVIATIONSr   r   r   r   Abbreviation)   s    r   c                       s6   e Zd ZddZ fddZdd Zdd Z  ZS )	zDeutsch.AbbreviationReplaceruy   Am Auch Auf Bei Da Das Der Die Ein Eine Es Für Heute Ich Im In Ist Jetzt Mein Mit Nach So Und Warum Was Wenn Wer Wie Wir c                    s   t  || d S r   r   )r   r   r   r   r   r   r   3   s   z%Deutsch.AbbreviationReplacer.__init__c                 C   s~   t dd}t dd}t| jj| jjg| jjj||R  | _| | j| _| 	  t| jj| jj
j | _|  | _| jS )Nz(?<=\s[a-z])\.(?=\s)r   z(?<=^[a-z])\.(?=\s))r	   r   r   r    r   PossessiveAbbreviationRuleSingleLetterAbbreviationRulesr   "search_for_abbreviations_in_string"replace_multi_period_abbreviations	AmPmRules)replace_abbreviation_as_sentence_boundary)r   SingleLowerCaseLetterRule&SingleLowerCaseLetterAtStartOfLineRuler   r   r   replace6   s    




z$Deutsch.AbbreviationReplacer.replacec                 C   s   t dj|dd|}|S )Nz(?<={am})\.(?=\s))amr   )r/   r0   r1   )r   txtr   indexcharacter_arrayr   r   r   scan_for_replacementsH   s   z2Deutsch.AbbreviationReplacer.scan_for_replacements)	r   r   r   splitSENTENCE_STARTERSr   r   r   r3   r   r   r   r   r   .   s    r   c                       s$   e Zd Z fddZdd Z  ZS )zDeutsch.BetweenPunctuationc                    s   t  | d S r   r   )r   r   r   r   r   r   N   s   z#Deutsch.BetweenPunctuation.__init__c                 C   s8   d}d}d|v rt |t|S d|v rt |t|S |S )Nu.   ,,(?=(?P<tmp>[^“\\]+|\\{2}|\\.)*)(?P=tmp)“u/   „(?=(?P<tmp>[^“\\]+|\\{2}|\\.)*)(?P=tmp)“u   „z,,)r/   r0   r   )r   r   ,BETWEEN_UNCONVENTIONAL_DOUBLE_QUOTE_DE_REGEXBETWEEN_DOUBLE_QUOTES_DE_REGEXr   r   r   %sub_punctuation_between_double_quotesQ   s   z@Deutsch.BetweenPunctuation.sub_punctuation_between_double_quotes)r   r   r   r   r   r3   r   r   r   r   r   L   s    r   N)r   r   r   iso_coder   r   r   r   r   r   r   r   r   r   r   r
      s    	r
   )r/   pysbd.abbreviation_replacerr   pysbd.between_punctuationr   pysbd.lang.commonr   r   pysbd.punctuation_replacerr   pysbd.processorr   pysbd.utilsr   r	   r
   r   r   r   r   <module>   s   