o
    Zh                     @  s<   d dl mZ d dlmZmZ d dlmZ G dd deZdS )    )annotations)AnyList)TextSplitterc                      s8   e Zd ZdZ		dddd fddZdddZ  ZS )NLTKTextSplitterz"Splitting text using NLTK package.

englishF)use_span_tokenize	separatorstrlanguager	   boolkwargsr   returnNonec                  s   t  jdi | || _|| _|| _| jr| jdkrtdz| jr1ddlm} || j| _W dS ddlm	} || _W dS  t
yG   t
dw )	zInitialize the NLTK splitter. z6When use_span_tokenize is True, separator should be ''r   )_get_punkt_tokenizer)sent_tokenizezANLTK is not installed, please install it with `pip install nltk`.N )super__init__
_separator	_language_use_span_tokenize
ValueErrorZnltk.tokenizer   
_tokenizerr   ImportError)selfr
   r   r	   r   r   r   	__class__r   T/var/www/html/lang_env/lib/python3.10/site-packages/langchain_text_splitters/nltk.pyr      s"   	zNLTKTextSplitter.__init__text	List[str]c           	      C  s   | j r=t| j|}g }t|D ]*\}\}}|dkr0||d  d }||| |||  }n||| }|| qn| j|| jd}| || jS )z&Split incoming text and return chunks.r      )r   )	r   listr   Zspan_tokenize	enumerateappendr   Z_merge_splitsr   )	r   r!   spansZsplitsistartendZprev_endZsentencer   r   r    
split_text(   s   zNLTKTextSplitter.split_text)r   r   )
r
   r   r   r   r	   r   r   r   r   r   )r!   r   r   r"   )__name__
__module____qualname____doc__r   r+   __classcell__r   r   r   r    r      s    r   N)
__future__r   typingr   r   Zlangchain_text_splitters.baser   r   r   r   r   r    <module>   s    