o
    Zh\                     @  sn   d dl mZ d dlZd dlmZmZmZmZmZ d dl	m
Z
mZ G dd deZdddZG dd deZdS )    )annotationsN)AnyListLiteralOptionalUnion)LanguageTextSplitterc                      s0   e Zd ZdZ	dd fddZdddZ  ZS )CharacterTextSplitterz(Splitting text that looks at characters.

F	separatorstris_separator_regexboolkwargsr   returnNonec                   s"   t  jdi | || _|| _dS )Create a new TextSplitter.N )super__init__
_separator_is_separator_regex)selfr   r   r   	__class__r   Y/var/www/html/lang_env/lib/python3.10/site-packages/langchain_text_splitters/character.pyr      s   
zCharacterTextSplitter.__init__text	List[str]c                 C  sB   | j r| jnt| j}t||| j}| jrdn| j}| ||S )&Split incoming text and return chunks. )r   r   reescape_split_text_with_regex_keep_separator_merge_splits)r   r   r   splitsr   r   r   r   
split_text   s
   z CharacterTextSplitter.split_text)r   F)r   r   r   r   r   r   r   r   r   r   r   r   )__name__
__module____qualname____doc__r   r'   __classcell__r   r   r   r   r
   	   s
    r
   r   r   r   keep_separator$Union[bool, Literal['start', 'end']]r   r   c                   s   |r\|rUt d| d|  |dkr# fddtdt d dD n fd	dtdt dD }t d dkrB| d
d  7 }|dkrM| d
 g n d g| }nt || }nt| }dd |D S )N()endc                       g | ]} |  |d    qS    r   .0iZ_splitsr   r   
<listcomp>(        z*_split_text_with_regex.<locals>.<listcomp>r   r5      c                   r3   r4   r   r6   r9   r   r   r:   *   r;   c                 S  s   g | ]}|d kr|qS )r    r   )r7   sr   r   r   r:   7   s    )r!   splitrangelenlist)r   r   r.   r&   r   r9   r   r#      s    "r#   c                      sZ   e Zd ZdZ			dd fddZd ddZd!ddZed"ddZe	d#ddZ
  ZS )$RecursiveCharacterTextSplitterzSplitting text by recursively look at characters.

    Recursively tries to split by different characters to find one
    that works.
    NTF
separatorsOptional[List[str]]r.   r/   r   r   r   r   r   r   c                   s.   t  jdd|i| |pg d| _|| _dS )r   r.   )r   
 r    Nr   )r   r   _separatorsr   )r   rD   r.   r   r   r   r   r   r   A   s   
z'RecursiveCharacterTextSplitter.__init__r   r   r   c                 C  s&  g }|d }g }t |D ](\}}| jr|nt|}|dkr"|} nt||r4|}||d d } nq| jr:|nt|}t||| j}	g }
| jrMdn|}|	D ]2}| || jk ra|
	| qQ|
rp| 
|
|}|| g }
|sx|	| qQ| ||}|| qQ|
r| 
|
|}|| |S )r   r=   r    r5   N)	enumerater   r!   r"   searchr#   r$   Z_length_function_chunk_sizeappendr%   extend_split_text)r   r   rD   Zfinal_chunksr   Znew_separatorsr8   Z_sr   r&   Z_good_splitsr>   Zmerged_textZ
other_infor   r   r   rN   M   s@   

z*RecursiveCharacterTextSplitter._split_textc                 C  s   |  || jS )zSplit the input text into smaller chunks based on predefined separators.

        Args:
            text (str): The input text to be split.

        Returns:
            List[str]: A list of text chunks obtained after splitting.
        )rN   rH   )r   r   r   r   r   r'   u   s   	z)RecursiveCharacterTextSplitter.split_textlanguager   c                 K  s   |  |}| d|dd|S )a  Return an instance of this class based on a specific language.

        This method initializes the text splitter with language-specific separators.

        Args:
            language (Language): The language to configure the text splitter for.
            **kwargs (Any): Additional keyword arguments to customize the splitter.

        Returns:
            RecursiveCharacterTextSplitter: An instance of the text splitter configured
            for the specified language.
        T)rD   r   Nr   )get_separators_for_language)clsrO   r   rD   r   r   r   from_language   s   
z,RecursiveCharacterTextSplitter.from_languagec                 C  s  | t jks
| t jkrg dS | t jkrg dS | t jkr g dS | t jkr)g dS | t jkr2g dS | t jkr;g dS | t jkrDg dS | t j	krMg dS | t j
krVg d	S | t jkr_g d
S | t jkrhg dS | t jkrqg dS | t jkrzg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jv rtd|  dtd|  dtt  )a
  Retrieve a list of separators specific to the given language.

        Args:
            language (Language): The language for which to get the separators.

        Returns:
            List[str]: A list of separators appropriate for the specified language.
        )
class z
void z
int z
float z
double 
if 
for 
while 
switch 
case r   rF   rG   r    )
func 
var 
const 
type rT   rU   rW   rX   r   rF   rG   r    )rS   
public 
protected 	
private 
static rT   rU   rV   rW   rX   r   rF   rG   r    )rS   r]   r^   r_   z

internal z
companion z
fun 
val rZ   rT   rU   rV   z
when rX   
else r   rF   rG   r    )

function r[   
let rZ   rS   rT   rU   rV   rW   rX   	
default r   rF   rG   r    )
enum 
interface z
namespace r\   rS   rc   r[   rd   rZ   rT   rU   rV   rW   rX   re   r   rF   rG   r    )rc   rS   rT   	
foreach rV   
do rW   rX   r   rF   rG   r    )
z	
message z	
service rf   z
option 
import z
syntax r   rF   rG   r    )rS   
def z
	def r   rF   rG   r    )z
=+
z
-+
z
\*+
z

.. *

r   rF   rG   r    )rk   rS   rT   
unless rV   rU   ri   z
begin z
rescue r   rF   rG   r    )rk   z
defp z
defmodule z
defprotocol z

defmacro z
defmacrop rT   rl   rV   rX   z
cond z
with rU   ri   r   rF   rG   r    )z
fn r[   rd   rT   rV   rU   z
loop 
match r[   r   rF   rG   r    )rS   z
object rk   ra   rZ   rT   rU   rV   rm   rX   r   rF   rG   r    )rY   rS   
struct rf   rT   rU   rV   ri   rW   rX   r   rF   rG   r    )	z
#{1,6} z```
z	
\*\*\*+
z
---+
z
___+
r   rF   rG   r    )z
\\chapter{z
\\section{z
\\subsection{z
\\subsubsection{z
\\begin{enumerate}z
\\begin{itemize}z
\\begin{description}z
\\begin{list}z
\\begin{quote}z
\\begin{quotation}z
\\begin{verse}z
\\begin{verbatim}z
\\begin{align}z$$$rG   r    )z<bodyz<divz<pz<brz<liz<h1z<h2z<h3z<h4z<h5z<h6z<spanz<tablez<trz<tdz<thz<ulz<olz<headerz<footerz<navz<headz<stylez<scriptz<metaz<titler    )rg   rf   z
implements z

delegate 
event rS   z

abstract r]   r^   r_   r`   z
return rT   z

continue rU   rh   rV   rW   z
break rX   rb   
try z
throw 	
finally 
catch r   rF   rG   r    )z
pragma z
using z

contract rg   z	
library z
constructor r\   rc   rp   z

modifier z
error rn   rf   rT   rU   rV   z

do while z

assembly r   rF   rG   r    )z
IDENTIFICATION DIVISION.z
ENVIRONMENT DIVISION.z
DATA DIVISION.z
PROCEDURE DIVISION.z
WORKING-STORAGE SECTION.z
LINKAGE SECTION.z
FILE SECTION.z
INPUT-OUTPUT SECTION.z
OPEN z
CLOSE z
READ z
WRITE z
IF z
ELSE z
MOVE z	
PERFORM z
UNTIL z	
VARYING z
ACCEPT z	
DISPLAY z

STOP RUN.rF   rG   r    )
z
local rc   rT   rU   rV   z
repeat r   rF   rG   r    )z	
main :: z
main = rd   z
in ri   z
where 
:: z
= 
data z	
newtype r\   rt   z
module rj   z
qualified z
import qualified rS   z

instance rX   z
| ru   z
= {z
, r   rF   rG   r    )rc   z
param rT   rh   rU   rV   rW   rS   rq   rs   rr   r   rF   rG   r    z	Language z is not implemented yet!z& is not supported! Please choose from )r   CZCPPZGOZJAVAZKOTLINZJSZTSPHPPROTOPYTHONZRSTZRUBYZELIXIRZRUSTZSCALAZSWIFTMARKDOWNZLATEXHTMLZCSHARPZSOLCOBOLZLUAZHASKELLZ
POWERSHELL_value2member_map_
ValueErrorrB   )rO   r   r   r   rP      sn   



















$



&
z:RecursiveCharacterTextSplitter.get_separators_for_language)NTF)
rD   rE   r.   r/   r   r   r   r   r   r   )r   r   rD   r   r   r   r(   )rO   r   r   r   r   rC   )rO   r   r   r   )r)   r*   r+   r,   r   rN   r'   classmethodrR   staticmethodrP   r-   r   r   r   r   rC   :   s    

(rC   )r   r   r   r   r.   r/   r   r   )
__future__r   r!   typingr   r   r   r   r   Zlangchain_text_splitters.baser   r	   r
   r#   rC   r   r   r   r   <module>   s    
