o
    Zh<                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ erXd dlmZ eeZ	 G d	d
 d
eZ	 G dd dZ 	 G dd deZ!G dd deZ"dS )    )annotationsN)
HTMLParser)TYPE_CHECKINGAnyDictListOptionalTupleUnion)
BaseLoader)Document)TextSplitter)
Connectionc                      s@   e Zd ZdZd fddZdd
dZdddZdddZ  ZS )ParseOracleDocMetadatazParse Oracle doc metadata...returnNonec                   s"   t    |   d| _i | _d S )NF)super__init__resetmatchmetadataself	__class__ d/var/www/html/lang_env/lib/python3.10/site-packages/langchain_community/document_loaders/oracleai.pyr   %   s   

zParseOracleDocMetadata.__init__tagstrattrsList[Tuple[str, Optional[str]]]c                 C  sV   |dkr d}|D ]\}}|dkr|}|dkr|r|| j |< qd S |dkr)d| _d S d S )Nmeta namecontenttitleT)r   r   )r   r   r   entryr#   valuer   r   r   handle_starttag+   s   

z&ParseOracleDocMetadata.handle_starttagdatac                 C  s   | j r|| jd< d| _ d S d S )Nr%   F)r   r   )r   r)   r   r   r   handle_data7   s   

z"ParseOracleDocMetadata.handle_dataDict[str, Any]c                 C  s   | j S N)r   r   r   r   r   get_metadata<   s   z#ParseOracleDocMetadata.get_metadata)r   r   )r   r   r   r    r   r   )r)   r   r   r   )r   r+   )	__name__
__module____qualname____doc__r   r(   r*   r-   __classcell__r   r   r   r   r   "   s    

r   c                   @  s.   e Zd ZdZeddddZedddZdS )OracleDocReaderzRead a fileNinput_stringUnion[str, None]r   r   c           	      C  s   d}d}| d u rd tjddd} tt }td|}t| 	 
 }|d | }tdtd}|| | }| }||}|d | }|S )N       r"   Z>abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789   )kz>I)joinrandomchoicesinttimestructpackhashlibsha256encodedigestgetrandbitshexzfill)	r4   Z
out_lengthZhash_len	timestampZtimestamp_binZhashval_binZcounter_binZ	object_idZobject_id_hexr   r   r   generate_object_idF   s*   z"OracleDocReader.generate_object_idconnr   	file_pathparamsdictUnion[Document, None]c              
   C  s  i }zddl }W n ty } ztd|d}~ww zd|j_|  }t|d}| }W d   n1 s7w   Y  |du rGtd|dW S ||j	}	||j	}
|j
d|t||	|
d	 |  |	du rki }nt|	 }|d
s{|drt }|| | }t| jd | }||d< ||d< |
du rtd|dW S tt|
 |dW S  ty } ztd|  td|  |  W Y d}~dS d}~ww )zRead a file using OracleReader
        Args:
            conn: Oracle Connection,
            file_path: Oracle Directory,
            params: ONNX file name.
        Returns:
            Plain text and metadata as Langchain Document.
        r   NIUnable to import oracledb, please install with `pip install -U oracledb`.Frbr"   Zpage_contentr   a  
                declare
                    input blob;
                begin
                    input := :blob;
                    :mdata := dbms_vector_chain.utl_to_text(input, json(:pref));
                    :text := dbms_vector_chain.utl_to_text(input);
                end;)ZblobZprefmdatatext<!DOCTYPE html<HTML>$_oid_fileAn exception occurred :: zSkip processing )oracledbImportErrordefaults
fetch_lobscursoropenreadr   varZDB_TYPE_CLOBexecutejsondumpscloser   getvalue
startswithr   feedr-   r3   rI   username	Exceptionloggerinfo)rJ   rK   rL   r   rZ   er^   fr)   rR   rS   Zdoc_datapdoc_idexr   r   r   	read_filei   sf   

zOracleDocReader.read_filer,   )r4   r5   r   r   )rJ   r   rK   r   rL   rM   r   rN   )r.   r/   r0   r1   staticmethodrI   rr   r   r   r   r   r3   C   s    "r3   c                      s,   e Zd ZdZd fdd	ZdddZ  ZS )OracleDocLoaderzwRead documents using OracleDocLoader
    Args:
        conn: Oracle Connection,
        params: Loader parameters.
    rJ   r   rL   r+   kwargsr   c                   s.   || _ tt|| _t jdi | d S )Nr   )rJ   rc   loadsrd   rL   r   r   )r   rJ   rL   ru   r   r   r   r      s   zOracleDocLoader.__init__r   List[Document]c              
   C  s  zddl }W n ty } ztd|d}~ww d}g }i }ddi}z| jdurM| jd| _| jd| _| jd| _| jd	| _| jd
| _nt	dd|j
_| jrmt| j| j|}|du rh|W S || | jrd}t| jD ]/}	tj| j|	}
tj|
rt| j|
|}|du r|d }td| d qx|| qx| jr#zW| jdu s| jdu rt	d| j }| jd| _| jdurt| jdkrt	dd}|j|| j | j d | }|D ]}|d | jv r|d dvrt	dqd| _| jdur| jD ]}| jd | | _qd| j d t| d | j d | j d | j d | j d  }|| |D ]}i }|du rtt | jj!d! | j d! | j d! | j }||d"< |t"d#|d$ qF|d durt#|d }|$d%s|$d&rt% }|&| |' }t | jj!d! | j d! | j d! | j d! t#|d'  }||d"< |d' |d(< | jdurt| j}t(d|D ]}||d'  || j| < q|d du r|t"d#|d$ qF|t"t#|d |d$ qFW |W S  t	y" } ztd)|  t)*  |+   d}~ww |W S  t	y? } ztd)|  t)*   d}~ww )*z,Load data into LangChain Document objects...r   NrO   Z	plaintextfalsefiledirowner	tablenamecolnamezMissing loader parametersF   zTotal skipped: 
z%Missing owner or column name or both.
mdata_cols   z?Exceeds the max number of columns you can request for metadata.zgselect column_name, data_type from all_tab_columns where owner = :ownername and table_name = :tablename)Z	ownernamer|   )NUMBERZBINARY_DOUBLEZBINARY_FLOATLONGZDATE	TIMESTAMPZVARCHAR2zDThe datatype for the column requested for metadata is not supported.z, rowidz, z'select dbms_vector_chain.utl_to_text(t.z, json('z+')) mdata, dbms_vector_chain.utl_to_text(t.z) textz from .z trV   rW   r"   rQ   rT   rU      Z_rowidrY   ),rZ   r[   rL   getry   rz   r{   r|   r}   rj   r\   r]   r3   rr   rJ   appendoslistdirpathr:   isfilerk   rl   r^   r   lenrb   upperZfetchallZmdata_cols_sqlrc   rd   rI   ri   r   r   rg   r   rh   r-   range	traceback	print_excre   )r   rZ   rm   Zncolsresultsr   Zm_paramsdocZ
skip_count	file_namerK   r^   sqlrowsrowcolrp   r)   ro   irq   r   r   r   load   s\  



	
	


	



>zOracleDocLoader.load)rJ   r   rL   r+   ru   r   )r   rw   )r.   r/   r0   r1   r   r   r2   r   r   r   r   rt      s    rt   c                      s,   e Zd ZdZd fd
dZdddZ  ZS )OracleTextSplitterz$Splitting text using Oracle chunker.rJ   r   rL   r+   ru   r   r   r   c              
     s   || _ || _t jdi | z%ddl}zddl}W n ty+ } ztd|d}~ww || _|| _W dS  ty?   tdw )zInitialize.r   NrO   zjoracledb or json or both are not installed. Please install them. Recommendations: `pip install oracledb`. r   )	rJ   rL   r   r   rc   rZ   r[   	_oracledb_json)r   rJ   rL   ru   rc   rZ   rm   r   r   r   r     s,   zOracleTextSplitter.__init__rS   r   	List[str]c           	   
   C  s   zddl }W n ty } ztd|d}~ww g }z;d| jj_| j }|j|jd |j	d|| j
| jd 	 | }|du rF	 |W S | j
|d }||d	  q: tyn } ztd
|  t   d}~ww )z&Split incoming text and return chunks.r   NrO   F)r$   zUselect t.column_value from dbms_vector_chain.utl_to_chunks(:content, json(:params)) t)r$   rL   TZ
chunk_datarY   )rZ   r[   r   r\   r]   rJ   r^   ZsetinputsizesZCLOBrb   r   rd   rL   Zfetchonerv   r   rj   rk   rl   r   r   )	r   rS   rZ   rm   Zsplitsr^   r   drq   r   r   r   
split_text  sD   

	zOracleTextSplitter.split_text)rJ   r   rL   r+   ru   r   r   r   )rS   r   r   r   )r.   r/   r0   r1   r   r   r2   r   r   r   r   r   |  s    r   )#
__future__r   rA   rc   loggingr   r;   r?   r>   r   html.parserr   typingr   r   r   r   r   r	   r
   Zlangchain_core.document_loadersr   Zlangchain_core.documentsr   Zlangchain_text_splittersr   rZ   r   	getLoggerr.   rk   r   r3   rt   r   r   r   r   r   <module>   s2   $
r E