o
    qfJ                     @   s|  d dl Z d dlZd dlmZmZmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+Z+d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl6m8Z8 d dl9m:Z: d dl;Z;d dl<m=Z= ej>?e j>@e j>AeBd  d dlCmDZD eDE ZFe&d e jGd!< ee&d"d#d$d%ZHe ZIeFJd& d'd( ZKd)d* ZLd+d, ZMd-d. ZNd/d0 ZOd1d2 ZPd3d4 ZQd5d6 ZRd7d8 ZSd9d: ZTd;d< ZUd=d> ZVd?d@ ZWdAdB ZXdCdD ZYdEdF ZZdGdH Z[dIdJ Z\dS )K    N)	Timestampread_csv
read_excel)glob)SchemaBuilder)
ChatOpenAI)HumanMessage	AIMessage)UnstructuredHTMLLoader)UnstructuredXMLLoader)ElasticsearchStore)RecursiveCharacterTextSplitter)OpenAIEmbeddings)rmtree)
JSONLoader)Elasticsearch)	PdfReader)	natsorted)dumploaddumps)config)convert)PdfReadError)process)letter)canvas)ImageReader)FPDF)Presentation)
FileFormat)convert_file)Imagez/img_atlog/)LogUtils
SECRET_KEYOPENAI_API_KEYES_URLi,  T)request_timeoutretry_on_timeoutzindex startedc                 C   s  t | D ]y}|dr%t j| |}t j| |dd}t || q|drCt j| |}t j| |dd}t || q|drat j| |}t j| |dd}t || q|dr~t j| |}t j| |dd}t || qd S )	Nz.PNG.pngz.JPEG.jpegz.BMP.bmpz.JPG.jpg)oslistdirendswithpathjoinreplacerename)sharepoint_localpathfilenameold_filenew_file r8   </var/www/html/AlliBotV5_chatbot/AlliBotV50/Image_attached.pyfile_format_change/   s&   



r:   c           
      C   s   t | |}tj|td}t\}}|d }|dD ]D}|drFtj||	 dd }	tj
|	rE|jt|	d|d d	dd
 |d8 }n|d|| |d8 }|dk r]|  |d }q|  t| d S )N)pagesize(   
z[IMAGE:      d      )widthheightx      )r   r   Canvasr   split
startswithr-   r0   r1   stripexists	drawImager   
drawStringshowPagesaver   )

input_filetemp_diroutput_filetextcrC   rD   yline
image_pathr8   r8   r9   convert_text_and_images_to_pdfK   s&   

rX   c                 C   s   t  }|  |jddd zt| ddd}| }W d    n1 s%w   Y  W n$ tyO   t| ddd}| }W d    n1 sHw   Y  Y nw |D ]}|jdd	| d
dd qRtj	
| d }|| d d S )NArial   sizerutf-8encodingzlatin-1rB   
      C)txtlnalignr   z_txt.pdf)r   add_pageset_fontopen	readlinesUnicodeDecodeErrorcellrJ   r-   r0   splitextoutput)txt_pathpdfflinesrV   txt_path_without_extr8   r8   r9   convert_txt_to_pdfa   s$   

rt   c                 C   s   t  }|  |jddd tjtjtd }|jd|dd |j	ddd | 
d	}|D ]	}|d
d| q0|| d S )NTrZ   )automarginz/DejaVuSans.ttfDejaVu)fnameuni   r[   r=   r   ra   )r   rg   set_auto_page_breakr-   r0   r1   dirname__file__add_fontrh   rH   
multi_cellrn   )rS   output_pathrp   	font_pathrr   rV   r8   r8   r9   save_text_to_pdfw   s   
r   c              
   C   s   z+t  }||  tj| d }|| dtj |  t	d|  d|  W dS  t
yH } zt	d|  d|  W Y d }~dS d }~ww )	Nr   z	_pptx.pdfSuccessfully converted  to TFailed to convert  to PDF. Error: F)r   LoadFromFiler-   r0   rm   
SaveToFiler    PDFDisposeprint	Exception)	pptx_pathpresentationpdf_pather8   r8   r9   pptx_to_pdf   s   
r   c                 C   s6   t | d|dgd}|dksJ td|  d|  d S )Nrp   --pdf-engine=xelatex
outputfile
extra_args z
Converted r   )r!   r   )md_file_pathoutput_file_pathrn   r8   r8   r9   convert_md_to_pdf   s   r   c                 C   s   t |  g }tdD ]V}zt|}t|jdkrtd|| W q tyF } ztd| d|  t	
d|  W Y d }~qd }~w tya } ztd| d|  W Y d }~qd }~ww |S )Nz*.pdfr   z
Empty filezError reading : zError reading %sz"An unexpected error occurred with )r-   chdirr   r   lenpagesr   appendr   logger	exceptionr   )r0   pdfsfilereaderr   r8   r8   r9   readfiles_pdf   s"   
 r   c                 C   sB   t | d}t| dW  d    S 1 sw   Y  d S )Nrbr^   )ri   base64	b64encodereaddecode)rW   
image_filer8   r8   r9   encode_image   s   $r   c                 C   s   t | J}d}||t j |jdkr/|d}| dd}|j|dd |W  d    S | d|j  d	|j  }|| |W  d    S 1 sRw   Y  d S )
N)   r   BMPRGBr+   r*   JPEG)format.z	.resized.)	r"   ri   	thumbnailLANCZOSr   r   r2   rO   lower)rW   imgmax_sizeconverted_pathr8   r8   r9   resize_and_convert_image   s   

$
$r   c              
   C   sh   zt | d|dgd td|  d|  W dS  ty3 } ztd|  d|  W Y d }~d	S d }~ww )
Nrp   r   r   r   r   Tr   r   F)r!   r   r   )	docx_pathr   r   r8   r8   r9   convert_docx_to_pdf   s   r   c                 C   s4   i }|D ]}| | }t |tr| }|||< q|S )zU
    Convert a row to a JSON object with special handling for Timestamp objects.
    )
isinstancer   	isoformat)rowcolumn_namesdatacol_namevaluer8   r8   r9   row_to_json   s   

r   c                 C   s   dg i}t | }t|jD ]\}}| }|d d|i qW d    n1 s+w   Y  t|ddd}t||ddd W d    d S 1 sKw   Y  d S )	Nr   rS   wr^   r_   F   )ensure_asciiindent)
pdfplumberri   	enumerater   extract_textr   r   )input_pdf_pathoutput_json_pathpdf_contentrp   page_numpagerS   	json_filer8   r8   r9   pdf_to_json_new   s   	"r   c              	   C   s   t | }| D ]:\}}t||}d|gi}| d|dd  d| d}t|d}	t||	dd	 W d
   n1 s=w   Y  q|d}
|
j| dd d
S )z[
    Convert each row with specific columns in an Excel file to individual JSON files.
    content/z.xlsxr   _.jsonr   r   r   NF)index)r   iterrowsr   rH   ri   r   headto_excel)r5   r   output_folderbot_file_namedfr   r   	json_dataoutput_filenamerq   	df_headerr8   r8   r9   excel_to_json   s   

 
r   c                 C   sZ   t | d}t|}W d    n1 sw   Y  t }|| | }t|dd}|S )Nr]      r   )ri   r   r   
add_object	to_schemar   )	file_pathr   r   builderschemaschema_stringr8   r8   r9   generate_schema_from_json   s   

r   c           	      C   s   g }t | D ]G}|drNz"t j| |}t|}t||dd}| }|| |	| W q t
yM } ztd|  t| W Y d }~qd }~ww q|S )Nr   F)	jq_schematext_contentzjson conversation failed %s)r-   r.   r/   r0   r1   r   r   r   extendr   r   r   infor   )	directory_pathsuccess_file_list	documentsr5   r   r   loaderdocsr   r8   r8   r9   load_json_directory  s"   

r   c                 C   s  t j|st | t | D ]n}|dr~t j| |}t j|d d }t j||}t||}|srt	
d|  zt||| || W q tyq }	 zt \}
}}t	d|  t	
d|  W Y d }	~	qd }	~	ww t	
d|  || qd S )Nz.docxr   .pdfz%second docx conversation satisfied %szException  %sz docx to pdf conversion failed %sz$first docx conversation satisfied %s)r-   r0   rK   makedirsr.   r/   r1   rm   r   r   r   rX   r   r   sysexc_infor   )input_directoryoutput_directoryr   rQ   r5   input_file_pathoutput_file_namer   conv_statusexexc_type	exc_valueexc_tracebackr8   r8   r9   batch_convert_docx_to_pdf&  s.   



r   c                 C   s6   g }| D ]}t |dsi |_||jd< || q|S )Nmetadata
session_id)hasattrr  r   )r   r  updated_documentsdocr8   r8   r9   add_session_id_to_documents>  s   

r  c           &      C   s\  zg }d}t | }ttdddd}t| d t| d  t| d  t| d	  }g d
}g }|D ]}	|tt j| d|	  q4|D ]}
t|
}t	|}|
ds\|
dr_d}n
|
drgd}nd}zM|tddtdddddd| d| idgdg}t jt j|
d d |
dd  d }t j| |}t|j| ||
dd  W qH ty } ztd |
 d!|  d}W Y d }~qHd }~ww t jt jtd" }t| }t j|d#d$ g }|D ]}t j| |}t|}d%g i}z\tt|jD ]}|j| }| }|d% | q
t |d&d'}|!dd(}t j||}t"|d)}|#| W d    n	1 sGw   Y  td*|  ||dd  W q ty } zOz%|!dd(}t j||}t$|| td+|  ||dd  W n# ty } ztd,|  t%d-|  d}W Y d }~nd }~ww W Y d }~qd }~ww t&t|d. D ],} t'| d/d0d1}!|!( }"|"rt)|"|}#tdt j*d2< t+ }$t,j-|"t.||$t,/ d3}%qtd4 td5|  t|t|krd}t0| td6 |W S  ty- } zt%d-|  W Y d }~dS d }~ww )7Nrb   r$   zgpt-4or   )openai_api_keymodel
max_tokensz/*.jpgz/*.jpegz/*.pngz/*.bmp)r,   r*   r)   r+   *r*   r,   z
image/jpegr)   z	image/pngz?You are a useful bot that is especially good at OCR from images)r   rS   )typerS   	image_urlurlzdata:z;base64,)r  r  r   r   r   r?   r   r   zFailed to process image r   z/img_json_convT)exist_okr   r   r   r   r   zpypdf satisfied %szpdfplumber satisfied %szexception occured pdf %szException occured due to %sz/*.jsonz.pages[]F)r   r   r   r%   )r   es_connection
index_name	embeddingstrategyzpdf index addedzsuccess_file_list %szfiles removed)1r-   r.   r   r   r   r   r0   r1   r   r   r/   invoker	   r   rm   basenamerH   r   r   r   r   r   r   r|   r}   r   r   r   ranger   r   r   r   r2   ri   writer   r   r   r   r   r  environr   r   from_documentsesExactRetrievalStrategyr   )&r4   	indexnamer  r   status_codeorg_filelistchainimage_filessupported_formatsextr   processed_image_pathimage	mime_typemsgpdf_filenamer   r   img_json_pathr   pdf_successpdf_filer   pdf_datar   r   rS   pdf_jsonjson_filename	json_pathr   r   ir   json_documentsjson_documents_with_session_id
embeddingsdbr8   r8   r9   Image_indexingH  s   
0 
.




r3  )]r-   r   pandasr   r   r   r   gensonr   langchain.chat_modelsr   langchain.schema.messagesr   r	   $langchain_community.document_loadersr
   r    langchain_community.vectorstoresr   langchain.text_splitterr   langchain_openair   shutilr   langchain.document_loadersr   elasticsearchr   PyPDF2r   natsortr   jsonr   r   r   decoupler   doc2pdfr   PyPDF2.errorsr   r   docx2txtr   reportlab.lib.pagesizesr   reportlab.pdfgenr   reportlab.lib.utilsr   fpdfr   spire.presentationr   r    pypandocr!   r   PILr"   r0   r   r1   r|   r}   	img_atlogr#   getRootLoggerr   r  r  r1  r   r:   rX   rt   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r3  r8   r8   r8   r9   <module>   sp     
	
