from langchain_community.document_loaders import UnstructuredWordDocumentLoader
from langchain_community.document_loaders import UnstructuredCSVLoader
from langchain_community.document_loaders import UnstructuredExcelLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import UnstructuredPowerPointLoader
from langchain_community.document_loaders import UnstructuredXMLLoader
from langchain.document_loaders import JSONLoader
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_community.document_loaders.tsv import UnstructuredTSVLoader
from langchain_community.document_loaders.html import UnstructuredHTMLLoader




class DocumentLoader:
    """Thin wrappers around LangChain document loaders, one per file format.

    Each ``*_file_loader`` method builds the matching LangChain loader for
    the given path and returns the result of its ``load()`` call.  Loader
    options are exposed as keyword-only arguments with defaults, so calling
    a method with only a path is always valid.  :meth:`load_file` selects
    the right method from the file extension.
    """

    # File extension (lower-case, leading dot) -> name of the handling method.
    # Method *names* are stored instead of function objects so that overrides
    # defined in a subclass are honoured by ``load_file``.
    _LOADER_METHODS = {
        ".xlsx": "excel_file_loader",
        ".xls": "excel_file_loader",
        ".doc": "doc_docx_file_loader",
        ".docx": "doc_docx_file_loader",
        ".csv": "csv_file_loader",
        ".pdf": "pdf_file_loader",
        ".pptx": "pptx_file_loader",
        ".ppt": "pptx_file_loader",
        ".xml": "xml_file_loader",
        ".json": "json_file_loader",
        ".txt": "text_file_loader",
        ".md": "markdown_file_loader",
        ".tsv": "tsv_file_loader",
        ".html": "html_file_loader",
        ".htm": "html_file_loader",
    }

    def load_file(self, file_path, **loader_kwargs):
        """Load ``file_path`` with the loader that matches its extension.

        The extension is compared case-insensitively against the keys of
        ``_LOADER_METHODS``.

        Args:
            file_path: Path (``str`` or ``os.PathLike``) of the file to load.
            **loader_kwargs: Keyword options forwarded unchanged to the
                selected ``*_file_loader`` method.  A method that does not
                accept a given option raises ``TypeError``.

        Returns:
            The value returned by the selected ``*_file_loader`` method.

        Raises:
            ValueError: If the extension is missing or not supported.
        """
        import os  # function-scope import keeps the class self-contained

        extension = os.path.splitext(os.fspath(file_path))[1].lower()
        method_name = self._LOADER_METHODS.get(extension)
        if method_name is None:
            supported = ", ".join(sorted(self._LOADER_METHODS))
            raise ValueError(
                f"Unsupported file extension {extension!r} for "
                f"{file_path!r}; supported extensions: {supported}"
            )
        return getattr(self, method_name)(file_path, **loader_kwargs)

    def excel_file_loader(self, excel_file_name, *, mode="elements"):
        """Load an Excel workbook with ``UnstructuredExcelLoader``.

        Args:
            excel_file_name: Path of the workbook.
            mode: Loader mode passed to ``UnstructuredExcelLoader``.

        Returns:
            The result of the loader's ``load()`` call.
        """
        return UnstructuredExcelLoader(excel_file_name, mode=mode).load()

    def doc_docx_file_loader(self, docx_doc_file_path):
        """Load a Word document (``.doc`` or ``.docx``).

        Args:
            docx_doc_file_path: Path of the Word document.

        Returns:
            The result of ``UnstructuredWordDocumentLoader.load()``.
        """
        return UnstructuredWordDocumentLoader(docx_doc_file_path).load()

    def csv_file_loader(self, csv_filepath):
        """Load a CSV file with ``UnstructuredCSVLoader``.

        Args:
            csv_filepath: Path of the CSV file.

        Returns:
            The result of the loader's ``load()`` call.
        """
        return UnstructuredCSVLoader(csv_filepath).load()

    def pdf_file_loader(self, pdf_filepath):
        """Load a PDF file with ``PyPDFLoader``.

        Args:
            pdf_filepath: Path of the PDF file.

        Returns:
            The result of the loader's ``load()`` call.
        """
        return PyPDFLoader(pdf_filepath).load()

    def pptx_file_loader(self, pptx_filepath):
        """Load a PowerPoint file with ``UnstructuredPowerPointLoader``.

        Args:
            pptx_filepath: Path of the presentation.

        Returns:
            The result of the loader's ``load()`` call.
        """
        return UnstructuredPowerPointLoader(pptx_filepath).load()

    def xml_file_loader(self, xml_filepath):
        """Load an XML file with ``UnstructuredXMLLoader``.

        Args:
            xml_filepath: Path of the XML file.

        Returns:
            The result of the loader's ``load()`` call.
        """
        return UnstructuredXMLLoader(xml_filepath).load()

    def json_file_loader(self, json_filepath, *, jq_schema=".",
                         text_content=False):
        """Load a JSON file with ``JSONLoader``.

        Args:
            json_filepath: Path of the JSON file.
            jq_schema: jq expression passed to ``JSONLoader``; the default
                ``"."`` is the jq identity filter (the whole document).
            text_content: Passed to ``JSONLoader`` unchanged; the default
                ``False`` keeps this method usable on JSON whose selected
                content is not a plain string.

        Returns:
            The result of the loader's ``load()`` call.
        """
        loader = JSONLoader(
            file_path=json_filepath,
            jq_schema=jq_schema,
            text_content=text_content,
        )
        return loader.load()

    def text_file_loader(self, text_filepath, *, encoding=None,
                         autodetect_encoding=False):
        """Load a plain-text file with ``TextLoader``.

        Args:
            text_filepath: Path of the text file.
            encoding: Text encoding passed to ``TextLoader``.  ``None``
                (the default) leaves the choice to ``TextLoader``; pass
                e.g. ``"utf-8"`` to avoid depending on the platform default.
            autodetect_encoding: Passed to ``TextLoader`` unchanged.

        Returns:
            The result of the loader's ``load()`` call.
        """
        loader = TextLoader(
            text_filepath,
            encoding=encoding,
            autodetect_encoding=autodetect_encoding,
        )
        return loader.load()

    def markdown_file_loader(self, md_filepath, *, mode="single",
                             strategy="fast"):
        """Load a Markdown file with ``UnstructuredMarkdownLoader``.

        Args:
            md_filepath: Path of the Markdown file.
            mode: Loader mode passed to ``UnstructuredMarkdownLoader``.
            strategy: Partitioning strategy passed to the loader.

        Returns:
            The result of the loader's ``load()`` call.
        """
        loader = UnstructuredMarkdownLoader(
            md_filepath, mode=mode, strategy=strategy
        )
        return loader.load()

    def tsv_file_loader(self, tsv_filepath, *, mode="elements"):
        """Load a TSV file with ``UnstructuredTSVLoader``.

        Args:
            tsv_filepath: Path of the TSV file.
            mode: Loader mode passed to ``UnstructuredTSVLoader``.

        Returns:
            The result of the loader's ``load()`` call.
        """
        return UnstructuredTSVLoader(file_path=tsv_filepath, mode=mode).load()

    def html_file_loader(self, html_filepath, *, mode="elements"):
        """Load an HTML file with ``UnstructuredHTMLLoader``.

        Args:
            html_filepath: Path of the HTML file.
            mode: Loader mode passed to ``UnstructuredHTMLLoader``.

        Returns:
            The result of the loader's ``load()`` call.
        """
        return UnstructuredHTMLLoader(file_path=html_filepath, mode=mode).load()
