from office365.sharepoint.client_context import ClientContext
from datetime import datetime
from elasticsearch import Elasticsearch
from elasticsearch import Elasticsearch
import os
from glob import glob
from PyPDF2 import PdfReader
from PyPDF2.errors import PdfReadError
from json import dump,load,dumps
import pdfplumber 
from natsort import natsorted
from langchain.document_loaders import JSONLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import ElasticsearchStore
import pandas as pd
import json
import io
import inspect


LOG_FILE = f"/var/www/html/carrier_appointments_json/log/carrier_appointments-log-{datetime.now().strftime('%Y-%m-%d')}.log"

# Make sure the log directory exists so the first append does not fail.
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)


def custom_log(level, message):
    """Append a formatted log line to LOG_FILE and echo it to stdout.

    Parameters
    ----------
    level : str
        Log level label (e.g. "info", "error"); upper-cased in the output line.
    message : str
        Text to log; interpolated into the line via an f-string, so any
        object with a sensible str() works.
    """
    # Caller frame info so each line records where the log call originated.
    frame = inspect.currentframe().f_back
    filename = os.path.basename(frame.f_code.co_filename)
    lineno = frame.f_lineno
    module = frame.f_globals["__name__"]

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Mimics a logging.Formatter layout: time - module - LEVEL - [file : line] - msg
    log_line = f"{timestamp} - {module} - {level.upper():>8s} - [{filename:25s} : {lineno:3d}] - {message}\n"

    with open(LOG_FILE, "a") as f:
        f.write(log_line)

    # Echo to stdout as well (acts like a stream/screen handler).
    print(log_line, end="")


custom_log("info", "Carrier appointments sync started.")

# Elasticsearch client for the SCIC Elastic Cloud deployment.
# NOTE(review): the URL embeds basic-auth credentials in plain text — move
# them to environment variables / a secrets store and rotate this password.
es= Elasticsearch('https://elastic:8oKIqy312EBsAPzWT64NUzji@scic-elasticsearch.es.us-central1.gcp.cloud.es.io:443',request_timeout=300,retry_on_timeout=True)


# Credentials (Use secure storage for passwords)
# NOTE(review): SharePoint username/password are hardcoded in source — same
# concern as above; load from secure configuration and rotate this password.
username = "karthikraj@riskeducation.org"
password = "Figo@6516"  # Consider using secure storage, e.g., environment variables
#############################
# SharePoint site and file details
site_url = "https://combinedagents.sharepoint.com"
file_url = "/Shared Documents/Carrier Appointments - Master List.xlsx"  # server-relative path to the workbook

# SharePoint authentication
# NOTE(review): if authentication fails, the error is only logged and the
# script keeps going — ``ctx`` is then unbound and the next try block raises
# NameError (which is caught and logged there).  Consider exiting on failure.
try:
    custom_log("info", f"sharepoint authentication")
    ctx = ClientContext(site_url).with_user_credentials(username, password)
except Exception as e:
    custom_log("Error", f"sharepoint connection: {e}")

# Fetch the workbook's metadata from SharePoint and capture its last-modified
# timestamp (ISO-like format, matching what is stored in Elasticsearch).
try:
    file = ctx.web.get_file_by_server_relative_url(file_url).get().execute_query()
    sharepoint_last_modified = file.time_last_modified.strftime('%Y-%m-%dT%H:%M:%S')
except Exception as e:
    custom_log("Error", f"sharepoint file retrieval: {e}")


# Fetch the workbook's stored last-modified timestamp from Elasticsearch.
# ``es_last_modified`` is initialised to None up front so a failed search or
# an empty result leaves it bound; the comparison below then treats the
# workbook as changed.  (The original assigned ``index_name`` twice and left
# ``es_last_modified`` unbound when ``es.search`` raised.)
index_name = "carrier_appointments_last_modified_date"
query = {
    "query": {
        "match": {
            "file_name": "Carrier Appointments - Master List.xlsx"
        }
    }
}

es_last_modified = None
try:
    response = es.search(index=index_name, body=query)
except Exception as e:
    custom_log("Error", f"ElasticSearch last modified date error: {e}")

try:
    # First hit (if any) holds the timestamp recorded on the previous run.
    if response["hits"]["hits"]:
        es_last_modified = response["hits"]["hits"][0]["_source"]["last_modified"]
except Exception as e:
    custom_log("Error", f"No last modified date error: {e}")

try:
    # Re-index only when the workbook actually changed in SharePoint.
    if es_last_modified == sharepoint_last_modified:
        custom_log("info", "sharepoint date and ElasticSearch's date are same, so no changes")
        print("Last modified dates match.")

    else:
        index_name = "carrier_appointments_last_modified_date"
        file_name = "Carrier Appointments - Master List.xlsx"

        # Drop the stale timestamp document (if any) so the tracking index
        # keeps exactly one entry per file.
        try:
            response = es.search(index=index_name, body=query)
            es_last_modified = None
            doc_id = None  # document ID of the stale entry, if one exists
            if response["hits"]["hits"]:
                hit = response["hits"]["hits"][0]  # first matching document
                es_last_modified = hit["_source"]["last_modified"]
                doc_id = hit["_id"]
                try:
                    if doc_id:
                        es.delete(index=index_name, id=doc_id)
                        print(f"Deleted outdated document with ID: {doc_id}")
                        custom_log("info", f"Deleted outdated document with ID: {doc_id}")
                except Exception as e:
                    custom_log("error", f"Error at deleting the outdate documents and the error is: {e}")
        except Exception as e:
            custom_log("Error", f"Error at getting the last modified date from ElasticSearch: {e}")

        # Record the SharePoint timestamp as the new baseline.
        doc = {
            "file_name": file_name,
            "last_modified": sharepoint_last_modified
        }
        try:
            es.index(index=index_name, document=doc)
        except Exception as e:
            custom_log("Error", f"Error at changing the last modified date to sharepoint date in ElasticSearch: {e}")

        # Vector indices rebuilt below.  Deleting them first is intentionally
        # disabled; re-enable the loop if duplicate documents become an issue.
        indices_to_delete = [
            "carrier_appointments_master_list_cl",
            "carrier_appointments_master_list_personal",
            "carrier_appointments_master_list_wholesalers",
            "carrier_appointments_master_list_caais"
        ]
        # for index in indices_to_delete:
        #     if es.indices.exists(index=index):
        #         es.indices.delete(index=index)
        #         print(f"Deleted index: {index}")

        # Re-authenticate and pull the workbook down from SharePoint.
        site_url = "https://combinedagents.sharepoint.com"
        relative_url = "/Shared Documents"
        # NOTE(review): hardcoded credentials — move to environment variables
        # or a secrets store, and rotate the exposed password.
        username = "karthikraj@riskeducation.org"
        password = "Figo@6516"
        ctx = ClientContext(site_url).with_user_credentials(username, password)

        # Load the folder's file listing.
        folder = ctx.web.get_folder_by_server_relative_url(relative_url)
        files = folder.files
        ctx.load(files)
        ctx.execute_query()

        print("Files in folder:")
        for sp_file in files:  # renamed from ``file`` to avoid shadowing the handle below
            print(f" - {sp_file.properties['Name']} ({sp_file.properties['ServerRelativeUrl']})")

        # Download the master workbook next to the JSON outputs.
        file_name = "Carrier Appointments - Master List.xlsx"
        target_dir = "/var/www/html/"
        file_path = os.path.join(target_dir, file_name)
        for sp_file in files:
            if sp_file.properties['Name'] == file_name:
                file_stream = io.BytesIO()
                sp_file.download(file_stream).execute_query()
                with open(file_path, "wb") as f:
                    f.write(file_stream.getvalue())
                print(f"{file_path} downloaded successfully!")

        # One sheet per business line; each sheet becomes its own JSON file
        # and its own Elasticsearch vector index (paths/index names below
        # reproduce the original hard-coded values exactly).
        sheet_names = ["CL", "Personal", "Wholesalers", "CAAIS"]
        json_paths = {}
        for sheet in sheet_names:
            df = pd.read_excel(file_path, sheet_name=sheet)
            json_path = f'/var/www/html/carrier_appointments_json{sheet}_output.json'
            with open(json_path, 'w') as out:
                out.write(df.to_json(orient='records', indent=4))
            json_paths[sheet] = json_path

        # OpenAIEmbeddings reads OPENAI_API_KEY from the environment itself.
        # The original self-assignment raised TypeError when the key was
        # missing (os.environ values must be str); warn early instead.
        if not os.environ.get("OPENAI_API_KEY"):
            custom_log("error", "OPENAI_API_KEY is not set; embedding will fail")
        embeddings = OpenAIEmbeddings()

        for sheet in sheet_names:
            loader = JSONLoader(file_path=json_paths[sheet], jq_schema=".[]", text_content=False)
            documents = loader.load()
            index = f"carrier_appointments_master_list_{sheet.lower()}"
            db = ElasticsearchStore.from_documents(
                documents=documents,
                es_connection=es,
                index_name=index,
                embedding=embeddings,
                strategy=ElasticsearchStore.ExactRetrievalStrategy())
            custom_log("info", f"{index} indexing completed")

except Exception as e:
    # BUG FIX: the original called custom_log("error in exception:", e),
    # passing the message as the level; use a proper level/message pair.
    custom_log("error", f"error in exception: {e}")