import requests
import pandas as pd
from datetime import datetime, timedelta
import logging
import time
import math
import re
import pymsteams
import traceback
import inspect
import sys


logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Clear existing handlers to prevent duplication
if logger.hasHandlers():
    logger.handlers.clear()

# Create handlers
console_handler = logging.StreamHandler()
file_handler = logging.FileHandler('/var/www/html/FPA_log/Finance_Posting_Automation.log', encoding='utf-8')
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)
file_handler.setFormatter(formatter)

# Add handlers
logger.addHandler(console_handler)
logger.addHandler(file_handler)



# **Step 1: Authentication for BC**
tenant_id = "95f69e7d-1811-4ab9-9b5a-eba95d3eba9b"
client_id = "6a77dbc5-5110-4b3f-85cf-692520941150"
client_secret = "mzyHgqNa=I[:D3Ygq_OtuVhRKlcvb776"
# environment = "CEDEV_DONOTDELETE" #Production
environment = 'Production'
# company = "DEV-121219" #PROD-NAIER
company = "PROD-NAIER"

# accis_base_url = 'https://scicdev.crm.dynamics.com'
accis_base_url = 'https://scic.crm.dynamics.com'
accis_client_id = '6e5989aa-4fd1-4015-8648-8f75609d607b'
accis_client_secret = 'mUv8Q~1u3JtjdAdw.ov1sRfjzTm3yS.f~cC6wcQS'
accis_tenant_id = '95f69e7d-1811-4ab9-9b5a-eba95d3eba9b'

TEAMS_WEBHOOK_URL = "https://qubespacein.webhook.office.com/webhookb2/5d9de417-54b6-47f4-aee3-e10693e7d804@d996ac79-e80a-4fc8-be1a-8f891b569988/IncomingWebhook/c1361199482347dbaee878fbf5891a34/b187de49-aa7e-4d44-8967-d6c7f25ae53e/V26k0hgLbavRevpPNilrdVaK6-0Ldnlvzg7_UVKQam-BY1"  # ← Replace with your actual webhook URL

yesterday_date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")


def notify_teams_on_exception(teams_webhook_url, exception_obj):
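    """Post an error card to the Teams webhook with the calling function name, exception type, message, and traceback."""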
    try:
        # Get the calling function name automatically
        caller = inspect.stack()[1]
        function_name = caller.function
 
        # Build Teams message
        teams_message = pymsteams.connectorcard(teams_webhook_url)
        teams_message.title("Python Error Notification for Posting Date - {yesterday_date}")
        teams_message.summary("Error occurred in script")
 
        section = pymsteams.cardsection()
        section.activityTitle("Error Details")
        section.addFact("Function", function_name)
        section.addFact("Error Type", type(exception_obj).__name__)
        section.addFact("Message", str(exception_obj))
        section.text(f"```\n{traceback.format_exc(limit=2)}\n```")
 
        teams_message.addSection(section)
        teams_message.send()
    except Exception as teams_error:
        logging.error(f"Failed to send error to Teams: {teams_error}")

 
def get_access_token(tenant_id, client_id, client_secret, max_retries=3, delay=5):
    """
    Fetch access token with retry mechanism and detailed error handling.
    """
    token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
    data = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
        "scope": "https://api.businesscentral.dynamics.com/.default"
    }
 
    access_token = None  # Default value if fails
 
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.post(token_url, data=data, timeout=10)
 
            if response.status_code != 200:
                logging.error(f"Non-200 status code received: {response.status_code} - {response.reason}")
                logging.error(f"Response content: {response.text}")
 
            response.raise_for_status()
 
            try:
                response_json = response.json()
                access_token = response_json.get("access_token")
                if not access_token:
                    logging.error(f"Access token not found in response JSON: {response_json}")
                else:
                    logging.info("Authentication Successful! Access token retrieved.")
                    return access_token
 
            except Exception as e:  # catches JSON errors and others
                logging.error(f"Error parsing token response: {e} | Response: {response.text}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
        except Exception as e:  # catches requests.exceptions.RequestException and others
            logging.error(f"Error during token fetch: {e}", exc_info=True)
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
        if attempt < max_retries:
            logging.warning(f"Attempt {attempt} failed. Retrying in {delay} seconds...")
            time.sleep(delay)
        else:
            logging.critical("Maximum retry attempts reached. Failed to fetch access token.")
 
    return None
 
# Step 1: Get access token
access_token = get_access_token(tenant_id, client_id, client_secret)

if not access_token:
    logging.critical("Failed to authenticate after retries.")
    sys.exit()

# **Step 2: Define API Endpoints**
bc_base_url = f"https://api.businesscentral.dynamics.com/v2.0/{tenant_id}/{environment}/ODataV4"

# sales_invoice_url = f"{bc_base_url}/Company('{company}')/SalesInvs?$top=100"

# filter_values = ["ORD-02065-Q0T7F6", "ORD-02074-Y1B8L9", "ORD-13913-B0C8C8"]
# filter_query = " or ".join([f"External_Document_No eq '{value}'" for value in filter_values])
# sales_invoice_url = f"{bc_base_url}/Company('{company}')/SalesInvs?$filter={filter_query}"


# # filter_date = "2024-10-07"  #YY-MM-DD
# # filter_date = "2022-09-14"
# filter_date = "2020-05-26"
# # filter_date = "2023-03-29"
# # filter_date = "2020-05-27"
# # filter_date = "2020-02-10"
# # filter_date = "2024-06-20"
# # filter_date = "2020-05-26"
# # filter_date = "2020-12-21"
# # filter_date = "2020-05-14"  #without external doc no
# date_filter = f"Posting_Date eq {filter_date}T00:00:00Z"
# sales_invoice_url = f"{bc_base_url}/Company('{company}')/SalesInvs?$filter={date_filter}"


# Get yesterday's date in YYYY-MM-DD format
yesterday_date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
date_filter = f"Posting_Date eq {yesterday_date}T00:00:00Z"
sales_invoice_url = f"{bc_base_url}/Company('{company}')/SalesInvs?$filter={date_filter}"

headers = {"Authorization": f"Bearer {access_token}", "Accept": "application/json"}

def fetch_data(url, headers):
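    """Fetch every page of a BC OData endpoint (following @odata.nextLink) and return the combined rows as a DataFrame."""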
    all_data = []
    next_page_url = url
    try:
        while next_page_url:
            response = requests.get(next_page_url, headers=headers)
            response.raise_for_status()  # Raise an error for bad responses (4xx, 5xx)
            data = response.json()
            all_data.extend(data.get("value", []))
            next_page_url = data.get("@odata.nextLink", None)  # Check if pagination exists
        return pd.DataFrame(all_data) if all_data else pd.DataFrame()  # Always return DataFrame
    except requests.exceptions.RequestException as e:
        logging.error(f"Error Fetching Data: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame()  # Return an empty DataFrame instead of None

# ✅ Fetch Sales Invoices safely
sales_invoices_df = fetch_data(sales_invoice_url, headers)

if sales_invoices_df is not None and not sales_invoices_df.empty:
    original_sales_invoices_df = sales_invoices_df.copy()
else:
    logging.warning("Sales invoices data is empty or could not be fetched.")

try:
    if sales_invoices_df.empty:
        logging.warning(f"No sales invoices found for the date: {yesterday_date}, So exit the code!")
        # print('No sales invoices found for the date')
        sys.exit()
    else:
        logging.info(f"Retrieved {len(sales_invoices_df)} sales invoices for the date: {yesterday_date}")
        # print("Retrieved", len(sales_invoices_df))
        # Process Sales Invoices
        sales_invoices_df.rename(columns={"No": "Document_No"}, inplace=True)
        filtered_sales_invoices_df = sales_invoices_df[["Document_No", "Sell_to_Customer_No", "Sell_to_Customer_Name", "External_Document_No", "Posting_Date"]]
        sales_invoices_df = filtered_sales_invoices_df.copy()

        sales_invoices_df['External_Document_No'] = sales_invoices_df['External_Document_No'].astype(str).str.strip()
        sales_invoices_df['External_Document_No'] = sales_invoices_df['External_Document_No'].replace('', pd.NA)

        missing_ext_doc_df = sales_invoices_df[sales_invoices_df['External_Document_No'].isna()].copy()
        
        sales_invoices_df = sales_invoices_df[sales_invoices_df['External_Document_No'].notna()].copy()

except Exception as e:
    logging.error(f"An error occurred while processing sales invoices: {e}", exc_info=True)
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

# **Step 3: Fetch Data from ACCIS**
def get_accis_token(accis_tenant_id, accis_client_id, accis_client_secret, resource_url):
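    """Request a client-credentials access token for the ACCIS (Dynamics CRM) API."""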
    token_url = f'https://login.microsoftonline.com/{accis_tenant_id}/oauth2/token'
    payload = {
        'grant_type': 'client_credentials',
        'client_id': accis_client_id,
        'client_secret': accis_client_secret,
        'resource': resource_url,
    }
    try:
        response = requests.post(token_url, data=payload)
        response.raise_for_status()
        return response.json().get('access_token')
    
    except requests.exceptions.RequestException as e:
        logging.error(f"Failed to retrieve ACCIS token: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return None
    

def fetch_accis_data_by_document_no(missing_ext_doc_df, accis_token, batch_size=20):
    """Fetch data from ACCIS based on Document_No (trit_bcinvoicenumber) in batches."""
    try:
        if missing_ext_doc_df.empty:
            # logging.info("No missing invoices external document number to check in ACCIS.")
            return pd.DataFrame()

        if not accis_token:
            raise ValueError("ACCIS token is missing in fetch_accis_data_by_document_no function!")

        headers = {
            "Authorization": f"Bearer {accis_token}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

        # Convert Document_No column to a list (remove NaN values)
        document_numbers = missing_ext_doc_df["Document_No"].dropna().tolist()

        # Split the document numbers into smaller batches
        total_batches = math.ceil(len(document_numbers) / batch_size)
        all_results = []

        for i in range(total_batches):
            batch = document_numbers[i * batch_size:(i + 1) * batch_size]
            accis_filter_query = " or ".join([f"trit_bcinvoicenumber eq '{doc_no}'" for doc_no in batch])
            # accis_api_url = f"https://scicdev.crm.dynamics.com/api/data/v9.1/invoices?$filter={accis_filter_query}"
            accis_api_url = f"https://scic.crm.dynamics.com/api/data/v9.1/invoices?$filter={accis_filter_query}"
            
            # logging.info(f"Fetching batch {i+1}/{total_batches}, URL Length: {len(accis_api_url)}")
            
            try:
                response = requests.get(accis_api_url, headers=headers)
                response.raise_for_status()
                batch_data = response.json().get("value", [])
                
                if batch_data:
                    all_results.extend(batch_data)
            except requests.exceptions.HTTPError as e:
                logging.error(f"HTTP error in batch {i+1}: {e}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            
            except Exception as e:
                logging.error(f"Error in batch {i+1}: {e}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                
        # Convert all results to DataFrame
        invoice_no_accis_df = pd.DataFrame(all_results)

        if invoice_no_accis_df.empty:
            logging.warning("No matching Document_No found in ACCIS.")
        else:
            invoice_no_accis_df.rename(columns={"trit_bcinvoicenumber": "Document_No"}, inplace=True)

        return invoice_no_accis_df

    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        
    return pd.DataFrame()

try:
    accis_token = get_accis_token(accis_tenant_id, accis_client_id, accis_client_secret, accis_base_url)
    accis_doc_df = fetch_accis_data_by_document_no(missing_ext_doc_df, accis_token, batch_size=20)
except Exception as e:
    logging.error(f"Error during ACCIS data fetch: {e}", exc_info=True)
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
    accis_doc_df = pd.DataFrame()  # Fall back to an empty frame so later steps do not raise a NameError

try:
    if not missing_ext_doc_df.empty:
        try:
            if accis_doc_df.empty:
                logging.warning("ACCIS data is empty. Marking all missing invoices as duplicates.")

                # ✅ Mark all invoices as duplicates since none were found in ACCIS
                missing_ext_doc_df["Duplicate_Flag"] = True
                duplicate_invoices = missing_ext_doc_df.copy()
                valid_missing_invoices = pd.DataFrame()  # No valid invoices
                duplicate_rows = duplicate_invoices
            else:
                try:
                    # ✅ Check if invoices exist in ACCIS
                    missing_ext_doc_df["Duplicate_Flag"] = missing_ext_doc_df["Document_No"].isin(accis_doc_df["Document_No"])

                    # ✅ Invoices found in ACCIS are kept as valid; those not found are treated as duplicates
                    valid_missing_invoices = missing_ext_doc_df[missing_ext_doc_df["Duplicate_Flag"]].copy()
                    duplicate_invoices = missing_ext_doc_df[~missing_ext_doc_df["Duplicate_Flag"]].copy()
                    duplicate_rows = duplicate_invoices
                
                except KeyError as e:
                    logging.error("Missing expected column in DataFrame: %s", str(e), exc_info=True)
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                    valid_missing_invoices = pd.DataFrame()
                    duplicate_invoices = pd.DataFrame()
                    duplicate_rows = pd.DataFrame()
                    
                except Exception as e:
                    logging.error("Unexpected error while processing missing invoices: %s", str(e), exc_info=True)
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                    valid_missing_invoices = pd.DataFrame()
                    duplicate_invoices = pd.DataFrame()
                    duplicate_rows = pd.DataFrame()

        except Exception as e:
            logging.error("Error while handling missing external document invoices: %s", str(e), exc_info=True)
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            duplicate_rows = pd.DataFrame()
            valid_missing_invoices = pd.DataFrame()

        # ✅ Print counts (if valid data exists)
        logging.info(f"Non-duplicate invoices count without External Document Number: {len(valid_missing_invoices)}")
        logging.info(f"Duplicate invoices count without External Document Number: {len(duplicate_invoices)}")

    else:
        logging.info("No missing external document invoices found. Skipping processing.")

except Exception as e:
    logging.critical("Critical error in processing missing external document invoices: %s", str(e), exc_info=True)
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

def fetch_accis_data(duplicate_rows, headers, batch_size=20):
    """Fetch ACCIS data in batches to avoid URL length issues."""
    try:
        # Convert External_Document_No column to a list (remove NaN values)
        filter_invoices = duplicate_rows["External_Document_No"].dropna().tolist()

        if not filter_invoices:
            logging.info("No External_Document_No values found.")
            return pd.DataFrame()

        total_batches = math.ceil(len(filter_invoices) / batch_size)
        all_results = []

        for i in range(total_batches):
            batch = filter_invoices[i * batch_size:(i + 1) * batch_size]
            accis_filter_query = " or ".join([f"trit_ordernumbercalc eq '{value}'" for value in batch])
            # accis_api_url = f"https://scicdev.crm.dynamics.com/api/data/v9.1/invoices?$filter={accis_filter_query}"
            accis_api_url = f"https://scic.crm.dynamics.com/api/data/v9.1/invoices?$filter={accis_filter_query}"
            
            # logging.info(f"Fetching batch {i+1}/{total_batches}, URL Length: {len(accis_api_url)}")
            
            try:
                response = requests.get(accis_api_url, headers=headers)
                response.raise_for_status()
                batch_data = response.json().get("value", [])

                if batch_data:
                    all_results.extend(batch_data)
                                
            except Exception as e:
                logging.error(f"Error in batch {i+1}: {e}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

                
        # Convert all results to DataFrame
        accis_df = pd.DataFrame(all_results)

        if accis_df.empty:
            logging.warning("No matching External_Document_No found in ACCIS.")
        else:
            accis_df = accis_df[["trit_ordernumbercalc", "trit_bcinvoicenumber", "totalamount_base"]]
            accis_df.rename(columns={
                "trit_ordernumbercalc": "External_Document_No",
                "trit_bcinvoicenumber": "Document_No",
            }, inplace=True)

        return accis_df

    except Exception as err:
        logging.error(f"An unexpected error occurred: {err}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, err)

    return pd.DataFrame()

def process_duplicates(sales_invoices_df, missing_ext_doc_df, accis_df):
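    """Split the sales invoices into duplicate and non-duplicate sets based on repeated External_Document_No values."""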
    try:
        # ✅ Step 1: Prepare DataFrames
        for df in [sales_invoices_df, missing_ext_doc_df]:
            df['External_Document_No'] = df['External_Document_No'].astype(str).str.strip()
            df['External_Document_No'] = df['External_Document_No'].replace('', pd.NA)

        # ✅ Step 2: Identify duplicates in both DataFrames
        def identify_duplicates(df):
            return df[
                df['External_Document_No'].notna() & 
                df.duplicated(subset=['External_Document_No'], keep=False)
            ].copy()

        sales_invoice_dup = identify_duplicates(sales_invoices_df)
        missing_ext_doc_no_dup = identify_duplicates(missing_ext_doc_df)

        # ✅ Step 3: Identify non-duplicates
        sales_non_dup = sales_invoices_df[~sales_invoices_df['External_Document_No'].isin(sales_invoice_dup['External_Document_No'])].copy()
        missing_non_dup = missing_ext_doc_df[~missing_ext_doc_df['External_Document_No'].isin(missing_ext_doc_no_dup['External_Document_No'])].copy()

        # ✅ Step 4: Merge both datasets
        final_duplicate_rows = pd.concat([sales_invoice_dup, missing_ext_doc_no_dup], ignore_index=True)
        final_non_duplicate_rows = pd.concat([sales_non_dup, missing_non_dup], ignore_index=True)

        return final_duplicate_rows, final_non_duplicate_rows

    except Exception as e:
        logging.error(f"Error processing duplicates: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame(), pd.DataFrame()


# Execute the updated process
duplicate_rows, non_duplicate_rows = process_duplicates(sales_invoices_df, missing_ext_doc_df, accis_doc_df)

token = get_accis_token(accis_tenant_id, accis_client_id, accis_client_secret, accis_base_url)
if token:
    headers = {'Authorization': f'Bearer {token}', 'OData-MaxVersion': '4.0', 'OData-Version': '4.0'}

    # Now duplicate_rows exists, so we can pass it to fetch_accis_data
    accis_df = fetch_accis_data(duplicate_rows, headers)

    if accis_doc_df is not None and not accis_doc_df.empty:
        accis_doc_df.rename(columns={
            "trit_ordernumbercalc": "External_Document_No",
            "trit_bcinvoicenumber": "Document_No",
        }, inplace=True)

    if accis_df is None:
        accis_df = pd.DataFrame()
    if accis_doc_df is None:
        accis_doc_df = pd.DataFrame()
    

    # ✅ Merge accis_df and accis_doc_df into one DataFrame
    if not accis_doc_df.empty:
        accis_df = pd.concat([accis_df, accis_doc_df], ignore_index=True)

    # Process duplicates again with the updated accis_df
    duplicate_rows, non_duplicate_rows = process_duplicates(sales_invoices_df, missing_ext_doc_df, accis_df)

else:
    logging.error("Failed to retrieve token, aborting execution.")

# logging.info(f"Duplicate {len(duplicate_rows)}")
# logging.info(f"Non-Duplicate {len(non_duplicate_rows)}")


try:
    if not duplicate_rows.empty:
        logging.info(f"Found {duplicate_rows.shape[0]} duplicate rows.")

        # ✅ Handle exact duplicates from accis_df
        if not accis_df.empty:
            try:
                exact_duplicates = accis_df[accis_df.duplicated(subset=["External_Document_No"], keep=False)]
            except Exception as e:
                logging.error("Error finding exact duplicates: %s", str(e))
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                exact_duplicates = pd.DataFrame()
        else:
            logging.warning("ACCIS data is empty. No exact duplicates to compare.")
            exact_duplicates = pd.DataFrame()

        # ✅ Identify non-exact duplicates
        exact_duplicate_docs = set(exact_duplicates["External_Document_No"]) if not exact_duplicates.empty else set()
        non_exact_duplicates = duplicate_rows[~duplicate_rows["External_Document_No"].isin(exact_duplicate_docs)]

        # ✅ Log exact duplicates if any
        if not exact_duplicates.empty:
            unique_exact_duplicates = exact_duplicates["External_Document_No"].dropna().unique().tolist()
            logging.info(f"Found {len(unique_exact_duplicates)} fully identical duplicate invoices in ACCIS.")

        # ✅ Proceed with handling non-exact duplicates
        if not non_exact_duplicates.empty:
            logging.info(f"Proceeding with normal duplicate removal for {non_exact_duplicates.shape[0]} invoices.")

            try:
                duplicate_docs = set(duplicate_rows["External_Document_No"])
                accis_docs = set(accis_df["Document_No"]) if not accis_df.empty else set()

                # Non-duplicate rows from original sales data
                non_duplicate_rows = sales_invoices_df[~sales_invoices_df["External_Document_No"].isin(duplicate_docs)]

                # Valid duplicates that exist in ACCIS
                if "Document_No" in duplicate_rows.columns:
                    validated_duplicates = duplicate_rows[duplicate_rows["Document_No"].isin(accis_docs)]
                else:
                    validated_duplicates = pd.DataFrame()
                    logging.warning("Missing 'Document_No' column in duplicate_rows.")

            except Exception as e:
                logging.error("Error filtering duplicate rows: %s", str(e))
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                non_duplicate_rows = sales_invoices_df.copy()
                validated_duplicates = pd.DataFrame()

            # ✅ Proceed to remove invalid duplicates
            if "Document_No" in duplicate_rows.columns:
                removed_duplicates = duplicate_rows[~duplicate_rows["Document_No"].isin(accis_docs)]
                if not exact_duplicates.empty:
                    removed_duplicates = removed_duplicates[
                        ~removed_duplicates["External_Document_No"].isin(exact_duplicates["External_Document_No"])
                    ]
            else:
                removed_duplicates = pd.DataFrame()
                logging.warning("Missing 'Document_No' column while identifying removed duplicates.")

            # ✅ Final dataset
            new_data = pd.concat([non_duplicate_rows, validated_duplicates], ignore_index=True)

            logging.info(f"Removed duplicates: {removed_duplicates.shape[0]} rows.")
            logging.info(f"Cleaned Data Size: {new_data.shape[0]}")

            # ✅ Deletion logic via API
            entity_name = "SalesInvs"
            delete_base_url = f"https://api.businesscentral.dynamics.com/v2.0/{tenant_id}/{environment}/ODataV4"
            headers = {"Authorization": f"Bearer {access_token}", "Accept": "application/json"}

            for doc_no in removed_duplicates["Document_No"].dropna().unique():
                delete_url = f"{delete_base_url}/Company('{company}')/{entity_name}(Document_Type='Invoice',No='{doc_no}')"
                try:
                    response = requests.delete(delete_url, headers=headers)
                    if response.status_code == 204:
                        logging.info(f"Successfully deleted invoice {doc_no}.")
                    else:
                        logging.error(f"Failed to delete invoice {doc_no}: {response.status_code} - {response.text}")
                except Exception as e:
                    logging.error("Error deleting invoice %s: %s", doc_no, str(e))

        else:
            logging.info("No non-exact duplicates to remove. Keeping all data except exact duplicates.")
            new_data = non_duplicate_rows if 'non_duplicate_rows' in locals() else sales_invoices_df

    else:
        logging.info("No duplicate rows found. Keeping all original data.")
        new_data = sales_invoices_df

except Exception as e:
    logging.error("Unexpected error in duplicate handling process: %s", str(e))
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
    new_data = sales_invoices_df

  

# ----------------------------------------------------------------

def get_token(accis_client_id, accis_client_secret, accis_tenant_id, accis_base_url, max_retries=3, retry_delay=5):
    token_url = f'https://login.microsoftonline.com/{accis_tenant_id}/oauth2/token'

    payload = {
        'grant_type': 'client_credentials',
        'client_id': accis_client_id,
        'client_secret':accis_client_secret,
        'resource': accis_base_url,
    }

    for attempt in range(1, max_retries + 1):
        try:
            response = requests.post(token_url, data=payload, timeout=10)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx, 5xx)

            token = response.json().get('access_token')
            if not token:
                logger.error(f"Access token missing in response: {response.text}")
                return None

            # logger.info("Access token retrieved successfully.")
            return token

        except requests.exceptions.RequestException as req_err:
            logger.error(f"Request error on attempt {attempt}: {req_err}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, req_err)

        except ValueError as json_err:
            logger.error(f"JSON decoding error on attempt {attempt}: {json_err}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, json_err)
            
        except Exception as e:
            logger.critical(f"Unexpected error on attempt {attempt}: {e}", exc_info=True)
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            
        if attempt < max_retries:
            logger.info(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    logger.critical("Failed to retrieve access token after multiple attempts.")
    return None

token = get_token(accis_client_id, accis_client_secret, accis_tenant_id, accis_base_url)

if token:
    headers = {
        'Authorization': f'Bearer {token}',
        'Prefer': 'odata.maxpagesize=5000',
        'OData-MaxVersion': '4.0',
        'OData-Version': '4.0'
    }
else:
    logger.critical("Token retrieval failed. API calls cannot proceed.")
    
def fetch_filtered_data(api_url, max_retries=3, delay=5):
    """Fetch data from API with pagination and retry mechanism, with auto-generated headers."""
    all_data = []
    next_link = api_url  # Initial API URL

    access_token = get_token(accis_client_id, accis_client_secret, accis_tenant_id, accis_base_url)
    if not access_token:
        logger.error("Failed to retrieve access token. Exiting API call.")
        return []

    headers = {
        'Authorization': f'Bearer {access_token}',
        'Prefer': 'odata.maxpagesize=5000',
        'OData-MaxVersion': '4.0',
        'OData-Version': '4.0'
    }

    # Ensure max_retries is an integer; fall back to the default if a bad value was passed
    if not isinstance(max_retries, int):
        max_retries = 3  # Default fallback

    for attempt in range(max_retries):  # Now max_retries is an integer 
        try:
            while next_link:
                response = requests.get(next_link, headers=headers)
                response.raise_for_status()  # Raises error for bad responses
                
                data = response.json()
                # print("API Response:", data)  # Debugging
                
                value_data = data.get("value", [])
                # print("Value Data Type:", type(value_data))  # Debugging

                if not isinstance(value_data, list):  # Check if it's a list
                    logger.error("Unexpected format: 'value' is not a list")
                    return []

                all_data.extend(value_data)  # Append fetched data
                
                next_link = data.get('@odata.nextLink', None)

            return all_data 

        except requests.exceptions.RequestException as e:
            logger.error(f"Attempt {attempt + 1}: Request error - {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            if attempt < max_retries - 1:
                logger.info(f"Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                logger.error("Max retries reached. Returning partial data.")

    return all_data  

# invoice_numbers = [
#     "S-INV107434", "S-INV107435", "S-INV107436", "S-INV107437", "S-INV107438",
#     "S-INV107188", "S-INV107189", "S-INV107190", "S-INV107191", "S-INV107192",
#     "S-INV107193", "S-INV107194","S-INV107159"
# ]


def fetch_filtered_invoices(invoice_numbers, accis_base_url, fetch_filtered_data, batch_size=20):
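    """Fetch ACCIS invoices by BC invoice number in batches, then split them into cleaned, cancellation, non-cancellation, and dues/membership/subscription sets."""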

    try:
        all_invoices = []

        for i in range(0, len(invoice_numbers), batch_size):
            batch = invoice_numbers[i:i+batch_size]
            invoice_filter = " or ".join([f"trit_bcinvoicenumber eq '{inv}'" for inv in batch])
            filter_query = f"({invoice_filter})"
            invoice_api_url = f"{accis_base_url}/api/data/v9.1/invoices?$filter={filter_query}"

            try:
                invoice_data = fetch_filtered_data(invoice_api_url)
                if invoice_data:
                    all_invoices.extend(invoice_data)
            except Exception as e:
                logging.error(f"Error fetching batch {i}-{i+batch_size}: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                
        df_invoices_raw = pd.DataFrame(all_invoices)
        logging.info(f"Total Invoices fetched from ACCIS : {len(df_invoices_raw)}")

        if df_invoices_raw.empty:
            logging.warning("No invoices found.")
            return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

        df_invoices_cleaned = df_invoices_raw[
            df_invoices_raw['trit_bcinvoicenumber'].notna() & 
            (df_invoices_raw['trit_bcinvoicenumber'] != "")
        ]

        # Extract invoices that are filtered out due to 'dues|membership|subscription'
        df_filtered_out_dues_membership = df_invoices_cleaned[
            df_invoices_cleaned["name"].str.contains("dues|membership|subscription", case=False, na=False)
        ]

        # Remove them from cleaned set
        df_invoices_cleaned = df_invoices_cleaned[
            ~df_invoices_cleaned["name"].str.contains("dues|membership|subscription", case=False, na=False)
        ]

        # Cancellation invoices
        df_cancellation_invoices = df_invoices_cleaned[
            df_invoices_cleaned["name"].str.contains("cancellation", case=False, na=False)
        ]

        # Non-cancellation invoices
        df_without_cancellation_invoices = df_invoices_cleaned[
            ~df_invoices_cleaned["name"].str.contains("cancellation", case=False, na=False)
        ]

        return (
            df_invoices_cleaned,
            df_cancellation_invoices,
            df_without_cancellation_invoices,
            df_filtered_out_dues_membership
        )

    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()


# Ensure invoice_numbers is not empty before calling the function
if not new_data.empty and "Document_No" in new_data.columns:
    invoice_numbers = new_data["Document_No"].dropna().tolist()
else:
    logging.error("Error: No valid invoice numbers found in new_data.")
    invoice_numbers = []

if invoice_numbers:
    (
        df_invoices,
        df_cancellation_invoices,
        df_without_cancellation_invoices,
        df_filtered_out_dues_membership
    ) = fetch_filtered_invoices(invoice_numbers, accis_base_url, fetch_filtered_data, batch_size=10)

    logging.info(
        f"Total accis invoices after name filter: {len(df_invoices)},\n "
        f"Cancellation in accis: {len(df_cancellation_invoices)},\n "
        f"Non-cancellation in accis: {len(df_without_cancellation_invoices)},\n "
        f"Filtered out in invoice table (dues/membership/subscription): {len(df_filtered_out_dues_membership)}"
    )

else:
    # Fall back to empty frames so the steps below do not fail with a NameError when nothing was fetched
    df_invoices = pd.DataFrame()
    df_cancellation_invoices = pd.DataFrame()
    df_without_cancellation_invoices = pd.DataFrame()
    df_filtered_out_dues_membership = pd.DataFrame()


def fetch_price_list_data(df_invoices, accis_base_url, fetch_filtered_data, batch_size=10):
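    """Fetch ACCIS price-level records for the invoices' unique _pricelevelid_value entries, in batches."""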
    try:
        #logging.info("🔄 Starting fetch_price_list_data function...")

        if df_invoices.empty:
            logging.warning("No invoices available to fetch price list data.")
            return df_invoices, pd.DataFrame()

        # Extract unique pricelevelid values
        unique_pricelevel_ids = df_invoices["_pricelevelid_value"].dropna().unique()
       # logging.info(f"📦 Found {len(unique_pricelevel_ids)} unique '_pricelevelid_value' entries.")

        if len(unique_pricelevel_ids) == 0:
            logging.warning("No valid 'pricelevelid' found in invoices.")
            return df_invoices, pd.DataFrame()

        all_price_list_data = []

        for i in range(0, len(unique_pricelevel_ids), batch_size):
            batch_ids = unique_pricelevel_ids[i: i + batch_size]
            pricelevel_filter_query = " or ".join([f"pricelevelid eq {id}" for id in batch_ids])
            price_list_api_url = f"{accis_base_url}/api/data/v9.1/pricelevels?$filter=({pricelevel_filter_query})"
            
            #logging.info(f"🔗 Fetching price list batch {i // batch_size + 1}")

            # Fetch Price List data for the batch
            price_list_data = fetch_filtered_data(price_list_api_url)

            if price_list_data:
                df_batch = pd.DataFrame(price_list_data)
                all_price_list_data.append(df_batch)
                #logging.info(f"✅ Batch {i // batch_size + 1} fetched: {len(df_batch)} records.")
            else:
                logging.warning(f"No data returned for batch {i // batch_size + 1}")

        if all_price_list_data:
            df_price_list = pd.concat(all_price_list_data, ignore_index=True)
            #logging.info(f"✅ Total price list entries fetched: {len(df_price_list)}")
        else:
            df_price_list = pd.DataFrame()
            logging.warning("No price list data collected from any batch.")

        return df_invoices, df_price_list

    except Exception as e:
        logging.error(f"Error in fetch_price_list_data: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return df_invoices, pd.DataFrame()

df_invoices, df_price_list = fetch_price_list_data(df_invoices, accis_base_url, fetch_filtered_data)


def merge_and_filter_invoices(df_invoices, df_price_list): 
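    """Merge invoices with their price list and drop rows whose price-list name contains dues, membership, or subscription."""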
    try:
        if df_invoices.empty:
            logging.warning("Invoices DataFrame is empty. Returning unfiltered invoices.")
            return df_invoices

        # Merge with price list
        df_merged = df_invoices.merge(
            df_price_list,
            left_on="_pricelevelid_value",
            right_on="pricelevelid",
            how="left",
            suffixes=("_inv", "_price")
        )

        # Ensure 'name_price' column exists before filtering
        if "name_price" in df_merged.columns:
            before_count = len(df_merged)

            # Filter condition
            filter_condition = df_merged["name_price"].str.contains("dues|membership|subscription", case=False, na=False)

            # ✅ Store removed invoices separately
            # pricelist_removed_invoices = df_merged[filter_condition].copy()

            # ✅ Apply filtering
            df_invoices_filtered = df_merged[~filter_condition]

            removed_count = before_count - len(df_invoices_filtered)

            # Log removed invoices
            logging.info(f"Invoices removed due to filtering by name (dues, membership, subscription): {removed_count}")

        else:
            logging.warning("Column 'name_price' not found in price list. No filtering applied.")
            df_invoices_filtered = df_merged

        # Print final counts
        logging.info(f"Final invoices after filtering: {len(df_invoices_filtered)}")

        return df_invoices_filtered.reset_index(drop=True)

    except Exception as e:
        logging.error(f"Error in merge_and_filter_invoices: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return df_invoices  # Fall back to the unfiltered invoices so the caller still receives a single DataFrame


df_invoices = merge_and_filter_invoices(df_invoices, df_price_list)


def fetch_invoice_lines(df_invoice, accis_base_url, fetch_filtered_data, batch_size=10):
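    """Fetch invoice line (invoicedetails) records from ACCIS for the given invoices, in batches, keeping only the required columns."""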
    all_invoice_lines = []
    required_columns = ["_invoiceid_value", "_salesorderdetailid_value", "baseamount", "invoicedetailid"]

    try:
        #logging.info("Starting fetch_invoice_lines function.")

        if df_invoice.empty:
            logging.warning("Invoice DataFrame is empty. Cannot fetch invoice lines.")
            return pd.DataFrame(columns=required_columns)

        invoice_ids = df_invoice["invoiceid"].dropna().tolist()
        #logging.info(f"Total invoice IDs to process: {len(invoice_ids)}")

        # Process in batches
        for i in range(0, len(invoice_ids), batch_size):
            batch_ids = invoice_ids[i: i + batch_size]
           # logging.info(f"Processing batch {i // batch_size + 1}")

            batch_filter = " or ".join(f"_invoiceid_value eq '{invoice_id}'" for invoice_id in batch_ids)
            invoice_line_api_url = f"{accis_base_url}/api/data/v9.1/invoicedetails?$filter={batch_filter}"

            invoice_line_data = fetch_filtered_data(invoice_line_api_url)

            if invoice_line_data:
                #logging.info(f"Fetched {len(invoice_line_data)} records for batch {i // batch_size + 1}")
                df_invoice_line = pd.DataFrame(invoice_line_data)

                # Select required columns
                df_invoice_line = df_invoice_line[required_columns].dropna(how="all")
                logging.debug(f"Filtered DataFrame for batch {i // batch_size + 1}:\n{df_invoice_line}")

                if not df_invoice_line.empty:
                    all_invoice_lines.append(df_invoice_line)
                else:
                    logging.warning(f"Fetched data is empty or NaN-only after filtering for batch {batch_ids}")
            else:
                logging.warning(f"No data returned from API for batch {batch_ids}")

        # Concatenate all invoice line DataFrames
        all_invoice_lines = [df for df in all_invoice_lines if not df.empty]

        if all_invoice_lines:
            df_all_invoice_lines = pd.concat(all_invoice_lines, ignore_index=True)
            # logging.info(f"Total valid invoice lines collected: {len(df_all_invoice_lines)}")
        else:
            df_all_invoice_lines = pd.DataFrame(columns=required_columns)
            logging.warning("No valid invoice lines found for provided invoices after batch processing.")

    except Exception as e:
        logging.error(f"Unexpected error in fetch_invoice_lines: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame(columns=required_columns)

   # logging.info("fetch_invoice_lines function completed successfully.")
    return df_all_invoice_lines

df_filtered_invoice_lines = fetch_invoice_lines(df_invoices, accis_base_url, fetch_filtered_data, batch_size=10)


def keep_highest_baseamount_lines(df_all_invoice_lines):
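    """Keep only the highest-baseamount line per invoice and drop invoices whose highest baseamount is exactly $1.00."""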
    try:
        #logging.info("Starting keep_highest_baseamount_lines function...")

        if df_all_invoice_lines.empty:
            logging.warning("Invoice lines DataFrame is empty. No processing done.")
            return pd.DataFrame()

       # logging.info(f"Processing {len(df_all_invoice_lines)} invoice lines to keep highest baseamount per invoice.")

        # Convert 'baseamount' to numeric
        df_all_invoice_lines["baseamount"] = pd.to_numeric(df_all_invoice_lines["baseamount"], errors="coerce")
        #logging.info("Converted 'baseamount' column to numeric.")

        # Handle potential NaNs in baseamount
        nan_count = df_all_invoice_lines["baseamount"].isna().sum()
        if nan_count > 0:
            logging.warning(f"Found {nan_count} NaN values in 'baseamount' after conversion.")

        # Group by invoice and keep the row with max baseamount
        df_highest_baseamount = df_all_invoice_lines.loc[
            df_all_invoice_lines.groupby("_invoiceid_value")["baseamount"].idxmax()
        ]
        #logging.info(f"Selected highest baseamount per invoice. Row count reduced to: {len(df_highest_baseamount)}")

        # Remove rows where highest baseamount is 1
        initial_count = len(df_highest_baseamount)
        df_filtered_lines = df_highest_baseamount[df_highest_baseamount["baseamount"] != 1]
        removed_count = initial_count - len(df_filtered_lines)

        logging.info(f" Removed {removed_count} rows with baseamount = $1.00. Final invoice lines count: {len(df_filtered_lines)}")

        return df_filtered_lines

    except Exception as e:
        logging.exception(f"Error in filtering highest baseamount invoice lines: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame()

df_invoice_lines = keep_highest_baseamount_lines(df_filtered_invoice_lines)


def merge_invoice_data(df_invoice, df_filtered):
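    """Right-join invoice headers onto the filtered invoice lines and keep the key columns needed for the dimension lookups."""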
    try:
        #logging.info("Starting merge_invoice_data function...")

        if df_invoice.empty:
            logging.warning(" The df_invoice DataFrame is empty. Skipping merge.")
            return pd.DataFrame()

        if df_filtered.empty:
            logging.warning(" The df_filtered DataFrame is empty. Skipping merge.")
            return pd.DataFrame()

        #logging.info(f"Merging {len(df_invoice)} invoice records with {len(df_filtered)} filtered invoice lines...")

        df_invoice_merged = df_invoice.merge(
            df_filtered,
            left_on="invoiceid",
            right_on="_invoiceid_value",
            how="right"
        )

        #logging.info(f"✅ Merge successful. Total merged records: {len(df_invoice_merged)}")

        df_final = df_invoice_merged[[
            "invoiceid", "_customerid_value", "_salesorderid_value",
            "invoicenumber", "createdon_inv", "trit_bcinvoicenumber",
            "invoicedetailid", "_invoiceid_value"
        ]]

      #  logging.info("Final selected columns extracted from merged data.")
        return df_final

    except Exception as e:
        logging.error(f"Error merging invoice data: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

    logging.info(" Returning empty DataFrame due to issue in merge or empty input.")
    
    return pd.DataFrame()

df_invoice_merged = merge_invoice_data(df_invoices, df_filtered_invoice_lines)


def process_invoices_with_dimensions(df_invoices, df_invoice_merged, accis_base_url, fetch_filtered_data, batch_size=20):
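    """For each invoice, fetch its ACCIS dimension lines and map them to sales channel, event, and state values; return the full result plus the invoices missing any dimension."""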
  #logging.info("🚀 Starting process_invoices_with_dimensions function...")

    # ✅ Step 1: Define dimension mapping
    #PROD dimension mapping
    dimension_mapping = {
        "3c208775-ad88-ea11-a811-000d3a8b6b20": "saleschannel_deliverymethod",
        "34208775-ad88-ea11-a811-000d3a8b6b20": "event_code",
        "3e208775-ad88-ea11-a811-000d3a8b6b20": "state_code"
    }
    
    #DEV dimension mapping
    # dimension_mapping = {
    # "7fe7517b-1316-ea11-a811-000d3a8aad27": "saleschannel_deliverymethod",
    # "0552bc32-1716-ea11-a811-000d3a8b66df": "event",
    # "89006263-1516-ea11-a811-000d3a8b66df": "state"
    #  }
    # logging.info(" Dimension mapping initialized.")

    invoice_data_list = []

    # ✅ Step 2: Loop through each invoice
    for idx, invoice_row in df_invoices.iterrows():
        try:
            invoice_id = invoice_row['invoiceid']
            invoice_number = invoice_row['trit_bcinvoicenumber']

            # ✅ Filter all lines for this invoice
            df_lines = df_invoice_merged[df_invoice_merged['_invoiceid_value'] == invoice_id]
            dimension_data = {}

            if not df_lines.empty:
                try:
                    detail_ids = df_lines['invoicedetailid'].tolist()

                    # ✅ Step 3: Split into batches
                    total_batches = math.ceil(len(detail_ids) / batch_size)
                    #logging.info(f"🔄 Splitting into {total_batches} batch(es) (batch size: {batch_size})")

                    for i in range(total_batches):
                        batch_ids = detail_ids[i * batch_size:(i + 1) * batch_size]

                        # ✅ OData batch filter using OR
                        filter_string = " or ".join([f"_trit_invoicedetail_value eq {id}" for id in batch_ids])
                        dim_line_api_url = f"{accis_base_url}/api/data/v9.1/trit_dimensionlines?$filter={filter_string}"
                        
                        logging.debug(f"Fetching dimension data from URL: {dim_line_api_url}")

                        df_dimension = pd.DataFrame(fetch_filtered_data(dim_line_api_url))

                        if not df_dimension.empty and '_trit_dimension_value' in df_dimension.columns:
                            for _, dim_row in df_dimension.iterrows():
                                dim_key = dim_row['_trit_dimension_value']
                                dim_label = dimension_mapping.get(dim_key)
                                if dim_label and dim_label not in dimension_data:
                                    dimension_data[dim_label] = dim_row.get('subject', 'No subject')
                            #logging.info(f"✅ Fetched dimension data for batch {i+1}/{total_batches}")
                       # else:
                           # logging.info(f"No dimension data found for batch {i+1}/{total_batches}")

                except Exception as e:
                    logging.warning(f"Failed to fetch dimensions for invoice {invoice_number}: {e}", exc_info=True)
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                    
            else:
                logging.warning(f"No invoice lines found for invoice {invoice_number}")

            # ✅ Add result row
            row_data = {"invoice_no": invoice_number}
            for col in dimension_mapping.values():
                row_data[col] = dimension_data.get(col, "No data")

            invoice_data_list.append(row_data)

        except Exception as e:
            logging.error(f"Error processing invoice row: {e}", exc_info=True)
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            
    # ✅ Create result DataFrame
    df_result = pd.DataFrame(invoice_data_list)
    df_result.fillna("No data", inplace=True)
    #logging.info(f"Created result DataFrame with {len(df_result)} records.")

    # ✅ Extract invoices that are missing one or more dimensions
    dimension_cols = list(dimension_mapping.values())
    if df_result.empty:
        df_no_data = pd.DataFrame(columns=["invoice_no"] + dimension_cols)
    else:
        df_no_data = df_result[df_result[dimension_cols].isin(["No data"]).any(axis=1)].copy()

    logging.info(f"Completed processing invoices. Total: {len(df_result)}, Missing dimension invoices: {len(df_no_data)}")

    return df_result, df_no_data

df_result, df_no_data = process_invoices_with_dimensions(df_invoices, df_invoice_merged, accis_base_url, fetch_filtered_data, batch_size=20)


def get_invoice_lines_with_no_data(df_no_data, df_invoice_merged):
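    """Return the merged invoice lines that belong to invoices missing dimension data."""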

    try:
        invoice_nos_with_no_data = df_no_data['invoice_no'].unique()
        filtered_df = df_invoice_merged[df_invoice_merged['trit_bcinvoicenumber'].isin(invoice_nos_with_no_data)].copy()

        #logging.info(f"Found {len(filtered_df)} rows in df_invoice_merged matching 'no data' invoices.")
        return filtered_df

    except Exception as e:
        logging.error(f"Error filtering invoice lines with no data: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame()  # Return empty DataFrame in case of error


df_invoice_merged_no_data = get_invoice_lines_with_no_data(df_no_data, df_invoice_merged)
df_invoice_merged_no_data = df_invoice_merged_no_data.drop_duplicates(subset=["trit_bcinvoicenumber"])


def fetch_account_and_merge_invoice_data(df_invoice, accis_base_url, batch_size=50):
    """Fetch account details using invoice data and merge in batches."""
    
    result_df = pd.DataFrame()
    account_data_list = []
    
    try:
        if df_invoice.empty:
            logging.warning("Invoice DataFrame is empty. Skipping account data fetch.")
            return result_df, result_df
        
        if "_customerid_value" not in df_invoice.columns:
            logging.error("Missing '_customerid_value' column in df_invoice.")
            return result_df, result_df

        unique_customer_ids = df_invoice["_customerid_value"].dropna().unique()
        if len(unique_customer_ids) == 0:
            logging.warning("No unique customer IDs found in invoices. Skipping API call.")
            return result_df, result_df
        
       # logging.info(f"Fetching account data for {len(unique_customer_ids)} unique customer IDs .")

        account_columns = ["accountid", "customertypecode", "_primarycontactid_value"]
        select_query = ",".join(account_columns)

        for i in range(0, len(unique_customer_ids), batch_size):
            batch = unique_customer_ids[i:i + batch_size]

            # Format the batch request properly
            account_id_filters = " or ".join([f"accountid eq '{aid}'" for aid in batch])
            
            # Encode the filter to handle long queries
            encoded_filter = requests.utils.quote(account_id_filters)

            account_api_url = f"{accis_base_url}/api/data/v9.1/accounts?$select={select_query}&$filter={encoded_filter}"
           # logging.info(f"Fetching batch {i // batch_size + 1}")

            batch_data = fetch_filtered_data(account_api_url)
            df_batch = pd.DataFrame(batch_data)

            if not df_batch.empty:
                account_data_list.append(df_batch)
               # logging.info(f"Successfully fetched {len(df_batch)} records for batch {i // batch_size + 1}.")

            time.sleep(0.5)  # Prevent API overload

        df_account_table = pd.concat(account_data_list, ignore_index=True) if account_data_list else pd.DataFrame()

        if df_account_table.empty:
            logging.warning("Account API returned empty dataset. No accounts fetched.")
            return result_df, result_df
        
       # logging.info(f" Total fetched accounts: {len(df_account_table)}")

        df_final = df_invoice.merge(
            df_account_table,
            left_on="_customerid_value",
            right_on="accountid",
            how="left",
            #suffixes=("_invoice", "_account")
        )
        df_final["customertypecode"] = df_final["customertypecode"].astype("Int64")

       # logging.info(f"Successfully merged account data. Final row count: {len(df_final)}.")

        return df_final, df_account_table

    except Exception as e:
        logging.exception(f"Unexpected error during account data fetch and merge: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return result_df, result_df


def fetch_and_merge_contact_data(df_filtered_lines, accis_base_url, batch_size=50):
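    """Fetch ACCIS contact records for the invoices' _customerid_value entries, in batches, and merge them onto the filtered lines."""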
    #logging.info("Starting fetch_and_merge_contact_data function...")

    try:
        if df_filtered_lines.empty:
            logging.warning("Input DataFrame (df_filtered_lines) is empty. Skipping contact data fetch.")
            return pd.DataFrame(), pd.DataFrame()

        # Step 1: Extract unique customer IDs
        if "_customerid_value" not in df_filtered_lines.columns:
            logging.error("Missing '_customerid_value' column in df_filtered_lines. Cannot proceed with contact fetch.")
            return pd.DataFrame(), pd.DataFrame()

        unique_customer_ids = df_filtered_lines["_customerid_value"].dropna().unique()
        if len(unique_customer_ids) == 0:
            logging.warning("No unique customer IDs found for contact fetch. Skipping.")
            return pd.DataFrame(), pd.DataFrame()

       # logging.info(f"Extracted {len(unique_customer_ids)} unique customer IDs for contact data fetch.")

        contact_columns = ["contactid", "_parentcustomerid_value"]
        select_query = ",".join(contact_columns)
        all_contact_data = []

        # ✅ Fetch data in batches
        for i in range(0, len(unique_customer_ids), batch_size):
            batch_ids = unique_customer_ids[i:i + batch_size]
            customer_filter = " or ".join([f"contactid eq '{cust_id}'" for cust_id in batch_ids])
            contact_api_url = f"{accis_base_url}/api/data/v9.1/contacts?$select={select_query}&$filter={customer_filter}"
            
            batch_num = (i // batch_size) + 1
           # logging.info(f"📤 Fetching contact data for batch {batch_num} (Records: {len(batch_ids)})")
            logging.debug(f"API URL: {contact_api_url}")

            try:
                batch_contact_data = fetch_filtered_data(contact_api_url)
                if batch_contact_data:
                    all_contact_data.extend(batch_contact_data)
                    #logging.info(f"✅ Batch {batch_num} fetched successfully.")
                else:
                    logging.warning(f"Batch {batch_num} returned no data.")
            except Exception as e:
                logging.exception(f"Error fetching contact data for batch {batch_num}: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

        # Convert collected data to DataFrame
        df_contact = pd.DataFrame(all_contact_data)
        if df_contact.empty:
            logging.warning("Contact API returned an empty dataset after all batches.")
            return pd.DataFrame(), pd.DataFrame()

        #logging.info(f"📥 Contact data fetched. Total records: {len(df_contact)}")

        # ✅ Merge contact data with df_filtered_lines
        try:
            df_final = df_filtered_lines.merge(
                df_contact, 
                left_on="_customerid_value", 
                right_on="contactid", 
                how="left"
            )
           # logging.info("🔗 Merged contact data with filtered invoice lines successfully.")
        except Exception as e:
            logging.exception(f"Error merging contact data: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            return pd.DataFrame(), pd.DataFrame()

       # logging.info("🏁 Contact data fetch and merge process completed.")
        return df_final, df_contact

    except Exception as e:
        logging.exception(f"Unexpected error during contact data fetch and merge: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame(), pd.DataFrame()


def fetch_account_and_merge_contact_data(df_invoice, accis_base_url, batch_size=50):
    """Fetch account details using invoice data and merge in batches."""
    
    result_df = pd.DataFrame()  # Default empty DataFrame
    account_data_list = []  # Stores batch-wise fetched account data
    
    try:
       # logging.info("🚀 Starting fetch_account_and_merge_contact_data...")

        # ✅ Validate invoice DataFrame
        if df_invoice.empty:
            logging.warning("Invoice DataFrame is empty. Skipping account data fetch.")
            return result_df, result_df
        
        if "_parentcustomerid_value" not in df_invoice.columns:
            logging.error("Missing '_parentcustomerid_value' column in df_invoice.")
            return result_df, result_df

        # 🔹 Extract unique customer IDs
        unique_customer_ids = df_invoice["_parentcustomerid_value"].dropna().unique()
        if len(unique_customer_ids) == 0:
            logging.warning("No unique contactid found in invoices. Skipping API call.")
            return result_df, result_df
        
       # logging.info(f"Preparing to fetch account data for {len(unique_customer_ids)} unique IDs in batches of {batch_size}.")

        # 🔹 Define account columns to retrieve
        account_columns = ["accountid", "customertypecode"]
        select_query = ",".join(account_columns)

        # 🔹 Process in batches
        for i in range(0, len(unique_customer_ids), batch_size):
            batch = unique_customer_ids[i:i + batch_size]
            account_id_filters = " or ".join([f"accountid eq '{aid}'" for aid in batch])

            # Construct API URL for the current batch
            account_api_url = f"{accis_base_url}/api/data/v9.1/accounts?$select={select_query}&$filter={account_id_filters}"
            batch_num = i // batch_size + 1
            #logging.info(f"🔍 Fetching batch {batch_num}: Records {i} to {i + len(batch) - 1}")
            logging.debug(f"API URL: {account_api_url}")

            try:
                batch_data = fetch_filtered_data(account_api_url)
                df_batch = pd.DataFrame(batch_data)

                if not df_batch.empty:
                    account_data_list.append(df_batch)
                    #logging.info(f"✅ Batch {batch_num} fetched successfully with {len(df_batch)} records.")
                else:
                    logging.warning(f"Batch {batch_num} returned no data.")
            except Exception as e:
                logging.exception(f"Error fetching data for batch {batch_num}: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

            time.sleep(0.5)  # Prevent API overload

        # 🔹 Combine all batch results
        df_account_table = pd.concat(account_data_list, ignore_index=True) if account_data_list else pd.DataFrame()

        if df_account_table.empty:
            logging.warning("Account API returned empty dataset. No accounts fetched.")
            return result_df, result_df
        
       # logging.info(f"📊 Total fetched accounts: {len(df_account_table)}")

        # 🔹 Merge Invoice Data with Account Data
        try:
            df_final = df_invoice.merge(
                df_account_table,
                left_on="_parentcustomerid_value",
                right_on="accountid",
                how="left"
            )
         #   logging.info("🔗 Successfully merged account data with invoice data.")
        except Exception as e:
            logging.exception(f"Error during merging account data with invoice data: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            return result_df, result_df

        # ✅ Convert customertypecode to integer safely
        if "customertypecode" in df_final.columns:
            try:
                df_final["customertypecode"] = df_final["customertypecode"].astype("Int64", errors="ignore")
                #logging.info("🔢 Converted 'customertypecode' to Int64 type.")
            except Exception as e:
                logging.warning(f"Failed to convert 'customertypecode' to Int64: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

       # logging.info(f"✅ Successfully merged and processed account data. Final row count: {len(df_final)}")

        return df_final, df_account_table

    except Exception as e:
        logging.exception(f"Unexpected error during account data fetch and merge: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return result_df, result_df
    
    
def merge_invoice_account_data(df_final, df_final_account):
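    """
    Merge invoice data (df_final) with account data (df_final_account) and split the result into
    three DataFrames: (remaining invoices, licensee invoices, corporate invoices).
    customertypecode 314310002 is treated as Corporate and 314310001 as Licensee, matching the
    option-set values used elsewhere in this script.
    """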

    # ✅ If both DataFrames are empty
    if df_final.empty and df_final_account.empty:
        logging.warning("Both DataFrames (df_final and df_final_account) are empty. Returning empty results.")
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    # ✅ If only df_final is empty
    if df_final.empty:
        logging.warning("df_final is empty. Returning df_final_account as the merged result.")
        df_final_account = df_final_account.rename(columns={
            "_salesorderid_value": "_salesorderid_value_invoice",
            "invoicedetailid": "invoicedetailid_invoice",
            "_invoiceid_value": "_invoiceid_value_invoice",
            "invoiceid": "invoiceid_invoice"
        })

        # ✅ Filter corporate invoices
        corporate_condition = df_final_account['customertypecode'] == 314310002
        corporate_invoices = df_final_account[corporate_condition].copy()
    
        # ✅ Filter licensee invoices
        licensee_condition = df_final_account['customertypecode'] == 314310001
        licensee_invoices = df_final_account[licensee_condition].copy()
    
        # ✅ Remaining data (non-corporate & non-licensee)
        # Handle NaN as a valid "other" type (i.e., neither licensee nor corporate)
        mask_other = ~corporate_condition & ~licensee_condition
        mask_other = mask_other.fillna(True)  # Treat NaN as True for "other"
        df_account_1 = df_final_account[mask_other]

        # Drop duplicates
        df_merged_result = df_account_1.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
        corporate_invoices = corporate_invoices.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
        licensee_invoices = licensee_invoices.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)

        # logging.info(f" Successfully merged and filtered data. Final row count: {len(df_merged_result)}.")
        logging.info(f" Removed {len(corporate_invoices)} corporate invoices after merging.")
        logging.info(f" Extracted {len(licensee_invoices)} licensee invoices.")
        return df_merged_result, licensee_invoices, corporate_invoices

    # ✅ If only df_final_account is empty
    if df_final_account.empty:
        logging.warning("df_final_account is empty. Returning df_final as the merged result.")
        df_final = df_final.rename(columns={
            "_salesorderid_value": "_salesorderid_value_invoice",
            "invoicedetailid": "invoicedetailid_invoice",
            "_invoiceid_value": "_invoiceid_value_invoice",
            "invoiceid": "invoiceid_invoice"
        })

        # ✅ Filter corporate invoices
        corporate_condition = df_final['customertypecode'] == 314310002
        corporate_invoices = df_final[corporate_condition].copy()
    
        # ✅ Filter licensee invoices
        licensee_condition = df_final['customertypecode'] == 314310001
        licensee_invoices = df_final[licensee_condition].copy()
    
        # ✅ Remaining data (non-corporate & non-licensee)
        # Handle NaN as a valid "other" type (i.e., neither licensee nor corporate)
        mask_other = ~corporate_condition & ~licensee_condition
        mask_other = mask_other.fillna(True)  # Treat NaN as True for "other"
        df_account_2 = df_final[mask_other]

        # Drop duplicates
        df_merged_result = df_account_2.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
        corporate_invoices = corporate_invoices.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
        licensee_invoices = licensee_invoices.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)

        # logging.info(f" Successfully merged and filtered data. Final row count: {len(df_merged_result)}.")
        logging.info(f" Removed {len(corporate_invoices)} corporate invoices after merging.")
        logging.info(f" Extracted {len(licensee_invoices)} licensee invoices.")
        return df_merged_result, licensee_invoices, corporate_invoices

    # ✅ If both DataFrames are present
    if "trit_bcinvoicenumber" not in df_final.columns or "trit_bcinvoicenumber" not in df_final_account.columns:
        logging.error("Missing 'trit_bcinvoicenumber' column in one or both DataFrames.")
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
    
    try:
        #logging.info("🔄 Merging df_final and df_final_account on 'trit_bcinvoicenumber'.")
    
        df_merged = df_final.merge(
            df_final_account,
            on="trit_bcinvoicenumber",
            how="left",
            suffixes=("_invoice", "_account")
        )
    
       # logging.info(f"🔗 Merged DataFrame shape: {df_merged.shape}")
    
        # Merge 'customertypecode' columns
        df_merged['customertypecode_merged'] = df_merged['customertypecode_invoice'].fillna(df_merged['customertypecode_account'])
    
        # ✅ Filter corporate invoices
        corporate_condition = df_merged['customertypecode_merged'] == 314310002
        corporate_invoices = df_merged[corporate_condition].copy()
    
        # ✅ Filter licensee invoices
        licensee_condition = df_merged['customertypecode_merged'] == 314310001
        licensee_invoices = df_merged[licensee_condition].copy()
    
        # ✅ Remaining data (non-corporate & non-licensee)
        df_filtered = df_merged[~(corporate_condition | licensee_condition)]
    
        # Drop duplicates
        df_merged_result = df_filtered.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
        corporate_invoices = corporate_invoices.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
        licensee_invoices = licensee_invoices.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
    
        removed_count = len(corporate_invoices)
    
        # Reorder columns for final output
        df_merged_result = df_merged_result[[
            "invoiceid_invoice", "_customerid_value_invoice", "trit_bcinvoicenumber",
             "_salesorderid_value_invoice", "invoicedetailid_invoice",
            "_invoiceid_value_invoice", "customertypecode_merged", "contactid"
        ]]
    
        #logging.info(f" Successfully merged and filtered data. Final row count: {len(df_merged_result)}.")
        logging.info(f" Removed {removed_count} corporate invoices after merging.")
        logging.info(f" Extracted {len(licensee_invoices)} licensee invoices.")
    
        # ✅ Return all three
        return df_merged_result, licensee_invoices, corporate_invoices
    
    except Exception as e:
        logging.exception(f"Unexpected error during invoice-account merging: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

if not df_invoices.empty:
    # Fetch and merge account data (returns two DataFrames)
    df_final_account, df_account_ = fetch_account_and_merge_invoice_data(df_invoice_merged_no_data, accis_base_url)
    df_final_processed, df_contact = fetch_and_merge_contact_data(df_invoice_merged_no_data, accis_base_url)
    df_final, df_account = fetch_account_and_merge_contact_data(df_final_processed, accis_base_url)
    df_merged_result, licensee_invoices, corporate_invoices = merge_invoice_account_data(df_final, df_final_account)
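# At this point (when df_invoices is non-empty):
#   - df_final_account: invoices whose customer is an account, joined on '_customerid_value' = 'accountid'
#   - df_final        : invoices whose customer is a contact, joined to the contact's parent account
#                       via '_parentcustomerid_value'
#   - df_merged_result / licensee_invoices / corporate_invoices: the combined result split by
#     'customertypecode' (314310001 = Licensee, 314310002 = Corporate)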


def fetch_order_lines(df_merged_result, accis_base_url, fetch_filtered_data, batch_size=25):
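    """
    Fetch sales order lines (salesorderdetails) in batches for the sales order IDs present on
    df_merged_result. Returns (df_all_order_lines, df_invoices_with_empty_salesorderid), where the
    second DataFrame holds invoices that have no sales order ID and therefore cannot be matched.
    """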
    order_lines = []
    df_invoices_with_empty_salesorderid = pd.DataFrame()

    try:
        if not df_merged_result.empty and "_salesorderid_value_invoice" in df_merged_result.columns:
            # Separate invoices with empty salesorder ID
            df_invoices_with_empty_salesorderid = df_merged_result[df_merged_result["_salesorderid_value_invoice"].isna()]

            # Continue with valid IDs
            valid_rows = df_merged_result.dropna(subset=["_salesorderid_value_invoice"])
            unique_salesorder_ids = valid_rows["_salesorderid_value_invoice"].astype(str).unique().tolist()

            for i in range(0, len(unique_salesorder_ids), batch_size):
                batch = unique_salesorder_ids[i:i + batch_size]
                salesorder_filter = " or ".join([f"_salesorderid_value eq {sid}" for sid in batch])

                # Construct API URL with batch filter
                order_line_api_url = (
                    f"{accis_base_url}/api/data/v9.1/salesorderdetails?"
                    f"$filter={salesorder_filter}&$top=500"
                )

                batch_num = i // batch_size + 1
                #logging.info(f"📦 Fetching Sales Order Lines Batch {batch_num}: Records {i} to {i + len(batch) - 1}")
                logging.debug("Order Line API URL WORKING ")

                try:
                    order_line_data = fetch_filtered_data(order_line_api_url)

                    if order_line_data:
                        df_order_line = pd.DataFrame(order_line_data)

                        if not df_order_line.empty and df_order_line.dropna(how="all").shape[0] > 0:
                            order_lines.append(df_order_line)
                except Exception as e:
                    logging.error(f"Error fetching batch {batch_num}: {e}")
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                    continue

        # Clean and combine
        order_lines = [df.dropna(how="all") for df in order_lines if not df.empty and df.dropna(how="all").shape[0] > 0]
        df_all_order_lines = pd.concat(order_lines, ignore_index=True) if order_lines else pd.DataFrame()

        required_columns = ["_cr726_event_value", "_salesorderid_value", "_trit_msevtmgt_eventregistration_value"]
        if not df_all_order_lines.empty:
            df_all_order_lines = df_all_order_lines[required_columns]
            df_all_order_lines = df_all_order_lines.drop_duplicates(subset=["_salesorderid_value", "_cr726_event_value"])
        else:
            # Keep the expected columns so downstream code can still reference them on an empty result
            df_all_order_lines = pd.DataFrame(columns=required_columns)

    except Exception as e:
        logging.error(f"Unexpected error in fetch_order_lines: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        df_all_order_lines = pd.DataFrame()

    # You can return both if needed
    return df_all_order_lines, df_invoices_with_empty_salesorderid



def merge_order_lines(df_final, df_all_order_lines):
    try:
        if not df_final.empty and not df_all_order_lines.empty:
            #logging.info("🔄 Merging order lines with final DataFrame...")

            df_final_merged = df_final.merge(
                df_all_order_lines,
                left_on="_salesorderid_value_invoice",
                right_on="_salesorderid_value",
                how="right"
            )
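            # Note: how="right" keeps every fetched order line; invoices in df_final without a
            # matching sales order line are dropped from the merged result here.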

            # before_dedup = len(df_final_merged)
            # df_final_merged = df_final_merged.drop_duplicates(
            #     subset=["trit_bcinvoicenumber", "_cr726_event_value"]
            # )
            # after_dedup = len(df_final_merged)

           # logging.info(f"✅ Merge completed. Records before deduplication: {before_dedup}, after: {after_dedup}")
        else:
            logging.warning("Skipped merge: One or both DataFrames are empty.")
            df_final_merged = df_final

    except Exception as e:
        logging.error(f"Error merging order lines: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        df_final_merged = df_final  # Return original DataFrame in case of failure

    return df_final_merged


def fetch_event_data(df_all_order_lines, accis_base_url, fetch_filtered_data, batch_size=20):
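    """
    Fetch msevtmgt_events in batches for the event IDs referenced by the order lines.
    Returns (df_all_events, df_removed_invoices); the second DataFrame holds order lines whose
    '_cr726_event_value' is missing and which are therefore excluded from event enrichment.
    """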

    events = []
    df_removed_invoices = pd.DataFrame()

    try:
        if not df_all_order_lines.empty and "_cr726_event_value" in df_all_order_lines.columns:
            # ✅ Separate and log invoices with None in '_cr726_event_value'
            df_removed_invoices = df_all_order_lines[df_all_order_lines["_cr726_event_value"].isna()].copy()
            removed_count = len(df_removed_invoices)
            logging.info(f" Removed {removed_count} invoices with missing event IDs.")

            # ✅ Filter valid event IDs
            filtered_event_ids = df_all_order_lines["_cr726_event_value"].dropna().astype(str).unique().tolist()

            # ✅ Process in batches
            for i in range(0, len(filtered_event_ids), batch_size):
                batch = filtered_event_ids[i:i + batch_size]
                batch_num = (i // batch_size) + 1

                # Create OData filter using 'or'
                filter_query = " or ".join([f"msevtmgt_eventid eq {eid}" for eid in batch])
                event_api_url = f"{accis_base_url}/api/data/v9.1/msevtmgt_events?$filter={filter_query}"

               # logging.info(f"📦 Fetching event batch {batch_num}: Records {i} to {i + len(batch) - 1}")
               # logging.debug(f"🔗 Event API URL: {event_api_url}")

                try:
                    event_data = fetch_filtered_data(event_api_url)
                    if event_data:
                        df_event = pd.DataFrame(event_data)
                        if not df_event.empty:
                            events.append(df_event)
                except Exception as e:
                    logging.error(f"Error fetching batch {batch_num}: {e}")
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                    continue

        # ✅ Combine and filter required columns
        df_all_events = pd.concat(events, ignore_index=True) if events else pd.DataFrame()
        required_columns = ["msevtmgt_eventid", "trit_type", "_trit_state_value", "trit_eventcode", "trit_deliverymechanisms"]
        df_all_events = df_all_events[required_columns] if not df_all_events.empty else pd.DataFrame()

    except Exception as e:
        logging.error(f"Unexpected error in fetch_event_data: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        df_all_events = pd.DataFrame()
        df_removed_invoices = pd.DataFrame()

    return df_all_events, df_removed_invoices

def merge_event_data(df_final_merged, df_all_events):
    try:
        if not df_final_merged.empty and not df_all_events.empty:
           # logging.info(f"🔄 Starting event data merge...")
            #logging.info(f"📄 Rows before filtering None event values: {len(df_final_merged)}")

            # ✅ Remove rows where '_cr726_event_value' is None
            df_final_merged = df_final_merged[df_final_merged["_cr726_event_value"].notna()].copy()
           # logging.info(f" Rows after removing None values in '_cr726_event_value': {len(df_final_merged)}")

            try:
                # ✅ Perform the merge
                df_final_merged = df_final_merged.merge(
                    df_all_events,
                    left_on="_cr726_event_value",
                    right_on="msevtmgt_eventid",
                    how="left"
                )
                # logging.info(f" Merge completed successfully. Total rows after merge: {len(df_final_merged)}")

            except Exception as e:
                logging.error(f"Error merging event data: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                return df_final_merged  # Return original DataFrame on merge failure

        else:
            logging.warning("One or both input DataFrames are empty. Skipping merge.")

    except Exception as e:
        logging.error(f"Unexpected error in merge_event_data: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

    return df_final_merged


def fetch_state_data(filtered_df, accis_base_url, fetch_filtered_data, batch_size=10):
    try:
        unique_state_ids = []
        if not filtered_df.empty and "_trit_state_value" in filtered_df.columns:
            try:
                unique_state_ids = filtered_df["_trit_state_value"].dropna().unique().tolist()
               # logging.info(f"🔍 Total unique state IDs: {len(unique_state_ids)}")
            except Exception as e:
                logging.error(f"Error extracting unique state IDs: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                return pd.DataFrame()
        
        state_data_list = []

        # 🔁 Process in batches
        for i in range(0, len(unique_state_ids), batch_size):
            batch = unique_state_ids[i:i + batch_size]
            batch_filter = " or ".join([f"trit_stateid eq '{sid}'" for sid in batch])
            state_url = f"{accis_base_url}/api/data/v9.1/trit_states?$filter={batch_filter}"

            batch_num = i // batch_size + 1
            #logging.info(f"📦 Fetching batch {batch_num}: Records {i} to {i + len(batch) - 1}")
            logging.debug(f"API URL: {state_url}")

            try:
                state_data = fetch_filtered_data(state_url)
                if state_data:
                    state_df = pd.DataFrame(state_data)
                    state_data_list.append(state_df)
            except Exception as e:
                logging.error(f"Error fetching state data for batch {batch_num}: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                continue

        try:
            df_all_states = pd.concat(state_data_list, ignore_index=True) if state_data_list else pd.DataFrame()
        except Exception as e:
            logging.error(f"Error concatenating state data: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            return pd.DataFrame()

        try:
            required_columns = ["trit_stateid", "trit_name"]
            df_all_states = df_all_states[required_columns] if not df_all_states.empty else pd.DataFrame()
        except Exception as e:
            logging.error(f"Error selecting required columns from state data: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            return pd.DataFrame()

    except Exception as e:
        logging.error(f"Unexpected error in fetch_state_data: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame()

   # logging.info(f"✅ Total states fetched: {len(df_all_states)}")
    return df_all_states

def merge_state_data(filtered_df, df_all_states):
    try:
        if not filtered_df.empty and not df_all_states.empty:
            try:
               # logging.info(f"Starting merge of filtered_df with state data...")
                #logging.info(f"filtered_df rows: {len(filtered_df)}, df_all_states rows: {len(df_all_states)}")

                final_event_data = filtered_df.merge(
                    df_all_states,
                    left_on="_trit_state_value",
                    right_on="trit_stateid",
                    how="left"
                )

               # logging.info(f"✅ Merge successful. Resulting rows: {len(final_event_data)}")

            except Exception as e:
                logging.error(f"Error merging state data: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                return filtered_df  # Return original DataFrame if merging fails
        else:
            logging.warning("One or both DataFrames are empty. Skipping merge.")
            final_event_data = filtered_df

    except Exception as e:
        logging.error(f"Unexpected error in merge_state_data: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return filtered_df  # Return the original DataFrame if an unexpected error occurs

    return final_event_data


def map_sales_channel_and_delivery(final_event_data):
    try:
        # Define mapping dictionaries
        sales_channel_mapping = {
            314310000: "SS",
            314310002: "LS",
            314310009: "UNIV",
            314310001: "High School"
        }

        delivery_method_mapping = {
            314310000: "CLASSROOM",
            314310001: "WEBINAR",
            314310002: "SELF-PACED",
            314310005: "CORP-CLASS",
            314310004: "CORP-WB",
            314310003: "BLENDED"
        }

        # Map values to new columns
        final_event_data["sales_channel"] = final_event_data["trit_type"].map(sales_channel_mapping)
        final_event_data["delivery_method"] = final_event_data["trit_deliverymechanisms"].map(delivery_method_mapping)

        #logging.info("✅ Sales channel and delivery method mapping completed.")

        # Combine them with fallback logic
        final_event_data["saleschannel_deliverymethod"] = final_event_data.apply(
            lambda row: (
                f"{row['sales_channel']}_{row['delivery_method']}" if pd.notna(row['sales_channel']) and pd.notna(row['delivery_method'])
                else row['sales_channel'] if pd.notna(row['sales_channel'])
                else row['delivery_method'] if pd.notna(row['delivery_method'])
                else None
            ),
            axis=1
        )
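        # Illustrative outcomes of the fallback above:
        #   sales_channel="SS",  delivery_method="WEBINAR" -> "SS_WEBINAR"
        #   sales_channel="LS",  delivery_method=NaN       -> "LS"
        #   sales_channel=NaN,   delivery_method="BLENDED" -> "BLENDED"
        #   both NaN                                       -> None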

       # logging.info("✅ Combined 'saleschannel_deliverymethod' column created successfully.")
        return final_event_data

    except Exception as e:
        logging.error(f"Error occurred in map_sales_channel_and_delivery: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise


def format_final_table(final_event_data):
    try:
       # logging.info("✅ Starting format_final_table function.")
        
        # Selecting the required columns
        columns_required = [
            "trit_bcinvoicenumber", 
            "trit_eventcode", 
            "invoicedetailid_invoice",
            "_invoiceid_value_invoice", 
            "state", 
            "saleschannel_deliverymethod"
        ]
        final_event_data = final_event_data[columns_required]
       # logging.info("✅ Selected necessary columns.")

        # Renaming columns
        final_event_data.rename(columns={
            "trit_bcinvoicenumber": "invoicenumber",
            "trit_eventcode": "event"
        }, inplace=True)
        #logging.info("✅ Columns renamed.")

        # Dropping duplicates
        final_event_data = final_event_data.drop_duplicates()
        #logging.info("✅ Removed duplicate records.")

        return final_event_data

    except Exception as e:
        logging.error(f"Error occurred in format_final_table: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise

def fetch_dimension_lines(df_filtered, final_event_data, accis_base_url, fetch_filtered_data, batch_size=20):
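    """
    Fetch the existing trit_dimensionlines for the invoice detail IDs on df_filtered (in batches)
    and join them with final_event_data (how="right", so every fetched dimension line is kept).
    Returns a 'dimension' DataFrame pairing each invoice detail with its existing subjects.
    """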
    try:
       # logging.info("✅ Starting fetch_dimension_lines function.")
        dimension_lines = []

        if not df_filtered.empty:
            filtered_invoice_detail_ids = df_filtered["invoicedetailid_invoice"].dropna().unique().tolist()
           # logging.info(f"🔍 Total invoice detail IDs to process: {len(filtered_invoice_detail_ids)}")

            # Batch processing
            for i in range(0, len(filtered_invoice_detail_ids), batch_size):
                batch_ids = filtered_invoice_detail_ids[i:i + batch_size]
                #logging.info(f"📦 Processing batch {i // batch_size + 1}: {batch_ids}")

                # Build filter query for batch
                filter_query = " or ".join([f"_trit_invoicedetail_value eq {id_}" for id_ in batch_ids])
                dimension_line_api_url = (
                    f"{accis_base_url}/api/data/v9.1/trit_dimensionlines?"
                    f"$filter={filter_query}"
                    f"&$select=subject,_trit_invoicedetail_value,_trit_dimension_value,_trit_dimensionvalue_value"
                )

                try:
                    dimension_data = fetch_filtered_data(dimension_line_api_url)
                    if dimension_data:
                        df_dimension_line = pd.DataFrame(dimension_data)
                        dimension_lines.append(df_dimension_line)
                        #logging.info(f"✅ Fetched dimension lines for batch {i // batch_size + 1}")
                except Exception as e:
                    logging.error(f"Error fetching dimension lines for batch {i // batch_size + 1}: {e}")
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                    continue

        # Combine all fetched data
        df_all_dimension_lines = pd.concat(dimension_lines, ignore_index=True) if dimension_lines else pd.DataFrame()
        #ogging.info(f"🧩 Total dimension lines fetched: {len(df_all_dimension_lines)}")

        # Merge with final_event_data
        try:
            df_final_ = final_event_data.merge(
                df_all_dimension_lines,
                left_on="invoicedetailid_invoice",
                right_on="_trit_invoicedetail_value",
                how="right"
            )
           # logging.info("🔗 Merged dimension lines with final event data.")
        except Exception as e:
            logging.error(f"Error merging dimension lines: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

            
            return pd.DataFrame(columns=[
                "invoicenumber", "event", "_invoiceid_value_invoice", 
                "invoicedetailid_invoice", "state", "saleschannel_deliverymethod", "subject"
            ])

        # Select required columns
        required_columns = [
            "invoicenumber", "event", "_invoiceid_value_invoice", 
            "invoicedetailid_invoice", "state", "saleschannel_deliverymethod", "subject"
        ]
        dimension = df_final_[required_columns] if not df_final_.empty else pd.DataFrame(columns=required_columns)

       # logging.info("✅ Finished processing fetch_dimension_lines function.")
        return dimension

    except Exception as e:
        logging.error(f"Error in fetch_dimension_lines: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

    return pd.DataFrame(columns=[
        "invoicenumber", "event", "_invoiceid_value_invoice", 
        "invoicedetailid_invoice", "state", "saleschannel_deliverymethod", "subject"
    ])


def group_and_aggregate_subjects(dimension):
    try:
        #logger.info("🚀 Starting group_and_aggregate_subjects function...")

        if dimension.empty:
            logger.warning("Received empty DataFrame. Returning empty result.")
            return pd.DataFrame(), {}

       # logger.info("Grouping by 'invoicedetailid_invoice' and aggregating 'subject' values...")

        # Group by "invoicedetailid_invoice" and aggregate subjects
        invoice_subjects = dimension.groupby("invoicedetailid_invoice")["subject"].apply(set).reset_index()
        logger.debug(f"Grouped and aggregated subjects:\n{invoice_subjects}")

        # Count the number of subjects for each "invoicedetailid"
        invoice_subjects["subject_count"] = invoice_subjects["subject"].apply(len)
        logger.debug(f" Added subject count column:\n{invoice_subjects}")

        # Convert into a dictionary for easier lookup
        invoice_subject_dict = dict(zip(invoice_subjects["invoicedetailid_invoice"], invoice_subjects["subject"]))
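        # Illustrative shape of the lookup (hypothetical GUID and subject values):
        #   {"9f3c1d2e-...": {"EVT123", "TX", "SS_WEBINAR"}, ...}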
       # logger.info(f" Converted subject groups to dictionary with {len(invoice_subject_dict)} entries.")

        # Rebuild a flat DataFrame with rows ordered by their 'invoicedetailid_invoice' group
        # (the rows are grouped again in process_invoice_data)
        grouped_data = dimension.groupby("invoicedetailid_invoice")
        grouped_data = grouped_data.apply(lambda x: x).reset_index(drop=True)
       # logger.info("✅ Final grouped DataFrame created successfully.")

        return grouped_data, invoice_subject_dict

    except KeyError as e:
        logger.error(f"KeyError occurred in group_and_aggregate_subjects function: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise
    except Exception as e:
        logger.error(f"An unexpected error occurred in group_and_aggregate_subjects function: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise

def process_invoice_data(grouped_data, invoice_subject_dict, accis_base_url):
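    """
    For each invoice detail, compare the event/state/sales-channel values to be posted against the
    subjects already present in ACCIS (invoice_subject_dict) and collect only the missing ones.
    Returns a DataFrame of {invoicenumber, subject} rows that still need to be created.
    """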
    try:
       # logger.info("🚀 Starting process_invoice_data function...")

        if grouped_data.empty:
            logger.info("No invoices to process. Exiting early.")
            return pd.DataFrame()

        # logger.info(f" Total unique invoice details to process: {grouped_data['invoicedetailid_invoice'].nunique()}")

        newly_added_data = []  # List to store new subjects with invoice numbers

        grouped_data = grouped_data.groupby("invoicedetailid_invoice")

        for invoice_detail_value, group in grouped_data:
            invoicenumber = None  # Defined up front so the except block below can log it safely
            try:
                invoicenumber = group["invoicenumber"].iloc[0]  # Get the actual invoice number
                #logger.info(f"🔍 Processing Invoice: {invoicenumber}")

                if invoice_detail_value:
                    existing_subjects = invoice_subject_dict.get(invoice_detail_value, set())
                    logger.debug(f"Existing subjects for invoice {invoicenumber}: {existing_subjects}")

                    event_data_to_post = {
                        "event": group["event"].iloc[0] if pd.notna(group["event"].iloc[0]) else None,
                        "saleschannel_deliverymethod": group["saleschannel_deliverymethod"].iloc[0] if pd.notna(group["saleschannel_deliverymethod"].iloc[0]) else None,
                        "state": group["state"].iloc[0] if pd.notna(group["state"].iloc[0]) else None,
                    }

                    # Filter out None values
                    event_data_to_post = {k: v for k, v in event_data_to_post.items() if v is not None}
                    logger.debug(f"📤 Cleaned event data to post for invoice {invoicenumber}: {event_data_to_post}")

                    # Check which subjects are new
                    new_data_to_post = [
                        {"subject": value}
                        for key, value in event_data_to_post.items()
                        if value not in existing_subjects
                    ]

                    # logger.info(f"New subjects to post for invoice {invoicenumber}: {new_data_to_post}")

                    for item in new_data_to_post:
                        newly_added_data.append({"invoicenumber": invoicenumber, "subject": item["subject"]})
            except Exception as e:
                logger.warning(f"Error while processing individual invoice {invoicenumber}: {e}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                

        # Convert newly added data into DataFrame
        df_newly_added = pd.DataFrame(newly_added_data)

        if not df_newly_added.empty:
            logger.info("Newly added subjects per invoice:")
            logger.info(df_newly_added.groupby("invoicenumber")["subject"].apply(list).reset_index())
        else:
            logger.info("No new subjects to add. All invoices are complete.")

        return df_newly_added

    except Exception as e:
        logger.error(f"Error occurred in process_invoice_data function: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise

  
def fetch_and_map_dimension_values(df_newly_added, accis_base_url, grouped_data):
    try:
      #  logger.info("🚀 Starting fetch_and_map_dimension_values function...")

        if df_newly_added.empty:
            logger.info("No new subjects to process for dimension values. Skipping function.")
            return pd.DataFrame(), None

        # Step 1: Fetch Unique Subjects
        unique_subjects = df_newly_added["subject"].drop_duplicates()
       # logger.info(f"Found {len(unique_subjects)} unique subjects to fetch GUIDs for.")

        dimension_values_list = []

        # Step 2: Fetch Dimension Values (GUIDs) from API
        for subject in unique_subjects:
            dimensionvalue_api_url = f"{accis_base_url}/api/data/v9.1/trit_dimensionvalues?$filter=trit_code eq '{subject}'&$select=trit_dimensionvalueid,_trit_dimension_value,trit_code"
            logger.debug(f"Fetching GUID for subject: {subject} | URL: {dimensionvalue_api_url}")
            try:
                df_temp = pd.DataFrame(fetch_filtered_data(dimensionvalue_api_url))
                if not df_temp.empty:
                    logger.debug(f"Retrieved data for subject '{subject}' with {len(df_temp)} records.")
                    dimension_values_list.append(df_temp)
                else:
                    logger.warning(f"No data returned for subject '{subject}'.")
            except Exception as e:
                logger.error(f"Error fetching GUID for subject '{subject}': {e}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

        # Step 3: Combine all fetched dimension values into a single DataFrame
        df_dimension_values = pd.concat(dimension_values_list, ignore_index=True) if dimension_values_list else pd.DataFrame()
       # logger.info(f"Total dimension value records fetched: {len(df_dimension_values)}")
        df_dimension_values = df_dimension_values.drop_duplicates(subset=["trit_code"])

        # Step 4: Merge GUIDs with Newly Added Subjects
        df_newly_added_with_guids = df_newly_added.merge(
            df_dimension_values,
            left_on="subject",
            right_on="trit_code",
            how="left"
        )
        #logger.info(f"🔗 Merged newly added subjects with GUIDs. Records after merge: {len(df_newly_added_with_guids)}")

        # Step 5: Merge with invoice details
        df_newly_added_with_guids = df_newly_added_with_guids.merge(
            grouped_data[["invoicedetailid_invoice", "invoicenumber"]],
            left_on="invoicenumber",
            right_on="invoicenumber",
            how="left"
        )
        #logger.info("Merged with grouped invoice details.")

        # Step 6: Extract Unique Dimension Values
        dimen = df_newly_added_with_guids["_trit_dimension_value"].drop_duplicates()
       # logger.info(f"🧩 Unique _trit_dimension_value count: {len(dimen)}")

        df_newly_added_with_guids = df_newly_added_with_guids.drop_duplicates(
            subset=["invoicenumber", "_trit_dimension_value", "trit_dimensionvalueid", "invoicedetailid_invoice"]
        )

       # logger.info(f"✅ Final shape of mapped data: {df_newly_added_with_guids.shape}")

        return df_newly_added_with_guids, dimen

    except Exception as e:
        logger.error(f"An unexpected error occurred in fetch_and_map_dimension_values function: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame(), None

def fetch_and_map_dimensions(dimen, accis_base_url, df_newly_added_with_guids):
    try:
      #  logger.info("🚀 Starting fetch_and_map_dimensions function...")

        if dimen.empty:
            logger.info("No dimensions to process.")
            return df_newly_added_with_guids

        dimension_list = []
        #logger.info(f"Found {len(dimen)} unique _trit_dimension_value(s) to fetch.")

        # Fetch dimension records from the API
        for dd in dimen:
            dimensionvalue_api_url = f"{accis_base_url}/api/data/v9.1/trit_dimensions?$filter=trit_dimensionid eq '{dd}'&$select=trit_dimensionid"
            logger.debug(f"Fetching dimension for ID: {dd} | URL: {dimensionvalue_api_url}")
            try:
                df_temp_dim = pd.DataFrame(fetch_filtered_data(dimensionvalue_api_url))
                if not df_temp_dim.empty:
                    logger.debug(f"Retrieved {len(df_temp_dim)} record(s) for dimension ID '{dd}'.")
                    dimension_list.append(df_temp_dim)
                else:
                    logger.warning(f"No records found for dimension ID '{dd}'.")
            except Exception as e:
                logger.error(f"Error fetching GUID for dimension '{dd}': {e}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        # Combine all fetched dimension records
        df_dimension = pd.concat(dimension_list, ignore_index=True) if dimension_list else pd.DataFrame()
        # logger.info(f"Total dimension records fetched: {len(df_dimension)}")

        # Drop the '@odata.etag' column if it exists
        if "@odata.etag" in df_dimension.columns:
            df_dimension = df_dimension.drop(columns=["@odata.etag"])
            logger.debug("Dropped '@odata.etag' column from dimension data.")

        # Merge with the original DataFrame
        df_newly_added_with_guids = df_newly_added_with_guids.merge(
            df_dimension,
            left_on="_trit_dimension_value",
            right_on="trit_dimensionid",
            how="left",
            suffixes=("", "_dim")
        )
       # logger.info(f"🔗 Merged dimension info into main DataFrame. Records after merge: {len(df_newly_added_with_guids)}")

        df_newly_added_with_guids = df_newly_added_with_guids.drop_duplicates(
            subset=["invoicenumber", "invoicedetailid_invoice", "_trit_dimension_value"]
        )
        #logger.info(f"✅ Final shape after dropping duplicates: {df_newly_added_with_guids.shape}")

        return df_newly_added_with_guids

    except Exception as e:
        logger.error(f"An unexpected error occurred in fetch_and_map_dimensions function: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise  # Re-raise the exception after logging it


def post_invoice_dimension_data(df_newly_added_with_guids, headers):
    try:
        dimensionline_api_url = f"{accis_base_url}/api/data/v9.1/trit_dimensionlines"
        successful_links = {}  # Dictionary to store successful links
        failed_links = {}  # Dictionary to store failed links

        for _, row in df_newly_added_with_guids.iterrows():
            invoice_detail_value = row["invoicedetailid_invoice"]
            invoicenumber = row["invoicenumber"]
            dimensionvalue_id = row["trit_dimensionvalueid"]
            dimension_id = row["trit_dimensionid"]
            subject = row["subject"]

            # Ensure all required values exist
            if pd.notna(invoice_detail_value) and pd.notna(dimensionvalue_id) and pd.notna(dimension_id):
                payload = {
                    "trit_InvoiceDetail_trit_DimensionLine@odata.bind": f"/invoicedetails({str(invoice_detail_value).strip()})",
                    "trit_DimensionValue_trit_DimensionLine@odata.bind": f"/trit_dimensionvalues({str(dimensionvalue_id).strip()})",
                    "trit_Dimension_trit_DimensionLine@odata.bind": f"/trit_dimensions({str(dimension_id).strip()})",
                    "subject": str(subject).strip()
                }
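                # The '@odata.bind' entries link the new trit_dimensionline to existing
                # invoicedetail, trit_dimensionvalue and trit_dimension records by GUID,
                # using the Dataverse Web API navigation-property binding syntax.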

                response = None  # Defined before the request so the except block can reference it safely
                try:
                    response = requests.post(dimensionline_api_url, headers=headers, json=payload)
                    response.raise_for_status()

                    # Add successful links to the dictionary
                    if invoicenumber not in successful_links:
                        successful_links[invoicenumber] = []
                    successful_links[invoicenumber].append(subject)

                except Exception as e:
                    # Add failed links to the dictionary
                    if invoicenumber not in failed_links:
                        failed_links[invoicenumber] = []
                    failed_links[invoicenumber].append(subject)
                    logger.error(f"Error linking Subject '{subject}', DimensionValue '{dimensionvalue_id}', and Dimension '{dimension_id}' to InvoiceDetail '{invoicenumber}': {e}")
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

                    if response is not None:
                        logger.error(f"Response: {response.text}")

                time.sleep(0.5)  # Rate limiting

            else:
                logger.warning(f"Skipping InvoiceDetail '{invoicenumber}' due to missing values (DimensionValue GUID or Dimension ID).")

        # Log the successful and failed links in the requested format
        logger.info(f"Total Successful update Invoices in accis: {len(successful_links)}")
        logger.info(f"Total Failed Invoices update in accis: {len(failed_links)}")
        
        # if successful_links:
        #     successful_invoices = list(successful_links.keys())  # Extract only the invoice numbers
        #     logger.info(f"Successful Invoices: {successful_invoices}")
        #     print("Successful Invoices:", successful_invoices)  # Print the list

        successful_invoices = list(successful_links.keys())
        if successful_links:
            logger.info(f"Successful Invoices and Subjects:\n{successful_links}")
        if failed_links:
            logger.error(f"Failed Invoices and Subjects:\n{failed_links}")

        return successful_links, failed_links, successful_invoices

    except Exception as e:
        logger.error(f"An unexpected error occurred in post_invoice_dimension_data function: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise  # Re-raise the exception after logging it


successful_invoices = []  # Default so later steps have a value even when no invoices are processed
try:
    if not df_merged_result.empty:
        #logger.info("Started : Processing valid invoice data")

        df_order_lines, df_missing_salesorder_ids = fetch_order_lines(df_merged_result, accis_base_url, fetch_filtered_data)
        df_final_merged = merge_order_lines(df_merged_result, df_order_lines)

        df_all_events, df_removed_invoices = fetch_event_data(df_final_merged, accis_base_url, fetch_filtered_data)
     
        df_invoice_not_find = pd.concat([df_missing_salesorder_ids, df_removed_invoices], ignore_index=True)
 
        df_merged = merge_event_data(df_final_merged, df_all_events)
        filtered_df = df_merged.groupby("invoiceid_invoice").first().reset_index()

        df_all_states = fetch_state_data(filtered_df, accis_base_url, fetch_filtered_data)
        final_event_data = merge_state_data(filtered_df, df_all_states)
 
        final_event_data.rename(columns={"trit_name": "state"}, inplace=True)
        final_event_data = map_sales_channel_and_delivery(final_event_data)
        final_event_data = format_final_table(final_event_data)

        accis_data_mapping = final_event_data.set_index("invoicenumber").to_dict(orient="index")

        dimension_data = fetch_dimension_lines(df_merged_result, final_event_data, accis_base_url, fetch_filtered_data, batch_size=20)
        grouped_data, invoice_subject_dict = group_and_aggregate_subjects(dimension_data)

        df_newly_added = process_invoice_data(grouped_data, invoice_subject_dict, accis_base_url)

        if df_newly_added is not None and not df_newly_added.empty:
            df_newly_added_with_guids, dimen = fetch_and_map_dimension_values(df_newly_added, accis_base_url, grouped_data)
        else:
            logger.info("Skipping fetch_and_map_dimension_values since df_newly_added is empty.")
            df_newly_added_with_guids, dimen = pd.DataFrame(), pd.DataFrame()

        if dimen is not None and not dimen.empty:
            df_newly_added_with_guids = fetch_and_map_dimensions(dimen, accis_base_url, df_newly_added_with_guids)
        else:
            logger.info("Skipping fetch_and_map_dimensions since dimen is empty.")
        
        # post_invoice_dimension_data returns (successful_links, failed_links, successful_invoices)
        successful_links, failed_links, successful_invoices = post_invoice_dimension_data(df_newly_added_with_guids, headers)
    
except Exception as e:
    successful_invoices = []
    logger.exception(f"Error in Method 1 (main invoice processing): {e}")
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
# ------------------------- end dim update ---------------------

def send_teams_message(webhook_url, missing_data_dict):
    try:
        teams_message = pymsteams.connectorcard(webhook_url)
        teams_message.title("Missing Invoices Summary")

        teams_message.text(
            "**Hello Team,**\n\n"
            f"Below are the details of invoices with missing data for {yesterday_date}. "
            "Please verify and take the necessary action."
        )

        # Iterate through each category and create a markdown table
        for label, df in missing_data_dict.items():
            if df.empty:
                continue

            section = pymsteams.cardsection()
            section.activityTitle(label)
            section.activitySubtitle(f"Total Records: {len(df)}")

            # Create markdown table from DataFrame
            markdown_table = df.head(20).to_markdown(index=False)  # Limit to first 20 rows for readability
            section.text(f"```\n{markdown_table}\n```")

            teams_message.addSection(section)

        teams_message.send()
        # logging.info("Teams message sent with detailed tables.")

    except Exception as e:
        logging.error(f"Failed to send Teams message: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 


try:
    if not df_merged_result.empty or not licensee_invoices.empty or not corporate_invoices.empty or (
        'unique_exact_duplicates' in locals() and (
            (isinstance(unique_exact_duplicates, pd.DataFrame) and not unique_exact_duplicates.empty) or
            (isinstance(unique_exact_duplicates, list) and len(unique_exact_duplicates) > 0)
        )
    ):
        missing_data_dict = {}

        try:
            if not df_merged_result.empty:
                missing_data_dict["Category A - Missing dimension invoices in ACCIS no event data in order line"] = df_invoice_not_find[["trit_bcinvoicenumber"]]
        except Exception as e:
            logging.error(f"Error adding Category A: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

        try:
            if not licensee_invoices.empty:
                missing_data_dict["Category B - Account type 'Licensee' with missing dimension"] = licensee_invoices[["trit_bcinvoicenumber"]]
        except Exception as e:
            logging.error(f"Error adding Category B: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

        try:
            if not corporate_invoices.empty:
                missing_data_dict["Category C - Account type 'Corporate' with missing dimension"] = corporate_invoices[["trit_bcinvoicenumber"]]
        except Exception as e:
            logging.error(f"Error adding Category C: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

        try:
            if 'unique_exact_duplicates' in locals():
                if isinstance(unique_exact_duplicates, list):
                    unique_exact_duplicates = pd.DataFrame(unique_exact_duplicates)

                if not unique_exact_duplicates.empty:
                    missing_data_dict["Category D - Duplicates were found in ACCIS while checking the original invoice- These order IDs appear to have been placed twice."] = unique_exact_duplicates
        except Exception as e:
            logging.error(f"Error processing Category D (duplicates): {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

        if missing_data_dict:
            webhook_url = ("https://qubespacein.webhook.office.com/webhookb2/5d9de417-54b6-47f4-aee3-e10693e7d804@d996ac79-e80a-4fc8-be1a-8f891b569988/IncomingWebhook/ad05cad5b545416a8ef25f5e7b056261/b187de49-aa7e-4d44-8967-d6c7f25ae53e/V24_C4l7YsaxIohQhi5SgnkrtH99Be9s4APXcGrKH9Lok1")# Your Teams webhook URL
           
            try:
                send_teams_message(webhook_url, missing_data_dict)
                logging.info("Teams message sent successfully.")
            except Exception as e:
                logging.error(f"Failed to send Teams message: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
except Exception as e:
    logging.error(f"Unexpected error in reporting missing invoice data: {e}")
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)


if df_no_data.empty:
    logging.warning("No invoices dimension update in ACCIS. Exiting the script.")
    sys.exit()

def get_bc_token(tenant_id, client_id, client_secret, max_retries=3, delay=5):
    """Fetch OAuth token for Business Central API with retry logic, even on unexpected errors."""
    # logging.info("Fetching Business Central access token...")
    token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
    token_data = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
        "scope": "https://api.businesscentral.dynamics.com/.default"
    }

    for attempt in range(max_retries):
        try:
            response = requests.post(token_url, data=token_data, timeout=10)
            response.raise_for_status()

            token = response.json().get("access_token")
            if not token:
                logging.error("Failed to retrieve Business Central access token. No token in response.")
                return None

            # logging.info("Business Central access token retrieved successfully.")
            return token

        except Exception as e:  # Catch both request exceptions and unexpected errors
            logging.error(f"Attempt {attempt + 1} failed: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            if attempt < max_retries - 1:
                logging.info(f"Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                logging.error("Max retries reached. Failed to fetch token.")
                return None


# Get access tokens
bc_access_token = get_bc_token(tenant_id, client_id, client_secret)

# Headers
bc_headers = {
    "Authorization": f"Bearer {bc_access_token}",
    "Accept": "application/json",
    "Content-Type": "application/json"
}


invoice_numbers = successful_invoices


def get_sales_invoice_lines_batchwise(invoice_numbers, batch_size=20, max_retries=3):
    """Fetch sales invoice lines in batches, with retry logic and delay to avoid rate-limiting."""
    all_invoice_lines = {"value": []}
    
    bc_base_url = f"https://api.businesscentral.dynamics.com/v2.0/{tenant_id}/{environment}/ODataV4/Company('{company}')"

    for i in range(0, len(invoice_numbers), batch_size):
        batch = invoice_numbers[i:i + batch_size]
        filter_query = " or ".join([f"Document_No eq '{num}'" for num in batch])
        invoice_url = f"{bc_base_url}/SalesInvLines?$filter={filter_query}"

        attempt = 1
        while attempt <= max_retries:
            try:
                response = requests.get(invoice_url, headers=bc_headers)
                response.raise_for_status()

                batch_data = response.json().get("value", [])
                all_invoice_lines["value"].extend(batch_data)

                # logging.info(f"Fetched batch {i//batch_size + 1} (attempt {attempt}) with {len(batch)} invoices.")
                break  # Exit retry loop if successful

            except Exception as e:
                logging.warning(f"Attempt {attempt} failed for batch {i//batch_size + 1}: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                attempt += 1
                time.sleep(1)  # Wait 1 second before retrying

                if attempt > max_retries:
                    logging.error(f"Max retries exceeded for batch {i//batch_size + 1}. Skipping this batch.")
        
        time.sleep(0.1)  # Delay between batches to avoid rate-limiting

    return all_invoice_lines
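
# Illustrative shape of the OData filter built above, for a batch of two invoices
# (the numbers are placeholders, not real documents):
#   SalesInvLines?$filter=Document_No eq 'S-INV000001' or Document_No eq 'S-INV000002'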

# Choose one of the options based on the requirement:

# Option 1: Fetch by specific invoice numbers
# invoice_numbers = ["S-INV107171", "S-INV104475", "S-INV104593"]
# invoice_numbers = sales_invoices_df.Document_No.to_list()

# invoice_numbers = successful_invoices
# invoice_data = get_sales_invoice_lines(invoice_numbers=invoice_numbers)
# Check if successful_invoices is empty before calling the function

if successful_invoices:
    invoice_data = get_sales_invoice_lines_batchwise(invoice_numbers, batch_size=20)
else:
    invoice_data = {"value": []}

# Option 2: Fetch top 10 invoices (use Top filter)
# invoice_data = get_sales_invoice_lines(use_top=True)

# Option 3: Fetch all invoices (no filter or top)
# invoice_data = get_sales_invoice_lines()

# Convert response to DataFrame
df_original = pd.DataFrame(invoice_data.get("value", []))

if df_original.empty:
    logging.warning("No sales invoice data retrieved.")
else:
    logging.info(f"Retrieved {len(df_original)} invoice records for update bc from invoices of successful update in ACCIS.")

def filter_invoices(df):
    """Filter out invoices with specific keywords in the Description column and Line_Amount == 1.00."""
    try:
        if "Description" in df.columns and "Document_No" in df.columns and "Line_Amount" in df.columns:
            keywords = ["Subscription", "Dues", "Membership"]
            pattern = "|".join(keywords)

            # Filter for keywords
            keyword_filtered = df["Description"].str.contains(pattern, case=False, na=False)
            # Filter for Line_Amount == 1.00
            amount_filtered = df["Line_Amount"] == 1.00

            # Combine both filters (rows to remove)
            combined_filter = keyword_filtered | amount_filtered

            # Prepare removed invoices for logging
            df_removed = df[combined_filter][["Document_No", "Description", "Line_Amount"]]

            # Final filtered dataframe (those NOT matching combined filter)
            df_filtered = df[~combined_filter]

            # Logging
            logging.info("Removed Invoices (based on Decscription or Line_Amount==1.00):")
            logging.info(df_removed)
            logging.info(f"Successfully filtered based on Decscription or Line_Amount==1.00) invoices. Remaining invoices count: {len(df_filtered)}")

            return df_filtered
        else:
            missing_cols = [col for col in ["Description", "Document_No", "Line_Amount"] if col not in df.columns]
            logging.warning(f"Required columns {missing_cols} not found in the data.")
            return df
    except Exception as e:
        logging.error(f"Error filtering invoices: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return df  # Return unfiltered data if error occurs

try:
    # Rename incorrect keys to match expected names
    final_event_data = final_event_data.rename(columns={
        "event": "ShortcutDimCode6",
        "saleschannel_deliverymethod": "Shortcut_Dimension_2_Code",
        "state": "ShortcutDimCode7"
    })

    # Define required fields after renaming
    fields_needed = ["invoicenumber", "Shortcut_Dimension_2_Code", "ShortcutDimCode6", "ShortcutDimCode7"]

    # Ensure all required fields exist
    missing_fields = [field for field in fields_needed if field not in final_event_data.columns]
    if missing_fields:
        logging.error(f"Missing fields in final_event_data after renaming: {missing_fields}")
        raise KeyError(f"Columns not found: {missing_fields}")

    # Create the correct mapping
    filtered_event_data = final_event_data[fields_needed]
    accis_data_mapping = filtered_event_data.set_index("invoicenumber").to_dict(orient="index")

except Exception as e:
    logging.exception(f"Unexpected error during mapping process: {e}")
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
    accis_data_mapping = {}  # fallback to empty mapping if error occurs
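
# Illustrative structure of accis_data_mapping built above (keys and values are placeholders):
#   {"S-INV000001": {"Shortcut_Dimension_2_Code": "...", "ShortcutDimCode6": "...", "ShortcutDimCode7": "..."}}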

def identify_missing_data(df_original, accis_base_url, fetch_filtered_data):
    try:
        missing_data_records = []
        invoices_with_missing_data = []  # Initialize as empty list

        # Identifying missing fields
        for key in ["Shortcut_Dimension_2_Code", "ShortcutDimCode6", "ShortcutDimCode7"]:
            missing_rows = df_original[df_original[key].isna() | (df_original[key] == "")]
            for _, row in missing_rows.iterrows():
                missing_data_records.append({
                    "Document_No": row["Document_No"],
                    "Line_No": row["Line_No"],
                    "Missing Field": key
                })

        df_missing = pd.DataFrame(missing_data_records)

        if df_missing.empty:
            invoices_without_missing_data = df_original["Document_No"].unique()
            logger.info(f"No missing values for these invoices: {list(invoices_without_missing_data)}")
            return []  # No invoices with missing data

        invoices_with_missing_data = df_missing["Document_No"].unique().tolist()
        logger.info(f"Invoices with missing data - {len(invoices_with_missing_data)}: {invoices_with_missing_data}")

        # Skip API call if no invoices have missing data
        if not invoices_with_missing_data:
            return []

        # # Prepare filter query for API call
        # filter_query = " or ".join([f"trit_bcinvoicenumber eq '{num}'" for num in invoices_with_missing_data])
        # invoice_api_url = f"{accis_base_url}/api/data/v9.1/invoices?$filter={filter_query}"

        # # Fetch data from API
        # try:
        #     invoice_data = fetch_filtered_data(invoice_api_url)
        #     df_invoice = pd.DataFrame(invoice_data)
        # except Exception as api_error:
        #     logger.error(f"Error fetching invoice data from API: {str(api_error)}", exc_info=True)
        #     return invoices_with_missing_data, pd.DataFrame()

        return invoices_with_missing_data

    except Exception as e:
        logger.error(f"Error identifying missing data: {str(e)}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return []  # Return an empty list in case of an error

# Example usage
# df_filtered = filter_invoices(df_original)
invoices_with_missing_data = identify_missing_data(df_original, accis_base_url, fetch_filtered_data)

#Step 2: Fetch Correct Data with Retry Logic
def get_invoice_data_with_retries(bc_invoice_numbers, accis_data_mapping, max_retries=2, delay=5):
    try:
        # Flatten the list if it contains nested lists
        flat_invoice_numbers = [inv for sublist in bc_invoice_numbers for inv in (sublist if isinstance(sublist, list) else [sublist])]

        invoice_data = {}
        still_missing = set(flat_invoice_numbers)  # Ensure all values are hashable strings

        for attempt in range(1, max_retries + 1):
            newly_found = []
            for inv in list(still_missing):
                data = accis_data_mapping.get(inv, {})
                if data:
                    invoice_data[inv] = data
                    still_missing.remove(inv)
                    newly_found.append(inv)

            # if newly_found:
                # logging.info(f"Invoices found in ACCIS: {len(newly_found)}")

            if not still_missing:
                break

            if attempt < max_retries:
                logging.warning(f"Invoices still missing after attempt {attempt}: {len(still_missing)} - {list(still_missing)}")
                time.sleep(delay)

        if still_missing:
            logging.error(f"Invoices not found in ACCIS after {max_retries} attempts: {len(still_missing)} - {list(still_missing)}")

        return invoice_data

    except Exception as e:
        logging.exception(f"Error in get_invoice_data_with_retries: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
        return {}  # Return empty dict if there's an error

invoice_data_to_fill = get_invoice_data_with_retries(invoices_with_missing_data, accis_data_mapping)

# Log summary
logging.info(f"Missing data: {len(invoices_with_missing_data)}, Retrieved data for {len(invoice_data_to_fill)} invoices.")


# Step 3: Update Missing Data
logs = []  # Store logs for better readability

success_invoices = set()
failed_invoices = set()
invoice_line_status = {}  # Track line update result per invoice

for index, row in df_original.iterrows():

    try:
        document_no = row["Document_No"]
        line_no = row["Line_No"]
    except Exception as e:
        logging.error(f"Unexpected error in data processing: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        failed_invoices.add(row.get("Document_No", "UNKNOWN"))  # document_no may not be bound yet in this branch
        continue

    try:
        correct_data = invoice_data_to_fill.get(document_no, {})
        if not correct_data:
            logging.info(f"No correct data found for Invoice: {document_no}, skipping.")
            continue
    except Exception as e:
        logging.error(f"Error fetching correct data for Invoice {document_no}: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
        failed_invoices.add(document_no)
        continue

    update_payload = {}
    update_fields = ["Shortcut_Dimension_2_Code", "ShortcutDimCode6", "ShortcutDimCode7"]

    try:
        for key in update_fields:
            if key in row and (pd.isna(row[key]) or row[key] == ""):
                if key in correct_data:
                    update_payload[key] = correct_data[key]
    except Exception as e:
        logging.error(f"Error processing update fields for Invoice {document_no}, Line {line_no}: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
        failed_invoices.add(document_no)
        continue

    if not update_payload:
        logging.info(f"No update needed for Line {line_no} (Invoice: {document_no}).")
        continue

    try:
        etag = row["@odata.etag"]
        document_type = row["Document_Type"]
        update_base_url = f"https://api.businesscentral.dynamics.com/v2.0/{tenant_id}/{environment}/ODataV4/Company('{company}')"
        update_url = f"{update_base_url}/SalesInvLines(Document_Type='{document_type}',Document_No='{document_no}',Line_No={line_no})"
    except Exception as e:
        logging.error(f"Unexpected error while constructing API request: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
    
        failed_invoices.add(document_no)
        continue

    patch_headers = {
        "Authorization": f"Bearer {bc_access_token}",
        "If-Match": etag,
        "Content-Type": "application/json"
    }
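
    # OData optimistic concurrency: the PATCH below must carry the row's current
    # @odata.etag in the If-Match header; a stale etag is rejected by the service
    # (HTTP 412), in which case the line would need to be re-fetched before retrying.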

    for attempt in range(2):  # max 2 attempts
        try:
            response = requests.patch(update_url, json=update_payload, headers=patch_headers)
            if response.status_code in [200, 204]:
                invoice_line_status.setdefault(document_no, []).append(True)
                break  # success, exit retry loop
            else:
                logging.error(f"Failed to update Line {line_no} (Invoice: {document_no}) | Status: {response.status_code} | Response: {response.text}")
                invoice_line_status.setdefault(document_no, []).append(False)
                break
        except requests.RequestException as e:
            logging.warning(f"Attempt {attempt+1} failed for Line {line_no} (Invoice: {document_no}): {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
            if attempt == 1:
                invoice_line_status.setdefault(document_no, []).append(False)

# After processing all lines, summarize by invoice
for invoice, results in invoice_line_status.items():
    if all(results):
        success_invoices.add(invoice)
    else:
        failed_invoices.add(invoice)

# Final summary
logging.info(f"BC Invoices Updated dimension Successfully: {len(success_invoices)} | Failed Invoices: {len(failed_invoices)}")
logging.info(f"BC Success dim Invoices: {sorted(success_invoices)}")
logging.info(f"BC Failed dim Invoices: {sorted(failed_invoices)}")
import requests
import pandas as pd
from datetime import datetime, timedelta
import logging
import time
import math
import pymsteams
import re
import traceback
import inspect
import sys


logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Clear existing handlers to prevent duplication
if logger.hasHandlers():
    logger.handlers.clear()

# Create handlers
console_handler = logging.StreamHandler()
file_handler = logging.FileHandler('Finance_Posting_Automation.log', encoding='utf-8')

formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)
file_handler.setFormatter(formatter)

# Add handlers
logger.addHandler(console_handler)
logger.addHandler(file_handler)



# **Step 1: Authentication for BC**
tenant_id = "95f69e7d-1811-4ab9-9b5a-eba95d3eba9b"
client_id = "6a77dbc5-5110-4b3f-85cf-692520941150"
client_secret = "mzyHgqNa=I[:D3Ygq_OtuVhRKlcvb776"
# environment = "CEDEV_DONOTDELETE" #Production
environment = 'Production'
# company = "DEV-121219" #PROD-NAIER
company = "PROD-NAIER"

# accis_base_url = 'https://scicdev.crm.dynamics.com'
accis_base_url = 'https://scic.crm.dynamics.com'
accis_client_id = '6e5989aa-4fd1-4015-8648-8f75609d607b'
accis_client_secret = 'mUv8Q~1u3JtjdAdw.ov1sRfjzTm3yS.f~cC6wcQS'
accis_tenant_id = '95f69e7d-1811-4ab9-9b5a-eba95d3eba9b'

TEAMS_WEBHOOK_URL = "https://qubespacein.webhook.office.com/webhookb2/5d9de417-54b6-47f4-aee3-e10693e7d804@d996ac79-e80a-4fc8-be1a-8f891b569988/IncomingWebhook/c1361199482347dbaee878fbf5891a34/b187de49-aa7e-4d44-8967-d6c7f25ae53e/V26k0hgLbavRevpPNilrdVaK6-0Ldnlvzg7_UVKQam-BY1"  # ← Replace with your actual webhook URL

yesterday_date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")


def notify_teams_on_exception(teams_webhook_url, exception_obj):
    try:
        # Get the calling function name automatically
        caller = inspect.stack()[1]
        function_name = caller.function
 
        # Build Teams message
        teams_message = pymsteams.connectorcard(teams_webhook_url)
        teams_message.title("Python Error Notification for Posting Date - {yesterday_date}")
        teams_message.summary("Error occurred in script")
 
        section = pymsteams.cardsection()
        section.activityTitle("Error Details")
        section.addFact("Function", function_name)
        section.addFact("Error Type", type(exception_obj).__name__)
        section.addFact("Message", str(exception_obj))
        section.text(f"```\n{traceback.format_exc(limit=2)}\n```")
 
        teams_message.addSection(section)
        teams_message.send()
    except Exception as teams_error:
        logging.error(f"Failed to send error to Teams: {teams_error}")

 
def get_access_token(tenant_id, client_id, client_secret, max_retries=3, delay=5):
    """
    Fetch access token with retry mechanism and detailed error handling.
    """
    token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
    data = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
        "scope": "https://api.businesscentral.dynamics.com/.default"
    }
 
    access_token = None  # Default value if fails
 
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.post(token_url, data=data, timeout=10)
 
            if response.status_code != 200:
                logging.error(f"Non-200 status code received: {response.status_code} - {response.reason}")
                logging.error(f"Response content: {response.text}")
 
            response.raise_for_status()
 
            try:
                response_json = response.json()
                access_token = response_json.get("access_token")
                if not access_token:
                    logging.error(f"Access token not found in response JSON: {response_json}")
                else:
                    logging.info("Authentication Successful! Access token retrieved.")
                    return access_token
 
            except Exception as e:  # catches JSON errors and others
                logging.error(f"Error parsing token response: {e} | Response: {response.text}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
        except Exception as e:  # catches requests.exceptions.RequestException and others
            logging.error(f"Error during token fetch: {e}", exc_info=True)
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
        if attempt < max_retries:
            logging.warning(f"Attempt {attempt} failed. Retrying in {delay} seconds...")
            time.sleep(delay)
        else:
            logging.critical("Maximum retry attempts reached. Failed to fetch access token.")
 
    return None
 
# Step 1: Get access token
access_token = get_access_token(tenant_id, client_id, client_secret)

if not access_token:
    logging.critical("Failed to authenticate after retries.")
    sys.exit()

# **Step 2: Define API Endpoints**
bc_base_url = f"https://api.businesscentral.dynamics.com/v2.0/{tenant_id}/{environment}/ODataV4"

# sales_invoice_url = f"{bc_base_url}/Company('{company}')/SalesInvs?$top=100"

# filter_values = ["ORD-02065-Q0T7F6", "ORD-02074-Y1B8L9", "ORD-13913-B0C8C8"]
# filter_query = " or ".join([f"External_Document_No eq '{value}'" for value in filter_values])
# sales_invoice_url = f"{bc_base_url}/Company('{company}')/SalesInvs?$filter={filter_query}"


# # filter_date = "2024-10-07"  #YY-MM-DD
# # filter_date = "2022-09-14"
# filter_date = "2020-05-26"
# # filter_date = "2023-03-29"
# # filter_date = "2020-05-27"
# # filter_date = "2020-02-10"
# # filter_date = "2024-06-20"
# # filter_date = "2020-05-26"
# # filter_date = "2020-12-21"
# # filter_date = "2020-05-14"  #without external doc no
# date_filter = f"Posting_Date eq {filter_date}T00:00:00Z"
# sales_invoice_url = f"{bc_base_url}/Company('{company}')/SalesInvs?$filter={date_filter}"


# Get yesterday's date in YYYY-MM-DD format
yesterday_date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
date_filter = f"Posting_Date eq {yesterday_date}T00:00:00Z"
sales_invoice_url = f"{bc_base_url}/Company('{company}')/SalesInvs?$filter={date_filter}"

headers = {"Authorization": f"Bearer {access_token}", "Accept": "application/json"}

def fetch_data(url, headers):
    all_data = []
    next_page_url = url
    try:
        while next_page_url:
            response = requests.get(next_page_url, headers=headers)
            response.raise_for_status()  # Raise an error for bad responses (4xx, 5xx)
            data = response.json()
            all_data.extend(data.get("value", []))
            next_page_url = data.get("@odata.nextLink", None)  # Check if pagination exists
        return pd.DataFrame(all_data) if all_data else pd.DataFrame()  # Always return DataFrame
    except requests.exceptions.RequestException as e:
        logging.error(f"Error Fetching Data: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame()  # Return an empty DataFrame instead of None
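
# fetch_data follows @odata.nextLink, so paginated SalesInvs results are accumulated
# into a single DataFrame rather than only the first page.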

# ✅ Fetch Sales Invoices safely
sales_invoices_df = fetch_data(sales_invoice_url, headers)

if sales_invoices_df is not None and not sales_invoices_df.empty:
    original_sales_invoices_df = sales_invoices_df.copy()
else:
    logging.warning("Sales invoices data is empty or could not be fetched.")

try:
    if sales_invoices_df.empty:
        logging.warning(f"No sales invoices found for the date: {yesterday_date}, So exit the code!")
        # print('No sales invoices found for the date')
        sys.exit()
    else:
        logging.info(f"Retrieved {len(sales_invoices_df)} sales invoices for the date: {yesterday_date}")
        # print("Retrieved", len(sales_invoices_df))
        # Process Sales Invoices
        sales_invoices_df.rename(columns={"No": "Document_No"}, inplace=True)
        filtered_sales_invoices_df = sales_invoices_df[["Document_No", "Sell_to_Customer_No", "Sell_to_Customer_Name", "External_Document_No", "Posting_Date"]]
        sales_invoices_df = filtered_sales_invoices_df.copy()

        sales_invoices_df['External_Document_No'] = sales_invoices_df['External_Document_No'].astype(str).str.strip()
        sales_invoices_df['External_Document_No'] = sales_invoices_df['External_Document_No'].replace('', pd.NA)

        missing_ext_doc_df = sales_invoices_df[sales_invoices_df['External_Document_No'].isna()].copy()
        
        sales_invoices_df = sales_invoices_df[sales_invoices_df['External_Document_No'].notna()].copy()
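        # At this point sales_invoices_df holds invoices that carry an order number,
        # while missing_ext_doc_df holds the ones reconciled against ACCIS further below.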

except Exception as e:
    logging.error(f"An error occurred while processing sales invoices: {e}", exc_info=True)
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

# **Step 3: Fetch Data from ACCIS**
def get_accis_token(accis_tenant_id, accis_client_id, accis_client_secret, bc_base_url):
    token_url = f'https://login.microsoftonline.com/{accis_tenant_id}/oauth2/token'
    payload = {
        'grant_type': 'client_credentials',
        'client_id': accis_client_id,
        'client_secret': accis_client_secret,
        'resource': accis_base_url,
    }
    try:
        response = requests.post(token_url, data=payload)
        response.raise_for_status()
        return response.json().get('access_token')
    
    except requests.exceptions.RequestException as e:
        logging.error(f"Failed to retrieve ACCIS token: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return None
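
# Note: this uses the Azure AD v1 token endpoint ('/oauth2/token', which takes a 'resource'
# parameter), whereas the Business Central token helpers above use the v2 endpoint
# ('/oauth2/v2.0/token' with a 'scope' parameter).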
    

def fetch_accis_data_by_document_no(missing_ext_doc_df, accis_token, batch_size=20):
    """Fetch data from ACCIS based on Document_No (trit_bcinvoicenumber) in batches."""
    try:
        if missing_ext_doc_df.empty:
            # logging.info("No missing invoices external document number to check in ACCIS.")
            return pd.DataFrame()

        if not accis_token:
            raise ValueError("ACCIS token is missing in fetch_accis_data_by_document_no function!")

        headers = {
            "Authorization": f"Bearer {accis_token}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

        # Convert Document_No column to a list (remove NaN values)
        document_numbers = missing_ext_doc_df["Document_No"].dropna().tolist()

        # Split the document numbers into smaller batches
        total_batches = math.ceil(len(document_numbers) / batch_size)
        all_results = []

        for i in range(total_batches):
            batch = document_numbers[i * batch_size:(i + 1) * batch_size]
            accis_filter_query = " or ".join([f"trit_bcinvoicenumber eq '{doc_no}'" for doc_no in batch])
            # accis_api_url = f"https://scicdev.crm.dynamics.com/api/data/v9.1/invoices?$filter={accis_filter_query}"
            accis_api_url = f"https://scic.crm.dynamics.com/api/data/v9.1/invoices?$filter={accis_filter_query}"
            
            # logging.info(f"Fetching batch {i+1}/{total_batches}, URL Length: {len(accis_api_url)}")
            
            try:
                response = requests.get(accis_api_url, headers=headers)
                response.raise_for_status()
                batch_data = response.json().get("value", [])
                
                if batch_data:
                    all_results.extend(batch_data)
            except requests.exceptions.HTTPError as e:
                logging.error(f"HTTP error in batch {i+1}: {e}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            
            except Exception as e:
                logging.error(f"Error in batch {i+1}: {e}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                
        # Convert all results to DataFrame
        invoice_no_accis_df = pd.DataFrame(all_results)

        if invoice_no_accis_df.empty:
            logging.warning("No matching Document_No found in ACCIS.")
        else:
            invoice_no_accis_df.rename(columns={"trit_bcinvoicenumber": "Document_No"}, inplace=True)

        return invoice_no_accis_df

    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        
    return pd.DataFrame()
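
# The 20-invoice batches above keep each Dataverse query URL short enough to avoid
# URL-length limits when many invoice numbers are OR-ed into a single $filter.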

try:
    accis_token = get_accis_token(accis_tenant_id, accis_client_id, accis_client_secret, bc_base_url)
    accis_doc_df = fetch_accis_data_by_document_no(missing_ext_doc_df, accis_token, batch_size=20)
except Exception as e:
    logging.error(f"Error during ACCIS data fetch: {e}", exc_info=True)
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
    accis_doc_df = pd.DataFrame()  # Fallback so later references to accis_doc_df do not fail

try:
    if not missing_ext_doc_df.empty:
        try:
            if accis_doc_df.empty:
                logging.warning("ACCIS data is empty. Marking all missing invoices as duplicates.")

                # ✅ Mark all invoices as duplicates since none were found in ACCIS
                missing_ext_doc_df["Duplicate_Flag"] = True
                duplicate_invoices = missing_ext_doc_df.copy()
                valid_missing_invoices = pd.DataFrame()  # No valid invoices
                duplicate_rows = duplicate_invoices
            else:
                try:
                    # ✅ Check if invoices exist in ACCIS
                    missing_ext_doc_df["Duplicate_Flag"] = missing_ext_doc_df["Document_No"].isin(accis_doc_df["Document_No"])

                    # ✅ Separate valid invoices & duplicates
                    valid_missing_invoices = missing_ext_doc_df[missing_ext_doc_df["Duplicate_Flag"]].copy()
                    duplicate_invoices = missing_ext_doc_df[~missing_ext_doc_df["Duplicate_Flag"]].copy()
                    duplicate_rows = duplicate_invoices
                
                except KeyError as e:
                    logging.error("Missing expected column in DataFrame: %s", str(e), exc_info=True)
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                    valid_missing_invoices = pd.DataFrame()
                    duplicate_invoices = pd.DataFrame()
                    duplicate_rows = pd.DataFrame()
                    
                except Exception as e:
                    logging.error("Unexpected error while processing missing invoices: %s", str(e), exc_info=True)
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                    valid_missing_invoices = pd.DataFrame()
                    duplicate_invoices = pd.DataFrame()
                    duplicate_rows = pd.DataFrame()

        except Exception as e:
            logging.error("Error while handling missing external document invoices: %s", str(e), exc_info=True)
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            duplicate_rows = pd.DataFrame()
            valid_missing_invoices = pd.DataFrame()

        # ✅ Print counts (if valid data exists)
        logging.info(f"Non-duplicate invoices count without External Document Number: {len(valid_missing_invoices)}")
        logging.info(f"Duplicate invoices count without External Document Number: {len(duplicate_invoices)}")

    else:
        logging.info("No missing external document invoices found. Skipping processing.")

except Exception as e:
    logging.critical("Critical error in processing missing external document invoices: %s", str(e), exc_info=True)
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

def fetch_accis_data(duplicate_rows, headers, batch_size=20):
    """Fetch ACCIS data in batches to avoid URL length issues."""
    try:
        # Convert External_Document_No column to a list (remove NaN values)
        filter_invoices = duplicate_rows["External_Document_No"].dropna().tolist()

        if not filter_invoices:
            logging.info("No External_Document_No values found.")
            return pd.DataFrame()

        total_batches = math.ceil(len(filter_invoices) / batch_size)
        all_results = []

        for i in range(total_batches):
            batch = filter_invoices[i * batch_size:(i + 1) * batch_size]
            accis_filter_query = " or ".join([f"trit_ordernumbercalc eq '{value}'" for value in batch])
            # accis_api_url = f"https://scicdev.crm.dynamics.com/api/data/v9.1/invoices?$filter={accis_filter_query}"
            accis_api_url = f"https://scic.crm.dynamics.com/api/data/v9.1/invoices?$filter={accis_filter_query}"
            
            # logging.info(f"Fetching batch {i+1}/{total_batches}, URL Length: {len(accis_api_url)}")
            
            try:
                response = requests.get(accis_api_url, headers=headers)
                response.raise_for_status()
                batch_data = response.json().get("value", [])

                if batch_data:
                    all_results.extend(batch_data)
                                
            except Exception as e:
                logging.error(f"Error in batch {i+1}: {e}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

                
        # Convert all results to DataFrame
        accis_df = pd.DataFrame(all_results)

        if accis_df.empty:
            logging.warning("No matching External_Document_No found in ACCIS.")
        else:
            accis_df = accis_df[["trit_ordernumbercalc", "trit_bcinvoicenumber", "totalamount_base"]]
            accis_df.rename(columns={
                "trit_ordernumbercalc": "External_Document_No",
                "trit_bcinvoicenumber": "Document_No",
            }, inplace=True)

        return accis_df

    except Exception as err:
        logging.error(f"An unexpected error occurred: {err}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, err)

    return pd.DataFrame()

def process_duplicates(sales_invoices_df, missing_ext_doc_df, accis_df):
    try:
        # ✅ Step 1: Prepare DataFrames
        for df in [sales_invoices_df, missing_ext_doc_df]:
            df['External_Document_No'] = df['External_Document_No'].astype(str).str.strip()
            df['External_Document_No'] = df['External_Document_No'].replace('', pd.NA)

        # ✅ Step 2: Identify duplicates in both DataFrames
        def identify_duplicates(df):
            return df[
                df['External_Document_No'].notna() & 
                df.duplicated(subset=['External_Document_No'], keep=False)
            ].copy()

        sales_invoice_dup = identify_duplicates(sales_invoices_df)
        missing_ext_doc_no_dup = identify_duplicates(missing_ext_doc_df)

        # ✅ Step 3: Identify non-duplicates
        sales_non_dup = sales_invoices_df[~sales_invoices_df['External_Document_No'].isin(sales_invoice_dup['External_Document_No'])].copy()
        missing_non_dup = missing_ext_doc_df[~missing_ext_doc_df['External_Document_No'].isin(missing_ext_doc_no_dup['External_Document_No'])].copy()

        # ✅ Step 4: Merge both datasets
        final_duplicate_rows = pd.concat([sales_invoice_dup, missing_ext_doc_no_dup], ignore_index=True)
        final_non_duplicate_rows = pd.concat([sales_non_dup, missing_non_dup], ignore_index=True)

        return final_duplicate_rows, final_non_duplicate_rows

    except Exception as e:
        logging.error(f"Error processing duplicates: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame(), pd.DataFrame()


# Execute the updated process
duplicate_rows, non_duplicate_rows = process_duplicates(sales_invoices_df, missing_ext_doc_df, accis_doc_df)

token = get_accis_token(accis_tenant_id, accis_client_id, accis_client_secret, accis_base_url)
if token:
    headers = {'Authorization': f'Bearer {token}', 'OData-MaxVersion': '4.0', 'OData-Version': '4.0'}

    # Now duplicate_rows exists, so we can pass it to fetch_accis_data
    accis_df = fetch_accis_data(duplicate_rows, headers)

    if accis_doc_df is not None and not accis_doc_df.empty:
        accis_doc_df.rename(columns={
            "trit_ordernumbercalc": "External_Document_No",
            "trit_bcinvoicenumber": "Document_No",
        }, inplace=True)

    if accis_df is None:
        accis_df = pd.DataFrame()
    if accis_doc_df is None:
        accis_doc_df = pd.DataFrame()
    

    # ✅ Merge accis_df and accis_doc_df into one DataFrame
    if not accis_doc_df.empty:
        accis_df = pd.concat([accis_df, accis_doc_df], ignore_index=True)

    # Process duplicates again with the updated accis_df
    duplicate_rows, non_duplicate_rows = process_duplicates(sales_invoices_df, missing_ext_doc_df, accis_df)

else:
    logging.error("Failed to retrieve token, aborting execution.")

# logging.info(f"Duplicate {len(duplicate_rows)}")
# logging.info(f"Non-Duplicate {len(non_duplicate_rows)}")


try:
    if not duplicate_rows.empty:
        logging.info(f"Found {duplicate_rows.shape[0]} duplicate rows.")

        # ✅ Handle exact duplicates from accis_df
        if not accis_df.empty:
            try:
                exact_duplicates = accis_df[accis_df.duplicated(subset=["External_Document_No"], keep=False)]
            except Exception as e:
                logging.error("Error finding exact duplicates: %s", str(e))
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                exact_duplicates = pd.DataFrame()
        else:
            logging.warning("ACCIS data is empty. No exact duplicates to compare.")
            exact_duplicates = pd.DataFrame()

        # ✅ Identify non-exact duplicates
        exact_duplicate_docs = set(exact_duplicates["External_Document_No"]) if not exact_duplicates.empty else set()
        non_exact_duplicates = duplicate_rows[~duplicate_rows["External_Document_No"].isin(exact_duplicate_docs)]

        # ✅ Log exact duplicates if any
        if not exact_duplicates.empty:
            unique_exact_duplicates = exact_duplicates["External_Document_No"].dropna().unique().tolist()
            logging.info(f"Found {len(unique_exact_duplicates)} fully identical duplicate invoices in ACCIS.")

        # ✅ Proceed with handling non-exact duplicates
        if not non_exact_duplicates.empty:
            logging.info(f"Proceeding with normal duplicate removal for {non_exact_duplicates.shape[0]} invoices.")

            try:
                duplicate_docs = set(duplicate_rows["External_Document_No"])
                accis_docs = set(accis_df["Document_No"]) if not accis_df.empty else set()

                # Non-duplicate rows from original sales data
                non_duplicate_rows = sales_invoices_df[~sales_invoices_df["External_Document_No"].isin(duplicate_docs)]

                # Valid duplicates that exist in ACCIS
                if "Document_No" in duplicate_rows.columns:
                    validated_duplicates = duplicate_rows[duplicate_rows["Document_No"].isin(accis_docs)]
                else:
                    validated_duplicates = pd.DataFrame()
                    logging.warning("Missing 'Document_No' column in duplicate_rows.")

            except Exception as e:
                logging.error("Error filtering duplicate rows: %s", str(e))
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                non_duplicate_rows = sales_invoices_df.copy()
                validated_duplicates = pd.DataFrame()

            # ✅ Proceed to remove invalid duplicates
            if "Document_No" in duplicate_rows.columns:
                removed_duplicates = duplicate_rows[~duplicate_rows["Document_No"].isin(accis_docs)]
                if not exact_duplicates.empty:
                    removed_duplicates = removed_duplicates[
                        ~removed_duplicates["External_Document_No"].isin(exact_duplicates["External_Document_No"])
                    ]
            else:
                removed_duplicates = pd.DataFrame()
                logging.warning("Missing 'Document_No' column while identifying removed duplicates.")

            # ✅ Final dataset
            new_data = pd.concat([non_duplicate_rows, validated_duplicates], ignore_index=True)

            logging.info(f"Removed duplicates: {removed_duplicates.shape[0]} rows.")
            logging.info(f"Cleaned Data Size: {new_data.shape[0]}")

            # ✅ Deletion logic via API
            entity_name = "SalesInvs"
            delete_base_url = f"https://api.businesscentral.dynamics.com/v2.0/{tenant_id}/{environment}/ODataV4"
            headers = {"Authorization": f"Bearer {access_token}", "Accept": "application/json"}

            # for doc_no in removed_duplicates["Document_No"].dropna().unique():
            #     delete_url = f"{delete_base_url}/Company('{company}')/{entity_name}(Document_Type='Invoice',No='{doc_no}')"
            #     try:
            #         response = requests.delete(delete_url, headers=headers)
            #         if response.status_code == 204:
            #             logging.info(f"Successfully deleted invoice {doc_no}.")
            #         else:
            #             logging.error(f"Failed to delete invoice {doc_no}: {response.status_code} - {response.text}")
            #     except Exception as e:
            #         logging.error("Error deleting invoice %s: %s", doc_no, str(e))

        else:
            logging.info("No non-exact duplicates to remove. Keeping all data except exact duplicates.")
            new_data = non_duplicate_rows if 'non_duplicate_rows' in locals() else sales_invoices_df

    else:
        logging.info("No duplicate rows found. Keeping all original data.")
        new_data = sales_invoices_df

except Exception as e:
    logging.error("Unexpected error in duplicate handling process: %s", str(e))
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
    new_data = sales_invoices_df



# ----------------------------------------------------------------

def get_token(accis_client_id, accis_client_secret, accis_tenant_id, accis_base_url, max_retries=3, retry_delay=5):
    token_url = f'https://login.microsoftonline.com/{accis_tenant_id}/oauth2/token'

    payload = {
        'grant_type': 'client_credentials',
        'client_id': accis_client_id,
        'client_secret':accis_client_secret,
        'resource': accis_base_url,
    }

    for attempt in range(1, max_retries + 1):
        try:
            response = requests.post(token_url, data=payload, timeout=10)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx, 5xx)

            token = response.json().get('access_token')
            if not token:
                logger.error(f"Access token missing in response: {response.text}")
                return None

            # logger.info("Access token retrieved successfully.")
            return token

        except requests.exceptions.RequestException as req_err:
            logger.error(f"Request error on attempt {attempt}: {req_err}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, req_err)

        except ValueError as json_err:
            logger.error(f"JSON decoding error on attempt {attempt}: {json_err}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, json_err)
            
        except Exception as e:
            logger.critical(f"Unexpected error on attempt {attempt}: {e}", exc_info=True)
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            
        if attempt < max_retries:
            logger.info(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    logger.critical("Failed to retrieve access token after multiple attempts.")
    return None

token = get_token(accis_client_id, accis_client_secret, accis_tenant_id, accis_base_url)

if token:
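    # 'Prefer: odata.maxpagesize=5000' asks Dataverse to return up to 5000 records per page;
    # fetch_filtered_data below follows @odata.nextLink until all pages are read.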
    headers = {
        'Authorization': f'Bearer {token}',
        'Prefer': 'odata.maxpagesize=5000',
        'OData-MaxVersion': '4.0',
        'OData-Version': '4.0'
    }
else:
    logger.critical("Token retrieval failed. API calls cannot proceed.")
    
def fetch_filtered_data(api_url, max_retries=3, delay=5):
    """Fetch data from API with pagination and retry mechanism, with auto-generated headers."""
    all_data = []
    next_link = api_url  # Initial API URL

    access_token = get_token(accis_client_id, accis_client_secret, accis_tenant_id, accis_base_url)
    if not access_token:
        logger.error("Failed to retrieve access token. Exiting API call.")
        return []

    headers = {
        'Authorization': f'Bearer {access_token}',
        'Prefer': 'odata.maxpagesize=5000',
        'OData-MaxVersion': '4.0',
        'OData-Version': '4.0'
    }

    # Debugging Fix: Convert `max_retries` to an integer if needed
    if not isinstance(max_retries, int):
        # logger.error(f"max_retries should be an integer, but got {type(max_retries)} instead.")
        max_retries = 3  # Default fallback

    # print("Max Retries Type (Fixed):", type(max_retries))  # Debugging

    for attempt in range(max_retries):  # Now max_retries is an integer 
        try:
            while next_link:
                response = requests.get(next_link, headers=headers)
                response.raise_for_status()  # Raises error for bad responses
                
                data = response.json()
                # print("API Response:", data)  # Debugging
                
                value_data = data.get("value", [])
                # print("Value Data Type:", type(value_data))  # Debugging

                if not isinstance(value_data, list):  # Check if it's a list
                    logger.error("Unexpected format: 'value' is not a list")
                    return []

                all_data.extend(value_data)  # Append fetched data
                
                next_link = data.get('@odata.nextLink', None)

            return all_data 

        except requests.exceptions.RequestException as e:
            logger.error(f"Attempt {attempt + 1}: Request error - {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            if attempt < max_retries - 1:
                logger.info(f"Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                logger.error("Max retries reached. Returning partial data.")

    return all_data  

# invoice_numbers = [
#     "S-INV107434", "S-INV107435", "S-INV107436", "S-INV107437", "S-INV107438",
#     "S-INV107188", "S-INV107189", "S-INV107190", "S-INV107191", "S-INV107192",
#     "S-INV107193", "S-INV107194","S-INV107159"
# ]


def fetch_filtered_invoices(invoice_numbers, accis_base_url, fetch_filtered_data, batch_size=20):

    try:
        all_invoices = []

        for i in range(0, len(invoice_numbers), batch_size):
            batch = invoice_numbers[i:i+batch_size]
            invoice_filter = " or ".join([f"trit_bcinvoicenumber eq '{inv}'" for inv in batch])
            filter_query = f"({invoice_filter})"
            invoice_api_url = f"{accis_base_url}/api/data/v9.1/invoices?$filter={filter_query}"

            try:
                invoice_data = fetch_filtered_data(invoice_api_url)
                if invoice_data:
                    all_invoices.extend(invoice_data)
            except Exception as e:
                logging.error(f"Error fetching batch {i}-{i+batch_size}: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                
        df_invoices_raw = pd.DataFrame(all_invoices)
        logging.info(f"Total Invoices fetched from ACCIS : {len(df_invoices_raw)}")

        if df_invoices_raw.empty:
            logging.warning("No invoices found.")
            return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

        df_invoices_cleaned = df_invoices_raw[
            df_invoices_raw['trit_bcinvoicenumber'].notna() & 
            (df_invoices_raw['trit_bcinvoicenumber'] != "")
        ]

        # Extract invoices that are filtered out due to 'dues|membership|subscription'
        df_filtered_out_dues_membership = df_invoices_cleaned[
            df_invoices_cleaned["name"].str.contains("dues|membership|subscription", case=False, na=False)
        ]

        # Remove them from cleaned set
        df_invoices_cleaned = df_invoices_cleaned[
            ~df_invoices_cleaned["name"].str.contains("dues|membership|subscription", case=False, na=False)
        ]

        # Cancellation invoices
        df_cancellation_invoices = df_invoices_cleaned[
            df_invoices_cleaned["name"].str.contains("cancellation", case=False, na=False)
        ]

        # Non-cancellation invoices
        df_without_cancellation_invoices = df_invoices_cleaned[
            ~df_invoices_cleaned["name"].str.contains("cancellation", case=False, na=False)
        ]

        return (
            df_invoices_cleaned,
            df_cancellation_invoices,
            df_without_cancellation_invoices,
            df_filtered_out_dues_membership
        )

    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()


# Ensure invoice_numbers is not empty before calling the function
if not new_data.empty and "Document_No" in new_data.columns:
    invoice_numbers = new_data["Document_No"].dropna().tolist()
else:
    logging.error("Error: No valid invoice numbers found in new_data.")
    invoice_numbers = []

if invoice_numbers:
    (
        df_invoices,
        df_cancellation_invoices,
        df_without_cancellation_invoices,
        df_filtered_out_dues_membership
    ) = fetch_filtered_invoices(invoice_numbers, accis_base_url, fetch_filtered_data, batch_size=10)

    logging.info(
        f"Total accis invoices after name filter: {len(df_invoices)},\n "
        f"Cancellation in accis: {len(df_cancellation_invoices)},\n "
        f"Non-cancellation in accis: {len(df_without_cancellation_invoices)},\n "
        f"Filtered out in invoice table (dues/membership/subscription): {len(df_filtered_out_dues_membership)}"
    )


def fetch_price_list_data(df_invoices, accis_base_url, fetch_filtered_data, batch_size=10):
    try:
        #logging.info("🔄 Starting fetch_price_list_data function...")

        if df_invoices.empty:
            logging.warning("No invoices available to fetch price list data.")
            return df_invoices, pd.DataFrame()

        # Extract unique pricelevelid values
        unique_pricelevel_ids = df_invoices["_pricelevelid_value"].dropna().unique()
       # logging.info(f"📦 Found {len(unique_pricelevel_ids)} unique '_pricelevelid_value' entries.")

        if len(unique_pricelevel_ids) == 0:
            logging.warning("No valid 'pricelevelid' found in invoices.")
            return df_invoices, pd.DataFrame()

        all_price_list_data = []

        for i in range(0, len(unique_pricelevel_ids), batch_size):
            batch_ids = unique_pricelevel_ids[i: i + batch_size]
            pricelevel_filter_query = " or ".join([f"pricelevelid eq {id}" for id in batch_ids])
            price_list_api_url = f"{accis_base_url}/api/data/v9.1/pricelevels?$filter=({pricelevel_filter_query})"
            
            #logging.info(f"🔗 Fetching price list batch {i // batch_size + 1}")

            # Fetch Price List data for the batch
            price_list_data = fetch_filtered_data(price_list_api_url)

            if price_list_data:
                df_batch = pd.DataFrame(price_list_data)
                all_price_list_data.append(df_batch)
                #logging.info(f"✅ Batch {i // batch_size + 1} fetched: {len(df_batch)} records.")
            else:
                logging.warning(f"No data returned for batch {i // batch_size + 1}")

        if all_price_list_data:
            df_price_list = pd.concat(all_price_list_data, ignore_index=True)
            #logging.info(f"✅ Total price list entries fetched: {len(df_price_list)}")
        else:
            df_price_list = pd.DataFrame()
            logging.warning("No price list data collected from any batch.")

        return df_invoices, df_price_list

    except Exception as e:
        logging.error(f"Error in fetch_price_list_data: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return df_invoices, pd.DataFrame()

df_invoices, df_price_list = fetch_price_list_data(df_invoices, accis_base_url, fetch_filtered_data)


def merge_and_filter_invoices(df_invoices, df_price_list): 
    try:
        if df_invoices.empty:
            logging.warning("Invoices DataFrame is empty. Returning unfiltered invoices.")
            return df_invoices  # ✅ Return the (empty) invoices DataFrame unchanged

        # Merge with price list
        df_merged = df_invoices.merge(
            df_price_list,
            left_on="_pricelevelid_value",
            right_on="pricelevelid",
            how="left",
            suffixes=("_inv", "_price")
        )
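        # Columns present in both frames get the _inv / _price suffixes, so when both sides
        # carry a 'name' column the price-level name ends up in 'name_price' (checked just below).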

        # Ensure 'name_price' column exists before filtering
        if "name_price" in df_merged.columns:
            before_count = len(df_merged)

            # Filter condition
            filter_condition = df_merged["name_price"].str.contains("dues|membership|subscription", case=False, na=False)

            # ✅ Store removed invoices separately
            # pricelist_removed_invoices = df_merged[filter_condition].copy()

            # ✅ Apply filtering
            df_invoices_filtered = df_merged[~filter_condition]

            removed_count = before_count - len(df_invoices_filtered)

            # Log removed invoices
            logging.info(f"Invoices removed due to filtering by name (dues, membership, subscription): {removed_count}")

        else:
            logging.warning("Column 'name_price' not found in price list. No filtering applied.")
            df_invoices_filtered = df_merged  # Keep the merged data unfiltered so the return below is defined

        # Print final counts
        logging.info(f"Final invoices after filtering: {len(df_invoices_filtered)}")

        return df_invoices_filtered.reset_index(drop=True)

    except Exception as e:
        logging.error(f"Error in merge_and_filter_invoices: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return df_invoices  # ✅ Fall back to the unfiltered invoices on error


df_invoices = merge_and_filter_invoices(df_invoices, df_price_list)


def fetch_invoice_lines(df_invoice, accis_base_url, fetch_filtered_data, batch_size=10):
    all_invoice_lines = []
    required_columns = ["_invoiceid_value", "_salesorderdetailid_value", "baseamount", "invoicedetailid"]

    try:
        #logging.info("Starting fetch_invoice_lines function.")

        if df_invoice.empty:
            logging.warning("Invoice DataFrame is empty. Cannot fetch invoice lines.")
            return pd.DataFrame(columns=required_columns)

        invoice_ids = df_invoice["invoiceid"].dropna().tolist()
        #logging.info(f"Total invoice IDs to process: {len(invoice_ids)}")

        # Process in batches
        for i in range(0, len(invoice_ids), batch_size):
            batch_ids = invoice_ids[i: i + batch_size]
           # logging.info(f"Processing batch {i // batch_size + 1}")

            batch_filter = " or ".join(f"_invoiceid_value eq '{invoice_id}'" for invoice_id in batch_ids)
            invoice_line_api_url = f"{accis_base_url}/api/data/v9.1/invoicedetails?$filter={batch_filter}"

            invoice_line_data = fetch_filtered_data(invoice_line_api_url)

            if invoice_line_data:
                #logging.info(f"Fetched {len(invoice_line_data)} records for batch {i // batch_size + 1}")
                df_invoice_line = pd.DataFrame(invoice_line_data)

                # Select required columns
                df_invoice_line = df_invoice_line[required_columns].dropna(how="all")
                logging.debug(f"Filtered DataFrame for batch {i // batch_size + 1}:\n{df_invoice_line}")

                if not df_invoice_line.empty:
                    all_invoice_lines.append(df_invoice_line)
                else:
                    logging.warning(f"Fetched data is empty or NaN-only after filtering for batch {batch_ids}")
            else:
                logging.warning(f"No data returned from API for batch {batch_ids}")

        # Concatenate all invoice line DataFrames
        all_invoice_lines = [df for df in all_invoice_lines if not df.empty]

        if all_invoice_lines:
            df_all_invoice_lines = pd.concat(all_invoice_lines, ignore_index=True)
            # logging.info(f"Total valid invoice lines collected: {len(df_all_invoice_lines)}")
        else:
            df_all_invoice_lines = pd.DataFrame(columns=required_columns)
            logging.warning("No valid invoice lines found for provided invoices after batch processing.")

    except Exception as e:
        logging.error(f"Unexpected error in fetch_invoice_lines: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame(columns=required_columns)

   # logging.info("fetch_invoice_lines function completed successfully.")
    return df_all_invoice_lines

df_filtered_invoice_lines = fetch_invoice_lines(df_invoices, accis_base_url, fetch_filtered_data, batch_size=10)


def keep_highest_baseamount_lines(df_all_invoice_lines):
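    """Keep only the invoice line with the highest baseamount per invoice, then drop lines whose baseamount is exactly 1."""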
    try:
        #logging.info("Starting keep_highest_baseamount_lines function...")

        if df_all_invoice_lines.empty:
            logging.warning("Invoice lines DataFrame is empty. No processing done.")
            return pd.DataFrame()

       # logging.info(f"Processing {len(df_all_invoice_lines)} invoice lines to keep highest baseamount per invoice.")

        # Convert 'baseamount' to numeric
        df_all_invoice_lines["baseamount"] = pd.to_numeric(df_all_invoice_lines["baseamount"], errors="coerce")
        #logging.info("Converted 'baseamount' column to numeric.")

        # Handle potential NaNs in baseamount
        nan_count = df_all_invoice_lines["baseamount"].isna().sum()
        if nan_count > 0:
            logging.warning(f"Found {nan_count} NaN values in 'baseamount' after conversion.")

        # Group by invoice and keep the row with max baseamount
        df_highest_baseamount = df_all_invoice_lines.loc[
            df_all_invoice_lines.groupby("_invoiceid_value")["baseamount"].idxmax()
        ]
        #logging.info(f"Selected highest baseamount per invoice. Row count reduced to: {len(df_highest_baseamount)}")

        # Remove rows where highest baseamount is 1
        initial_count = len(df_highest_baseamount)
        df_filtered_lines = df_highest_baseamount[df_highest_baseamount["baseamount"] != 1]
        removed_count = initial_count - len(df_filtered_lines)

        logging.info(f" Removed {removed_count} rows with baseamount = $1.00. Final invoice lines count: {len(df_filtered_lines)}")

        return df_filtered_lines

    except Exception as e:
        logging.exception(f"Error in filtering highest baseamount invoice lines: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame()

df_invoice_lines = keep_highest_baseamount_lines(df_filtered_invoice_lines)


def merge_invoice_data(df_invoice, df_filtered):
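    """Right-join invoice headers onto the filtered invoice lines and return the key columns used downstream."""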
    try:
        #logging.info("Starting merge_invoice_data function...")

        if df_invoice.empty:
            logging.warning(" The df_invoice DataFrame is empty. Skipping merge.")
            return pd.DataFrame()

        if df_filtered.empty:
            logging.warning(" The df_filtered DataFrame is empty. Skipping merge.")
            return pd.DataFrame()

        #logging.info(f"Merging {len(df_invoice)} invoice records with {len(df_filtered)} filtered invoice lines...")

        df_invoice_merged = df_invoice.merge(
            df_filtered,
            left_on="invoiceid",
            right_on="_invoiceid_value",
            how="right"
        )

        #logging.info(f"✅ Merge successful. Total merged records: {len(df_invoice_merged)}")

        df_final = df_invoice_merged[[
            "invoiceid", "_customerid_value", "_salesorderid_value",
            "invoicenumber", "createdon_inv", "trit_bcinvoicenumber",
            "invoicedetailid", "_invoiceid_value"
        ]]

      #  logging.info("Final selected columns extracted from merged data.")
        return df_final

    except Exception as e:
        logging.error(f"Error merging invoice data: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

    logging.info("Returning empty DataFrame because merge_invoice_data did not complete successfully.")
    return pd.DataFrame()

df_invoice_merged = merge_invoice_data(df_invoices, df_filtered_invoice_lines)


def process_invoices_with_dimensions(df_invoices, df_invoice_merged, accis_base_url, fetch_filtered_data, batch_size=20):
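    """For each invoice, fetch its dimension lines in batches and map the known dimension GUIDs to
    saleschannel_deliverymethod / event_code / state_code; return the mapped table plus the rows
    that are missing at least one dimension ("No data")."""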
  #logging.info("🚀 Starting process_invoices_with_dimensions function...")

    # ✅ Step 1: Define dimension mapping
    #PROD dimension mapping
    dimension_mapping = {
        "3c208775-ad88-ea11-a811-000d3a8b6b20": "saleschannel_deliverymethod",
        "34208775-ad88-ea11-a811-000d3a8b6b20": "event_code",
        "3e208775-ad88-ea11-a811-000d3a8b6b20": "state_code"
    }
    
    #DEV dimension mapping
    # dimension_mapping = {
    # "7fe7517b-1316-ea11-a811-000d3a8aad27": "saleschannel_deliverymethod",
    # "0552bc32-1716-ea11-a811-000d3a8b66df": "event",
    # "89006263-1516-ea11-a811-000d3a8b66df": "state"
    #  }
    # logging.info(" Dimension mapping initialized.")

    invoice_data_list = []

    # ✅ Step 2: Loop through each invoice
    for idx, invoice_row in df_invoices.iterrows():
        try:
            invoice_id = invoice_row['invoiceid']
            invoice_number = invoice_row['trit_bcinvoicenumber']

            # ✅ Filter all lines for this invoice
            df_lines = df_invoice_merged[df_invoice_merged['_invoiceid_value'] == invoice_id]
            dimension_data = {}

            if not df_lines.empty:
                try:
                    detail_ids = df_lines['invoicedetailid'].tolist()

                    # ✅ Step 3: Split into batches
                    total_batches = math.ceil(len(detail_ids) / batch_size)
                    #logging.info(f"🔄 Splitting into {total_batches} batch(es) (batch size: {batch_size})")

                    for i in range(total_batches):
                        batch_ids = detail_ids[i * batch_size:(i + 1) * batch_size]

                        # ✅ OData batch filter using OR
                        filter_string = " or ".join([f"_trit_invoicedetail_value eq {id}" for id in batch_ids])
                        dim_line_api_url = f"{accis_base_url}/api/data/v9.1/trit_dimensionlines?$filter={filter_string}"
                        
                        logging.debug(f"Fetching dimension data from URL: {dim_line_api_url}")

                        df_dimension = pd.DataFrame(fetch_filtered_data(dim_line_api_url))

                        if not df_dimension.empty and '_trit_dimension_value' in df_dimension.columns:
                            for _, dim_row in df_dimension.iterrows():
                                dim_key = dim_row['_trit_dimension_value']
                                dim_label = dimension_mapping.get(dim_key)
                                if dim_label and dim_label not in dimension_data:
                                    dimension_data[dim_label] = dim_row.get('subject', 'No subject')
                            #logging.info(f"✅ Fetched dimension data for batch {i+1}/{total_batches}")
                       # else:
                           # logging.info(f"No dimension data found for batch {i+1}/{total_batches}")

                except Exception as e:
                    logging.warning(f"Failed to fetch dimensions for invoice {invoice_number}: {e}", exc_info=True)
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                    
            else:
                logging.warning(f"No invoice lines found for invoice {invoice_number}")

            # ✅ Add result row
            row_data = {"invoice_no": invoice_number}
            for col in dimension_mapping.values():
                row_data[col] = dimension_data.get(col, "No data")

            invoice_data_list.append(row_data)

        except Exception as e:
            logging.error(f"Error processing invoice row: {e}", exc_info=True)
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            
    # ✅ Create result DataFrame
    df_result = pd.DataFrame(invoice_data_list)
    df_result.fillna("No data", inplace=True)
    #logging.info(f"Created result DataFrame with {len(df_result)} records.")

    # ✅ Extract invoices that are missing at least one dimension value
    dimension_cols = list(dimension_mapping.values())
    if df_result.empty:
        df_no_data = pd.DataFrame(columns=["invoice_no"] + dimension_cols)
    else:
        df_no_data = df_result[df_result[dimension_cols].isin(["No data"]).any(axis=1)].copy()

    logging.info(f"Completed processing invoices. Total: {len(df_result)}, Missing dimension invoices: {len(df_no_data)}")

    return df_result, df_no_data

df_result, df_no_data = process_invoices_with_dimensions(df_invoices, df_invoice_merged, accis_base_url, fetch_filtered_data, batch_size=20)


def get_invoice_lines_with_no_data(df_no_data, df_invoice_merged):
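    """Return the rows of df_invoice_merged whose BC invoice number appears in the 'no data' result set."""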

    try:
        invoice_nos_with_no_data = df_no_data['invoice_no'].unique()
        filtered_df = df_invoice_merged[df_invoice_merged['trit_bcinvoicenumber'].isin(invoice_nos_with_no_data)].copy()

        #logging.info(f"Found {len(filtered_df)} rows in df_invoice_merged matching 'no data' invoices.")
        return filtered_df

    except Exception as e:
        logging.error(f"Error filtering invoice lines with no data: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame()  # Return empty DataFrame in case of error


df_invoice_merged_no_data = get_invoice_lines_with_no_data(df_no_data, df_invoice_merged)
df_invoice_merged_no_data = df_invoice_merged_no_data.drop_duplicates(subset=["trit_bcinvoicenumber"])


def fetch_account_and_merge_invoice_data(df_invoice, accis_base_url, batch_size=50):
    """Fetch account details using invoice data and merge in batches."""
    
    result_df = pd.DataFrame()
    account_data_list = []
    
    try:
        if df_invoice.empty:
            logging.warning("Invoice DataFrame is empty. Skipping account data fetch.")
            return result_df, result_df
        
        if "_customerid_value" not in df_invoice.columns:
            logging.error("Missing '_customerid_value' column in df_invoice.")
            return result_df, result_df

        unique_customer_ids = df_invoice["_customerid_value"]
        if len(unique_customer_ids) == 0:
            logging.warning("No unique customer IDs found in invoices. Skipping API call.")
            return result_df, result_df
        
       # logging.info(f"Fetching account data for {len(unique_customer_ids)} unique customer IDs .")

        account_columns = ["accountid", "customertypecode", "_primarycontactid_value"]
        select_query = ",".join(account_columns)

        for i in range(0, len(unique_customer_ids), batch_size):
            batch = unique_customer_ids[i:i + batch_size]

            # Format the batch request properly
            account_id_filters = " or ".join([f"accountid eq '{aid}'" for aid in batch])
            
            # Encode the filter to handle long queries
            encoded_filter = requests.utils.quote(account_id_filters)

            account_api_url = f"{accis_base_url}/api/data/v9.1/accounts?$select={select_query}&$filter={encoded_filter}"
           # logging.info(f"Fetching batch {i // batch_size + 1}")

            batch_data = fetch_filtered_data(account_api_url)
            df_batch = pd.DataFrame(batch_data)

            if not df_batch.empty:
                account_data_list.append(df_batch)
               # logging.info(f"Successfully fetched {len(df_batch)} records for batch {i // batch_size + 1}.")

            time.sleep(0.5)  # Prevent API overload

        df_account_table = pd.concat(account_data_list, ignore_index=True) if account_data_list else pd.DataFrame()

        if df_account_table.empty:
            logging.warning("Account API returned empty dataset. No accounts fetched.")
            return result_df, result_df
        
       # logging.info(f" Total fetched accounts: {len(df_account_table)}")

        df_final = df_invoice.merge(
            df_account_table,
            left_on="_customerid_value",
            right_on="accountid",
            how="left",
            #suffixes=("_invoice", "_account")
        )
        df_final["customertypecode"] = df_final["customertypecode"].astype("Int64")

        pd.DataFrame(df_final)
       # logging.info(f"Successfully merged account data. Final row count: {len(df_final)}.")

        return df_final, df_account_table

    except Exception as e:
        logging.exception(f"Unexpected error during account data fetch and merge: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return result_df, result_df


def fetch_and_merge_contact_data(df_filtered_lines, accis_base_url, batch_size=50):
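    """Fetch contact records for the unique customer IDs in batches and left-join them onto the invoice lines."""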
    #logging.info("Starting fetch_and_merge_contact_data function...")

    try:
        if df_filtered_lines.empty:
            logging.warning("Input DataFrame (df_filtered_lines) is empty. Skipping contact data fetch.")
            return pd.DataFrame(), pd.DataFrame()

        # Step 1: Extract unique customer IDs
        if "_customerid_value" not in df_filtered_lines.columns:
            logging.error("Missing '_customerid_value' column in df_filtered_lines. Cannot proceed with contact fetch.")
            return pd.DataFrame(), pd.DataFrame()

        unique_customer_ids = df_filtered_lines["_customerid_value"].dropna().unique()
        if len(unique_customer_ids) == 0:
            logging.warning("No unique customer IDs found for contact fetch. Skipping.")
            return pd.DataFrame(), pd.DataFrame()

       # logging.info(f"Extracted {len(unique_customer_ids)} unique customer IDs for contact data fetch.")

        contact_columns = ["contactid", "_parentcustomerid_value"]
        select_query = ",".join(contact_columns)
        all_contact_data = []

        # ✅ Fetch data in batches
        for i in range(0, len(unique_customer_ids), batch_size):
            batch_ids = unique_customer_ids[i:i + batch_size]
            customer_filter = " or ".join([f"contactid eq '{cust_id}'" for cust_id in batch_ids])
            contact_api_url = f"{accis_base_url}/api/data/v9.1/contacts?$select={select_query}&$filter={customer_filter}"
            
            batch_num = (i // batch_size) + 1
           # logging.info(f"📤 Fetching contact data for batch {batch_num} (Records: {len(batch_ids)})")
            logging.debug(f"API URL: {contact_api_url}")

            try:
                batch_contact_data = fetch_filtered_data(contact_api_url)
                if batch_contact_data:
                    all_contact_data.extend(batch_contact_data)
                    #logging.info(f"✅ Batch {batch_num} fetched successfully.")
                else:
                    logging.warning(f"Batch {batch_num} returned no data.")
            except Exception as e:
                logging.exception(f"Error fetching contact data for batch {batch_num}: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

        # Convert collected data to DataFrame
        df_contact = pd.DataFrame(all_contact_data)
        if df_contact.empty:
            logging.warning("Contact API returned an empty dataset after all batches.")
            return pd.DataFrame(), pd.DataFrame()

        #logging.info(f"📥 Contact data fetched. Total records: {len(df_contact)}")

        # ✅ Merge contact data with df_filtered_lines
        try:
            df_final = df_filtered_lines.merge(
                df_contact, 
                left_on="_customerid_value", 
                right_on="contactid", 
                how="left"
            )
           # logging.info("🔗 Merged contact data with filtered invoice lines successfully.")
        except Exception as e:
            logging.exception(f"Error merging contact data: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            return pd.DataFrame(), pd.DataFrame()

       # logging.info("🏁 Contact data fetch and merge process completed.")
        return df_final, df_contact

    except Exception as e:
        logging.exception(f"Unexpected error during contact data fetch and merge: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame(), pd.DataFrame()


def fetch_account_and_merge_contact_data(df_invoice, accis_base_url, batch_size=50):
    """Fetch account details using invoice data and merge in batches."""
    
    result_df = pd.DataFrame()  # Default empty DataFrame
    account_data_list = []  # Stores batch-wise fetched account data
    
    try:
       # logging.info("🚀 Starting fetch_account_and_merge_contact_data...")

        # ✅ Validate invoice DataFrame
        if df_invoice.empty:
            logging.warning("Invoice DataFrame is empty. Skipping account data fetch.")
            return result_df, result_df
        
        if "_parentcustomerid_value" not in df_invoice.columns:
            logging.error("Missing '_parentcustomerid_value' column in df_invoice.")
            return result_df, result_df

        # 🔹 Extract unique customer IDs
        unique_customer_ids = df_invoice["_parentcustomerid_value"].dropna().unique()
        if len(unique_customer_ids) == 0:
            logging.warning("No unique contactid found in invoices. Skipping API call.")
            return result_df, result_df
        
       # logging.info(f"Preparing to fetch account data for {len(unique_customer_ids)} unique IDs in batches of {batch_size}.")

        # 🔹 Define account columns to retrieve
        account_columns = ["accountid", "customertypecode"]
        select_query = ",".join(account_columns)

        # 🔹 Process in batches
        for i in range(0, len(unique_customer_ids), batch_size):
            batch = unique_customer_ids[i:i + batch_size]
            account_id_filters = " or ".join([f"accountid eq '{aid}'" for aid in batch])

            # Construct API URL for the current batch
            account_api_url = f"{accis_base_url}/api/data/v9.1/accounts?$select={select_query}&$filter={account_id_filters}"
            batch_num = i // batch_size + 1
            #logging.info(f"🔍 Fetching batch {batch_num}: Records {i} to {i + len(batch) - 1}")
            logging.debug(f"API URL: {account_api_url}")

            try:
                batch_data = fetch_filtered_data(account_api_url)
                df_batch = pd.DataFrame(batch_data)

                if not df_batch.empty:
                    account_data_list.append(df_batch)
                    #logging.info(f"✅ Batch {batch_num} fetched successfully with {len(df_batch)} records.")
                else:
                    logging.warning(f"Batch {batch_num} returned no data.")
            except Exception as e:
                logging.exception(f"Error fetching data for batch {batch_num}: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

            time.sleep(0.5)  # Prevent API overload

        # 🔹 Combine all batch results
        df_account_table = pd.concat(account_data_list, ignore_index=True) if account_data_list else pd.DataFrame()

        if df_account_table.empty:
            logging.warning("Account API returned empty dataset. No accounts fetched.")
            return result_df, result_df
        
       # logging.info(f"📊 Total fetched accounts: {len(df_account_table)}")

        # 🔹 Merge Invoice Data with Account Data
        try:
            df_final = df_invoice.merge(
                df_account_table,
                left_on="_parentcustomerid_value",
                right_on="accountid",
                how="left"
            )
         #   logging.info("🔗 Successfully merged account data with invoice data.")
        except Exception as e:
            logging.exception(f"Error during merging account data with invoice data: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            return result_df, result_df

        # ✅ Convert customertypecode to integer safely
        if "customertypecode" in df_final.columns:
            try:
                df_final["customertypecode"] = df_final["customertypecode"].astype("Int64", errors="ignore")
                #logging.info("🔢 Converted 'customertypecode' to Int64 type.")
            except Exception as e:
                logging.warning(f"Failed to convert 'customertypecode' to Int64: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

       # logging.info(f"✅ Successfully merged and processed account data. Final row count: {len(df_final)}")

        return df_final, df_account_table

    except Exception as e:
        logging.exception(f"Unexpected error during account data fetch and merge: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return result_df, result_df
    
    

def merge_invoice_account_data(df_final, df_final_account):
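    """Combine the account- and contact-derived customer type data, split out corporate (314310002)
    and licensee (314310001) invoices, and return (other_invoices, licensee_invoices, corporate_invoices)."""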

    # ✅ If both DataFrames are empty
    if df_final.empty and df_final_account.empty:
        logging.warning("Both DataFrames (df_final and df_final_account) are empty. Returning empty results.")
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    # ✅ If only df_final is empty
    if df_final.empty:
        logging.warning("df_final is empty. Returning df_final_account as the merged result.")
        df_final_account = df_final_account.rename(columns={
            "_salesorderid_value": "_salesorderid_value_invoice",
            "invoicedetailid": "invoicedetailid_invoice",
            "_invoiceid_value": "_invoiceid_value_invoice",
            "invoiceid": "invoiceid_invoice"
        })

        # ✅ Filter corporate invoices
        corporate_condition = df_final_account['customertypecode'] == 314310002
        corporate_invoices = df_final_account[corporate_condition].copy()
    
        # ✅ Filter licensee invoices
        licensee_condition = df_final_account['customertypecode'] == 314310001
        licensee_invoices = df_final_account[licensee_condition].copy()
    
        # ✅ Remaining data (non-corporate & non-licensee); NaN customertypecode counts as "other"
        mask_other = ~corporate_condition & ~licensee_condition
        mask_other = mask_other.fillna(True)
        df_account_1 = df_final_account[mask_other]

        # Drop duplicates
        df_merged_result = df_account_1.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
        corporate_invoices = corporate_invoices.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
        licensee_invoices = licensee_invoices.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)

        logging.info(f"Removed {len(corporate_invoices)} corporate invoices after merging.")
        logging.info(f"Extracted {len(licensee_invoices)} licensee invoices.")
        return df_merged_result, licensee_invoices, corporate_invoices

    # ✅ If only df_final_account is empty
    if df_final_account.empty:
        logging.warning("df_final_account is empty. Returning df_final as the merged result.")
        df_final = df_final.rename(columns={
            "_salesorderid_value": "_salesorderid_value_invoice",
            "invoicedetailid": "invoicedetailid_invoice",
            "_invoiceid_value": "_invoiceid_value_invoice",
            "invoiceid": "invoiceid_invoice"
        })

        # ✅ Filter corporate invoices
        corporate_condition = df_final['customertypecode'] == 314310002
        corporate_invoices = df_final[corporate_condition].copy()
    
        # ✅ Filter licensee invoices
        licensee_condition = df_final['customertypecode'] == 314310001
        licensee_invoices = df_final[licensee_condition].copy()
    
        # ✅ Remaining data (non-corporate & non-licensee); NaN customertypecode counts as "other"
        mask_other = ~corporate_condition & ~licensee_condition
        mask_other = mask_other.fillna(True)
        df_account_2 = df_final[mask_other]

        # Drop duplicates
        df_merged_result = df_account_2.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
        corporate_invoices = corporate_invoices.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
        licensee_invoices = licensee_invoices.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)

        logging.info(f"Removed {len(corporate_invoices)} corporate invoices after merging.")
        logging.info(f"Extracted {len(licensee_invoices)} licensee invoices.")
        return df_merged_result, licensee_invoices, corporate_invoices

    # ✅ If both DataFrames are present
    if "trit_bcinvoicenumber" not in df_final.columns or "trit_bcinvoicenumber" not in df_final_account.columns:
        logging.error("Missing 'trit_bcinvoicenumber' column in one or both DataFrames.")
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
    
    try:
        #logging.info("🔄 Merging df_final and df_final_account on 'trit_bcinvoicenumber'.")
    
        df_merged = df_final.merge(
            df_final_account,
            on="trit_bcinvoicenumber",
            how="left",
            suffixes=("_invoice", "_account")
        )
    
       # logging.info(f"🔗 Merged DataFrame shape: {df_merged.shape}")
    
        # Merge 'customertypecode' columns
        df_merged['customertypecode_merged'] = df_merged['customertypecode_invoice'].fillna(df_merged['customertypecode_account'])
    
        # ✅ Filter corporate invoices
        corporate_condition = df_merged['customertypecode_merged'] == 314310002
        corporate_invoices = df_merged[corporate_condition].copy()
    
        # ✅ Filter licensee invoices
        licensee_condition = df_merged['customertypecode_merged'] == 314310001
        licensee_invoices = df_merged[licensee_condition].copy()
    
        # ✅ Remaining data (non-corporate & non-licensee)
        df_filtered = df_merged[~(corporate_condition | licensee_condition)]
    
        # Drop duplicates
        df_merged_result = df_filtered.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
        corporate_invoices = corporate_invoices.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
        licensee_invoices = licensee_invoices.drop_duplicates(subset=["trit_bcinvoicenumber"]).reset_index(drop=True)
    
        removed_count = len(corporate_invoices)
    
        # Reorder columns for final output
        df_merged_result = df_merged_result[[
            "invoiceid_invoice", "_customerid_value_invoice", "trit_bcinvoicenumber",
             "_salesorderid_value_invoice", "invoicedetailid_invoice",
            "_invoiceid_value_invoice", "customertypecode_merged", "contactid"
        ]]
    
        #logging.info(f" Successfully merged and filtered data. Final row count: {len(df_merged_result)}.")
        logging.info(f" Removed {removed_count} corporate invoices after merging.")
        logging.info(f" Extracted {len(licensee_invoices)} licensee invoices.")
    
        # ✅ Return all three
        return df_merged_result, licensee_invoices, corporate_invoices
    
    except Exception as e:
        logging.exception(f"Unexpected error during invoice-account merging: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

if not df_invoices.empty:
    # Fetch and merge account data (returns two DataFrames)
    df_final_account, df_account_ = fetch_account_and_merge_invoice_data(df_invoice_merged_no_data, accis_base_url)
    df_final_processed, df_contact = fetch_and_merge_contact_data(df_invoice_merged_no_data, accis_base_url)
    df_final, df_account = fetch_account_and_merge_contact_data(df_final_processed, accis_base_url)
    df_merged_result, licensee_invoices, corporate_invoices = merge_invoice_account_data(df_final, df_final_account)
else:
    # No invoices for the posting date: initialise the downstream DataFrames so later steps do not hit a NameError
    logging.warning("df_invoices is empty. Skipping account/contact enrichment steps.")
    df_final_account = df_account_ = df_final_processed = df_contact = pd.DataFrame()
    df_final = df_account = pd.DataFrame()
    df_merged_result = licensee_invoices = corporate_invoices = pd.DataFrame()


def fetch_order_lines(df_merged_result, accis_base_url, fetch_filtered_data, batch_size=25):
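    """Fetch sales order detail lines for the merged invoices in batches; also return the input rows that have no sales order ID."""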
    order_lines = []
    df_invoices_with_empty_salesorderid = pd.DataFrame()

    try:
        if not df_merged_result.empty and "_salesorderid_value_invoice" in df_merged_result.columns:
            # Separate invoices with empty salesorder ID
            df_invoices_with_empty_salesorderid = df_merged_result[df_merged_result["_salesorderid_value_invoice"].isna()]

            # Continue with valid IDs
            valid_rows = df_merged_result.dropna(subset=["_salesorderid_value_invoice"])
            unique_salesorder_ids = valid_rows["_salesorderid_value_invoice"].astype(str).unique().tolist()

            for i in range(0, len(unique_salesorder_ids), batch_size):
                batch = unique_salesorder_ids[i:i + batch_size]
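                # OR the sales order GUIDs into a single OData $filter so one request covers the whole batch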
                salesorder_filter = " or ".join([f"_salesorderid_value eq {sid}" for sid in batch])

                # Construct API URL with batch filter
                order_line_api_url = (
                    f"{accis_base_url}/api/data/v9.1/salesorderdetails?"
                    f"$filter={salesorder_filter}&$top=500"
                )

                batch_num = i // batch_size + 1
                #logging.info(f"📦 Fetching Sales Order Lines Batch {batch_num}: Records {i} to {i + len(batch) - 1}")
                logging.debug("Order Line API URL WORKING ")

                try:
                    order_line_data = fetch_filtered_data(order_line_api_url)

                    if order_line_data:
                        df_order_line = pd.DataFrame(order_line_data)

                        if not df_order_line.empty and df_order_line.dropna(how="all").shape[0] > 0:
                            order_lines.append(df_order_line)
                except Exception as e:
                    logging.error(f"Error fetching batch {batch_num}: {e}")
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                    continue

        # Clean and combine
        order_lines = [df.dropna(how="all") for df in order_lines if not df.empty and df.dropna(how="all").shape[0] > 0]
        df_all_order_lines = pd.concat(order_lines, ignore_index=True) if order_lines else pd.DataFrame()

        required_columns = ["_cr726_event_value", "_salesorderid_value", "_trit_msevtmgt_eventregistration_value"]
        df_all_order_lines = df_all_order_lines[required_columns] if not df_all_order_lines.empty else pd.DataFrame()

        df_all_order_lines = df_all_order_lines.drop_duplicates(subset=["_salesorderid_value", "_cr726_event_value"])

    except Exception as e:
        logging.error(f"Unexpected error in fetch_order_lines: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        df_all_order_lines = pd.DataFrame()

    # You can return both if needed
    return df_all_order_lines, df_invoices_with_empty_salesorderid



def merge_order_lines(df_final, df_all_order_lines):
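    """Right-join the fetched sales order lines onto the final invoice DataFrame."""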
    try:
        if not df_final.empty and not df_all_order_lines.empty:
            #logging.info("🔄 Merging order lines with final DataFrame...")

            df_final_merged = df_final.merge(
                df_all_order_lines,
                left_on="_salesorderid_value_invoice",
                right_on="_salesorderid_value",
                how="right"
            )

            # before_dedup = len(df_final_merged)
            # df_final_merged = df_final_merged.drop_duplicates(
            #     subset=["trit_bcinvoicenumber", "_cr726_event_value"]
            # )
            # after_dedup = len(df_final_merged)

           # logging.info(f"✅ Merge completed. Records before deduplication: {before_dedup}, after: {after_dedup}")
        else:
            logging.warning("Skipped merge: One or both DataFrames are empty.")
            df_final_merged = df_final

    except Exception as e:
        logging.error(f"Error merging order lines: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        df_final_merged = df_final  # Return original DataFrame in case of failure

    return df_final_merged


def fetch_event_data(df_all_order_lines, accis_base_url, fetch_filtered_data, batch_size=20):
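    """Fetch event records for the order lines' event IDs in batches; also return order lines whose event ID is missing."""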

    events = []
    df_removed_invoices = pd.DataFrame()

    try:
        if not df_all_order_lines.empty and "_cr726_event_value" in df_all_order_lines.columns:
            # ✅ Separate and log invoices with None in '_cr726_event_value'
            df_removed_invoices = df_all_order_lines[df_all_order_lines["_cr726_event_value"].isna()].copy()
            removed_count = len(df_removed_invoices)
            logging.info(f" Removed {removed_count} invoices with missing event IDs.")

            # ✅ Filter valid event IDs
            filtered_event_ids = df_all_order_lines["_cr726_event_value"].dropna().astype(str).unique().tolist()

            # ✅ Process in batches
            for i in range(0, len(filtered_event_ids), batch_size):
                batch = filtered_event_ids[i:i + batch_size]
                batch_num = (i // batch_size) + 1

                # Create OData filter using 'or'
                filter_query = " or ".join([f"msevtmgt_eventid eq {eid}" for eid in batch])
                event_api_url = f"{accis_base_url}/api/data/v9.1/msevtmgt_events?$filter={filter_query}"

               # logging.info(f"📦 Fetching event batch {batch_num}: Records {i} to {i + len(batch) - 1}")
               # logging.debug(f"🔗 Event API URL: {event_api_url}")

                try:
                    event_data = fetch_filtered_data(event_api_url)
                    if event_data:
                        df_event = pd.DataFrame(event_data)
                        if not df_event.empty:
                            events.append(df_event)
                except Exception as e:
                    logging.error(f"Error fetching batch {batch_num}: {e}")
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                    continue

        # ✅ Combine and filter required columns
        df_all_events = pd.concat(events, ignore_index=True) if events else pd.DataFrame()
        required_columns = ["msevtmgt_eventid", "trit_type", "_trit_state_value", "trit_eventcode", "trit_deliverymechanisms"]
        df_all_events = df_all_events[required_columns] if not df_all_events.empty else pd.DataFrame()

    except Exception as e:
        logging.error(f"Unexpected error in fetch_event_data: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        df_all_events = pd.DataFrame()
        df_removed_invoices = pd.DataFrame()

    return df_all_events, df_removed_invoices

def merge_event_data(df_final_merged, df_all_events):
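    """Drop rows without an event ID and left-join the fetched event attributes onto the merged data."""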
    try:
        if not df_final_merged.empty and not df_all_events.empty:
           # logging.info(f"🔄 Starting event data merge...")
            #logging.info(f"📄 Rows before filtering None event values: {len(df_final_merged)}")

            # ✅ Remove rows where '_cr726_event_value' is None
            df_final_merged = df_final_merged[df_final_merged["_cr726_event_value"].notna()].copy()
           # logging.info(f" Rows after removing None values in '_cr726_event_value': {len(df_final_merged)}")

            try:
                # ✅ Perform the merge
                df_final_merged = df_final_merged.merge(
                    df_all_events,
                    left_on="_cr726_event_value",
                    right_on="msevtmgt_eventid",
                    how="left"
                )
                # logging.info(f" Merge completed successfully. Total rows after merge: {len(df_final_merged)}")

            except Exception as e:
                logging.error(f"Error merging event data: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                return df_final_merged  # Return original DataFrame on merge failure

        else:
            logging.warning("One or both input DataFrames are empty. Skipping merge.")

    except Exception as e:
        logging.error(f"Unexpected error in merge_event_data: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

    return df_final_merged


def fetch_state_data(filtered_df, accis_base_url, fetch_filtered_data, batch_size=10):
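    """Fetch trit_state records for the unique state IDs in batches and return their ID and name columns."""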
    try:
        unique_state_ids = []
        if not filtered_df.empty and "_trit_state_value" in filtered_df.columns:
            try:
                unique_state_ids = filtered_df["_trit_state_value"].dropna().unique().tolist()
               # logging.info(f"🔍 Total unique state IDs: {len(unique_state_ids)}")
            except Exception as e:
                logging.error(f"Error extracting unique state IDs: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                return pd.DataFrame()
        
        state_data_list = []

        # 🔁 Process in batches
        for i in range(0, len(unique_state_ids), batch_size):
            batch = unique_state_ids[i:i + batch_size]
            batch_filter = " or ".join([f"trit_stateid eq '{sid}'" for sid in batch])
            state_url = f"{accis_base_url}/api/data/v9.1/trit_states?$filter={batch_filter}"

            batch_num = i // batch_size + 1
            #logging.info(f"📦 Fetching batch {batch_num}: Records {i} to {i + len(batch) - 1}")
            logging.debug(f"API URL: {state_url}")

            try:
                state_data = fetch_filtered_data(state_url)
                if state_data:
                    state_df = pd.DataFrame(state_data)
                    state_data_list.append(state_df)
            except Exception as e:
                logging.error(f"Error fetching state data for batch {batch_num}: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                continue

        try:
            df_all_states = pd.concat(state_data_list, ignore_index=True) if state_data_list else pd.DataFrame()
        except Exception as e:
            logging.error(f"Error concatenating state data: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            return pd.DataFrame()

        try:
            required_columns = ["trit_stateid", "trit_name"]
            df_all_states = df_all_states[required_columns] if not df_all_states.empty else pd.DataFrame()
        except Exception as e:
            logging.error(f"Error selecting required columns from state data: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            return pd.DataFrame()

    except Exception as e:
        logging.error(f"Unexpected error in fetch_state_data: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame()

   # logging.info(f"✅ Total states fetched: {len(df_all_states)}")
    return df_all_states

def merge_state_data(filtered_df, df_all_states):
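    """Left-join state names onto the event data using the state lookup value."""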
    try:
        if not filtered_df.empty and not df_all_states.empty:
            try:
               # logging.info(f"Starting merge of filtered_df with state data...")
                #logging.info(f"filtered_df rows: {len(filtered_df)}, df_all_states rows: {len(df_all_states)}")

                final_event_data = filtered_df.merge(
                    df_all_states,
                    left_on="_trit_state_value",
                    right_on="trit_stateid",
                    how="left"
                )

               # logging.info(f"✅ Merge successful. Resulting rows: {len(final_event_data)}")

            except Exception as e:
                logging.error(f"Error merging state data: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                return filtered_df  # Return original DataFrame if merging fails
        else:
            logging.warning("One or both DataFrames are empty. Skipping merge.")
            final_event_data = filtered_df

    except Exception as e:
        logging.error(f"Unexpected error in merge_state_data: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return filtered_df  # Return the original DataFrame so downstream steps can continue

    return final_event_data


def map_sales_channel_and_delivery(final_event_data):
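    """Map trit_type and trit_deliverymechanisms option values to readable codes and combine them into
    a single 'saleschannel_deliverymethod' column."""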
    try:
        # Define mapping dictionaries
        sales_channel_mapping = {
            314310000: "SS",
            314310002: "LS",
            314310009: "UNIV",
            314310001: "High School"
        }

        delivery_method_mapping = {
            314310000: "CLASSROOM",
            314310001: "WEBINAR",
            314310002: "SELF-PACED",
            314310005: "CORP-CLASS",
            314310004: "CORP-WB",
            314310003: "BLENDED"
        }

        # Map values to new columns
        final_event_data["sales_channel"] = final_event_data["trit_type"].map(sales_channel_mapping)
        final_event_data["delivery_method"] = final_event_data["trit_deliverymechanisms"].map(delivery_method_mapping)

        #logging.info("✅ Sales channel and delivery method mapping completed.")

        # Combine them with fallback logic
        final_event_data["saleschannel_deliverymethod"] = final_event_data.apply(
            lambda row: (
                f"{row['sales_channel']}_{row['delivery_method']}" if pd.notna(row['sales_channel']) and pd.notna(row['delivery_method'])
                else row['sales_channel'] if pd.notna(row['sales_channel'])
                else row['delivery_method'] if pd.notna(row['delivery_method'])
                else None
            ),
            axis=1
        )

       # logging.info("✅ Combined 'saleschannel_deliverymethod' column created successfully.")
        return final_event_data

    except Exception as e:
        logging.error(f"Error occurred in map_sales_channel_and_delivery: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise


def format_final_table(final_event_data):
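    """Select and rename the columns needed for posting and drop duplicate rows."""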
    try:
       # logging.info("✅ Starting format_final_table function.")
        
        # Selecting the required columns
        columns_required = [
            "trit_bcinvoicenumber", 
            "trit_eventcode", 
            "invoicedetailid_invoice",
            "_invoiceid_value_invoice", 
            "state", 
            "saleschannel_deliverymethod"
        ]
        final_event_data = final_event_data[columns_required].copy()
       # logging.info("✅ Selected necessary columns.")

        # Renaming columns
        final_event_data.rename(columns={
            "trit_bcinvoicenumber": "invoicenumber",
            "trit_eventcode": "event"
        }, inplace=True)
        #logging.info("✅ Columns renamed.")

        # Dropping duplicates
        final_event_data = final_event_data.drop_duplicates()
        #logging.info("✅ Removed duplicate records.")

        return final_event_data

    except Exception as e:
        logging.error(f"Error occurred in format_final_table: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise

def fetch_dimension_lines(df_filtered, final_event_data, accis_base_url, fetch_filtered_data, batch_size=20):
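    """Fetch existing dimension lines for the invoice detail IDs in batches, join them with the event data,
    and return the selected columns."""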
    try:
       # logging.info("✅ Starting fetch_dimension_lines function.")
        dimension_lines = []

        if not df_filtered.empty:
            filtered_invoice_detail_ids = df_filtered["invoicedetailid_invoice"].dropna().unique().tolist()
           # logging.info(f"🔍 Total invoice detail IDs to process: {len(filtered_invoice_detail_ids)}")

            # Batch processing
            for i in range(0, len(filtered_invoice_detail_ids), batch_size):
                batch_ids = filtered_invoice_detail_ids[i:i + batch_size]
                #logging.info(f"📦 Processing batch {i // batch_size + 1}: {batch_ids}")

                # Build filter query for batch
                filter_query = " or ".join([f"_trit_invoicedetail_value eq {id_}" for id_ in batch_ids])
                dimension_line_api_url = (
                    f"{accis_base_url}/api/data/v9.1/trit_dimensionlines?"
                    f"$filter={filter_query}"
                    f"&$select=subject,_trit_invoicedetail_value,_trit_dimension_value,_trit_dimensionvalue_value"
                )

                try:
                    dimension_data = fetch_filtered_data(dimension_line_api_url)
                    if dimension_data:
                        df_dimension_line = pd.DataFrame(dimension_data)
                        dimension_lines.append(df_dimension_line)
                        #logging.info(f"✅ Fetched dimension lines for batch {i // batch_size + 1}")
                except Exception as e:
                    logging.error(f"Error fetching dimension lines for batch {i // batch_size + 1}: {e}")
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                    continue

        # Combine all fetched data
        df_all_dimension_lines = pd.concat(dimension_lines, ignore_index=True) if dimension_lines else pd.DataFrame()
        #ogging.info(f"🧩 Total dimension lines fetched: {len(df_all_dimension_lines)}")

        # Merge with final_event_data
        try:
            df_final_ = final_event_data.merge(
                df_all_dimension_lines,
                left_on="invoicedetailid_invoice",
                right_on="_trit_invoicedetail_value",
                how="right"
            )
           # logging.info("🔗 Merged dimension lines with final event data.")
        except Exception as e:
            logging.error(f"Error merging dimension lines: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

            
            return pd.DataFrame(columns=[
                "invoicenumber", "event", "_invoiceid_value_invoice", 
                "invoicedetailid_invoice", "state", "saleschannel_deliverymethod", "subject"
            ])

        # Select required columns
        required_columns = [
            "invoicenumber", "event", "_invoiceid_value_invoice", 
            "invoicedetailid_invoice", "state", "saleschannel_deliverymethod", "subject"
        ]
        dimension = df_final_[required_columns] if not df_final_.empty else pd.DataFrame(columns=required_columns)

       # logging.info("✅ Finished processing fetch_dimension_lines function.")
        return dimension

    except Exception as e:
        logging.error(f"Error in fetch_dimension_lines: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

    return pd.DataFrame(columns=[
        "invoicenumber", "event", "_invoiceid_value_invoice", 
        "invoicedetailid_invoice", "state", "saleschannel_deliverymethod", "subject"
    ])


def group_and_aggregate_subjects(dimension):
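    """Group dimension lines by invoice detail ID, collect the set of existing subjects per detail,
    and return (grouped rows, {invoicedetailid: set of subjects})."""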
    try:
        #logger.info("🚀 Starting group_and_aggregate_subjects function...")

        if dimension.empty:
            logger.warning("Received empty DataFrame. Returning empty result.")
            return pd.DataFrame(), {}

       # logger.info("Grouping by 'invoicedetailid_invoice' and aggregating 'subject' values...")

        # Group by "invoicedetailid_invoice" and aggregate subjects
        invoice_subjects = dimension.groupby("invoicedetailid_invoice")["subject"].apply(set).reset_index()
        logger.debug(f"Grouped and aggregated subjects:\n{invoice_subjects}")

        # Count the number of subjects for each "invoicedetailid"
        invoice_subjects["subject_count"] = invoice_subjects["subject"].apply(len)
        logger.debug(f" Added subject count column:\n{invoice_subjects}")

        # Convert into a dictionary for easier lookup
        invoice_subject_dict = dict(zip(invoice_subjects["invoicedetailid_invoice"], invoice_subjects["subject"]))
       # logger.info(f" Converted subject groups to dictionary with {len(invoice_subject_dict)} entries.")

        # Convert grouped data back into DataFrame format
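        # NOTE: applying the identity function after groupby effectively returns the original rows;
        # process_invoice_data performs its own groupby on this result.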
        grouped_data = dimension.groupby("invoicedetailid_invoice")
        grouped_data = grouped_data.apply(lambda x: x).reset_index(drop=True)
       # logger.info("✅ Final grouped DataFrame created successfully.")

        return grouped_data, invoice_subject_dict

    except KeyError as e:
        logger.error(f"KeyError occurred in group_and_aggregate_subjects function: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise
    except Exception as e:
        logger.error(f"An unexpected error occurred in group_and_aggregate_subjects function: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise

def process_invoice_data(grouped_data, invoice_subject_dict, accis_base_url):
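    """Compare each invoice detail's expected event / sales-channel / state values against its existing
    dimension subjects and return the subjects that still need to be posted."""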
    try:
       # logger.info("🚀 Starting process_invoice_data function...")

        if grouped_data.empty:
            logger.info("No invoices to process. Exiting early.")
            return pd.DataFrame()

        # logger.info(f" Total unique invoice details to process: {grouped_data['invoicedetailid_invoice'].nunique()}")

        newly_added_data = []  # List to store new subjects with invoice numbers

        grouped_data = grouped_data.groupby("invoicedetailid_invoice")

        for invoice_detail_value, group in grouped_data:
            try:
                invoicenumber = group["invoicenumber"].iloc[0]  # Get the actual invoice number
                #logger.info(f"🔍 Processing Invoice: {invoicenumber}")

                if invoice_detail_value:
                    existing_subjects = invoice_subject_dict.get(invoice_detail_value, set())
                    logger.debug(f"Existing subjects for invoice {invoicenumber}: {existing_subjects}")

                    event_data_to_post = {
                        "event": group["event"].iloc[0] if pd.notna(group["event"].iloc[0]) else None,
                        "saleschannel_deliverymethod": group["saleschannel_deliverymethod"].iloc[0] if pd.notna(group["saleschannel_deliverymethod"].iloc[0]) else None,
                        "state": group["state"].iloc[0] if pd.notna(group["state"].iloc[0]) else None,
                    }

                    # Filter out None values
                    event_data_to_post = {k: v for k, v in event_data_to_post.items() if v is not None}
                    logger.debug(f"📤 Cleaned event data to post for invoice {invoicenumber}: {event_data_to_post}")

                    # Check which subjects are new
                    new_data_to_post = [
                        {"subject": value}
                        for key, value in event_data_to_post.items()
                        if value not in existing_subjects
                    ]

                    # logger.info(f"New subjects to post for invoice {invoicenumber}: {new_data_to_post}")

                    for item in new_data_to_post:
                        newly_added_data.append({"invoicenumber": invoicenumber, "subject": item["subject"]})
            except Exception as e:
                logger.warning(f"Error while processing invoice detail {invoice_detail_value}: {e}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                

        # Convert newly added data into DataFrame
        df_newly_added = pd.DataFrame(newly_added_data)

        if not df_newly_added.empty:
            logger.info("Newly added subjects per invoice:")
            logger.info(df_newly_added.groupby("invoicenumber")["subject"].apply(list).reset_index())
        else:
            logger.info("No new subjects to add. All invoices are complete.")

        return df_newly_added

    except Exception as e:
        logger.error(f"Error occurred in process_invoice_data function: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise

  
def fetch_and_map_dimension_values(df_newly_added, accis_base_url, grouped_data):
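    """Look up the dimension value GUIDs for the newly required subjects and merge them back onto the
    per-invoice rows; also return the unique parent dimension IDs."""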
    try:
      #  logger.info("🚀 Starting fetch_and_map_dimension_values function...")

        if df_newly_added.empty:
            logger.info("No new subjects to process for dimension values. Skipping function.")
            return pd.DataFrame(), None

        # Step 1: Fetch Unique Subjects
        unique_subjects = df_newly_added["subject"].drop_duplicates()
       # logger.info(f"Found {len(unique_subjects)} unique subjects to fetch GUIDs for.")

        dimension_values_list = []

        # Step 2: Fetch Dimension Values (GUIDs) from API
        for subject in unique_subjects:
            dimensionvalue_api_url = f"{accis_base_url}/api/data/v9.1/trit_dimensionvalues?$filter=trit_code eq '{subject}'&$select=trit_dimensionvalueid,_trit_dimension_value,trit_code"
            logger.debug(f"Fetching GUID for subject: {subject} | URL: {dimensionvalue_api_url}")
            try:
                df_temp = pd.DataFrame(fetch_filtered_data(dimensionvalue_api_url))
                if not df_temp.empty:
                    logger.debug(f"Retrieved data for subject '{subject}' with {len(df_temp)} records.")
                    dimension_values_list.append(df_temp)
                else:
                    logger.warning(f"No data returned for subject '{subject}'.")
            except Exception as e:
                logger.error(f"Error fetching GUID for subject '{subject}': {e}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

        # Step 3: Combine all fetched dimension values into a single DataFrame
        df_dimension_values = pd.concat(dimension_values_list, ignore_index=True) if dimension_values_list else pd.DataFrame()
       # logger.info(f"Total dimension value records fetched: {len(df_dimension_values)}")
        df_dimension_values = df_dimension_values.drop_duplicates(subset=["trit_code"])

        # Step 4: Merge GUIDs with Newly Added Subjects
        df_newly_added_with_guids = df_newly_added.merge(
            df_dimension_values,
            left_on="subject",
            right_on="trit_code",
            how="left"
        )
        #logger.info(f"🔗 Merged newly added subjects with GUIDs. Records after merge: {len(df_newly_added_with_guids)}")

        # Step 5: Merge with invoice details
        df_newly_added_with_guids = df_newly_added_with_guids.merge(
            grouped_data[["invoicedetailid_invoice", "invoicenumber"]],
            left_on="invoicenumber",
            right_on="invoicenumber",
            how="left"
        )
        #logger.info("Merged with grouped invoice details.")

        # Step 6: Extract Unique Dimension Values
        dimen = df_newly_added_with_guids["_trit_dimension_value"].drop_duplicates()
       # logger.info(f"🧩 Unique _trit_dimension_value count: {len(dimen)}")

        df_newly_added_with_guids = df_newly_added_with_guids.drop_duplicates(
            subset=["invoicenumber", "_trit_dimension_value", "trit_dimensionvalueid", "invoicedetailid_invoice"]
        )

       # logger.info(f"✅ Final shape of mapped data: {df_newly_added_with_guids.shape}")

        return df_newly_added_with_guids, dimen

    except Exception as e:
        logger.error(f"An unexpected error occurred in fetch_and_map_dimension_values function: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return pd.DataFrame(), None
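
# Illustrative sketch (not executed), assuming the merges above succeed: each row of
# df_newly_added_with_guids carries a subject plus its GUIDs, roughly
#   invoicenumber | subject | trit_code | trit_dimensionvalueid | _trit_dimension_value | invoicedetailid_invoice
# and `dimen` is the de-duplicated Series of _trit_dimension_value GUIDs consumed by
# fetch_and_map_dimensions below.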

def fetch_and_map_dimensions(dimen, accis_base_url, df_newly_added_with_guids):
    try:
      #  logger.info("🚀 Starting fetch_and_map_dimensions function...")

        if dimen.empty:
            logger.info("No dimensions to process.")
            return df_newly_added_with_guids

        dimension_list = []
        #logger.info(f"Found {len(dimen)} unique _trit_dimension_value(s) to fetch.")

        # Fetch dimension records from the API
        for dd in dimen:
            dimensionvalue_api_url = f"{accis_base_url}/api/data/v9.1/trit_dimensions?$filter=trit_dimensionid eq '{dd}'&$select=trit_dimensionid"
            logger.debug(f"Fetching dimension for ID: {dd} | URL: {dimensionvalue_api_url}")
            try:
                df_temp_dim = pd.DataFrame(fetch_filtered_data(dimensionvalue_api_url))
                if not df_temp_dim.empty:
                    logger.debug(f"Retrieved {len(df_temp_dim)} record(s) for dimension ID '{dd}'.")
                    dimension_list.append(df_temp_dim)
                else:
                    logger.warning(f"No records found for dimension ID '{dd}'.")
            except Exception as e:
                logger.error(f"Error fetching GUID for dimension '{dd}': {e}", exc_info=True)
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        # Combine all fetched dimension records
        df_dimension = pd.concat(dimension_list, ignore_index=True) if dimension_list else pd.DataFrame()
        # logger.info(f"Total dimension records fetched: {len(df_dimension)}")

        # Drop the '@odata.etag' column if it exists
        if "@odata.etag" in df_dimension.columns:
            df_dimension = df_dimension.drop(columns=["@odata.etag"])
            logger.debug("Dropped '@odata.etag' column from dimension data.")

        # Merge with the original DataFrame
        df_newly_added_with_guids = df_newly_added_with_guids.merge(
            df_dimension,
            left_on="_trit_dimension_value",
            right_on="trit_dimensionid",
            how="left",
            suffixes=("", "_dim")
        )
       # logger.info(f"🔗 Merged dimension info into main DataFrame. Records after merge: {len(df_newly_added_with_guids)}")

        df_newly_added_with_guids = df_newly_added_with_guids.drop_duplicates(
            subset=["invoicenumber", "invoicedetailid_invoice", "_trit_dimension_value"]
        )
        #logger.info(f"✅ Final shape after dropping duplicates: {df_newly_added_with_guids.shape}")

        return df_newly_added_with_guids

    except Exception as e:
        logger.error(f"An unexpected error occurred in fetch_and_map_dimensions function: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise  # Re-raise the exception after logging it


def post_invoice_dimension_data(df_newly_added_with_guids, headers):
    try:
        dimensionline_api_url = f"{accis_base_url}/api/data/v9.1/trit_dimensionlines"
        successful_links = {}  # Dictionary to store successful links
        failed_links = {}  # Dictionary to store failed links

        for _, row in df_newly_added_with_guids.iterrows():
            invoice_detail_value = row["invoicedetailid_invoice"]
            invoicenumber = row["invoicenumber"]
            dimensionvalue_id = row["trit_dimensionvalueid"]
            dimension_id = row["trit_dimensionid"]
            subject = row["subject"]

            # Ensure all required values exist
            if pd.notna(invoice_detail_value) and pd.notna(dimensionvalue_id) and pd.notna(dimension_id):
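                # The "@odata.bind" keys below are single-valued navigation-property bindings in the
                # Dataverse Web API: they associate the new dimension-line record with the existing
                # InvoiceDetail, DimensionValue and Dimension records by GUID.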
                payload = {
                    "trit_InvoiceDetail_trit_DimensionLine@odata.bind": f"/invoicedetails({str(invoice_detail_value).strip()})",
                    "trit_DimensionValue_trit_DimensionLine@odata.bind": f"/trit_dimensionvalues({str(dimensionvalue_id).strip()})",
                    "trit_Dimension_trit_DimensionLine@odata.bind": f"/trit_dimensions({str(dimension_id).strip()})",
                    "subject": str(subject).strip()
                }

                response = None  # Ensure 'response' is defined even if the request itself raises
                try:
                    response = requests.post(dimensionline_api_url, headers=headers, json=payload)
                    response.raise_for_status()

                    # Add successful links to the dictionary
                    if invoicenumber not in successful_links:
                        successful_links[invoicenumber] = []
                    successful_links[invoicenumber].append(subject)

                except Exception as e:
                    # Add failed links to the dictionary
                    if invoicenumber not in failed_links:
                        failed_links[invoicenumber] = []
                    failed_links[invoicenumber].append(subject)
                    logger.error(f"Error linking Subject '{subject}', DimensionValue '{dimensionvalue_id}', and Dimension '{dimension_id}' to InvoiceDetail '{invoicenumber}': {e}")
                    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

                    if response is not None:
                        logger.error(f"Response: {response.text}")

                time.sleep(0.5)  # Rate limiting

            else:
                logger.warning(f"Skipping InvoiceDetail '{invoicenumber}' due to missing values (DimensionValue GUID or Dimension ID).")

        # Log the successful and failed links in the requested format
        logger.info(f"Total Successful update Invoices in accis: {len(successful_links)}")
        logger.info(f"Total Failed Invoices update in accis: {len(failed_links)}")
        
        # if successful_links:
        #     successful_invoices = list(successful_links.keys())  # Extract only the invoice numbers
        #     logger.info(f"Successful Invoices: {successful_invoices}")
        #     print("Successful Invoices:", successful_invoices)  # Print the list

        successful_invoices = list(successful_links.keys())
        if successful_links:
            logger.info(f"Successful Invoices and Subjects:\n{successful_links}")
        if failed_links:
            logger.error(f"Failed Invoices and Subjects:\n{failed_links}")

        return successful_links, failed_links, successful_invoices

    except Exception as e:
        logger.error(f"An unexpected error occurred in post_invoice_dimension_data function: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        raise  # Re-raise the exception after logging it
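
# Minimal sketch (commented out) of the Dataverse headers that post_invoice_dimension_data
# expects; the real `headers` dict is built earlier in this script from the ACCIS token, and
# `accis_access_token` below is a hypothetical name used only for illustration.
# headers = {
#     "Authorization": f"Bearer {accis_access_token}",
#     "Content-Type": "application/json",
#     "Accept": "application/json",
#     "OData-MaxVersion": "4.0",
#     "OData-Version": "4.0",
# }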


successful_invoices = []  # Default so the later BC update step has a defined list even if posting stays disabled
try:
    if not df_merged_result.empty:
        #logger.info("Started : Processing valid invoice data")

        df_order_lines, df_missing_salesorder_ids = fetch_order_lines(df_merged_result, accis_base_url, fetch_filtered_data)
        df_final_merged = merge_order_lines(df_merged_result, df_order_lines)

        df_all_events, df_removed_invoices = fetch_event_data(df_final_merged, accis_base_url, fetch_filtered_data)
     
        df_invoice_not_find = pd.concat([df_missing_salesorder_ids, df_removed_invoices], ignore_index=True)
 
        df_merged = merge_event_data(df_final_merged, df_all_events)
        filtered_df = df_merged.groupby("invoiceid_invoice").first().reset_index()

        df_all_states = fetch_state_data(filtered_df, accis_base_url, fetch_filtered_data)
        final_event_data = merge_state_data(filtered_df, df_all_states)
 
        final_event_data.rename(columns={"trit_name": "state"}, inplace=True)
        final_event_data = map_sales_channel_and_delivery(final_event_data)
        final_event_data = format_final_table(final_event_data)

        accis_data_mapping = final_event_data.set_index("invoicenumber").to_dict(orient="index")

        dimension_data = fetch_dimension_lines(df_merged_result, final_event_data, accis_base_url, fetch_filtered_data, batch_size=20)
        grouped_data, invoice_subject_dict = group_and_aggregate_subjects(dimension_data)

        df_newly_added = process_invoice_data(grouped_data, invoice_subject_dict, accis_base_url)

        if df_newly_added is not None and not df_newly_added.empty:
            df_newly_added_with_guids, dimen = fetch_and_map_dimension_values(df_newly_added, accis_base_url, grouped_data)
        else:
            logger.info("Skipping fetch_and_map_dimension_values since df_newly_added is empty.")
            df_newly_added_with_guids, dimen = pd.DataFrame(), pd.DataFrame()

        if dimen is not None and not dimen.empty:
            df_newly_added_with_guids = fetch_and_map_dimensions(dimen, accis_base_url, df_newly_added_with_guids)
        else:
            logger.info("Skipping fetch_and_map_dimensions since dimen is empty.")
        
        # successful_links, failed_links, successful_invoices = post_invoice_dimension_data(df_newly_added_with_guids, headers)
        # (successful_invoices is returned as a plain list of invoice numbers)
    
except Exception as e:
    successful_invoices = []
    logger.exception(f"Error in Method 1 (main invoice processing): {e}")
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
# ------------------------- end dim update ---------------------

def send_teams_message(webhook_url, missing_data_dict):
    try:
        teams_message = pymsteams.connectorcard(webhook_url)
        teams_message.title("Missing Invoices Summary")

        teams_message.text(
            "**Hello Team,**\n\n"
            f"Below are the details of invoices in {yesterday_date}. Please verify and take the necessary action."
        )

        # Iterate through each category and create a markdown table
        for label, df in missing_data_dict.items():
            if df.empty:
                continue

            section = pymsteams.cardsection()
            section.activityTitle(label)
            section.activitySubtitle(f"Total Records: {len(df)}")

            # Create markdown table from DataFrame
            markdown_table = df.head(20).to_markdown(index=False)  # Limit to first 20 rows for readability
            section.text(f"```\n{markdown_table}\n```")

            teams_message.addSection(section)

        teams_message.send()
        # logging.info("Teams message sent with detailed tables.")

    except Exception as e:
        logging.error(f"Failed to send Teams message: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 


try:
    if not df_merged_result.empty or not licensee_invoices.empty or not corporate_invoices.empty or (
        'unique_exact_duplicates' in locals() and (
            (isinstance(unique_exact_duplicates, pd.DataFrame) and not unique_exact_duplicates.empty) or
            (isinstance(unique_exact_duplicates, list) and len(unique_exact_duplicates) > 0)
        )
    ):
        missing_data_dict = {}

        try:
            if not df_merged_result.empty:
                missing_data_dict["Category A - Missing dimension invoices in ACCIS no event data in order line"] = df_invoice_not_find[["trit_bcinvoicenumber"]]
        except Exception as e:
            logging.error(f"Error adding Category A: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

        try:
            if not licensee_invoices.empty:
                missing_data_dict["Category B - Account type 'Licensee' with missing dimension"] = licensee_invoices[["trit_bcinvoicenumber"]]
        except Exception as e:
            logging.error(f"Error adding Category B: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

        try:
            if not corporate_invoices.empty:
                missing_data_dict["Category C - Account type 'Corporate' with missing dimension"] = corporate_invoices[["trit_bcinvoicenumber"]]
        except Exception as e:
            logging.error(f"Error adding Category C: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

        try:
            if 'unique_exact_duplicates' in locals():
                if isinstance(unique_exact_duplicates, list):
                    unique_exact_duplicates = pd.DataFrame(unique_exact_duplicates)

                if not unique_exact_duplicates.empty:
                    missing_data_dict["Category D - Duplicates were found in ACCIS while checking the original invoice- These order IDs appear to have been placed twice."] = unique_exact_duplicates
        except Exception as e:
            logging.error(f"Error processing Category D (duplicates): {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)

        if missing_data_dict:
            webhook_url = ("https://qubespacein.webhook.office.com/webhookb2/5d9de417-54b6-47f4-aee3-e10693e7d804@d996ac79-e80a-4fc8-be1a-8f891b569988/IncomingWebhook/ad05cad5b545416a8ef25f5e7b056261/b187de49-aa7e-4d44-8967-d6c7f25ae53e/V24_C4l7YsaxIohQhi5SgnkrtH99Be9s4APXcGrKH9Lok1")# Your Teams webhook URL
           
            try:
                send_teams_message(webhook_url, missing_data_dict)
                logging.info("Teams message sent successfully.")
            except Exception as e:
                logging.error(f"Failed to send Teams message: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
except Exception as e:
    logging.error(f"Unexpected error in reporting missing invoice data: {e}")
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)


if df_no_data.empty:
    logging.warning("No invoices dimension update in ACCIS. Exiting the script.")
    sys.exit()

def get_bc_token(tenant_id, client_id, client_secret, max_retries=3, delay=5):
    """Fetch OAuth token for Business Central API with retry logic, even on unexpected errors."""
    # logging.info("Fetching Business Central access token...")
    token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
    token_data = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
        "scope": "https://api.businesscentral.dynamics.com/.default"
    }

    for attempt in range(max_retries):
        try:
            response = requests.post(token_url, data=token_data)
            response.raise_for_status()

            token = response.json().get("access_token")
            if not token:
                logging.error("Failed to retrieve Business Central access token. No token in response.")
                return None

            # logging.info("Business Central access token retrieved successfully.")
            return token

        except Exception as e:  # Catch both request exceptions and unexpected errors
            logging.error(f"Attempt {attempt + 1} failed: {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
            if attempt < max_retries - 1:
                logging.info(f"Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                logging.error("Max retries reached. Failed to fetch token.")
                return None


# Get access tokens
bc_access_token = get_bc_token(tenant_id, client_id, client_secret)

if not bc_access_token:
    logging.error("Failed to obtain Business Central access token. Exiting the script.")
    sys.exit()

# Headers
bc_headers = {
    "Authorization": f"Bearer {bc_access_token}",
    "Accept": "application/json",
    "Content-Type": "application/json"
}


invoice_numbers = successful_invoices


def get_sales_invoice_lines_batchwise(invoice_numbers, batch_size=20, max_retries=3):
    """Fetch sales invoice lines in batches, with retry logic and delay to avoid rate-limiting."""
    all_invoice_lines = {"value": []}
    
    bc_base_url = f"https://api.businesscentral.dynamics.com/v2.0/{tenant_id}/{environment}/ODataV4/Company('{company}')"

    for i in range(0, len(invoice_numbers), batch_size):
        batch = invoice_numbers[i:i + batch_size]
        filter_query = " or ".join([f"Document_No eq '{num}'" for num in batch])
        invoice_url = f"{bc_base_url}/SalesInvLines?$filter={filter_query}"

        attempt = 1
        while attempt <= max_retries:
            try:
                response = requests.get(invoice_url, headers=bc_headers)
                response.raise_for_status()

                batch_data = response.json().get("value", [])
                all_invoice_lines["value"].extend(batch_data)

                # logging.info(f"Fetched batch {i//batch_size + 1} (attempt {attempt}) with {len(batch)} invoices.")
                break  # Exit retry loop if successful

            except Exception as e:
                logging.warning(f"Attempt {attempt} failed for batch {i//batch_size + 1}: {e}")
                notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
                attempt += 1
                time.sleep(1)  # Wait 1 second before retrying

                if attempt > max_retries:
                    logging.error(f"Max retries exceeded for batch {i//batch_size + 1}. Skipping this batch.")
        
        time.sleep(0.1)  # Delay between batches to avoid rate-limiting

    return all_invoice_lines

# Choose one of the options based on the requirement:

# Option 1: Fetch by specific invoice numbers
# invoice_numbers = ["S-INV107171", "S-INV104475", "S-INV104593"]
# invoice_numbers = sales_invoices_df.Document_No.to_list()

# invoice_numbers = successful_invoices
# invoice_data = get_sales_invoice_lines(invoice_numbers=invoice_numbers)
# Check if successful_invoices is empty before calling the function

if successful_invoices:
    invoice_data = get_sales_invoice_lines_batchwise(invoice_numbers, batch_size=20)
else:
    invoice_data = {"value": []}

# Option 2: Fetch top 10 invoices (use Top filter)
# invoice_data = get_sales_invoice_lines(use_top=True)

# Option 3: Fetch all invoices (no filter or top)
# invoice_data = get_sales_invoice_lines()

# Convert response to DataFrame
df_original = pd.DataFrame(invoice_data.get("value", []))

if df_original.empty:
    logging.warning("No sales invoice data retrieved.")
else:
    logging.info(f"Retrieved {len(df_original)} invoice records for update bc from invoices of successful update in ACCIS.")

def filter_invoices(df):
    """Filter out invoices with specific keywords in the Description column and Line_Amount == 1.00."""
    try:
        if "Description" in df.columns and "Document_No" in df.columns and "Line_Amount" in df.columns:
            keywords = ["Subscription", "Dues", "Membership"]
            pattern = "|".join(keywords)

            # Filter for keywords
            keyword_filtered = df["Description"].str.contains(pattern, case=False, na=False)
            # Filter for Line_Amount == 1.00
            amount_filtered = df["Line_Amount"] == 1.00

            # Combine both filters (rows to remove)
            combined_filter = keyword_filtered | amount_filtered

            # Prepare removed invoices for logging
            df_removed = df[combined_filter][["Document_No", "Description", "Line_Amount"]]

            # Final filtered dataframe (those NOT matching combined filter)
            df_filtered = df[~combined_filter]

            # Logging
            logging.info("Removed Invoices (based on Decscription or Line_Amount==1.00):")
            logging.info(df_removed)
            logging.info(f"Successfully filtered based on Decscription or Line_Amount==1.00) invoices. Remaining invoices count: {len(df_filtered)}")

            return df_filtered
        else:
            missing_cols = [col for col in ["Description", "Document_No", "Line_Amount"] if col not in df.columns]
            logging.warning(f"Required columns {missing_cols} not found in the data.")
            return df
    except Exception as e:
        logging.error(f"Error filtering invoices: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return df  # Return unfiltered data if error occurs

try:
    # Rename incorrect keys to match expected names
    final_event_data = final_event_data.rename(columns={
        "event": "ShortcutDimCode6",
        "saleschannel_deliverymethod": "Shortcut_Dimension_2_Code",
        "state": "ShortcutDimCode7"
    })

    # Define required fields after renaming
    fields_needed = ["invoicenumber", "Shortcut_Dimension_2_Code", "ShortcutDimCode6", "ShortcutDimCode7"]

    # Ensure all required fields exist
    missing_fields = [field for field in fields_needed if field not in final_event_data.columns]
    if missing_fields:
        logging.error(f"Missing fields in final_event_data after renaming: {missing_fields}")
        raise KeyError(f"Columns not found: {missing_fields}")

    # Create the correct mapping
    filtered_event_data = final_event_data[fields_needed]
    accis_data_mapping = filtered_event_data.set_index("invoicenumber").to_dict(orient="index")

except Exception as e:
    logging.exception(f"Unexpected error during mapping process: {e}")
    notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
    accis_data_mapping = {}  # fallback to empty mapping if error occurs
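
# Illustrative sketch (not executed) of the mapping shape built above; the invoice number comes
# from the commented examples elsewhere in this script and the dimension codes are hypothetical
# placeholders:
# accis_data_mapping = {
#     "S-INV107171": {
#         "Shortcut_Dimension_2_Code": "<sales channel / delivery method>",
#         "ShortcutDimCode6": "<event>",
#         "ShortcutDimCode7": "<state>",
#     },
# }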

def identify_missing_data(df_original, accis_base_url, fetch_filtered_data):
    try:
        missing_data_records = []
        invoices_with_missing_data = []  # Initialize as empty list

        # Identifying missing fields
        for key in ["Shortcut_Dimension_2_Code", "ShortcutDimCode6", "ShortcutDimCode7"]:
            missing_rows = df_original[df_original[key].isna() | (df_original[key] == "")]
            for _, row in missing_rows.iterrows():
                missing_data_records.append({
                    "Document_No": row["Document_No"],
                    "Line_No": row["Line_No"],
                    "Missing Field": key
                })

        df_missing = pd.DataFrame(missing_data_records)

        if df_missing.empty:
            invoices_without_missing_data = df_original["Document_No"].unique()
            logger.info(f"No missing values for these invoices: {list(invoices_without_missing_data)}")
            return []  # Return an empty list so the caller always receives a list

        invoices_with_missing_data = df_missing["Document_No"].unique().tolist()
        logger.info(f"Invoices with missing data - {len(invoices_with_missing_data)}: {invoices_with_missing_data}")

        # Skip API call if no invoices have missing data
        if not invoices_with_missing_data:
            return []

        # # Prepare filter query for API call
        # filter_query = " or ".join([f"trit_bcinvoicenumber eq '{num}'" for num in invoices_with_missing_data])
        # invoice_api_url = f"{accis_base_url}/api/data/v9.1/invoices?$filter={filter_query}"

        # # Fetch data from API
        # try:
        #     invoice_data = fetch_filtered_data(invoice_api_url)
        #     df_invoice = pd.DataFrame(invoice_data)
        # except Exception as api_error:
        #     logger.error(f"Error fetching invoice data from API: {str(api_error)}", exc_info=True)
        #     return invoices_with_missing_data, pd.DataFrame()

        return invoices_with_missing_data

    except Exception as e:
        logger.error(f"Error identifying missing data: {str(e)}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        return []  # Return an empty list in case of an error

# Example usage
# df_filtered = filter_invoices(df_original)
invoices_with_missing_data = identify_missing_data(df_original, accis_base_url, fetch_filtered_data)

# Step 2: Fetch Correct Data with Retry Logic
def get_invoice_data_with_retries(bc_invoice_numbers, accis_data_mapping, max_retries=2, delay=5):
    try:
        # Flatten the list if it contains nested lists
        flat_invoice_numbers = [inv for sublist in bc_invoice_numbers for inv in (sublist if isinstance(sublist, list) else [sublist])]

        invoice_data = {}
        still_missing = set(flat_invoice_numbers)  # Ensure all values are hashable strings

        for attempt in range(1, max_retries + 1):
            newly_found = []
            for inv in list(still_missing):
                data = accis_data_mapping.get(inv, {})
                if data:
                    invoice_data[inv] = data
                    still_missing.remove(inv)
                    newly_found.append(inv)

            # if newly_found:
                # logging.info(f"Invoices found in ACCIS: {len(newly_found)}")

            if not still_missing:
                break

            if attempt < max_retries:
                logging.warning(f"Invoices still missing after attempt {attempt}: {len(still_missing)} - {list(still_missing)}")
                time.sleep(delay)

        if still_missing:
            logging.error(f"Invoices not found in ACCIS after {max_retries} attempts: {len(still_missing)} - {list(still_missing)}")

        return invoice_data

    except Exception as e:
        logging.exception(f"Error in get_invoice_data_with_retries: {e}")
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
        return {}  # Return empty dict if there's an error

invoice_data_to_fill = get_invoice_data_with_retries(invoices_with_missing_data, accis_data_mapping)

# Log summary
logging.info(f"Missing data: {len(invoices_with_missing_data)}, Retrieved data for {len(invoice_data_to_fill)} invoices.")


# Step 3: Update Missing Data
logs = []  # Store logs for better readability

success_invoices = set()
failed_invoices = set()
invoice_line_status = {}  # Track line update result per invoice

for index, row in df_original.iterrows():

    try:
        document_no = row["Document_No"]
        line_no = row["Line_No"]
    except Exception as e:
        logging.error(f"Unexpected error in data processing: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
        failed_invoices.add(str(row.get("Document_No", "UNKNOWN")))  # document_no may be unbound if the lookup itself failed
        continue

    try:
        correct_data = invoice_data_to_fill.get(document_no, {})
        if not correct_data:
            logging.info(f"No correct data found for Invoice: {document_no}, skipping.")
            continue
    except Exception as e:
        logging.error(f"Error fetching correct data for Invoice {document_no}: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
        failed_invoices.add(document_no)
        continue

    update_payload = {}
    update_fields = ["Shortcut_Dimension_2_Code", "ShortcutDimCode6", "ShortcutDimCode7"]

    try:
        for key in update_fields:
            if key in row and (pd.isna(row[key]) or row[key] == ""):
                if key in correct_data:
                    update_payload[key] = correct_data[key]
    except Exception as e:
        logging.error(f"Error processing update fields for Invoice {document_no}, Line {line_no}: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
        failed_invoices.add(document_no)
        continue

    if not update_payload:
        logging.info(f"No update needed for Line {line_no} (Invoice: {document_no}).")
        continue

    try:
        etag = row["@odata.etag"]
        document_type = row["Document_Type"]
        update_base_url = f"https://api.businesscentral.dynamics.com/v2.0/{tenant_id}/{environment}/ODataV4/Company('{company}')"
        update_url = f"{update_base_url}/SalesInvLines(Document_Type='{document_type}',Document_No='{document_no}',Line_No={line_no})"
    except Exception as e:
        logging.error(f"Unexpected error while constructing API request: {e}", exc_info=True)
        notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
    
        failed_invoices.add(document_no)
        continue

    patch_headers = {
        "Authorization": f"Bearer {bc_access_token}",
        "If-Match": etag,
        "Content-Type": "application/json"
    }
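    # The If-Match header carries the row's @odata.etag, so the PATCH only succeeds if the line
    # has not changed since it was read (standard OData optimistic concurrency in Business Central).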

    for attempt in range(2):  # max 2 attempts
        try:
            response = requests.patch(update_url, json=update_payload, headers=patch_headers)
            if response.status_code in [200, 204]:
                invoice_line_status.setdefault(document_no, []).append(True)
                break  # success, exit retry loop
            else:
                logging.error(f"Failed to update Line {line_no} (Invoice: {document_no}) | Status: {response.status_code} | Response: {response.text}")
                invoice_line_status.setdefault(document_no, []).append(False)
                break
        except requests.RequestException as e:
            logging.warning(f"Attempt {attempt+1} failed for Line {line_no} (Invoice: {document_no}): {e}")
            notify_teams_on_exception(TEAMS_WEBHOOK_URL, e)
 
            if attempt == 1:
                invoice_line_status.setdefault(document_no, []).append(False)

# After processing all lines, summarize by invoice
for invoice, results in invoice_line_status.items():
    if all(results):
        success_invoices.add(invoice)
    else:
        failed_invoices.add(invoice)

# Final summary
logging.info(f"BC Invoices Updated dimension Successfully: {len(success_invoices)} | Failed Invoices: {len(failed_invoices)}")
logging.info(f"BC Success dim Invoices: {sorted(success_invoices)}")
logging.info(f"BC Failed dim Invoices: {sorted(failed_invoices)}")
