def update_excel_with_direct_xml(excel_path, store_name):
    """
    Replace every ``{store_name}`` placeholder inside an Excel workbook by
    editing the XML parts of the .xlsx zip archive directly.

    The original workbook is only read. The result is written to a sibling
    file named ``<stem>_modified<ext>`` in the same directory as the input.

    Args:
        excel_path: Path to the Excel file (absolute or relative).
        store_name: The store name to replace {store_name} with. It is
            XML-escaped before insertion so characters such as ``&``, ``<``,
            ``>`` and ``"`` cannot corrupt the workbook XML.

    Returns:
        str: Absolute path to the modified Excel file, or None if any error
        occurred (the error and traceback are printed).
    """
    # Local import: the only name this block needs beyond the module's
    # existing ``os`` / ``zipfile`` imports.
    from xml.sax.saxutils import escape

    placeholder = '{store_name}'
    modified_file = None
    output_started = False
    try:
        print(f"Using direct XML modification to replace '{{store_name}}' with '{store_name}'...")

        # Derive the output path from the absolute input path. Splitting off
        # the extension (instead of str.replace on '.xlsx') keeps the output
        # distinct from the input for any extension, and avoids duplicating
        # directory components when excel_path is relative.
        source_path = os.path.abspath(excel_path)
        base, ext = os.path.splitext(source_path)
        modified_file = f"{base}_modified{ext}"

        # The placeholder can sit in element text (formulas, shared strings)
        # or in attribute values (sheet names), so escape quotes as well.
        # NOTE: formula-level quoting (doubling apostrophes inside quoted
        # sheet names) is not handled here.
        xml_safe_name = escape(store_name, {'"': '&quot;'})

        total_replacements = 0

        # Read straight from the source archive; no scratch copy is needed
        # because the input is never opened for writing.
        with ZipFile(source_path, 'r') as zip_in:
            output_started = True
            with ZipFile(modified_file, 'w', ZIP_DEFLATED) as zip_out:
                for item in zip_in.infolist():
                    content = zip_in.read(item.filename)

                    # Only XML parts can contain formulas or text.
                    if item.filename.endswith('.xml'):
                        try:
                            text_content = content.decode('utf-8')
                        except UnicodeDecodeError:
                            # Not UTF-8 text; copy the part through untouched.
                            text_content = None

                        occurrences = text_content.count(placeholder) if text_content else 0
                        if occurrences:
                            total_replacements += occurrences
                            content = text_content.replace(placeholder, xml_safe_name).encode('utf-8')
                            print(f"Replaced {occurrences} instances of '{{store_name}}' in {item.filename}")

                    # Reusing the ZipInfo preserves each member's name,
                    # timestamp and compression type.
                    zip_out.writestr(item, content)

        print(f"Total replacements: {total_replacements}")
        print(f"Modified Excel file saved as: {modified_file}")

        return modified_file

    except Exception as e:
        print(f"Error updating Excel file: {e}")
        import traceback
        traceback.print_exc()
        # Do not leave a half-written archive behind; only remove the file
        # if this call is the one that started writing it.
        if output_started and modified_file and os.path.exists(modified_file):
            try:
                os.remove(modified_file)
            except OSError:
                pass
        return None
original + print(f"Using modified file: {modified_file}") + # Copy the modified file back to the original location + import shutil + shutil.copy2(modified_file, excel_path) + # Remove the modified file + os.remove(modified_file) + # Reload the workbook to get the changes + wb = openpyxl.load_workbook(excel_path) # Save the workbook wb.save(excel_path) @@ -239,14 +251,131 @@ def update_formula_references(workbook, sheet_name_mapping): # Print all cells in the Graphics sheet that have formulas print("\nDiagnostic: Checking all cells with formulas in Graphics sheet:") formula_cells_diagnostic = [] + + # Special check for rows 25-27 + print("\nDiagnostic: Checking cells in rows 25-27:") + + # Define columns to check, focusing on G through AG + columns_to_check = ['C', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'AA', 'AB', 'AC', 'AD', 'AE', 'AF', 'AG'] + + # Cells with ArrayFormula objects that need special handling + array_formula_cells = [] + + # Check each cell in rows 25-27 for all specified columns + for row_num in range(25, 28): # rows 25, 26, 27 + for col in columns_to_check: + cell_coord = f"{col}{row_num}" + try: + cell = graphics_sheet[cell_coord] + # Print cell information regardless of whether it has a formula + if cell.value is not None: + value_type = cell.data_type + value_preview = str(cell.value)[:50] if cell.value else "None" + print(f"Row {row_num}, Cell {cell_coord}: Type={value_type}, Value={value_preview}...") + + # Check if it's a formula that references our sheets + if cell.data_type == 'f': + if isinstance(cell.value, str) and any(old_name in cell.value for old_name in sheet_name_mapping.keys()): + formula_cells_diagnostic.append(f"{cell_coord}: {cell.value[:50]}...") + # Add this cell to our special handling + if cell_coord not in special_cells: + special_cells.append(cell_coord) + # Check for ArrayFormula objects + elif str(cell.value).startswith("