From 3bb09839aebd54e18bf3dbfc3d8329fe16048a62 Mon Sep 17 00:00:00 2001 From: denisacirstea Date: Fri, 12 Sep 2025 14:02:53 +0300 Subject: [PATCH] Fix Excel corruption issue when modifying XML content --- update_excel.py | 84 ++++++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/update_excel.py b/update_excel.py index 4ac71d2..494b8f3 100755 --- a/update_excel.py +++ b/update_excel.py @@ -248,7 +248,7 @@ def update_excel_with_direct_xml(excel_path, store_name): # Create a temporary file for modification temp_dir = os.path.dirname(os.path.abspath(excel_path)) - temp_file = os.path.join(temp_dir, "_temp_for_xml_edit.xlsx") + temp_file = os.path.join(temp_dir, f"_temp_{os.path.basename(excel_path)}") # Make a copy of the original file import shutil @@ -257,46 +257,50 @@ def update_excel_with_direct_xml(excel_path, store_name): # Count of replacements total_replacements = 0 - # Process the Excel file - with ZipFile(temp_file, 'r') as zip_in: - with ZipFile(excel_path, 'w', ZIP_DEFLATED) as zip_out: - # Process each file in the zip - for item in zip_in.infolist(): - content = zip_in.read(item.filename) - - # Only modify XML files that might contain formulas or text - if item.filename.endswith('.xml'): - # Skip sheet8.xml which is the Variables sheet (based on common Excel structure) - if 'sheet8.xml' in item.filename: - print(f"Skipping Variables sheet: {item.filename}") - else: - # Convert to string for text replacement - try: - text_content = content.decode('utf-8') - - # Check if this file contains our placeholder - if '{store_name}' in text_content: - # Count occurrences before replacement - occurrences = text_content.count('{store_name}') - total_replacements += occurrences - - # Replace all instances of {store_name} with the actual store name - modified_content = text_content.replace('{store_name}', store_name) - - # Convert back to bytes - content = modified_content.encode('utf-8') - - print(f"Replaced {occurrences} instances of '{{store_name}}' in {item.filename}") - except UnicodeDecodeError: - # Not a text file, leave as is - pass - - # Write the file (original or modified) to the new zip - zip_out.writestr(item, content) + # Process the Excel file - use a safer approach + # First read all files from the zip + files_data = {} + with ZipFile(excel_path, 'r') as zip_ref: + for item in zip_ref.infolist(): + files_data[item.filename] = (zip_ref.read(item.filename), item) - # Clean up the temporary file - if os.path.exists(temp_file): - os.remove(temp_file) + # Modify the content + for filename, (content, item) in files_data.items(): + # Only modify XML files that might contain formulas or text + if filename.endswith('.xml') or filename.endswith('.rels'): + # Skip sheet8.xml which is the Variables sheet (based on common Excel structure) + if 'sheet8.xml' in filename: + print(f"Skipping Variables sheet: {filename}") + continue + + # Convert to string for text replacement + try: + text_content = content.decode('utf-8') + + # Check if this file contains our placeholder + if '{store_name}' in text_content: + # Count occurrences before replacement + occurrences = text_content.count('{store_name}') + total_replacements += occurrences + + # Replace all instances of {store_name} with the actual store name + modified_content = text_content.replace('{store_name}', store_name) + + # Convert back to bytes + files_data[filename] = (modified_content.encode('utf-8'), item) + + print(f"Replaced {occurrences} instances of '{{store_name}}' in {filename}") + except UnicodeDecodeError: + # Not a text file, leave as is + pass + + # Write the modified zip file + with ZipFile(temp_file, 'w', ZIP_DEFLATED) as zip_out: + for filename, (content, item) in files_data.items(): + zip_out.writestr(filename, content) + + # Replace the original file with the modified one + shutil.move(temp_file, excel_path) print(f"Total replacements: {total_replacements}") return True