From 8353c399d2ae11ac1a6a2e2083880a34783e2dc0 Mon Sep 17 00:00:00 2001 From: Ludy Date: Mon, 20 Jan 2025 11:52:23 +0100 Subject: [PATCH] Add: check for duplicate keys (#2749) # Description of Changes Introduces a check for duplicate keys in the language `.properties` files; any duplicates found are listed in the report as a failed third test. --- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. 
--- .github/scripts/check_language_properties.py | 78 ++++++++++++++++++-- 1 file changed, 71 insertions(+), 7 deletions(-) diff --git a/.github/scripts/check_language_properties.py b/.github/scripts/check_language_properties.py index 0496d3512..70e63822c 100644 --- a/.github/scripts/check_language_properties.py +++ b/.github/scripts/check_language_properties.py @@ -21,25 +21,60 @@ import argparse import re +def find_duplicate_keys(file_path): + """ + Identifies duplicate keys in a .properties file. + :param file_path: Path to the .properties file. + :return: List of tuples (key, first_occurrence_line, duplicate_line). + """ + keys = {} + duplicates = [] + + with open(file_path, "r", encoding="utf-8") as file: + for line_number, line in enumerate(file, start=1): + stripped_line = line.strip() + + # Skip empty lines and comments + if not stripped_line or stripped_line.startswith("#"): + continue + + # Split the line into key and value + if "=" in stripped_line: + key, _ = stripped_line.split("=", 1) + key = key.strip() + + # Check if the key already exists + if key in keys: + duplicates.append((key, keys[key], line_number)) + else: + keys[key] = line_number + + return duplicates + + # Maximum size for properties files (e.g., 200 KB) MAX_FILE_SIZE = 200 * 1024 def parse_properties_file(file_path): - """Parses a .properties file and returns a list of objects (including comments, empty lines, and line numbers).""" + """ + Parses a .properties file and returns a structured list of its contents. + :param file_path: Path to the .properties file. + :return: List of dictionaries representing each line in the file. 
+ """ properties_list = [] with open(file_path, "r", encoding="utf-8") as file: for line_number, line in enumerate(file, start=1): stripped_line = line.strip() - # Empty lines + # Handle empty lines if not stripped_line: properties_list.append( {"line_number": line_number, "type": "empty", "content": ""} ) continue - # Comments + # Handle comments if stripped_line.startswith("#"): properties_list.append( { @@ -50,7 +85,7 @@ def parse_properties_file(file_path): ) continue - # Key-value pairs + # Handle key-value pairs match = re.match(r"^([^=]+)=(.*)$", line) if match: key, value = match.groups() @@ -67,9 +102,14 @@ def parse_properties_file(file_path): def write_json_file(file_path, updated_properties): + """ + Writes updated properties back to the file in their original format. + :param file_path: Path to the .properties file. + :param updated_properties: List of updated properties to write. + """ updated_lines = {entry["line_number"]: entry for entry in updated_properties} - # Sort by line numbers and retain comments and empty lines + # Sort lines by their numbers and retain comments and empty lines all_lines = sorted(set(updated_lines.keys())) original_format = [] @@ -88,8 +128,8 @@ def write_json_file(file_path, updated_properties): # Replace entries with those from the current JSON original_format.append(entry) - # Write back in the original format - with open(file_path, "w", encoding="utf-8") as file: + # Write the updated content back to the file + with open(file_path, "w", encoding="utf-8", newline="\n") as file: for entry in original_format: if entry["type"] == "comment": file.write(f"{entry['content']}\n") @@ -100,6 +140,12 @@ def write_json_file(file_path, updated_properties): def update_missing_keys(reference_file, file_list, branch=""): + """ + Updates missing keys in the translation files based on the reference file. + :param reference_file: Path to the reference .properties file. + :param file_list: List of translation files to update. 
+ :param branch: Branch where the files are located. + """ reference_properties = parse_properties_file(reference_file) for file_path in file_list: basename_current_file = os.path.basename(os.path.join(branch, file_path)) @@ -245,6 +291,24 @@ def check_for_differences(reference_file, file_list, branch, actor): ) else: report.append("2. **Test Status:** ✅ **_Passed_**") + + if find_duplicate_keys(os.path.join(branch, file_path)): + has_differences = True + output = "\n".join( + [ + f" - `{key}`: first at line {first}, duplicate at `line {duplicate}`" + for key, first, duplicate in find_duplicate_keys( + os.path.join(branch, file_path) + ) + ] + ) + report.append("3. **Test Status:** ❌ **_Failed_**") + report.append(" - **Issue:**") + report.append(" - duplicate entries were found:") + report.append(output) + else: + report.append("3. **Test Status:** ✅ **_Passed_**") + report.append("") report.append("---") report.append("")