Stirling-PDF/.github/scripts/check_language_properties.py

321 lines
12 KiB
Python
Raw Permalink Normal View History

"""
Author: Ludy87
Description: This script processes .properties files for localization checks. It compares translation files in a branch with
a reference file to ensure consistency. The script performs two main checks:
1. Verifies that the number of lines (including comments and empty lines) in the translation files matches the reference file.
2. Ensures that all keys in the translation files are present in the reference file and vice versa.
The script also provides functionality to update the translation files to match the reference file by adding missing keys and
adjusting the format.
Usage:
python check_language_properties.py --reference-file <path_to_reference_file> --branch <branch_name> [--actor <actor_name>] [--files <list_of_changed_files>]
"""
expand: check language properties (#2724) # Description Improvement in `check_language_properties.py`: Added support for local Windows commands to check language properties files with an updated script. Added a new optional argument `--check-file` to specify a single file to check, replacing the need to process all files in the directory. Adjusted file path handling to better support Windows paths. Update to `update-translations.yml`: Added `workflow_dispatch` trigger to allow manual execution of the workflow. Documentation update in `HowToAddNewLanguage.md`: Added instructions for running the language properties check locally on Windows, including example commands. These changes streamline the process of checking language properties files and provide additional flexibility for local testing and manual workflow triggering. ## Checklist - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have performed a self-review of my own code - [ ] I have attached images of the change if it is UI based - [x] I have commented my code, particularly in hard-to-understand areas - [ ] If my code has heavily changed functionality I have updated relevant docs on [Stirling-PDFs doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) - [x] My changes generate no new warnings - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)
2025-01-16 23:03:54 +01:00
# Sample for Windows:
# python .github/scripts/check_language_properties.py --reference-file src\main\resources\messages_en_GB.properties --branch "" --files src\main\resources\messages_de_DE.properties src\main\resources\messages_uk_UA.properties
2024-11-21 12:31:32 +01:00
import copy
import glob
import os
import argparse
import re
# Maximum size for properties files (e.g., 200 KB)
MAX_FILE_SIZE = 200 * 1024
def parse_properties_file(file_path):
"""Parses a .properties file and returns a list of objects (including comments, empty lines, and line numbers)."""
properties_list = []
with open(file_path, "r", encoding="utf-8") as file:
for line_number, line in enumerate(file, start=1):
stripped_line = line.strip()
# Empty lines
if not stripped_line:
properties_list.append(
{"line_number": line_number, "type": "empty", "content": ""}
)
continue
# Comments
if stripped_line.startswith("#"):
properties_list.append(
{
"line_number": line_number,
"type": "comment",
"content": stripped_line,
}
)
continue
# Key-value pairs
match = re.match(r"^([^=]+)=(.*)$", line)
if match:
key, value = match.groups()
properties_list.append(
{
"line_number": line_number,
"type": "entry",
"key": key.strip(),
"value": value.strip(),
}
)
return properties_list
def write_json_file(file_path, updated_properties):
updated_lines = {entry["line_number"]: entry for entry in updated_properties}
# Sort by line numbers and retain comments and empty lines
all_lines = sorted(set(updated_lines.keys()))
original_format = []
for line in all_lines:
if line in updated_lines:
entry = updated_lines[line]
else:
entry = None
ref_entry = updated_lines[line]
if ref_entry["type"] in ["comment", "empty"]:
original_format.append(ref_entry)
elif entry is None:
# Add missing entries from the reference file
original_format.append(ref_entry)
elif entry["type"] == "entry":
# Replace entries with those from the current JSON
original_format.append(entry)
# Write back in the original format
with open(file_path, "w", encoding="utf-8") as file:
for entry in original_format:
if entry["type"] == "comment":
file.write(f"{entry['content']}\n")
elif entry["type"] == "empty":
file.write(f"{entry['content']}\n")
elif entry["type"] == "entry":
file.write(f"{entry['key']}={entry['value']}\n")
def update_missing_keys(reference_file, file_list, branch=""):
reference_properties = parse_properties_file(reference_file)
for file_path in file_list:
basename_current_file = os.path.basename(os.path.join(branch, file_path))
if (
basename_current_file == os.path.basename(reference_file)
or not file_path.endswith(".properties")
or not basename_current_file.startswith("messages_")
):
continue
current_properties = parse_properties_file(os.path.join(branch, file_path))
updated_properties = []
for ref_entry in reference_properties:
ref_entry_copy = copy.deepcopy(ref_entry)
for current_entry in current_properties:
if current_entry["type"] == "entry":
if ref_entry_copy["type"] != "entry":
continue
if ref_entry_copy["key"] == current_entry["key"]:
ref_entry_copy["value"] = current_entry["value"]
updated_properties.append(ref_entry_copy)
write_json_file(os.path.join(branch, file_path), updated_properties)
def check_for_missing_keys(reference_file, file_list, branch):
update_missing_keys(reference_file, file_list, branch)
def read_properties(file_path):
if os.path.isfile(file_path) and os.path.exists(file_path):
with open(file_path, "r", encoding="utf-8") as file:
return file.read().splitlines()
return [""]
2024-11-21 12:31:32 +01:00
def check_for_differences(reference_file, file_list, branch, actor):
reference_branch = reference_file.split("/")[0]
basename_reference_file = os.path.basename(reference_file)
report = []
2024-11-23 23:09:46 +01:00
report.append(f"#### 🔄 Reference Branch: `{reference_branch}`")
reference_lines = read_properties(reference_file)
has_differences = False
only_reference_file = True
file_arr = file_list
if len(file_list) == 1:
file_arr = file_list[0].split()
base_dir = os.path.abspath(os.path.join(os.getcwd(), "src", "main", "resources"))
for file_path in file_arr:
absolute_path = os.path.abspath(file_path)
# Verify that file is within the expected directory
if not absolute_path.startswith(base_dir):
raise ValueError(f"Unsafe file found: {file_path}")
# Verify file size before processing
if os.path.getsize(os.path.join(branch, file_path)) > MAX_FILE_SIZE:
raise ValueError(
f"The file {file_path} is too large and could pose a security risk."
)
basename_current_file = os.path.basename(os.path.join(branch, file_path))
if (
basename_current_file == basename_reference_file
expand: check language properties (#2724) # Description Improvement in `check_language_properties.py`: Added support for local Windows commands to check language properties files with an updated script. Added a new optional argument `--check-file` to specify a single file to check, replacing the need to process all files in the directory. Adjusted file path handling to better support Windows paths. Update to `update-translations.yml`: Added `workflow_dispatch` trigger to allow manual execution of the workflow. Documentation update in `HowToAddNewLanguage.md`: Added instructions for running the language properties check locally on Windows, including example commands. These changes streamline the process of checking language properties files and provide additional flexibility for local testing and manual workflow triggering. ## Checklist - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have performed a self-review of my own code - [ ] I have attached images of the change if it is UI based - [x] I have commented my code, particularly in hard-to-understand areas - [ ] If my code has heavily changed functionality I have updated relevant docs on [Stirling-PDFs doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) - [x] My changes generate no new warnings - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)
2025-01-16 23:03:54 +01:00
or (
# only local windows command
not file_path.startswith(
os.path.join("", "src", "main", "resources", "messages_")
)
and not file_path.startswith(
os.path.join(os.getcwd(), "src", "main", "resources", "messages_")
)
)
or not file_path.endswith(".properties")
or not basename_current_file.startswith("messages_")
):
continue
only_reference_file = False
2024-11-23 23:09:46 +01:00
report.append(f"#### 📃 **File Check:** `{basename_current_file}`")
current_lines = read_properties(os.path.join(branch, file_path))
reference_line_count = len(reference_lines)
current_line_count = len(current_lines)
if reference_line_count != current_line_count:
report.append("")
2024-11-23 23:09:46 +01:00
report.append("1. **Test Status:** ❌ **_Failed_**")
report.append(" - **Issue:**")
has_differences = True
if reference_line_count > current_line_count:
report.append(
2024-11-23 23:09:46 +01:00
f" - **_Mismatched line count_**: {reference_line_count} (reference) vs {current_line_count} (current). Comments, empty lines, or translation strings are missing."
)
elif reference_line_count < current_line_count:
report.append(
2024-11-23 23:09:46 +01:00
f" - **_Too many lines_**: {reference_line_count} (reference) vs {current_line_count} (current). Please verify if there is an additional line that needs to be removed."
)
else:
2024-11-23 23:09:46 +01:00
report.append("1. **Test Status:** ✅ **_Passed_**")
# Check for missing or extra keys
current_keys = []
reference_keys = []
for line in current_lines:
if not line.startswith("#") and line != "" and "=" in line:
key, _ = line.split("=", 1)
current_keys.append(key)
for line in reference_lines:
if not line.startswith("#") and line != "" and "=" in line:
key, _ = line.split("=", 1)
reference_keys.append(key)
current_keys_set = set(current_keys)
reference_keys_set = set(reference_keys)
missing_keys = current_keys_set.difference(reference_keys_set)
extra_keys = reference_keys_set.difference(current_keys_set)
missing_keys_list = list(missing_keys)
extra_keys_list = list(extra_keys)
if missing_keys_list or extra_keys_list:
has_differences = True
missing_keys_str = "`, `".join(missing_keys_list)
extra_keys_str = "`, `".join(extra_keys_list)
2024-11-23 23:09:46 +01:00
report.append("2. **Test Status:** ❌ **_Failed_**")
report.append(" - **Issue:**")
if missing_keys_list:
2024-11-23 23:09:46 +01:00
spaces_keys_list = []
for key in missing_keys_list:
if " " in key:
2024-11-23 23:09:46 +01:00
spaces_keys_list.append(key)
if spaces_keys_list:
spaces_keys_str = "`, `".join(spaces_keys_list)
report.append(
f" - **_Keys containing unnecessary spaces_**: `{spaces_keys_str}`!"
)
report.append(
2024-11-23 23:09:46 +01:00
f" - **_Extra keys in `{basename_current_file}`_**: `{missing_keys_str}` that are not present in **_`{basename_reference_file}`_**."
)
if extra_keys_list:
report.append(
2024-11-23 23:09:46 +01:00
f" - **_Missing keys in `{basename_reference_file}`_**: `{extra_keys_str}` that are not present in **_`{basename_current_file}`_**."
)
else:
2024-11-23 23:09:46 +01:00
report.append("2. **Test Status:** ✅ **_Passed_**")
report.append("")
report.append("---")
report.append("")
if has_differences:
report.append("## ❌ Overall Check Status: **_Failed_**")
2024-11-21 12:31:32 +01:00
report.append("")
report.append(
f"@{actor} please check your translation if it conforms to the standard. Follow the format of [messages_en_GB.properties](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/src/main/resources/messages_en_GB.properties)"
)
else:
report.append("## ✅ Overall Check Status: **_Success_**")
2024-11-21 12:31:32 +01:00
report.append("")
report.append(
f"Thanks @{actor} for your help in keeping the translations up to date."
)
if not only_reference_file:
print("\n".join(report))
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Find missing keys")
2024-11-21 12:31:32 +01:00
parser.add_argument(
"--actor",
required=False,
help="Actor from PR.",
)
parser.add_argument(
"--reference-file",
required=True,
help="Path to the reference file.",
)
parser.add_argument(
"--branch",
type=str,
required=True,
help="Branch name.",
)
expand: check language properties (#2724) # Description Improvement in `check_language_properties.py`: Added support for local Windows commands to check language properties files with an updated script. Added a new optional argument `--check-file` to specify a single file to check, replacing the need to process all files in the directory. Adjusted file path handling to better support Windows paths. Update to `update-translations.yml`: Added `workflow_dispatch` trigger to allow manual execution of the workflow. Documentation update in `HowToAddNewLanguage.md`: Added instructions for running the language properties check locally on Windows, including example commands. These changes streamline the process of checking language properties files and provide additional flexibility for local testing and manual workflow triggering. ## Checklist - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have performed a self-review of my own code - [ ] I have attached images of the change if it is UI based - [x] I have commented my code, particularly in hard-to-understand areas - [ ] If my code has heavily changed functionality I have updated relevant docs on [Stirling-PDFs doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) - [x] My changes generate no new warnings - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)
2025-01-16 23:03:54 +01:00
parser.add_argument(
"--check-file",
type=str,
required=False,
help="List of changed files, separated by spaces.",
)
parser.add_argument(
"--files",
nargs="+",
required=False,
help="List of changed files, separated by spaces.",
)
args = parser.parse_args()
# Sanitize --actor input to avoid injection attacks
if args.actor:
args.actor = re.sub(r"[^a-zA-Z0-9_\\-]", "", args.actor)
# Sanitize --branch input to avoid injection attacks
if args.branch:
args.branch = re.sub(r"[^a-zA-Z0-9\\-]", "", args.branch)
file_list = args.files
if file_list is None:
expand: check language properties (#2724) # Description Improvement in `check_language_properties.py`: Added support for local Windows commands to check language properties files with an updated script. Added a new optional argument `--check-file` to specify a single file to check, replacing the need to process all files in the directory. Adjusted file path handling to better support Windows paths. Update to `update-translations.yml`: Added `workflow_dispatch` trigger to allow manual execution of the workflow. Documentation update in `HowToAddNewLanguage.md`: Added instructions for running the language properties check locally on Windows, including example commands. These changes streamline the process of checking language properties files and provide additional flexibility for local testing and manual workflow triggering. ## Checklist - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have performed a self-review of my own code - [ ] I have attached images of the change if it is UI based - [x] I have commented my code, particularly in hard-to-understand areas - [ ] If my code has heavily changed functionality I have updated relevant docs on [Stirling-PDFs doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) - [x] My changes generate no new warnings - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)
2025-01-16 23:03:54 +01:00
if args.check_file:
file_list = [args.check_file]
else:
file_list = glob.glob(
os.path.join(
os.getcwd(), "src", "main", "resources", "messages_*.properties"
)
)
update_missing_keys(args.reference_file, file_list)
else:
2024-11-21 12:31:32 +01:00
check_for_differences(args.reference_file, file_list, args.branch, args.actor)