diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index b909f28e8..d1287c011 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -27,6 +27,10 @@ Closes #(issue_number) - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) +### Translations (if applicable) + +- [ ] I ran [`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md) + ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) diff --git a/docs/counter_translation.md b/docs/counter_translation.md new file mode 100644 index 000000000..b2cdd7445 --- /dev/null +++ b/docs/counter_translation.md @@ -0,0 +1,64 @@ +# `counter_translation.py` + +## Overview + +The script [`scripts/counter_translation.py`](../scripts/counter_translation.py) checks the translation progress of the property files in the directory `app/core/src/main/resources/`. +It compares each `messages_*.properties` file with the English reference file `messages_en_GB.properties` and calculates a percentage of completion for each language. + +In addition to console output, the script automatically updates the progress badges in the project’s `README.md` and maintains the configuration file [`scripts/ignore_translation.toml`](../scripts/ignore_translation.toml), which lists translation keys to be ignored for each language. + +## Requirements + +- Python 3.10 or newer, with the third-party `tomlkit` package installed. +- Must be executed **from the project root directory** so all relative paths are resolved correctly.
+- Write permissions for `README.md` and `scripts/ignore_translation.toml`. + +## Default usage + +```bash +python scripts/counter_translation.py +``` + +This command: + +1. scans `app/core/src/main/resources/` for all `messages_*.properties` files, +2. calculates the translation progress for each file, +3. updates the badges in `README.md`, +4. reformats `scripts/ignore_translation.toml` (sorted, multi-line arrays). + +## Check a single language + +```bash +python scripts/counter_translation.py --lang messages_fr_FR.properties +``` + +- The specified file can be given as a relative (to the resources folder) or absolute path. +- The result is printed to the console (e.g. `fr_FR: 87% translated`). +- With `--show-missing-keys`, all untranslated keys are listed as well. + +## Output only the percentage + +For scripts or CI pipelines, the output can be reduced to just the percentage value: + +```bash +python scripts/counter_translation.py --lang messages_fr_FR.properties --show-percentage +``` + +The console will then only print `87` (without the percent symbol or any extra text). + +## Handling `ignore_translation.toml` + +- If a language section is missing, the script creates it automatically. +- Entries in `ignore` are alphabetically sorted and written as multi-line arrays. +- By default, `language.direction` is ignored. If that key is later translated, the script automatically removes it from the ignore list. + +## Integration in Pull Requests + +Whenever translations are updated, this script should be executed. +The updated badges and the modified `ignore_translation.toml` should be committed together with the changed `messages_*.properties` files. + +## Troubleshooting + +- **File not found**: Check the path or use `--lang` with an absolute path. +- **Line error**: The script reports the specific line in both files—this usually means a missing `=` or an unmatched line. 
+- **Incorrect percentages in README**: Make sure the script was run from the project root and that write permissions are available. diff --git a/scripts/counter_translation.py b/scripts/counter_translation.py index c17e012e6..cbcd5f1de 100644 --- a/scripts/counter_translation.py +++ b/scripts/counter_translation.py @@ -1,20 +1,56 @@ -"""A script to update language progress status in README.md based on +""" +A script to update language progress status in README.md based on properties file comparison. -This script compares default properties file with others in a directory to -determine language progress. -It then updates README.md based on provided progress list. +This script compares the default (reference) properties file, usually +`messages_en_GB.properties`, with other translation files in the +`app/core/src/main/resources/` directory. +It determines how many lines are fully translated and automatically updates +progress badges in the `README.md`. + +Additionally, it maintains a TOML configuration file +(`scripts/ignore_translation.toml`) that defines which keys are ignored +during comparison (e.g., static values like `language.direction`). Author: Ludy87 -Example: - To use this script, simply run it from command line: - $ python counter_translation.py -""" # noqa: D205 +Usage: + Run this script directly from the project root. 
+ # --- Compare all translation files and update README.md --- + $ python scripts/counter_translation.py + + This will: + • Compare all files matching messages_*.properties + • Update progress badges in README.md + • Update/format ignore_translation.toml automatically + + # --- Check a single language file --- + $ python scripts/counter_translation.py --lang messages_fr_FR.properties + + This will: + • Compare the French translation file against the English reference + • Print the translation percentage in the console + + # --- Print ONLY the percentage (for CI pipelines or automation) --- + $ python scripts/counter_translation.py --lang messages_fr_FR.properties --show-percentage + + Example output: + 87 + +Arguments: + -l, --lang Specific properties file to check + (relative or absolute path). + --show-percentage Print only the percentage (no formatting, ideal for CI/CD). + --show-missing-keys Show the list of missing keys when checking a single language file. +""" + +import argparse import glob import os import re +import sys +from typing import Iterable import tomlkit import tomlkit.toml_file @@ -22,14 +58,15 @@ import tomlkit.toml_file def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument: """Converts 'ignore' and 'missing' arrays to multiline arrays and sorts the first-level keys of the TOML document. + Enhances readability and consistency in the TOML file by ensuring arrays contain unique and sorted entries. - Parameters: + Args: data (tomlkit.TOMLDocument): The original TOML document containing the data. Returns: tomlkit.TOMLDocument: A new TOML document with sorted keys and properly formatted arrays. 
- """ # noqa: D205 + """ sorted_data = tomlkit.document() for key in sorted(data.keys()): value = data[key] @@ -52,16 +89,19 @@ def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument: def write_readme(progress_list: list[tuple[str, int]]) -> None: - """Updates the progress status in the README.md file based - on the provided progress list. + """Updates the progress status in the README.md file based on the provided progress list. - Parameters: + This function reads the existing README.md content, identifies lines containing + language-specific progress badges, and replaces the percentage values and URLs + with the new progress data. + + Args: progress_list (list[tuple[str, int]]): A list of tuples containing - language and progress percentage. + language codes (e.g., 'fr_FR') and progress percentages (integers from 0 to 100). Returns: None - """ # noqa: D205 + """ with open("README.md", encoding="utf-8") as file: content = file.readlines() @@ -80,9 +120,21 @@ def write_readme(progress_list: list[tuple[str, int]]) -> None: def load_reference_keys(default_file_path: str) -> set[str]: - """Reads ALL keys from the reference file (excluding comments and empty lines).""" + """Reads all keys from the reference properties file (excluding comments and empty lines). + + This function skips the first 5 lines (assumed to be headers or metadata) and then + extracts keys from lines containing '=' separators, ignoring comments (#) and empty lines. + It also handles potential BOM (Byte Order Mark) characters. + + Args: + default_file_path (str): The path to the default (reference) properties file. + + Returns: + set[str]: A set of unique keys found in the reference file. 
+ """ keys: set[str] = set() with open(default_file_path, encoding="utf-8") as f: + # Skip the first 5 lines (headers) for _ in range(5): try: next(f) @@ -98,20 +150,49 @@ def load_reference_keys(default_file_path: str) -> set[str]: return keys -def compare_files( - default_file_path, file_paths, ignore_translation_file -) -> list[tuple[str, int]]: - """Compares the default properties file with other - properties files in the directory. +def _lang_from_path(file_path: str) -> str: + """Extracts the language code from a properties file path. - Parameters: - default_file_path (str): The path to the default properties file. - files_directory (str): The directory containing other properties files. + Assumes the filename format is 'messages_<lang>.properties', where + <lang> is the language code, e.g. 'fr_FR'. + + Args: + file_path (str): The full path to the properties file. Returns: - list[tuple[str, int]]: A list of tuples containing - language and progress percentage. - """ # noqa: D205 + str: The extracted language code. + """ + return ( + os.path.basename(file_path).split("messages_", 1)[1].split(".properties", 1)[0] + ) + + +def compare_files( + default_file_path: str, + file_paths: Iterable[str], + ignore_translation_file: str, + show_missing_keys: bool = False, + show_percentage: bool = False, +) -> list[tuple[str, int]]: + """Compares the default properties file with other properties files in the directory. + + This function calculates translation progress for each language file by comparing + keys and values line-by-line, skipping headers. It accounts for ignored keys defined + in a TOML configuration file and updates that file with cleaned ignore lists. + English variants (en_GB, en_US) are hardcoded to 100% progress. + + Args: + default_file_path (str): The path to the default properties file (reference). + file_paths (Iterable[str]): Iterable of paths to properties files to compare.
+ ignore_translation_file (str): Path to the TOML file with ignore/missing configurations per language. + show_missing_keys (bool, optional): If True, prints the list of missing keys for each file. Defaults to False. + show_percentage (bool, optional): If True, suppresses detailed output and focuses on percentage calculation. Defaults to False. + + Returns: + list[tuple[str, int]]: A sorted list of tuples containing language codes and progress percentages + (descending order by percentage). Duplicates are removed. + """ + # Count total translatable lines in reference (excluding empty and comments) num_lines = sum( 1 for line in open(default_file_path, encoding="utf-8") @@ -120,29 +201,29 @@ def compare_files( ref_keys: set[str] = load_reference_keys(default_file_path) - result_list = [] + result_list: list[tuple[str, int]] = [] sort_ignore_translation: tomlkit.TOMLDocument - # read toml - with open(ignore_translation_file, encoding="utf-8") as f: - sort_ignore_translation = tomlkit.parse(f.read()) + # Read or initialize TOML config + if os.path.exists(ignore_translation_file): + with open(ignore_translation_file, encoding="utf-8") as f: + sort_ignore_translation = tomlkit.parse(f.read()) + else: + sort_ignore_translation = tomlkit.document() for file_path in file_paths: - language = ( - os.path.basename(file_path) - .split("messages_", 1)[1] - .split(".properties", 1)[0] - ) + language = _lang_from_path(file_path) - fails = 0 + # Hardcode English variants to 100% if "en_GB" in language or "en_US" in language: - result_list.append(("en_GB", 100)) - result_list.append(("en_US", 100)) + result_list.append((language, 100)) continue + # Initialize language table in TOML if missing if language not in sort_ignore_translation: sort_ignore_translation[language] = tomlkit.table() + # Ensure default ignore list if empty if ( "ignore" not in sort_ignore_translation[language] or len(sort_ignore_translation[language].get("ignore", [])) < 1 @@ -158,95 +239,175 @@ def 
compare_files( if key in ref_keys or key == "language.direction" ] - # debug: add all keys from ref to ignore - # sort_ignore_translation[language]["ignore"] = list(ref_keys) - # continue # debug end - - # if "missing" not in sort_ignore_translation[language]: - # sort_ignore_translation[language]["missing"] = tomlkit.array() - # elif "language.direction" in sort_ignore_translation[language]["missing"]: - # sort_ignore_translation[language]["missing"].remove("language.direction") - + fails = 0 + missing_str_keys: list[str] = [] with ( open(default_file_path, encoding="utf-8") as default_file, open(file_path, encoding="utf-8") as file, ): + # Skip headers (first 5 lines) in both files for _ in range(5): next(default_file) try: next(file) except StopIteration: fails = num_lines + break for line_num, (line_default, line_file) in enumerate( zip(default_file, file), start=6 ): try: - # Ignoring empty lines and lines start with # + # Ignoring empty lines and lines starting with # if line_default.strip() == "" or line_default.startswith("#"): continue default_key, default_value = line_default.split("=", 1) file_key, file_value = line_file.split("=", 1) + default_key = default_key.strip() + default_value = default_value.strip() + file_key = file_key.strip() + file_value = file_value.strip() + if ( - default_value.strip() == file_value.strip() - and default_key.strip() + default_value == file_value + and default_key not in sort_ignore_translation[language]["ignore"] ): - print( - f"{language}: Line {line_num} is missing the translation." 
- ) - # if default_key.strip() not in sort_ignore_translation[language]["missing"]: - # missing_array = tomlkit.array() - # missing_array.append(default_key.strip()) - # missing_array.multiline(True) - # sort_ignore_translation[language]["missing"].extend(missing_array) + # Missing translation (same as default and not ignored) fails += 1 - # elif default_key.strip() in sort_ignore_translation[language]["ignore"]: - # if default_key.strip() in sort_ignore_translation[language]["missing"]: - # sort_ignore_translation[language]["missing"].remove(default_key.strip()) - if default_value.strip() != file_value.strip(): - # if default_key.strip() in sort_ignore_translation[language]["missing"]: - # sort_ignore_translation[language]["missing"].remove(default_key.strip()) - if ( - default_key.strip() - in sort_ignore_translation[language]["ignore"] - ): + missing_str_keys.append(default_key) + if default_value != file_value: + if default_key in sort_ignore_translation[language]["ignore"]: + # Remove from ignore if actually translated sort_ignore_translation[language]["ignore"].remove( - default_key.strip() + default_key ) except ValueError as e: print(f"Error processing line {line_num} in {file_path}: {e}") print(f"{line_default}|{line_file}") - exit(1) + sys.exit(1) except IndexError: - pass + # Handle mismatched line counts + fails += 1 + continue + + if show_missing_keys: + if len(missing_str_keys) > 0: + print(f" Missing keys: {missing_str_keys}") + else: + print(" No missing keys!") + + if not show_percentage: + print(f"{language}: {fails} out of {num_lines} lines are not translated.") - print(f"{language}: {fails} out of {num_lines} lines are not translated.") result_list.append( ( language, int((num_lines - fails) * 100 / num_lines), ) ) + + # Write cleaned and formatted TOML back ignore_translation = convert_to_multiline(sort_ignore_translation) with open(ignore_translation_file, "w", encoding="utf-8", newline="\n") as file: 
file.write(tomlkit.dumps(ignore_translation)) + # Remove duplicates and sort by percentage descending unique_data = list(set(result_list)) unique_data.sort(key=lambda x: x[1], reverse=True) return unique_data -if __name__ == "__main__": - directory = os.path.join(os.getcwd(), "app", "core", "src", "main", "resources") - messages_file_paths = glob.glob(os.path.join(directory, "messages_*.properties")) - reference_file = os.path.join(directory, "messages_en_GB.properties") +def main() -> None: + """Main entry point for the script. - scripts_directory = os.path.join(os.getcwd(), "scripts") + Parses command-line arguments and either processes a single language file + (with optional percentage output) or all files and updates the README.md. + + Command-line options: + --lang, -l : Specific properties file to check (e.g., 'messages_fr_FR.properties'). + --show-percentage: Print only the translation percentage for --lang and exit. + --show-missing-keys: Show the list of missing keys when checking a single language file. + """ + parser = argparse.ArgumentParser( + description="Compare i18n property files and optionally update README badges." + ) + parser.add_argument( + "--lang", + "-l", + help=( + "Specific properties file to check, e.g. 'messages_fr_FR.properties'. " + "If a relative filename is given, it is resolved against the resources directory." 
+ ), + ) + parser.add_argument( + "--show-percentage", + "-sp", + action="store_true", + help="Print ONLY the translation percentage for --lang and exit.", + ) + parser.add_argument( + "--show-missing-keys", + "-smk", + action="store_true", + help="Show the list of missing keys when checking a single language file.", + ) + + args = parser.parse_args() + + # Project layout assumptions + cwd = os.getcwd() + resources_dir = os.path.join(cwd, "app", "core", "src", "main", "resources") + reference_file = os.path.join(resources_dir, "messages_en_GB.properties") + scripts_directory = os.path.join(cwd, "scripts") translation_state_file = os.path.join(scripts_directory, "ignore_translation.toml") - write_readme( - compare_files(reference_file, messages_file_paths, translation_state_file) + if args.lang: + # Resolve provided path + lang_input = args.lang + if os.path.isabs(lang_input) or os.path.exists(lang_input): + lang_file = lang_input + else: + lang_file = os.path.join(resources_dir, lang_input) + + if not os.path.exists(lang_file): + print(f"ERROR: Could not find language file: {lang_file}") + sys.exit(2) + + results = compare_files( + reference_file, + [lang_file], + translation_state_file, + args.show_missing_keys, + args.show_percentage, + ) + # Find the exact tuple for the requested language + wanted_key = _lang_from_path(lang_file) + for lang, pct in results: + if lang == wanted_key: + if args.show_percentage: + # Print ONLY the number + print(pct) + return + else: + print(f"{lang}: {pct}% translated") + return + + # Fallback (should not happen) + print("ERROR: Language not found in results.") + sys.exit(3) + + # Default behavior (no --lang): process all and update README + messages_file_paths = glob.glob( + os.path.join(resources_dir, "messages_*.properties") ) + progress = compare_files( + reference_file, messages_file_paths, translation_state_file + ) + write_readme(progress) + + +if __name__ == "__main__": + main()