diff --git a/.github/scripts/check_language_json.py b/.github/scripts/check_language_json.py
deleted file mode 100644
index 3921bdaa5..000000000
--- a/.github/scripts/check_language_json.py
+++ /dev/null
@@ -1,345 +0,0 @@
-"""
-Author: Ludy87
-Description: This script processes JSON translation files for localization checks. It compares translation files in a branch with
-a reference file to ensure consistency. The script performs two main checks:
-1. Verifies that the number of translation keys in the translation files matches the reference file.
-2. Ensures that all keys in the translation files are present in the reference file and vice versa.
-
-The script also provides functionality to update the translation files to match the reference file by adding missing keys and
-adjusting the format.
-
-Usage:
-    python check_language_json.py --reference-file <path> --branch <branch> [--actor <actor>] [--files <files>]
-"""
-# Sample for Windows:
-# python .github/scripts/check_language_json.py --reference-file frontend/public/locales/en-GB/translation.json --branch "" --files frontend/public/locales/de-DE/translation.json frontend/public/locales/fr-FR/translation.json
-
-import copy
-import glob
-import os
-import argparse
-import re
-import json
-
-
-def find_duplicate_keys(file_path, keys=None, prefix=""):
-    """
-    Identifies duplicate keys in a JSON file (including nested keys).
-    :param file_path: Path to the JSON file.
-    :param keys: Dictionary to track keys (used for recursion).
-    :param prefix: Prefix for nested keys.
-    :return: List of tuples (key, first_occurrence_path, duplicate_path).
-    """
-    if keys is None:
-        keys = {}
-
-    duplicates = []
-
-    with open(file_path, "r", encoding="utf-8") as file:
-        data = json.load(file)
-
-    def process_dict(obj, current_prefix=""):
-        for key, value in obj.items():
-            full_key = f"{current_prefix}.{key}" if current_prefix else key
-
-            if isinstance(value, dict):
-                process_dict(value, full_key)
-            else:
-                if full_key in keys:
-                    duplicates.append((full_key, keys[full_key], full_key))
-                else:
-                    keys[full_key] = full_key
-
-    process_dict(data, prefix)
-    return duplicates
-
-
-# Maximum size for JSON files (e.g., 500 KB)
-MAX_FILE_SIZE = 500 * 1024
-
-
-def parse_json_file(file_path):
-    """
-    Parses a JSON translation file and returns a flat dictionary of all keys.
-    :param file_path: Path to the JSON file.
-    :return: Dictionary with flattened keys.
-    """
-    with open(file_path, "r", encoding="utf-8") as file:
-        data = json.load(file)
-
-    def flatten_dict(d, parent_key="", sep="."):
-        items = {}
-        for k, v in d.items():
-            new_key = f"{parent_key}{sep}{k}" if parent_key else k
-            if isinstance(v, dict):
-                items.update(flatten_dict(v, new_key, sep=sep))
-            else:
-                items[new_key] = v
-        return items
-
-    return flatten_dict(data)
-
-
-def unflatten_dict(d, sep="."):
-    """
-    Converts a flat dictionary with dot notation keys back to nested dict.
-    :param d: Flattened dictionary.
-    :param sep: Separator used in keys.
-    :return: Nested dictionary.
-    """
-    result = {}
-    for key, value in d.items():
-        parts = key.split(sep)
-        current = result
-        for part in parts[:-1]:
-            if part not in current:
-                current[part] = {}
-            current = current[part]
-        current[parts[-1]] = value
-    return result
-
-
-def write_json_file(file_path, updated_properties):
-    """
-    Writes updated properties back to the JSON file.
-    :param file_path: Path to the JSON file.
-    :param updated_properties: Dictionary of updated properties to write.
- """ - nested_data = unflatten_dict(updated_properties) - - with open(file_path, "w", encoding="utf-8", newline="\n") as file: - json.dump(nested_data, file, ensure_ascii=False, indent=2) - file.write("\n") # Add trailing newline - - -def update_missing_keys(reference_file, file_list, branch=""): - """ - Updates missing keys in the translation files based on the reference file. - :param reference_file: Path to the reference JSON file. - :param file_list: List of translation files to update. - :param branch: Branch where the files are located. - """ - reference_properties = parse_json_file(reference_file) - - for file_path in file_list: - basename_current_file = os.path.basename(os.path.join(branch, file_path)) - if ( - basename_current_file == os.path.basename(reference_file) - or not file_path.endswith(".json") - or not os.path.dirname(file_path).endswith("locales") - ): - continue - - current_properties = parse_json_file(os.path.join(branch, file_path)) - updated_properties = {} - - for ref_key, ref_value in reference_properties.items(): - if ref_key in current_properties: - # Keep the current translation - updated_properties[ref_key] = current_properties[ref_key] - else: - # Add missing key with reference value - updated_properties[ref_key] = ref_value - - write_json_file(os.path.join(branch, file_path), updated_properties) - - -def check_for_missing_keys(reference_file, file_list, branch): - update_missing_keys(reference_file, file_list, branch) - - -def read_json_keys(file_path): - if os.path.isfile(file_path) and os.path.exists(file_path): - return parse_json_file(file_path) - return {} - - -def check_for_differences(reference_file, file_list, branch, actor): - reference_branch = branch - basename_reference_file = os.path.basename(reference_file) - - report = [] - report.append(f"#### 🔄 Reference Branch: `{reference_branch}`") - reference_keys = read_json_keys(reference_file) - has_differences = False - - only_reference_file = True - - file_arr = file_list - - if len(file_list) == 1: - file_arr = file_list[0].split() - - base_dir = os.path.abspath( - os.path.join(os.getcwd(), "frontend", "public", "locales") - ) - - for file_path in file_arr: - file_normpath = os.path.normpath(file_path) - absolute_path = os.path.abspath(file_normpath) - - # Verify that file is within the expected directory - if not absolute_path.startswith(base_dir): - raise ValueError(f"Unsafe file found: {file_normpath}") - - # Verify file size before processing - if os.path.getsize(os.path.join(branch, file_normpath)) > MAX_FILE_SIZE: - raise ValueError( - f"The file {file_normpath} is too large and could pose a security risk." - ) - - basename_current_file = os.path.basename(os.path.join(branch, file_normpath)) - locale_dir = os.path.basename(os.path.dirname(file_normpath)) - - if ( - basename_current_file == basename_reference_file - and locale_dir == "en-GB" - ): - continue - - if not file_normpath.endswith(".json") or basename_current_file != "translation.json": - continue - - only_reference_file = False - report.append(f"#### 📃 **File Check:** `{locale_dir}/{basename_current_file}`") - current_keys = read_json_keys(os.path.join(branch, file_path)) - reference_key_count = len(reference_keys) - current_key_count = len(current_keys) - - if reference_key_count != current_key_count: - report.append("") - report.append("1. 
**Test Status:** ❌ **_Failed_**") - report.append(" - **Issue:**") - has_differences = True - if reference_key_count > current_key_count: - report.append( - f" - **_Mismatched key count_**: {reference_key_count} (reference) vs {current_key_count} (current). Translation keys are missing." - ) - elif reference_key_count < current_key_count: - report.append( - f" - **_Too many keys_**: {reference_key_count} (reference) vs {current_key_count} (current). Please verify if there are additional keys that need to be removed." - ) - else: - report.append("1. **Test Status:** ✅ **_Passed_**") - - # Check for missing or extra keys - current_keys_set = set(current_keys.keys()) - reference_keys_set = set(reference_keys.keys()) - missing_keys = current_keys_set.difference(reference_keys_set) - extra_keys = reference_keys_set.difference(current_keys_set) - missing_keys_list = list(missing_keys) - extra_keys_list = list(extra_keys) - - if missing_keys_list or extra_keys_list: - has_differences = True - missing_keys_str = "`, `".join(missing_keys_list) - extra_keys_str = "`, `".join(extra_keys_list) - report.append("2. **Test Status:** ❌ **_Failed_**") - report.append(" - **Issue:**") - if missing_keys_list: - report.append( - f" - **_Extra keys in `{locale_dir}/{basename_current_file}`_**: `{missing_keys_str}` that are not present in **_`{basename_reference_file}`_**." - ) - if extra_keys_list: - report.append( - f" - **_Missing keys in `{locale_dir}/{basename_current_file}`_**: `{extra_keys_str}` that are not present in **_`{basename_reference_file}`_**." - ) - else: - report.append("2. **Test Status:** ✅ **_Passed_**") - - if find_duplicate_keys(os.path.join(branch, file_normpath)): - has_differences = True - output = "\n".join( - [ - f" - `{key}`: first at {first}, duplicate at `{duplicate}`" - for key, first, duplicate in find_duplicate_keys( - os.path.join(branch, file_normpath) - ) - ] - ) - report.append("3. **Test Status:** ❌ **_Failed_**") - report.append(" - **Issue:**") - report.append(" - duplicate entries were found:") - report.append(output) - else: - report.append("3. **Test Status:** ✅ **_Passed_**") - - report.append("") - report.append("---") - report.append("") - - if has_differences: - report.append("## ❌ Overall Check Status: **_Failed_**") - report.append("") - report.append( - f"@{actor} please check your translation if it conforms to the standard. Follow the format of [en-GB/translation.json](https://github.com/Stirling-Tools/Stirling-PDF/blob/V2/frontend/public/locales/en-GB/translation.json)" - ) - else: - report.append("## ✅ Overall Check Status: **_Success_**") - report.append("") - report.append( - f"Thanks @{actor} for your help in keeping the translations up to date." 
-        )
-
-    if not only_reference_file:
-        print("\n".join(report))
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Find missing keys")
-    parser.add_argument(
-        "--actor",
-        required=False,
-        help="Actor from PR.",
-    )
-    parser.add_argument(
-        "--reference-file",
-        required=True,
-        help="Path to the reference file.",
-    )
-    parser.add_argument(
-        "--branch",
-        type=str,
-        required=True,
-        help="Branch name.",
-    )
-    parser.add_argument(
-        "--check-file",
-        type=str,
-        required=False,
-        help="List of changed files, separated by spaces.",
-    )
-    parser.add_argument(
-        "--files",
-        nargs="+",
-        required=False,
-        help="List of changed files, separated by spaces.",
-    )
-    args = parser.parse_args()
-
-    # Sanitize --actor input to avoid injection attacks
-    if args.actor:
-        args.actor = re.sub(r"[^a-zA-Z0-9_\\-]", "", args.actor)
-
-    # Sanitize --branch input to avoid injection attacks
-    if args.branch:
-        args.branch = re.sub(r"[^a-zA-Z0-9\\-]", "", args.branch)
-
-    file_list = args.files
-    if file_list is None:
-        if args.check_file:
-            file_list = [args.check_file]
-        else:
-            file_list = glob.glob(
-                os.path.join(
-                    os.getcwd(),
-                    "frontend",
-                    "public",
-                    "locales",
-                    "*",
-                    "translation.json",
-                )
-            )
-        update_missing_keys(args.reference_file, file_list)
-    else:
-        check_for_differences(args.reference_file, file_list, args.branch, args.actor)
\ No newline at end of file
diff --git a/.github/scripts/sync_translations.py b/.github/scripts/sync_translations.py
index e70ada169..312daee01 100644
--- a/.github/scripts/sync_translations.py
+++ b/.github/scripts/sync_translations.py
@@ -12,7 +12,7 @@ It does two things:
 Also prints a CI-friendly report (intended for PR comments).
 
 Usage:
-    python sync_translations.py --reference-file <path> [--branch <branch>] [--actor <actor>] [--files <files>] [--check] [--prune] [--dry-run]
+    python sync_translations.py --reference-file <path> [--branch <branch>] [--actor <actor>] [--files <files>] [--check] [--prune] [--procent-translations] [--dry-run]
 """
 
 from __future__ import annotations
diff --git a/.github/workflows/sync_files_v2.yml b/.github/workflows/sync_files_v2.yml
index 84645c59e..2f347de9f 100644
--- a/.github/workflows/sync_files_v2.yml
+++ b/.github/workflows/sync_files_v2.yml
@@ -54,7 +54,7 @@ jobs:
 
       - name: Sync translation JSON files
         run: |
-          python .github/scripts/check_language_json.py --reference-file "frontend/public/locales/en-GB/translation.json" --branch V2
+          python .github/scripts/sync_translations.py --reference-file "frontend/public/locales/en-GB/translation.json" --branch V2
 
       - name: Commit translation files
         run: |
diff --git a/scripts/counter_translation_v2.py b/scripts/counter_translation_v2.py
index 32b3075ad..8bd843453 100644
--- a/scripts/counter_translation_v2.py
+++ b/scripts/counter_translation_v2.py
@@ -1,204 +1,119 @@
-"""A script to update language progress status in README.md based on
-JSON translation file comparison.
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 
-This script compares the default translation JSON file with others in the locales directory to
-determine language progress.
-It then updates README.md based on provided progress list.
+"""
+A tiny helper that updates README.md translation progress by asking
+sync_translations.py for the per-locale percentage (via --procent-translations).
 
 Author: Ludy87
+"""
 
-Example:
-    To use this script, simply run it from command line:
-    $ python counter_translation_v2.py
-"""  # noqa: D205
-
+from __future__ import annotations
 import glob
 import os
 import re
-import json
-
-import tomlkit
-import tomlkit.toml_file
+import subprocess
+from pathlib import Path
+from typing import List, Tuple
 
 
-def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
-    """Converts 'ignore' and 'missing' arrays to multiline arrays and sorts the first-level keys of the TOML document.
-    Enhances readability and consistency in the TOML file by ensuring arrays contain unique and sorted entries.
-
-    Parameters:
-    data (tomlkit.TOMLDocument): The original TOML document containing the data.
-
-    Returns:
-    tomlkit.TOMLDocument: A new TOML document with sorted keys and properly formatted arrays.
-    """  # noqa: D205
-    sorted_data = tomlkit.document()
-    for key in sorted(data.keys()):
-        value = data[key]
-        if isinstance(value, dict):
-            new_table = tomlkit.table()
-            for subkey in ("ignore", "missing"):
-                if subkey in value:
-                    # Convert the list to a set to remove duplicates, sort it, and convert to multiline for readability
-                    unique_sorted_array = sorted(set(value[subkey]))
-                    array = tomlkit.array()
-                    array.multiline(True)
-                    for item in unique_sorted_array:
-                        array.append(item)
-                    new_table[subkey] = array
-            sorted_data[key] = new_table
-        else:
-            # Add other types of data unchanged
-            sorted_data[key] = value
-    return sorted_data
+REPO_ROOT = Path(os.getcwd())
+LOCALES_DIR = REPO_ROOT / "frontend" / "public" / "locales"
+REF_FILE = LOCALES_DIR / "en-GB" / "translation.json"
+SYNC_SCRIPT = REPO_ROOT / ".github" / "scripts" / "sync_translations.py"
+README = REPO_ROOT / "README.md"
 
 
-def write_readme(progress_list: list[tuple[str, int]]) -> None:
-    """Updates the progress status in the README.md file based
-    on the provided progress list.
-
-    Parameters:
-    progress_list (list[tuple[str, int]]): A list of tuples containing
-    language and progress percentage.
-
-    Returns:
-    None
-    """  # noqa: D205
-    with open("README.md", encoding="utf-8") as file:
-        content = file.readlines()
-
-    for i, line in enumerate(content[2:], start=2):
-        for progress in progress_list:
-            language, value = progress
-            if language in line:
-                if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
-                    content[i] = line.replace(
-                        match.group(0),
-                        f"![{value}%](https://geps.dev/progress/{value})",
-                    )
-
-    with open("README.md", "w", encoding="utf-8", newline="\n") as file:
-        file.writelines(content)
+def find_locale_files() -> List[Path]:
+    return sorted(
+        Path(p) for p in glob.glob(str(LOCALES_DIR / "*" / "translation.json"))
+    )
 
 
-def parse_json_file(file_path):
+def percent_done_for_file(file_path: Path) -> int:
     """
-    Parses a JSON translation file and returns a flat dictionary of all keys.
-    :param file_path: Path to the JSON file.
-    :return: Dictionary with flattened keys and values.
+    Calls sync_translations.py --procent-translations for a single locale file.
+    Returns an int 0..100.
""" - with open(file_path, "r", encoding="utf-8") as file: - data = json.load(file) + # en-GB / en-US are always 100% by definition + norm = str(file_path).replace("\\", "/") + if norm.endswith("en-GB/translation.json") or norm.endswith( + "en-US/translation.json" + ): + return 100 - def flatten_dict(d, parent_key="", sep="."): - items = {} - for k, v in d.items(): - new_key = f"{parent_key}{sep}{k}" if parent_key else k - if isinstance(v, dict): - items.update(flatten_dict(v, new_key, sep=sep)) - else: - items[new_key] = v - return items - - return flatten_dict(data) + cmd = [ + "python", + str(SYNC_SCRIPT), + "--reference-file", + str(REF_FILE), + "--files", + str(file_path), + "--check", + "--procent-translations", + ] + res = subprocess.run(cmd, capture_output=True, text=True, check=True) + out = res.stdout.strip() + return int(float(out)) -def compare_files( - default_file_path, file_paths, ignore_translation_file -) -> list[tuple[str, int]]: - """Compares the default JSON translation file with other - translation files in the locales directory. +def update_readme(progress_list: List[Tuple[str, int]]) -> None: + """ + Update README badges. Expects lines like: + ... [xx%](https://geps.dev/progress/xx) + and replaces xx with the new percent. + """ + if not README.exists(): + print("README.md not found — skipping write.") + return - Parameters: - default_file_path (str): The path to the default translation JSON file. - file_paths (list): List of paths to translation JSON files. - ignore_translation_file (str): Path to the TOML file with ignore rules. + content = README.read_text(encoding="utf-8").splitlines(keepends=True) - Returns: - list[tuple[str, int]]: A list of tuples containing - language and progress percentage. - """ # noqa: D205 - default_keys = parse_json_file(default_file_path) - num_keys = len(default_keys) + # we start at line 2 like your original (skip title, etc.) + for i in range(2, len(content)): + line = content[i] + for lang, value in progress_list: + if lang in line: + content[i] = re.sub( + r"!\[(\d+(?:\.\d+)?)%\]\(https://geps\.dev/progress/\d+\)", + f"![{value}%](https://geps.dev/progress/{value})", + line, + ) + break - result_list = [] - sort_ignore_translation: tomlkit.TOMLDocument + README.write_text("".join(content), encoding="utf-8", newline="\n") - # read toml - with open(ignore_translation_file, encoding="utf-8") as f: - sort_ignore_translation = tomlkit.parse(f.read()) - for file_path in file_paths: - # Extract language code from directory name - locale_dir = os.path.basename(os.path.dirname(file_path)) +def main() -> None: + files = find_locale_files() + if not files: + print("No translation.json files found.") + return - # Convert locale format from hyphen to underscore for TOML compatibility - # e.g., en-GB -> en_GB, sr-LATN-RS -> sr_LATN_RS - language = locale_dir.replace("-", "_") + results: List[Tuple[str, int]] = [] + for f in files: + # language label from folder, e.g. 
de-DE, sr-LATN-RS + lang = f.parent.name.replace( + "-", "_" + ) # keep hyphenated form to match README lines + pct = percent_done_for_file(f) + results.append((lang, pct)) - fails = 0 - if language in ["en_GB", "en_US"]: - result_list.append(("en_GB", 100)) - result_list.append(("en_US", 100)) - continue + # ensure en-GB/en-US are included & set to 100 + have = {lang for lang, _ in results} + for hard in ("en-GB", "en-US"): + if hard not in have: + results.append((hard, 100)) - if language not in sort_ignore_translation: - sort_ignore_translation[language] = tomlkit.table() + # optional: sort by percent desc (nice to have) + results.sort(key=lambda x: x[1], reverse=True) - if ( - "ignore" not in sort_ignore_translation[language] - or len(sort_ignore_translation[language].get("ignore", [])) < 1 - ): - sort_ignore_translation[language]["ignore"] = tomlkit.array( - ["language.direction"] - ) + update_readme(results) - current_keys = parse_json_file(file_path) - - # Compare keys - for default_key, default_value in default_keys.items(): - if default_key not in current_keys: - # Key is missing entirely - if default_key not in sort_ignore_translation[language]["ignore"]: - print(f"{language}: Key '{default_key}' is missing.") - fails += 1 - elif ( - default_value == current_keys[default_key] - and default_key not in sort_ignore_translation[language]["ignore"] - ): - # Key exists but value is untranslated (same as reference) - print(f"{language}: Key '{default_key}' is missing the translation.") - fails += 1 - elif default_value != current_keys[default_key]: - # Key is translated, remove from ignore list if present - if default_key in sort_ignore_translation[language]["ignore"]: - sort_ignore_translation[language]["ignore"].remove(default_key) - - print(f"{language}: {fails} out of {num_keys} keys are not translated.") - result_list.append( - ( - language, - int((num_keys - fails) * 100 / num_keys), - ) - ) - - ignore_translation = convert_to_multiline(sort_ignore_translation) - with open(ignore_translation_file, "w", encoding="utf-8", newline="\n") as file: - file.write(tomlkit.dumps(ignore_translation)) - - unique_data = list(set(result_list)) - unique_data.sort(key=lambda x: x[1], reverse=True) - - return unique_data + # also print a compact summary to stdout (useful in CI logs) + # for lang, pct in results: + # print(f"{lang}: {pct}%") if __name__ == "__main__": - directory = os.path.join(os.getcwd(), "frontend", "public", "locales") - translation_file_paths = glob.glob(os.path.join(directory, "*", "translation.json")) - reference_file = os.path.join(directory, "en-GB", "translation.json") - - scripts_directory = os.path.join(os.getcwd(), "scripts") - translation_state_file = os.path.join(scripts_directory, "ignore_translation.toml") - - write_readme( - compare_files(reference_file, translation_file_paths, translation_state_file) - ) \ No newline at end of file + main()
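
Reviewer note: the checker deleted above and the sync_translations.py that replaces it share one core technique, which is to flatten the nested translation JSON into dot-notation keys and then compare key sets against the en-GB reference. Below is a minimal, self-contained sketch of that comparison, reusing `flatten_dict` as it appeared in the deleted script; the two inline JSON documents are made-up samples, not real locale data.

```python
import json

def flatten_dict(d, parent_key="", sep="."):
    # Collapse nested keys into dot notation, e.g. {"menu": {"open": ...}} -> "menu.open"
    items = {}
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.update(flatten_dict(v, new_key, sep=sep))
        else:
            items[new_key] = v
    return items

reference = flatten_dict(json.loads('{"menu": {"file": {"open": "Open", "save": "Save"}}}'))
current = flatten_dict(json.loads('{"menu": {"file": {"open": "Öffnen"}}}'))

# Set difference in both directions mirrors the "missing keys" / "extra keys" report.
print(sorted(set(reference) - set(current)))  # ['menu.file.save'] -> missing in the locale
print(sorted(set(current) - set(reference)))  # [] -> nothing the reference lacks
```

Flattening makes the comparison independent of nesting depth, which is why both the old checker and the new sync script normalise to dot-notation keys before diffing.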
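The README update in the new counter_translation_v2.py likewise reduces to a single `re.sub` per matching line. A hedged sketch of that rewrite, using a hypothetical README row (the real table layout may differ):

```python
import re

# Hypothetical README row of the kind update_readme() rewrites.
line = "| en_GB | ![95%](https://geps.dev/progress/95) |\n"

value = 100  # e.g. the integer percent_done_for_file() parsed from the sync script
updated = re.sub(
    r"!\[(\d+(?:\.\d+)?)%\]\(https://geps\.dev/progress/\d+\)",
    f"![{value}%](https://geps.dev/progress/{value})",
    line,
)
print(updated, end="")  # | en_GB | ![100%](https://geps.dev/progress/100) |
```

Anchoring the pattern to the geps.dev/progress URL keeps the substitution from touching any other percentage that might appear in the same row.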