Replace check_language_json.py with sync_translations.py

Removed .github/scripts/check_language_json.py and updated the workflow to use .github/scripts/sync_translations.py for translation checks and syncing. Updated the usage documentation in sync_translations.py. Refactored scripts/counter_translation_v2.py to use sync_translations.py for translation progress calculation, simplifying logic and removing TOML-based ignore handling.
This commit is contained in:
Ludy87
2025-10-27 08:50:51 +01:00
parent 8fc3f3e8cb
commit 427c52e0cc
4 changed files with 90 additions and 520 deletions

View File

@@ -1,204 +1,119 @@
"""A script to update language progress status in README.md based on
JSON translation file comparison.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
This script compares the default translation JSON file with others in the locales directory to
determine language progress.
It then updates README.md based on provided progress list.
"""
A tiny helper that updates README.md translation progress by asking
.sync_translations.py for the per-locale percentage (via --procent-translations).
Author: Ludy87
"""
Example:
To use this script, simply run it from command line:
$ python counter_translation_v2.py
""" # noqa: D205
from __future__ import annotations
import glob
import os
import re
import json
import tomlkit
import tomlkit.toml_file
import subprocess
from pathlib import Path
from typing import List, Tuple
def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
"""Converts 'ignore' and 'missing' arrays to multiline arrays and sorts the first-level keys of the TOML document.
Enhances readability and consistency in the TOML file by ensuring arrays contain unique and sorted entries.
Parameters:
data (tomlkit.TOMLDocument): The original TOML document containing the data.
Returns:
tomlkit.TOMLDocument: A new TOML document with sorted keys and properly formatted arrays.
""" # noqa: D205
sorted_data = tomlkit.document()
for key in sorted(data.keys()):
value = data[key]
if isinstance(value, dict):
new_table = tomlkit.table()
for subkey in ("ignore", "missing"):
if subkey in value:
# Convert the list to a set to remove duplicates, sort it, and convert to multiline for readability
unique_sorted_array = sorted(set(value[subkey]))
array = tomlkit.array()
array.multiline(True)
for item in unique_sorted_array:
array.append(item)
new_table[subkey] = array
sorted_data[key] = new_table
else:
# Add other types of data unchanged
sorted_data[key] = value
return sorted_data
REPO_ROOT = Path(os.getcwd())
LOCALES_DIR = REPO_ROOT / "frontend" / "public" / "locales"
REF_FILE = LOCALES_DIR / "en-GB" / "translation.json"
SYNC_SCRIPT = REPO_ROOT / ".github" / "scripts" / "sync_translations.py"
README = REPO_ROOT / "README.md"
def write_readme(progress_list: list[tuple[str, int]]) -> None:
"""Updates the progress status in the README.md file based
on the provided progress list.
Parameters:
progress_list (list[tuple[str, int]]): A list of tuples containing
language and progress percentage.
Returns:
None
""" # noqa: D205
with open("README.md", encoding="utf-8") as file:
content = file.readlines()
for i, line in enumerate(content[2:], start=2):
for progress in progress_list:
language, value = progress
if language in line:
if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
content[i] = line.replace(
match.group(0),
f"![{value}%](https://geps.dev/progress/{value})",
)
with open("README.md", "w", encoding="utf-8", newline="\n") as file:
file.writelines(content)
def find_locale_files() -> List[Path]:
return sorted(
Path(p) for p in glob.glob(str(LOCALES_DIR / "*" / "translation.json"))
)
def parse_json_file(file_path):
def percent_done_for_file(file_path: Path) -> int:
"""
Parses a JSON translation file and returns a flat dictionary of all keys.
:param file_path: Path to the JSON file.
:return: Dictionary with flattened keys and values.
Calls sync_translations.py --procent-translations for a single locale file.
Returns an int 0..100.
"""
with open(file_path, "r", encoding="utf-8") as file:
data = json.load(file)
# en-GB / en-US are always 100% by definition
norm = str(file_path).replace("\\", "/")
if norm.endswith("en-GB/translation.json") or norm.endswith(
"en-US/translation.json"
):
return 100
def flatten_dict(d, parent_key="", sep="."):
items = {}
for k, v in d.items():
new_key = f"{parent_key}{sep}{k}" if parent_key else k
if isinstance(v, dict):
items.update(flatten_dict(v, new_key, sep=sep))
else:
items[new_key] = v
return items
return flatten_dict(data)
cmd = [
"python",
str(SYNC_SCRIPT),
"--reference-file",
str(REF_FILE),
"--files",
str(file_path),
"--check",
"--procent-translations",
]
res = subprocess.run(cmd, capture_output=True, text=True, check=True)
out = res.stdout.strip()
return int(float(out))
def compare_files(
default_file_path, file_paths, ignore_translation_file
) -> list[tuple[str, int]]:
"""Compares the default JSON translation file with other
translation files in the locales directory.
def update_readme(progress_list: List[Tuple[str, int]]) -> None:
"""
Update README badges. Expects lines like:
... [xx%](https://geps.dev/progress/xx)
and replaces xx with the new percent.
"""
if not README.exists():
print("README.md not found — skipping write.")
return
Parameters:
default_file_path (str): The path to the default translation JSON file.
file_paths (list): List of paths to translation JSON files.
ignore_translation_file (str): Path to the TOML file with ignore rules.
content = README.read_text(encoding="utf-8").splitlines(keepends=True)
Returns:
list[tuple[str, int]]: A list of tuples containing
language and progress percentage.
""" # noqa: D205
default_keys = parse_json_file(default_file_path)
num_keys = len(default_keys)
# we start at line 2 like your original (skip title, etc.)
for i in range(2, len(content)):
line = content[i]
for lang, value in progress_list:
if lang in line:
content[i] = re.sub(
r"!\[(\d+(?:\.\d+)?)%\]\(https://geps\.dev/progress/\d+\)",
f"![{value}%](https://geps.dev/progress/{value})",
line,
)
break
result_list = []
sort_ignore_translation: tomlkit.TOMLDocument
README.write_text("".join(content), encoding="utf-8", newline="\n")
# read toml
with open(ignore_translation_file, encoding="utf-8") as f:
sort_ignore_translation = tomlkit.parse(f.read())
for file_path in file_paths:
# Extract language code from directory name
locale_dir = os.path.basename(os.path.dirname(file_path))
def main() -> None:
files = find_locale_files()
if not files:
print("No translation.json files found.")
return
# Convert locale format from hyphen to underscore for TOML compatibility
# e.g., en-GB -> en_GB, sr-LATN-RS -> sr_LATN_RS
language = locale_dir.replace("-", "_")
results: List[Tuple[str, int]] = []
for f in files:
# language label from folder, e.g. de-DE, sr-LATN-RS
lang = f.parent.name.replace(
"-", "_"
) # keep hyphenated form to match README lines
pct = percent_done_for_file(f)
results.append((lang, pct))
fails = 0
if language in ["en_GB", "en_US"]:
result_list.append(("en_GB", 100))
result_list.append(("en_US", 100))
continue
# ensure en-GB/en-US are included & set to 100
have = {lang for lang, _ in results}
for hard in ("en-GB", "en-US"):
if hard not in have:
results.append((hard, 100))
if language not in sort_ignore_translation:
sort_ignore_translation[language] = tomlkit.table()
# optional: sort by percent desc (nice to have)
results.sort(key=lambda x: x[1], reverse=True)
if (
"ignore" not in sort_ignore_translation[language]
or len(sort_ignore_translation[language].get("ignore", [])) < 1
):
sort_ignore_translation[language]["ignore"] = tomlkit.array(
["language.direction"]
)
update_readme(results)
current_keys = parse_json_file(file_path)
# Compare keys
for default_key, default_value in default_keys.items():
if default_key not in current_keys:
# Key is missing entirely
if default_key not in sort_ignore_translation[language]["ignore"]:
print(f"{language}: Key '{default_key}' is missing.")
fails += 1
elif (
default_value == current_keys[default_key]
and default_key not in sort_ignore_translation[language]["ignore"]
):
# Key exists but value is untranslated (same as reference)
print(f"{language}: Key '{default_key}' is missing the translation.")
fails += 1
elif default_value != current_keys[default_key]:
# Key is translated, remove from ignore list if present
if default_key in sort_ignore_translation[language]["ignore"]:
sort_ignore_translation[language]["ignore"].remove(default_key)
print(f"{language}: {fails} out of {num_keys} keys are not translated.")
result_list.append(
(
language,
int((num_keys - fails) * 100 / num_keys),
)
)
ignore_translation = convert_to_multiline(sort_ignore_translation)
with open(ignore_translation_file, "w", encoding="utf-8", newline="\n") as file:
file.write(tomlkit.dumps(ignore_translation))
unique_data = list(set(result_list))
unique_data.sort(key=lambda x: x[1], reverse=True)
return unique_data
# also print a compact summary to stdout (useful in CI logs)
# for lang, pct in results:
# print(f"{lang}: {pct}%")
if __name__ == "__main__":
directory = os.path.join(os.getcwd(), "frontend", "public", "locales")
translation_file_paths = glob.glob(os.path.join(directory, "*", "translation.json"))
reference_file = os.path.join(directory, "en-GB", "translation.json")
scripts_directory = os.path.join(os.getcwd(), "scripts")
translation_state_file = os.path.join(scripts_directory, "ignore_translation.toml")
write_readme(
compare_files(reference_file, translation_file_paths, translation_state_file)
)
main()