Stirling-PDF/scripts/counter_translation_v2.py

"""A script to update language progress status in README.md based on
JSON translation file comparison.
This script compares the default translation JSON file with others in the locales directory to
determine language progress.
It then updates README.md based on provided progress list.
Author: Ludy87
Example:
To use this script, simply run it from command line:
$ python counter_translation_v2.py
""" # noqa: D205

import glob
import json
import os
import re

import tomlkit
import tomlkit.toml_file


def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
    """Converts 'ignore' and 'missing' arrays to multiline arrays and sorts the first-level keys of the TOML document.

    Enhances readability and consistency in the TOML file by ensuring arrays contain unique and sorted entries.

    Parameters:
        data (tomlkit.TOMLDocument): The original TOML document containing the data.

    Returns:
        tomlkit.TOMLDocument: A new TOML document with sorted keys and properly formatted arrays.
    """  # noqa: D205
    sorted_data = tomlkit.document()
    for key in sorted(data.keys()):
        value = data[key]
        if isinstance(value, dict):
            new_table = tomlkit.table()
            for subkey in ("ignore", "missing"):
                if subkey in value:
                    # Convert the list to a set to remove duplicates, sort it,
                    # and render it as a multiline array for readability
                    unique_sorted_array = sorted(set(value[subkey]))
                    array = tomlkit.array()
                    array.multiline(True)
                    for item in unique_sorted_array:
                        array.append(item)
                    new_table[subkey] = array
            sorted_data[key] = new_table
        else:
            # Add other types of data unchanged
            sorted_data[key] = value
    return sorted_data
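
# Illustrative example (not from the original script; "it_IT" and "about.title" are
# hypothetical entries): a parsed document such as
#   {"it_IT": {"ignore": ["language.direction", "about.title", "about.title"]}}
# comes back with sorted top-level keys and its "ignore" array de-duplicated, sorted,
# and rendered as a multiline TOML array, roughly:
#   [it_IT]
#   ignore = [
#       "about.title",
#       "language.direction",
#   ]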


def write_readme(progress_list: list[tuple[str, int]]) -> None:
    """Updates the progress status in the README.md file based
    on the provided progress list.

    Parameters:
        progress_list (list[tuple[str, int]]): A list of tuples containing
            language and progress percentage.

    Returns:
        None
    """  # noqa: D205
    with open("README.md", encoding="utf-8") as file:
        content = file.readlines()

    for i, line in enumerate(content[2:], start=2):
        for progress in progress_list:
            language, value = progress
            if language in line:
                if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
                    content[i] = line.replace(
                        match.group(0),
                        f"![{value}%](https://geps.dev/progress/{value})",
                    )

    with open("README.md", "w", encoding="utf-8", newline="\n") as file:
        file.writelines(content)
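
# Sketch of a README row this function would rewrite (assumed layout; "it_IT" and the
# percentage are hypothetical). The line must contain the underscore-form locale code
# and a badge matching the regex above; only the badge itself is replaced:
#   | Italian | it_IT | ![87%](https://geps.dev/progress/87) |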


def parse_json_file(file_path):
    """Parses a JSON translation file and returns a flat dictionary of all keys.

    :param file_path: Path to the JSON file.
    :return: Dictionary with flattened keys and values.
    """
    with open(file_path, encoding="utf-8") as file:
        data = json.load(file)

    def flatten_dict(d, parent_key="", sep="."):
        items = {}
        for k, v in d.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            if isinstance(v, dict):
                items.update(flatten_dict(v, new_key, sep=sep))
            else:
                items[new_key] = v
        return items

    return flatten_dict(data)
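
# Illustrative example (hypothetical keys): a translation file containing
#   {"home": {"title": "Home", "desc": "Start here"}, "ok": "OK"}
# is flattened by parse_json_file() to
#   {"home.title": "Home", "home.desc": "Start here", "ok": "OK"}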


def compare_files(
    default_file_path, file_paths, ignore_translation_file
) -> list[tuple[str, int]]:
    """Compares the default JSON translation file with other
    translation files in the locales directory.

    Parameters:
        default_file_path (str): The path to the default translation JSON file.
        file_paths (list): List of paths to translation JSON files.
        ignore_translation_file (str): Path to the TOML file with ignore rules.

    Returns:
        list[tuple[str, int]]: A list of tuples containing
            language and progress percentage.
    """  # noqa: D205
    default_keys = parse_json_file(default_file_path)
    num_keys = len(default_keys)
    result_list = []

    # Read the TOML file with the per-language ignore rules
    sort_ignore_translation: tomlkit.TOMLDocument
    with open(ignore_translation_file, encoding="utf-8") as f:
        sort_ignore_translation = tomlkit.parse(f.read())

    for file_path in file_paths:
        # Extract the language code from the directory name
        locale_dir = os.path.basename(os.path.dirname(file_path))
        # Convert the locale format from hyphen to underscore for TOML compatibility,
        # e.g. en-GB -> en_GB, sr-LATN-RS -> sr_LATN_RS
        language = locale_dir.replace("-", "_")
        fails = 0

        if language in ["en_GB", "en_US"]:
            result_list.append(("en_GB", 100))
            result_list.append(("en_US", 100))
            continue

        if language not in sort_ignore_translation:
            sort_ignore_translation[language] = tomlkit.table()
        if (
            "ignore" not in sort_ignore_translation[language]
            or len(sort_ignore_translation[language].get("ignore", [])) < 1
        ):
            sort_ignore_translation[language]["ignore"] = tomlkit.array(
                '["language.direction"]'
            )

        current_keys = parse_json_file(file_path)

        # Compare keys
        for default_key, default_value in default_keys.items():
            if default_key not in current_keys:
                # Key is missing entirely
                if default_key not in sort_ignore_translation[language]["ignore"]:
                    print(f"{language}: Key '{default_key}' is missing.")
                    fails += 1
            elif (
                default_value == current_keys[default_key]
                and default_key not in sort_ignore_translation[language]["ignore"]
            ):
                # Key exists but the value is untranslated (same as the reference)
                print(f"{language}: Key '{default_key}' is missing the translation.")
                fails += 1
            elif default_value != current_keys[default_key]:
                # Key is translated; remove it from the ignore list if present
                if default_key in sort_ignore_translation[language]["ignore"]:
                    sort_ignore_translation[language]["ignore"].remove(default_key)

        print(f"{language}: {fails} out of {num_keys} keys are not translated.")
        result_list.append(
            (
                language,
                int((num_keys - fails) * 100 / num_keys),
            )
        )

    ignore_translation = convert_to_multiline(sort_ignore_translation)
    with open(ignore_translation_file, "w", encoding="utf-8", newline="\n") as file:
        file.write(tomlkit.dumps(ignore_translation))

    unique_data = list(set(result_list))
    unique_data.sort(key=lambda x: x[1], reverse=True)
    return unique_data
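
# Illustrative result (hypothetical numbers): with 1000 reference keys, a locale that
# has 37 missing or untranslated, non-ignored keys scores int((1000 - 37) * 100 / 1000)
# = 96, so the function would return entries such as [("en_GB", 100), ("it_IT", 96), ...]
# sorted by percentage in descending order.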


if __name__ == "__main__":
    directory = os.path.join(os.getcwd(), "frontend", "public", "locales")
    translation_file_paths = glob.glob(
        os.path.join(directory, "*", "translation.json")
    )
    reference_file = os.path.join(directory, "en-GB", "translation.json")

    scripts_directory = os.path.join(os.getcwd(), "scripts")
    translation_state_file = os.path.join(scripts_directory, "ignore_translation.toml")

    write_readme(
        compare_files(reference_file, translation_file_paths, translation_state_file)
    )
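
# Note: all paths are resolved relative to os.getcwd(), so the script is expected to be
# run from the repository root, as in the module docstring example.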