Stirling-PDF/scripts/counter_translation_v3.py
Anthony Stirling 65a3eeca76
Toml (#5115)
# Description of Changes

<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Translations (if applicable)

- [ ] I ran
[`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.
2025-12-03 09:57:00 +00:00

205 lines
7.4 KiB
Python

"""A script to update language progress status in README.md based on
TOML translation file comparison.
This script compares the default translation TOML file with others in the locales directory to
determine language progress.
It then updates README.md with the computed progress list.
Author: Ludy87
Updated for TOML format
Example:
To use this script, simply run it from command line:
$ python counter_translation_v3.py
""" # noqa: D205
import glob
import os
import re
import tomlkit
import tomlkit.toml_file
def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
    """Build a new TOML document with sorted top-level keys and tidy arrays.

    Every top-level entry that is a table is replaced by a fresh table that
    carries over only its 'ignore' and 'missing' arrays; each of those arrays
    is de-duplicated, sorted and rendered as a multiline array. Top-level
    entries that are not tables are copied over unchanged.

    Parameters:
        data (tomlkit.TOMLDocument): The original TOML document containing the data.

    Returns:
        tomlkit.TOMLDocument: A new TOML document with sorted keys and
            properly formatted arrays.
    """
    result = tomlkit.document()
    for name in sorted(data):
        entry = data[name]
        if not isinstance(entry, dict):
            # Anything that is not a table is carried over as it is.
            result[name] = entry
            continue

        rebuilt = tomlkit.table()
        for field in ("ignore", "missing"):
            if field not in entry:
                continue
            # set() drops duplicates, sorted() fixes the order.
            distinct_items = sorted(set(entry[field]))
            multiline_array = tomlkit.array()
            multiline_array.multiline(True)
            for element in distinct_items:
                multiline_array.append(element)
            rebuilt[field] = multiline_array
        result[name] = rebuilt
    return result
def write_readme(progress_list: list[tuple[str, int]]) -> None:
    """Refresh the translation progress badges in README.md.

    README.md is opened relative to the current working directory. From the
    third line onwards, every line that contains a language code from
    ``progress_list`` and a ``![NN%](...)`` badge gets that badge replaced by
    one showing the new percentage. The file is then written back using
    ``"\\n"`` as the line ending.

    Parameters:
        progress_list (list[tuple[str, int]]): A list of tuples containing
            language and progress percentage.

    Returns:
        None
    """
    badge_pattern = re.compile(r"\!\[(\d+(\.\d+)?)%\]\(.*\)")

    with open("README.md", encoding="utf-8") as readme:
        lines = readme.readlines()

    # The first two lines of the README are never touched.
    for index in range(2, len(lines)):
        original_line = lines[index]
        for language, value in progress_list:
            if language not in original_line:
                continue
            found = badge_pattern.search(original_line)
            if found is None:
                continue
            # The substitution is always applied to the line as it was read,
            # so if several languages match, the last one in the list wins.
            lines[index] = original_line.replace(
                found.group(0),
                f"![{value}%](https://geps.dev/progress/{value})",
            )

    with open("README.md", "w", encoding="utf-8", newline="\n") as readme:
        readme.writelines(lines)
def parse_toml_file(file_path):
    """
    Reads a TOML translation file and flattens it into a single-level dictionary.

    Nested tables are collapsed by joining the key of each level with a dot,
    so a value stored under table ``a`` with key ``b`` ends up under ``a.b``.

    :param file_path: Path to the TOML file.
    :return: Dictionary with flattened keys and values.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        document = tomlkit.parse(handle.read())

    def walk(mapping, prefix=""):
        # Yield (flattened_key, value) pairs depth-first, in document order.
        for name, child in mapping.items():
            path = f"{prefix}.{name}" if prefix else name
            if isinstance(child, dict):
                yield from walk(child, path)
            else:
                yield path, child

    return dict(walk(document))
def compare_files(
    default_file_path, file_paths, ignore_translation_file
) -> list[tuple[str, int]]:
    """Compare the default TOML translation file with the other locale files.

    For every translation file the flattened keys are compared with those of
    the default file. A key counts as "not translated" when it is missing, or
    when its value equals the default value, unless the key is listed in that
    language's ``ignore`` array of the ignore file. Keys whose value differs
    from the default are removed from the ``ignore`` array. As a side effect
    the ignore file is rewritten (sorted, de-duplicated) at the end.

    Parameters:
        default_file_path (str): The path to the default translation TOML file.
        file_paths (list): List of paths to translation TOML files. The
            language code is taken from the name of each file's directory.
        ignore_translation_file (str): Path to the TOML file with ignore rules.

    Returns:
        list[tuple[str, int]]: Unique (language, progress percentage) tuples,
            ordered by descending percentage and then by language code so the
            result is the same on every run.
    """
    default_keys = parse_toml_file(default_file_path)
    num_keys = len(default_keys)

    with open(ignore_translation_file, encoding="utf-8") as f:
        sort_ignore_translation = tomlkit.parse(f.read())

    # A set, because en_GB and en_US each contribute both English entries.
    results: set[tuple[str, int]] = set()

    for file_path in file_paths:
        # Extract language code from directory name
        locale_dir = os.path.basename(os.path.dirname(file_path))
        # Convert locale format from hyphen to underscore for TOML compatibility
        # e.g., en-GB -> en_GB, sr-LATN-RS -> sr_LATN_RS
        language = locale_dir.replace("-", "_")

        # The English locales are reported as fully translated without comparison.
        if language in ("en_GB", "en_US"):
            results.add(("en_GB", 100))
            results.add(("en_US", 100))
            continue

        if language not in sort_ignore_translation:
            sort_ignore_translation[language] = tomlkit.table()
        if (
            "ignore" not in sort_ignore_translation[language]
            or len(sort_ignore_translation[language].get("ignore", [])) < 1
        ):
            # NOTE(review): tomlkit.array() is documented as taking a raw TOML
            # string; passing a list presumably works through its str() form.
            # Confirm before changing, as it may affect how entries are quoted.
            sort_ignore_translation[language]["ignore"] = tomlkit.array(
                ["language.direction"]
            )

        current_keys = parse_toml_file(file_path)
        # Looked up once per language instead of once per key.
        ignore_list = sort_ignore_translation[language]["ignore"]
        fails = 0

        # Compare keys
        for default_key, default_value in default_keys.items():
            if default_key not in current_keys:
                # Key is missing entirely
                if default_key not in ignore_list:
                    print(f"{language}: Key '{default_key}' is missing.")
                    fails += 1
            elif (
                default_value == current_keys[default_key]
                and default_key not in ignore_list
            ):
                # Key exists but value is untranslated (same as reference)
                print(f"{language}: Key '{default_key}' is missing the translation.")
                fails += 1
            elif default_value != current_keys[default_key]:
                # Key is translated, remove from ignore list if present
                if default_key in ignore_list:
                    ignore_list.remove(default_key)

        print(f"{language}: {fails} out of {num_keys} keys are not translated.")

        # An empty reference file has nothing to translate; report 100 rather
        # than dividing by zero. Integer division keeps the arithmetic exact.
        progress = (num_keys - fails) * 100 // num_keys if num_keys else 100
        results.add((language, progress))

    ignore_translation = convert_to_multiline(sort_ignore_translation)
    with open(ignore_translation_file, "w", encoding="utf-8", newline="\n") as file:
        file.write(tomlkit.dumps(ignore_translation))

    # The language code breaks ties, since set iteration order is arbitrary.
    return sorted(results, key=lambda entry: (-entry[1], entry[0]))
if __name__ == "__main__":
    # All paths are resolved against the current working directory.
    working_dir = os.getcwd()
    locales_dir = os.path.join(working_dir, "frontend", "public", "locales")

    # The en-GB file is the reference; one translation.toml per locale directory.
    reference_path = os.path.join(locales_dir, "en-GB", "translation.toml")
    locale_files = glob.glob(os.path.join(locales_dir, "*", "translation.toml"))

    ignore_rules_path = os.path.join(working_dir, "scripts", "ignore_translation.toml")

    progress = compare_files(reference_path, locale_files, ignore_rules_path)
    write_readme(progress)