mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-03-04 02:20:19 +01:00
Toml (#5115)
# Description of Changes <!-- Please provide a summary of the changes, including: - What was changed - Why the change was made - Any challenges encountered Closes #(issue_number) --> --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Translations (if applicable) - [ ] I ran [`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details.
This commit is contained in:
@@ -1,113 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Convert Java .properties files to JSON for react-i18next
|
||||
Preserves hierarchical structure and handles special cases
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
def properties_to_dict(file_path):
    """Convert a Java .properties file to a nested dictionary.

    Each ``key=value`` line is split on the first ``=``; the dotted key is
    expanded into nested dictionaries via ``set_nested_value``. Blank lines,
    comment lines and lines without ``=`` are ignored.

    Args:
        file_path: Path of the UTF-8 encoded .properties file to read.

    Returns:
        dict: Nested dictionary built from the dotted keys.
    """
    result = {}

    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()

            # The .properties format marks comments with either '#' or '!'.
            if not line or line.startswith(('#', '!')):
                continue

            # Lines without a separator carry no key/value pair.
            if '=' not in line:
                continue

            key, value = line.split('=', 1)
            key = key.strip()
            value = value.strip()

            # A line continues only when it ends with an ODD number of
            # backslashes; an even run is a sequence of escaped backslashes
            # that belongs to the value itself.
            while (len(value) - len(value.rstrip('\\'))) % 2 == 1:
                # next() with a default keeps a dangling backslash on the
                # last line of the file from raising StopIteration.
                next_line = next(f, '').strip()
                value = value[:-1] + next_line

            # Create nested structure from dot notation
            set_nested_value(result, key, value)

    return result
|
||||
|
||||
def set_nested_value(dictionary, key_path, value):
    """Store ``value`` in ``dictionary`` at the location named by a dotted path.

    Intermediate dictionaries are created as needed. When a path segment is
    already occupied by a non-dict value, that value is moved under the
    ``"_value"`` key of a new dictionary so children can sit next to it. When
    the final segment already holds a dictionary, ``value`` is stored under
    that dictionary's ``"_value"`` key instead of replacing it.
    """
    *parents, leaf = key_path.split('.')
    node = dictionary

    for part in parents:
        if part in node:
            if not isinstance(node[part], dict):
                # Keep the existing leaf value while making room for children
                node[part] = {"_value": node[part]}
        else:
            node[part] = {}
        node = node[part]

    existing = node.get(leaf)
    if isinstance(existing, dict):
        # The key is already a branch: attach the value to the branch itself
        existing["_value"] = value
    else:
        node[leaf] = value
|
||||
|
||||
def convert_all_properties():
    """Convert every messages*.properties resource file to a translation.json.

    Input files are read from ``src/main/resources`` and output is written to
    ``frontend/public/locales/<locale>/translation.json``, both relative to
    the parent of the directory containing this script.
    """
    project_root = Path(__file__).parent.parent
    resources_dir = project_root / 'src' / 'main' / 'resources'
    output_dir = project_root / 'frontend' / 'public' / 'locales'
    output_dir.mkdir(parents=True, exist_ok=True)

    converted_count = 0

    for props_file in list(resources_dir.glob('messages*.properties')):
        filename = props_file.name

        if filename == 'messages.properties':
            # The file without a suffix is the default locale
            locale = 'en'
        else:
            locale_match = re.match(r'messages_(.+)\.properties', filename)
            if not locale_match:
                continue
            # Java locale format to hyphenated form (en_US -> en-US)
            locale = locale_match.group(1).replace('_', '-')

        print(f"Converting {filename} -> {locale}.json")

        data = properties_to_dict(props_file)

        locale_dir = output_dir / locale
        locale_dir.mkdir(exist_ok=True)

        # translation.json is the react-i18next default namespace file
        output_file = locale_dir / 'translation.json'
        with open(output_file, 'w', encoding='utf-8') as out:
            json.dump(data, out, indent=2, ensure_ascii=False)

        converted_count += 1

    print(f"\nConverted {converted_count} language files to {output_dir}")
    available = [entry.name for entry in output_dir.iterdir() if entry.is_dir()]
    print("Languages available:", available)
|
||||
|
||||
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    convert_all_properties()
|
||||
@@ -1,219 +0,0 @@
|
||||
"""A script to update language progress status in README.md based on
|
||||
properties file comparison.
|
||||
|
||||
This script compares default properties file with others in a directory to
|
||||
determine language progress.
|
||||
It then updates README.md based on provided progress list.
|
||||
|
||||
Author: Ludy87
|
||||
|
||||
Example:
|
||||
To use this script, simply run it from command line:
|
||||
$ python counter_translation.py
|
||||
""" # noqa: D205
|
||||
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
|
||||
import tomlkit
|
||||
import tomlkit.toml_file
|
||||
|
||||
|
||||
def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
    """Return a copy of the document with sorted top-level keys and tidy arrays.

    For every top-level value that is a table, a new table is built that holds
    only its 'ignore' and 'missing' arrays (when present), each de-duplicated,
    sorted and rendered as a multiline array. Non-table values are copied over
    unchanged.

    Parameters:
        data (tomlkit.TOMLDocument): The original TOML document containing the data.

    Returns:
        tomlkit.TOMLDocument: A new TOML document with sorted keys and properly formatted arrays.
    """  # noqa: D205
    document = tomlkit.document()

    for name in sorted(data.keys()):
        entry = data[name]

        if not isinstance(entry, dict):
            # Scalars and other non-table values are carried over as they are
            document[name] = entry
            continue

        table = tomlkit.table()
        for field in ("ignore", "missing"):
            if field not in entry:
                continue
            # set() drops duplicates, sorted() gives a stable order on disk
            multiline_array = tomlkit.array()
            multiline_array.multiline(True)
            for element in sorted(set(entry[field])):
                multiline_array.append(element)
            table[field] = multiline_array
        document[name] = table

    return document
|
||||
|
||||
|
||||
def write_readme(progress_list: list[tuple[str, int]]) -> None:
    """Update the per-language progress badges in README.md.

    README.md is read from the current working directory. Every line from the
    third one onward is checked against each (language, percentage) pair: when
    the language code occurs in the line and the line contains a
    ``![<number>%](...)`` badge, the badge is rewritten to show the new
    percentage. The file is then written back with LF line endings.

    Parameters:
        progress_list (list[tuple[str, int]]): A list of tuples containing
            language and progress percentage.

    Returns:
        None
    """  # noqa: D205
    with open("README.md", encoding="utf-8") as file:
        content = file.readlines()

    # The first two lines are never inspected (start=2 keeps indices aligned).
    for i, line in enumerate(content[2:], start=2):
        for language, value in progress_list:
            if language in line:
                if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
                    # Swap the old badge for one showing the new percentage;
                    # an empty replacement here would delete the badge.
                    content[i] = line.replace(
                        match.group(0),
                        f"![{value}%](https://geps.dev/progress/{value})",
                    )

    with open("README.md", "w", encoding="utf-8", newline="\n") as file:
        file.writelines(content)
|
||||
|
||||
|
||||
def compare_files(
    default_file_path, file_paths, ignore_translation_file
) -> list[tuple[str, int]]:
    """Compare the default properties file with each translated properties file.

    Files are compared line by line in lockstep: a line counts as untranslated
    when its value equals the default value and its key is not listed in the
    language's ``ignore`` array. Keys in the ``ignore`` array whose value does
    differ from the default are removed from that array. The updated ignore
    document is written back to ``ignore_translation_file``.

    Parameters:
        default_file_path (str): The path to the default properties file.
        file_paths (list): Paths of the ``messages_<lang>.properties`` files
            to compare against the default file.
        ignore_translation_file (str): Path to the TOML file with the
            per-language ignore lists; it is read and then rewritten.

    Returns:
        list[tuple[str, int]]: A list of tuples containing
            language and progress percentage, sorted by descending percentage.

    Raises:
        SystemExit: When a compared line cannot be split into key and value.
    """  # noqa: D205
    # Count non-empty, non-comment lines; the context manager closes the
    # handle instead of leaving it to the garbage collector.
    with open(default_file_path, encoding="utf-8") as default_file:
        num_lines = sum(
            1
            for line in default_file
            if line.strip() and not line.strip().startswith("#")
        )

    result_list = []
    sort_ignore_translation: tomlkit.TOMLDocument

    # read toml
    with open(ignore_translation_file, encoding="utf-8") as f:
        sort_ignore_translation = tomlkit.parse(f.read())

    for file_path in file_paths:
        language = (
            os.path.basename(file_path)
            .split("messages_", 1)[1]
            .split(".properties", 1)[0]
        )

        fails = 0
        # Both English variants are reported as complete without comparison.
        if "en_GB" in language or "en_US" in language:
            result_list.append(("en_GB", 100))
            result_list.append(("en_US", 100))
            continue

        if language not in sort_ignore_translation:
            sort_ignore_translation[language] = tomlkit.table()

        # Every language gets at least "language.direction" in its ignore list.
        if (
            "ignore" not in sort_ignore_translation[language]
            or len(sort_ignore_translation[language].get("ignore", [])) < 1
        ):
            sort_ignore_translation[language]["ignore"] = tomlkit.array(
                ["language.direction"]
            )

        with (
            open(default_file_path, encoding="utf-8") as default_file,
            open(file_path, encoding="utf-8") as file,
        ):
            # Skip the first five lines of both files (presumably a header
            # block; confirm against the properties files).
            for _ in range(5):
                # A default keeps a short reference file from raising
                # StopIteration out of this function.
                next(default_file, None)
                try:
                    next(file)
                except StopIteration:
                    # Translation file ends inside the skipped lines: count
                    # every line as untranslated.
                    fails = num_lines

            for line_num, (line_default, line_file) in enumerate(
                zip(default_file, file), start=6
            ):
                try:
                    # Ignoring empty lines and lines start with #
                    if line_default.strip() == "" or line_default.startswith("#"):
                        continue
                    default_key, default_value = line_default.split("=", 1)
                    _file_key, file_value = line_file.split("=", 1)
                    key = default_key.strip()
                    same_as_default = default_value.strip() == file_value.strip()

                    if (
                        same_as_default
                        and key not in sort_ignore_translation[language]["ignore"]
                    ):
                        print(
                            f"{language}: Line {line_num} is missing the translation."
                        )
                        fails += 1

                    # A key that is now translated no longer needs ignoring.
                    if (
                        not same_as_default
                        and key in sort_ignore_translation[language]["ignore"]
                    ):
                        sort_ignore_translation[language]["ignore"].remove(key)
                except ValueError as e:
                    print(f"Error processing line {line_num} in {file_path}: {e}")
                    print(f"{line_default}|{line_file}")
                    # Same effect as exit(1) without depending on the
                    # site-provided ``exit`` helper.
                    raise SystemExit(1)
                except IndexError:
                    pass

        print(f"{language}: {fails} out of {num_lines} lines are not translated.")
        result_list.append(
            (
                language,
                int((num_lines - fails) * 100 / num_lines),
            )
        )

    ignore_translation = convert_to_multiline(sort_ignore_translation)
    with open(ignore_translation_file, "w", encoding="utf-8", newline="\n") as file:
        file.write(tomlkit.dumps(ignore_translation))

    # The English shortcut can add duplicates; keep one entry per pair.
    unique_data = list(set(result_list))
    unique_data.sort(key=lambda x: x[1], reverse=True)

    return unique_data
|
||||
|
||||
|
||||
# Script entry point: all paths are resolved against the current working
# directory, so the script has to be started from the directory that contains
# both "app" and "scripts".
if __name__ == "__main__":
    directory = os.path.join(os.getcwd(), "app", "core", "src", "main", "resources")
    messages_file_paths = glob.glob(os.path.join(directory, "messages_*.properties"))
    # en_GB is the reference every other language file is compared against.
    reference_file = os.path.join(directory, "messages_en_GB.properties")

    scripts_directory = os.path.join(os.getcwd(), "scripts")
    translation_state_file = os.path.join(scripts_directory, "ignore_translation.toml")

    # compare_files returns the progress list that write_readme puts into README.md.
    write_readme(
        compare_files(reference_file, messages_file_paths, translation_state_file)
    )
|
||||
@@ -1,21 +1,21 @@
|
||||
"""A script to update language progress status in README.md based on
|
||||
JSON translation file comparison.
|
||||
TOML translation file comparison.
|
||||
|
||||
This script compares the default translation JSON file with others in the locales directory to
|
||||
This script compares the default translation TOML file with others in the locales directory to
|
||||
determine language progress.
|
||||
It then updates README.md based on provided progress list.
|
||||
|
||||
Author: Ludy87
|
||||
Updated for TOML format
|
||||
|
||||
Example:
|
||||
To use this script, simply run it from command line:
|
||||
$ python counter_translation_v2.py
|
||||
$ python counter_translation_v3.py
|
||||
""" # noqa: D205
|
||||
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
|
||||
import tomlkit
|
||||
import tomlkit.toml_file
|
||||
@@ -80,14 +80,14 @@ def write_readme(progress_list: list[tuple[str, int]]) -> None:
|
||||
file.writelines(content)
|
||||
|
||||
|
||||
def parse_json_file(file_path):
|
||||
def parse_toml_file(file_path):
|
||||
"""
|
||||
Parses a JSON translation file and returns a flat dictionary of all keys.
|
||||
:param file_path: Path to the JSON file.
|
||||
Parses a TOML translation file and returns a flat dictionary of all keys.
|
||||
:param file_path: Path to the TOML file.
|
||||
:return: Dictionary with flattened keys and values.
|
||||
"""
|
||||
with open(file_path, "r", encoding="utf-8") as file:
|
||||
data = json.load(file)
|
||||
data = tomlkit.parse(file.read())
|
||||
|
||||
def flatten_dict(d, parent_key="", sep="."):
|
||||
items = {}
|
||||
@@ -105,19 +105,19 @@ def parse_json_file(file_path):
|
||||
def compare_files(
|
||||
default_file_path, file_paths, ignore_translation_file
|
||||
) -> list[tuple[str, int]]:
|
||||
"""Compares the default JSON translation file with other
|
||||
"""Compares the default TOML translation file with other
|
||||
translation files in the locales directory.
|
||||
|
||||
Parameters:
|
||||
default_file_path (str): The path to the default translation JSON file.
|
||||
file_paths (list): List of paths to translation JSON files.
|
||||
default_file_path (str): The path to the default translation TOML file.
|
||||
file_paths (list): List of paths to translation TOML files.
|
||||
ignore_translation_file (str): Path to the TOML file with ignore rules.
|
||||
|
||||
Returns:
|
||||
list[tuple[str, int]]: A list of tuples containing
|
||||
language and progress percentage.
|
||||
""" # noqa: D205
|
||||
default_keys = parse_json_file(default_file_path)
|
||||
default_keys = parse_toml_file(default_file_path)
|
||||
num_keys = len(default_keys)
|
||||
|
||||
result_list = []
|
||||
@@ -152,7 +152,7 @@ def compare_files(
|
||||
["language.direction"]
|
||||
)
|
||||
|
||||
current_keys = parse_json_file(file_path)
|
||||
current_keys = parse_toml_file(file_path)
|
||||
|
||||
# Compare keys
|
||||
for default_key, default_value in default_keys.items():
|
||||
@@ -193,12 +193,12 @@ def compare_files(
|
||||
|
||||
if __name__ == "__main__":
|
||||
directory = os.path.join(os.getcwd(), "frontend", "public", "locales")
|
||||
translation_file_paths = glob.glob(os.path.join(directory, "*", "translation.json"))
|
||||
reference_file = os.path.join(directory, "en-GB", "translation.json")
|
||||
translation_file_paths = glob.glob(os.path.join(directory, "*", "translation.toml"))
|
||||
reference_file = os.path.join(directory, "en-GB", "translation.toml")
|
||||
|
||||
scripts_directory = os.path.join(os.getcwd(), "scripts")
|
||||
translation_state_file = os.path.join(scripts_directory, "ignore_translation.toml")
|
||||
|
||||
write_readme(
|
||||
compare_files(reference_file, translation_file_paths, translation_state_file)
|
||||
)
|
||||
)
|
||||
@@ -2,6 +2,12 @@
|
||||
|
||||
This directory contains Python scripts for managing frontend translations in Stirling PDF. These tools help analyze, merge, validate, and manage translations against the en-GB golden truth file.
|
||||
|
||||
## Current Format: TOML
|
||||
|
||||
**Stirling PDF uses TOML format for translations** in `frontend/public/locales/{lang}/translation.toml`.
|
||||
|
||||
**All scripts now support TOML format!**
|
||||
|
||||
## Quick Start - Automated Translation (RECOMMENDED)
|
||||
|
||||
The **fastest and easiest way** to translate a language is using the automated pipeline:
|
||||
@@ -451,18 +457,15 @@ python scripts/translations/translation_merger.py fr-FR apply-translations --tra
|
||||
|
||||
## Translation File Structure
|
||||
|
||||
Translation files are located in `frontend/public/locales/{language}/translation.json` with nested JSON structure:
|
||||
Translation files are located in `frontend/public/locales/{language}/translation.toml` with TOML structure:
|
||||
|
||||
```json
|
||||
{
|
||||
"addPageNumbers": {
|
||||
"title": "Add Page Numbers",
|
||||
"selectText": {
|
||||
"1": "Select PDF file:",
|
||||
"2": "Margin Size"
|
||||
}
|
||||
}
|
||||
}
|
||||
```toml
|
||||
[addPageNumbers]
|
||||
title = "Add Page Numbers"
|
||||
|
||||
[addPageNumbers.selectText]
|
||||
"1" = "Select PDF file:"
|
||||
"2" = "Margin Size"
|
||||
```
|
||||
|
||||
Keys use dot notation internally (e.g., `addPageNumbers.selectText.1`).
|
||||
@@ -478,7 +481,7 @@ All scripts preserve placeholders like `{n}`, `{total}`, `{filename}` in transla
|
||||
### Automatic Backups
|
||||
Scripts create timestamped backups before modifying files:
|
||||
```
|
||||
translation.backup.20241201_143022.json
|
||||
translation.backup.20241201_143022.toml
|
||||
```
|
||||
|
||||
### Context-Aware Translation
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
AI Translation Helper for Stirling PDF Frontend
|
||||
Provides utilities for AI-assisted translation workflows including
|
||||
batch processing, quality checks, and integration helpers.
|
||||
TOML format only.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -14,31 +15,33 @@ import argparse
|
||||
import re
|
||||
from datetime import datetime
|
||||
import csv
|
||||
import tomllib
|
||||
import tomli_w
|
||||
|
||||
|
||||
class AITranslationHelper:
|
||||
def __init__(self, locales_dir: str = "frontend/public/locales"):
|
||||
self.locales_dir = Path(locales_dir)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.toml"
|
||||
|
||||
def _load_json(self, file_path: Path) -> Dict:
|
||||
"""Load JSON file with error handling."""
|
||||
def _load_translation_file(self, file_path: Path) -> Dict:
|
||||
"""Load TOML translation file."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except (FileNotFoundError, json.JSONDecodeError) as e:
|
||||
with open(file_path, 'rb') as f:
|
||||
return tomllib.load(f)
|
||||
except (FileNotFoundError, Exception) as e:
|
||||
print(f"Error loading {file_path}: {e}")
|
||||
return {}
|
||||
|
||||
def _save_json(self, data: Dict, file_path: Path) -> None:
|
||||
"""Save JSON file."""
|
||||
with open(file_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2, ensure_ascii=False)
|
||||
def _save_translation_file(self, data: Dict, file_path: Path) -> None:
|
||||
"""Save TOML translation file."""
|
||||
with open(file_path, 'wb') as f:
|
||||
tomli_w.dump(data, f)
|
||||
|
||||
def create_ai_batch_file(self, languages: List[str], output_file: Path,
|
||||
max_entries_per_language: int = 50) -> None:
|
||||
"""Create a batch file for AI translation with multiple languages."""
|
||||
golden_truth = self._load_json(self.golden_truth_file)
|
||||
golden_truth = self._load_translation_file(self.golden_truth_file)
|
||||
batch_data = {
|
||||
'metadata': {
|
||||
'created_at': datetime.now().isoformat(),
|
||||
@@ -56,12 +59,14 @@ class AITranslationHelper:
|
||||
}
|
||||
|
||||
for lang in languages:
|
||||
lang_file = self.locales_dir / lang / "translation.json"
|
||||
if not lang_file.exists():
|
||||
# Create empty translation structure
|
||||
lang_data = {}
|
||||
lang_dir = self.locales_dir / lang
|
||||
toml_file = lang_dir / "translation.toml"
|
||||
|
||||
if toml_file.exists():
|
||||
lang_data = self._load_translation_file(toml_file)
|
||||
else:
|
||||
lang_data = self._load_json(lang_file)
|
||||
# No translation file found, create empty structure
|
||||
lang_data = {}
|
||||
|
||||
# Find untranslated entries
|
||||
untranslated = self._find_untranslated_entries(golden_truth, lang_data)
|
||||
@@ -79,7 +84,9 @@ class AITranslationHelper:
|
||||
'context': self._get_key_context(key)
|
||||
}
|
||||
|
||||
self._save_json(batch_data, output_file)
|
||||
# Always save batch files as JSON for compatibility
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(batch_data, f, indent=2, ensure_ascii=False)
|
||||
total_entries = sum(len(lang_data) for lang_data in batch_data['translations'].values())
|
||||
print(f"Created AI batch file: {output_file}")
|
||||
print(f"Total entries to translate: {total_entries}")
|
||||
@@ -173,7 +180,9 @@ class AITranslationHelper:
|
||||
|
||||
def validate_ai_translations(self, batch_file: Path) -> Dict[str, List[str]]:
|
||||
"""Validate AI translations for common issues."""
|
||||
batch_data = self._load_json(batch_file)
|
||||
# Batch files are always JSON
|
||||
with open(batch_file, 'r', encoding='utf-8') as f:
|
||||
batch_data = json.load(f)
|
||||
issues = {'errors': [], 'warnings': []}
|
||||
|
||||
for lang, translations in batch_data.get('translations', {}).items():
|
||||
@@ -209,7 +218,9 @@ class AITranslationHelper:
|
||||
|
||||
def apply_ai_batch_translations(self, batch_file: Path, validate: bool = True) -> Dict[str, Any]:
|
||||
"""Apply translations from AI batch file to individual language files."""
|
||||
batch_data = self._load_json(batch_file)
|
||||
# Batch files are always JSON
|
||||
with open(batch_file, 'r', encoding='utf-8') as f:
|
||||
batch_data = json.load(f)
|
||||
results = {'applied': {}, 'errors': [], 'warnings': []}
|
||||
|
||||
if validate:
|
||||
@@ -226,14 +237,15 @@ class AITranslationHelper:
|
||||
print(f" WARNING: {warning}")
|
||||
|
||||
for lang, translations in batch_data.get('translations', {}).items():
|
||||
lang_file = self.locales_dir / lang / "translation.json"
|
||||
lang_dir = self.locales_dir / lang
|
||||
toml_file = lang_dir / "translation.toml"
|
||||
|
||||
# Load existing data or create new
|
||||
if lang_file.exists():
|
||||
lang_data = self._load_json(lang_file)
|
||||
if toml_file.exists():
|
||||
lang_data = self._load_translation_file(toml_file)
|
||||
else:
|
||||
# No translation file found, create new TOML file
|
||||
lang_data = {}
|
||||
lang_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
lang_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
applied_count = 0
|
||||
for key, translation_data in translations.items():
|
||||
@@ -243,7 +255,7 @@ class AITranslationHelper:
|
||||
applied_count += 1
|
||||
|
||||
if applied_count > 0:
|
||||
self._save_json(lang_data, lang_file)
|
||||
self._save_translation_file(lang_data, toml_file)
|
||||
results['applied'][lang] = applied_count
|
||||
print(f"Applied {applied_count} translations to {lang}")
|
||||
|
||||
@@ -265,7 +277,7 @@ class AITranslationHelper:
|
||||
|
||||
def export_for_external_translation(self, languages: List[str], output_format: str = 'csv') -> None:
|
||||
"""Export translations for external translation services."""
|
||||
golden_truth = self._load_json(self.golden_truth_file)
|
||||
golden_truth = self._load_translation_file(self.golden_truth_file)
|
||||
golden_flat = self._flatten_dict(golden_truth)
|
||||
|
||||
if output_format == 'csv':
|
||||
@@ -287,9 +299,11 @@ class AITranslationHelper:
|
||||
}
|
||||
|
||||
for lang in languages:
|
||||
lang_file = self.locales_dir / lang / "translation.json"
|
||||
if lang_file.exists():
|
||||
lang_data = self._load_json(lang_file)
|
||||
lang_dir = self.locales_dir / lang
|
||||
toml_file = lang_dir / "translation.toml"
|
||||
|
||||
if toml_file.exists():
|
||||
lang_data = self._load_translation_file(toml_file)
|
||||
lang_flat = self._flatten_dict(lang_data)
|
||||
value = lang_flat.get(key, '')
|
||||
if value.startswith('[UNTRANSLATED]'):
|
||||
@@ -316,21 +330,28 @@ class AITranslationHelper:
|
||||
}
|
||||
|
||||
for lang in languages:
|
||||
lang_file = self.locales_dir / lang / "translation.json"
|
||||
if lang_file.exists():
|
||||
lang_data = self._load_json(lang_file)
|
||||
lang_dir = self.locales_dir / lang
|
||||
toml_file = lang_dir / "translation.toml"
|
||||
|
||||
if toml_file.exists():
|
||||
lang_data = self._load_translation_file(toml_file)
|
||||
lang_flat = self._flatten_dict(lang_data)
|
||||
value = lang_flat.get(key, '')
|
||||
if value.startswith('[UNTRANSLATED]'):
|
||||
value = ''
|
||||
export_data['translations'][key][lang] = value
|
||||
|
||||
self._save_json(export_data, output_file)
|
||||
# Export files are always JSON
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(export_data, f, indent=2, ensure_ascii=False)
|
||||
print(f"Exported to {output_file}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='AI Translation Helper')
|
||||
parser = argparse.ArgumentParser(
|
||||
description='AI Translation Helper',
|
||||
epilog='Works with TOML translation files.'
|
||||
)
|
||||
parser.add_argument('--locales-dir', default='frontend/public/locales',
|
||||
help='Path to locales directory')
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
"""
|
||||
Automated Translation Pipeline
|
||||
Extracts, translates, merges, and beautifies translations for a language.
|
||||
TOML format only.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -12,6 +13,8 @@ import subprocess
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
import tomllib
|
||||
|
||||
|
||||
def run_command(cmd, description=""):
|
||||
"""Run a shell command and return success status."""
|
||||
@@ -30,26 +33,34 @@ def run_command(cmd, description=""):
|
||||
return result.returncode == 0
|
||||
|
||||
|
||||
def find_translation_file(lang_dir):
    """Return the path of ``translation.toml`` inside ``lang_dir``, or None.

    ``lang_dir`` must support the ``/`` path operator (a ``pathlib.Path``).
    """
    candidate = lang_dir / "translation.toml"
    return candidate if candidate.exists() else None
|
||||
|
||||
def load_translation_file(file_path):
|
||||
"""Load TOML translation file."""
|
||||
with open(file_path, 'rb') as f:
|
||||
return tomllib.load(f)
|
||||
|
||||
def extract_untranslated(language_code, batch_size=500):
|
||||
"""Extract untranslated entries and split into batches."""
|
||||
print(f"\n🔍 Extracting untranslated entries for {language_code}...")
|
||||
|
||||
# Load files
|
||||
golden_path = Path(f'frontend/public/locales/en-GB/translation.json')
|
||||
lang_path = Path(f'frontend/public/locales/{language_code}/translation.json')
|
||||
golden_path = find_translation_file(Path('frontend/public/locales/en-GB'))
|
||||
lang_path = find_translation_file(Path(f'frontend/public/locales/{language_code}'))
|
||||
|
||||
if not golden_path.exists():
|
||||
print(f"Error: Golden truth file not found: {golden_path}")
|
||||
if not golden_path:
|
||||
print(f"Error: Golden truth file not found in frontend/public/locales/en-GB")
|
||||
return None
|
||||
|
||||
if not lang_path.exists():
|
||||
print(f"Error: Language file not found: {lang_path}")
|
||||
if not lang_path:
|
||||
print(f"Error: Language file not found in frontend/public/locales/{language_code}")
|
||||
return None
|
||||
|
||||
def load_json(path):
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
|
||||
def flatten_dict(d, parent_key='', separator='.'):
|
||||
items = []
|
||||
for k, v in d.items():
|
||||
@@ -60,8 +71,12 @@ def extract_untranslated(language_code, batch_size=500):
|
||||
items.append((new_key, str(v)))
|
||||
return dict(items)
|
||||
|
||||
golden = load_json(golden_path)
|
||||
lang_data = load_json(lang_path)
|
||||
golden = load_translation_file(golden_path)
|
||||
lang_data = load_translation_file(lang_path)
|
||||
|
||||
if not golden or not lang_data:
|
||||
print(f"Error: Failed to load translation files")
|
||||
return None
|
||||
|
||||
golden_flat = flatten_dict(golden)
|
||||
lang_flat = flatten_dict(lang_data)
|
||||
@@ -186,7 +201,7 @@ def beautify_translations(language_code):
|
||||
"""Beautify translation file to match en-GB structure."""
|
||||
print(f"\n✨ Beautifying {language_code} translation file...")
|
||||
|
||||
cmd = f'python3 scripts/translations/json_beautifier.py --language {language_code}'
|
||||
cmd = f'python3 scripts/translations/toml_beautifier.py --language {language_code}'
|
||||
|
||||
if not run_command(cmd):
|
||||
print(f"✗ Failed to beautify translations")
|
||||
@@ -229,6 +244,8 @@ def main():
|
||||
description='Automated translation pipeline for Stirling PDF',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Note: This script works with TOML translation files.
|
||||
|
||||
Examples:
|
||||
# Translate Spanish with API key in environment
|
||||
export OPENAI_API_KEY=your_key_here
|
||||
|
||||
@@ -5,6 +5,8 @@ Automatically translates JSON batch files to target language while preserving:
|
||||
- Placeholders: {n}, {total}, {filename}, {{variable}}
|
||||
- HTML tags: <strong>, </strong>, etc.
|
||||
- Technical terms: PDF, API, OAuth2, SAML2, JWT, etc.
|
||||
|
||||
Note: Works with JSON batch files. Translation files can be TOML or JSON format.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -206,9 +208,11 @@ def get_language_info(language_code: str) -> tuple:
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Translate JSON batch files using OpenAI API',
|
||||
description='Translate JSON batch files using OpenAI API (output supports TOML and JSON)',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Note: This script works with JSON batch files. The translation files it updates can be TOML or JSON.
|
||||
|
||||
Examples:
|
||||
# Translate single batch file
|
||||
python batch_translator.py zh_CN_batch_1_of_4.json --api-key YOUR_KEY --language zh-CN
|
||||
|
||||
305
scripts/translations/bulk_auto_translate.py
Normal file
305
scripts/translations/bulk_auto_translate.py
Normal file
@@ -0,0 +1,305 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Bulk Auto-Translate All Languages
|
||||
Automatically translates all languages in parallel using OpenAI API.
|
||||
Supports concurrent translation with configurable thread pool.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
import subprocess
|
||||
from typing import List, Tuple, Optional
|
||||
import threading
|
||||
|
||||
import tomllib
|
||||
|
||||
|
||||
# Lock held around every safe_print call so concurrent callers take turns.
print_lock = threading.Lock()


def safe_print(*args, **kwargs):
    """Forward all arguments to ``print`` while holding ``print_lock``."""
    print_lock.acquire()
    try:
        print(*args, **kwargs)
    finally:
        # Always release, even if print() raised.
        print_lock.release()
|
||||
|
||||
|
||||
def get_all_languages(locales_dir: Path) -> List[str]:
    """Return the translatable language codes found under ``locales_dir``.

    A sub-directory qualifies when it is not ``en-GB`` and contains a
    ``translation.toml`` file. Entries are visited in sorted order. When
    ``locales_dir`` does not exist an error is printed and ``[]`` is returned.
    """
    if not locales_dir.exists():
        print(f"Error: Locales directory not found: {locales_dir}")
        return []

    return [
        entry.name
        for entry in sorted(locales_dir.iterdir())
        if entry.is_dir()
        and entry.name != "en-GB"
        and (entry / "translation.toml").exists()
    ]
|
||||
|
||||
|
||||
def get_language_completion(locales_dir: Path, language: str) -> Optional[float]:
|
||||
"""Get completion percentage for a language."""
|
||||
lang_dir = locales_dir / language
|
||||
toml_file = lang_dir / "translation.toml"
|
||||
|
||||
if not toml_file.exists():
|
||||
return None
|
||||
|
||||
try:
|
||||
with open(toml_file, 'rb') as f:
|
||||
target_data = tomllib.load(f)
|
||||
|
||||
# Load en-GB reference
|
||||
en_gb_file = locales_dir / 'en-GB' / 'translation.toml'
|
||||
with open(en_gb_file, 'rb') as f:
|
||||
en_gb_data = tomllib.load(f)
|
||||
|
||||
# Flatten and count
|
||||
def flatten(d, parent=''):
|
||||
items = {}
|
||||
for k, v in d.items():
|
||||
key = f"{parent}.{k}" if parent else k
|
||||
if isinstance(v, dict):
|
||||
items.update(flatten(v, key))
|
||||
else:
|
||||
items[key] = v
|
||||
return items
|
||||
|
||||
en_gb_flat = flatten(en_gb_data)
|
||||
target_flat = flatten(target_data)
|
||||
|
||||
# Count translated (not equal to en-GB)
|
||||
translated = sum(1 for k in en_gb_flat if k in target_flat and target_flat[k] != en_gb_flat[k])
|
||||
total = len(en_gb_flat)
|
||||
|
||||
return (translated / total * 100) if total > 0 else 0.0
|
||||
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not calculate completion for {language}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def translate_language(language: str, api_key: str, batch_size: int, timeout: int, skip_verification: bool) -> Tuple[str, bool, str]:
    """
    Translate a single language by running auto_translate.py as a subprocess.

    Args:
        language: Language code passed to auto_translate.py (e.g. de-DE).
        api_key: OpenAI API key, handed to the child via OPENAI_API_KEY.
        batch_size: Value forwarded as --batch-size.
        timeout: Per-batch timeout in seconds, forwarded as --timeout; the
            whole subprocess is limited to five times this value.
        skip_verification: When true, --skip-verification is appended.

    Returns: (language_code, success, message)
        message is "Already complete", "Success", "Timeout exceeded", or a
        truncated error text. Exceptions are never propagated to the caller.
    """
    safe_print(f"[{language}] Starting translation...")

    # Use the interpreter that is running this script so the child sees the
    # same Python version and installed packages; fall back to 'python3' only
    # if the interpreter path is unavailable.
    cmd = [
        sys.executable or 'python3', 'scripts/translations/auto_translate.py',
        language,
        '--batch-size', str(batch_size),
        '--timeout', str(timeout)
    ]

    if skip_verification:
        cmd.append('--skip-verification')

    try:
        # Pass the key through the environment rather than argv: command-line
        # arguments are visible to other users in process listings.
        # auto_translate.py's help shows OPENAI_API_KEY as a supported source.
        child_env = dict(os.environ, OPENAI_API_KEY=api_key)

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            env=child_env,
            timeout=timeout * 5  # Overall timeout = 5x per-batch timeout
        )

        if result.returncode == 0:
            # Check if nothing to translate
            if "Nothing to translate!" in result.stdout:
                safe_print(f"[{language}] ✓ Already complete")
                return (language, True, "Already complete")
            safe_print(f"[{language}] ✓ Success")
            return (language, True, "Success")

        error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
        safe_print(f"[{language}] ✗ Failed: {error_msg[:100]}")
        return (language, False, error_msg[:200])  # Truncate long errors

    except subprocess.TimeoutExpired:
        safe_print(f"[{language}] ✗ Timeout exceeded")
        return (language, False, "Timeout exceeded")
    except Exception as e:
        # Worker threads must always return a result tuple, so any other
        # failure is reported as a failed language instead of being raised.
        safe_print(f"[{language}] ✗ Error: {str(e)}")
        return (language, False, str(e))
|
||||
|
||||
|
||||
def main():
    """Parse CLI options, select languages and translate them in parallel.

    Exit status: 1 when the API key is missing (outside --dry-run) or when any
    language fails; 0 when there is nothing to translate or after a dry run;
    2 (argparse usage error) when --parallel is below 1.
    """
    parser = argparse.ArgumentParser(
        description='Bulk auto-translate all languages using OpenAI API',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Translate all languages with 10 parallel threads
python3 bulk_auto_translate.py --parallel 10

# Translate only incomplete languages (< 95%)
python3 bulk_auto_translate.py --parallel 5 --threshold 95

# Translate specific languages only
python3 bulk_auto_translate.py --languages de-DE fr-FR es-ES --parallel 3

# Dry run to see what would be translated
python3 bulk_auto_translate.py --dry-run

Note: Requires OPENAI_API_KEY environment variable or --api-key argument.
"""
    )

    parser.add_argument('--api-key', help='OpenAI API key (or set OPENAI_API_KEY env var)')
    parser.add_argument('--parallel', type=int, default=1,
                        help='Number of parallel translation threads (default: 1)')
    parser.add_argument('--batch-size', type=int, default=500,
                        help='Entries per batch for translation (default: 500)')
    parser.add_argument('--timeout', type=int, default=600,
                        help='Timeout per batch in seconds (default: 600)')
    parser.add_argument('--threshold', type=float, default=0.0,
                        help='Only translate languages below this completion %% (default: 0 = all)')
    parser.add_argument('--languages', nargs='+',
                        help='Translate only specific languages (e.g., de-DE fr-FR)')
    parser.add_argument('--locales-dir', default='frontend/public/locales',
                        help='Path to locales directory')
    parser.add_argument('--skip-verification', action='store_true',
                        help='Skip final completion verification for each language')
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would be translated without actually translating')

    args = parser.parse_args()

    # ThreadPoolExecutor raises ValueError for max_workers < 1; reject the
    # value here with a usage message instead of a traceback later on.
    if args.parallel < 1:
        parser.error("--parallel must be at least 1")

    # Verify API key (unless dry run)
    api_key = args.api_key or os.environ.get('OPENAI_API_KEY')
    if not args.dry_run and not api_key:
        print("Error: OpenAI API key required. Provide via --api-key or OPENAI_API_KEY environment variable")
        sys.exit(1)

    locales_dir = Path(args.locales_dir)

    # Get languages to translate
    if args.languages:
        languages = args.languages
        print(f"Translating specified languages: {', '.join(languages)}")
    else:
        languages = get_all_languages(locales_dir)
        print(f"Found {len(languages)} languages (excluding en-GB)")

    if not languages:
        print("No languages to translate!")
        sys.exit(0)

    # Filter by completion threshold
    if args.threshold > 0:
        print(f"\nFiltering languages below {args.threshold}% completion...")
        filtered = []
        for lang in languages:
            completion = get_language_completion(locales_dir, lang)
            if completion is None:
                filtered.append(lang)  # Include if can't determine
                print(f" {lang}: Unknown completion - will translate")
            elif completion < args.threshold:
                filtered.append(lang)
                print(f" {lang}: {completion:.1f}% - will translate")
            else:
                print(f" {lang}: {completion:.1f}% - skipping (above threshold)")

        languages = filtered

        if not languages:
            print("\nNo languages below threshold!")
            sys.exit(0)

    print(f"\n{'='*60}")
    print("Bulk Translation Configuration")
    print(f"{'='*60}")
    print(f"Languages to translate: {len(languages)}")
    print(f"Parallel threads: {args.parallel}")
    print(f"Batch size: {args.batch_size}")
    print(f"Timeout per batch: {args.timeout}s")
    if args.threshold > 0:
        print(f"Completion threshold: {args.threshold}%")
    print(f"{'='*60}\n")

    if args.dry_run:
        print("DRY RUN - Languages that would be translated:")
        for lang in languages:
            completion = get_language_completion(locales_dir, lang)
            comp_str = f"{completion:.1f}%" if completion is not None else "Unknown"
            print(f" - {lang} ({comp_str})")
        print(f"\nTotal: {len(languages)} languages")
        sys.exit(0)

    start_time = time.time()

    # Translate in parallel
    results = {
        'success': [],
        'failed': [],
        'already_complete': []
    }

    with ThreadPoolExecutor(max_workers=args.parallel) as executor:
        futures = {
            executor.submit(
                translate_language,
                lang,
                api_key,
                args.batch_size,
                args.timeout,
                args.skip_verification
            ): lang
            for lang in languages
        }

        # Collect outcomes as workers finish, not in submission order.
        for future in as_completed(futures):
            language, success, message = future.result()

            if success:
                if message == "Already complete":
                    results['already_complete'].append(language)
                else:
                    results['success'].append(language)
            else:
                results['failed'].append((language, message))

    elapsed = time.time() - start_time

    # Print summary
    print("\n" + "="*60)
    print("Bulk Translation Summary")
    print("="*60)
    print(f"Total languages: {len(languages)}")
    print(f"Successful: {len(results['success'])}")
    print(f"Already complete: {len(results['already_complete'])}")
    print(f"Failed: {len(results['failed'])}")
    print(f"Time elapsed: {elapsed:.1f} seconds ({elapsed/60:.1f} minutes)")
    print("="*60)

    if results['success']:
        print(f"\n✅ Successfully translated ({len(results['success'])}):")
        for lang in sorted(results['success']):
            print(f" - {lang}")

    if results['already_complete']:
        print(f"\n✓ Already complete ({len(results['already_complete'])}):")
        for lang in sorted(results['already_complete']):
            print(f" - {lang}")

    if results['failed']:
        print(f"\n❌ Failed ({len(results['failed'])}):")
        for lang, msg in sorted(results['failed']):
            print(f" - {lang}: {msg}")
        # A non-zero status lets calling scripts detect partial failure.
        sys.exit(1)

    print("\n✅ Bulk translation completed successfully!")


if __name__ == '__main__':
    main()
|
||||
@@ -2,41 +2,37 @@
|
||||
"""
|
||||
Compact Translation Extractor for Character-Limited AI Translation
|
||||
Outputs untranslated entries in minimal JSON format with whitespace stripped.
|
||||
TOML format only.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
try:
|
||||
import tomllib # Python 3.11+
|
||||
except ImportError:
|
||||
try:
|
||||
import toml as tomllib_fallback
|
||||
tomllib = None
|
||||
except ImportError:
|
||||
tomllib = None
|
||||
tomllib_fallback = None
|
||||
import tomllib # Python 3.11+ (stdlib)
|
||||
|
||||
|
||||
class CompactTranslationExtractor:
|
||||
def __init__(self, locales_dir: str = "frontend/public/locales", ignore_file: str = "scripts/ignore_translation.toml"):
|
||||
self.locales_dir = Path(locales_dir)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
|
||||
self.golden_truth = self._load_json(self.golden_truth_file)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.toml"
|
||||
if not self.golden_truth_file.exists():
|
||||
print(f"Error: en-GB translation file not found at {self.golden_truth_file}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
self.golden_truth = self._load_translation_file(self.golden_truth_file)
|
||||
self.ignore_file = Path(ignore_file)
|
||||
self.ignore_patterns = self._load_ignore_patterns()
|
||||
|
||||
def _load_json(self, file_path: Path) -> dict:
|
||||
"""Load JSON file with error handling."""
|
||||
def _load_translation_file(self, file_path: Path) -> dict:
|
||||
"""Load TOML translation file."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
with open(file_path, 'rb') as f:
|
||||
return tomllib.load(f)
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File not found: {file_path}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error: Invalid JSON in {file_path}: {e}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
print(f"Error: Invalid TOML file {file_path}: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
def _load_ignore_patterns(self) -> dict:
|
||||
@@ -45,40 +41,13 @@ class CompactTranslationExtractor:
|
||||
return {}
|
||||
|
||||
try:
|
||||
if tomllib:
|
||||
with open(self.ignore_file, 'rb') as f:
|
||||
ignore_data = tomllib.load(f)
|
||||
elif tomllib_fallback:
|
||||
ignore_data = tomllib_fallback.load(self.ignore_file)
|
||||
else:
|
||||
ignore_data = self._parse_simple_toml()
|
||||
|
||||
with open(self.ignore_file, 'rb') as f:
|
||||
ignore_data = tomllib.load(f)
|
||||
return {lang: set(data.get('ignore', [])) for lang, data in ignore_data.items()}
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not load ignore file {self.ignore_file}: {e}", file=sys.stderr)
|
||||
return {}
|
||||
|
||||
def _parse_simple_toml(self) -> dict:
|
||||
"""Simple TOML parser for ignore patterns (fallback)."""
|
||||
ignore_data = {}
|
||||
current_section = None
|
||||
|
||||
with open(self.ignore_file, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line or line.startswith('#'):
|
||||
continue
|
||||
|
||||
if line.startswith('[') and line.endswith(']'):
|
||||
current_section = line[1:-1]
|
||||
ignore_data[current_section] = {'ignore': []}
|
||||
elif line.strip().startswith("'") and current_section:
|
||||
item = line.strip().strip("',")
|
||||
if item:
|
||||
ignore_data[current_section]['ignore'].append(item)
|
||||
|
||||
return ignore_data
|
||||
|
||||
def _flatten_dict(self, d: dict, parent_key: str = '', separator: str = '.') -> dict:
|
||||
"""Flatten nested dictionary into dot-notation keys."""
|
||||
items = []
|
||||
@@ -92,13 +61,14 @@ class CompactTranslationExtractor:
|
||||
|
||||
def get_untranslated_entries(self, language: str) -> dict:
|
||||
"""Get all untranslated entries for a language in compact format."""
|
||||
target_file = self.locales_dir / language / "translation.json"
|
||||
lang_dir = self.locales_dir / language
|
||||
target_file = lang_dir / "translation.toml"
|
||||
|
||||
if not target_file.exists():
|
||||
print(f"Error: Translation file not found for language: {language}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
target_data = self._load_json(target_file)
|
||||
target_data = self._load_translation_file(target_file)
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
|
||||
@@ -145,7 +115,9 @@ class CompactTranslationExtractor:
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Extract untranslated entries in compact format for AI translation')
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Extract untranslated entries in compact format for AI translation (TOML format only)'
|
||||
)
|
||||
parser.add_argument('language', help='Language code (e.g., de-DE, fr-FR)')
|
||||
parser.add_argument('--locales-dir', default='frontend/public/locales', help='Path to locales directory')
|
||||
parser.add_argument('--ignore-file', default='scripts/ignore_translation.toml', help='Path to ignore patterns file')
|
||||
|
||||
@@ -1,259 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
JSON Validator for Translation Files
|
||||
|
||||
Validates JSON syntax in translation files and reports detailed error information.
|
||||
Useful for validating batch translation files before merging.
|
||||
|
||||
Usage:
|
||||
python3 json_validator.py <file_or_pattern>
|
||||
python3 json_validator.py ar_AR_batch_*.json
|
||||
python3 json_validator.py ar_AR_batch_1_of_3.json
|
||||
python3 json_validator.py --all-batches ar_AR
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import argparse
|
||||
import glob
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def get_line_context(file_path, line_num, context_lines=3):
    """Return the lines around 1-based ``line_num`` as one numbered string.

    Up to ``context_lines`` lines before and after are included, and the
    requested line is prefixed with ``>>> ``. Any failure while reading is
    reported as a "Could not read context" string instead of being raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            all_lines = handle.readlines()

        first = max(0, line_num - context_lines - 1)
        last = min(len(all_lines), line_num + context_lines)
        target_index = line_num - 1

        return "\n".join(
            f"{'>>> ' if idx == target_index else ' '}{idx + 1:4d}: {all_lines[idx].rstrip()}"
            for idx in range(first, last)
        )
    except Exception as e:
        return f"Could not read context: {e}"
|
||||
|
||||
|
||||
def get_character_context(file_path, char_pos, context_chars=100):
    """Return the text surrounding character offset ``char_pos`` in a file.

    The result is a dict with the text before the offset, the character at
    the offset (``"EOF"`` when the offset is at or past the end), the text
    after it, and a combined ``display`` string with the character in square
    brackets. Returns ``None`` if anything goes wrong.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()

        window_start = max(0, char_pos - context_chars)
        window_end = min(len(text), char_pos + context_chars)

        leading = text[window_start:char_pos]
        culprit = "EOF" if char_pos >= len(text) else text[char_pos]
        trailing = text[char_pos + 1:window_end]

        return {
            'before': leading,
            'error_char': culprit,
            'after': trailing,
            'display': f"{leading}[{culprit}]{trailing}"
        }
    except Exception:
        # Best effort only: callers treat None as "no context available".
        return None
|
||||
|
||||
|
||||
def validate_json_file(file_path):
    """Parse one JSON file and return a report dict describing the outcome.

    On success ``valid`` is True and ``entry_count`` is the number of
    top-level keys (0 when the document is not an object). On a JSON syntax
    error the message, line, column, character position and surrounding
    context are filled in. Other failures only set ``error``.
    """
    report = {
        'file': str(file_path),
        'valid': False,
        'error': None,
        'line': None,
        'column': None,
        'position': None,
        'context': None,
        'char_context': None,
        'entry_count': 0
    }

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except json.JSONDecodeError as exc:
        report.update(
            error=exc.msg,
            line=exc.lineno,
            column=exc.colno,
            position=exc.pos,
            context=get_line_context(file_path, exc.lineno),
            char_context=get_character_context(file_path, exc.pos),
        )
    except FileNotFoundError:
        report['error'] = "File not found"
    except Exception as exc:
        report['error'] = str(exc)
    else:
        report['valid'] = True
        report['entry_count'] = len(parsed) if isinstance(parsed, dict) else 0

    return report
|
||||
|
||||
|
||||
def print_validation_result(result, verbose=True):
    """Print a human-readable summary of one validation report.

    Valid files get a single line. Invalid files get the error, its location
    when known and, if ``verbose`` is true, the line and character context,
    followed by a blank separator line.
    """
    file_name = Path(result['file']).name

    if result['valid']:
        print(f"✓ {file_name}: Valid JSON ({result['entry_count']} entries)")
        return

    print(f"✗ {file_name}: Invalid JSON")
    print(f" Error: {result['error']}")

    if result['line']:
        print(f" Location: Line {result['line']}, Column {result['column']} (character {result['position']})")

    if verbose and result['context']:
        print(f"\n Context:")
        for context_line in result['context'].split('\n'):
            print(f" {context_line}")

    if verbose and result['char_context']:
        char_info = result['char_context']
        print(f"\n Character context:")
        # Only the tail of the display string is shown to keep output short.
        print(f" ...{char_info['display'][-150:]}...")
        print(f" Error character: {repr(char_info['error_char'])}")

    # Blank line separating this report from the next one.
    print()
|
||||
|
||||
|
||||
def get_common_fixes(error_msg):
    """Return hint strings for the known JSON error patterns in ``error_msg``.

    Each rule pairs one or more substrings with the hints to suggest when any
    of them occurs in the message. Rules are checked in order and every
    matching rule contributes its hints; an unrecognised message yields [].
    """
    rules = (
        (
            ("Expecting ',' delimiter",),
            (
                "Missing comma between JSON entries",
                "Check for unescaped quotes inside string values",
            ),
        ),
        (
            ("Invalid \\escape", "Invalid escape"),
            (
                "Unescaped backslash in string (use \\\\ for literal backslash)",
                "Common in regex patterns: \\d should be \\\\d",
            ),
        ),
        (
            ("Expecting property name",),
            (
                "Missing or extra comma",
                "Trailing comma before closing brace",
            ),
        ),
        (
            ("Expecting value",),
            (
                "Missing value after colon",
                "Extra comma",
            ),
        ),
    )

    fixes = []
    for needles, hints in rules:
        if any(needle in error_msg for needle in needles):
            fixes.extend(hints)
    return fixes
|
||||
|
||||
|
||||
def main():
    """Validate the JSON files named on the command line and print a report.

    Files come from positional arguments (wildcards are expanded with glob)
    or from --all-batches LANGUAGE, which matches LANGUAGE_batch_*.json in
    the current directory.

    Returns:
        0 when every file is valid; 1 when any file is invalid, no files were
        given, or nothing matched.
    """
    parser = argparse.ArgumentParser(
        description='Validate JSON syntax in translation files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
Validate single file:
python3 json_validator.py ar_AR_batch_1_of_3.json

Validate all batches for a language:
python3 json_validator.py --all-batches ar_AR

Validate pattern:
python3 json_validator.py "ar_AR_batch_*.json"

Validate multiple files:
python3 json_validator.py file1.json file2.json file3.json
"""
    )

    parser.add_argument(
        'files',
        nargs='*',
        help='JSON file(s) to validate (supports wildcards)'
    )

    parser.add_argument(
        '--all-batches',
        metavar='LANGUAGE',
        help='Validate all batch files for a language (e.g., ar_AR)'
    )

    parser.add_argument(
        '--quiet',
        action='store_true',
        help='Only show files with errors'
    )

    parser.add_argument(
        '--brief',
        action='store_true',
        help='Brief output without context'
    )

    args = parser.parse_args()

    # Determine which files to validate
    files_to_validate = []

    if args.all_batches:
        pattern = f"{args.all_batches}_batch_*.json"
        files_to_validate = glob.glob(pattern)
        if not files_to_validate:
            print(f"No batch files found matching: {pattern}")
            return 1
    elif args.files:
        for file_pattern in args.files:
            # Expand wildcards ourselves in case the shell passed them through.
            if '*' in file_pattern or '?' in file_pattern:
                files_to_validate.extend(glob.glob(file_pattern))
            else:
                files_to_validate.append(file_pattern)
    else:
        parser.print_help()
        return 1

    if not files_to_validate:
        print("No files to validate")
        return 1

    # Sort files for consistent output
    files_to_validate.sort()

    print(f"Validating {len(files_to_validate)} file(s)...\n")

    # Validate each file
    results = []
    for file_path in files_to_validate:
        result = validate_json_file(file_path)
        results.append(result)

        if not args.quiet or not result['valid']:
            print_validation_result(result, verbose=not args.brief)

    # Summary
    valid_count = sum(1 for r in results if r['valid'])
    invalid_count = len(results) - valid_count

    print("=" * 60)
    print(f"Summary: {valid_count} valid, {invalid_count} invalid")

    # Show common fixes for errors
    if invalid_count > 0:
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # hints print in the same order on every run (iterating a set of
        # strings does not guarantee that).
        distinct_errors = dict.fromkeys(r['error'] for r in results if r['error'])
        suggestions = list(dict.fromkeys(
            fix for error in distinct_errors for fix in get_common_fixes(error)
        ))

        # Skip the heading entirely when no hint applies.
        if suggestions:
            print("\nCommon fixes:")
            for fix in suggestions:
                print(f" • {fix}")

    return 0 if invalid_count == 0 else 1


if __name__ == '__main__':
    sys.exit(main())
|
||||
@@ -1,10 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
JSON Beautifier and Structure Fixer for Stirling PDF Frontend
|
||||
Restructures translation JSON files to match en-GB structure and key order exactly.
|
||||
TOML Beautifier and Structure Fixer for Stirling PDF Frontend
|
||||
Restructures translation TOML files to match en-GB structure and key order exactly.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
@@ -12,34 +11,38 @@ from typing import Dict, Any, List
|
||||
import argparse
|
||||
from collections import OrderedDict
|
||||
|
||||
import tomllib
|
||||
import tomli_w
|
||||
|
||||
class JSONBeautifier:
|
||||
|
||||
class TOMLBeautifier:
|
||||
def __init__(self, locales_dir: str = "frontend/public/locales"):
|
||||
self.locales_dir = Path(locales_dir)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
|
||||
self.golden_structure = self._load_json(self.golden_truth_file)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.toml"
|
||||
self.golden_structure = self._load_toml(self.golden_truth_file)
|
||||
|
||||
def _load_json(self, file_path: Path) -> Dict:
|
||||
"""Load JSON file with error handling."""
|
||||
def _load_toml(self, file_path: Path) -> Dict:
|
||||
"""Load TOML file with error handling."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f, object_pairs_hook=OrderedDict)
|
||||
with open(file_path, 'rb') as f:
|
||||
return tomllib.load(f)
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File not found: {file_path}")
|
||||
sys.exit(1)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error: Invalid JSON in {file_path}: {e}")
|
||||
except Exception as e:
|
||||
print(f"Error: Invalid TOML in {file_path}: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
def _save_json(self, data: Dict, file_path: Path, backup: bool = True) -> None:
|
||||
"""Save JSON file with proper formatting."""
|
||||
def _save_toml(self, data: Dict, file_path: Path, backup: bool = False) -> None:
|
||||
"""Save TOML file with proper formatting."""
|
||||
if backup and file_path.exists():
|
||||
backup_path = file_path.with_suffix(f'.backup.restructured.json')
|
||||
file_path.rename(backup_path)
|
||||
backup_path = file_path.with_suffix(f'.backup.restructured.toml')
|
||||
import shutil
|
||||
shutil.copy2(file_path, backup_path)
|
||||
print(f"Backup created: {backup_path}")
|
||||
|
||||
with open(file_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2, ensure_ascii=False, separators=(',', ': '))
|
||||
with open(file_path, 'wb') as f:
|
||||
tomli_w.dump(data, f)
|
||||
|
||||
def _flatten_dict(self, d: Dict, parent_key: str = '', separator: str = '.') -> Dict[str, Any]:
|
||||
"""Flatten nested dictionary into dot-notation keys."""
|
||||
@@ -93,7 +96,7 @@ class JSONBeautifier:
|
||||
return {}
|
||||
|
||||
# Load the target file
|
||||
target_data = self._load_json(target_file)
|
||||
target_data = self._load_toml(target_file)
|
||||
|
||||
# Flatten the target translations
|
||||
flat_target = self._flatten_dict(target_data)
|
||||
@@ -103,7 +106,7 @@ class JSONBeautifier:
|
||||
|
||||
return restructured
|
||||
|
||||
def beautify_and_restructure(self, target_file: Path, backup: bool = True) -> Dict[str, Any]:
|
||||
def beautify_and_restructure(self, target_file: Path, backup: bool = False) -> Dict[str, Any]:
|
||||
"""Main function to beautify and restructure a translation file."""
|
||||
lang_code = target_file.parent.name
|
||||
print(f"Restructuring {lang_code} translation file...")
|
||||
@@ -112,7 +115,7 @@ class JSONBeautifier:
|
||||
restructured_data = self.restructure_translation_file(target_file)
|
||||
|
||||
# Save the restructured file
|
||||
self._save_json(restructured_data, target_file, backup)
|
||||
self._save_toml(restructured_data, target_file, backup)
|
||||
|
||||
# Analyze the results
|
||||
flat_golden = self._flatten_dict(self.golden_structure)
|
||||
@@ -163,7 +166,7 @@ class JSONBeautifier:
|
||||
|
||||
def validate_key_order(self, target_file: Path) -> Dict[str, Any]:
|
||||
"""Validate that keys appear in the same order as en-GB."""
|
||||
target_data = self._load_json(target_file)
|
||||
target_data = self._load_toml(target_file)
|
||||
|
||||
def get_key_order(obj: Dict, path: str = '') -> List[str]:
|
||||
keys = []
|
||||
@@ -198,23 +201,26 @@ class JSONBeautifier:
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Beautify and restructure translation JSON files')
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Beautify and restructure translation TOML files',
|
||||
epilog='Works with TOML format translation files.'
|
||||
)
|
||||
parser.add_argument('--locales-dir', default='frontend/public/locales',
|
||||
help='Path to locales directory')
|
||||
parser.add_argument('--language', help='Restructure specific language only')
|
||||
parser.add_argument('--all-languages', action='store_true',
|
||||
help='Restructure all language files')
|
||||
parser.add_argument('--no-backup', action='store_true',
|
||||
help='Skip backup creation')
|
||||
parser.add_argument('--backup', action='store_true',
|
||||
help='Create backup files before modifying')
|
||||
parser.add_argument('--validate-only', action='store_true',
|
||||
help='Only validate structure, do not modify files')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
beautifier = JSONBeautifier(args.locales_dir)
|
||||
beautifier = TOMLBeautifier(args.locales_dir)
|
||||
|
||||
if args.language:
|
||||
target_file = Path(args.locales_dir) / args.language / "translation.json"
|
||||
target_file = Path(args.locales_dir) / args.language / "translation.toml"
|
||||
if not target_file.exists():
|
||||
print(f"Error: Translation file not found for language: {args.language}")
|
||||
sys.exit(1)
|
||||
@@ -225,7 +231,7 @@ def main():
|
||||
print(f" Order preserved: {order_result['order_preserved']}")
|
||||
print(f" Common keys: {order_result['common_keys_count']}/{order_result['golden_keys_count']}")
|
||||
else:
|
||||
result = beautifier.beautify_and_restructure(target_file, backup=not args.no_backup)
|
||||
result = beautifier.beautify_and_restructure(target_file, backup=args.backup)
|
||||
print(f"\nResults for {result['language']}:")
|
||||
print(f" Keys preserved: {result['preserved_keys']}/{result['total_reference_keys']}")
|
||||
if result['structure_match']['total_issues'] > 0:
|
||||
@@ -237,13 +243,13 @@ def main():
|
||||
results = []
|
||||
for lang_dir in Path(args.locales_dir).iterdir():
|
||||
if lang_dir.is_dir() and lang_dir.name != "en-GB":
|
||||
translation_file = lang_dir / "translation.json"
|
||||
translation_file = lang_dir / "translation.toml"
|
||||
if translation_file.exists():
|
||||
if args.validate_only:
|
||||
order_result = beautifier.validate_key_order(translation_file)
|
||||
print(f"{lang_dir.name}: Order preserved = {order_result['order_preserved']}")
|
||||
else:
|
||||
result = beautifier.beautify_and_restructure(translation_file, backup=not args.no_backup)
|
||||
result = beautifier.beautify_and_restructure(translation_file, backup=args.backup)
|
||||
results.append(result)
|
||||
|
||||
if not args.validate_only and results:
|
||||
@@ -259,4 +265,4 @@ def main():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
198
scripts/translations/toml_validator.py
Normal file
198
scripts/translations/toml_validator.py
Normal file
@@ -0,0 +1,198 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
TOML Validator for Translation Files
|
||||
|
||||
Validates TOML syntax in translation files and reports detailed error information.
|
||||
Useful for validating translation files before merging.
|
||||
|
||||
Usage:
|
||||
python3 toml_validator.py <file_or_pattern>
|
||||
python3 toml_validator.py ar_AR_batch_*.toml
|
||||
python3 toml_validator.py ar_AR_batch_1_of_3.toml
|
||||
python3 toml_validator.py --all-batches ar_AR
|
||||
"""
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
import glob
|
||||
from pathlib import Path
|
||||
|
||||
import tomllib
|
||||
|
||||
|
||||
def get_line_context(file_path, line_num, context_lines=3):
    """Return the source lines surrounding ``line_num`` as one printable string.

    Args:
        file_path: Path of the UTF-8 text file to read.
        line_num: 1-based number of the line to highlight.
        context_lines: How many lines to show before and after ``line_num``.

    Returns:
        The selected lines joined with newlines, each prefixed with a gutter
        marker and its 1-based line number. The line equal to ``line_num`` is
        marked with ``>>> ``; the others get an equally wide blank marker so
        the line numbers stay in one column. An empty string is returned when
        ``line_num`` lies beyond the end of the file. If anything goes wrong
        while reading or formatting, a ``"Could not read context: ..."``
        message is returned instead of raising.
    """
    error_marker = ">>> "
    # Same width as the error marker, otherwise the numbered gutter is ragged.
    plain_marker = " " * len(error_marker)

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        # Convert the 1-based line number into a clamped 0-based window.
        start = max(0, line_num - context_lines - 1)
        end = min(len(lines), line_num + context_lines)

        context = []
        for number, text in enumerate(lines[start:end], start=start + 1):
            marker = error_marker if number == line_num else plain_marker
            context.append(f"{marker}{number:4d}: {text.rstrip()}")

        return "\n".join(context)
    except Exception as e:
        # Best effort: context is a nicety, never a reason to abort validation.
        return f"Could not read context: {e}"
|
||||
|
||||
|
||||
def get_character_context(file_path, char_pos, context_chars=100):
    """Return the characters surrounding a character offset in a text file.

    Args:
        file_path: Path of the UTF-8 text file to read.
        char_pos: 0-based character offset of the position of interest.
            Negative offsets are clamped to 0.
        context_chars: Size of the window on each side of ``char_pos``. The
            window covers ``[char_pos - context_chars, char_pos + context_chars)``,
            so up to ``context_chars`` characters precede the position and up
            to ``context_chars - 1`` follow it.

    Returns:
        A dict with the keys ``before``, ``error_char``, ``after`` and
        ``display`` (the three parts joined, with ``error_char`` in square
        brackets). ``error_char`` is the literal string ``"EOF"`` when
        ``char_pos`` is at or past the end of the file. ``None`` is returned
        if the file cannot be read or the arguments cannot be used.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # A negative offset would otherwise be interpreted by slicing as
        # "count from the end", producing a window unrelated to the error.
        char_pos = max(0, char_pos)

        start = max(0, char_pos - context_chars)
        end = min(len(content), char_pos + context_chars)

        before = content[start:char_pos]
        error_char = content[char_pos] if char_pos < len(content) else "EOF"
        after = content[char_pos + 1:end]

        return {
            'before': before,
            'error_char': error_char,
            'after': after,
            'display': f"{before}[{error_char}]{after}",
        }
    except Exception:
        # Best effort helper: callers treat None as "no context available".
        return None
|
||||
|
||||
|
||||
def count_keys(data, prefix=''):
    """Recursively count the leaf entries of a nested mapping.

    Args:
        data: The parsed structure to inspect. Anything that is not a ``dict``
            contributes zero.
        prefix: Unused; kept only so existing callers that pass it keep
            working. The count does not depend on it.

    Returns:
        The number of keys whose value is not itself a ``dict``. Nested dicts
        are descended into and are not counted themselves, so an empty table
        contributes zero.
    """
    if not isinstance(data, dict):
        return 0

    count = 0
    for value in data.values():
        if isinstance(value, dict):
            # Tables only contribute through the leaves they contain.
            count += count_keys(value)
        else:
            count += 1
    return count
|
||||
|
||||
|
||||
def validate_toml_file(file_path):
|
||||
"""Validate a single TOML file and return detailed error info"""
|
||||
result = {
|
||||
'file': str(file_path),
|
||||
'valid': False,
|
||||
'error': None,
|
||||
'line': None,
|
||||
'context': None,
|
||||
'entry_count': 0
|
||||
}
|
||||
|
||||
try:
|
||||
with open(file_path, 'rb') as f:
|
||||
data = tomllib.load(f)
|
||||
|
||||
result['valid'] = True
|
||||
result['entry_count'] = count_keys(data)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
result['error'] = error_msg
|
||||
|
||||
# Try to extract line number from error message
|
||||
import re
|
||||
line_match = re.search(r'line (\d+)', error_msg, re.IGNORECASE)
|
||||
if line_match:
|
||||
line_num = int(line_match.group(1))
|
||||
result['line'] = line_num
|
||||
result['context'] = get_line_context(file_path, line_num)
|
||||
|
||||
except FileNotFoundError:
|
||||
result['error'] = "File not found"
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def print_validation_result(result, brief=False, quiet=False):
    """Print one validation result in human-readable form.

    Args:
        result: Record with the keys ``valid``, ``file``, ``error``, ``line``,
            ``context`` and ``entry_count``.
        brief: When true, omit the entry count for valid files and the
            context and "Common fixes" sections for invalid ones.
        quiet: When true, print nothing for valid files. Invalid files are
            always reported.
    """
    if result['valid']:
        if quiet:
            return
        print(f"✓ {result['file']}")
        if not brief:
            print(f" Valid TOML with {result['entry_count']} entries")
        return

    print(f"✗ {result['file']}")
    print(f" Error: {result['error']}")

    if result['line']:
        print(f" Line: {result['line']}")

    if brief:
        return

    if result['context']:
        # Indent continuation lines so the whole context block is offset.
        indented = result['context'].replace("\n", "\n ")
        print("\n Context:")
        print(f" {indented}")

    print("\n Common fixes:")
    print(" - Check for missing quotes around keys or values")
    print(" - Ensure proper escaping of special characters")
    print(" - Verify table header syntax: [section.subsection]")
    print(" - Check for duplicate keys in the same table")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: validate TOML files and summarise the outcome.

    Files come either from ``--all-batches LANG`` (every
    ``LANG_batch_*.toml`` in the current directory) or from the positional
    arguments, each of which is expanded as a glob pattern and, if nothing
    matches, passed on as a literal file name so that it is reported as an
    error. Glob matches are sorted so the report order is reproducible.

    Exits with status 0 when every file is valid, and 1 when any file is
    invalid, when no input was given, or when ``--all-batches`` matches
    nothing.
    """
    parser = argparse.ArgumentParser(description='Validate TOML translation files')
    parser.add_argument('files', nargs='*', help='TOML file(s) or pattern to validate')
    parser.add_argument('--all-batches', metavar='LANG',
                        help='Validate all batch files for a language (e.g., ar_AR)')
    parser.add_argument('--brief', action='store_true',
                        help='Show brief output without context')
    parser.add_argument('--quiet', action='store_true',
                        help='Only show files with errors')

    args = parser.parse_args()

    # Collect files to validate
    files_to_validate = []

    if args.all_batches:
        # Find all batch files for the specified language
        pattern = f"{args.all_batches}_batch_*.toml"
        # glob returns matches in arbitrary order; sort for stable output.
        files_to_validate = sorted(glob.glob(pattern))
        if not files_to_validate:
            print(f"No batch files found matching pattern: {pattern}")
            sys.exit(1)
    elif args.files:
        for file_pattern in args.files:
            matched_files = sorted(glob.glob(file_pattern))
            if matched_files:
                files_to_validate.extend(matched_files)
            else:
                # Try as literal filename
                files_to_validate.append(file_pattern)
    else:
        parser.print_help()
        sys.exit(1)

    # Validate all files
    results = []
    for file_path in files_to_validate:
        result = validate_toml_file(file_path)
        results.append(result)
        print_validation_result(result, brief=args.brief, quiet=args.quiet)
        if not args.brief and not args.quiet:
            print()  # Empty line between files

    # Summary
    total = len(results)
    valid = sum(1 for r in results if r['valid'])
    invalid = total - valid

    if not args.quiet:
        print(f"\n{'='*60}")
        print(f"Summary: {valid}/{total} files valid")
        if invalid > 0:
            print(f" {invalid} file(s) with errors")

    # Exit with error code if any files invalid
    sys.exit(0 if invalid == 0 else 1)


if __name__ == '__main__':
    main()
|
||||
@@ -10,35 +10,27 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Set, Tuple
|
||||
import argparse
|
||||
try:
|
||||
import tomllib # Python 3.11+
|
||||
except ImportError:
|
||||
try:
|
||||
import toml as tomllib_fallback
|
||||
tomllib = None
|
||||
except ImportError:
|
||||
tomllib = None
|
||||
tomllib_fallback = None
|
||||
import tomllib
|
||||
|
||||
|
||||
class TranslationAnalyzer:
|
||||
def __init__(self, locales_dir: str = "frontend/public/locales", ignore_file: str = "scripts/ignore_translation.toml"):
|
||||
self.locales_dir = Path(locales_dir)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
|
||||
self.golden_truth = self._load_json(self.golden_truth_file)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.toml"
|
||||
self.golden_truth = self._load_translation_file(self.golden_truth_file)
|
||||
self.ignore_file = Path(ignore_file)
|
||||
self.ignore_patterns = self._load_ignore_patterns()
|
||||
|
||||
def _load_json(self, file_path: Path) -> Dict:
|
||||
"""Load JSON file with error handling."""
|
||||
def _load_translation_file(self, file_path: Path) -> Dict:
|
||||
"""Load TOML translation file with error handling."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
with open(file_path, 'rb') as f:
|
||||
return tomllib.load(f)
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File not found: {file_path}")
|
||||
sys.exit(1)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error: Invalid JSON in {file_path}: {e}")
|
||||
except Exception as e:
|
||||
print(f"Error: Invalid file {file_path}: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
def _load_ignore_patterns(self) -> Dict[str, Set[str]]:
|
||||
@@ -47,16 +39,8 @@ class TranslationAnalyzer:
|
||||
return {}
|
||||
|
||||
try:
|
||||
if tomllib:
|
||||
# Use Python 3.11+ built-in
|
||||
with open(self.ignore_file, 'rb') as f:
|
||||
ignore_data = tomllib.load(f)
|
||||
elif tomllib_fallback:
|
||||
# Use toml library fallback
|
||||
ignore_data = tomllib_fallback.load(self.ignore_file)
|
||||
else:
|
||||
# Simple parser as fallback
|
||||
ignore_data = self._parse_simple_toml()
|
||||
with open(self.ignore_file, 'rb') as f:
|
||||
ignore_data = tomllib.load(f)
|
||||
|
||||
# Convert lists to sets for faster lookup
|
||||
return {lang: set(patterns) for lang, data in ignore_data.items()
|
||||
@@ -65,31 +49,6 @@ class TranslationAnalyzer:
|
||||
print(f"Warning: Could not load ignore file {self.ignore_file}: {e}")
|
||||
return {}
|
||||
|
||||
def _parse_simple_toml(self) -> Dict:
|
||||
"""Simple TOML parser for ignore patterns (fallback)."""
|
||||
ignore_data = {}
|
||||
current_section = None
|
||||
|
||||
with open(self.ignore_file, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line or line.startswith('#'):
|
||||
continue
|
||||
|
||||
if line.startswith('[') and line.endswith(']'):
|
||||
current_section = line[1:-1]
|
||||
ignore_data[current_section] = {'ignore': []}
|
||||
elif line.startswith('ignore = [') and current_section:
|
||||
# Handle ignore array
|
||||
continue
|
||||
elif line.strip().startswith("'") and current_section:
|
||||
# Extract quoted items
|
||||
item = line.strip().strip("',")
|
||||
if item:
|
||||
ignore_data[current_section]['ignore'].append(item)
|
||||
|
||||
return ignore_data
|
||||
|
||||
def _flatten_dict(self, d: Dict, parent_key: str = '', separator: str = '.') -> Dict[str, str]:
|
||||
"""Flatten nested dictionary into dot-notation keys."""
|
||||
items = []
|
||||
@@ -102,18 +61,18 @@ class TranslationAnalyzer:
|
||||
return dict(items)
|
||||
|
||||
def get_all_language_files(self) -> List[Path]:
|
||||
"""Get all translation.json files except en-GB."""
|
||||
"""Get all translation files except en-GB."""
|
||||
files = []
|
||||
for lang_dir in self.locales_dir.iterdir():
|
||||
if lang_dir.is_dir() and lang_dir.name != "en-GB":
|
||||
translation_file = lang_dir / "translation.json"
|
||||
if translation_file.exists():
|
||||
files.append(translation_file)
|
||||
toml_file = lang_dir / "translation.toml"
|
||||
if toml_file.exists():
|
||||
files.append(toml_file)
|
||||
return sorted(files)
|
||||
|
||||
def find_missing_translations(self, target_file: Path) -> Set[str]:
|
||||
"""Find keys that exist in en-GB but missing in target file."""
|
||||
target_data = self._load_json(target_file)
|
||||
target_data = self._load_translation_file(target_file)
|
||||
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
@@ -127,7 +86,7 @@ class TranslationAnalyzer:
|
||||
|
||||
def find_untranslated_entries(self, target_file: Path) -> Set[str]:
|
||||
"""Find entries that appear to be untranslated (identical to en-GB)."""
|
||||
target_data = self._load_json(target_file)
|
||||
target_data = self._load_translation_file(target_file)
|
||||
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
@@ -170,7 +129,7 @@ class TranslationAnalyzer:
|
||||
|
||||
def find_extra_translations(self, target_file: Path) -> Set[str]:
|
||||
"""Find keys that exist in target file but not in en-GB."""
|
||||
target_data = self._load_json(target_file)
|
||||
target_data = self._load_translation_file(target_file)
|
||||
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
@@ -185,7 +144,7 @@ class TranslationAnalyzer:
|
||||
untranslated = self.find_untranslated_entries(target_file)
|
||||
extra = self.find_extra_translations(target_file)
|
||||
|
||||
target_data = self._load_json(target_file)
|
||||
target_data = self._load_translation_file(target_file)
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
|
||||
@@ -249,8 +208,12 @@ def main():
|
||||
analyzer = TranslationAnalyzer(args.locales_dir, args.ignore_file)
|
||||
|
||||
if args.language:
|
||||
target_file = Path(args.locales_dir) / args.language / "translation.json"
|
||||
if not target_file.exists():
|
||||
lang_dir = Path(args.locales_dir) / args.language
|
||||
toml_file = lang_dir / "translation.toml"
|
||||
|
||||
if toml_file.exists():
|
||||
target_file = toml_file
|
||||
else:
|
||||
print(f"Error: Translation file not found for language: {args.language}")
|
||||
sys.exit(1)
|
||||
results = [analyzer.analyze_file(target_file)]
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
Translation Merger for Stirling PDF Frontend
|
||||
Merges missing translations from en-GB into target language files.
|
||||
Useful for AI-assisted translation workflows.
|
||||
TOML format only.
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -14,46 +15,39 @@ import argparse
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
import tomllib # Python 3.11+
|
||||
except ImportError:
|
||||
try:
|
||||
import toml as tomllib_fallback
|
||||
tomllib = None
|
||||
except ImportError:
|
||||
tomllib = None
|
||||
tomllib_fallback = None
|
||||
import tomllib
|
||||
import tomli_w
|
||||
|
||||
|
||||
class TranslationMerger:
|
||||
def __init__(self, locales_dir: str = "frontend/public/locales", ignore_file: str = "scripts/ignore_translation.toml"):
|
||||
self.locales_dir = Path(locales_dir)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
|
||||
self.golden_truth = self._load_json(self.golden_truth_file)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.toml"
|
||||
self.golden_truth = self._load_translation_file(self.golden_truth_file)
|
||||
self.ignore_file = Path(ignore_file)
|
||||
self.ignore_patterns = self._load_ignore_patterns()
|
||||
|
||||
def _load_json(self, file_path: Path) -> Dict:
|
||||
"""Load JSON file with error handling."""
|
||||
def _load_translation_file(self, file_path: Path) -> Dict:
|
||||
"""Load TOML translation file."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
with open(file_path, 'rb') as f:
|
||||
return tomllib.load(f)
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File not found: {file_path}")
|
||||
sys.exit(1)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error: Invalid JSON in {file_path}: {e}")
|
||||
except Exception as e:
|
||||
print(f"Error: Invalid file {file_path}: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
def _save_json(self, data: Dict, file_path: Path, backup: bool = True) -> None:
|
||||
"""Save JSON file with backup option."""
|
||||
def _save_translation_file(self, data: Dict, file_path: Path, backup: bool = False) -> None:
|
||||
"""Save TOML translation file with backup option."""
|
||||
if backup and file_path.exists():
|
||||
backup_path = file_path.with_suffix(f'.backup.{datetime.now().strftime("%Y%m%d_%H%M%S")}.json')
|
||||
backup_path = file_path.with_suffix(f'.backup.{datetime.now().strftime("%Y%m%d_%H%M%S")}.toml')
|
||||
shutil.copy2(file_path, backup_path)
|
||||
print(f"Backup created: {backup_path}")
|
||||
|
||||
with open(file_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2, ensure_ascii=False)
|
||||
with open(file_path, 'wb') as f:
|
||||
tomli_w.dump(data, f)
|
||||
|
||||
def _load_ignore_patterns(self) -> Dict[str, Set[str]]:
|
||||
"""Load ignore patterns from TOML file."""
|
||||
@@ -61,26 +55,11 @@ class TranslationMerger:
|
||||
return {}
|
||||
|
||||
try:
|
||||
# Simple parser for ignore patterns
|
||||
ignore_data = {}
|
||||
current_section = None
|
||||
with open(self.ignore_file, 'rb') as f:
|
||||
ignore_data = tomllib.load(f)
|
||||
|
||||
with open(self.ignore_file, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line or line.startswith('#'):
|
||||
continue
|
||||
|
||||
if line.startswith('[') and line.endswith(']'):
|
||||
current_section = line[1:-1]
|
||||
ignore_data[current_section] = set()
|
||||
elif line.strip().startswith("'") and current_section:
|
||||
# Extract quoted items
|
||||
item = line.strip().strip("',")
|
||||
if item:
|
||||
ignore_data[current_section].add(item)
|
||||
|
||||
return ignore_data
|
||||
# Convert to sets for faster lookup
|
||||
return {lang: set(data.get('ignore', [])) for lang, data in ignore_data.items()}
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not load ignore file {self.ignore_file}: {e}")
|
||||
return {}
|
||||
@@ -131,7 +110,7 @@ class TranslationMerger:
|
||||
golden_keys = set(self._flatten_dict(self.golden_truth).keys())
|
||||
return sorted(golden_keys - ignore_set)
|
||||
|
||||
target_data = self._load_json(target_file)
|
||||
target_data = self._load_translation_file(target_file)
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
|
||||
@@ -144,7 +123,7 @@ class TranslationMerger:
|
||||
if not target_file.exists():
|
||||
target_data = {}
|
||||
else:
|
||||
target_data = self._load_json(target_file)
|
||||
target_data = self._load_translation_file(target_file)
|
||||
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
missing_keys = keys_to_add or self.get_missing_keys(target_file)
|
||||
@@ -172,7 +151,7 @@ class TranslationMerger:
|
||||
print(f"Error: Target file does not exist: {target_file}")
|
||||
return {}
|
||||
|
||||
target_data = self._load_json(target_file)
|
||||
target_data = self._load_translation_file(target_file)
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
|
||||
@@ -219,13 +198,13 @@ class TranslationMerger:
|
||||
return False
|
||||
|
||||
def apply_translations(self, target_file: Path, translations: Dict[str, str],
|
||||
backup: bool = True) -> Dict:
|
||||
backup: bool = False) -> Dict:
|
||||
"""Apply provided translations to target file."""
|
||||
if not target_file.exists():
|
||||
print(f"Error: Target file does not exist: {target_file}")
|
||||
return {'success': False, 'error': 'File not found'}
|
||||
|
||||
target_data = self._load_json(target_file)
|
||||
target_data = self._load_translation_file(target_file)
|
||||
applied_count = 0
|
||||
errors = []
|
||||
|
||||
@@ -241,7 +220,7 @@ class TranslationMerger:
|
||||
errors.append(f"Error setting {key}: {e}")
|
||||
|
||||
if applied_count > 0:
|
||||
self._save_json(target_data, target_file, backup)
|
||||
self._save_translation_file(target_data, target_file, backup)
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
@@ -288,7 +267,10 @@ class TranslationMerger:
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Merge and manage translation files')
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Merge and manage translation files',
|
||||
epilog='Works with TOML translation files.'
|
||||
)
|
||||
parser.add_argument('--locales-dir', default='frontend/public/locales',
|
||||
help='Path to locales directory')
|
||||
parser.add_argument('--ignore-file', default='scripts/ignore_translation.toml',
|
||||
@@ -299,7 +281,7 @@ def main():
|
||||
|
||||
# Add missing command
|
||||
add_parser = subparsers.add_parser('add-missing', help='Add missing translations from en-GB')
|
||||
add_parser.add_argument('--no-backup', action='store_true', help='Skip backup creation')
|
||||
add_parser.add_argument('--backup', action='store_true', help='Create backup before modifying files')
|
||||
add_parser.add_argument('--mark-untranslated', action='store_true', default=True,
|
||||
help='Mark added translations as [UNTRANSLATED]')
|
||||
|
||||
@@ -314,7 +296,7 @@ def main():
|
||||
# Apply translations command
|
||||
apply_parser = subparsers.add_parser('apply-translations', help='Apply translations from JSON file')
|
||||
apply_parser.add_argument('--translations-file', required=True, help='JSON file with translations')
|
||||
apply_parser.add_argument('--no-backup', action='store_true', help='Skip backup creation')
|
||||
apply_parser.add_argument('--backup', action='store_true', help='Create backup before modifying files')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -323,7 +305,10 @@ def main():
|
||||
return
|
||||
|
||||
merger = TranslationMerger(args.locales_dir, args.ignore_file)
|
||||
target_file = Path(args.locales_dir) / args.language / "translation.json"
|
||||
|
||||
# Find translation file
|
||||
lang_dir = Path(args.locales_dir) / args.language
|
||||
target_file = lang_dir / "translation.toml"
|
||||
|
||||
if args.command == 'add-missing':
|
||||
print(f"Adding missing translations to {args.language}...")
|
||||
@@ -332,7 +317,7 @@ def main():
|
||||
mark_untranslated=args.mark_untranslated
|
||||
)
|
||||
|
||||
merger._save_json(result['data'], target_file, backup=not args.no_backup)
|
||||
merger._save_translation_file(result['data'], target_file, backup=args.backup)
|
||||
print(f"Added {result['added_count']} missing translations")
|
||||
|
||||
elif args.command == 'extract-untranslated':
|
||||
@@ -355,7 +340,7 @@ def main():
|
||||
else:
|
||||
translations = translations_data
|
||||
|
||||
result = merger.apply_translations(target_file, translations, backup=not args.no_backup)
|
||||
result = merger.apply_translations(target_file, translations, backup=args.backup)
|
||||
|
||||
if result['success']:
|
||||
print(f"Applied {result['applied_count']} translations")
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validate JSON structure and formatting of translation files.
|
||||
Validate TOML structure and formatting of translation files.
|
||||
|
||||
Checks for:
|
||||
- Valid JSON syntax
|
||||
- Valid TOML syntax
|
||||
- Consistent key structure with en-GB
|
||||
- Missing keys
|
||||
- Extra keys not in en-GB
|
||||
@@ -18,6 +18,7 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Set
|
||||
import argparse
|
||||
import tomllib # Python 3.11+ (stdlib)
|
||||
|
||||
|
||||
def get_all_keys(d: dict, parent_key: str = '', sep: str = '.') -> Set[str]:
|
||||
@@ -31,14 +32,12 @@ def get_all_keys(d: dict, parent_key: str = '', sep: str = '.') -> Set[str]:
|
||||
return keys
|
||||
|
||||
|
||||
def validate_json_file(file_path: Path) -> tuple[bool, str]:
|
||||
"""Validate that a file contains valid JSON."""
|
||||
def validate_translation_file(file_path: Path) -> tuple[bool, str]:
|
||||
"""Validate that a file contains valid TOML."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
json.load(f)
|
||||
return True, "Valid JSON"
|
||||
except json.JSONDecodeError as e:
|
||||
return False, f"Invalid JSON at line {e.lineno}, column {e.colno}: {e.msg}"
|
||||
with open(file_path, 'rb') as f:
|
||||
tomllib.load(f)
|
||||
return True, "Valid TOML"
|
||||
except Exception as e:
|
||||
return False, f"Error reading file: {str(e)}"
|
||||
|
||||
@@ -101,9 +100,15 @@ def print_validation_result(result: Dict, verbose: bool = False):
|
||||
print("-" * 100)
|
||||
|
||||
|
||||
def load_translation_file(file_path: Path) -> dict:
|
||||
"""Load TOML translation file."""
|
||||
with open(file_path, 'rb') as f:
|
||||
return tomllib.load(f)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Validate translation JSON structure'
|
||||
description='Validate translation TOML structure'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--language',
|
||||
@@ -125,21 +130,21 @@ def main():
|
||||
|
||||
# Define paths
|
||||
locales_dir = Path('frontend/public/locales')
|
||||
en_gb_path = locales_dir / 'en-GB' / 'translation.json'
|
||||
en_gb_path = locales_dir / 'en-GB' / 'translation.toml'
|
||||
file_ext = '.toml'
|
||||
|
||||
if not en_gb_path.exists():
|
||||
print(f"❌ Error: en-GB translation file not found at {en_gb_path}")
|
||||
sys.exit(1)
|
||||
|
||||
# Validate en-GB itself
|
||||
is_valid, message = validate_json_file(en_gb_path)
|
||||
is_valid, message = validate_translation_file(en_gb_path)
|
||||
if not is_valid:
|
||||
print(f"❌ Error in en-GB file: {message}")
|
||||
sys.exit(1)
|
||||
|
||||
# Load en-GB structure
|
||||
with open(en_gb_path, 'r', encoding='utf-8') as f:
|
||||
en_gb = json.load(f)
|
||||
en_gb = load_translation_file(en_gb_path)
|
||||
|
||||
en_gb_keys = get_all_keys(en_gb)
|
||||
|
||||
@@ -147,24 +152,26 @@ def main():
|
||||
if args.language:
|
||||
languages = [args.language]
|
||||
else:
|
||||
languages = [
|
||||
d.name for d in locales_dir.iterdir()
|
||||
if d.is_dir() and d.name != 'en-GB' and (d / 'translation.json').exists()
|
||||
]
|
||||
# Validate all languages except en-GB
|
||||
languages = []
|
||||
for d in locales_dir.iterdir():
|
||||
if d.is_dir() and d.name != 'en-GB':
|
||||
if (d / 'translation.toml').exists():
|
||||
languages.append(d.name)
|
||||
|
||||
results = []
|
||||
json_errors = []
|
||||
|
||||
# Validate each language
|
||||
for lang_code in sorted(languages):
|
||||
lang_path = locales_dir / lang_code / 'translation.json'
|
||||
lang_path = locales_dir / lang_code / 'translation.toml'
|
||||
|
||||
if not lang_path.exists():
|
||||
print(f"⚠️ Warning: {lang_code}/translation.json not found, skipping")
|
||||
print(f"⚠️ Warning: {lang_code}/translation.toml not found, skipping")
|
||||
continue
|
||||
|
||||
# First check if JSON is valid
|
||||
is_valid, message = validate_json_file(lang_path)
|
||||
# First check if file is valid
|
||||
is_valid, message = validate_translation_file(lang_path)
|
||||
if not is_valid:
|
||||
json_errors.append({
|
||||
'language': lang_code,
|
||||
@@ -174,8 +181,7 @@ def main():
|
||||
continue
|
||||
|
||||
# Load and compare structure
|
||||
with open(lang_path, 'r', encoding='utf-8') as f:
|
||||
lang_data = json.load(f)
|
||||
lang_data = load_translation_file(lang_path)
|
||||
|
||||
lang_keys = get_all_keys(lang_data)
|
||||
result = validate_structure(en_gb_keys, lang_keys, lang_code)
|
||||
@@ -189,9 +195,9 @@ def main():
|
||||
}
|
||||
print(json.dumps(output, indent=2, ensure_ascii=False))
|
||||
else:
|
||||
# Print JSON errors first
|
||||
# Print syntax errors first
|
||||
if json_errors:
|
||||
print("\n❌ JSON Syntax Errors:")
|
||||
print("\n❌ Syntax Errors:")
|
||||
print("=" * 100)
|
||||
for error in json_errors:
|
||||
print(f"\nLanguage: {error['language']}")
|
||||
|
||||
@@ -15,6 +15,7 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Set, Tuple
|
||||
import argparse
|
||||
import tomllib # Python 3.11+ (stdlib)
|
||||
|
||||
|
||||
def find_placeholders(text: str) -> Set[str]:
|
||||
@@ -117,15 +118,16 @@ def main():
|
||||
|
||||
# Define paths
|
||||
locales_dir = Path('frontend/public/locales')
|
||||
en_gb_path = locales_dir / 'en-GB' / 'translation.json'
|
||||
en_gb_path = locales_dir / 'en-GB' / 'translation.toml'
|
||||
file_ext = '.toml'
|
||||
|
||||
if not en_gb_path.exists():
|
||||
print(f"❌ Error: en-GB translation file not found at {en_gb_path}")
|
||||
sys.exit(1)
|
||||
|
||||
# Load en-GB (source of truth)
|
||||
with open(en_gb_path, 'r', encoding='utf-8') as f:
|
||||
en_gb = json.load(f)
|
||||
with open(en_gb_path, 'rb') as f:
|
||||
en_gb = tomllib.load(f)
|
||||
|
||||
en_gb_flat = flatten_dict(en_gb)
|
||||
|
||||
@@ -134,23 +136,25 @@ def main():
|
||||
languages = [args.language]
|
||||
else:
|
||||
# Validate all languages except en-GB
|
||||
languages = [
|
||||
d.name for d in locales_dir.iterdir()
|
||||
if d.is_dir() and d.name != 'en-GB' and (d / 'translation.json').exists()
|
||||
]
|
||||
languages = []
|
||||
for d in locales_dir.iterdir():
|
||||
if d.is_dir() and d.name != 'en-GB':
|
||||
if (d / 'translation.toml').exists():
|
||||
languages.append(d.name)
|
||||
|
||||
all_issues = []
|
||||
|
||||
# Validate each language
|
||||
for lang_code in sorted(languages):
|
||||
lang_path = locales_dir / lang_code / 'translation.json'
|
||||
lang_path = locales_dir / lang_code / 'translation.toml'
|
||||
|
||||
if not lang_path.exists():
|
||||
print(f"⚠️ Warning: {lang_code}/translation.json not found, skipping")
|
||||
print(f"⚠️ Warning: {lang_code}/translation.toml not found, skipping")
|
||||
continue
|
||||
|
||||
with open(lang_path, 'r', encoding='utf-8') as f:
|
||||
lang_data = json.load(f)
|
||||
# Load language file
|
||||
with open(lang_path, 'rb') as f:
|
||||
lang_data = tomllib.load(f)
|
||||
|
||||
lang_flat = flatten_dict(lang_data)
|
||||
issues = validate_language(en_gb_flat, lang_flat, lang_code)
|
||||
|
||||
Reference in New Issue
Block a user