# Description of Changes

<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Translations (if applicable)

- [ ] I ran
[`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.
This commit is contained in:
Anthony Stirling
2025-12-03 09:57:00 +00:00
committed by GitHub
parent f72538d30f
commit 65a3eeca76
62 changed files with 3352 additions and 1947 deletions

View File

@@ -1,113 +0,0 @@
#!/usr/bin/env python3
"""
Convert Java .properties files to JSON for react-i18next
Preserves hierarchical structure and handles special cases
"""
import os
import json
import re
from pathlib import Path
def properties_to_dict(file_path):
    """Convert a Java ``.properties`` file into a nested dictionary.

    Each ``key=value`` line is split on the first ``=``; the dotted key is
    expanded into nested dictionaries by ``set_nested_value``.

    Args:
        file_path: Path of the UTF-8 encoded ``.properties`` file to read.

    Returns:
        dict: Nested dictionary of the parsed entries. Values are kept as
        raw strings (no escape sequences are decoded).
    """
    result = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            # Skip blank lines and comments. The .properties format accepts
            # both '#' and '!' as comment markers.
            if not line or line.startswith(('#', '!')):
                continue
            # Lines without a separator carry no key/value pair.
            if '=' not in line:
                continue
            key, value = line.split('=', 1)
            key = key.strip()
            value = value.strip()
            # Only an odd number of trailing backslashes marks a line
            # continuation; an even number is escaped literal backslashes.
            # At end of file next() yields '' so the loop still terminates.
            while (len(value) - len(value.rstrip('\\'))) % 2 == 1:
                value = value[:-1] + next(f, '').strip()
            # Create nested structure from dot notation
            set_nested_value(result, key, value)
    return result
def set_nested_value(dictionary, key_path, value):
    """Store ``value`` in ``dictionary`` at the location named by ``key_path``.

    ``key_path`` is split on ``.``; every segment except the last names a
    nested dictionary that is created on demand. When a plain value and a
    nested object compete for the same key, the plain value is kept under
    the ``"_value"`` key of the nested object.

    Args:
        dictionary: Dictionary to modify in place.
        key_path: Dot-separated key, e.g. ``"a.b.c"``.
        value: Value to store at the final segment.
    """
    *parents, leaf = key_path.split('.')
    node = dictionary
    for part in parents:
        if part in node:
            existing = node[part]
            if not isinstance(existing, dict):
                # A plain value already lives here: wrap it so it can have children.
                node[part] = {"_value": existing}
        else:
            node[part] = {}
        node = node[part]

    target = node.get(leaf)
    if isinstance(target, dict):
        # The leaf already holds children; keep them and attach the value beside them.
        target["_value"] = value
    else:
        node[leaf] = value
def convert_all_properties():
    """Convert every ``messages*.properties`` file into a ``translation.json``.

    Input files are read from ``<project root>/src/main/resources`` and one
    ``<locale>/translation.json`` is written per file below
    ``<project root>/frontend/public/locales``, where the project root is the
    parent of the directory containing this script.
    """
    project_root = Path(__file__).parent.parent
    resources_dir = project_root / 'src' / 'main' / 'resources'
    output_dir = project_root / 'frontend' / 'public' / 'locales'
    output_dir.mkdir(parents=True, exist_ok=True)

    converted_count = 0
    for props_file in list(resources_dir.glob('messages*.properties')):
        filename = props_file.name
        if filename == 'messages.properties':
            # The suffix-less file is the default locale.
            locale = 'en'
        else:
            locale_match = re.match(r'messages_(.+)\.properties', filename)
            if not locale_match:
                # Matched the glob but not the messages_<locale> naming scheme.
                continue
            # Java locale format -> standard tag (en_US -> en-US).
            locale = locale_match.group(1).replace('_', '-')

        print(f"Converting {filename} -> {locale}.json")
        data = properties_to_dict(props_file)

        locale_dir = output_dir / locale
        locale_dir.mkdir(exist_ok=True)

        # translation.json is the react-i18next default namespace.
        with open(locale_dir / 'translation.json', 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        converted_count += 1

    print(f"\nConverted {converted_count} language files to {output_dir}")
    print("Languages available:", [d.name for d in output_dir.iterdir() if d.is_dir()])
# Script entry point: run the conversion only when this file is executed directly.
if __name__ == '__main__':
convert_all_properties()

View File

@@ -1,219 +0,0 @@
"""A script to update language progress status in README.md based on
properties file comparison.
This script compares default properties file with others in a directory to
determine language progress.
It then updates README.md based on provided progress list.
Author: Ludy87
Example:
To use this script, simply run it from command line:
$ python counter_translation.py
""" # noqa: D205
import glob
import os
import re
import tomlkit
import tomlkit.toml_file
def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
    """Rebuild a TOML document with sorted top-level keys and tidy arrays.

    For every top-level table, the ``ignore`` and ``missing`` arrays (when
    present) are de-duplicated, sorted and emitted as multiline arrays; other
    keys of such a table are not carried over. Non-table values are copied
    unchanged.

    Parameters:
        data (tomlkit.TOMLDocument): The original TOML document.

    Returns:
        tomlkit.TOMLDocument: A new document; ``data`` itself is not modified.
    """  # noqa: D205
    rebuilt = tomlkit.document()
    for name in sorted(data.keys()):
        entry = data[name]
        if not isinstance(entry, dict):
            # Scalars and other non-table values pass through untouched.
            rebuilt[name] = entry
            continue

        table = tomlkit.table()
        for field in ("ignore", "missing"):
            if field not in entry:
                continue
            multiline_array = tomlkit.array()
            multiline_array.multiline(True)
            # set() drops duplicates, sorted() gives a stable order.
            for element in sorted(set(entry[field])):
                multiline_array.append(element)
            table[field] = multiline_array
        rebuilt[name] = table
    return rebuilt
def write_readme(progress_list: list[tuple[str, int]]) -> None:
    """Update the progress badges in ``README.md`` in the current directory.

    Every line from the third one onwards that contains a language code from
    ``progress_list`` has its ``![NN%](...)`` badge replaced by a geps.dev
    progress badge showing the new percentage. The file is rewritten with
    ``\\n`` line endings.

    Parameters:
        progress_list (list[tuple[str, int]]): Pairs of language code and
            progress percentage.

    Returns:
        None
    """  # noqa: D205
    # "[^)]*" stops at the badge's own closing parenthesis. The previous greedy
    # ".*" ran to the last ")" on the line and swallowed any text after the badge.
    badge_pattern = re.compile(r"!\[(\d+(\.\d+)?)%\]\([^)]*\)")

    with open("README.md", encoding="utf-8") as file:
        content = file.readlines()

    # The first two lines are never touched.
    for i, line in enumerate(content[2:], start=2):
        for language, value in progress_list:
            if language not in line:
                continue
            if match := badge_pattern.search(line):
                content[i] = line.replace(
                    match.group(0),
                    f"![{value}%](https://geps.dev/progress/{value})",
                )

    with open("README.md", "w", encoding="utf-8", newline="\n") as file:
        file.writelines(content)
def compare_files(
default_file_path, file_paths, ignore_translation_file
) -> list[tuple[str, int]]:
"""Compares the default properties file with other
properties files, line by line, and reports translation progress.
As a side effect the TOML file at ``ignore_translation_file`` is rewritten
(via ``convert_to_multiline``) and progress messages are printed.
The process exits with status 1 when a compared line cannot be split on "=".
Parameters:
default_file_path (str): The path to the default properties file.
file_paths (list): Paths of the ``messages_<language>.properties`` files to compare.
ignore_translation_file (str): Path to the TOML file holding the per-language "ignore" lists.
Returns:
list[tuple[str, int]]: A de-duplicated list of tuples containing
language and progress percentage, sorted by percentage in descending order.
""" # noqa: D205
# NOTE(review): indentation is not preserved in this view of the file; the
# nesting of the statements below has to be confirmed against the real source.
# Count the lines of the default file that are neither blank nor comments.
num_lines = sum(
1
for line in open(default_file_path, encoding="utf-8")
if line.strip() and not line.strip().startswith("#")
)
result_list = []
sort_ignore_translation: tomlkit.TOMLDocument
# read toml
with open(ignore_translation_file, encoding="utf-8") as f:
sort_ignore_translation = tomlkit.parse(f.read())
for file_path in file_paths:
# Language code is the part of the file name between "messages_" and ".properties".
language = (
os.path.basename(file_path)
.split("messages_", 1)[1]
.split(".properties", 1)[0]
)
fails = 0
# English variants are reported as 100% without comparing any lines.
if "en_GB" in language or "en_US" in language:
result_list.append(("en_GB", 100))
result_list.append(("en_US", 100))
continue
# Make sure the language has a table in the TOML document.
if language not in sort_ignore_translation:
sort_ignore_translation[language] = tomlkit.table()
# A missing or empty "ignore" array is seeded with "language.direction".
if (
"ignore" not in sort_ignore_translation[language]
or len(sort_ignore_translation[language].get("ignore", [])) < 1
):
sort_ignore_translation[language]["ignore"] = tomlkit.array(
["language.direction"]
)
# if "missing" not in sort_ignore_translation[language]:
# sort_ignore_translation[language]["missing"] = tomlkit.array()
# elif "language.direction" in sort_ignore_translation[language]["missing"]:
# sort_ignore_translation[language]["missing"].remove("language.direction")
with (
open(default_file_path, encoding="utf-8") as default_file,
open(file_path, encoding="utf-8") as file,
):
# Advance past the first five lines of the default file and of the translation;
# a translation file that runs out of lines here has every line counted as failed.
for _ in range(5):
next(default_file)
try:
next(file)
except StopIteration:
fails = num_lines
# Remaining lines are compared pairwise by position; numbering starts at 6
# because five lines were consumed above.
for line_num, (line_default, line_file) in enumerate(
zip(default_file, file), start=6
):
try:
# Ignoring empty lines and lines start with #
if line_default.strip() == "" or line_default.startswith("#"):
continue
default_key, default_value = line_default.split("=", 1)
file_key, file_value = line_file.split("=", 1)
# A value identical to the default counts as untranslated unless its key is ignored.
if (
default_value.strip() == file_value.strip()
and default_key.strip()
not in sort_ignore_translation[language]["ignore"]
):
print(
f"{language}: Line {line_num} is missing the translation."
)
# if default_key.strip() not in sort_ignore_translation[language]["missing"]:
# missing_array = tomlkit.array()
# missing_array.append(default_key.strip())
# missing_array.multiline(True)
# sort_ignore_translation[language]["missing"].extend(missing_array)
fails += 1
# elif default_key.strip() in sort_ignore_translation[language]["ignore"]:
# if default_key.strip() in sort_ignore_translation[language]["missing"]:
# sort_ignore_translation[language]["missing"].remove(default_key.strip())
# A value that differs from the default no longer needs its key in the ignore list.
if default_value.strip() != file_value.strip():
# if default_key.strip() in sort_ignore_translation[language]["missing"]:
# sort_ignore_translation[language]["missing"].remove(default_key.strip())
if (
default_key.strip()
in sort_ignore_translation[language]["ignore"]
):
sort_ignore_translation[language]["ignore"].remove(
default_key.strip()
)
except ValueError as e:
# split("=", 1) produced fewer than two parts: report the offending lines and abort.
print(f"Error processing line {line_num} in {file_path}: {e}")
print(f"{line_default}|{line_file}")
exit(1)
except IndexError:
pass
print(f"{language}: {fails} out of {num_lines} lines are not translated.")
# Progress is the integer percentage of default lines not counted as failed.
result_list.append(
(
language,
int((num_lines - fails) * 100 / num_lines),
)
)
# Persist the (possibly modified) ignore lists in normalised form.
ignore_translation = convert_to_multiline(sort_ignore_translation)
with open(ignore_translation_file, "w", encoding="utf-8", newline="\n") as file:
file.write(tomlkit.dumps(ignore_translation))
# Drop duplicate entries (the English pair can be appended more than once), highest progress first.
unique_data = list(set(result_list))
unique_data.sort(key=lambda x: x[1], reverse=True)
return unique_data
# Script entry point. All paths are resolved relative to the current working directory.
if __name__ == "__main__":
# Directory holding the messages_*.properties files.
directory = os.path.join(os.getcwd(), "app", "core", "src", "main", "resources")
messages_file_paths = glob.glob(os.path.join(directory, "messages_*.properties"))
# en_GB is the reference every other language file is compared against.
reference_file = os.path.join(directory, "messages_en_GB.properties")
scripts_directory = os.path.join(os.getcwd(), "scripts")
translation_state_file = os.path.join(scripts_directory, "ignore_translation.toml")
# Compute the per-language progress and write it into README.md.
write_readme(
compare_files(reference_file, messages_file_paths, translation_state_file)
)

View File

@@ -1,21 +1,21 @@
"""A script to update language progress status in README.md based on
JSON translation file comparison.
TOML translation file comparison.
This script compares the default translation JSON file with others in the locales directory to
This script compares the default translation TOML file with others in the locales directory to
determine language progress.
It then updates README.md based on provided progress list.
Author: Ludy87
Updated for TOML format
Example:
To use this script, simply run it from command line:
$ python counter_translation_v2.py
$ python counter_translation_v3.py
""" # noqa: D205
import glob
import os
import re
import json
import tomlkit
import tomlkit.toml_file
@@ -80,14 +80,14 @@ def write_readme(progress_list: list[tuple[str, int]]) -> None:
file.writelines(content)
def parse_json_file(file_path):
def parse_toml_file(file_path):
"""
Parses a JSON translation file and returns a flat dictionary of all keys.
:param file_path: Path to the JSON file.
Parses a TOML translation file and returns a flat dictionary of all keys.
:param file_path: Path to the TOML file.
:return: Dictionary with flattened keys and values.
"""
with open(file_path, "r", encoding="utf-8") as file:
data = json.load(file)
data = tomlkit.parse(file.read())
def flatten_dict(d, parent_key="", sep="."):
items = {}
@@ -105,19 +105,19 @@ def parse_json_file(file_path):
def compare_files(
default_file_path, file_paths, ignore_translation_file
) -> list[tuple[str, int]]:
"""Compares the default JSON translation file with other
"""Compares the default TOML translation file with other
translation files in the locales directory.
Parameters:
default_file_path (str): The path to the default translation JSON file.
file_paths (list): List of paths to translation JSON files.
default_file_path (str): The path to the default translation TOML file.
file_paths (list): List of paths to translation TOML files.
ignore_translation_file (str): Path to the TOML file with ignore rules.
Returns:
list[tuple[str, int]]: A list of tuples containing
language and progress percentage.
""" # noqa: D205
default_keys = parse_json_file(default_file_path)
default_keys = parse_toml_file(default_file_path)
num_keys = len(default_keys)
result_list = []
@@ -152,7 +152,7 @@ def compare_files(
["language.direction"]
)
current_keys = parse_json_file(file_path)
current_keys = parse_toml_file(file_path)
# Compare keys
for default_key, default_value in default_keys.items():
@@ -193,12 +193,12 @@ def compare_files(
if __name__ == "__main__":
directory = os.path.join(os.getcwd(), "frontend", "public", "locales")
translation_file_paths = glob.glob(os.path.join(directory, "*", "translation.json"))
reference_file = os.path.join(directory, "en-GB", "translation.json")
translation_file_paths = glob.glob(os.path.join(directory, "*", "translation.toml"))
reference_file = os.path.join(directory, "en-GB", "translation.toml")
scripts_directory = os.path.join(os.getcwd(), "scripts")
translation_state_file = os.path.join(scripts_directory, "ignore_translation.toml")
write_readme(
compare_files(reference_file, translation_file_paths, translation_state_file)
)
)

View File

@@ -2,6 +2,12 @@
This directory contains Python scripts for managing frontend translations in Stirling PDF. These tools help analyze, merge, validate, and manage translations against the en-GB golden truth file.
## Current Format: TOML
**Stirling PDF uses TOML format for translations** in `frontend/public/locales/{lang}/translation.toml`.
**All scripts now support TOML format!**
## Quick Start - Automated Translation (RECOMMENDED)
The **fastest and easiest way** to translate a language is using the automated pipeline:
@@ -451,18 +457,15 @@ python scripts/translations/translation_merger.py fr-FR apply-translations --tra
## Translation File Structure
Translation files are located in `frontend/public/locales/{language}/translation.json` with nested JSON structure:
Translation files are located in `frontend/public/locales/{language}/translation.toml` with TOML structure:
```json
{
"addPageNumbers": {
"title": "Add Page Numbers",
"selectText": {
"1": "Select PDF file:",
"2": "Margin Size"
}
}
}
```toml
[addPageNumbers]
title = "Add Page Numbers"
[addPageNumbers.selectText]
"1" = "Select PDF file:"
"2" = "Margin Size"
```
Keys use dot notation internally (e.g., `addPageNumbers.selectText.1`).
@@ -478,7 +481,7 @@ All scripts preserve placeholders like `{n}`, `{total}`, `{filename}` in transla
### Automatic Backups
Scripts create timestamped backups before modifying files:
```
translation.backup.20241201_143022.json
translation.backup.20241201_143022.toml
```
### Context-Aware Translation

View File

@@ -3,6 +3,7 @@
AI Translation Helper for Stirling PDF Frontend
Provides utilities for AI-assisted translation workflows including
batch processing, quality checks, and integration helpers.
TOML format only.
"""
import json
@@ -14,31 +15,33 @@ import argparse
import re
from datetime import datetime
import csv
import tomllib
import tomli_w
class AITranslationHelper:
def __init__(self, locales_dir: str = "frontend/public/locales"):
self.locales_dir = Path(locales_dir)
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.toml"
def _load_json(self, file_path: Path) -> Dict:
"""Load JSON file with error handling."""
def _load_translation_file(self, file_path: Path) -> Dict:
"""Load TOML translation file."""
try:
with open(file_path, 'r', encoding='utf-8') as f:
return json.load(f)
except (FileNotFoundError, json.JSONDecodeError) as e:
with open(file_path, 'rb') as f:
return tomllib.load(f)
except (FileNotFoundError, Exception) as e:
print(f"Error loading {file_path}: {e}")
return {}
def _save_json(self, data: Dict, file_path: Path) -> None:
"""Save JSON file."""
with open(file_path, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False)
def _save_translation_file(self, data: Dict, file_path: Path) -> None:
"""Save TOML translation file."""
with open(file_path, 'wb') as f:
tomli_w.dump(data, f)
def create_ai_batch_file(self, languages: List[str], output_file: Path,
max_entries_per_language: int = 50) -> None:
"""Create a batch file for AI translation with multiple languages."""
golden_truth = self._load_json(self.golden_truth_file)
golden_truth = self._load_translation_file(self.golden_truth_file)
batch_data = {
'metadata': {
'created_at': datetime.now().isoformat(),
@@ -56,12 +59,14 @@ class AITranslationHelper:
}
for lang in languages:
lang_file = self.locales_dir / lang / "translation.json"
if not lang_file.exists():
# Create empty translation structure
lang_data = {}
lang_dir = self.locales_dir / lang
toml_file = lang_dir / "translation.toml"
if toml_file.exists():
lang_data = self._load_translation_file(toml_file)
else:
lang_data = self._load_json(lang_file)
# No translation file found, create empty structure
lang_data = {}
# Find untranslated entries
untranslated = self._find_untranslated_entries(golden_truth, lang_data)
@@ -79,7 +84,9 @@ class AITranslationHelper:
'context': self._get_key_context(key)
}
self._save_json(batch_data, output_file)
# Always save batch files as JSON for compatibility
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(batch_data, f, indent=2, ensure_ascii=False)
total_entries = sum(len(lang_data) for lang_data in batch_data['translations'].values())
print(f"Created AI batch file: {output_file}")
print(f"Total entries to translate: {total_entries}")
@@ -173,7 +180,9 @@ class AITranslationHelper:
def validate_ai_translations(self, batch_file: Path) -> Dict[str, List[str]]:
"""Validate AI translations for common issues."""
batch_data = self._load_json(batch_file)
# Batch files are always JSON
with open(batch_file, 'r', encoding='utf-8') as f:
batch_data = json.load(f)
issues = {'errors': [], 'warnings': []}
for lang, translations in batch_data.get('translations', {}).items():
@@ -209,7 +218,9 @@ class AITranslationHelper:
def apply_ai_batch_translations(self, batch_file: Path, validate: bool = True) -> Dict[str, Any]:
"""Apply translations from AI batch file to individual language files."""
batch_data = self._load_json(batch_file)
# Batch files are always JSON
with open(batch_file, 'r', encoding='utf-8') as f:
batch_data = json.load(f)
results = {'applied': {}, 'errors': [], 'warnings': []}
if validate:
@@ -226,14 +237,15 @@ class AITranslationHelper:
print(f" WARNING: {warning}")
for lang, translations in batch_data.get('translations', {}).items():
lang_file = self.locales_dir / lang / "translation.json"
lang_dir = self.locales_dir / lang
toml_file = lang_dir / "translation.toml"
# Load existing data or create new
if lang_file.exists():
lang_data = self._load_json(lang_file)
if toml_file.exists():
lang_data = self._load_translation_file(toml_file)
else:
# No translation file found, create new TOML file
lang_data = {}
lang_file.parent.mkdir(parents=True, exist_ok=True)
lang_dir.mkdir(parents=True, exist_ok=True)
applied_count = 0
for key, translation_data in translations.items():
@@ -243,7 +255,7 @@ class AITranslationHelper:
applied_count += 1
if applied_count > 0:
self._save_json(lang_data, lang_file)
self._save_translation_file(lang_data, toml_file)
results['applied'][lang] = applied_count
print(f"Applied {applied_count} translations to {lang}")
@@ -265,7 +277,7 @@ class AITranslationHelper:
def export_for_external_translation(self, languages: List[str], output_format: str = 'csv') -> None:
"""Export translations for external translation services."""
golden_truth = self._load_json(self.golden_truth_file)
golden_truth = self._load_translation_file(self.golden_truth_file)
golden_flat = self._flatten_dict(golden_truth)
if output_format == 'csv':
@@ -287,9 +299,11 @@ class AITranslationHelper:
}
for lang in languages:
lang_file = self.locales_dir / lang / "translation.json"
if lang_file.exists():
lang_data = self._load_json(lang_file)
lang_dir = self.locales_dir / lang
toml_file = lang_dir / "translation.toml"
if toml_file.exists():
lang_data = self._load_translation_file(toml_file)
lang_flat = self._flatten_dict(lang_data)
value = lang_flat.get(key, '')
if value.startswith('[UNTRANSLATED]'):
@@ -316,21 +330,28 @@ class AITranslationHelper:
}
for lang in languages:
lang_file = self.locales_dir / lang / "translation.json"
if lang_file.exists():
lang_data = self._load_json(lang_file)
lang_dir = self.locales_dir / lang
toml_file = lang_dir / "translation.toml"
if toml_file.exists():
lang_data = self._load_translation_file(toml_file)
lang_flat = self._flatten_dict(lang_data)
value = lang_flat.get(key, '')
if value.startswith('[UNTRANSLATED]'):
value = ''
export_data['translations'][key][lang] = value
self._save_json(export_data, output_file)
# Export files are always JSON
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(export_data, f, indent=2, ensure_ascii=False)
print(f"Exported to {output_file}")
def main():
parser = argparse.ArgumentParser(description='AI Translation Helper')
parser = argparse.ArgumentParser(
description='AI Translation Helper',
epilog='Works with TOML translation files.'
)
parser.add_argument('--locales-dir', default='frontend/public/locales',
help='Path to locales directory')

View File

@@ -2,6 +2,7 @@
"""
Automated Translation Pipeline
Extracts, translates, merges, and beautifies translations for a language.
TOML format only.
"""
import json
@@ -12,6 +13,8 @@ import subprocess
from pathlib import Path
import time
import tomllib
def run_command(cmd, description=""):
"""Run a shell command and return success status."""
@@ -30,26 +33,34 @@ def run_command(cmd, description=""):
return result.returncode == 0
def find_translation_file(lang_dir):
"""Find translation file in language directory."""
toml_file = lang_dir / "translation.toml"
if toml_file.exists():
return toml_file
return None
def load_translation_file(file_path):
"""Load TOML translation file."""
with open(file_path, 'rb') as f:
return tomllib.load(f)
def extract_untranslated(language_code, batch_size=500):
"""Extract untranslated entries and split into batches."""
print(f"\n🔍 Extracting untranslated entries for {language_code}...")
# Load files
golden_path = Path(f'frontend/public/locales/en-GB/translation.json')
lang_path = Path(f'frontend/public/locales/{language_code}/translation.json')
golden_path = find_translation_file(Path('frontend/public/locales/en-GB'))
lang_path = find_translation_file(Path(f'frontend/public/locales/{language_code}'))
if not golden_path.exists():
print(f"Error: Golden truth file not found: {golden_path}")
if not golden_path:
print(f"Error: Golden truth file not found in frontend/public/locales/en-GB")
return None
if not lang_path.exists():
print(f"Error: Language file not found: {lang_path}")
if not lang_path:
print(f"Error: Language file not found in frontend/public/locales/{language_code}")
return None
def load_json(path):
with open(path, 'r', encoding='utf-8') as f:
return json.load(f)
def flatten_dict(d, parent_key='', separator='.'):
items = []
for k, v in d.items():
@@ -60,8 +71,12 @@ def extract_untranslated(language_code, batch_size=500):
items.append((new_key, str(v)))
return dict(items)
golden = load_json(golden_path)
lang_data = load_json(lang_path)
golden = load_translation_file(golden_path)
lang_data = load_translation_file(lang_path)
if not golden or not lang_data:
print(f"Error: Failed to load translation files")
return None
golden_flat = flatten_dict(golden)
lang_flat = flatten_dict(lang_data)
@@ -186,7 +201,7 @@ def beautify_translations(language_code):
"""Beautify translation file to match en-GB structure."""
print(f"\n✨ Beautifying {language_code} translation file...")
cmd = f'python3 scripts/translations/json_beautifier.py --language {language_code}'
cmd = f'python3 scripts/translations/toml_beautifier.py --language {language_code}'
if not run_command(cmd):
print(f"✗ Failed to beautify translations")
@@ -229,6 +244,8 @@ def main():
description='Automated translation pipeline for Stirling PDF',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Note: This script works with TOML translation files.
Examples:
# Translate Spanish with API key in environment
export OPENAI_API_KEY=your_key_here

View File

@@ -5,6 +5,8 @@ Automatically translates JSON batch files to target language while preserving:
- Placeholders: {n}, {total}, {filename}, {{variable}}
- HTML tags: <strong>, </strong>, etc.
- Technical terms: PDF, API, OAuth2, SAML2, JWT, etc.
Note: Works with JSON batch files. Translation files can be TOML or JSON format.
"""
import json
@@ -206,9 +208,11 @@ def get_language_info(language_code: str) -> tuple:
def main():
parser = argparse.ArgumentParser(
description='Translate JSON batch files using OpenAI API',
description='Translate JSON batch files using OpenAI API (output supports TOML and JSON)',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Note: This script works with JSON batch files. The translation files it updates can be TOML or JSON.
Examples:
# Translate single batch file
python batch_translator.py zh_CN_batch_1_of_4.json --api-key YOUR_KEY --language zh-CN

View File

@@ -0,0 +1,305 @@
#!/usr/bin/env python3
"""
Bulk Auto-Translate All Languages
Automatically translates all languages in parallel using OpenAI API.
Supports concurrent translation with configurable thread pool.
"""
import argparse
import os
import sys
import time
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import subprocess
from typing import List, Tuple, Optional
import threading
import tomllib
# Lock held for the duration of every safe_print() call.
print_lock = threading.Lock()


def safe_print(*args, **kwargs):
    """Call the built-in ``print`` while holding ``print_lock``.

    All positional and keyword arguments are passed to ``print`` unchanged.
    """
    with print_lock:
        print(*args, **kwargs)
def get_all_languages(locales_dir: Path) -> List[str]:
    """List the language codes available for translation.

    A language is every sub-directory of ``locales_dir`` other than ``en-GB``
    that contains a ``translation.toml`` file.

    Args:
        locales_dir: Directory holding one sub-directory per language.

    Returns:
        Directory names in sorted path order; an empty list (after printing
        an error) when ``locales_dir`` does not exist.
    """
    if not locales_dir.exists():
        print(f"Error: Locales directory not found: {locales_dir}")
        return []

    return [
        entry.name
        for entry in sorted(locales_dir.iterdir())
        if entry.is_dir()
        and entry.name != "en-GB"
        and (entry / "translation.toml").exists()
    ]
def get_language_completion(locales_dir: Path, language: str) -> Optional[float]:
"""Get completion percentage for a language."""
lang_dir = locales_dir / language
toml_file = lang_dir / "translation.toml"
if not toml_file.exists():
return None
try:
with open(toml_file, 'rb') as f:
target_data = tomllib.load(f)
# Load en-GB reference
en_gb_file = locales_dir / 'en-GB' / 'translation.toml'
with open(en_gb_file, 'rb') as f:
en_gb_data = tomllib.load(f)
# Flatten and count
def flatten(d, parent=''):
items = {}
for k, v in d.items():
key = f"{parent}.{k}" if parent else k
if isinstance(v, dict):
items.update(flatten(v, key))
else:
items[key] = v
return items
en_gb_flat = flatten(en_gb_data)
target_flat = flatten(target_data)
# Count translated (not equal to en-GB)
translated = sum(1 for k in en_gb_flat if k in target_flat and target_flat[k] != en_gb_flat[k])
total = len(en_gb_flat)
return (translated / total * 100) if total > 0 else 0.0
except Exception as e:
print(f"Warning: Could not calculate completion for {language}: {e}")
return None
def translate_language(language: str, api_key: str, batch_size: int, timeout: int, skip_verification: bool) -> Tuple[str, bool, str]:
    """Run ``auto_translate.py`` for one language in a subprocess.

    Args:
        language: Language code passed to the translation script.
        api_key: Value forwarded as ``--api-key``.
        batch_size: Value forwarded as ``--batch-size``.
        timeout: Value forwarded as ``--timeout``; the subprocess as a whole
            is allowed five times this many seconds.
        skip_verification: When true, ``--skip-verification`` is appended.

    Returns:
        ``(language_code, success, message)``. Failure messages are truncated
        to 200 characters; this function catches its own errors and reports
        them through the tuple instead of raising.
    """
    safe_print(f"[{language}] Starting translation...")

    cmd = [
        'python3', 'scripts/translations/auto_translate.py',
        language,
        '--api-key', api_key,
        '--batch-size', str(batch_size),
        '--timeout', str(timeout)
    ]
    if skip_verification:
        cmd.append('--skip-verification')

    try:
        # Overall timeout = 5x per-batch timeout
        overall_timeout = timeout * 5
        completed = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=overall_timeout
        )

        if completed.returncode != 0:
            # Prefer stderr, fall back to stdout, then to a fixed placeholder.
            error_msg = completed.stderr.strip() or completed.stdout.strip() or "Unknown error"
            safe_print(f"[{language}] ✗ Failed: {error_msg[:100]}")
            return (language, False, error_msg[:200])

        # The child script prints this marker when there was nothing left to do.
        if "Nothing to translate!" in completed.stdout:
            safe_print(f"[{language}] ✓ Already complete")
            return (language, True, "Already complete")

        safe_print(f"[{language}] ✓ Success")
        return (language, True, "Success")
    except subprocess.TimeoutExpired:
        safe_print(f"[{language}] ✗ Timeout exceeded")
        return (language, False, "Timeout exceeded")
    except Exception as e:
        safe_print(f"[{language}] ✗ Error: {str(e)}")
        return (language, False, str(e))
def main():
    """
    Command-line entry point for bulk translation.

    Parses the options, selects the languages to process (explicit list or
    everything returned by get_all_languages, optionally filtered by a
    completion threshold), runs translate_language for each of them in a
    thread pool and prints a summary. Exits with status 1 when the API key
    is missing or any language failed, 2 on invalid options, 0 otherwise.
    """
    parser = argparse.ArgumentParser(
        description='Bulk auto-translate all languages using OpenAI API',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Translate all languages with 10 parallel threads
python3 bulk_auto_translate.py --parallel 10
# Translate only incomplete languages (< 95%)
python3 bulk_auto_translate.py --parallel 5 --threshold 95
# Translate specific languages only
python3 bulk_auto_translate.py --languages de-DE fr-FR es-ES --parallel 3
# Dry run to see what would be translated
python3 bulk_auto_translate.py --dry-run
Note: Requires OPENAI_API_KEY environment variable or --api-key argument.
"""
    )
    parser.add_argument('--api-key', help='OpenAI API key (or set OPENAI_API_KEY env var)')
    parser.add_argument('--parallel', type=int, default=1,
                        help='Number of parallel translation threads (default: 1)')
    parser.add_argument('--batch-size', type=int, default=500,
                        help='Entries per batch for translation (default: 500)')
    parser.add_argument('--timeout', type=int, default=600,
                        help='Timeout per batch in seconds (default: 600)')
    parser.add_argument('--threshold', type=float, default=0.0,
                        help='Only translate languages below this completion %% (default: 0 = all)')
    parser.add_argument('--languages', nargs='+',
                        help='Translate only specific languages (e.g., de-DE fr-FR)')
    parser.add_argument('--locales-dir', default='frontend/public/locales',
                        help='Path to locales directory')
    parser.add_argument('--skip-verification', action='store_true',
                        help='Skip final completion verification for each language')
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would be translated without actually translating')
    args = parser.parse_args()

    # ThreadPoolExecutor rejects max_workers < 1 with a ValueError; report the
    # problem as a normal usage error instead of a traceback later on.
    if args.parallel < 1:
        parser.error('--parallel must be at least 1')

    # Verify API key (unless dry run)
    api_key = args.api_key or os.environ.get('OPENAI_API_KEY')
    if not args.dry_run and not api_key:
        print("Error: OpenAI API key required. Provide via --api-key or OPENAI_API_KEY environment variable")
        sys.exit(1)

    locales_dir = Path(args.locales_dir)

    # Get languages to translate
    if args.languages:
        languages = args.languages
        print(f"Translating specified languages: {', '.join(languages)}")
    else:
        languages = get_all_languages(locales_dir)
        print(f"Found {len(languages)} languages (excluding en-GB)")

    if not languages:
        print("No languages to translate!")
        sys.exit(0)

    # Filter by completion threshold
    if args.threshold > 0:
        print(f"\nFiltering languages below {args.threshold}% completion...")
        filtered = []
        for lang in languages:
            completion = get_language_completion(locales_dir, lang)
            if completion is None:
                filtered.append(lang)  # Include if can't determine
                print(f" {lang}: Unknown completion - will translate")
            elif completion < args.threshold:
                filtered.append(lang)
                print(f" {lang}: {completion:.1f}% - will translate")
            else:
                print(f" {lang}: {completion:.1f}% - skipping (above threshold)")
        languages = filtered
        if not languages:
            print("\nNo languages below threshold!")
            sys.exit(0)

    print(f"\n{'='*60}")
    print(f"Bulk Translation Configuration")
    print(f"{'='*60}")
    print(f"Languages to translate: {len(languages)}")
    print(f"Parallel threads: {args.parallel}")
    print(f"Batch size: {args.batch_size}")
    print(f"Timeout per batch: {args.timeout}s")
    if args.threshold > 0:
        print(f"Completion threshold: {args.threshold}%")
    print(f"{'='*60}\n")

    if args.dry_run:
        print("DRY RUN - Languages that would be translated:")
        for lang in languages:
            completion = get_language_completion(locales_dir, lang)
            comp_str = f"{completion:.1f}%" if completion is not None else "Unknown"
            print(f" - {lang} ({comp_str})")
        print(f"\nTotal: {len(languages)} languages")
        sys.exit(0)

    start_time = time.time()

    # Translate in parallel
    results = {
        'success': [],
        'failed': [],
        'already_complete': []
    }
    with ThreadPoolExecutor(max_workers=args.parallel) as executor:
        futures = {
            executor.submit(
                translate_language,
                lang,
                api_key,
                args.batch_size,
                args.timeout,
                args.skip_verification
            ): lang
            for lang in languages
        }
        # Collect outcomes in completion order, not submission order.
        for future in as_completed(futures):
            language, success, message = future.result()
            if success:
                if message == "Already complete":
                    results['already_complete'].append(language)
                else:
                    results['success'].append(language)
            else:
                results['failed'].append((language, message))

    elapsed = time.time() - start_time

    # Print summary
    print("\n" + "="*60)
    print("Bulk Translation Summary")
    print("="*60)
    print(f"Total languages: {len(languages)}")
    print(f"Successful: {len(results['success'])}")
    print(f"Already complete: {len(results['already_complete'])}")
    print(f"Failed: {len(results['failed'])}")
    print(f"Time elapsed: {elapsed:.1f} seconds ({elapsed/60:.1f} minutes)")
    print("="*60)

    if results['success']:
        print(f"\n✅ Successfully translated ({len(results['success'])}):")
        for lang in sorted(results['success']):
            print(f" - {lang}")

    if results['already_complete']:
        print(f"\n✓ Already complete ({len(results['already_complete'])}):")
        for lang in sorted(results['already_complete']):
            print(f" - {lang}")

    if results['failed']:
        print(f"\n❌ Failed ({len(results['failed'])}):")
        for lang, msg in sorted(results['failed']):
            print(f" - {lang}: {msg}")
        # A non-zero status lets calling scripts detect partial failure.
        sys.exit(1)

    print("\n✅ Bulk translation completed successfully!")


if __name__ == '__main__':
    main()

View File

@@ -2,41 +2,37 @@
"""
Compact Translation Extractor for Character-Limited AI Translation
Outputs untranslated entries in minimal JSON format with whitespace stripped.
TOML format only.
"""
import json
import sys
from pathlib import Path
import argparse
try:
import tomllib # Python 3.11+
except ImportError:
try:
import toml as tomllib_fallback
tomllib = None
except ImportError:
tomllib = None
tomllib_fallback = None
import tomllib # Python 3.11+ (stdlib)
class CompactTranslationExtractor:
def __init__(self, locales_dir: str = "frontend/public/locales", ignore_file: str = "scripts/ignore_translation.toml"):
self.locales_dir = Path(locales_dir)
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
self.golden_truth = self._load_json(self.golden_truth_file)
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.toml"
if not self.golden_truth_file.exists():
print(f"Error: en-GB translation file not found at {self.golden_truth_file}", file=sys.stderr)
sys.exit(1)
self.golden_truth = self._load_translation_file(self.golden_truth_file)
self.ignore_file = Path(ignore_file)
self.ignore_patterns = self._load_ignore_patterns()
def _load_json(self, file_path: Path) -> dict:
"""Load JSON file with error handling."""
def _load_translation_file(self, file_path: Path) -> dict:
"""Load TOML translation file."""
try:
with open(file_path, 'r', encoding='utf-8') as f:
return json.load(f)
with open(file_path, 'rb') as f:
return tomllib.load(f)
except FileNotFoundError:
print(f"Error: File not found: {file_path}", file=sys.stderr)
sys.exit(1)
except json.JSONDecodeError as e:
print(f"Error: Invalid JSON in {file_path}: {e}", file=sys.stderr)
except Exception as e:
print(f"Error: Invalid TOML file {file_path}: {e}", file=sys.stderr)
sys.exit(1)
def _load_ignore_patterns(self) -> dict:
@@ -45,40 +41,13 @@ class CompactTranslationExtractor:
return {}
try:
if tomllib:
with open(self.ignore_file, 'rb') as f:
ignore_data = tomllib.load(f)
elif tomllib_fallback:
ignore_data = tomllib_fallback.load(self.ignore_file)
else:
ignore_data = self._parse_simple_toml()
with open(self.ignore_file, 'rb') as f:
ignore_data = tomllib.load(f)
return {lang: set(data.get('ignore', [])) for lang, data in ignore_data.items()}
except Exception as e:
print(f"Warning: Could not load ignore file {self.ignore_file}: {e}", file=sys.stderr)
return {}
def _parse_simple_toml(self) -> dict:
"""Simple TOML parser for ignore patterns (fallback)."""
ignore_data = {}
current_section = None
with open(self.ignore_file, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if not line or line.startswith('#'):
continue
if line.startswith('[') and line.endswith(']'):
current_section = line[1:-1]
ignore_data[current_section] = {'ignore': []}
elif line.strip().startswith("'") and current_section:
item = line.strip().strip("',")
if item:
ignore_data[current_section]['ignore'].append(item)
return ignore_data
def _flatten_dict(self, d: dict, parent_key: str = '', separator: str = '.') -> dict:
"""Flatten nested dictionary into dot-notation keys."""
items = []
@@ -92,13 +61,14 @@ class CompactTranslationExtractor:
def get_untranslated_entries(self, language: str) -> dict:
"""Get all untranslated entries for a language in compact format."""
target_file = self.locales_dir / language / "translation.json"
lang_dir = self.locales_dir / language
target_file = lang_dir / "translation.toml"
if not target_file.exists():
print(f"Error: Translation file not found for language: {language}", file=sys.stderr)
sys.exit(1)
target_data = self._load_json(target_file)
target_data = self._load_translation_file(target_file)
golden_flat = self._flatten_dict(self.golden_truth)
target_flat = self._flatten_dict(target_data)
@@ -145,7 +115,9 @@ class CompactTranslationExtractor:
def main():
parser = argparse.ArgumentParser(description='Extract untranslated entries in compact format for AI translation')
parser = argparse.ArgumentParser(
description='Extract untranslated entries in compact format for AI translation (TOML format only)'
)
parser.add_argument('language', help='Language code (e.g., de-DE, fr-FR)')
parser.add_argument('--locales-dir', default='frontend/public/locales', help='Path to locales directory')
parser.add_argument('--ignore-file', default='scripts/ignore_translation.toml', help='Path to ignore patterns file')

View File

@@ -1,259 +0,0 @@
#!/usr/bin/env python3
"""
JSON Validator for Translation Files
Validates JSON syntax in translation files and reports detailed error information.
Useful for validating batch translation files before merging.
Usage:
python3 json_validator.py <file_or_pattern>
python3 json_validator.py ar_AR_batch_*.json
python3 json_validator.py ar_AR_batch_1_of_3.json
python3 json_validator.py --all-batches ar_AR
"""
import json
import sys
import argparse
import glob
from pathlib import Path
def get_line_context(file_path, line_num, context_lines=3):
    """Return the numbered lines around line_num (1-based), marking that line with '>>> '"""
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            all_lines = handle.readlines()

        # line_num is 1-based while all_lines is indexed from 0.
        target = line_num - 1
        first = max(0, target - context_lines)
        stop = min(len(all_lines), line_num + context_lines)

        return "\n".join(
            f"{'>>> ' if idx == target else ' '}{idx+1:4d}: {all_lines[idx].rstrip()}"
            for idx in range(first, stop)
        )
    except Exception as e:
        # Any failure (unreadable file, bad line number) becomes a message.
        return f"Could not read context: {e}"
def get_character_context(file_path, char_pos, context_chars=100):
    """Return the text surrounding char_pos as a dict, or None if the file cannot be read"""
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()

        window_start = max(0, char_pos - context_chars)
        window_end = min(len(text), char_pos + context_chars)

        leading = text[window_start:char_pos]
        trailing = text[char_pos+1:window_end]
        # A position at or past the end of the text is reported as "EOF".
        if char_pos < len(text):
            culprit = text[char_pos]
        else:
            culprit = "EOF"

        return dict(
            before=leading,
            error_char=culprit,
            after=trailing,
            display=f"{leading}[{culprit}]{trailing}",
        )
    except Exception:
        return None
def validate_json_file(file_path):
    """Parse one JSON file and return a report dict describing validity and any error location"""
    report = {
        'file': str(file_path),
        'valid': False,
        'error': None,
        'line': None,
        'column': None,
        'position': None,
        'context': None,
        'char_context': None,
        'entry_count': 0
    }

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except json.JSONDecodeError as e:
        # Syntax errors carry a location; attach surrounding text for display.
        report['error'] = e.msg
        report['line'] = e.lineno
        report['column'] = e.colno
        report['position'] = e.pos
        report['context'] = get_line_context(file_path, e.lineno)
        report['char_context'] = get_character_context(file_path, e.pos)
    except FileNotFoundError:
        report['error'] = "File not found"
    except Exception as e:
        report['error'] = str(e)
    else:
        report['valid'] = True
        # Only a top-level object has countable entries.
        report['entry_count'] = len(parsed) if isinstance(parsed, dict) else 0

    return report
def print_validation_result(result, verbose=True):
    """Print one validation report; verbose adds the line and character context of an error"""
    file_name = Path(result['file']).name

    if result['valid']:
        print(f"{file_name}: Valid JSON ({result['entry_count']} entries)")
        return

    print(f"{file_name}: Invalid JSON")
    print(f" Error: {result['error']}")

    if result['line']:
        print(f" Location: Line {result['line']}, Column {result['column']} (character {result['position']})")

    line_context = result['context']
    if verbose and line_context:
        print(f"\n Context:")
        for context_line in line_context.split('\n'):
            print(f" {context_line}")

    char_context = result['char_context']
    if verbose and char_context:
        print(f"\n Character context:")
        # Only the last 150 characters of the rendered window are shown.
        print(f" ...{char_context['display'][-150:]}...")
        print(f" Error character: {repr(char_context['error_char'])}")

    # Blank line separating this error report from the next file.
    print()
def get_common_fixes(error_msg):
    """Return hints for the known parser messages that occur in error_msg"""
    # Each rule: (substrings that trigger it, hints to suggest), checked in order.
    rules = (
        (
            ("Expecting ',' delimiter",),
            (
                "Missing comma between JSON entries",
                "Check for unescaped quotes inside string values",
            ),
        ),
        (
            ("Invalid \\escape", "Invalid escape"),
            (
                "Unescaped backslash in string (use \\\\ for literal backslash)",
                "Common in regex patterns: \\d should be \\\\d",
            ),
        ),
        (
            ("Expecting property name",),
            (
                "Missing or extra comma",
                "Trailing comma before closing brace",
            ),
        ),
        (
            ("Expecting value",),
            (
                "Missing value after colon",
                "Extra comma",
            ),
        ),
    )

    suggestions = []
    for triggers, hints in rules:
        if any(trigger in error_msg for trigger in triggers):
            suggestions.extend(hints)
    return suggestions
def main():
    """Validate the files named on the command line; return 0 if all are valid, otherwise 1"""
    parser = argparse.ArgumentParser(
        description='Validate JSON syntax in translation files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
Validate single file:
python3 json_validator.py ar_AR_batch_1_of_3.json
Validate all batches for a language:
python3 json_validator.py --all-batches ar_AR
Validate pattern:
python3 json_validator.py "ar_AR_batch_*.json"
Validate multiple files:
python3 json_validator.py file1.json file2.json file3.json
"""
    )
    parser.add_argument('files', nargs='*',
                        help='JSON file(s) to validate (supports wildcards)')
    parser.add_argument('--all-batches', metavar='LANGUAGE',
                        help='Validate all batch files for a language (e.g., ar_AR)')
    parser.add_argument('--quiet', action='store_true',
                        help='Only show files with errors')
    parser.add_argument('--brief', action='store_true',
                        help='Brief output without context')
    options = parser.parse_args()

    # Work out which files to check.
    if options.all_batches:
        pattern = f"{options.all_batches}_batch_*.json"
        targets = glob.glob(pattern)
        if not targets:
            print(f"No batch files found matching: {pattern}")
            return 1
    elif options.files:
        targets = []
        for spec in options.files:
            # Only arguments containing wildcard characters are expanded.
            has_wildcard = '*' in spec or '?' in spec
            targets.extend(glob.glob(spec) if has_wildcard else [spec])
    else:
        parser.print_help()
        return 1

    if not targets:
        print("No files to validate")
        return 1

    # Sorted so the output order is stable between runs.
    targets.sort()
    print(f"Validating {len(targets)} file(s)...\n")

    outcomes = []
    for target in targets:
        outcome = validate_json_file(target)
        outcomes.append(outcome)
        # In quiet mode valid files are not reported.
        if options.quiet and outcome['valid']:
            continue
        print_validation_result(outcome, verbose=not options.brief)

    invalid_count = sum(1 for outcome in outcomes if not outcome['valid'])
    valid_count = len(outcomes) - invalid_count

    print("=" * 60)
    print(f"Summary: {valid_count} valid, {invalid_count} invalid")

    if invalid_count > 0:
        print("\nCommon fixes:")
        already_printed = set()
        # Each distinct error message is consulted once; each hint prints once.
        for message in set([outcome['error'] for outcome in outcomes if outcome['error']]):
            for fix in get_common_fixes(message):
                if fix in already_printed:
                    continue
                print(f"{fix}")
                already_printed.add(fix)

    return 1 if invalid_count else 0


if __name__ == '__main__':
    sys.exit(main())

View File

@@ -1,10 +1,9 @@
#!/usr/bin/env python3
"""
JSON Beautifier and Structure Fixer for Stirling PDF Frontend
Restructures translation JSON files to match en-GB structure and key order exactly.
TOML Beautifier and Structure Fixer for Stirling PDF Frontend
Restructures translation TOML files to match en-GB structure and key order exactly.
"""
import json
import os
import sys
from pathlib import Path
@@ -12,34 +11,38 @@ from typing import Dict, Any, List
import argparse
from collections import OrderedDict
import tomllib
import tomli_w
class JSONBeautifier:
class TOMLBeautifier:
def __init__(self, locales_dir: str = "frontend/public/locales"):
self.locales_dir = Path(locales_dir)
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
self.golden_structure = self._load_json(self.golden_truth_file)
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.toml"
self.golden_structure = self._load_toml(self.golden_truth_file)
def _load_json(self, file_path: Path) -> Dict:
"""Load JSON file with error handling."""
def _load_toml(self, file_path: Path) -> Dict:
"""Load TOML file with error handling."""
try:
with open(file_path, 'r', encoding='utf-8') as f:
return json.load(f, object_pairs_hook=OrderedDict)
with open(file_path, 'rb') as f:
return tomllib.load(f)
except FileNotFoundError:
print(f"Error: File not found: {file_path}")
sys.exit(1)
except json.JSONDecodeError as e:
print(f"Error: Invalid JSON in {file_path}: {e}")
except Exception as e:
print(f"Error: Invalid TOML in {file_path}: {e}")
sys.exit(1)
def _save_json(self, data: Dict, file_path: Path, backup: bool = True) -> None:
"""Save JSON file with proper formatting."""
def _save_toml(self, data: Dict, file_path: Path, backup: bool = False) -> None:
"""Save TOML file with proper formatting."""
if backup and file_path.exists():
backup_path = file_path.with_suffix(f'.backup.restructured.json')
file_path.rename(backup_path)
backup_path = file_path.with_suffix(f'.backup.restructured.toml')
import shutil
shutil.copy2(file_path, backup_path)
print(f"Backup created: {backup_path}")
with open(file_path, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False, separators=(',', ': '))
with open(file_path, 'wb') as f:
tomli_w.dump(data, f)
def _flatten_dict(self, d: Dict, parent_key: str = '', separator: str = '.') -> Dict[str, Any]:
"""Flatten nested dictionary into dot-notation keys."""
@@ -93,7 +96,7 @@ class JSONBeautifier:
return {}
# Load the target file
target_data = self._load_json(target_file)
target_data = self._load_toml(target_file)
# Flatten the target translations
flat_target = self._flatten_dict(target_data)
@@ -103,7 +106,7 @@ class JSONBeautifier:
return restructured
def beautify_and_restructure(self, target_file: Path, backup: bool = True) -> Dict[str, Any]:
def beautify_and_restructure(self, target_file: Path, backup: bool = False) -> Dict[str, Any]:
"""Main function to beautify and restructure a translation file."""
lang_code = target_file.parent.name
print(f"Restructuring {lang_code} translation file...")
@@ -112,7 +115,7 @@ class JSONBeautifier:
restructured_data = self.restructure_translation_file(target_file)
# Save the restructured file
self._save_json(restructured_data, target_file, backup)
self._save_toml(restructured_data, target_file, backup)
# Analyze the results
flat_golden = self._flatten_dict(self.golden_structure)
@@ -163,7 +166,7 @@ class JSONBeautifier:
def validate_key_order(self, target_file: Path) -> Dict[str, Any]:
"""Validate that keys appear in the same order as en-GB."""
target_data = self._load_json(target_file)
target_data = self._load_toml(target_file)
def get_key_order(obj: Dict, path: str = '') -> List[str]:
keys = []
@@ -198,23 +201,26 @@ class JSONBeautifier:
def main():
parser = argparse.ArgumentParser(description='Beautify and restructure translation JSON files')
parser = argparse.ArgumentParser(
description='Beautify and restructure translation TOML files',
epilog='Works with TOML format translation files.'
)
parser.add_argument('--locales-dir', default='frontend/public/locales',
help='Path to locales directory')
parser.add_argument('--language', help='Restructure specific language only')
parser.add_argument('--all-languages', action='store_true',
help='Restructure all language files')
parser.add_argument('--no-backup', action='store_true',
help='Skip backup creation')
parser.add_argument('--backup', action='store_true',
help='Create backup files before modifying')
parser.add_argument('--validate-only', action='store_true',
help='Only validate structure, do not modify files')
args = parser.parse_args()
beautifier = JSONBeautifier(args.locales_dir)
beautifier = TOMLBeautifier(args.locales_dir)
if args.language:
target_file = Path(args.locales_dir) / args.language / "translation.json"
target_file = Path(args.locales_dir) / args.language / "translation.toml"
if not target_file.exists():
print(f"Error: Translation file not found for language: {args.language}")
sys.exit(1)
@@ -225,7 +231,7 @@ def main():
print(f" Order preserved: {order_result['order_preserved']}")
print(f" Common keys: {order_result['common_keys_count']}/{order_result['golden_keys_count']}")
else:
result = beautifier.beautify_and_restructure(target_file, backup=not args.no_backup)
result = beautifier.beautify_and_restructure(target_file, backup=args.backup)
print(f"\nResults for {result['language']}:")
print(f" Keys preserved: {result['preserved_keys']}/{result['total_reference_keys']}")
if result['structure_match']['total_issues'] > 0:
@@ -237,13 +243,13 @@ def main():
results = []
for lang_dir in Path(args.locales_dir).iterdir():
if lang_dir.is_dir() and lang_dir.name != "en-GB":
translation_file = lang_dir / "translation.json"
translation_file = lang_dir / "translation.toml"
if translation_file.exists():
if args.validate_only:
order_result = beautifier.validate_key_order(translation_file)
print(f"{lang_dir.name}: Order preserved = {order_result['order_preserved']}")
else:
result = beautifier.beautify_and_restructure(translation_file, backup=not args.no_backup)
result = beautifier.beautify_and_restructure(translation_file, backup=args.backup)
results.append(result)
if not args.validate_only and results:
@@ -259,4 +265,4 @@ def main():
if __name__ == "__main__":
main()
main()

View File

@@ -0,0 +1,198 @@
#!/usr/bin/env python3
"""
TOML Validator for Translation Files
Validates TOML syntax in translation files and reports detailed error information.
Useful for validating translation files before merging.
Usage:
python3 toml_validator.py <file_or_pattern>
python3 toml_validator.py ar_AR_batch_*.toml
python3 toml_validator.py ar_AR_batch_1_of_3.toml
python3 toml_validator.py --all-batches ar_AR
"""
import sys
import argparse
import glob
from pathlib import Path
import tomllib
def get_line_context(file_path, line_num, context_lines=3):
    """Return the numbered lines around line_num (1-based), marking that line with '>>> '"""
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            file_lines = source.readlines()

        # Window of 0-based indices: context_lines before and after the target.
        error_index = line_num - 1
        lower = max(0, error_index - context_lines)
        upper = min(len(file_lines), line_num + context_lines)

        rendered = []
        for index in range(lower, upper):
            prefix = ">>> " if index == error_index else " "
            text = file_lines[index].rstrip()
            rendered.append(f"{prefix}{index+1:4d}: {text}")
        return "\n".join(rendered)
    except Exception as e:
        # Any failure (unreadable file, bad line number) becomes a message.
        return f"Could not read context: {e}"
def get_character_context(file_path, char_pos, context_chars=100):
    """Return the text surrounding char_pos as a dict, or None if the file cannot be read"""
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            content = source.read()

        lower = max(0, char_pos - context_chars)
        upper = min(len(content), char_pos + context_chars)

        # A position at or past the end of the content is reported as "EOF".
        at_position = content[char_pos] if char_pos < len(content) else "EOF"
        pieces = {
            'before': content[lower:char_pos],
            'error_char': at_position,
            'after': content[char_pos+1:upper],
        }
        pieces['display'] = f"{pieces['before']}[{at_position}]{pieces['after']}"
        return pieces
    except Exception:
        return None
def count_keys(data, prefix=''):
    """Count the non-dict values in a nested dict; anything that is not a dict counts as 0"""
    if not isinstance(data, dict):
        return 0
    # Nested dicts contribute their own leaf count, every other value counts once.
    return sum(
        count_keys(value, f"{prefix}.{key}" if prefix else key) if isinstance(value, dict) else 1
        for key, value in data.items()
    )
def validate_toml_file(file_path):
"""Validate a single TOML file and return detailed error info"""
result = {
'file': str(file_path),
'valid': False,
'error': None,
'line': None,
'context': None,
'entry_count': 0
}
try:
with open(file_path, 'rb') as f:
data = tomllib.load(f)
result['valid'] = True
result['entry_count'] = count_keys(data)
except Exception as e:
error_msg = str(e)
result['error'] = error_msg
# Try to extract line number from error message
import re
line_match = re.search(r'line (\d+)', error_msg, re.IGNORECASE)
if line_match:
line_num = int(line_match.group(1))
result['line'] = line_num
result['context'] = get_line_context(file_path, line_num)
except FileNotFoundError:
result['error'] = "File not found"
return result
def print_validation_result(result, brief=False, quiet=False):
    """Print one validation report; quiet hides valid files, brief drops context and hints"""
    if result['valid']:
        if quiet:
            return
        print(f"{result['file']}")
        if not brief:
            print(f" Valid TOML with {result['entry_count']} entries")
        return

    # Invalid files are always reported, even in quiet mode.
    print(f"{result['file']}")
    print(f" Error: {result['error']}")
    if result['line']:
        print(f" Line: {result['line']}")

    if brief:
        return

    if result['context']:
        print(f"\n Context:")
        # chr(10) is a newline: each context line is prefixed with a space.
        print(f" {result['context'].replace(chr(10), chr(10) + ' ')}")

    print(f"\n Common fixes:")
    print(f" - Check for missing quotes around keys or values")
    print(f" - Ensure proper escaping of special characters")
    print(f" - Verify table header syntax: [section.subsection]")
    print(f" - Check for duplicate keys in the same table")
def main():
    """Validate the TOML files named on the command line and exit with 0 (all valid) or 1"""
    parser = argparse.ArgumentParser(description='Validate TOML translation files')
    parser.add_argument('files', nargs='*', help='TOML file(s) or pattern to validate')
    parser.add_argument('--all-batches', metavar='LANG',
                        help='Validate all batch files for a language (e.g., ar_AR)')
    parser.add_argument('--brief', action='store_true',
                        help='Show brief output without context')
    parser.add_argument('--quiet', action='store_true',
                        help='Only show files with errors')
    options = parser.parse_args()

    # Collect files to validate
    if options.all_batches:
        # Find all batch files for the specified language
        pattern = f"{options.all_batches}_batch_*.toml"
        targets = glob.glob(pattern)
        if not targets:
            print(f"No batch files found matching pattern: {pattern}")
            sys.exit(1)
    elif options.files:
        targets = []
        for spec in options.files:
            # An argument that matches nothing is kept as a literal filename.
            targets.extend(glob.glob(spec) or [spec])
    else:
        parser.print_help()
        sys.exit(1)

    # Validate all files
    separate_reports = not (options.brief or options.quiet)
    outcomes = []
    for target in targets:
        outcome = validate_toml_file(target)
        outcomes.append(outcome)
        print_validation_result(outcome, brief=options.brief, quiet=options.quiet)
        if separate_reports:
            print()  # Empty line between files

    # Summary
    total = len(outcomes)
    invalid = sum(1 for outcome in outcomes if not outcome['valid'])
    valid = total - invalid

    if not options.quiet:
        print(f"\n{'='*60}")
        print(f"Summary: {valid}/{total} files valid")
        if invalid > 0:
            print(f" {invalid} file(s) with errors")

    # Exit with error code if any files invalid
    sys.exit(1 if invalid else 0)


if __name__ == '__main__':
    main()

View File

@@ -10,35 +10,27 @@ import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple
import argparse
try:
import tomllib # Python 3.11+
except ImportError:
try:
import toml as tomllib_fallback
tomllib = None
except ImportError:
tomllib = None
tomllib_fallback = None
import tomllib
class TranslationAnalyzer:
def __init__(self, locales_dir: str = "frontend/public/locales", ignore_file: str = "scripts/ignore_translation.toml"):
self.locales_dir = Path(locales_dir)
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
self.golden_truth = self._load_json(self.golden_truth_file)
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.toml"
self.golden_truth = self._load_translation_file(self.golden_truth_file)
self.ignore_file = Path(ignore_file)
self.ignore_patterns = self._load_ignore_patterns()
def _load_json(self, file_path: Path) -> Dict:
"""Load JSON file with error handling."""
def _load_translation_file(self, file_path: Path) -> Dict:
"""Load TOML translation file with error handling."""
try:
with open(file_path, 'r', encoding='utf-8') as f:
return json.load(f)
with open(file_path, 'rb') as f:
return tomllib.load(f)
except FileNotFoundError:
print(f"Error: File not found: {file_path}")
sys.exit(1)
except json.JSONDecodeError as e:
print(f"Error: Invalid JSON in {file_path}: {e}")
except Exception as e:
print(f"Error: Invalid file {file_path}: {e}")
sys.exit(1)
def _load_ignore_patterns(self) -> Dict[str, Set[str]]:
@@ -47,16 +39,8 @@ class TranslationAnalyzer:
return {}
try:
if tomllib:
# Use Python 3.11+ built-in
with open(self.ignore_file, 'rb') as f:
ignore_data = tomllib.load(f)
elif tomllib_fallback:
# Use toml library fallback
ignore_data = tomllib_fallback.load(self.ignore_file)
else:
# Simple parser as fallback
ignore_data = self._parse_simple_toml()
with open(self.ignore_file, 'rb') as f:
ignore_data = tomllib.load(f)
# Convert lists to sets for faster lookup
return {lang: set(patterns) for lang, data in ignore_data.items()
@@ -65,31 +49,6 @@ class TranslationAnalyzer:
print(f"Warning: Could not load ignore file {self.ignore_file}: {e}")
return {}
def _parse_simple_toml(self) -> Dict:
"""Simple TOML parser for ignore patterns (fallback)."""
ignore_data = {}
current_section = None
with open(self.ignore_file, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if not line or line.startswith('#'):
continue
if line.startswith('[') and line.endswith(']'):
current_section = line[1:-1]
ignore_data[current_section] = {'ignore': []}
elif line.startswith('ignore = [') and current_section:
# Handle ignore array
continue
elif line.strip().startswith("'") and current_section:
# Extract quoted items
item = line.strip().strip("',")
if item:
ignore_data[current_section]['ignore'].append(item)
return ignore_data
def _flatten_dict(self, d: Dict, parent_key: str = '', separator: str = '.') -> Dict[str, str]:
"""Flatten nested dictionary into dot-notation keys."""
items = []
@@ -102,18 +61,18 @@ class TranslationAnalyzer:
return dict(items)
def get_all_language_files(self) -> List[Path]:
"""Get all translation.json files except en-GB."""
"""Get all translation files except en-GB."""
files = []
for lang_dir in self.locales_dir.iterdir():
if lang_dir.is_dir() and lang_dir.name != "en-GB":
translation_file = lang_dir / "translation.json"
if translation_file.exists():
files.append(translation_file)
toml_file = lang_dir / "translation.toml"
if toml_file.exists():
files.append(toml_file)
return sorted(files)
def find_missing_translations(self, target_file: Path) -> Set[str]:
"""Find keys that exist in en-GB but missing in target file."""
target_data = self._load_json(target_file)
target_data = self._load_translation_file(target_file)
golden_flat = self._flatten_dict(self.golden_truth)
target_flat = self._flatten_dict(target_data)
@@ -127,7 +86,7 @@ class TranslationAnalyzer:
def find_untranslated_entries(self, target_file: Path) -> Set[str]:
"""Find entries that appear to be untranslated (identical to en-GB)."""
target_data = self._load_json(target_file)
target_data = self._load_translation_file(target_file)
golden_flat = self._flatten_dict(self.golden_truth)
target_flat = self._flatten_dict(target_data)
@@ -170,7 +129,7 @@ class TranslationAnalyzer:
def find_extra_translations(self, target_file: Path) -> Set[str]:
"""Find keys that exist in target file but not in en-GB."""
target_data = self._load_json(target_file)
target_data = self._load_translation_file(target_file)
golden_flat = self._flatten_dict(self.golden_truth)
target_flat = self._flatten_dict(target_data)
@@ -185,7 +144,7 @@ class TranslationAnalyzer:
untranslated = self.find_untranslated_entries(target_file)
extra = self.find_extra_translations(target_file)
target_data = self._load_json(target_file)
target_data = self._load_translation_file(target_file)
golden_flat = self._flatten_dict(self.golden_truth)
target_flat = self._flatten_dict(target_data)
@@ -249,8 +208,12 @@ def main():
analyzer = TranslationAnalyzer(args.locales_dir, args.ignore_file)
if args.language:
target_file = Path(args.locales_dir) / args.language / "translation.json"
if not target_file.exists():
lang_dir = Path(args.locales_dir) / args.language
toml_file = lang_dir / "translation.toml"
if toml_file.exists():
target_file = toml_file
else:
print(f"Error: Translation file not found for language: {args.language}")
sys.exit(1)
results = [analyzer.analyze_file(target_file)]

View File

@@ -3,6 +3,7 @@
Translation Merger for Stirling PDF Frontend
Merges missing translations from en-GB into target language files.
Useful for AI-assisted translation workflows.
TOML format only.
"""
import json
@@ -14,46 +15,39 @@ import argparse
import shutil
from datetime import datetime
try:
import tomllib # Python 3.11+
except ImportError:
try:
import toml as tomllib_fallback
tomllib = None
except ImportError:
tomllib = None
tomllib_fallback = None
import tomllib
import tomli_w
class TranslationMerger:
def __init__(self, locales_dir: str = "frontend/public/locales", ignore_file: str = "scripts/ignore_translation.toml"):
    """Load the en-GB reference translations and the ignore patterns.

    Args:
        locales_dir: Directory holding one sub-directory per language; the
            reference file is read from ``<locales_dir>/en-GB/translation.toml``.
        ignore_file: Path to the TOML file consumed by
            ``_load_ignore_patterns``.
    """
    self.locales_dir = Path(locales_dir)
    self.golden_truth_file = self.locales_dir / "en-GB" / "translation.toml"
    self.golden_truth = self._load_translation_file(self.golden_truth_file)
    # ignore_file must be assigned before _load_ignore_patterns(), which reads it.
    self.ignore_file = Path(ignore_file)
    self.ignore_patterns = self._load_ignore_patterns()
def _load_json(self, file_path: Path) -> Dict:
"""Load JSON file with error handling."""
def _load_translation_file(self, file_path: Path) -> Dict:
"""Load TOML translation file."""
try:
with open(file_path, 'r', encoding='utf-8') as f:
return json.load(f)
with open(file_path, 'rb') as f:
return tomllib.load(f)
except FileNotFoundError:
print(f"Error: File not found: {file_path}")
sys.exit(1)
except json.JSONDecodeError as e:
print(f"Error: Invalid JSON in {file_path}: {e}")
except Exception as e:
print(f"Error: Invalid file {file_path}: {e}")
sys.exit(1)
def _save_translation_file(self, data: Dict, file_path: Path, backup: bool = False) -> None:
    """Write *data* to *file_path* as TOML, optionally backing up the old file.

    Args:
        data: Translation dictionary to serialise.
        file_path: Destination TOML file; overwritten if it already exists.
        backup: When true and *file_path* exists, first copy it to a sibling
            file whose ``.toml`` suffix is replaced by
            ``.backup.<YYYYmmdd_HHMMSS>.toml``.
    """
    if backup and file_path.exists():
        backup_path = file_path.with_suffix(f'.backup.{datetime.now().strftime("%Y%m%d_%H%M%S")}.toml')
        shutil.copy2(file_path, backup_path)
        print(f"Backup created: {backup_path}")

    # tomli_w.dump writes bytes, so the file must be opened in binary mode.
    with open(file_path, 'wb') as f:
        tomli_w.dump(data, f)
def _load_ignore_patterns(self) -> Dict[str, Set[str]]:
"""Load ignore patterns from TOML file."""
@@ -61,26 +55,11 @@ class TranslationMerger:
return {}
try:
# Simple parser for ignore patterns
ignore_data = {}
current_section = None
with open(self.ignore_file, 'rb') as f:
ignore_data = tomllib.load(f)
with open(self.ignore_file, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if not line or line.startswith('#'):
continue
if line.startswith('[') and line.endswith(']'):
current_section = line[1:-1]
ignore_data[current_section] = set()
elif line.strip().startswith("'") and current_section:
# Extract quoted items
item = line.strip().strip("',")
if item:
ignore_data[current_section].add(item)
return ignore_data
# Convert to sets for faster lookup
return {lang: set(data.get('ignore', [])) for lang, data in ignore_data.items()}
except Exception as e:
print(f"Warning: Could not load ignore file {self.ignore_file}: {e}")
return {}
@@ -131,7 +110,7 @@ class TranslationMerger:
golden_keys = set(self._flatten_dict(self.golden_truth).keys())
return sorted(golden_keys - ignore_set)
target_data = self._load_json(target_file)
target_data = self._load_translation_file(target_file)
golden_flat = self._flatten_dict(self.golden_truth)
target_flat = self._flatten_dict(target_data)
@@ -144,7 +123,7 @@ class TranslationMerger:
if not target_file.exists():
target_data = {}
else:
target_data = self._load_json(target_file)
target_data = self._load_translation_file(target_file)
golden_flat = self._flatten_dict(self.golden_truth)
missing_keys = keys_to_add or self.get_missing_keys(target_file)
@@ -172,7 +151,7 @@ class TranslationMerger:
print(f"Error: Target file does not exist: {target_file}")
return {}
target_data = self._load_json(target_file)
target_data = self._load_translation_file(target_file)
golden_flat = self._flatten_dict(self.golden_truth)
target_flat = self._flatten_dict(target_data)
@@ -219,13 +198,13 @@ class TranslationMerger:
return False
def apply_translations(self, target_file: Path, translations: Dict[str, str],
backup: bool = True) -> Dict:
backup: bool = False) -> Dict:
"""Apply provided translations to target file."""
if not target_file.exists():
print(f"Error: Target file does not exist: {target_file}")
return {'success': False, 'error': 'File not found'}
target_data = self._load_json(target_file)
target_data = self._load_translation_file(target_file)
applied_count = 0
errors = []
@@ -241,7 +220,7 @@ class TranslationMerger:
errors.append(f"Error setting {key}: {e}")
if applied_count > 0:
self._save_json(target_data, target_file, backup)
self._save_translation_file(target_data, target_file, backup)
return {
'success': True,
@@ -288,7 +267,10 @@ class TranslationMerger:
def main():
parser = argparse.ArgumentParser(description='Merge and manage translation files')
parser = argparse.ArgumentParser(
description='Merge and manage translation files',
epilog='Works with TOML translation files.'
)
parser.add_argument('--locales-dir', default='frontend/public/locales',
help='Path to locales directory')
parser.add_argument('--ignore-file', default='scripts/ignore_translation.toml',
@@ -299,7 +281,7 @@ def main():
# Add missing command
add_parser = subparsers.add_parser('add-missing', help='Add missing translations from en-GB')
add_parser.add_argument('--no-backup', action='store_true', help='Skip backup creation')
add_parser.add_argument('--backup', action='store_true', help='Create backup before modifying files')
add_parser.add_argument('--mark-untranslated', action='store_true', default=True,
help='Mark added translations as [UNTRANSLATED]')
@@ -314,7 +296,7 @@ def main():
# Apply translations command
apply_parser = subparsers.add_parser('apply-translations', help='Apply translations from JSON file')
apply_parser.add_argument('--translations-file', required=True, help='JSON file with translations')
apply_parser.add_argument('--no-backup', action='store_true', help='Skip backup creation')
apply_parser.add_argument('--backup', action='store_true', help='Create backup before modifying files')
args = parser.parse_args()
@@ -323,7 +305,10 @@ def main():
return
merger = TranslationMerger(args.locales_dir, args.ignore_file)
target_file = Path(args.locales_dir) / args.language / "translation.json"
# Find translation file
lang_dir = Path(args.locales_dir) / args.language
target_file = lang_dir / "translation.toml"
if args.command == 'add-missing':
print(f"Adding missing translations to {args.language}...")
@@ -332,7 +317,7 @@ def main():
mark_untranslated=args.mark_untranslated
)
merger._save_json(result['data'], target_file, backup=not args.no_backup)
merger._save_translation_file(result['data'], target_file, backup=args.backup)
print(f"Added {result['added_count']} missing translations")
elif args.command == 'extract-untranslated':
@@ -355,7 +340,7 @@ def main():
else:
translations = translations_data
result = merger.apply_translations(target_file, translations, backup=not args.no_backup)
result = merger.apply_translations(target_file, translations, backup=args.backup)
if result['success']:
print(f"Applied {result['applied_count']} translations")

View File

@@ -1,9 +1,9 @@
#!/usr/bin/env python3
"""
Validate JSON structure and formatting of translation files.
Validate TOML structure and formatting of translation files.
Checks for:
- Valid JSON syntax
- Valid TOML syntax
- Consistent key structure with en-GB
- Missing keys
- Extra keys not in en-GB
@@ -18,6 +18,7 @@ import sys
from pathlib import Path
from typing import Dict, List, Set
import argparse
import tomllib # Python 3.11+ (stdlib)
def get_all_keys(d: dict, parent_key: str = '', sep: str = '.') -> Set[str]:
@@ -31,14 +32,12 @@ def get_all_keys(d: dict, parent_key: str = '', sep: str = '.') -> Set[str]:
return keys
def validate_json_file(file_path: Path) -> tuple[bool, str]:
"""Validate that a file contains valid JSON."""
def validate_translation_file(file_path: Path) -> tuple[bool, str]:
"""Validate that a file contains valid TOML."""
try:
with open(file_path, 'r', encoding='utf-8') as f:
json.load(f)
return True, "Valid JSON"
except json.JSONDecodeError as e:
return False, f"Invalid JSON at line {e.lineno}, column {e.colno}: {e.msg}"
with open(file_path, 'rb') as f:
tomllib.load(f)
return True, "Valid TOML"
except Exception as e:
return False, f"Error reading file: {str(e)}"
@@ -101,9 +100,15 @@ def print_validation_result(result: Dict, verbose: bool = False):
print("-" * 100)
def load_translation_file(file_path: Path) -> dict:
"""Load TOML translation file."""
with open(file_path, 'rb') as f:
return tomllib.load(f)
def main():
parser = argparse.ArgumentParser(
description='Validate translation JSON structure'
description='Validate translation TOML structure'
)
parser.add_argument(
'--language',
@@ -125,21 +130,21 @@ def main():
# Define paths
locales_dir = Path('frontend/public/locales')
en_gb_path = locales_dir / 'en-GB' / 'translation.json'
en_gb_path = locales_dir / 'en-GB' / 'translation.toml'
file_ext = '.toml'
if not en_gb_path.exists():
print(f"❌ Error: en-GB translation file not found at {en_gb_path}")
sys.exit(1)
# Validate en-GB itself
is_valid, message = validate_json_file(en_gb_path)
is_valid, message = validate_translation_file(en_gb_path)
if not is_valid:
print(f"❌ Error in en-GB file: {message}")
sys.exit(1)
# Load en-GB structure
with open(en_gb_path, 'r', encoding='utf-8') as f:
en_gb = json.load(f)
en_gb = load_translation_file(en_gb_path)
en_gb_keys = get_all_keys(en_gb)
@@ -147,24 +152,26 @@ def main():
if args.language:
languages = [args.language]
else:
languages = [
d.name for d in locales_dir.iterdir()
if d.is_dir() and d.name != 'en-GB' and (d / 'translation.json').exists()
]
# Validate all languages except en-GB
languages = []
for d in locales_dir.iterdir():
if d.is_dir() and d.name != 'en-GB':
if (d / 'translation.toml').exists():
languages.append(d.name)
results = []
json_errors = []
# Validate each language
for lang_code in sorted(languages):
lang_path = locales_dir / lang_code / 'translation.json'
lang_path = locales_dir / lang_code / 'translation.toml'
if not lang_path.exists():
print(f"⚠️ Warning: {lang_code}/translation.json not found, skipping")
print(f"⚠️ Warning: {lang_code}/translation.toml not found, skipping")
continue
# First check if JSON is valid
is_valid, message = validate_json_file(lang_path)
# First check if file is valid
is_valid, message = validate_translation_file(lang_path)
if not is_valid:
json_errors.append({
'language': lang_code,
@@ -174,8 +181,7 @@ def main():
continue
# Load and compare structure
with open(lang_path, 'r', encoding='utf-8') as f:
lang_data = json.load(f)
lang_data = load_translation_file(lang_path)
lang_keys = get_all_keys(lang_data)
result = validate_structure(en_gb_keys, lang_keys, lang_code)
@@ -189,9 +195,9 @@ def main():
}
print(json.dumps(output, indent=2, ensure_ascii=False))
else:
# Print JSON errors first
# Print syntax errors first
if json_errors:
print("\n JSON Syntax Errors:")
print("\n❌ Syntax Errors:")
print("=" * 100)
for error in json_errors:
print(f"\nLanguage: {error['language']}")

View File

@@ -15,6 +15,7 @@ import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple
import argparse
import tomllib # Python 3.11+ (stdlib)
def find_placeholders(text: str) -> Set[str]:
@@ -117,15 +118,16 @@ def main():
# Define paths
locales_dir = Path('frontend/public/locales')
en_gb_path = locales_dir / 'en-GB' / 'translation.json'
en_gb_path = locales_dir / 'en-GB' / 'translation.toml'
file_ext = '.toml'
if not en_gb_path.exists():
print(f"❌ Error: en-GB translation file not found at {en_gb_path}")
sys.exit(1)
# Load en-GB (source of truth)
with open(en_gb_path, 'r', encoding='utf-8') as f:
en_gb = json.load(f)
with open(en_gb_path, 'rb') as f:
en_gb = tomllib.load(f)
en_gb_flat = flatten_dict(en_gb)
@@ -134,23 +136,25 @@ def main():
languages = [args.language]
else:
# Validate all languages except en-GB
languages = [
d.name for d in locales_dir.iterdir()
if d.is_dir() and d.name != 'en-GB' and (d / 'translation.json').exists()
]
languages = []
for d in locales_dir.iterdir():
if d.is_dir() and d.name != 'en-GB':
if (d / 'translation.toml').exists():
languages.append(d.name)
all_issues = []
# Validate each language
for lang_code in sorted(languages):
lang_path = locales_dir / lang_code / 'translation.json'
lang_path = locales_dir / lang_code / 'translation.toml'
if not lang_path.exists():
print(f"⚠️ Warning: {lang_code}/translation.json not found, skipping")
print(f"⚠️ Warning: {lang_code}/translation.toml not found, skipping")
continue
with open(lang_path, 'r', encoding='utf-8') as f:
lang_data = json.load(f)
# Load language file
with open(lang_path, 'rb') as f:
lang_data = tomllib.load(f)
lang_flat = flatten_dict(lang_data)
issues = validate_language(en_gb_flat, lang_flat, lang_code)