mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-12-18 20:04:17 +01:00
223 lines
7.2 KiB
Python
223 lines
7.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Validate TOML structure and formatting of translation files.
|
|
|
|
Checks for:
|
|
- Valid TOML syntax
|
|
- Consistent key structure with en-GB
|
|
- Missing keys
|
|
- Extra keys not in en-GB
|
|
- Malformed entries
|
|
|
|
Usage:
|
|
python scripts/translations/validate_json_structure.py [--language LANG]
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Dict, Set
|
|
import argparse
|
|
import tomllib # Python 3.11+ (stdlib)
|
|
|
|
|
|
def get_all_keys(d: dict, parent_key: str = "", sep: str = ".") -> Set[str]:
|
|
"""Get all keys from nested dict as dot-notation paths."""
|
|
keys = set()
|
|
for k, v in d.items():
|
|
new_key = f"{parent_key}{sep}{k}" if parent_key else k
|
|
keys.add(new_key)
|
|
if isinstance(v, dict):
|
|
keys.update(get_all_keys(v, new_key, sep=sep))
|
|
return keys
|
|
|
|
|
|
def validate_translation_file(file_path: Path) -> tuple[bool, str]:
|
|
"""Validate that a file contains valid TOML."""
|
|
try:
|
|
with open(file_path, "rb") as f:
|
|
tomllib.load(f)
|
|
return True, "Valid TOML"
|
|
except Exception as e:
|
|
return False, f"Error reading file: {str(e)}"
|
|
|
|
|
|
def validate_structure(
|
|
en_gb_keys: Set[str], lang_keys: Set[str], lang_code: str
|
|
) -> Dict:
|
|
"""Compare structure between en-GB and target language."""
|
|
missing_keys = en_gb_keys - lang_keys
|
|
extra_keys = lang_keys - en_gb_keys
|
|
|
|
return {
|
|
"language": lang_code,
|
|
"missing_keys": sorted(missing_keys),
|
|
"extra_keys": sorted(extra_keys),
|
|
"total_keys": len(lang_keys),
|
|
"expected_keys": len(en_gb_keys),
|
|
"missing_count": len(missing_keys),
|
|
"extra_count": len(extra_keys),
|
|
}
|
|
|
|
|
|
def print_validation_result(result: Dict, verbose: bool = False):
|
|
"""Print validation results in readable format."""
|
|
lang = result["language"]
|
|
|
|
print(f"\n{'=' * 100}")
|
|
print(f"Language: {lang}")
|
|
print(f"{'=' * 100}")
|
|
print(f" Total keys: {result['total_keys']}")
|
|
print(f" Expected keys (en-GB): {result['expected_keys']}")
|
|
print(f" Missing keys: {result['missing_count']}")
|
|
print(f" Extra keys: {result['extra_count']}")
|
|
|
|
if result["missing_count"] == 0 and result["extra_count"] == 0:
|
|
print(" ✅ Structure matches en-GB perfectly!")
|
|
else:
|
|
if result["missing_count"] > 0:
|
|
print(f"\n ⚠️ Missing {result['missing_count']} key(s):")
|
|
if verbose or result["missing_count"] <= 20:
|
|
for key in result["missing_keys"][:50]:
|
|
print(f" - {key}")
|
|
if result["missing_count"] > 50:
|
|
print(f" ... and {result['missing_count'] - 50} more")
|
|
else:
|
|
print(" (use --verbose to see all)")
|
|
|
|
if result["extra_count"] > 0:
|
|
print(f"\n ⚠️ Extra {result['extra_count']} key(s) not in en-GB:")
|
|
if verbose or result["extra_count"] <= 20:
|
|
for key in result["extra_keys"][:50]:
|
|
print(f" - {key}")
|
|
if result["extra_count"] > 50:
|
|
print(f" ... and {result['extra_count'] - 50} more")
|
|
else:
|
|
print(" (use --verbose to see all)")
|
|
|
|
print("-" * 100)
|
|
|
|
|
|
def load_translation_file(file_path: Path) -> dict:
|
|
"""Load TOML translation file."""
|
|
with open(file_path, "rb") as f:
|
|
return tomllib.load(f)
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description="Validate translation TOML structure")
|
|
parser.add_argument(
|
|
"--language",
|
|
help="Specific language code to validate (e.g., es-ES)",
|
|
default=None,
|
|
)
|
|
parser.add_argument(
|
|
"--verbose", "-v", action="store_true", help="Show all missing/extra keys"
|
|
)
|
|
parser.add_argument("--json", action="store_true", help="Output results as JSON")
|
|
|
|
args = parser.parse_args()
|
|
|
|
# Define paths
|
|
locales_dir = Path("frontend/public/locales")
|
|
en_gb_path = locales_dir / "en-GB" / "translation.toml"
|
|
file_ext = ".toml"
|
|
|
|
if not en_gb_path.exists():
|
|
print(f"❌ Error: en-GB translation file not found at {en_gb_path}")
|
|
sys.exit(1)
|
|
|
|
# Validate en-GB itself
|
|
is_valid, message = validate_translation_file(en_gb_path)
|
|
if not is_valid:
|
|
print(f"❌ Error in en-GB file: {message}")
|
|
sys.exit(1)
|
|
|
|
# Load en-GB structure
|
|
en_gb = load_translation_file(en_gb_path)
|
|
|
|
en_gb_keys = get_all_keys(en_gb)
|
|
|
|
# Get list of languages to validate
|
|
if args.language:
|
|
languages = [args.language]
|
|
else:
|
|
# Validate all languages except en-GB
|
|
languages = []
|
|
for d in locales_dir.iterdir():
|
|
if d.is_dir() and d.name != "en-GB":
|
|
if (d / "translation.toml").exists():
|
|
languages.append(d.name)
|
|
|
|
results = []
|
|
json_errors = []
|
|
|
|
# Validate each language
|
|
for lang_code in sorted(languages):
|
|
lang_path = locales_dir / lang_code / "translation.toml"
|
|
|
|
if not lang_path.exists():
|
|
print(f"⚠️ Warning: {lang_code}/translation.toml not found, skipping")
|
|
continue
|
|
|
|
# First check if file is valid
|
|
is_valid, message = validate_translation_file(lang_path)
|
|
if not is_valid:
|
|
json_errors.append(
|
|
{"language": lang_code, "file": str(lang_path), "error": message}
|
|
)
|
|
continue
|
|
|
|
# Load and compare structure
|
|
lang_data = load_translation_file(lang_path)
|
|
|
|
lang_keys = get_all_keys(lang_data)
|
|
result = validate_structure(en_gb_keys, lang_keys, lang_code)
|
|
results.append(result)
|
|
|
|
# Output results
|
|
if args.json:
|
|
output = {"json_errors": json_errors, "structure_validation": results}
|
|
print(json.dumps(output, indent=2, ensure_ascii=False))
|
|
else:
|
|
# Print syntax errors first
|
|
if json_errors:
|
|
print("\n❌ Syntax Errors:")
|
|
print("=" * 100)
|
|
for error in json_errors:
|
|
print(f"\nLanguage: {error['language']}")
|
|
print(f"File: {error['file']}")
|
|
print(f"Error: {error['error']}")
|
|
print("\n")
|
|
|
|
# Print structure validation results
|
|
if results:
|
|
print("\n📊 Structure Validation Summary:")
|
|
print(f" Languages validated: {len(results)}")
|
|
|
|
perfect = sum(
|
|
1 for r in results if r["missing_count"] == 0 and r["extra_count"] == 0
|
|
)
|
|
print(f" Perfect matches: {perfect}/{len(results)}")
|
|
|
|
total_missing = sum(r["missing_count"] for r in results)
|
|
total_extra = sum(r["extra_count"] for r in results)
|
|
print(f" Total missing keys: {total_missing}")
|
|
print(f" Total extra keys: {total_extra}")
|
|
|
|
for result in results:
|
|
print_validation_result(result, verbose=args.verbose)
|
|
|
|
if not json_errors and perfect == len(results):
|
|
print("\n✅ All translations have perfect structure!")
|
|
|
|
# Exit with error code if issues found
|
|
has_issues = len(json_errors) > 0 or any(
|
|
r["missing_count"] > 0 or r["extra_count"] > 0 for r in results
|
|
)
|
|
sys.exit(1 if has_issues else 0)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|