Stirling-PDF/scripts/translations/validate_json_structure.py
Anthony Stirling 0fa53185f2
Add translations for ar-AR, de-DE, fr-FR, it-IT, pt-BR, ru-RU and enable in frontend (#4572)

- Updated ar-AR (Arabic) to 98.7% completion (1088 entries)
- Updated fr-FR (French) to 97.3% completion (1296 entries)
- Updated pt-BR (Portuguese Brazil) to 98.6% completion (1294 entries)
- Updated ru-RU (Russian) to 98.1% completion (1277 entries)
- Updated ja-JP (Japanese) to 73.4% completion (796 entries, batches
1-2)
- Updated es-ES minor corrections
- Enabled 8 languages with >90% completion in LanguageSelector
- Added JSON validation scripts for translation quality assurance
- RTL support already enabled for ar-AR

Enabled languages: en-GB, ar-AR, de-DE, es-ES, fr-FR, it-IT, pt-BR,
ru-RU, zh-CN

🤖 Generated with [Claude Code](https://claude.com/claude-code)

# Description of Changes

<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

Co-authored-by: Claude <noreply@anthropic.com>
2025-10-01 12:58:41 +01:00

230 lines
7.1 KiB
Python

#!/usr/bin/env python3
"""
Validate JSON structure and formatting of translation files.
Checks for:
- Valid JSON syntax
- Consistent key structure with en-GB
- Missing keys
- Extra keys not in en-GB
- Malformed entries
Usage:
python scripts/translations/validate_json_structure.py [--language LANG]
"""
import json
import sys
from pathlib import Path
from typing import Dict, List, Set
import argparse
def get_all_keys(d: dict, parent_key: str = '', sep: str = '.') -> Set[str]:
    """Collect every key path of a nested dict, joined with ``sep``.

    Both intermediate (dict-valued) keys and leaf keys are included, so
    ``{'a': {'b': 1}}`` yields ``{'a', 'a.b'}``.

    Args:
        d: The (possibly nested) dictionary to walk.
        parent_key: Path prefix applied to the keys of ``d``; an empty
            prefix means the keys are emitted as-is.
        sep: Separator placed between path segments.

    Returns:
        The set of all key paths found in ``d``.
    """
    collected: Set[str] = set()
    # Explicit work list of (mapping, path prefix) pairs instead of recursion.
    pending = [(d, parent_key)]
    while pending:
        node, prefix = pending.pop()
        for name, value in node.items():
            # A falsy prefix means "no prefix": the key stands alone.
            path = name if not prefix else f"{prefix}{sep}{name}"
            collected.add(path)
            if isinstance(value, dict):
                pending.append((value, path))
    return collected
def validate_json_file(file_path: Path) -> tuple[bool, str]:
    """Check that a translation file is readable, valid JSON with an object root.

    Args:
        file_path: Path of the file to check; it is opened as UTF-8 text.

    Returns:
        A ``(is_valid, message)`` pair. ``message`` is ``"Valid JSON"`` on
        success; otherwise it describes the syntax error (with line and
        column), the read failure, or the unexpected top-level type.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        return False, f"Invalid JSON at line {e.lineno}, column {e.colno}: {e.msg}"
    except Exception as e:
        # Deliberately broad: any read/decoding failure is reported as a
        # validation result instead of aborting the whole run.
        return False, f"Error reading file: {e}"

    # A translation file must be a JSON object. Arrays, strings, numbers or
    # null parse fine but would crash the key comparison, which calls
    # .items() on the loaded value.
    if not isinstance(data, dict):
        return False, (
            "Invalid structure: top-level JSON value must be an object, "
            f"got {type(data).__name__}"
        )
    return True, "Valid JSON"
def validate_structure(
    en_gb_keys: Set[str],
    lang_keys: Set[str],
    lang_code: str
) -> Dict:
    """Build a structure report for one language relative to en-GB.

    Args:
        en_gb_keys: Key paths present in the en-GB reference file.
        lang_keys: Key paths present in the language being checked.
        lang_code: Code of the language being checked; copied into the report.

    Returns:
        A dict with the language code, the sorted lists of missing and extra
        key paths, and the corresponding totals and counts.
    """
    # Keys the reference has but the language lacks, and the reverse.
    absent = sorted(en_gb_keys - lang_keys)
    surplus = sorted(lang_keys - en_gb_keys)

    return dict(
        language=lang_code,
        missing_keys=absent,
        extra_keys=surplus,
        total_keys=len(lang_keys),
        expected_keys=len(en_gb_keys),
        missing_count=len(absent),
        extra_count=len(surplus),
    )
def _print_key_section(header: str, count: int, keys: list, verbose: bool) -> None:
    """Print one warning section: a header line followed by its key listing."""
    print(header)
    # Short lists are always shown in full. Long lists are shown only on
    # request, so the default report stays readable for languages that are
    # far behind; verbose mode prints every key without truncation.
    if verbose or count <= 20:
        for key in keys:
            print(f" - {key}")
    else:
        print(" (use --verbose to see all)")


def print_validation_result(result: Dict, verbose: bool = False) -> None:
    """Print the structure report of one language in readable form.

    Args:
        result: Report dict as produced by ``validate_structure`` (keys
            ``language``, ``missing_keys``, ``extra_keys``, ``total_keys``,
            ``expected_keys``, ``missing_count``, ``extra_count``).
        verbose: When true, list every missing/extra key. Otherwise a list
            is printed only if it has at most 20 entries, and a hint to use
            ``--verbose`` is printed instead for longer lists.
    """
    lang = result['language']
    print(f"\n{'='*100}")
    print(f"Language: {lang}")
    print(f"{'='*100}")
    print(f" Total keys: {result['total_keys']}")
    print(f" Expected keys (en-GB): {result['expected_keys']}")
    print(f" Missing keys: {result['missing_count']}")
    print(f" Extra keys: {result['extra_count']}")

    if result['missing_count'] == 0 and result['extra_count'] == 0:
        print(" ✅ Structure matches en-GB perfectly!")
    else:
        if result['missing_count'] > 0:
            _print_key_section(
                f"\n ⚠️ Missing {result['missing_count']} key(s):",
                result['missing_count'],
                result['missing_keys'],
                verbose,
            )
        if result['extra_count'] > 0:
            _print_key_section(
                f"\n ⚠️ Extra {result['extra_count']} key(s) not in en-GB:",
                result['extra_count'],
                result['extra_keys'],
                verbose,
            )
    print("-" * 100)
def _print_text_report(json_errors: list, results: list, verbose: bool) -> None:
    """Print the human-readable report: syntax errors first, then structure results."""
    if json_errors:
        print("\n❌ JSON Syntax Errors:")
        print("=" * 100)
        for error in json_errors:
            print(f"\nLanguage: {error['language']}")
            print(f"File: {error['file']}")
            print(f"Error: {error['error']}")
        print("\n")

    # Nothing was structurally validated: print no summary and, in
    # particular, never claim that "all translations" are perfect.
    if not results:
        return

    print("\n📊 Structure Validation Summary:")
    print(f" Languages validated: {len(results)}")
    perfect = sum(
        1 for r in results
        if r['missing_count'] == 0 and r['extra_count'] == 0
    )
    print(f" Perfect matches: {perfect}/{len(results)}")
    total_missing = sum(r['missing_count'] for r in results)
    total_extra = sum(r['extra_count'] for r in results)
    print(f" Total missing keys: {total_missing}")
    print(f" Total extra keys: {total_extra}")

    for result in results:
        print_validation_result(result, verbose=verbose)

    if not json_errors and perfect == len(results):
        print("\n✅ All translations have perfect structure!")


def main():
    """Command-line entry point: validate translation files against en-GB.

    Reads ``frontend/public/locales/<lang>/translation.json`` relative to the
    current working directory, for one language (``--language``) or for every
    locale directory except en-GB. Prints a text report, or a JSON document
    with ``--json``.

    Exits with status 1 when the en-GB reference is missing or invalid, when
    a translation file is missing or has a JSON error, or when any language
    has missing or extra keys; exits with status 0 otherwise.
    """
    parser = argparse.ArgumentParser(
        description='Validate translation JSON structure'
    )
    parser.add_argument(
        '--language',
        help='Specific language code to validate (e.g., es-ES)',
        default=None
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Show all missing/extra keys'
    )
    parser.add_argument(
        '--json',
        action='store_true',
        help='Output results as JSON'
    )
    args = parser.parse_args()

    # In JSON mode stdout must contain only the JSON document, so diagnostics
    # are routed to stderr; in text mode they stay on stdout as before.
    diag = sys.stderr if args.json else sys.stdout

    # Define paths
    locales_dir = Path('frontend/public/locales')
    en_gb_path = locales_dir / 'en-GB' / 'translation.json'
    if not en_gb_path.exists():
        print(f"❌ Error: en-GB translation file not found at {en_gb_path}", file=diag)
        sys.exit(1)

    # Validate en-GB itself
    is_valid, message = validate_json_file(en_gb_path)
    if not is_valid:
        print(f"❌ Error in en-GB file: {message}", file=diag)
        sys.exit(1)

    # Load en-GB structure
    with open(en_gb_path, 'r', encoding='utf-8') as f:
        en_gb = json.load(f)
    en_gb_keys = get_all_keys(en_gb)

    # Get list of languages to validate
    if args.language:
        languages = [args.language]
    else:
        languages = [
            d.name for d in locales_dir.iterdir()
            if d.is_dir() and d.name != 'en-GB' and (d / 'translation.json').exists()
        ]

    results = []
    json_errors = []
    # Set when a translation file that should be validated does not exist
    # (typically a mistyped --language); this must fail the run.
    missing_file = False

    # Validate each language
    for lang_code in sorted(languages):
        lang_path = locales_dir / lang_code / 'translation.json'
        if not lang_path.exists():
            print(f"⚠️ Warning: {lang_code}/translation.json not found, skipping", file=diag)
            missing_file = True
            continue

        # First check if JSON is valid
        is_valid, message = validate_json_file(lang_path)
        if not is_valid:
            json_errors.append({
                'language': lang_code,
                'file': str(lang_path),
                'error': message
            })
            continue

        # Load and compare structure
        with open(lang_path, 'r', encoding='utf-8') as f:
            lang_data = json.load(f)
        lang_keys = get_all_keys(lang_data)
        results.append(validate_structure(en_gb_keys, lang_keys, lang_code))

    # Output results
    if args.json:
        output = {
            'json_errors': json_errors,
            'structure_validation': results
        }
        print(json.dumps(output, indent=2, ensure_ascii=False))
    else:
        _print_text_report(json_errors, results, args.verbose)

    # Exit with error code if issues found
    has_issues = bool(json_errors) or missing_file or any(
        r['missing_count'] > 0 or r['extra_count'] > 0 for r in results
    )
    sys.exit(1 if has_issues else 0)
# Run the validator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()