#!/usr/bin/env python3 """ Validate JSON structure and formatting of translation files. Checks for: - Valid JSON syntax - Consistent key structure with en-GB - Missing keys - Extra keys not in en-GB - Malformed entries Usage: python scripts/translations/validate_json_structure.py [--language LANG] """ import json import sys from pathlib import Path from typing import Dict, List, Set import argparse def get_all_keys(d: dict, parent_key: str = '', sep: str = '.') -> Set[str]: """Get all keys from nested dict as dot-notation paths.""" keys = set() for k, v in d.items(): new_key = f"{parent_key}{sep}{k}" if parent_key else k keys.add(new_key) if isinstance(v, dict): keys.update(get_all_keys(v, new_key, sep=sep)) return keys def validate_json_file(file_path: Path) -> tuple[bool, str]: """Validate that a file contains valid JSON.""" try: with open(file_path, 'r', encoding='utf-8') as f: json.load(f) return True, "Valid JSON" except json.JSONDecodeError as e: return False, f"Invalid JSON at line {e.lineno}, column {e.colno}: {e.msg}" except Exception as e: return False, f"Error reading file: {str(e)}" def validate_structure( en_gb_keys: Set[str], lang_keys: Set[str], lang_code: str ) -> Dict: """Compare structure between en-GB and target language.""" missing_keys = en_gb_keys - lang_keys extra_keys = lang_keys - en_gb_keys return { 'language': lang_code, 'missing_keys': sorted(missing_keys), 'extra_keys': sorted(extra_keys), 'total_keys': len(lang_keys), 'expected_keys': len(en_gb_keys), 'missing_count': len(missing_keys), 'extra_count': len(extra_keys) } def print_validation_result(result: Dict, verbose: bool = False): """Print validation results in readable format.""" lang = result['language'] print(f"\n{'='*100}") print(f"Language: {lang}") print(f"{'='*100}") print(f" Total keys: {result['total_keys']}") print(f" Expected keys (en-GB): {result['expected_keys']}") print(f" Missing keys: {result['missing_count']}") print(f" Extra keys: {result['extra_count']}") if result['missing_count'] == 0 and result['extra_count'] == 0: print(f" ✅ Structure matches en-GB perfectly!") else: if result['missing_count'] > 0: print(f"\n ⚠️ Missing {result['missing_count']} key(s):") if verbose or result['missing_count'] <= 20: for key in result['missing_keys'][:50]: print(f" - {key}") if result['missing_count'] > 50: print(f" ... and {result['missing_count'] - 50} more") else: print(f" (use --verbose to see all)") if result['extra_count'] > 0: print(f"\n ⚠️ Extra {result['extra_count']} key(s) not in en-GB:") if verbose or result['extra_count'] <= 20: for key in result['extra_keys'][:50]: print(f" - {key}") if result['extra_count'] > 50: print(f" ... and {result['extra_count'] - 50} more") else: print(f" (use --verbose to see all)") print("-" * 100) def main(): parser = argparse.ArgumentParser( description='Validate translation JSON structure' ) parser.add_argument( '--language', help='Specific language code to validate (e.g., es-ES)', default=None ) parser.add_argument( '--verbose', '-v', action='store_true', help='Show all missing/extra keys' ) parser.add_argument( '--json', action='store_true', help='Output results as JSON' ) args = parser.parse_args() # Define paths locales_dir = Path('frontend/public/locales') en_gb_path = locales_dir / 'en-GB' / 'translation.json' if not en_gb_path.exists(): print(f"❌ Error: en-GB translation file not found at {en_gb_path}") sys.exit(1) # Validate en-GB itself is_valid, message = validate_json_file(en_gb_path) if not is_valid: print(f"❌ Error in en-GB file: {message}") sys.exit(1) # Load en-GB structure with open(en_gb_path, 'r', encoding='utf-8') as f: en_gb = json.load(f) en_gb_keys = get_all_keys(en_gb) # Get list of languages to validate if args.language: languages = [args.language] else: languages = [ d.name for d in locales_dir.iterdir() if d.is_dir() and d.name != 'en-GB' and (d / 'translation.json').exists() ] results = [] json_errors = [] # Validate each language for lang_code in sorted(languages): lang_path = locales_dir / lang_code / 'translation.json' if not lang_path.exists(): print(f"⚠️ Warning: {lang_code}/translation.json not found, skipping") continue # First check if JSON is valid is_valid, message = validate_json_file(lang_path) if not is_valid: json_errors.append({ 'language': lang_code, 'file': str(lang_path), 'error': message }) continue # Load and compare structure with open(lang_path, 'r', encoding='utf-8') as f: lang_data = json.load(f) lang_keys = get_all_keys(lang_data) result = validate_structure(en_gb_keys, lang_keys, lang_code) results.append(result) # Output results if args.json: output = { 'json_errors': json_errors, 'structure_validation': results } print(json.dumps(output, indent=2, ensure_ascii=False)) else: # Print JSON errors first if json_errors: print("\n❌ JSON Syntax Errors:") print("=" * 100) for error in json_errors: print(f"\nLanguage: {error['language']}") print(f"File: {error['file']}") print(f"Error: {error['error']}") print("\n") # Print structure validation results if results: print("\n📊 Structure Validation Summary:") print(f" Languages validated: {len(results)}") perfect = sum(1 for r in results if r['missing_count'] == 0 and r['extra_count'] == 0) print(f" Perfect matches: {perfect}/{len(results)}") total_missing = sum(r['missing_count'] for r in results) total_extra = sum(r['extra_count'] for r in results) print(f" Total missing keys: {total_missing}") print(f" Total extra keys: {total_extra}") for result in results: print_validation_result(result, verbose=args.verbose) if not json_errors and perfect == len(results): print("\n✅ All translations have perfect structure!") # Exit with error code if issues found has_issues = len(json_errors) > 0 or any( r['missing_count'] > 0 or r['extra_count'] > 0 for r in results ) sys.exit(1 if has_issues else 0) if __name__ == '__main__': main()