#!/usr/bin/env python3
"""
JSON Beautifier and Structure Fixer for Stirling PDF Frontend
Restructures translation JSON files to match en-GB structure and key order exactly.
"""

import json
import os
import sys
from pathlib import Path
from typing import Dict, Any, List
import argparse
from collections import OrderedDict

# Sentinel distinguishing "key absent" from a key whose JSON value is null.
_MISSING = object()


class JSONBeautifier:
    """Rewrite translation files so they mirror the en-GB reference layout.

    The reference ("golden truth") file is ``<locales_dir>/en-GB/translation.json``
    and is loaded once at construction time.
    """

    def __init__(self, locales_dir: str = "frontend/public/locales"):
        self.locales_dir = Path(locales_dir)
        self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
        self.golden_structure = self._load_json(self.golden_truth_file)

    def _load_json(self, file_path: Path) -> Dict:
        """Load JSON file with error handling.

        Key order is preserved via ``OrderedDict``. A missing file or invalid
        JSON prints an error and terminates the process with exit status 1.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return json.load(f, object_pairs_hook=OrderedDict)
        except FileNotFoundError:
            print(f"Error: File not found: {file_path}")
            sys.exit(1)
        except json.JSONDecodeError as e:
            print(f"Error: Invalid JSON in {file_path}: {e}")
            sys.exit(1)

    def _save_json(self, data: Dict, file_path: Path, backup: bool = True) -> None:
        """Save JSON file with proper formatting.

        When ``backup`` is true and ``file_path`` already exists, its current
        content is first copied to ``<stem>.backup.restructured.json`` next to it.
        """
        if backup and file_path.exists():
            backup_path = file_path.with_suffix('.backup.restructured.json')
            # Copy instead of rename: the original stays in place if the write
            # below fails, and an existing backup is overwritten on every
            # platform (Path.rename raises on Windows when the target exists).
            backup_path.write_bytes(file_path.read_bytes())
            print(f"Backup created: {backup_path}")

        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False, separators=(',', ': '))

    def _flatten_dict(self, d: Dict, parent_key: str = '', separator: str = '.') -> Dict[str, Any]:
        """Flatten nested dictionary into dot-notation keys.

        Only non-dict values become entries; empty nested dicts therefore
        contribute nothing. If two paths flatten to the same key, the later
        one wins.
        """
        items = []
        for k, v in d.items():
            new_key = f"{parent_key}{separator}{k}" if parent_key else k
            if isinstance(v, dict):
                items.extend(self._flatten_dict(v, new_key, separator).items())
            else:
                items.append((new_key, v))
        return dict(items)

    def _rebuild_structure(self, flat_dict: Dict[str, Any], reference_structure: Dict) -> Dict:
        """Rebuild nested structure based on reference structure and available translations.

        Walks ``reference_structure`` in its own key order and pulls values from
        ``flat_dict`` (dot-notation keys). Keys absent from the reference are
        dropped; reference keys without any translation are omitted.
        """

        def build_recursive(ref_obj: Any, current_path: str = '') -> Any:
            if not isinstance(ref_obj, dict):
                # Leaf node - return the translation if it exists
                return flat_dict.get(current_path, None)

            result = OrderedDict()
            for key, value in ref_obj.items():
                new_path = f"{current_path}.{key}" if current_path else key
                translated = flat_dict.get(new_path, _MISSING)

                if isinstance(value, dict) and not isinstance(translated, str):
                    # Reference expects a section: recurse and keep it only if
                    # at least one nested translation was found, so a stray
                    # scalar at this path never produces a null entry.
                    nested_result = build_recursive(value, new_path)
                    if nested_result:
                        result[key] = nested_result
                elif translated is not _MISSING:
                    # Direct translation exists (including a plain string
                    # where the reference has a section).
                    result[key] = translated
                # Otherwise: no translation for this leaf, skip it.

            return result if result else None

        return build_recursive(reference_structure) or OrderedDict()

    def restructure_translation_file(self, target_file: Path) -> Dict[str, Any]:
        """Restructure a translation file to match en-GB structure exactly.

        Returns the restructured data without writing it; returns an empty
        dict (after printing an error) when ``target_file`` does not exist.
        """
        if not target_file.exists():
            print(f"Error: Target file does not exist: {target_file}")
            return {}

        # Load the target file
        target_data = self._load_json(target_file)

        # Flatten the target translations
        flat_target = self._flatten_dict(target_data)

        # Rebuild structure based on golden truth
        restructured = self._rebuild_structure(flat_target, self.golden_structure)

        return restructured

    def beautify_and_restructure(self, target_file: Path, backup: bool = True) -> Dict[str, Any]:
        """Main function to beautify and restructure a translation file.

        Rewrites ``target_file`` in place and returns a summary dict with the
        keys ``language``, ``total_reference_keys``, ``preserved_keys`` and
        ``structure_match``.
        """
        # The language code is the name of the directory holding the file.
        lang_code = target_file.parent.name
        print(f"Restructuring {lang_code} translation file...")

        # Get the restructured data
        restructured_data = self.restructure_translation_file(target_file)

        # Save the restructured file
        self._save_json(restructured_data, target_file, backup)

        # Analyze the results
        flat_golden = self._flatten_dict(self.golden_structure)
        flat_restructured = self._flatten_dict(restructured_data)

        total_keys = len(flat_golden)
        preserved_keys = len(flat_restructured)

        result = {
            'language': lang_code,
            'total_reference_keys': total_keys,
            'preserved_keys': preserved_keys,
            'structure_match': self._compare_structures(self.golden_structure, restructured_data)
        }

        print(f"Restructured {lang_code}: {preserved_keys}/{total_keys} keys preserved")
        return result

    def _compare_structures(self, ref: Dict, target: Dict) -> Dict[str, Any]:
        """Compare structures between reference and target.

        Reports every reference key (at any depth) that is missing from the
        target. Returns ``structures_match``, the first ten ``issues`` and
        ``total_issues``.
        """

        def compare_recursive(r: Any, t: Any, path: str = '') -> List[str]:
            issues = []
            if isinstance(r, dict) and isinstance(t, dict):
                # Missing keys at this level first, in reference order so the
                # report is deterministic.
                for section in r:
                    if section not in t:
                        full_path = f"{path}.{section}" if path else section
                        issues.append(f"Missing section: {full_path}")

                # Recurse into common sections
                for key in r:
                    if key in t:
                        new_path = f"{path}.{key}" if path else key
                        issues.extend(compare_recursive(r[key], t[key], new_path))

            return issues

        issues = compare_recursive(ref, target)

        return {
            'structures_match': len(issues) == 0,
            'issues': issues[:10],  # Limit to first 10 issues
            'total_issues': len(issues)
        }

    def validate_key_order(self, target_file: Path) -> Dict[str, Any]:
        """Validate that keys appear in the same order as en-GB.

        Only keys present in both files are compared; the order is preserved
        when those keys appear in the same relative sequence in both.
        """
        target_data = self._load_json(target_file)

        def get_key_order(obj: Dict, path: str = '') -> List[str]:
            # Depth-first, pre-order list of dot-notation key paths.
            keys = []
            for key in obj.keys():
                new_path = f"{path}.{key}" if path else key
                keys.append(new_path)
                if isinstance(obj[key], dict):
                    keys.extend(get_key_order(obj[key], new_path))
            return keys

        golden_order = get_key_order(self.golden_structure)
        target_order = get_key_order(target_data)

        # Find common keys and check their relative order
        common_keys = set(golden_order) & set(target_order)
        golden_common = [key for key in golden_order if key in common_keys]
        target_common = [key for key in target_order if key in common_keys]

        # Identical filtered sequences <=> every pair keeps its relative order.
        order_preserved = golden_common == target_common

        return {
            'order_preserved': order_preserved,
            'common_keys_count': len(common_keys),
            'golden_keys_count': len(golden_order),
            'target_keys_count': len(target_order)
        }


def main():
    """Command-line entry point: restructure or validate translation files."""
    parser = argparse.ArgumentParser(description='Beautify and restructure translation JSON files')
    parser.add_argument('--locales-dir', default='frontend/public/locales',
                        help='Path to locales directory')
    parser.add_argument('--language', help='Restructure specific language only')
    parser.add_argument('--all-languages', action='store_true',
                        help='Restructure all language files')
    parser.add_argument('--no-backup', action='store_true',
                        help='Skip backup creation')
    parser.add_argument('--validate-only', action='store_true',
                        help='Only validate structure, do not modify files')

    args = parser.parse_args()

    beautifier = JSONBeautifier(args.locales_dir)

    if args.language:
        target_file = Path(args.locales_dir) / args.language / "translation.json"
        if not target_file.exists():
            print(f"Error: Translation file not found for language: {args.language}")
            sys.exit(1)

        if args.validate_only:
            order_result = beautifier.validate_key_order(target_file)
            print(f"Key order validation for {args.language}:")
            print(f" Order preserved: {order_result['order_preserved']}")
            print(f" Common keys: {order_result['common_keys_count']}/{order_result['golden_keys_count']}")
        else:
            result = beautifier.beautify_and_restructure(target_file, backup=not args.no_backup)
            print(f"\nResults for {result['language']}:")
            print(f" Keys preserved: {result['preserved_keys']}/{result['total_reference_keys']}")
            if result['structure_match']['total_issues'] > 0:
                print(f" Structure issues: {result['structure_match']['total_issues']}")
                for issue in result['structure_match']['issues']:
                    print(f" - {issue}")

    elif args.all_languages:
        results = []
        # Sorted so console output is reproducible across filesystems.
        for lang_dir in sorted(Path(args.locales_dir).iterdir()):
            if lang_dir.is_dir() and lang_dir.name != "en-GB":
                translation_file = lang_dir / "translation.json"
                if translation_file.exists():
                    if args.validate_only:
                        order_result = beautifier.validate_key_order(translation_file)
                        print(f"{lang_dir.name}: Order preserved = {order_result['order_preserved']}")
                    else:
                        result = beautifier.beautify_and_restructure(translation_file, backup=not args.no_backup)
                        results.append(result)

        if not args.validate_only and results:
            print(f"\n{'='*60}")
            print("RESTRUCTURING SUMMARY")
            print(f"{'='*60}")
            for result in sorted(results, key=lambda x: x['language']):
                total = result['total_reference_keys']
                # An empty reference file would otherwise divide by zero.
                percent = result['preserved_keys'] / total * 100 if total else 0.0
                print(f"{result['language']}: {result['preserved_keys']}/{total} keys "
                      f"({percent:.1f}%)")

    else:
        parser.print_help()


if __name__ == "__main__":
    main()