#!/usr/bin/env python3
"""
AI Translation Helper for Stirling PDF Frontend

Provides utilities for AI-assisted translation workflows including
batch processing, quality checks, and integration helpers.
"""

import json
import os
import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple, Any, Optional
import argparse
import re
from datetime import datetime
import csv


class AITranslationHelper:
    """Utilities for building, validating, applying and exporting AI translation batches.

    Locale files live under *locales_dir*, one ``<lang>/translation.json``
    per language; ``en-GB`` is the golden-truth source catalogue that every
    other locale is measured against.
    """

    def __init__(self, locales_dir: str = "frontend/public/locales"):
        self.locales_dir = Path(locales_dir)
        # en-GB is the reference catalogue all other locales are compared against.
        self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"

    def _load_json(self, file_path: Path) -> Dict:
        """Load a JSON file; return {} (after printing the error) if missing or invalid."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            print(f"Error loading {file_path}: {e}")
            return {}

    def _save_json(self, data: Dict, file_path: Path) -> None:
        """Write *data* as pretty-printed UTF-8 JSON (non-ASCII kept literal)."""
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

    def create_ai_batch_file(self, languages: List[str], output_file: Path,
                             max_entries_per_language: int = 50) -> None:
        """Create a batch file for AI translation covering *languages*.

        For each language, entries untranslated relative to en-GB are
        collected, optionally capped at *max_entries_per_language* (keeping
        the highest-priority keys), and written with an empty 'translated'
        slot for the AI to fill in.
        """
        golden_truth = self._load_json(self.golden_truth_file)
        batch_data = {
            'metadata': {
                'created_at': datetime.now().isoformat(),
                'source_language': 'en-GB',
                'target_languages': languages,
                'max_entries_per_language': max_entries_per_language,
                'instructions': {
                    'format': 'Translate each entry maintaining JSON structure and placeholder variables like {n}, {total}, (unknown)',
                    'context': 'This is for a PDF manipulation tool. Keep technical terms consistent.',
                    'placeholders': 'Preserve all placeholders: {n}, {total}, (unknown), etc.',
                    'style': 'Keep translations concise and user-friendly'
                }
            },
            'translations': {}
        }

        for lang in languages:
            lang_file = self.locales_dir / lang / "translation.json"
            # A missing locale file simply means nothing is translated yet.
            lang_data = self._load_json(lang_file) if lang_file.exists() else {}

            untranslated = self._find_untranslated_entries(golden_truth, lang_data)

            # Cap the batch size, keeping the most important keys first.
            if max_entries_per_language and len(untranslated) > max_entries_per_language:
                untranslated = self._prioritize_translation_keys(
                    untranslated, max_entries_per_language)

            batch_data['translations'][lang] = {
                key: {
                    'original': value,
                    'translated': '',  # AI fills this
                    'context': self._get_key_context(key),
                }
                for key, value in untranslated.items()
            }

        self._save_json(batch_data, output_file)

        # NOTE: use a distinct name here so we don't shadow the per-language
        # loop variable above.
        total_entries = sum(len(entries) for entries in batch_data['translations'].values())
        print(f"Created AI batch file: {output_file}")
        print(f"Total entries to translate: {total_entries}")

    def _find_untranslated_entries(self, golden_truth: Dict, lang_data: Dict) -> Dict[str, str]:
        """Return {dotted_key: english_value} for entries still needing translation.

        An entry needs translation when it is missing, identical to the
        English source (unless expected to be identical), or explicitly
        marked with an ``[UNTRANSLATED]`` prefix.
        """
        golden_flat = self._flatten_dict(golden_truth)
        lang_flat = self._flatten_dict(lang_data)

        untranslated = {}
        for key, value in golden_flat.items():
            if (key not in lang_flat
                    or lang_flat[key] == value
                    or (isinstance(lang_flat[key], str)
                        and lang_flat[key].startswith("[UNTRANSLATED]"))):
                if not self._is_expected_identical(key, value):
                    untranslated[key] = value
        return untranslated

    def _flatten_dict(self, d: Dict, parent_key: str = '', separator: str = '.') -> Dict[str, Any]:
        """Flatten a nested dict into {dotted.key.path: leaf_value}."""
        items = []
        for k, v in d.items():
            new_key = f"{parent_key}{separator}{k}" if parent_key else k
            if isinstance(v, dict):
                items.extend(self._flatten_dict(v, new_key, separator).items())
            else:
                items.append((new_key, v))
        return dict(items)

    def _is_expected_identical(self, key: str, value: str) -> bool:
        """True when *key*/*value* should legitimately be identical across languages.

        Covers text-direction markers and boolean-like literals, which are
        configuration values rather than translatable text.
        """
        if str(value).strip() in ['ltr', 'rtl', 'True', 'False', 'true', 'false']:
            return True
        return 'language.direction' in key.lower()

    def _prioritize_translation_keys(self, untranslated: Dict[str, str],
                                     max_count: int) -> Dict[str, str]:
        """Return the *max_count* most important entries of *untranslated*.

        Keys are scored by substring patterns (higher score = higher
        priority); unmatched keys get a base score of 1. Ties keep
        insertion order (sort is stable).
        """
        priority_patterns = [
            ('title', 10),
            ('header', 9),
            ('submit', 8),
            ('selectText', 7),
            ('prompt', 6),
            ('desc', 5),
            ('error', 8),
            ('warning', 7),
            ('save', 8),
            ('download', 8),
            ('upload', 7),
        ]

        scored_keys = []
        for key, value in untranslated.items():
            score = 1  # base score
            for pattern, pattern_score in priority_patterns:
                if pattern.lower() in key.lower():
                    score = max(score, pattern_score)
            scored_keys.append((key, value, score))

        scored_keys.sort(key=lambda x: x[2], reverse=True)
        return {key: value for key, value, _ in scored_keys[:max_count]}

    def _get_key_context(self, key: str) -> str:
        """Build a short human-readable context description for a dotted key."""
        parts = key.split('.')
        contexts = {
            'addPageNumbers': 'Feature for adding page numbers to PDFs',
            'compress': 'PDF compression functionality',
            'merge': 'PDF merging functionality',
            'split': 'PDF splitting functionality',
            'rotate': 'PDF rotation functionality',
            'convert': 'File conversion functionality',
            'security': 'PDF security and permissions',
            'metadata': 'PDF metadata editing',
            'watermark': 'Adding watermarks to PDFs',
            'overlay': 'PDF overlay functionality',
            'extract': 'Extracting content from PDFs'
        }

        if len(parts) > 0:
            main_section = parts[0]
            context = contexts.get(main_section, f'Part of {main_section} functionality')
            if len(parts) > 1:
                context += f', specifically for {parts[-1]}'
            return context
        return 'General application text'

    def validate_ai_translations(self, batch_file: Path) -> Dict[str, List[str]]:
        """Validate AI translations in *batch_file* for common issues.

        Returns {'errors': [...], 'warnings': [...]}: errors are missing
        translations or leftover AI artifacts; warnings are placeholder
        mismatches and translations identical to the original.
        """
        batch_data = self._load_json(batch_file)
        issues = {'errors': [], 'warnings': []}

        for lang, translations in batch_data.get('translations', {}).items():
            for key, translation_data in translations.items():
                original = translation_data.get('original', '')
                translated = translation_data.get('translated', '')

                if not translated:
                    issues['errors'].append(f"{lang}.{key}: Missing translation")
                    continue

                # Placeholders like {n}/{total} must survive translation verbatim.
                original_placeholders = re.findall(r'\{[^}]+\}', original)
                translated_placeholders = re.findall(r'\{[^}]+\}', translated)
                if set(original_placeholders) != set(translated_placeholders):
                    issues['warnings'].append(
                        f"{lang}.{key}: Placeholder mismatch - Original: {original_placeholders}, "
                        f"Translated: {translated_placeholders}"
                    )

                # Identical to source usually means the AI skipped the entry.
                if translated == original and not self._is_expected_identical(key, original):
                    issues['warnings'].append(f"{lang}.{key}: Translation identical to original")

                # Leftover markers the AI sometimes emits instead of real text.
                artifacts = ['[TRANSLATE]', '[TODO]', 'UNTRANSLATED', '{{', '}}']
                for artifact in artifacts:
                    if artifact in translated:
                        issues['errors'].append(
                            f"{lang}.{key}: Contains translation artifact: {artifact}")

        return issues

    def apply_ai_batch_translations(self, batch_file: Path,
                                    validate: bool = True) -> Dict[str, Any]:
        """Apply translations from an AI batch file to the per-language files.

        When *validate* is True, validation errors abort the run before any
        file is written; warnings are printed (first 10) but do not block.
        Returns {'applied': {lang: count}, 'errors': [...], 'warnings': [...]}.
        """
        batch_data = self._load_json(batch_file)
        results = {'applied': {}, 'errors': [], 'warnings': []}

        if validate:
            validation_issues = self.validate_ai_translations(batch_file)
            if validation_issues['errors']:
                print("Validation errors found. Fix these before applying:")
                for error in validation_issues['errors']:
                    print(f"  ERROR: {error}")
                return results
            if validation_issues['warnings']:
                print("Validation warnings (review recommended):")
                for warning in validation_issues['warnings'][:10]:
                    print(f"  WARNING: {warning}")

        for lang, translations in batch_data.get('translations', {}).items():
            lang_file = self.locales_dir / lang / "translation.json"

            # Load existing data, or start a fresh locale (creating its folder).
            if lang_file.exists():
                lang_data = self._load_json(lang_file)
            else:
                lang_data = {}
                lang_file.parent.mkdir(parents=True, exist_ok=True)

            applied_count = 0
            for key, translation_data in translations.items():
                translated = translation_data.get('translated', '').strip()
                # Skip empty slots and entries left identical to the source.
                if translated and translated != translation_data.get('original', ''):
                    self._set_nested_value(lang_data, key, translated)
                    applied_count += 1

            if applied_count > 0:
                self._save_json(lang_data, lang_file)
                results['applied'][lang] = applied_count
                print(f"Applied {applied_count} translations to {lang}")

        return results

    def _set_nested_value(self, data: Dict, key_path: str, value: Any) -> None:
        """Set *value* in nested dict *data* using a dotted *key_path*."""
        keys = key_path.split('.')
        current = data
        for key in keys[:-1]:
            if key not in current:
                current[key] = {}
            elif not isinstance(current[key], dict):
                # A scalar sits where we need a subtree; replace it so the
                # deeper key can be stored (data loss is announced, not silent).
                print(f"Warning: Converting non-dict value at '{key}' to dict to allow nesting")
                current[key] = {}
            current = current[key]
        current[keys[-1]] = value

    def _load_flat_locale(self, lang: str) -> Dict[str, Any]:
        """Load and flatten one locale file; {} when the file does not exist."""
        lang_file = self.locales_dir / lang / "translation.json"
        if lang_file.exists():
            return self._flatten_dict(self._load_json(lang_file))
        return {}

    @staticmethod
    def _export_cell(lang_flat: Dict[str, Any], key: str) -> Any:
        """Cell value for an export row: '' when absent or marked untranslated."""
        value = lang_flat.get(key, '')
        # isinstance guard: locale leaf values may be non-strings (e.g. booleans).
        if isinstance(value, str) and value.startswith('[UNTRANSLATED]'):
            value = ''
        return value

    def export_for_external_translation(self, languages: List[str],
                                        output_format: str = 'csv') -> None:
        """Export translations for external translation services.

        Writes ``translations_export_<YYYYMMDD>.csv`` or ``.json`` in the
        current directory. Keys whose values are expected to be identical
        across languages (direction markers etc.) are skipped.
        """
        golden_truth = self._load_json(self.golden_truth_file)
        golden_flat = self._flatten_dict(golden_truth)

        # Load and flatten each locale ONCE up front instead of once per key
        # (the per-key reload was O(keys x languages) file reads).
        flat_locales = {lang: self._load_flat_locale(lang) for lang in languages}

        if output_format == 'csv':
            output_file = Path(f'translations_export_{datetime.now().strftime("%Y%m%d")}.csv')
            with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
                fieldnames = ['key', 'context', 'en_GB'] + languages
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()

                for key, en_value in golden_flat.items():
                    if self._is_expected_identical(key, en_value):
                        continue
                    row = {
                        'key': key,
                        'context': self._get_key_context(key),
                        'en_GB': en_value
                    }
                    for lang in languages:
                        row[lang] = self._export_cell(flat_locales[lang], key)
                    writer.writerow(row)

            print(f"Exported to {output_file}")

        elif output_format == 'json':
            output_file = Path(f'translations_export_{datetime.now().strftime("%Y%m%d")}.json')
            export_data = {'languages': languages, 'translations': {}}

            for key, en_value in golden_flat.items():
                if self._is_expected_identical(key, en_value):
                    continue
                export_data['translations'][key] = {
                    'en_GB': en_value,
                    'context': self._get_key_context(key)
                }
                for lang in languages:
                    export_data['translations'][key][lang] = self._export_cell(
                        flat_locales[lang], key)

            self._save_json(export_data, output_file)
            print(f"Exported to {output_file}")


def main():
    """Command-line entry point: create-batch / validate / apply-batch / export."""
    parser = argparse.ArgumentParser(description='AI Translation Helper')
    parser.add_argument('--locales-dir', default='frontend/public/locales',
                        help='Path to locales directory')

    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # Create batch command
    batch_parser = subparsers.add_parser('create-batch', help='Create AI translation batch file')
    batch_parser.add_argument('--languages', nargs='+', required=True,
                              help='Language codes to include')
    batch_parser.add_argument('--output', required=True, help='Output batch file')
    batch_parser.add_argument('--max-entries', type=int, default=100,
                              help='Max entries per language')

    # Validate command
    validate_parser = subparsers.add_parser('validate', help='Validate AI translations')
    validate_parser.add_argument('batch_file', help='Batch file to validate')

    # Apply command
    apply_parser = subparsers.add_parser('apply-batch', help='Apply AI batch translations')
    apply_parser.add_argument('batch_file', help='Batch file with translations')
    apply_parser.add_argument('--skip-validation', action='store_true',
                              help='Skip validation before applying')

    # Export command
    export_parser = subparsers.add_parser('export', help='Export for external translation')
    export_parser.add_argument('--languages', nargs='+', required=True,
                               help='Language codes to export')
    export_parser.add_argument('--format', choices=['csv', 'json'], default='csv',
                               help='Export format')

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return

    helper = AITranslationHelper(args.locales_dir)

    if args.command == 'create-batch':
        output_file = Path(args.output)
        helper.create_ai_batch_file(args.languages, output_file, args.max_entries)
    elif args.command == 'validate':
        batch_file = Path(args.batch_file)
        issues = helper.validate_ai_translations(batch_file)
        if issues['errors']:
            print("ERRORS:")
            for error in issues['errors']:
                print(f"  - {error}")
        if issues['warnings']:
            print("WARNINGS:")
            for warning in issues['warnings']:
                print(f"  - {warning}")
        if not issues['errors'] and not issues['warnings']:
            print("No validation issues found!")
    elif args.command == 'apply-batch':
        batch_file = Path(args.batch_file)
        results = helper.apply_ai_batch_translations(
            batch_file, validate=not args.skip_validation
        )
        total_applied = sum(results['applied'].values())
        print(f"Total translations applied: {total_applied}")
    elif args.command == 'export':
        helper.export_for_external_translation(args.languages, args.format)


if __name__ == "__main__":
    main()