mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-04-22 23:08:53 +02:00
lang updates plus --include-existing flag (#5212)
# Description of Changes

<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details.
This commit is contained in:
@@ -174,9 +174,6 @@ Merges missing translations from en-GB into target language files and manages tr
|
||||
# Add missing translations from en-GB to French
|
||||
python scripts/translations/translation_merger.py fr-FR add-missing
|
||||
|
||||
# Add without marking as [UNTRANSLATED]
|
||||
python scripts/translations/translation_merger.py fr-FR add-missing --no-mark-untranslated
|
||||
|
||||
# Extract untranslated entries to a file
|
||||
python scripts/translations/translation_merger.py fr-FR extract-untranslated --output fr_untranslated.json
|
||||
|
||||
@@ -188,7 +185,7 @@ python scripts/translations/translation_merger.py fr-FR apply-translations --tra
|
||||
```
|
||||
|
||||
**Features:**
|
||||
- Adds missing keys from en-GB with optional [UNTRANSLATED] markers
|
||||
- Adds missing keys from en-GB (copies English text directly)
|
||||
- Extracts untranslated entries for external translation
|
||||
- Creates structured templates for AI translation
|
||||
- Applies translated content back to language files
|
||||
@@ -442,7 +439,7 @@ Repeat steps 2-5 until 100% complete.
|
||||
|
||||
#### Step 1: Add Missing Translations
|
||||
```bash
|
||||
python scripts/translations/translation_merger.py fr-FR add-missing --mark-untranslated
|
||||
python scripts/translations/translation_merger.py fr-FR add-missing
|
||||
```
|
||||
|
||||
#### Step 2: Create AI Template
|
||||
@@ -523,7 +520,7 @@ ignore = [
|
||||
|
||||
### Critical Rules for Translation
|
||||
|
||||
1. **NEVER skip entries**: Translate ALL entries in each batch to avoid [UNTRANSLATED] pollution
|
||||
1. **NEVER skip entries**: Translate ALL entries in each batch to ensure completeness
|
||||
2. **Use appropriate batch sizes**: 100 entries for systematic translation, unlimited for compact method
|
||||
3. **Skip validation for placeholders**: Use `--skip-validation` when a batch contains `{{variable}}` patterns
|
||||
4. **Check progress between batches**: Use `--summary` flag to track completion percentage
|
||||
@@ -567,13 +564,6 @@ python scripts/translations/json_validator.py --all-batches ar_AR
|
||||
- Regex patterns: Double all backslashes (`\d` → `\\d`)
|
||||
- Check for missing/extra commas at line reported in error
|
||||
|
||||
#### [UNTRANSLATED] Pollution
|
||||
**Problem**: Hundreds of [UNTRANSLATED] markers from incomplete translation attempts
|
||||
**Solution**:
|
||||
- Only translate complete batches of manageable size
|
||||
- Use an analyzer that counts [UNTRANSLATED] entries as missing translations
|
||||
- Restore from backup if pollution occurs
|
||||
|
||||
#### Validation False Positives
|
||||
**Problem**: Validator flags legitimate `{{variable}}` placeholders as artifacts
|
||||
**Solution**: Use `--skip-validation` flag when applying batches with template variables
|
||||
@@ -674,7 +664,7 @@ python scripts/translations/ai_translation_helper.py apply-batch de_batch_1.json
|
||||
- **Missing Files**: Scripts create new files when language directories don't exist
|
||||
- **Invalid JSON**: Clear error messages with line numbers
|
||||
- **Placeholder Mismatches**: Validation warnings for missing or extra placeholders
|
||||
- **[UNTRANSLATED] Entries**: Counted as missing translations to prevent pollution
|
||||
- **Legacy [UNTRANSLATED] Markers**: Detected and stripped for backwards compatibility
|
||||
- **Backup Failures**: Graceful handling with user notification
|
||||
|
||||
## Integration with Development
|
||||
|
||||
@@ -45,9 +45,10 @@ def load_translation_file(file_path):
|
||||
with open(file_path, 'rb') as f:
|
||||
return tomllib.load(f)
|
||||
|
||||
def extract_untranslated(language_code, batch_size=500):
|
||||
def extract_untranslated(language_code, batch_size=500, include_existing=False):
|
||||
"""Extract untranslated entries and split into batches."""
|
||||
print(f"\n🔍 Extracting untranslated entries for {language_code}...")
|
||||
mode = "all untranslated (including existing)" if include_existing else "new (missing)"
|
||||
print(f"\n🔍 Extracting {mode} entries for {language_code}...")
|
||||
|
||||
# Load files
|
||||
golden_path = find_translation_file(Path('frontend/public/locales/en-GB'))
|
||||
@@ -84,13 +85,19 @@ def extract_untranslated(language_code, batch_size=500):
|
||||
# Find untranslated
|
||||
untranslated = {}
|
||||
for key, value in golden_flat.items():
|
||||
if (key not in lang_flat or
|
||||
lang_flat.get(key) == value or
|
||||
(isinstance(lang_flat.get(key), str) and lang_flat.get(key).startswith("[UNTRANSLATED]"))):
|
||||
untranslated[key] = value
|
||||
if include_existing:
|
||||
# Include missing keys, keys with English values, and [UNTRANSLATED] keys
|
||||
if (key not in lang_flat or
|
||||
lang_flat.get(key) == value or
|
||||
(isinstance(lang_flat.get(key), str) and lang_flat.get(key).startswith("[UNTRANSLATED]"))):
|
||||
untranslated[key] = value
|
||||
else:
|
||||
# Only include missing keys (not in target file at all)
|
||||
if key not in lang_flat:
|
||||
untranslated[key] = value
|
||||
|
||||
total = len(untranslated)
|
||||
print(f"Found {total} untranslated entries")
|
||||
print(f"Found {total} {mode} entries")
|
||||
|
||||
if total == 0:
|
||||
print("✓ Language is already complete!")
|
||||
@@ -268,6 +275,7 @@ Examples:
|
||||
parser.add_argument('--no-cleanup', action='store_true', help='Keep temporary batch files')
|
||||
parser.add_argument('--skip-verification', action='store_true', help='Skip final completion check')
|
||||
parser.add_argument('--timeout', type=int, default=600, help='Timeout per batch in seconds (default: 600 = 10 minutes)')
|
||||
parser.add_argument('--include-existing', action='store_true', help='Also retranslate existing keys that match English (default: only translate missing keys)')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -287,7 +295,7 @@ Examples:
|
||||
|
||||
try:
|
||||
# Step 1: Extract and split
|
||||
batch_files = extract_untranslated(args.language, args.batch_size)
|
||||
batch_files = extract_untranslated(args.language, args.batch_size, args.include_existing)
|
||||
if batch_files is None:
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ def get_language_completion(locales_dir: Path, language: str) -> Optional[float]
|
||||
return None
|
||||
|
||||
|
||||
def translate_language(language: str, api_key: str, batch_size: int, timeout: int, skip_verification: bool) -> Tuple[str, bool, str]:
|
||||
def translate_language(language: str, api_key: str, batch_size: int, timeout: int, skip_verification: bool, include_existing: bool) -> Tuple[str, bool, str]:
|
||||
"""
|
||||
Translate a single language.
|
||||
Returns: (language_code, success, message)
|
||||
@@ -105,6 +105,9 @@ def translate_language(language: str, api_key: str, batch_size: int, timeout: in
|
||||
if skip_verification:
|
||||
cmd.append('--skip-verification')
|
||||
|
||||
if include_existing:
|
||||
cmd.append('--include-existing')
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
@@ -170,6 +173,8 @@ Note: Requires OPENAI_API_KEY environment variable or --api-key argument.
|
||||
help='Path to locales directory')
|
||||
parser.add_argument('--skip-verification', action='store_true',
|
||||
help='Skip final completion verification for each language')
|
||||
parser.add_argument('--include-existing', action='store_true',
|
||||
help='Also retranslate existing keys that match English (default: only translate missing keys)')
|
||||
parser.add_argument('--dry-run', action='store_true',
|
||||
help='Show what would be translated without actually translating')
|
||||
|
||||
@@ -253,7 +258,8 @@ Note: Requires OPENAI_API_KEY environment variable or --api-key argument.
|
||||
api_key,
|
||||
args.batch_size,
|
||||
args.timeout,
|
||||
args.skip_verification
|
||||
args.skip_verification,
|
||||
args.include_existing
|
||||
): lang
|
||||
for lang in languages
|
||||
}
|
||||
|
||||
@@ -117,8 +117,7 @@ class TranslationMerger:
|
||||
missing = set(golden_flat.keys()) - set(target_flat.keys())
|
||||
return sorted(missing - ignore_set)
|
||||
|
||||
def add_missing_translations(self, target_file: Path, keys_to_add: List[str] = None,
|
||||
mark_untranslated: bool = True) -> Dict:
|
||||
def add_missing_translations(self, target_file: Path, keys_to_add: List[str] = None) -> Dict:
|
||||
"""Add missing translations from en-GB to target file."""
|
||||
if not target_file.exists():
|
||||
target_data = {}
|
||||
@@ -132,10 +131,7 @@ class TranslationMerger:
|
||||
for key in missing_keys:
|
||||
if key in golden_flat:
|
||||
value = golden_flat[key]
|
||||
if mark_untranslated and isinstance(value, str):
|
||||
# Mark as untranslated for AI to translate later
|
||||
value = f"[UNTRANSLATED] {value}"
|
||||
|
||||
# Add the English value directly without [UNTRANSLATED] marker
|
||||
self._set_nested_value(target_data, key, value)
|
||||
added_count += 1
|
||||
|
||||
@@ -282,8 +278,6 @@ def main():
|
||||
# Add missing command
|
||||
add_parser = subparsers.add_parser('add-missing', help='Add missing translations from en-GB')
|
||||
add_parser.add_argument('--backup', action='store_true', help='Create backup before modifying files')
|
||||
add_parser.add_argument('--mark-untranslated', action='store_true', default=True,
|
||||
help='Mark added translations as [UNTRANSLATED]')
|
||||
|
||||
# Extract untranslated command
|
||||
extract_parser = subparsers.add_parser('extract-untranslated', help='Extract untranslated entries')
|
||||
@@ -312,10 +306,7 @@ def main():
|
||||
|
||||
if args.command == 'add-missing':
|
||||
print(f"Adding missing translations to {args.language}...")
|
||||
result = merger.add_missing_translations(
|
||||
target_file,
|
||||
mark_untranslated=args.mark_untranslated
|
||||
)
|
||||
result = merger.add_missing_translations(target_file)
|
||||
|
||||
merger._save_translation_file(result['data'], target_file, backup=args.backup)
|
||||
print(f"Added {result['added_count']} missing translations")
|
||||
|
||||
Reference in New Issue
Block a user