From c047bdda8c8ba40c5995714f44574acd271e5f95 Mon Sep 17 00:00:00 2001 From: Ludy Date: Fri, 26 Dec 2025 00:24:30 +0100 Subject: [PATCH] feat(translations): add command to remove unused translation keys (#5292) # Description of Changes This pull request adds a new feature to the translation management script, allowing users to automatically remove unused translations that are not present in the `en-GB` (golden truth) file. The update includes both the implementation in `translation_merger.py` and documentation in `README.md`. The most important changes are grouped below: **New Feature: Remove Unused Translations** * Added a `remove-unused` command to `translation_merger.py`, which removes translation keys from a target file if they are not present in the `en-GB` reference. This includes backup support and a CLI interface. [[1]](diffhunk://#diff-bb7f09f590ccabb456565d52afd66c1421525ddb1b6b65c0985aaab5a3c20ae7R282-R307) [[2]](diffhunk://#diff-bb7f09f590ccabb456565d52afd66c1421525ddb1b6b65c0985aaab5a3c20ae7R397-R404) [[3]](diffhunk://#diff-bb7f09f590ccabb456565d52afd66c1421525ddb1b6b65c0985aaab5a3c20ae7R464-R471) * Implemented helper methods `_delete_nested_key` (for deleting nested keys and cleaning up empty branches) and `get_unused_keys` (for identifying unused keys) in the translation merger logic. [[1]](diffhunk://#diff-bb7f09f590ccabb456565d52afd66c1421525ddb1b6b65c0985aaab5a3c20ae7R117-R139) [[2]](diffhunk://#diff-bb7f09f590ccabb456565d52afd66c1421525ddb1b6b65c0985aaab5a3c20ae7R156-R166) **Documentation Updates** * Updated `README.md` to document the new `remove-unused` command and describe its purpose and usage. [[1]](diffhunk://#diff-aeee54608ec45b06f4d52a4afcfbc7e6b08dee75f4328207cec32b8ca02fe0c6R185-R187) [[2]](diffhunk://#diff-aeee54608ec45b06f4d52a4afcfbc7e6b08dee75f4328207cec32b8ca02fe0c6R196) --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### Translations (if applicable) - [ ] I ran [`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. --- scripts/translations/README.md | 6 +- scripts/translations/translation_merger.py | 77 ++++++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/scripts/translations/README.md b/scripts/translations/README.md index f7bcf46da..93bc95abd 100644 --- a/scripts/translations/README.md +++ b/scripts/translations/README.md @@ -182,6 +182,9 @@ python scripts/translations/translation_merger.py fr-FR create-template --output # Apply translations from a file python scripts/translations/translation_merger.py fr-FR apply-translations --translations-file fr_translated.json + +# Remove unused translations not present in en-GB +python scripts/translations/translation_merger.py fr-FR remove-unused ``` **Features:** @@ -190,6 +193,7 @@ python scripts/translations/translation_merger.py fr-FR apply-translations --tra - Creates structured templates for AI translation - Applies translated content back to language files - Automatic backup creation +- Removes unused translations not present in en-GB ### 3. `ai_translation_helper.py` Specialized tool for AI-assisted translation workflows with batch processing and validation. @@ -696,4 +700,4 @@ These scripts integrate with the existing translation system: 3. **Updating Existing Language**: Use analyzer to find gaps, then compact or batch method 4. **Quality Assurance**: Use analyzer with `--summary` for completion metrics and issue detection 5. **External Translation Services**: Use export functionality to generate CSV files for translators -6. **Structure Maintenance**: Use json_beautifier to keep files aligned with en-GB structure \ No newline at end of file +6. **Structure Maintenance**: Use json_beautifier to keep files aligned with en-GB structure diff --git a/scripts/translations/translation_merger.py b/scripts/translations/translation_merger.py index 1dbfea262..61ab0021e 100644 --- a/scripts/translations/translation_merger.py +++ b/scripts/translations/translation_merger.py @@ -114,6 +114,29 @@ class TranslationMerger: items.append((new_key, v)) return dict(items) + def _delete_nested_key(self, data: Dict, key_path: str) -> bool: + """Delete a nested key using dot notation and clean up empty branches.""" + + def _delete(current: Dict, keys: List[str]) -> bool: + key = keys[0] + + if key not in current: + return False + + if len(keys) == 1: + del current[key] + return True + + if not isinstance(current[key], dict): + return False + + removed = _delete(current[key], keys[1:]) + if removed and current[key] == {}: + del current[key] + return removed + + return _delete(data, key_path.split(".")) + def get_missing_keys(self, target_file: Path) -> List[str]: """Get list of missing keys in target file.""" lang_code = target_file.parent.name.replace("-", "_") @@ -130,6 +153,17 @@ class TranslationMerger: missing = set(golden_flat.keys()) - set(target_flat.keys()) return sorted(missing - ignore_set) + def get_unused_keys(self, target_file: Path) -> List[str]: + """Get list of keys that are not present in the golden truth file.""" + if not target_file.exists(): + return [] + + target_data = self._load_translation_file(target_file) + target_flat = self._flatten_dict(target_data) + golden_flat = self._flatten_dict(self.golden_truth) + + return sorted(set(target_flat.keys()) - set(golden_flat.keys())) + def add_missing_translations( self, target_file: Path, keys_to_add: List[str] = None ) -> Dict: @@ -245,6 +279,32 @@ class TranslationMerger: "data": target_data, } + def remove_unused_translations( + self, target_file: Path, keys_to_remove: List[str] = None, backup: bool = False + ) -> Dict: + """Remove translations that are not present in the golden truth file.""" + if not target_file.exists(): + print(f"Error: Target file does not exist: {target_file}") + return {"success": False, "error": "File not found"} + + target_data = self._load_translation_file(target_file) + keys_to_remove = keys_to_remove or self.get_unused_keys(target_file) + + removed_count = 0 + + for key in keys_to_remove: + if self._delete_nested_key(target_data, key): + removed_count += 1 + + if removed_count > 0: + self._save_translation_file(target_data, target_file, backup) + + return { + "success": True, + "removed_count": removed_count, + "data": target_data, + } + def create_translation_template(self, target_file: Path, output_file: Path) -> None: """Create a template file for AI translation with context.""" untranslated = self.extract_untranslated_entries(target_file) @@ -334,6 +394,14 @@ def main(): "--backup", action="store_true", help="Create backup before modifying files" ) + # Remove unused translations command + remove_parser = subparsers.add_parser( + "remove-unused", help="Remove unused translations not present in en-GB" + ) + remove_parser.add_argument( + "--backup", action="store_true", help="Create backup before modifying files" + ) + args = parser.parse_args() if not args.command: @@ -393,6 +461,15 @@ def main(): else: print(f"Failed: {result.get('error', 'Unknown error')}") + elif args.command == "remove-unused": + print(f"Removing unused translations from {args.language}...") + result = merger.remove_unused_translations(target_file, backup=args.backup) + + if result["success"]: + print(f"Removed {result['removed_count']} unused translations") + else: + print(f"Failed: {result.get('error', 'Unknown error')}") + if __name__ == "__main__": main()