mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-12-18 20:04:17 +01:00
472 lines
16 KiB
Python
472 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
Author: Ludy87
|
|
Description: Checks and synchronizes JSON translation files against a reference file.
|
|
It does two things:
|
|
1) CI check: verifies that all keys from the reference exist in the target (recursively),
|
|
flags extras, duplicate keys, and now also flags untranslated values (same as English).
|
|
2) Sync/update: adds missing keys (and optionally prunes extras).
|
|
|
|
Also prints a CI-friendly report (intended for PR comments).
|
|
|
|
Usage:
|
|
python sync_translations.py --reference-file <path_to_reference_json> [--branch <branch_root>] [--actor <actor_name>] [--files <list_of_target_jsons>] [--check] [--prune] [--dry-run]
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import shutil
|
|
from pathlib import Path
|
|
from copy import deepcopy
|
|
from dataclasses import dataclass
|
|
from typing import Any, Dict, Tuple, List
|
|
|
|
JsonDict = Dict[str, Any]
|
|
|
|
|
|
@dataclass
|
|
class MergeStats:
|
|
added: int = 0
|
|
pruned: int = 0
|
|
missing_keys: list[str] | None = None
|
|
extra_keys: list[str] | None = None
|
|
# Missing translatable leaf nodes (non-dict values)
|
|
missing_leafs: int = 0
|
|
# NEW: untranslated values (same as reference English)
|
|
untranslated_leafs: int = 0
|
|
untranslated_keys: list[str] | None = None
|
|
|
|
def __post_init__(self):
|
|
self.missing_keys = []
|
|
self.extra_keys = []
|
|
self.untranslated_keys = []
|
|
|
|
|
|
def is_mapping(v: Any) -> bool:
|
|
return isinstance(v, dict)
|
|
|
|
|
|
# Count all translatable entries (non-dict values) in any nested structure
|
|
def count_leaves(obj: Any) -> int:
|
|
if is_mapping(obj):
|
|
return sum(count_leaves(v) for v in obj.values())
|
|
return 1
|
|
|
|
|
|
def normalize_text(s: str) -> str:
|
|
"""Normalize strings for a strict-but-fair equality check."""
|
|
# Trim, collapse whitespace, lower-case. Keep placeholders intact.
|
|
s = s.strip()
|
|
s = re.sub(r"\s+", " ", s)
|
|
return s.lower()
|
|
|
|
|
|
def collect_untranslated_values(ref: Any, tgt: Any, *, path: str = "", stats: MergeStats) -> None:
|
|
"""
|
|
Walk ref + target without mutating anything and find values that are present
|
|
but not translated (target string equals reference string).
|
|
"""
|
|
if is_mapping(ref) and is_mapping(tgt):
|
|
for k, ref_val in ref.items():
|
|
new_path = f"{path}.{k}" if path else k
|
|
if k in tgt:
|
|
collect_untranslated_values(ref_val, tgt[k], path=new_path, stats=stats)
|
|
return
|
|
|
|
# Only compare leaf strings
|
|
if isinstance(ref, str) and isinstance(tgt, str):
|
|
if normalize_text(ref) == normalize_text(tgt):
|
|
stats.untranslated_leafs += 1
|
|
stats.untranslated_keys.append(path)
|
|
|
|
|
|
def deep_merge_and_collect(
|
|
ref: Any, target: Any, *, prune_extras: bool, path: str = "", stats: MergeStats
|
|
) -> Any:
|
|
"""
|
|
Recursively ensure `target` contains at least the structure/keys of `ref`.
|
|
- Adds any missing keys using the reference values.
|
|
- Tracks missing keys and how many leaf nodes are missing (for %).
|
|
- Optionally prunes extra keys that don't exist in the reference.
|
|
"""
|
|
if is_mapping(ref) and is_mapping(target):
|
|
merged: JsonDict = {}
|
|
|
|
# Walk reference keys in order so we keep the same structure/order
|
|
for k, ref_val in ref.items():
|
|
new_path = f"{path}.{k}" if path else k
|
|
if k in target:
|
|
merged[k] = deep_merge_and_collect(
|
|
ref_val,
|
|
target[k],
|
|
prune_extras=prune_extras,
|
|
path=new_path,
|
|
stats=stats,
|
|
)
|
|
else:
|
|
# Entire key (possibly subtree) is missing → copy from ref
|
|
merged[k] = deepcopy(ref_val)
|
|
stats.added += 1
|
|
stats.missing_keys.append(new_path)
|
|
# Count how many translatable leaves this missing subtree contains
|
|
stats.missing_leafs += count_leaves(ref_val)
|
|
|
|
# Handle keys that exist in target but not in ref
|
|
if prune_extras:
|
|
for k in target.keys():
|
|
if k not in ref:
|
|
stats.pruned += 1
|
|
stats.extra_keys.append(f"{path}.{k}" if path else k)
|
|
# Do not copy extras when pruning
|
|
else:
|
|
# Keep extras (but still list them for the report)
|
|
for k, v in target.items():
|
|
if k not in ref:
|
|
merged[k] = deepcopy(v)
|
|
stats.extra_keys.append(f"{path}.{k}" if path else k)
|
|
|
|
return merged
|
|
|
|
# Non-dict values → keep existing translation; if it's None, count it as missing
|
|
if target is None:
|
|
stats.missing_leafs += count_leaves(ref)
|
|
return deepcopy(target if target is not None else ref)
|
|
|
|
|
|
def order_like_reference(ref: Any, obj: Any) -> Any:
|
|
"""
|
|
Reorder dict keys in `obj` to match the order in `ref` (recursively).
|
|
Extra keys are appended at the end.
|
|
"""
|
|
if not (is_mapping(ref) and is_mapping(obj)):
|
|
return obj
|
|
ordered = {}
|
|
for k in ref:
|
|
if k in obj:
|
|
ordered[k] = order_like_reference(ref[k], obj[k])
|
|
for k in obj:
|
|
if k not in ref:
|
|
ordered[k] = order_like_reference(None, obj[k])
|
|
return ordered
|
|
|
|
|
|
def read_json(path: Path) -> Any:
|
|
with path.open("r", encoding="utf-8") as f:
|
|
return json.load(f)
|
|
|
|
|
|
# Read JSON while detecting duplicate keys (json.load would normally overwrite silently)
|
|
def read_json_with_duplicates(path: Path) -> Tuple[Any, list[str]]:
|
|
"""
|
|
Returns: (data, duplicate_keys)
|
|
"""
|
|
duplicates: list[str] = []
|
|
|
|
def object_pairs_hook(pairs):
|
|
obj = {}
|
|
seen = set()
|
|
for k, v in pairs:
|
|
if k in seen:
|
|
duplicates.append(k)
|
|
else:
|
|
seen.add(k)
|
|
obj[k] = v
|
|
return obj
|
|
|
|
with path.open("r", encoding="utf-8") as f:
|
|
data = json.load(f, object_pairs_hook=object_pairs_hook)
|
|
return data, duplicates
|
|
|
|
|
|
def write_json(path: Path, data: Any) -> None:
|
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
with path.open("w", encoding="utf-8") as f:
|
|
json.dump(data, f, ensure_ascii=False, indent=2)
|
|
f.write("\n")
|
|
|
|
|
|
def backup_file(path: Path) -> None:
|
|
backup = path.with_suffix(path.suffix + ".bak")
|
|
shutil.copy2(path, backup)
|
|
|
|
|
|
def sanitize_actor(s: str | None) -> str | None:
|
|
if s is None:
|
|
return None
|
|
return re.sub(r"[^a-zA-Z0-9_\-]", "", s)
|
|
|
|
|
|
def sanitize_branch(s: str | None) -> str | None:
|
|
if s is None:
|
|
return None
|
|
return re.sub(r"[^a-zA-Z0-9_\-\/\.]", "", s)
|
|
|
|
|
|
def resolve_in_branch(branch: Path | None, p: Path) -> Path:
|
|
# If no branch root or an absolute path is provided, use it as-is
|
|
if p.is_absolute() or branch is None or str(branch) == "":
|
|
return p
|
|
return (branch / p).resolve()
|
|
|
|
|
|
def is_within(base: Path | None, target: Path) -> bool:
|
|
# Allow everything if no base is provided
|
|
if base is None or str(base) == "":
|
|
return True
|
|
base_resolved = base.resolve()
|
|
target_resolved = target.resolve()
|
|
if os.name == "nt":
|
|
return str(target_resolved).lower().startswith(str(base_resolved).lower())
|
|
return str(target_resolved).startswith(str(base_resolved))
|
|
|
|
|
|
def assert_within_branch(base: Path | None, target: Path) -> None:
|
|
if not is_within(base, target):
|
|
raise ValueError(f"Unsafe path outside branch: {target}")
|
|
|
|
|
|
def process_file(
|
|
ref_path: Path,
|
|
target_path: Path,
|
|
*,
|
|
prune: bool,
|
|
dry_run: bool,
|
|
check_only: bool,
|
|
backup: bool,
|
|
) -> Tuple[MergeStats, bool, List[str], int]:
|
|
# Load both files, capturing duplicate keys in the target
|
|
ref, _ref_dupes = read_json_with_duplicates(ref_path)
|
|
target, target_dupes = read_json_with_duplicates(target_path)
|
|
|
|
# Total number of translatable leaves in the reference (for % calculation)
|
|
total_ref_leaves = count_leaves(ref)
|
|
|
|
stats = MergeStats()
|
|
|
|
# NEW: detect untranslated values before we mutate/merge anything
|
|
collect_untranslated_values(ref, target, path="", stats=stats)
|
|
|
|
merged = deep_merge_and_collect(ref, target, prune_extras=prune, stats=stats)
|
|
merged = order_like_reference(ref, merged)
|
|
|
|
# "Success" means: no missing keys, (if pruning) no extras, no duplicate keys, no untranslated values
|
|
success = (
|
|
not stats.missing_keys
|
|
and (not prune or not stats.extra_keys)
|
|
and not target_dupes
|
|
and stats.untranslated_leafs == 0
|
|
)
|
|
|
|
if not check_only and not dry_run:
|
|
if backup:
|
|
backup_file(target_path)
|
|
write_json(target_path, merged)
|
|
|
|
return stats, success, target_dupes, total_ref_leaves
|
|
|
|
|
|
def find_all_locale_files(branch_root: Path, ref_path: Path) -> List[Path]:
|
|
"""
|
|
Find all `translation.json` files under `frontend/public/locales/**`,
|
|
excluding the reference file itself.
|
|
"""
|
|
locales_dir = branch_root / "frontend" / "public" / "locales"
|
|
if not locales_dir.exists():
|
|
return []
|
|
files = sorted(locales_dir.rglob("translation.json"))
|
|
ref_resolved = ref_path.resolve()
|
|
return [f for f in files if f.resolve() != ref_resolved]
|
|
|
|
|
|
def main() -> None:
|
|
parser = argparse.ArgumentParser(
|
|
description="Compare and sync translation JSON files against a reference (with branch support)."
|
|
)
|
|
parser.add_argument(
|
|
"--reference-file",
|
|
"--ref",
|
|
dest="ref",
|
|
required=True,
|
|
type=Path,
|
|
help="Path to reference JSON file (e.g., frontend/public/locales/en-GB/translation.json)",
|
|
)
|
|
parser.add_argument(
|
|
"--files",
|
|
nargs="+",
|
|
required=False,
|
|
type=Path,
|
|
help="List of target JSON files (optional; if omitted, all locales/*/translation.json will be processed)",
|
|
)
|
|
parser.add_argument(
|
|
"--branch",
|
|
type=str,
|
|
required=False,
|
|
help="Branch/checkout root directory used as prefix for --reference-file and --files",
|
|
)
|
|
parser.add_argument(
|
|
"--actor",
|
|
type=str,
|
|
required=False,
|
|
help="Actor from PR (used for CI comment mention).",
|
|
)
|
|
parser.add_argument(
|
|
"--check",
|
|
action="store_true",
|
|
help="Check mode: do not write files, only print a CI-friendly report.",
|
|
)
|
|
parser.add_argument(
|
|
"--prune",
|
|
action="store_true",
|
|
help="Remove keys that are not present in the reference.",
|
|
)
|
|
parser.add_argument(
|
|
"--dry-run",
|
|
action="store_true",
|
|
help="Dry run: do not write changes (useful for local testing).",
|
|
)
|
|
parser.add_argument(
|
|
"--no-backup",
|
|
dest="backup",
|
|
action="store_false",
|
|
help="Disable .bak backup when writing in-place.",
|
|
)
|
|
parser.set_defaults(backup=True)
|
|
args = parser.parse_args()
|
|
|
|
# Normalize inputs
|
|
actor = sanitize_actor(args.actor) if args.actor else "translator"
|
|
branch_str = sanitize_branch(args.branch) if args.branch else ""
|
|
branch_base: Path | None = Path(branch_str).resolve() if branch_str else Path.cwd()
|
|
|
|
# Resolve the reference path. First try under branch root, then fall back to raw path.
|
|
ref_path = resolve_in_branch(branch_base, args.ref)
|
|
if not ref_path.exists():
|
|
alt = Path(args.ref)
|
|
if not alt.is_absolute():
|
|
alt = (Path.cwd() / alt).resolve()
|
|
if alt.exists():
|
|
ref_path = alt
|
|
if not ref_path.exists():
|
|
raise SystemExit(f"Reference file not found: {ref_path}")
|
|
|
|
# Build the targets list. If CI passed a single space-separated string, split it.
|
|
files_list: List[Path] = []
|
|
if args.files:
|
|
if len(args.files) == 1 and " " in str(args.files[0]):
|
|
files_list = [Path(p) for p in str(args.files[0]).split()]
|
|
else:
|
|
files_list = list(args.files)
|
|
else:
|
|
base = branch_base if branch_base else Path.cwd()
|
|
files_list = find_all_locale_files(base, ref_path)
|
|
|
|
if not files_list:
|
|
raise SystemExit("No translation.json files found under locales/.")
|
|
|
|
# Build CI report
|
|
report: list[str] = []
|
|
total_added = total_pruned = 0
|
|
any_failed = False
|
|
|
|
report.append(
|
|
f"#### 🔄 Reference File: `{args.ref}` (branch root: `{branch_base if branch_base else '.'}`)"
|
|
)
|
|
report.append("")
|
|
|
|
for target_rel in files_list:
|
|
target_rel_path = Path(target_rel)
|
|
target_path = resolve_in_branch(branch_base, target_rel_path)
|
|
|
|
# Keep target access inside branch (when branch is set)
|
|
try:
|
|
assert_within_branch(branch_base, target_path)
|
|
except ValueError as e:
|
|
report.append(f"❌ {e}")
|
|
any_failed = True
|
|
continue
|
|
|
|
if not target_path.exists():
|
|
report.append(
|
|
f"❌ File not found: `{target_rel_path}` (resolved: `{target_path}`)"
|
|
)
|
|
any_failed = True
|
|
continue
|
|
|
|
stats, success, dupes, total_ref_leaves = process_file(
|
|
ref_path,
|
|
target_path,
|
|
prune=args.prune,
|
|
dry_run=args.dry_run,
|
|
check_only=args.check,
|
|
backup=args.backup,
|
|
)
|
|
|
|
total_added += stats.added
|
|
total_pruned += stats.pruned
|
|
|
|
# Missing translations (absolute + % of total reference leaves)
|
|
missing_abs = stats.missing_leafs
|
|
total_abs = total_ref_leaves if total_ref_leaves > 0 else 0
|
|
missing_pct = (missing_abs / total_abs * 100.0) if total_abs > 0 else 0.0
|
|
|
|
# Untranslated values (absolute + % of total reference leaves)
|
|
untranslated_abs = stats.untranslated_leafs
|
|
untranslated_pct = (untranslated_abs / total_abs * 100.0) if total_abs > 0 else 0.0
|
|
|
|
report.append(f"#### 📄 File: `{target_rel_path}`")
|
|
if success:
|
|
report.append("✅ **Passed:** All keys in sync.")
|
|
else:
|
|
report.append("❌ **Failed:** Differences detected.")
|
|
if stats.missing_keys:
|
|
report.append(
|
|
f"- Missing keys ({len(stats.missing_keys)}): `{', '.join(stats.missing_keys)}`"
|
|
)
|
|
if stats.extra_keys:
|
|
if args.prune:
|
|
report.append(
|
|
f"- Extra keys removed/flagged ({len(stats.extra_keys)}): `{', '.join(stats.extra_keys)}`"
|
|
)
|
|
else:
|
|
report.append(
|
|
f"- Extra keys present ({len(stats.extra_keys)}): `{', '.join(stats.extra_keys)}`"
|
|
)
|
|
if dupes:
|
|
report.append(f"- Duplicate keys ({len(dupes)}): `{', '.join(dupes)}`")
|
|
if stats.untranslated_keys:
|
|
report.append(
|
|
f"- Untranslated keys ({len(stats.untranslated_keys)}): `{', '.join(stats.untranslated_keys)}`"
|
|
)
|
|
|
|
report.append(f"- Missing translations: {missing_abs} / {total_abs} ({missing_pct:.2f}%)")
|
|
report.append(f"- Untranslated values: {untranslated_abs} / {total_abs} ({untranslated_pct:.2f}%)")
|
|
report.append(f"- Added: {stats.added}, Pruned: {stats.pruned}")
|
|
report.append("---")
|
|
report.append("")
|
|
if not success:
|
|
any_failed = True
|
|
|
|
# Final summary
|
|
report.append("## 🧾 Summary")
|
|
report.append(f"- Total added: {total_added}")
|
|
report.append(f"- Total pruned: {total_pruned}")
|
|
report.append("")
|
|
if any_failed:
|
|
report.append("## ❌ Overall Status: **Failed**")
|
|
report.append(f"@{actor} please check and sync the missing translations.")
|
|
else:
|
|
report.append("## ✅ Overall Status: **Success**")
|
|
report.append(f"Thanks @{actor} for keeping translations in sync! 🎉")
|
|
|
|
# CI comment output (for PR comment body)
|
|
print("\n".join(report))
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|