Add translation sync script and CI workflow

Introduces a Python script for checking and synchronizing JSON translation files against a reference, ensuring consistency across locales. Adds a GitHub Actions workflow to automatically verify and comment on translation file changes in pull requests.
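A minimal illustration of the sync semantics (hypothetical data, not part of this commit): existing target translations are preserved, and only keys missing relative to the reference are copied over.

# reference (en-GB): {"home": {"title": "Home", "cta": "Get started"}}
# target (de-DE):    {"home": {"title": "Startseite"}}
# after sync:        {"home": {"title": "Startseite", "cta": "Get started"}}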
Ludy87 · 2025-10-26 11:32:28 +01:00
parent 848ff9688b · commit 4b20ed5a97
2 changed files with 634 additions and 0 deletions

.github/scripts/sync_translations.py (new file)

@@ -0,0 +1,342 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: Ludy87
Description: This script processes JSON translation files for localization checks and synchronization.
It compares target translation files in a branch with a reference JSON file to ensure consistency.
The script performs two main tasks:
1. CI check: Verifies that all keys present in the reference exist in the target (recursively). Optionally flags extras.
2. Sync/update: Updates the target files to match the reference by adding missing keys (and optionally pruning extras).
The script also provides functionality to print a CI-friendly report (to be used as PR comment output).
Usage:
python sync_translations.py --reference-file <path_to_reference_json> [--branch <branch_root>] [--actor <actor_name>] [--files <list_of_target_jsons>] [--check] [--prune] [--dry-run]
# Sample for Windows:
# python .github\\scripts\\sync_translations.py --reference-file frontend\\public\\locales\\en-GB\\translation.json --branch "" --files frontend\\public\\locales\\de-DE\\translation.json --check --actor Ludy87
"""
from __future__ import annotations
import argparse
import json
import os
import re
import shutil
from pathlib import Path
from copy import deepcopy
from dataclasses import dataclass, field
from typing import Any, Dict, Tuple, List
JsonDict = Dict[str, Any]
@dataclass
class MergeStats:
    added: int = 0
    pruned: int = 0
    missing_keys: list[str] = field(default_factory=list)
    extra_keys: list[str] = field(default_factory=list)
def is_mapping(v: Any) -> bool:
return isinstance(v, dict)
def deep_merge_and_collect(
ref: Any, target: Any, *, prune_extras: bool, path: str = "", stats: MergeStats
) -> Any:
"""Recursively ensure `target` contains at least the structure/keys of `ref`."""
if is_mapping(ref) and is_mapping(target):
merged: JsonDict = {}
# Merge reference keys
for k, ref_val in ref.items():
new_path = f"{path}.{k}" if path else k
if k in target:
merged[k] = deep_merge_and_collect(
ref_val,
target[k],
prune_extras=prune_extras,
path=new_path,
stats=stats,
)
else:
merged[k] = deepcopy(ref_val)
stats.added += 1
stats.missing_keys.append(new_path)
# Handle extras
if prune_extras:
for k in target.keys():
if k not in ref:
stats.pruned += 1
stats.extra_keys.append(f"{path}.{k}" if path else k)
else:
for k, v in target.items():
if k not in ref:
merged[k] = deepcopy(v)
stats.extra_keys.append(f"{path}.{k}" if path else k)
return merged
# Non-dict values → keep target if it exists
return deepcopy(target if target is not None else ref)
def order_like_reference(ref: Any, obj: Any) -> Any:
"""Reorder dict keys in `obj` to follow the order in `ref` recursively."""
if not (is_mapping(ref) and is_mapping(obj)):
return obj
ordered = {}
for k in ref:
if k in obj:
ordered[k] = order_like_reference(ref[k], obj[k])
for k in obj:
if k not in ref:
ordered[k] = order_like_reference(None, obj[k])
return ordered
def read_json(path: Path) -> Any:
with path.open("r", encoding="utf-8") as f:
return json.load(f)
def write_json(path: Path, data: Any) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
with path.open("w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
f.write("\n")
def backup_file(path: Path) -> None:
backup = path.with_suffix(path.suffix + ".bak")
shutil.copy2(path, backup)
def sanitize_actor(s: str | None) -> str | None:
if s is None:
return None
return re.sub(r"[^a-zA-Z0-9_\-]", "", s)
def sanitize_branch(s: str | None) -> str | None:
if s is None:
return None
return re.sub(r"[^a-zA-Z0-9_\-\/\.]", "", s)
def resolve_in_branch(branch: Path | None, p: Path) -> Path:
if p.is_absolute() or branch is None or str(branch) == "":
return p
return (branch / p).resolve()
def assert_within_branch(base: Path | None, target: Path) -> None:
    if base is None or str(base) == "":
        return
    base_parts = base.resolve().parts
    target_parts = target.resolve().parts
    # Compare path components (case-insensitively on Windows); a plain string
    # startswith() check would wrongly accept e.g. /branch-other for /branch.
    if os.name == "nt":
        base_parts = tuple(p.lower() for p in base_parts)
        target_parts = tuple(p.lower() for p in target_parts)
    if target_parts[: len(base_parts)] != base_parts:
        raise ValueError(f"Unsafe path outside branch: {target}")
def process_file(
ref_path: Path,
target_path: Path,
*,
prune: bool,
dry_run: bool,
check_only: bool,
backup: bool,
) -> Tuple[MergeStats, bool]:
ref = read_json(ref_path)
target = read_json(target_path)
stats = MergeStats()
merged = deep_merge_and_collect(ref, target, prune_extras=prune, stats=stats)
merged = order_like_reference(ref, merged)
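    # A file passes when no reference keys are missing; extra keys only fail
    # the run when --prune is requested.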
success = not stats.missing_keys and (not prune or not stats.extra_keys)
if not check_only and not dry_run:
if backup:
backup_file(target_path)
write_json(target_path, merged)
return stats, success
def find_all_locale_files(branch_root: Path, ref_path: Path) -> List[Path]:
"""Find all translation.json files under locales/, excluding the reference file."""
locales_dir = branch_root / "frontend" / "public" / "locales"
if not locales_dir.exists():
return []
files = sorted(locales_dir.rglob("translation.json"))
ref_resolved = ref_path.resolve()
return [f for f in files if f.resolve() != ref_resolved]
def main() -> None:
parser = argparse.ArgumentParser(
description="Compare and sync translation JSON files against a reference (with branch support)."
)
parser.add_argument(
"--reference-file",
"--ref",
dest="ref",
required=True,
type=Path,
help="Path to reference JSON file (e.g., frontend/public/locales/en-GB/translation.json)",
)
parser.add_argument(
"--files",
nargs="+",
required=False,
type=Path,
help="List of target JSON files (optional; if omitted, all locales/*/translation.json will be processed)",
)
parser.add_argument(
"--branch",
type=str,
required=False,
help="Branch/checkout root directory used as prefix for --reference-file and --files",
)
parser.add_argument(
"--actor",
type=str,
required=False,
help="Actor from PR (used for CI comment mention).",
)
parser.add_argument(
"--check",
action="store_true",
help="Check mode: do not write files, only print a CI-friendly report.",
)
parser.add_argument(
"--prune",
action="store_true",
help="Remove keys that are not present in the reference.",
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Dry run: do not write changes (useful for local testing).",
)
parser.add_argument(
"--no-backup",
dest="backup",
action="store_false",
help="Disable .bak backup when writing in-place.",
)
parser.set_defaults(backup=True)
args = parser.parse_args()
# Sanitize inputs
actor = sanitize_actor(args.actor) if args.actor else "translator"
branch_str = sanitize_branch(args.branch) if args.branch else ""
branch_base: Path | None = Path(branch_str).resolve() if branch_str else Path.cwd()
# Resolve reference path
ref_path = resolve_in_branch(branch_base, args.ref)
assert_within_branch(branch_base, ref_path)
if not ref_path.exists():
raise SystemExit(f"Reference file not found: {ref_path}")
# Auto-detect files if none provided
if not args.files:
args.files = find_all_locale_files(branch_base, ref_path)
if not args.files:
raise SystemExit("No translation.json files found under locales/.")
# CI report
report: list[str] = []
total_added = total_pruned = 0
any_failed = False
report.append(
f"#### 🔄 Reference File: `{args.ref}` (branch root: `{branch_base if branch_base else '.'}`)"
)
report.append("")
for target_rel in args.files:
target_path = resolve_in_branch(branch_base, target_rel)
try:
assert_within_branch(branch_base, target_path)
except ValueError as e:
report.append(f"{e}")
any_failed = True
continue
if not target_path.exists():
report.append(
f"❌ File not found: `{target_rel}` (resolved: `{target_path}`)"
)
any_failed = True
continue
stats, success = process_file(
ref_path,
target_path,
prune=args.prune,
dry_run=args.dry_run,
check_only=args.check,
backup=args.backup,
)
total_added += stats.added
total_pruned += stats.pruned
report.append(f"#### 📄 File: `{target_rel}`")
if success:
report.append("✅ **Passed:** All keys in sync.")
else:
report.append("❌ **Failed:** Differences detected.")
if stats.missing_keys:
report.append(
f"- Missing keys ({len(stats.missing_keys)}): `{', '.join(stats.missing_keys)}`"
)
if stats.extra_keys:
if args.prune:
report.append(
f"- Extra keys removed/flagged ({len(stats.extra_keys)}): `{', '.join(stats.extra_keys)}`"
)
else:
report.append(
f"- Extra keys present ({len(stats.extra_keys)}): `{', '.join(stats.extra_keys)}`"
)
report.append(f"- Added: {stats.added}, Pruned: {stats.pruned}")
report.append("---")
report.append("")
if not success:
any_failed = True
# Summary
report.append("## 🧾 Summary")
report.append(f"- Total added: {total_added}")
report.append(f"- Total pruned: {total_pruned}")
report.append("")
if any_failed:
report.append("## ❌ Overall Status: **Failed**")
report.append(f"@{actor} please check and sync the missing translations.")
else:
report.append("## ✅ Overall Status: **Success**")
report.append(f"Thanks @{actor} for keeping translations in sync! 🎉")
# CI comment output
print("\n".join(report))
if __name__ == "__main__":
main()
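
The helpers can also be exercised directly, e.g. from a unit test. A minimal sketch, assuming .github/scripts is importable and the repository layout above (illustrative paths, not part of this commit):

from pathlib import Path
from sync_translations import process_file

# Check de-DE against en-GB without writing anything (check_only + dry_run).
stats, success = process_file(
    Path("frontend/public/locales/en-GB/translation.json"),
    Path("frontend/public/locales/de-DE/translation.json"),
    prune=False, dry_run=True, check_only=True, backup=False,
)
print(success, stats.missing_keys)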

.github/workflows/check_locales.yml (new file)

@@ -0,0 +1,292 @@
name: Check Localization Files on PR
on:
pull_request_target:
types: [opened, synchronize, reopened]
paths:
- "frontend/public/locales/*-*/translation.json"
# Cancel in-progress runs for the same workflow and PR/branch when a new
# commit is pushed, so resources are only spent checking the latest revision.
concurrency:
group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number || github.ref_name || github.ref }}
cancel-in-progress: true
permissions:
contents: read # Allow read access to repository content
jobs:
check-files:
if: github.event_name == 'pull_request_target'
runs-on: ubuntu-latest
permissions:
issues: write # Allow posting comments on issues/PRs
pull-requests: write # Allow writing to pull requests
steps:
- name: Harden Runner
uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0
with:
egress-policy: audit
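      # Security note: pull_request_target runs with the base repository's
      # permissions, so the trusted main branch is checked out here and the
      # PR's changed files are fetched read-only via the GitHub API below,
      # rather than checking out (and executing) code from the PR head.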
- name: Checkout main branch first
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Setup GitHub App Bot
id: setup-bot
uses: ./.github/actions/setup-bot
with:
app-id: ${{ secrets.GH_APP_ID }}
private-key: ${{ secrets.GH_APP_PRIVATE_KEY }}
- name: Get PR data
id: get-pr-data
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
github-token: ${{ steps.setup-bot.outputs.token }}
script: |
const prNumber = context.payload.pull_request.number;
const repoOwner = context.payload.repository.owner.login;
const repoName = context.payload.repository.name;
const branch = context.payload.pull_request.head.ref;
console.log(`PR Number: ${prNumber}`);
console.log(`Repo Owner: ${repoOwner}`);
console.log(`Repo Name: ${repoName}`);
console.log(`Branch: ${branch}`);
core.setOutput("pr_number", prNumber);
core.setOutput("repo_owner", repoOwner);
core.setOutput("repo_name", repoName);
core.setOutput("branch", branch);
continue-on-error: true
- name: Fetch PR changed files
id: fetch-pr-changes
env:
GH_TOKEN: ${{ steps.setup-bot.outputs.token }}
run: |
echo "Fetching PR changed files..."
echo "Getting list of changed files from PR..."
# Check if PR number exists
if [ -z "${{ steps.get-pr-data.outputs.pr_number }}" ]; then
echo "Error: PR number is empty"
exit 1
fi
          # Get changed files and filter for locale translation files; handle the case where no matches are found
          gh pr view ${{ steps.get-pr-data.outputs.pr_number }} --json files -q ".files[].path" | grep -E '^frontend/public/locales/[a-zA-Z]{2}-[a-zA-Z-]{2,7}/translation\.json$' > changed_files.txt || echo "No matching translation files found in PR"
          # Check if any files were found
          if [ ! -s changed_files.txt ]; then
            echo "No translation files changed in this PR"
            echo "Workflow will exit early as no relevant files to check"
            exit 0
          fi
          echo "Found $(wc -l < changed_files.txt) matching translation files"
      - name: Determine reference file
id: determine-file
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
github-token: ${{ steps.setup-bot.outputs.token }}
script: |
const fs = require("fs");
const path = require("path");
const prNumber = ${{ steps.get-pr-data.outputs.pr_number }};
const repoOwner = "${{ steps.get-pr-data.outputs.repo_owner }}";
const repoName = "${{ steps.get-pr-data.outputs.repo_name }}";
const prRepoOwner = "${{ github.event.pull_request.head.repo.owner.login }}";
const prRepoName = "${{ github.event.pull_request.head.repo.name }}";
const branch = "${{ steps.get-pr-data.outputs.branch }}";
console.log(`Determining reference file for PR #${prNumber}`);
// Validate inputs
const validateInput = (input, regex, name) => {
if (!regex.test(input)) {
throw new Error(`Invalid ${name}: ${input}`);
}
};
validateInput(repoOwner, /^[a-zA-Z0-9_-]+$/, "repository owner");
validateInput(repoName, /^[a-zA-Z0-9._-]+$/, "repository name");
validateInput(branch, /^[a-zA-Z0-9._/-]+$/, "branch name");
// Get the list of changed files in the PR
const { data: files } = await github.rest.pulls.listFiles({
owner: repoOwner,
repo: repoName,
pull_number: prNumber,
});
// Filter for relevant files based on the PR changes
const changedFiles = files
.filter(file =>
file.status !== "removed" &&
                /^frontend\/public\/locales\/[a-zA-Z]{2}-[a-zA-Z-]{2,7}\/translation\.json$/.test(file.filename)
)
.map(file => file.filename);
console.log("Changed files:", changedFiles);
// Create a temporary directory for PR files
const tempDir = "pr-branch";
if (!fs.existsSync(tempDir)) {
fs.mkdirSync(tempDir, { recursive: true });
}
// Download and save each changed file
for (const file of changedFiles) {
const { data: fileContent } = await github.rest.repos.getContent({
owner: prRepoOwner,
repo: prRepoName,
path: file,
ref: branch,
});
const content = Buffer.from(fileContent.content, "base64").toString("utf-8");
const filePath = path.join(tempDir, file);
const dirPath = path.dirname(filePath);
if (!fs.existsSync(dirPath)) {
fs.mkdirSync(dirPath, { recursive: true });
}
fs.writeFileSync(filePath, content);
console.log(`Saved file: ${filePath}`);
}
// Output the list of changed files for further processing
const fileList = changedFiles.join(" ");
core.exportVariable("FILES_LIST", fileList);
console.log("Files saved and listed in FILES_LIST.");
// Determine reference file
let referenceFilePath;
if (changedFiles.includes("frontend/public/locales/en-GB/translation.json")) {
console.log("Using PR branch reference file.");
const { data: fileContent } = await github.rest.repos.getContent({
owner: prRepoOwner,
repo: prRepoName,
path: "frontend/public/locales/en-GB/translation.json",
ref: branch,
});
referenceFilePath = "pr-branch-messages_en_GB.properties";
const content = Buffer.from(fileContent.content, "base64").toString("utf-8");
fs.writeFileSync(referenceFilePath, content);
} else {
console.log("Using main branch reference file.");
const { data: fileContent } = await github.rest.repos.getContent({
owner: repoOwner,
repo: repoName,
path: "frontend/public/locales/en-GB/translation.json",
ref: "main",
});
referenceFilePath = "main-branch-messages_en_GB.properties";
const content = Buffer.from(fileContent.content, "base64").toString("utf-8");
fs.writeFileSync(referenceFilePath, content);
}
console.log(`Reference file path: ${referenceFilePath}`);
core.exportVariable("REFERENCE_FILE", referenceFilePath);
- name: Run Python script to check files
id: run-check
run: |
echo "Running Python script to check files..."
python .github/scripts/sync_translations.py \
            --actor "${{ github.event.pull_request.user.login }}" \
            --reference-file "${REFERENCE_FILE}" \
            --branch "pr-branch" \
            --files ${FILES_LIST} > result.txt  # FILES_LIST is space-separated; word-splitting is intentional
continue-on-error: true # Continue the job even if this step fails
- name: Capture output
id: capture-output
run: |
if [ -f result.txt ] && [ -s result.txt ]; then
echo "Test, capturing output..."
SCRIPT_OUTPUT=$(cat result.txt)
echo "SCRIPT_OUTPUT<<EOF" >> $GITHUB_ENV
echo "$SCRIPT_OUTPUT" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
echo "${SCRIPT_OUTPUT}"
# Determine job failure based on script output
if [[ "$SCRIPT_OUTPUT" == *"❌"* ]]; then
echo "FAIL_JOB=true" >> $GITHUB_ENV
else
echo "FAIL_JOB=false" >> $GITHUB_ENV
fi
else
echo "No update found."
echo "SCRIPT_OUTPUT=" >> $GITHUB_ENV
echo "FAIL_JOB=false" >> $GITHUB_ENV
fi
- name: Post comment on PR
if: env.SCRIPT_OUTPUT != ''
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
github-token: ${{ steps.setup-bot.outputs.token }}
script: |
const { GITHUB_REPOSITORY, SCRIPT_OUTPUT } = process.env;
const [repoOwner, repoName] = GITHUB_REPOSITORY.split('/');
const issueNumber = context.issue.number;
// Find existing comment
const comments = await github.rest.issues.listComments({
owner: repoOwner,
repo: repoName,
issue_number: issueNumber
});
const comment = comments.data.find(c => c.body.includes("## 🚀 Translation Verification Summary"));
// Only update or create comments by the action user
const expectedActor = "${{ steps.setup-bot.outputs.app-slug }}[bot]";
if (comment && comment.user.login === expectedActor) {
// Update existing comment
await github.rest.issues.updateComment({
owner: repoOwner,
repo: repoName,
comment_id: comment.id,
body: `## 🚀 Translation Verification Summary\n\n\n${SCRIPT_OUTPUT}\n`
});
console.log("Updated existing comment.");
} else if (!comment) {
// Create new comment if no existing comment is found
await github.rest.issues.createComment({
owner: repoOwner,
repo: repoName,
issue_number: issueNumber,
body: `## 🚀 Translation Verification Summary\n\n\n${SCRIPT_OUTPUT}\n`
});
console.log("Created new comment.");
} else {
console.log("Comment update attempt denied. Actor does not match.");
}
- name: Fail job if errors found
if: env.FAIL_JOB == 'true'
run: |
echo "Failing the job because errors were detected."
exit 1
- name: Cleanup temporary files
if: always()
run: |
echo "Cleaning up temporary files..."
rm -rf pr-branch
          rm -f pr-branch-translation_en_GB.json main-branch-translation_en_GB.json changed_files.txt result.txt
echo "Cleanup complete."
continue-on-error: true # Ensure cleanup runs even if previous steps fail
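
For reference, a failing check would post (or update) a PR comment along these lines; the values are illustrative, but the format follows the report assembled in sync_translations.py:

## 🚀 Translation Verification Summary

#### 🔄 Reference File: `main-branch-translation_en_GB.json` (branch root: `pr-branch`)

#### 📄 File: `frontend/public/locales/de-DE/translation.json`
❌ **Failed:** Differences detected.
- Missing keys (1): `home.cta`
- Added: 1, Pruned: 0
---

## 🧾 Summary
- Total added: 1
- Total pruned: 0

## ❌ Overall Status: **Failed**
@Ludy87 please check and sync the missing translations.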