diff --git a/.github/scripts/sync_translations.py b/.github/scripts/sync_translations.py
new file mode 100644
index 000000000..d93aa5a02
--- /dev/null
+++ b/.github/scripts/sync_translations.py
@@ -0,0 +1,342 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Author: Ludy87
+Description: This script processes JSON translation files for localization checks and synchronization.
+It compares target translation files in a branch with a reference JSON file to ensure consistency.
+The script performs two main tasks:
+1. CI check: Verifies that all keys present in the reference exist in the target (recursively). Optionally flags extras.
+2. Sync/update: Updates the target files to match the reference by adding missing keys (and optionally pruning extras).
+
+The script also provides functionality to print a CI-friendly report (to be used as PR comment output).
+
+Usage:
+    python sync_translations.py --reference-file <ref.json> [--branch <dir>] [--actor <name>] [--files <f1.json f2.json ...>] [--check] [--prune] [--dry-run]
+
+# Sample for Windows:
+# python .github\\scripts\\sync_translations.py --reference-file frontend\\public\\locales\\en-GB\\translation.json --branch "" --files frontend\\public\\locales\\de-DE\\translation.json --check --actor Ludy87
+"""
+
+from __future__ import annotations
+import argparse
+import json
+import os
+import re
+import shutil
+from pathlib import Path
+from copy import deepcopy
+from dataclasses import dataclass
+from typing import Any, Dict, Tuple, List
+
+JsonDict = Dict[str, Any]
+
+
+@dataclass
+class MergeStats:
+    added: int = 0
+    pruned: int = 0
+    missing_keys: list[str] | None = None
+    extra_keys: list[str] | None = None
+
+    def __post_init__(self):
+        self.missing_keys = []
+        self.extra_keys = []
+
+
+def is_mapping(v: Any) -> bool:
+    return isinstance(v, dict)
+
+
+def deep_merge_and_collect(
+    ref: Any, target: Any, *, prune_extras: bool, path: str = "", stats: MergeStats
+) -> Any:
+    """Recursively ensure `target` contains at least the structure/keys of `ref`."""
+    if is_mapping(ref) and is_mapping(target):
+        merged: JsonDict = {}
+
+        # Merge reference keys
+        for k, ref_val in ref.items():
+            new_path = f"{path}.{k}" if path else k
+            if k in target:
+                merged[k] = deep_merge_and_collect(
+                    ref_val,
+                    target[k],
+                    prune_extras=prune_extras,
+                    path=new_path,
+                    stats=stats,
+                )
+            else:
+                merged[k] = deepcopy(ref_val)
+                stats.added += 1
+                stats.missing_keys.append(new_path)
+
+        # Handle extras
+        if prune_extras:
+            for k in target.keys():
+                if k not in ref:
+                    stats.pruned += 1
+                    stats.extra_keys.append(f"{path}.{k}" if path else k)
+        else:
+            for k, v in target.items():
+                if k not in ref:
+                    merged[k] = deepcopy(v)
+                    stats.extra_keys.append(f"{path}.{k}" if path else k)
+
+        return merged
+
+    # Non-dict values -> keep target if it exists
+    return deepcopy(target if target is not None else ref)
+
+
+def order_like_reference(ref: Any, obj: Any) -> Any:
+    """Reorder dict keys in `obj` to follow the order in `ref` recursively."""
+    if not (is_mapping(ref) and is_mapping(obj)):
+        return obj
+    ordered = {}
+    for k in ref:
+        if k in obj:
+            ordered[k] = order_like_reference(ref[k], obj[k])
+    for k in obj:
+        if k not in ref:
+            ordered[k] = order_like_reference(None, obj[k])
+    return ordered
+
+
+def read_json(path: Path) -> Any:
+    with path.open("r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def write_json(path: Path, data: Any) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open("w", encoding="utf-8") as f:
+        json.dump(data, f, ensure_ascii=False, indent=2)
+        f.write("\n")
+
+
+def backup_file(path: Path) -> None:
+    backup = path.with_suffix(path.suffix + ".bak")
+    shutil.copy2(path, backup)
+
+
+def sanitize_actor(s: str | None) -> str | None:
+    if s is None:
+        return None
+    return re.sub(r"[^a-zA-Z0-9_\-]", "", s)
+
+
+def sanitize_branch(s: str | None) -> str | None:
+    if s is None:
+        return None
+    return re.sub(r"[^a-zA-Z0-9_\-\/\.]", "", s)
+
+
+def resolve_in_branch(branch: Path | None, p: Path) -> Path:
+    if p.is_absolute() or branch is None or str(branch) == "":
+        return p
+    return (branch / p).resolve()
+
+
+def assert_within_branch(base: Path | None, target: Path) -> None:
+    if base is None or str(base) == "":
+        return
+    base_resolved = base.resolve()
+    target_resolved = target.resolve()
+    try:  # commonpath avoids '/repo' vs '/repo2' prefix collisions; case-insensitive on Windows
+        ok = os.path.commonpath([base_resolved, target_resolved]) == str(base_resolved)
+    except ValueError:  # e.g. paths on different drives on Windows
+        ok = False
+    if not ok:
+        raise ValueError(f"Unsafe path outside branch: {target}")
+
+
+def process_file(
+    ref_path: Path,
+    target_path: Path,
+    *,
+    prune: bool,
+    dry_run: bool,
+    check_only: bool,
+    backup: bool,
+) -> Tuple[MergeStats, bool]:
+    ref = read_json(ref_path)
+    target = read_json(target_path)
+
+    stats = MergeStats()
+    merged = deep_merge_and_collect(ref, target, prune_extras=prune, stats=stats)
+    merged = order_like_reference(ref, merged)
+
+    success = not stats.missing_keys and (not prune or not stats.extra_keys)
+
+    if not check_only and not dry_run:
+        if backup:
+            backup_file(target_path)
+        write_json(target_path, merged)
+
+    return stats, success
+
+
+def find_all_locale_files(branch_root: Path, ref_path: Path) -> List[Path]:
+    """Find all translation.json files under locales/, excluding the reference file."""
+    locales_dir = branch_root / "frontend" / "public" / "locales"
+    if not locales_dir.exists():
+        return []
+    files = sorted(locales_dir.rglob("translation.json"))
+    ref_resolved = ref_path.resolve()
+    return [f for f in files if f.resolve() != ref_resolved]
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Compare and sync translation JSON files against a reference (with branch support)."
+    )
+    parser.add_argument(
+        "--reference-file",
+        "--ref",
+        dest="ref",
+        required=True,
+        type=Path,
+        help="Path to reference JSON file (e.g., frontend/public/locales/en-GB/translation.json)",
+    )
+    parser.add_argument(
+        "--files",
+        nargs="+",
+        required=False,
+        type=Path,
+        help="List of target JSON files (optional; if omitted, all locales/*/translation.json will be processed)",
+    )
+    parser.add_argument(
+        "--branch",
+        type=str,
+        required=False,
+        help="Branch/checkout root directory used as prefix for --reference-file and --files",
+    )
+    parser.add_argument(
+        "--actor",
+        type=str,
+        required=False,
+        help="Actor from PR (used for CI comment mention).",
+    )
+    parser.add_argument(
+        "--check",
+        action="store_true",
+        help="Check mode: do not write files, only print a CI-friendly report.",
+    )
+    parser.add_argument(
+        "--prune",
+        action="store_true",
+        help="Remove keys that are not present in the reference.",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Dry run: do not write changes (useful for local testing).",
+    )
+    parser.add_argument(
+        "--no-backup",
+        dest="backup",
+        action="store_false",
+        help="Disable .bak backup when writing in-place.",
+    )
+    parser.set_defaults(backup=True)
+    args = parser.parse_args()
+
+    # Sanitize inputs
+    actor = sanitize_actor(args.actor) if args.actor else "translator"
+    branch_str = sanitize_branch(args.branch) if args.branch else ""
+    branch_base: Path | None = Path(branch_str).resolve() if branch_str else Path.cwd()
+
+    # Resolve reference path
+    ref_path = resolve_in_branch(branch_base, args.ref)
+    assert_within_branch(branch_base, ref_path)
+    if not ref_path.exists():
+        raise SystemExit(f"Reference file not found: {ref_path}")
+
+    # Auto-detect files if none provided
+    if not args.files:
+        args.files = find_all_locale_files(branch_base, ref_path)
+    if not args.files:
+        raise SystemExit("No translation.json files found under locales/.")
+
+    # CI report
+    report: list[str] = []
+    total_added = total_pruned = 0
+    any_failed = False
+
+    report.append(
+        f"#### ๐Ÿ”„ Reference File: `{args.ref}` (branch root: `{branch_base if branch_base else '.'}`)"
+    )
+    report.append("")
+
+    for target_rel in args.files:
+        target_path = resolve_in_branch(branch_base, target_rel)
+        try:
+            assert_within_branch(branch_base, target_path)
+        except ValueError as e:
+            report.append(f"โŒ {e}")
+            any_failed = True
+            continue
+
+        if not target_path.exists():
+            report.append(
+                f"โŒ File not found: `{target_rel}` (resolved: `{target_path}`)"
+            )
+            any_failed = True
+            continue
+
+        stats, success = process_file(
+            ref_path,
+            target_path,
+            prune=args.prune,
+            dry_run=args.dry_run,
+            check_only=args.check,
+            backup=args.backup,
+        )
+
+        total_added += stats.added
+        total_pruned += stats.pruned
+
+        report.append(f"#### ๐Ÿ“„ File: `{target_rel}`")
+        if success:
+            report.append("โœ… **Passed:** All keys in sync.")
+        else:
+            report.append("โŒ **Failed:** Differences detected.")
+        if stats.missing_keys:
+            report.append(
+                f"- Missing keys ({len(stats.missing_keys)}): `{', '.join(stats.missing_keys)}`"
+            )
+        if stats.extra_keys:
+            if args.prune:
+                report.append(
+                    f"- Extra keys removed/flagged ({len(stats.extra_keys)}): `{', '.join(stats.extra_keys)}`"
+                )
+            else:
+                report.append(
+                    f"- Extra keys present ({len(stats.extra_keys)}): `{', '.join(stats.extra_keys)}`"
+                )
+
+        report.append(f"- Added: {stats.added}, Pruned: {stats.pruned}")
+        report.append("---")
+        report.append("")
+        if not success:
+            any_failed = True
+
+    # Summary
+    report.append("## ๐Ÿงพ Summary")
+    report.append(f"- Total added: {total_added}")
+    report.append(f"- Total pruned: {total_pruned}")
+    report.append("")
+    if any_failed:
+        report.append("## โŒ Overall Status: **Failed**")
+        report.append(f"@{actor} please check and sync the missing translations.")
+    else:
+        report.append("## โœ… Overall Status: **Success**")
+        report.append(f"Thanks @{actor} for keeping translations in sync! ๐ŸŽ‰")
+
+    # CI comment output
+    print("\n".join(report))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/check_locales.yml b/.github/workflows/check_locales.yml
new file mode 100644
index 000000000..dd98def8a
--- /dev/null
+++ b/.github/workflows/check_locales.yml
@@ -0,0 +1,292 @@
+name: Check Localization Files on PR
+
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened]
+    paths:
+      - "frontend/public/locales/*-*/translation.json"
+
+# cancel in-progress jobs if a new job is triggered
+# This is useful to avoid running multiple builds for the same branch if a new commit is pushed
+# or a pull request is updated.
+# It helps to save resources and time by ensuring that only the latest commit is built and tested
+# This is particularly useful for long-running jobs that may take a while to complete.
+# The `group` is set to a combination of the workflow name, event name, and branch name.
+# This ensures that jobs are grouped by the workflow and branch, allowing for cancellation of
+# in-progress jobs when a new commit is pushed to the same branch or a new pull request is opened.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number || github.ref_name || github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read # Allow read access to repository content
+
+jobs:
+  check-files:
+    if: github.event_name == 'pull_request_target'
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write # Allow posting comments on issues/PRs
+      pull-requests: write # Allow writing to pull requests
+    steps:
+      - name: Harden Runner
+        uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0
+        with:
+          egress-policy: audit
+
+      - name: Checkout main branch first
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+
+      - name: Setup GitHub App Bot
+        id: setup-bot
+        uses: ./.github/actions/setup-bot
+        with:
+          app-id: ${{ secrets.GH_APP_ID }}
+          private-key: ${{ secrets.GH_APP_PRIVATE_KEY }}
+
+      - name: Get PR data
+        id: get-pr-data
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        with:
+          github-token: ${{ steps.setup-bot.outputs.token }}
+          script: |
+            const prNumber = context.payload.pull_request.number;
+            const repoOwner = context.payload.repository.owner.login;
+            const repoName = context.payload.repository.name;
+            const branch = context.payload.pull_request.head.ref;
+
+            console.log(`PR Number: ${prNumber}`);
+            console.log(`Repo Owner: ${repoOwner}`);
+            console.log(`Repo Name: ${repoName}`);
+            console.log(`Branch: ${branch}`);
+
+            core.setOutput("pr_number", prNumber);
+            core.setOutput("repo_owner", repoOwner);
+            core.setOutput("repo_name", repoName);
+            core.setOutput("branch", branch);
+        continue-on-error: true
+
+      - name: Fetch PR changed files
+        id: fetch-pr-changes
+        env:
+          GH_TOKEN: ${{ steps.setup-bot.outputs.token }}
+        run: |
+          echo "Fetching PR changed files..."
+          echo "Getting list of changed files from PR..."
+          # Check if PR number exists
+          if [ -z "${{ steps.get-pr-data.outputs.pr_number }}" ]; then
+            echo "Error: PR number is empty"
+            exit 1
+          fi
+          # Get changed files and filter for translation files (locales are hyphenated, e.g. en-GB)
+          gh pr view ${{ steps.get-pr-data.outputs.pr_number }} --json files -q ".files[].path" | grep -E '^frontend/public/locales/[a-zA-Z]{2}-[a-zA-Z]{2,7}/translation\.json$' > changed_files.txt || echo "No matching translation files found in PR"
+          # Check if any files were found
+          if [ ! -s changed_files.txt ]; then
+            echo "No translation files changed in this PR"
+            echo "Workflow will exit early as no relevant files to check"
+            exit 0
+          fi
+          echo "Found $(wc -l < changed_files.txt) matching translation files"
+
+      - name: Determine reference file test
+        id: determine-file
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        with:
+          github-token: ${{ steps.setup-bot.outputs.token }}
+          script: |
+            const fs = require("fs");
+            const path = require("path");
+
+            const prNumber = ${{ steps.get-pr-data.outputs.pr_number }};
+            const repoOwner = "${{ steps.get-pr-data.outputs.repo_owner }}";
+            const repoName = "${{ steps.get-pr-data.outputs.repo_name }}";
+
+            const prRepoOwner = "${{ github.event.pull_request.head.repo.owner.login }}";
+            const prRepoName = "${{ github.event.pull_request.head.repo.name }}";
+            const branch = "${{ steps.get-pr-data.outputs.branch }}";
+
+            console.log(`Determining reference file for PR #${prNumber}`);
+
+            // Validate inputs
+            const validateInput = (input, regex, name) => {
+              if (!regex.test(input)) {
+                throw new Error(`Invalid ${name}: ${input}`);
+              }
+            };
+
+            validateInput(repoOwner, /^[a-zA-Z0-9_-]+$/, "repository owner");
+            validateInput(repoName, /^[a-zA-Z0-9._-]+$/, "repository name");
+            validateInput(branch, /^[a-zA-Z0-9._/-]+$/, "branch name");
+
+            // Get the list of changed files in the PR
+            const { data: files } = await github.rest.pulls.listFiles({
+              owner: repoOwner,
+              repo: repoName,
+              pull_number: prNumber,
+            });
+
+            // Filter for relevant files (hyphenated locale folders, e.g. en-GB, pt-BR)
+            const changedFiles = files
+              .filter(file =>
+                file.status !== "removed" &&
+                /^frontend\/public\/locales\/[a-zA-Z]{2}-[a-zA-Z]{2,7}\/translation\.json$/.test(file.filename)
+              )
+              .map(file => file.filename);
+
+            console.log("Changed files:", changedFiles);
+
+            // Create a temporary directory for PR files
+            const tempDir = "pr-branch";
+            if (!fs.existsSync(tempDir)) {
+              fs.mkdirSync(tempDir, { recursive: true });
+            }
+
+            // Download and save each changed file
+            for (const file of changedFiles) {
+              const { data: fileContent } = await github.rest.repos.getContent({
+                owner: prRepoOwner,
+                repo: prRepoName,
+                path: file,
+                ref: branch,
+              });
+
+              const content = Buffer.from(fileContent.content, "base64").toString("utf-8");
+              const filePath = path.join(tempDir, file);
+              const dirPath = path.dirname(filePath);
+
+              if (!fs.existsSync(dirPath)) {
+                fs.mkdirSync(dirPath, { recursive: true });
+              }
+
+              fs.writeFileSync(filePath, content);
+              console.log(`Saved file: ${filePath}`);
+            }
+
+            // Output the list of changed files for further processing
+            const fileList = changedFiles.join(" ");
+            core.exportVariable("FILES_LIST", fileList);
+            console.log("Files saved and listed in FILES_LIST.");
+
+            // Determine reference file (written inside tempDir: the Python script is
+            // invoked with --branch "pr-branch" and rejects paths outside that root,
+            // so REFERENCE_FILE must be branch-relative and the copy must live there).
+            let referenceFilePath;
+            if (changedFiles.includes("frontend/public/locales/en-GB/translation.json")) {
+              console.log("Using PR branch reference file.");
+              const { data: fileContent } = await github.rest.repos.getContent({
+                owner: prRepoOwner,
+                repo: prRepoName,
+                path: "frontend/public/locales/en-GB/translation.json",
+                ref: branch,
+              });
+
+              referenceFilePath = "reference_en_GB_translation.json";
+              const content = Buffer.from(fileContent.content, "base64").toString("utf-8");
+              fs.writeFileSync(path.join(tempDir, referenceFilePath), content);
+            } else {
+              console.log("Using main branch reference file.");
+              const { data: fileContent } = await github.rest.repos.getContent({
+                owner: repoOwner,
+                repo: repoName,
+                path: "frontend/public/locales/en-GB/translation.json",
+                ref: "main",
+              });
+
+              referenceFilePath = "reference_en_GB_translation.json";
+              const content = Buffer.from(fileContent.content, "base64").toString("utf-8");
+              fs.writeFileSync(path.join(tempDir, referenceFilePath), content);
+            }
+
+            console.log(`Reference file path: ${tempDir}/${referenceFilePath}`);
+            core.exportVariable("REFERENCE_FILE", referenceFilePath);
+
+      - name: Run Python script to check files
+        id: run-check
+        run: |
+          echo "Running Python script to check files..."
+          # FILES_LIST is a space-separated string; expand unquoted so argparse gets
+          # one path per argument (locale paths never contain spaces). --check keeps
+          # the run read-only: no merged files or .bak backups are written in CI.
+          python .github/scripts/sync_translations.py \
+            --actor "${{ github.event.pull_request.user.login }}" \
+            --reference-file "${REFERENCE_FILE}" \
+            --branch "pr-branch" --check \
+            --files ${FILES_LIST} > result.txt
+        continue-on-error: true # Continue the job even if this step fails
+
+      - name: Capture output
+        id: capture-output
+        run: |
+          if [ -f result.txt ] && [ -s result.txt ]; then
+            echo "Test, capturing output..."
+            SCRIPT_OUTPUT=$(cat result.txt)
+            echo "SCRIPT_OUTPUT<<EOF" >> $GITHUB_ENV
+            echo "$SCRIPT_OUTPUT" >> $GITHUB_ENV
+            echo "EOF" >> $GITHUB_ENV
+            echo "${SCRIPT_OUTPUT}"
+
+            # Determine job failure based on script output
+            if [[ "$SCRIPT_OUTPUT" == *"โŒ"* ]]; then
+              echo "FAIL_JOB=true" >> $GITHUB_ENV
+            else
+              echo "FAIL_JOB=false" >> $GITHUB_ENV
+            fi
+          else
+            echo "No update found."
+            echo "SCRIPT_OUTPUT=" >> $GITHUB_ENV
+            echo "FAIL_JOB=false" >> $GITHUB_ENV
+          fi
+
+      - name: Post comment on PR
+        if: env.SCRIPT_OUTPUT != ''
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        with:
+          github-token: ${{ steps.setup-bot.outputs.token }}
+          script: |
+            const { GITHUB_REPOSITORY, SCRIPT_OUTPUT } = process.env;
+            const [repoOwner, repoName] = GITHUB_REPOSITORY.split('/');
+            const issueNumber = context.issue.number;
+
+            // Find existing comment
+            const comments = await github.rest.issues.listComments({
+              owner: repoOwner,
+              repo: repoName,
+              issue_number: issueNumber
+            });
+
+            const comment = comments.data.find(c => c.body.includes("## ๐Ÿš€ Translation Verification Summary"));
+
+            // Only update or create comments by the action user
+            const expectedActor = "${{ steps.setup-bot.outputs.app-slug }}[bot]";
+
+            if (comment && comment.user.login === expectedActor) {
+              // Update existing comment
+              await github.rest.issues.updateComment({
+                owner: repoOwner,
+                repo: repoName,
+                comment_id: comment.id,
+                body: `## ๐Ÿš€ Translation Verification Summary\n\n\n${SCRIPT_OUTPUT}\n`
+              });
+              console.log("Updated existing comment.");
+            } else if (!comment) {
+              // Create new comment if no existing comment is found
+              await github.rest.issues.createComment({
+                owner: repoOwner,
+                repo: repoName,
+                issue_number: issueNumber,
+                body: `## ๐Ÿš€ Translation Verification Summary\n\n\n${SCRIPT_OUTPUT}\n`
+              });
+              console.log("Created new comment.");
+            } else {
+              console.log("Comment update attempt denied. Actor does not match.");
+            }
+
+      - name: Fail job if errors found
+        if: env.FAIL_JOB == 'true'
+        run: |
+          echo "Failing the job because errors were detected."
+          exit 1
+
+      - name: Cleanup temporary files
+        if: always()
+        run: |
+          echo "Cleaning up temporary files..."
+          rm -rf pr-branch
+          rm -f changed_files.txt result.txt
+          echo "Cleanup complete."
+        continue-on-error: true # Ensure cleanup runs even if previous steps fail