Stirling-PDF/scripts/translations/toml_validator.py

#!/usr/bin/env python3
"""
TOML Validator for Translation Files

Validates TOML syntax in translation files and reports detailed error information.
Useful for validating translation files before merging.

Usage:
    python3 toml_validator.py <file_or_pattern>
    python3 toml_validator.py ar_AR_batch_*.toml
    python3 toml_validator.py ar_AR_batch_1_of_3.toml
    python3 toml_validator.py --all-batches ar_AR
"""

import sys
import argparse
import glob

import tomllib


def get_line_context(file_path, line_num, context_lines=3):
    """Return a numbered excerpt of ``file_path`` surrounding ``line_num``.

    Args:
        file_path: Path of the UTF-8 text file to read.
        line_num: 1-based line number to highlight.
        context_lines: How many lines to show before and after ``line_num``.

    Returns:
        The excerpt as one newline-joined string. Every row is prefixed with
        its 1-based line number; the row for ``line_num`` additionally starts
        with ``>>> ``. If anything goes wrong while reading or formatting,
        a ``"Could not read context: ..."`` message is returned instead.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            all_lines = list(handle)

        target = line_num - 1  # zero-based index of the highlighted line
        first = max(0, target - context_lines)
        last = min(len(all_lines), line_num + context_lines)

        return "\n".join(
            f"{'>>> ' if idx == target else '    '}"
            f"{idx + 1:4d}: {all_lines[idx].rstrip()}"
            for idx in range(first, last)
        )
    except Exception as err:
        # Best effort only: context is a nicety, never a reason to fail.
        return f"Could not read context: {err}"


def get_character_context(file_path, char_pos, context_chars=100):
    """Return the text surrounding character offset ``char_pos`` in a file.

    Args:
        file_path: Path of the UTF-8 text file to read.
        char_pos: Zero-based character offset of interest.
        context_chars: How many characters to include on each side.

    Returns:
        A dict with the keys ``before``, ``error_char``, ``after`` and
        ``display`` (the three parts joined, with the character at
        ``char_pos`` wrapped in square brackets). When ``char_pos`` is at or
        past the end of the file, ``error_char`` is the string ``"EOF"``.
        ``None`` is returned if the file cannot be read or sliced.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            text = handle.read()

        window_start = max(0, char_pos - context_chars)
        window_end = min(len(text), char_pos + context_chars)

        if char_pos < len(text):
            offending = text[char_pos]
        else:
            offending = "EOF"

        leading = text[window_start:char_pos]
        trailing = text[char_pos + 1 : window_end]

        return {
            "before": leading,
            "error_char": offending,
            "after": trailing,
            "display": f"{leading}[{offending}]{trailing}",
        }
    except Exception:
        return None


def count_keys(data, prefix=""):
    """Count the leaf (non-table) keys in a nested TOML mapping.

    Args:
        data: Parsed TOML data; anything that is not a ``dict`` counts as 0.
        prefix: Dotted path of the table being visited. It is extended and
            passed down during recursion but does not affect the result.

    Returns:
        The number of keys whose value is not itself a ``dict``, summed over
        all nesting levels. Nested tables contribute only their own leaves.
    """
    if not isinstance(data, dict):
        return 0

    return sum(
        count_keys(child, f"{prefix}.{name}" if prefix else name)
        if isinstance(child, dict)
        else 1
        for name, child in data.items()
    )


def validate_toml_file(file_path):
    """Validate a single TOML file and return detailed error info"""
    result = {
        "file": str(file_path),
        "valid": False,
        "error": None,
        "line": None,
        "context": None,
        "entry_count": 0,
    }

    try:
        with open(file_path, "rb") as f:
            data = tomllib.load(f)

        result["valid"] = True
        result["entry_count"] = count_keys(data)

    except Exception as e:
        error_msg = str(e)
        result["error"] = error_msg

        # Try to extract line number from error message
        import re

        line_match = re.search(r"line (\d+)", error_msg, re.IGNORECASE)
        if line_match:
            line_num = int(line_match.group(1))
            result["line"] = line_num
            result["context"] = get_line_context(file_path, line_num)

    except FileNotFoundError:
        result["error"] = "File not found"

    return result


def print_validation_result(result, brief=False, quiet=False):
    """Write one validation result to stdout in human-readable form.

    Args:
        result: Result dict as produced by ``validate_toml_file``.
        brief: When true, omit the entry count for valid files and the
            context excerpt and fix hints for invalid ones.
        quiet: When true, print nothing for valid files. Invalid files are
            always reported.
    """
    if result["valid"]:
        if quiet:
            return
        print(f"✓ {result['file']}")
        if not brief:
            print(f"  Valid TOML with {result['entry_count']} entries")
        return

    print(f"✗ {result['file']}")
    print(f"  Error: {result['error']}")

    line_number = result["line"]
    if line_number:
        print(f"  Line: {line_number}")

    context = result["context"]
    if brief:
        return

    if context:
        # Indent continuation lines so the excerpt lines up under the header.
        indented = context.replace(chr(10), chr(10) + "  ")
        print("\n  Context:")
        print(f"  {indented}")

    print("\n  Common fixes:")
    hints = (
        "  - Check for missing quotes around keys or values",
        "  - Ensure proper escaping of special characters",
        "  - Verify table header syntax: [section.subsection]",
        "  - Check for duplicate keys in the same table",
    )
    for hint in hints:
        print(hint)


def main():
    """Command-line entry point: validate the requested TOML files.

    Files are taken either from ``--all-batches LANG`` (every file matching
    ``LANG_batch_*.toml`` in the current directory) or from the positional
    file names / glob patterns. Each file is validated and reported, then a
    summary is printed unless ``--quiet`` is given.

    Exits with status 0 when every file is valid and 1 otherwise, including
    when no input was given or no batch file matched.
    """
    parser = argparse.ArgumentParser(description="Validate TOML translation files")
    parser.add_argument("files", nargs="*", help="TOML file(s) or pattern to validate")
    parser.add_argument(
        "--all-batches",
        metavar="LANG",
        help="Validate all batch files for a language (e.g., ar_AR)",
    )
    parser.add_argument(
        "--brief", action="store_true", help="Show brief output without context"
    )
    parser.add_argument(
        "--quiet", action="store_true", help="Only show files with errors"
    )

    args = parser.parse_args()

    # Collect files to validate
    files_to_validate = []

    if args.all_batches:
        # Find all batch files for the specified language
        pattern = f"{args.all_batches}_batch_*.toml"
        # glob.glob returns matches in arbitrary order; sort so the report
        # is reproducible across runs and filesystems.
        files_to_validate = sorted(glob.glob(pattern))
        if not files_to_validate:
            print(f"No batch files found matching pattern: {pattern}")
            sys.exit(1)
    elif args.files:
        for file_pattern in args.files:
            matched_files = sorted(glob.glob(file_pattern))
            if matched_files:
                files_to_validate.extend(matched_files)
            else:
                # Try as literal filename so a missing file is still reported
                files_to_validate.append(file_pattern)
    else:
        parser.print_help()
        sys.exit(1)

    # Validate all files
    results = []
    for file_path in files_to_validate:
        result = validate_toml_file(file_path)
        results.append(result)
        print_validation_result(result, brief=args.brief, quiet=args.quiet)
        if not args.brief and not args.quiet:
            print()  # Empty line between files

    # Summary
    total = len(results)
    valid = sum(1 for r in results if r["valid"])
    invalid = total - valid

    if not args.quiet:
        print(f"\n{'=' * 60}")
        print(f"Summary: {valid}/{total} files valid")
        if invalid > 0:
            print(f"  {invalid} file(s) with errors")

    # Exit with error code if any files invalid
    sys.exit(0 if invalid == 0 else 1)


if __name__ == "__main__":
    main()