#!/usr/bin/env python3 """ TOML Validator for Translation Files Validates TOML syntax in translation files and reports detailed error information. Useful for validating translation files before merging. Usage: python3 toml_validator.py python3 toml_validator.py ar_AR_batch_*.toml python3 toml_validator.py ar_AR_batch_1_of_3.toml python3 toml_validator.py --all-batches ar_AR """ import sys import argparse import glob import tomllib def get_line_context(file_path, line_num, context_lines=3): """Get lines around the error for context""" try: with open(file_path, "r", encoding="utf-8") as f: lines = f.readlines() start = max(0, line_num - context_lines - 1) end = min(len(lines), line_num + context_lines) context = [] for i in range(start, end): marker = ">>> " if i == line_num - 1 else " " context.append(f"{marker}{i + 1:4d}: {lines[i].rstrip()}") return "\n".join(context) except Exception as e: return f"Could not read context: {e}" def get_character_context(file_path, char_pos, context_chars=100): """Get characters around the error position""" try: with open(file_path, "r", encoding="utf-8") as f: content = f.read() start = max(0, char_pos - context_chars) end = min(len(content), char_pos + context_chars) before = content[start:char_pos] error_char = content[char_pos] if char_pos < len(content) else "EOF" after = content[char_pos + 1 : end] return { "before": before, "error_char": error_char, "after": after, "display": f"{before}[{error_char}]{after}", } except Exception: return None def count_keys(data, prefix=""): """Recursively count all keys in nested TOML structure""" count = 0 if isinstance(data, dict): for key, value in data.items(): if isinstance(value, dict): count += count_keys(value, f"{prefix}.{key}" if prefix else key) else: count += 1 return count def validate_toml_file(file_path): """Validate a single TOML file and return detailed error info""" result = { "file": str(file_path), "valid": False, "error": None, "line": None, "context": None, "entry_count": 0, } try: with open(file_path, "rb") as f: data = tomllib.load(f) result["valid"] = True result["entry_count"] = count_keys(data) except Exception as e: error_msg = str(e) result["error"] = error_msg # Try to extract line number from error message import re line_match = re.search(r"line (\d+)", error_msg, re.IGNORECASE) if line_match: line_num = int(line_match.group(1)) result["line"] = line_num result["context"] = get_line_context(file_path, line_num) except FileNotFoundError: result["error"] = "File not found" return result def print_validation_result(result, brief=False, quiet=False): """Print validation result in human-readable format""" if result["valid"]: if not quiet: print(f"✓ {result['file']}") if not brief: print(f" Valid TOML with {result['entry_count']} entries") else: print(f"✗ {result['file']}") print(f" Error: {result['error']}") if result["line"]: print(f" Line: {result['line']}") if result["context"] and not brief: print("\n Context:") print(f" {result['context'].replace(chr(10), chr(10) + ' ')}") if not brief: print("\n Common fixes:") print(" - Check for missing quotes around keys or values") print(" - Ensure proper escaping of special characters") print(" - Verify table header syntax: [section.subsection]") print(" - Check for duplicate keys in the same table") def main(): parser = argparse.ArgumentParser(description="Validate TOML translation files") parser.add_argument("files", nargs="*", help="TOML file(s) or pattern to validate") parser.add_argument( "--all-batches", metavar="LANG", help="Validate all batch files for a language (e.g., ar_AR)", ) parser.add_argument( "--brief", action="store_true", help="Show brief output without context" ) parser.add_argument( "--quiet", action="store_true", help="Only show files with errors" ) args = parser.parse_args() # Collect files to validate files_to_validate = [] if args.all_batches: # Find all batch files for the specified language pattern = f"{args.all_batches}_batch_*.toml" files_to_validate = glob.glob(pattern) if not files_to_validate: print(f"No batch files found matching pattern: {pattern}") sys.exit(1) elif args.files: for file_pattern in args.files: matched_files = glob.glob(file_pattern) if matched_files: files_to_validate.extend(matched_files) else: # Try as literal filename files_to_validate.append(file_pattern) else: parser.print_help() sys.exit(1) # Validate all files results = [] for file_path in files_to_validate: result = validate_toml_file(file_path) results.append(result) print_validation_result(result, brief=args.brief, quiet=args.quiet) if not args.brief and not args.quiet: print() # Empty line between files # Summary total = len(results) valid = sum(1 for r in results if r["valid"]) invalid = total - valid if not args.quiet: print(f"\n{'=' * 60}") print(f"Summary: {valid}/{total} files valid") if invalid > 0: print(f" {invalid} file(s) with errors") # Exit with error code if any files invalid sys.exit(0 if invalid == 0 else 1) if __name__ == "__main__": main()