Stirling-PDF/scripts/translations/toml_validator.py
Anthony Stirling 65a3eeca76
Toml (#5115)
# Description of Changes

<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Translations (if applicable)

- [ ] I ran
[`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.
2025-12-03 09:57:00 +00:00

199 lines
6.1 KiB
Python

#!/usr/bin/env python3
"""
TOML Validator for Translation Files
Validates TOML syntax in translation files and reports detailed error information.
Useful for validating translation files before merging.
Usage:
python3 toml_validator.py <file_or_pattern>
python3 toml_validator.py ar_AR_batch_*.toml
python3 toml_validator.py ar_AR_batch_1_of_3.toml
python3 toml_validator.py --all-batches ar_AR
"""
import sys
import argparse
import glob
from pathlib import Path
import tomllib
def get_line_context(file_path, line_num, context_lines=3):
"""Get lines around the error for context"""
try:
with open(file_path, 'r', encoding='utf-8') as f:
lines = f.readlines()
start = max(0, line_num - context_lines - 1)
end = min(len(lines), line_num + context_lines)
context = []
for i in range(start, end):
marker = ">>> " if i == line_num - 1 else " "
context.append(f"{marker}{i+1:4d}: {lines[i].rstrip()}")
return "\n".join(context)
except Exception as e:
return f"Could not read context: {e}"
def get_character_context(file_path, char_pos, context_chars=100):
"""Get characters around the error position"""
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
start = max(0, char_pos - context_chars)
end = min(len(content), char_pos + context_chars)
before = content[start:char_pos]
error_char = content[char_pos] if char_pos < len(content) else "EOF"
after = content[char_pos+1:end]
return {
'before': before,
'error_char': error_char,
'after': after,
'display': f"{before}[{error_char}]{after}"
}
except Exception as e:
return None
def count_keys(data, prefix=''):
"""Recursively count all keys in nested TOML structure"""
count = 0
if isinstance(data, dict):
for key, value in data.items():
if isinstance(value, dict):
count += count_keys(value, f"{prefix}.{key}" if prefix else key)
else:
count += 1
return count
def validate_toml_file(file_path):
"""Validate a single TOML file and return detailed error info"""
result = {
'file': str(file_path),
'valid': False,
'error': None,
'line': None,
'context': None,
'entry_count': 0
}
try:
with open(file_path, 'rb') as f:
data = tomllib.load(f)
result['valid'] = True
result['entry_count'] = count_keys(data)
except Exception as e:
error_msg = str(e)
result['error'] = error_msg
# Try to extract line number from error message
import re
line_match = re.search(r'line (\d+)', error_msg, re.IGNORECASE)
if line_match:
line_num = int(line_match.group(1))
result['line'] = line_num
result['context'] = get_line_context(file_path, line_num)
except FileNotFoundError:
result['error'] = "File not found"
return result
def print_validation_result(result, brief=False, quiet=False):
"""Print validation result in human-readable format"""
if result['valid']:
if not quiet:
print(f"{result['file']}")
if not brief:
print(f" Valid TOML with {result['entry_count']} entries")
else:
print(f"{result['file']}")
print(f" Error: {result['error']}")
if result['line']:
print(f" Line: {result['line']}")
if result['context'] and not brief:
print(f"\n Context:")
print(f" {result['context'].replace(chr(10), chr(10) + ' ')}")
if not brief:
print(f"\n Common fixes:")
print(f" - Check for missing quotes around keys or values")
print(f" - Ensure proper escaping of special characters")
print(f" - Verify table header syntax: [section.subsection]")
print(f" - Check for duplicate keys in the same table")
def main():
parser = argparse.ArgumentParser(description='Validate TOML translation files')
parser.add_argument('files', nargs='*', help='TOML file(s) or pattern to validate')
parser.add_argument('--all-batches', metavar='LANG',
help='Validate all batch files for a language (e.g., ar_AR)')
parser.add_argument('--brief', action='store_true',
help='Show brief output without context')
parser.add_argument('--quiet', action='store_true',
help='Only show files with errors')
args = parser.parse_args()
# Collect files to validate
files_to_validate = []
if args.all_batches:
# Find all batch files for the specified language
pattern = f"{args.all_batches}_batch_*.toml"
files_to_validate = glob.glob(pattern)
if not files_to_validate:
print(f"No batch files found matching pattern: {pattern}")
sys.exit(1)
elif args.files:
for file_pattern in args.files:
matched_files = glob.glob(file_pattern)
if matched_files:
files_to_validate.extend(matched_files)
else:
# Try as literal filename
files_to_validate.append(file_pattern)
else:
parser.print_help()
sys.exit(1)
# Validate all files
results = []
for file_path in files_to_validate:
result = validate_toml_file(file_path)
results.append(result)
print_validation_result(result, brief=args.brief, quiet=args.quiet)
if not args.brief and not args.quiet:
print() # Empty line between files
# Summary
total = len(results)
valid = sum(1 for r in results if r['valid'])
invalid = total - valid
if not args.quiet:
print(f"\n{'='*60}")
print(f"Summary: {valid}/{total} files valid")
if invalid > 0:
print(f" {invalid} file(s) with errors")
# Exit with error code if any files invalid
sys.exit(0 if invalid == 0 else 1)
if __name__ == '__main__':
main()