Mirror of https://github.com/Frooodle/Stirling-PDF.git (synced 2025-09-26 17:52:59 +02:00)
Merge branch 'V2' into feature/toastsAndErrorHandling
Commit: 78c6dcd185

.gitignore (vendored) | 7
@@ -203,3 +203,10 @@ id_ed25519.pub

# node_modules
node_modules/

+ # Translation temp files
+ *_compact.json
+ *compact*.json
+ test_batch.json
+ *.backup.*.json
+ frontend/public/locales/*/translation.backup*.json
@@ -2,6 +2,7 @@
<html lang="en-GB">
<head>
<meta charset="UTF-8" />
+ <base href="%BASE_URL%" />
<link rel="icon" href="/favicon.ico" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="theme-color" content="#000000" />
File diff suppressed because it is too large (×3)
@@ -5,6 +5,7 @@ import LocalIcon from './LocalIcon';
import { useTranslation } from 'react-i18next';
import { useFileHandler } from '../../hooks/useFileHandler';
import { useFilesModalContext } from '../../contexts/FilesModalContext';
+ import { BASE_PATH } from '../../constants/app';

const LandingPage = () => {
const { addFiles } = useFileHandler();
@@ -72,7 +73,7 @@ const LandingPage = () => {
}}
>
<img
- src={colorScheme === 'dark' ? '/branding/StirlingPDFLogoNoTextDark.svg' : '/branding/StirlingPDFLogoNoTextLight.svg'}
+ src={colorScheme === 'dark' ? `${BASE_PATH}/branding/StirlingPDFLogoNoTextDark.svg` : `${BASE_PATH}/branding/StirlingPDFLogoNoTextLight.svg`}
alt="Stirling PDF Logo"
style={{
height: 'auto',
@@ -98,7 +99,7 @@ const LandingPage = () => {
{/* Stirling PDF Branding */}
<Group gap="xs" align="center">
<img
- src={colorScheme === 'dark' ? '/branding/StirlingPDFLogoWhiteText.svg' : '/branding/StirlingPDFLogoGreyText.svg'}
+ src={colorScheme === 'dark' ? `${BASE_PATH}/branding/StirlingPDFLogoWhiteText.svg` : `${BASE_PATH}/branding/StirlingPDFLogoGreyText.svg`}
alt="Stirling PDF"
style={{ height: '2.2rem', width: 'auto' }}
/>
@@ -6,6 +6,7 @@ import { useTooltipPosition } from '../../hooks/useTooltipPosition';
import { TooltipTip } from '../../types/tips';
import { TooltipContent } from './tooltip/TooltipContent';
import { useSidebarContext } from '../../contexts/SidebarContext';
+ import { BASE_PATH } from '../../constants/app';
import styles from './tooltip/Tooltip.module.css';

export interface TooltipProps {
@@ -328,7 +329,7 @@ export const Tooltip: React.FC<TooltipProps> = ({
<div className={styles['tooltip-logo']}>
{header.logo || (
<img
- src="/logo-tooltip.svg"
+ src={`${BASE_PATH}/logo-tooltip.svg`}
alt="Stirling PDF"
style={{ width: '1.4rem', height: '1.4rem', display: 'block' }}
/>
@@ -5,3 +5,19 @@ export const getBaseUrl = (): string => {
const { config } = useAppConfig();
return config?.baseUrl || 'https://stirling.com';
};
+
+ // Base path from Vite config - build-time constant, normalized (no trailing slash)
+ // When no subpath, use empty string instead of '.' to avoid relative path issues
+ export const BASE_PATH = (import.meta.env.BASE_URL || '/').replace(/\/$/, '').replace(/^\.$/, '');
+
+ /** For in-app navigations when you must touch window.location (rare). */
+ export const withBasePath = (path: string): string => {
+   const clean = path.startsWith('/') ? path : `/${path}`;
+   return `${BASE_PATH}${clean}`;
+ };
+
+ /** For OAuth (needs absolute URL with scheme+host) */
+ export const absoluteWithBasePath = (path: string): string => {
+   const clean = path.startsWith('/') ? path : `/${path}`;
+   return `${window.location.origin}${BASE_PATH}${clean}`;
+ };
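For context, a minimal sketch of how these helpers behave under a subpath build; the `/stirling-pdf` prefix and the paths below are hypothetical, used only for illustration:

```typescript
import { BASE_PATH, withBasePath, absoluteWithBasePath } from './constants/app';

// Assuming a build where import.meta.env.BASE_URL === '/stirling-pdf/':
//   BASE_PATH                        -> '/stirling-pdf'
//   withBasePath('/compress')        -> '/stirling-pdf/compress'
//   withBasePath('compress')         -> '/stirling-pdf/compress'  (leading slash added)
//   absoluteWithBasePath('/login')   -> 'https://example.com/stirling-pdf/login'
//                                       (origin taken from window.location)
// With the default build (base './'), BASE_PATH is '' and the helpers are no-ops
// apart from normalising the leading slash.
console.log(withBasePath('/compress'));
```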
@@ -1,5 +1,6 @@
import { useEffect, useState } from 'react';
import { useTranslation } from 'react-i18next';
+ import { BASE_PATH } from '../constants/app';

declare global {
interface Window {
@@ -37,17 +38,17 @@ export const useCookieConsent = ({ analyticsEnabled = false }: CookieConsentConf
// Load the cookie consent CSS files first
const mainCSS = document.createElement('link');
mainCSS.rel = 'stylesheet';
- mainCSS.href = '/css/cookieconsent.css';
+ mainCSS.href = `${BASE_PATH}/css/cookieconsent.css`;
document.head.appendChild(mainCSS);

const customCSS = document.createElement('link');
customCSS.rel = 'stylesheet';
- customCSS.href = '/css/cookieconsentCustomisation.css';
+ customCSS.href = `${BASE_PATH}/css/cookieconsentCustomisation.css`;
document.head.appendChild(customCSS);

// Load the cookie consent library
const script = document.createElement('script');
- script.src = '/js/thirdParty/cookieconsent.umd.js';
+ script.src = `${BASE_PATH}/js/thirdParty/cookieconsent.umd.js`;
script.onload = () => {
// Small delay to ensure DOM is ready
setTimeout(() => {
@@ -7,6 +7,7 @@ import { ToolId } from '../types/toolId';
import { parseToolRoute, updateToolRoute, clearToolRoute } from '../utils/urlRouting';
import { ToolRegistry } from '../data/toolsTaxonomy';
import { firePixel } from '../utils/scarfTracking';
+ import { withBasePath } from '../constants/app';

/**
* Hook to sync workbench and tool with URL using registry
@@ -51,7 +52,8 @@
} else if (prevSelectedTool.current !== null) {
// Only clear URL if we had a tool before (user navigated away)
// Don't clear on initial load when both current and previous are null
- if (window.location.pathname !== '/') {
+ const homePath = withBasePath('/');
+ if (window.location.pathname !== homePath) {
clearToolRoute(false); // Use pushState for user navigation
}
}
@@ -74,7 +74,9 @@ i18n
loadPath: (lngs: string[], namespaces: string[]) => {
// Map 'en' to 'en-GB' for loading translations
const lng = lngs[0] === 'en' ? 'en-GB' : lngs[0];
- return `/locales/${lng}/${namespaces[0]}.json`;
+ const basePath = import.meta.env.BASE_URL || '/';
+ const cleanBasePath = basePath.endsWith('/') ? basePath.slice(0, -1) : basePath;
+ return `${cleanBasePath}/locales/${lng}/${namespaces[0]}.json`;
},
},
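As a worked example of the `loadPath` above (input values hypothetical): with the app served under a subpath so that `import.meta.env.BASE_URL` is `'/stirling-pdf/'`, requesting the `'en'` language resolves to the en-GB file under that prefix:

```typescript
// Minimal sketch of the loadPath mapping, with hypothetical inputs.
const BASE_URL = '/stirling-pdf/';   // what Vite would inject for a subpath build
const lngs = ['en'];
const namespaces = ['translation'];

const lng = lngs[0] === 'en' ? 'en-GB' : lngs[0];
const basePath = BASE_URL || '/';
const cleanBasePath = basePath.endsWith('/') ? basePath.slice(0, -1) : basePath;

// -> '/stirling-pdf/locales/en-GB/translation.json'
console.log(`${cleanBasePath}/locales/${lng}/${namespaces[0]}.json`);
```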
@@ -10,6 +10,7 @@ import App from './App';
import './i18n'; // Initialize i18next
import posthog from 'posthog-js';
import { PostHogProvider } from 'posthog-js/react';
+ import { BASE_PATH } from './constants/app';

// Compute initial color scheme
function getInitialScheme(): 'light' | 'dark' {
@@ -60,7 +61,7 @@ root.render(
<PostHogProvider
client={posthog}
>
- <BrowserRouter>
+ <BrowserRouter basename={BASE_PATH}>
<App />
</BrowserRouter>
</PostHogProvider>
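A brief note on what `basename` buys here (a sketch; the routes shown are illustrative, not taken from the app): react-router prefixes the basename onto every in-app link and navigation, so ordinary components never need `withBasePath` themselves; that helper is only for code that touches `window.location` directly.

```tsx
import { Link, useNavigate } from 'react-router-dom';

// Inside <BrowserRouter basename="/stirling-pdf"> (hypothetical subpath):
// <Link to="/compress"> renders an href of '/stirling-pdf/compress', and
// navigate('/merge') pushes '/stirling-pdf/merge' onto the history stack.
function ExampleNav() {
  const navigate = useNavigate();
  return (
    <div>
      <Link to="/compress">Compress</Link>
      <button onClick={() => navigate('/merge')}>Merge</button>
    </div>
  );
}
```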
@@ -1,10 +1,11 @@
import React, { useEffect } from "react";
import { BaseToolProps } from "../types/tool";
+ import { withBasePath } from "../constants/app";

const SwaggerUI: React.FC<BaseToolProps> = () => {
useEffect(() => {
// Redirect to Swagger UI
- window.open("/swagger-ui/5.21.0/index.html", "_blank");
+ window.open(withBasePath("/swagger-ui/5.21.0/index.html"), "_blank");
}, []);

return (
@@ -12,7 +13,7 @@ const SwaggerUI: React.FC<BaseToolProps> = () => {
<p>Opening Swagger UI in a new tab...</p>
<p>
If it didn't open automatically,{" "}
- <a href="/swagger-ui/5.21.0/index.html" target="_blank" rel="noopener noreferrer">
+ <a href={withBasePath("/swagger-ui/5.21.0/index.html")} target="_blank" rel="noopener noreferrer">
click here
</a>
</p>
@@ -8,12 +8,17 @@ import { getDefaultWorkbench } from '../types/workbench';
import { ToolRegistry, getToolWorkbench, getToolUrlPath } from '../data/toolsTaxonomy';
import { firePixel } from './scarfTracking';
import { URL_TO_TOOL_MAP } from './urlMapping';
+ import { BASE_PATH, withBasePath } from '../constants/app';

/**
* Parse the current URL to extract tool routing information
*/
export function parseToolRoute(registry: ToolRegistry): ToolRoute {
- const path = window.location.pathname;
+ const fullPath = window.location.pathname;
+ // Remove base path to get app-relative path
+ const path = BASE_PATH && fullPath.startsWith(BASE_PATH)
+   ? fullPath.slice(BASE_PATH.length) || '/'
+   : fullPath;
const searchParams = new URLSearchParams(window.location.search);

// First, check URL mapping for multiple URL aliases
@@ -83,7 +88,8 @@ export function updateToolRoute(toolId: ToolId, registry: ToolRegistry, replace:
return;
}

- const newPath = getToolUrlPath(toolId, tool);
+ const toolPath = getToolUrlPath(toolId, tool);
+ const newPath = withBasePath(toolPath);
const searchParams = new URLSearchParams(window.location.search);

// Remove tool query parameter since we're using path-based routing
@@ -99,7 +105,7 @@ export function clearToolRoute(replace: boolean = false): void {
const searchParams = new URLSearchParams(window.location.search);
searchParams.delete('tool');

- updateUrl('/', searchParams, replace);
+ updateUrl(withBasePath('/'), searchParams, replace);
}

/**
@@ -117,11 +123,12 @@ export function generateShareableUrl(toolId: ToolId | null, registry: ToolRegist
const baseUrl = window.location.origin;

if (!toolId || !registry[toolId]) {
- return baseUrl;
+ return `${baseUrl}${BASE_PATH || ''}`;
}

const tool = registry[toolId];

- const path = getToolUrlPath(toolId, tool);
- return `${baseUrl}${path}`;
+ const toolPath = getToolUrlPath(toolId, tool);
+ const fullPath = withBasePath(toolPath);
+ return `${baseUrl}${fullPath}`;
}
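A minimal sketch of the round trip these routing changes enable, assuming a hypothetical `/stirling-pdf` subpath and a tool whose URL path is `/compress` (both values illustrative):

```typescript
// Round trip under a subpath: write the prefixed path, then strip it back off.
const BASE_PATH = '/stirling-pdf';
const withBasePath = (p: string) => `${BASE_PATH}${p.startsWith('/') ? p : `/${p}`}`;

// updateToolRoute pushes the prefixed path into history:
const newPath = withBasePath('/compress');        // '/stirling-pdf/compress'

// parseToolRoute later removes the prefix before matching tools:
const fullPath = '/stirling-pdf/compress';        // window.location.pathname
const appPath = BASE_PATH && fullPath.startsWith(BASE_PATH)
  ? fullPath.slice(BASE_PATH.length) || '/'
  : fullPath;                                      // '/compress'

console.log(newPath, appPath);
```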
@@ -12,5 +12,5 @@ export default defineConfig({
},
},
},
- base: "./",
+ base: process.env.RUN_SUBPATH ? `/${process.env.RUN_SUBPATH}` : './',
});
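To tie the pieces together, a sketch of how the build-time flag flows through to `BASE_PATH` (the environment value `stirling-pdf` is hypothetical):

```typescript
// RUN_SUBPATH=stirling-pdf -> vite base '/stirling-pdf'
//                          -> import.meta.env.BASE_URL '/stirling-pdf/' (Vite normally appends the slash)
//                          -> BASE_PATH '/stirling-pdf'
// no RUN_SUBPATH           -> vite base './'
//                          -> import.meta.env.BASE_URL './'
//                          -> BASE_PATH '' (the '.' is stripped, as noted in constants/app)

const normalize = (baseUrl: string) => baseUrl.replace(/\/$/, '').replace(/^\.$/, '');
console.log(normalize('/stirling-pdf/')); // '/stirling-pdf'
console.log(normalize('./'));             // ''
```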
scripts/translations/README.md (new file, 403 lines)
@@ -0,0 +1,403 @@
# Translation Management Scripts
|
||||
|
||||
This directory contains Python scripts for managing frontend translations in Stirling PDF. These tools help analyze, merge, and manage translations against the en-GB golden truth file.
|
||||
|
||||
## Scripts Overview
|
||||
|
||||
### 1. `translation_analyzer.py`
|
||||
Analyzes translation files to find missing translations, untranslated entries, and provides completion statistics.
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
# Analyze all languages
|
||||
python scripts/translations/translation_analyzer.py
|
||||
|
||||
# Analyze specific language
|
||||
python scripts/translations/translation_analyzer.py --language fr-FR
|
||||
|
||||
# Show only missing translations
|
||||
python scripts/translations/translation_analyzer.py --missing-only
|
||||
|
||||
# Show only untranslated entries
|
||||
python scripts/translations/translation_analyzer.py --untranslated-only
|
||||
|
||||
# Show summary only
|
||||
python scripts/translations/translation_analyzer.py --summary
|
||||
|
||||
# JSON output format
|
||||
python scripts/translations/translation_analyzer.py --format json
|
||||
```
|
||||
|
||||
**Features:**
|
||||
- Finds missing translation keys
|
||||
- Identifies untranslated entries (identical to en-GB and [UNTRANSLATED] markers)
|
||||
- Shows accurate completion percentages using ignore patterns
|
||||
- Identifies extra keys not in en-GB
|
||||
- Supports JSON and text output formats
|
||||
- Uses `scripts/ignore_translation.toml` for language-specific exclusions
|
||||
|
||||
### 2. `translation_merger.py`
|
||||
Merges missing translations from en-GB into target language files and manages translation workflows.
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
# Add missing translations from en-GB to French
|
||||
python scripts/translations/translation_merger.py fr-FR add-missing
|
||||
|
||||
# Add without marking as [UNTRANSLATED]
|
||||
python scripts/translations/translation_merger.py fr-FR add-missing --no-mark-untranslated
|
||||
|
||||
# Extract untranslated entries to a file
|
||||
python scripts/translations/translation_merger.py fr-FR extract-untranslated --output fr_untranslated.json
|
||||
|
||||
# Create a template for AI translation
|
||||
python scripts/translations/translation_merger.py fr-FR create-template --output fr_template.json
|
||||
|
||||
# Apply translations from a file
|
||||
python scripts/translations/translation_merger.py fr-FR apply-translations --translations-file fr_translated.json
|
||||
```
|
||||
|
||||
**Features:**
|
||||
- Adds missing keys from en-GB with optional [UNTRANSLATED] markers
|
||||
- Extracts untranslated entries for external translation
|
||||
- Creates structured templates for AI translation
|
||||
- Applies translated content back to language files
|
||||
- Automatic backup creation
|
||||
|
||||
### 3. `ai_translation_helper.py`
|
||||
Specialized tool for AI-assisted translation workflows with batch processing and validation.
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
# Create batch file for AI translation (multiple languages)
|
||||
python scripts/translations/ai_translation_helper.py create-batch --languages fr-FR de-DE es-ES --output batch.json --max-entries 50
|
||||
|
||||
# Validate AI translations
|
||||
python scripts/translations/ai_translation_helper.py validate batch.json
|
||||
|
||||
# Apply validated AI translations
|
||||
python scripts/translations/ai_translation_helper.py apply-batch batch.json
|
||||
|
||||
# Export for external translation services
|
||||
python scripts/translations/ai_translation_helper.py export --languages fr-FR de-DE --format csv
|
||||
```
|
||||
|
||||
**Features:**
|
||||
- Creates batch files for AI translation of multiple languages
|
||||
- Prioritizes important translation keys
|
||||
- Validates translations for placeholders and artifacts
|
||||
- Applies batch translations with validation
|
||||
- Exports to CSV/JSON for external translation services
|
||||
|
||||
### 4. `compact_translator.py`
|
||||
Extracts untranslated entries in minimal JSON format for character-limited AI services.
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
# Extract all untranslated entries
|
||||
python scripts/translations/compact_translator.py it-IT --output to_translate.json
|
||||
```
|
||||
|
||||
**Features:**
|
||||
- Produces minimal JSON output with no extra whitespace
|
||||
- Automatic ignore patterns for cleaner output
|
||||
- Batch size control for manageable chunks
|
||||
- 50-80% fewer characters than other extraction methods
|
||||
|
||||
### 5. `json_beautifier.py`
|
||||
Restructures and beautifies translation JSON files to match en-GB structure exactly.
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
# Restructure single language to match en-GB structure
|
||||
python scripts/translations/json_beautifier.py --language de-DE
|
||||
|
||||
# Restructure all languages
|
||||
python scripts/translations/json_beautifier.py --all-languages
|
||||
|
||||
# Validate structure without modifying files
|
||||
python scripts/translations/json_beautifier.py --language de-DE --validate-only
|
||||
|
||||
# Skip backup creation
|
||||
python scripts/translations/json_beautifier.py --language de-DE --no-backup
|
||||
```
|
||||
|
||||
**Features:**
|
||||
- Restructures JSON to match en-GB nested structure exactly
|
||||
- Preserves key ordering for line-by-line comparison
|
||||
- Creates automatic backups before modification
|
||||
- Validates structure and key ordering
|
||||
- Handles flattened dot-notation keys (e.g., "key.subkey") properly
|
||||
|
||||
## Translation Workflows
|
||||
|
||||
### Method 1: Compact Translation Workflow (RECOMMENDED for AI)
|
||||
|
||||
**Best for character-limited AI services like Claude or ChatGPT**
|
||||
|
||||
#### Step 1: Check Current Status
|
||||
```bash
|
||||
python scripts/translations/translation_analyzer.py --language it-IT --summary
|
||||
```
|
||||
|
||||
#### Step 2: Extract Untranslated Entries
|
||||
```bash
|
||||
python scripts/translations/compact_translator.py it-IT --output to_translate.json
|
||||
```
|
||||
|
||||
**Output format**: Compact JSON with minimal whitespace
|
||||
```json
|
||||
{"key1":"English text","key2":"Another text","key3":"More text"}
|
||||
```
|
||||
|
||||
#### Step 3: AI Translation
|
||||
1. Copy the compact JSON output
|
||||
2. Give it to your AI with instructions:
|
||||
```
|
||||
Translate this JSON to Italian. Keep the same structure, translate only the values.
|
||||
Preserve placeholders like {n}, {total}, {filename}, {{variable}}.
|
||||
```
|
||||
3. Save the AI's response as `translated.json`
|
||||
|
||||
#### Step 4: Apply Translations
|
||||
```bash
|
||||
python scripts/translations/translation_merger.py it-IT apply-translations --translations-file translated.json
|
||||
```
|
||||
|
||||
#### Step 5: Verify Results
|
||||
```bash
|
||||
python scripts/translations/translation_analyzer.py --language it-IT --summary
|
||||
```
|
||||
|
||||
### Method 2: Batch Translation Workflow
|
||||
|
||||
**For complete language translation from scratch or major updates**
|
||||
|
||||
#### Step 1: Analyze Current State
|
||||
```bash
|
||||
python scripts/translations/translation_analyzer.py --language de-DE --summary
|
||||
```
|
||||
|
||||
#### Step 2: Create Translation Batches
|
||||
```bash
|
||||
# Create batches of 100 entries each for systematic translation
|
||||
python scripts/translations/ai_translation_helper.py create-batch --languages de-DE --output de_batch_1.json --max-entries 100
|
||||
```
|
||||
|
||||
#### Step 3: Translate Batch with AI
|
||||
Edit the batch file and fill in ALL `translated` fields:
|
||||
- Preserve all placeholders like `{n}`, `{total}`, `{filename}`, `{{toolName}}`
|
||||
- Keep technical terms consistent
|
||||
- Maintain JSON structure exactly
|
||||
- Consider context provided for each entry
|
||||
|
||||
#### Step 4: Apply Translations
|
||||
```bash
|
||||
# Skip validation if using legitimate placeholders ({{variable}})
|
||||
python scripts/translations/ai_translation_helper.py apply-batch de_batch_1.json --skip-validation
|
||||
```
|
||||
|
||||
#### Step 5: Check Progress and Continue
|
||||
```bash
|
||||
python scripts/translations/translation_analyzer.py --language de-DE --summary
|
||||
```
|
||||
Repeat steps 2-5 until 100% complete.
|
||||
|
||||
### Method 3: Quick Translation Workflow (Legacy)
|
||||
|
||||
**For small updates or existing translations**
|
||||
|
||||
#### Step 1: Add Missing Translations
|
||||
```bash
|
||||
python scripts/translations/translation_merger.py fr-FR add-missing --mark-untranslated
|
||||
```
|
||||
|
||||
#### Step 2: Create AI Template
|
||||
```bash
|
||||
python scripts/translations/translation_merger.py fr-FR create-template --output fr_template.json
|
||||
```
|
||||
|
||||
#### Step 3: Apply Translations
|
||||
```bash
|
||||
python scripts/translations/translation_merger.py fr-FR apply-translations --translations-file fr_translated.json
|
||||
```
|
||||
|
||||
## Translation File Structure
|
||||
|
||||
Translation files are located in `frontend/public/locales/{language}/translation.json` with nested JSON structure:
|
||||
|
||||
```json
|
||||
{
|
||||
"addPageNumbers": {
|
||||
"title": "Add Page Numbers",
|
||||
"selectText": {
|
||||
"1": "Select PDF file:",
|
||||
"2": "Margin Size"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Keys use dot notation internally (e.g., `addPageNumbers.selectText.1`).
|
||||
|
||||
## Key Features
|
||||
|
||||
### Placeholder Preservation
|
||||
All scripts preserve placeholders like `{n}`, `{total}`, `{filename}` in translations:
|
||||
```
|
||||
"customNumberDesc": "Defaults to {n}, also accepts 'Page {n} of {total}'"
|
||||
```
|
||||
|
||||
### Automatic Backups
|
||||
Scripts create timestamped backups before modifying files:
|
||||
```
|
||||
translation.backup.20241201_143022.json
|
||||
```
|
||||
|
||||
### Context-Aware Translation
|
||||
Scripts provide context information to help with accurate translations:
|
||||
```json
|
||||
{
|
||||
"addPageNumbers.title": {
|
||||
"original": "Add Page Numbers",
|
||||
"context": "Feature for adding page numbers to PDFs"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Priority-Based Translation
|
||||
Important keys (title, submit, error messages) are prioritized when limiting translation batch sizes.
|
||||
|
||||
### Ignore Patterns System
|
||||
The `scripts/ignore_translation.toml` file defines keys that should be ignored for each language, improving completion accuracy.
|
||||
|
||||
**Common ignore patterns:**
|
||||
- `language.direction`: Text direction (ltr/rtl) - universal
|
||||
- `lang.*`: Language code entries not relevant to specific locales
|
||||
- `pipeline.title`, `home.devApi.title`: Technical terms kept in English
|
||||
- Specific technical IDs, version numbers, and system identifiers
|
||||
|
||||
**Format:**
|
||||
```toml
|
||||
[de_DE]
|
||||
ignore = [
|
||||
'language.direction',
|
||||
'pipeline.title',
|
||||
'lang.afr',
|
||||
'lang.ceb',
|
||||
# ... more patterns
|
||||
]
|
||||
```
|
||||
|
||||
## Best Practices & Lessons Learned
|
||||
|
||||
### Critical Rules for Translation
|
||||
|
||||
1. **NEVER skip entries**: Translate ALL entries in each batch to avoid [UNTRANSLATED] pollution
|
||||
2. **Use appropriate batch sizes**: 100 entries for systematic translation, unlimited for compact method
|
||||
3. **Skip validation for placeholders**: Use `--skip-validation` when batch contains `{{variable}}` patterns
|
||||
4. **Check progress between batches**: Use `--summary` flag to track completion percentage
|
||||
5. **Preserve all placeholders**: Keep `{n}`, `{total}`, `{filename}`, `{{toolName}}` exactly as-is
|
||||
|
||||
### Workflow Comparison
|
||||
|
||||
| Method | Best For | Character Usage | Complexity | Speed |
|
||||
|--------|----------|----------------|------------|-------|
|
||||
| Compact | AI services | Minimal (50-80% less) | Simple | Fastest |
|
||||
| Batch | Systematic translation | Moderate | Medium | Medium |
|
||||
| Quick | Small updates | High | Low | Slow |
|
||||
|
||||
### Common Issues and Solutions
|
||||
|
||||
#### [UNTRANSLATED] Pollution
|
||||
**Problem**: Hundreds of [UNTRANSLATED] markers from incomplete translation attempts
|
||||
**Solution**:
|
||||
- Only translate complete batches of manageable size
|
||||
- Use analyzer that counts [UNTRANSLATED] as missing translations
|
||||
- Restore from backup if pollution occurs
|
||||
|
||||
#### Validation False Positives
|
||||
**Problem**: Validator flags legitimate `{{variable}}` placeholders as artifacts
|
||||
**Solution**: Use `--skip-validation` flag when applying batches with template variables
|
||||
|
||||
#### JSON Structure Mismatches
|
||||
**Problem**: Flattened dot-notation keys instead of proper nested objects
|
||||
**Solution**: Use `json_beautifier.py` to restructure files to match en-GB exactly
|
||||
|
||||
## Real-World Examples
|
||||
|
||||
### Complete Italian Translation (Compact Method)
|
||||
```bash
|
||||
# Check status
|
||||
python scripts/translations/translation_analyzer.py --language it-IT --summary
|
||||
# Result: 46.8% complete, 1147 missing
|
||||
|
||||
# Extract all entries for translation
|
||||
python scripts/translations/compact_translator.py it-IT --output batch1.json
|
||||
|
||||
# [Translate batch1.json with AI, save as batch1_translated.json]
|
||||
|
||||
# Apply translations
|
||||
python scripts/translations/translation_merger.py it-IT apply-translations --translations-file batch1_translated.json
|
||||
# Result: Applied 1147 translations
|
||||
|
||||
# Check progress
|
||||
python scripts/translations/translation_analyzer.py --language it-IT --summary
|
||||
# Result: 100% complete, 0 missing
|
||||
```
|
||||
|
||||
### German Translation (Batch Method)
|
||||
Starting from 46.3% completion, reaching 60.3% with batch method:
|
||||
|
||||
```bash
|
||||
# Initial analysis
|
||||
python scripts/translations/translation_analyzer.py --language de-DE --summary
|
||||
# Result: 46.3% complete, 1142 missing entries
|
||||
|
||||
# Batch 1 (100 entries)
|
||||
python scripts/translations/ai_translation_helper.py create-batch --languages de-DE --output de_batch_1.json --max-entries 100
|
||||
# [Translate all 100 entries in batch file]
|
||||
python scripts/translations/ai_translation_helper.py apply-batch de_batch_1.json --skip-validation
|
||||
# Progress: 46.6% → 51.2%
|
||||
|
||||
# Continue with more batches until 100% complete
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
- **Missing Files**: Scripts create new files when language directories don't exist
|
||||
- **Invalid JSON**: Clear error messages with line numbers
|
||||
- **Placeholder Mismatches**: Validation warnings for missing or extra placeholders
|
||||
- **[UNTRANSLATED] Entries**: Counted as missing translations to prevent pollution
|
||||
- **Backup Failures**: Graceful handling with user notification
|
||||
|
||||
## Integration with Development
|
||||
|
||||
These scripts integrate with the existing translation system:
|
||||
- Works with the current `frontend/public/locales/` structure
|
||||
- Compatible with the i18n system used in the React frontend
|
||||
- Respects the JSON format expected by the translation loader
|
||||
- Maintains the nested structure required by the UI components
|
||||
|
||||
## Language-Specific Notes
|
||||
|
||||
### German Translation Notes
|
||||
- Technical terms: Use German equivalents (PDF → PDF, API → API)
|
||||
- UI actions: "hochladen" (upload), "herunterladen" (download), "speichern" (save)
|
||||
- Error messages: Consistent pattern "Ein Fehler ist beim [action] aufgetreten"
|
||||
- Formal address: Use "Sie" form for user-facing text
|
||||
|
||||
### Italian Translation Notes
|
||||
- Keep technical terms in English when commonly used (PDF, API, URL)
|
||||
- Use formal address ("Lei" form) for user-facing text
|
||||
- Error messages: "Si è verificato un errore durante [action]"
|
||||
- UI actions: "carica" (upload), "scarica" (download), "salva" (save)
|
||||
|
||||
## Common Use Cases
|
||||
|
||||
1. **Complete Language Translation**: Use Compact Workflow for fastest AI-assisted translation
|
||||
2. **New Language Addition**: Start with compact workflow for comprehensive coverage
|
||||
3. **Updating Existing Language**: Use analyzer to find gaps, then compact or batch method
|
||||
4. **Quality Assurance**: Use analyzer with `--summary` for completion metrics and issue detection
|
||||
5. **External Translation Services**: Use export functionality to generate CSV files for translators
|
||||
6. **Structure Maintenance**: Use json_beautifier to keep files aligned with en-GB structure
|
scripts/translations/ai_translation_helper.py (new file, 408 lines)
@@ -0,0 +1,408 @@
#!/usr/bin/env python3
|
||||
"""
|
||||
AI Translation Helper for Stirling PDF Frontend
|
||||
Provides utilities for AI-assisted translation workflows including
|
||||
batch processing, quality checks, and integration helpers.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Set, Tuple, Any, Optional
|
||||
import argparse
|
||||
import re
|
||||
from datetime import datetime
|
||||
import csv
|
||||
|
||||
|
||||
class AITranslationHelper:
|
||||
def __init__(self, locales_dir: str = "frontend/public/locales"):
|
||||
self.locales_dir = Path(locales_dir)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
|
||||
|
||||
def _load_json(self, file_path: Path) -> Dict:
|
||||
"""Load JSON file with error handling."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except (FileNotFoundError, json.JSONDecodeError) as e:
|
||||
print(f"Error loading {file_path}: {e}")
|
||||
return {}
|
||||
|
||||
def _save_json(self, data: Dict, file_path: Path) -> None:
|
||||
"""Save JSON file."""
|
||||
with open(file_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2, ensure_ascii=False)
|
||||
|
||||
def create_ai_batch_file(self, languages: List[str], output_file: Path,
|
||||
max_entries_per_language: int = 50) -> None:
|
||||
"""Create a batch file for AI translation with multiple languages."""
|
||||
golden_truth = self._load_json(self.golden_truth_file)
|
||||
batch_data = {
|
||||
'metadata': {
|
||||
'created_at': datetime.now().isoformat(),
|
||||
'source_language': 'en-GB',
|
||||
'target_languages': languages,
|
||||
'max_entries_per_language': max_entries_per_language,
|
||||
'instructions': {
|
||||
'format': 'Translate each entry maintaining JSON structure and placeholder variables like {n}, {total}, {filename}',
|
||||
'context': 'This is for a PDF manipulation tool. Keep technical terms consistent.',
|
||||
'placeholders': 'Preserve all placeholders: {n}, {total}, {filename}, etc.',
|
||||
'style': 'Keep translations concise and user-friendly'
|
||||
}
|
||||
},
|
||||
'translations': {}
|
||||
}
|
||||
|
||||
for lang in languages:
|
||||
lang_file = self.locales_dir / lang / "translation.json"
|
||||
if not lang_file.exists():
|
||||
# Create empty translation structure
|
||||
lang_data = {}
|
||||
else:
|
||||
lang_data = self._load_json(lang_file)
|
||||
|
||||
# Find untranslated entries
|
||||
untranslated = self._find_untranslated_entries(golden_truth, lang_data)
|
||||
|
||||
# Limit entries if specified
|
||||
if max_entries_per_language and len(untranslated) > max_entries_per_language:
|
||||
# Prioritize by key importance
|
||||
untranslated = self._prioritize_translation_keys(untranslated, max_entries_per_language)
|
||||
|
||||
batch_data['translations'][lang] = {}
|
||||
for key, value in untranslated.items():
|
||||
batch_data['translations'][lang][key] = {
|
||||
'original': value,
|
||||
'translated': '', # AI fills this
|
||||
'context': self._get_key_context(key)
|
||||
}
|
||||
|
||||
self._save_json(batch_data, output_file)
|
||||
total_entries = sum(len(lang_data) for lang_data in batch_data['translations'].values())
|
||||
print(f"Created AI batch file: {output_file}")
|
||||
print(f"Total entries to translate: {total_entries}")
|
||||
|
||||
def _find_untranslated_entries(self, golden_truth: Dict, lang_data: Dict) -> Dict[str, str]:
|
||||
"""Find entries that need translation."""
|
||||
golden_flat = self._flatten_dict(golden_truth)
|
||||
lang_flat = self._flatten_dict(lang_data)
|
||||
|
||||
untranslated = {}
|
||||
for key, value in golden_flat.items():
|
||||
if (key not in lang_flat or
|
||||
lang_flat[key] == value or
|
||||
(isinstance(lang_flat[key], str) and lang_flat[key].startswith("[UNTRANSLATED]"))):
|
||||
if not self._is_expected_identical(key, value):
|
||||
untranslated[key] = value
|
||||
|
||||
return untranslated
|
||||
|
||||
def _flatten_dict(self, d: Dict, parent_key: str = '', separator: str = '.') -> Dict[str, Any]:
|
||||
"""Flatten nested dictionary."""
|
||||
items = []
|
||||
for k, v in d.items():
|
||||
new_key = f"{parent_key}{separator}{k}" if parent_key else k
|
||||
if isinstance(v, dict):
|
||||
items.extend(self._flatten_dict(v, new_key, separator).items())
|
||||
else:
|
||||
items.append((new_key, v))
|
||||
return dict(items)
|
||||
|
||||
def _is_expected_identical(self, key: str, value: str) -> bool:
|
||||
"""Check if key should be identical across languages."""
|
||||
if str(value).strip() in ['ltr', 'rtl', 'True', 'False', 'true', 'false']:
|
||||
return True
|
||||
return 'language.direction' in key.lower()
|
||||
|
||||
def _prioritize_translation_keys(self, untranslated: Dict[str, str], max_count: int) -> Dict[str, str]:
|
||||
"""Prioritize which keys to translate first based on importance."""
|
||||
# Define priority order (higher score = higher priority)
|
||||
priority_patterns = [
|
||||
('title', 10),
|
||||
('header', 9),
|
||||
('submit', 8),
|
||||
('selectText', 7),
|
||||
('prompt', 6),
|
||||
('desc', 5),
|
||||
('error', 8),
|
||||
('warning', 7),
|
||||
('save', 8),
|
||||
('download', 8),
|
||||
('upload', 7),
|
||||
]
|
||||
|
||||
scored_keys = []
|
||||
for key, value in untranslated.items():
|
||||
score = 1 # base score
|
||||
for pattern, pattern_score in priority_patterns:
|
||||
if pattern.lower() in key.lower():
|
||||
score = max(score, pattern_score)
|
||||
scored_keys.append((key, value, score))
|
||||
|
||||
# Sort by score (descending) and return top entries
|
||||
scored_keys.sort(key=lambda x: x[2], reverse=True)
|
||||
return {key: value for key, value, _ in scored_keys[:max_count]}
|
||||
|
||||
def _get_key_context(self, key: str) -> str:
|
||||
"""Get contextual information for a translation key."""
|
||||
parts = key.split('.')
|
||||
contexts = {
|
||||
'addPageNumbers': 'Feature for adding page numbers to PDFs',
|
||||
'compress': 'PDF compression functionality',
|
||||
'merge': 'PDF merging functionality',
|
||||
'split': 'PDF splitting functionality',
|
||||
'rotate': 'PDF rotation functionality',
|
||||
'convert': 'File conversion functionality',
|
||||
'security': 'PDF security and permissions',
|
||||
'metadata': 'PDF metadata editing',
|
||||
'watermark': 'Adding watermarks to PDFs',
|
||||
'overlay': 'PDF overlay functionality',
|
||||
'extract': 'Extracting content from PDFs'
|
||||
}
|
||||
|
||||
if len(parts) > 0:
|
||||
main_section = parts[0]
|
||||
context = contexts.get(main_section, f'Part of {main_section} functionality')
|
||||
if len(parts) > 1:
|
||||
context += f', specifically for {parts[-1]}'
|
||||
return context
|
||||
|
||||
return 'General application text'
|
||||
|
||||
def validate_ai_translations(self, batch_file: Path) -> Dict[str, List[str]]:
|
||||
"""Validate AI translations for common issues."""
|
||||
batch_data = self._load_json(batch_file)
|
||||
issues = {'errors': [], 'warnings': []}
|
||||
|
||||
for lang, translations in batch_data.get('translations', {}).items():
|
||||
for key, translation_data in translations.items():
|
||||
original = translation_data.get('original', '')
|
||||
translated = translation_data.get('translated', '')
|
||||
|
||||
if not translated:
|
||||
issues['errors'].append(f"{lang}.{key}: Missing translation")
|
||||
continue
|
||||
|
||||
# Check for placeholder preservation
|
||||
original_placeholders = re.findall(r'\{[^}]+\}', original)
|
||||
translated_placeholders = re.findall(r'\{[^}]+\}', translated)
|
||||
|
||||
if set(original_placeholders) != set(translated_placeholders):
|
||||
issues['warnings'].append(
|
||||
f"{lang}.{key}: Placeholder mismatch - Original: {original_placeholders}, "
|
||||
f"Translated: {translated_placeholders}"
|
||||
)
|
||||
|
||||
# Check if translation is identical to original (might be untranslated)
|
||||
if translated == original and not self._is_expected_identical(key, original):
|
||||
issues['warnings'].append(f"{lang}.{key}: Translation identical to original")
|
||||
|
||||
# Check for common AI translation artifacts
|
||||
artifacts = ['[TRANSLATE]', '[TODO]', 'UNTRANSLATED', '{{', '}}']
|
||||
for artifact in artifacts:
|
||||
if artifact in translated:
|
||||
issues['errors'].append(f"{lang}.{key}: Contains translation artifact: {artifact}")
|
||||
|
||||
return issues
|
||||
|
||||
def apply_ai_batch_translations(self, batch_file: Path, validate: bool = True) -> Dict[str, Any]:
|
||||
"""Apply translations from AI batch file to individual language files."""
|
||||
batch_data = self._load_json(batch_file)
|
||||
results = {'applied': {}, 'errors': [], 'warnings': []}
|
||||
|
||||
if validate:
|
||||
validation_issues = self.validate_ai_translations(batch_file)
|
||||
if validation_issues['errors']:
|
||||
print("Validation errors found. Fix these before applying:")
|
||||
for error in validation_issues['errors']:
|
||||
print(f" ERROR: {error}")
|
||||
return results
|
||||
|
||||
if validation_issues['warnings']:
|
||||
print("Validation warnings (review recommended):")
|
||||
for warning in validation_issues['warnings'][:10]:
|
||||
print(f" WARNING: {warning}")
|
||||
|
||||
for lang, translations in batch_data.get('translations', {}).items():
|
||||
lang_file = self.locales_dir / lang / "translation.json"
|
||||
|
||||
# Load existing data or create new
|
||||
if lang_file.exists():
|
||||
lang_data = self._load_json(lang_file)
|
||||
else:
|
||||
lang_data = {}
|
||||
lang_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
applied_count = 0
|
||||
for key, translation_data in translations.items():
|
||||
translated = translation_data.get('translated', '').strip()
|
||||
if translated and translated != translation_data.get('original', ''):
|
||||
self._set_nested_value(lang_data, key, translated)
|
||||
applied_count += 1
|
||||
|
||||
if applied_count > 0:
|
||||
self._save_json(lang_data, lang_file)
|
||||
results['applied'][lang] = applied_count
|
||||
print(f"Applied {applied_count} translations to {lang}")
|
||||
|
||||
return results
|
||||
|
||||
def _set_nested_value(self, data: Dict, key_path: str, value: Any) -> None:
|
||||
"""Set value in nested dict using dot notation."""
|
||||
keys = key_path.split('.')
|
||||
current = data
|
||||
for key in keys[:-1]:
|
||||
if key not in current:
|
||||
current[key] = {}
|
||||
elif not isinstance(current[key], dict):
|
||||
# If the current value is not a dict, we can't nest into it
|
||||
print(f"Warning: Converting non-dict value at '{key}' to dict to allow nesting")
|
||||
current[key] = {}
|
||||
current = current[key]
|
||||
current[keys[-1]] = value
|
||||
|
||||
def export_for_external_translation(self, languages: List[str], output_format: str = 'csv') -> None:
|
||||
"""Export translations for external translation services."""
|
||||
golden_truth = self._load_json(self.golden_truth_file)
|
||||
golden_flat = self._flatten_dict(golden_truth)
|
||||
|
||||
if output_format == 'csv':
|
||||
output_file = Path(f'translations_export_{datetime.now().strftime("%Y%m%d")}.csv')
|
||||
|
||||
with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
|
||||
fieldnames = ['key', 'context', 'en_GB'] + languages
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
|
||||
for key, en_value in golden_flat.items():
|
||||
if self._is_expected_identical(key, en_value):
|
||||
continue
|
||||
|
||||
row = {
|
||||
'key': key,
|
||||
'context': self._get_key_context(key),
|
||||
'en_GB': en_value
|
||||
}
|
||||
|
||||
for lang in languages:
|
||||
lang_file = self.locales_dir / lang / "translation.json"
|
||||
if lang_file.exists():
|
||||
lang_data = self._load_json(lang_file)
|
||||
lang_flat = self._flatten_dict(lang_data)
|
||||
value = lang_flat.get(key, '')
|
||||
if value.startswith('[UNTRANSLATED]'):
|
||||
value = ''
|
||||
row[lang] = value
|
||||
else:
|
||||
row[lang] = ''
|
||||
|
||||
writer.writerow(row)
|
||||
|
||||
print(f"Exported to {output_file}")
|
||||
|
||||
elif output_format == 'json':
|
||||
output_file = Path(f'translations_export_{datetime.now().strftime("%Y%m%d")}.json')
|
||||
export_data = {'languages': languages, 'translations': {}}
|
||||
|
||||
for key, en_value in golden_flat.items():
|
||||
if self._is_expected_identical(key, en_value):
|
||||
continue
|
||||
|
||||
export_data['translations'][key] = {
|
||||
'en_GB': en_value,
|
||||
'context': self._get_key_context(key)
|
||||
}
|
||||
|
||||
for lang in languages:
|
||||
lang_file = self.locales_dir / lang / "translation.json"
|
||||
if lang_file.exists():
|
||||
lang_data = self._load_json(lang_file)
|
||||
lang_flat = self._flatten_dict(lang_data)
|
||||
value = lang_flat.get(key, '')
|
||||
if value.startswith('[UNTRANSLATED]'):
|
||||
value = ''
|
||||
export_data['translations'][key][lang] = value
|
||||
|
||||
self._save_json(export_data, output_file)
|
||||
print(f"Exported to {output_file}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='AI Translation Helper')
|
||||
parser.add_argument('--locales-dir', default='frontend/public/locales',
|
||||
help='Path to locales directory')
|
||||
|
||||
subparsers = parser.add_subparsers(dest='command', help='Available commands')
|
||||
|
||||
# Create batch command
|
||||
batch_parser = subparsers.add_parser('create-batch', help='Create AI translation batch file')
|
||||
batch_parser.add_argument('--languages', nargs='+', required=True,
|
||||
help='Language codes to include')
|
||||
batch_parser.add_argument('--output', required=True, help='Output batch file')
|
||||
batch_parser.add_argument('--max-entries', type=int, default=100,
|
||||
help='Max entries per language')
|
||||
|
||||
# Validate command
|
||||
validate_parser = subparsers.add_parser('validate', help='Validate AI translations')
|
||||
validate_parser.add_argument('batch_file', help='Batch file to validate')
|
||||
|
||||
# Apply command
|
||||
apply_parser = subparsers.add_parser('apply-batch', help='Apply AI batch translations')
|
||||
apply_parser.add_argument('batch_file', help='Batch file with translations')
|
||||
apply_parser.add_argument('--skip-validation', action='store_true',
|
||||
help='Skip validation before applying')
|
||||
|
||||
# Export command
|
||||
export_parser = subparsers.add_parser('export', help='Export for external translation')
|
||||
export_parser.add_argument('--languages', nargs='+', required=True,
|
||||
help='Language codes to export')
|
||||
export_parser.add_argument('--format', choices=['csv', 'json'], default='csv',
|
||||
help='Export format')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.command:
|
||||
parser.print_help()
|
||||
return
|
||||
|
||||
helper = AITranslationHelper(args.locales_dir)
|
||||
|
||||
if args.command == 'create-batch':
|
||||
output_file = Path(args.output)
|
||||
helper.create_ai_batch_file(args.languages, output_file, args.max_entries)
|
||||
|
||||
elif args.command == 'validate':
|
||||
batch_file = Path(args.batch_file)
|
||||
issues = helper.validate_ai_translations(batch_file)
|
||||
|
||||
if issues['errors']:
|
||||
print("ERRORS:")
|
||||
for error in issues['errors']:
|
||||
print(f" - {error}")
|
||||
|
||||
if issues['warnings']:
|
||||
print("WARNINGS:")
|
||||
for warning in issues['warnings']:
|
||||
print(f" - {warning}")
|
||||
|
||||
if not issues['errors'] and not issues['warnings']:
|
||||
print("No validation issues found!")
|
||||
|
||||
elif args.command == 'apply-batch':
|
||||
batch_file = Path(args.batch_file)
|
||||
results = helper.apply_ai_batch_translations(
|
||||
batch_file,
|
||||
validate=not args.skip_validation
|
||||
)
|
||||
|
||||
total_applied = sum(results['applied'].values())
|
||||
print(f"Total translations applied: {total_applied}")
|
||||
|
||||
elif args.command == 'export':
|
||||
helper.export_for_external_translation(args.languages, args.format)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
scripts/translations/compact_translator.py (new file, 177 lines)
@@ -0,0 +1,177 @@
#!/usr/bin/env python3
|
||||
"""
|
||||
Compact Translation Extractor for Character-Limited AI Translation
|
||||
Outputs untranslated entries in minimal JSON format with whitespace stripped.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
try:
|
||||
import tomllib # Python 3.11+
|
||||
except ImportError:
|
||||
try:
|
||||
import toml as tomllib_fallback
|
||||
tomllib = None
|
||||
except ImportError:
|
||||
tomllib = None
|
||||
tomllib_fallback = None
|
||||
|
||||
|
||||
class CompactTranslationExtractor:
|
||||
def __init__(self, locales_dir: str = "frontend/public/locales", ignore_file: str = "scripts/ignore_translation.toml"):
|
||||
self.locales_dir = Path(locales_dir)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
|
||||
self.golden_truth = self._load_json(self.golden_truth_file)
|
||||
self.ignore_file = Path(ignore_file)
|
||||
self.ignore_patterns = self._load_ignore_patterns()
|
||||
|
||||
def _load_json(self, file_path: Path) -> dict:
|
||||
"""Load JSON file with error handling."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File not found: {file_path}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error: Invalid JSON in {file_path}: {e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
def _load_ignore_patterns(self) -> dict:
|
||||
"""Load ignore patterns from TOML file."""
|
||||
if not self.ignore_file.exists():
|
||||
return {}
|
||||
|
||||
try:
|
||||
if tomllib:
|
||||
with open(self.ignore_file, 'rb') as f:
|
||||
ignore_data = tomllib.load(f)
|
||||
elif tomllib_fallback:
|
||||
ignore_data = tomllib_fallback.load(self.ignore_file)
|
||||
else:
|
||||
ignore_data = self._parse_simple_toml()
|
||||
|
||||
return {lang: set(data.get('ignore', [])) for lang, data in ignore_data.items()}
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not load ignore file {self.ignore_file}: {e}", file=sys.stderr)
|
||||
return {}
|
||||
|
||||
def _parse_simple_toml(self) -> dict:
|
||||
"""Simple TOML parser for ignore patterns (fallback)."""
|
||||
ignore_data = {}
|
||||
current_section = None
|
||||
|
||||
with open(self.ignore_file, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line or line.startswith('#'):
|
||||
continue
|
||||
|
||||
if line.startswith('[') and line.endswith(']'):
|
||||
current_section = line[1:-1]
|
||||
ignore_data[current_section] = {'ignore': []}
|
||||
elif line.strip().startswith("'") and current_section:
|
||||
item = line.strip().strip("',")
|
||||
if item:
|
||||
ignore_data[current_section]['ignore'].append(item)
|
||||
|
||||
return ignore_data
|
||||
|
||||
def _flatten_dict(self, d: dict, parent_key: str = '', separator: str = '.') -> dict:
|
||||
"""Flatten nested dictionary into dot-notation keys."""
|
||||
items = []
|
||||
for k, v in d.items():
|
||||
new_key = f"{parent_key}{separator}{k}" if parent_key else k
|
||||
if isinstance(v, dict):
|
||||
items.extend(self._flatten_dict(v, new_key, separator).items())
|
||||
else:
|
||||
items.append((new_key, str(v)))
|
||||
return dict(items)
|
||||
|
||||
def get_untranslated_entries(self, language: str) -> dict:
|
||||
"""Get all untranslated entries for a language in compact format."""
|
||||
target_file = self.locales_dir / language / "translation.json"
|
||||
|
||||
if not target_file.exists():
|
||||
print(f"Error: Translation file not found for language: {language}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
target_data = self._load_json(target_file)
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
|
||||
lang_code = language.replace('-', '_')
|
||||
ignore_set = self.ignore_patterns.get(lang_code, set())
|
||||
|
||||
# Find missing translations
|
||||
missing_keys = set(golden_flat.keys()) - set(target_flat.keys()) - ignore_set
|
||||
|
||||
# Find untranslated entries (identical to en-GB or marked [UNTRANSLATED])
|
||||
untranslated_keys = set()
|
||||
for key in target_flat:
|
||||
if key in golden_flat and key not in ignore_set:
|
||||
target_value = target_flat[key]
|
||||
golden_value = golden_flat[key]
|
||||
|
||||
if (isinstance(target_value, str) and target_value.startswith("[UNTRANSLATED]")) or \
|
||||
(golden_value == target_value and not self._is_expected_identical(key, golden_value)):
|
||||
untranslated_keys.add(key)
|
||||
|
||||
# Combine and create compact output
|
||||
all_untranslated = missing_keys | untranslated_keys
|
||||
|
||||
compact_entries = {}
|
||||
for key in sorted(all_untranslated):
|
||||
if key in golden_flat:
|
||||
compact_entries[key] = golden_flat[key]
|
||||
|
||||
return compact_entries
|
||||
|
||||
def _is_expected_identical(self, key: str, value: str) -> bool:
|
||||
"""Check if a key-value pair is expected to be identical across languages."""
|
||||
identical_patterns = ['language.direction']
|
||||
identical_values = {'ltr', 'rtl', 'True', 'False', 'true', 'false', 'unknown'}
|
||||
|
||||
if value.strip() in identical_values:
|
||||
return True
|
||||
|
||||
for pattern in identical_patterns:
|
||||
if pattern in key.lower():
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Extract untranslated entries in compact format for AI translation')
|
||||
parser.add_argument('language', help='Language code (e.g., de-DE, fr-FR)')
|
||||
parser.add_argument('--locales-dir', default='frontend/public/locales', help='Path to locales directory')
|
||||
parser.add_argument('--ignore-file', default='scripts/ignore_translation.toml', help='Path to ignore patterns file')
|
||||
parser.add_argument('--max-entries', type=int, help='Maximum number of entries to output')
|
||||
parser.add_argument('--output', help='Output file (default: stdout)')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
extractor = CompactTranslationExtractor(args.locales_dir, args.ignore_file)
|
||||
untranslated = extractor.get_untranslated_entries(args.language)
|
||||
|
||||
if args.max_entries:
|
||||
# Take first N entries
|
||||
keys = list(untranslated.keys())[:args.max_entries]
|
||||
untranslated = {k: untranslated[k] for k in keys}
|
||||
|
||||
# Output compact JSON (no indentation, minimal whitespace)
|
||||
output = json.dumps(untranslated, separators=(',', ':'), ensure_ascii=False)
|
||||
|
||||
if args.output:
|
||||
with open(args.output, 'w', encoding='utf-8') as f:
|
||||
f.write(output)
|
||||
print(f"Extracted {len(untranslated)} untranslated entries to {args.output}", file=sys.stderr)
|
||||
else:
|
||||
print(output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
scripts/translations/json_beautifier.py (new file, 262 lines)
@@ -0,0 +1,262 @@
#!/usr/bin/env python3
|
||||
"""
|
||||
JSON Beautifier and Structure Fixer for Stirling PDF Frontend
|
||||
Restructures translation JSON files to match en-GB structure and key order exactly.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, List
|
||||
import argparse
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class JSONBeautifier:
|
||||
def __init__(self, locales_dir: str = "frontend/public/locales"):
|
||||
self.locales_dir = Path(locales_dir)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
|
||||
self.golden_structure = self._load_json(self.golden_truth_file)
|
||||
|
||||
def _load_json(self, file_path: Path) -> Dict:
|
||||
"""Load JSON file with error handling."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f, object_pairs_hook=OrderedDict)
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File not found: {file_path}")
|
||||
sys.exit(1)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error: Invalid JSON in {file_path}: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
def _save_json(self, data: Dict, file_path: Path, backup: bool = True) -> None:
|
||||
"""Save JSON file with proper formatting."""
|
||||
if backup and file_path.exists():
|
||||
backup_path = file_path.with_suffix(f'.backup.restructured.json')
|
||||
file_path.rename(backup_path)
|
||||
print(f"Backup created: {backup_path}")
|
||||
|
||||
with open(file_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2, ensure_ascii=False, separators=(',', ': '))
|
||||
|
||||
def _flatten_dict(self, d: Dict, parent_key: str = '', separator: str = '.') -> Dict[str, Any]:
|
||||
"""Flatten nested dictionary into dot-notation keys."""
|
||||
items = []
|
||||
for k, v in d.items():
|
||||
new_key = f"{parent_key}{separator}{k}" if parent_key else k
|
||||
if isinstance(v, dict):
|
||||
items.extend(self._flatten_dict(v, new_key, separator).items())
|
||||
else:
|
||||
items.append((new_key, v))
|
||||
return dict(items)
|
||||
|
||||
def _rebuild_structure(self, flat_dict: Dict[str, Any], reference_structure: Dict) -> Dict:
|
||||
"""Rebuild nested structure based on reference structure and available translations."""
|
||||
def build_recursive(ref_obj: Any, current_path: str = '') -> Any:
|
||||
if isinstance(ref_obj, dict):
|
||||
result = OrderedDict()
|
||||
for key, value in ref_obj.items():
|
||||
new_path = f"{current_path}.{key}" if current_path else key
|
||||
|
||||
if new_path in flat_dict:
|
||||
# Direct translation exists
|
||||
if isinstance(value, dict):
|
||||
# If reference is dict but we have a string, use the string
|
||||
if isinstance(flat_dict[new_path], str):
|
||||
result[key] = flat_dict[new_path]
|
||||
else:
|
||||
# Recurse into nested structure
|
||||
result[key] = build_recursive(value, new_path)
|
||||
else:
|
||||
result[key] = flat_dict[new_path]
|
||||
else:
|
||||
# No direct translation, recurse to check for nested keys
|
||||
if isinstance(value, dict):
|
||||
nested_result = build_recursive(value, new_path)
|
||||
if nested_result: # Only add if we found some translations
|
||||
result[key] = nested_result
|
||||
# If no translation found and it's a leaf, skip it
|
||||
|
||||
return result if result else None
|
||||
else:
|
||||
# Leaf node - return the translation if it exists
|
||||
return flat_dict.get(current_path, None)
|
||||
|
||||
return build_recursive(reference_structure) or OrderedDict()
|
||||
|
||||
def restructure_translation_file(self, target_file: Path) -> Dict[str, Any]:
|
||||
"""Restructure a translation file to match en-GB structure exactly."""
|
||||
if not target_file.exists():
|
||||
print(f"Error: Target file does not exist: {target_file}")
|
||||
return {}
|
||||
|
||||
# Load the target file
|
||||
target_data = self._load_json(target_file)
|
||||
|
||||
# Flatten the target translations
|
||||
flat_target = self._flatten_dict(target_data)
|
||||
|
||||
# Rebuild structure based on golden truth
|
||||
restructured = self._rebuild_structure(flat_target, self.golden_structure)
|
||||
|
||||
return restructured
|
||||
|
||||
def beautify_and_restructure(self, target_file: Path, backup: bool = True) -> Dict[str, Any]:
|
||||
"""Main function to beautify and restructure a translation file."""
|
||||
lang_code = target_file.parent.name
|
||||
print(f"Restructuring {lang_code} translation file...")
|
||||
|
||||
# Get the restructured data
|
||||
restructured_data = self.restructure_translation_file(target_file)
|
||||
|
||||
# Save the restructured file
|
||||
self._save_json(restructured_data, target_file, backup)
|
||||
|
||||
# Analyze the results
|
||||
flat_golden = self._flatten_dict(self.golden_structure)
|
||||
flat_restructured = self._flatten_dict(restructured_data)
|
||||
|
||||
total_keys = len(flat_golden)
|
||||
preserved_keys = len(flat_restructured)
|
||||
|
||||
result = {
|
||||
'language': lang_code,
|
||||
'total_reference_keys': total_keys,
|
||||
'preserved_keys': preserved_keys,
|
||||
'structure_match': self._compare_structures(self.golden_structure, restructured_data)
|
||||
}
|
||||
|
||||
print(f"Restructured {lang_code}: {preserved_keys}/{total_keys} keys preserved")
|
||||
return result
|
||||
|
||||
def _compare_structures(self, ref: Dict, target: Dict) -> Dict[str, bool]:
|
||||
"""Compare structures between reference and target."""
|
||||
def compare_recursive(r: Any, t: Any, path: str = '') -> List[str]:
|
||||
issues = []
|
||||
|
||||
if isinstance(r, dict) and isinstance(t, dict):
|
||||
# Check for missing top-level sections
|
||||
ref_keys = set(r.keys())
|
||||
target_keys = set(t.keys())
|
||||
|
||||
missing_sections = ref_keys - target_keys
|
||||
if missing_sections:
|
||||
for section in missing_sections:
|
||||
issues.append(f"Missing section: {path}.{section}" if path else section)
|
||||
|
||||
# Recurse into common sections
|
||||
for key in ref_keys & target_keys:
|
||||
new_path = f"{path}.{key}" if path else key
|
||||
issues.extend(compare_recursive(r[key], t[key], new_path))
|
||||
|
||||
return issues
|
||||
|
||||
issues = compare_recursive(ref, target)
|
||||
|
||||
return {
|
||||
'structures_match': len(issues) == 0,
|
||||
'issues': issues[:10], # Limit to first 10 issues
|
||||
'total_issues': len(issues)
|
||||
}
|
||||
|
||||
def validate_key_order(self, target_file: Path) -> Dict[str, Any]:
|
||||
"""Validate that keys appear in the same order as en-GB."""
|
||||
target_data = self._load_json(target_file)
|
||||
|
||||
def get_key_order(obj: Dict, path: str = '') -> List[str]:
|
||||
keys = []
|
||||
for key in obj.keys():
|
||||
new_path = f"{path}.{key}" if path else key
|
||||
keys.append(new_path)
|
||||
if isinstance(obj[key], dict):
|
||||
keys.extend(get_key_order(obj[key], new_path))
|
||||
return keys
|
||||
|
||||
golden_order = get_key_order(self.golden_structure)
|
||||
target_order = get_key_order(target_data)
|
||||
|
||||
# Find common keys and check their relative order
|
||||
common_keys = set(golden_order) & set(target_order)
|
||||
|
||||
golden_indices = {key: idx for idx, key in enumerate(golden_order) if key in common_keys}
|
||||
target_indices = {key: idx for idx, key in enumerate(target_order) if key in common_keys}
|
||||
|
||||
# Order is preserved when every pair ordered in en-GB keeps the same relative order in the target
order_preserved = all(
target_indices[key1] < target_indices[key2]
for key1 in common_keys for key2 in common_keys
if golden_indices[key1] < golden_indices[key2]
)
|
||||
|
||||
return {
|
||||
'order_preserved': order_preserved,
|
||||
'common_keys_count': len(common_keys),
|
||||
'golden_keys_count': len(golden_order),
|
||||
'target_keys_count': len(target_order)
|
||||
}
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Beautify and restructure translation JSON files')
|
||||
parser.add_argument('--locales-dir', default='frontend/public/locales',
|
||||
help='Path to locales directory')
|
||||
parser.add_argument('--language', help='Restructure specific language only')
|
||||
parser.add_argument('--all-languages', action='store_true',
|
||||
help='Restructure all language files')
|
||||
parser.add_argument('--no-backup', action='store_true',
|
||||
help='Skip backup creation')
|
||||
parser.add_argument('--validate-only', action='store_true',
|
||||
help='Only validate structure, do not modify files')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
beautifier = JSONBeautifier(args.locales_dir)
|
||||
|
||||
if args.language:
|
||||
target_file = Path(args.locales_dir) / args.language / "translation.json"
|
||||
if not target_file.exists():
|
||||
print(f"Error: Translation file not found for language: {args.language}")
|
||||
sys.exit(1)
|
||||
|
||||
if args.validate_only:
|
||||
order_result = beautifier.validate_key_order(target_file)
|
||||
print(f"Key order validation for {args.language}:")
|
||||
print(f" Order preserved: {order_result['order_preserved']}")
|
||||
print(f" Common keys: {order_result['common_keys_count']}/{order_result['golden_keys_count']}")
|
||||
else:
|
||||
result = beautifier.beautify_and_restructure(target_file, backup=not args.no_backup)
|
||||
print(f"\nResults for {result['language']}:")
|
||||
print(f" Keys preserved: {result['preserved_keys']}/{result['total_reference_keys']}")
|
||||
if result['structure_match']['total_issues'] > 0:
|
||||
print(f" Structure issues: {result['structure_match']['total_issues']}")
|
||||
for issue in result['structure_match']['issues']:
|
||||
print(f" - {issue}")
|
||||
|
||||
elif args.all_languages:
|
||||
results = []
|
||||
for lang_dir in Path(args.locales_dir).iterdir():
|
||||
if lang_dir.is_dir() and lang_dir.name != "en-GB":
|
||||
translation_file = lang_dir / "translation.json"
|
||||
if translation_file.exists():
|
||||
if args.validate_only:
|
||||
order_result = beautifier.validate_key_order(translation_file)
|
||||
print(f"{lang_dir.name}: Order preserved = {order_result['order_preserved']}")
|
||||
else:
|
||||
result = beautifier.beautify_and_restructure(translation_file, backup=not args.no_backup)
|
||||
results.append(result)
|
||||
|
||||
if not args.validate_only and results:
|
||||
print(f"\n{'='*60}")
|
||||
print("RESTRUCTURING SUMMARY")
|
||||
print(f"{'='*60}")
|
||||
for result in sorted(results, key=lambda x: x['language']):
|
||||
print(f"{result['language']}: {result['preserved_keys']}/{result['total_reference_keys']} keys "
|
||||
f"({result['preserved_keys']/result['total_reference_keys']*100:.1f}%)")
|
||||
|
||||
else:
|
||||
parser.print_help()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
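For orientation, a minimal sketch of driving the restructuring step from Python rather than the CLI. The import path and the de-DE target locale are illustrative; only the JSONBeautifier class and methods shown above are assumed.

# Hypothetical programmatic use of the restructuring helper (module name is illustrative)
from pathlib import Path
from json_beautifier import JSONBeautifier  # assumed import path for the script above

beautifier = JSONBeautifier("frontend/public/locales")
target = Path("frontend/public/locales/de-DE/translation.json")

# Check key ordering first, then restructure with a backup, mirroring the CLI flow in main()
print(beautifier.validate_key_order(target))
result = beautifier.beautify_and_restructure(target, backup=True)
print(f"{result['preserved_keys']}/{result['total_reference_keys']} keys preserved")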
314
scripts/translations/translation_analyzer.py
Normal file
@ -0,0 +1,314 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Translation Analyzer for Stirling PDF Frontend
|
||||
Compares language files against en-GB golden truth file.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Set, Tuple
|
||||
import argparse
|
||||
try:
|
||||
import tomllib # Python 3.11+
|
||||
except ImportError:
|
||||
try:
|
||||
import toml as tomllib_fallback
|
||||
tomllib = None
|
||||
except ImportError:
|
||||
tomllib = None
|
||||
tomllib_fallback = None
|
||||
|
||||
|
||||
class TranslationAnalyzer:
|
||||
def __init__(self, locales_dir: str = "frontend/public/locales", ignore_file: str = "scripts/ignore_translation.toml"):
|
||||
self.locales_dir = Path(locales_dir)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
|
||||
self.golden_truth = self._load_json(self.golden_truth_file)
|
||||
self.ignore_file = Path(ignore_file)
|
||||
self.ignore_patterns = self._load_ignore_patterns()
|
||||
|
||||
def _load_json(self, file_path: Path) -> Dict:
|
||||
"""Load JSON file with error handling."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File not found: {file_path}")
|
||||
sys.exit(1)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error: Invalid JSON in {file_path}: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
def _load_ignore_patterns(self) -> Dict[str, Set[str]]:
|
||||
"""Load ignore patterns from TOML file."""
|
||||
if not self.ignore_file.exists():
|
||||
return {}
|
||||
|
||||
try:
|
||||
if tomllib:
|
||||
# Use Python 3.11+ built-in
|
||||
with open(self.ignore_file, 'rb') as f:
|
||||
ignore_data = tomllib.load(f)
|
||||
elif tomllib_fallback:
|
||||
# Use toml library fallback
|
||||
ignore_data = tomllib_fallback.load(self.ignore_file)
|
||||
else:
|
||||
# Simple parser as fallback
|
||||
ignore_data = self._parse_simple_toml()
|
||||
|
||||
# Convert lists to sets for faster lookup
|
||||
return {lang: set(data.get('ignore', []))
for lang, data in ignore_data.items() if data.get('ignore')}
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not load ignore file {self.ignore_file}: {e}")
|
||||
return {}
|
||||
|
||||
def _parse_simple_toml(self) -> Dict:
|
||||
"""Simple TOML parser for ignore patterns (fallback)."""
|
||||
ignore_data = {}
|
||||
current_section = None
|
||||
|
||||
with open(self.ignore_file, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line or line.startswith('#'):
|
||||
continue
|
||||
|
||||
if line.startswith('[') and line.endswith(']'):
|
||||
current_section = line[1:-1]
|
||||
ignore_data[current_section] = {'ignore': []}
|
||||
elif line.startswith('ignore = [') and current_section:
|
||||
# Handle ignore array
|
||||
continue
|
||||
elif line.strip().startswith("'") and current_section:
|
||||
# Extract quoted items
|
||||
item = line.strip().strip("',")
|
||||
if item:
|
||||
ignore_data[current_section]['ignore'].append(item)
|
||||
|
||||
return ignore_data
|
||||
|
||||
def _flatten_dict(self, d: Dict, parent_key: str = '', separator: str = '.') -> Dict[str, str]:
|
||||
"""Flatten nested dictionary into dot-notation keys."""
|
||||
items = []
|
||||
for k, v in d.items():
|
||||
new_key = f"{parent_key}{separator}{k}" if parent_key else k
|
||||
if isinstance(v, dict):
|
||||
items.extend(self._flatten_dict(v, new_key, separator).items())
|
||||
else:
|
||||
items.append((new_key, str(v)))
|
||||
return dict(items)
|
||||
|
||||
def get_all_language_files(self) -> List[Path]:
|
||||
"""Get all translation.json files except en-GB."""
|
||||
files = []
|
||||
for lang_dir in self.locales_dir.iterdir():
|
||||
if lang_dir.is_dir() and lang_dir.name != "en-GB":
|
||||
translation_file = lang_dir / "translation.json"
|
||||
if translation_file.exists():
|
||||
files.append(translation_file)
|
||||
return sorted(files)
|
||||
|
||||
def find_missing_translations(self, target_file: Path) -> Set[str]:
|
||||
"""Find keys that exist in en-GB but missing in target file."""
|
||||
target_data = self._load_json(target_file)
|
||||
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
|
||||
missing = set(golden_flat.keys()) - set(target_flat.keys())
|
||||
|
||||
# Filter out ignored keys
|
||||
lang_code = target_file.parent.name.replace('-', '_')
|
||||
ignore_set = self.ignore_patterns.get(lang_code, set())
|
||||
return missing - ignore_set
|
||||
|
||||
def find_untranslated_entries(self, target_file: Path) -> Set[str]:
|
||||
"""Find entries that appear to be untranslated (identical to en-GB)."""
|
||||
target_data = self._load_json(target_file)
|
||||
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
|
||||
lang_code = target_file.parent.name.replace('-', '_')
|
||||
ignore_set = self.ignore_patterns.get(lang_code, set())
|
||||
|
||||
untranslated = set()
|
||||
for key in target_flat:
|
||||
if key in golden_flat:
|
||||
target_value = target_flat[key]
|
||||
golden_value = golden_flat[key]
|
||||
|
||||
# Check if marked as [UNTRANSLATED] or identical to en-GB
|
||||
if (isinstance(target_value, str) and target_value.startswith("[UNTRANSLATED]")) or \
|
||||
(golden_value == target_value and key not in ignore_set and not self._is_expected_identical(key, golden_value)):
|
||||
untranslated.add(key)
|
||||
|
||||
return untranslated
|
||||
|
||||
def _is_expected_identical(self, key: str, value: str) -> bool:
|
||||
"""Check if a key-value pair is expected to be identical across languages."""
|
||||
# Keys that should be identical across languages
|
||||
identical_patterns = [
|
||||
'language.direction',
|
||||
'true', 'false',
|
||||
'unknown'
|
||||
]
|
||||
|
||||
# Values that are often identical (numbers, symbols, etc.)
|
||||
if value.strip() in ['ltr', 'rtl', 'True', 'False']:
|
||||
return True
|
||||
|
||||
# Check for patterns
|
||||
for pattern in identical_patterns:
|
||||
if pattern in key.lower():
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def find_extra_translations(self, target_file: Path) -> Set[str]:
|
||||
"""Find keys that exist in target file but not in en-GB."""
|
||||
target_data = self._load_json(target_file)
|
||||
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
|
||||
return set(target_flat.keys()) - set(golden_flat.keys())
|
||||
|
||||
def analyze_file(self, target_file: Path) -> Dict:
|
||||
"""Complete analysis of a single translation file."""
|
||||
lang_code = target_file.parent.name
|
||||
|
||||
missing = self.find_missing_translations(target_file)
|
||||
untranslated = self.find_untranslated_entries(target_file)
|
||||
extra = self.find_extra_translations(target_file)
|
||||
|
||||
target_data = self._load_json(target_file)
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
|
||||
# Calculate completion rate excluding ignored keys (ignore patterns use underscored language codes)
ignore_lang_code = target_file.parent.name.replace('-', '_')
ignore_set = self.ignore_patterns.get(ignore_lang_code, set())
|
||||
|
||||
relevant_keys = set(golden_flat.keys()) - ignore_set
|
||||
total_keys = len(relevant_keys)
|
||||
|
||||
# Count keys that exist and are properly translated (not [UNTRANSLATED])
|
||||
properly_translated = 0
|
||||
for key in relevant_keys:
|
||||
if key in target_flat:
|
||||
value = target_flat[key]
|
||||
if not (isinstance(value, str) and value.startswith("[UNTRANSLATED]")):
|
||||
if key not in untranslated: # Not identical to en-GB (unless expected)
|
||||
properly_translated += 1
|
||||
|
||||
completion_rate = (properly_translated / total_keys) * 100 if total_keys > 0 else 0
|
||||
|
||||
return {
|
||||
'language': lang_code,
|
||||
'file': target_file,
|
||||
'missing_count': len(missing),
|
||||
'missing_keys': sorted(missing),
|
||||
'untranslated_count': len(untranslated),
|
||||
'untranslated_keys': sorted(untranslated),
|
||||
'extra_count': len(extra),
|
||||
'extra_keys': sorted(extra),
|
||||
'total_keys': total_keys,
|
||||
'completion_rate': completion_rate
|
||||
}
|
||||
|
||||
def analyze_all_files(self) -> List[Dict]:
|
||||
"""Analyze all translation files."""
|
||||
results = []
|
||||
for file_path in self.get_all_language_files():
|
||||
results.append(self.analyze_file(file_path))
|
||||
return sorted(results, key=lambda x: x['language'])
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Analyze translation files against en-GB golden truth')
|
||||
parser.add_argument('--locales-dir', default='frontend/public/locales',
|
||||
help='Path to locales directory')
|
||||
parser.add_argument('--ignore-file', default='scripts/ignore_translation.toml',
|
||||
help='Path to ignore patterns TOML file')
|
||||
parser.add_argument('--language', help='Analyze specific language only')
|
||||
parser.add_argument('--missing-only', action='store_true',
|
||||
help='Show only missing translations')
|
||||
parser.add_argument('--untranslated-only', action='store_true',
|
||||
help='Show only untranslated entries')
|
||||
parser.add_argument('--summary', action='store_true',
|
||||
help='Show summary statistics only')
|
||||
parser.add_argument('--format', choices=['text', 'json'], default='text',
|
||||
help='Output format')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
analyzer = TranslationAnalyzer(args.locales_dir, args.ignore_file)
|
||||
|
||||
if args.language:
|
||||
target_file = Path(args.locales_dir) / args.language / "translation.json"
|
||||
if not target_file.exists():
|
||||
print(f"Error: Translation file not found for language: {args.language}")
|
||||
sys.exit(1)
|
||||
results = [analyzer.analyze_file(target_file)]
|
||||
else:
|
||||
results = analyzer.analyze_all_files()
|
||||
|
||||
if args.format == 'json':
|
||||
print(json.dumps(results, indent=2, default=str))
|
||||
return
|
||||
|
||||
# Text format output
|
||||
for result in results:
|
||||
lang = result['language']
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Language: {lang}")
|
||||
print(f"File: {result['file']}")
|
||||
print(f"Completion Rate: {result['completion_rate']:.1f}%")
|
||||
print(f"Total Keys in en-GB: {result['total_keys']}")
|
||||
|
||||
if not args.summary:
|
||||
if not args.untranslated_only:
|
||||
print(f"\nMissing Translations ({result['missing_count']}):")
|
||||
for key in result['missing_keys'][:10]: # Show first 10
|
||||
print(f" - {key}")
|
||||
if len(result['missing_keys']) > 10:
|
||||
print(f" ... and {len(result['missing_keys']) - 10} more")
|
||||
|
||||
if not args.missing_only:
|
||||
print(f"\nUntranslated Entries ({result['untranslated_count']}):")
|
||||
for key in result['untranslated_keys'][:10]: # Show first 10
|
||||
print(f" - {key}")
|
||||
if len(result['untranslated_keys']) > 10:
|
||||
print(f" ... and {len(result['untranslated_keys']) - 10} more")
|
||||
|
||||
if result['extra_count'] > 0:
|
||||
print(f"\nExtra Keys Not in en-GB ({result['extra_count']}):")
|
||||
for key in result['extra_keys'][:5]:
|
||||
print(f" - {key}")
|
||||
if len(result['extra_keys']) > 5:
|
||||
print(f" ... and {len(result['extra_keys']) - 5} more")
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print("SUMMARY")
|
||||
print(f"{'='*60}")
|
||||
avg_completion = sum(r['completion_rate'] for r in results) / len(results) if results else 0
|
||||
print(f"Average Completion Rate: {avg_completion:.1f}%")
|
||||
print(f"Languages Analyzed: {len(results)}")
|
||||
|
||||
# Top languages by completion
|
||||
sorted_by_completion = sorted(results, key=lambda x: x['completion_rate'], reverse=True)
|
||||
print(f"\nTop 5 Most Complete Languages:")
|
||||
for result in sorted_by_completion[:5]:
|
||||
print(f" {result['language']}: {result['completion_rate']:.1f}%")
|
||||
|
||||
print(f"\nBottom 5 Languages Needing Attention:")
|
||||
for result in sorted_by_completion[-5:]:
|
||||
print(f" {result['language']}: {result['completion_rate']:.1f}% ({result['missing_count']} missing, {result['untranslated_count']} untranslated)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
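As a usage note, a short sketch of calling the analyzer programmatically; the import path and the fr-FR locale are illustrative, while the class, constructor arguments and result keys come from the script above.

# Hypothetical programmatic use of the analyzer (module name is illustrative)
from pathlib import Path
from translation_analyzer import TranslationAnalyzer  # assumed import path

analyzer = TranslationAnalyzer("frontend/public/locales", "scripts/ignore_translation.toml")

# Per-language report, equivalent to `--language fr-FR` on the CLI
report = analyzer.analyze_file(Path("frontend/public/locales/fr-FR/translation.json"))
print(f"{report['language']}: {report['completion_rate']:.1f}% complete, "
      f"{report['missing_count']} missing, {report['untranslated_count']} untranslated")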
371
scripts/translations/translation_merger.py
Normal file
@ -0,0 +1,371 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Translation Merger for Stirling PDF Frontend
|
||||
Merges missing translations from en-GB into target language files.
|
||||
Useful for AI-assisted translation workflows.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Set, Tuple, Any
|
||||
import argparse
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
import tomllib # Python 3.11+
|
||||
except ImportError:
|
||||
try:
|
||||
import toml as tomllib_fallback
|
||||
tomllib = None
|
||||
except ImportError:
|
||||
tomllib = None
|
||||
tomllib_fallback = None
|
||||
|
||||
|
||||
class TranslationMerger:
|
||||
def __init__(self, locales_dir: str = "frontend/public/locales", ignore_file: str = "scripts/ignore_translation.toml"):
|
||||
self.locales_dir = Path(locales_dir)
|
||||
self.golden_truth_file = self.locales_dir / "en-GB" / "translation.json"
|
||||
self.golden_truth = self._load_json(self.golden_truth_file)
|
||||
self.ignore_file = Path(ignore_file)
|
||||
self.ignore_patterns = self._load_ignore_patterns()
|
||||
|
||||
def _load_json(self, file_path: Path) -> Dict:
|
||||
"""Load JSON file with error handling."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File not found: {file_path}")
|
||||
sys.exit(1)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error: Invalid JSON in {file_path}: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
def _save_json(self, data: Dict, file_path: Path, backup: bool = True) -> None:
|
||||
"""Save JSON file with backup option."""
|
||||
if backup and file_path.exists():
|
||||
backup_path = file_path.with_suffix(f'.backup.{datetime.now().strftime("%Y%m%d_%H%M%S")}.json')
|
||||
shutil.copy2(file_path, backup_path)
|
||||
print(f"Backup created: {backup_path}")
|
||||
|
||||
with open(file_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2, ensure_ascii=False)
|
||||
|
||||
def _load_ignore_patterns(self) -> Dict[str, Set[str]]:
|
||||
"""Load ignore patterns from TOML file."""
|
||||
if not self.ignore_file.exists():
|
||||
return {}
|
||||
|
||||
try:
|
||||
# Simple parser for ignore patterns
|
||||
ignore_data = {}
|
||||
current_section = None
|
||||
|
||||
with open(self.ignore_file, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line or line.startswith('#'):
|
||||
continue
|
||||
|
||||
if line.startswith('[') and line.endswith(']'):
|
||||
current_section = line[1:-1]
|
||||
ignore_data[current_section] = set()
|
||||
elif line.strip().startswith("'") and current_section:
|
||||
# Extract quoted items
|
||||
item = line.strip().strip("',")
|
||||
if item:
|
||||
ignore_data[current_section].add(item)
|
||||
|
||||
return ignore_data
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not load ignore file {self.ignore_file}: {e}")
|
||||
return {}
|
||||
|
||||
def _get_nested_value(self, data: Dict, key_path: str) -> Any:
|
||||
"""Get value from nested dict using dot notation."""
|
||||
keys = key_path.split('.')
|
||||
current = data
|
||||
for key in keys:
|
||||
if isinstance(current, dict) and key in current:
|
||||
current = current[key]
|
||||
else:
|
||||
return None
|
||||
return current
|
||||
|
||||
def _set_nested_value(self, data: Dict, key_path: str, value: Any) -> None:
|
||||
"""Set value in nested dict using dot notation."""
|
||||
keys = key_path.split('.')
|
||||
current = data
|
||||
for key in keys[:-1]:
|
||||
if key not in current:
|
||||
current[key] = {}
|
||||
elif not isinstance(current[key], dict):
|
||||
# If the current value is not a dict, we can't nest into it
|
||||
# This handles cases where a key exists as a string but we need to make it a dict
|
||||
print(f"Warning: Converting non-dict value at '{key}' to dict to allow nesting")
|
||||
current[key] = {}
|
||||
current = current[key]
|
||||
current[keys[-1]] = value
|
||||
|
||||
def _flatten_dict(self, d: Dict, parent_key: str = '', separator: str = '.') -> Dict[str, Any]:
|
||||
"""Flatten nested dictionary into dot-notation keys."""
|
||||
items = []
|
||||
for k, v in d.items():
|
||||
new_key = f"{parent_key}{separator}{k}" if parent_key else k
|
||||
if isinstance(v, dict):
|
||||
items.extend(self._flatten_dict(v, new_key, separator).items())
|
||||
else:
|
||||
items.append((new_key, v))
|
||||
return dict(items)
|
||||
|
||||
def get_missing_keys(self, target_file: Path) -> List[str]:
|
||||
"""Get list of missing keys in target file."""
|
||||
lang_code = target_file.parent.name.replace('-', '_')
|
||||
ignore_set = self.ignore_patterns.get(lang_code, set())
|
||||
|
||||
if not target_file.exists():
|
||||
golden_keys = set(self._flatten_dict(self.golden_truth).keys())
|
||||
return sorted(golden_keys - ignore_set)
|
||||
|
||||
target_data = self._load_json(target_file)
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
|
||||
missing = set(golden_flat.keys()) - set(target_flat.keys())
|
||||
return sorted(missing - ignore_set)
|
||||
|
||||
def add_missing_translations(self, target_file: Path, keys_to_add: List[str] = None,
|
||||
mark_untranslated: bool = True) -> Dict:
|
||||
"""Add missing translations from en-GB to target file."""
|
||||
if not target_file.exists():
|
||||
target_data = {}
|
||||
else:
|
||||
target_data = self._load_json(target_file)
|
||||
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
missing_keys = keys_to_add or self.get_missing_keys(target_file)
|
||||
|
||||
added_count = 0
|
||||
for key in missing_keys:
|
||||
if key in golden_flat:
|
||||
value = golden_flat[key]
|
||||
if mark_untranslated and isinstance(value, str):
|
||||
# Mark as untranslated for AI to translate later
|
||||
value = f"[UNTRANSLATED] {value}"
|
||||
|
||||
self._set_nested_value(target_data, key, value)
|
||||
added_count += 1
|
||||
|
||||
return {
|
||||
'added_count': added_count,
|
||||
'missing_keys': missing_keys,
|
||||
'data': target_data
|
||||
}
|
||||
|
||||
def extract_untranslated_entries(self, target_file: Path, output_file: Path = None) -> Dict:
|
||||
"""Extract entries marked as untranslated or identical to en-GB for AI translation."""
|
||||
if not target_file.exists():
|
||||
print(f"Error: Target file does not exist: {target_file}")
|
||||
return {}
|
||||
|
||||
target_data = self._load_json(target_file)
|
||||
golden_flat = self._flatten_dict(self.golden_truth)
|
||||
target_flat = self._flatten_dict(target_data)
|
||||
|
||||
untranslated_entries = {}
|
||||
|
||||
for key, value in target_flat.items():
|
||||
if key in golden_flat:
|
||||
golden_value = golden_flat[key]
|
||||
|
||||
# Check if marked as untranslated
|
||||
if isinstance(value, str) and value.startswith("[UNTRANSLATED]"):
|
||||
untranslated_entries[key] = {
|
||||
'original': golden_value,
|
||||
'current': value,
|
||||
'reason': 'marked_untranslated'
|
||||
}
|
||||
# Check if identical to golden (and should be translated)
|
||||
elif value == golden_value and not self._is_expected_identical(key, value):
|
||||
untranslated_entries[key] = {
|
||||
'original': golden_value,
|
||||
'current': value,
|
||||
'reason': 'identical_to_english'
|
||||
}
|
||||
|
||||
if output_file:
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(untranslated_entries, f, indent=2, ensure_ascii=False)
|
||||
|
||||
return untranslated_entries
|
||||
|
||||
def _is_expected_identical(self, key: str, value: str) -> bool:
|
||||
"""Check if a key-value pair is expected to be identical across languages."""
|
||||
identical_patterns = [
|
||||
'language.direction',
|
||||
]
|
||||
|
||||
if str(value).strip() in ['ltr', 'rtl', 'True', 'False', 'true', 'false']:
|
||||
return True
|
||||
|
||||
for pattern in identical_patterns:
|
||||
if pattern in key.lower():
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def apply_translations(self, target_file: Path, translations: Dict[str, str],
|
||||
backup: bool = True) -> Dict:
|
||||
"""Apply provided translations to target file."""
|
||||
if not target_file.exists():
|
||||
print(f"Error: Target file does not exist: {target_file}")
|
||||
return {'success': False, 'error': 'File not found'}
|
||||
|
||||
target_data = self._load_json(target_file)
|
||||
applied_count = 0
|
||||
errors = []
|
||||
|
||||
for key, translation in translations.items():
|
||||
try:
|
||||
# Remove [UNTRANSLATED] marker if present
|
||||
if translation.startswith("[UNTRANSLATED]"):
|
||||
translation = translation.replace("[UNTRANSLATED]", "").strip()
|
||||
|
||||
self._set_nested_value(target_data, key, translation)
|
||||
applied_count += 1
|
||||
except Exception as e:
|
||||
errors.append(f"Error setting {key}: {e}")
|
||||
|
||||
if applied_count > 0:
|
||||
self._save_json(target_data, target_file, backup)
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'applied_count': applied_count,
|
||||
'errors': errors,
|
||||
'data': target_data
|
||||
}
|
||||
|
||||
def create_translation_template(self, target_file: Path, output_file: Path) -> None:
|
||||
"""Create a template file for AI translation with context."""
|
||||
untranslated = self.extract_untranslated_entries(target_file)
|
||||
|
||||
template = {
|
||||
'metadata': {
|
||||
'source_language': 'en-GB',
|
||||
'target_language': target_file.parent.name,
|
||||
'total_entries': len(untranslated),
|
||||
'created_at': datetime.now().isoformat(),
|
||||
'instructions': 'Translate the "original" values to the target language. Keep the same keys.'
|
||||
},
|
||||
'translations': {}
|
||||
}
|
||||
|
||||
for key, entry in untranslated.items():
|
||||
template['translations'][key] = {
|
||||
'original': entry['original'],
|
||||
'translated': '', # AI should fill this
|
||||
'context': self._get_context_for_key(key),
|
||||
'reason': entry['reason']
|
||||
}
|
||||
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(template, f, indent=2, ensure_ascii=False)
|
||||
|
||||
print(f"Translation template created: {output_file}")
|
||||
print(f"Contains {len(untranslated)} entries to translate")
|
||||
|
||||
def _get_context_for_key(self, key: str) -> str:
|
||||
"""Get context information for a translation key."""
|
||||
parts = key.split('.')
|
||||
if len(parts) >= 2:
|
||||
return f"Section: {parts[0]}, Property: {parts[-1]}"
|
||||
return f"Property: {parts[-1]}"
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Merge and manage translation files')
|
||||
parser.add_argument('--locales-dir', default='frontend/public/locales',
|
||||
help='Path to locales directory')
|
||||
parser.add_argument('--ignore-file', default='scripts/ignore_translation.toml',
|
||||
help='Path to ignore patterns TOML file')
|
||||
parser.add_argument('language', help='Target language code (e.g., fr-FR)')
|
||||
|
||||
subparsers = parser.add_subparsers(dest='command', help='Available commands')
|
||||
|
||||
# Add missing command
|
||||
add_parser = subparsers.add_parser('add-missing', help='Add missing translations from en-GB')
|
||||
add_parser.add_argument('--no-backup', action='store_true', help='Skip backup creation')
|
||||
add_parser.add_argument('--mark-untranslated', action='store_true', default=True,
|
||||
help='Mark added translations as [UNTRANSLATED]')
|
||||
|
||||
# Extract untranslated command
|
||||
extract_parser = subparsers.add_parser('extract-untranslated', help='Extract untranslated entries')
|
||||
extract_parser.add_argument('--output', help='Output file path')
|
||||
|
||||
# Create template command
|
||||
template_parser = subparsers.add_parser('create-template', help='Create AI translation template')
|
||||
template_parser.add_argument('--output', required=True, help='Output template file path')
|
||||
|
||||
# Apply translations command
|
||||
apply_parser = subparsers.add_parser('apply-translations', help='Apply translations from JSON file')
|
||||
apply_parser.add_argument('--translations-file', required=True, help='JSON file with translations')
|
||||
apply_parser.add_argument('--no-backup', action='store_true', help='Skip backup creation')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.command:
|
||||
parser.print_help()
|
||||
return
|
||||
|
||||
merger = TranslationMerger(args.locales_dir, args.ignore_file)
|
||||
target_file = Path(args.locales_dir) / args.language / "translation.json"
|
||||
|
||||
if args.command == 'add-missing':
|
||||
print(f"Adding missing translations to {args.language}...")
|
||||
result = merger.add_missing_translations(
|
||||
target_file,
|
||||
mark_untranslated=args.mark_untranslated
|
||||
)
|
||||
|
||||
merger._save_json(result['data'], target_file, backup=not args.no_backup)
|
||||
print(f"Added {result['added_count']} missing translations")
|
||||
|
||||
elif args.command == 'extract-untranslated':
|
||||
output_file = Path(args.output) if args.output else target_file.with_suffix('.untranslated.json')
|
||||
untranslated = merger.extract_untranslated_entries(target_file, output_file)
|
||||
print(f"Extracted {len(untranslated)} untranslated entries to {output_file}")
|
||||
|
||||
elif args.command == 'create-template':
|
||||
output_file = Path(args.output)
|
||||
merger.create_translation_template(target_file, output_file)
|
||||
|
||||
elif args.command == 'apply-translations':
|
||||
with open(args.translations_file, 'r', encoding='utf-8') as f:
|
||||
translations_data = json.load(f)
|
||||
|
||||
# Extract translations from template format or simple dict
|
||||
if 'translations' in translations_data:
|
||||
translations = {k: v['translated'] for k, v in translations_data['translations'].items()
|
||||
if v.get('translated')}
|
||||
else:
|
||||
translations = translations_data
|
||||
|
||||
result = merger.apply_translations(target_file, translations, backup=not args.no_backup)
|
||||
|
||||
if result['success']:
|
||||
print(f"Applied {result['applied_count']} translations")
|
||||
if result['errors']:
|
||||
print(f"Errors: {len(result['errors'])}")
|
||||
for error in result['errors'][:5]:
|
||||
print(f" - {error}")
|
||||
else:
|
||||
print(f"Failed: {result.get('error', 'Unknown error')}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
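To tie the subcommands together, a sketch of the AI-assisted round trip the merger is built for; the module import path and the fr-FR locale are illustrative, and the methods and CLI flags used are the ones defined above.

# Equivalent CLI round trip (paths illustrative):
#   python scripts/translations/translation_merger.py fr-FR add-missing
#   python scripts/translations/translation_merger.py fr-FR create-template --output fr_template.json
#   python scripts/translations/translation_merger.py fr-FR apply-translations --translations-file fr_template.json
from pathlib import Path
from translation_merger import TranslationMerger  # assumed import path

merger = TranslationMerger("frontend/public/locales", "scripts/ignore_translation.toml")
target = Path("frontend/public/locales/fr-FR/translation.json")

# Seed missing keys as [UNTRANSLATED], save with a backup, then list what still needs translating
added = merger.add_missing_translations(target, mark_untranslated=True)
merger._save_json(added['data'], target, backup=True)
pending = merger.extract_untranslated_entries(target)
print(f"{added['added_count']} keys added, {len(pending)} entries awaiting translation")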