2024-08-31 15:54:11 +02:00
"""
Author : Ludy87
Description : This script processes . properties files for localization checks . It compares translation files in a branch with
a reference file to ensure consistency . The script performs two main checks :
1. Verifies that the number of lines ( including comments and empty lines ) in the translation files matches the reference file .
2. Ensures that all keys in the translation files are present in the reference file and vice versa .
The script also provides functionality to update the translation files to match the reference file by adding missing keys and
adjusting the format .
Usage :
python script_name . py - - reference - file < path_to_reference_file > - - branch < branch_name > [ - - files < list_of_changed_files > ]
"""
2024-11-21 12:31:32 +01:00
2024-08-31 15:54:11 +02:00
import copy
import glob
2024-08-25 23:04:28 +02:00
import os
import argparse
2024-08-31 15:54:11 +02:00
import re
def parse_properties_file ( file_path ) :
""" Parses a .properties file and returns a list of objects (including comments, empty lines, and line numbers). """
properties_list = [ ]
with open ( file_path , " r " , encoding = " utf-8 " ) as file :
for line_number , line in enumerate ( file , start = 1 ) :
stripped_line = line . strip ( )
# Empty lines
if not stripped_line :
properties_list . append (
{ " line_number " : line_number , " type " : " empty " , " content " : " " }
)
continue
# Comments
if stripped_line . startswith ( " # " ) :
properties_list . append (
{
" line_number " : line_number ,
" type " : " comment " ,
" content " : stripped_line ,
}
)
continue
# Key-value pairs
match = re . match ( r " ^([^=]+)=(.*)$ " , line )
if match :
key , value = match . groups ( )
properties_list . append (
{
" line_number " : line_number ,
" type " : " entry " ,
" key " : key . strip ( ) ,
" value " : value . strip ( ) ,
}
)
return properties_list
def write_json_file ( file_path , updated_properties ) :
updated_lines = { entry [ " line_number " ] : entry for entry in updated_properties }
# Sort by line numbers and retain comments and empty lines
all_lines = sorted ( set ( updated_lines . keys ( ) ) )
original_format = [ ]
for line in all_lines :
if line in updated_lines :
entry = updated_lines [ line ]
else :
entry = None
ref_entry = updated_lines [ line ]
if ref_entry [ " type " ] in [ " comment " , " empty " ] :
original_format . append ( ref_entry )
elif entry is None :
# Add missing entries from the reference file
original_format . append ( ref_entry )
elif entry [ " type " ] == " entry " :
# Replace entries with those from the current JSON
original_format . append ( entry )
# Write back in the original format
with open ( file_path , " w " , encoding = " utf-8 " ) as file :
for entry in original_format :
if entry [ " type " ] == " comment " :
file . write ( f " { entry [ ' content ' ] } \n " )
elif entry [ " type " ] == " empty " :
file . write ( f " { entry [ ' content ' ] } \n " )
elif entry [ " type " ] == " entry " :
file . write ( f " { entry [ ' key ' ] } = { entry [ ' value ' ] } \n " )
def update_missing_keys ( reference_file , file_list , branch = " " ) :
reference_properties = parse_properties_file ( reference_file )
for file_path in file_list :
basename_current_file = os . path . basename ( branch + file_path )
if (
basename_current_file == os . path . basename ( reference_file )
or not file_path . endswith ( " .properties " )
or not basename_current_file . startswith ( " messages_ " )
) :
continue
current_properties = parse_properties_file ( branch + file_path )
updated_properties = [ ]
for ref_entry in reference_properties :
ref_entry_copy = copy . deepcopy ( ref_entry )
for current_entry in current_properties :
if current_entry [ " type " ] == " entry " :
if ref_entry_copy [ " type " ] != " entry " :
continue
if ref_entry_copy [ " key " ] == current_entry [ " key " ] :
ref_entry_copy [ " value " ] = current_entry [ " value " ]
updated_properties . append ( ref_entry_copy )
write_json_file ( branch + file_path , updated_properties )
def check_for_missing_keys ( reference_file , file_list , branch ) :
update_missing_keys ( reference_file , file_list , branch + " / " )
2024-08-25 23:04:28 +02:00
def read_properties ( file_path ) :
with open ( file_path , " r " , encoding = " utf-8 " ) as file :
return file . read ( ) . splitlines ( )
2024-11-21 12:31:32 +01:00
def check_for_differences ( reference_file , file_list , branch , actor ) :
2024-08-25 23:04:28 +02:00
reference_branch = reference_file . split ( " / " ) [ 0 ]
basename_reference_file = os . path . basename ( reference_file )
report = [ ]
report . append (
2024-08-31 15:54:11 +02:00
f " ### 📋 Checking with the file ` { basename_reference_file } ` from the ` { reference_branch } ` - Checking the ` { branch } ` "
2024-08-25 23:04:28 +02:00
)
2024-08-31 15:54:11 +02:00
reference_lines = read_properties ( reference_file )
has_differences = False
only_reference_file = True
2024-08-25 23:04:28 +02:00
for file_path in file_list :
basename_current_file = os . path . basename ( branch + " / " + file_path )
if (
2024-08-31 15:54:11 +02:00
basename_current_file == basename_reference_file
2024-08-25 23:04:28 +02:00
or not file_path . endswith ( " .properties " )
or not basename_current_file . startswith ( " messages_ " )
) :
continue
2024-08-31 15:54:11 +02:00
only_reference_file = False
report . append ( f " #### 🗂️ **Checking File:** ` { basename_current_file } `... " )
current_lines = read_properties ( branch + " / " + file_path )
reference_line_count = len ( reference_lines )
current_line_count = len ( current_lines )
2024-08-25 23:04:28 +02:00
2024-08-31 15:54:11 +02:00
if reference_line_count != current_line_count :
2024-08-25 23:04:28 +02:00
report . append ( " " )
2024-08-31 15:54:11 +02:00
report . append ( " - **Test 1 Status:** ❌ Failed " )
has_differences = True
if reference_line_count > current_line_count :
2024-08-25 23:04:28 +02:00
report . append (
2024-08-31 15:54:11 +02:00
f " - **Issue:** Missing lines! Comments, empty lines, or translation strings are missing. Details: { reference_line_count } (reference) vs { current_line_count } (current). "
2024-08-25 23:04:28 +02:00
)
2024-08-31 15:54:11 +02:00
elif reference_line_count < current_line_count :
2024-08-25 23:04:28 +02:00
report . append (
2024-08-31 15:54:11 +02:00
f " - **Issue:** Too many lines! Check your translation files! Details: { reference_line_count } (reference) vs { current_line_count } (current). "
2024-08-25 23:04:28 +02:00
)
else :
2024-08-31 15:54:11 +02:00
report . append ( " - **Test 1 Status:** ✅ Passed " )
# Check for missing or extra keys
current_keys = [ ]
reference_keys = [ ]
for line in current_lines :
if not line . startswith ( " # " ) and line != " " and " = " in line :
key , _ = line . split ( " = " , 1 )
current_keys . append ( key )
for line in reference_lines :
if not line . startswith ( " # " ) and line != " " and " = " in line :
key , _ = line . split ( " = " , 1 )
reference_keys . append ( key )
current_keys_set = set ( current_keys )
reference_keys_set = set ( reference_keys )
missing_keys = current_keys_set . difference ( reference_keys_set )
extra_keys = reference_keys_set . difference ( current_keys_set )
missing_keys_list = list ( missing_keys )
extra_keys_list = list ( extra_keys )
2024-08-25 23:04:28 +02:00
2024-08-31 15:54:11 +02:00
if missing_keys_list or extra_keys_list :
has_differences = True
missing_keys_str = " `, ` " . join ( missing_keys_list )
extra_keys_str = " `, ` " . join ( extra_keys_list )
report . append ( " - **Test 2 Status:** ❌ Failed " )
if missing_keys_list :
report . append (
f " - **Issue:** There are keys in *** { basename_current_file } *** ` { missing_keys_str } ` that are not present in *** { basename_reference_file } ***! "
)
if extra_keys_list :
report . append (
f " - **Issue:** There are keys in *** { basename_reference_file } *** ` { extra_keys_str } ` that are not present in *** { basename_current_file } ***! "
)
else :
report . append ( " - **Test 2 Status:** ✅ Passed " )
2024-08-31 22:48:40 +02:00
report . append ( " " )
2024-08-31 15:54:11 +02:00
report . append ( " --- " )
report . append ( " " )
if has_differences :
report . append ( " ## ❌ Overall Check Status: **_Failed_** " )
2024-11-21 12:31:32 +01:00
report . append ( " " )
report . append (
f " @ { actor } please check your translation if it conforms to the standard. Follow the format of [messages_en_GB.properties](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/src/main/resources/messages_en_GB.properties) "
)
2024-08-25 23:04:28 +02:00
else :
2024-08-31 15:54:11 +02:00
report . append ( " ## ✅ Overall Check Status: **_Success_** " )
2024-11-21 12:31:32 +01:00
report . append ( " " )
report . append (
f " Thanks @ { actor } for your help in keeping the translations up to date. "
)
2024-08-31 15:54:11 +02:00
if not only_reference_file :
print ( " \n " . join ( report ) )
2024-08-25 23:04:28 +02:00
if __name__ == " __main__ " :
parser = argparse . ArgumentParser ( description = " Find missing keys " )
2024-11-21 12:31:32 +01:00
parser . add_argument (
" --actor " ,
required = False ,
help = " Actor from PR. " ,
)
2024-08-25 23:04:28 +02:00
parser . add_argument (
" --reference-file " ,
required = True ,
help = " Path to the reference file. " ,
)
parser . add_argument (
" --branch " ,
type = str ,
required = True ,
help = " Branch name. " ,
)
parser . add_argument (
" --files " ,
nargs = " + " ,
2024-08-31 15:54:11 +02:00
required = False ,
2024-08-25 23:04:28 +02:00
help = " List of changed files, separated by spaces. " ,
)
args = parser . parse_args ( )
file_list = args . files
2024-08-31 15:54:11 +02:00
if file_list is None :
file_list = glob . glob (
os . getcwd ( ) + " /src/**/messages_*.properties " , recursive = True
)
update_missing_keys ( args . reference_file , file_list )
else :
2024-11-21 12:31:32 +01:00
check_for_differences ( args . reference_file , file_list , args . branch , args . actor )