2024-08-31 15:54:11 +02:00
"""
Author : Ludy87
Description : This script processes . properties files for localization checks . It compares translation files in a branch with
a reference file to ensure consistency . The script performs two main checks :
1. Verifies that the number of lines ( including comments and empty lines ) in the translation files matches the reference file .
2. Ensures that all keys in the translation files are present in the reference file and vice versa .
The script also provides functionality to update the translation files to match the reference file by adding missing keys and
adjusting the format .
Usage :
2024-11-23 12:49:49 +01:00
python check_language_properties . py - - reference - file < path_to_reference_file > - - branch < branch_name > [ - - actor < actor_name > ] [ - - files < list_of_changed_files > ]
2024-08-31 15:54:11 +02:00
"""
2024-11-21 12:31:32 +01:00
2024-08-31 15:54:11 +02:00
import copy
import glob
2024-08-25 23:04:28 +02:00
import os
import argparse
2024-08-31 15:54:11 +02:00
import re
2024-11-23 12:49:49 +01:00
# Maximum size, in bytes, accepted for a properties file (200 KB).
# check_for_differences raises ValueError for any listed file larger than this.
MAX_FILE_SIZE = 200 * 1024
2024-08-31 15:54:11 +02:00
def parse_properties_file(file_path):
    """Parse a .properties file into a list of per-line records.

    Every record is a dict holding the 1-based ``line_number`` and a ``type``:

    * ``"empty"``   - blank line; ``content`` is the empty string.
    * ``"comment"`` - line starting with ``#``; ``content`` is the stripped line.
    * ``"entry"``   - ``key=value`` line; ``key`` and ``value`` are stripped.

    Lines that fit none of these shapes (no ``=``, or nothing before the
    first ``=``) are skipped and produce no record.
    """
    records = []
    with open(file_path, "r", encoding="utf-8") as handle:
        for number, raw in enumerate(handle, start=1):
            text = raw.strip()
            if not text:
                record = {"line_number": number, "type": "empty", "content": ""}
            elif text.startswith("#"):
                record = {"line_number": number, "type": "comment", "content": text}
            else:
                # Only the first "=" separates key from value, so values may
                # themselves contain "=".
                name, separator, remainder = raw.partition("=")
                if not (separator and name):
                    continue
                record = {
                    "line_number": number,
                    "type": "entry",
                    "key": name.strip(),
                    "value": remainder.strip(),
                }
            records.append(record)
    return records
def write_json_file(file_path, updated_properties):
    """Write parsed property records to *file_path* in .properties format.

    Despite the name, the output is not JSON: each record becomes one text
    line. Records are written in ascending ``line_number`` order; when two
    records share a line number the later one in *updated_properties* wins.

    Args:
        file_path: Destination file; it is overwritten.
        updated_properties: Iterable of record dicts. ``"comment"`` and
            ``"empty"`` records are written from ``content``; ``"entry"``
            records are written as ``key=value``. Records of any other type
            are ignored.
    """
    records_by_line = {entry["line_number"]: entry for entry in updated_properties}

    # Render everything before opening the file so a malformed record cannot
    # leave a truncated file behind.
    rendered = []
    for line_number in sorted(records_by_line):
        entry = records_by_line[line_number]
        kind = entry["type"]
        if kind in ("comment", "empty"):
            rendered.append(f"{entry['content']}\n")
        elif kind == "entry":
            rendered.append(f"{entry['key']}={entry['value']}\n")

    with open(file_path, "w", encoding="utf-8") as file:
        file.writelines(rendered)
def update_missing_keys(reference_file, file_list, branch=""):
    """Rewrite translation files so their layout mirrors the reference file.

    For every eligible file the reference records (comments, blank lines and
    entries) are taken as the template. An entry keeps the reference value
    unless the translation file defines the same key, in which case the
    translated value is used. The result is written back over the translation
    file.

    Args:
        reference_file: Path of the reference .properties file.
        file_list: Paths of candidate translation files, relative to *branch*.
        branch: Directory prefix joined in front of each path in *file_list*.

    Files are skipped when they share the reference file's basename, do not
    end in ``.properties``, or whose basename does not start with
    ``messages_``.
    """
    reference_properties = parse_properties_file(reference_file)
    basename_reference_file = os.path.basename(reference_file)

    for file_path in file_list:
        target_path = os.path.join(branch, file_path)
        basename_current_file = os.path.basename(target_path)
        if (
            basename_current_file == basename_reference_file
            or not file_path.endswith(".properties")
            or not basename_current_file.startswith("messages_")
        ):
            continue

        # One lookup table instead of rescanning the translation file for
        # every reference entry; a duplicated key resolves to its last value.
        translated_values = {
            entry["key"]: entry["value"]
            for entry in parse_properties_file(target_path)
            if entry["type"] == "entry"
        }

        updated_properties = []
        for ref_entry in reference_properties:
            # Records are flat dicts of str/int, so a shallow copy is enough
            # to keep the reference records untouched between files.
            merged_entry = copy.copy(ref_entry)
            if (
                merged_entry["type"] == "entry"
                and merged_entry["key"] in translated_values
            ):
                merged_entry["value"] = translated_values[merged_entry["key"]]
            updated_properties.append(merged_entry)

        write_json_file(target_path, updated_properties)
2024-08-31 15:54:11 +02:00
def check_for_missing_keys(reference_file, file_list, branch):
    """Bring the files in *file_list* in line with *reference_file*.

    Thin wrapper that forwards all three arguments to update_missing_keys.
    """
    update_missing_keys(
        reference_file=reference_file,
        file_list=file_list,
        branch=branch,
    )
2024-08-25 23:04:28 +02:00
def read_properties(file_path):
    """Return the lines of *file_path* with their line endings removed.

    When *file_path* is not an existing regular file, a list containing a
    single empty string is returned instead of raising.
    """
    if not os.path.isfile(file_path):
        return [""]
    with open(file_path, "r", encoding="utf-8") as handle:
        content = handle.read()
    return content.splitlines()
2024-08-25 23:04:28 +02:00
2024-11-21 12:31:32 +01:00
def _collect_keys(lines):
    """Return the set of raw (unstripped) keys found in property lines."""
    return {
        line.split("=", 1)[0]
        for line in lines
        if "=" in line and not line.startswith("#")
    }


def _is_inside_directory(base_dir, path):
    """Return True when *path* is *base_dir* itself or located beneath it."""
    try:
        return os.path.commonpath([base_dir, path]) == base_dir
    except ValueError:
        # commonpath cannot relate the two paths (e.g. different drives).
        return False


def check_for_differences(reference_file, file_list, branch, actor):
    """Compare translation files with the reference file and print a report.

    Two checks run for every eligible translation file:

    1. The number of lines must equal that of the reference file.
    2. The set of keys must equal that of the reference file.

    The markdown report is printed to stdout only if at least one translation
    file was actually checked.

    Args:
        reference_file: Path of the reference file; the part before the first
            ``/`` is reported as the reference branch.
        file_list: Changed file paths. A list with exactly one element is
            split on whitespace, so several paths may arrive in one argument.
        branch: Directory prefix joined in front of each path when reading.
        actor: User name mentioned in the closing line of the report.

    Raises:
        ValueError: If a listed path resolves outside ``src/main/resources``
            of the current working directory, or a file exceeds
            MAX_FILE_SIZE.
        OSError: If the size of a listed file cannot be determined.
    """
    reference_branch = reference_file.split("/")[0]
    basename_reference_file = os.path.basename(reference_file)

    report = [f"#### 🔄 Reference Branch: `{reference_branch}`"]
    reference_lines = read_properties(reference_file)
    # Reference data does not change between files, so compute it once.
    reference_line_count = len(reference_lines)
    reference_keys = _collect_keys(reference_lines)
    has_differences = False
    only_reference_file = True

    file_arr = file_list[0].split() if len(file_list) == 1 else file_list
    base_dir = os.path.abspath(os.path.join(os.getcwd(), "src", "main", "resources"))
    messages_prefix = os.path.join("src", "main", "resources", "messages_")

    for file_path in file_arr:
        absolute_path = os.path.abspath(file_path)
        # Compare path components rather than string prefixes, so a sibling
        # such as "resources_evil" is not mistaken for the resources directory.
        if not _is_inside_directory(base_dir, absolute_path):
            raise ValueError(f"Unsafe file found: {file_path}")

        branch_path = os.path.join(branch, file_path)
        # Verify file size before processing
        if os.path.getsize(branch_path) > MAX_FILE_SIZE:
            raise ValueError(
                f"The file {file_path} is too large and could pose a security risk."
            )

        basename_current_file = os.path.basename(branch_path)
        if (
            basename_current_file == basename_reference_file
            or not file_path.startswith(messages_prefix)
            or not file_path.endswith(".properties")
            or not basename_current_file.startswith("messages_")
        ):
            continue

        only_reference_file = False
        report.append(f"#### 📃 **File Check:** `{basename_current_file}`")
        current_lines = read_properties(branch_path)
        current_line_count = len(current_lines)

        # Check 1: line count
        if reference_line_count != current_line_count:
            has_differences = True
            report.append("")
            report.append("1. **Test Status:** ❌ **_Failed_**")
            report.append("  - **Issue:**")
            if reference_line_count > current_line_count:
                report.append(
                    f"    - **_Mismatched line count_**: {reference_line_count} (reference) vs {current_line_count} (current). Comments, empty lines, or translation strings are missing."
                )
            else:
                report.append(
                    f"    - **_Too many lines_**: {reference_line_count} (reference) vs {current_line_count} (current). Please verify if there is an additional line that needs to be removed."
                )
        else:
            report.append("1. **Test Status:** ✅ **_Passed_**")

        # Check 2: missing or extra keys. Sorting keeps the report stable
        # between runs; plain set iteration order is not.
        current_keys = _collect_keys(current_lines)
        only_in_current = sorted(current_keys - reference_keys)
        only_in_reference = sorted(reference_keys - current_keys)

        if only_in_current or only_in_reference:
            has_differences = True
            report.append("2. **Test Status:** ❌ **_Failed_**")
            report.append("  - **Issue:**")
            if only_in_current:
                # Keys are compared unstripped, so "key =value" shows up here
                # as an unknown key; point that out explicitly.
                spaces_keys_list = [key for key in only_in_current if " " in key]
                if spaces_keys_list:
                    spaces_keys_str = "`, `".join(spaces_keys_list)
                    report.append(
                        f"    - **_Keys containing unnecessary spaces_**: `{spaces_keys_str}`!"
                    )
                only_in_current_str = "`, `".join(only_in_current)
                report.append(
                    f"    - **_Extra keys in `{basename_current_file}`_**: `{only_in_current_str}` that are not present in **_`{basename_reference_file}`_**."
                )
            if only_in_reference:
                only_in_reference_str = "`, `".join(only_in_reference)
                report.append(
                    f"    - **_Missing keys in `{basename_reference_file}`_**: `{only_in_reference_str}` that are not present in **_`{basename_current_file}`_**."
                )
        else:
            report.append("2. **Test Status:** ✅ **_Passed_**")

        report.append("")
        report.append("---")
        report.append("")

    if has_differences:
        report.append("## ❌ Overall Check Status: **_Failed_**")
        report.append("")
        report.append(
            f"@{actor} please check your translation if it conforms to the standard. Follow the format of [messages_en_GB.properties](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/src/main/resources/messages_en_GB.properties)"
        )
    else:
        report.append("## ✅ Overall Check Status: **_Success_**")
        report.append("")
        report.append(
            f"Thanks @{actor} for your help in keeping the translations up to date."
        )

    if not only_reference_file:
        print("\n".join(report))
2024-08-25 23:04:28 +02:00
# Characters outside these sets are stripped from the corresponding CLI value.
# The hyphen is placed last in each class so it is literal; no backslash is
# whitelisted.
_ACTOR_DISALLOWED = re.compile(r"[^a-zA-Z0-9_-]")
_BRANCH_DISALLOWED = re.compile(r"[^a-zA-Z0-9-]")


def _sanitize_argument(value, disallowed):
    """Return *value* with every character matched by *disallowed* removed."""
    return disallowed.sub("", value)


def main():
    """Parse the command line and run the update or the difference check.

    Without ``--files`` every ``messages_*.properties`` file under
    ``src/main/resources`` of the current working directory is rewritten to
    match the reference file. With ``--files`` the listed files are compared
    with the reference file and a report is printed.
    """
    parser = argparse.ArgumentParser(description="Find missing keys")
    parser.add_argument(
        "--actor",
        required=False,
        help="Actor from PR.",
    )
    parser.add_argument(
        "--reference-file",
        required=True,
        help="Path to the reference file.",
    )
    parser.add_argument(
        "--branch",
        type=str,
        required=True,
        help="Branch name.",
    )
    parser.add_argument(
        "--files",
        nargs="+",
        required=False,
        help="List of changed files, separated by spaces.",
    )
    args = parser.parse_args()

    # Sanitize --actor input to avoid injection attacks
    if args.actor:
        args.actor = _sanitize_argument(args.actor, _ACTOR_DISALLOWED)

    # Sanitize --branch input to avoid injection attacks
    if args.branch:
        args.branch = _sanitize_argument(args.branch, _BRANCH_DISALLOWED)

    file_list = args.files
    if file_list is None:
        file_list = glob.glob(
            os.path.join(
                os.getcwd(), "src", "main", "resources", "messages_*.properties"
            )
        )
        update_missing_keys(args.reference_file, file_list)
    else:
        check_for_differences(args.reference_file, file_list, args.branch, args.actor)


if __name__ == "__main__":
    main()