Improves security when processing properties files (#2303)

* Improves security when processing properties files

* Check for spaces in the key

---------

Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
This commit is contained in:
Ludy 2024-11-23 12:49:49 +01:00 committed by GitHub
parent 9a96109ea2
commit 3a27aa16d5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 77 additions and 30 deletions

View File

@ -9,7 +9,7 @@ The script also provides functionality to update the translation files to match
adjusting the format. adjusting the format.
Usage: Usage:
python script_name.py --reference-file <path_to_reference_file> --branch <branch_name> [--files <list_of_changed_files>] python check_language_properties.py --reference-file <path_to_reference_file> --branch <branch_name> [--actor <actor_name>] [--files <list_of_changed_files>]
""" """
import copy import copy
@ -19,6 +19,10 @@ import argparse
import re import re
# Maximum size for properties files (e.g., 200 KB)
MAX_FILE_SIZE = 200 * 1024
def parse_properties_file(file_path): def parse_properties_file(file_path):
"""Parses a .properties file and returns a list of objects (including comments, empty lines, and line numbers).""" """Parses a .properties file and returns a list of objects (including comments, empty lines, and line numbers)."""
properties_list = [] properties_list = []
@ -96,7 +100,7 @@ def write_json_file(file_path, updated_properties):
def update_missing_keys(reference_file, file_list, branch=""): def update_missing_keys(reference_file, file_list, branch=""):
reference_properties = parse_properties_file(reference_file) reference_properties = parse_properties_file(reference_file)
for file_path in file_list: for file_path in file_list:
basename_current_file = os.path.basename(branch + file_path) basename_current_file = os.path.basename(os.path.join(branch, file_path))
if ( if (
basename_current_file == os.path.basename(reference_file) basename_current_file == os.path.basename(reference_file)
or not file_path.endswith(".properties") or not file_path.endswith(".properties")
@ -104,7 +108,7 @@ def update_missing_keys(reference_file, file_list, branch=""):
): ):
continue continue
current_properties = parse_properties_file(branch + file_path) current_properties = parse_properties_file(os.path.join(branch, file_path))
updated_properties = [] updated_properties = []
for ref_entry in reference_properties: for ref_entry in reference_properties:
ref_entry_copy = copy.deepcopy(ref_entry) ref_entry_copy = copy.deepcopy(ref_entry)
@ -115,15 +119,15 @@ def update_missing_keys(reference_file, file_list, branch=""):
if ref_entry_copy["key"] == current_entry["key"]: if ref_entry_copy["key"] == current_entry["key"]:
ref_entry_copy["value"] = current_entry["value"] ref_entry_copy["value"] = current_entry["value"]
updated_properties.append(ref_entry_copy) updated_properties.append(ref_entry_copy)
write_json_file(branch + file_path, updated_properties) write_json_file(os.path.join(branch, file_path), updated_properties)
def check_for_missing_keys(reference_file, file_list, branch): def check_for_missing_keys(reference_file, file_list, branch):
update_missing_keys(reference_file, file_list, branch + "/") update_missing_keys(reference_file, file_list, branch)
def read_properties(file_path): def read_properties(file_path):
if (os.path.isfile(file_path) and os.path.exists(file_path)): if os.path.isfile(file_path) and os.path.exists(file_path):
with open(file_path, "r", encoding="utf-8") as file: with open(file_path, "r", encoding="utf-8") as file:
return file.read().splitlines() return file.read().splitlines()
return [""] return [""]
@ -142,18 +146,36 @@ def check_for_differences(reference_file, file_list, branch, actor):
only_reference_file = True only_reference_file = True
for file_path in file_list[0].split(): file_arr = file_list
basename_current_file = os.path.basename(branch + "/" + file_path)
if len(file_list) == 1:
file_arr = file_list[0].split()
base_dir = os.path.abspath(os.path.join(os.getcwd(), "src", "main", "resources"))
for file_path in file_arr:
absolute_path = os.path.abspath(file_path)
# Verify that file is within the expected directory
if not absolute_path.startswith(base_dir):
raise ValueError(f"Unsafe file found: {file_path}")
# Verify file size before processing
if os.path.getsize(os.path.join(branch, file_path)) > MAX_FILE_SIZE:
raise ValueError(
f"The file {file_path} is too large and could pose a security risk."
)
basename_current_file = os.path.basename(os.path.join(branch, file_path))
if ( if (
basename_current_file == basename_reference_file basename_current_file == basename_reference_file
or not file_path.startswith("src/main/resources/messages_") or not file_path.startswith(
os.path.join("src", "main", "resources", "messages_")
)
or not file_path.endswith(".properties") or not file_path.endswith(".properties")
or not basename_current_file.startswith("messages_") or not basename_current_file.startswith("messages_")
): ):
continue continue
only_reference_file = False only_reference_file = False
report.append(f"#### 🗂️ **Checking File:** `{basename_current_file}`...") report.append(f"#### 📃 **Checking File:** `{basename_current_file}`...")
current_lines = read_properties(branch + "/" + file_path) current_lines = read_properties(os.path.join(branch, file_path))
reference_line_count = len(reference_lines) reference_line_count = len(reference_lines)
current_line_count = len(current_lines) current_line_count = len(current_lines)
@ -197,6 +219,11 @@ def check_for_differences(reference_file, file_list, branch, actor):
extra_keys_str = "`, `".join(extra_keys_list) extra_keys_str = "`, `".join(extra_keys_list)
report.append("- **Test 2 Status:** ❌ Failed") report.append("- **Test 2 Status:** ❌ Failed")
if missing_keys_list: if missing_keys_list:
for key in missing_keys_list:
if " " in key:
report.append(
f" - **Issue:** One or more keys in ***{basename_current_file}*** contain spaces `{missing_keys_str}`!"
)
report.append( report.append(
f" - **Issue:** There are keys in ***{basename_current_file}*** `{missing_keys_str}` that are not present in ***{basename_reference_file}***!" f" - **Issue:** There are keys in ***{basename_current_file}*** `{missing_keys_str}` that are not present in ***{basename_reference_file}***!"
) )
@ -252,10 +279,20 @@ if __name__ == "__main__":
) )
args = parser.parse_args() args = parser.parse_args()
# Sanitize --actor input to avoid injection attacks
if args.actor:
args.actor = re.sub(r"[^a-zA-Z0-9_\\-]", "", args.actor)
# Sanitize --branch input to avoid injection attacks
if args.branch:
args.branch = re.sub(r"[^a-zA-Z0-9\\-]", "", args.branch)
file_list = args.files file_list = args.files
if file_list is None: if file_list is None:
file_list = glob.glob( file_list = glob.glob(
os.getcwd() + "/src/**/messages_*.properties", recursive=True os.path.join(
os.getcwd(), "src", "main", "resources", "messages_*.properties"
)
) )
update_missing_keys(args.reference_file, file_list) update_missing_keys(args.reference_file, file_list)
else: else:

View File

@ -50,10 +50,7 @@ jobs:
echo "Getting list of changed files from PR..." echo "Getting list of changed files from PR..."
gh pr view ${{ github.event.pull_request.number }} --json files -q ".files[].path" | grep -E '^src/main/resources/messages_[a-zA-Z_]+\.properties$' > ../changed_files.txt gh pr view ${{ github.event.pull_request.number }} --json files -q ".files[].path" | grep -E '^src/main/resources/messages_[a-zA-Z_]+\.properties$' > ../changed_files.txt
cd .. cd ..
echo "Setting branch path..."
BRANCH_PATH="pr-branch"
echo "BRANCH_PATH=${BRANCH_PATH}" >> $GITHUB_ENV
echo "Processing changed files..." echo "Processing changed files..."
mapfile -t CHANGED_FILES < changed_files.txt mapfile -t CHANGED_FILES < changed_files.txt
@ -61,7 +58,6 @@ jobs:
echo "CHANGED_FILES=${CHANGED_FILES_STR}" >> $GITHUB_ENV echo "CHANGED_FILES=${CHANGED_FILES_STR}" >> $GITHUB_ENV
echo "Changed files: ${CHANGED_FILES_STR}" echo "Changed files: ${CHANGED_FILES_STR}"
echo "Branch: ${BRANCH_PATH}"
- name: Determine reference file - name: Determine reference file
id: determine-file id: determine-file
@ -85,30 +81,38 @@ jobs:
python main-branch/.github/scripts/check_language_properties.py \ python main-branch/.github/scripts/check_language_properties.py \
--actor ${{ github.event.pull_request.user.login }} \ --actor ${{ github.event.pull_request.user.login }} \
--reference-file "${REFERENCE_FILE}" \ --reference-file "${REFERENCE_FILE}" \
--branch "${BRANCH_PATH}" \ --branch pr-branch \
--files "${CHANGED_FILES[@]}" > failure.txt || true --files "${CHANGED_FILES[@]}" > result.txt || true
- name: Capture output - name: Capture output
id: capture-output id: capture-output
run: | run: |
if [ -f failure.txt ] && [ -s failure.txt ]; then if [ -f result.txt ] && [ -s result.txt ]; then
echo "Test failed, capturing output..." echo "Test, capturing output..."
ERROR_OUTPUT=$(cat failure.txt) SCRIPT_OUTPUT=$(cat result.txt)
echo "ERROR_OUTPUT<<EOF" >> $GITHUB_ENV echo "SCRIPT_OUTPUT<<EOF" >> $GITHUB_ENV
echo "$ERROR_OUTPUT" >> $GITHUB_ENV echo "$SCRIPT_OUTPUT" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV echo "EOF" >> $GITHUB_ENV
echo "${ERROR_OUTPUT}" echo "${SCRIPT_OUTPUT}"
# Set FAIL_JOB to true if SCRIPT_OUTPUT contains ❌
if [[ "$SCRIPT_OUTPUT" == *"❌"* ]]; then
echo "FAIL_JOB=true" >> $GITHUB_ENV
else else
echo "No errors found." echo "FAIL_JOB=false" >> $GITHUB_ENV
echo "ERROR_OUTPUT=" >> $GITHUB_ENV fi
else
echo "No update found."
echo "SCRIPT_OUTPUT=" >> $GITHUB_ENV
echo "FAIL_JOB=false" >> $GITHUB_ENV
fi fi
- name: Post comment on PR - name: Post comment on PR
if: env.ERROR_OUTPUT != '' if: env.SCRIPT_OUTPUT != ''
uses: actions/github-script@v7 uses: actions/github-script@v7
with: with:
script: | script: |
const { GITHUB_REPOSITORY, ERROR_OUTPUT } = process.env; const { GITHUB_REPOSITORY, SCRIPT_OUTPUT } = process.env;
const [repoOwner, repoName] = GITHUB_REPOSITORY.split('/'); const [repoOwner, repoName] = GITHUB_REPOSITORY.split('/');
const prNumber = context.issue.number; const prNumber = context.issue.number;
@ -130,7 +134,7 @@ jobs:
owner: repoOwner, owner: repoOwner,
repo: repoName, repo: repoName,
comment_id: comment.id, comment_id: comment.id,
body: `## 🚀 Translation Verification Summary\n\n\n${ERROR_OUTPUT}\n` body: `## 🚀 Translation Verification Summary\n\n\n${SCRIPT_OUTPUT}\n`
}); });
console.log("Updated existing comment."); console.log("Updated existing comment.");
} else if (!comment) { } else if (!comment) {
@ -139,13 +143,19 @@ jobs:
owner: repoOwner, owner: repoOwner,
repo: repoName, repo: repoName,
issue_number: prNumber, issue_number: prNumber,
body: `## 🚀 Translation Verification Summary\n\n\n${ERROR_OUTPUT}\n` body: `## 🚀 Translation Verification Summary\n\n\n${SCRIPT_OUTPUT}\n`
}); });
console.log("Created new comment."); console.log("Created new comment.");
} else { } else {
console.log("Comment update attempt denied. Actor does not match."); console.log("Comment update attempt denied. Actor does not match.");
} }
- name: Fail job if errors found
if: env.FAIL_JOB == 'true'
run: |
echo "Failing the job because errors were detected."
exit 1
update-translations-main: update-translations-main:
if: github.event_name == 'push' if: github.event_name == 'push'
permissions: permissions: