diff --git a/.github/scripts/check_language_properties.py b/.github/scripts/check_language_properties.py index 4bb88fdd..08042d11 100644 --- a/.github/scripts/check_language_properties.py +++ b/.github/scripts/check_language_properties.py @@ -9,7 +9,7 @@ The script also provides functionality to update the translation files to match adjusting the format. Usage: - python script_name.py --reference-file --branch [--files ] + python check_language_properties.py --reference-file --branch [--actor ] [--files ] """ import copy @@ -19,6 +19,10 @@ import argparse import re +# Maximum size for properties files (e.g., 200 KB) +MAX_FILE_SIZE = 200 * 1024 + + def parse_properties_file(file_path): """Parses a .properties file and returns a list of objects (including comments, empty lines, and line numbers).""" properties_list = [] @@ -96,7 +100,7 @@ def write_json_file(file_path, updated_properties): def update_missing_keys(reference_file, file_list, branch=""): reference_properties = parse_properties_file(reference_file) for file_path in file_list: - basename_current_file = os.path.basename(branch + file_path) + basename_current_file = os.path.basename(os.path.join(branch, file_path)) if ( basename_current_file == os.path.basename(reference_file) or not file_path.endswith(".properties") @@ -104,7 +108,7 @@ def update_missing_keys(reference_file, file_list, branch=""): ): continue - current_properties = parse_properties_file(branch + file_path) + current_properties = parse_properties_file(os.path.join(branch, file_path)) updated_properties = [] for ref_entry in reference_properties: ref_entry_copy = copy.deepcopy(ref_entry) @@ -115,15 +119,15 @@ def update_missing_keys(reference_file, file_list, branch=""): if ref_entry_copy["key"] == current_entry["key"]: ref_entry_copy["value"] = current_entry["value"] updated_properties.append(ref_entry_copy) - write_json_file(branch + file_path, updated_properties) + write_json_file(os.path.join(branch, file_path), updated_properties) def 
check_for_missing_keys(reference_file, file_list, branch): - update_missing_keys(reference_file, file_list, branch + "/") + update_missing_keys(reference_file, file_list, branch) def read_properties(file_path): - if (os.path.isfile(file_path) and os.path.exists(file_path)): + if os.path.isfile(file_path) and os.path.exists(file_path): with open(file_path, "r", encoding="utf-8") as file: return file.read().splitlines() return [""] @@ -142,18 +146,36 @@ def check_for_differences(reference_file, file_list, branch, actor): only_reference_file = True - for file_path in file_list[0].split(): - basename_current_file = os.path.basename(branch + "/" + file_path) + file_arr = file_list + + if len(file_list) == 1: + file_arr = file_list[0].split() + base_dir = os.path.abspath(os.path.join(os.getcwd(), "src", "main", "resources")) + + for file_path in file_arr: + absolute_path = os.path.abspath(file_path) + # Verify that file is within the expected directory + if not absolute_path.startswith(base_dir): + raise ValueError(f"Unsafe file found: {file_path}") + # Verify file size before processing + if os.path.getsize(os.path.join(branch, file_path)) > MAX_FILE_SIZE: + raise ValueError( + f"The file {file_path} is too large and could pose a security risk." 
+ ) + + basename_current_file = os.path.basename(os.path.join(branch, file_path)) if ( basename_current_file == basename_reference_file - or not file_path.startswith("src/main/resources/messages_") + or not file_path.startswith( + os.path.join("src", "main", "resources", "messages_") + ) or not file_path.endswith(".properties") or not basename_current_file.startswith("messages_") ): continue only_reference_file = False - report.append(f"#### 🗂️ **Checking File:** `{basename_current_file}`...") - current_lines = read_properties(branch + "/" + file_path) + report.append(f"#### 📃 **Checking File:** `{basename_current_file}`...") + current_lines = read_properties(os.path.join(branch, file_path)) reference_line_count = len(reference_lines) current_line_count = len(current_lines) @@ -197,6 +219,11 @@ def check_for_differences(reference_file, file_list, branch, actor): extra_keys_str = "`, `".join(extra_keys_list) report.append("- **Test 2 Status:** ❌ Failed") if missing_keys_list: + for key in missing_keys_list: + if " " in key: + report.append( + f" - **Issue:** One or more keys in ***{basename_current_file}*** contain spaces `{missing_keys_str}`!" + ) report.append( f" - **Issue:** There are keys in ***{basename_current_file}*** `{missing_keys_str}` that are not present in ***{basename_reference_file}***!" 
) @@ -252,10 +279,20 @@ if __name__ == "__main__": ) args = parser.parse_args() + # Sanitize --actor input to avoid injection attacks + if args.actor: + args.actor = re.sub(r"[^a-zA-Z0-9_\\-]", "", args.actor) + + # Sanitize --branch input to avoid injection attacks + if args.branch: + args.branch = re.sub(r"[^a-zA-Z0-9\\-]", "", args.branch) + file_list = args.files if file_list is None: file_list = glob.glob( - os.getcwd() + "/src/**/messages_*.properties", recursive=True + os.path.join( + os.getcwd(), "src", "main", "resources", "messages_*.properties" + ) ) update_missing_keys(args.reference_file, file_list) else: diff --git a/.github/workflows/check_properties.yml b/.github/workflows/check_properties.yml index 2790bb48..b46efdc5 100644 --- a/.github/workflows/check_properties.yml +++ b/.github/workflows/check_properties.yml @@ -50,10 +50,7 @@ jobs: echo "Getting list of changed files from PR..." gh pr view ${{ github.event.pull_request.number }} --json files -q ".files[].path" | grep -E '^src/main/resources/messages_[a-zA-Z_]+\.properties$' > ../changed_files.txt cd .. - echo "Setting branch path..." - BRANCH_PATH="pr-branch" - echo "BRANCH_PATH=${BRANCH_PATH}" >> $GITHUB_ENV echo "Processing changed files..." mapfile -t CHANGED_FILES < changed_files.txt @@ -61,7 +58,6 @@ jobs: echo "CHANGED_FILES=${CHANGED_FILES_STR}" >> $GITHUB_ENV echo "Changed files: ${CHANGED_FILES_STR}" - echo "Branch: ${BRANCH_PATH}" - name: Determine reference file id: determine-file @@ -85,30 +81,38 @@ jobs: python main-branch/.github/scripts/check_language_properties.py \ --actor ${{ github.event.pull_request.user.login }} \ --reference-file "${REFERENCE_FILE}" \ - --branch "${BRANCH_PATH}" \ - --files "${CHANGED_FILES[@]}" > failure.txt || true + --branch pr-branch \ + --files "${CHANGED_FILES[@]}" > result.txt || true - name: Capture output id: capture-output run: | - if [ -f failure.txt ] && [ -s failure.txt ]; then - echo "Test failed, capturing output..." 
- ERROR_OUTPUT=$(cat failure.txt) - echo "ERROR_OUTPUT<<EOF" >> $GITHUB_ENV - echo "$ERROR_OUTPUT" >> $GITHUB_ENV + if [ -f result.txt ] && [ -s result.txt ]; then + echo "Test, capturing output..." + SCRIPT_OUTPUT=$(cat result.txt) + echo "SCRIPT_OUTPUT<<EOF" >> $GITHUB_ENV + echo "$SCRIPT_OUTPUT" >> $GITHUB_ENV echo "EOF" >> $GITHUB_ENV - echo "${ERROR_OUTPUT}" + echo "${SCRIPT_OUTPUT}" + + # Set FAIL_JOB to true if SCRIPT_OUTPUT contains ❌ + if [[ "$SCRIPT_OUTPUT" == *"❌"* ]]; then + echo "FAIL_JOB=true" >> $GITHUB_ENV + else + echo "FAIL_JOB=false" >> $GITHUB_ENV + fi else - echo "No errors found." - echo "ERROR_OUTPUT=" >> $GITHUB_ENV + echo "No update found." + echo "SCRIPT_OUTPUT=" >> $GITHUB_ENV + echo "FAIL_JOB=false" >> $GITHUB_ENV fi - name: Post comment on PR - if: env.ERROR_OUTPUT != '' + if: env.SCRIPT_OUTPUT != '' uses: actions/github-script@v7 with: script: | - const { GITHUB_REPOSITORY, ERROR_OUTPUT } = process.env; + const { GITHUB_REPOSITORY, SCRIPT_OUTPUT } = process.env; const [repoOwner, repoName] = GITHUB_REPOSITORY.split('/'); const prNumber = context.issue.number; @@ -130,7 +134,7 @@ jobs: owner: repoOwner, repo: repoName, comment_id: comment.id, - body: `## 🚀 Translation Verification Summary\n\n\n${ERROR_OUTPUT}\n` + body: `## 🚀 Translation Verification Summary\n\n\n${SCRIPT_OUTPUT}\n` }); console.log("Updated existing comment."); } else if (!comment) { @@ -139,13 +143,19 @@ jobs: owner: repoOwner, repo: repoName, issue_number: prNumber, - body: `## 🚀 Translation Verification Summary\n\n\n${ERROR_OUTPUT}\n` + body: `## 🚀 Translation Verification Summary\n\n\n${SCRIPT_OUTPUT}\n` }); console.log("Created new comment."); } else { console.log("Comment update attempt denied. Actor does not match."); } + - name: Fail job if errors found + if: env.FAIL_JOB == 'true' + run: | + echo "Failing the job because errors were detected." + exit 1 + update-translations-main: if: github.event_name == 'push' permissions: