mirror of
				https://github.com/Frooodle/Stirling-PDF.git
				synced 2025-10-25 11:17:28 +02:00 
			
		
		
		
	clean up and more (#2756)
# Description of Changes

This PR introduces multiple updates across various files and workflows:

### **What was changed:**

1. **Deleted Scripts:**
   - `check_duplicates.py`: Removed the script that checked for duplicate keys in properties files.
   - `check_tabulator.py`: Removed the script that ensured no tab characters existed in HTML, CSS, or JS files.
2. **Updated GitHub Actions Workflow (`pre_commit.yml`):**
   - Added a weekly schedule trigger (`cron`) for the pre-commit workflow.
   - Updated the `create-pull-request` action to exclude certain files (`.github/workflows/.*`) from formatting.
   - Improved detection and handling of staged changes during commit creation.
3. **`.pre-commit-config.yaml`:**
   - Adjusted regex for file matching in `ruff` and `codespell` hooks to ensure better file filtering.
   - Removed local hooks that relied on deleted scripts.
4. **Scripts (`counter_translation.py`):**
   - Updated file writing methods to enforce consistent newline characters (`newline="\n"`).

### **Why the change was made:**

- To simplify the repository by removing unnecessary or outdated scripts (`check_duplicates.py` and `check_tabulator.py`).
- To enhance the workflow automation by introducing a scheduled run for pre-commit checks.
- To improve code formatting and file consistency by addressing newline character issues and refining file exclusions in `pre-commit`.

### **Challenges encountered:**

- Ensuring that all references to deleted scripts were properly removed from configuration files.
- Verifying that workflow and pre-commit changes do not introduce regressions in existing automation.
Closes # (issue_number)

---

## Checklist

### General

- [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable)
- [x] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [x] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details.
This commit is contained in:
		
							parent
							
								
									abc3ff3529
								
							
						
					
					
						commit
						05add001fb
					
				
							
								
								
									
										51
									
								
								.github/scripts/check_duplicates.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										51
									
								
								.github/scripts/check_duplicates.py
									
									
									
									
										vendored
									
									
								
							| @ -1,51 +0,0 @@ | ||||
| import sys | ||||
| 
 | ||||
| 
 | ||||
| def find_duplicate_keys(file_path): | ||||
|     """ | ||||
|     Finds duplicate keys in a properties file and returns their occurrences. | ||||
| 
 | ||||
|     This function reads a properties file, identifies any keys that occur more than | ||||
|     once, and returns a dictionary with these keys and the line numbers of their occurrences. | ||||
| 
 | ||||
|     Parameters: | ||||
|     file_path (str): The path to the properties file to be checked. | ||||
| 
 | ||||
|     Returns: | ||||
|     dict: A dictionary where each key is a duplicated key in the file, and the value is a list | ||||
|           of line numbers where the key occurs. | ||||
|     """ | ||||
|     with open(file_path, "r", encoding="utf-8") as file: | ||||
|         lines = file.readlines() | ||||
| 
 | ||||
|     keys = {} | ||||
|     duplicates = {} | ||||
| 
 | ||||
|     for line_number, line in enumerate(lines, start=1): | ||||
|         line = line.strip() | ||||
|         if line and not line.startswith("#") and "=" in line: | ||||
|             key = line.split("=", 1)[0].strip() | ||||
|             if key in keys: | ||||
|                 # If the key already exists, add the current line number | ||||
|                 duplicates.setdefault(key, []).append(line_number) | ||||
|                 # Also add the first instance of the key if not already done | ||||
|                 if keys[key] not in duplicates[key]: | ||||
|                     duplicates[key].insert(0, keys[key]) | ||||
|             else: | ||||
|                 # Store the line number of the first instance of the key | ||||
|                 keys[key] = line_number | ||||
| 
 | ||||
|     return duplicates | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     failed = False | ||||
|     for ar in sys.argv[1:]: | ||||
|         duplicates = find_duplicate_keys(ar) | ||||
|         if duplicates: | ||||
|             for key, lines in duplicates.items(): | ||||
|                 lines_str = ", ".join(map(str, lines)) | ||||
|                 print(f"{key} duplicated in {ar} on lines {lines_str}") | ||||
|                 failed = True | ||||
|     if failed: | ||||
|         sys.exit(1) | ||||
							
								
								
									
										85
									
								
								.github/scripts/check_tabulator.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										85
									
								
								.github/scripts/check_tabulator.py
									
									
									
									
										vendored
									
									
								
							| @ -1,85 +0,0 @@ | ||||
| """check_tabulator.py""" | ||||
| 
 | ||||
| import argparse | ||||
| import sys | ||||
| 
 | ||||
| 
 | ||||
| def check_tabs(file_path): | ||||
|     """ | ||||
|     Checks for tabs in the specified file. | ||||
| 
 | ||||
|     Args: | ||||
|         file_path (str): The path to the file to be checked. | ||||
| 
 | ||||
|     Returns: | ||||
|         bool: True if tabs are found, False otherwise. | ||||
|     """ | ||||
|     with open(file_path, "r", encoding="utf-8") as file: | ||||
|         content = file.read() | ||||
| 
 | ||||
|     if "\t" in content: | ||||
|         print(f"Tab found in {file_path}") | ||||
|         return True | ||||
|     return False | ||||
| 
 | ||||
| 
 | ||||
| def replace_tabs_with_spaces(file_path, replace_with="  "): | ||||
|     """ | ||||
|     Replaces tabs with a specified number of spaces in the file. | ||||
| 
 | ||||
|     Args: | ||||
|         file_path (str): The path to the file where tabs will be replaced. | ||||
|         replace_with (str): The character(s) to replace tabs with. Defaults to two spaces. | ||||
|     """ | ||||
|     with open(file_path, "r", encoding="utf-8") as file: | ||||
|         content = file.read() | ||||
| 
 | ||||
|     updated_content = content.replace("\t", replace_with) | ||||
| 
 | ||||
|     with open(file_path, "w", encoding="utf-8") as file: | ||||
|         file.write(updated_content) | ||||
| 
 | ||||
| 
 | ||||
| def main(): | ||||
|     """ | ||||
|     Main function to replace tabs with spaces in the provided files. | ||||
|     The replacement character and files to check are taken from command line arguments. | ||||
|     """ | ||||
|     # Create ArgumentParser instance | ||||
|     parser = argparse.ArgumentParser( | ||||
|         description="Replace tabs in files with specified characters." | ||||
|     ) | ||||
| 
 | ||||
|     # Define optional argument `--replace_with` | ||||
|     parser.add_argument( | ||||
|         "--replace_with", | ||||
|         default="  ", | ||||
|         help="Character(s) to replace tabs with. Default is two spaces.", | ||||
|     ) | ||||
| 
 | ||||
|     # Define argument for file paths | ||||
|     parser.add_argument("files", metavar="FILE", nargs="+", help="Files to process.") | ||||
| 
 | ||||
|     # Parse arguments | ||||
|     args = parser.parse_args() | ||||
| 
 | ||||
|     # Extract replacement characters and files from the parsed arguments | ||||
|     replace_with = args.replace_with | ||||
|     files_checked = args.files | ||||
| 
 | ||||
|     error = False | ||||
| 
 | ||||
|     for file_path in files_checked: | ||||
|         if check_tabs(file_path): | ||||
|             replace_tabs_with_spaces(file_path, replace_with) | ||||
|             error = True | ||||
| 
 | ||||
|     if error: | ||||
|         print("Error: Originally found tabs in HTML files, now replaced.") | ||||
|         sys.exit(1) | ||||
| 
 | ||||
|     sys.exit(0) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
							
								
								
									
										2
									
								
								.github/workflows/pre_commit.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/pre_commit.yml
									
									
									
									
										vendored
									
									
								
							| @ -2,6 +2,8 @@ name: Pre-commit | ||||
| 
 | ||||
| on: | ||||
|   workflow_dispatch: | ||||
|   schedule: | ||||
|     - cron: "0 0 * * 1" | ||||
| 
 | ||||
| permissions: | ||||
|   contents: read | ||||
|  | ||||
| @ -6,10 +6,10 @@ repos: | ||||
|         args: | ||||
|           - --fix | ||||
|           - --line-length=127 | ||||
|         files: ^((.github/scripts|scripts)/.+)?[^/]+\.py$ | ||||
|         files: ^((\.github/scripts|scripts)/.+)?[^/]+\.py$ | ||||
|         exclude: (split_photos.py) | ||||
|       - id: ruff-format | ||||
|         files: ^((.github/scripts|scripts)/.+)?[^/]+\.py$ | ||||
|         files: ^((\.github/scripts|scripts)/.+)?[^/]+\.py$ | ||||
|         exclude: (split_photos.py) | ||||
|   - repo: https://github.com/codespell-project/codespell | ||||
|     rev: v2.3.0 | ||||
| @ -19,7 +19,7 @@ repos: | ||||
|           - --ignore-words-list= | ||||
|           - --skip="./.*,*.csv,*.json,*.ambr" | ||||
|           - --quiet-level=2 | ||||
|         files: \.(properties|html|css|js|py|md)$ | ||||
|         files: \.(html|css|js|py|md)$ | ||||
|         exclude: (.vscode|.devcontainer|src/main/resources|Dockerfile|.*/pdfjs.*|.*/thirdParty.*|bootstrap.*|.*\.min\..*|.*diff\.js) | ||||
|   - repo: https://github.com/gitleaks/gitleaks | ||||
|     rev: v8.22.0 | ||||
| @ -35,23 +35,7 @@ repos: | ||||
|     hooks: | ||||
|       - id: end-of-file-fixer | ||||
|         files: ^.*(\.js|\.java|\.py|\.yml)$ | ||||
|         exclude: ^(.*/pdfjs.*|.*/thirdParty.*|bootstrap.*|.*\.min\..*|.*diff\.js$) | ||||
|         exclude: ^(.*/pdfjs.*|.*/thirdParty.*|bootstrap.*|.*\.min\..*|.*diff\.js|\.github/workflows/.*$) | ||||
|       - id: trailing-whitespace | ||||
|         files: ^.*(\.js|\.java|\.py|\.yml)$ | ||||
|         exclude: ^(.*/pdfjs.*|.*/thirdParty.*|bootstrap.*|.*\.min\..*|.*diff\.js$) | ||||
| 
 | ||||
|   - repo: local | ||||
|     hooks: | ||||
|       - id: check-duplicate-properties-keys | ||||
|         name: Check Duplicate Properties Keys | ||||
|         entry: python .github/scripts/check_duplicates.py | ||||
|         language: python | ||||
|         files: ^(src)/.+\.properties$ | ||||
|       - id: check-html-tabs | ||||
|         name: Check HTML for tabs | ||||
|         description: Ensures HTML/CSS/JS files do not contain tab characters | ||||
|         # args: ["--replace_with=  "] | ||||
|         entry: python .github/scripts/check_tabulator.py | ||||
|         language: python | ||||
|         exclude: ^(.*/pdfjs.*|.*/thirdParty.*|bootstrap.*|.*\.min\..*|.*diff\.js$) | ||||
|         files: ^.*(\.html|\.css|\.js)$ | ||||
|         exclude: ^(.*/pdfjs.*|.*/thirdParty.*|bootstrap.*|.*\.min\..*|.*diff\.js|\.github/workflows/.*$) | ||||
|  | ||||
| @ -75,7 +75,7 @@ def write_readme(progress_list: list[tuple[str, int]]) -> None: | ||||
|                         f"", | ||||
|                     ) | ||||
| 
 | ||||
|     with open("README.md", "w", encoding="utf-8") as file: | ||||
|     with open("README.md", "w", encoding="utf-8", newline="\n") as file: | ||||
|         file.writelines(content) | ||||
| 
 | ||||
| 
 | ||||
| @ -196,7 +196,7 @@ def compare_files( | ||||
|             ) | ||||
|         ) | ||||
|     ignore_translation = convert_to_multiline(sort_ignore_translation) | ||||
|     with open(ignore_translation_file, "w", encoding="utf-8") as file: | ||||
|     with open(ignore_translation_file, "w", encoding="utf-8", newline="\n") as file: | ||||
|         file.write(tomlkit.dumps(ignore_translation)) | ||||
| 
 | ||||
|     unique_data = list(set(result_list)) | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user