From 311f6c8a3aab1baafd9977b74dd2bc998c7b90f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Sz=C3=BCcs?= <127139797+balazs-szucs@users.noreply.github.com> Date: Thu, 30 Oct 2025 18:59:43 +0100 Subject: [PATCH] fix(pipeline): Update isZip method to validate against filename extensions, and keep comic archives zipped (#4741) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description of Changes This pull request updates the logic for detecting and handling zip files in the `PipelineProcessor` to distinguish between standard zip files and comic book archive formats (CBZ/CBR). The changes ensure that files with `.cbz` extensions are not treated as zip files for unzipping purposes. **Update to zip file detection and extraction:** * Modified the `isZip` method to take an optional `filename` parameter and treat files ending with `.cbz` as non-zip, preventing them from being unzipped automatically. * Updated all usages of `isZip` to pass the filename where available, ensuring the new logic is applied during output file processing and nested unzipping. 
Closes: #4740 --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. 
--------- Signed-off-by: Balázs Szücs --- .../api/pipeline/PipelineProcessor.java | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java index 4ca863112..c32d18a4d 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java @@ -289,7 +289,7 @@ public class PipelineProcessor { newFilename = removeTrailingNaming(extractFilename(response)); } // Check if the response body is a zip file - if (isZip(response.getBody())) { + if (isZip(response.getBody(), newFilename)) { // Unzip the file and add all the files to the new output files newOutputFiles.addAll(unzip(response.getBody())); } else { @@ -379,14 +379,25 @@ public class PipelineProcessor { return outputFiles; } - private boolean isZip(byte[] data) { + private boolean isZip(byte[] data, String filename) { if (data == null || data.length < 4) { return false; } + if (filename != null) { + String lower = filename.toLowerCase(); + if (lower.endsWith(".cbz")) { + // Treat CBZ as non-zip for our unzipping purposes + return false; + } + } // Check the first four bytes of the data against the standard zip magic number return data[0] == 0x50 && data[1] == 0x4B && data[2] == 0x03 && data[3] == 0x04; } + private boolean isZip(byte[] data) { + return isZip(data, null); + } + private List unzip(byte[] data) throws IOException { log.info("Unzipping data of length: {}", data.length); List unzippedFiles = new ArrayList<>(); @@ -410,7 +421,7 @@ public class PipelineProcessor { } }; // If the unzipped file is a zip file, unzip it - if (isZip(baos.toByteArray())) { + if (isZip(baos.toByteArray(), filename)) { log.info("File {} is a zip file. 
Unzipping...", filename); unzippedFiles.addAll(unzip(baos.toByteArray())); } else {