From 5a67b0cfe7a512169e802af45d2c5f5e56172b0b Mon Sep 17 00:00:00 2001 From: "pixeebot[bot]" <104101892+pixeebot[bot]@users.noreply.github.com> Date: Tue, 26 Nov 2024 20:44:07 +0000 Subject: [PATCH] Hardening suggestions for Stirling-PDF / ghostscript (#2339) * Protect `readLine()` against DoS * Sanitized user-provided file names in HTTP multipart uploads --------- Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com> --- .../software/SPDF/controller/api/misc/OCRController.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java index 6089c870..b2601ec5 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java @@ -1,5 +1,7 @@ package stirling.software.SPDF.controller.api.misc; +import io.github.pixee.security.BoundedLineReader; +import io.github.pixee.security.Filenames; import java.awt.image.BufferedImage; import java.io.BufferedReader; import java.io.File; @@ -144,7 +146,7 @@ public class OCRController { new BufferedReader( new InputStreamReader(process.getErrorStream()))) { String line; - while ((line = reader.readLine()) != null) { + while ((line = BoundedLineReader.readLine(reader, 5_000_000)) != null) { log.debug("Tesseract: {}", line); } } @@ -174,7 +176,7 @@ public class OCRController { // Read the final PDF file byte[] pdfContent = Files.readAllBytes(finalOutputFile); String outputFilename = - inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf"; + Filenames.toSimpleFileName(inputFile.getOriginalFilename()).replaceFirst("[.][^.]+$", "") + "_OCR.pdf"; return ResponseEntity.ok() .header(