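From a61749d5003e2a2f3bc483154b9f59bbd0c99f44 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Mon, 10 Mar 2025 20:17:45 +0000 Subject: [PATCH] removal of all getByte loads (#3153) # Description of Changes - What was changed: API controllers now pass the uploaded `MultipartFile` (or the request object itself) to `pdfDocumentFactory.load(...)` instead of calling `getBytes()` first; the helpers `CertSignController.sign` and `ConvertImgPDFController.rearrangePdfPages` take a `MultipartFile` instead of a `byte[]`; `PDFWithPageNums.getPageNumbersList(boolean)`, which loaded the PDF via `Loader.loadPDF(getFileInput().getBytes())`, was removed; a new `DeletingRandomAccessFile` deletes its backing temp file on `close()`; `CustomPDDocumentFactory` gained `load(InputStream, String password)` and a shared `getStreamCacheFunction(long)`; `Dockerfile.fat` now builds on `gradle:8.12-jdk21` and runs an early `./gradlew build` step before the full source copy; the version was bumped from 0.44.0 to 0.44.1; `.gitignore` now excludes `/testing/file_snapshots`; `testing/test.sh` and `testing/test_webpages.sh` were updated. - Why the change was made: not stated by the author; per the subject line, to remove all `getBytes()`-based document loads. - Any challenges encountered: none recorded. Closes #(issue_number) --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. 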
--------- Co-authored-by: a --- .gitignore | 2 +- Dockerfile.fat | 10 +- build.gradle | 2 +- .../util/DeletingRandomAccessFile.java | 39 +++ .../controller/api/AnalysisController.java | 16 +- .../SPDF/controller/api/CropController.java | 2 +- .../SPDF/controller/api/MergeController.java | 4 +- .../api/MultiPageLayoutController.java | 2 +- .../api/RearrangePagesPDFController.java | 2 +- .../controller/api/ScalePagesController.java | 2 +- .../controller/api/SplitPDFController.java | 2 +- .../api/SplitPdfByChaptersController.java | 2 +- .../api/SplitPdfBySectionsController.java | 2 +- .../api/ToSinglePageController.java | 2 +- .../converters/ConvertImgPDFController.java | 7 +- .../api/converters/ConvertPDFToOffice.java | 2 +- .../api/converters/ExtractCSVController.java | 2 +- .../api/filters/FilterController.java | 10 +- .../api/misc/AutoRenameController.java | 2 +- .../api/misc/BlankPageController.java | 2 +- .../api/misc/DecompressPdfController.java | 2 +- .../api/misc/ExtractImageScansController.java | 3 +- .../api/misc/ExtractImagesController.java | 2 +- .../api/misc/FlattenController.java | 2 +- .../api/misc/MetadataController.java | 2 +- .../api/misc/PageNumbersController.java | 3 +- .../controller/api/misc/ShowJavascript.java | 2 +- .../api/security/CertSignController.java | 4 +- .../controller/api/security/GetInfoOnPDF.java | 2 +- .../SPDF/model/api/PDFWithPageNums.java | 14 -- .../SPDF/service/CustomPDDocumentFactory.java | 147 ++++++----- .../software/SPDF/utils/GeneralUtils.java | 11 +- testing/test.sh | 175 ++++++++++++- testing/test_webpages.sh | 235 +++++++++++------- 34 files changed, 504 insertions(+), 214 deletions(-) create mode 100644 src/main/java/org/apache/pdfbox/examples/util/DeletingRandomAccessFile.java diff --git a/.gitignore b/.gitignore index 55ae9bdb7..e5d8ad209 100644 --- a/.gitignore +++ b/.gitignore @@ -26,7 +26,7 @@ clientWebUI/ !cucumber/exampleFiles/ !cucumber/exampleFiles/example_html.zip exampleYmlFiles/stirling/ - 
+/testing/file_snapshots # Gradle .gradle .lock diff --git a/Dockerfile.fat b/Dockerfile.fat index b521b95cc..2cc2d2133 100644 --- a/Dockerfile.fat +++ b/Dockerfile.fat @@ -1,5 +1,11 @@ # Build the application -FROM gradle:8.12-jdk17 AS build +FROM gradle:8.12-jdk21 AS build + +COPY build.gradle . +COPY settings.gradle . +COPY gradlew . +COPY gradle gradle/ +RUN ./gradlew build -x spotlessApply -x spotlessCheck -x test -x sonarqube || return 0 # Set the working directory WORKDIR /app @@ -10,7 +16,7 @@ COPY . . # Build the application with DOCKER_ENABLE_SECURITY=false RUN DOCKER_ENABLE_SECURITY=true \ STIRLING_PDF_DESKTOP_UI=false \ - ./gradlew clean build + ./gradlew clean build -x spotlessApply -x spotlessCheck -x test -x sonarqube # Main stage FROM alpine:3.21.3@sha256:a8560b36e8b8210634f77d9f7f9efd7ffa463e380b75e2e74aff4511df3ef88c diff --git a/build.gradle b/build.gradle index 11d89d071..0e1ccb47b 100644 --- a/build.gradle +++ b/build.gradle @@ -25,7 +25,7 @@ ext { } group = "stirling.software" -version = "0.44.0" +version = "0.44.1" java { // 17 is lowest but we support and recommend 21 diff --git a/src/main/java/org/apache/pdfbox/examples/util/DeletingRandomAccessFile.java b/src/main/java/org/apache/pdfbox/examples/util/DeletingRandomAccessFile.java new file mode 100644 index 000000000..2c0341e19 --- /dev/null +++ b/src/main/java/org/apache/pdfbox/examples/util/DeletingRandomAccessFile.java @@ -0,0 +1,39 @@ +package org.apache.pdfbox.examples.util; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.pdfbox.io.RandomAccessReadBufferedFile; + +import lombok.extern.slf4j.Slf4j; + +/** A custom RandomAccessRead implementation that deletes the file when closed */ +@Slf4j +public class DeletingRandomAccessFile extends RandomAccessReadBufferedFile { + private final Path tempFilePath; + + public DeletingRandomAccessFile(File file) throws IOException { + super(file); + this.tempFilePath = 
file.toPath(); + } + + @Override + public void close() throws IOException { + try { + super.close(); + } finally { + try { + boolean deleted = Files.deleteIfExists(tempFilePath); + if (deleted) { + log.info("Successfully deleted temp file: {}", tempFilePath); + } else { + log.warn("Failed to delete temp file (may not exist): {}", tempFilePath); + } + } catch (IOException e) { + log.error("Error deleting temp file: {}", tempFilePath, e); + } + } + } +} diff --git a/src/main/java/stirling/software/SPDF/controller/api/AnalysisController.java b/src/main/java/stirling/software/SPDF/controller/api/AnalysisController.java index 8c97605b0..37941017e 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/AnalysisController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/AnalysisController.java @@ -37,7 +37,7 @@ public class AnalysisController { summary = "Get PDF page count", description = "Returns total number of pages in PDF. Input:PDF Output:JSON Type:SISO") public Map getPageCount(@ModelAttribute PDFFile file) throws IOException { - try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) { return Map.of("pageCount", document.getNumberOfPages()); } } @@ -47,7 +47,7 @@ public class AnalysisController { summary = "Get basic PDF information", description = "Returns page count, version, file size. Input:PDF Output:JSON Type:SISO") public Map getBasicInfo(@ModelAttribute PDFFile file) throws IOException { - try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) { Map info = new HashMap<>(); info.put("pageCount", document.getNumberOfPages()); info.put("pdfVersion", document.getVersion()); @@ -62,7 +62,7 @@ public class AnalysisController { description = "Returns title, author, subject, etc. 
Input:PDF Output:JSON Type:SISO") public Map getDocumentProperties(@ModelAttribute PDFFile file) throws IOException { - try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) { PDDocumentInformation info = document.getDocumentInformation(); Map properties = new HashMap<>(); properties.put("title", info.getTitle()); @@ -83,7 +83,7 @@ public class AnalysisController { description = "Returns width and height of each page. Input:PDF Output:JSON Type:SISO") public List> getPageDimensions(@ModelAttribute PDFFile file) throws IOException { - try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) { List> dimensions = new ArrayList<>(); PDPageTree pages = document.getPages(); @@ -103,7 +103,7 @@ public class AnalysisController { description = "Returns count and details of form fields. Input:PDF Output:JSON Type:SISO") public Map getFormFields(@ModelAttribute PDFFile file) throws IOException { - try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) { Map formInfo = new HashMap<>(); PDAcroForm form = document.getDocumentCatalog().getAcroForm(); @@ -125,7 +125,7 @@ public class AnalysisController { summary = "Get annotation information", description = "Returns count and types of annotations. 
Input:PDF Output:JSON Type:SISO") public Map getAnnotationInfo(@ModelAttribute PDFFile file) throws IOException { - try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) { Map annotInfo = new HashMap<>(); int totalAnnotations = 0; Map annotationTypes = new HashMap<>(); @@ -150,7 +150,7 @@ public class AnalysisController { description = "Returns list of fonts used in the document. Input:PDF Output:JSON Type:SISO") public Map getFontInfo(@ModelAttribute PDFFile file) throws IOException { - try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) { Map fontInfo = new HashMap<>(); Set fontNames = new HashSet<>(); @@ -172,7 +172,7 @@ public class AnalysisController { description = "Returns encryption and permission details. Input:PDF Output:JSON Type:SISO") public Map getSecurityInfo(@ModelAttribute PDFFile file) throws IOException { - try (PDDocument document = pdfDocumentFactory.load(file.getFileInput().getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) { Map securityInfo = new HashMap<>(); PDEncryption encryption = document.getEncryption(); diff --git a/src/main/java/stirling/software/SPDF/controller/api/CropController.java b/src/main/java/stirling/software/SPDF/controller/api/CropController.java index d3e4933f5..68d252a47 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/CropController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/CropController.java @@ -42,7 +42,7 @@ public class CropController { description = "This operation takes an input PDF file and crops it according to the given coordinates. 
Input:PDF Output:PDF Type:SISO") public ResponseEntity cropPdf(@ModelAttribute CropPdfForm form) throws IOException { - PDDocument sourceDocument = pdfDocumentFactory.load(form.getFileInput().getBytes()); + PDDocument sourceDocument = pdfDocumentFactory.load(form); PDDocument newDocument = pdfDocumentFactory.createNewDocumentBasedOnOldDocument(sourceDocument); diff --git a/src/main/java/stirling/software/SPDF/controller/api/MergeController.java b/src/main/java/stirling/software/SPDF/controller/api/MergeController.java index 416546f4a..8c63f817e 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/MergeController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/MergeController.java @@ -100,8 +100,8 @@ public class MergeController { }; case "byPDFTitle": return (file1, file2) -> { - try (PDDocument doc1 = pdfDocumentFactory.load(file1.getBytes()); - PDDocument doc2 = pdfDocumentFactory.load(file2.getBytes())) { + try (PDDocument doc1 = pdfDocumentFactory.load(file1); + PDDocument doc2 = pdfDocumentFactory.load(file2)) { String title1 = doc1.getDocumentInformation().getTitle(); String title2 = doc2.getDocumentInformation().getTitle(); return title1.compareTo(title2); diff --git a/src/main/java/stirling/software/SPDF/controller/api/MultiPageLayoutController.java b/src/main/java/stirling/software/SPDF/controller/api/MultiPageLayoutController.java index 76ad5e75a..56d02a686 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/MultiPageLayoutController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/MultiPageLayoutController.java @@ -63,7 +63,7 @@ public class MultiPageLayoutController { : (int) Math.sqrt(pagesPerSheet); int rows = pagesPerSheet == 2 || pagesPerSheet == 3 ? 
1 : (int) Math.sqrt(pagesPerSheet); - PDDocument sourceDocument = pdfDocumentFactory.load(file.getBytes()); + PDDocument sourceDocument = pdfDocumentFactory.load(file); PDDocument newDocument = pdfDocumentFactory.createNewDocumentBasedOnOldDocument(sourceDocument); PDPage newPage = new PDPage(PDRectangle.A4); diff --git a/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java b/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java index 58e69b720..09f8afe9c 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java @@ -250,7 +250,7 @@ public class RearrangePagesPDFController { String sortType = request.getCustomMode(); try { // Load the input PDF - PDDocument document = pdfDocumentFactory.load(pdfFile.getBytes()); + PDDocument document = pdfDocumentFactory.load(pdfFile); // Split the page order string into an array of page numbers or range of numbers String[] pageOrderArr = pageOrder != null ? 
pageOrder.split(",") : new String[0]; diff --git a/src/main/java/stirling/software/SPDF/controller/api/ScalePagesController.java b/src/main/java/stirling/software/SPDF/controller/api/ScalePagesController.java index c1715347d..5fb62cafc 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/ScalePagesController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/ScalePagesController.java @@ -51,7 +51,7 @@ public class ScalePagesController { String targetPDRectangle = request.getPageSize(); float scaleFactor = request.getScaleFactor(); - PDDocument sourceDocument = pdfDocumentFactory.load(file.getBytes()); + PDDocument sourceDocument = pdfDocumentFactory.load(file); PDDocument outputDocument = pdfDocumentFactory.createNewDocumentBasedOnOldDocument(sourceDocument); diff --git a/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java b/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java index 573bfb7da..d0df4ced5 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java @@ -62,7 +62,7 @@ public class SplitPDFController { String pages = request.getPageNumbers(); // open the pdf document - document = pdfDocumentFactory.load(file.getBytes()); + document = pdfDocumentFactory.load(file); // PdfMetadata metadata = PdfMetadataService.extractMetadataFromPdf(document); int totalPages = document.getNumberOfPages(); List pageNumbers = request.getPageNumbersList(document, false); diff --git a/src/main/java/stirling/software/SPDF/controller/api/SplitPdfByChaptersController.java b/src/main/java/stirling/software/SPDF/controller/api/SplitPdfByChaptersController.java index 195dbd0dd..b774d3ced 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/SplitPdfByChaptersController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/SplitPdfByChaptersController.java @@ -139,7 +139,7 @@ public 
class SplitPdfByChaptersController { if (bookmarkLevel < 0) { return ResponseEntity.badRequest().body("Invalid bookmark level".getBytes()); } - sourceDocument = pdfDocumentFactory.load(file.getBytes()); + sourceDocument = pdfDocumentFactory.load(file); PDDocumentOutline outline = sourceDocument.getDocumentCatalog().getDocumentOutline(); diff --git a/src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySectionsController.java b/src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySectionsController.java index 1a3842e05..2a692cbca 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySectionsController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySectionsController.java @@ -56,7 +56,7 @@ public class SplitPdfBySectionsController { List splitDocumentsBoas = new ArrayList<>(); MultipartFile file = request.getFileInput(); - PDDocument sourceDocument = pdfDocumentFactory.load(file.getBytes()); + PDDocument sourceDocument = pdfDocumentFactory.load(file); // Process the PDF based on split parameters int horiz = request.getHorizontalDivisions() + 1; diff --git a/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java b/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java index 4e851a482..00bdf827e 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java @@ -45,7 +45,7 @@ public class ToSinglePageController { throws IOException { // Load the source document - PDDocument sourceDocument = pdfDocumentFactory.load(request.getFileInput().getBytes()); + PDDocument sourceDocument = pdfDocumentFactory.load(request); // Calculate total height and max width float totalHeight = 0; diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertImgPDFController.java 
b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertImgPDFController.java index 3e277ab16..103f6e1c8 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertImgPDFController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertImgPDFController.java @@ -74,7 +74,7 @@ public class ConvertImgPDFController { ; try { // Load the input PDF - byte[] newPdfBytes = rearrangePdfPages(file.getBytes(), pageOrderArr); + byte[] newPdfBytes = rearrangePdfPages(file, pageOrderArr); ImageType colorTypeResult = ImageType.RGB; if ("greyscale".equals(colorType)) { @@ -243,9 +243,10 @@ public class ConvertImgPDFController { * @return A byte array of the rearranged PDF. * @throws IOException If an error occurs while processing the PDF. */ - private byte[] rearrangePdfPages(byte[] pdfBytes, String[] pageOrderArr) throws IOException { + private byte[] rearrangePdfPages(MultipartFile pdfFile, String[] pageOrderArr) + throws IOException { // Load the input PDF - PDDocument document = pdfDocumentFactory.load(pdfBytes); + PDDocument document = pdfDocumentFactory.load(pdfFile); int totalPages = document.getNumberOfPages(); List newPageOrder = GeneralUtils.parsePageList(pageOrderArr, totalPages, false); diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java index 58b6fd7fe..39c808096 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java @@ -62,7 +62,7 @@ public class ConvertPDFToOffice { MultipartFile inputFile = request.getFileInput(); String outputFormat = request.getOutputFormat(); if ("txt".equals(request.getOutputFormat())) { - try (PDDocument document = pdfDocumentFactory.load(inputFile.getBytes())) { + try (PDDocument document = 
pdfDocumentFactory.load(inputFile)) { PDFTextStripper stripper = new PDFTextStripper(); String text = stripper.getText(document); return WebResponseUtils.bytesToWebResponse( diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java index 54620113c..359d353d7 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ExtractCSVController.java @@ -59,7 +59,7 @@ public class ExtractCSVController { String baseName = getBaseName(form.getFileInput().getOriginalFilename()); List csvEntries = new ArrayList<>(); - try (PDDocument document = pdfDocumentFactory.load(form.getFileInput().getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(form)) { List pages = form.getPageNumbersList(document, true); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); CSVFormat format = diff --git a/src/main/java/stirling/software/SPDF/controller/api/filters/FilterController.java b/src/main/java/stirling/software/SPDF/controller/api/filters/FilterController.java index 7ddded2a6..66ac4cb8b 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/filters/FilterController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/filters/FilterController.java @@ -49,7 +49,7 @@ public class FilterController { String text = request.getText(); String pageNumber = request.getPageNumbers(); - PDDocument pdfDocument = pdfDocumentFactory.load(inputFile.getBytes()); + PDDocument pdfDocument = pdfDocumentFactory.load(inputFile); if (PdfUtils.hasText(pdfDocument, pageNumber, text)) return WebResponseUtils.pdfDocToWebResponse( pdfDocument, Filenames.toSimpleFileName(inputFile.getOriginalFilename())); @@ -66,7 +66,7 @@ public class FilterController { MultipartFile inputFile = request.getFileInput(); String pageNumber 
= request.getPageNumbers(); - PDDocument pdfDocument = pdfDocumentFactory.load(inputFile.getBytes()); + PDDocument pdfDocument = pdfDocumentFactory.load(inputFile); if (PdfUtils.hasImages(pdfDocument, pageNumber)) return WebResponseUtils.pdfDocToWebResponse( pdfDocument, Filenames.toSimpleFileName(inputFile.getOriginalFilename())); @@ -83,7 +83,7 @@ public class FilterController { String pageCount = request.getPageCount(); String comparator = request.getComparator(); // Load the PDF - PDDocument document = pdfDocumentFactory.load(inputFile.getBytes()); + PDDocument document = pdfDocumentFactory.load(inputFile); int actualPageCount = document.getNumberOfPages(); boolean valid = false; @@ -117,7 +117,7 @@ public class FilterController { String comparator = request.getComparator(); // Load the PDF - PDDocument document = pdfDocumentFactory.load(inputFile.getBytes()); + PDDocument document = pdfDocumentFactory.load(inputFile); PDPage firstPage = document.getPage(0); PDRectangle actualPageSize = firstPage.getMediaBox(); @@ -193,7 +193,7 @@ public class FilterController { String comparator = request.getComparator(); // Load the PDF - PDDocument document = pdfDocumentFactory.load(inputFile.getBytes()); + PDDocument document = pdfDocumentFactory.load(inputFile); // Get the rotation of the first page PDPage firstPage = document.getPage(0); diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/AutoRenameController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/AutoRenameController.java index 0bbe7e6be..d3d2e91cc 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/AutoRenameController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/AutoRenameController.java @@ -52,7 +52,7 @@ public class AutoRenameController { MultipartFile file = request.getFileInput(); Boolean useFirstTextAsFallback = request.isUseFirstTextAsFallback(); - PDDocument document = pdfDocumentFactory.load(file.getBytes()); + PDDocument 
document = pdfDocumentFactory.load(file); PDFTextStripper reader = new PDFTextStripper() { List lineInfos = new ArrayList<>(); diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/BlankPageController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/BlankPageController.java index 7fee8e2ab..0195382ea 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/BlankPageController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/BlankPageController.java @@ -84,7 +84,7 @@ public class BlankPageController { int threshold = request.getThreshold(); float whitePercent = request.getWhitePercent(); - try (PDDocument document = pdfDocumentFactory.load(inputFile.getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(inputFile)) { PDPageTree pages = document.getDocumentCatalog().getPages(); PDFTextStripper textStripper = new PDFTextStripper(); diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/DecompressPdfController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/DecompressPdfController.java index 626f3568c..45767fc82 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/DecompressPdfController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/DecompressPdfController.java @@ -50,7 +50,7 @@ public class DecompressPdfController { MultipartFile file = request.getFileInput(); - try (PDDocument document = pdfDocumentFactory.load(file.getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(file)) { // Process all objects in document processAllObjects(document); diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImageScansController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImageScansController.java index 3769bc235..7c4d9137f 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImageScansController.java +++ 
b/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImageScansController.java @@ -95,8 +95,7 @@ public class ExtractImageScansController { // Check if input file is a PDF if ("pdf".equalsIgnoreCase(extension)) { // Load PDF document - try (PDDocument document = - pdfDocumentFactory.load(form.getFileInput().getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(form.getFileInput())) { PDFRenderer pdfRenderer = new PDFRenderer(document); pdfRenderer.setSubsamplingAllowed(true); int pageCount = document.getNumberOfPages(); diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java index 4c5c60e34..3010c1be6 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java @@ -67,7 +67,7 @@ public class ExtractImagesController { MultipartFile file = request.getFileInput(); String format = request.getFormat(); boolean allowDuplicates = request.isAllowDuplicates(); - PDDocument document = pdfDocumentFactory.load(file.getBytes()); + PDDocument document = pdfDocumentFactory.load(file); // Determine if multithreading should be used based on PDF size or number of pages boolean useMultithreading = shouldUseMultithreading(file, document); diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/FlattenController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/FlattenController.java index 9b9126e17..39991a1f6 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/FlattenController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/FlattenController.java @@ -50,7 +50,7 @@ public class FlattenController { public ResponseEntity flatten(@ModelAttribute FlattenRequest request) throws Exception { MultipartFile file = request.getFileInput(); - 
PDDocument document = pdfDocumentFactory.load(file.getBytes()); + PDDocument document = pdfDocumentFactory.load(file); Boolean flattenOnlyForms = request.getFlattenOnlyForms(); if (Boolean.TRUE.equals(flattenOnlyForms)) { diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/MetadataController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/MetadataController.java index 69553e423..66c1c8b66 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/MetadataController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/MetadataController.java @@ -84,7 +84,7 @@ public class MetadataController { allRequestParams = new java.util.HashMap(); } // Load the PDF file into a PDDocument - PDDocument document = pdfDocumentFactory.load(pdfFile.getBytes()); + PDDocument document = pdfDocumentFactory.load(pdfFile); // Get the document information from the PDF PDDocumentInformation info = document.getDocumentInformation(); diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/PageNumbersController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/PageNumbersController.java index c3bbb721a..17d1bbaa9 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/PageNumbersController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/PageNumbersController.java @@ -55,8 +55,7 @@ public class PageNumbersController { String pagesToNumber = request.getPagesToNumber(); String customText = request.getCustomText(); int pageNumber = startingNumber; - byte[] fileBytes = file.getBytes(); - PDDocument document = pdfDocumentFactory.load(fileBytes); + PDDocument document = pdfDocumentFactory.load(file); float font_size = request.getFontSize(); String font_type = request.getFontType(); float marginFactor; diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/ShowJavascript.java b/src/main/java/stirling/software/SPDF/controller/api/misc/ShowJavascript.java index 
a3b9dbdca..e38657cfe 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/ShowJavascript.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/ShowJavascript.java @@ -43,7 +43,7 @@ public class ShowJavascript { MultipartFile inputFile = request.getFileInput(); String script = ""; - try (PDDocument document = pdfDocumentFactory.load(inputFile.getBytes())) { + try (PDDocument document = pdfDocumentFactory.load(inputFile)) { if (document.getDocumentCatalog() != null && document.getDocumentCatalog().getNames() != null) { diff --git a/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java b/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java index 3dc190982..cad762062 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java @@ -90,7 +90,7 @@ public class CertSignController { private static void sign( CustomPDDocumentFactory pdfDocumentFactory, - byte[] input, + MultipartFile input, OutputStream output, CreateSignature instance, Boolean showSignature, @@ -179,7 +179,7 @@ public class CertSignController { ByteArrayOutputStream baos = new ByteArrayOutputStream(); sign( pdfDocumentFactory, - pdf.getBytes(), + pdf, baos, createSignature, showSignature, diff --git a/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java b/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java index a6387adb1..026fd38a0 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java +++ b/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java @@ -126,7 +126,7 @@ public class GetInfoOnPDF { @Operation(summary = "Summary here", description = "desc. 
Input:PDF Output:JSON Type:SISO") public ResponseEntity getPdfInfo(@ModelAttribute PDFFile request) throws IOException { MultipartFile inputFile = request.getFileInput(); - try (PDDocument pdfBoxDoc = pdfDocumentFactory.load(inputFile.getBytes()); ) { + try (PDDocument pdfBoxDoc = pdfDocumentFactory.load(inputFile); ) { ObjectMapper objectMapper = new ObjectMapper(); ObjectNode jsonOutput = objectMapper.createObjectNode(); diff --git a/src/main/java/stirling/software/SPDF/model/api/PDFWithPageNums.java b/src/main/java/stirling/software/SPDF/model/api/PDFWithPageNums.java index 3da8b7ce9..0148b7a72 100644 --- a/src/main/java/stirling/software/SPDF/model/api/PDFWithPageNums.java +++ b/src/main/java/stirling/software/SPDF/model/api/PDFWithPageNums.java @@ -1,9 +1,7 @@ package stirling.software.SPDF.model.api; -import java.io.IOException; import java.util.List; -import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import io.swagger.v3.oas.annotations.Hidden; @@ -32,18 +30,6 @@ public class PDFWithPageNums extends PDFFile { requiredMode = RequiredMode.NOT_REQUIRED) private String pageNumbers; - @Hidden - public List getPageNumbersList(boolean zeroCount) { - int pageCount = 0; - try { - pageCount = Loader.loadPDF(getFileInput().getBytes()).getNumberOfPages(); - } catch (IOException e) { - // TODO Auto-generated catch block - log.error("exception", e); - } - return GeneralUtils.parsePageList(pageNumbers, pageCount, zeroCount); - } - @Hidden public List getPageNumbersList(PDDocument doc, boolean oneBased) { int pageCount = 0; diff --git a/src/main/java/stirling/software/SPDF/service/CustomPDDocumentFactory.java b/src/main/java/stirling/software/SPDF/service/CustomPDDocumentFactory.java index e9bc3b1a4..6963b522c 100644 --- a/src/main/java/stirling/software/SPDF/service/CustomPDDocumentFactory.java +++ b/src/main/java/stirling/software/SPDF/service/CustomPDDocumentFactory.java @@ -10,9 +10,9 @@ import java.nio.file.StandardCopyOption; import 
java.util.concurrent.atomic.AtomicLong; import org.apache.pdfbox.Loader; +import org.apache.pdfbox.examples.util.DeletingRandomAccessFile; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.MemoryUsageSetting; -import org.apache.pdfbox.io.RandomAccessReadBufferedFile; import org.apache.pdfbox.io.RandomAccessStreamCache.StreamCacheCreateFunction; import org.apache.pdfbox.io.ScratchFile; import org.apache.pdfbox.pdmodel.PDDocument; @@ -102,16 +102,29 @@ public class CustomPDDocumentFactory { // Since we don't know the size upfront, buffer to a temp file Path tempFile = createTempFile("pdf-stream-"); - try { - Files.copy(input, tempFile, StandardCopyOption.REPLACE_EXISTING); - return loadAdaptively(tempFile.toFile(), Files.size(tempFile)); - } catch (IOException e) { - cleanupFile(tempFile); - throw e; - } + + Files.copy(input, tempFile, StandardCopyOption.REPLACE_EXISTING); + return loadAdaptively(tempFile.toFile(), Files.size(tempFile)); } - private PDDocument loadAdaptively(Object source, long contentSize) throws IOException { + /** Load with password from InputStream */ + public PDDocument load(InputStream input, String password) throws IOException { + if (input == null) { + throw new IllegalArgumentException("InputStream cannot be null"); + } + + // Since we don't know the size upfront, buffer to a temp file + Path tempFile = createTempFile("pdf-stream-"); + + Files.copy(input, tempFile, StandardCopyOption.REPLACE_EXISTING); + return loadAdaptivelyWithPassword(tempFile.toFile(), Files.size(tempFile), password); + } + + /** + * Determine the appropriate caching strategy based on file size and available memory. This + * common method is used by both password and non-password loading paths. 
+ */ + private StreamCacheCreateFunction getStreamCacheFunction(long contentSize) { long maxMemory = Runtime.getRuntime().maxMemory(); long freeMemory = Runtime.getRuntime().freeMemory(); long totalMemory = Runtime.getRuntime().totalMemory(); @@ -129,32 +142,38 @@ public class CustomPDDocumentFactory { usedMemory / (1024 * 1024), maxMemory / (1024 * 1024)); - // Determine caching strategy based on both file size and available memory - StreamCacheCreateFunction cacheFunction; - // If free memory is critically low, always use file-based caching - // In loadAdaptively method, replace current caching strategy decision with: if (freeMemoryPercent < MIN_FREE_MEMORY_PERCENTAGE || actualFreeMemory < MIN_FREE_MEMORY_BYTES) { log.info( "Low memory detected ({}%), forcing file-based cache", String.format("%.2f", freeMemoryPercent)); - cacheFunction = createScratchFileCacheFunction(MemoryUsageSetting.setupTempFileOnly()); + return createScratchFileCacheFunction(MemoryUsageSetting.setupTempFileOnly()); } else if (contentSize < SMALL_FILE_THRESHOLD) { log.info("Using memory-only cache for small document ({}KB)", contentSize / 1024); - cacheFunction = IOUtils.createMemoryOnlyStreamCache(); + return IOUtils.createMemoryOnlyStreamCache(); } else if (contentSize < LARGE_FILE_THRESHOLD) { // For medium files (10-50MB), use a mixed approach log.info( "Using mixed memory/file cache for medium document ({}MB)", contentSize / (1024 * 1024)); - cacheFunction = - createScratchFileCacheFunction(MemoryUsageSetting.setupMixed(LARGE_FILE_USAGE)); + return createScratchFileCacheFunction(MemoryUsageSetting.setupMixed(LARGE_FILE_USAGE)); } else { log.info("Using file-based cache for large document"); - cacheFunction = createScratchFileCacheFunction(MemoryUsageSetting.setupTempFileOnly()); + return createScratchFileCacheFunction(MemoryUsageSetting.setupTempFileOnly()); } + } + /** Update the existing loadAdaptively method to use the common function */ + private PDDocument loadAdaptively(Object 
source, long contentSize) throws IOException { + // Get the appropriate caching strategy + StreamCacheCreateFunction cacheFunction = getStreamCacheFunction(contentSize); + + //If small handle as bytes and remove original file + if (contentSize <= SMALL_FILE_THRESHOLD && source instanceof File file) { + source = Files.readAllBytes(file.toPath()); + file.delete(); + } PDDocument document; if (source instanceof File file) { document = loadFromFile(file, contentSize, cacheFunction); @@ -168,6 +187,50 @@ public class CustomPDDocumentFactory { return document; } + /** Load a PDF with password protection using adaptive loading strategies */ + private PDDocument loadAdaptivelyWithPassword(Object source, long contentSize, String password) + throws IOException { + // Get the appropriate caching strategy + StreamCacheCreateFunction cacheFunction = getStreamCacheFunction(contentSize); + //If small handle as bytes and remove original file + if (contentSize <= SMALL_FILE_THRESHOLD && source instanceof File file) { + source = Files.readAllBytes(file.toPath()); + file.delete(); + } + PDDocument document; + if (source instanceof File file) { + document = loadFromFileWithPassword(file, contentSize, cacheFunction, password); + } else if (source instanceof byte[] bytes) { + document = loadFromBytesWithPassword(bytes, contentSize, cacheFunction, password); + } else { + throw new IllegalArgumentException("Unsupported source type: " + source.getClass()); + } + + postProcessDocument(document); + return document; + } + + /** Load a file with password */ + private PDDocument loadFromFileWithPassword( + File file, long size, StreamCacheCreateFunction cache, String password) + throws IOException { + return Loader.loadPDF(new DeletingRandomAccessFile(file), password, null, null, cache); + } + + /** Load bytes with password */ + private PDDocument loadFromBytesWithPassword( + byte[] bytes, long size, StreamCacheCreateFunction cache, String password) + throws IOException { + if (size >= 
SMALL_FILE_THRESHOLD) { + log.info("Writing large byte array to temp file for password-protected PDF"); + Path tempFile = createTempFile("pdf-bytes-"); + + Files.write(tempFile, bytes); + return Loader.loadPDF(tempFile.toFile(), password, null, null, cache); + } + return Loader.loadPDF(bytes, password, null, null, cache); + } + private StreamCacheCreateFunction createScratchFileCacheFunction(MemoryUsageSetting settings) { return () -> { try { @@ -185,11 +248,7 @@ public class CustomPDDocumentFactory { private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache) throws IOException { - if (size >= EXTREMELY_LARGE_THRESHOLD) { - log.info("Loading extremely large file via buffered access"); - return Loader.loadPDF(new RandomAccessReadBufferedFile(file), "", null, null, cache); - } - return Loader.loadPDF(file, "", null, null, cache); + return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache); } private PDDocument loadFromBytes(byte[] bytes, long size, StreamCacheCreateFunction cache) @@ -197,12 +256,9 @@ public class CustomPDDocumentFactory { if (size >= SMALL_FILE_THRESHOLD) { log.info("Writing large byte array to temp file"); Path tempFile = createTempFile("pdf-bytes-"); - try { - Files.write(tempFile, bytes); - return Loader.loadPDF(tempFile.toFile(), "", null, null, cache); - } finally { - cleanupFile(tempFile); - } + + Files.write(tempFile, bytes); + return loadFromFile(tempFile.toFile(), size, cache); } return Loader.loadPDF(bytes, "", null, null, cache); } @@ -225,12 +281,9 @@ public class CustomPDDocumentFactory { } } else { Path tempFile = createTempFile("pdf-save-"); - try { - document.save(tempFile.toFile()); - return Files.readAllBytes(tempFile); - } finally { - cleanupFile(tempFile); - } + + document.save(tempFile.toFile()); + return Files.readAllBytes(tempFile); } } @@ -258,17 +311,6 @@ public class CustomPDDocumentFactory { return Files.createTempDirectory(prefix + tempCounter.incrementAndGet() + "-"); } 
- /** Clean up a temporary file */ - private void cleanupFile(Path file) { - try { - if (Files.deleteIfExists(file)) { - log.info("Deleted temp file: {}", file); - } - } catch (IOException e) { - log.info("Error deleting temp file {}", file, e); - } - } - /** Create new document bytes based on an existing document */ public byte[] createNewBytesBasedOnOldDocument(byte[] oldDocument) throws IOException { try (PDDocument document = load(oldDocument)) { @@ -339,20 +381,11 @@ public class CustomPDDocumentFactory { /** Load from a MultipartFile */ public PDDocument load(MultipartFile pdfFile) throws IOException { - return load(pdfFile.getBytes()); + return load(pdfFile.getInputStream()); } /** Load with password from MultipartFile */ public PDDocument load(MultipartFile fileInput, String password) throws IOException { - return load(fileInput.getBytes(), password); - } - - /** Load with password from byte array */ - private PDDocument load(byte[] bytes, String password) throws IOException { - // Since we don't have direct password support in the adaptive loader, - // we'll need to use PDFBox's Loader directly - PDDocument document = Loader.loadPDF(bytes, password); - pdfMetadataService.setDefaultMetadata(document); - return document; + return load(fileInput.getInputStream(), password); } } diff --git a/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java b/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java index 96f0aead4..d2615935f 100644 --- a/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java +++ b/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java @@ -32,8 +32,15 @@ public class GeneralUtils { public static File convertMultipartFileToFile(MultipartFile multipartFile) throws IOException { File tempFile = Files.createTempFile("temp", null).toFile(); - try (FileOutputStream os = new FileOutputStream(tempFile)) { - os.write(multipartFile.getBytes()); + try (InputStream inputStream = multipartFile.getInputStream(); + FileOutputStream 
outputStream = new FileOutputStream(tempFile)) { + + byte[] buffer = new byte[8192]; + int bytesRead; + + while ((bytesRead = inputStream.read(buffer)) != -1) { + outputStream.write(buffer, 0, bytesRead); + } } return tempFile; } diff --git a/testing/test.sh b/testing/test.sh index 7c8681580..480b4fb01 100644 --- a/testing/test.sh +++ b/testing/test.sh @@ -39,6 +39,136 @@ check_health() { return 0 } +# Function to capture file list from a Docker container +capture_file_list() { + local container_name=$1 + local output_file=$2 + + echo "Capturing file list from $container_name..." + # Get all files in one command, output directly from Docker to avoid path issues + # Skip proc, sys, dev, and the specified LibreOffice config directory + # Also skip PDFBox and LibreOffice temporary files + docker exec $container_name sh -c "find / -type f \ + -not -path '*/proc/*' \ + -not -path '*/sys/*' \ + -not -path '*/dev/*' \ + -not -path '/config/*' \ + -not -path '/logs/*' \ + -not -path '*/home/stirlingpdfuser/.config/libreoffice/*' \ + -not -path '*/tmp/PDFBox*' \ + -not -path '*/tmp/hsperfdata_stirlingpdfuser/*' \ + -not -path '*/tmp/lu*' \ + -not -path '*/tmp/tmp*' \ + 2>/dev/null | xargs -I{} sh -c 'stat -c \"%n %s %Y\" \"{}\" 2>/dev/null || true' | sort" > "$output_file" + + # Check if the output file has content + if [ ! -s "$output_file" ]; then + echo "WARNING: Failed to capture file list or container returned empty list" + echo "Trying alternative approach..." + + # Alternative simpler approach - just get paths as a fallback + docker exec $container_name sh -c "find / -type f \ + -not -path '*/proc/*' \ + -not -path '*/sys/*' \ + -not -path '*/dev/*' \ + -not -path '/config/*' \ + -not -path '/logs/*' \ + -not -path '*/home/stirlingpdfuser/.config/libreoffice/*' \ + -not -path '*/tmp/PDFBox*' \ + -not -path '*/tmp/hsperfdata_stirlingpdfuser/*' \ + -not -path '*/tmp/lu*' \ + -not -path '*/tmp/tmp*' \ + 2>/dev/null | sort" > "$output_file" + + if [ ! 
-s "$output_file" ]; then + echo "ERROR: All attempts to capture file list failed" + # Create a dummy entry to prevent diff errors + echo "NO_FILES_FOUND 0 0" > "$output_file" + fi + fi + + echo "File list captured to $output_file" +} + +# Function to compare before and after file lists +compare_file_lists() { + local before_file=$1 + local after_file=$2 + local diff_file=$3 + local container_name=$4 # Added container_name parameter + + echo "Comparing file lists..." + + # Check if files exist and have content + if [ ! -s "$before_file" ] || [ ! -s "$after_file" ]; then + echo "WARNING: One or both file lists are empty." + + if [ ! -s "$before_file" ]; then + echo "Before file is empty: $before_file" + fi + + if [ ! -s "$after_file" ]; then + echo "After file is empty: $after_file" + fi + + # Create empty diff file + > "$diff_file" + + # Check if we at least have the after file to look for temp files + if [ -s "$after_file" ]; then + echo "Checking for temp files in the after snapshot..." + grep -i "tmp\|temp" "$after_file" > "${diff_file}.tmp" + if [ -s "${diff_file}.tmp" ]; then + echo "WARNING: Temporary files found:" + cat "${diff_file}.tmp" + echo "Printing docker logs due to temporary file detection:" + docker logs "$container_name" # Print logs when temp files are found + return 1 + else + echo "No temporary files found in the after snapshot." 
+ fi + fi + + return 0 + fi + + # Both files exist and have content, proceed with diff + diff "$before_file" "$after_file" > "$diff_file" + + if [ -s "$diff_file" ]; then + echo "Detected changes in files:" + cat "$diff_file" + + # Extract only added files (lines starting with ">") + grep "^>" "$diff_file" > "${diff_file}.added" || true + if [ -s "${diff_file}.added" ]; then + echo "New files created during test:" + cat "${diff_file}.added" | sed 's/^> //' + + # Check for tmp files + grep -i "tmp\|temp" "${diff_file}.added" > "${diff_file}.tmp" || true + if [ -s "${diff_file}.tmp" ]; then + echo "WARNING: Temporary files detected:" + cat "${diff_file}.tmp" + echo "Printing docker logs due to temporary file detection:" + docker logs "$container_name" # Print logs when temp files are found + return 1 + fi + fi + + # Extract only removed files (lines starting with "<") + grep "^<" "$diff_file" > "${diff_file}.removed" || true + if [ -s "${diff_file}.removed" ]; then + echo "Files removed during test:" + cat "${diff_file}.removed" | sed 's/^< //' + fi + else + echo "No file changes detected during test." + fi + + return 0 +} + # Function to test a Docker Compose configuration test_compose() { local compose_file=$1 @@ -91,7 +221,7 @@ main() { # Building Docker images # docker build --no-cache --pull --build-arg VERSION_TAG=alpha -t stirlingtools/stirling-pdf:latest -f ./Dockerfile . - docker build --no-cache --pull --build-arg VERSION_TAG=alpha -t stirlingtools/stirling-pdf:latest-ultra-lite -f ./Dockerfile.ultra-lite . + docker build --build-arg VERSION_TAG=alpha -t docker.stirlingpdf.com/stirlingtools/stirling-pdf:latest-ultra-lite -f ./Dockerfile.ultra-lite . # Test each configuration run_tests "Stirling-PDF-Ultra-Lite" "./exampleYmlFiles/docker-compose-latest-ultra-lite.yml" @@ -147,16 +277,55 @@ main() { run_tests "Stirling-PDF-Security-Fat-with-login" "./exampleYmlFiles/test_cicd.yml" if [ $? 
-eq 0 ]; then + # Create directory for file snapshots if it doesn't exist + SNAPSHOT_DIR="$PROJECT_ROOT/testing/file_snapshots" + mkdir -p "$SNAPSHOT_DIR" + + # Capture file list before running behave tests + BEFORE_FILE="$SNAPSHOT_DIR/files_before_behave.txt" + AFTER_FILE="$SNAPSHOT_DIR/files_after_behave.txt" + DIFF_FILE="$SNAPSHOT_DIR/files_diff.txt" + + # Define container name variable for consistency + CONTAINER_NAME="Stirling-PDF-Security-Fat-with-login" + + capture_file_list "$CONTAINER_NAME" "$BEFORE_FILE" + cd "testing/cucumber" if python -m behave; then + # Wait 10 seconds before capturing the file list after tests + echo "Waiting 5 seconds for any file operations to complete..." + sleep 5 + + # Capture file list after running behave tests + cd "$PROJECT_ROOT" + capture_file_list "$CONTAINER_NAME" "$AFTER_FILE" + + # Compare file lists + if compare_file_lists "$BEFORE_FILE" "$AFTER_FILE" "$DIFF_FILE" "$CONTAINER_NAME"; then + echo "No unexpected temporary files found." + passed_tests+=("Stirling-PDF-Regression") + else + echo "WARNING: Unexpected temporary files detected after behave tests!" + failed_tests+=("Stirling-PDF-Regression-Temp-Files") + fi + passed_tests+=("Stirling-PDF-Regression") else failed_tests+=("Stirling-PDF-Regression") echo "Printing docker logs of failed regression" - docker logs "Stirling-PDF-Security-Fat-with-login" + docker logs "$CONTAINER_NAME" echo "Printed docker logs of failed regression" + + # Still capture file list after failure for analysis + # Wait 10 seconds before capturing the file list + echo "Waiting 5 seconds before capturing file list..." 
+ sleep 10 + + cd "$PROJECT_ROOT" + capture_file_list "$CONTAINER_NAME" "$AFTER_FILE" + compare_file_lists "$BEFORE_FILE" "$AFTER_FILE" "$DIFF_FILE" "$CONTAINER_NAME" fi - cd "$PROJECT_ROOT" fi docker-compose -f "./exampleYmlFiles/test_cicd.yml" down diff --git a/testing/test_webpages.sh b/testing/test_webpages.sh index e1207c2eb..2091995af 100644 --- a/testing/test_webpages.sh +++ b/testing/test_webpages.sh @@ -2,122 +2,173 @@ # Function to check a single webpage check_webpage() { - local url=$(echo "$1" | tr -d '\r') # Remove carriage returns - local base_url=$(echo "$2" | tr -d '\r') - local full_url="${base_url}${url}" - local timeout=10 - echo -n "Testing $full_url ... " - - # Use curl to fetch the page with timeout - response=$(curl -s -w "\n%{http_code}" --max-time $timeout "$full_url") - if [ $? -ne 0 ]; then - echo "FAILED - Connection error or timeout $full_url " - return 1 - fi + local url=$(echo "$1" | tr -d '\r') # Remove carriage returns + local base_url=$(echo "$2" | tr -d '\r') + local full_url="${base_url}${url}" + local timeout=10 + local result_file="$3" - # Split response into body and status code - HTTP_STATUS=$(echo "$response" | tail -n1) - BODY=$(echo "$response" | sed '$d') + # Use curl to fetch the page with timeout + response=$(curl -s -w "\n%{http_code}" --max-time $timeout "$full_url") + if [ $? -ne 0 ]; then + echo "FAILED - Connection error or timeout $full_url" >> "$result_file" + return 1 + fi - # Check HTTP status - if [ "$HTTP_STATUS" != "200" ]; then - echo "FAILED - HTTP Status: $HTTP_STATUS" - return 1 - fi + # Split response into body and status code + HTTP_STATUS=$(echo "$response" | tail -n1) + BODY=$(echo "$response" | sed '$d') - # Check if response contains HTML - if ! printf '%s' "$BODY" | grep -q "\|> "$result_file" + return 1 + fi - echo "OK" - return 0 + # Check if response contains HTML + if ! 
printf '%s' "$BODY" | grep -q "\|> "$result_file" + return 1 + fi + + echo "OK - $full_url" >> "$result_file" + return 0 } -# Main function to test all URLs from the list +# Function to test a URL and update counters +test_url() { + local url="$1" + local base_url="$2" + local tmp_dir="$3" + local url_index="$4" + local result_file="${tmp_dir}/result_${url_index}.txt" + + if ! check_webpage "$url" "$base_url" "$result_file"; then + echo "1" > "${tmp_dir}/failed_${url_index}" + else + echo "0" > "${tmp_dir}/failed_${url_index}" + fi +} + +# Main function to test all URLs from the list in parallel test_all_urls() { - local url_file=$1 - local base_url=${2:-"http://localhost:8080"} - local failed_count=0 - local total_count=0 - local start_time=$(date +%s) + local url_file="$1" + local base_url="${2:-"http://localhost:8080"}" + local max_parallel="${3:-10}" # Default to 10 parallel processes + local failed_count=0 + local total_count=0 + local start_time=$(date +%s) + local tmp_dir=$(mktemp -d) + local active_jobs=0 + local url_index=0 - echo "Starting webpage tests..." - echo "Base URL: $base_url" - echo "Number of lines: $(wc -l < "$url_file")" - echo "----------------------------------------" - - while IFS= read -r url || [ -n "$url" ]; do - # Skip empty lines and comments - [[ -z "$url" || "$url" =~ ^#.*$ ]] && continue - - ((total_count++)) - if ! check_webpage "$url" "$base_url"; then - ((failed_count++)) - fi - done < "$url_file" + echo "Starting webpage tests..." 
+ echo "Base URL: $base_url" + echo "Number of lines: $(wc -l < "$url_file")" + echo "Max parallel jobs: $max_parallel" + echo "----------------------------------------" - local end_time=$(date +%s) - local duration=$((end_time - start_time)) + # Process each URL + while IFS= read -r url || [ -n "$url" ]; do + # Skip empty lines and comments + [[ -z "$url" || "$url" =~ ^#.*$ ]] && continue + + ((total_count++)) + ((url_index++)) - echo "----------------------------------------" - echo "Test Summary:" - echo "Total tests: $total_count" - echo "Failed tests: $failed_count" - echo "Passed tests: $((total_count - failed_count))" - echo "Duration: ${duration} seconds" + # Run the check in background + test_url "$url" "$base_url" "$tmp_dir" "$url_index" & + + # Track the job + ((active_jobs++)) + + # If we've reached max_parallel, wait for a job to finish + if [ $active_jobs -ge $max_parallel ]; then + wait -n # Wait for any child process to exit + ((active_jobs--)) + fi + done < "$url_file" - return $failed_count + # Wait for remaining jobs to finish + wait + + # Print results in order and count failures + for i in $(seq 1 $url_index); do + if [ -f "${tmp_dir}/result_${i}.txt" ]; then + cat "${tmp_dir}/result_${i}.txt" + fi + + if [ -f "${tmp_dir}/failed_${i}" ]; then + failed_count=$((failed_count + $(cat "${tmp_dir}/failed_${i}"))) + fi + done + + # Clean up + rm -rf "$tmp_dir" + + local end_time=$(date +%s) + local duration=$((end_time - start_time)) + + echo "----------------------------------------" + echo "Test Summary:" + echo "Total tests: $total_count" + echo "Failed tests: $failed_count" + echo "Passed tests: $((total_count - failed_count))" + echo "Duration: ${duration} seconds" + + return $failed_count } # Print usage information usage() { - echo "Usage: $0 [-f url_file] [-b base_url]" - echo "Options:" - echo " -f url_file Path to file containing URLs to test (required)" - echo " -b base_url Base URL to prepend to test URLs (default: http://localhost:8080)" 
- exit 1 + echo "Usage: $0 [-f url_file] [-b base_url] [-p max_parallel]" + echo "Options:" + echo " -f url_file Path to file containing URLs to test (required)" + echo " -b base_url Base URL to prepend to test URLs (default: http://localhost:8080)" + echo " -p max_parallel Maximum number of parallel requests (default: 10)" + exit 1 } # Main execution main() { - local url_file="" - local base_url="http://localhost:8080" + local url_file="" + local base_url="http://localhost:8080" + local max_parallel=10 - # Parse command line options - while getopts ":f:b:h" opt; do - case $opt in - f) url_file="$OPTARG" ;; - b) base_url="$OPTARG" ;; - h) usage ;; - \?) echo "Invalid option -$OPTARG" >&2; usage ;; - esac - done + # Parse command line options + while getopts ":f:b:p:h" opt; do + case $opt in + f) url_file="$OPTARG" ;; + b) base_url="$OPTARG" ;; + p) max_parallel="$OPTARG" ;; + h) usage ;; + \?) echo "Invalid option -$OPTARG" >&2; usage ;; + esac + done - # Check if URL file is provided - if [ -z "$url_file" ]; then - echo "Error: URL file is required" - usage - fi + # Check if URL file is provided + if [ -z "$url_file" ]; then + echo "Error: URL file is required" + usage + fi - # Check if URL file exists - if [ ! -f "$url_file" ]; then - echo "Error: URL list file not found: $url_file" - exit 1 - fi - - # Run tests using the URL list - if test_all_urls "$url_file" "$base_url"; then - echo "All webpage tests passed!" - exit 0 - else - echo "Some webpage tests failed!" - exit 1 - fi + # Check if URL file exists + if [ ! -f "$url_file" ]; then + echo "Error: URL list file not found: $url_file" + exit 1 + fi + + # Run tests using the URL list + if test_all_urls "$url_file" "$base_url" "$max_parallel"; then + echo "All webpage tests passed!" + exit 0 + else + echo "Some webpage tests failed!" + exit 1 + fi } # Run main if script is executed directly if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then - main "$@" + main "$@" fi \ No newline at end of file