diff --git a/app/core/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java b/app/core/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java
index 2b074640d..02eb82163 100644
--- a/app/core/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java
+++ b/app/core/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java
@@ -401,6 +401,7 @@ public class EndpointConfiguration {
         /* Ghostscript */
         addEndpointToGroup("Ghostscript", "repair");
         addEndpointToGroup("Ghostscript", "compress-pdf");
+        addEndpointToGroup("Ghostscript", "crop");
         addEndpointToGroup("Ghostscript", "replace-invert-pdf");
 
         /* tesseract */
diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/CropController.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/CropController.java
index 2fbbadf5e..8ca9604ce 100644
--- a/app/core/src/main/java/stirling/software/SPDF/controller/api/CropController.java
+++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/CropController.java
@@ -2,6 +2,9 @@ package stirling.software.SPDF.controller.api;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
 
 import org.apache.pdfbox.multipdf.LayerUtility;
 import org.apache.pdfbox.pdmodel.PDDocument;
@@ -21,16 +24,19 @@ import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.tags.Tag;
 
 import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
 
 import stirling.software.SPDF.model.api.general.CropPdfForm;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.GeneralUtils;
+import stirling.software.common.util.ProcessExecutor;
 import stirling.software.common.util.WebResponseUtils;
 
 @RestController
 @RequestMapping("/api/v1/general")
 @Tag(name = "General", description = "General APIs")
 @RequiredArgsConstructor
+@Slf4j
 public class CropController {
 
     private final CustomPDFDocumentFactory pdfDocumentFactory;
@@ -42,6 +48,14 @@ public class CropController {
                     "This operation takes an input PDF file and crops it according to the given"
                             + " coordinates. Input:PDF Output:PDF Type:SISO")
     public ResponseEntity cropPdf(@ModelAttribute CropPdfForm request) throws IOException {
+        if (request.isRemoveDataOutsideCrop()) {
+            return cropWithGhostscript(request);
+        } else {
+            return cropWithPDFBox(request);
+        }
+    }
+
+    private ResponseEntity cropWithPDFBox(CropPdfForm request) throws IOException {
         PDDocument sourceDocument = pdfDocumentFactory.load(request);
 
         PDDocument newDocument =
@@ -97,4 +111,82 @@ public class CropController {
                 GeneralUtils.generateFilename(
                         request.getFileInput().getOriginalFilename(), "_cropped.pdf"));
     }
+
+    /**
+     * Sets the requested crop box on every page, then rewrites the document with Ghostscript
+     * using the {@code pdfwrite} device and {@code -dUseCropBox}.
+     *
+     * @param request the uploaded PDF together with the crop rectangle (x, y, width, height)
+     * @return the response built from the bytes of the Ghostscript output file
+     * @throws IOException if a file or PDF operation fails, or if the Ghostscript run is
+     *     interrupted
+     */
+    private ResponseEntity cropWithGhostscript(CropPdfForm request) throws IOException {
+        Path tempInputFile = null;
+        Path tempOutputFile = null;
+
+        try {
+            // Created inside the try so a failure on the second file still removes the first.
+            tempInputFile = Files.createTempFile("crop_input", ".pdf");
+            tempOutputFile = Files.createTempFile("crop_output", ".pdf");
+
+            // try-with-resources closes the document even when cropping or saving throws.
+            try (PDDocument sourceDocument = pdfDocumentFactory.load(request)) {
+                for (int i = 0; i < sourceDocument.getNumberOfPages(); i++) {
+                    PDPage page = sourceDocument.getPage(i);
+                    PDRectangle cropBox =
+                            new PDRectangle(
+                                    request.getX(),
+                                    request.getY(),
+                                    request.getWidth(),
+                                    request.getHeight());
+                    page.setCropBox(cropBox);
+                }
+                sourceDocument.save(tempInputFile.toFile());
+            }
+
+            ProcessExecutor processExecutor =
+                    ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT);
+            List<String> command =
+                    List.of(
+                            "gs",
+                            "-sDEVICE=pdfwrite",
+                            "-dUseCropBox",
+                            "-o",
+                            tempOutputFile.toString(),
+                            tempInputFile.toString());
+
+            processExecutor.runCommandWithOutputHandling(command);
+
+            byte[] pdfContent = Files.readAllBytes(tempOutputFile);
+
+            // Same naming helper as the PDFBox path, so both branches name the file the same way.
+            return WebResponseUtils.bytesToWebResponse(
+                    pdfContent,
+                    GeneralUtils.generateFilename(
+                            request.getFileInput().getOriginalFilename(), "_cropped.pdf"));
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            throw new IOException("Ghostscript processing was interrupted", e);
+        } finally {
+            deleteTempFile(tempInputFile);
+            deleteTempFile(tempOutputFile);
+        }
+    }
+
+    /**
+     * Deletes one temporary file on a best-effort basis; a failure is logged, never propagated.
+     *
+     * @param file the file to delete, or {@code null} if it was never created
+     */
+    private void deleteTempFile(Path file) {
+        if (file == null) {
+            return;
+        }
+        try {
+            Files.deleteIfExists(file);
+        } catch (IOException e) {
+            log.debug("Failed to delete temporary file {}", file, e);
+        }
+    }
 }
diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/general/CropPdfForm.java b/app/core/src/main/java/stirling/software/SPDF/model/api/general/CropPdfForm.java
index 913f94a10..480169468 100644
--- a/app/core/src/main/java/stirling/software/SPDF/model/api/general/CropPdfForm.java
+++ b/app/core/src/main/java/stirling/software/SPDF/model/api/general/CropPdfForm.java
@@ -26,4 +26,9 @@ public class CropPdfForm extends PDFFile {
 
     @Schema(description = "The height of the crop area", type = "number")
     private float height;
+
+    @Schema(
+            description = "Whether to remove text outside the crop area (keeps images)",
+            type = "boolean")
+    private boolean removeDataOutsideCrop = true;
 }
diff --git a/app/core/src/main/resources/templates/crop.html b/app/core/src/main/resources/templates/crop.html
index 0617bf9b6..e91c481c3 100644
--- a/app/core/src/main/resources/templates/crop.html
+++ b/app/core/src/main/resources/templates/crop.html
@@ -22,6 +22,7 @@
+
diff --git a/testing/webpage_urls.txt b/testing/webpage_urls.txt
index c6c713dd0..6e7874eca 100644
--- a/testing/webpage_urls.txt
+++ b/testing/webpage_urls.txt
@@ -8,7 +8,6 @@
 /pdf-organizer
 /multi-page-layout
 /scale-pages
-/crop
 /extract-page
 /pdf-to-single-page
 /img-to-pdf