mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-01 01:21:18 +01:00
feat(crop): Crop remove outside text (#4499)
# Description of Changes This PR adds an option to remove text outside the crop area via Ghostscript. ### Crop feature enhancements - Added a checkbox to the `crop.html` template and a corresponding label in the English properties file to allow users to select "Remove text outside crop (retains images)" when cropping PDFs. - Updated the `CropPdfForm` model to include a new boolean property `removeDataOutsideCrop` to capture the user's selection. <img width="1418" height="815" alt="image" src="https://github.com/user-attachments/assets/47785372-7609-4637-ab3b-f05ab6d95957" /> ### Backend logic changes - Modified the `CropController` so that if `removeDataOutsideCrop` is true, cropping is performed using a two-step process: first setting the crop box with PDFBox, then using Ghostscript to remove data outside the crop box. Otherwise, the crop is performed using only PDFBox. - Added necessary imports for handling files, paths, and process execution to support the new Ghostscript-based cropping workflow. ### Endpoint configuration - Registered the new "crop" endpoint under the "Ghostscript" group in the endpoint configuration, enabling routing for the enhanced cropping feature. 
### UI <img width="671" height="867" alt="image" src="https://github.com/user-attachments/assets/ad01fcd4-343b-40e8-9345-135bdf746ca4" /> ### Sample files/Verification Before: <img width="969" height="747" alt="image" src="https://github.com/user-attachments/assets/d5a205f7-0aaf-4990-9b24-43e9ef9cf716" /> After: <img width="1165" height="690" alt="image" src="https://github.com/user-attachments/assets/7c73e35b-c52b-4be2-a892-72f0125f66b4" /> See for yourself with: [true-pdf-sample-1_cropped.pdf](https://github.com/user-attachments/files/22546716/true-pdf-sample-1_cropped.pdf) other sample PDF: [output.pdf](https://github.com/user-attachments/files/22546785/output.pdf) Closes #2652 <!-- Please provide a summary of the changes, including: - What was changed - Why the change was made - Any challenges encountered Closes #(issue_number) --> --- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [x] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) 
- [x] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details. --------- Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
This commit is contained in:
parent
fda1d6bc73
commit
085b8795d5
@ -401,6 +401,7 @@ public class EndpointConfiguration {
|
||||
/* Ghostscript */
|
||||
addEndpointToGroup("Ghostscript", "repair");
|
||||
addEndpointToGroup("Ghostscript", "compress-pdf");
|
||||
addEndpointToGroup("Ghostscript", "crop");
|
||||
addEndpointToGroup("Ghostscript", "replace-invert-pdf");
|
||||
|
||||
/* tesseract */
|
||||
|
||||
@ -2,6 +2,9 @@ package stirling.software.SPDF.controller.api;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.multipdf.LayerUtility;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
@ -21,16 +24,19 @@ import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.SPDF.model.api.general.CropPdfForm;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.common.util.GeneralUtils;
|
||||
import stirling.software.common.util.ProcessExecutor;
|
||||
import stirling.software.common.util.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/general")
|
||||
@Tag(name = "General", description = "General APIs")
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class CropController {
|
||||
|
||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||
@ -42,6 +48,15 @@ public class CropController {
|
||||
"This operation takes an input PDF file and crops it according to the given"
|
||||
+ " coordinates. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> cropPdf(@ModelAttribute CropPdfForm request) throws IOException {
|
||||
if (request.isRemoveDataOutsideCrop()) {
|
||||
return cropWithGhostscript(request);
|
||||
} else {
|
||||
return cropWithPDFBox(request);
|
||||
}
|
||||
}
|
||||
|
||||
private ResponseEntity<byte[]> cropWithPDFBox(@ModelAttribute CropPdfForm request)
|
||||
throws IOException {
|
||||
PDDocument sourceDocument = pdfDocumentFactory.load(request);
|
||||
|
||||
PDDocument newDocument =
|
||||
@ -97,4 +112,59 @@ public class CropController {
|
||||
GeneralUtils.generateFilename(
|
||||
request.getFileInput().getOriginalFilename(), "_cropped.pdf"));
|
||||
}
|
||||
|
||||
private ResponseEntity<byte[]> cropWithGhostscript(@ModelAttribute CropPdfForm request)
|
||||
throws IOException {
|
||||
PDDocument sourceDocument = pdfDocumentFactory.load(request);
|
||||
|
||||
for (int i = 0; i < sourceDocument.getNumberOfPages(); i++) {
|
||||
PDPage page = sourceDocument.getPage(i);
|
||||
PDRectangle cropBox =
|
||||
new PDRectangle(
|
||||
request.getX(),
|
||||
request.getY(),
|
||||
request.getWidth(),
|
||||
request.getHeight());
|
||||
page.setCropBox(cropBox);
|
||||
}
|
||||
|
||||
Path tempInputFile = Files.createTempFile("crop_input", ".pdf");
|
||||
Path tempOutputFile = Files.createTempFile("crop_output", ".pdf");
|
||||
|
||||
try {
|
||||
sourceDocument.save(tempInputFile.toFile());
|
||||
sourceDocument.close();
|
||||
|
||||
ProcessExecutor processExecutor =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT);
|
||||
List<String> command =
|
||||
List.of(
|
||||
"gs",
|
||||
"-sDEVICE=pdfwrite",
|
||||
"-dUseCropBox",
|
||||
"-o",
|
||||
tempOutputFile.toString(),
|
||||
tempInputFile.toString());
|
||||
|
||||
processExecutor.runCommandWithOutputHandling(command);
|
||||
|
||||
byte[] pdfContent = Files.readAllBytes(tempOutputFile);
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
pdfContent,
|
||||
request.getFileInput().getOriginalFilename().replaceFirst("[.][^.]+$", "")
|
||||
+ "_cropped.pdf");
|
||||
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
throw new IOException("Ghostscript processing was interrupted", e);
|
||||
} finally {
|
||||
try {
|
||||
Files.deleteIfExists(tempInputFile);
|
||||
Files.deleteIfExists(tempOutputFile);
|
||||
} catch (IOException e) {
|
||||
log.debug("Failed to delete temporary files", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -26,4 +26,9 @@ public class CropPdfForm extends PDFFile {
|
||||
|
||||
@Schema(description = "The height of the crop area", type = "number")
|
||||
private float height;
|
||||
|
||||
@Schema(
|
||||
description = "Whether to remove text outside the crop area (keeps images)",
|
||||
type = "boolean")
|
||||
private boolean removeDataOutsideCrop = true;
|
||||
}
|
||||
|
||||
@ -22,6 +22,7 @@
|
||||
<input id="y" type="hidden" name="y">
|
||||
<input id="width" type="hidden" name="width">
|
||||
<input id="height" type="hidden" name="height">
|
||||
|
||||
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{crop.submit}"></button>
|
||||
</form>
|
||||
<div id="canvasesContainer" style="position: relative; margin: 20px 0; width: auto;">
|
||||
|
||||
@ -8,7 +8,6 @@
|
||||
/pdf-organizer
|
||||
/multi-page-layout
|
||||
/scale-pages
|
||||
/crop
|
||||
/extract-page
|
||||
/pdf-to-single-page
|
||||
/img-to-pdf
|
||||
|
||||
Loading…
Reference in New Issue
Block a user