From 9b0610b2cceade2f6b3ca0ab2e70e3bcbf0017d8 Mon Sep 17 00:00:00 2001
From: intelliking
Date: Mon, 23 Feb 2026 21:15:27 +0000
Subject: [PATCH] feat: split pdf into small chunks by pdfbox (#5718)

Co-authored-by: Ubuntu
Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
---
 .../controller/api/PosterPdfController.java   | 328 ++++++++++++++++++
 .../model/api/general/PosterPdfRequest.java   |  41 +++
 .../public/locales/en-GB/translation.toml     |  29 ++
 .../components/tools/split/SplitSettings.tsx  |  50 ++++
 .../tooltips/useSplitSettingsTips.ts          |  18 ++
 frontend/src/core/constants/splitConstants.ts |  12 +-
 .../hooks/tools/split/useSplitOperation.ts    |   8 +
 .../hooks/tools/split/useSplitParameters.ts   |  10 +
 .../core/tests/missingTranslations.test.ts    |   2 +-
 9 files changed, 495 insertions(+), 3 deletions(-)
 create mode 100644 app/core/src/main/java/stirling/software/SPDF/controller/api/PosterPdfController.java
 create mode 100644 app/core/src/main/java/stirling/software/SPDF/model/api/general/PosterPdfRequest.java

diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/PosterPdfController.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/PosterPdfController.java
new file mode 100644
index 000000000..989c2a970
--- /dev/null
+++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/PosterPdfController.java
@@ -0,0 +1,328 @@
+package stirling.software.SPDF.controller.api;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+import org.apache.pdfbox.multipdf.LayerUtility;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
+import org.apache.pdfbox.util.Matrix;
+import org.springframework.http.MediaType;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.ModelAttribute;
+import org.springframework.web.multipart.MultipartFile;
+
+import io.swagger.v3.oas.annotations.Operation;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+import stirling.software.SPDF.config.swagger.MultiFileResponse;
+import stirling.software.SPDF.model.api.general.PosterPdfRequest;
+import stirling.software.common.annotations.AutoJobPostMapping;
+import stirling.software.common.annotations.api.GeneralApi;
+import stirling.software.common.service.CustomPDFDocumentFactory;
+import stirling.software.common.util.ExceptionUtils;
+import stirling.software.common.util.GeneralUtils;
+import stirling.software.common.util.WebResponseUtils;
+
+/**
+ * REST controller that splits PDF pages into a grid of smaller pages for poster printing.
+ *
+ * <p>Each source page is imported once as a form XObject and drawn onto one output page per
+ * grid cell, scaled uniformly, centered and clipped to the cell on the target page size.
+ */
+@GeneralApi
+@Slf4j
+@RequiredArgsConstructor
+public class PosterPdfController {
+
+    /** Lower bound for xFactor/yFactor, as documented on PosterPdfRequest. */
+    private static final int MIN_GRID_FACTOR = 1;
+
+    /** Upper bound for xFactor/yFactor, as documented on PosterPdfRequest. */
+    private static final int MAX_GRID_FACTOR = 10;
+
+    /** Supported target page sizes keyed by request name, in a stable order. */
+    private static final Map<String, PDRectangle> PAGE_SIZES = createPageSizes();
+
+    private final CustomPDFDocumentFactory pdfDocumentFactory;
+
+    /**
+     * Splits every page of the uploaded PDF into an {@code xFactor} x {@code yFactor} grid and
+     * returns the resulting document wrapped in a ZIP archive.
+     *
+     * @param request the uploaded file plus target page size, grid factors and column order
+     * @return a ZIP response containing a single {@code _poster.pdf} entry
+     * @throws IllegalArgumentException if the page size is unknown or a grid factor is outside
+     *     the supported range
+     * @throws Exception if the PDF cannot be loaded, rendered or written
+     */
+    @AutoJobPostMapping(
+            value = "/split-for-poster-print",
+            consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
+    @MultiFileResponse
+    @Operation(
+            summary = "Split large PDF pages into smaller printable chunks",
+            description =
+                    "This endpoint splits large or oddly-sized PDF pages into smaller chunks "
+                            + "suitable for printing on standard paper sizes (e.g., A4, Letter). "
+                            + "Divides each page into a grid of smaller pages using Apache PDFBox. "
+                            + "Input: PDF Output: ZIP-PDF Type: SISO")
+    public ResponseEntity<byte[]> posterPdf(@ModelAttribute PosterPdfRequest request)
+            throws Exception {
+
+        log.debug("Starting PDF poster split process with request: {}", request);
+
+        // Validate the cheap request parameters before loading any document: factors below 1
+        // would silently yield an empty PDF, and unbounded factors an unbounded page count.
+        int xFactor = request.getXFactor();
+        int yFactor = request.getYFactor();
+        validateGridFactor("xFactor", xFactor);
+        validateGridFactor("yFactor", yFactor);
+        boolean rightToLeft = request.isRightToLeft();
+        PDRectangle targetPageSize = getTargetPageSize(request.getPageSize());
+
+        MultipartFile file = request.getFileInput();
+
+        String filename = GeneralUtils.generateFilename(file.getOriginalFilename(), "");
+        log.debug("Base filename for output: {}", filename);
+
+        try (PDDocument sourceDocument = pdfDocumentFactory.load(file);
+                PDDocument outputDocument =
+                        pdfDocumentFactory.createNewDocumentBasedOnOldDocument(sourceDocument);
+                ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
+                ByteArrayOutputStream zipOutputStream = new ByteArrayOutputStream()) {
+
+            log.debug(
+                    "Target page size: {} ({}x{})",
+                    request.getPageSize(),
+                    targetPageSize.getWidth(),
+                    targetPageSize.getHeight());
+
+            // Create LayerUtility for importing pages as forms
+            LayerUtility layerUtility = new LayerUtility(outputDocument);
+
+            int totalPages = sourceDocument.getNumberOfPages();
+
+            log.debug(
+                    "Processing {} pages with grid {}x{}, RTL={}",
+                    totalPages,
+                    xFactor,
+                    yFactor,
+                    rightToLeft);
+
+            // Process each page
+            for (int pageIndex = 0; pageIndex < totalPages; pageIndex++) {
+                PDPage sourcePage = sourceDocument.getPage(pageIndex);
+
+                // Save original boxes for restoration
+                PDRectangle originalMediaBox = sourcePage.getMediaBox();
+                PDRectangle originalCropBox = sourcePage.getCropBox();
+
+                // If no CropBox is set, use MediaBox
+                PDRectangle cropBox =
+                        originalCropBox != null ? originalCropBox : originalMediaBox;
+
+                // Handle page rotation
+                int rotation = sourcePage.getRotation();
+                float sourceWidth = cropBox.getWidth();
+                float sourceHeight = cropBox.getHeight();
+
+                // Swap dimensions if rotated 90 or 270 degrees
+                if (rotation == 90 || rotation == 270) {
+                    float temp = sourceWidth;
+                    sourceWidth = sourceHeight;
+                    sourceHeight = temp;
+                }
+
+                log.debug(
+                        "Page {}: Normalized to CropBox dimensions {}x{}, rotation {}",
+                        pageIndex,
+                        sourceWidth,
+                        sourceHeight,
+                        rotation);
+
+                // Normalize the page for the import: set MediaBox to CropBox so the form's
+                // coordinate space has no offset from the original MediaBox. The original
+                // boxes are put back afterwards, even if the import fails
+                PDFormXObject form;
+                sourcePage.setMediaBox(cropBox);
+                sourcePage.setCropBox(cropBox);
+                try {
+                    form = layerUtility.importPageAsForm(sourceDocument, pageIndex);
+                } finally {
+                    // Restore original boxes
+                    sourcePage.setMediaBox(originalMediaBox);
+                    sourcePage.setCropBox(originalCropBox);
+                }
+
+                // Calculate cell dimensions in source page coordinates
+                float cellWidth = sourceWidth / xFactor;
+                float cellHeight = sourceHeight / yFactor;
+
+                // Uniform scale that fits one cell into the target page (scales up or down).
+                // It is identical for every cell of this page, so it is computed once here
+                float scaleX = targetPageSize.getWidth() / cellWidth;
+                float scaleY = targetPageSize.getHeight() / cellHeight;
+                float scale = Math.min(scaleX, scaleY);
+
+                // Center the scaled cell on the target page
+                float scaledCellWidth = cellWidth * scale;
+                float scaledCellHeight = cellHeight * scale;
+                float offsetX = (targetPageSize.getWidth() - scaledCellWidth) / 2;
+                float offsetY = (targetPageSize.getHeight() - scaledCellHeight) / 2;
+
+                // Create grid cells (rows × columns)
+                for (int row = 0; row < yFactor; row++) {
+                    for (int col = 0; col < xFactor; col++) {
+                        // Apply RTL ordering for columns if enabled
+                        int actualCol = rightToLeft ? (xFactor - 1 - col) : col;
+
+                        // Calculate crop rectangle in source coordinates
+                        // PDF coordinates start at bottom-left
+                        float cropX = actualCol * cellWidth;
+                        // For Y: invert so row 0 shows TOP (following SplitPdfBySectionsController
+                        // pattern)
+                        float cropY = (yFactor - 1 - row) * cellHeight;
+
+                        // Create new output page with target size
+                        PDPage outputPage = new PDPage(targetPageSize);
+                        outputDocument.addPage(outputPage);
+
+                        try (PDPageContentStream contentStream =
+                                new PDPageContentStream(
+                                        outputDocument,
+                                        outputPage,
+                                        PDPageContentStream.AppendMode.APPEND,
+                                        true,
+                                        true)) {
+
+                            // Apply transformations
+                            contentStream.saveGraphicsState();
+
+                            // Clip to the area of the scaled cell: the form holds the whole
+                            // source page, so neighbouring cells would otherwise show in the
+                            // margins when cell and target page aspect ratios differ
+                            contentStream.addRect(
+                                    offsetX, offsetY, scaledCellWidth, scaledCellHeight);
+                            contentStream.clip();
+
+                            // Translate to center position
+                            contentStream.transform(Matrix.getTranslateInstance(offsetX, offsetY));
+
+                            // Scale uniformly
+                            contentStream.transform(Matrix.getScaleInstance(scale, scale));
+
+                            // Translate to show only the desired grid cell
+                            // IMPORTANT: The PDFormXObject's BBox already matches the CropBox
+                            // (including its offset), so we only need to translate by cropX/cropY
+                            // relative to the CropBox origin, NOT the MediaBox origin
+                            contentStream.transform(Matrix.getTranslateInstance(-cropX, -cropY));
+
+                            // Draw the form
+                            contentStream.drawForm(form);
+
+                            contentStream.restoreGraphicsState();
+                        }
+
+                        log.trace(
+                                "Created output page for grid cell [{},{}] of page {}: cropX={}, cropY={}, translate=({}, {})",
+                                row,
+                                actualCol,
+                                pageIndex,
+                                cropX,
+                                cropY,
+                                -cropX,
+                                -cropY);
+                    }
+                }
+            }
+
+            // Save output PDF
+            outputDocument.save(pdfOutputStream);
+            byte[] pdfData = pdfOutputStream.toByteArray();
+
+            log.debug(
+                    "Generated output PDF with {} pages ({} bytes)",
+                    outputDocument.getNumberOfPages(),
+                    pdfData.length);
+
+            // Create ZIP file with the result
+            try (ZipOutputStream zipOut = new ZipOutputStream(zipOutputStream)) {
+                ZipEntry zipEntry = new ZipEntry(filename + "_poster.pdf");
+                zipOut.putNextEntry(zipEntry);
+                zipOut.write(pdfData);
+                zipOut.closeEntry();
+            }
+
+            byte[] zipData = zipOutputStream.toByteArray();
+            log.debug("Successfully created ZIP with {} bytes", zipData.length);
+
+            return WebResponseUtils.bytesToWebResponse(
+                    zipData, filename + "_poster.zip", MediaType.APPLICATION_OCTET_STREAM);
+
+        } catch (IOException e) {
+            ExceptionUtils.logException("PDF poster split process", e);
+            throw e;
+        }
+    }
+
+    /**
+     * Checks that a grid factor lies within the supported range.
+     *
+     * @param name the request field name, used in the error message
+     * @param value the factor supplied by the caller
+     * @throws IllegalArgumentException if the value is outside the supported range
+     */
+    private static void validateGridFactor(String name, int value) {
+        if (value < MIN_GRID_FACTOR || value > MAX_GRID_FACTOR) {
+            String range = MIN_GRID_FACTOR + " and " + MAX_GRID_FACTOR;
+            throw new IllegalArgumentException(
+                    name + " must be between " + range + ", but was " + value);
+        }
+    }
+
+    /**
+     * Maps page size string to PDRectangle.
+     *
+     * @param pageSize the page size name (e.g., "A4", "Letter")
+     * @return the corresponding PDRectangle
+     * @throws IllegalArgumentException if page size is not supported
+     */
+    private PDRectangle getTargetPageSize(String pageSize) {
+        PDRectangle size = PAGE_SIZES.get(pageSize);
+        if (size == null) {
+            throw ExceptionUtils.createIllegalArgumentException(
+                    "error.invalidPageSize",
+                    "Invalid page size: {0}. Supported sizes: {1}",
+                    pageSize,
+                    String.join(", ", PAGE_SIZES.keySet()));
+        }
+        return size;
+    }
+
+    /**
+     * Builds the lookup table of supported target page sizes.
+     *
+     * @return an unmodifiable map from page size name to its dimensions
+     */
+    private static Map<String, PDRectangle> createPageSizes() {
+        Map<String, PDRectangle> sizes = new LinkedHashMap<>();
+        sizes.put("A4", PDRectangle.A4);
+        sizes.put("Letter", PDRectangle.LETTER);
+        sizes.put("A3", PDRectangle.A3);
+        sizes.put("A5", PDRectangle.A5);
+        sizes.put("Legal", PDRectangle.LEGAL);
+        sizes.put("Tabloid", new PDRectangle(792, 1224)); // 11x17 inches
+        return Collections.unmodifiableMap(sizes);
+    }
+}
diff --git a/app/core/src/main/java/stirling/software/SPDF/model/api/general/PosterPdfRequest.java b/app/core/src/main/java/stirling/software/SPDF/model/api/general/PosterPdfRequest.java
new file mode 100644
index 000000000..20d7eaf70
---
/dev/null
+++ b/app/core/src/main/java/stirling/software/SPDF/model/api/general/PosterPdfRequest.java
@@ -0,0 +1,51 @@
+package stirling.software.SPDF.model.api.general;
+
+import io.swagger.v3.oas.annotations.media.Schema;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
+import stirling.software.common.model.api.PDFFile;
+
+/**
+ * Request parameters for splitting PDF pages into a grid of printable chunks.
+ *
+ * <p>Every field declared here has a default value, so none of them has to be supplied.
+ */
+@Data
+@EqualsAndHashCode(callSuper = true)
+public class PosterPdfRequest extends PDFFile {
+
+    /** Name of the target page size; optional because it defaults to A4. */
+    @Schema(
+            description = "Target page size for output chunks (e.g., 'A4', 'Letter', 'A3')",
+            requiredMode = Schema.RequiredMode.NOT_REQUIRED,
+            defaultValue = "A4",
+            allowableValues = {"A4", "Letter", "A3", "A5", "Legal", "Tabloid"})
+    private String pageSize = "A4";
+
+    /** Number of columns each page is split into. */
+    @Schema(
+            description = "Horizontal decimation factor (how many columns to split into)",
+            requiredMode = Schema.RequiredMode.NOT_REQUIRED,
+            defaultValue = "2",
+            minimum = "1",
+            maximum = "10")
+    private int xFactor = 2;
+
+    /** Number of rows each page is split into. */
+    @Schema(
+            description = "Vertical decimation factor (how many rows to split into)",
+            requiredMode = Schema.RequiredMode.NOT_REQUIRED,
+            defaultValue = "2",
+            minimum = "1",
+            maximum = "10")
+    private int yFactor = 2;
+
+    /** Whether the split runs right-to-left instead of left-to-right. */
+    @Schema(
+            description = "Split right-to-left instead of left-to-right",
+            requiredMode = Schema.RequiredMode.NOT_REQUIRED,
+            defaultValue = "false")
+    private boolean rightToLeft = false;
+}
diff --git a/frontend/public/locales/en-GB/translation.toml b/frontend/public/locales/en-GB/translation.toml
index 36a823739..39d5a42b1 100644
--- a/frontend/public/locales/en-GB/translation.toml
+++ b/frontend/public/locales/en-GB/translation.toml
@@ -5988,6 +5988,11 @@ desc = "Auto-split with divider sheets"
 name = "Page Divider"
 tooltip = "Use QR code divider sheets between documents when scanning"
 
+[split.methods.byPoster]
+name = "Printable Chunks"
+desc = "Split large pages into printable sizes"
+tooltip = "Divide oversized pages into smaller chunks suitable for printing on standard paper (A4, Letter, etc.)"
+
 [split.methods.byPages]
 desc = "Extract specific
pages (1,3,5-10)" name = "Page Numbers" @@ -6006,6 +6011,7 @@ tooltip = "Specify maximum file size (e.g. 10MB, 500KB)" [split.methods.prefix] splitAt = "Split at" splitBy = "Split by" +splitInto = "Split into" [split.methodSelection.tooltip] bullet1 = "Click on a method card to select it" @@ -6025,6 +6031,21 @@ selectMethodFirst = "Please select a split method first" chooseMethod = "Choose Method" settings = "Settings" +[split.poster] +rightToLeft = "Split right-to-left" + +[split.poster.pageSize] +label = "Target Page Size" +description = "Size of output pages for printing" + +[split.poster.xFactor] +label = "Horizontal Divisions" +description = "Number of columns to split each page into" + +[split.poster.yFactor] +label = "Vertical Divisions" +description = "Number of rows to split each page into" + [split.tooltip.byChapters] bullet1 = "Bookmark Level: Which level to split on (1=top level)" bullet2 = "Include Metadata: Preserve document properties" @@ -6062,6 +6083,14 @@ bullet5 = "Enable Duplex Mode if scanning both sides of divider sheets" text = "Automatically split scanned documents using physical divider sheets with QR codes. Perfect for processing multiple documents scanned together." title = "Split by Page Divider" +[split.tooltip.byPoster] +bullet1 = "Target Page Size: Choose output paper size (A4, Letter, etc.)" +bullet2 = "Horizontal/Vertical Divisions: Grid size for splitting" +bullet3 = "Right-to-Left: Reverse column order for RTL layouts" +bullet4 = "Print and assemble the pieces to create your poster" +text = "Split large PDF pages into smaller printable chunks suitable for standard paper sizes. Perfect for creating poster prints from oversized pages." 
+title = "Split for Poster Print" + [split.tooltip.byPages] bullet1 = "Single split points: 3,7 (splits after pages 3 and 7)" bullet2 = "Range split points: 3-8 (splits before page 3 and after page 8)" diff --git a/frontend/src/core/components/tools/split/SplitSettings.tsx b/frontend/src/core/components/tools/split/SplitSettings.tsx index 9b60bcefa..ee33210c8 100644 --- a/frontend/src/core/components/tools/split/SplitSettings.tsx +++ b/frontend/src/core/components/tools/split/SplitSettings.tsx @@ -160,6 +160,55 @@ const SplitSettings = ({ ); + const renderByPosterForm = () => ( + +