From 1d3e018a5678911a62acdd5b1ed47e66386162d2 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Mon, 11 Dec 2023 23:20:31 +0000 Subject: [PATCH] init overlay and auto split --- .../controller/api/PdfOverlayController.java | 120 +++++++++++++ .../api/SplitPdfBySizeController.java | 167 ++++++++++++++++++ .../controller/web/GeneralWebController.java | 14 ++ .../controller/web/OtherWebController.java | 2 +- .../model/api/general/OverlayPdfsRequest.java | 23 +++ .../general/SplitPdfBySizeOrCountRequest.java | 18 ++ .../software/SPDF/utils/GeneralUtils.java | 16 ++ src/main/resources/messages_en_GB.properties | 39 +++- .../resources/templates/fragments/navbar.html | 4 +- src/main/resources/templates/home.html | 4 +- src/main/resources/templates/overlay-pdf.html | 83 +++++++++ .../templates/split-by-size-or-count.html | 43 +++++ 12 files changed, 529 insertions(+), 4 deletions(-) create mode 100644 src/main/java/stirling/software/SPDF/controller/api/PdfOverlayController.java create mode 100644 src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySizeController.java create mode 100644 src/main/java/stirling/software/SPDF/model/api/general/OverlayPdfsRequest.java create mode 100644 src/main/java/stirling/software/SPDF/model/api/general/SplitPdfBySizeOrCountRequest.java create mode 100644 src/main/resources/templates/overlay-pdf.html create mode 100644 src/main/resources/templates/split-by-size-or-count.html diff --git a/src/main/java/stirling/software/SPDF/controller/api/PdfOverlayController.java b/src/main/java/stirling/software/SPDF/controller/api/PdfOverlayController.java new file mode 100644 index 00000000..801ddfa0 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/controller/api/PdfOverlayController.java @@ -0,0 +1,120 @@ +package stirling.software.SPDF.controller.api; +import org.apache.pdfbox.multipdf.Overlay; +import org.apache.pdfbox.pdmodel.PDDocument; +import 
org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.ModelAttribute; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; +import org.springframework.http.ResponseEntity; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import stirling.software.SPDF.model.api.general.OverlayPdfsRequest; +import stirling.software.SPDF.utils.GeneralUtils; +import stirling.software.SPDF.utils.WebResponseUtils; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import org.springframework.http.MediaType; +import java.io.File; +@RestController +@RequestMapping("/api/v1/general") +@Tag(name = "General", description = "General APIs") +public class PdfOverlayController { + + @PostMapping(value = "/overlay-pdfs", consumes = "multipart/form-data") + @Operation(summary = "Overlay PDF files in various modes", description = "Overlay PDF files onto a base PDF with different modes: Sequential, Interleaved, or Fixed Repeat. 
Input:PDF Output:PDF Type:MIMO") + public ResponseEntity overlayPdfs(@ModelAttribute OverlayPdfsRequest request) throws IOException { + MultipartFile baseFile = request.getFileInput(); + int overlayPos = request.getOverlayPosition(); + + MultipartFile[] overlayFiles = request.getOverlayFiles(); + File[] overlayPdfFiles = new File[overlayFiles.length]; + try{ + for (int i = 0; i < overlayFiles.length; i++) { + overlayPdfFiles[i] = GeneralUtils.multipartToFile(overlayFiles[i]); + } + + String mode = request.getOverlayMode(); // "SequentialOverlay", "InterleavedOverlay", "FixedRepeatOverlay" + int[] counts = request.getCounts(); // Used for FixedRepeatOverlay mode + + try (PDDocument basePdf = PDDocument.load(baseFile.getInputStream()); + Overlay overlay = new Overlay()) { + Map overlayGuide = prepareOverlayGuide(basePdf.getNumberOfPages(), overlayPdfFiles, mode, counts); + + overlay.setInputPDF(basePdf); + if(overlayPos == 0) { + overlay.setOverlayPosition(Overlay.Position.FOREGROUND); + } else { + overlay.setOverlayPosition(Overlay.Position.BACKGROUND); + } + + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + overlay.overlay(overlayGuide).save(outputStream); + byte[] data = outputStream.toByteArray(); + + return WebResponseUtils.bytesToWebResponse(data, "overlaid.pdf", MediaType.APPLICATION_PDF); + } + } finally { + for (File overlayPdfFile : overlayPdfFiles) { + if (overlayPdfFile != null) overlayPdfFile.delete(); + } + } + } + + private Map prepareOverlayGuide(int basePageCount, File[] overlayFiles, String mode, int[] counts) throws IOException { + Map overlayGuide = new HashMap<>(); + switch (mode) { + case "SequentialOverlay": + sequentialOverlay(overlayGuide, overlayFiles, basePageCount); + break; + case "InterleavedOverlay": + interleavedOverlay(overlayGuide, overlayFiles, basePageCount); + break; + case "FixedRepeatOverlay": + fixedRepeatOverlay(overlayGuide, overlayFiles, counts, basePageCount); + break; + default: + throw new 
IllegalArgumentException("Invalid overlay mode"); + } + return overlayGuide; + } + + private void sequentialOverlay(Map overlayGuide, File[] overlayFiles, int basePageCount) throws IOException { + int currentPage = 1; + for (File overlayFile : overlayFiles) { + try (PDDocument overlayPdf = PDDocument.load(overlayFile)) { + for (int i = 0; i < overlayPdf.getNumberOfPages(); i++) { + if (currentPage > basePageCount) break; + overlayGuide.put(currentPage++, overlayFile.getAbsolutePath()); + } + } + } + } + + private void interleavedOverlay(Map overlayGuide, File[] overlayFiles, int basePageCount) throws IOException { + for (int i = 0; i < basePageCount; i++) { + File overlayFile = overlayFiles[i % overlayFiles.length]; + overlayGuide.put(i + 1, overlayFile.getAbsolutePath()); + } + } + + private void fixedRepeatOverlay(Map overlayGuide, File[] overlayFiles, int[] counts, int basePageCount) throws IOException { + if (overlayFiles.length != counts.length) { + throw new IllegalArgumentException("Counts array length must match the number of overlay files"); + } + int currentPage = 1; + for (int i = 0; i < overlayFiles.length; i++) { + File overlayFile = overlayFiles[i]; + int repeatCount = counts[i]; + for (int j = 0; j < repeatCount; j++) { + if (currentPage > basePageCount) break; + overlayGuide.put(currentPage++, overlayFile.getAbsolutePath()); + } + } + } +} + +// Additional classes like OverlayPdfsRequest, WebResponseUtils, etc. are assumed to be defined elsewhere. 
diff --git a/src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySizeController.java b/src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySizeController.java new file mode 100644 index 00000000..ede045b8 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySizeController.java @@ -0,0 +1,167 @@ +package stirling.software.SPDF.controller.api; +import java.awt.image.BufferedImage; +import java.awt.image.DataBufferByte; +import java.awt.image.DataBufferInt; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.ModelAttribute; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import com.google.zxing.BinaryBitmap; +import com.google.zxing.LuminanceSource; +import com.google.zxing.MultiFormatReader; +import com.google.zxing.NotFoundException; +import com.google.zxing.PlanarYUVLuminanceSource; +import com.google.zxing.Result; +import com.google.zxing.common.HybridBinarizer; + +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import stirling.software.SPDF.model.api.general.SplitPdfBySizeOrCountRequest; +import stirling.software.SPDF.model.api.misc.AutoSplitPdfRequest; +import stirling.software.SPDF.utils.GeneralUtils; +import stirling.software.SPDF.utils.WebResponseUtils; + 
+@RestController +@RequestMapping("/api/v1/general") +@Tag(name = "Misc", description = "Miscellaneous APIs") +public class SplitPdfBySizeController { + + + @PostMapping(value = "/split-by-size-or-count", consumes = "multipart/form-data") + @Operation(summary = "Auto split PDF pages into separate documents based on size or count", description = "split PDF into multiple paged documents based on size/count, ie if 20 pages and split into 5, it does 5 documents each 4 pages\r\n" + + " if 10MB and each page is 1MB and you enter 2MB then 5 docs each 2MB (rounded so that it accepts 1.9MB but not 2.1MB) Input:PDF Output:ZIP Type:SIMO") + public ResponseEntity autoSplitPdf(@ModelAttribute SplitPdfBySizeOrCountRequest request) throws Exception { + List splitDocumentsBoas = new ArrayList(); + + + + MultipartFile file = request.getFileInput(); + PDDocument sourceDocument = PDDocument.load(file.getInputStream()); + + //0 = size, 1 = page count, 2 = doc count + int type = request.getSplitType(); + String value = request.getSplitValue(); + + if (type == 0) { // Split by size + long maxBytes = GeneralUtils.convertSizeToBytes(value); + long currentSize = 0; + PDDocument currentDoc = new PDDocument(); + + for (PDPage page : sourceDocument.getPages()) { + ByteArrayOutputStream pageOutputStream = new ByteArrayOutputStream(); + PDDocument tempDoc = new PDDocument(); + tempDoc.addPage(page); + tempDoc.save(pageOutputStream); + tempDoc.close(); + + long pageSize = pageOutputStream.size(); + if (currentSize + pageSize > maxBytes) { + // Save and reset current document + splitDocumentsBoas.add(currentDocToByteArray(currentDoc)); + currentDoc = new PDDocument(); + currentSize = 0; + } + + currentDoc.addPage(page); + currentSize += pageSize; + } + // Add the last document if it contains any pages + if (currentDoc.getPages().getCount() != 0) { + splitDocumentsBoas.add(currentDocToByteArray(currentDoc)); + } + } else if (type == 1) { // Split by page count + int pageCount = 
Integer.parseInt(value); + int currentPageCount = 0; + PDDocument currentDoc = new PDDocument(); + + for (PDPage page : sourceDocument.getPages()) { + currentDoc.addPage(page); + currentPageCount++; + + if (currentPageCount == pageCount) { + // Save and reset current document + splitDocumentsBoas.add(currentDocToByteArray(currentDoc)); + currentDoc = new PDDocument(); + currentPageCount = 0; + } + } + // Add the last document if it contains any pages + if (currentDoc.getPages().getCount() != 0) { + splitDocumentsBoas.add(currentDocToByteArray(currentDoc)); + } + } else if (type == 2) { // Split by doc count + int documentCount = Integer.parseInt(value); + int totalPageCount = sourceDocument.getNumberOfPages(); + int pagesPerDocument = totalPageCount / documentCount; + int extraPages = totalPageCount % documentCount; + int currentPageIndex = 0; + + for (int i = 0; i < documentCount; i++) { + PDDocument currentDoc = new PDDocument(); + int pagesToAdd = pagesPerDocument + (i < extraPages ? 
1 : 0); + + for (int j = 0; j < pagesToAdd; j++) { + currentDoc.addPage(sourceDocument.getPage(currentPageIndex++)); + } + + splitDocumentsBoas.add(currentDocToByteArray(currentDoc)); + } + } else { + throw new IllegalArgumentException("Invalid argument for split type"); + } + + sourceDocument.close(); + + + + Path zipFile = Files.createTempFile("split_documents", ".zip"); + String filename = file.getOriginalFilename().replaceFirst("[.][^.]+$", ""); + byte[] data; + + try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) { + for (int i = 0; i < splitDocumentsBoas.size(); i++) { + String fileName = filename + "_" + (i + 1) + ".pdf"; + ByteArrayOutputStream baos = splitDocumentsBoas.get(i); + byte[] pdf = baos.toByteArray(); + + ZipEntry pdfEntry = new ZipEntry(fileName); + zipOut.putNextEntry(pdfEntry); + zipOut.write(pdf); + zipOut.closeEntry(); + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + data = Files.readAllBytes(zipFile); + Files.delete(zipFile); + } + + return WebResponseUtils.bytesToWebResponse(data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM); + } + + private ByteArrayOutputStream currentDocToByteArray(PDDocument document) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + document.save(baos); + document.close(); + return baos; + } + + +} diff --git a/src/main/java/stirling/software/SPDF/controller/web/GeneralWebController.java b/src/main/java/stirling/software/SPDF/controller/web/GeneralWebController.java index 51b43c3c..04c4859a 100644 --- a/src/main/java/stirling/software/SPDF/controller/web/GeneralWebController.java +++ b/src/main/java/stirling/software/SPDF/controller/web/GeneralWebController.java @@ -155,6 +155,20 @@ public class GeneralWebController { return "scale-pages"; } + + @GetMapping("/split-by-size-or-count") + @Hidden + public String splitBySizeOrCount(Model model) { + model.addAttribute("currentPage", "split-by-size-or-count"); + return 
"split-by-size-or-count"; + } + + @GetMapping("/overlay-pdf") + @Hidden + public String overlayPdf(Model model) { + model.addAttribute("currentPage", "overlay-pdf"); + return "overlay-pdf"; + } @Autowired diff --git a/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java b/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java index cc02b030..8438b26d 100644 --- a/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java +++ b/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java @@ -98,7 +98,7 @@ public class OtherWebController { return modelAndView; } - + @GetMapping("/add-image") @Hidden public String overlayImage(Model model) { diff --git a/src/main/java/stirling/software/SPDF/model/api/general/OverlayPdfsRequest.java b/src/main/java/stirling/software/SPDF/model/api/general/OverlayPdfsRequest.java new file mode 100644 index 00000000..e312c6b7 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/model/api/general/OverlayPdfsRequest.java @@ -0,0 +1,23 @@ +package stirling.software.SPDF.model.api.general; +import org.springframework.web.multipart.MultipartFile; +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; +import lombok.EqualsAndHashCode; +import stirling.software.SPDF.model.api.PDFFile; + +@Data +@EqualsAndHashCode(callSuper = true) +public class OverlayPdfsRequest extends PDFFile { + + @Schema(description = "An array of PDF files to be used as overlays on the base PDF. 
The order in these files is applied based on the selected mode.") + private MultipartFile[] overlayFiles; + + @Schema(description = "The mode of overlaying: 'SequentialOverlay' for sequential application, 'InterleavedOverlay' for round-robin application, 'FixedRepeatOverlay' for fixed repetition based on provided counts", required = true) + private String overlayMode; + + @Schema(description = "An array of integers specifying the number of times each corresponding overlay file should be applied in the 'FixedRepeatOverlay' mode. This should match the length of the overlayFiles array.", required = false) + private int[] counts; + + @Schema(description = "Overlay position 0 is Foregound, 1 is Background") + private int overlayPosition; +} diff --git a/src/main/java/stirling/software/SPDF/model/api/general/SplitPdfBySizeOrCountRequest.java b/src/main/java/stirling/software/SPDF/model/api/general/SplitPdfBySizeOrCountRequest.java new file mode 100644 index 00000000..087ce80c --- /dev/null +++ b/src/main/java/stirling/software/SPDF/model/api/general/SplitPdfBySizeOrCountRequest.java @@ -0,0 +1,18 @@ +package stirling.software.SPDF.model.api.general; + +import io.swagger.v3.oas.annotations.media.Schema; +import lombok.Data; +import lombok.EqualsAndHashCode; +import stirling.software.SPDF.model.api.PDFFile; + +@Data +@EqualsAndHashCode(callSuper=true) +public class SplitPdfBySizeOrCountRequest extends PDFFile { + + @Schema(description = "Determines the type of split: 0 for size, 1 for page count, 2 for document count", required = false, defaultValue = "0") + private int splitType; + + + @Schema(description = "Value for split: size in MB (e.g., '10MB') or number of pages (e.g., '5')", required = false, defaultValue = "10MB") + private String splitValue; +} diff --git a/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java b/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java index 3097fc1b..7724e095 100644 --- 
a/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java +++ b/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java @@ -1,6 +1,8 @@ package stirling.software.SPDF.utils; +import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.nio.file.FileVisitResult; @@ -12,6 +14,8 @@ import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.List; +import org.springframework.web.multipart.MultipartFile; +import java.io.FileOutputStream; public class GeneralUtils { public static void deleteDirectory(Path path) throws IOException {
@@ -48,6 +52,31 @@ public class GeneralUtils {
 } }
+    /**
+     * Copies an uploaded file into a new temporary file and returns that file.
+     *
+     * <p>The caller owns the returned file and must delete it. If the copy fails, the
+     * temporary file is removed before the exception propagates so nothing is left behind.
+     *
+     * @param multipart the upload to copy
+     * @return a temporary file (prefix {@code overlay-}, suffix {@code .pdf}) holding the upload
+     * @throws IOException if the temporary file cannot be created or written
+     */
+    public static File multipartToFile(MultipartFile multipart) throws IOException {
+        Path tempFile = Files.createTempFile("overlay-", ".pdf");
+        try (InputStream in = multipart.getInputStream()) {
+            // The temp file already exists (createTempFile created it), hence REPLACE_EXISTING.
+            Files.copy(in, tempFile, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+        } catch (IOException | RuntimeException e) {
+            try {
+                Files.deleteIfExists(tempFile);
+            } catch (IOException cleanupFailure) {
+                e.addSuppressed(cleanupFailure);
+            }
+            throw e;
+        }
+        return tempFile.toFile();
+    }
 public static Long convertSizeToBytes(String sizeStr) { if (sizeStr == null) { diff --git a/src/main/resources/messages_en_GB.properties b/src/main/resources/messages_en_GB.properties index 159c4b4d..ecdec292 100644 --- a/src/main/resources/messages_en_GB.properties +++ b/src/main/resources/messages_en_GB.properties @@ -340,6 +340,16 @@ home.tableExtraxt.title=PDF to CSV home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV tableExtraxt.tags=CSV,Table Extraction,extract,convert + +home.autoSizeSplitPDF.title=Auto Split by Size/Count +home.autoSizeSplitPDF.desc=Split a single PDF into multiple documents based on size, page count, or document count +autoSizeSplitPDF.tags=pdf,split,document,organization + + +home.overlay-pdfs.title=Overlay PDFs +home.overlay-pdfs.desc=Overlays PDFs on-top of another PDF 
+overlay-pdfs.tags=Overlay + ########################### # # # WEB PAGES # @@ -832,4 +842,31 @@ PDFToXML.submit=Convert PDFToCSV.title=PDF to CSV PDFToCSV.header=PDF to CSV PDFToCSV.prompt=Choose page to extract table -PDFToCSV.submit=Extract \ No newline at end of file +PDFToCSV.submit=Extract + +#split-by-size-or-count +split-by-size-or-count.header=Split PDF by Size or Count +split-by-size-or-count.type.label=Select Split Type +split-by-size-or-count.type.size=By Size +split-by-size-or-count.type.pageCount=By Page Count +split-by-size-or-count.type.docCount=By Document Count +split-by-size-or-count.value.label=Enter Value +split-by-size-or-count.value.placeholder=Enter size (e.g., 2MB or 3KB) or count (e.g., 5) +split-by-size-or-count.submit=Submit + + +#overlay-pdfs +overlay-pdfs.header=Overlay PDF Files +overlay-pdfs.baseFile.label=Select Base PDF File +overlay-pdfs.overlayFiles.label=Select Overlay PDF Files +overlay-pdfs.mode.label=Select Overlay Mode +overlay-pdfs.mode.sequential=Sequential Overlay +overlay-pdfs.mode.interleaved=Interleaved Overlay +overlay-pdfs.mode.fixedRepeat=Fixed Repeat Overlay +overlay-pdfs.counts.label=Overlay Counts (for Fixed Repeat Mode) +overlay-pdfs.counts.placeholder=Enter comma-separated counts (e.g., 2,3,1) +overlay-pdfs.position.label=Select Overlay Position +overlay-pdfs.position.foreground=Foreground +overlay-pdfs.position.background=Background +overlay-pdfs.submit=Submit + diff --git a/src/main/resources/templates/fragments/navbar.html b/src/main/resources/templates/fragments/navbar.html index e16d1853..731ce091 100644 --- a/src/main/resources/templates/fragments/navbar.html +++ b/src/main/resources/templates/fragments/navbar.html @@ -59,7 +59,9 @@
- +
+
+ diff --git a/src/main/resources/templates/home.html b/src/main/resources/templates/home.html index a932fad8..bb156282 100644 --- a/src/main/resources/templates/home.html +++ b/src/main/resources/templates/home.html @@ -94,7 +94,9 @@
- +
+
+ diff --git a/src/main/resources/templates/overlay-pdf.html b/src/main/resources/templates/overlay-pdf.html new file mode 100644 index 00000000..31275cad --- /dev/null +++ b/src/main/resources/templates/overlay-pdf.html @@ -0,0 +1,83 @@ + + + + + + + + +
+
+
+

+
+
+
+ + +

+
+
+
+ + + +
+ + +
+ + + +
+ + + + + +
+
+
+ +
+
+
+ + diff --git a/src/main/resources/templates/split-by-size-or-count.html b/src/main/resources/templates/split-by-size-or-count.html new file mode 100644 index 00000000..1afc707d --- /dev/null +++ b/src/main/resources/templates/split-by-size-or-count.html @@ -0,0 +1,43 @@ + + + + + + + + +
+
+
+

+
+
+
+

+
+
+ + + +
+ + + +
+ + +
+ +
+
+
+ +
+
+
+ +