itext removal

This commit is contained in:
Anthony Stirling 2023-09-02 20:21:55 +01:00
parent 862086eae5
commit ef07963d79
4 changed files with 128 additions and 266 deletions

View File

@ -1,9 +1,14 @@
package stirling.software.SPDF.controller.api;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.ResponseEntity;
@ -11,16 +16,10 @@ import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import org.apache.pdfbox.util.Matrix;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.kernel.pdf.xobject.PdfFormXObject;
import org.apache.pdfbox.multipdf.LayerUtility;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.media.Schema;
@ -34,68 +33,73 @@ public class MultiPageLayoutController {
private static final Logger logger = LoggerFactory.getLogger(MultiPageLayoutController.class);
@PostMapping(value = "/multi-page-layout", consumes = "multipart/form-data")
@Operation(summary = "Merge multiple pages of a PDF document into a single page", description = "This operation takes an input PDF file and the number of pages to merge into a single sheet in the output PDF file. Input:PDF Output:PDF Type:SISO")
@Operation(
summary = "Merge multiple pages of a PDF document into a single page",
description = "This operation takes an input PDF file and the number of pages to merge into a single sheet in the output PDF file. Input:PDF Output:PDF Type:SISO"
)
public ResponseEntity<byte[]> mergeMultiplePagesIntoOne(
@Parameter(description = "The input PDF file", required = true) @RequestParam("fileInput") MultipartFile file,
@Parameter(description = "The number of pages to fit onto a single sheet in the output PDF. Acceptable values are 2, 3, 4, 9, 16.", required = true, schema = @Schema(type = "integer", allowableValues = {
"2", "3", "4", "9", "16" })) @RequestParam("pagesPerSheet") int pagesPerSheet)
throws IOException {
@Parameter(description = "The input PDF file", required = true) @RequestParam("fileInput") MultipartFile file,
@Parameter(description = "The number of pages to fit onto a single sheet in the output PDF. Acceptable values are 2, 3, 4, 9, 16.", required = true, schema = @Schema(type = "integer", allowableValues = {
"2", "3", "4", "9", "16" })) @RequestParam("pagesPerSheet") int pagesPerSheet)
throws IOException {
if (pagesPerSheet != 2 && pagesPerSheet != 3
&& pagesPerSheet != (int) Math.sqrt(pagesPerSheet) * Math.sqrt(pagesPerSheet)) {
throw new IllegalArgumentException("pagesPerSheet must be 2, 3 or a perfect square");
}
if (pagesPerSheet != 2 && pagesPerSheet != 3 && pagesPerSheet != (int) Math.sqrt(pagesPerSheet) * Math.sqrt(pagesPerSheet)) {
throw new IllegalArgumentException("pagesPerSheet must be 2, 3 or a perfect square");
}
int cols = pagesPerSheet == 2 || pagesPerSheet == 3 ? pagesPerSheet : (int) Math.sqrt(pagesPerSheet);
int rows = pagesPerSheet == 2 || pagesPerSheet == 3 ? 1 : (int) Math.sqrt(pagesPerSheet);
int cols = pagesPerSheet == 2 || pagesPerSheet == 3 ? pagesPerSheet : (int) Math.sqrt(pagesPerSheet);
int rows = pagesPerSheet == 2 || pagesPerSheet == 3 ? 1 : (int) Math.sqrt(pagesPerSheet);
byte[] bytes = file.getBytes();
PdfReader reader = new PdfReader(new ByteArrayInputStream(bytes));
PdfDocument pdfDoc = new PdfDocument(reader);
PDDocument sourceDocument = PDDocument.load(file.getInputStream());
PDDocument newDocument = new PDDocument();
PDPage newPage = new PDPage(PDRectangle.A4);
newDocument.addPage(newPage);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PdfWriter writer = new PdfWriter(baos);
PdfDocument outputPdf = new PdfDocument(writer);
PageSize pageSize = new PageSize(PageSize.A4.rotate());
int totalPages = sourceDocument.getNumberOfPages();
float cellWidth = newPage.getMediaBox().getWidth() / cols;
float cellHeight = newPage.getMediaBox().getHeight() / rows;
int totalPages = pdfDoc.getNumberOfPages();
float cellWidth = pageSize.getWidth() / cols;
float cellHeight = pageSize.getHeight() / rows;
PDPageContentStream contentStream = new PDPageContentStream(newDocument, newPage, PDPageContentStream.AppendMode.APPEND, true, true);
for (int i = 1; i <= totalPages; i += pagesPerSheet) {
PdfPage page = outputPdf.addNewPage(pageSize);
PdfCanvas pdfCanvas = new PdfCanvas(page);
LayerUtility layerUtility = new LayerUtility(newDocument);
for (int row = 0; row < rows; row++) {
for (int col = 0; col < cols; col++) {
int index = i + row * cols + col;
if (index <= totalPages) {
// Get the page and calculate scaling factors
Rectangle rect = pdfDoc.getPage(index).getPageSize();
float scaleWidth = cellWidth / rect.getWidth();
float scaleHeight = cellHeight / rect.getHeight();
float scale = Math.min(scaleWidth, scaleHeight);
for (int i = 0; i < totalPages; i++) {
PDPage sourcePage = sourceDocument.getPage(i);
System.out.println("Reading page " + (i+1));
PDRectangle rect = sourcePage.getMediaBox();
float scaleWidth = cellWidth / rect.getWidth();
float scaleHeight = cellHeight / rect.getHeight();
float scale = Math.min(scaleWidth, scaleHeight);
System.out.println("Scale for page " + (i+1) + ": " + scale);
PdfFormXObject formXObject = pdfDoc.getPage(index).copyAsFormXObject(outputPdf);
float x = col * cellWidth + (cellWidth - rect.getWidth() * scale) / 2;
float y = (rows - 1 - row) * cellHeight + (cellHeight - rect.getHeight() * scale) / 2;
// Save the graphics state, apply the transformations, add the object, and then
// restore the graphics state
pdfCanvas.saveState();
pdfCanvas.concatMatrix(scale, 0, 0, scale, x, y);
pdfCanvas.addXObject(formXObject, 0, 0);
pdfCanvas.restoreState();
}
}
}
}
int rowIndex = i / cols;
int colIndex = i % cols;
outputPdf.close();
byte[] pdfContent = baos.toByteArray();
pdfDoc.close();
float x = colIndex * cellWidth + (cellWidth - rect.getWidth() * scale) / 2;
float y = newPage.getMediaBox().getHeight() - ((rowIndex + 1) * cellHeight - (cellHeight - rect.getHeight() * scale) / 2);
return WebResponseUtils.bytesToWebResponse(pdfContent, file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_layoutChanged.pdf");
contentStream.saveGraphicsState();
contentStream.transform(Matrix.getTranslateInstance(x, y));
contentStream.transform(Matrix.getScaleInstance(scale, scale));
PDFormXObject formXObject = layerUtility.importPageAsForm(sourceDocument, i);
contentStream.drawForm(formXObject);
contentStream.restoreGraphicsState();
}
contentStream.close();
sourceDocument.close();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
newDocument.save(baos);
newDocument.close();
byte[] result = baos.toByteArray();
return WebResponseUtils.bytesToWebResponse(result, file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_layoutChanged.pdf");
}
}

View File

@ -11,6 +11,12 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.util.Matrix;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpHeaders;
@ -21,19 +27,9 @@ import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.kernel.pdf.canvas.parser.EventType;
import com.itextpdf.kernel.pdf.canvas.parser.PdfCanvasProcessor;
import com.itextpdf.kernel.pdf.canvas.parser.data.IEventData;
import com.itextpdf.kernel.pdf.canvas.parser.data.TextRenderInfo;
import com.itextpdf.kernel.pdf.canvas.parser.listener.IEventListener;
import com.itextpdf.kernel.pdf.xobject.PdfFormXObject;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import io.swagger.v3.oas.annotations.Hidden;
import io.swagger.v3.oas.annotations.Operation;
@ -41,7 +37,8 @@ import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.WebResponseUtils;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.multipdf.LayerUtility;
@RestController
@Tag(name = "General", description = "General APIs")
public class ScalePagesController {
@ -55,189 +52,76 @@ public class ScalePagesController {
@Parameter(description = "The scale of pages in the output PDF. Acceptable values are A0-A10, B0-B9, LETTER, TABLOID, LEDGER, LEGAL, EXECUTIVE.", required = true, schema = @Schema(type = "string", allowableValues = {
"A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10", "B0", "B1", "B2", "B3", "B4",
"B5", "B6", "B7", "B8", "B9", "LETTER", "TABLOID", "LEDGER", "LEGAL",
"EXECUTIVE" })) @RequestParam("pageSize") String targetPageSize,
"EXECUTIVE" })) @RequestParam("pageSize") String targetPDRectangle,
@Parameter(description = "The scale of the content on the pages of the output PDF. Acceptable values are floats.", required = true, schema = @Schema(type = "integer")) @RequestParam("scaleFactor") float scaleFactor)
throws IOException {
Map<String, PageSize> sizeMap = new HashMap<>();
Map<String, PDRectangle> sizeMap = new HashMap<>();
// Add A0 - A10
sizeMap.put("A0", PageSize.A0);
sizeMap.put("A1", PageSize.A1);
sizeMap.put("A2", PageSize.A2);
sizeMap.put("A3", PageSize.A3);
sizeMap.put("A4", PageSize.A4);
sizeMap.put("A5", PageSize.A5);
sizeMap.put("A6", PageSize.A6);
sizeMap.put("A7", PageSize.A7);
sizeMap.put("A8", PageSize.A8);
sizeMap.put("A9", PageSize.A9);
sizeMap.put("A10", PageSize.A10);
// Add B0 - B9
sizeMap.put("B0", PageSize.B0);
sizeMap.put("B1", PageSize.B1);
sizeMap.put("B2", PageSize.B2);
sizeMap.put("B3", PageSize.B3);
sizeMap.put("B4", PageSize.B4);
sizeMap.put("B5", PageSize.B5);
sizeMap.put("B6", PageSize.B6);
sizeMap.put("B7", PageSize.B7);
sizeMap.put("B8", PageSize.B8);
sizeMap.put("B9", PageSize.B9);
sizeMap.put("A0", PDRectangle.A0);
sizeMap.put("A1", PDRectangle.A1);
sizeMap.put("A2", PDRectangle.A2);
sizeMap.put("A3", PDRectangle.A3);
sizeMap.put("A4", PDRectangle.A4);
sizeMap.put("A5", PDRectangle.A5);
sizeMap.put("A6", PDRectangle.A6);
// Add other sizes
sizeMap.put("LETTER", PageSize.LETTER);
sizeMap.put("TABLOID", PageSize.TABLOID);
sizeMap.put("LEDGER", PageSize.LEDGER);
sizeMap.put("LEGAL", PageSize.LEGAL);
sizeMap.put("EXECUTIVE", PageSize.EXECUTIVE);
sizeMap.put("LETTER", PDRectangle.LETTER);
sizeMap.put("LEGAL", PDRectangle.LEGAL);
if (!sizeMap.containsKey(targetPageSize)) {
if (!sizeMap.containsKey(targetPDRectangle)) {
throw new IllegalArgumentException(
"Invalid pageSize. It must be one of the following: A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10");
"Invalid PDRectangle. It must be one of the following: A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10");
}
PageSize pageSize = sizeMap.get(targetPageSize);
PDRectangle targetSize = sizeMap.get(targetPDRectangle);
byte[] bytes = file.getBytes();
PdfReader reader = new PdfReader(new ByteArrayInputStream(bytes));
PdfDocument pdfDoc = new PdfDocument(reader);
PDDocument sourceDocument = PDDocument.load(file.getBytes());
PDDocument outputDocument = new PDDocument();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PdfWriter writer = new PdfWriter(baos);
PdfDocument outputPdf = new PdfDocument(writer);
int totalPages = sourceDocument.getNumberOfPages();
for (int i = 0; i < totalPages; i++) {
PDPage sourcePage = sourceDocument.getPage(i);
PDRectangle sourceSize = sourcePage.getMediaBox();
int totalPages = pdfDoc.getNumberOfPages();
float scaleWidth = targetSize.getWidth() / sourceSize.getWidth();
float scaleHeight = targetSize.getHeight() / sourceSize.getHeight();
float scale = Math.min(scaleWidth, scaleHeight) * scaleFactor;
for (int i = 1; i <= totalPages; i++) {
PdfPage page = outputPdf.addNewPage(pageSize);
PdfCanvas pdfCanvas = new PdfCanvas(page);
PDPage newPage = new PDPage(targetSize);
outputDocument.addPage(newPage);
// Get the page and calculate scaling factors
Rectangle rect = pdfDoc.getPage(i).getPageSize();
float scaleWidth = pageSize.getWidth() / rect.getWidth();
float scaleHeight = pageSize.getHeight() / rect.getHeight();
float scale = Math.min(scaleWidth, scaleHeight) * scaleFactor;
System.out.println("Scale: " + scale);
PDPageContentStream contentStream = new PDPageContentStream(outputDocument, newPage, PDPageContentStream.AppendMode.APPEND, true);
PdfFormXObject formXObject = pdfDoc.getPage(i).copyAsFormXObject(outputPdf);
float x = (pageSize.getWidth() - rect.getWidth() * scale) / 2; // Center Page
float y = (pageSize.getHeight() - rect.getHeight() * scale) / 2;
float x = (targetSize.getWidth() - sourceSize.getWidth() * scale) / 2;
float y = (targetSize.getHeight() - sourceSize.getHeight() * scale) / 2;
// Save the graphics state, apply the transformations, add the object, and then
// restore the graphics state
pdfCanvas.saveState();
pdfCanvas.concatMatrix(scale, 0, 0, scale, x, y);
pdfCanvas.addXObject(formXObject, 0, 0);
pdfCanvas.restoreState();
}
contentStream.saveGraphicsState();
contentStream.transform(Matrix.getTranslateInstance(x, y));
contentStream.transform(Matrix.getScaleInstance(scale, scale));
outputPdf.close();
byte[] pdfContent = baos.toByteArray();
pdfDoc.close();
return WebResponseUtils.bytesToWebResponse(pdfContent,
LayerUtility layerUtility = new LayerUtility(outputDocument);
PDFormXObject form = layerUtility.importPageAsForm(sourceDocument, i);
contentStream.drawForm(form);
contentStream.restoreGraphicsState();
contentStream.close();
}
ByteArrayOutputStream baos = new ByteArrayOutputStream();
outputDocument.save(baos);
outputDocument.close();
sourceDocument.close();
return WebResponseUtils.bytesToWebResponse(baos.toByteArray(),
file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_scaled.pdf");
}
//TODO
@Hidden
@PostMapping(value = "/auto-crop", consumes = "multipart/form-data")
public ResponseEntity<byte[]> cropPdf(@RequestParam("fileInput") MultipartFile file) throws IOException {
byte[] bytes = file.getBytes();
PdfReader reader = new PdfReader(new ByteArrayInputStream(bytes));
PdfDocument pdfDoc = new PdfDocument(reader);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PdfWriter writer = new PdfWriter(baos);
PdfDocument outputPdf = new PdfDocument(writer);
int totalPages = pdfDoc.getNumberOfPages();
for (int i = 1; i <= totalPages; i++) {
PdfPage page = pdfDoc.getPage(i);
Rectangle originalMediaBox = page.getMediaBox();
Rectangle contentBox = determineContentBox(page);
// Make sure we don't go outside the original media box.
Rectangle intersection = originalMediaBox.getIntersection(contentBox);
page.setCropBox(intersection);
// Copy page to the new document
outputPdf.addPage(page.copyTo(outputPdf));
}
outputPdf.close();
byte[] pdfContent = baos.toByteArray();
pdfDoc.close();
return ResponseEntity.ok()
.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\""
+ file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_cropped.pdf\"")
.contentType(MediaType.APPLICATION_PDF).body(pdfContent);
}
private Rectangle determineContentBox(PdfPage page) {
// Extract the text from the page and find the bounding box.
TextBoundingRectangleFinder finder = new TextBoundingRectangleFinder();
PdfCanvasProcessor processor = new PdfCanvasProcessor(finder);
processor.processPageContent(page);
return finder.getBoundingBox();
}
private static class TextBoundingRectangleFinder implements IEventListener {
private List<Rectangle> allTextBoxes = new ArrayList<>();
public Rectangle getBoundingBox() {
// Sort the text boxes based on their vertical position
allTextBoxes.sort(Comparator.comparingDouble(Rectangle::getTop));
// Consider a box an outlier if its top is more than 1.5 times the IQR above the
// third quartile.
int q1Index = allTextBoxes.size() / 4;
int q3Index = 3 * allTextBoxes.size() / 4;
double iqr = allTextBoxes.get(q3Index).getTop() - allTextBoxes.get(q1Index).getTop();
double threshold = allTextBoxes.get(q3Index).getTop() + 1.5 * iqr;
// Initialize boundingBox to the first non-outlier box
int i = 0;
while (i < allTextBoxes.size() && allTextBoxes.get(i).getTop() > threshold) {
i++;
}
if (i == allTextBoxes.size()) {
// If all boxes are outliers, just return the first one
return allTextBoxes.get(0);
}
Rectangle boundingBox = allTextBoxes.get(i);
// Extend the bounding box to include all non-outlier boxes
for (; i < allTextBoxes.size(); i++) {
Rectangle textBoundingBox = allTextBoxes.get(i);
if (textBoundingBox.getTop() > threshold) {
// This box is an outlier, skip it
continue;
}
float left = Math.min(boundingBox.getLeft(), textBoundingBox.getLeft());
float bottom = Math.min(boundingBox.getBottom(), textBoundingBox.getBottom());
float right = Math.max(boundingBox.getRight(), textBoundingBox.getRight());
float top = Math.max(boundingBox.getTop(), textBoundingBox.getTop());
// Add a small padding around the bounding box
float padding = 10;
boundingBox = new Rectangle(left - padding, bottom - padding, right - left + 2 * padding,
top - bottom + 2 * padding);
}
return boundingBox;
}
@Override
public void eventOccurred(IEventData data, EventType type) {
if (type == EventType.RENDER_TEXT) {
TextRenderInfo renderInfo = (TextRenderInfo) data;
allTextBoxes.add(renderInfo.getBaseline().getBoundingRectangle());
}
}
@Override
public Set<EventType> getSupportedEvents() {
return Collections.singleton(EventType.RENDER_TEXT);
}
}
}

View File

@ -11,7 +11,6 @@ import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.ResponseEntity;
@ -20,14 +19,7 @@ import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.xobject.PdfFormXObject;
import com.itextpdf.layout.Document;
import com.itextpdf.layout.element.Image;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;

View File

@ -19,7 +19,6 @@
<div class="mb-3">
<label for="pageSize" th:text="#{scalePages.pageSize}"></label>
<select id="pageSize" name="pageSize" required>
<option value="A0">A0</option>
<option value="A1">A1</option>
<option value="A2">A2</option>
@ -27,25 +26,8 @@
<option value="A4" selected>A4</option>
<option value="A5">A5</option>
<option value="A6">A6</option>
<option value="A7">A7</option>
<option value="A8">A8</option>
<option value="A9">A9</option>
<option value="A10">A10</option>
<option value="B0">B0</option>
<option value="B1">B1</option>
<option value="B2">B2</option>
<option value="B3">B3</option>
<option value="B4">B4</option>
<option value="B5">B5</option>
<option value="B6">B6</option>
<option value="B7">B7</option>
<option value="B8">B8</option>
<option value="B9">B9</option>
<option value="LETTER">Letter</option>
<option value="LEGAL">Legal</option>
<option value="EXECUTIVE">Executive</option>
<option value="TABLOID">Tabloid</option>
<option value="LEDGER">Ledger</option>
</select>
</div>
<div class="mb-3">