feat: split pdf into small chunks by pdfbox (#5718)

Co-authored-by: Ubuntu <ubuntu@vps-1aebde64.vps.ovh.ca>
Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
This commit is contained in:
intelliking
2026-02-23 21:15:27 +00:00
committed by GitHub
parent 73213901d1
commit 9b0610b2cc
9 changed files with 438 additions and 3 deletions

View File

@@ -0,0 +1,271 @@
package stirling.software.SPDF.controller.api;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.pdfbox.multipdf.LayerUtility;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.util.Matrix;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.config.swagger.MultiFileResponse;
import stirling.software.SPDF.model.api.general.PosterPdfRequest;
import stirling.software.common.annotations.AutoJobPostMapping;
import stirling.software.common.annotations.api.GeneralApi;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ExceptionUtils;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.WebResponseUtils;
@GeneralApi
@Slf4j
@RequiredArgsConstructor
public class PosterPdfController {

    /** Smallest accepted grid factor; mirrors the minimum documented on the request schema. */
    private static final int MIN_GRID_FACTOR = 1;

    /** Largest accepted grid factor; mirrors the maximum documented on the request schema. */
    private static final int MAX_GRID_FACTOR = 10;

    /** Supported output page sizes keyed by the name clients send. Built once, never mutated. */
    private static final Map<String, PDRectangle> PAGE_SIZES = createPageSizes();

    private final CustomPDFDocumentFactory pdfDocumentFactory;

    /**
     * Splits every page of the uploaded PDF into an {@code xFactor} x {@code yFactor} grid and
     * writes each grid cell, uniformly scaled and centred, onto its own page of the requested
     * target size. The resulting PDF is returned wrapped in a ZIP archive.
     *
     * @param request the uploaded file plus target page size, grid factors and column ordering
     * @return a response whose body is a ZIP archive containing the single poster PDF
     * @throws IllegalArgumentException if the page size is unknown or a grid factor is outside
     *     {@value #MIN_GRID_FACTOR}..{@value #MAX_GRID_FACTOR}
     * @throws IOException if the PDF cannot be read, processed or written (logged, then rethrown)
     */
    @AutoJobPostMapping(
            value = "/split-for-poster-print",
            consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
    @MultiFileResponse
    @Operation(
            summary = "Split large PDF pages into smaller printable chunks",
            description =
                    "This endpoint splits large or oddly-sized PDF pages into smaller chunks "
                            + "suitable for printing on standard paper sizes (e.g., A4, Letter). "
                            + "Divides each page into a grid of smaller pages using Apache PDFBox. "
                            + "Input: PDF Output: ZIP-PDF Type: SISO")
    public ResponseEntity<byte[]> posterPdf(@ModelAttribute PosterPdfRequest request)
            throws Exception {
        log.debug("Starting PDF poster split process with request: {}", request);

        MultipartFile file = request.getFileInput();
        String filename = GeneralUtils.generateFilename(file.getOriginalFilename(), "");
        log.debug("Base filename for output: {}", filename);

        // Validate the cheap parameters first so that a bad request is rejected before the
        // upload is parsed. A factor of 0 would otherwise divide by zero and silently produce
        // an empty document, and an unbounded factor would create an unbounded number of pages.
        int xFactor = requireValidGridFactor("xFactor", request.getXFactor());
        int yFactor = requireValidGridFactor("yFactor", request.getYFactor());
        boolean rightToLeft = request.isRightToLeft();
        PDRectangle targetPageSize = getTargetPageSize(request.getPageSize());
        log.debug(
                "Target page size: {} ({}x{})",
                request.getPageSize(),
                targetPageSize.getWidth(),
                targetPageSize.getHeight());

        try (PDDocument sourceDocument = pdfDocumentFactory.load(file);
                PDDocument outputDocument =
                        pdfDocumentFactory.createNewDocumentBasedOnOldDocument(sourceDocument);
                ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
                ByteArrayOutputStream zipOutputStream = new ByteArrayOutputStream()) {

            // LayerUtility imports source pages into the output document as form XObjects
            LayerUtility layerUtility = new LayerUtility(outputDocument);

            int totalPages = sourceDocument.getNumberOfPages();
            log.debug(
                    "Processing {} pages with grid {}x{}, RTL={}",
                    totalPages,
                    xFactor,
                    yFactor,
                    rightToLeft);

            for (int pageIndex = 0; pageIndex < totalPages; pageIndex++) {
                splitPageIntoGrid(
                        sourceDocument,
                        outputDocument,
                        layerUtility,
                        pageIndex,
                        targetPageSize,
                        xFactor,
                        yFactor,
                        rightToLeft);
            }

            // Serialize the assembled document
            outputDocument.save(pdfOutputStream);
            byte[] pdfData = pdfOutputStream.toByteArray();
            log.debug(
                    "Generated output PDF with {} pages ({} bytes)",
                    outputDocument.getNumberOfPages(),
                    pdfData.length);

            // The endpoint is declared as a multi-file response, so wrap the PDF in a ZIP
            try (ZipOutputStream zipOut = new ZipOutputStream(zipOutputStream)) {
                zipOut.putNextEntry(new ZipEntry(filename + "_poster.pdf"));
                zipOut.write(pdfData);
                zipOut.closeEntry();
            }

            byte[] zipData = zipOutputStream.toByteArray();
            log.debug("Successfully created ZIP with {} bytes", zipData.length);

            return WebResponseUtils.bytesToWebResponse(
                    zipData, filename + "_poster.zip", MediaType.APPLICATION_OCTET_STREAM);
        } catch (IOException e) {
            ExceptionUtils.logException("PDF poster split process", e);
            throw e;
        }
    }

    /**
     * Appends one output page per grid cell of the given source page to {@code outputDocument}.
     * Cells are emitted row by row starting with the top row; within a row the columns run left
     * to right, or right to left when {@code rightToLeft} is set.
     */
    private void splitPageIntoGrid(
            PDDocument sourceDocument,
            PDDocument outputDocument,
            LayerUtility layerUtility,
            int pageIndex,
            PDRectangle targetPageSize,
            int xFactor,
            int yFactor,
            boolean rightToLeft)
            throws IOException {
        PDPage sourcePage = sourceDocument.getPage(pageIndex);

        // The visible area is the CropBox; fall back to the MediaBox if none is reported
        PDRectangle cropBox = sourcePage.getCropBox();
        if (cropBox == null) {
            cropBox = sourcePage.getMediaBox();
        }

        int rotation = sourcePage.getRotation();
        float sourceWidth = cropBox.getWidth();
        float sourceHeight = cropBox.getHeight();
        // A quarter-turn rotation swaps the page's effective width and height
        if (rotation == 90 || rotation == 270) {
            float swapped = sourceWidth;
            sourceWidth = sourceHeight;
            sourceHeight = swapped;
        }
        log.debug(
                "Page {}: Normalized to CropBox dimensions {}x{}, rotation {}",
                pageIndex,
                sourceWidth,
                sourceHeight,
                rotation);

        PDFormXObject form =
                importPageNormalizedToBox(layerUtility, sourceDocument, pageIndex, cropBox);

        // Size of one grid cell in source page units
        float cellWidth = sourceWidth / xFactor;
        float cellHeight = sourceHeight / yFactor;

        // Uniform scale that fits a cell inside the target page (scales up or down as needed).
        // Scale and centring offsets depend only on the page, so compute them once per page.
        float scale =
                Math.min(
                        targetPageSize.getWidth() / cellWidth,
                        targetPageSize.getHeight() / cellHeight);
        float offsetX = (targetPageSize.getWidth() - cellWidth * scale) / 2;
        float offsetY = (targetPageSize.getHeight() - cellHeight * scale) / 2;

        for (int row = 0; row < yFactor; row++) {
            for (int col = 0; col < xFactor; col++) {
                int actualCol = rightToLeft ? (xFactor - 1 - col) : col;

                // PDF coordinates start at the bottom-left, so invert the row index to make
                // row 0 the TOP strip of the source page.
                float cropX = actualCol * cellWidth;
                float cropY = (yFactor - 1 - row) * cellHeight;

                PDPage outputPage = new PDPage(targetPageSize);
                outputDocument.addPage(outputPage);

                try (PDPageContentStream contentStream =
                        new PDPageContentStream(
                                outputDocument,
                                outputPage,
                                PDPageContentStream.AppendMode.APPEND,
                                true,
                                true)) {
                    contentStream.saveGraphicsState();
                    // Transformations are applied innermost-last: shift the wanted cell to the
                    // origin, scale it, then move it to the centred position on the page.
                    contentStream.transform(Matrix.getTranslateInstance(offsetX, offsetY));
                    contentStream.transform(Matrix.getScaleInstance(scale, scale));
                    // The page was imported with MediaBox == CropBox, so the shift is expressed
                    // relative to the CropBox origin rather than the original MediaBox origin.
                    contentStream.transform(Matrix.getTranslateInstance(-cropX, -cropY));
                    contentStream.drawForm(form);
                    contentStream.restoreGraphicsState();
                }

                log.trace(
                        "Created output page for grid cell [{},{}] of page {}: cropX={}, cropY={}, translate=({}, {})",
                        row,
                        actualCol,
                        pageIndex,
                        cropX,
                        cropY,
                        -cropX,
                        -cropY);
            }
        }
    }

    /**
     * Imports a source page as a form XObject while its MediaBox and CropBox are temporarily both
     * set to {@code box}, so the imported form is not offset by the original MediaBox. The page's
     * original boxes are restored afterwards, even if the import fails.
     */
    private PDFormXObject importPageNormalizedToBox(
            LayerUtility layerUtility, PDDocument sourceDocument, int pageIndex, PDRectangle box)
            throws IOException {
        PDPage sourcePage = sourceDocument.getPage(pageIndex);
        PDRectangle originalMediaBox = sourcePage.getMediaBox();
        PDRectangle originalCropBox = sourcePage.getCropBox();

        sourcePage.setMediaBox(box);
        sourcePage.setCropBox(box);
        try {
            return layerUtility.importPageAsForm(sourceDocument, pageIndex);
        } finally {
            sourcePage.setMediaBox(originalMediaBox);
            sourcePage.setCropBox(originalCropBox);
        }
    }

    /**
     * Checks that a grid factor lies within the documented range.
     *
     * @param name parameter name used in the error message
     * @param value the factor supplied by the client
     * @return {@code value} unchanged when it is valid
     * @throws IllegalArgumentException if {@code value} is outside
     *     {@value #MIN_GRID_FACTOR}..{@value #MAX_GRID_FACTOR}
     */
    private static int requireValidGridFactor(String name, int value) {
        if (value < MIN_GRID_FACTOR || value > MAX_GRID_FACTOR) {
            throw ExceptionUtils.createIllegalArgumentException(
                    "error.invalidArgument",
                    "Invalid argument: {0}",
                    name
                            + " must be between "
                            + MIN_GRID_FACTOR
                            + " and "
                            + MAX_GRID_FACTOR
                            + " but was "
                            + value);
        }
        return value;
    }

    /**
     * Maps page size string to PDRectangle.
     *
     * @param pageSize the page size name (e.g., "A4", "Letter"); matching is case-sensitive
     * @return the corresponding PDRectangle
     * @throws IllegalArgumentException if page size is not supported (including {@code null})
     */
    private PDRectangle getTargetPageSize(String pageSize) {
        PDRectangle size = PAGE_SIZES.get(pageSize);
        if (size == null) {
            // The supported names were already passed as the second argument; the template now
            // has a {1} placeholder so they actually appear in the message.
            throw ExceptionUtils.createIllegalArgumentException(
                    "error.invalidPageSize",
                    "Invalid page size: {0}. Supported sizes: {1}",
                    pageSize,
                    String.join(", ", PAGE_SIZES.keySet()));
        }
        return size;
    }

    /** Builds the lookup table of supported page sizes. */
    private static Map<String, PDRectangle> createPageSizes() {
        // HashMap (rather than Map.of) so that a null lookup key returns null instead of throwing
        Map<String, PDRectangle> sizes = new HashMap<>();
        sizes.put("A4", PDRectangle.A4);
        sizes.put("Letter", PDRectangle.LETTER);
        sizes.put("A3", PDRectangle.A3);
        sizes.put("A5", PDRectangle.A5);
        sizes.put("Legal", PDRectangle.LEGAL);
        sizes.put("Tabloid", new PDRectangle(792, 1224)); // 11x17 inches
        return sizes;
    }
}

View File

@@ -0,0 +1,41 @@
package stirling.software.SPDF.model.api.general;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.EqualsAndHashCode;
import stirling.software.common.model.api.PDFFile;
/**
 * Request parameters for splitting PDF pages into a grid of printable chunks: the uploaded file
 * (inherited from {@link PDFFile}), the target page size, the grid dimensions and the column
 * ordering. Every field has an initializer, so omitted form fields fall back to those values.
 */
@Data
@EqualsAndHashCode(callSuper = true)
public class PosterPdfRequest extends PDFFile {

    /** Name of the output page size; initialised to {@code "A4"}. */
    @Schema(
            description = "Target page size for output chunks (e.g., 'A4', 'Letter', 'A3')",
            requiredMode = Schema.RequiredMode.REQUIRED,
            defaultValue = "A4",
            allowableValues = {"A4", "Letter", "A3", "A5", "Legal", "Tabloid"})
    private String pageSize = "A4";

    /** Number of columns each source page is divided into; initialised to 2. */
    @Schema(
            description = "Horizontal decimation factor (how many columns to split into)",
            requiredMode = Schema.RequiredMode.NOT_REQUIRED,
            defaultValue = "2",
            minimum = "1",
            maximum = "10")
    private int xFactor = 2;

    /** Number of rows each source page is divided into; initialised to 2. */
    @Schema(
            description = "Vertical decimation factor (how many rows to split into)",
            requiredMode = Schema.RequiredMode.NOT_REQUIRED,
            defaultValue = "2",
            minimum = "1",
            maximum = "10")
    private int yFactor = 2;

    /** Whether columns are emitted right-to-left instead of left-to-right; initialised to false. */
    @Schema(
            description = "Split right-to-left instead of left-to-right",
            requiredMode = Schema.RequiredMode.NOT_REQUIRED,
            defaultValue = "false")
    private boolean rightToLeft = false;
}

View File

@@ -5988,6 +5988,11 @@ desc = "Auto-split with divider sheets"
name = "Page Divider"
tooltip = "Use QR code divider sheets between documents when scanning"
[split.methods.byPoster]
name = "Printable Chunks"
desc = "Split large pages into printable sizes"
tooltip = "Divide oversized pages into smaller chunks suitable for printing on standard paper (A4, Letter, etc.)"
[split.methods.byPages]
desc = "Extract specific pages (1,3,5-10)"
name = "Page Numbers"
@@ -6006,6 +6011,7 @@ tooltip = "Specify maximum file size (e.g. 10MB, 500KB)"
[split.methods.prefix]
splitAt = "Split at"
splitBy = "Split by"
splitInto = "Split into"
[split.methodSelection.tooltip]
bullet1 = "Click on a method card to select it"
@@ -6025,6 +6031,21 @@ selectMethodFirst = "Please select a split method first"
chooseMethod = "Choose Method"
settings = "Settings"
[split.poster]
rightToLeft = "Split right-to-left"
[split.poster.pageSize]
label = "Target Page Size"
description = "Size of output pages for printing"
[split.poster.xFactor]
label = "Horizontal Divisions"
description = "Number of columns to split each page into"
[split.poster.yFactor]
label = "Vertical Divisions"
description = "Number of rows to split each page into"
[split.tooltip.byChapters]
bullet1 = "Bookmark Level: Which level to split on (1=top level)"
bullet2 = "Include Metadata: Preserve document properties"
@@ -6062,6 +6083,14 @@ bullet5 = "Enable Duplex Mode if scanning both sides of divider sheets"
text = "Automatically split scanned documents using physical divider sheets with QR codes. Perfect for processing multiple documents scanned together."
title = "Split by Page Divider"
[split.tooltip.byPoster]
bullet1 = "Target Page Size: Choose output paper size (A4, Letter, etc.)"
bullet2 = "Horizontal/Vertical Divisions: Grid size for splitting"
bullet3 = "Right-to-Left: Reverse column order for RTL layouts"
bullet4 = "Print and assemble the pieces to create your poster"
text = "Split large PDF pages into smaller printable chunks suitable for standard paper sizes. Perfect for creating poster prints from oversized pages."
title = "Split for Poster Print"
[split.tooltip.byPages]
bullet1 = "Single split points: 3,7 (splits after pages 3 and 7)"
bullet2 = "Range split points: 3-8 (splits before page 3 and after page 8)"

View File

@@ -160,6 +160,55 @@ const SplitSettings = ({
</Stack>
);
// Settings form for the poster split method: target paper size, grid dimensions
// (columns x rows) and column ordering. Each control writes straight back through
// onParameterChange.
const renderByPosterForm = () => (
  <Stack gap="sm">
    <Select
      label={t("split.poster.pageSize.label", "Target Page Size")}
      description={t("split.poster.pageSize.description", "Size of output pages for printing")}
      value={parameters.pageSize || 'A4'}
      onChange={(value) => onParameterChange('pageSize', value || 'A4')}
      data={[
        { value: 'A4', label: 'A4 (210 × 297 mm)' },
        { value: 'Letter', label: 'Letter (8.5 × 11 in)' },
        { value: 'A3', label: 'A3 (297 × 420 mm)' },
        { value: 'A5', label: 'A5 (148 × 210 mm)' },
        { value: 'Legal', label: 'Legal (8.5 × 14 in)' },
        { value: 'Tabloid', label: 'Tabloid (11 × 17 in)' },
      ]}
      disabled={disabled}
      comboboxProps={{ withinPortal: true, zIndex: Z_INDEX_AUTOMATE_DROPDOWN }}
    />
    {/*
      Use `??` rather than `||` for the grid inputs: an empty string is a legitimate
      intermediate state while the user is editing. With `||` a cleared field snapped
      back to 2 and the next keystroke was appended to it.
    */}
    <TextInput
      label={t("split.poster.xFactor.label", "Horizontal Divisions")}
      description={t("split.poster.xFactor.description", "Number of columns to split each page into")}
      type="number"
      min="1"
      max="10"
      value={parameters.xFactor ?? '2'}
      onChange={(e) => onParameterChange('xFactor', e.target.value)}
      placeholder="2"
      disabled={disabled}
    />
    <TextInput
      label={t("split.poster.yFactor.label", "Vertical Divisions")}
      description={t("split.poster.yFactor.description", "Number of rows to split each page into")}
      type="number"
      min="1"
      max="10"
      value={parameters.yFactor ?? '2'}
      onChange={(e) => onParameterChange('yFactor', e.target.value)}
      placeholder="2"
      disabled={disabled}
    />
    <Checkbox
      label={t("split.poster.rightToLeft", "Split right-to-left")}
      checked={parameters.rightToLeft || false}
      onChange={(e) => onParameterChange('rightToLeft', e.currentTarget.checked)}
      disabled={disabled}
    />
  </Stack>
);
// Don't render anything if no method is selected
if (!parameters.method) {
return (
@@ -181,6 +230,7 @@ const SplitSettings = ({
parameters.method === SPLIT_METHODS.BY_DOC_COUNT) && renderSplitValueForm()}
{parameters.method === SPLIT_METHODS.BY_CHAPTERS && renderByChaptersForm()}
{parameters.method === SPLIT_METHODS.BY_PAGE_DIVIDER && renderByPageDividerForm()}
{parameters.method === SPLIT_METHODS.BY_POSTER && renderByPosterForm()}
</Stack>
);
};

View File

@@ -127,6 +127,24 @@ export const useSplitSettingsTips = (method: SplitMethod | ''): TooltipContent |
]
}
]
},
[SPLIT_METHODS.BY_POSTER]: {
header: {
title: t("split.tooltip.byPoster.title", "Split for Poster Print")
},
tips: [
{
title: t("split.tooltip.byPoster.title", "Split for Poster Print"),
description: t("split.tooltip.byPoster.text", "Split large PDF pages into smaller printable chunks suitable for standard paper sizes. Perfect for creating poster prints from oversized pages."),
bullets: [
t("split.tooltip.byPoster.bullet1", "Target Page Size: Choose output paper size (A4, Letter, etc.)"),
t("split.tooltip.byPoster.bullet2", "Horizontal/Vertical Divisions: Grid size for splitting"),
t("split.tooltip.byPoster.bullet3", "Right-to-Left: Reverse column order for RTL layouts"),
t("split.tooltip.byPoster.bullet4", "Print and assemble the pieces to create your poster")
]
}
]
}
};

View File

@@ -5,7 +5,8 @@ export const SPLIT_METHODS = {
BY_PAGE_COUNT: 'byPageCount',
BY_DOC_COUNT: 'byDocCount',
BY_CHAPTERS: 'byChapters',
BY_PAGE_DIVIDER: 'byPageDivider'
BY_PAGE_DIVIDER: 'byPageDivider',
BY_POSTER: 'byPoster'
} as const;
@@ -16,7 +17,8 @@ export const ENDPOINTS = {
[SPLIT_METHODS.BY_PAGE_COUNT]: 'split-by-size-or-count',
[SPLIT_METHODS.BY_DOC_COUNT]: 'split-by-size-or-count',
[SPLIT_METHODS.BY_CHAPTERS]: 'split-pdf-by-chapters',
[SPLIT_METHODS.BY_PAGE_DIVIDER]: 'auto-split-pdf'
[SPLIT_METHODS.BY_PAGE_DIVIDER]: 'auto-split-pdf',
[SPLIT_METHODS.BY_POSTER]: 'split-for-poster-print'
} as const;
export type SplitMethod = typeof SPLIT_METHODS[keyof typeof SPLIT_METHODS];
@@ -72,6 +74,12 @@ export const METHOD_OPTIONS: MethodOption[] = [
prefixKey: "split.methods.prefix.splitBy",
nameKey: "split.methods.byPageDivider.name",
tooltipKey: "split.methods.byPageDivider.tooltip"
},
{
value: SPLIT_METHODS.BY_POSTER,
prefixKey: "split.methods.prefix.splitInto",
nameKey: "split.methods.byPoster.name",
tooltipKey: "split.methods.byPoster.tooltip"
}
];

View File

@@ -45,6 +45,12 @@ export const buildSplitFormData = (parameters: SplitParameters, file: File): For
case SPLIT_METHODS.BY_PAGE_DIVIDER:
formData.append("duplexMode", (parameters.duplexMode ?? false).toString());
break;
case SPLIT_METHODS.BY_POSTER:
formData.append("pageSize", parameters.pageSize || 'A4');
formData.append("xFactor", parameters.xFactor || '2');
formData.append("yFactor", parameters.yFactor || '2');
formData.append("rightToLeft", (parameters.rightToLeft ?? false).toString());
break;
default:
throw new Error(`Unknown split method: ${parameters.method}`);
}
@@ -66,6 +72,8 @@ export const getSplitEndpoint = (parameters: SplitParameters): string => {
return "/api/v1/general/split-pdf-by-chapters";
case SPLIT_METHODS.BY_PAGE_DIVIDER:
return "/api/v1/misc/auto-split-pdf";
case SPLIT_METHODS.BY_POSTER:
return "/api/v1/general/split-for-poster-print";
default:
throw new Error(`Unknown split method: ${parameters.method}`);
}

View File

@@ -15,6 +15,10 @@ export interface SplitParameters extends BaseParameters {
duplexMode: boolean;
splitMode?: string;
customPages?: string;
pageSize?: string;
xFactor?: string;
yFactor?: string;
rightToLeft?: boolean;
}
export type SplitParametersHook = BaseParametersHook<SplitParameters>;
@@ -32,6 +36,10 @@ export const defaultParameters: SplitParameters = {
duplexMode: false,
splitMode: 'SPLIT_ALL',
customPages: '',
pageSize: 'A4',
xFactor: '2',
yFactor: '2',
rightToLeft: false,
};
export const useSplitParameters = (): SplitParametersHook => {
@@ -61,6 +69,8 @@ export const useSplitParameters = (): SplitParametersHook => {
return params.bookmarkLevel !== "";
case SPLIT_METHODS.BY_PAGE_DIVIDER:
return true; // No required parameters
case SPLIT_METHODS.BY_POSTER:
return params.pageSize !== "" && params.xFactor !== "" && params.yFactor !== "";
default:
return false;
}

View File

@@ -147,7 +147,7 @@ const extractKeys = (file: string): FoundKey[] => {
};
describe('Missing translation coverage', () => {
test('fails if any en-GB translation key used in source is missing', () => {
test('fails if any en-GB translation key used in source is missing', { timeout: 10000 }, () => {
expect(fs.existsSync(EN_GB_FILE)).toBe(true);
const localeContent = fs.readFileSync(EN_GB_FILE, 'utf8');