mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-09-17 17:52:28 +02:00
Compression fix
This commit is contained in:
parent
0fb76a1f4c
commit
853744864b
@ -25,7 +25,7 @@ ext {
|
|||||||
}
|
}
|
||||||
|
|
||||||
group = "stirling.software"
|
group = "stirling.software"
|
||||||
version = "0.44.2"
|
version = "0.44.3"
|
||||||
|
|
||||||
java {
|
java {
|
||||||
// 17 is lowest but we support and recommend 21
|
// 17 is lowest but we support and recommend 21
|
||||||
|
@ -25,12 +25,23 @@ import javax.imageio.ImageWriter;
|
|||||||
import javax.imageio.plugins.jpeg.JPEGImageWriteParam;
|
import javax.imageio.plugins.jpeg.JPEGImageWriteParam;
|
||||||
import javax.imageio.stream.ImageOutputStream;
|
import javax.imageio.stream.ImageOutputStream;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.contentstream.PDFStreamEngine;
|
||||||
|
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||||
|
import org.apache.pdfbox.cos.COSArray;
|
||||||
|
import org.apache.pdfbox.cos.COSBase;
|
||||||
|
import org.apache.pdfbox.cos.COSDictionary;
|
||||||
import org.apache.pdfbox.cos.COSName;
|
import org.apache.pdfbox.cos.COSName;
|
||||||
|
import org.apache.pdfbox.cos.COSStream;
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
|
||||||
import org.apache.pdfbox.pdmodel.PDPage;
|
import org.apache.pdfbox.pdmodel.PDPage;
|
||||||
import org.apache.pdfbox.pdmodel.PDResources;
|
import org.apache.pdfbox.pdmodel.PDResources;
|
||||||
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
|
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
|
||||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.image.PDInlineImage;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.shading.PDShading;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.http.ResponseEntity;
|
import org.springframework.http.ResponseEntity;
|
||||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||||
@ -78,11 +89,19 @@ public class CompressController {
|
|||||||
int pageNum; // Page number where the image appears
|
int pageNum; // Page number where the image appears
|
||||||
COSName name; // The name used to reference this image
|
COSName name; // The name used to reference this image
|
||||||
}
|
}
|
||||||
|
@Data
|
||||||
|
@AllArgsConstructor
|
||||||
|
@NoArgsConstructor
|
||||||
|
private static class NestedImageReference extends ImageReference {
|
||||||
|
COSName formName; // Name of the form XObject containing the image
|
||||||
|
COSName imageName; // Name of the image within the form
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public Path compressImagesInPDF(
|
public Path compressImagesInPDF(
|
||||||
Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
|
Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
Path newCompressedPDF = Files.createTempFile("compressedPDF", ".pdf");
|
Path newCompressedPDF = Files.createTempFile("compressedPDF", ".pdf");
|
||||||
long originalFileSize = Files.size(pdfFile);
|
long originalFileSize = Files.size(pdfFile);
|
||||||
log.info(
|
log.info(
|
||||||
"Starting image compression with scale factor: {}, JPEG quality: {}, grayscale: {} on file size: {}",
|
"Starting image compression with scale factor: {}, JPEG quality: {}, grayscale: {} on file size: {}",
|
||||||
@ -92,42 +111,84 @@ public class CompressController {
|
|||||||
GeneralUtils.formatBytes(originalFileSize));
|
GeneralUtils.formatBytes(originalFileSize));
|
||||||
|
|
||||||
try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) {
|
try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) {
|
||||||
|
|
||||||
// Collect all unique images by content hash
|
// Collect all unique images by content hash
|
||||||
Map<String, List<ImageReference>> uniqueImages = new HashMap<>();
|
Map<String, List<ImageReference>> uniqueImages = new HashMap<>();
|
||||||
Map<String, PDImageXObject> compressedVersions = new HashMap<>();
|
Map<String, PDImageXObject> compressedVersions = new HashMap<>();
|
||||||
|
|
||||||
int totalImages = 0;
|
int totalImages = 0;
|
||||||
|
int nestedImages = 0;
|
||||||
|
|
||||||
|
// FIRST PASS: Collect all images (direct and nested)
|
||||||
for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
|
for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
|
||||||
PDPage page = doc.getPage(pageNum);
|
PDPage page = doc.getPage(pageNum);
|
||||||
PDResources res = page.getResources();
|
PDResources res = page.getResources();
|
||||||
if (res == null || res.getXObjectNames() == null) continue;
|
if (res == null || res.getXObjectNames() == null) continue;
|
||||||
|
|
||||||
|
// Process direct XObjects on page
|
||||||
for (COSName name : res.getXObjectNames()) {
|
for (COSName name : res.getXObjectNames()) {
|
||||||
PDXObject xobj = res.getXObject(name);
|
PDXObject xobj = res.getXObject(name);
|
||||||
if (!(xobj instanceof PDImageXObject)) continue;
|
|
||||||
|
|
||||||
totalImages++;
|
// Direct image
|
||||||
PDImageXObject image = (PDImageXObject) xobj;
|
if (xobj instanceof PDImageXObject) {
|
||||||
String imageHash = generateImageHash(image);
|
totalImages++;
|
||||||
|
PDImageXObject image = (PDImageXObject) xobj;
|
||||||
|
String imageHash = generateImageHash(image);
|
||||||
|
|
||||||
// Store only page number and name reference
|
ImageReference ref = new ImageReference();
|
||||||
ImageReference ref = new ImageReference();
|
ref.pageNum = pageNum;
|
||||||
ref.pageNum = pageNum;
|
ref.name = name;
|
||||||
ref.name = name;
|
|
||||||
|
|
||||||
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref);
|
log.info("Found direct image '{}' on page {} - {}x{}",
|
||||||
|
name.getName(), pageNum + 1, image.getWidth(), image.getHeight());
|
||||||
|
|
||||||
|
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref);
|
||||||
|
}
|
||||||
|
// Form XObject may contain nested images
|
||||||
|
else if (xobj instanceof org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject) {
|
||||||
|
org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject formXObj =
|
||||||
|
(org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject) xobj;
|
||||||
|
|
||||||
|
PDResources formResources = formXObj.getResources();
|
||||||
|
if (formResources != null && formResources.getXObjectNames() != null) {
|
||||||
|
// Process nested XObjects within the form
|
||||||
|
log.info("Checking form XObject '{}' on page {} for nested images",
|
||||||
|
name.getName(), pageNum + 1);
|
||||||
|
|
||||||
|
for (COSName nestedName : formResources.getXObjectNames()) {
|
||||||
|
PDXObject nestedXobj = formResources.getXObject(nestedName);
|
||||||
|
|
||||||
|
if (nestedXobj instanceof PDImageXObject) {
|
||||||
|
nestedImages++;
|
||||||
|
totalImages++;
|
||||||
|
PDImageXObject nestedImage = (PDImageXObject) nestedXobj;
|
||||||
|
|
||||||
|
log.info("Found nested image '{}' in form '{}' on page {} - {}x{}",
|
||||||
|
nestedName.getName(), name.getName(), pageNum + 1,
|
||||||
|
nestedImage.getWidth(), nestedImage.getHeight());
|
||||||
|
|
||||||
|
// Create a specialized reference for the nested image
|
||||||
|
NestedImageReference nestedRef = new NestedImageReference();
|
||||||
|
nestedRef.pageNum = pageNum;
|
||||||
|
nestedRef.formName = name;
|
||||||
|
nestedRef.imageName = nestedName;
|
||||||
|
|
||||||
|
String imageHash = generateImageHash(nestedImage);
|
||||||
|
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(nestedRef);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int uniqueImagesCount = uniqueImages.size();
|
int uniqueImagesCount = uniqueImages.size();
|
||||||
int duplicatedImages = totalImages - uniqueImagesCount;
|
int duplicatedImages = totalImages - uniqueImagesCount;
|
||||||
log.info(
|
log.info(
|
||||||
"Found {} unique images and {} duplicated instances across {} pages",
|
"Found {} unique images and {} duplicated instances across {} pages ({} nested images in form XObjects)",
|
||||||
uniqueImagesCount,
|
uniqueImagesCount,
|
||||||
duplicatedImages,
|
duplicatedImages,
|
||||||
doc.getNumberOfPages());
|
doc.getNumberOfPages(),
|
||||||
|
nestedImages);
|
||||||
|
|
||||||
// SECOND PASS: Process each unique image exactly once
|
// SECOND PASS: Process each unique image exactly once
|
||||||
int compressedImages = 0;
|
int compressedImages = 0;
|
||||||
@ -143,10 +204,33 @@ public class CompressController {
|
|||||||
|
|
||||||
// Get the first instance of this image
|
// Get the first instance of this image
|
||||||
ImageReference firstRef = references.get(0);
|
ImageReference firstRef = references.get(0);
|
||||||
PDPage firstPage = doc.getPage(firstRef.pageNum);
|
PDImageXObject originalImage;
|
||||||
PDResources firstPageResources = firstPage.getResources();
|
|
||||||
PDImageXObject originalImage =
|
// Handle differently based on whether it's a direct or nested image
|
||||||
(PDImageXObject) firstPageResources.getXObject(firstRef.name);
|
if (firstRef instanceof NestedImageReference) {
|
||||||
|
// Get the nested image from within a form XObject
|
||||||
|
NestedImageReference nestedRef = (NestedImageReference) firstRef;
|
||||||
|
PDPage firstPage = doc.getPage(nestedRef.pageNum);
|
||||||
|
PDResources pageResources = firstPage.getResources();
|
||||||
|
|
||||||
|
// Get the form XObject
|
||||||
|
org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject formXObj =
|
||||||
|
(org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject) pageResources.getXObject(nestedRef.formName);
|
||||||
|
|
||||||
|
// Get the nested image from the form's resources
|
||||||
|
PDResources formResources = formXObj.getResources();
|
||||||
|
originalImage = (PDImageXObject) formResources.getXObject(nestedRef.imageName);
|
||||||
|
|
||||||
|
log.info("Processing nested image '{}' from form '{}'",
|
||||||
|
nestedRef.imageName.getName(), nestedRef.formName.getName());
|
||||||
|
} else {
|
||||||
|
// Get direct image from page resources
|
||||||
|
PDPage firstPage = doc.getPage(firstRef.pageNum);
|
||||||
|
PDResources firstPageResources = firstPage.getResources();
|
||||||
|
originalImage = (PDImageXObject) firstPageResources.getXObject(firstRef.name);
|
||||||
|
|
||||||
|
log.debug("Processing direct image '{}'", firstRef.name.getName());
|
||||||
|
}
|
||||||
|
|
||||||
// Track original size
|
// Track original size
|
||||||
int originalSize = (int) originalImage.getCOSObject().getLength();
|
int originalSize = (int) originalImage.getCOSObject().getLength();
|
||||||
@ -185,14 +269,36 @@ public class CompressController {
|
|||||||
|
|
||||||
// Replace ALL instances with the compressed version
|
// Replace ALL instances with the compressed version
|
||||||
for (ImageReference ref : references) {
|
for (ImageReference ref : references) {
|
||||||
// Get the page and resources when needed
|
if (ref instanceof NestedImageReference) {
|
||||||
PDPage page = doc.getPage(ref.pageNum);
|
// Replace nested image within form XObject
|
||||||
PDResources resources = page.getResources();
|
NestedImageReference nestedRef = (NestedImageReference) ref;
|
||||||
resources.put(ref.name, compressedImage);
|
PDPage page = doc.getPage(nestedRef.pageNum);
|
||||||
|
PDResources pageResources = page.getResources();
|
||||||
|
|
||||||
log.info(
|
// Get the form XObject
|
||||||
"Replaced image on page {} with compressed version",
|
org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject formXObj =
|
||||||
ref.pageNum + 1);
|
(org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject)
|
||||||
|
pageResources.getXObject(nestedRef.formName);
|
||||||
|
|
||||||
|
// Replace the nested image in the form's resources
|
||||||
|
PDResources formResources = formXObj.getResources();
|
||||||
|
formResources.put(nestedRef.imageName, compressedImage);
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"Replaced nested image '{}' in form '{}' on page {} with compressed version",
|
||||||
|
nestedRef.imageName.getName(),
|
||||||
|
nestedRef.formName.getName(),
|
||||||
|
nestedRef.pageNum + 1);
|
||||||
|
} else {
|
||||||
|
// Replace direct image in page resources
|
||||||
|
PDPage page = doc.getPage(ref.pageNum);
|
||||||
|
PDResources resources = page.getResources();
|
||||||
|
resources.put(ref.name, compressedImage);
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"Replaced direct image on page {} with compressed version",
|
||||||
|
ref.pageNum + 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
totalCompressedBytes += compressedData.length * references.size();
|
totalCompressedBytes += compressedData.length * references.size();
|
||||||
@ -216,11 +322,12 @@ public class CompressController {
|
|||||||
: 0;
|
: 0;
|
||||||
|
|
||||||
log.info(
|
log.info(
|
||||||
"Image compression summary - Total unique: {}, Compressed: {}, Skipped: {}, Duplicates: {}",
|
"Image compression summary - Total unique: {}, Compressed: {}, Skipped: {}, Duplicates: {}, Nested: {}",
|
||||||
uniqueImagesCount,
|
uniqueImagesCount,
|
||||||
compressedImages,
|
compressedImages,
|
||||||
skippedImages,
|
skippedImages,
|
||||||
duplicatedImages);
|
duplicatedImages,
|
||||||
|
nestedImages);
|
||||||
log.info(
|
log.info(
|
||||||
"Total original image size: {}, compressed: {} (reduced by {}%)",
|
"Total original image size: {}, compressed: {} (reduced by {}%)",
|
||||||
GeneralUtils.formatBytes(totalOriginalBytes),
|
GeneralUtils.formatBytes(totalOriginalBytes),
|
||||||
@ -245,7 +352,6 @@ public class CompressController {
|
|||||||
String.format("%.1f", overallReduction));
|
String.format("%.1f", overallReduction));
|
||||||
return newCompressedPDF;
|
return newCompressedPDF;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private BufferedImage convertToGrayscale(BufferedImage image) {
|
private BufferedImage convertToGrayscale(BufferedImage image) {
|
||||||
@ -611,7 +717,7 @@ public class CompressController {
|
|||||||
|
|
||||||
// Check if optimized file is larger than the original
|
// Check if optimized file is larger than the original
|
||||||
long finalFileSize = Files.size(currentFile);
|
long finalFileSize = Files.size(currentFile);
|
||||||
if (finalFileSize > inputFileSize) {
|
if (finalFileSize >= inputFileSize) {
|
||||||
log.warn("Optimized file is larger than the original. Using the original file instead.");
|
log.warn("Optimized file is larger than the original. Using the original file instead.");
|
||||||
// Use the stored reference to the original file
|
// Use the stored reference to the original file
|
||||||
currentFile = originalFile;
|
currentFile = originalFile;
|
||||||
|
Loading…
Reference in New Issue
Block a user