From 83922c4131814597555702a2d3ba5c9631697c80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bal=C3=A1zs=20Sz=C3=BCcs?=
<127139797+balazs-szucs@users.noreply.github.com>
Date: Thu, 30 Oct 2025 19:13:11 +0100
Subject: [PATCH] feat(compress): add advanced compression parameters and
better optimization levels (#4703)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
# Description of Changes
This PR reworks the compression endpoints, to be more consistent and
apply better compression on higher levels. Some improvement on lower is
also expected.
### Overview:
- Added new global flags: --recompress-flate --compress-streams=y
--object-streams=generate
- Added --decode-level=generalized, so that if the original PDF was
poorly compressed then, it can re-compress it better
- Added --stream-data=compress for better compression of vector/text
parts
- Now conditional: --preserve-unreferenced kept only for safe levels ≤3
- Added --optimize-images --jpeg-quality 35–75 depending on level (this
brings significant drop in file size, compared to earlier.)
- Added QPDF_ZOPFLI for level ≥8 which is better algorithm for
compressing JPEG. Comes with approx 5% gain per JPEG on PDFs. See here
for details:
https://qpdf.readthedocs.io/en/stable/cli.html#zopfli-compression-algorithm
- level 9, Ghostscript uses strongest downsampling (72/72/150) and CMYK
-> RGB
### Images:
- Scale factor now ramps 1 -> 9: 0.98, 0.95, 0.88, 0.78, 0.68, 0.58,
0.48, 0.38, 0.28.
- JPEG quality ramps 1 -> 9: 0.92, 0.88, 0.85, 0.80, 0.72, 0.65, 0.55,
0.45, 0.35.
### Results:
Sample files used:
[image-doc.pdf](https://github.com/user-attachments/files/22984417/image-doc.pdf)
[lorem-ipsum.pdf](https://github.com/user-attachments/files/22984418/lorem-ipsum.pdf)
[Create Microsoft Word Document
Templates.pdf](https://github.com/user-attachments/files/22984419/Create.Microsoft.Word.Document.Templates.pdf)
[sample-1.pdf](https://github.com/user-attachments/files/22984420/sample-1.pdf)
### But is it better than the old? (On level 9)
#### Old vs new:
- image-doc: 1.0 MB vs 454 KB
- create-word: 426 KB vs 247 KB
- lorem-ipsum: 44.7 KB vs 44.4
Most of changes are focused on images, so more images = better gains.
Even on lorem-ipsum text PDF (with no images) there still some gains but
not significant.
(so, it is better)
Closes: #4442
Closes: #4720
---
## Checklist
### General
- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings
### Documentation
- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)
### UI Changes (if applicable)
- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)
### Testing (if applicable)
- [x] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.
---------
Signed-off-by: Balázs Szücs
---
.../api/misc/CompressController.java | 776 ++++++++++++------
1 file changed, 542 insertions(+), 234 deletions(-)
diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java
index 46f062775..4e3ef63a1 100644
--- a/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java
+++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java
@@ -5,6 +5,8 @@ import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.InterruptedIOException;
+import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
@@ -13,6 +15,7 @@ import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.List;
import java.util.Map.Entry;
+import java.util.Objects;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
@@ -27,6 +30,7 @@ import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
@@ -100,64 +104,32 @@ public class CompressController {
long totalCompressedBytes = 0;
}
- public TempFile compressImagesInPDF(
- Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
- throws Exception {
- TempFile newCompressedPDF = tempFileManager.createManagedTempFile(".pdf");
- long originalFileSize = Files.size(pdfFile);
- log.info(
- "Starting image compression with scale factor: {}, JPEG quality: {}, grayscale: {}"
- + " on file size: {}",
- scaleFactor,
- jpegQuality,
- convertToGrayscale,
- GeneralUtils.formatBytes(originalFileSize));
+ // Replace all instances of original images with their compressed versions
+ private static void replaceImages(
+ PDDocument doc,
+ Map> uniqueImages,
+ Map compressedVersions)
+ throws IOException {
- try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) {
- // Find all unique images in the document
- Map> uniqueImages = findImages(doc);
+ for (Entry> entry : uniqueImages.entrySet()) {
+ ImageIdentity imageIdentity = entry.getKey();
+ List references = entry.getValue();
- // Get statistics
- CompressionStats stats = new CompressionStats();
- stats.uniqueImagesCount = uniqueImages.size();
- calculateImageStats(uniqueImages, stats);
+ // Skip if no compressed version exists
+ PDImageXObject compressedImage = compressedVersions.get(imageIdentity);
+ if (compressedImage == null) continue;
- // Create compressed versions of unique images
- Map compressedVersions =
- createCompressedImages(
- doc, uniqueImages, scaleFactor, jpegQuality, convertToGrayscale, stats);
-
- // Replace all instances with compressed versions
- replaceImages(doc, uniqueImages, compressedVersions, stats);
-
- // Log compression statistics
- logCompressionStats(stats, originalFileSize);
-
- // Free memory before saving
- compressedVersions.clear();
- uniqueImages.clear();
-
- log.info("Saving compressed PDF to {}", newCompressedPDF.getPath());
- doc.save(newCompressedPDF.getAbsolutePath());
-
- // Log overall file size reduction
- long compressedFileSize = Files.size(newCompressedPDF.getPath());
- double overallReduction = 100.0 - ((compressedFileSize * 100.0) / originalFileSize);
- log.info(
- "Overall PDF compression: {} → {} (reduced by {}%)",
- GeneralUtils.formatBytes(originalFileSize),
- GeneralUtils.formatBytes(compressedFileSize),
- String.format("%.1f", overallReduction));
- return newCompressedPDF;
- } catch (Exception e) {
- newCompressedPDF.close();
- throw e;
+ // Replace ALL instances with the compressed version
+ for (ImageReference ref : references) {
+ replaceImageReference(doc, ref, compressedImage);
+ }
}
}
// Find all images in the document, both direct and nested within forms
- private Map> findImages(PDDocument doc) throws IOException {
- Map> uniqueImages = new HashMap<>();
+ private static Map> findImages(PDDocument doc)
+ throws IOException {
+ Map> uniqueImages = new HashMap<>();
// Scan through all pages in the document
for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
@@ -176,8 +148,8 @@ public class CompressController {
"Found direct image '{}' on page {} - {}x{}",
name.getName(),
pageNum + 1,
- ((PDImageXObject) xobj).getWidth(),
- ((PDImageXObject) xobj).getHeight());
+ ((PDImage) xobj).getWidth(),
+ ((PDImage) xobj).getHeight());
}
// Form XObject that may contain nested images
else if (isForm(xobj)) {
@@ -189,36 +161,28 @@ public class CompressController {
return uniqueImages;
}
- private boolean isImage(PDXObject xobj) {
- return xobj instanceof PDImageXObject;
- }
-
- private boolean isForm(PDXObject xobj) {
- return xobj instanceof PDFormXObject;
- }
-
- private ImageReference addDirectImage(
+ private static ImageReference addDirectImage(
int pageNum,
COSName name,
PDImageXObject image,
- Map> uniqueImages)
+ Map> uniqueImages)
throws IOException {
ImageReference ref = new ImageReference();
ref.pageNum = pageNum;
ref.name = name;
- String imageHash = generateImageHash(image);
- uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref);
+ ImageIdentity identity = new ImageIdentity(image);
+ uniqueImages.computeIfAbsent(identity, k -> new ArrayList<>()).add(ref);
return ref;
}
// Look for images inside form XObjects
- private void checkFormForImages(
+ private static void checkFormForImages(
int pageNum,
COSName formName,
PDFormXObject formXObj,
- Map> uniqueImages)
+ Map> uniqueImages)
throws IOException {
PDResources formResources = formXObj.getResources();
if (formResources == null || formResources.getXObjectNames() == null) {
@@ -251,15 +215,15 @@ public class CompressController {
nestedRef.formName = formName;
nestedRef.imageName = nestedName;
- String imageHash = generateImageHash(nestedImage);
- uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(nestedRef);
+ ImageIdentity identity = new ImageIdentity(nestedImage);
+ uniqueImages.computeIfAbsent(identity, k -> new ArrayList<>()).add(nestedRef);
}
}
}
// Count total images and nested images
- private void calculateImageStats(
- Map> uniqueImages, CompressionStats stats) {
+ private static void calculateImageStats(
+ Map> uniqueImages, CompressionStats stats) {
for (List references : uniqueImages.values()) {
for (ImageReference ref : references) {
stats.totalImages++;
@@ -270,21 +234,29 @@ public class CompressController {
}
}
+ private static boolean isImage(PDXObject xobj) {
+ return xobj instanceof PDImageXObject;
+ }
+
+ private static boolean isForm(PDXObject xobj) {
+ return xobj instanceof PDFormXObject;
+ }
+
// Create compressed versions of all unique images
- private Map createCompressedImages(
+ private static Map createCompressedImages(
PDDocument doc,
- Map> uniqueImages,
+ Map> uniqueImages,
double scaleFactor,
float jpegQuality,
boolean convertToGrayscale,
CompressionStats stats)
throws IOException {
- Map compressedVersions = new HashMap<>();
+ Map compressedVersions = new HashMap<>();
// Process each unique image exactly once
- for (Entry> entry : uniqueImages.entrySet()) {
- String imageHash = entry.getKey();
+ for (Entry> entry : uniqueImages.entrySet()) {
+ ImageIdentity imageIdentity = entry.getKey();
List references = entry.getValue();
if (references.isEmpty()) continue;
@@ -308,23 +280,24 @@ public class CompressController {
if (compressedImage != null) {
// Store the compressed version in our map
- compressedVersions.put(imageHash, compressedImage);
+ compressedVersions.put(imageIdentity, compressedImage);
stats.compressedImages++;
// Update compression stats
int compressedSize = (int) compressedImage.getCOSObject().getLength();
- stats.totalCompressedBytes += compressedSize * references.size();
+ stats.totalCompressedBytes += (long) compressedSize * references.size();
double reductionPercentage = 100.0 - ((compressedSize * 100.0) / originalSize);
log.info(
- "Image hash {}: Compressed from {} to {} (reduced by {}%)",
- imageHash,
+ "Image identity {}: Compressed from {} to {} (reduced by {}%)",
+ imageIdentity,
GeneralUtils.formatBytes(originalSize),
GeneralUtils.formatBytes(compressedSize),
String.format("%.1f", reductionPercentage));
} else {
- log.info("Image hash {}: Not suitable for compression, skipping", imageHash);
- stats.totalCompressedBytes += originalSize * references.size();
+ log.info(
+ "Image identity {}: Not suitable for compression, skipping", imageIdentity);
+ stats.totalCompressedBytes += (long) originalSize * references.size();
stats.skippedImages++;
}
}
@@ -332,10 +305,130 @@ public class CompressController {
return compressedVersions;
}
+ // Enhanced hash function to identify identical images with more data
+ private static String generateImageHash(PDImageXObject image) {
+ try {
+ // Create a stream for the raw stream data
+ try (InputStream stream = image.getCOSObject().createRawInputStream()) {
+ // Read more data for better hash accuracy (16KB instead of 8KB)
+ byte[] buffer = new byte[16384];
+ int bytesRead = stream.read(buffer);
+ if (bytesRead > 0) {
+ byte[] dataToHash =
+ bytesRead == buffer.length ? buffer : Arrays.copyOf(buffer, bytesRead);
+
+ // Also include image dimensions and color space in the hash
+ String enhancedData =
+ new String(dataToHash, StandardCharsets.UTF_8)
+ + "_"
+ + image.getWidth()
+ + "_"
+ + image.getHeight()
+ + "_"
+ + image.getColorSpace().getName()
+ + "_"
+ + image.getBitsPerComponent();
+
+ return bytesToHexString(generateMD5(enhancedData.getBytes()));
+ }
+ return "empty-stream";
+ }
+ } catch (Exception e) {
+ ExceptionUtils.logException("image hash generation", e);
+ return "fallback-" + System.identityHashCode(image);
+ }
+ }
+
+ public TempFile compressImagesInPDF(
+ Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
+ throws Exception {
+ TempFile newCompressedPDF = tempFileManager.createManagedTempFile(".pdf");
+ long originalFileSize = Files.size(pdfFile);
+ log.info(
+ "Starting image compression with scale factor: {}, JPEG quality: {}, grayscale: {}"
+ + " on file size: {}",
+ scaleFactor,
+ jpegQuality,
+ convertToGrayscale,
+ GeneralUtils.formatBytes(originalFileSize));
+
+ try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) {
+ // Find all unique images in the document
+ Map> uniqueImages = findImages(doc);
+
+ // Get statistics
+ CompressionStats stats = new CompressionStats();
+ stats.uniqueImagesCount = uniqueImages.size();
+ calculateImageStats(uniqueImages, stats);
+
+ // Create compressed versions of unique images
+ Map compressedVersions =
+ createCompressedImages(
+ doc, uniqueImages, scaleFactor, jpegQuality, convertToGrayscale, stats);
+
+ // Replace all instances with compressed versions
+ replaceImages(doc, uniqueImages, compressedVersions);
+
+ // Log compression statistics
+ logCompressionStats(stats, originalFileSize);
+
+ // Free memory before saving
+ compressedVersions.clear();
+ uniqueImages.clear();
+
+ log.info("Saving compressed PDF to {}", newCompressedPDF.getPath());
+ doc.save(newCompressedPDF.getAbsolutePath());
+
+ // Log overall file size reduction
+ long compressedFileSize = Files.size(newCompressedPDF.getPath());
+ double overallReduction = 100.0 - ((compressedFileSize * 100.0) / originalFileSize);
+ log.info(
+ "Overall PDF compression: {} → {} (reduced by {}%)",
+ GeneralUtils.formatBytes(originalFileSize),
+ GeneralUtils.formatBytes(compressedFileSize),
+ String.format("%.1f", overallReduction));
+ return newCompressedPDF;
+ } catch (Exception e) {
+ newCompressedPDF.close();
+ throw e;
+ }
+ }
+
+ // Hash function to identify identical masks
+ private static String generateMaskHash(PDImageXObject image) {
+ try {
+ // Try to get mask data from either getMask() or getSoftMask()
+ PDImageXObject mask = image.getMask();
+ if (mask == null) {
+ mask = image.getSoftMask();
+ }
+
+ if (mask != null) {
+ try (InputStream stream = mask.getCOSObject().createRawInputStream()) {
+ // Read up to first 4KB of mask data for the hash
+ byte[] buffer = new byte[4096];
+ int bytesRead = stream.read(buffer);
+ if (bytesRead > 0) {
+ byte[] dataToHash =
+ bytesRead == buffer.length
+ ? buffer
+ : Arrays.copyOf(buffer, bytesRead);
+ return bytesToHexString(generateMD5(dataToHash));
+ }
+ return "empty-mask";
+ }
+ }
+ return "no-mask";
+ } catch (Exception e) {
+ ExceptionUtils.logException("mask hash generation", e);
+ return "fallback-mask-" + System.identityHashCode(image);
+ }
+ }
+
// Get original image from a reference
- private PDImageXObject getOriginalImage(PDDocument doc, ImageReference ref) throws IOException {
+ private static PDImageXObject getOriginalImage(PDDocument doc, ImageReference ref)
+ throws IOException {
if (ref instanceof NestedImageReference nestedRef) {
- // Get the nested image from within a form XObject
PDPage page = doc.getPage(nestedRef.pageNum);
PDResources pageResources = page.getResources();
@@ -354,7 +447,7 @@ public class CompressController {
}
// Try to compress an image if it makes sense
- private PDImageXObject compressImage(
+ private static PDImageXObject compressImage(
PDDocument doc,
PDImageXObject originalImage,
int originalSize,
@@ -385,31 +478,16 @@ public class CompressController {
return null;
}
- // Replace all instances of original images with their compressed versions
- private void replaceImages(
- PDDocument doc,
- Map> uniqueImages,
- Map compressedVersions,
- CompressionStats stats)
- throws IOException {
-
- for (Entry> entry : uniqueImages.entrySet()) {
- String imageHash = entry.getKey();
- List references = entry.getValue();
-
- // Skip if no compressed version exists
- PDImageXObject compressedImage = compressedVersions.get(imageHash);
- if (compressedImage == null) continue;
-
- // Replace ALL instances with the compressed version
- for (ImageReference ref : references) {
- replaceImageReference(doc, ref, compressedImage);
- }
+ private static String bytesToHexString(byte[] bytes) {
+ StringBuilder sb = new StringBuilder();
+ for (byte b : bytes) {
+ sb.append(String.format("%02x", b));
}
+ return sb.toString();
}
// Replace a specific image reference with a compressed version
- private void replaceImageReference(
+ private static void replaceImageReference(
PDDocument doc, ImageReference ref, PDImageXObject compressedImage) throws IOException {
if (ref instanceof NestedImageReference nestedRef) {
// Replace nested image within form XObject
@@ -439,7 +517,7 @@ public class CompressController {
}
// Log final stats about the compression
- private void logCompressionStats(CompressionStats stats, long originalFileSize) {
+ private static void logCompressionStats(CompressionStats stats, long originalFileSize) {
// Calculate image reduction percentage
double overallImageReduction =
stats.totalOriginalBytes > 0
@@ -463,7 +541,7 @@ public class CompressController {
String.format("%.1f", overallImageReduction));
}
- private BufferedImage convertToGrayscale(BufferedImage image) {
+ private static BufferedImage convertToGrayscale(BufferedImage image) {
BufferedImage grayImage =
new BufferedImage(
image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
@@ -476,7 +554,7 @@ public class CompressController {
}
// Resize and optionally convert to grayscale
- private BufferedImage processAndCompressImage(
+ private static BufferedImage processAndCompressImage(
PDImageXObject image, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
throws IOException {
BufferedImage bufferedImage = image.getImage();
@@ -558,7 +636,8 @@ public class CompressController {
}
// Convert image to byte array with quality settings
- private byte[] convertToBytes(BufferedImage scaledImage, float jpegQuality) throws IOException {
+ private static byte[] convertToBytes(BufferedImage scaledImage, float jpegQuality)
+ throws IOException {
String format = scaledImage.getColorModel().hasAlpha() ? "png" : "jpeg";
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
@@ -588,36 +667,77 @@ public class CompressController {
return outputStream.toByteArray();
}
- // Hash function to identify identical images
- private String generateImageHash(PDImageXObject image) {
+ // Get image filter/compression type
+ private static String getImageFilter(PDImageXObject image) {
try {
- // Create a stream for the raw stream data
- try (InputStream stream = image.getCOSObject().createRawInputStream()) {
- // Read up to first 8KB of data for the hash
- byte[] buffer = new byte[8192];
- int bytesRead = stream.read(buffer);
- if (bytesRead > 0) {
- byte[] dataToHash =
- bytesRead == buffer.length ? buffer : Arrays.copyOf(buffer, bytesRead);
- return bytesToHexString(generateMD5(dataToHash));
- }
- return "empty-stream";
+ return image.getCOSObject().getDictionaryObject(COSName.FILTER).toString();
+ } catch (Exception e) {
+ return "unknown";
+ }
+ }
+
+ // Get color profile information
+ private static String getColorProfileInfo(PDImageXObject image) {
+ try {
+ // Try to get ICC profile information
+ if (image.getColorSpace() != null) {
+ return image.getColorSpace().getName()
+ + "_"
+ + image.getColorSpace().getNumberOfComponents();
+ }
+ return "no-profile";
+ } catch (Exception e) {
+ return "error-profile";
+ }
+ }
+
+ // Determine image type from stream data
+ private static String getImageType(PDImageXObject image) {
+ try {
+ String filter = getImageFilter(image);
+ if (filter.contains("DCTDecode") || filter.contains("JPXDecode")) {
+ return "JPEG";
+ } else if (filter.contains("FlateDecode")) {
+ return "PNG";
+ } else if (filter.contains("CCITTFaxDecode")) {
+ return "TIFF";
+ } else if (filter.contains("JBIG2Decode")) {
+ return "JBIG2";
+ } else {
+ return "RAW";
}
} catch (Exception e) {
- ExceptionUtils.logException("image hash generation", e);
- return "fallback-" + System.identityHashCode(image);
+ return "unknown";
}
}
- private String bytesToHexString(byte[] bytes) {
- StringBuilder sb = new StringBuilder();
- for (byte b : bytes) {
- sb.append(String.format("%02x", b));
+ // Generate hash of decode parameters
+ private static String generateDecodeParamsHash(PDImageXObject image) {
+ try {
+ // Get decode parameters that affect how the image is processed
+ StringBuilder params = new StringBuilder();
+
+ // Add filter information
+ params.append(getImageFilter(image));
+
+ // Add color space components
+ params.append("_").append(image.getColorSpace().getNumberOfComponents());
+
+ // Add bits per component
+ params.append("_").append(image.getBitsPerComponent());
+
+ // Add any decode array parameters
+ if (image.getDecode() != null) {
+ params.append("_").append(image.getDecode().toString());
+ }
+
+ return bytesToHexString(generateMD5(params.toString().getBytes()));
+ } catch (Exception e) {
+ return "fallback-decode-" + System.identityHashCode(image);
}
- return sb.toString();
}
- private byte[] generateMD5(byte[] data) throws IOException {
+ private static byte[] generateMD5(byte[] data) {
try {
MessageDigest md = MessageDigest.getInstance("MD5");
return md.digest(data); // Get the MD5 hash of the image bytes
@@ -626,36 +746,181 @@ public class CompressController {
}
}
- // Scale factors for different optimization levels
- private double getScaleFactorForLevel(int optimizeLevel) {
+ private static class ImageIdentity {
+ private final String pixelHash; // Hash of pixel data
+ private final String colorSpace; // e.g., "DeviceRGB", "DeviceCMYK"
+ private final int bitsPerComponent;
+ private final boolean hasMask; // Has transparency
+ private final String maskHash; // Hash of mask data if present
+ private final int width; // Image width in pixels
+ private final int height; // Image height in pixels
+ private final String filter; // Image filter/compression type
+ private final String colorProfile; // Color profile information
+ private final long streamLength; // Original stream length
+ private final String imageType; // Image type (JPEG, PNG, etc.)
+ private final String decodeParams; // Decode parameters hash
+ private final String metadataHash; // Hash of image metadata
+
+ public ImageIdentity(PDImageXObject image) throws IOException {
+ this.pixelHash = generateImageHash(image);
+ this.colorSpace = image.getColorSpace().getName();
+ this.bitsPerComponent = image.getBitsPerComponent();
+ this.hasMask = image.getMask() != null || image.getSoftMask() != null;
+ this.maskHash = hasMask ? generateMaskHash(image) : null;
+ this.width = image.getWidth();
+ this.height = image.getHeight();
+ this.filter = getImageFilter(image);
+ this.colorProfile = getColorProfileInfo(image);
+ this.streamLength = image.getCOSObject().getLength();
+ this.imageType = getImageType(image);
+ this.decodeParams = generateDecodeParamsHash(image);
+ this.metadataHash = this.generateMetadataHash(image);
+ }
+
+ // Generate hash of image metadata
+ private String generateMetadataHash(PDImageXObject image) {
+ try {
+ StringBuilder metadata = new StringBuilder();
+
+ // Add image dimensions
+ metadata.append(image.getWidth()).append("x").append(image.getHeight());
+
+ // Add color space info
+ metadata.append("_").append(image.getColorSpace().getName());
+
+ // Add bits per component
+ metadata.append("_").append(image.getBitsPerComponent());
+
+ // Add stream length
+ metadata.append("_").append(image.getCOSObject().getLength());
+
+ // Add mask information
+ if (image.getMask() != null) {
+ metadata.append("_mask");
+ }
+ if (image.getSoftMask() != null) {
+ metadata.append("_softmask");
+ }
+
+ return bytesToHexString(generateMD5(metadata.toString().getBytes()));
+ } catch (Exception e) {
+ return "fallback-meta-" + System.identityHashCode(image);
+ }
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (!(o instanceof ImageIdentity that)) return false;
+ return bitsPerComponent == that.bitsPerComponent
+ && hasMask == that.hasMask
+ && width == that.width
+ && height == that.height
+ && streamLength == that.streamLength
+ && Objects.equals(pixelHash, that.pixelHash)
+ && Objects.equals(colorSpace, that.colorSpace)
+ && Objects.equals(maskHash, that.maskHash)
+ && Objects.equals(filter, that.filter)
+ && Objects.equals(colorProfile, that.colorProfile)
+ && Objects.equals(imageType, that.imageType)
+ && Objects.equals(decodeParams, that.decodeParams)
+ && Objects.equals(metadataHash, that.metadataHash);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(
+ pixelHash,
+ colorSpace,
+ bitsPerComponent,
+ hasMask,
+ maskHash,
+ width,
+ height,
+ filter,
+ colorProfile,
+ streamLength,
+ imageType,
+ decodeParams,
+ metadataHash);
+ }
+
+ @Override
+ public String toString() {
+ return String.format(
+ "%s_%s_%d_%dx%d_%s_%s_%d_%s_%s_%s",
+ pixelHash.substring(0, Math.min(8, pixelHash.length())),
+ colorSpace,
+ bitsPerComponent,
+ width,
+ height,
+ filter,
+ imageType,
+ streamLength,
+ hasMask ? "masked" : "nomask",
+ decodeParams.substring(0, Math.min(4, decodeParams.length())),
+ metadataHash.substring(0, Math.min(4, metadataHash.length())));
+ }
+ }
+
+ // Scale factors for different optimization levels (lower => smaller)
+ private static double getScaleFactorForLevel(int optimizeLevel) {
return switch (optimizeLevel) {
- case 3 -> 0.85;
- case 4 -> 0.75;
- case 5 -> 0.65;
- case 6 -> 0.55;
- case 7 -> 0.45;
- case 8 -> 0.35;
- case 9 -> 0.25;
- case 10 -> 0.15;
+ case 1 -> 0.98; // negligible resizing
+ case 2 -> 0.95;
+ case 3 -> 0.88;
+ case 4 -> 0.78;
+ case 5 -> 0.68;
+ case 6 -> 0.58;
+ case 7 -> 0.48;
+ case 8 -> 0.38;
+ case 9 -> 0.28;
default -> 1.0;
};
}
- // JPEG quality for different optimization levels
- private float getJpegQualityForLevel(int optimizeLevel) {
+ // JPEG quality for different optimization levels (lower => smaller)
+ private static float getJpegQualityForLevel(int optimizeLevel) {
return switch (optimizeLevel) {
+ case 1 -> 0.92f; // very light
+ case 2 -> 0.88f;
case 3 -> 0.85f;
case 4 -> 0.80f;
- case 5 -> 0.75f;
- case 6 -> 0.70f;
- case 7 -> 0.60f;
- case 8 -> 0.50f;
- case 9 -> 0.35f;
- case 10 -> 0.2f;
- default -> 0.7f;
+ case 5 -> 0.72f;
+ case 6 -> 0.65f;
+ case 7 -> 0.55f;
+ case 8 -> 0.45f;
+ case 9 -> 0.35f; // aggressive
+ default -> 0.75f;
};
}
+ // Pick optimization level based on target size
+ private static int determineOptimizeLevel(double sizeReductionRatio) {
+ if (sizeReductionRatio > 0.9) return 1;
+ if (sizeReductionRatio > 0.8) return 2;
+ if (sizeReductionRatio > 0.7) return 3;
+ if (sizeReductionRatio > 0.6) return 4;
+ if (sizeReductionRatio > 0.3) return 5;
+ if (sizeReductionRatio > 0.2) return 6;
+ if (sizeReductionRatio > 0.15) return 7;
+ if (sizeReductionRatio > 0.1) return 8;
+ return 9;
+ }
+
+ // Increment optimization level if we need more compression
+ private static int incrementOptimizeLevel(int currentLevel, long currentSize, long targetSize) {
+ double currentRatio = currentSize / (double) targetSize;
+ log.info("Current compression ratio: {}", String.format("%.2f", currentRatio));
+
+ if (currentRatio > 2.0) {
+ return Math.min(9, currentLevel + 3);
+ } else if (currentRatio > 1.5) {
+ return Math.min(9, currentLevel + 2);
+ }
+ return Math.min(9, currentLevel + 1);
+ }
+
@PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/compress-pdf")
@Operation(
summary = "Optimize PDF file",
@@ -705,65 +970,56 @@ public class CompressController {
while (!sizeMet && optimizeLevel <= 9) {
// Apply external compression first
- if (!externalCompressionApplied) {
- boolean ghostscriptSuccess = false;
+ boolean ghostscriptSuccess = false;
- // Try Ghostscript first if available - for ANY compression level
- if (isGhostscriptEnabled()) {
- try {
- applyGhostscriptCompression(request, optimizeLevel, currentFile);
- log.info("Ghostscript compression applied successfully");
- ghostscriptSuccess = true;
- } catch (IOException e) {
- log.warn("Ghostscript compression failed, trying fallback methods");
- }
+ if (isGhostscriptEnabled() && optimizeLevel >= 6) {
+ try {
+ applyGhostscriptCompression(request, optimizeLevel, currentFile);
+ log.info("Ghostscript compression applied successfully");
+ ghostscriptSuccess = true;
+ } catch (IOException e) {
+ log.warn("Ghostscript compression failed, continuing with other methods");
}
+ }
- // Fallback to QPDF if Ghostscript failed or not available (levels 1-3 only)
- if (!ghostscriptSuccess && isQpdfEnabled() && optimizeLevel <= 3) {
- try {
- applyQpdfCompression(request, optimizeLevel, currentFile);
- log.info("QPDF compression applied successfully");
- } catch (IOException e) {
- log.warn("QPDF compression also failed");
- }
+ // Always apply QPDF when enabled to recompress/optimize structure
+ if (isQpdfEnabled()) {
+ try {
+ applyQpdfCompression(request, optimizeLevel, currentFile);
+ log.info("QPDF compression applied successfully");
+ } catch (IOException e) {
+ log.warn("QPDF compression failed");
}
+ } else if (!ghostscriptSuccess) {
+ log.info(
+ "No external compression tools available, using image compression"
+ + " only");
+ }
- if (!ghostscriptSuccess && !isQpdfEnabled()) {
- log.info(
- "No external compression tools available, using image compression"
- + " only");
- }
+ externalCompressionApplied = true;
- externalCompressionApplied = true;
-
- // Skip image compression if Ghostscript succeeded
- if (ghostscriptSuccess) {
- imageCompressionApplied = true;
- }
+ // Skip image compression if Ghostscript succeeded
+ if (ghostscriptSuccess) {
+ imageCompressionApplied = true;
}
// Apply image compression for levels 4+ only if Ghostscript didn't run
if ((optimizeLevel >= 4 || Boolean.TRUE.equals(convertToGrayscale))
&& !imageCompressionApplied) {
// Use different scale factors based on level
- double scaleFactor =
- switch (optimizeLevel) {
- case 4 -> 0.95; // 95% of original size
- case 5 -> 0.9; // 90% of original size
- case 6 -> 0.8; // 80% of original size
- case 7 -> 0.7; // 70% of original size
- case 8 -> 0.65; // 65% of original size
- case 9 -> 0.5; // 50% of original size
- default -> 1.0;
- };
+ double scaleFactor = getScaleFactorForLevel(optimizeLevel);
+ // Use JPEG quality settings based on optimization level
+ float jpegQuality = getJpegQualityForLevel(optimizeLevel);
- log.info("Applying image compression with scale factor: {}", scaleFactor);
+ log.info(
+ "Applying image compression with scale factor: {} and JPEG quality: {}",
+ scaleFactor,
+ jpegQuality);
TempFile compressedImageFile =
compressImagesInPDF(
currentFile,
scaleFactor,
- 0.7f, // Default JPEG quality
+ jpegQuality,
Boolean.TRUE.equals(convertToGrayscale));
tempFiles.add(compressedImageFile);
@@ -793,7 +1049,11 @@ public class CompressController {
}
}
- // Use original if optimized file is somehow larger
+ // Use original if optimized file is somehow larger or currentFile is invalid
+ if (!Files.exists(currentFile)) {
+ log.warn("Optimized file missing or invalid. Using the original file instead.");
+ currentFile = originalFile;
+ }
long finalFileSize = Files.size(currentFile);
if (finalFileSize >= inputFileSize) {
log.warn(
@@ -840,6 +1100,12 @@ public class CompressController {
command.add("-dQUIET");
command.add("-dBATCH");
+ // General compression enhancements
+ command.add("-dDetectDuplicateImages=true");
+ command.add("-dDownsampleColorImages=true");
+ command.add("-dCompressFonts=true");
+ command.add("-dSubsetFonts=true");
+
// Map optimization levels to Ghostscript settings
switch (optimizeLevel) {
case 1:
@@ -865,9 +1131,16 @@ public class CompressController {
case 8:
case 9:
command.add("-dPDFSETTINGS=/screen");
- command.add("-dColorImageResolution=100");
- command.add("-dGrayImageResolution=100");
- command.add("-dMonoImageResolution=200");
+ // Use stronger downsampling at the highest level
+ if (optimizeLevel == 9) {
+ command.add("-dColorImageResolution=72");
+ command.add("-dGrayImageResolution=72");
+ command.add("-dMonoImageResolution=150");
+ } else {
+ command.add("-dColorImageResolution=100");
+ command.add("-dGrayImageResolution=100");
+ command.add("-dMonoImageResolution=200");
+ }
break;
case 10:
command.add("-dPDFSETTINGS=/screen");
@@ -880,6 +1153,19 @@ public class CompressController {
break;
}
+ // If grayscale conversion requested, enforce grayscale color processing in Ghostscript
+ boolean grayscaleRequested = Boolean.TRUE.equals(request.getGrayscale());
+ if (grayscaleRequested) {
+ command.add("-dColorConversionStrategy=/Gray");
+ command.add("-dProcessColorModel=/DeviceGray");
+ }
+
+ // Optional conversion: CMYK -> RGB for color output only (avoid conflict with
+ // grayscale)
+ if (optimizeLevel >= 7 && !grayscaleRequested) {
+ command.add("-dConvertCMYKImagesToRGB=true");
+ }
+
command.add("-sOutputFile=" + gsOutputPath.toString());
command.add(currentFile.toString());
@@ -906,6 +1192,14 @@ public class CompressController {
throw new IOException("Ghostscript compression failed");
}
+ // replace the existing catch with these two catches
+ } catch (InterruptedException e) {
+ // restore interrupted status and propagate as an IOException
+ Thread.currentThread().interrupt();
+ InterruptedIOException ie =
+ new InterruptedIOException("Ghostscript command interrupted");
+ ie.initCause(e);
+ throw ie;
} catch (Exception e) {
log.warn("Ghostscript compression failed, will fallback to other methods", e);
throw new IOException("Ghostscript compression failed", e);
@@ -921,14 +1215,13 @@ public class CompressController {
log.info("Pre-QPDF file size: {}", GeneralUtils.formatBytes(preQpdfSize));
// Map optimization levels to QPDF compression levels
- int qpdfCompressionLevel;
- if (optimizeLevel == 1) {
- qpdfCompressionLevel = 5;
- } else if (optimizeLevel == 2) {
- qpdfCompressionLevel = 9;
- } else {
- qpdfCompressionLevel = 9;
- }
+ int qpdfCompressionLevel =
+ switch (optimizeLevel) {
+ case 1 -> 3; // faster, lighter
+ case 2 -> 5;
+ case 3, 4, 5 -> 7;
+ default -> 9; // 6-9 use max
+ };
try (TempFile qpdfOutputFile = tempFileManager.createManagedTempFile(".pdf")) {
Path qpdfOutputPath = qpdfOutputFile.getPath();
@@ -936,24 +1229,59 @@ public class CompressController {
// Build QPDF command
List command = new ArrayList<>();
command.add("qpdf");
- if (request.getNormalize()) {
+ if (Boolean.TRUE.equals(request.getNormalize())) {
command.add("--normalize-content=y");
}
- if (request.getLinearize()) {
+ if (Boolean.TRUE.equals(request.getLinearize())) {
command.add("--linearize");
}
+ // Decode/encode settings for maximal recompression
+ command.add("--decode-level=generalized");
command.add("--recompress-flate");
command.add("--compression-level=" + qpdfCompressionLevel);
command.add("--compress-streams=y");
+ command.add("--stream-data=compress");
+ // Preserve unreferenced only at lower levels for safety; skip at highest levels for
+ // size
+ if (optimizeLevel <= 3) {
+ command.add("--preserve-unreferenced");
+ }
+ // Optional image optimization in qpdf (no resampling; lossy JPEG when beneficial)
+ // Enable qpdf image optimization at medium/high levels
+ if (optimizeLevel >= 5) {
+ command.add("--optimize-images");
+ // Map optimize level to JPEG quality (lower number => smaller size)
+ Integer jpegQuality =
+ switch (optimizeLevel) {
+ case 5 -> 78;
+ case 6 -> 68;
+ case 7 -> 58;
+ case 8 -> 46;
+ default -> 34; // 9+
+ };
+ command.add("--jpeg-quality=" + jpegQuality);
+ }
command.add("--object-streams=generate");
command.add(currentFile.toString());
command.add(qpdfOutputPath.toString());
ProcessExecutorResult returnCode = null;
try {
+ // On high levels, prefer zopfli if platform supports env wrapper
+ if (optimizeLevel >= 8) {
+ String os = System.getProperty("os.name").toLowerCase(Locale.ROOT);
+ if (!os.contains("win")) {
+ // Prepend env QPDF_ZOPFLI=silent for Unix-like systems
+ List zopfliCommand = new ArrayList<>();
+ zopfliCommand.add("env");
+ zopfliCommand.add("QPDF_ZOPFLI=silent");
+ zopfliCommand.addAll(command);
+ command = zopfliCommand;
+ }
+ }
returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
- .runCommandWithOutputHandling(command);
+ .runCommandWithOutputHandling(command, null);
// Update current file to the QPDF output
Files.copy(qpdfOutputPath, currentFile, StandardCopyOption.REPLACE_EXISTING);
@@ -965,39 +1293,19 @@ public class CompressController {
GeneralUtils.formatBytes(postQpdfSize),
String.format("%.1f", qpdfReduction));
- } catch (Exception e) {
+ } catch (IOException e) {
if (returnCode != null && returnCode.getRc() != 3) {
throw new IOException("QPDF command failed", e);
}
// If QPDF fails, keep using the current file
log.warn("QPDF compression failed, continuing with current file", e);
+ } catch (InterruptedException e) {
+ // restore interrupted status and propagate as an IOException
+ Thread.currentThread().interrupt();
+ InterruptedIOException ie = new InterruptedIOException("QPDF command interrupted");
+ ie.initCause(e);
+ throw ie;
}
}
}
-
- // Pick optimization level based on target size
- private int determineOptimizeLevel(double sizeReductionRatio) {
- if (sizeReductionRatio > 0.9) return 1;
- if (sizeReductionRatio > 0.8) return 2;
- if (sizeReductionRatio > 0.7) return 3;
- if (sizeReductionRatio > 0.6) return 4;
- if (sizeReductionRatio > 0.3) return 5;
- if (sizeReductionRatio > 0.2) return 6;
- if (sizeReductionRatio > 0.15) return 7;
- if (sizeReductionRatio > 0.1) return 8;
- return 9;
- }
-
- // Increment optimization level if we need more compression
- private int incrementOptimizeLevel(int currentLevel, long currentSize, long targetSize) {
- double currentRatio = currentSize / (double) targetSize;
- log.info("Current compression ratio: {}", String.format("%.2f", currentRatio));
-
- if (currentRatio > 2.0) {
- return Math.min(9, currentLevel + 3);
- } else if (currentRatio > 1.5) {
- return Math.min(9, currentLevel + 2);
- }
- return Math.min(9, currentLevel + 1);
- }
}