diff --git a/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java b/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java index 46f062775..4e3ef63a1 100644 --- a/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java +++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java @@ -5,6 +5,8 @@ import java.awt.image.BufferedImage; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.InterruptedIOException; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; @@ -13,6 +15,7 @@ import java.security.NoSuchAlgorithmException; import java.util.*; import java.util.List; import java.util.Map.Entry; +import java.util.Objects; import javax.imageio.IIOImage; import javax.imageio.ImageIO; @@ -27,6 +30,7 @@ import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.graphics.PDXObject; import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; +import org.apache.pdfbox.pdmodel.graphics.image.PDImage; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; @@ -100,64 +104,32 @@ public class CompressController { long totalCompressedBytes = 0; } - public TempFile compressImagesInPDF( - Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale) - throws Exception { - TempFile newCompressedPDF = tempFileManager.createManagedTempFile(".pdf"); - long originalFileSize = Files.size(pdfFile); - log.info( - "Starting image compression with scale factor: {}, JPEG quality: {}, grayscale: {}" - + " on file size: {}", - scaleFactor, - jpegQuality, - convertToGrayscale, - GeneralUtils.formatBytes(originalFileSize)); + // Replace all instances of original images with their compressed versions + private static void replaceImages( + PDDocument doc, + Map> uniqueImages, + Map compressedVersions) + throws IOException { - try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) { - // Find all unique images in the document - Map> uniqueImages = findImages(doc); + for (Entry> entry : uniqueImages.entrySet()) { + ImageIdentity imageIdentity = entry.getKey(); + List references = entry.getValue(); - // Get statistics - CompressionStats stats = new CompressionStats(); - stats.uniqueImagesCount = uniqueImages.size(); - calculateImageStats(uniqueImages, stats); + // Skip if no compressed version exists + PDImageXObject compressedImage = compressedVersions.get(imageIdentity); + if (compressedImage == null) continue; - // Create compressed versions of unique images - Map compressedVersions = - createCompressedImages( - doc, uniqueImages, scaleFactor, jpegQuality, convertToGrayscale, stats); - - // Replace all instances with compressed versions - replaceImages(doc, uniqueImages, compressedVersions, stats); - - // Log compression statistics - logCompressionStats(stats, originalFileSize); - - // Free memory before saving - compressedVersions.clear(); - uniqueImages.clear(); - - log.info("Saving compressed PDF to {}", newCompressedPDF.getPath()); - doc.save(newCompressedPDF.getAbsolutePath()); - - // Log overall file size reduction - long compressedFileSize = Files.size(newCompressedPDF.getPath()); - double overallReduction = 100.0 - ((compressedFileSize * 100.0) / originalFileSize); - log.info( - "Overall PDF compression: {} → {} (reduced by {}%)", - GeneralUtils.formatBytes(originalFileSize), - GeneralUtils.formatBytes(compressedFileSize), - String.format("%.1f", overallReduction)); - return newCompressedPDF; - } catch (Exception e) { - newCompressedPDF.close(); - throw e; + // Replace ALL instances with the compressed version + for (ImageReference ref : references) { + replaceImageReference(doc, ref, compressedImage); + } } } // Find all images in the document, both direct and nested within forms - private Map> findImages(PDDocument doc) throws IOException { - Map> uniqueImages = new HashMap<>(); + private static Map> findImages(PDDocument doc) + throws IOException { + Map> uniqueImages = new HashMap<>(); // Scan through all pages in the document for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) { @@ -176,8 +148,8 @@ public class CompressController { "Found direct image '{}' on page {} - {}x{}", name.getName(), pageNum + 1, - ((PDImageXObject) xobj).getWidth(), - ((PDImageXObject) xobj).getHeight()); + ((PDImage) xobj).getWidth(), + ((PDImage) xobj).getHeight()); } // Form XObject that may contain nested images else if (isForm(xobj)) { @@ -189,36 +161,28 @@ public class CompressController { return uniqueImages; } - private boolean isImage(PDXObject xobj) { - return xobj instanceof PDImageXObject; - } - - private boolean isForm(PDXObject xobj) { - return xobj instanceof PDFormXObject; - } - - private ImageReference addDirectImage( + private static ImageReference addDirectImage( int pageNum, COSName name, PDImageXObject image, - Map> uniqueImages) + Map> uniqueImages) throws IOException { ImageReference ref = new ImageReference(); ref.pageNum = pageNum; ref.name = name; - String imageHash = generateImageHash(image); - uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref); + ImageIdentity identity = new ImageIdentity(image); + uniqueImages.computeIfAbsent(identity, k -> new ArrayList<>()).add(ref); return ref; } // Look for images inside form XObjects - private void checkFormForImages( + private static void checkFormForImages( int pageNum, COSName formName, PDFormXObject formXObj, - Map> uniqueImages) + Map> uniqueImages) throws IOException { PDResources formResources = formXObj.getResources(); if (formResources == null || formResources.getXObjectNames() == null) { @@ -251,15 +215,15 @@ public class CompressController { nestedRef.formName = formName; nestedRef.imageName = nestedName; - String imageHash = generateImageHash(nestedImage); - uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(nestedRef); + ImageIdentity identity = new ImageIdentity(nestedImage); + uniqueImages.computeIfAbsent(identity, k -> new ArrayList<>()).add(nestedRef); } } } // Count total images and nested images - private void calculateImageStats( - Map> uniqueImages, CompressionStats stats) { + private static void calculateImageStats( + Map> uniqueImages, CompressionStats stats) { for (List references : uniqueImages.values()) { for (ImageReference ref : references) { stats.totalImages++; @@ -270,21 +234,29 @@ public class CompressController { } } + private static boolean isImage(PDXObject xobj) { + return xobj instanceof PDImageXObject; + } + + private static boolean isForm(PDXObject xobj) { + return xobj instanceof PDFormXObject; + } + // Create compressed versions of all unique images - private Map createCompressedImages( + private static Map createCompressedImages( PDDocument doc, - Map> uniqueImages, + Map> uniqueImages, double scaleFactor, float jpegQuality, boolean convertToGrayscale, CompressionStats stats) throws IOException { - Map compressedVersions = new HashMap<>(); + Map compressedVersions = new HashMap<>(); // Process each unique image exactly once - for (Entry> entry : uniqueImages.entrySet()) { - String imageHash = entry.getKey(); + for (Entry> entry : uniqueImages.entrySet()) { + ImageIdentity imageIdentity = entry.getKey(); List references = entry.getValue(); if (references.isEmpty()) continue; @@ -308,23 +280,24 @@ public class CompressController { if (compressedImage != null) { // Store the compressed version in our map - compressedVersions.put(imageHash, compressedImage); + compressedVersions.put(imageIdentity, compressedImage); stats.compressedImages++; // Update compression stats int compressedSize = (int) compressedImage.getCOSObject().getLength(); - stats.totalCompressedBytes += compressedSize * references.size(); + stats.totalCompressedBytes += (long) compressedSize * references.size(); double reductionPercentage = 100.0 - ((compressedSize * 100.0) / originalSize); log.info( - "Image hash {}: Compressed from {} to {} (reduced by {}%)", - imageHash, + "Image identity {}: Compressed from {} to {} (reduced by {}%)", + imageIdentity, GeneralUtils.formatBytes(originalSize), GeneralUtils.formatBytes(compressedSize), String.format("%.1f", reductionPercentage)); } else { - log.info("Image hash {}: Not suitable for compression, skipping", imageHash); - stats.totalCompressedBytes += originalSize * references.size(); + log.info( + "Image identity {}: Not suitable for compression, skipping", imageIdentity); + stats.totalCompressedBytes += (long) originalSize * references.size(); stats.skippedImages++; } } @@ -332,10 +305,130 @@ public class CompressController { return compressedVersions; } + // Enhanced hash function to identify identical images with more data + private static String generateImageHash(PDImageXObject image) { + try { + // Create a stream for the raw stream data + try (InputStream stream = image.getCOSObject().createRawInputStream()) { + // Read more data for better hash accuracy (16KB instead of 8KB) + byte[] buffer = new byte[16384]; + int bytesRead = stream.read(buffer); + if (bytesRead > 0) { + byte[] dataToHash = + bytesRead == buffer.length ? buffer : Arrays.copyOf(buffer, bytesRead); + + // Also include image dimensions and color space in the hash + String enhancedData = + new String(dataToHash, StandardCharsets.UTF_8) + + "_" + + image.getWidth() + + "_" + + image.getHeight() + + "_" + + image.getColorSpace().getName() + + "_" + + image.getBitsPerComponent(); + + return bytesToHexString(generateMD5(enhancedData.getBytes())); + } + return "empty-stream"; + } + } catch (Exception e) { + ExceptionUtils.logException("image hash generation", e); + return "fallback-" + System.identityHashCode(image); + } + } + + public TempFile compressImagesInPDF( + Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale) + throws Exception { + TempFile newCompressedPDF = tempFileManager.createManagedTempFile(".pdf"); + long originalFileSize = Files.size(pdfFile); + log.info( + "Starting image compression with scale factor: {}, JPEG quality: {}, grayscale: {}" + + " on file size: {}", + scaleFactor, + jpegQuality, + convertToGrayscale, + GeneralUtils.formatBytes(originalFileSize)); + + try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) { + // Find all unique images in the document + Map> uniqueImages = findImages(doc); + + // Get statistics + CompressionStats stats = new CompressionStats(); + stats.uniqueImagesCount = uniqueImages.size(); + calculateImageStats(uniqueImages, stats); + + // Create compressed versions of unique images + Map compressedVersions = + createCompressedImages( + doc, uniqueImages, scaleFactor, jpegQuality, convertToGrayscale, stats); + + // Replace all instances with compressed versions + replaceImages(doc, uniqueImages, compressedVersions); + + // Log compression statistics + logCompressionStats(stats, originalFileSize); + + // Free memory before saving + compressedVersions.clear(); + uniqueImages.clear(); + + log.info("Saving compressed PDF to {}", newCompressedPDF.getPath()); + doc.save(newCompressedPDF.getAbsolutePath()); + + // Log overall file size reduction + long compressedFileSize = Files.size(newCompressedPDF.getPath()); + double overallReduction = 100.0 - ((compressedFileSize * 100.0) / originalFileSize); + log.info( + "Overall PDF compression: {} → {} (reduced by {}%)", + GeneralUtils.formatBytes(originalFileSize), + GeneralUtils.formatBytes(compressedFileSize), + String.format("%.1f", overallReduction)); + return newCompressedPDF; + } catch (Exception e) { + newCompressedPDF.close(); + throw e; + } + } + + // Hash function to identify identical masks + private static String generateMaskHash(PDImageXObject image) { + try { + // Try to get mask data from either getMask() or getSoftMask() + PDImageXObject mask = image.getMask(); + if (mask == null) { + mask = image.getSoftMask(); + } + + if (mask != null) { + try (InputStream stream = mask.getCOSObject().createRawInputStream()) { + // Read up to first 4KB of mask data for the hash + byte[] buffer = new byte[4096]; + int bytesRead = stream.read(buffer); + if (bytesRead > 0) { + byte[] dataToHash = + bytesRead == buffer.length + ? buffer + : Arrays.copyOf(buffer, bytesRead); + return bytesToHexString(generateMD5(dataToHash)); + } + return "empty-mask"; + } + } + return "no-mask"; + } catch (Exception e) { + ExceptionUtils.logException("mask hash generation", e); + return "fallback-mask-" + System.identityHashCode(image); + } + } + // Get original image from a reference - private PDImageXObject getOriginalImage(PDDocument doc, ImageReference ref) throws IOException { + private static PDImageXObject getOriginalImage(PDDocument doc, ImageReference ref) + throws IOException { if (ref instanceof NestedImageReference nestedRef) { - // Get the nested image from within a form XObject PDPage page = doc.getPage(nestedRef.pageNum); PDResources pageResources = page.getResources(); @@ -354,7 +447,7 @@ public class CompressController { } // Try to compress an image if it makes sense - private PDImageXObject compressImage( + private static PDImageXObject compressImage( PDDocument doc, PDImageXObject originalImage, int originalSize, @@ -385,31 +478,16 @@ public class CompressController { return null; } - // Replace all instances of original images with their compressed versions - private void replaceImages( - PDDocument doc, - Map> uniqueImages, - Map compressedVersions, - CompressionStats stats) - throws IOException { - - for (Entry> entry : uniqueImages.entrySet()) { - String imageHash = entry.getKey(); - List references = entry.getValue(); - - // Skip if no compressed version exists - PDImageXObject compressedImage = compressedVersions.get(imageHash); - if (compressedImage == null) continue; - - // Replace ALL instances with the compressed version - for (ImageReference ref : references) { - replaceImageReference(doc, ref, compressedImage); - } + private static String bytesToHexString(byte[] bytes) { + StringBuilder sb = new StringBuilder(); + for (byte b : bytes) { + sb.append(String.format("%02x", b)); } + return sb.toString(); } // Replace a specific image reference with a compressed version - private void replaceImageReference( + private static void replaceImageReference( PDDocument doc, ImageReference ref, PDImageXObject compressedImage) throws IOException { if (ref instanceof NestedImageReference nestedRef) { // Replace nested image within form XObject @@ -439,7 +517,7 @@ public class CompressController { } // Log final stats about the compression - private void logCompressionStats(CompressionStats stats, long originalFileSize) { + private static void logCompressionStats(CompressionStats stats, long originalFileSize) { // Calculate image reduction percentage double overallImageReduction = stats.totalOriginalBytes > 0 @@ -463,7 +541,7 @@ public class CompressController { String.format("%.1f", overallImageReduction)); } - private BufferedImage convertToGrayscale(BufferedImage image) { + private static BufferedImage convertToGrayscale(BufferedImage image) { BufferedImage grayImage = new BufferedImage( image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); @@ -476,7 +554,7 @@ public class CompressController { } // Resize and optionally convert to grayscale - private BufferedImage processAndCompressImage( + private static BufferedImage processAndCompressImage( PDImageXObject image, double scaleFactor, float jpegQuality, boolean convertToGrayscale) throws IOException { BufferedImage bufferedImage = image.getImage(); @@ -558,7 +636,8 @@ public class CompressController { } // Convert image to byte array with quality settings - private byte[] convertToBytes(BufferedImage scaledImage, float jpegQuality) throws IOException { + private static byte[] convertToBytes(BufferedImage scaledImage, float jpegQuality) + throws IOException { String format = scaledImage.getColorModel().hasAlpha() ? "png" : "jpeg"; ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); @@ -588,36 +667,77 @@ public class CompressController { return outputStream.toByteArray(); } - // Hash function to identify identical images - private String generateImageHash(PDImageXObject image) { + // Get image filter/compression type + private static String getImageFilter(PDImageXObject image) { try { - // Create a stream for the raw stream data - try (InputStream stream = image.getCOSObject().createRawInputStream()) { - // Read up to first 8KB of data for the hash - byte[] buffer = new byte[8192]; - int bytesRead = stream.read(buffer); - if (bytesRead > 0) { - byte[] dataToHash = - bytesRead == buffer.length ? buffer : Arrays.copyOf(buffer, bytesRead); - return bytesToHexString(generateMD5(dataToHash)); - } - return "empty-stream"; + return image.getCOSObject().getDictionaryObject(COSName.FILTER).toString(); + } catch (Exception e) { + return "unknown"; + } + } + + // Get color profile information + private static String getColorProfileInfo(PDImageXObject image) { + try { + // Try to get ICC profile information + if (image.getColorSpace() != null) { + return image.getColorSpace().getName() + + "_" + + image.getColorSpace().getNumberOfComponents(); + } + return "no-profile"; + } catch (Exception e) { + return "error-profile"; + } + } + + // Determine image type from stream data + private static String getImageType(PDImageXObject image) { + try { + String filter = getImageFilter(image); + if (filter.contains("DCTDecode") || filter.contains("JPXDecode")) { + return "JPEG"; + } else if (filter.contains("FlateDecode")) { + return "PNG"; + } else if (filter.contains("CCITTFaxDecode")) { + return "TIFF"; + } else if (filter.contains("JBIG2Decode")) { + return "JBIG2"; + } else { + return "RAW"; } } catch (Exception e) { - ExceptionUtils.logException("image hash generation", e); - return "fallback-" + System.identityHashCode(image); + return "unknown"; } } - private String bytesToHexString(byte[] bytes) { - StringBuilder sb = new StringBuilder(); - for (byte b : bytes) { - sb.append(String.format("%02x", b)); + // Generate hash of decode parameters + private static String generateDecodeParamsHash(PDImageXObject image) { + try { + // Get decode parameters that affect how the image is processed + StringBuilder params = new StringBuilder(); + + // Add filter information + params.append(getImageFilter(image)); + + // Add color space components + params.append("_").append(image.getColorSpace().getNumberOfComponents()); + + // Add bits per component + params.append("_").append(image.getBitsPerComponent()); + + // Add any decode array parameters + if (image.getDecode() != null) { + params.append("_").append(image.getDecode().toString()); + } + + return bytesToHexString(generateMD5(params.toString().getBytes())); + } catch (Exception e) { + return "fallback-decode-" + System.identityHashCode(image); } - return sb.toString(); } - private byte[] generateMD5(byte[] data) throws IOException { + private static byte[] generateMD5(byte[] data) { try { MessageDigest md = MessageDigest.getInstance("MD5"); return md.digest(data); // Get the MD5 hash of the image bytes @@ -626,36 +746,181 @@ public class CompressController { } } - // Scale factors for different optimization levels - private double getScaleFactorForLevel(int optimizeLevel) { + private static class ImageIdentity { + private final String pixelHash; // Hash of pixel data + private final String colorSpace; // e.g., "DeviceRGB", "DeviceCMYK" + private final int bitsPerComponent; + private final boolean hasMask; // Has transparency + private final String maskHash; // Hash of mask data if present + private final int width; // Image width in pixels + private final int height; // Image height in pixels + private final String filter; // Image filter/compression type + private final String colorProfile; // Color profile information + private final long streamLength; // Original stream length + private final String imageType; // Image type (JPEG, PNG, etc.) + private final String decodeParams; // Decode parameters hash + private final String metadataHash; // Hash of image metadata + + public ImageIdentity(PDImageXObject image) throws IOException { + this.pixelHash = generateImageHash(image); + this.colorSpace = image.getColorSpace().getName(); + this.bitsPerComponent = image.getBitsPerComponent(); + this.hasMask = image.getMask() != null || image.getSoftMask() != null; + this.maskHash = hasMask ? generateMaskHash(image) : null; + this.width = image.getWidth(); + this.height = image.getHeight(); + this.filter = getImageFilter(image); + this.colorProfile = getColorProfileInfo(image); + this.streamLength = image.getCOSObject().getLength(); + this.imageType = getImageType(image); + this.decodeParams = generateDecodeParamsHash(image); + this.metadataHash = this.generateMetadataHash(image); + } + + // Generate hash of image metadata + private String generateMetadataHash(PDImageXObject image) { + try { + StringBuilder metadata = new StringBuilder(); + + // Add image dimensions + metadata.append(image.getWidth()).append("x").append(image.getHeight()); + + // Add color space info + metadata.append("_").append(image.getColorSpace().getName()); + + // Add bits per component + metadata.append("_").append(image.getBitsPerComponent()); + + // Add stream length + metadata.append("_").append(image.getCOSObject().getLength()); + + // Add mask information + if (image.getMask() != null) { + metadata.append("_mask"); + } + if (image.getSoftMask() != null) { + metadata.append("_softmask"); + } + + return bytesToHexString(generateMD5(metadata.toString().getBytes())); + } catch (Exception e) { + return "fallback-meta-" + System.identityHashCode(image); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof ImageIdentity that)) return false; + return bitsPerComponent == that.bitsPerComponent + && hasMask == that.hasMask + && width == that.width + && height == that.height + && streamLength == that.streamLength + && Objects.equals(pixelHash, that.pixelHash) + && Objects.equals(colorSpace, that.colorSpace) + && Objects.equals(maskHash, that.maskHash) + && Objects.equals(filter, that.filter) + && Objects.equals(colorProfile, that.colorProfile) + && Objects.equals(imageType, that.imageType) + && Objects.equals(decodeParams, that.decodeParams) + && Objects.equals(metadataHash, that.metadataHash); + } + + @Override + public int hashCode() { + return Objects.hash( + pixelHash, + colorSpace, + bitsPerComponent, + hasMask, + maskHash, + width, + height, + filter, + colorProfile, + streamLength, + imageType, + decodeParams, + metadataHash); + } + + @Override + public String toString() { + return String.format( + "%s_%s_%d_%dx%d_%s_%s_%d_%s_%s_%s", + pixelHash.substring(0, Math.min(8, pixelHash.length())), + colorSpace, + bitsPerComponent, + width, + height, + filter, + imageType, + streamLength, + hasMask ? "masked" : "nomask", + decodeParams.substring(0, Math.min(4, decodeParams.length())), + metadataHash.substring(0, Math.min(4, metadataHash.length()))); + } + } + + // Scale factors for different optimization levels (lower => smaller) + private static double getScaleFactorForLevel(int optimizeLevel) { return switch (optimizeLevel) { - case 3 -> 0.85; - case 4 -> 0.75; - case 5 -> 0.65; - case 6 -> 0.55; - case 7 -> 0.45; - case 8 -> 0.35; - case 9 -> 0.25; - case 10 -> 0.15; + case 1 -> 0.98; // negligible resizing + case 2 -> 0.95; + case 3 -> 0.88; + case 4 -> 0.78; + case 5 -> 0.68; + case 6 -> 0.58; + case 7 -> 0.48; + case 8 -> 0.38; + case 9 -> 0.28; default -> 1.0; }; } - // JPEG quality for different optimization levels - private float getJpegQualityForLevel(int optimizeLevel) { + // JPEG quality for different optimization levels (lower => smaller) + private static float getJpegQualityForLevel(int optimizeLevel) { return switch (optimizeLevel) { + case 1 -> 0.92f; // very light + case 2 -> 0.88f; case 3 -> 0.85f; case 4 -> 0.80f; - case 5 -> 0.75f; - case 6 -> 0.70f; - case 7 -> 0.60f; - case 8 -> 0.50f; - case 9 -> 0.35f; - case 10 -> 0.2f; - default -> 0.7f; + case 5 -> 0.72f; + case 6 -> 0.65f; + case 7 -> 0.55f; + case 8 -> 0.45f; + case 9 -> 0.35f; // aggressive + default -> 0.75f; }; } + // Pick optimization level based on target size + private static int determineOptimizeLevel(double sizeReductionRatio) { + if (sizeReductionRatio > 0.9) return 1; + if (sizeReductionRatio > 0.8) return 2; + if (sizeReductionRatio > 0.7) return 3; + if (sizeReductionRatio > 0.6) return 4; + if (sizeReductionRatio > 0.3) return 5; + if (sizeReductionRatio > 0.2) return 6; + if (sizeReductionRatio > 0.15) return 7; + if (sizeReductionRatio > 0.1) return 8; + return 9; + } + + // Increment optimization level if we need more compression + private static int incrementOptimizeLevel(int currentLevel, long currentSize, long targetSize) { + double currentRatio = currentSize / (double) targetSize; + log.info("Current compression ratio: {}", String.format("%.2f", currentRatio)); + + if (currentRatio > 2.0) { + return Math.min(9, currentLevel + 3); + } else if (currentRatio > 1.5) { + return Math.min(9, currentLevel + 2); + } + return Math.min(9, currentLevel + 1); + } + @PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE, value = "/compress-pdf") @Operation( summary = "Optimize PDF file", @@ -705,65 +970,56 @@ public class CompressController { while (!sizeMet && optimizeLevel <= 9) { // Apply external compression first - if (!externalCompressionApplied) { - boolean ghostscriptSuccess = false; + boolean ghostscriptSuccess = false; - // Try Ghostscript first if available - for ANY compression level - if (isGhostscriptEnabled()) { - try { - applyGhostscriptCompression(request, optimizeLevel, currentFile); - log.info("Ghostscript compression applied successfully"); - ghostscriptSuccess = true; - } catch (IOException e) { - log.warn("Ghostscript compression failed, trying fallback methods"); - } + if (isGhostscriptEnabled() && optimizeLevel >= 6) { + try { + applyGhostscriptCompression(request, optimizeLevel, currentFile); + log.info("Ghostscript compression applied successfully"); + ghostscriptSuccess = true; + } catch (IOException e) { + log.warn("Ghostscript compression failed, continuing with other methods"); } + } - // Fallback to QPDF if Ghostscript failed or not available (levels 1-3 only) - if (!ghostscriptSuccess && isQpdfEnabled() && optimizeLevel <= 3) { - try { - applyQpdfCompression(request, optimizeLevel, currentFile); - log.info("QPDF compression applied successfully"); - } catch (IOException e) { - log.warn("QPDF compression also failed"); - } + // Always apply QPDF when enabled to recompress/optimize structure + if (isQpdfEnabled()) { + try { + applyQpdfCompression(request, optimizeLevel, currentFile); + log.info("QPDF compression applied successfully"); + } catch (IOException e) { + log.warn("QPDF compression failed"); } + } else if (!ghostscriptSuccess) { + log.info( + "No external compression tools available, using image compression" + + " only"); + } - if (!ghostscriptSuccess && !isQpdfEnabled()) { - log.info( - "No external compression tools available, using image compression" - + " only"); - } + externalCompressionApplied = true; - externalCompressionApplied = true; - - // Skip image compression if Ghostscript succeeded - if (ghostscriptSuccess) { - imageCompressionApplied = true; - } + // Skip image compression if Ghostscript succeeded + if (ghostscriptSuccess) { + imageCompressionApplied = true; } // Apply image compression for levels 4+ only if Ghostscript didn't run if ((optimizeLevel >= 4 || Boolean.TRUE.equals(convertToGrayscale)) && !imageCompressionApplied) { // Use different scale factors based on level - double scaleFactor = - switch (optimizeLevel) { - case 4 -> 0.95; // 95% of original size - case 5 -> 0.9; // 90% of original size - case 6 -> 0.8; // 80% of original size - case 7 -> 0.7; // 70% of original size - case 8 -> 0.65; // 65% of original size - case 9 -> 0.5; // 50% of original size - default -> 1.0; - }; + double scaleFactor = getScaleFactorForLevel(optimizeLevel); + // Use JPEG quality settings based on optimization level + float jpegQuality = getJpegQualityForLevel(optimizeLevel); - log.info("Applying image compression with scale factor: {}", scaleFactor); + log.info( + "Applying image compression with scale factor: {} and JPEG quality: {}", + scaleFactor, + jpegQuality); TempFile compressedImageFile = compressImagesInPDF( currentFile, scaleFactor, - 0.7f, // Default JPEG quality + jpegQuality, Boolean.TRUE.equals(convertToGrayscale)); tempFiles.add(compressedImageFile); @@ -793,7 +1049,11 @@ public class CompressController { } } - // Use original if optimized file is somehow larger + // Use original if optimized file is somehow larger or currentFile is invalid + if (!Files.exists(currentFile)) { + log.warn("Optimized file missing or invalid. Using the original file instead."); + currentFile = originalFile; + } long finalFileSize = Files.size(currentFile); if (finalFileSize >= inputFileSize) { log.warn( @@ -840,6 +1100,12 @@ public class CompressController { command.add("-dQUIET"); command.add("-dBATCH"); + // General compression enhancements + command.add("-dDetectDuplicateImages=true"); + command.add("-dDownsampleColorImages=true"); + command.add("-dCompressFonts=true"); + command.add("-dSubsetFonts=true"); + // Map optimization levels to Ghostscript settings switch (optimizeLevel) { case 1: @@ -865,9 +1131,16 @@ public class CompressController { case 8: case 9: command.add("-dPDFSETTINGS=/screen"); - command.add("-dColorImageResolution=100"); - command.add("-dGrayImageResolution=100"); - command.add("-dMonoImageResolution=200"); + // Use stronger downsampling at the highest level + if (optimizeLevel == 9) { + command.add("-dColorImageResolution=72"); + command.add("-dGrayImageResolution=72"); + command.add("-dMonoImageResolution=150"); + } else { + command.add("-dColorImageResolution=100"); + command.add("-dGrayImageResolution=100"); + command.add("-dMonoImageResolution=200"); + } break; case 10: command.add("-dPDFSETTINGS=/screen"); @@ -880,6 +1153,19 @@ public class CompressController { break; } + // If grayscale conversion requested, enforce grayscale color processing in Ghostscript + boolean grayscaleRequested = Boolean.TRUE.equals(request.getGrayscale()); + if (grayscaleRequested) { + command.add("-dColorConversionStrategy=/Gray"); + command.add("-dProcessColorModel=/DeviceGray"); + } + + // Optional conversion: CMYK -> RGB for color output only (avoid conflict with + // grayscale) + if (optimizeLevel >= 7 && !grayscaleRequested) { + command.add("-dConvertCMYKImagesToRGB=true"); + } + command.add("-sOutputFile=" + gsOutputPath.toString()); command.add(currentFile.toString()); @@ -906,6 +1192,14 @@ public class CompressController { throw new IOException("Ghostscript compression failed"); } + // replace the existing catch with these two catches + } catch (InterruptedException e) { + // restore interrupted status and propagate as an IOException + Thread.currentThread().interrupt(); + InterruptedIOException ie = + new InterruptedIOException("Ghostscript command interrupted"); + ie.initCause(e); + throw ie; } catch (Exception e) { log.warn("Ghostscript compression failed, will fallback to other methods", e); throw new IOException("Ghostscript compression failed", e); @@ -921,14 +1215,13 @@ public class CompressController { log.info("Pre-QPDF file size: {}", GeneralUtils.formatBytes(preQpdfSize)); // Map optimization levels to QPDF compression levels - int qpdfCompressionLevel; - if (optimizeLevel == 1) { - qpdfCompressionLevel = 5; - } else if (optimizeLevel == 2) { - qpdfCompressionLevel = 9; - } else { - qpdfCompressionLevel = 9; - } + int qpdfCompressionLevel = + switch (optimizeLevel) { + case 1 -> 3; // faster, lighter + case 2 -> 5; + case 3, 4, 5 -> 7; + default -> 9; // 6-9 use max + }; try (TempFile qpdfOutputFile = tempFileManager.createManagedTempFile(".pdf")) { Path qpdfOutputPath = qpdfOutputFile.getPath(); @@ -936,24 +1229,59 @@ public class CompressController { // Build QPDF command List command = new ArrayList<>(); command.add("qpdf"); - if (request.getNormalize()) { + if (Boolean.TRUE.equals(request.getNormalize())) { command.add("--normalize-content=y"); } - if (request.getLinearize()) { + if (Boolean.TRUE.equals(request.getLinearize())) { command.add("--linearize"); } + // Decode/encode settings for maximal recompression + command.add("--decode-level=generalized"); command.add("--recompress-flate"); command.add("--compression-level=" + qpdfCompressionLevel); command.add("--compress-streams=y"); + command.add("--stream-data=compress"); + // Preserve unreferenced only at lower levels for safety; skip at highest levels for + // size + if (optimizeLevel <= 3) { + command.add("--preserve-unreferenced"); + } + // Optional image optimization in qpdf (no resampling; lossy JPEG when beneficial) + // Enable qpdf image optimization at medium/high levels + if (optimizeLevel >= 5) { + command.add("--optimize-images"); + // Map optimize level to JPEG quality (lower number => smaller size) + Integer jpegQuality = + switch (optimizeLevel) { + case 5 -> 78; + case 6 -> 68; + case 7 -> 58; + case 8 -> 46; + default -> 34; // 9+ + }; + command.add("--jpeg-quality=" + jpegQuality); + } command.add("--object-streams=generate"); command.add(currentFile.toString()); command.add(qpdfOutputPath.toString()); ProcessExecutorResult returnCode = null; try { + // On high levels, prefer zopfli if platform supports env wrapper + if (optimizeLevel >= 8) { + String os = System.getProperty("os.name").toLowerCase(Locale.ROOT); + if (!os.contains("win")) { + // Prepend env QPDF_ZOPFLI=silent for Unix-like systems + List zopfliCommand = new ArrayList<>(); + zopfliCommand.add("env"); + zopfliCommand.add("QPDF_ZOPFLI=silent"); + zopfliCommand.addAll(command); + command = zopfliCommand; + } + } returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF) - .runCommandWithOutputHandling(command); + .runCommandWithOutputHandling(command, null); // Update current file to the QPDF output Files.copy(qpdfOutputPath, currentFile, StandardCopyOption.REPLACE_EXISTING); @@ -965,39 +1293,19 @@ public class CompressController { GeneralUtils.formatBytes(postQpdfSize), String.format("%.1f", qpdfReduction)); - } catch (Exception e) { + } catch (IOException e) { if (returnCode != null && returnCode.getRc() != 3) { throw new IOException("QPDF command failed", e); } // If QPDF fails, keep using the current file log.warn("QPDF compression failed, continuing with current file", e); + } catch (InterruptedException e) { + // restore interrupted status and propagate as an IOException + Thread.currentThread().interrupt(); + InterruptedIOException ie = new InterruptedIOException("QPDF command interrupted"); + ie.initCause(e); + throw ie; } } } - - // Pick optimization level based on target size - private int determineOptimizeLevel(double sizeReductionRatio) { - if (sizeReductionRatio > 0.9) return 1; - if (sizeReductionRatio > 0.8) return 2; - if (sizeReductionRatio > 0.7) return 3; - if (sizeReductionRatio > 0.6) return 4; - if (sizeReductionRatio > 0.3) return 5; - if (sizeReductionRatio > 0.2) return 6; - if (sizeReductionRatio > 0.15) return 7; - if (sizeReductionRatio > 0.1) return 8; - return 9; - } - - // Increment optimization level if we need more compression - private int incrementOptimizeLevel(int currentLevel, long currentSize, long targetSize) { - double currentRatio = currentSize / (double) targetSize; - log.info("Current compression ratio: {}", String.format("%.2f", currentRatio)); - - if (currentRatio > 2.0) { - return Math.min(9, currentLevel + 3); - } else if (currentRatio > 1.5) { - return Math.min(9, currentLevel + 2); - } - return Math.min(9, currentLevel + 1); - } }