cleanups and fix for #3207

This commit is contained in:
Anthony Stirling 2025-03-20 00:06:10 +00:00
parent 52d4adc473
commit f97f1d792d
8 changed files with 478 additions and 403 deletions

View File

@ -1,7 +1,6 @@
package stirling.software.SPDF.config; package stirling.software.SPDF.config;
import java.lang.reflect.Method; import java.lang.reflect.Method;
import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -43,51 +42,39 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
private void discoverEndpoints() { private void discoverEndpoints() {
try { try {
// Get all request mapping beans from the application context
Map<String, RequestMappingHandlerMapping> mappings = Map<String, RequestMappingHandlerMapping> mappings =
applicationContext.getBeansOfType(RequestMappingHandlerMapping.class); applicationContext.getBeansOfType(RequestMappingHandlerMapping.class);
// Process each mapping bean
for (Map.Entry<String, RequestMappingHandlerMapping> entry : mappings.entrySet()) { for (Map.Entry<String, RequestMappingHandlerMapping> entry : mappings.entrySet()) {
RequestMappingHandlerMapping mapping = entry.getValue(); RequestMappingHandlerMapping mapping = entry.getValue();
// Get all handler methods registered in this mapping
Map<RequestMappingInfo, HandlerMethod> handlerMethods = mapping.getHandlerMethods(); Map<RequestMappingInfo, HandlerMethod> handlerMethods = mapping.getHandlerMethods();
// Process each handler method
for (Map.Entry<RequestMappingInfo, HandlerMethod> handlerEntry : for (Map.Entry<RequestMappingInfo, HandlerMethod> handlerEntry :
handlerMethods.entrySet()) { handlerMethods.entrySet()) {
RequestMappingInfo mappingInfo = handlerEntry.getKey(); RequestMappingInfo mappingInfo = handlerEntry.getKey();
HandlerMethod handlerMethod = handlerEntry.getValue(); HandlerMethod handlerMethod = handlerEntry.getValue();
// Check if the method handles GET requests
boolean isGetHandler = false; boolean isGetHandler = false;
try { try {
Set<RequestMethod> methods = mappingInfo.getMethodsCondition().getMethods(); Set<RequestMethod> methods = mappingInfo.getMethodsCondition().getMethods();
// Either explicitly handles GET or handles all methods (empty set)
isGetHandler = methods.isEmpty() || methods.contains(RequestMethod.GET); isGetHandler = methods.isEmpty() || methods.contains(RequestMethod.GET);
} catch (Exception e) { } catch (Exception e) {
// If we can't determine methods, assume it could handle GET
isGetHandler = true; isGetHandler = true;
} }
if (isGetHandler) { if (isGetHandler) {
// Since we know getDirectPaths works, use it directly
Set<String> patterns = extractPatternsUsingDirectPaths(mappingInfo); Set<String> patterns = extractPatternsUsingDirectPaths(mappingInfo);
// If that fails, try string parsing as fallback
if (patterns.isEmpty()) { if (patterns.isEmpty()) {
patterns = extractPatternsFromString(mappingInfo); patterns = extractPatternsFromString(mappingInfo);
} }
// Add all valid patterns
validGetEndpoints.addAll(patterns); validGetEndpoints.addAll(patterns);
} }
} }
} }
if (validGetEndpoints.isEmpty()) { if (validGetEndpoints.isEmpty()) {
// If we still couldn't find any endpoints, add some common ones as a fallback
logger.warn("No endpoints discovered. Adding common endpoints as fallback."); logger.warn("No endpoints discovered. Adding common endpoints as fallback.");
validGetEndpoints.add("/"); validGetEndpoints.add("/");
validGetEndpoints.add("/api/**"); validGetEndpoints.add("/api/**");
@ -98,9 +85,6 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
} }
} }
/**
* Extract patterns using the getDirectPaths method that works in this environment
*/
private Set<String> extractPatternsUsingDirectPaths(RequestMappingInfo mappingInfo) { private Set<String> extractPatternsUsingDirectPaths(RequestMappingInfo mappingInfo) {
Set<String> patterns = new HashSet<>(); Set<String> patterns = new HashSet<>();
@ -113,7 +97,7 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
patterns.addAll(resultSet); patterns.addAll(resultSet);
} }
} catch (Exception e) { } catch (Exception e) {
// Just return empty set if method not found or fails // Return empty set if method not found or fails
} }
return patterns; return patterns;
@ -125,9 +109,7 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
String infoString = mappingInfo.toString(); String infoString = mappingInfo.toString();
if (infoString.contains("{")) { if (infoString.contains("{")) {
String patternsSection = String patternsSection =
infoString.substring( infoString.substring(infoString.indexOf("{") + 1, infoString.indexOf("}"));
infoString.indexOf("{") + 1,
infoString.indexOf("}"));
for (String pattern : patternsSection.split(",")) { for (String pattern : patternsSection.split(",")) {
pattern = pattern.trim(); pattern = pattern.trim();
@ -137,39 +119,38 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
} }
} }
} catch (Exception e) { } catch (Exception e) {
// Just return empty set if parsing fails // Return empty set if parsing fails
} }
return patterns; return patterns;
} }
/**
* Check if a URI corresponds to a valid GET endpoint - Fixed to handle path variables safely
*/
public boolean isValidGetEndpoint(String uri) { public boolean isValidGetEndpoint(String uri) {
// Ensure endpoints are discovered
if (!endpointsDiscovered) { if (!endpointsDiscovered) {
discoverEndpoints(); discoverEndpoints();
endpointsDiscovered = true; endpointsDiscovered = true;
} }
// If no endpoints were discovered, assume all endpoints are valid
if (validGetEndpoints.isEmpty()) {
logger.warn("No valid endpoints were discovered. Assuming all GET endpoints are valid.");
return true;
}
// Direct match
if (validGetEndpoints.contains(uri)) { if (validGetEndpoints.contains(uri)) {
return true; return true;
} }
// Try simple prefix matching for wildcards and path variables if (matchesWildcardOrPathVariable(uri)) {
return true;
}
if (matchesPathSegments(uri)) {
return true;
}
return false;
}
private boolean matchesWildcardOrPathVariable(String uri) {
for (String pattern : validGetEndpoints) { for (String pattern : validGetEndpoints) {
if (pattern.contains("*") || pattern.contains("{")) { if (pattern.contains("*") || pattern.contains("{")) {
int wildcardIndex = pattern.indexOf('*'); int wildcardIndex = pattern.indexOf('*');
int variableIndex = pattern.indexOf('{'); int variableIndex = pattern.indexOf('{');
// Find the earliest special character
int cutoffIndex; int cutoffIndex;
if (wildcardIndex < 0) { if (wildcardIndex < 0) {
cutoffIndex = variableIndex; cutoffIndex = variableIndex;
@ -179,29 +160,26 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
cutoffIndex = Math.min(wildcardIndex, variableIndex); cutoffIndex = Math.min(wildcardIndex, variableIndex);
} }
// Get the static part of the pattern
String staticPrefix = pattern.substring(0, cutoffIndex); String staticPrefix = pattern.substring(0, cutoffIndex);
// If the URI starts with this prefix, consider it a match
if (uri.startsWith(staticPrefix)) { if (uri.startsWith(staticPrefix)) {
return true; return true;
} }
} }
} }
return false;
}
// For patterns without wildcards or variables, try path-segment-by-segment matching private boolean matchesPathSegments(String uri) {
for (String pattern : validGetEndpoints) { for (String pattern : validGetEndpoints) {
if (!pattern.contains("*") && !pattern.contains("{")) { if (!pattern.contains("*") && !pattern.contains("{")) {
// Split the pattern and URI into path segments
String[] patternSegments = pattern.split("/"); String[] patternSegments = pattern.split("/");
String[] uriSegments = uri.split("/"); String[] uriSegments = uri.split("/");
// If URI has fewer segments than the pattern, it can't match
if (uriSegments.length < patternSegments.length) { if (uriSegments.length < patternSegments.length) {
continue; continue;
} }
// Check each segment
boolean match = true; boolean match = true;
for (int i = 0; i < patternSegments.length; i++) { for (int i = 0; i < patternSegments.length; i++) {
if (!patternSegments[i].equals(uriSegments[i])) { if (!patternSegments[i].equals(uriSegments[i])) {
@ -215,14 +193,10 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
} }
} }
} }
// If no match was found, the URI is not valid
return false; return false;
} }
/** Get all discovered valid GET endpoints */
public Set<String> getValidGetEndpoints() { public Set<String> getValidGetEndpoints() {
// Ensure endpoints are discovered
if (!endpointsDiscovered) { if (!endpointsDiscovered) {
discoverEndpoints(); discoverEndpoints();
endpointsDiscovered = true; endpointsDiscovered = true;
@ -230,7 +204,6 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
return new HashSet<>(validGetEndpoints); return new HashSet<>(validGetEndpoints);
} }
//For debugging when needed
private void logAllEndpoints() { private void logAllEndpoints() {
Set<String> sortedEndpoints = new TreeSet<>(validGetEndpoints); Set<String> sortedEndpoints = new TreeSet<>(validGetEndpoints);
@ -239,7 +212,5 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
logger.info("Endpoint: {}", endpoint); logger.info("Endpoint: {}", endpoint);
} }
logger.info("=== END: All discovered GET endpoints ==="); logger.info("=== END: All discovered GET endpoints ===");
} }
} }

View File

@ -25,24 +25,13 @@ import javax.imageio.ImageWriter;
import javax.imageio.plugins.jpeg.JPEGImageWriteParam; import javax.imageio.plugins.jpeg.JPEGImageWriteParam;
import javax.imageio.stream.ImageOutputStream; import javax.imageio.stream.ImageOutputStream;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.PDXObject; import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDInlineImage;
import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern;
import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern;
import org.apache.pdfbox.pdmodel.graphics.shading.PDShading;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute; import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.PostMapping;
@ -58,6 +47,7 @@ import lombok.AllArgsConstructor;
import lombok.Data; import lombok.Data;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.config.EndpointConfiguration; import stirling.software.SPDF.config.EndpointConfiguration;
import stirling.software.SPDF.model.api.misc.OptimizePdfRequest; import stirling.software.SPDF.model.api.misc.OptimizePdfRequest;
import stirling.software.SPDF.service.CustomPDFDocumentFactory; import stirling.software.SPDF.service.CustomPDFDocumentFactory;
@ -76,12 +66,13 @@ public class CompressController {
private final CustomPDFDocumentFactory pdfDocumentFactory; private final CustomPDFDocumentFactory pdfDocumentFactory;
private final boolean qpdfEnabled; private final boolean qpdfEnabled;
public CompressController(CustomPDFDocumentFactory pdfDocumentFactory, EndpointConfiguration endpointConfiguration) { public CompressController(
CustomPDFDocumentFactory pdfDocumentFactory,
EndpointConfiguration endpointConfiguration) {
this.pdfDocumentFactory = pdfDocumentFactory; this.pdfDocumentFactory = pdfDocumentFactory;
this.qpdfEnabled = endpointConfiguration.isGroupEnabled("qpdf"); this.qpdfEnabled = endpointConfiguration.isGroupEnabled("qpdf");
} }
@Data @Data
@AllArgsConstructor @AllArgsConstructor
@NoArgsConstructor @NoArgsConstructor
@ -89,6 +80,7 @@ public class CompressController {
int pageNum; // Page number where the image appears int pageNum; // Page number where the image appears
COSName name; // The name used to reference this image COSName name; // The name used to reference this image
} }
@Data @Data
@AllArgsConstructor @AllArgsConstructor
@NoArgsConstructor @NoArgsConstructor
@ -97,6 +89,16 @@ public class CompressController {
COSName imageName; // Name of the image within the form COSName imageName; // Name of the image within the form
} }
// Image compression statistics for reporting
private static class CompressionStats {
int totalImages = 0;
int nestedImages = 0;
int uniqueImagesCount = 0;
int compressedImages = 0;
int skippedImages = 0;
long totalOriginalBytes = 0;
long totalCompressedBytes = 0;
}
public Path compressImagesInPDF( public Path compressImagesInPDF(
Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale) Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
@ -111,228 +113,24 @@ public class CompressController {
GeneralUtils.formatBytes(originalFileSize)); GeneralUtils.formatBytes(originalFileSize));
try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) { try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) {
// Collect all unique images by content hash // Step 1: Find all unique images in the document
Map<String, List<ImageReference>> uniqueImages = new HashMap<>(); Map<String, List<ImageReference>> uniqueImages = findImages(doc);
Map<String, PDImageXObject> compressedVersions = new HashMap<>();
int totalImages = 0; // Get statistics
int nestedImages = 0; CompressionStats stats = new CompressionStats();
stats.uniqueImagesCount = uniqueImages.size();
calculateImageStats(uniqueImages, stats);
// FIRST PASS: Collect all images (direct and nested) // Step 2: Create compressed versions of unique images
for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) { Map<String, PDImageXObject> compressedVersions =
PDPage page = doc.getPage(pageNum); createCompressedImages(
PDResources res = page.getResources(); doc, uniqueImages, scaleFactor, jpegQuality, convertToGrayscale, stats);
if (res == null || res.getXObjectNames() == null) continue;
// Process direct XObjects on page // Step 3: Replace all instances with compressed versions
for (COSName name : res.getXObjectNames()) { replaceImages(doc, uniqueImages, compressedVersions, stats);
PDXObject xobj = res.getXObject(name);
// Direct image
if (xobj instanceof PDImageXObject) {
totalImages++;
PDImageXObject image = (PDImageXObject) xobj;
String imageHash = generateImageHash(image);
ImageReference ref = new ImageReference();
ref.pageNum = pageNum;
ref.name = name;
log.info("Found direct image '{}' on page {} - {}x{}",
name.getName(), pageNum + 1, image.getWidth(), image.getHeight());
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref);
}
// Form XObject may contain nested images
else if (xobj instanceof org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject) {
org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject formXObj =
(org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject) xobj;
PDResources formResources = formXObj.getResources();
if (formResources != null && formResources.getXObjectNames() != null) {
// Process nested XObjects within the form
log.info("Checking form XObject '{}' on page {} for nested images",
name.getName(), pageNum + 1);
for (COSName nestedName : formResources.getXObjectNames()) {
PDXObject nestedXobj = formResources.getXObject(nestedName);
if (nestedXobj instanceof PDImageXObject) {
nestedImages++;
totalImages++;
PDImageXObject nestedImage = (PDImageXObject) nestedXobj;
log.info("Found nested image '{}' in form '{}' on page {} - {}x{}",
nestedName.getName(), name.getName(), pageNum + 1,
nestedImage.getWidth(), nestedImage.getHeight());
// Create a specialized reference for the nested image
NestedImageReference nestedRef = new NestedImageReference();
nestedRef.pageNum = pageNum;
nestedRef.formName = name;
nestedRef.imageName = nestedName;
String imageHash = generateImageHash(nestedImage);
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(nestedRef);
}
}
}
}
}
}
int uniqueImagesCount = uniqueImages.size();
int duplicatedImages = totalImages - uniqueImagesCount;
log.info(
"Found {} unique images and {} duplicated instances across {} pages ({} nested images in form XObjects)",
uniqueImagesCount,
duplicatedImages,
doc.getNumberOfPages(),
nestedImages);
// SECOND PASS: Process each unique image exactly once
int compressedImages = 0;
int skippedImages = 0;
long totalOriginalBytes = 0;
long totalCompressedBytes = 0;
for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
String imageHash = entry.getKey();
List<ImageReference> references = entry.getValue();
if (references.isEmpty()) continue;
// Get the first instance of this image
ImageReference firstRef = references.get(0);
PDImageXObject originalImage;
// Handle differently based on whether it's a direct or nested image
if (firstRef instanceof NestedImageReference) {
// Get the nested image from within a form XObject
NestedImageReference nestedRef = (NestedImageReference) firstRef;
PDPage firstPage = doc.getPage(nestedRef.pageNum);
PDResources pageResources = firstPage.getResources();
// Get the form XObject
org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject formXObj =
(org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject) pageResources.getXObject(nestedRef.formName);
// Get the nested image from the form's resources
PDResources formResources = formXObj.getResources();
originalImage = (PDImageXObject) formResources.getXObject(nestedRef.imageName);
log.info("Processing nested image '{}' from form '{}'",
nestedRef.imageName.getName(), nestedRef.formName.getName());
} else {
// Get direct image from page resources
PDPage firstPage = doc.getPage(firstRef.pageNum);
PDResources firstPageResources = firstPage.getResources();
originalImage = (PDImageXObject) firstPageResources.getXObject(firstRef.name);
log.debug("Processing direct image '{}'", firstRef.name.getName());
}
// Track original size
int originalSize = (int) originalImage.getCOSObject().getLength();
totalOriginalBytes += originalSize;
// Process this unique image once
BufferedImage processedImage =
processAndCompressImage(
originalImage, scaleFactor, jpegQuality, convertToGrayscale);
if (processedImage != null) {
// Convert to bytes for storage
byte[] compressedData = convertToBytes(processedImage, jpegQuality);
// Check if compression is beneficial
if (compressedData.length < originalSize || convertToGrayscale) {
// Create a single compressed version
PDImageXObject compressedImage =
PDImageXObject.createFromByteArray(
doc,
compressedData,
originalImage.getCOSObject().toString());
// Store the compressed version only once in our map
compressedVersions.put(imageHash, compressedImage);
// Report compression stats
double reductionPercentage =
100.0 - ((compressedData.length * 100.0) / originalSize);
log.info(
"Image hash {}: Compressed from {} to {} (reduced by {}%)",
imageHash,
GeneralUtils.formatBytes(originalSize),
GeneralUtils.formatBytes(compressedData.length),
String.format("%.1f", reductionPercentage));
// Replace ALL instances with the compressed version
for (ImageReference ref : references) {
if (ref instanceof NestedImageReference) {
// Replace nested image within form XObject
NestedImageReference nestedRef = (NestedImageReference) ref;
PDPage page = doc.getPage(nestedRef.pageNum);
PDResources pageResources = page.getResources();
// Get the form XObject
org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject formXObj =
(org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject)
pageResources.getXObject(nestedRef.formName);
// Replace the nested image in the form's resources
PDResources formResources = formXObj.getResources();
formResources.put(nestedRef.imageName, compressedImage);
log.info(
"Replaced nested image '{}' in form '{}' on page {} with compressed version",
nestedRef.imageName.getName(),
nestedRef.formName.getName(),
nestedRef.pageNum + 1);
} else {
// Replace direct image in page resources
PDPage page = doc.getPage(ref.pageNum);
PDResources resources = page.getResources();
resources.put(ref.name, compressedImage);
log.info(
"Replaced direct image on page {} with compressed version",
ref.pageNum + 1);
}
}
totalCompressedBytes += compressedData.length * references.size();
compressedImages++;
} else {
log.info("Image hash {}: Compression not beneficial, skipping", imageHash);
totalCompressedBytes += originalSize * references.size();
skippedImages++;
}
} else {
log.info("Image hash {}: Not suitable for compression, skipping", imageHash);
totalCompressedBytes += originalSize * references.size();
skippedImages++;
}
}
// Log compression statistics // Log compression statistics
double overallImageReduction = logCompressionStats(stats, originalFileSize);
totalOriginalBytes > 0
? 100.0 - ((totalCompressedBytes * 100.0) / totalOriginalBytes)
: 0;
log.info(
"Image compression summary - Total unique: {}, Compressed: {}, Skipped: {}, Duplicates: {}, Nested: {}",
uniqueImagesCount,
compressedImages,
skippedImages,
duplicatedImages,
nestedImages);
log.info(
"Total original image size: {}, compressed: {} (reduced by {}%)",
GeneralUtils.formatBytes(totalOriginalBytes),
GeneralUtils.formatBytes(totalCompressedBytes),
String.format("%.1f", overallImageReduction));
// Free memory before saving // Free memory before saving
compressedVersions.clear(); compressedVersions.clear();
@ -354,6 +152,315 @@ public class CompressController {
} }
} }
/** Find all images in the document, both direct and nested within forms. */
private Map<String, List<ImageReference>> findImages(PDDocument doc) throws IOException {
Map<String, List<ImageReference>> uniqueImages = new HashMap<>();
// Scan through all pages in the document
for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
PDPage page = doc.getPage(pageNum);
PDResources res = page.getResources();
if (res == null || res.getXObjectNames() == null) continue;
// Process all XObjects on the page
for (COSName name : res.getXObjectNames()) {
PDXObject xobj = res.getXObject(name);
// Process direct image
if (isImage(xobj)) {
addDirectImage(pageNum, name, (PDImageXObject) xobj, uniqueImages);
log.info(
"Found direct image '{}' on page {} - {}x{}",
name.getName(),
pageNum + 1,
((PDImageXObject) xobj).getWidth(),
((PDImageXObject) xobj).getHeight());
}
// Process form XObject that may contain nested images
else if (isForm(xobj)) {
checkFormForImages(pageNum, name, (PDFormXObject) xobj, uniqueImages);
}
}
}
return uniqueImages;
}
private boolean isImage(PDXObject xobj) {
return xobj instanceof PDImageXObject;
}
private boolean isForm(PDXObject xobj) {
return xobj instanceof PDFormXObject;
}
private ImageReference addDirectImage(
int pageNum,
COSName name,
PDImageXObject image,
Map<String, List<ImageReference>> uniqueImages)
throws IOException {
ImageReference ref = new ImageReference();
ref.pageNum = pageNum;
ref.name = name;
String imageHash = generateImageHash(image);
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref);
return ref;
}
/** Check a form XObject for nested images. */
private void checkFormForImages(
int pageNum,
COSName formName,
PDFormXObject formXObj,
Map<String, List<ImageReference>> uniqueImages)
throws IOException {
PDResources formResources = formXObj.getResources();
if (formResources == null || formResources.getXObjectNames() == null) {
return;
}
log.info(
"Checking form XObject '{}' on page {} for nested images",
formName.getName(),
pageNum + 1);
// Process all XObjects within the form
for (COSName nestedName : formResources.getXObjectNames()) {
PDXObject nestedXobj = formResources.getXObject(nestedName);
if (isImage(nestedXobj)) {
PDImageXObject nestedImage = (PDImageXObject) nestedXobj;
log.info(
"Found nested image '{}' in form '{}' on page {} - {}x{}",
nestedName.getName(),
formName.getName(),
pageNum + 1,
nestedImage.getWidth(),
nestedImage.getHeight());
// Create specialized reference for the nested image
NestedImageReference nestedRef = new NestedImageReference();
nestedRef.pageNum = pageNum;
nestedRef.formName = formName;
nestedRef.imageName = nestedName;
String imageHash = generateImageHash(nestedImage);
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(nestedRef);
}
}
}
/** Calculate statistics about the images found in the document. */
private void calculateImageStats(
Map<String, List<ImageReference>> uniqueImages, CompressionStats stats) {
for (List<ImageReference> references : uniqueImages.values()) {
for (ImageReference ref : references) {
stats.totalImages++;
if (ref instanceof NestedImageReference) {
stats.nestedImages++;
}
}
}
}
/** Create compressed versions of all unique images. */
private Map<String, PDImageXObject> createCompressedImages(
PDDocument doc,
Map<String, List<ImageReference>> uniqueImages,
double scaleFactor,
float jpegQuality,
boolean convertToGrayscale,
CompressionStats stats)
throws IOException {
Map<String, PDImageXObject> compressedVersions = new HashMap<>();
// Process each unique image exactly once
for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
String imageHash = entry.getKey();
List<ImageReference> references = entry.getValue();
if (references.isEmpty()) continue;
// Get the first instance of this image
PDImageXObject originalImage = getOriginalImage(doc, references.get(0));
// Track original size
int originalSize = (int) originalImage.getCOSObject().getLength();
stats.totalOriginalBytes += originalSize;
// Process this unique image
PDImageXObject compressedImage =
compressImage(
doc,
originalImage,
originalSize,
scaleFactor,
jpegQuality,
convertToGrayscale);
if (compressedImage != null) {
// Store the compressed version in our map
compressedVersions.put(imageHash, compressedImage);
stats.compressedImages++;
// Update compression stats
int compressedSize = (int) compressedImage.getCOSObject().getLength();
stats.totalCompressedBytes += compressedSize * references.size();
double reductionPercentage = 100.0 - ((compressedSize * 100.0) / originalSize);
log.info(
"Image hash {}: Compressed from {} to {} (reduced by {}%)",
imageHash,
GeneralUtils.formatBytes(originalSize),
GeneralUtils.formatBytes(compressedSize),
String.format("%.1f", reductionPercentage));
} else {
log.info("Image hash {}: Not suitable for compression, skipping", imageHash);
stats.totalCompressedBytes += originalSize * references.size();
stats.skippedImages++;
}
}
return compressedVersions;
}
/** Get the original image from an image reference. */
private PDImageXObject getOriginalImage(PDDocument doc, ImageReference ref) throws IOException {
if (ref instanceof NestedImageReference) {
// Get the nested image from within a form XObject
NestedImageReference nestedRef = (NestedImageReference) ref;
PDPage page = doc.getPage(nestedRef.pageNum);
PDResources pageResources = page.getResources();
// Get the form XObject
PDFormXObject formXObj = (PDFormXObject) pageResources.getXObject(nestedRef.formName);
// Get the nested image from the form's resources
PDResources formResources = formXObj.getResources();
return (PDImageXObject) formResources.getXObject(nestedRef.imageName);
} else {
// Get direct image from page resources
PDPage page = doc.getPage(ref.pageNum);
PDResources resources = page.getResources();
return (PDImageXObject) resources.getXObject(ref.name);
}
}
/** Process an individual image and return a compressed version if beneficial. */
private PDImageXObject compressImage(
PDDocument doc,
PDImageXObject originalImage,
int originalSize,
double scaleFactor,
float jpegQuality,
boolean convertToGrayscale)
throws IOException {
// Process and compress the image
BufferedImage processedImage =
processAndCompressImage(
originalImage, scaleFactor, jpegQuality, convertToGrayscale);
if (processedImage == null) {
return null;
}
// Convert to bytes for storage
byte[] compressedData = convertToBytes(processedImage, jpegQuality);
// Check if compression is beneficial
if (compressedData.length < originalSize || convertToGrayscale) {
// Create a compressed version
return PDImageXObject.createFromByteArray(
doc, compressedData, originalImage.getCOSObject().toString());
}
return null;
}
/** Replace all instances of original images with their compressed versions. */
private void replaceImages(
PDDocument doc,
Map<String, List<ImageReference>> uniqueImages,
Map<String, PDImageXObject> compressedVersions,
CompressionStats stats)
throws IOException {
for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
String imageHash = entry.getKey();
List<ImageReference> references = entry.getValue();
// Skip if no compressed version exists
PDImageXObject compressedImage = compressedVersions.get(imageHash);
if (compressedImage == null) continue;
// Replace ALL instances with the compressed version
for (ImageReference ref : references) {
replaceImageReference(doc, ref, compressedImage);
}
}
}
/** Replace a specific image reference with a compressed version. */
private void replaceImageReference(
PDDocument doc, ImageReference ref, PDImageXObject compressedImage) throws IOException {
if (ref instanceof NestedImageReference) {
// Replace nested image within form XObject
NestedImageReference nestedRef = (NestedImageReference) ref;
PDPage page = doc.getPage(nestedRef.pageNum);
PDResources pageResources = page.getResources();
// Get the form XObject
PDFormXObject formXObj = (PDFormXObject) pageResources.getXObject(nestedRef.formName);
// Replace the nested image in the form's resources
PDResources formResources = formXObj.getResources();
formResources.put(nestedRef.imageName, compressedImage);
log.info(
"Replaced nested image '{}' in form '{}' on page {} with compressed version",
nestedRef.imageName.getName(),
nestedRef.formName.getName(),
nestedRef.pageNum + 1);
} else {
// Replace direct image in page resources
PDPage page = doc.getPage(ref.pageNum);
PDResources resources = page.getResources();
resources.put(ref.name, compressedImage);
log.info("Replaced direct image on page {} with compressed version", ref.pageNum + 1);
}
}
/** Log compression statistics. */
private void logCompressionStats(CompressionStats stats, long originalFileSize) {
// Calculate image reduction percentage
double overallImageReduction =
stats.totalOriginalBytes > 0
? 100.0 - ((stats.totalCompressedBytes * 100.0) / stats.totalOriginalBytes)
: 0;
int duplicatedImages = stats.totalImages - stats.uniqueImagesCount;
log.info(
"Image compression summary - Total unique: {}, Compressed: {}, Skipped: {}, Duplicates: {}, Nested: {}",
stats.uniqueImagesCount,
stats.compressedImages,
stats.skippedImages,
duplicatedImages,
stats.nestedImages);
log.info(
"Total original image size: {}, compressed: {} (reduced by {}%)",
GeneralUtils.formatBytes(stats.totalOriginalBytes),
GeneralUtils.formatBytes(stats.totalCompressedBytes),
String.format("%.1f", overallImageReduction));
}
private BufferedImage convertToGrayscale(BufferedImage image) { private BufferedImage convertToGrayscale(BufferedImage image) {
BufferedImage grayImage = BufferedImage grayImage =
new BufferedImage( new BufferedImage(
@ -523,23 +630,6 @@ public class CompressController {
} }
} }
/**
 * Computes the MD5 digest of an image XObject's decoded pixel data.
 *
 * @param image the PDF image object to fingerprint
 * @return the raw MD5 digest bytes of the image data
 * @throws IOException if the image cannot be decoded
 */
private byte[] generateImageMD5(PDImageXObject image) throws IOException {
    byte[] pixelData = ImageProcessingUtils.getImageData(image.getImage());
    return generatMD5(pixelData);
}
/**
 * Generates a hex-encoded MD5 hash string for the given bytes.
 *
 * <p>On any failure the error is logged and an identity-based fallback token is
 * returned so callers still receive a usable (best-effort unique) key.
 *
 * @param data bytes to hash
 * @return hex MD5 string, or a {@code "fallback-..."} token if hashing fails
 */
private String generateHashFromBytes(byte[] data) {
    try {
        // Delegate to the shared MD5 helper, then render the digest as hex
        return bytesToHexString(generatMD5(data));
    } catch (Exception e) {
        log.error("Error generating hash from bytes", e);
        // Identity hash is per-object, not per-content — acceptable only as a
        // last-resort uniqueness token when the digest cannot be computed
        return "fallback-" + System.identityHashCode(data);
    }
}
// Updated scale factor method for levels 4-9 // Updated scale factor method for levels 4-9
private double getScaleFactorForLevel(int optimizeLevel) { private double getScaleFactorForLevel(int optimizeLevel) {
return switch (optimizeLevel) { return switch (optimizeLevel) {
@ -611,6 +701,7 @@ public class CompressController {
if (qpdfEnabled && optimizeLevel <= 3) { if (qpdfEnabled && optimizeLevel <= 3) {
optimizeLevel = 4; optimizeLevel = 4;
} }
while (!sizeMet && optimizeLevel <= 9) { while (!sizeMet && optimizeLevel <= 9) {
// Apply image compression for levels 4-9 // Apply image compression for levels 4-9
if ((optimizeLevel >= 4 || Boolean.TRUE.equals(convertToGrayscale)) if ((optimizeLevel >= 4 || Boolean.TRUE.equals(convertToGrayscale))
@ -619,7 +710,8 @@ public class CompressController {
float jpegQuality = getJpegQualityForLevel(optimizeLevel); float jpegQuality = getJpegQualityForLevel(optimizeLevel);
// Use the returned path from compressImagesInPDF // Use the returned path from compressImagesInPDF
Path compressedImageFile = compressImagesInPDF( Path compressedImageFile =
compressImagesInPDF(
currentFile, currentFile,
scaleFactor, scaleFactor,
jpegQuality, jpegQuality,
@ -633,11 +725,81 @@ public class CompressController {
// Apply QPDF compression for all levels // Apply QPDF compression for all levels
if (!qpdfCompressionApplied && qpdfEnabled) { if (!qpdfCompressionApplied && qpdfEnabled) {
applyQpdfCompression(request, optimizeLevel, currentFile, tempFiles);
qpdfCompressionApplied = true;
} else if (!qpdfCompressionApplied) {
// If QPDF is disabled, mark as applied and log
if (!qpdfEnabled) {
log.info("Skipping QPDF compression as QPDF group is disabled");
}
qpdfCompressionApplied = true;
}
// Check if file size is within expected size or not auto mode
long outputFileSize = Files.size(currentFile);
if (outputFileSize <= expectedOutputSize || !autoMode) {
sizeMet = true;
} else {
int newOptimizeLevel =
incrementOptimizeLevel(
optimizeLevel, outputFileSize, expectedOutputSize);
// Check if we can't increase the level further
if (newOptimizeLevel == optimizeLevel) {
if (autoMode) {
log.info(
"Maximum optimization level reached without meeting target size.");
sizeMet = true;
}
} else {
// Reset flags for next iteration with higher optimization level
imageCompressionApplied = false;
qpdfCompressionApplied = false;
optimizeLevel = newOptimizeLevel;
}
}
}
// Check if optimized file is larger than the original
long finalFileSize = Files.size(currentFile);
if (finalFileSize >= inputFileSize) {
log.warn(
"Optimized file is larger than the original. Using the original file instead.");
// Use the stored reference to the original file
currentFile = originalFile;
}
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_Optimized.pdf";
return WebResponseUtils.pdfDocToWebResponse(
pdfDocumentFactory.load(currentFile.toFile()), outputFilename);
} finally {
// Clean up all temporary files
for (Path tempFile : tempFiles) {
try {
Files.deleteIfExists(tempFile);
} catch (IOException e) {
log.warn("Failed to delete temporary file: " + tempFile, e);
}
}
}
}
/** Apply QPDF compression to a PDF file */
private void applyQpdfCompression(
OptimizePdfRequest request, int optimizeLevel, Path currentFile, List<Path> tempFiles)
throws IOException {
long preQpdfSize = Files.size(currentFile); long preQpdfSize = Files.size(currentFile);
log.info("Pre-QPDF file size: {}", GeneralUtils.formatBytes(preQpdfSize)); log.info("Pre-QPDF file size: {}", GeneralUtils.formatBytes(preQpdfSize));
// Map optimization levels to QPDF compression levels // Map optimization levels to QPDF compression levels
int qpdfCompressionLevel = optimizeLevel <= 3 int qpdfCompressionLevel =
optimizeLevel <= 3
? optimizeLevel * 3 // Level 1->3, 2->6, 3->9 ? optimizeLevel * 3 // Level 1->3, 2->6, 3->9
: 9; // Max compression for levels 4-9 : 9; // Max compression for levels 4-9
@ -663,85 +825,29 @@ public class CompressController {
ProcessExecutorResult returnCode = null; ProcessExecutorResult returnCode = null;
try { try {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF) returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
.runCommandWithOutputHandling(command); .runCommandWithOutputHandling(command);
qpdfCompressionApplied = true;
// Update current file to the QPDF output // Update current file to the QPDF output
currentFile = qpdfOutputFile; Files.copy(qpdfOutputFile, currentFile, StandardCopyOption.REPLACE_EXISTING);
long postQpdfSize = Files.size(currentFile); long postQpdfSize = Files.size(currentFile);
double qpdfReduction = 100.0 - ((postQpdfSize * 100.0) / preQpdfSize); double qpdfReduction = 100.0 - ((postQpdfSize * 100.0) / preQpdfSize);
log.info( log.info(
"Post-QPDF file size: {} (reduced by {}%)", "Post-QPDF file size: {} (reduced by {}%)",
GeneralUtils.formatBytes(postQpdfSize), GeneralUtils.formatBytes(postQpdfSize), String.format("%.1f", qpdfReduction));
String.format("%.1f", qpdfReduction));
} catch (Exception e) { } catch (Exception e) {
if (returnCode != null && returnCode.getRc() != 3) { if (returnCode != null && returnCode.getRc() != 3) {
throw e; throw new IOException("QPDF command failed", e);
} }
// If QPDF fails, keep using the current file // If QPDF fails, keep using the current file
log.warn("QPDF compression failed, continuing with current file"); log.warn("QPDF compression failed, continuing with current file", e);
}
} else if (!qpdfCompressionApplied) {
// If QPDF is disabled, mark as applied and log
if (!qpdfEnabled) {
log.info("Skipping QPDF compression as QPDF group is disabled");
}
qpdfCompressionApplied = true;
}
// Check if file size is within expected size or not auto mode
long outputFileSize = Files.size(currentFile);
if (outputFileSize <= expectedOutputSize || !autoMode) {
sizeMet = true;
} else {
int newOptimizeLevel = incrementOptimizeLevel(
optimizeLevel, outputFileSize, expectedOutputSize);
// Check if we can't increase the level further
if (newOptimizeLevel == optimizeLevel) {
if (autoMode) {
log.info("Maximum optimization level reached without meeting target size.");
sizeMet = true;
}
} else {
// Reset flags for next iteration with higher optimization level
imageCompressionApplied = false;
qpdfCompressionApplied = false;
optimizeLevel = newOptimizeLevel;
}
}
}
// Check if optimized file is larger than the original
long finalFileSize = Files.size(currentFile);
if (finalFileSize >= inputFileSize) {
log.warn("Optimized file is larger than the original. Using the original file instead.");
// Use the stored reference to the original file
currentFile = originalFile;
}
String outputFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_Optimized.pdf";
return WebResponseUtils.pdfDocToWebResponse(
pdfDocumentFactory.load(currentFile.toFile()), outputFilename);
} finally {
// Clean up all temporary files
for (Path tempFile : tempFiles) {
try {
Files.deleteIfExists(tempFile);
} catch (IOException e) {
log.warn("Failed to delete temporary file: " + tempFile, e);
}
}
} }
} }
/** Determine the appropriate optimization level based on the desired size reduction ratio */
private int determineOptimizeLevel(double sizeReductionRatio) { private int determineOptimizeLevel(double sizeReductionRatio) {
if (sizeReductionRatio > 0.9) return 1; if (sizeReductionRatio > 0.9) return 1;
if (sizeReductionRatio > 0.8) return 2; if (sizeReductionRatio > 0.8) return 2;
@ -754,6 +860,7 @@ public class CompressController {
return 9; return 9;
} }
/** Increment optimization level based on current size vs target size */
private int incrementOptimizeLevel(int currentLevel, long currentSize, long targetSize) { private int incrementOptimizeLevel(int currentLevel, long currentSize, long targetSize) {
double currentRatio = currentSize / (double) targetSize; double currentRatio = currentSize / (double) targetSize;
log.info("Current compression ratio: {}", String.format("%.2f", currentRatio)); log.info("Current compression ratio: {}", String.format("%.2f", currentRatio));

View File

@ -45,8 +45,8 @@ public class PipelineController {
private final PostHogService postHogService; private final PostHogService postHogService;
public PipelineController(PipelineProcessor processor, ObjectMapper objectMapper, public PipelineController(
PostHogService postHogService) { PipelineProcessor processor, ObjectMapper objectMapper, PostHogService postHogService) {
this.processor = processor; this.processor = processor;
this.objectMapper = objectMapper; this.objectMapper = objectMapper;
this.postHogService = postHogService; this.postHogService = postHogService;
@ -63,8 +63,8 @@ public class PipelineController {
PipelineConfig config = objectMapper.readValue(jsonString, PipelineConfig.class); PipelineConfig config = objectMapper.readValue(jsonString, PipelineConfig.class);
log.info("Received POST request to /handleData with {} files", files.length); log.info("Received POST request to /handleData with {} files", files.length);
List<String> operationNames =
List<String> operationNames = config.getOperations().stream() config.getOperations().stream()
.map(PipelineOperation::getOperation) .map(PipelineOperation::getOperation)
.collect(Collectors.toList()); .collect(Collectors.toList());

View File

@ -21,7 +21,6 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import org.springframework.core.io.ByteArrayResource; import org.springframework.core.io.ByteArrayResource;
@ -155,9 +154,8 @@ public class PipelineDirectoryProcessor {
return; return;
} }
List<String> operationNames = config.getOperations().stream() List<String> operationNames =
.map(PipelineOperation::getOperation) config.getOperations().stream().map(PipelineOperation::getOperation).toList();
.toList();
Map<String, Object> properties = new HashMap<>(); Map<String, Object> properties = new HashMap<>();
properties.put("operations", operationNames); properties.put("operations", operationNames);
properties.put("fileCount", files.length); properties.put("fileCount", files.length);
@ -263,7 +261,8 @@ public class PipelineDirectoryProcessor {
try { try {
Thread.sleep(retryDelayMs * (int) Math.pow(2, attempt - 1)); Thread.sleep(retryDelayMs * (int) Math.pow(2, attempt - 1));
} catch (InterruptedException e1) { } catch (InterruptedException e1) {
log.error("prepareFilesForProcessing failure",e); } log.error("prepareFilesForProcessing failure", e);
}
} }
} }
} }

View File

@ -14,7 +14,7 @@ public class OptimizePdfRequest extends PDFFile {
@Schema( @Schema(
description = description =
"The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.", "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.",
allowableValues = {"1", "2", "3", "4", "5"}) allowableValues = {"1", "2", "3", "4", "5", "6", "7", "8", "9"})
private Integer optimizeLevel; private Integer optimizeLevel;
@Schema(description = "The expected output size, e.g. '100MB', '25KB', etc.") @Schema(description = "The expected output size, e.g. '100MB', '25KB', etc.")

View File

@ -261,7 +261,6 @@ public class CustomPDFDocumentFactory {
removePassword(doc); removePassword(doc);
} }
private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache) private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache)
throws IOException { throws IOException {
return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache); return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);

View File

@ -66,7 +66,6 @@ public class MetricsAggregatorService {
return; return;
} }
if (uri.contains(".txt")) { if (uri.contains(".txt")) {
return; return;
} }