cleanups and fix for #3207

This commit is contained in:
Anthony Stirling 2025-03-20 00:06:10 +00:00
parent 52d4adc473
commit f97f1d792d
8 changed files with 478 additions and 403 deletions

View File

@ -54,16 +54,16 @@ public class EndpointConfiguration {
log.debug("Group '{}' does not exist or has no endpoints", group);
return false;
}
for (String endpoint : endpoints) {
if (!isEndpointEnabled(endpoint)) {
return false;
}
}
return true;
}
/** Registers an endpoint under the given group, creating the group on first use. */
public void addEndpointToGroup(String group, String endpoint) {
    Set<String> members = endpointGroups.computeIfAbsent(group, key -> new HashSet<>());
    members.add(endpoint);
}

View File

@ -1,7 +1,6 @@
package stirling.software.SPDF.config;
import java.lang.reflect.Method;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@ -43,51 +42,39 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
private void discoverEndpoints() {
try {
// Get all request mapping beans from the application context
Map<String, RequestMappingHandlerMapping> mappings =
applicationContext.getBeansOfType(RequestMappingHandlerMapping.class);
// Process each mapping bean
for (Map.Entry<String, RequestMappingHandlerMapping> entry : mappings.entrySet()) {
RequestMappingHandlerMapping mapping = entry.getValue();
// Get all handler methods registered in this mapping
Map<RequestMappingInfo, HandlerMethod> handlerMethods = mapping.getHandlerMethods();
// Process each handler method
for (Map.Entry<RequestMappingInfo, HandlerMethod> handlerEntry :
handlerMethods.entrySet()) {
RequestMappingInfo mappingInfo = handlerEntry.getKey();
HandlerMethod handlerMethod = handlerEntry.getValue();
// Check if the method handles GET requests
boolean isGetHandler = false;
try {
Set<RequestMethod> methods = mappingInfo.getMethodsCondition().getMethods();
// Either explicitly handles GET or handles all methods (empty set)
isGetHandler = methods.isEmpty() || methods.contains(RequestMethod.GET);
} catch (Exception e) {
// If we can't determine methods, assume it could handle GET
isGetHandler = true;
}
if (isGetHandler) {
// Since we know getDirectPaths works, use it directly
Set<String> patterns = extractPatternsUsingDirectPaths(mappingInfo);
// If that fails, try string parsing as fallback
if (patterns.isEmpty()) {
patterns = extractPatternsFromString(mappingInfo);
}
// Add all valid patterns
validGetEndpoints.addAll(patterns);
}
}
}
if (validGetEndpoints.isEmpty()) {
// If we still couldn't find any endpoints, add some common ones as a fallback
logger.warn("No endpoints discovered. Adding common endpoints as fallback.");
validGetEndpoints.add("/");
validGetEndpoints.add("/api/**");
@ -98,12 +85,9 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
}
}
/**
* Extract patterns using the getDirectPaths method that works in this environment
*/
private Set<String> extractPatternsUsingDirectPaths(RequestMappingInfo mappingInfo) {
Set<String> patterns = new HashSet<>();
try {
Method getDirectPathsMethod = mappingInfo.getClass().getMethod("getDirectPaths");
Object result = getDirectPathsMethod.invoke(mappingInfo);
@ -113,9 +97,9 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
patterns.addAll(resultSet);
}
} catch (Exception e) {
// Just return empty set if method not found or fails
// Return empty set if method not found or fails
}
return patterns;
}
@ -125,9 +109,7 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
String infoString = mappingInfo.toString();
if (infoString.contains("{")) {
String patternsSection =
infoString.substring(
infoString.indexOf("{") + 1,
infoString.indexOf("}"));
infoString.substring(infoString.indexOf("{") + 1, infoString.indexOf("}"));
for (String pattern : patternsSection.split(",")) {
pattern = pattern.trim();
@ -137,39 +119,38 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
}
}
} catch (Exception e) {
// Just return empty set if parsing fails
// Return empty set if parsing fails
}
return patterns;
}
/**
 * Checks whether a URI corresponds to a discovered valid GET endpoint.
 *
 * <p>Endpoints are discovered lazily on first call. If discovery yields nothing, every URI is
 * treated as valid so the application fails open rather than rejecting all requests.
 *
 * @param uri the request URI to validate
 * @return true if the URI matches a known GET endpoint exactly, by wildcard/path-variable
 *     prefix, or segment-by-segment
 */
public boolean isValidGetEndpoint(String uri) {
    // Lazily populate the endpoint set on first use.
    if (!endpointsDiscovered) {
        discoverEndpoints();
        endpointsDiscovered = true;
    }
    // With nothing discovered we cannot distinguish valid from invalid, so allow everything.
    if (validGetEndpoints.isEmpty()) {
        logger.warn("No valid endpoints were discovered. Assuming all GET endpoints are valid.");
        return true;
    }
    // Exact match first, then wildcard/path-variable prefix match, then segment matching.
    return validGetEndpoints.contains(uri)
            || matchesWildcardOrPathVariable(uri)
            || matchesPathSegments(uri);
}
private boolean matchesWildcardOrPathVariable(String uri) {
for (String pattern : validGetEndpoints) {
if (pattern.contains("*") || pattern.contains("{")) {
int wildcardIndex = pattern.indexOf('*');
int variableIndex = pattern.indexOf('{');
// Find the earliest special character
int cutoffIndex;
if (wildcardIndex < 0) {
cutoffIndex = variableIndex;
@ -179,29 +160,26 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
cutoffIndex = Math.min(wildcardIndex, variableIndex);
}
// Get the static part of the pattern
String staticPrefix = pattern.substring(0, cutoffIndex);
// If the URI starts with this prefix, consider it a match
if (uri.startsWith(staticPrefix)) {
return true;
}
}
}
return false;
}
// For patterns without wildcards or variables, try path-segment-by-segment matching
private boolean matchesPathSegments(String uri) {
for (String pattern : validGetEndpoints) {
if (!pattern.contains("*") && !pattern.contains("{")) {
// Split the pattern and URI into path segments
String[] patternSegments = pattern.split("/");
String[] uriSegments = uri.split("/");
// If URI has fewer segments than the pattern, it can't match
if (uriSegments.length < patternSegments.length) {
continue;
}
// Check each segment
boolean match = true;
for (int i = 0; i < patternSegments.length; i++) {
if (!patternSegments[i].equals(uriSegments[i])) {
@ -215,31 +193,24 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEv
}
}
}
// If no match was found, the URI is not valid
return false;
}
/**
 * Returns all discovered valid GET endpoints, triggering discovery on first use.
 *
 * @return a defensive copy of the endpoint set; callers may mutate it freely
 */
public Set<String> getValidGetEndpoints() {
    if (!endpointsDiscovered) {
        discoverEndpoints();
        endpointsDiscovered = true;
    }
    // Copy so callers cannot mutate the internal set.
    return new HashSet<>(validGetEndpoints);
}
/** Debug helper: logs every discovered GET endpoint in sorted order. */
private void logAllEndpoints() {
    logger.info("=== BEGIN: All discovered GET endpoints ===");
    for (String endpoint : new TreeSet<>(validGetEndpoints)) {
        logger.info("Endpoint: {}", endpoint);
    }
    logger.info("=== END: All discovered GET endpoints ===");
}
}
}

View File

@ -25,24 +25,13 @@ import javax.imageio.ImageWriter;
import javax.imageio.plugins.jpeg.JPEGImageWriteParam;
import javax.imageio.stream.ImageOutputStream;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDInlineImage;
import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern;
import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern;
import org.apache.pdfbox.pdmodel.graphics.shading.PDShading;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
@ -58,6 +47,7 @@ import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.config.EndpointConfiguration;
import stirling.software.SPDF.model.api.misc.OptimizePdfRequest;
import stirling.software.SPDF.service.CustomPDFDocumentFactory;
@ -75,13 +65,14 @@ public class CompressController {
private final CustomPDFDocumentFactory pdfDocumentFactory;
private final boolean qpdfEnabled;
public CompressController(CustomPDFDocumentFactory pdfDocumentFactory, EndpointConfiguration endpointConfiguration) {
public CompressController(
CustomPDFDocumentFactory pdfDocumentFactory,
EndpointConfiguration endpointConfiguration) {
this.pdfDocumentFactory = pdfDocumentFactory;
this.qpdfEnabled = endpointConfiguration.isGroupEnabled("qpdf");
}
@Data
@AllArgsConstructor
@NoArgsConstructor
@ -89,14 +80,25 @@ public class CompressController {
int pageNum; // Page number where the image appears
COSName name; // The name used to reference this image
}
@Data
@AllArgsConstructor
@NoArgsConstructor
private static class NestedImageReference extends ImageReference {
COSName formName; // Name of the form XObject containing the image
COSName imageName; // Name of the image within the form
COSName formName; // Name of the form XObject containing the image
COSName imageName; // Name of the image within the form
}
// Image compression statistics for reporting
private static class CompressionStats {
int totalImages = 0;
int nestedImages = 0;
int uniqueImagesCount = 0;
int compressedImages = 0;
int skippedImages = 0;
long totalOriginalBytes = 0;
long totalCompressedBytes = 0;
}
public Path compressImagesInPDF(
Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
@ -111,228 +113,24 @@ public class CompressController {
GeneralUtils.formatBytes(originalFileSize));
try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) {
// Collect all unique images by content hash
Map<String, List<ImageReference>> uniqueImages = new HashMap<>();
Map<String, PDImageXObject> compressedVersions = new HashMap<>();
// Step 1: Find all unique images in the document
Map<String, List<ImageReference>> uniqueImages = findImages(doc);
int totalImages = 0;
int nestedImages = 0;
// Get statistics
CompressionStats stats = new CompressionStats();
stats.uniqueImagesCount = uniqueImages.size();
calculateImageStats(uniqueImages, stats);
// FIRST PASS: Collect all images (direct and nested)
for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
PDPage page = doc.getPage(pageNum);
PDResources res = page.getResources();
if (res == null || res.getXObjectNames() == null) continue;
// Step 2: Create compressed versions of unique images
Map<String, PDImageXObject> compressedVersions =
createCompressedImages(
doc, uniqueImages, scaleFactor, jpegQuality, convertToGrayscale, stats);
// Process direct XObjects on page
for (COSName name : res.getXObjectNames()) {
PDXObject xobj = res.getXObject(name);
// Direct image
if (xobj instanceof PDImageXObject) {
totalImages++;
PDImageXObject image = (PDImageXObject) xobj;
String imageHash = generateImageHash(image);
ImageReference ref = new ImageReference();
ref.pageNum = pageNum;
ref.name = name;
log.info("Found direct image '{}' on page {} - {}x{}",
name.getName(), pageNum + 1, image.getWidth(), image.getHeight());
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref);
}
// Form XObject may contain nested images
else if (xobj instanceof org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject) {
org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject formXObj =
(org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject) xobj;
PDResources formResources = formXObj.getResources();
if (formResources != null && formResources.getXObjectNames() != null) {
// Process nested XObjects within the form
log.info("Checking form XObject '{}' on page {} for nested images",
name.getName(), pageNum + 1);
for (COSName nestedName : formResources.getXObjectNames()) {
PDXObject nestedXobj = formResources.getXObject(nestedName);
if (nestedXobj instanceof PDImageXObject) {
nestedImages++;
totalImages++;
PDImageXObject nestedImage = (PDImageXObject) nestedXobj;
log.info("Found nested image '{}' in form '{}' on page {} - {}x{}",
nestedName.getName(), name.getName(), pageNum + 1,
nestedImage.getWidth(), nestedImage.getHeight());
// Create a specialized reference for the nested image
NestedImageReference nestedRef = new NestedImageReference();
nestedRef.pageNum = pageNum;
nestedRef.formName = name;
nestedRef.imageName = nestedName;
String imageHash = generateImageHash(nestedImage);
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(nestedRef);
}
}
}
}
}
}
int uniqueImagesCount = uniqueImages.size();
int duplicatedImages = totalImages - uniqueImagesCount;
log.info(
"Found {} unique images and {} duplicated instances across {} pages ({} nested images in form XObjects)",
uniqueImagesCount,
duplicatedImages,
doc.getNumberOfPages(),
nestedImages);
// SECOND PASS: Process each unique image exactly once
int compressedImages = 0;
int skippedImages = 0;
long totalOriginalBytes = 0;
long totalCompressedBytes = 0;
for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
String imageHash = entry.getKey();
List<ImageReference> references = entry.getValue();
if (references.isEmpty()) continue;
// Get the first instance of this image
ImageReference firstRef = references.get(0);
PDImageXObject originalImage;
// Handle differently based on whether it's a direct or nested image
if (firstRef instanceof NestedImageReference) {
// Get the nested image from within a form XObject
NestedImageReference nestedRef = (NestedImageReference) firstRef;
PDPage firstPage = doc.getPage(nestedRef.pageNum);
PDResources pageResources = firstPage.getResources();
// Get the form XObject
org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject formXObj =
(org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject) pageResources.getXObject(nestedRef.formName);
// Get the nested image from the form's resources
PDResources formResources = formXObj.getResources();
originalImage = (PDImageXObject) formResources.getXObject(nestedRef.imageName);
log.info("Processing nested image '{}' from form '{}'",
nestedRef.imageName.getName(), nestedRef.formName.getName());
} else {
// Get direct image from page resources
PDPage firstPage = doc.getPage(firstRef.pageNum);
PDResources firstPageResources = firstPage.getResources();
originalImage = (PDImageXObject) firstPageResources.getXObject(firstRef.name);
log.debug("Processing direct image '{}'", firstRef.name.getName());
}
// Track original size
int originalSize = (int) originalImage.getCOSObject().getLength();
totalOriginalBytes += originalSize;
// Process this unique image once
BufferedImage processedImage =
processAndCompressImage(
originalImage, scaleFactor, jpegQuality, convertToGrayscale);
if (processedImage != null) {
// Convert to bytes for storage
byte[] compressedData = convertToBytes(processedImage, jpegQuality);
// Check if compression is beneficial
if (compressedData.length < originalSize || convertToGrayscale) {
// Create a single compressed version
PDImageXObject compressedImage =
PDImageXObject.createFromByteArray(
doc,
compressedData,
originalImage.getCOSObject().toString());
// Store the compressed version only once in our map
compressedVersions.put(imageHash, compressedImage);
// Report compression stats
double reductionPercentage =
100.0 - ((compressedData.length * 100.0) / originalSize);
log.info(
"Image hash {}: Compressed from {} to {} (reduced by {}%)",
imageHash,
GeneralUtils.formatBytes(originalSize),
GeneralUtils.formatBytes(compressedData.length),
String.format("%.1f", reductionPercentage));
// Replace ALL instances with the compressed version
for (ImageReference ref : references) {
if (ref instanceof NestedImageReference) {
// Replace nested image within form XObject
NestedImageReference nestedRef = (NestedImageReference) ref;
PDPage page = doc.getPage(nestedRef.pageNum);
PDResources pageResources = page.getResources();
// Get the form XObject
org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject formXObj =
(org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject)
pageResources.getXObject(nestedRef.formName);
// Replace the nested image in the form's resources
PDResources formResources = formXObj.getResources();
formResources.put(nestedRef.imageName, compressedImage);
log.info(
"Replaced nested image '{}' in form '{}' on page {} with compressed version",
nestedRef.imageName.getName(),
nestedRef.formName.getName(),
nestedRef.pageNum + 1);
} else {
// Replace direct image in page resources
PDPage page = doc.getPage(ref.pageNum);
PDResources resources = page.getResources();
resources.put(ref.name, compressedImage);
log.info(
"Replaced direct image on page {} with compressed version",
ref.pageNum + 1);
}
}
totalCompressedBytes += compressedData.length * references.size();
compressedImages++;
} else {
log.info("Image hash {}: Compression not beneficial, skipping", imageHash);
totalCompressedBytes += originalSize * references.size();
skippedImages++;
}
} else {
log.info("Image hash {}: Not suitable for compression, skipping", imageHash);
totalCompressedBytes += originalSize * references.size();
skippedImages++;
}
}
// Step 3: Replace all instances with compressed versions
replaceImages(doc, uniqueImages, compressedVersions, stats);
// Log compression statistics
double overallImageReduction =
totalOriginalBytes > 0
? 100.0 - ((totalCompressedBytes * 100.0) / totalOriginalBytes)
: 0;
log.info(
"Image compression summary - Total unique: {}, Compressed: {}, Skipped: {}, Duplicates: {}, Nested: {}",
uniqueImagesCount,
compressedImages,
skippedImages,
duplicatedImages,
nestedImages);
log.info(
"Total original image size: {}, compressed: {} (reduced by {}%)",
GeneralUtils.formatBytes(totalOriginalBytes),
GeneralUtils.formatBytes(totalCompressedBytes),
String.format("%.1f", overallImageReduction));
logCompressionStats(stats, originalFileSize);
// Free memory before saving
compressedVersions.clear();
@ -354,6 +152,315 @@ public class CompressController {
}
}
/**
 * Finds all images in the document, both page-level and nested inside form XObjects,
 * grouped by content hash so duplicates share one entry.
 *
 * @param doc the loaded PDF document
 * @return map from image content hash to every reference of that image
 * @throws IOException if an XObject or its data cannot be read
 */
private Map<String, List<ImageReference>> findImages(PDDocument doc) throws IOException {
    Map<String, List<ImageReference>> imagesByHash = new HashMap<>();
    for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
        PDPage page = doc.getPage(pageNum);
        PDResources res = page.getResources();
        if (res == null || res.getXObjectNames() == null) {
            continue;
        }
        for (COSName name : res.getXObjectNames()) {
            PDXObject xobj = res.getXObject(name);
            if (isImage(xobj)) {
                PDImageXObject image = (PDImageXObject) xobj;
                addDirectImage(pageNum, name, image, imagesByHash);
                log.info(
                        "Found direct image '{}' on page {} - {}x{}",
                        name.getName(),
                        pageNum + 1,
                        image.getWidth(),
                        image.getHeight());
            } else if (isForm(xobj)) {
                // Form XObjects carry their own resources and may hold nested images.
                checkFormForImages(pageNum, name, (PDFormXObject) xobj, imagesByHash);
            }
        }
    }
    return imagesByHash;
}
/** Returns true when the XObject is a bitmap image. */
private boolean isImage(PDXObject xobj) {
    return xobj instanceof PDImageXObject;
}
/** Returns true when the XObject is a form XObject (which may contain nested images). */
private boolean isForm(PDXObject xobj) {
    return xobj instanceof PDFormXObject;
}
/**
 * Records a page-level image under its content hash.
 *
 * @param pageNum zero-based page index where the image appears
 * @param name the resource name referencing the image on the page
 * @param image the image XObject itself
 * @param uniqueImages hash-to-references map being built
 * @return the reference that was created and stored
 * @throws IOException if hashing the image data fails
 */
private ImageReference addDirectImage(
        int pageNum,
        COSName name,
        PDImageXObject image,
        Map<String, List<ImageReference>> uniqueImages)
        throws IOException {
    ImageReference reference = new ImageReference();
    reference.pageNum = pageNum;
    reference.name = name;
    String hash = generateImageHash(image);
    List<ImageReference> sameContent = uniqueImages.computeIfAbsent(hash, key -> new ArrayList<>());
    sameContent.add(reference);
    return reference;
}
/**
 * Scans a form XObject's resources and records any nested images it contains.
 *
 * @param pageNum zero-based page index hosting the form
 * @param formName resource name of the form on the page
 * @param formXObj the form XObject to inspect
 * @param uniqueImages hash-to-references map being built
 * @throws IOException if reading or hashing a nested image fails
 */
private void checkFormForImages(
        int pageNum,
        COSName formName,
        PDFormXObject formXObj,
        Map<String, List<ImageReference>> uniqueImages)
        throws IOException {
    PDResources formResources = formXObj.getResources();
    if (formResources == null || formResources.getXObjectNames() == null) {
        return;
    }
    log.info(
            "Checking form XObject '{}' on page {} for nested images",
            formName.getName(),
            pageNum + 1);
    for (COSName nestedName : formResources.getXObjectNames()) {
        PDXObject nestedXobj = formResources.getXObject(nestedName);
        if (!isImage(nestedXobj)) {
            continue;
        }
        PDImageXObject nestedImage = (PDImageXObject) nestedXobj;
        log.info(
                "Found nested image '{}' in form '{}' on page {} - {}x{}",
                nestedName.getName(),
                formName.getName(),
                pageNum + 1,
                nestedImage.getWidth(),
                nestedImage.getHeight());
        // A nested image needs both the form name and the image name to be resolved later.
        NestedImageReference ref = new NestedImageReference();
        ref.pageNum = pageNum;
        ref.formName = formName;
        ref.imageName = nestedName;
        String hash = generateImageHash(nestedImage);
        uniqueImages.computeIfAbsent(hash, key -> new ArrayList<>()).add(ref);
    }
}
/**
 * Tallies total and nested image counts into the supplied stats object.
 *
 * @param uniqueImages hash-to-references map produced by image discovery
 * @param stats accumulator whose totalImages and nestedImages fields are incremented
 */
private void calculateImageStats(
        Map<String, List<ImageReference>> uniqueImages, CompressionStats stats) {
    for (List<ImageReference> refs : uniqueImages.values()) {
        // Every reference counts toward the total, whether direct or nested.
        stats.totalImages += refs.size();
        for (ImageReference ref : refs) {
            if (ref instanceof NestedImageReference) {
                stats.nestedImages++;
            }
        }
    }
}
/**
 * Creates a compressed version of each unique image exactly once.
 *
 * <p>Iterates the hash-to-references map, compresses the first instance of each unique image,
 * and accumulates size/count statistics. Images for which compression is not possible or not
 * beneficial (see compressImage) are skipped and counted at their original size.
 *
 * @param doc the document the images belong to
 * @param uniqueImages hash-to-references map from image discovery
 * @param scaleFactor downscale factor applied to image dimensions
 * @param jpegQuality JPEG quality used when re-encoding
 * @param convertToGrayscale whether to force grayscale conversion
 * @param stats accumulator for original/compressed byte totals and counts
 * @return map from image hash to its compressed replacement (absent when skipped)
 * @throws IOException if image processing or re-encoding fails
 */
private Map<String, PDImageXObject> createCompressedImages(
PDDocument doc,
Map<String, List<ImageReference>> uniqueImages,
double scaleFactor,
float jpegQuality,
boolean convertToGrayscale,
CompressionStats stats)
throws IOException {
Map<String, PDImageXObject> compressedVersions = new HashMap<>();
// Process each unique image exactly once
for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
String imageHash = entry.getKey();
List<ImageReference> references = entry.getValue();
if (references.isEmpty()) continue;
// Get the first instance of this image
PDImageXObject originalImage = getOriginalImage(doc, references.get(0));
// Track original size
// NOTE(review): narrowing cast assumes the stream length fits in an int — TODO confirm
int originalSize = (int) originalImage.getCOSObject().getLength();
stats.totalOriginalBytes += originalSize;
// Process this unique image
PDImageXObject compressedImage =
compressImage(
doc,
originalImage,
originalSize,
scaleFactor,
jpegQuality,
convertToGrayscale);
if (compressedImage != null) {
// Store the compressed version in our map
compressedVersions.put(imageHash, compressedImage);
stats.compressedImages++;
// Update compression stats; every duplicate reference shares the compressed bytes
int compressedSize = (int) compressedImage.getCOSObject().getLength();
stats.totalCompressedBytes += compressedSize * references.size();
double reductionPercentage = 100.0 - ((compressedSize * 100.0) / originalSize);
log.info(
"Image hash {}: Compressed from {} to {} (reduced by {}%)",
imageHash,
GeneralUtils.formatBytes(originalSize),
GeneralUtils.formatBytes(compressedSize),
String.format("%.1f", reductionPercentage));
} else {
// Skipped images still contribute their original size to the compressed total
log.info("Image hash {}: Not suitable for compression, skipping", imageHash);
stats.totalCompressedBytes += originalSize * references.size();
stats.skippedImages++;
}
}
return compressedVersions;
}
/**
 * Resolves an image reference back to the live PDImageXObject in the document.
 *
 * @param doc the document containing the image
 * @param ref a direct or nested image reference
 * @return the referenced image XObject
 * @throws IOException if the resource lookup fails
 */
private PDImageXObject getOriginalImage(PDDocument doc, ImageReference ref) throws IOException {
    PDResources pageResources = doc.getPage(ref.pageNum).getResources();
    if (ref instanceof NestedImageReference) {
        // Nested images live inside the form XObject's own resource dictionary.
        NestedImageReference nested = (NestedImageReference) ref;
        PDFormXObject form = (PDFormXObject) pageResources.getXObject(nested.formName);
        PDResources formResources = form.getResources();
        return (PDImageXObject) formResources.getXObject(nested.imageName);
    }
    // Direct images are looked up straight from the page resources.
    return (PDImageXObject) pageResources.getXObject(ref.name);
}
/**
 * Produces a compressed replacement for an image, or null when compression is not possible
 * or not beneficial.
 *
 * @param doc document used to create the replacement XObject
 * @param originalImage image to compress
 * @param originalSize original stream length in bytes, used for the benefit check
 * @param scaleFactor downscale factor for the pixel dimensions
 * @param jpegQuality JPEG re-encode quality
 * @param convertToGrayscale when true, the result is kept even if not smaller
 * @return the compressed image XObject, or null to signal "keep the original"
 * @throws IOException if processing or re-encoding fails
 */
private PDImageXObject compressImage(
        PDDocument doc,
        PDImageXObject originalImage,
        int originalSize,
        double scaleFactor,
        float jpegQuality,
        boolean convertToGrayscale)
        throws IOException {
    BufferedImage processed =
            processAndCompressImage(originalImage, scaleFactor, jpegQuality, convertToGrayscale);
    if (processed == null) {
        return null;
    }
    byte[] compressedData = convertToBytes(processed, jpegQuality);
    // Keep the result only when it is actually smaller, or grayscale was explicitly requested.
    boolean beneficial = compressedData.length < originalSize || convertToGrayscale;
    if (!beneficial) {
        return null;
    }
    return PDImageXObject.createFromByteArray(
            doc, compressedData, originalImage.getCOSObject().toString());
}
/**
 * Replaces every reference of each image with its compressed version, when one exists.
 *
 * @param doc the document being modified
 * @param uniqueImages hash-to-references map from image discovery
 * @param compressedVersions hash-to-compressed-image map; hashes absent here are left as-is
 * @param stats statistics accumulator (currently unchanged by this step)
 * @throws IOException if a resource lookup or replacement fails
 */
private void replaceImages(
        PDDocument doc,
        Map<String, List<ImageReference>> uniqueImages,
        Map<String, PDImageXObject> compressedVersions,
        CompressionStats stats)
        throws IOException {
    for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
        PDImageXObject compressedImage = compressedVersions.get(entry.getKey());
        // No compressed version means compression was skipped for this image.
        if (compressedImage == null) {
            continue;
        }
        // Swap in the compressed version at every place the image is referenced.
        for (ImageReference ref : entry.getValue()) {
            replaceImageReference(doc, ref, compressedImage);
        }
    }
}
/**
 * Replaces one image reference (direct or nested) with the compressed version.
 *
 * @param doc the document being modified
 * @param ref the reference identifying where the image lives
 * @param compressedImage the replacement image XObject
 * @throws IOException if the resource lookup fails
 */
private void replaceImageReference(
        PDDocument doc, ImageReference ref, PDImageXObject compressedImage) throws IOException {
    PDPage page = doc.getPage(ref.pageNum);
    PDResources pageResources = page.getResources();
    if (ref instanceof NestedImageReference) {
        // Nested: swap the entry inside the owning form XObject's resources.
        NestedImageReference nested = (NestedImageReference) ref;
        PDFormXObject form = (PDFormXObject) pageResources.getXObject(nested.formName);
        PDResources formResources = form.getResources();
        formResources.put(nested.imageName, compressedImage);
        log.info(
                "Replaced nested image '{}' in form '{}' on page {} with compressed version",
                nested.imageName.getName(),
                nested.formName.getName(),
                nested.pageNum + 1);
    } else {
        // Direct: swap the entry in the page's own resources.
        pageResources.put(ref.name, compressedImage);
        log.info("Replaced direct image on page {} with compressed version", ref.pageNum + 1);
    }
}
/**
 * Logs a summary of the image-compression pass: unique/compressed/skipped/duplicate/nested
 * counts and the overall byte reduction.
 *
 * @param stats accumulated compression statistics
 * @param originalFileSize NOTE(review): currently unused by this method — kept for signature
 *     compatibility with the call site; confirm whether it should appear in the summary
 */
private void logCompressionStats(CompressionStats stats, long originalFileSize) {
// Calculate image reduction percentage; guard against division by zero when no bytes were seen
double overallImageReduction =
stats.totalOriginalBytes > 0
? 100.0 - ((stats.totalCompressedBytes * 100.0) / stats.totalOriginalBytes)
: 0;
// Duplicates are total references minus the number of distinct images
int duplicatedImages = stats.totalImages - stats.uniqueImagesCount;
log.info(
"Image compression summary - Total unique: {}, Compressed: {}, Skipped: {}, Duplicates: {}, Nested: {}",
stats.uniqueImagesCount,
stats.compressedImages,
stats.skippedImages,
duplicatedImages,
stats.nestedImages);
log.info(
"Total original image size: {}, compressed: {} (reduced by {}%)",
GeneralUtils.formatBytes(stats.totalOriginalBytes),
GeneralUtils.formatBytes(stats.totalCompressedBytes),
String.format("%.1f", overallImageReduction));
}
private BufferedImage convertToGrayscale(BufferedImage image) {
BufferedImage grayImage =
new BufferedImage(
@ -523,23 +630,6 @@ public class CompressController {
}
}
/**
 * Computes the MD5 digest of an image's decoded pixel data.
 *
 * <p>NOTE(review): delegates to {@code generatMD5} — helper name appears to be missing an 'e';
 * it is defined elsewhere in this class, so the spelling is kept as-is here.
 */
private byte[] generateImageMD5(PDImageXObject image) throws IOException {
    return generatMD5(ImageProcessingUtils.getImageData(image.getImage()));
}
/**
 * Generates a hex hash string from a byte array.
 *
 * @param data bytes to hash
 * @return hex-encoded MD5 of the data, or a "fallback-" identity-based string if hashing fails
 */
private String generateHashFromBytes(byte[] data) {
    try {
        // Delegate to the existing MD5 helper and hex-encode the digest.
        return bytesToHexString(generatMD5(data));
    } catch (Exception e) {
        log.error("Error generating hash from bytes", e);
        // Best-effort fallback: unique per array instance, not content-based.
        return "fallback-" + System.identityHashCode(data);
    }
}
// Updated scale factor method for levels 4-9
private double getScaleFactorForLevel(int optimizeLevel) {
return switch (optimizeLevel) {
@ -590,10 +680,10 @@ public class CompressController {
Path originalFile = Files.createTempFile("original_", ".pdf");
inputFile.transferTo(originalFile.toFile());
long inputFileSize = Files.size(originalFile);
Path currentFile = Files.createTempFile("working_", ".pdf");
Files.copy(originalFile, currentFile, StandardCopyOption.REPLACE_EXISTING);
// Keep track of all temporary files for cleanup
List<Path> tempFiles = new ArrayList<>();
tempFiles.add(originalFile);
@ -607,24 +697,26 @@ public class CompressController {
boolean sizeMet = false;
boolean imageCompressionApplied = false;
boolean qpdfCompressionApplied = false;
if(qpdfEnabled && optimizeLevel <= 3) {
optimizeLevel = 4;
if (qpdfEnabled && optimizeLevel <= 3) {
optimizeLevel = 4;
}
while (!sizeMet && optimizeLevel <= 9) {
// Apply image compression for levels 4-9
if ((optimizeLevel >= 4 || Boolean.TRUE.equals(convertToGrayscale))
&& !imageCompressionApplied) {
double scaleFactor = getScaleFactorForLevel(optimizeLevel);
float jpegQuality = getJpegQualityForLevel(optimizeLevel);
// Use the returned path from compressImagesInPDF
Path compressedImageFile = compressImagesInPDF(
currentFile,
scaleFactor,
jpegQuality,
Boolean.TRUE.equals(convertToGrayscale));
Path compressedImageFile =
compressImagesInPDF(
currentFile,
scaleFactor,
jpegQuality,
Boolean.TRUE.equals(convertToGrayscale));
// Add to temp files list and update current file
tempFiles.add(compressedImageFile);
currentFile = compressedImageFile;
@ -633,57 +725,8 @@ public class CompressController {
// Apply QPDF compression for all levels
if (!qpdfCompressionApplied && qpdfEnabled) {
long preQpdfSize = Files.size(currentFile);
log.info("Pre-QPDF file size: {}", GeneralUtils.formatBytes(preQpdfSize));
// Map optimization levels to QPDF compression levels
int qpdfCompressionLevel = optimizeLevel <= 3
? optimizeLevel * 3 // Level 1->3, 2->6, 3->9
: 9; // Max compression for levels 4-9
// Create output file for QPDF
Path qpdfOutputFile = Files.createTempFile("qpdf_output_", ".pdf");
tempFiles.add(qpdfOutputFile);
// Run QPDF optimization
List<String> command = new ArrayList<>();
command.add("qpdf");
if (request.getNormalize()) {
command.add("--normalize-content=y");
}
if (request.getLinearize()) {
command.add("--linearize");
}
command.add("--recompress-flate");
command.add("--compression-level=" + qpdfCompressionLevel);
command.add("--compress-streams=y");
command.add("--object-streams=generate");
command.add(currentFile.toString());
command.add(qpdfOutputFile.toString());
ProcessExecutorResult returnCode = null;
try {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
.runCommandWithOutputHandling(command);
qpdfCompressionApplied = true;
// Update current file to the QPDF output
currentFile = qpdfOutputFile;
long postQpdfSize = Files.size(currentFile);
double qpdfReduction = 100.0 - ((postQpdfSize * 100.0) / preQpdfSize);
log.info(
"Post-QPDF file size: {} (reduced by {}%)",
GeneralUtils.formatBytes(postQpdfSize),
String.format("%.1f", qpdfReduction));
} catch (Exception e) {
if (returnCode != null && returnCode.getRc() != 3) {
throw e;
}
// If QPDF fails, keep using the current file
log.warn("QPDF compression failed, continuing with current file");
}
applyQpdfCompression(request, optimizeLevel, currentFile, tempFiles);
qpdfCompressionApplied = true;
} else if (!qpdfCompressionApplied) {
// If QPDF is disabled, mark as applied and log
if (!qpdfEnabled) {
@ -697,13 +740,15 @@ public class CompressController {
if (outputFileSize <= expectedOutputSize || !autoMode) {
sizeMet = true;
} else {
int newOptimizeLevel = incrementOptimizeLevel(
optimizeLevel, outputFileSize, expectedOutputSize);
int newOptimizeLevel =
incrementOptimizeLevel(
optimizeLevel, outputFileSize, expectedOutputSize);
// Check if we can't increase the level further
if (newOptimizeLevel == optimizeLevel) {
if (autoMode) {
log.info("Maximum optimization level reached without meeting target size.");
log.info(
"Maximum optimization level reached without meeting target size.");
sizeMet = true;
}
} else {
@ -718,15 +763,17 @@ public class CompressController {
// Check if optimized file is larger than the original
long finalFileSize = Files.size(currentFile);
if (finalFileSize >= inputFileSize) {
log.warn("Optimized file is larger than the original. Using the original file instead.");
log.warn(
"Optimized file is larger than the original. Using the original file instead.");
// Use the stored reference to the original file
currentFile = originalFile;
}
String outputFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename())
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_Optimized.pdf";
return WebResponseUtils.pdfDocToWebResponse(
pdfDocumentFactory.load(currentFile.toFile()), outputFilename);
@ -742,6 +789,65 @@ public class CompressController {
}
}
/**
 * Applies QPDF compression to a PDF file, overwriting it in place.
 *
 * <p>The requested optimize level is mapped onto qpdf's {@code --compression-level}
 * (levels 1-3 scale linearly to 3/6/9; levels 4 and above all use the maximum, 9).
 * The external {@code qpdf} binary is then run against {@code currentFile} with
 * flate recompression, stream compression and generated object streams, plus the
 * optional {@code --normalize-content} / {@code --linearize} flags from the request.
 * On success the qpdf output is copied back over {@code currentFile}, so the caller's
 * file is updated in place (the {@link Path} itself cannot be reassigned for the caller).
 *
 * @param request       source of the normalize/linearize flags forwarded to qpdf
 * @param optimizeLevel requested optimization level (expected 1-9)
 * @param currentFile   the PDF to compress; its contents are replaced on success
 * @param tempFiles     collector for temp files created here, cleaned up by the caller
 * @throws IOException  if temp-file creation or the copy-back fails, or (nominally) if
 *                      qpdf fails with a return code other than 3 — but see the
 *                      review note on the catch block below
 */
private void applyQpdfCompression(
OptimizePdfRequest request, int optimizeLevel, Path currentFile, List<Path> tempFiles)
throws IOException {
long preQpdfSize = Files.size(currentFile);
log.info("Pre-QPDF file size: {}", GeneralUtils.formatBytes(preQpdfSize));
// Map optimization levels to QPDF compression levels
int qpdfCompressionLevel =
optimizeLevel <= 3
? optimizeLevel * 3 // Level 1->3, 2->6, 3->9
: 9; // Max compression for levels 4-9
// Create output file for QPDF; registered in tempFiles so the caller deletes it later
Path qpdfOutputFile = Files.createTempFile("qpdf_output_", ".pdf");
tempFiles.add(qpdfOutputFile);
// Build the qpdf command line. Optional flags first, then the fixed recompression
// options, then positional input/output paths (qpdf requires input before output).
List<String> command = new ArrayList<>();
command.add("qpdf");
if (request.getNormalize()) {
command.add("--normalize-content=y");
}
if (request.getLinearize()) {
command.add("--linearize");
}
command.add("--recompress-flate");
command.add("--compression-level=" + qpdfCompressionLevel);
command.add("--compress-streams=y");
command.add("--object-streams=generate");
command.add(currentFile.toString());
command.add(qpdfOutputFile.toString());
ProcessExecutorResult returnCode = null;
try {
returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
.runCommandWithOutputHandling(command);
// Update current file to the QPDF output.
// Copy back over the input path so the caller sees the compressed result
// (reassigning currentFile here would not be visible to the caller).
Files.copy(qpdfOutputFile, currentFile, StandardCopyOption.REPLACE_EXISTING);
long postQpdfSize = Files.size(currentFile);
double qpdfReduction = 100.0 - ((postQpdfSize * 100.0) / preQpdfSize);
log.info(
"Post-QPDF file size: {} (reduced by {}%)",
GeneralUtils.formatBytes(postQpdfSize), String.format("%.1f", qpdfReduction));
} catch (Exception e) {
// NOTE(review): this guard appears dead. returnCode is only assigned after
// runCommandWithOutputHandling returns normally, so if the qpdf invocation
// itself throws, returnCode is still null and the IOException is never
// rethrown — every qpdf failure falls through to the warning below. If the
// intent was "rethrow unless qpdf exited with 3" (qpdf's documented
// 'succeeded with warnings' status — confirm against the qpdf manual), the
// exit code would need to be read from the exception instead. TODO confirm.
if (returnCode != null && returnCode.getRc() != 3) {
throw new IOException("QPDF command failed", e);
}
// If QPDF fails, keep using the current file
log.warn("QPDF compression failed, continuing with current file", e);
}
}
/** Determine the appropriate optimization level based on the desired size reduction ratio */
private int determineOptimizeLevel(double sizeReductionRatio) {
if (sizeReductionRatio > 0.9) return 1;
if (sizeReductionRatio > 0.8) return 2;
@ -754,6 +860,7 @@ public class CompressController {
return 9;
}
/** Increment optimization level based on current size vs target size */
private int incrementOptimizeLevel(int currentLevel, long currentSize, long targetSize) {
double currentRatio = currentSize / (double) targetSize;
log.info("Current compression ratio: {}", String.format("%.2f", currentRatio));

View File

@ -44,9 +44,9 @@ public class PipelineController {
private final ObjectMapper objectMapper;
private final PostHogService postHogService;
public PipelineController(PipelineProcessor processor, ObjectMapper objectMapper,
PostHogService postHogService) {
public PipelineController(
PipelineProcessor processor, ObjectMapper objectMapper, PostHogService postHogService) {
this.processor = processor;
this.objectMapper = objectMapper;
this.postHogService = postHogService;
@ -62,18 +62,18 @@ public class PipelineController {
}
PipelineConfig config = objectMapper.readValue(jsonString, PipelineConfig.class);
log.info("Received POST request to /handleData with {} files", files.length);
List<String> operationNames = config.getOperations().stream()
.map(PipelineOperation::getOperation)
.collect(Collectors.toList());
List<String> operationNames =
config.getOperations().stream()
.map(PipelineOperation::getOperation)
.collect(Collectors.toList());
Map<String, Object> properties = new HashMap<>();
properties.put("operations", operationNames);
properties.put("fileCount", files.length);
postHogService.captureEvent("pipeline_api_event", properties);
try {
List<Resource> inputFiles = processor.generateInputFiles(files);
if (inputFiles == null || inputFiles.size() == 0) {

View File

@ -21,7 +21,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.springframework.core.io.ByteArrayResource;
@ -154,15 +153,14 @@ public class PipelineDirectoryProcessor {
log.debug("No files detected for {} ", dir);
return;
}
List<String> operationNames = config.getOperations().stream()
.map(PipelineOperation::getOperation)
.toList();
List<String> operationNames =
config.getOperations().stream().map(PipelineOperation::getOperation).toList();
Map<String, Object> properties = new HashMap<>();
properties.put("operations", operationNames);
properties.put("fileCount", files.length);
postHogService.captureEvent("pipeline_directory_event", properties);
List<File> filesToProcess = prepareFilesForProcessing(files, processingDir);
runPipelineAgainstFiles(filesToProcess, config, dir, processingDir);
}
@ -263,7 +261,8 @@ public class PipelineDirectoryProcessor {
try {
Thread.sleep(retryDelayMs * (int) Math.pow(2, attempt - 1));
} catch (InterruptedException e1) {
log.error("prepareFilesForProcessing failure",e); }
log.error("prepareFilesForProcessing failure", e);
}
}
}
}
@ -307,7 +306,7 @@ public class PipelineDirectoryProcessor {
processor.generateInputFiles(filesToProcess.toArray(new File[0]));
if (inputFiles == null || inputFiles.isEmpty()) {
return;
}
}
PipelineResult result = processor.runPipelineAgainstFiles(inputFiles, config);
if (result.isHasErrors()) {
@ -404,4 +403,4 @@ public class PipelineDirectoryProcessor {
}
}
}
}
}

View File

@ -14,7 +14,7 @@ public class OptimizePdfRequest extends PDFFile {
@Schema(
description =
"The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.",
allowableValues = {"1", "2", "3", "4", "5"})
allowableValues = {"1", "2", "3", "4", "5", "6", "7", "8", "9"})
private Integer optimizeLevel;
@Schema(description = "The expected output size, e.g. '100MB', '25KB', etc.")

View File

@ -261,7 +261,6 @@ public class CustomPDFDocumentFactory {
removePassword(doc);
}
private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache)
throws IOException {
return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);

View File

@ -66,9 +66,8 @@ public class MetricsAggregatorService {
return;
}
if(uri.contains(".txt")) {
return;
if (uri.contains(".txt")) {
return;
}
// For GET requests, validate if we have a list of valid endpoints
if ("GET".equals(method)