Stirling-PDF (mirror of https://github.com/Frooodle/Stirling-PDF.git)

commit f97f1d792d
parent 52d4adc473

    cleanups and fix for #3207
EndpointInspector.java

@@ -1,7 +1,6 @@
 package stirling.software.SPDF.config;
 
 import java.lang.reflect.Method;
-import java.util.Collections;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
@@ -43,51 +42,39 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEvent> {
 
     private void discoverEndpoints() {
         try {
-            // Get all request mapping beans from the application context
             Map<String, RequestMappingHandlerMapping> mappings =
                     applicationContext.getBeansOfType(RequestMappingHandlerMapping.class);
 
-            // Process each mapping bean
             for (Map.Entry<String, RequestMappingHandlerMapping> entry : mappings.entrySet()) {
                 RequestMappingHandlerMapping mapping = entry.getValue();
 
-                // Get all handler methods registered in this mapping
                 Map<RequestMappingInfo, HandlerMethod> handlerMethods = mapping.getHandlerMethods();
 
-                // Process each handler method
                 for (Map.Entry<RequestMappingInfo, HandlerMethod> handlerEntry :
                         handlerMethods.entrySet()) {
                     RequestMappingInfo mappingInfo = handlerEntry.getKey();
                     HandlerMethod handlerMethod = handlerEntry.getValue();
 
-                    // Check if the method handles GET requests
                    boolean isGetHandler = false;
                     try {
                         Set<RequestMethod> methods = mappingInfo.getMethodsCondition().getMethods();
-                        // Either explicitly handles GET or handles all methods (empty set)
                         isGetHandler = methods.isEmpty() || methods.contains(RequestMethod.GET);
                     } catch (Exception e) {
-                        // If we can't determine methods, assume it could handle GET
                         isGetHandler = true;
                     }
 
                     if (isGetHandler) {
-                        // Since we know getDirectPaths works, use it directly
                         Set<String> patterns = extractPatternsUsingDirectPaths(mappingInfo);
 
-                        // If that fails, try string parsing as fallback
                         if (patterns.isEmpty()) {
                             patterns = extractPatternsFromString(mappingInfo);
                         }
 
-                        // Add all valid patterns
                         validGetEndpoints.addAll(patterns);
                     }
                 }
             }
 
             if (validGetEndpoints.isEmpty()) {
-                // If we still couldn't find any endpoints, add some common ones as a fallback
                 logger.warn("No endpoints discovered. Adding common endpoints as fallback.");
                 validGetEndpoints.add("/");
                 validGetEndpoints.add("/api/**");
@@ -98,9 +85,6 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEvent> {
         }
     }
 
-    /**
-     * Extract patterns using the getDirectPaths method that works in this environment
-     */
     private Set<String> extractPatternsUsingDirectPaths(RequestMappingInfo mappingInfo) {
         Set<String> patterns = new HashSet<>();
 
@@ -113,7 +97,7 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEvent> {
                 patterns.addAll(resultSet);
             }
         } catch (Exception e) {
-            // Just return empty set if method not found or fails
+            // Return empty set if method not found or fails
         }
 
         return patterns;
@@ -125,9 +109,7 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEvent> {
             String infoString = mappingInfo.toString();
             if (infoString.contains("{")) {
                 String patternsSection =
-                        infoString.substring(
-                                infoString.indexOf("{") + 1,
-                                infoString.indexOf("}"));
+                        infoString.substring(infoString.indexOf("{") + 1, infoString.indexOf("}"));
 
                 for (String pattern : patternsSection.split(",")) {
                     pattern = pattern.trim();
@@ -137,39 +119,38 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEvent> {
                 }
             }
         } catch (Exception e) {
-            // Just return empty set if parsing fails
+            // Return empty set if parsing fails
         }
         return patterns;
     }
 
-    /**
-     * Check if a URI corresponds to a valid GET endpoint - Fixed to handle path variables safely
-     */
     public boolean isValidGetEndpoint(String uri) {
-        // Ensure endpoints are discovered
         if (!endpointsDiscovered) {
             discoverEndpoints();
             endpointsDiscovered = true;
         }
 
-        // If no endpoints were discovered, assume all endpoints are valid
-        if (validGetEndpoints.isEmpty()) {
-            logger.warn("No valid endpoints were discovered. Assuming all GET endpoints are valid.");
-            return true;
-        }
-
-        // Direct match
         if (validGetEndpoints.contains(uri)) {
             return true;
         }
 
-        // Try simple prefix matching for wildcards and path variables
+        if (matchesWildcardOrPathVariable(uri)) {
+            return true;
+        }
+
+        if (matchesPathSegments(uri)) {
+            return true;
+        }
+
+        return false;
+    }
+
+    private boolean matchesWildcardOrPathVariable(String uri) {
         for (String pattern : validGetEndpoints) {
             if (pattern.contains("*") || pattern.contains("{")) {
                 int wildcardIndex = pattern.indexOf('*');
                 int variableIndex = pattern.indexOf('{');
 
-                // Find the earliest special character
                 int cutoffIndex;
                 if (wildcardIndex < 0) {
                     cutoffIndex = variableIndex;
@@ -179,29 +160,26 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEvent> {
                     cutoffIndex = Math.min(wildcardIndex, variableIndex);
                 }
 
-                // Get the static part of the pattern
                 String staticPrefix = pattern.substring(0, cutoffIndex);
 
-                // If the URI starts with this prefix, consider it a match
                 if (uri.startsWith(staticPrefix)) {
                     return true;
                 }
             }
         }
+        return false;
+    }
 
-        // For patterns without wildcards or variables, try path-segment-by-segment matching
+    private boolean matchesPathSegments(String uri) {
         for (String pattern : validGetEndpoints) {
             if (!pattern.contains("*") && !pattern.contains("{")) {
-                // Split the pattern and URI into path segments
                 String[] patternSegments = pattern.split("/");
                 String[] uriSegments = uri.split("/");
 
-                // If URI has fewer segments than the pattern, it can't match
                 if (uriSegments.length < patternSegments.length) {
                     continue;
                 }
 
-                // Check each segment
                 boolean match = true;
                 for (int i = 0; i < patternSegments.length; i++) {
                     if (!patternSegments[i].equals(uriSegments[i])) {
@@ -215,14 +193,10 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEvent> {
                 }
             }
         }
 
-        // If no match was found, the URI is not valid
         return false;
     }
 
-    /** Get all discovered valid GET endpoints */
     public Set<String> getValidGetEndpoints() {
-        // Ensure endpoints are discovered
         if (!endpointsDiscovered) {
             discoverEndpoints();
             endpointsDiscovered = true;
@@ -230,7 +204,6 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEvent> {
         return new HashSet<>(validGetEndpoints);
     }
 
-    //For debugging when needed
     private void logAllEndpoints() {
         Set<String> sortedEndpoints = new TreeSet<>(validGetEndpoints);
 
@@ -239,7 +212,5 @@ public class EndpointInspector implements ApplicationListener<ContextRefreshedEvent> {
             logger.info("Endpoint: {}", endpoint);
         }
         logger.info("=== END: All discovered GET endpoints ===");
-
     }
-
 }
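The cleanup above splits the matching logic out of isValidGetEndpoint into two private helpers. A minimal standalone sketch of the wildcard/path-variable prefix check, for illustration only (prefixMatches is a hypothetical name; the commit's matchesWildcardOrPathVariable iterates validGetEndpoints instead of taking the pattern as a parameter):

    // A pattern such as "/api/users/{id}" or "/api/**" matches any URI that
    // starts with its static prefix ("/api/users/" or "/api/").
    static boolean prefixMatches(String uri, String pattern) {
        int wildcardIndex = pattern.indexOf('*');
        int variableIndex = pattern.indexOf('{');
        if (wildcardIndex < 0 && variableIndex < 0) {
            return false; // plain patterns are handled by exact or segment matching
        }
        int cutoffIndex;
        if (wildcardIndex < 0) {
            cutoffIndex = variableIndex;
        } else if (variableIndex < 0) {
            cutoffIndex = wildcardIndex;
        } else {
            cutoffIndex = Math.min(wildcardIndex, variableIndex);
        }
        return uri.startsWith(pattern.substring(0, cutoffIndex));
    }

For example, prefixMatches("/api/users/42", "/api/users/{id}") returns true because the URI starts with the static prefix "/api/users/".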
CompressController.java

@@ -25,24 +25,13 @@ import javax.imageio.ImageWriter;
 import javax.imageio.plugins.jpeg.JPEGImageWriteParam;
 import javax.imageio.stream.ImageOutputStream;
 
-import org.apache.pdfbox.contentstream.PDFStreamEngine;
-import org.apache.pdfbox.contentstream.operator.Operator;
-import org.apache.pdfbox.cos.COSArray;
-import org.apache.pdfbox.cos.COSBase;
-import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.PDResources;
 import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
 import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
-import org.apache.pdfbox.pdmodel.graphics.image.PDInlineImage;
-import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern;
-import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern;
-import org.apache.pdfbox.pdmodel.graphics.shading.PDShading;
-import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.http.ResponseEntity;
 import org.springframework.web.bind.annotation.ModelAttribute;
 import org.springframework.web.bind.annotation.PostMapping;
@@ -58,6 +47,7 @@ import lombok.AllArgsConstructor;
 import lombok.Data;
 import lombok.NoArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
+
 import stirling.software.SPDF.config.EndpointConfiguration;
 import stirling.software.SPDF.model.api.misc.OptimizePdfRequest;
 import stirling.software.SPDF.service.CustomPDFDocumentFactory;
@@ -76,12 +66,13 @@ public class CompressController {
 
     private final CustomPDFDocumentFactory pdfDocumentFactory;
     private final boolean qpdfEnabled;
 
-    public CompressController(CustomPDFDocumentFactory pdfDocumentFactory, EndpointConfiguration endpointConfiguration) {
+    public CompressController(
+            CustomPDFDocumentFactory pdfDocumentFactory,
+            EndpointConfiguration endpointConfiguration) {
         this.pdfDocumentFactory = pdfDocumentFactory;
         this.qpdfEnabled = endpointConfiguration.isGroupEnabled("qpdf");
     }
 
     @Data
     @AllArgsConstructor
     @NoArgsConstructor
@@ -89,6 +80,7 @@ public class CompressController {
         int pageNum; // Page number where the image appears
         COSName name; // The name used to reference this image
     }
 
     @Data
     @AllArgsConstructor
     @NoArgsConstructor
@@ -97,6 +89,16 @@ public class CompressController {
         COSName imageName; // Name of the image within the form
     }
 
+    // Image compression statistics for reporting
+    private static class CompressionStats {
+        int totalImages = 0;
+        int nestedImages = 0;
+        int uniqueImagesCount = 0;
+        int compressedImages = 0;
+        int skippedImages = 0;
+        long totalOriginalBytes = 0;
+        long totalCompressedBytes = 0;
+    }
+
     public Path compressImagesInPDF(
             Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
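The new CompressionStats holder gathers the counters that the old compressImagesInPDF threaded through as loose locals. The reduction percentage it feeds into logCompressionStats works like this (values invented for illustration):

    long totalOriginalBytes = 1_000_000;  // bytes before compression
    long totalCompressedBytes = 250_000;  // bytes after compression
    double overallImageReduction =
            totalOriginalBytes > 0
                    ? 100.0 - ((totalCompressedBytes * 100.0) / totalOriginalBytes)
                    : 0; // yields 75.0 here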
@@ -111,228 +113,24 @@ public class CompressController {
                 GeneralUtils.formatBytes(originalFileSize));
 
         try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) {
-            // Collect all unique images by content hash
-            Map<String, List<ImageReference>> uniqueImages = new HashMap<>();
-            Map<String, PDImageXObject> compressedVersions = new HashMap<>();
+            // Step 1: Find all unique images in the document
+            Map<String, List<ImageReference>> uniqueImages = findImages(doc);
 
-            int totalImages = 0;
-            int nestedImages = 0;
+            // Get statistics
+            CompressionStats stats = new CompressionStats();
+            stats.uniqueImagesCount = uniqueImages.size();
+            calculateImageStats(uniqueImages, stats);
 
-            // FIRST PASS: Collect all images (direct and nested)
-            for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
-                PDPage page = doc.getPage(pageNum);
-                PDResources res = page.getResources();
-                if (res == null || res.getXObjectNames() == null) continue;
+            // Step 2: Create compressed versions of unique images
+            Map<String, PDImageXObject> compressedVersions =
+                    createCompressedImages(
+                            doc, uniqueImages, scaleFactor, jpegQuality, convertToGrayscale, stats);
 
-                // Process direct XObjects on page
-                for (COSName name : res.getXObjectNames()) {
-                    PDXObject xobj = res.getXObject(name);
+            // Step 3: Replace all instances with compressed versions
+            replaceImages(doc, uniqueImages, compressedVersions, stats);
 
-                    // Direct image
-                    if (xobj instanceof PDImageXObject) {
-                        totalImages++;
-                        PDImageXObject image = (PDImageXObject) xobj;
-                        String imageHash = generateImageHash(image);
-
-                        ImageReference ref = new ImageReference();
-                        ref.pageNum = pageNum;
-                        ref.name = name;
-
-                        log.info("Found direct image '{}' on page {} - {}x{}",
-                                name.getName(), pageNum + 1, image.getWidth(), image.getHeight());
-
-                        uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref);
-                    }
-                    // Form XObject may contain nested images
-                    else if (xobj instanceof org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject) {
-                        org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject formXObj =
-                                (org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject) xobj;
-
-                        PDResources formResources = formXObj.getResources();
-                        if (formResources != null && formResources.getXObjectNames() != null) {
-                            // Process nested XObjects within the form
-                            log.info("Checking form XObject '{}' on page {} for nested images",
-                                    name.getName(), pageNum + 1);
-
-                            for (COSName nestedName : formResources.getXObjectNames()) {
-                                PDXObject nestedXobj = formResources.getXObject(nestedName);
-
-                                if (nestedXobj instanceof PDImageXObject) {
-                                    nestedImages++;
-                                    totalImages++;
-                                    PDImageXObject nestedImage = (PDImageXObject) nestedXobj;
-
-                                    log.info("Found nested image '{}' in form '{}' on page {} - {}x{}",
-                                            nestedName.getName(), name.getName(), pageNum + 1,
-                                            nestedImage.getWidth(), nestedImage.getHeight());
-
-                                    // Create a specialized reference for the nested image
-                                    NestedImageReference nestedRef = new NestedImageReference();
-                                    nestedRef.pageNum = pageNum;
-                                    nestedRef.formName = name;
-                                    nestedRef.imageName = nestedName;
-
-                                    String imageHash = generateImageHash(nestedImage);
-                                    uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(nestedRef);
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-            int uniqueImagesCount = uniqueImages.size();
-            int duplicatedImages = totalImages - uniqueImagesCount;
-            log.info(
-                    "Found {} unique images and {} duplicated instances across {} pages ({} nested images in form XObjects)",
-                    uniqueImagesCount,
-                    duplicatedImages,
-                    doc.getNumberOfPages(),
-                    nestedImages);
-
-            // SECOND PASS: Process each unique image exactly once
-            int compressedImages = 0;
-            int skippedImages = 0;
-            long totalOriginalBytes = 0;
-            long totalCompressedBytes = 0;
-
-            for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
-                String imageHash = entry.getKey();
-                List<ImageReference> references = entry.getValue();
-
-                if (references.isEmpty()) continue;
-
-                // Get the first instance of this image
-                ImageReference firstRef = references.get(0);
-                PDImageXObject originalImage;
-
-                // Handle differently based on whether it's a direct or nested image
-                if (firstRef instanceof NestedImageReference) {
-                    // Get the nested image from within a form XObject
-                    NestedImageReference nestedRef = (NestedImageReference) firstRef;
-                    PDPage firstPage = doc.getPage(nestedRef.pageNum);
-                    PDResources pageResources = firstPage.getResources();
-
-                    // Get the form XObject
-                    org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject formXObj =
-                            (org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject) pageResources.getXObject(nestedRef.formName);
-
-                    // Get the nested image from the form's resources
-                    PDResources formResources = formXObj.getResources();
-                    originalImage = (PDImageXObject) formResources.getXObject(nestedRef.imageName);
-
-                    log.info("Processing nested image '{}' from form '{}'",
-                            nestedRef.imageName.getName(), nestedRef.formName.getName());
-                } else {
-                    // Get direct image from page resources
-                    PDPage firstPage = doc.getPage(firstRef.pageNum);
-                    PDResources firstPageResources = firstPage.getResources();
-                    originalImage = (PDImageXObject) firstPageResources.getXObject(firstRef.name);
-
-                    log.debug("Processing direct image '{}'", firstRef.name.getName());
-                }
-
-                // Track original size
-                int originalSize = (int) originalImage.getCOSObject().getLength();
-                totalOriginalBytes += originalSize;
-
-                // Process this unique image once
-                BufferedImage processedImage =
-                        processAndCompressImage(
-                                originalImage, scaleFactor, jpegQuality, convertToGrayscale);
-
-                if (processedImage != null) {
-                    // Convert to bytes for storage
-                    byte[] compressedData = convertToBytes(processedImage, jpegQuality);
-
-                    // Check if compression is beneficial
-                    if (compressedData.length < originalSize || convertToGrayscale) {
-                        // Create a single compressed version
-                        PDImageXObject compressedImage =
-                                PDImageXObject.createFromByteArray(
-                                        doc,
-                                        compressedData,
-                                        originalImage.getCOSObject().toString());
-
-                        // Store the compressed version only once in our map
-                        compressedVersions.put(imageHash, compressedImage);
-
-                        // Report compression stats
-                        double reductionPercentage =
-                                100.0 - ((compressedData.length * 100.0) / originalSize);
-                        log.info(
-                                "Image hash {}: Compressed from {} to {} (reduced by {}%)",
-                                imageHash,
-                                GeneralUtils.formatBytes(originalSize),
-                                GeneralUtils.formatBytes(compressedData.length),
-                                String.format("%.1f", reductionPercentage));
-
-                        // Replace ALL instances with the compressed version
-                        for (ImageReference ref : references) {
-                            if (ref instanceof NestedImageReference) {
-                                // Replace nested image within form XObject
-                                NestedImageReference nestedRef = (NestedImageReference) ref;
-                                PDPage page = doc.getPage(nestedRef.pageNum);
-                                PDResources pageResources = page.getResources();
-
-                                // Get the form XObject
-                                org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject formXObj =
-                                        (org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject)
-                                                pageResources.getXObject(nestedRef.formName);
-
-                                // Replace the nested image in the form's resources
-                                PDResources formResources = formXObj.getResources();
-                                formResources.put(nestedRef.imageName, compressedImage);
-
-                                log.info(
-                                        "Replaced nested image '{}' in form '{}' on page {} with compressed version",
-                                        nestedRef.imageName.getName(),
-                                        nestedRef.formName.getName(),
-                                        nestedRef.pageNum + 1);
-                            } else {
-                                // Replace direct image in page resources
-                                PDPage page = doc.getPage(ref.pageNum);
-                                PDResources resources = page.getResources();
-                                resources.put(ref.name, compressedImage);
-
-                                log.info(
-                                        "Replaced direct image on page {} with compressed version",
-                                        ref.pageNum + 1);
-                            }
-                        }
-
-                        totalCompressedBytes += compressedData.length * references.size();
-                        compressedImages++;
-                    } else {
-                        log.info("Image hash {}: Compression not beneficial, skipping", imageHash);
-                        totalCompressedBytes += originalSize * references.size();
-                        skippedImages++;
-                    }
-                } else {
-                    log.info("Image hash {}: Not suitable for compression, skipping", imageHash);
-                    totalCompressedBytes += originalSize * references.size();
-                    skippedImages++;
-                }
-            }
 
             // Log compression statistics
-            double overallImageReduction =
-                    totalOriginalBytes > 0
-                            ? 100.0 - ((totalCompressedBytes * 100.0) / totalOriginalBytes)
-                            : 0;
-
-            log.info(
-                    "Image compression summary - Total unique: {}, Compressed: {}, Skipped: {}, Duplicates: {}, Nested: {}",
-                    uniqueImagesCount,
-                    compressedImages,
-                    skippedImages,
-                    duplicatedImages,
-                    nestedImages);
-            log.info(
-                    "Total original image size: {}, compressed: {} (reduced by {}%)",
-                    GeneralUtils.formatBytes(totalOriginalBytes),
-                    GeneralUtils.formatBytes(totalCompressedBytes),
-                    String.format("%.1f", overallImageReduction));
+            logCompressionStats(stats, originalFileSize);
 
             // Free memory before saving
             compressedVersions.clear();
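One subtlety survives the refactor unchanged: per unique image, original bytes are counted once while compressed bytes are multiplied by the number of placements, since a single compressed XObject replaces every duplicate. Illustrated with invented numbers:

    // One unique 300 KB image placed 3 times, compressed once to 100 KB
    // (mirrors the accounting in createCompressedImages):
    int referenceCount = 3;
    long originalSize = 300_000;
    long compressedSize = 100_000;
    long totalOriginalBytes = originalSize;                      // 300000, counted once
    long totalCompressedBytes = compressedSize * referenceCount; // 300000, once per placement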
@@ -354,6 +152,315 @@ public class CompressController {
         }
     }
 
+    /** Find all images in the document, both direct and nested within forms. */
+    private Map<String, List<ImageReference>> findImages(PDDocument doc) throws IOException {
+        Map<String, List<ImageReference>> uniqueImages = new HashMap<>();
+
+        // Scan through all pages in the document
+        for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
+            PDPage page = doc.getPage(pageNum);
+            PDResources res = page.getResources();
+            if (res == null || res.getXObjectNames() == null) continue;
+
+            // Process all XObjects on the page
+            for (COSName name : res.getXObjectNames()) {
+                PDXObject xobj = res.getXObject(name);
+
+                // Process direct image
+                if (isImage(xobj)) {
+                    addDirectImage(pageNum, name, (PDImageXObject) xobj, uniqueImages);
+                    log.info(
+                            "Found direct image '{}' on page {} - {}x{}",
+                            name.getName(),
+                            pageNum + 1,
+                            ((PDImageXObject) xobj).getWidth(),
+                            ((PDImageXObject) xobj).getHeight());
+                }
+                // Process form XObject that may contain nested images
+                else if (isForm(xobj)) {
+                    checkFormForImages(pageNum, name, (PDFormXObject) xobj, uniqueImages);
+                }
+            }
+        }
+
+        return uniqueImages;
+    }
+
+    private boolean isImage(PDXObject xobj) {
+        return xobj instanceof PDImageXObject;
+    }
+
+    private boolean isForm(PDXObject xobj) {
+        return xobj instanceof PDFormXObject;
+    }
+
+    private ImageReference addDirectImage(
+            int pageNum,
+            COSName name,
+            PDImageXObject image,
+            Map<String, List<ImageReference>> uniqueImages)
+            throws IOException {
+        ImageReference ref = new ImageReference();
+        ref.pageNum = pageNum;
+        ref.name = name;
+
+        String imageHash = generateImageHash(image);
+        uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref);
+
+        return ref;
+    }
+
+    /** Check a form XObject for nested images. */
+    private void checkFormForImages(
+            int pageNum,
+            COSName formName,
+            PDFormXObject formXObj,
+            Map<String, List<ImageReference>> uniqueImages)
+            throws IOException {
+        PDResources formResources = formXObj.getResources();
+        if (formResources == null || formResources.getXObjectNames() == null) {
+            return;
+        }
+
+        log.info(
+                "Checking form XObject '{}' on page {} for nested images",
+                formName.getName(),
+                pageNum + 1);
+
+        // Process all XObjects within the form
+        for (COSName nestedName : formResources.getXObjectNames()) {
+            PDXObject nestedXobj = formResources.getXObject(nestedName);
+
+            if (isImage(nestedXobj)) {
+                PDImageXObject nestedImage = (PDImageXObject) nestedXobj;
+
+                log.info(
+                        "Found nested image '{}' in form '{}' on page {} - {}x{}",
+                        nestedName.getName(),
+                        formName.getName(),
+                        pageNum + 1,
+                        nestedImage.getWidth(),
+                        nestedImage.getHeight());
+
+                // Create specialized reference for the nested image
+                NestedImageReference nestedRef = new NestedImageReference();
+                nestedRef.pageNum = pageNum;
+                nestedRef.formName = formName;
+                nestedRef.imageName = nestedName;
+
+                String imageHash = generateImageHash(nestedImage);
+                uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(nestedRef);
+            }
+        }
+    }
+
+    /** Calculate statistics about the images found in the document. */
+    private void calculateImageStats(
+            Map<String, List<ImageReference>> uniqueImages, CompressionStats stats) {
+        for (List<ImageReference> references : uniqueImages.values()) {
+            for (ImageReference ref : references) {
+                stats.totalImages++;
+                if (ref instanceof NestedImageReference) {
+                    stats.nestedImages++;
+                }
+            }
+        }
+    }
+
+    /** Create compressed versions of all unique images. */
+    private Map<String, PDImageXObject> createCompressedImages(
+            PDDocument doc,
+            Map<String, List<ImageReference>> uniqueImages,
+            double scaleFactor,
+            float jpegQuality,
+            boolean convertToGrayscale,
+            CompressionStats stats)
+            throws IOException {
+
+        Map<String, PDImageXObject> compressedVersions = new HashMap<>();
+
+        // Process each unique image exactly once
+        for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
+            String imageHash = entry.getKey();
+            List<ImageReference> references = entry.getValue();
+
+            if (references.isEmpty()) continue;
+
+            // Get the first instance of this image
+            PDImageXObject originalImage = getOriginalImage(doc, references.get(0));
+
+            // Track original size
+            int originalSize = (int) originalImage.getCOSObject().getLength();
+            stats.totalOriginalBytes += originalSize;
+
+            // Process this unique image
+            PDImageXObject compressedImage =
+                    compressImage(
+                            doc,
+                            originalImage,
+                            originalSize,
+                            scaleFactor,
+                            jpegQuality,
+                            convertToGrayscale);
+
+            if (compressedImage != null) {
+                // Store the compressed version in our map
+                compressedVersions.put(imageHash, compressedImage);
+                stats.compressedImages++;
+
+                // Update compression stats
+                int compressedSize = (int) compressedImage.getCOSObject().getLength();
+                stats.totalCompressedBytes += compressedSize * references.size();
+
+                double reductionPercentage = 100.0 - ((compressedSize * 100.0) / originalSize);
+                log.info(
+                        "Image hash {}: Compressed from {} to {} (reduced by {}%)",
+                        imageHash,
+                        GeneralUtils.formatBytes(originalSize),
+                        GeneralUtils.formatBytes(compressedSize),
+                        String.format("%.1f", reductionPercentage));
+            } else {
+                log.info("Image hash {}: Not suitable for compression, skipping", imageHash);
+                stats.totalCompressedBytes += originalSize * references.size();
+                stats.skippedImages++;
+            }
+        }
+
+        return compressedVersions;
+    }
+
+    /** Get the original image from an image reference. */
+    private PDImageXObject getOriginalImage(PDDocument doc, ImageReference ref) throws IOException {
+        if (ref instanceof NestedImageReference) {
+            // Get the nested image from within a form XObject
+            NestedImageReference nestedRef = (NestedImageReference) ref;
+            PDPage page = doc.getPage(nestedRef.pageNum);
+            PDResources pageResources = page.getResources();
+
+            // Get the form XObject
+            PDFormXObject formXObj = (PDFormXObject) pageResources.getXObject(nestedRef.formName);
+
+            // Get the nested image from the form's resources
+            PDResources formResources = formXObj.getResources();
+            return (PDImageXObject) formResources.getXObject(nestedRef.imageName);
+        } else {
+            // Get direct image from page resources
+            PDPage page = doc.getPage(ref.pageNum);
+            PDResources resources = page.getResources();
+            return (PDImageXObject) resources.getXObject(ref.name);
+        }
+    }
+
+    /** Process an individual image and return a compressed version if beneficial. */
+    private PDImageXObject compressImage(
+            PDDocument doc,
+            PDImageXObject originalImage,
+            int originalSize,
+            double scaleFactor,
+            float jpegQuality,
+            boolean convertToGrayscale)
+            throws IOException {
+
+        // Process and compress the image
+        BufferedImage processedImage =
+                processAndCompressImage(
+                        originalImage, scaleFactor, jpegQuality, convertToGrayscale);
+
+        if (processedImage == null) {
+            return null;
+        }
+
+        // Convert to bytes for storage
+        byte[] compressedData = convertToBytes(processedImage, jpegQuality);
+
+        // Check if compression is beneficial
+        if (compressedData.length < originalSize || convertToGrayscale) {
+            // Create a compressed version
+            return PDImageXObject.createFromByteArray(
+                    doc, compressedData, originalImage.getCOSObject().toString());
+        }
+
+        return null;
+    }
+
+    /** Replace all instances of original images with their compressed versions. */
+    private void replaceImages(
+            PDDocument doc,
+            Map<String, List<ImageReference>> uniqueImages,
+            Map<String, PDImageXObject> compressedVersions,
+            CompressionStats stats)
+            throws IOException {
+
+        for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
+            String imageHash = entry.getKey();
+            List<ImageReference> references = entry.getValue();
+
+            // Skip if no compressed version exists
+            PDImageXObject compressedImage = compressedVersions.get(imageHash);
+            if (compressedImage == null) continue;
+
+            // Replace ALL instances with the compressed version
+            for (ImageReference ref : references) {
+                replaceImageReference(doc, ref, compressedImage);
+            }
+        }
+    }
+
+    /** Replace a specific image reference with a compressed version. */
+    private void replaceImageReference(
+            PDDocument doc, ImageReference ref, PDImageXObject compressedImage) throws IOException {
+        if (ref instanceof NestedImageReference) {
+            // Replace nested image within form XObject
+            NestedImageReference nestedRef = (NestedImageReference) ref;
+            PDPage page = doc.getPage(nestedRef.pageNum);
+            PDResources pageResources = page.getResources();
+
+            // Get the form XObject
+            PDFormXObject formXObj = (PDFormXObject) pageResources.getXObject(nestedRef.formName);
+
+            // Replace the nested image in the form's resources
+            PDResources formResources = formXObj.getResources();
+            formResources.put(nestedRef.imageName, compressedImage);
+
+            log.info(
+                    "Replaced nested image '{}' in form '{}' on page {} with compressed version",
+                    nestedRef.imageName.getName(),
+                    nestedRef.formName.getName(),
+                    nestedRef.pageNum + 1);
+        } else {
+            // Replace direct image in page resources
+            PDPage page = doc.getPage(ref.pageNum);
+            PDResources resources = page.getResources();
+            resources.put(ref.name, compressedImage);
+
+            log.info("Replaced direct image on page {} with compressed version", ref.pageNum + 1);
+        }
+    }
+
+    /** Log compression statistics. */
+    private void logCompressionStats(CompressionStats stats, long originalFileSize) {
+        // Calculate image reduction percentage
+        double overallImageReduction =
+                stats.totalOriginalBytes > 0
+                        ? 100.0 - ((stats.totalCompressedBytes * 100.0) / stats.totalOriginalBytes)
+                        : 0;
+
+        int duplicatedImages = stats.totalImages - stats.uniqueImagesCount;
+
+        log.info(
+                "Image compression summary - Total unique: {}, Compressed: {}, Skipped: {}, Duplicates: {}, Nested: {}",
+                stats.uniqueImagesCount,
+                stats.compressedImages,
+                stats.skippedImages,
+                duplicatedImages,
+                stats.nestedImages);
+        log.info(
+                "Total original image size: {}, compressed: {} (reduced by {}%)",
+                GeneralUtils.formatBytes(stats.totalOriginalBytes),
+                GeneralUtils.formatBytes(stats.totalCompressedBytes),
+                String.format("%.1f", overallImageReduction));
+    }
+
     private BufferedImage convertToGrayscale(BufferedImage image) {
         BufferedImage grayImage =
                 new BufferedImage(
@@ -523,23 +630,6 @@ public class CompressController {
         }
     }
 
-    private byte[] generateImageMD5(PDImageXObject image) throws IOException {
-        return generatMD5(ImageProcessingUtils.getImageData(image.getImage()));
-    }
-
-    /** Generates a hash string from a byte array */
-    private String generateHashFromBytes(byte[] data) {
-        try {
-            // Use the existing method to generate MD5 hash
-            byte[] hash = generatMD5(data);
-            return bytesToHexString(hash);
-        } catch (Exception e) {
-            log.error("Error generating hash from bytes", e);
-            // Return a unique string as fallback
-            return "fallback-" + System.identityHashCode(data);
-        }
-    }
-
     // Updated scale factor method for levels 4-9
     private double getScaleFactorForLevel(int optimizeLevel) {
         return switch (optimizeLevel) {
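The two removed helpers had no remaining callers; deduplication relies on generateImageHash, whose body is outside this diff. A self-contained sketch of content hashing as a dedupe key, assuming an MD5-over-raw-bytes scheme like the removed generatMD5/bytesToHexString pair:

    import java.security.MessageDigest;

    // Hex MD5 of raw image bytes; equal bytes yield an equal key.
    static String hashBytes(byte[] data) throws Exception {
        byte[] digest = MessageDigest.getInstance("MD5").digest(data);
        StringBuilder sb = new StringBuilder();
        for (byte b : digest) {
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    }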
@@ -611,6 +701,7 @@ public class CompressController {
         if (qpdfEnabled && optimizeLevel <= 3) {
             optimizeLevel = 4;
         }
+
         while (!sizeMet && optimizeLevel <= 9) {
             // Apply image compression for levels 4-9
             if ((optimizeLevel >= 4 || Boolean.TRUE.equals(convertToGrayscale))
@@ -619,7 +710,8 @@ public class CompressController {
                 float jpegQuality = getJpegQualityForLevel(optimizeLevel);
 
                 // Use the returned path from compressImagesInPDF
-                Path compressedImageFile = compressImagesInPDF(
+                Path compressedImageFile =
+                        compressImagesInPDF(
                                 currentFile,
                                 scaleFactor,
                                 jpegQuality,
@@ -633,11 +725,81 @@ public class CompressController {
 
                 // Apply QPDF compression for all levels
                 if (!qpdfCompressionApplied && qpdfEnabled) {
+                    applyQpdfCompression(request, optimizeLevel, currentFile, tempFiles);
+                    qpdfCompressionApplied = true;
+                } else if (!qpdfCompressionApplied) {
+                    // If QPDF is disabled, mark as applied and log
+                    if (!qpdfEnabled) {
+                        log.info("Skipping QPDF compression as QPDF group is disabled");
+                    }
+                    qpdfCompressionApplied = true;
+                }
+
+                // Check if file size is within expected size or not auto mode
+                long outputFileSize = Files.size(currentFile);
+                if (outputFileSize <= expectedOutputSize || !autoMode) {
+                    sizeMet = true;
+                } else {
+                    int newOptimizeLevel =
+                            incrementOptimizeLevel(
+                                    optimizeLevel, outputFileSize, expectedOutputSize);
+
+                    // Check if we can't increase the level further
+                    if (newOptimizeLevel == optimizeLevel) {
+                        if (autoMode) {
+                            log.info(
+                                    "Maximum optimization level reached without meeting target size.");
+                            sizeMet = true;
+                        }
+                    } else {
+                        // Reset flags for next iteration with higher optimization level
+                        imageCompressionApplied = false;
+                        qpdfCompressionApplied = false;
+                        optimizeLevel = newOptimizeLevel;
+                    }
+                }
+            }
+
+            // Check if optimized file is larger than the original
+            long finalFileSize = Files.size(currentFile);
+            if (finalFileSize >= inputFileSize) {
+                log.warn(
+                        "Optimized file is larger than the original. Using the original file instead.");
+                // Use the stored reference to the original file
+                currentFile = originalFile;
+            }
+
+            String outputFilename =
+                    Filenames.toSimpleFileName(inputFile.getOriginalFilename())
+                                    .replaceFirst("[.][^.]+$", "")
+                            + "_Optimized.pdf";
+
+            return WebResponseUtils.pdfDocToWebResponse(
+                    pdfDocumentFactory.load(currentFile.toFile()), outputFilename);
+
+        } finally {
+            // Clean up all temporary files
+            for (Path tempFile : tempFiles) {
+                try {
+                    Files.deleteIfExists(tempFile);
+                } catch (IOException e) {
+                    log.warn("Failed to delete temporary file: " + tempFile, e);
+                }
+            }
+        }
+    }
+
+    /** Apply QPDF compression to a PDF file */
+    private void applyQpdfCompression(
+            OptimizePdfRequest request, int optimizeLevel, Path currentFile, List<Path> tempFiles)
+            throws IOException {
+
         long preQpdfSize = Files.size(currentFile);
         log.info("Pre-QPDF file size: {}", GeneralUtils.formatBytes(preQpdfSize));
 
         // Map optimization levels to QPDF compression levels
-        int qpdfCompressionLevel = optimizeLevel <= 3
+        int qpdfCompressionLevel =
+                optimizeLevel <= 3
                         ? optimizeLevel * 3 // Level 1->3, 2->6, 3->9
                         : 9; // Max compression for levels 4-9
 
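The ternary above compresses the whole mapping from user-facing optimize levels onto QPDF's compression scale. Spelled out (derived only from the expression shown):

    // optimizeLevel <= 3 ? optimizeLevel * 3 : 9
    // 1 -> 3, 2 -> 6, 3 -> 9, and 4 through 9 all -> 9 (maximum)
    for (int level = 1; level <= 9; level++) {
        int qpdfLevel = level <= 3 ? level * 3 : 9;
        System.out.printf("optimizeLevel %d -> qpdf compression level %d%n", level, qpdfLevel);
    }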
@@ -663,85 +825,29 @@ public class CompressController {
 
         ProcessExecutorResult returnCode = null;
         try {
-            returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
+            returnCode =
+                    ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
                             .runCommandWithOutputHandling(command);
-            qpdfCompressionApplied = true;
 
             // Update current file to the QPDF output
-            currentFile = qpdfOutputFile;
+            Files.copy(qpdfOutputFile, currentFile, StandardCopyOption.REPLACE_EXISTING);
 
             long postQpdfSize = Files.size(currentFile);
             double qpdfReduction = 100.0 - ((postQpdfSize * 100.0) / preQpdfSize);
             log.info(
                     "Post-QPDF file size: {} (reduced by {}%)",
-                    GeneralUtils.formatBytes(postQpdfSize),
-                    String.format("%.1f", qpdfReduction));
+                    GeneralUtils.formatBytes(postQpdfSize), String.format("%.1f", qpdfReduction));
 
         } catch (Exception e) {
             if (returnCode != null && returnCode.getRc() != 3) {
-                throw e;
+                throw new IOException("QPDF command failed", e);
             }
             // If QPDF fails, keep using the current file
-            log.warn("QPDF compression failed, continuing with current file");
+            log.warn("QPDF compression failed, continuing with current file", e);
         }
-
-        } else if (!qpdfCompressionApplied) {
-            // If QPDF is disabled, mark as applied and log
-            if (!qpdfEnabled) {
-                log.info("Skipping QPDF compression as QPDF group is disabled");
-            }
-            qpdfCompressionApplied = true;
-        }
-
-        // Check if file size is within expected size or not auto mode
-        long outputFileSize = Files.size(currentFile);
-        if (outputFileSize <= expectedOutputSize || !autoMode) {
-            sizeMet = true;
-        } else {
-            int newOptimizeLevel = incrementOptimizeLevel(
-                    optimizeLevel, outputFileSize, expectedOutputSize);
-
-            // Check if we can't increase the level further
-            if (newOptimizeLevel == optimizeLevel) {
-                if (autoMode) {
-                    log.info("Maximum optimization level reached without meeting target size.");
-                    sizeMet = true;
-                }
-            } else {
-                // Reset flags for next iteration with higher optimization level
-                imageCompressionApplied = false;
-                qpdfCompressionApplied = false;
-                optimizeLevel = newOptimizeLevel;
-            }
-        }
-        }
-
-        // Check if optimized file is larger than the original
-        long finalFileSize = Files.size(currentFile);
-        if (finalFileSize >= inputFileSize) {
-            log.warn("Optimized file is larger than the original. Using the original file instead.");
-            // Use the stored reference to the original file
-            currentFile = originalFile;
-        }
-
-        String outputFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename())
-                .replaceFirst("[.][^.]+$", "")
-                + "_Optimized.pdf";
-
-        return WebResponseUtils.pdfDocToWebResponse(
-                pdfDocumentFactory.load(currentFile.toFile()), outputFilename);
-
-        } finally {
-            // Clean up all temporary files
-            for (Path tempFile : tempFiles) {
-                try {
-                    Files.deleteIfExists(tempFile);
-                } catch (IOException e) {
-                    log.warn("Failed to delete temporary file: " + tempFile, e);
-                }
-            }
-        }
     }
 
+    /** Determine the appropriate optimization level based on the desired size reduction ratio */
     private int determineOptimizeLevel(double sizeReductionRatio) {
         if (sizeReductionRatio > 0.9) return 1;
         if (sizeReductionRatio > 0.8) return 2;
@@ -754,6 +860,7 @@ public class CompressController {
         return 9;
     }
 
+    /** Increment optimization level based on current size vs target size */
     private int incrementOptimizeLevel(int currentLevel, long currentSize, long targetSize) {
         double currentRatio = currentSize / (double) targetSize;
         log.info("Current compression ratio: {}", String.format("%.2f", currentRatio));
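Only the first two thresholds of determineOptimizeLevel are visible in this hunk. A worked example of the visible part, assuming sizeReductionRatio = desired size / current size (the call site is not shown in this diff):

    double sizeReductionRatio = 85_000.0 / 100_000.0; // want roughly 15% smaller -> 0.85
    // 0.85 is not > 0.9, but is > 0.8, so determineOptimizeLevel returns level 2.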
PipelineController.java

@@ -45,8 +45,8 @@ public class PipelineController {
 
     private final PostHogService postHogService;
 
-    public PipelineController(PipelineProcessor processor, ObjectMapper objectMapper,
-            PostHogService postHogService) {
+    public PipelineController(
+            PipelineProcessor processor, ObjectMapper objectMapper, PostHogService postHogService) {
         this.processor = processor;
         this.objectMapper = objectMapper;
         this.postHogService = postHogService;
@@ -63,8 +63,8 @@ public class PipelineController {
         PipelineConfig config = objectMapper.readValue(jsonString, PipelineConfig.class);
         log.info("Received POST request to /handleData with {} files", files.length);
 
-        List<String> operationNames = config.getOperations().stream()
+        List<String> operationNames =
+                config.getOperations().stream()
                         .map(PipelineOperation::getOperation)
                         .collect(Collectors.toList());
 
PipelineDirectoryProcessor.java

@@ -21,7 +21,6 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
-import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 import org.springframework.core.io.ByteArrayResource;
@@ -155,9 +154,8 @@ public class PipelineDirectoryProcessor {
             return;
         }
 
-        List<String> operationNames = config.getOperations().stream()
-                .map(PipelineOperation::getOperation)
-                .toList();
+        List<String> operationNames =
+                config.getOperations().stream().map(PipelineOperation::getOperation).toList();
         Map<String, Object> properties = new HashMap<>();
         properties.put("operations", operationNames);
         properties.put("fileCount", files.length);
@@ -263,7 +261,8 @@ public class PipelineDirectoryProcessor {
             try {
                 Thread.sleep(retryDelayMs * (int) Math.pow(2, attempt - 1));
             } catch (InterruptedException e1) {
-                log.error("prepareFilesForProcessing failure",e); }
+                log.error("prepareFilesForProcessing failure", e);
+            }
         }
     }
 }
OptimizePdfRequest.java

@@ -14,7 +14,7 @@ public class OptimizePdfRequest extends PDFFile {
     @Schema(
             description =
                     "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.",
-            allowableValues = {"1", "2", "3", "4", "5"})
+            allowableValues = {"1", "2", "3", "4", "5", "6", "7", "8", "9"})
     private Integer optimizeLevel;
 
     @Schema(description = "The expected output size, e.g. '100MB', '25KB', etc.")
CustomPDFDocumentFactory.java

@@ -261,7 +261,6 @@ public class CustomPDFDocumentFactory {
             removePassword(doc);
         }
 
-
     private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache)
             throws IOException {
         return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);
MetricsAggregatorService.java

@@ -66,7 +66,6 @@ public class MetricsAggregatorService {
             return;
         }
 
-
         if (uri.contains(".txt")) {
             return;
         }