Mirror of https://github.com/Frooodle/Stirling-PDF.git (synced 2025-05-24 01:16:29 +02:00)
Add default languages to OCR, fix compression for QPDF and embedded images (#3202)
# Description of Changes

This pull request includes several changes to the codebase, focusing on enhancing OCR support, improving endpoint management, and adding new functionality for PDF compression. The most important changes are detailed below.

### Enhancements to OCR support:

* `Dockerfile` and `Dockerfile.fat`: Added support for several new OCR languages, including Chinese (Simplified), German, French, and Portuguese (our top five languages, including English). [[1]](diffhunk://#diff-dd2c0eb6ea5cfc6c4bd4eac30934e2d5746747af48fef6da689e85b752f39557R69-R72) [[2]](diffhunk://#diff-571631582b988e88c52c86960cc083b0b8fa63cf88f056f26e9e684195221c27L78-R81)

### Improvements to endpoint management:

* [`src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java`](diffhunk://#diff-750f31f6ecbd64b025567108a33775cad339e835a04360affff82a09410b697dR51-R66): Added a new method `isGroupEnabled` that checks whether every endpoint in a group is enabled.
* [`src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java`](diffhunk://#diff-750f31f6ecbd64b025567108a33775cad339e835a04360affff82a09410b697dL179-L193): Updated endpoint groups and removed redundant qpdf endpoint registrations. [[1]](diffhunk://#diff-750f31f6ecbd64b025567108a33775cad339e835a04360affff82a09410b697dL179-L193) [[2]](diffhunk://#diff-750f31f6ecbd64b025567108a33775cad339e835a04360affff82a09410b697dL243-L244)
* [`src/main/java/stirling/software/SPDF/config/EndpointInspector.java`](diffhunk://#diff-845de13e140bb1264014539714860f044405274ad2a9481f38befdd1c1333818R1-R291): Introduced a new `EndpointInspector` class to discover and validate GET endpoints dynamically.

### New functionality for PDF compression:

* [`src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java`](diffhunk://#diff-c307589e9f958f2593c9567c5ad9d63cd03788aa4803b3017b1c13b0d0485805R10): Enhanced the `CompressController` to handle nested images within form XObjects, improving the accuracy of image compression in PDFs.
* Removed the compress feature's hard dependency on QPDF; QPDF is now invoked only when its endpoint group is enabled. [[1]](diffhunk://#diff-c307589e9f958f2593c9567c5ad9d63cd03788aa4803b3017b1c13b0d0485805R10) [[2]](diffhunk://#diff-c307589e9f958f2593c9567c5ad9d63cd03788aa4803b3017b1c13b0d0485805R28-R44) [[3]](diffhunk://#diff-c307589e9f958f2593c9567c5ad9d63cd03788aa4803b3017b1c13b0d0485805L49-R61) [[4]](diffhunk://#diff-c307589e9f958f2593c9567c5ad9d63cd03788aa4803b3017b1c13b0d0485805R77-R99) [[5]](diffhunk://#diff-c307589e9f958f2593c9567c5ad9d63cd03788aa4803b3017b1c13b0d0485805L92-R191)
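To make the nested-image change above concrete, here is a minimal PDFBox sketch of the traversal it relies on: walk each page's XObjects and descend one level into form XObjects so images inside forms are also found. The class and method names below are illustrative only; the actual implementation lives in `CompressController` in this PR's diff.

```java
import java.io.IOException;

import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;

// Sketch only: counts direct and nested images in the same spirit as the PR's
// findImages/checkFormForImages methods; not the exact production code.
public class NestedImageWalker {

    public int countImages(PDDocument doc) throws IOException {
        int found = 0;
        for (PDPage page : doc.getPages()) {
            PDResources res = page.getResources();
            if (res == null) continue;
            for (COSName name : res.getXObjectNames()) {
                PDXObject xobj = res.getXObject(name);
                if (xobj instanceof PDImageXObject) {
                    found++; // direct image on the page
                } else if (xobj instanceof PDFormXObject) {
                    // Form XObject: images may be nested inside its own resources
                    PDResources formRes = ((PDFormXObject) xobj).getResources();
                    if (formRes == null) continue;
                    for (COSName nested : formRes.getXObjectNames()) {
                        if (formRes.getXObject(nested) instanceof PDImageXObject) {
                            found++; // nested image that a page-level scan would miss
                        }
                    }
                }
            }
        }
        return found;
    }
}
```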
Closes #(issue_number)

---

## Checklist

### General

- [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details.

---------

Co-authored-by: a <a>
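As a rough illustration of how the QPDF dependency becomes optional, the sketch below mirrors the wiring used in the updated `CompressController`: resolve the "qpdf" group once through the new `EndpointConfiguration.isGroupEnabled` and skip the external QPDF pass when the group is disabled. The surrounding class and method names here are hypothetical; only `EndpointConfiguration.isGroupEnabled` and the "qpdf" group name come from this PR.

```java
import org.springframework.stereotype.Component;

import stirling.software.SPDF.config.EndpointConfiguration;

// Hypothetical consumer of the new group check; mirrors the constructor wiring in CompressController.
@Component
public class QpdfGate {

    private final boolean qpdfEnabled;

    public QpdfGate(EndpointConfiguration endpointConfiguration) {
        // true only when every endpoint in the "qpdf" group is enabled
        this.qpdfEnabled = endpointConfiguration.isGroupEnabled("qpdf");
    }

    public boolean shouldRunQpdf() {
        // callers fall back to pure-PDFBox compression when this is false
        return qpdfEnabled;
    }
}
```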
This commit is contained in:
parent 748ac494e6
commit d8cca66560
@@ -66,6 +66,10 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
     poppler-utils \
     # OCR MY PDF (unpaper for descew and other advanced features)
     tesseract-ocr-data-eng \
+    tesseract-ocr-data-chi_sim \
+    tesseract-ocr-data-deu \
+    tesseract-ocr-data-fra \
+    tesseract-ocr-data-por \
     # CV
     py3-opencv \
     python3 \
@@ -75,7 +75,10 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
     # OCR MY PDF (unpaper for descew and other advanced featues)
     qpdf \
     tesseract-ocr-data-eng \
+    tesseract-ocr-data-chi_sim \
+    tesseract-ocr-data-deu \
+    tesseract-ocr-data-fra \
+    tesseract-ocr-data-por \
     font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra font-liberation font-linux-libertine \
     # CV
     py3-opencv \
@@ -25,7 +25,7 @@ ext {
 }
 
 group = "stirling.software"
-version = "0.44.2"
+version = "0.44.3"
 
 java {
     // 17 is lowest but we support and recommend 21
@@ -48,6 +48,22 @@ public class EndpointConfiguration {
         return endpointStatuses.getOrDefault(endpoint, true);
     }
 
+    public boolean isGroupEnabled(String group) {
+        Set<String> endpoints = endpointGroups.get(group);
+        if (endpoints == null || endpoints.isEmpty()) {
+            log.debug("Group '{}' does not exist or has no endpoints", group);
+            return false;
+        }
+
+        for (String endpoint : endpoints) {
+            if (!isEndpointEnabled(endpoint)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
     public void addEndpointToGroup(String group, String endpoint) {
         endpointGroups.computeIfAbsent(group, k -> new HashSet<>()).add(endpoint);
     }
@@ -176,21 +192,17 @@ public class EndpointConfiguration {
         addEndpointToGroup("OpenCV", "extract-image-scans");
 
         // LibreOffice
-        addEndpointToGroup("qpdf", "repair");
         addEndpointToGroup("LibreOffice", "file-to-pdf");
         addEndpointToGroup("LibreOffice", "pdf-to-word");
         addEndpointToGroup("LibreOffice", "pdf-to-presentation");
         addEndpointToGroup("LibreOffice", "pdf-to-rtf");
         addEndpointToGroup("LibreOffice", "pdf-to-html");
         addEndpointToGroup("LibreOffice", "pdf-to-xml");
+        addEndpointToGroup("LibreOffice", "pdf-to-pdfa");
 
         // Unoconvert
         addEndpointToGroup("Unoconvert", "file-to-pdf");
 
-        // qpdf
-        addEndpointToGroup("qpdf", "compress-pdf");
-        addEndpointToGroup("qpdf", "pdf-to-pdfa");
-
         addEndpointToGroup("tesseract", "ocr-pdf");
 
         // Java
@@ -240,8 +252,6 @@ public class EndpointConfiguration {
         addEndpointToGroup("Javascript", "adjust-contrast");
 
         // qpdf dependent endpoints
-        addEndpointToGroup("qpdf", "compress-pdf");
-        addEndpointToGroup("qpdf", "pdf-to-pdfa");
         addEndpointToGroup("qpdf", "repair");
 
         // Weasyprint dependent endpoints
@@ -0,0 +1,216 @@
+package stirling.software.SPDF.config;
+
+import java.lang.reflect.Method;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.ApplicationListener;
+import org.springframework.context.event.ContextRefreshedEvent;
+import org.springframework.stereotype.Component;
+import org.springframework.web.bind.annotation.RequestMethod;
+import org.springframework.web.method.HandlerMethod;
+import org.springframework.web.servlet.mvc.method.RequestMappingInfo;
+import org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerMapping;
+
+@Component
+public class EndpointInspector implements ApplicationListener<ContextRefreshedEvent> {
+    private static final Logger logger = LoggerFactory.getLogger(EndpointInspector.class);
+
+    private final ApplicationContext applicationContext;
+    private final Set<String> validGetEndpoints = new HashSet<>();
+    private boolean endpointsDiscovered = false;
+
+    @Autowired
+    public EndpointInspector(ApplicationContext applicationContext) {
+        this.applicationContext = applicationContext;
+    }
+
+    @Override
+    public void onApplicationEvent(ContextRefreshedEvent event) {
+        if (!endpointsDiscovered) {
+            discoverEndpoints();
+            endpointsDiscovered = true;
+            logger.info("Discovered {} valid GET endpoints", validGetEndpoints.size());
+        }
+    }
+
+    private void discoverEndpoints() {
+        try {
+            Map<String, RequestMappingHandlerMapping> mappings =
+                    applicationContext.getBeansOfType(RequestMappingHandlerMapping.class);
+
+            for (Map.Entry<String, RequestMappingHandlerMapping> entry : mappings.entrySet()) {
+                RequestMappingHandlerMapping mapping = entry.getValue();
+                Map<RequestMappingInfo, HandlerMethod> handlerMethods = mapping.getHandlerMethods();
+
+                for (Map.Entry<RequestMappingInfo, HandlerMethod> handlerEntry :
+                        handlerMethods.entrySet()) {
+                    RequestMappingInfo mappingInfo = handlerEntry.getKey();
+                    HandlerMethod handlerMethod = handlerEntry.getValue();
+
+                    boolean isGetHandler = false;
+                    try {
+                        Set<RequestMethod> methods = mappingInfo.getMethodsCondition().getMethods();
+                        isGetHandler = methods.isEmpty() || methods.contains(RequestMethod.GET);
+                    } catch (Exception e) {
+                        isGetHandler = true;
+                    }
+
+                    if (isGetHandler) {
+                        Set<String> patterns = extractPatternsUsingDirectPaths(mappingInfo);
+
+                        if (patterns.isEmpty()) {
+                            patterns = extractPatternsFromString(mappingInfo);
+                        }
+
+                        validGetEndpoints.addAll(patterns);
+                    }
+                }
+            }
+
+            if (validGetEndpoints.isEmpty()) {
+                logger.warn("No endpoints discovered. Adding common endpoints as fallback.");
+                validGetEndpoints.add("/");
+                validGetEndpoints.add("/api/**");
+                validGetEndpoints.add("/**");
+            }
+        } catch (Exception e) {
+            logger.error("Error discovering endpoints", e);
+        }
+    }
+
+    private Set<String> extractPatternsUsingDirectPaths(RequestMappingInfo mappingInfo) {
+        Set<String> patterns = new HashSet<>();
+
+        try {
+            Method getDirectPathsMethod = mappingInfo.getClass().getMethod("getDirectPaths");
+            Object result = getDirectPathsMethod.invoke(mappingInfo);
+            if (result instanceof Set) {
+                @SuppressWarnings("unchecked")
+                Set<String> resultSet = (Set<String>) result;
+                patterns.addAll(resultSet);
+            }
+        } catch (Exception e) {
+            // Return empty set if method not found or fails
+        }
+
+        return patterns;
+    }
+
+    private Set<String> extractPatternsFromString(RequestMappingInfo mappingInfo) {
+        Set<String> patterns = new HashSet<>();
+        try {
+            String infoString = mappingInfo.toString();
+            if (infoString.contains("{")) {
+                String patternsSection =
+                        infoString.substring(infoString.indexOf("{") + 1, infoString.indexOf("}"));
+
+                for (String pattern : patternsSection.split(",")) {
+                    pattern = pattern.trim();
+                    if (!pattern.isEmpty()) {
+                        patterns.add(pattern);
+                    }
+                }
+            }
+        } catch (Exception e) {
+            // Return empty set if parsing fails
+        }
+        return patterns;
+    }
+
+    public boolean isValidGetEndpoint(String uri) {
+        if (!endpointsDiscovered) {
+            discoverEndpoints();
+            endpointsDiscovered = true;
+        }
+
+        if (validGetEndpoints.contains(uri)) {
+            return true;
+        }
+
+        if (matchesWildcardOrPathVariable(uri)) {
+            return true;
+        }
+
+        if (matchesPathSegments(uri)) {
+            return true;
+        }
+
+        return false;
+    }
+
+    private boolean matchesWildcardOrPathVariable(String uri) {
+        for (String pattern : validGetEndpoints) {
+            if (pattern.contains("*") || pattern.contains("{")) {
+                int wildcardIndex = pattern.indexOf('*');
+                int variableIndex = pattern.indexOf('{');
+
+                int cutoffIndex;
+                if (wildcardIndex < 0) {
+                    cutoffIndex = variableIndex;
+                } else if (variableIndex < 0) {
+                    cutoffIndex = wildcardIndex;
+                } else {
+                    cutoffIndex = Math.min(wildcardIndex, variableIndex);
+                }
+
+                String staticPrefix = pattern.substring(0, cutoffIndex);
+
+                if (uri.startsWith(staticPrefix)) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    private boolean matchesPathSegments(String uri) {
+        for (String pattern : validGetEndpoints) {
+            if (!pattern.contains("*") && !pattern.contains("{")) {
+                String[] patternSegments = pattern.split("/");
+                String[] uriSegments = uri.split("/");
+
+                if (uriSegments.length < patternSegments.length) {
+                    continue;
+                }
+
+                boolean match = true;
+                for (int i = 0; i < patternSegments.length; i++) {
+                    if (!patternSegments[i].equals(uriSegments[i])) {
+                        match = false;
+                        break;
+                    }
+                }
+
+                if (match) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    public Set<String> getValidGetEndpoints() {
+        if (!endpointsDiscovered) {
+            discoverEndpoints();
+            endpointsDiscovered = true;
+        }
+        return new HashSet<>(validGetEndpoints);
+    }
+
+    private void logAllEndpoints() {
+        Set<String> sortedEndpoints = new TreeSet<>(validGetEndpoints);
+
+        logger.info("=== BEGIN: All discovered GET endpoints ===");
+        for (String endpoint : sortedEndpoints) {
+            logger.info("Endpoint: {}", endpoint);
+        }
+        logger.info("=== END: All discovered GET endpoints ===");
+    }
+}
@ -7,6 +7,7 @@ import java.io.IOException;
|
|||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.StandardCopyOption;
|
||||||
import java.security.MessageDigest;
|
import java.security.MessageDigest;
|
||||||
import java.security.NoSuchAlgorithmException;
|
import java.security.NoSuchAlgorithmException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -29,8 +30,8 @@ import org.apache.pdfbox.pdmodel.PDDocument;
|
|||||||
import org.apache.pdfbox.pdmodel.PDPage;
|
import org.apache.pdfbox.pdmodel.PDPage;
|
||||||
import org.apache.pdfbox.pdmodel.PDResources;
|
import org.apache.pdfbox.pdmodel.PDResources;
|
||||||
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
|
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
|
||||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
|
||||||
import org.springframework.http.ResponseEntity;
|
import org.springframework.http.ResponseEntity;
|
||||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||||
import org.springframework.web.bind.annotation.PostMapping;
|
import org.springframework.web.bind.annotation.PostMapping;
|
||||||
@ -44,13 +45,14 @@ import io.swagger.v3.oas.annotations.tags.Tag;
|
|||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
|
import lombok.EqualsAndHashCode;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
import stirling.software.SPDF.config.EndpointConfiguration;
|
||||||
import stirling.software.SPDF.model.api.misc.OptimizePdfRequest;
|
import stirling.software.SPDF.model.api.misc.OptimizePdfRequest;
|
||||||
import stirling.software.SPDF.service.CustomPDFDocumentFactory;
|
import stirling.software.SPDF.service.CustomPDFDocumentFactory;
|
||||||
import stirling.software.SPDF.utils.GeneralUtils;
|
import stirling.software.SPDF.utils.GeneralUtils;
|
||||||
import stirling.software.SPDF.utils.ImageProcessingUtils;
|
|
||||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||||
@ -62,10 +64,13 @@ import stirling.software.SPDF.utils.WebResponseUtils;
|
|||||||
public class CompressController {
|
public class CompressController {
|
||||||
|
|
||||||
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
||||||
|
private final boolean qpdfEnabled;
|
||||||
|
|
||||||
@Autowired
|
public CompressController(
|
||||||
public CompressController(CustomPDFDocumentFactory pdfDocumentFactory) {
|
CustomPDFDocumentFactory pdfDocumentFactory,
|
||||||
|
EndpointConfiguration endpointConfiguration) {
|
||||||
this.pdfDocumentFactory = pdfDocumentFactory;
|
this.pdfDocumentFactory = pdfDocumentFactory;
|
||||||
|
this.qpdfEnabled = endpointConfiguration.isGroupEnabled("qpdf");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@ -76,10 +81,30 @@ public class CompressController {
|
|||||||
COSName name; // The name used to reference this image
|
COSName name; // The name used to reference this image
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@EqualsAndHashCode(callSuper = true)
|
||||||
|
@AllArgsConstructor
|
||||||
|
@NoArgsConstructor
|
||||||
|
private static class NestedImageReference extends ImageReference {
|
||||||
|
COSName formName; // Name of the form XObject containing the image
|
||||||
|
COSName imageName; // Name of the image within the form
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tracks compression stats for reporting
|
||||||
|
private static class CompressionStats {
|
||||||
|
int totalImages = 0;
|
||||||
|
int nestedImages = 0;
|
||||||
|
int uniqueImagesCount = 0;
|
||||||
|
int compressedImages = 0;
|
||||||
|
int skippedImages = 0;
|
||||||
|
long totalOriginalBytes = 0;
|
||||||
|
long totalCompressedBytes = 0;
|
||||||
|
}
|
||||||
|
|
||||||
public Path compressImagesInPDF(
|
public Path compressImagesInPDF(
|
||||||
Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
|
Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
Path newCompressedPDF = Files.createTempFile("compressedPDF", ".pdf");
|
Path newCompressedPDF = Files.createTempFile("compressedPDF", ".pdf");
|
||||||
long originalFileSize = Files.size(pdfFile);
|
long originalFileSize = Files.size(pdfFile);
|
||||||
log.info(
|
log.info(
|
||||||
"Starting image compression with scale factor: {}, JPEG quality: {}, grayscale: {} on file size: {}",
|
"Starting image compression with scale factor: {}, JPEG quality: {}, grayscale: {} on file size: {}",
|
||||||
@ -89,146 +114,29 @@ public class CompressController {
|
|||||||
GeneralUtils.formatBytes(originalFileSize));
|
GeneralUtils.formatBytes(originalFileSize));
|
||||||
|
|
||||||
try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) {
|
try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) {
|
||||||
|
// Find all unique images in the document
|
||||||
|
Map<String, List<ImageReference>> uniqueImages = findImages(doc);
|
||||||
|
|
||||||
// Collect all unique images by content hash
|
// Get statistics
|
||||||
Map<String, List<ImageReference>> uniqueImages = new HashMap<>();
|
CompressionStats stats = new CompressionStats();
|
||||||
Map<String, PDImageXObject> compressedVersions = new HashMap<>();
|
stats.uniqueImagesCount = uniqueImages.size();
|
||||||
|
calculateImageStats(uniqueImages, stats);
|
||||||
|
|
||||||
int totalImages = 0;
|
// Create compressed versions of unique images
|
||||||
|
Map<String, PDImageXObject> compressedVersions =
|
||||||
|
createCompressedImages(
|
||||||
|
doc, uniqueImages, scaleFactor, jpegQuality, convertToGrayscale, stats);
|
||||||
|
|
||||||
for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
|
// Replace all instances with compressed versions
|
||||||
PDPage page = doc.getPage(pageNum);
|
replaceImages(doc, uniqueImages, compressedVersions, stats);
|
||||||
PDResources res = page.getResources();
|
|
||||||
if (res == null || res.getXObjectNames() == null) continue;
|
|
||||||
|
|
||||||
for (COSName name : res.getXObjectNames()) {
|
|
||||||
PDXObject xobj = res.getXObject(name);
|
|
||||||
if (!(xobj instanceof PDImageXObject)) continue;
|
|
||||||
|
|
||||||
totalImages++;
|
|
||||||
PDImageXObject image = (PDImageXObject) xobj;
|
|
||||||
String imageHash = generateImageHash(image);
|
|
||||||
|
|
||||||
// Store only page number and name reference
|
|
||||||
ImageReference ref = new ImageReference();
|
|
||||||
ref.pageNum = pageNum;
|
|
||||||
ref.name = name;
|
|
||||||
|
|
||||||
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int uniqueImagesCount = uniqueImages.size();
|
|
||||||
int duplicatedImages = totalImages - uniqueImagesCount;
|
|
||||||
log.info(
|
|
||||||
"Found {} unique images and {} duplicated instances across {} pages",
|
|
||||||
uniqueImagesCount,
|
|
||||||
duplicatedImages,
|
|
||||||
doc.getNumberOfPages());
|
|
||||||
|
|
||||||
// SECOND PASS: Process each unique image exactly once
|
|
||||||
int compressedImages = 0;
|
|
||||||
int skippedImages = 0;
|
|
||||||
long totalOriginalBytes = 0;
|
|
||||||
long totalCompressedBytes = 0;
|
|
||||||
|
|
||||||
for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
|
|
||||||
String imageHash = entry.getKey();
|
|
||||||
List<ImageReference> references = entry.getValue();
|
|
||||||
|
|
||||||
if (references.isEmpty()) continue;
|
|
||||||
|
|
||||||
// Get the first instance of this image
|
|
||||||
ImageReference firstRef = references.get(0);
|
|
||||||
PDPage firstPage = doc.getPage(firstRef.pageNum);
|
|
||||||
PDResources firstPageResources = firstPage.getResources();
|
|
||||||
PDImageXObject originalImage =
|
|
||||||
(PDImageXObject) firstPageResources.getXObject(firstRef.name);
|
|
||||||
|
|
||||||
// Track original size
|
|
||||||
int originalSize = (int) originalImage.getCOSObject().getLength();
|
|
||||||
totalOriginalBytes += originalSize;
|
|
||||||
|
|
||||||
// Process this unique image once
|
|
||||||
BufferedImage processedImage =
|
|
||||||
processAndCompressImage(
|
|
||||||
originalImage, scaleFactor, jpegQuality, convertToGrayscale);
|
|
||||||
|
|
||||||
if (processedImage != null) {
|
|
||||||
// Convert to bytes for storage
|
|
||||||
byte[] compressedData = convertToBytes(processedImage, jpegQuality);
|
|
||||||
|
|
||||||
// Check if compression is beneficial
|
|
||||||
if (compressedData.length < originalSize || convertToGrayscale) {
|
|
||||||
// Create a single compressed version
|
|
||||||
PDImageXObject compressedImage =
|
|
||||||
PDImageXObject.createFromByteArray(
|
|
||||||
doc,
|
|
||||||
compressedData,
|
|
||||||
originalImage.getCOSObject().toString());
|
|
||||||
|
|
||||||
// Store the compressed version only once in our map
|
|
||||||
compressedVersions.put(imageHash, compressedImage);
|
|
||||||
|
|
||||||
// Report compression stats
|
|
||||||
double reductionPercentage =
|
|
||||||
100.0 - ((compressedData.length * 100.0) / originalSize);
|
|
||||||
log.info(
|
|
||||||
"Image hash {}: Compressed from {} to {} (reduced by {}%)",
|
|
||||||
imageHash,
|
|
||||||
GeneralUtils.formatBytes(originalSize),
|
|
||||||
GeneralUtils.formatBytes(compressedData.length),
|
|
||||||
String.format("%.1f", reductionPercentage));
|
|
||||||
|
|
||||||
// Replace ALL instances with the compressed version
|
|
||||||
for (ImageReference ref : references) {
|
|
||||||
// Get the page and resources when needed
|
|
||||||
PDPage page = doc.getPage(ref.pageNum);
|
|
||||||
PDResources resources = page.getResources();
|
|
||||||
resources.put(ref.name, compressedImage);
|
|
||||||
|
|
||||||
log.info(
|
|
||||||
"Replaced image on page {} with compressed version",
|
|
||||||
ref.pageNum + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
totalCompressedBytes += compressedData.length * references.size();
|
|
||||||
compressedImages++;
|
|
||||||
} else {
|
|
||||||
log.info("Image hash {}: Compression not beneficial, skipping", imageHash);
|
|
||||||
totalCompressedBytes += originalSize * references.size();
|
|
||||||
skippedImages++;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
log.info("Image hash {}: Not suitable for compression, skipping", imageHash);
|
|
||||||
totalCompressedBytes += originalSize * references.size();
|
|
||||||
skippedImages++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Log compression statistics
|
// Log compression statistics
|
||||||
double overallImageReduction =
|
logCompressionStats(stats, originalFileSize);
|
||||||
totalOriginalBytes > 0
|
|
||||||
? 100.0 - ((totalCompressedBytes * 100.0) / totalOriginalBytes)
|
|
||||||
: 0;
|
|
||||||
|
|
||||||
log.info(
|
|
||||||
"Image compression summary - Total unique: {}, Compressed: {}, Skipped: {}, Duplicates: {}",
|
|
||||||
uniqueImagesCount,
|
|
||||||
compressedImages,
|
|
||||||
skippedImages,
|
|
||||||
duplicatedImages);
|
|
||||||
log.info(
|
|
||||||
"Total original image size: {}, compressed: {} (reduced by {}%)",
|
|
||||||
GeneralUtils.formatBytes(totalOriginalBytes),
|
|
||||||
GeneralUtils.formatBytes(totalCompressedBytes),
|
|
||||||
String.format("%.1f", overallImageReduction));
|
|
||||||
|
|
||||||
// Free memory before saving
|
// Free memory before saving
|
||||||
compressedVersions.clear();
|
compressedVersions.clear();
|
||||||
uniqueImages.clear();
|
uniqueImages.clear();
|
||||||
|
|
||||||
// Save the document
|
|
||||||
log.info("Saving compressed PDF to {}", newCompressedPDF.toString());
|
log.info("Saving compressed PDF to {}", newCompressedPDF.toString());
|
||||||
doc.save(newCompressedPDF.toString());
|
doc.save(newCompressedPDF.toString());
|
||||||
|
|
||||||
@ -242,7 +150,315 @@ public class CompressController {
|
|||||||
String.format("%.1f", overallReduction));
|
String.format("%.1f", overallReduction));
|
||||||
return newCompressedPDF;
|
return newCompressedPDF;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find all images in the document, both direct and nested within forms
|
||||||
|
private Map<String, List<ImageReference>> findImages(PDDocument doc) throws IOException {
|
||||||
|
Map<String, List<ImageReference>> uniqueImages = new HashMap<>();
|
||||||
|
|
||||||
|
// Scan through all pages in the document
|
||||||
|
for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
|
||||||
|
PDPage page = doc.getPage(pageNum);
|
||||||
|
PDResources res = page.getResources();
|
||||||
|
if (res == null || res.getXObjectNames() == null) continue;
|
||||||
|
|
||||||
|
// Process all XObjects on the page
|
||||||
|
for (COSName name : res.getXObjectNames()) {
|
||||||
|
PDXObject xobj = res.getXObject(name);
|
||||||
|
|
||||||
|
// Direct image
|
||||||
|
if (isImage(xobj)) {
|
||||||
|
addDirectImage(pageNum, name, (PDImageXObject) xobj, uniqueImages);
|
||||||
|
log.info(
|
||||||
|
"Found direct image '{}' on page {} - {}x{}",
|
||||||
|
name.getName(),
|
||||||
|
pageNum + 1,
|
||||||
|
((PDImageXObject) xobj).getWidth(),
|
||||||
|
((PDImageXObject) xobj).getHeight());
|
||||||
|
}
|
||||||
|
// Form XObject that may contain nested images
|
||||||
|
else if (isForm(xobj)) {
|
||||||
|
checkFormForImages(pageNum, name, (PDFormXObject) xobj, uniqueImages);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return uniqueImages;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isImage(PDXObject xobj) {
|
||||||
|
return xobj instanceof PDImageXObject;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isForm(PDXObject xobj) {
|
||||||
|
return xobj instanceof PDFormXObject;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ImageReference addDirectImage(
|
||||||
|
int pageNum,
|
||||||
|
COSName name,
|
||||||
|
PDImageXObject image,
|
||||||
|
Map<String, List<ImageReference>> uniqueImages)
|
||||||
|
throws IOException {
|
||||||
|
ImageReference ref = new ImageReference();
|
||||||
|
ref.pageNum = pageNum;
|
||||||
|
ref.name = name;
|
||||||
|
|
||||||
|
String imageHash = generateImageHash(image);
|
||||||
|
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref);
|
||||||
|
|
||||||
|
return ref;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look for images inside form XObjects
|
||||||
|
private void checkFormForImages(
|
||||||
|
int pageNum,
|
||||||
|
COSName formName,
|
||||||
|
PDFormXObject formXObj,
|
||||||
|
Map<String, List<ImageReference>> uniqueImages)
|
||||||
|
throws IOException {
|
||||||
|
PDResources formResources = formXObj.getResources();
|
||||||
|
if (formResources == null || formResources.getXObjectNames() == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"Checking form XObject '{}' on page {} for nested images",
|
||||||
|
formName.getName(),
|
||||||
|
pageNum + 1);
|
||||||
|
|
||||||
|
// Process all XObjects within the form
|
||||||
|
for (COSName nestedName : formResources.getXObjectNames()) {
|
||||||
|
PDXObject nestedXobj = formResources.getXObject(nestedName);
|
||||||
|
|
||||||
|
if (isImage(nestedXobj)) {
|
||||||
|
PDImageXObject nestedImage = (PDImageXObject) nestedXobj;
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"Found nested image '{}' in form '{}' on page {} - {}x{}",
|
||||||
|
nestedName.getName(),
|
||||||
|
formName.getName(),
|
||||||
|
pageNum + 1,
|
||||||
|
nestedImage.getWidth(),
|
||||||
|
nestedImage.getHeight());
|
||||||
|
|
||||||
|
// Create specialized reference for the nested image
|
||||||
|
NestedImageReference nestedRef = new NestedImageReference();
|
||||||
|
nestedRef.pageNum = pageNum;
|
||||||
|
nestedRef.formName = formName;
|
||||||
|
nestedRef.imageName = nestedName;
|
||||||
|
|
||||||
|
String imageHash = generateImageHash(nestedImage);
|
||||||
|
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(nestedRef);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count total images and nested images
|
||||||
|
private void calculateImageStats(
|
||||||
|
Map<String, List<ImageReference>> uniqueImages, CompressionStats stats) {
|
||||||
|
for (List<ImageReference> references : uniqueImages.values()) {
|
||||||
|
for (ImageReference ref : references) {
|
||||||
|
stats.totalImages++;
|
||||||
|
if (ref instanceof NestedImageReference) {
|
||||||
|
stats.nestedImages++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create compressed versions of all unique images
|
||||||
|
private Map<String, PDImageXObject> createCompressedImages(
|
||||||
|
PDDocument doc,
|
||||||
|
Map<String, List<ImageReference>> uniqueImages,
|
||||||
|
double scaleFactor,
|
||||||
|
float jpegQuality,
|
||||||
|
boolean convertToGrayscale,
|
||||||
|
CompressionStats stats)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
Map<String, PDImageXObject> compressedVersions = new HashMap<>();
|
||||||
|
|
||||||
|
// Process each unique image exactly once
|
||||||
|
for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
|
||||||
|
String imageHash = entry.getKey();
|
||||||
|
List<ImageReference> references = entry.getValue();
|
||||||
|
|
||||||
|
if (references.isEmpty()) continue;
|
||||||
|
|
||||||
|
// Get the first instance of this image
|
||||||
|
PDImageXObject originalImage = getOriginalImage(doc, references.get(0));
|
||||||
|
|
||||||
|
// Track original size
|
||||||
|
int originalSize = (int) originalImage.getCOSObject().getLength();
|
||||||
|
stats.totalOriginalBytes += originalSize;
|
||||||
|
|
||||||
|
// Process this unique image
|
||||||
|
PDImageXObject compressedImage =
|
||||||
|
compressImage(
|
||||||
|
doc,
|
||||||
|
originalImage,
|
||||||
|
originalSize,
|
||||||
|
scaleFactor,
|
||||||
|
jpegQuality,
|
||||||
|
convertToGrayscale);
|
||||||
|
|
||||||
|
if (compressedImage != null) {
|
||||||
|
// Store the compressed version in our map
|
||||||
|
compressedVersions.put(imageHash, compressedImage);
|
||||||
|
stats.compressedImages++;
|
||||||
|
|
||||||
|
// Update compression stats
|
||||||
|
int compressedSize = (int) compressedImage.getCOSObject().getLength();
|
||||||
|
stats.totalCompressedBytes += compressedSize * references.size();
|
||||||
|
|
||||||
|
double reductionPercentage = 100.0 - ((compressedSize * 100.0) / originalSize);
|
||||||
|
log.info(
|
||||||
|
"Image hash {}: Compressed from {} to {} (reduced by {}%)",
|
||||||
|
imageHash,
|
||||||
|
GeneralUtils.formatBytes(originalSize),
|
||||||
|
GeneralUtils.formatBytes(compressedSize),
|
||||||
|
String.format("%.1f", reductionPercentage));
|
||||||
|
} else {
|
||||||
|
log.info("Image hash {}: Not suitable for compression, skipping", imageHash);
|
||||||
|
stats.totalCompressedBytes += originalSize * references.size();
|
||||||
|
stats.skippedImages++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return compressedVersions;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get original image from a reference
|
||||||
|
private PDImageXObject getOriginalImage(PDDocument doc, ImageReference ref) throws IOException {
|
||||||
|
if (ref instanceof NestedImageReference) {
|
||||||
|
// Get the nested image from within a form XObject
|
||||||
|
NestedImageReference nestedRef = (NestedImageReference) ref;
|
||||||
|
PDPage page = doc.getPage(nestedRef.pageNum);
|
||||||
|
PDResources pageResources = page.getResources();
|
||||||
|
|
||||||
|
// Get the form XObject
|
||||||
|
PDFormXObject formXObj = (PDFormXObject) pageResources.getXObject(nestedRef.formName);
|
||||||
|
|
||||||
|
// Get the nested image from the form's resources
|
||||||
|
PDResources formResources = formXObj.getResources();
|
||||||
|
return (PDImageXObject) formResources.getXObject(nestedRef.imageName);
|
||||||
|
} else {
|
||||||
|
// Get direct image from page resources
|
||||||
|
PDPage page = doc.getPage(ref.pageNum);
|
||||||
|
PDResources resources = page.getResources();
|
||||||
|
return (PDImageXObject) resources.getXObject(ref.name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to compress an image if it makes sense
|
||||||
|
private PDImageXObject compressImage(
|
||||||
|
PDDocument doc,
|
||||||
|
PDImageXObject originalImage,
|
||||||
|
int originalSize,
|
||||||
|
double scaleFactor,
|
||||||
|
float jpegQuality,
|
||||||
|
boolean convertToGrayscale)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
// Process and compress the image
|
||||||
|
BufferedImage processedImage =
|
||||||
|
processAndCompressImage(
|
||||||
|
originalImage, scaleFactor, jpegQuality, convertToGrayscale);
|
||||||
|
|
||||||
|
if (processedImage == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to bytes for storage
|
||||||
|
byte[] compressedData = convertToBytes(processedImage, jpegQuality);
|
||||||
|
|
||||||
|
// Check if compression is beneficial
|
||||||
|
if (compressedData.length < originalSize || convertToGrayscale) {
|
||||||
|
// Create a compressed version
|
||||||
|
return PDImageXObject.createFromByteArray(
|
||||||
|
doc, compressedData, originalImage.getCOSObject().toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace all instances of original images with their compressed versions
|
||||||
|
private void replaceImages(
|
||||||
|
PDDocument doc,
|
||||||
|
Map<String, List<ImageReference>> uniqueImages,
|
||||||
|
Map<String, PDImageXObject> compressedVersions,
|
||||||
|
CompressionStats stats)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
|
||||||
|
String imageHash = entry.getKey();
|
||||||
|
List<ImageReference> references = entry.getValue();
|
||||||
|
|
||||||
|
// Skip if no compressed version exists
|
||||||
|
PDImageXObject compressedImage = compressedVersions.get(imageHash);
|
||||||
|
if (compressedImage == null) continue;
|
||||||
|
|
||||||
|
// Replace ALL instances with the compressed version
|
||||||
|
for (ImageReference ref : references) {
|
||||||
|
replaceImageReference(doc, ref, compressedImage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace a specific image reference with a compressed version
|
||||||
|
private void replaceImageReference(
|
||||||
|
PDDocument doc, ImageReference ref, PDImageXObject compressedImage) throws IOException {
|
||||||
|
if (ref instanceof NestedImageReference) {
|
||||||
|
// Replace nested image within form XObject
|
||||||
|
NestedImageReference nestedRef = (NestedImageReference) ref;
|
||||||
|
PDPage page = doc.getPage(nestedRef.pageNum);
|
||||||
|
PDResources pageResources = page.getResources();
|
||||||
|
|
||||||
|
// Get the form XObject
|
||||||
|
PDFormXObject formXObj = (PDFormXObject) pageResources.getXObject(nestedRef.formName);
|
||||||
|
|
||||||
|
// Replace the nested image in the form's resources
|
||||||
|
PDResources formResources = formXObj.getResources();
|
||||||
|
formResources.put(nestedRef.imageName, compressedImage);
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"Replaced nested image '{}' in form '{}' on page {} with compressed version",
|
||||||
|
nestedRef.imageName.getName(),
|
||||||
|
nestedRef.formName.getName(),
|
||||||
|
nestedRef.pageNum + 1);
|
||||||
|
} else {
|
||||||
|
// Replace direct image in page resources
|
||||||
|
PDPage page = doc.getPage(ref.pageNum);
|
||||||
|
PDResources resources = page.getResources();
|
||||||
|
resources.put(ref.name, compressedImage);
|
||||||
|
|
||||||
|
log.info("Replaced direct image on page {} with compressed version", ref.pageNum + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log final stats about the compression
|
||||||
|
private void logCompressionStats(CompressionStats stats, long originalFileSize) {
|
||||||
|
// Calculate image reduction percentage
|
||||||
|
double overallImageReduction =
|
||||||
|
stats.totalOriginalBytes > 0
|
||||||
|
? 100.0 - ((stats.totalCompressedBytes * 100.0) / stats.totalOriginalBytes)
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
int duplicatedImages = stats.totalImages - stats.uniqueImagesCount;
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"Image compression summary - Total unique: {}, Compressed: {}, Skipped: {}, Duplicates: {}, Nested: {}",
|
||||||
|
stats.uniqueImagesCount,
|
||||||
|
stats.compressedImages,
|
||||||
|
stats.skippedImages,
|
||||||
|
duplicatedImages,
|
||||||
|
stats.nestedImages);
|
||||||
|
log.info(
|
||||||
|
"Total original image size: {}, compressed: {} (reduced by {}%)",
|
||||||
|
GeneralUtils.formatBytes(stats.totalOriginalBytes),
|
||||||
|
GeneralUtils.formatBytes(stats.totalCompressedBytes),
|
||||||
|
String.format("%.1f", overallImageReduction));
|
||||||
}
|
}
|
||||||
|
|
||||||
private BufferedImage convertToGrayscale(BufferedImage image) {
|
private BufferedImage convertToGrayscale(BufferedImage image) {
|
||||||
@ -257,10 +473,7 @@ public class CompressController {
|
|||||||
return grayImage;
|
return grayImage;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Resize and optionally convert to grayscale
|
||||||
* Processes and compresses an image if beneficial. Returns the processed image if compression
|
|
||||||
* is worthwhile, null otherwise.
|
|
||||||
*/
|
|
||||||
private BufferedImage processAndCompressImage(
|
private BufferedImage processAndCompressImage(
|
||||||
PDImageXObject image, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
|
PDImageXObject image, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
@ -342,10 +555,7 @@ public class CompressController {
|
|||||||
return scaledImage;
|
return scaledImage;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Convert image to byte array with quality settings
|
||||||
* Converts a BufferedImage to a byte array with specified JPEG quality. Checks if compression
|
|
||||||
* is beneficial compared to original.
|
|
||||||
*/
|
|
||||||
private byte[] convertToBytes(BufferedImage scaledImage, float jpegQuality) throws IOException {
|
private byte[] convertToBytes(BufferedImage scaledImage, float jpegQuality) throws IOException {
|
||||||
String format = scaledImage.getColorModel().hasAlpha() ? "png" : "jpeg";
|
String format = scaledImage.getColorModel().hasAlpha() ? "png" : "jpeg";
|
||||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||||
@ -376,7 +586,7 @@ public class CompressController {
|
|||||||
return outputStream.toByteArray();
|
return outputStream.toByteArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Modified hash function to consistently identify identical image content */
|
// Hash function to identify identical images
|
||||||
private String generateImageHash(PDImageXObject image) {
|
private String generateImageHash(PDImageXObject image) {
|
||||||
try {
|
try {
|
||||||
// Create a stream for the raw stream data
|
// Create a stream for the raw stream data
|
||||||
@ -414,43 +624,26 @@ public class CompressController {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] generateImageMD5(PDImageXObject image) throws IOException {
|
// Scale factors for different optimization levels
|
||||||
return generatMD5(ImageProcessingUtils.getImageData(image.getImage()));
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Generates a hash string from a byte array */
|
|
||||||
private String generateHashFromBytes(byte[] data) {
|
|
||||||
try {
|
|
||||||
// Use the existing method to generate MD5 hash
|
|
||||||
byte[] hash = generatMD5(data);
|
|
||||||
return bytesToHexString(hash);
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error("Error generating hash from bytes", e);
|
|
||||||
// Return a unique string as fallback
|
|
||||||
return "fallback-" + System.identityHashCode(data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Updated scale factor method for levels 4-9
|
|
||||||
private double getScaleFactorForLevel(int optimizeLevel) {
|
private double getScaleFactorForLevel(int optimizeLevel) {
|
||||||
return switch (optimizeLevel) {
|
return switch (optimizeLevel) {
|
||||||
case 4 -> 0.9; // 90% of original size - lite image compression
|
case 4 -> 0.9; // 90% - lite compression
|
||||||
case 5 -> 0.8; // 80% of original size - lite image compression
|
case 5 -> 0.8; // 80% - lite compression
|
||||||
case 6 -> 0.7; // 70% of original size - lite image compression
|
case 6 -> 0.7; // 70% - lite compression
|
||||||
case 7 -> 0.6; // 60% of original size - intense image compression
|
case 7 -> 0.6; // 60% - intense compression
|
||||||
case 8 -> 0.5; // 50% of original size - intense image compression
|
case 8 -> 0.5; // 50% - intense compression
|
||||||
case 9, 10 -> 0.4; // 40% of original size - intense image compression
|
case 9, 10 -> 0.4; // 40% - intense compression
|
||||||
default -> 1.0; // No image scaling for levels 1-3
|
default -> 1.0; // No scaling for levels 1-3
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// New method for JPEG quality based on optimization level
|
// JPEG quality for different optimization levels
|
||||||
private float getJpegQualityForLevel(int optimizeLevel) {
|
private float getJpegQualityForLevel(int optimizeLevel) {
|
||||||
return switch (optimizeLevel) {
|
return switch (optimizeLevel) {
|
||||||
case 7 -> 0.8f; // 80% quality - intense compression
|
case 7 -> 0.8f; // 80% quality
|
||||||
case 8 -> 0.6f; // 60% quality - more intense compression
|
case 8 -> 0.6f; // 60% quality
|
||||||
case 9, 10 -> 0.4f; // 40% quality - most intense compression
|
case 9, 10 -> 0.4f; // 40% quality
|
||||||
default -> 0.7f; // 70% quality for levels 1-6 (higher quality)
|
default -> 0.7f; // 70% quality for levels 1-6
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -478,17 +671,17 @@ public class CompressController {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create initial input file
|
// Create initial input file
|
||||||
Path originalFile = Files.createTempFile("input_", ".pdf");
|
Path originalFile = Files.createTempFile("original_", ".pdf");
|
||||||
inputFile.transferTo(originalFile.toFile());
|
inputFile.transferTo(originalFile.toFile());
|
||||||
long inputFileSize = Files.size(originalFile);
|
long inputFileSize = Files.size(originalFile);
|
||||||
|
|
||||||
// Start with original as current working file
|
Path currentFile = Files.createTempFile("working_", ".pdf");
|
||||||
Path currentFile = originalFile;
|
Files.copy(originalFile, currentFile, StandardCopyOption.REPLACE_EXISTING);
|
||||||
|
|
||||||
// Keep track of all temporary files for cleanup
|
// Keep track of all temporary files for cleanup
|
||||||
List<Path> tempFiles = new ArrayList<>();
|
List<Path> tempFiles = new ArrayList<>();
|
||||||
tempFiles.add(originalFile);
|
tempFiles.add(originalFile);
|
||||||
|
tempFiles.add(currentFile);
|
||||||
try {
|
try {
|
||||||
if (autoMode) {
|
if (autoMode) {
|
||||||
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
|
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
|
||||||
@ -499,93 +692,56 @@ public class CompressController {
|
|||||||
boolean imageCompressionApplied = false;
|
boolean imageCompressionApplied = false;
|
||||||
boolean qpdfCompressionApplied = false;
|
boolean qpdfCompressionApplied = false;
|
||||||
|
|
||||||
|
if (qpdfEnabled && optimizeLevel <= 3) {
|
||||||
|
optimizeLevel = 4;
|
||||||
|
}
|
||||||
|
|
||||||
while (!sizeMet && optimizeLevel <= 9) {
|
while (!sizeMet && optimizeLevel <= 9) {
|
||||||
// Apply image compression for levels 4-9
|
// Apply image compression for levels 4-9
|
||||||
if ((optimizeLevel >= 4 || Boolean.TRUE.equals(convertToGrayscale))
|
if ((optimizeLevel >= 4 || Boolean.TRUE.equals(convertToGrayscale))
|
||||||
&& !imageCompressionApplied) {
|
&& !imageCompressionApplied) {
|
||||||
double scaleFactor = getScaleFactorForLevel(optimizeLevel);
|
double scaleFactor = getScaleFactorForLevel(optimizeLevel);
|
||||||
float jpegQuality = getJpegQualityForLevel(optimizeLevel);
|
float jpegQuality = getJpegQualityForLevel(optimizeLevel);
|
||||||
|
|
||||||
// Use the returned path from compressImagesInPDF
|
// Compress images
|
||||||
Path compressedImageFile = compressImagesInPDF(
|
Path compressedImageFile =
|
||||||
currentFile,
|
compressImagesInPDF(
|
||||||
scaleFactor,
|
currentFile,
|
||||||
jpegQuality,
|
scaleFactor,
|
||||||
Boolean.TRUE.equals(convertToGrayscale));
|
jpegQuality,
|
||||||
|
Boolean.TRUE.equals(convertToGrayscale));
|
||||||
// Add to temp files list and update current file
|
|
||||||
tempFiles.add(compressedImageFile);
|
tempFiles.add(compressedImageFile);
|
||||||
currentFile = compressedImageFile;
|
currentFile = compressedImageFile;
|
||||||
imageCompressionApplied = true;
|
imageCompressionApplied = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply QPDF compression for all levels
|
// Apply QPDF compression for all levels
|
||||||
if (!qpdfCompressionApplied) {
|
if (!qpdfCompressionApplied && qpdfEnabled) {
|
||||||
long preQpdfSize = Files.size(currentFile);
|
applyQpdfCompression(request, optimizeLevel, currentFile, tempFiles);
|
||||||
log.info("Pre-QPDF file size: {}", GeneralUtils.formatBytes(preQpdfSize));
|
qpdfCompressionApplied = true;
|
||||||
|
} else if (!qpdfCompressionApplied) {
|
||||||
// Map optimization levels to QPDF compression levels
|
// If QPDF is disabled, mark as applied and log
|
||||||
int qpdfCompressionLevel = optimizeLevel <= 3
|
if (!qpdfEnabled) {
|
||||||
? optimizeLevel * 3 // Level 1->3, 2->6, 3->9
|
log.info("Skipping QPDF compression as QPDF group is disabled");
|
||||||
: 9; // Max compression for levels 4-9
|
|
||||||
|
|
||||||
// Create output file for QPDF
|
|
||||||
Path qpdfOutputFile = Files.createTempFile("qpdf_output_", ".pdf");
|
|
||||||
tempFiles.add(qpdfOutputFile);
|
|
||||||
|
|
||||||
// Run QPDF optimization
|
|
||||||
List<String> command = new ArrayList<>();
|
|
||||||
command.add("qpdf");
|
|
||||||
if (request.getNormalize()) {
|
|
||||||
command.add("--normalize-content=y");
|
|
||||||
}
|
|
||||||
if (request.getLinearize()) {
|
|
||||||
command.add("--linearize");
|
|
||||||
}
|
|
||||||
command.add("--recompress-flate");
|
|
||||||
command.add("--compression-level=" + qpdfCompressionLevel);
|
|
||||||
command.add("--compress-streams=y");
|
|
||||||
command.add("--object-streams=generate");
|
|
||||||
command.add(currentFile.toString());
|
|
||||||
command.add(qpdfOutputFile.toString());
|
|
||||||
|
|
||||||
ProcessExecutorResult returnCode = null;
|
|
||||||
try {
|
|
||||||
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
|
|
||||||
.runCommandWithOutputHandling(command);
|
|
||||||
qpdfCompressionApplied = true;
|
|
||||||
|
|
||||||
// Update current file to the QPDF output
|
|
||||||
currentFile = qpdfOutputFile;
|
|
||||||
|
|
||||||
long postQpdfSize = Files.size(currentFile);
|
|
||||||
double qpdfReduction = 100.0 - ((postQpdfSize * 100.0) / preQpdfSize);
|
|
||||||
log.info(
|
|
||||||
"Post-QPDF file size: {} (reduced by {}%)",
|
|
||||||
GeneralUtils.formatBytes(postQpdfSize),
|
|
||||||
String.format("%.1f", qpdfReduction));
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
if (returnCode != null && returnCode.getRc() != 3) {
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
// If QPDF fails, keep using the current file
|
|
||||||
log.warn("QPDF compression failed, continuing with current file");
|
|
||||||
}
|
}
|
||||||
|
qpdfCompressionApplied = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if file size is within expected size or not auto mode
|
// Check if target size reached or not in auto mode
|
||||||
long outputFileSize = Files.size(currentFile);
|
long outputFileSize = Files.size(currentFile);
|
||||||
if (outputFileSize <= expectedOutputSize || !autoMode) {
|
if (outputFileSize <= expectedOutputSize || !autoMode) {
|
||||||
sizeMet = true;
|
sizeMet = true;
|
||||||
} else {
|
} else {
|
||||||
int newOptimizeLevel = incrementOptimizeLevel(
|
int newOptimizeLevel =
|
||||||
optimizeLevel, outputFileSize, expectedOutputSize);
|
incrementOptimizeLevel(
|
||||||
|
optimizeLevel, outputFileSize, expectedOutputSize);
|
||||||
|
|
||||||
// Check if we can't increase the level further
|
// Check if we can't increase the level further
|
||||||
if (newOptimizeLevel == optimizeLevel) {
|
if (newOptimizeLevel == optimizeLevel) {
|
||||||
if (autoMode) {
|
if (autoMode) {
|
||||||
log.info("Maximum optimization level reached without meeting target size.");
|
log.info(
|
||||||
|
"Maximum optimization level reached without meeting target size.");
|
||||||
sizeMet = true;
|
sizeMet = true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -597,18 +753,19 @@ public class CompressController {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if optimized file is larger than the original
|
// Use original if optimized file is somehow larger
|
||||||
long finalFileSize = Files.size(currentFile);
|
long finalFileSize = Files.size(currentFile);
|
||||||
if (finalFileSize > inputFileSize) {
|
if (finalFileSize >= inputFileSize) {
|
||||||
log.warn("Optimized file is larger than the original. Using the original file instead.");
|
log.warn(
|
||||||
// Use the stored reference to the original file
|
"Optimized file is larger than the original. Using the original file instead.");
|
||||||
currentFile = originalFile;
|
currentFile = originalFile;
|
||||||
}
|
}
|
||||||
|
|
||||||
String outputFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename())
|
String outputFilename =
|
||||||
|
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
|
||||||
.replaceFirst("[.][^.]+$", "")
|
.replaceFirst("[.][^.]+$", "")
|
||||||
+ "_Optimized.pdf";
|
+ "_Optimized.pdf";
|
||||||
|
|
||||||
return WebResponseUtils.pdfDocToWebResponse(
|
return WebResponseUtils.pdfDocToWebResponse(
|
||||||
pdfDocumentFactory.load(currentFile.toFile()), outputFilename);
|
pdfDocumentFactory.load(currentFile.toFile()), outputFilename);
|
||||||
|
|
||||||
@@ -624,6 +781,65 @@ public class CompressController {
        }
    }

+    // Run QPDF compression
+    private void applyQpdfCompression(
+            OptimizePdfRequest request, int optimizeLevel, Path currentFile, List<Path> tempFiles)
+            throws IOException {
+
+        long preQpdfSize = Files.size(currentFile);
+        log.info("Pre-QPDF file size: {}", GeneralUtils.formatBytes(preQpdfSize));
+
+        // Map optimization levels to QPDF compression levels
+        int qpdfCompressionLevel =
+                optimizeLevel <= 3
+                        ? optimizeLevel * 3 // Level 1->3, 2->6, 3->9
+                        : 9; // Max compression for levels 4-9
+
+        // Create output file for QPDF
+        Path qpdfOutputFile = Files.createTempFile("qpdf_output_", ".pdf");
+        tempFiles.add(qpdfOutputFile);
+
+        // Build QPDF command
+        List<String> command = new ArrayList<>();
+        command.add("qpdf");
+        if (request.getNormalize()) {
+            command.add("--normalize-content=y");
+        }
+        if (request.getLinearize()) {
+            command.add("--linearize");
+        }
+        command.add("--recompress-flate");
+        command.add("--compression-level=" + qpdfCompressionLevel);
+        command.add("--compress-streams=y");
+        command.add("--object-streams=generate");
+        command.add(currentFile.toString());
+        command.add(qpdfOutputFile.toString());
+
+        ProcessExecutorResult returnCode = null;
+        try {
+            returnCode =
+                    ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
+                            .runCommandWithOutputHandling(command);
+
+            // Update current file to the QPDF output
+            Files.copy(qpdfOutputFile, currentFile, StandardCopyOption.REPLACE_EXISTING);
+
+            long postQpdfSize = Files.size(currentFile);
+            double qpdfReduction = 100.0 - ((postQpdfSize * 100.0) / preQpdfSize);
+            log.info(
+                    "Post-QPDF file size: {} (reduced by {}%)",
+                    GeneralUtils.formatBytes(postQpdfSize), String.format("%.1f", qpdfReduction));
+
+        } catch (Exception e) {
+            if (returnCode != null && returnCode.getRc() != 3) {
+                throw new IOException("QPDF command failed", e);
+            }
+            // If QPDF fails, keep using the current file
+            log.warn("QPDF compression failed, continuing with current file", e);
+        }
+    }
+
+    // Pick optimization level based on target size
    private int determineOptimizeLevel(double sizeReductionRatio) {
        if (sizeReductionRatio > 0.9) return 1;
        if (sizeReductionRatio > 0.8) return 2;
@@ -636,6 +852,7 @@ public class CompressController {
        return 9;
    }

+    // Increment optimization level if we need more compression
    private int incrementOptimizeLevel(int currentLevel, long currentSize, long targetSize) {
        double currentRatio = currentSize / (double) targetSize;
        log.info("Current compression ratio: {}", String.format("%.2f", currentRatio));
@@ -647,4 +864,4 @@ public class CompressController {
        }
        return Math.min(9, currentLevel + 1);
    }
}
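The new `applyQpdfCompression` method above shells out to QPDF with a fixed set of flags, mapping the request's optimisation level onto QPDF's `--compression-level`. A small standalone sketch of the command it assembles, using the same `optimizeLevel <= 3 ? optimizeLevel * 3 : 9` mapping; the file paths and request values here are placeholders:

```java
import java.util.ArrayList;
import java.util.List;

public class QpdfCommandSketch {
    public static void main(String[] args) {
        int optimizeLevel = 2;                     // hypothetical request value
        boolean normalize = false;                 // stand-in for request.getNormalize()
        boolean linearize = false;                 // stand-in for request.getLinearize()

        // Same mapping as the controller: 1 -> 3, 2 -> 6, 3 -> 9, levels 4-9 -> 9
        int qpdfCompressionLevel = optimizeLevel <= 3 ? optimizeLevel * 3 : 9;

        List<String> command = new ArrayList<>();
        command.add("qpdf");
        if (normalize) command.add("--normalize-content=y");
        if (linearize) command.add("--linearize");
        command.add("--recompress-flate");
        command.add("--compression-level=" + qpdfCompressionLevel);
        command.add("--compress-streams=y");
        command.add("--object-streams=generate");
        command.add("input.pdf");                  // placeholder for currentFile
        command.add("output.pdf");                 // placeholder for qpdfOutputFile

        // Prints: qpdf --recompress-flate --compression-level=6 --compress-streams=y
        //         --object-streams=generate input.pdf output.pdf
        System.out.println(String.join(" ", command));
    }
}
```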
@@ -5,6 +5,7 @@ import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

@@ -26,8 +27,10 @@ import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.extern.slf4j.Slf4j;

import stirling.software.SPDF.model.PipelineConfig;
+import stirling.software.SPDF.model.PipelineOperation;
import stirling.software.SPDF.model.PipelineResult;
import stirling.software.SPDF.model.api.HandleDataRequest;
+import stirling.software.SPDF.service.PostHogService;
import stirling.software.SPDF.utils.WebResponseUtils;

@RestController
@@ -40,9 +43,13 @@ public class PipelineController {

    private final ObjectMapper objectMapper;

-    public PipelineController(PipelineProcessor processor, ObjectMapper objectMapper) {
+    private final PostHogService postHogService;
+
+    public PipelineController(
+            PipelineProcessor processor, ObjectMapper objectMapper, PostHogService postHogService) {
        this.processor = processor;
        this.objectMapper = objectMapper;
+        this.postHogService = postHogService;
    }

    @PostMapping("/handleData")
@@ -55,6 +62,18 @@ public class PipelineController {
        }
        PipelineConfig config = objectMapper.readValue(jsonString, PipelineConfig.class);
        log.info("Received POST request to /handleData with {} files", files.length);
+
+        List<String> operationNames =
+                config.getOperations().stream()
+                        .map(PipelineOperation::getOperation)
+                        .collect(Collectors.toList());
+
+        Map<String, Object> properties = new HashMap<>();
+        properties.put("operations", operationNames);
+        properties.put("fileCount", files.length);
+
+        postHogService.captureEvent("pipeline_api_event", properties);
+
        try {
            List<Resource> inputFiles = processor.generateInputFiles(files);
            if (inputFiles == null || inputFiles.size() == 0) {
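The analytics call added to `/handleData` above records only which operations the pipeline ran and how many files were submitted. A rough sketch of the payload that reaches `captureEvent` (the operation names and file count are invented for illustration):

```java
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PipelineEventSketch {
    public static void main(String[] args) {
        // Stand-ins for config.getOperations() mapped through PipelineOperation::getOperation
        List<String> operationNames = List.of("compress-pdf", "ocr-pdf");

        Map<String, Object> properties = new HashMap<>();
        properties.put("operations", operationNames);
        properties.put("fileCount", 3);            // stand-in for files.length

        // postHogService.captureEvent("pipeline_api_event", properties) would receive, e.g.:
        // {operations=[compress-pdf, ocr-pdf], fileCount=3}
        System.out.println(properties);
    }
}
```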
@@ -17,7 +17,9 @@ import java.time.LocalDate;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
import java.util.stream.Stream;

@@ -34,6 +36,7 @@ import stirling.software.SPDF.config.RuntimePathConfig;
import stirling.software.SPDF.model.PipelineConfig;
import stirling.software.SPDF.model.PipelineOperation;
import stirling.software.SPDF.model.PipelineResult;
+import stirling.software.SPDF.service.PostHogService;
import stirling.software.SPDF.utils.FileMonitor;

@Service
@@ -41,15 +44,11 @@ import stirling.software.SPDF.utils.FileMonitor;
public class PipelineDirectoryProcessor {

    private final ObjectMapper objectMapper;

    private final ApiDocService apiDocService;

    private final PipelineProcessor processor;

    private final FileMonitor fileMonitor;
+    private final PostHogService postHogService;
    private final String watchedFoldersDir;

    private final String finishedFoldersDir;

    public PipelineDirectoryProcessor(
@@ -57,13 +56,15 @@ public class PipelineDirectoryProcessor {
            ApiDocService apiDocService,
            PipelineProcessor processor,
            FileMonitor fileMonitor,
+            PostHogService postHogService,
            RuntimePathConfig runtimePathConfig) {
        this.objectMapper = objectMapper;
        this.apiDocService = apiDocService;
-        this.watchedFoldersDir = runtimePathConfig.getPipelineWatchedFoldersPath();
-        this.finishedFoldersDir = runtimePathConfig.getPipelineFinishedFoldersPath();
        this.processor = processor;
        this.fileMonitor = fileMonitor;
+        this.postHogService = postHogService;
+        this.watchedFoldersDir = runtimePathConfig.getPipelineWatchedFoldersPath();
+        this.finishedFoldersDir = runtimePathConfig.getPipelineFinishedFoldersPath();
    }

    @Scheduled(fixedRate = 60000)
@@ -152,6 +153,14 @@ public class PipelineDirectoryProcessor {
            log.debug("No files detected for {} ", dir);
            return;
        }
+
+        List<String> operationNames =
+                config.getOperations().stream().map(PipelineOperation::getOperation).toList();
+        Map<String, Object> properties = new HashMap<>();
+        properties.put("operations", operationNames);
+        properties.put("fileCount", files.length);
+        postHogService.captureEvent("pipeline_directory_event", properties);
+
        List<File> filesToProcess = prepareFilesForProcessing(files, processingDir);
        runPipelineAgainstFiles(filesToProcess, config, dir, processingDir);
    }
@@ -252,8 +261,7 @@ public class PipelineDirectoryProcessor {
                try {
                    Thread.sleep(retryDelayMs * (int) Math.pow(2, attempt - 1));
                } catch (InterruptedException e1) {
-                    // TODO Auto-generated catch block
-                    e1.printStackTrace();
+                    log.error("prepareFilesForProcessing failure", e);
                }
            }
        }
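The retry branch above sleeps for `retryDelayMs * 2^(attempt - 1)` between attempts, i.e. simple exponential backoff. A tiny standalone illustration of the resulting delays (the 1-second base delay is an assumption, not the processor's actual value):

```java
public class BackoffSketch {
    public static void main(String[] args) {
        long retryDelayMs = 1000;                  // assumed base delay
        for (int attempt = 1; attempt <= 4; attempt++) {
            long delay = retryDelayMs * (int) Math.pow(2, attempt - 1);
            // attempt 1 -> 1000 ms, 2 -> 2000 ms, 3 -> 4000 ms, 4 -> 8000 ms
            System.out.println("attempt " + attempt + " -> sleep " + delay + " ms");
        }
    }
}
```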
@@ -14,7 +14,7 @@ public class OptimizePdfRequest extends PDFFile {
    @Schema(
            description =
                    "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.",
-            allowableValues = {"1", "2", "3", "4", "5"})
+            allowableValues = {"1", "2", "3", "4", "5", "6", "7", "8", "9"})
    private Integer optimizeLevel;

    @Schema(description = "The expected output size, e.g. '100MB', '25KB', etc.")
@@ -77,7 +77,7 @@ public class CustomPDFDocumentFactory {
        }

        long fileSize = file.length();
-        log.info("Loading PDF from file, size: {}MB", fileSize / (1024 * 1024));
+        log.debug("Loading PDF from file, size: {}MB", fileSize / (1024 * 1024));

        return loadAdaptively(file, fileSize);
    }
@@ -92,7 +92,7 @@ public class CustomPDFDocumentFactory {
        }

        long fileSize = Files.size(path);
-        log.info("Loading PDF from file, size: {}MB", fileSize / (1024 * 1024));
+        log.debug("Loading PDF from file, size: {}MB", fileSize / (1024 * 1024));

        return loadAdaptively(path.toFile(), fileSize);
    }
@@ -104,7 +104,7 @@ public class CustomPDFDocumentFactory {
        }

        long dataSize = input.length;
-        log.info("Loading PDF from byte array, size: {}MB", dataSize / (1024 * 1024));
+        log.debug("Loading PDF from byte array, size: {}MB", dataSize / (1024 * 1024));

        return loadAdaptively(input, dataSize);
    }
@@ -150,7 +150,7 @@ public class CustomPDFDocumentFactory {
        long actualFreeMemory = maxMemory - usedMemory;

        // Log memory status
-        log.info(
+        log.debug(
                "Memory status - Free: {}MB ({}%), Used: {}MB, Max: {}MB",
                actualFreeMemory / (1024 * 1024),
                String.format("%.2f", freeMemoryPercent),
@@ -160,21 +160,21 @@ public class CustomPDFDocumentFactory {
        // If free memory is critically low, always use file-based caching
        if (freeMemoryPercent < MIN_FREE_MEMORY_PERCENTAGE
                || actualFreeMemory < MIN_FREE_MEMORY_BYTES) {
-            log.info(
+            log.debug(
                    "Low memory detected ({}%), forcing file-based cache",
                    String.format("%.2f", freeMemoryPercent));
            return createScratchFileCacheFunction(MemoryUsageSetting.setupTempFileOnly());
        } else if (contentSize < SMALL_FILE_THRESHOLD) {
-            log.info("Using memory-only cache for small document ({}KB)", contentSize / 1024);
+            log.debug("Using memory-only cache for small document ({}KB)", contentSize / 1024);
            return IOUtils.createMemoryOnlyStreamCache();
        } else if (contentSize < LARGE_FILE_THRESHOLD) {
            // For medium files (10-50MB), use a mixed approach
-            log.info(
+            log.debug(
                    "Using mixed memory/file cache for medium document ({}MB)",
                    contentSize / (1024 * 1024));
            return createScratchFileCacheFunction(MemoryUsageSetting.setupMixed(LARGE_FILE_USAGE));
        } else {
-            log.info("Using file-based cache for large document");
+            log.debug("Using file-based cache for large document");
            return createScratchFileCacheFunction(MemoryUsageSetting.setupTempFileOnly());
        }
    }
@@ -237,7 +237,7 @@ public class CustomPDFDocumentFactory {
            byte[] bytes, long size, StreamCacheCreateFunction cache, String password)
            throws IOException {
        if (size >= SMALL_FILE_THRESHOLD) {
-            log.info("Writing large byte array to temp file for password-protected PDF");
+            log.debug("Writing large byte array to temp file for password-protected PDF");
            Path tempFile = createTempFile("pdf-bytes-");

            Files.write(tempFile, bytes);
@@ -261,7 +261,6 @@ public class CustomPDFDocumentFactory {
            removePassword(doc);
        }

-
    private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache)
            throws IOException {
        return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);
@@ -270,7 +269,7 @@ public class CustomPDFDocumentFactory {
    private PDDocument loadFromBytes(byte[] bytes, long size, StreamCacheCreateFunction cache)
            throws IOException {
        if (size >= SMALL_FILE_THRESHOLD) {
-            log.info("Writing large byte array to temp file");
+            log.debug("Writing large byte array to temp file");
            Path tempFile = createTempFile("pdf-bytes-");

            Files.write(tempFile, bytes);
@@ -318,7 +317,7 @@ public class CustomPDFDocumentFactory {
    // Temp file handling with enhanced logging
    private Path createTempFile(String prefix) throws IOException {
        Path file = Files.createTempFile(prefix + tempCounter.incrementAndGet() + "-", ".tmp");
-        log.info("Created temp file: {}", file);
+        log.debug("Created temp file: {}", file);
        return file;
    }

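The hunks above only lower log levels, but they sit inside the factory's cache-selection logic, which chooses a stream cache in this order: critically low free memory forces a temp-file cache, small documents stay in memory, medium documents get a mixed memory/file cache, and everything else goes to a temp file. A rough standalone sketch of that decision order (the 10 MB / 50 MB boundaries follow the in-code comment; the memory limits here are placeholders, not the class's actual constants):

```java
public class CacheChoiceSketch {
    enum Cache { TEMP_FILE, MEMORY_ONLY, MIXED }

    static Cache chooseCache(double freeMemoryPercent, long actualFreeMemory, long contentSize) {
        double minFreeMemoryPercentage = 10.0;          // placeholder for MIN_FREE_MEMORY_PERCENTAGE
        long minFreeMemoryBytes = 200L * 1024 * 1024;   // placeholder for MIN_FREE_MEMORY_BYTES
        long smallFileThreshold = 10L * 1024 * 1024;    // "small" per the 10-50MB comment
        long largeFileThreshold = 50L * 1024 * 1024;

        if (freeMemoryPercent < minFreeMemoryPercentage || actualFreeMemory < minFreeMemoryBytes) {
            return Cache.TEMP_FILE;      // low memory: always file-based
        } else if (contentSize < smallFileThreshold) {
            return Cache.MEMORY_ONLY;    // small document
        } else if (contentSize < largeFileThreshold) {
            return Cache.MIXED;          // medium document: mixed memory/file cache
        } else {
            return Cache.TEMP_FILE;      // large document
        }
    }

    public static void main(String[] args) {
        // 40% free memory, 500 MB free, 30 MB document -> MIXED
        System.out.println(chooseCache(40.0, 500L * 1024 * 1024, 30L * 1024 * 1024));
    }
}
```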
@@ -4,6 +4,8 @@ import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
@@ -11,22 +13,32 @@ import org.springframework.stereotype.Service;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.search.Search;

+import stirling.software.SPDF.config.EndpointInspector;

@Service
public class MetricsAggregatorService {
+    private static final Logger logger = LoggerFactory.getLogger(MetricsAggregatorService.class);

    private final MeterRegistry meterRegistry;
    private final PostHogService postHogService;
+    private final EndpointInspector endpointInspector;
    private final Map<String, Double> lastSentMetrics = new ConcurrentHashMap<>();

    @Autowired
-    public MetricsAggregatorService(MeterRegistry meterRegistry, PostHogService postHogService) {
+    public MetricsAggregatorService(
+            MeterRegistry meterRegistry,
+            PostHogService postHogService,
+            EndpointInspector endpointInspector) {
        this.meterRegistry = meterRegistry;
        this.postHogService = postHogService;
+        this.endpointInspector = endpointInspector;
    }

    @Scheduled(fixedRate = 7200000) // Run every 2 hours
    public void aggregateAndSendMetrics() {
        Map<String, Object> metrics = new HashMap<>();
+
+        final boolean validateGetEndpoints = endpointInspector.getValidGetEndpoints().size() != 0;
        Search.in(meterRegistry)
                .name("http.requests")
                .counters()
@@ -34,35 +46,52 @@ public class MetricsAggregatorService {
                        counter -> {
                            String method = counter.getId().getTag("method");
                            String uri = counter.getId().getTag("uri");

                            // Skip if either method or uri is null
                            if (method == null || uri == null) {
                                return;
                            }

+                            // Skip URIs that are 2 characters or shorter
+                            if (uri.length() <= 2) {
+                                return;
+                            }
+
+                            // Skip non-GET and non-POST requests
                            if (!"GET".equals(method) && !"POST".equals(method)) {
                                return;
                            }
-                            // Skip URIs that are 2 characters or shorter
-                            if (uri.length() <= 2) {
+
+                            // For POST requests, only include if they start with /api/v1
+                            if ("POST".equals(method) && !uri.contains("api/v1")) {
+                                return;
+                            }
+
+                            if (uri.contains(".txt")) {
+                                return;
+                            }
+                            // For GET requests, validate if we have a list of valid endpoints
+                            if ("GET".equals(method)
+                                    && validateGetEndpoints
+                                    && !endpointInspector.isValidGetEndpoint(uri)) {
+                                logger.debug("Skipping invalid GET endpoint: {}", uri);
                                return;
                            }

                            String key =
                                    String.format(
                                            "http_requests_%s_%s", method, uri.replace("/", "_"));

                            double currentCount = counter.count();
                            double lastCount = lastSentMetrics.getOrDefault(key, 0.0);
                            double difference = currentCount - lastCount;

                            if (difference > 0) {
+                                logger.info("{}, {}", key, difference);
                                metrics.put(key, difference);
                                lastSentMetrics.put(key, currentCount);
                            }
                        });

        // Send aggregated metrics to PostHog
        if (!metrics.isEmpty()) {

            postHogService.captureEvent("aggregated_metrics", metrics);
        }
    }
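The new filtering above drops short URIs, methods other than GET and POST, POSTs outside `api/v1`, `.txt` hits, and (when the `EndpointInspector` has discovered endpoints) GETs that don't match a known endpoint, then keys each counter by method and flattened URI. A standalone sketch of the filter and key format for one illustrative request (the endpoint-inspector check is omitted here since it needs the running application context):

```java
public class MetricsFilterSketch {
    public static void main(String[] args) {
        String method = "POST";
        String uri = "/api/v1/misc/compress-pdf";  // illustrative URI

        boolean skip =
                uri.length() <= 2
                        || (!"GET".equals(method) && !"POST".equals(method))
                        || ("POST".equals(method) && !uri.contains("api/v1"))
                        || uri.contains(".txt");

        if (!skip) {
            // Same key format as the service: slashes flattened to underscores
            String key = String.format("http_requests_%s_%s", method, uri.replace("/", "_"));
            System.out.println(key);               // http_requests_POST__api_v1_misc_compress-pdf
        }
    }
}
```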
@@ -141,7 +141,17 @@ function setupFileInput(chooser) {
      allFiles = Array.from(isDragAndDrop ? allFiles : [element.files[0]]);
    }

+    const originalText = inputContainer.querySelector('#fileInputText').innerHTML;
+
+    inputContainer.querySelector('#fileInputText').innerHTML = window.fileInput.loading;
+
    async function checkZipFile() {
+      const hasZipFiles = allFiles.some(file => zipTypes.includes(file.type));
+
+      // Only change to extractPDF message if we actually have zip files
+      if (hasZipFiles) {
+        inputContainer.querySelector('#fileInputText').innerHTML = window.fileInput.extractPDF;
+      }
+
      const promises = allFiles.map(async (file, index) => {
        try {
@@ -156,13 +166,10 @@ function setupFileInput(chooser) {
      });

      await Promise.all(promises);

    }
-    const originalText = inputContainer.querySelector('#fileInputText').innerHTML;
    const decryptFile = new DecryptFile();

-    inputContainer.querySelector('#fileInputText').innerHTML = window.fileInput.extractPDF;
-
    await checkZipFile();

    allFiles = await Promise.all(
@@ -224,26 +231,26 @@ function setupFileInput(chooser) {
      .then(function (zip) {
        var extractionPromises = [];

        zip.forEach(function (relativePath, zipEntry) {
          const promise = zipEntry.async('blob').then(function (content) {
            // Assuming that folders have size zero
            if (content.size > 0) {
              const extension = zipEntry.name.split('.').pop().toLowerCase();
-              const mimeType = mimeTypes[extension];
+              const mimeType = mimeTypes[extension] || 'application/octet-stream';

-              // Check for file extension
-              if (mimeType && (mimeType.startsWith(acceptedFileType.split('/')[0]) || acceptedFileType === mimeType)) {
+              // Check if we're accepting ONLY ZIP files (in which case extract everything)
+              // or if the file type matches the accepted type
+              if (zipTypes.includes(acceptedFileType) ||
+                  acceptedFileType === '*/*' ||
+                  (mimeType && (mimeType.startsWith(acceptedFileType.split('/')[0]) || acceptedFileType === mimeType))) {
                var file = new File([content], zipEntry.name, { type: mimeType });
                file.uniqueId = UUID.uuidv4();
                allFiles.push(file);
              } else {
                console.log(`File ${zipEntry.name} skipped. MIME type (${mimeType}) does not match accepted type (${acceptedFileType})`);
              }
            }
          });

          extractionPromises.push(promise);
        });
@@ -224,15 +224,20 @@
      window.fileInput = {
        dragAndDropPDF: '[[#{fileChooser.dragAndDropPDF}]]',
        dragAndDropImage: '[[#{fileChooser.dragAndDropImage}]]',
-        extractPDF: '[[#{fileChooser.extractPDF}]]'
+        extractPDF: '[[#{fileChooser.extractPDF}]]',
+        loading: '[[#{loading}]]'
      };</script>
    <div class="custom-file-chooser mb-3"
      th:attr="data-bs-unique-id=${name}, data-bs-element-id=${name+'-input'}, data-bs-element-container-id=${name+'-input-container'}, data-bs-show-uploads=${showUploads}, data-bs-files-selected=#{filesSelected}, data-bs-pdf-prompt=#{pdfPrompt}">
      <div class="mb-3 d-flex flex-row justify-content-center align-items-center flex-wrap input-container"
        th:name="${name}+'-input'" th:id="${name}+'-input-container'" th:data-text="#{fileChooser.hoveredDragAndDrop}">
        <label class="file-input-btn d-none">
-          <input type="file" class="form-control" th:name="${name}" th:id="${name}+'-input'" th:accept="${accept} + ',.zip'"
-            th:attr="multiple=${!disableMultipleFiles}" th:required="${notRequired} ? null : 'required'">
+          <input type="file" class="form-control"
+            th:name="${name}"
+            th:id="${name}+'-input'"
+            th:accept="${accept == null ? '*/*': ((accept == '*/*') ? accept : (accept + ',.zip'))}"
+            th:attr="multiple=${!disableMultipleFiles}"
+            th:required="${notRequired} ? null : 'required'">
          Browse
        </label>
        <div class="d-flex justify-content-start align-items-center" id="fileInputText">
@@ -64,7 +64,7 @@
        </div>
        <div class="element-margin">
          <div
-            th:replace="~{fragments/common :: fileSelector(name='fileInput', multipleInputsForSingleRequest=true)}"
+            th:replace="~{fragments/common :: fileSelector(name='fileInput', multipleInputsForSingleRequest=true, accept='*/*')}"
          ></div>
        </div>
        <div class="element-margin text-start">
@@ -93,7 +93,7 @@

    <!-- The Modal -->
    <div class="modal" id="pipelineSettingsModal">
-      <div class="modal-dialog modal-lg">
+      <div class="modal-dialog modal-dialog-centered modal-lg">
        <div class="modal-content dark-card">
          <!-- Modal Header -->
          <div class="modal-header">
@@ -212,6 +212,8 @@ main() {

    cd "$PROJECT_ROOT"

+    export DOCKER_CLI_EXPERIMENTAL=enabled
+    export COMPOSE_DOCKER_CLI_BUILD=0
    export DOCKER_ENABLE_SECURITY=false
    # Run the gradlew build command and check if it fails
    if ! ./gradlew clean build; then
@@ -250,7 +252,7 @@ main() {
    # Building Docker images with security enabled
    # docker build --no-cache --pull --build-arg VERSION_TAG=alpha -t stirlingtools/stirling-pdf:latest -f ./Dockerfile .
    # docker build --no-cache --pull --build-arg VERSION_TAG=alpha -t stirlingtools/stirling-pdf:latest-ultra-lite -f ./Dockerfile.ultra-lite .
-    docker build --no-cache --pull --build-arg VERSION_TAG=alpha -t stirlingtools/stirling-pdf:latest-fat -f ./Dockerfile.fat .
+    docker build --no-cache --pull --build-arg VERSION_TAG=alpha -t docker.stirlingpdf.com/stirlingtools/stirling-pdf:latest-fat -f ./Dockerfile.fat .


    # Test each configuration with security