mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-04-22 23:08:53 +02:00
Cleanup work + stream endpoints to reduce memory usage (#6106)
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
package stirling.software.common.service;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.UUID;
|
||||
@@ -10,6 +12,7 @@ import java.util.UUID;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@@ -143,6 +146,24 @@ public class FileStorage {
|
||||
return new StoredFile(fileId, size);
|
||||
}
|
||||
|
||||
public String storeFromStreamingBody(StreamingResponseBody body, String originalName)
|
||||
throws IOException {
|
||||
String fileId = generateFileId();
|
||||
Path filePath = getFilePath(fileId);
|
||||
Files.createDirectories(filePath.getParent());
|
||||
boolean success = false;
|
||||
try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(filePath))) {
|
||||
body.writeTo(os);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
Files.deleteIfExists(filePath);
|
||||
}
|
||||
}
|
||||
log.debug("Stored StreamingResponseBody with ID: {}", fileId);
|
||||
return fileId;
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a file by its ID
|
||||
*
|
||||
|
||||
@@ -16,6 +16,7 @@ import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody;
|
||||
|
||||
import jakarta.servlet.http.HttpServletRequest;
|
||||
|
||||
@@ -305,33 +306,21 @@ public class JobExecutorService {
|
||||
Object body = response.getBody();
|
||||
|
||||
if (body instanceof byte[]) {
|
||||
// Extract filename from content-disposition header if available
|
||||
String filename = "result.pdf";
|
||||
String contentType = MediaType.APPLICATION_PDF_VALUE;
|
||||
String filename = extractResponseFilename(response);
|
||||
String contentType = extractResponseContentType(response);
|
||||
|
||||
if (response.getHeaders().getContentDisposition() != null) {
|
||||
String disposition =
|
||||
response.getHeaders().getContentDisposition().toString();
|
||||
if (disposition.contains("filename=")) {
|
||||
filename =
|
||||
disposition.substring(
|
||||
disposition.indexOf("filename=") + 9,
|
||||
disposition.lastIndexOf('"'));
|
||||
}
|
||||
}
|
||||
|
||||
MediaType mediaType = response.getHeaders().getContentType();
|
||||
|
||||
if (mediaType != null) {
|
||||
contentType = mediaType.toString();
|
||||
}
|
||||
|
||||
// Store byte array directly to disk
|
||||
String fileId = fileStorage.storeBytes((byte[]) body, filename);
|
||||
taskManager.setFileResult(jobId, fileId, filename, contentType);
|
||||
log.debug("Stored ResponseEntity<byte[]> result with fileId: {}", fileId);
|
||||
} else if (body instanceof StreamingResponseBody streamingBody) {
|
||||
String filename = extractResponseFilename(response);
|
||||
String contentType = extractResponseContentType(response);
|
||||
|
||||
// Let the GC handle the memory naturally
|
||||
String fileId = fileStorage.storeFromStreamingBody(streamingBody, filename);
|
||||
taskManager.setFileResult(jobId, fileId, filename, contentType);
|
||||
log.debug(
|
||||
"Stored ResponseEntity<StreamingResponseBody> result with fileId: {}",
|
||||
fileId);
|
||||
} else {
|
||||
// Check if the response body contains a fileId
|
||||
if (body != null && body.toString().contains("fileId")) {
|
||||
@@ -481,6 +470,21 @@ public class JobExecutorService {
|
||||
}
|
||||
}
|
||||
|
||||
private static String extractResponseFilename(ResponseEntity<?> response) {
|
||||
if (response.getHeaders().getContentDisposition() != null) {
|
||||
String filename = response.getHeaders().getContentDisposition().getFilename();
|
||||
if (filename != null && !filename.isEmpty()) {
|
||||
return filename;
|
||||
}
|
||||
}
|
||||
return "result.pdf";
|
||||
}
|
||||
|
||||
private static String extractResponseContentType(ResponseEntity<?> response) {
|
||||
MediaType mediaType = response.getHeaders().getContentType();
|
||||
return mediaType != null ? mediaType.toString() : MediaType.APPLICATION_PDF_VALUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse session timeout string (e.g., "30m", "1h") to milliseconds
|
||||
*
|
||||
|
||||
@@ -401,7 +401,7 @@ public class JobQueue implements SmartLifecycle {
|
||||
* @throws Exception If there is an execution error
|
||||
*/
|
||||
private <T> T executeWithTimeout(Supplier<T> supplier, long timeoutMs) throws Exception {
|
||||
CompletableFuture<T> future = CompletableFuture.supplyAsync(supplier);
|
||||
CompletableFuture<T> future = CompletableFuture.supplyAsync(supplier, jobExecutor);
|
||||
|
||||
try {
|
||||
if (timeoutMs <= 0) {
|
||||
|
||||
@@ -7,11 +7,8 @@ import java.lang.management.MemoryMXBean;
|
||||
import java.lang.management.OperatingSystemMXBean;
|
||||
import java.lang.management.RuntimeMXBean;
|
||||
import java.lang.management.ThreadMXBean;
|
||||
import java.net.InetAddress;
|
||||
import java.net.NetworkInterface;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Enumeration;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
@@ -94,21 +91,12 @@ public class PostHogService {
|
||||
metrics.put("os_name", System.getProperty("os.name"));
|
||||
metrics.put("os_version", System.getProperty("os.version"));
|
||||
metrics.put("java_version", System.getProperty("java.version"));
|
||||
metrics.put("user_name", System.getProperty("user.name"));
|
||||
metrics.put("user_home", System.getProperty("user.home"));
|
||||
metrics.put("user_dir", System.getProperty("user.dir"));
|
||||
|
||||
// CPU and Memory
|
||||
metrics.put("cpu_cores", Runtime.getRuntime().availableProcessors());
|
||||
metrics.put("total_memory", Runtime.getRuntime().totalMemory());
|
||||
metrics.put("free_memory", Runtime.getRuntime().freeMemory());
|
||||
|
||||
// Network and Server Identity
|
||||
InetAddress localHost = InetAddress.getLocalHost();
|
||||
metrics.put("ip_address", localHost.getHostAddress());
|
||||
metrics.put("hostname", localHost.getHostName());
|
||||
metrics.put("mac_address", getMacAddress());
|
||||
|
||||
// JVM info
|
||||
metrics.put("jvm_vendor", System.getProperty("java.vendor"));
|
||||
metrics.put("jvm_version", System.getProperty("java.vm.version"));
|
||||
@@ -153,9 +141,6 @@ public class PostHogService {
|
||||
metrics.put("gc_" + gcBean.getName() + "_time", gcBean.getCollectionTime());
|
||||
}
|
||||
|
||||
// Network interfaces
|
||||
metrics.put("network_interfaces", getNetworkInterfacesInfo());
|
||||
|
||||
// Docker detection and stats
|
||||
boolean isDocker = isRunningInDocker();
|
||||
if (isDocker) {
|
||||
@@ -353,30 +338,6 @@ public class PostHogService {
|
||||
.getProFeatures()
|
||||
.getCustomMetadata()
|
||||
.isAutoUpdateMetadata());
|
||||
addIfNotEmpty(
|
||||
properties,
|
||||
"enterpriseEdition_customMetadata_author",
|
||||
applicationProperties
|
||||
.getPremium()
|
||||
.getProFeatures()
|
||||
.getCustomMetadata()
|
||||
.getAuthor());
|
||||
addIfNotEmpty(
|
||||
properties,
|
||||
"enterpriseEdition_customMetadata_creator",
|
||||
applicationProperties
|
||||
.getPremium()
|
||||
.getProFeatures()
|
||||
.getCustomMetadata()
|
||||
.getCreator());
|
||||
addIfNotEmpty(
|
||||
properties,
|
||||
"enterpriseEdition_customMetadata_producer",
|
||||
applicationProperties
|
||||
.getPremium()
|
||||
.getProFeatures()
|
||||
.getCustomMetadata()
|
||||
.getProducer());
|
||||
}
|
||||
// Capture AutoPipeline properties
|
||||
addIfNotEmpty(
|
||||
@@ -386,39 +347,4 @@ public class PostHogService {
|
||||
|
||||
return properties;
|
||||
}
|
||||
|
||||
private String getMacAddress() {
|
||||
try {
|
||||
Enumeration<NetworkInterface> networkInterfaces =
|
||||
NetworkInterface.getNetworkInterfaces();
|
||||
while (networkInterfaces.hasMoreElements()) {
|
||||
NetworkInterface ni = networkInterfaces.nextElement();
|
||||
byte[] hardwareAddress = ni.getHardwareAddress();
|
||||
if (hardwareAddress != null) {
|
||||
String[] hexadecimal = new String[hardwareAddress.length];
|
||||
for (int i = 0; i < hardwareAddress.length; i++) {
|
||||
hexadecimal[i] = String.format("%02X", hardwareAddress[i]);
|
||||
}
|
||||
return String.join("-", hexadecimal);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// Handle exception
|
||||
}
|
||||
return "Unknown";
|
||||
}
|
||||
|
||||
private Map<String, String> getNetworkInterfacesInfo() {
|
||||
Map<String, String> interfacesInfo = new HashMap<>();
|
||||
try {
|
||||
Enumeration<NetworkInterface> nets = NetworkInterface.getNetworkInterfaces();
|
||||
while (nets.hasMoreElements()) {
|
||||
NetworkInterface netint = nets.nextElement();
|
||||
interfacesInfo.put(netint.getName(), netint.getDisplayName());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
interfacesInfo.put("error", e.getMessage());
|
||||
}
|
||||
return interfacesInfo;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
package stirling.software.common.util;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
@@ -66,16 +65,7 @@ public class FileToPdf {
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
|
||||
.runCommandWithOutputHandling(command);
|
||||
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile.getPath());
|
||||
try {
|
||||
return pdfBytes;
|
||||
} catch (Exception e) {
|
||||
pdfBytes = Files.readAllBytes(tempOutputFile.getPath());
|
||||
if (pdfBytes.length < 1) {
|
||||
throw e;
|
||||
}
|
||||
return pdfBytes;
|
||||
}
|
||||
return Files.readAllBytes(tempOutputFile.getPath());
|
||||
} // tempInputFile auto-closed
|
||||
} // tempOutputFile auto-closed
|
||||
}
|
||||
@@ -92,8 +82,7 @@ public class FileToPdf {
|
||||
throws IOException {
|
||||
try (TempDirectory tempUnzippedDir = new TempDirectory(tempFileManager)) {
|
||||
try (ZipInputStream zipIn =
|
||||
ZipSecurity.createHardenedInputStream(
|
||||
new ByteArrayInputStream(Files.readAllBytes(zipFilePath)))) {
|
||||
ZipSecurity.createHardenedInputStream(Files.newInputStream(zipFilePath))) {
|
||||
ZipEntry entry = zipIn.getNextEntry();
|
||||
while (entry != null) {
|
||||
Path filePath =
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
package stirling.software.common.util;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
@@ -20,6 +20,7 @@ import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody;
|
||||
|
||||
import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
|
||||
import com.vladsch.flexmark.util.data.MutableDataSet;
|
||||
@@ -48,7 +49,7 @@ public class PDFToFile {
|
||||
this.runtimePathConfig = runtimePathConfig;
|
||||
}
|
||||
|
||||
public ResponseEntity<byte[]> processPdfToMarkdown(MultipartFile inputFile)
|
||||
public ResponseEntity<StreamingResponseBody> processPdfToMarkdown(MultipartFile inputFile)
|
||||
throws IOException, InterruptedException {
|
||||
if (!MediaType.APPLICATION_PDF_VALUE.equals(inputFile.getContentType())) {
|
||||
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
||||
@@ -85,78 +86,77 @@ public class PDFToFile {
|
||||
pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
|
||||
}
|
||||
|
||||
byte[] fileBytes;
|
||||
String fileName;
|
||||
String fileName = pdfBaseName + "ToMarkdown.zip";
|
||||
TempFile finalOut = tempFileManager.createManagedTempFile(".zip");
|
||||
try {
|
||||
try (TempFile tempInputFile = new TempFile(tempFileManager, ".pdf");
|
||||
TempDirectory tempOutputDir = new TempDirectory(tempFileManager)) {
|
||||
inputFile.transferTo(tempInputFile.getFile());
|
||||
|
||||
try (TempFile tempInputFile = new TempFile(tempFileManager, ".pdf");
|
||||
TempDirectory tempOutputDir = new TempDirectory(tempFileManager)) {
|
||||
inputFile.transferTo(tempInputFile.getFile());
|
||||
List<String> command =
|
||||
new ArrayList<>(
|
||||
Arrays.asList(
|
||||
"pdftohtml",
|
||||
"-s",
|
||||
"-noframes",
|
||||
"-c",
|
||||
tempInputFile.getAbsolutePath(),
|
||||
pdfBaseName));
|
||||
|
||||
List<String> command =
|
||||
new ArrayList<>(
|
||||
Arrays.asList(
|
||||
"pdftohtml",
|
||||
"-s",
|
||||
"-noframes",
|
||||
"-c",
|
||||
tempInputFile.getAbsolutePath(),
|
||||
pdfBaseName));
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.PDFTOHTML)
|
||||
.runCommandWithOutputHandling(
|
||||
command, tempOutputDir.getPath().toFile());
|
||||
// Process HTML files to Markdown
|
||||
File[] outputFiles =
|
||||
Objects.requireNonNull(tempOutputDir.getPath().toFile().listFiles());
|
||||
List<File> markdownFiles = new ArrayList<>();
|
||||
List<File> imageFiles = new ArrayList<>();
|
||||
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.PDFTOHTML)
|
||||
.runCommandWithOutputHandling(
|
||||
command, tempOutputDir.getPath().toFile());
|
||||
// Process HTML files to Markdown
|
||||
File[] outputFiles =
|
||||
Objects.requireNonNull(tempOutputDir.getPath().toFile().listFiles());
|
||||
List<File> markdownFiles = new ArrayList<>();
|
||||
List<File> imageFiles = new ArrayList<>();
|
||||
// Convert HTML files to Markdown and collect image files
|
||||
for (File outputFile : outputFiles) {
|
||||
if (outputFile.getName().endsWith(".html")) {
|
||||
String html = Files.readString(outputFile.toPath());
|
||||
String markdown = htmlToMarkdownConverter.convert(html);
|
||||
|
||||
// Convert HTML files to Markdown and collect image files
|
||||
for (File outputFile : outputFiles) {
|
||||
if (outputFile.getName().endsWith(".html")) {
|
||||
String html = Files.readString(outputFile.toPath());
|
||||
String markdown = htmlToMarkdownConverter.convert(html);
|
||||
// Update image references to point to images/ folder
|
||||
markdown = updateImageReferences(markdown);
|
||||
|
||||
// Update image references to point to images/ folder
|
||||
markdown = updateImageReferences(markdown);
|
||||
String mdFileName = outputFile.getName().replace(".html", ".md");
|
||||
File mdFile = new File(tempOutputDir.getPath().toFile(), mdFileName);
|
||||
Files.writeString(mdFile.toPath(), markdown);
|
||||
markdownFiles.add(mdFile);
|
||||
} else if (!outputFile.getName().endsWith(".md")) {
|
||||
// Collect non-HTML, non-MD files as images/assets
|
||||
imageFiles.add(outputFile);
|
||||
}
|
||||
}
|
||||
|
||||
String mdFileName = outputFile.getName().replace(".html", ".md");
|
||||
File mdFile = new File(tempOutputDir.getPath().toFile(), mdFileName);
|
||||
Files.writeString(mdFile.toPath(), markdown);
|
||||
markdownFiles.add(mdFile);
|
||||
} else if (!outputFile.getName().endsWith(".md")) {
|
||||
// Collect non-HTML, non-MD files as images/assets
|
||||
imageFiles.add(outputFile);
|
||||
try (OutputStream fos = Files.newOutputStream(finalOut.getPath());
|
||||
ZipOutputStream zipOutputStream = new ZipOutputStream(fos)) {
|
||||
// Add markdown files to root of ZIP
|
||||
for (File mdFile : markdownFiles) {
|
||||
ZipEntry mdEntry = new ZipEntry(mdFile.getName());
|
||||
zipOutputStream.putNextEntry(mdEntry);
|
||||
Files.copy(mdFile.toPath(), zipOutputStream);
|
||||
zipOutputStream.closeEntry();
|
||||
}
|
||||
|
||||
// Add images and other assets to images/ folder
|
||||
for (File imageFile : imageFiles) {
|
||||
ZipEntry assetEntry = new ZipEntry("images/" + imageFile.getName());
|
||||
zipOutputStream.putNextEntry(assetEntry);
|
||||
Files.copy(imageFile.toPath(), zipOutputStream);
|
||||
zipOutputStream.closeEntry();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Always create a ZIP file
|
||||
fileName = pdfBaseName + "ToMarkdown.zip";
|
||||
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
|
||||
|
||||
try (ZipOutputStream zipOutputStream = new ZipOutputStream(byteArrayOutputStream)) {
|
||||
// Add markdown files to root of ZIP
|
||||
for (File mdFile : markdownFiles) {
|
||||
ZipEntry mdEntry = new ZipEntry(mdFile.getName());
|
||||
zipOutputStream.putNextEntry(mdEntry);
|
||||
Files.copy(mdFile.toPath(), zipOutputStream);
|
||||
zipOutputStream.closeEntry();
|
||||
}
|
||||
|
||||
// Add images and other assets to images/ folder
|
||||
for (File imageFile : imageFiles) {
|
||||
ZipEntry assetEntry = new ZipEntry("images/" + imageFile.getName());
|
||||
zipOutputStream.putNextEntry(assetEntry);
|
||||
Files.copy(imageFile.toPath(), zipOutputStream);
|
||||
zipOutputStream.closeEntry();
|
||||
}
|
||||
}
|
||||
|
||||
fileBytes = byteArrayOutputStream.toByteArray();
|
||||
} catch (Exception e) {
|
||||
finalOut.close();
|
||||
throw e;
|
||||
}
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
||||
return WebResponseUtils.fileToWebResponse(
|
||||
finalOut, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -169,7 +169,7 @@ public class PDFToFile {
|
||||
return PATTERN.matcher(markdown).replaceAll("$1(images/$2)");
|
||||
}
|
||||
|
||||
public ResponseEntity<byte[]> processPdfToHtml(MultipartFile inputFile)
|
||||
public ResponseEntity<StreamingResponseBody> processPdfToHtml(MultipartFile inputFile)
|
||||
throws IOException, InterruptedException {
|
||||
if (!MediaType.APPLICATION_PDF_VALUE.equals(inputFile.getContentType())) {
|
||||
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
||||
@@ -182,56 +182,57 @@ public class PDFToFile {
|
||||
pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
|
||||
}
|
||||
|
||||
byte[] fileBytes;
|
||||
String fileName;
|
||||
String fileName = pdfBaseName + "ToHtml.zip";
|
||||
TempFile finalOut = tempFileManager.createManagedTempFile(".zip");
|
||||
try {
|
||||
try (TempFile inputFileTemp = new TempFile(tempFileManager, ".pdf");
|
||||
TempDirectory outputDirTemp = new TempDirectory(tempFileManager)) {
|
||||
|
||||
try (TempFile inputFileTemp = new TempFile(tempFileManager, ".pdf");
|
||||
TempDirectory outputDirTemp = new TempDirectory(tempFileManager)) {
|
||||
Path tempInputFile = inputFileTemp.getPath();
|
||||
Path tempOutputDir = outputDirTemp.getPath();
|
||||
|
||||
Path tempInputFile = inputFileTemp.getPath();
|
||||
Path tempOutputDir = outputDirTemp.getPath();
|
||||
// Save the uploaded file to a temporary location
|
||||
inputFile.transferTo(tempInputFile);
|
||||
|
||||
// Save the uploaded file to a temporary location
|
||||
inputFile.transferTo(tempInputFile);
|
||||
// Run the pdftohtml command with complex output
|
||||
List<String> command =
|
||||
new ArrayList<>(
|
||||
Arrays.asList(
|
||||
"pdftohtml", "-c", tempInputFile.toString(), pdfBaseName));
|
||||
|
||||
// Run the pdftohtml command with complex output
|
||||
List<String> command =
|
||||
new ArrayList<>(
|
||||
Arrays.asList(
|
||||
"pdftohtml", "-c", tempInputFile.toString(), pdfBaseName));
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.PDFTOHTML)
|
||||
.runCommandWithOutputHandling(command, tempOutputDir.toFile());
|
||||
|
||||
ProcessExecutorResult returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.PDFTOHTML)
|
||||
.runCommandWithOutputHandling(command, tempOutputDir.toFile());
|
||||
// Get output files
|
||||
File[] outputFiles = Objects.requireNonNull(tempOutputDir.toFile().listFiles());
|
||||
|
||||
// Get output files
|
||||
File[] outputFiles = Objects.requireNonNull(tempOutputDir.toFile().listFiles());
|
||||
|
||||
// Return output files in a ZIP archive
|
||||
fileName = pdfBaseName + "ToHtml.zip";
|
||||
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
|
||||
try (ZipOutputStream zipOutputStream = new ZipOutputStream(byteArrayOutputStream)) {
|
||||
for (File outputFile : outputFiles) {
|
||||
ZipEntry entry = new ZipEntry(outputFile.getName());
|
||||
zipOutputStream.putNextEntry(entry);
|
||||
try (FileInputStream fis = new FileInputStream(outputFile)) {
|
||||
IOUtils.copy(fis, zipOutputStream);
|
||||
} catch (IOException e) {
|
||||
log.error("Exception writing zip entry", e);
|
||||
try (OutputStream fos = Files.newOutputStream(finalOut.getPath());
|
||||
ZipOutputStream zipOutputStream = new ZipOutputStream(fos)) {
|
||||
for (File outputFile : outputFiles) {
|
||||
ZipEntry entry = new ZipEntry(outputFile.getName());
|
||||
zipOutputStream.putNextEntry(entry);
|
||||
try (FileInputStream fis = new FileInputStream(outputFile)) {
|
||||
IOUtils.copy(fis, zipOutputStream);
|
||||
} catch (IOException e) {
|
||||
log.error("Exception writing zip entry", e);
|
||||
}
|
||||
zipOutputStream.closeEntry();
|
||||
}
|
||||
zipOutputStream.closeEntry();
|
||||
} catch (IOException e) {
|
||||
log.error("Exception writing zip", e);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("Exception writing zip", e);
|
||||
}
|
||||
fileBytes = byteArrayOutputStream.toByteArray();
|
||||
} catch (Exception e) {
|
||||
finalOut.close();
|
||||
throw e;
|
||||
}
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
||||
return WebResponseUtils.fileToWebResponse(
|
||||
finalOut, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
||||
}
|
||||
|
||||
public ResponseEntity<byte[]> processPdfToOfficeFormat(
|
||||
public ResponseEntity<StreamingResponseBody> processPdfToOfficeFormat(
|
||||
MultipartFile inputFile, String outputFormat, String libreOfficeFilter)
|
||||
throws IOException, InterruptedException {
|
||||
|
||||
@@ -257,109 +258,115 @@ public class PDFToFile {
|
||||
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
||||
}
|
||||
|
||||
byte[] fileBytes;
|
||||
String fileName;
|
||||
|
||||
TempFile finalOut =
|
||||
tempFileManager.createManagedTempFile("." + resolvePrimaryExtension(outputFormat));
|
||||
Path libreOfficeProfile = null;
|
||||
try (TempFile inputFileTemp = new TempFile(tempFileManager, ".pdf");
|
||||
TempDirectory outputDirTemp = new TempDirectory(tempFileManager)) {
|
||||
try {
|
||||
try (TempFile inputFileTemp = new TempFile(tempFileManager, ".pdf");
|
||||
TempDirectory outputDirTemp = new TempDirectory(tempFileManager)) {
|
||||
|
||||
Path tempInputFile = inputFileTemp.getPath();
|
||||
Path tempOutputDir = outputDirTemp.getPath();
|
||||
Path unoOutputFile =
|
||||
tempOutputDir.resolve(
|
||||
pdfBaseName + "." + resolvePrimaryExtension(outputFormat));
|
||||
Path tempInputFile = inputFileTemp.getPath();
|
||||
Path tempOutputDir = outputDirTemp.getPath();
|
||||
Path unoOutputFile =
|
||||
tempOutputDir.resolve(
|
||||
pdfBaseName + "." + resolvePrimaryExtension(outputFormat));
|
||||
|
||||
// Save the uploaded file to a temporary location
|
||||
inputFile.transferTo(tempInputFile);
|
||||
// Save the uploaded file to a temporary location
|
||||
inputFile.transferTo(tempInputFile);
|
||||
|
||||
// Run the LibreOffice command
|
||||
ProcessExecutorResult returnCode = null;
|
||||
IOException unoconvertException = null;
|
||||
// Run the LibreOffice command
|
||||
ProcessExecutorResult returnCode = null;
|
||||
IOException unoconvertException = null;
|
||||
|
||||
if (isUnoConvertEnabled()) {
|
||||
try {
|
||||
List<String> unoCommand =
|
||||
buildUnoConvertCommand(
|
||||
tempInputFile, unoOutputFile, outputFormat, libreOfficeFilter);
|
||||
returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
||||
.runCommandWithOutputHandling(unoCommand);
|
||||
} catch (IOException e) {
|
||||
unoconvertException = e;
|
||||
log.warn(
|
||||
"Unoconvert command failed ({}). Falling back to soffice command.",
|
||||
e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
if (returnCode == null) {
|
||||
// Run the LibreOffice command as a fallback
|
||||
libreOfficeProfile = Files.createTempDirectory("libreoffice_profile_");
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add(runtimePathConfig.getSOfficePath());
|
||||
command.add("-env:UserInstallation=" + libreOfficeProfile.toUri().toString());
|
||||
command.add("--headless");
|
||||
command.add("--nologo");
|
||||
command.add("--infilter=" + libreOfficeFilter);
|
||||
command.add("--convert-to");
|
||||
command.add(outputFormat);
|
||||
command.add("--outdir");
|
||||
command.add(tempOutputDir.toString());
|
||||
command.add(tempInputFile.toString());
|
||||
|
||||
try {
|
||||
returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
||||
.runCommandWithOutputHandling(command);
|
||||
} catch (IOException e) {
|
||||
if (unoconvertException != null) {
|
||||
e.addSuppressed(unoconvertException);
|
||||
if (isUnoConvertEnabled()) {
|
||||
try {
|
||||
List<String> unoCommand =
|
||||
buildUnoConvertCommand(
|
||||
tempInputFile,
|
||||
unoOutputFile,
|
||||
outputFormat,
|
||||
libreOfficeFilter);
|
||||
returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
||||
.runCommandWithOutputHandling(unoCommand);
|
||||
} catch (IOException e) {
|
||||
unoconvertException = e;
|
||||
log.warn(
|
||||
"Unoconvert command failed ({}). Falling back to soffice command.",
|
||||
e.getMessage());
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
// Get output files
|
||||
List<File> outputFiles = Arrays.asList(tempOutputDir.toFile().listFiles());
|
||||
if (returnCode == null) {
|
||||
// Run the LibreOffice command as a fallback
|
||||
libreOfficeProfile = Files.createTempDirectory("libreoffice_profile_");
|
||||
List<String> command = new ArrayList<>();
|
||||
command.add(runtimePathConfig.getSOfficePath());
|
||||
command.add("-env:UserInstallation=" + libreOfficeProfile.toUri().toString());
|
||||
command.add("--headless");
|
||||
command.add("--nologo");
|
||||
command.add("--infilter=" + libreOfficeFilter);
|
||||
command.add("--convert-to");
|
||||
command.add(outputFormat);
|
||||
command.add("--outdir");
|
||||
command.add(tempOutputDir.toString());
|
||||
command.add(tempInputFile.toString());
|
||||
|
||||
if (outputFiles.size() == 1) {
|
||||
// Return single output file
|
||||
File outputFile = outputFiles.get(0);
|
||||
if ("txt:Text".equals(outputFormat)) {
|
||||
outputFormat = "txt";
|
||||
}
|
||||
fileName = pdfBaseName + "." + outputFormat;
|
||||
fileBytes = FileUtils.readFileToByteArray(outputFile);
|
||||
} else {
|
||||
// Return output files in a ZIP archive
|
||||
fileName = pdfBaseName + "To" + outputFormat + ".zip";
|
||||
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
|
||||
try (ZipOutputStream zipOutputStream = new ZipOutputStream(byteArrayOutputStream)) {
|
||||
for (File outputFile : outputFiles) {
|
||||
ZipEntry entry = new ZipEntry(outputFile.getName());
|
||||
zipOutputStream.putNextEntry(entry);
|
||||
try (FileInputStream fis = new FileInputStream(outputFile)) {
|
||||
IOUtils.copy(fis, zipOutputStream);
|
||||
} catch (IOException e) {
|
||||
log.error("Exception writing zip entry", e);
|
||||
try {
|
||||
returnCode =
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
||||
.runCommandWithOutputHandling(command);
|
||||
} catch (IOException e) {
|
||||
if (unoconvertException != null) {
|
||||
e.addSuppressed(unoconvertException);
|
||||
}
|
||||
|
||||
zipOutputStream.closeEntry();
|
||||
throw e;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("Exception writing zip", e);
|
||||
}
|
||||
|
||||
fileBytes = byteArrayOutputStream.toByteArray();
|
||||
// Get output files
|
||||
List<File> outputFiles = Arrays.asList(tempOutputDir.toFile().listFiles());
|
||||
|
||||
if (outputFiles.size() == 1) {
|
||||
// Return single output file
|
||||
File outputFile = outputFiles.get(0);
|
||||
if ("txt:Text".equals(outputFormat)) {
|
||||
outputFormat = "txt";
|
||||
}
|
||||
fileName = pdfBaseName + "." + outputFormat;
|
||||
FileUtils.copyFile(outputFile, finalOut.getFile());
|
||||
} else {
|
||||
// Return output files in a ZIP archive
|
||||
fileName = pdfBaseName + "To" + outputFormat + ".zip";
|
||||
try (OutputStream fos = Files.newOutputStream(finalOut.getPath());
|
||||
ZipOutputStream zipOutputStream = new ZipOutputStream(fos)) {
|
||||
for (File outputFile : outputFiles) {
|
||||
ZipEntry entry = new ZipEntry(outputFile.getName());
|
||||
zipOutputStream.putNextEntry(entry);
|
||||
try (FileInputStream fis = new FileInputStream(outputFile)) {
|
||||
IOUtils.copy(fis, zipOutputStream);
|
||||
} catch (IOException e) {
|
||||
log.error("Exception writing zip entry", e);
|
||||
}
|
||||
|
||||
zipOutputStream.closeEntry();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("Exception writing zip", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
finalOut.close();
|
||||
throw e;
|
||||
} finally {
|
||||
if (libreOfficeProfile != null) {
|
||||
FileUtils.deleteQuietly(libreOfficeProfile.toFile());
|
||||
}
|
||||
}
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
||||
return WebResponseUtils.fileToWebResponse(
|
||||
finalOut, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
||||
}
|
||||
|
||||
private boolean isUnoConvertEnabled() {
|
||||
|
||||
@@ -73,6 +73,19 @@ public class WebResponseUtils {
|
||||
return baosToWebResponse(baos, docName);
|
||||
}
|
||||
|
||||
public static ResponseEntity<StreamingResponseBody> pdfDocToWebResponse(
|
||||
PDDocument document, String docName, TempFileManager tempFileManager)
|
||||
throws IOException {
|
||||
TempFile tempFile = tempFileManager.createManagedTempFile(".pdf");
|
||||
try {
|
||||
document.save(tempFile.getFile());
|
||||
} catch (IOException e) {
|
||||
tempFile.close();
|
||||
throw e;
|
||||
}
|
||||
return pdfFileToWebResponse(tempFile, docName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a File to a web response (PDF default).
|
||||
*
|
||||
@@ -108,23 +121,37 @@ public class WebResponseUtils {
|
||||
public static ResponseEntity<StreamingResponseBody> fileToWebResponse(
|
||||
TempFile outputTempFile, String docName, MediaType mediaType) throws IOException {
|
||||
|
||||
Path path = outputTempFile.getFile().toPath().normalize();
|
||||
long len = Files.size(path);
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(mediaType);
|
||||
headers.setContentLength(len);
|
||||
headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"" + docName + "\"");
|
||||
try {
|
||||
Path path = outputTempFile.getFile().toPath().normalize();
|
||||
long len = Files.size(path);
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(mediaType);
|
||||
headers.setContentLength(len);
|
||||
String encodedDocName =
|
||||
RegexPatternUtils.getInstance()
|
||||
.getPlusSignPattern()
|
||||
.matcher(URLEncoder.encode(docName, StandardCharsets.UTF_8))
|
||||
.replaceAll("%20");
|
||||
headers.setContentDispositionFormData("attachment", encodedDocName);
|
||||
|
||||
StreamingResponseBody body =
|
||||
os -> {
|
||||
try (os) {
|
||||
Files.copy(path, os);
|
||||
os.flush();
|
||||
} finally {
|
||||
outputTempFile.close();
|
||||
}
|
||||
};
|
||||
StreamingResponseBody body =
|
||||
os -> {
|
||||
try (os) {
|
||||
Files.copy(path, os);
|
||||
os.flush();
|
||||
} finally {
|
||||
outputTempFile.close();
|
||||
}
|
||||
};
|
||||
|
||||
return new ResponseEntity<>(body, headers, HttpStatus.OK);
|
||||
return new ResponseEntity<>(body, headers, HttpStatus.OK);
|
||||
} catch (IOException | RuntimeException e) {
|
||||
try {
|
||||
outputTempFile.close();
|
||||
} catch (Exception closeEx) {
|
||||
e.addSuppressed(closeEx);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import static org.mockito.Mockito.lenient;
|
||||
import static org.mockito.Mockito.mockStatic;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
@@ -29,6 +30,7 @@ import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody;
|
||||
|
||||
import io.github.pixee.security.ZipSecurity;
|
||||
|
||||
@@ -59,6 +61,19 @@ class PDFToFileTest {
|
||||
.thenAnswer(
|
||||
invocation ->
|
||||
Files.createTempFile("test", invocation.getArgument(0)).toFile());
|
||||
lenient()
|
||||
.when(mockTempFileManager.createManagedTempFile(anyString()))
|
||||
.thenAnswer(
|
||||
invocation -> {
|
||||
File f =
|
||||
Files.createTempFile("test", invocation.<String>getArgument(0))
|
||||
.toFile();
|
||||
TempFile tf = org.mockito.Mockito.mock(TempFile.class);
|
||||
lenient().when(tf.getFile()).thenReturn(f);
|
||||
lenient().when(tf.getPath()).thenReturn(f.toPath());
|
||||
lenient().when(tf.getAbsolutePath()).thenReturn(f.getAbsolutePath());
|
||||
return tf;
|
||||
});
|
||||
lenient()
|
||||
.when(mockTempFileManager.createTempDirectory())
|
||||
.thenAnswer(invocation -> Files.createTempDirectory("test"));
|
||||
@@ -68,6 +83,12 @@ class PDFToFileTest {
|
||||
pdfToFile = new PDFToFile(mockTempFileManager, mockRuntimePathConfig);
|
||||
}
|
||||
|
||||
private static byte[] drain(ResponseEntity<StreamingResponseBody> response) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
response.getBody().writeTo(baos);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
|
||||
@Test
|
||||
void testProcessPdfToMarkdown_InvalidContentType() throws IOException, InterruptedException {
|
||||
// Prepare
|
||||
@@ -79,7 +100,7 @@ class PDFToFileTest {
|
||||
"This is not a PDF".getBytes());
|
||||
|
||||
// Execute
|
||||
ResponseEntity<byte[]> response = pdfToFile.processPdfToMarkdown(nonPdfFile);
|
||||
ResponseEntity<StreamingResponseBody> response = pdfToFile.processPdfToMarkdown(nonPdfFile);
|
||||
|
||||
// Verify
|
||||
assertEquals(HttpStatus.BAD_REQUEST, response.getStatusCode());
|
||||
@@ -96,7 +117,7 @@ class PDFToFileTest {
|
||||
"This is not a PDF".getBytes());
|
||||
|
||||
// Execute
|
||||
ResponseEntity<byte[]> response = pdfToFile.processPdfToHtml(nonPdfFile);
|
||||
ResponseEntity<StreamingResponseBody> response = pdfToFile.processPdfToHtml(nonPdfFile);
|
||||
|
||||
// Verify
|
||||
assertEquals(HttpStatus.BAD_REQUEST, response.getStatusCode());
|
||||
@@ -114,7 +135,7 @@ class PDFToFileTest {
|
||||
"This is not a PDF".getBytes());
|
||||
|
||||
// Execute
|
||||
ResponseEntity<byte[]> response =
|
||||
ResponseEntity<StreamingResponseBody> response =
|
||||
pdfToFile.processPdfToOfficeFormat(nonPdfFile, "docx", "draw_pdf_import");
|
||||
|
||||
// Verify
|
||||
@@ -133,7 +154,7 @@ class PDFToFileTest {
|
||||
"Fake PDF content".getBytes());
|
||||
|
||||
// Execute with invalid format
|
||||
ResponseEntity<byte[]> response =
|
||||
ResponseEntity<StreamingResponseBody> response =
|
||||
pdfToFile.processPdfToOfficeFormat(pdfFile, "invalid_format", "draw_pdf_import");
|
||||
|
||||
// Verify
|
||||
@@ -184,12 +205,14 @@ class PDFToFileTest {
|
||||
});
|
||||
|
||||
// Execute the method
|
||||
ResponseEntity<byte[]> response = pdfToFile.processPdfToMarkdown(pdfFile);
|
||||
ResponseEntity<StreamingResponseBody> response =
|
||||
pdfToFile.processPdfToMarkdown(pdfFile);
|
||||
|
||||
// Verify - should now return a ZIP file instead of plain markdown
|
||||
assertEquals(HttpStatus.OK, response.getStatusCode());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
byte[] bodyBytes = drain(response);
|
||||
assertNotNull(bodyBytes);
|
||||
assertTrue(bodyBytes.length > 0);
|
||||
|
||||
// Verify content disposition indicates a ZIP file
|
||||
assertTrue(
|
||||
@@ -201,7 +224,7 @@ class PDFToFileTest {
|
||||
// Verify the content by unzipping it
|
||||
try (ZipInputStream zipStream =
|
||||
ZipSecurity.createHardenedInputStream(
|
||||
new java.io.ByteArrayInputStream(response.getBody()))) {
|
||||
new java.io.ByteArrayInputStream(bodyBytes))) {
|
||||
ZipEntry entry;
|
||||
boolean foundMdFile = false;
|
||||
boolean foundImageInFolder = false;
|
||||
@@ -275,12 +298,14 @@ class PDFToFileTest {
|
||||
});
|
||||
|
||||
// Execute the method
|
||||
ResponseEntity<byte[]> response = pdfToFile.processPdfToMarkdown(pdfFile);
|
||||
ResponseEntity<StreamingResponseBody> response =
|
||||
pdfToFile.processPdfToMarkdown(pdfFile);
|
||||
|
||||
// Verify
|
||||
assertEquals(HttpStatus.OK, response.getStatusCode());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
byte[] bodyBytes = drain(response);
|
||||
assertNotNull(bodyBytes);
|
||||
assertTrue(bodyBytes.length > 0);
|
||||
|
||||
// Verify content disposition indicates a zip file
|
||||
assertTrue(
|
||||
@@ -292,7 +317,7 @@ class PDFToFileTest {
|
||||
// Verify the content by unzipping it
|
||||
try (ZipInputStream zipStream =
|
||||
ZipSecurity.createHardenedInputStream(
|
||||
new java.io.ByteArrayInputStream(response.getBody()))) {
|
||||
new java.io.ByteArrayInputStream(bodyBytes))) {
|
||||
ZipEntry entry;
|
||||
boolean foundMdFiles = false;
|
||||
boolean foundImage = false;
|
||||
@@ -352,12 +377,13 @@ class PDFToFileTest {
|
||||
});
|
||||
|
||||
// Execute the method
|
||||
ResponseEntity<byte[]> response = pdfToFile.processPdfToHtml(pdfFile);
|
||||
ResponseEntity<StreamingResponseBody> response = pdfToFile.processPdfToHtml(pdfFile);
|
||||
|
||||
// Verify
|
||||
assertEquals(HttpStatus.OK, response.getStatusCode());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
byte[] bodyBytes = drain(response);
|
||||
assertNotNull(bodyBytes);
|
||||
assertTrue(bodyBytes.length > 0);
|
||||
|
||||
// Verify content disposition indicates a zip file
|
||||
assertTrue(
|
||||
@@ -369,7 +395,7 @@ class PDFToFileTest {
|
||||
// Verify the content by unzipping it
|
||||
try (ZipInputStream zipStream =
|
||||
ZipSecurity.createHardenedInputStream(
|
||||
new java.io.ByteArrayInputStream(response.getBody()))) {
|
||||
new java.io.ByteArrayInputStream(bodyBytes))) {
|
||||
ZipEntry entry;
|
||||
boolean foundMainHtml = false;
|
||||
boolean foundIndexHtml = false;
|
||||
@@ -437,13 +463,14 @@ class PDFToFileTest {
|
||||
});
|
||||
|
||||
// Execute the method with docx format
|
||||
ResponseEntity<byte[]> response =
|
||||
ResponseEntity<StreamingResponseBody> response =
|
||||
pdfToFile.processPdfToOfficeFormat(pdfFile, "docx", "draw_pdf_import");
|
||||
|
||||
// Verify
|
||||
assertEquals(HttpStatus.OK, response.getStatusCode());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
byte[] bodyBytes = drain(response);
|
||||
assertNotNull(bodyBytes);
|
||||
assertTrue(bodyBytes.length > 0);
|
||||
|
||||
// Verify content disposition has correct filename
|
||||
assertTrue(
|
||||
@@ -508,13 +535,14 @@ class PDFToFileTest {
|
||||
});
|
||||
|
||||
// Execute the method with ODP format
|
||||
ResponseEntity<byte[]> response =
|
||||
ResponseEntity<StreamingResponseBody> response =
|
||||
pdfToFile.processPdfToOfficeFormat(pdfFile, "odp", "draw_pdf_import");
|
||||
|
||||
// Verify
|
||||
assertEquals(HttpStatus.OK, response.getStatusCode());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
byte[] bodyBytes = drain(response);
|
||||
assertNotNull(bodyBytes);
|
||||
assertTrue(bodyBytes.length > 0);
|
||||
|
||||
// Verify content disposition for zip file
|
||||
assertTrue(
|
||||
@@ -526,7 +554,7 @@ class PDFToFileTest {
|
||||
// Verify the content by unzipping it
|
||||
try (ZipInputStream zipStream =
|
||||
ZipSecurity.createHardenedInputStream(
|
||||
new java.io.ByteArrayInputStream(response.getBody()))) {
|
||||
new java.io.ByteArrayInputStream(bodyBytes))) {
|
||||
ZipEntry entry;
|
||||
boolean foundMainFile = false;
|
||||
boolean foundMediaFiles = false;
|
||||
@@ -592,13 +620,14 @@ class PDFToFileTest {
|
||||
});
|
||||
|
||||
// Execute the method with text format
|
||||
ResponseEntity<byte[]> response =
|
||||
ResponseEntity<StreamingResponseBody> response =
|
||||
pdfToFile.processPdfToOfficeFormat(pdfFile, "txt:Text", "draw_pdf_import");
|
||||
|
||||
// Verify
|
||||
assertEquals(HttpStatus.OK, response.getStatusCode());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
byte[] bodyBytes = drain(response);
|
||||
assertNotNull(bodyBytes);
|
||||
assertTrue(bodyBytes.length > 0);
|
||||
|
||||
// Verify content disposition has txt extension
|
||||
assertTrue(
|
||||
@@ -650,13 +679,14 @@ class PDFToFileTest {
|
||||
});
|
||||
|
||||
// Execute the method
|
||||
ResponseEntity<byte[]> response =
|
||||
ResponseEntity<StreamingResponseBody> response =
|
||||
pdfToFile.processPdfToOfficeFormat(pdfFile, "docx", "draw_pdf_import");
|
||||
|
||||
// Verify
|
||||
assertEquals(HttpStatus.OK, response.getStatusCode());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
byte[] bodyBytes = drain(response);
|
||||
assertNotNull(bodyBytes);
|
||||
assertTrue(bodyBytes.length > 0);
|
||||
|
||||
// Verify content disposition contains output.docx
|
||||
assertTrue(
|
||||
@@ -696,12 +726,13 @@ class PDFToFileTest {
|
||||
return mockExecutorResult;
|
||||
});
|
||||
|
||||
ResponseEntity<byte[]> response =
|
||||
ResponseEntity<StreamingResponseBody> response =
|
||||
pdfToFileWithUno.processPdfToOfficeFormat(pdfFile, "docx", "writer_pdf_import");
|
||||
|
||||
assertEquals(HttpStatus.OK, response.getStatusCode());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
byte[] bodyBytes = drain(response);
|
||||
assertNotNull(bodyBytes);
|
||||
assertTrue(bodyBytes.length > 0);
|
||||
assertTrue(
|
||||
response.getHeaders()
|
||||
.getContentDisposition()
|
||||
@@ -759,12 +790,13 @@ class PDFToFileTest {
|
||||
return mockExecutorResult;
|
||||
});
|
||||
|
||||
ResponseEntity<byte[]> response =
|
||||
ResponseEntity<StreamingResponseBody> response =
|
||||
pdfToFileWithUno.processPdfToOfficeFormat(pdfFile, "docx", "writer_pdf_import");
|
||||
|
||||
assertEquals(HttpStatus.OK, response.getStatusCode());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
byte[] bodyBytes = drain(response);
|
||||
assertNotNull(bodyBytes);
|
||||
assertTrue(bodyBytes.length > 0);
|
||||
assertTrue(
|
||||
response.getHeaders()
|
||||
.getContentDisposition()
|
||||
|
||||
Reference in New Issue
Block a user