Support multi-file async job results and ZIP extraction (#3922)

# Description of Changes

This PR introduces multi-file support for asynchronous jobs in the
Stirling PDF backend, enabling jobs to return and manage multiple result
files. Previously, job results were limited to a single file represented
by fileId, originalFileName, and contentType. This change replaces that
legacy structure with a new ResultFile abstraction and expands the
functionality throughout the core system.

ZIP File Support
If a job result is a ZIP file:
It is automatically unpacked using buffered streaming.
Each contained file is stored individually and recorded as a ResultFile.
The original ZIP is deleted after successful extraction.
If ZIP extraction fails, the job result is treated as a single file.


New and Updated API Endpoints

1. GET /api/v1/general/job/{jobId}/result

If the job has multiple files → returns a JSON metadata list.

If the job has a single file → streams the file directly.

Includes UTF-8-safe Content-Disposition headers for filename support.

2. GET /api/v1/general/job/{jobId}/result/files
New endpoint that returns:

```json
{
  "jobId": "123",
  "fileCount": 2,
  "files": [
    {
      "fileId": "abc",
      "fileName": "page1.pdf",
      "contentType": "application/pdf",
      "fileSize": 12345
    },
    ...
  ]
}
```


3. GET /api/v1/general/files/{fileId}/metadata
Returns metadata for the file identified by fileId.


4. GET /api/v1/general/files/{fileId}
Downloads a file by fileId, using metadata to determine filename and
content type.

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com>
This commit is contained in:
Anthony Stirling
2025-07-11 13:15:55 +01:00
committed by GitHub
parent d17d10b240
commit bbf5d5f6d4
8 changed files with 493 additions and 59 deletions

View File

@@ -10,7 +10,6 @@ import java.util.Properties;
import java.util.function.Predicate;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -151,9 +150,8 @@ public class AppConfig {
// Publishes the "activeSecurity" bean: the result of asking ClassUtils whether the proprietary
// SecurityConfiguration class is present for this class's class loader.
// NOTE(review): the two argument lines appear twice below (old and re-wrapped formatting);
// this looks like an artifact of the diff view rather than real source — confirm.
@Bean(name = "activeSecurity")
public boolean missingActiveSecurity() {
return ClassUtils.isPresent(
"stirling.software.proprietary.security.configuration.SecurityConfiguration",
this.getClass().getClassLoader()
);
"stirling.software.proprietary.security.configuration.SecurityConfiguration",
this.getClass().getClassLoader());
}
@Bean(name = "directoryFilter")

View File

@@ -1,6 +1,7 @@
package stirling.software.common.model.job;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
@@ -26,14 +27,8 @@ public class JobResult {
/** Error message if the job failed */
private String error;
/** The file ID of the result file, if applicable */
private String fileId;
/** Original file name, if applicable */
private String originalFileName;
/** MIME type of the result, if applicable */
private String contentType;
/** List of result files for jobs that produce files */
private List<ResultFile> resultFiles;
/** Time when the job was created */
private LocalDateTime createdAt;
@@ -64,21 +59,6 @@ public class JobResult {
.build();
}
/**
* Mark this job as complete with a file result
*
* @param fileId The file ID of the result
* @param originalFileName The original file name
* @param contentType The content type of the file
*/
public void completeWithFile(String fileId, String originalFileName, String contentType) {
this.complete = true;
this.fileId = fileId;
this.originalFileName = originalFileName;
this.contentType = contentType;
this.completedAt = LocalDateTime.now();
}
/**
* Mark this job as complete with a general result
*
@@ -101,6 +81,67 @@ public class JobResult {
this.completedAt = LocalDateTime.now();
}
/**
 * Completes this job with a set of result files.
 *
 * <p>The job keeps its own copy of the list, so later changes to the caller's list do not
 * affect the stored results.
 *
 * @param resultFiles The result files produced by the job
 */
public void completeWithFiles(List<ResultFile> resultFiles) {
    complete = true;
    final List<ResultFile> snapshot = new ArrayList<>(resultFiles);
    this.resultFiles = snapshot;
    completedAt = LocalDateTime.now();
}
/**
 * Completes this job with exactly one result file.
 *
 * <p>Convenience wrapper that builds the {@link ResultFile} and delegates to {@link
 * #completeWithFiles(List)}.
 *
 * @param fileId The file ID of the result
 * @param fileName The file name
 * @param contentType The content type of the file
 * @param fileSize The size of the file in bytes
 */
public void completeWithSingleFile(
        String fileId, String fileName, String contentType, long fileSize) {
    completeWithFiles(
            List.of(
                    ResultFile.builder()
                            .fileId(fileId)
                            .fileName(fileName)
                            .contentType(contentType)
                            .fileSize(fileSize)
                            .build()));
}
/**
 * Tells whether at least one result file has been recorded for this job.
 *
 * @return true if this job has file results, false otherwise
 */
public boolean hasFiles() {
    if (resultFiles == null) {
        return false;
    }
    return !resultFiles.isEmpty();
}
/**
 * Tells whether more than one result file has been recorded for this job.
 *
 * @return true if this job has multiple file results, false otherwise
 */
public boolean hasMultipleFiles() {
    if (resultFiles == null) {
        return false;
    }
    return resultFiles.size() > 1;
}
/**
 * Returns the result files of this job as a read-only list.
 *
 * @return An unmodifiable view of the result files, or an empty list when there are none
 */
public List<ResultFile> getAllResultFiles() {
    if (resultFiles == null || resultFiles.isEmpty()) {
        return Collections.emptyList();
    }
    return Collections.unmodifiableList(resultFiles);
}
/**
* Add a note to this job
*

View File

@@ -0,0 +1,26 @@
package stirling.software.common.model.job;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Represents a single file result from a job execution.
 *
 * <p>A plain data holder: the Lombok annotations below generate the accessors, the builder and
 * the no-argument and all-argument constructors.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class ResultFile {
/** The file ID for accessing the file */
private String fileId;
/** The original file name */
private String fileName;
/** MIME type of the file */
private String contentType;
/** Size of the file in bytes */
private long fileSize;
}

View File

@@ -131,14 +131,46 @@ public class FileStorage {
return Files.exists(filePath);
}
/**
 * Reports how large a stored file is, without reading its content into memory.
 *
 * @param fileId The ID of the file
 * @return The size of the file in bytes
 * @throws IOException If the file doesn't exist or can't be read
 */
public long getFileSize(String fileId) throws IOException {
    final Path target = getFilePath(fileId);
    if (Files.exists(target)) {
        return Files.size(target);
    }
    throw new IOException("File not found with ID: " + fileId);
}
/**
 * Get the path for a file ID
 *
 * <p>The ID is rejected if it contains "..", "/" or "\", and the resolved path must still lie
 * under the normalized, absolute base directory built from tempDirPath.
 *
 * @param fileId The ID of the file
 * @return The path to the file
 * @throws IllegalArgumentException if fileId contains path traversal characters or resolves
 * outside base directory
 */
private Path getFilePath(String fileId) {
// NOTE(review): the unvalidated return on the next line looks like the pre-change
// implementation shown by the diff view; the validated logic after it replaces it — confirm.
return Path.of(tempDirPath).resolve(fileId);
// Validate fileId to prevent path traversal
if (fileId.contains("..") || fileId.contains("/") || fileId.contains("\\")) {
throw new IllegalArgumentException("Invalid file ID");
}
// Normalize the base first so the startsWith comparison below is made on canonical forms.
Path basePath = Path.of(tempDirPath).normalize().toAbsolutePath();
Path resolvedPath = basePath.resolve(fileId).normalize();
// Ensure resolved path is within the base directory
if (!resolvedPath.startsWith(basePath)) {
throw new IllegalArgumentException("File ID resolves to an invalid path");
}
return resolvedPath;
}
/**

View File

@@ -1,15 +1,25 @@
package stirling.software.common.service;
import io.github.pixee.security.ZipSecurity;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import jakarta.annotation.PreDestroy;
@@ -17,6 +27,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.job.JobResult;
import stirling.software.common.model.job.JobStats;
import stirling.software.common.model.job.ResultFile;
/** Manages async tasks and their results */
@Service
@@ -80,8 +91,53 @@ public class TaskManager {
// Records a file as the result of a job. ZIP results are expanded into one ResultFile per
// extracted file; anything else (or a ZIP that could not be expanded) is recorded as one file.
public void setFileResult(
String jobId, String fileId, String originalFileName, String contentType) {
JobResult jobResult = getOrCreateJobResult(jobId);
// NOTE(review): the next two statements look like the pre-change lines shown by the diff view
// (completeWithFile is removed from JobResult in the same commit) — confirm.
jobResult.completeWithFile(fileId, originalFileName, contentType);
log.debug("Set file result for job ID: {} with file ID: {}", jobId, fileId);
// Check if this is a ZIP file that should be extracted
if (isZipFile(contentType, originalFileName)) {
try {
List<ResultFile> extractedFiles =
extractZipToIndividualFiles(fileId, originalFileName);
// An empty extraction result falls through to the single-file handling below.
if (!extractedFiles.isEmpty()) {
jobResult.completeWithFiles(extractedFiles);
log.debug(
"Set multiple file results for job ID: {} with {} files extracted from ZIP",
jobId,
extractedFiles.size());
return;
}
} catch (Exception e) {
// Extraction is best-effort: any failure is logged and the ZIP is recorded as-is below.
log.warn(
"Failed to extract ZIP file for job {}: {}. Falling back to single file result.",
jobId,
e.getMessage());
}
}
// Handle as single file using new ResultFile approach
try {
long fileSize = fileStorage.getFileSize(fileId);
jobResult.completeWithSingleFile(fileId, originalFileName, contentType, fileSize);
log.debug("Set single file result for job ID: {} with file ID: {}", jobId, fileId);
} catch (Exception e) {
// The size lookup is best-effort too: the job is still completed, with a reported size of 0.
log.warn(
"Failed to get file size for job {}: {}. Using size 0.", jobId, e.getMessage());
jobResult.completeWithSingleFile(fileId, originalFileName, contentType, 0);
}
}
/**
 * Completes a task with several result files at once.
 *
 * <p>The job entry is looked up (or created) and then marked complete with the given files.
 *
 * @param jobId The job ID
 * @param resultFiles The list of result files
 */
public void setMultipleFileResults(String jobId, List<ResultFile> resultFiles) {
    getOrCreateJobResult(jobId).completeWithFiles(resultFiles);
    final int fileCount = resultFiles.size();
    log.debug("Set multiple file results for job ID: {} with {} files", jobId, fileCount);
}
/**
@@ -104,7 +160,7 @@ public class TaskManager {
public void setComplete(String jobId) {
JobResult jobResult = getOrCreateJobResult(jobId);
if (jobResult.getResult() == null
&& jobResult.getFileId() == null
&& !jobResult.hasFiles()
&& jobResult.getError() == null) {
// If no result or error has been set, mark it as complete with an empty result
jobResult.completeWithResult("Task completed successfully");
@@ -186,7 +242,7 @@ public class TaskManager {
failedJobs++;
} else {
successfulJobs++;
if (result.getFileId() != null) {
if (result.hasFiles()) {
fileResultJobs++;
}
}
@@ -250,17 +306,8 @@ public class TaskManager {
&& result.getCompletedAt() != null
&& result.getCompletedAt().isBefore(expiryThreshold)) {
// If the job has a file result, delete the file
if (result.getFileId() != null) {
try {
fileStorage.deleteFile(result.getFileId());
} catch (Exception e) {
log.warn(
"Failed to delete file for job {}: {}",
entry.getKey(),
e.getMessage());
}
}
// Clean up file results
cleanupJobFiles(result, entry.getKey());
// Remove the job result
jobResults.remove(entry.getKey());
@@ -290,4 +337,128 @@ public class TaskManager {
cleanupExecutor.shutdownNow();
}
}
/**
 * Check if a file is a ZIP file based on content type and filename.
 *
 * <p>Both checks ignore case and do not depend on the JVM's default locale: a plain {@code
 * toLowerCase()} would turn "ZIP" into "zıp" (dotless i) under a Turkish locale and miss the
 * extension.
 *
 * @param contentType The MIME type reported for the file, may be null
 * @param fileName The file name, may be null
 * @return true if the content type is a known ZIP type or the name ends with ".zip"
 */
private boolean isZipFile(String contentType, String fileName) {
    if (contentType != null
            && (contentType.equalsIgnoreCase("application/zip")
                    || contentType.equalsIgnoreCase("application/x-zip-compressed"))) {
        return true;
    }
    return fileName != null && fileName.toLowerCase(java.util.Locale.ROOT).endsWith(".zip");
}
/**
 * Extract a ZIP file into individual files and store each one in file storage.
 *
 * <p>Directory entries are skipped. The original ZIP is deleted only when at least one file was
 * extracted, so a caller that receives an empty list can still serve the ZIP itself. If
 * extraction fails part-way, the files stored so far are deleted again before the exception
 * propagates, so a failed extraction leaves no orphaned files behind.
 *
 * @param zipFileId The storage ID of the ZIP file to extract
 * @param originalZipFileName The original name of the ZIP file (not used by the extraction)
 * @return The extracted files in archive order; empty if the archive has no file entries
 * @throws IOException If the ZIP cannot be read or an extracted file cannot be stored
 */
private List<ResultFile> extractZipToIndividualFiles(
        String zipFileId, String originalZipFileName) throws IOException {
    List<ResultFile> extractedFiles = new ArrayList<>();
    try {
        // Note: the archive and each entry are held fully in memory while being processed.
        MultipartFile zipFile = fileStorage.retrieveFile(zipFileId);
        try (ZipInputStream zipIn =
                ZipSecurity.createHardenedInputStream(
                        new ByteArrayInputStream(zipFile.getBytes()))) {
            ZipEntry entry;
            while ((entry = zipIn.getNextEntry()) != null) {
                if (!entry.isDirectory()) {
                    // A ZipInputStream signals end-of-stream at the end of the current entry,
                    // so this copies exactly one entry.
                    ByteArrayOutputStream out = new ByteArrayOutputStream();
                    zipIn.transferTo(out);
                    byte[] fileContent = out.toByteArray();

                    String contentType = determineContentType(entry.getName());
                    String individualFileId =
                            fileStorage.storeBytes(fileContent, entry.getName());
                    ResultFile resultFile =
                            ResultFile.builder()
                                    .fileId(individualFileId)
                                    .fileName(entry.getName())
                                    .contentType(contentType)
                                    .fileSize(fileContent.length)
                                    .build();
                    extractedFiles.add(resultFile);
                    log.debug(
                            "Extracted file: {} (size: {} bytes)",
                            entry.getName(),
                            fileContent.length);
                }
                zipIn.closeEntry();
            }
        }
    } catch (IOException | RuntimeException e) {
        // The caller falls back to the ZIP itself, so anything stored so far would be orphaned.
        discardExtractedFiles(extractedFiles);
        throw e;
    }

    if (extractedFiles.isEmpty()) {
        // Keep the ZIP: the caller will record it as a single-file result.
        log.debug("No files extracted from ZIP file {}; keeping original", zipFileId);
        return extractedFiles;
    }

    // Clean up the original ZIP file after extraction
    try {
        fileStorage.deleteFile(zipFileId);
        log.debug("Cleaned up original ZIP file: {}", zipFileId);
    } catch (Exception e) {
        log.warn("Failed to clean up original ZIP file {}: {}", zipFileId, e.getMessage());
    }
    return extractedFiles;
}

/** Best-effort removal of files stored during an extraction that did not complete. */
private void discardExtractedFiles(List<ResultFile> partiallyExtracted) {
    for (ResultFile stored : partiallyExtracted) {
        try {
            fileStorage.deleteFile(stored.getFileId());
        } catch (Exception e) {
            log.warn(
                    "Failed to delete partially extracted file {}: {}",
                    stored.getFileId(),
                    e.getMessage());
        }
    }
}
/**
 * Determine content type based on file extension.
 *
 * <p>The extension match ignores case and uses {@code Locale.ROOT}, so it does not depend on
 * the JVM's default locale (under a Turkish locale a plain {@code toLowerCase()} maps "I" to a
 * dotless "ı", which would make names such as "A.JPG"-style extensions containing "I" miss).
 *
 * @param fileName The file name to inspect, may be null
 * @return The matching MIME type, or application/octet-stream for null or unknown names
 */
private String determineContentType(String fileName) {
    if (fileName == null) {
        return MediaType.APPLICATION_OCTET_STREAM_VALUE;
    }
    String lowerName = fileName.toLowerCase(java.util.Locale.ROOT);
    if (lowerName.endsWith(".pdf")) {
        return MediaType.APPLICATION_PDF_VALUE;
    } else if (lowerName.endsWith(".txt")) {
        return MediaType.TEXT_PLAIN_VALUE;
    } else if (lowerName.endsWith(".json")) {
        return MediaType.APPLICATION_JSON_VALUE;
    } else if (lowerName.endsWith(".xml")) {
        return MediaType.APPLICATION_XML_VALUE;
    } else if (lowerName.endsWith(".jpg") || lowerName.endsWith(".jpeg")) {
        return MediaType.IMAGE_JPEG_VALUE;
    } else if (lowerName.endsWith(".png")) {
        return MediaType.IMAGE_PNG_VALUE;
    } else {
        return MediaType.APPLICATION_OCTET_STREAM_VALUE;
    }
}
/**
 * Deletes every stored file that belongs to a job result.
 *
 * <p>Each deletion is attempted independently; a failure is logged and does not stop the
 * remaining files from being removed.
 */
private void cleanupJobFiles(JobResult result, String jobId) {
    // getAllResultFiles() yields an empty list when the job has no files, so no guard is needed.
    for (ResultFile stored : result.getAllResultFiles()) {
        try {
            fileStorage.deleteFile(stored.getFileId());
        } catch (Exception e) {
            log.warn(
                    "Failed to delete file {} for job {}: {}",
                    stored.getFileId(),
                    jobId,
                    e.getMessage());
        }
    }
}
/**
 * Find the ResultFile metadata for a given file ID by searching through all job results.
 *
 * @param fileId The file ID to look up; a null ID never matches
 * @return The matching ResultFile, or null if no job result contains that file ID
 */
public ResultFile findResultFileByFileId(String fileId) {
    // Without this guard a null ID throws only when some job happens to have files.
    if (fileId == null) {
        return null;
    }
    for (JobResult jobResult : jobResults.values()) {
        // getAllResultFiles() is empty for jobs without files, so no separate check is needed.
        for (ResultFile resultFile : jobResult.getAllResultFiles()) {
            if (fileId.equals(resultFile.getFileId())) {
                return resultFile;
            }
        }
    }
    return null;
}
}

View File

@@ -18,6 +18,7 @@ import org.springframework.test.util.ReflectionTestUtils;
import stirling.software.common.model.job.JobResult;
import stirling.software.common.model.job.JobStats;
import stirling.software.common.model.job.ResultFile;
class TaskManagerTest {
@@ -73,13 +74,17 @@ class TaskManagerTest {
}
@Test
void testSetFileResult() {
void testSetFileResult() throws Exception {
// Arrange
String jobId = UUID.randomUUID().toString();
taskManager.createTask(jobId);
String fileId = "file-id";
String originalFileName = "test.pdf";
String contentType = "application/pdf";
long fileSize = 1024L;
// Mock the fileStorage.getFileSize() call
when(fileStorage.getFileSize(fileId)).thenReturn(fileSize);
// Act
taskManager.setFileResult(jobId, fileId, originalFileName, contentType);
@@ -88,9 +93,17 @@ class TaskManagerTest {
JobResult result = taskManager.getJobResult(jobId);
assertNotNull(result);
assertTrue(result.isComplete());
assertEquals(fileId, result.getFileId());
assertEquals(originalFileName, result.getOriginalFileName());
assertEquals(contentType, result.getContentType());
assertTrue(result.hasFiles());
assertFalse(result.hasMultipleFiles());
var resultFiles = result.getAllResultFiles();
assertEquals(1, resultFiles.size());
ResultFile resultFile = resultFiles.get(0);
assertEquals(fileId, resultFile.getFileId());
assertEquals(originalFileName, resultFile.getFileName());
assertEquals(contentType, resultFile.getContentType());
assertEquals(fileSize, resultFile.getFileSize());
assertNotNull(result.getCompletedAt());
}
@@ -163,8 +176,11 @@ class TaskManagerTest {
}
@Test
void testGetJobStats() {
void testGetJobStats() throws Exception {
// Arrange
// Mock fileStorage.getFileSize for file operations
when(fileStorage.getFileSize("file-id")).thenReturn(1024L);
// 1. Create active job
String activeJobId = "active-job";
taskManager.createTask(activeJobId);
@@ -216,9 +232,15 @@ class TaskManagerTest {
LocalDateTime oldTime = LocalDateTime.now().minusHours(1);
ReflectionTestUtils.setField(oldJob, "completedAt", oldTime);
ReflectionTestUtils.setField(oldJob, "complete", true);
ReflectionTestUtils.setField(oldJob, "fileId", "file-id");
ReflectionTestUtils.setField(oldJob, "originalFileName", "test.pdf");
ReflectionTestUtils.setField(oldJob, "contentType", "application/pdf");
// Create a ResultFile and set it using the new approach
ResultFile resultFile = ResultFile.builder()
.fileId("file-id")
.fileName("test.pdf")
.contentType("application/pdf")
.fileSize(1024L)
.build();
ReflectionTestUtils.setField(oldJob, "resultFiles", java.util.List.of(resultFile));
when(fileStorage.deleteFile("file-id")).thenReturn(true);