refactor: PipelineProcessor inputFiles

Reworks the `generateInputFiles` and `runPipelineAgainstFiles` methods to use a `Map<String, Resource>` keyed by file name instead of a `List<Resource>`, so that individual files can be looked up by name.

You can now add multiple files in `fileInput` and refer to a file by its name in `operations.parameters.inputFile`.
This commit is contained in:
Florian.FAYEULLE 2025-10-17 16:20:31 +02:00
parent ac88a952d5
commit addcb2e2a1
5 changed files with 193 additions and 195 deletions

View File

@ -2,11 +2,11 @@ package stirling.software.common.model;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.function.Function;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -17,6 +17,14 @@ import stirling.software.common.model.exception.UnsupportedProviderException;
class ApplicationPropertiesLogicTest { class ApplicationPropertiesLogicTest {
/**
 * Normalizes a path given as text so assertions compare equal regardless of platform.
 *
 * @param path the path text to normalize
 * @return the normalized path with forward slashes as separators
 */
private static String normalize(String path) {
    Path parsed = Path.of(path);
    return normalize(parsed);
}
/**
 * Collapses redundant path elements and rewrites backslashes as forward slashes.
 *
 * @param path the path to normalize
 * @return the normalized path rendered with forward slashes as separators
 */
private static String normalize(Path path) {
    String collapsed = path.normalize().toString();
    return collapsed.replace('\\', '/');
}
@Test @Test
void system_isAnalyticsEnabled_null_false_true() { void system_isAnalyticsEnabled_null_false_true() {
ApplicationProperties.System sys = new ApplicationProperties.System(); ApplicationProperties.System sys = new ApplicationProperties.System();
@ -33,23 +41,22 @@ class ApplicationPropertiesLogicTest {
@Test @Test
void tempFileManagement_defaults_and_overrides() { void tempFileManagement_defaults_and_overrides() {
Function<String, String> normalize = s -> Paths.get(s).normalize().toString();
ApplicationProperties.TempFileManagement tfm = ApplicationProperties.TempFileManagement tfm =
new ApplicationProperties.TempFileManagement(); new ApplicationProperties.TempFileManagement();
String expectedBase = String expectedBase =
Paths.get(java.lang.System.getProperty("java.io.tmpdir"), "stirling-pdf") Paths.get(java.lang.System.getProperty("java.io.tmpdir"), "stirling-pdf")
.toString(); .toString();
assertEquals(expectedBase, tfm.getBaseTmpDir()); assertEquals(normalize(expectedBase), normalize(tfm.getBaseTmpDir()));
String expectedLibre = Paths.get(expectedBase, "libreoffice").toString(); String expectedLibre = Paths.get(expectedBase, "libreoffice").toString();
assertEquals(expectedLibre, tfm.getLibreofficeDir()); assertEquals(normalize(expectedLibre), normalize(tfm.getLibreofficeDir()));
tfm.setBaseTmpDir("/custom/base"); tfm.setBaseTmpDir("/custom/base");
assertEquals("/custom/base", normalize.apply(tfm.getBaseTmpDir())); assertEquals("/custom/base", normalize(tfm.getBaseTmpDir()));
tfm.setLibreofficeDir("/opt/libre"); tfm.setLibreofficeDir("/opt/libre");
assertEquals("/opt/libre", normalize.apply(tfm.getLibreofficeDir())); assertEquals("/opt/libre", normalize(tfm.getLibreofficeDir()));
} }
@Test @Test

View File

@ -68,7 +68,7 @@ public class PipelineController {
postHogService.captureEvent("pipeline_api_event", properties); postHogService.captureEvent("pipeline_api_event", properties);
try { try {
List<Resource> inputFiles = processor.generateInputFiles(files); Map<String, Resource> inputFiles = processor.generateInputFiles(files);
if (inputFiles == null || inputFiles.isEmpty()) { if (inputFiles == null || inputFiles.isEmpty()) {
return null; return null;
} }

View File

@ -304,7 +304,7 @@ public class PipelineDirectoryProcessor {
List<File> filesToProcess, PipelineConfig config, Path dir, Path processingDir) List<File> filesToProcess, PipelineConfig config, Path dir, Path processingDir)
throws IOException { throws IOException {
try { try {
List<Resource> inputFiles = Map<String, Resource> inputFiles =
processor.generateInputFiles(filesToProcess.toArray(new File[0])); processor.generateInputFiles(filesToProcess.toArray(new File[0]));
if (inputFiles == null || inputFiles.isEmpty()) { if (inputFiles == null || inputFiles.isEmpty()) {
return; return;

View File

@ -6,10 +6,9 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.ArrayList; import java.util.*;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry; import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream; import java.util.zip.ZipInputStream;
@ -42,12 +41,16 @@ import stirling.software.common.service.UserServiceInterface;
@Slf4j @Slf4j
public class PipelineProcessor { public class PipelineProcessor {
// ------------------------
// AUTOWIRED
// ------------------------
private final ApiDocService apiDocService; private final ApiDocService apiDocService;
private final UserServiceInterface userService; private final UserServiceInterface userService;
private final ServletContext servletContext; private final ServletContext servletContext;
// ------------------------
// CONSTRUCTORS
// ------------------------
public PipelineProcessor( public PipelineProcessor(
ApiDocService apiDocService, ApiDocService apiDocService,
@Autowired(required = false) UserServiceInterface userService, @Autowired(required = false) UserServiceInterface userService,
@ -57,7 +60,92 @@ public class PipelineProcessor {
this.servletContext = servletContext; this.servletContext = servletContext;
} }
public static String removeTrailingNaming(String filename) { // ------------------------
// METHODS
// ------------------------
/**
 * Runs every operation of the pipeline configuration, in order, against the given files.
 *
 * <p>For each operation the parameters are validated through {@code apiDocService}, converted
 * to a request body, and any String parameter value equal to a known file name is replaced by
 * the matching {@link Resource}. When the body carries neither an {@code inputFile} nor a
 * {@code fileId} entry, the files whose extension matches the operation's expected input types
 * are attached as {@code fileInput}. Files produced by an operation are merged by name into
 * the working set used by the following operations.
 *
 * <p>The supplied map is never modified; the method works on its own mutable copy, so callers
 * may pass immutable maps such as {@code Map.of(...)}.
 *
 * @param files input files keyed by file name; {@code null} is treated as an empty map
 * @param config pipeline configuration providing the ordered operations
 * @return the result carrying the error flag, the filter flag and the files produced by the
 *     last operation whose response was processed
 * @throws IllegalArgumentException if {@code apiDocService} rejects an operation or its
 *     parameters
 * @throws Exception if sending a request or processing a response fails
 */
PipelineResult runPipelineAgainstFiles(Map<String, Resource> files, PipelineConfig config)
        throws Exception {
    // Private mutable copy: outputs are merged into it below, which must neither leak into the
    // caller's map nor fail when the caller passed an immutable one.
    Map<String, Resource> workingFiles =
            files == null ? new LinkedHashMap<>() : new LinkedHashMap<>(files);

    ByteArrayOutputStream logStream = new ByteArrayOutputStream();
    boolean hasErrors = false;
    boolean filtersApplied = false;
    List<Resource> lastOutputFiles = new ArrayList<>();

    // try-with-resources so the stream is also closed when an invalid operation throws
    try (PrintStream logPrintStream = new PrintStream(logStream)) {
        for (PipelineOperation pipelineOperation : config.getOperations()) {
            // prepare operation
            String operation = pipelineOperation.getOperation();
            // NOTE(review): this flag is only logged; all matching files are sent in a single
            // request whatever its value - confirm single-input endpoints cope with that.
            boolean isMultiInputOperation = apiDocService.isMultiInput(operation);
            log.info(
                    "Running operation: {} isMultiInputOperation {}",
                    operation,
                    isMultiInputOperation);

            Map<String, Object> parameters = pipelineOperation.getParameters();
            if (!apiDocService.isValidOperation(operation, parameters)) {
                log.error("Invalid operation or parameters: o:{} p:{}", operation, parameters);
                throw new IllegalArgumentException(
                        "Invalid operation: " + operation + " with parameters: " + parameters);
            }

            String url = getBaseUrl() + operation;

            // convert operation's parameters to Request Body
            MultiValueMap<String, Object> body = this.convertToRequestBody(parameters);

            // inject files (inputFile and others referenced in parameters)
            this.replaceWithRessource(body, workingFiles);

            if (!body.containsKey("inputFile") && !body.containsKey("fileId")) {
                // no explicit file reference: select inputs by the extensions from apiDoc
                Map<String, Resource> inputFiles =
                        this.extractInputFiles(workingFiles, operation);
                if (inputFiles.isEmpty()) {
                    String expectedTypes = String.join(", ", this.expectedTypes(operation));
                    String fileNames = String.join(", ", workingFiles.keySet());
                    logPrintStream.printf(
                            "No files with extensions [%s] found for operation '%s'. Provided files [%s]%n",
                            expectedTypes, operation, fileNames);
                    hasErrors = true;
                    continue;
                }
                for (Resource file : inputFiles.values()) {
                    body.add("fileInput", file);
                }
            }

            // run request
            ResponseEntity<byte[]> response = sendWebRequest(url, body);
            byte[] responseBody = response.getBody();

            // handle response
            if (operation.startsWith("/api/v1/filter/filter-")
                    && (responseBody == null || responseBody.length == 0)) {
                // NOTE(review): the working set is left untouched here, so later operations
                // still see the filtered file(s) - confirm this is intended.
                filtersApplied = true;
                log.info("Skipping file due to filtering {}", operation);
                continue;
            }
            if (!HttpStatus.OK.equals(response.getStatusCode())) {
                // Decode the payload: formatting the byte[] directly would only print its
                // identity hash instead of the server's message.
                String responseText =
                        responseBody == null
                                ? ""
                                : new String(responseBody, StandardCharsets.UTF_8);
                logPrintStream.printf(
                        "Error in operation: %s response: %s%n", operation, responseText);
                hasErrors = true;
                continue;
            }

            Map<String, Resource> outputFiles = processOutputFiles(operation, response);
            lastOutputFiles = new ArrayList<>(outputFiles.values());
            workingFiles.putAll(outputFiles); // add|replace for next operations
        }
    }

    if (hasErrors) {
        log.error("Errors occurred during processing. Log: {}", logStream);
    }

    PipelineResult result = new PipelineResult();
    result.setHasErrors(hasErrors);
    result.setFiltersApplied(filtersApplied);
    result.setOutputFiles(lastOutputFiles);
    return result;
}
// ------------------------
// UTILS
// ------------------------
private String removeTrailingNaming(String filename) {
// Splitting filename into name and extension // Splitting filename into name and extension
int dotIndex = filename.lastIndexOf("."); int dotIndex = filename.lastIndexOf(".");
if (dotIndex == -1) { if (dotIndex == -1) {
@ -87,118 +175,48 @@ public class PipelineProcessor {
return "http://localhost:" + port + contextPath + "/"; return "http://localhost:" + port + contextPath + "/";
} }
PipelineResult runPipelineAgainstFiles(List<Resource> outputFiles, PipelineConfig config) private Set<String> expectedTypes(String operation) {
throws Exception { // get expected input types
PipelineResult result = new PipelineResult();
ByteArrayOutputStream logStream = new ByteArrayOutputStream();
PrintStream logPrintStream = new PrintStream(logStream);
boolean hasErrors = false;
boolean filtersApplied = false;
for (PipelineOperation pipelineOperation : config.getOperations()) {
String operation = pipelineOperation.getOperation();
boolean isMultiInputOperation = apiDocService.isMultiInput(operation);
log.info(
"Running operation: {} isMultiInputOperation {}",
operation,
isMultiInputOperation);
Map<String, Object> parameters = pipelineOperation.getParameters();
List<String> inputFileTypes = apiDocService.getExtensionTypes(false, operation); List<String> inputFileTypes = apiDocService.getExtensionTypes(false, operation);
if (inputFileTypes == null) { if (inputFileTypes == null) return Set.of("ALL"); // early exit (ALL files)
inputFileTypes = new ArrayList<>(List.of("ALL")); return new HashSet<>(inputFileTypes);
} }
if (!apiDocService.isValidOperation(operation, parameters)) { /**
log.error("Invalid operation or parameters: o:{} p:{}", operation, parameters); * Extracts and filters the input files based on the expected types for a given operation. The
throw new IllegalArgumentException( * method checks the file extensions against the expected types and returns a map of the
"Invalid operation: " + operation + " with parameters: " + parameters); * filtered files.
} *
* @param files a map of file names as keys and their corresponding {@link Resource} as values
* @param operation the specific operation for which files need to be filtered
* @return a map containing only the files with extensions matching the expected types for the
* given operation
*/
private Map<String, Resource> extractInputFiles(Map<String, Resource> files, String operation) {
if (files == null) return Map.of(); // early exit
String url = getBaseUrl() + operation; // get expected input types from apiDoc
List<Resource> newOutputFiles = new ArrayList<>(); Set<String> types = this.expectedTypes(operation);
if (!isMultiInputOperation) { if (types.contains("ALL")) return files; // early exit
for (Resource file : outputFiles) {
boolean hasInputFileType = false;
for (String extension : inputFileTypes) {
if ("ALL".equals(extension)
|| file.getFilename().toLowerCase().endsWith(extension)) {
hasInputFileType = true;
MultiValueMap<String, Object> body = new LinkedMultiValueMap<>();
body.add("fileInput", file);
for (Entry<String, Object> entry : parameters.entrySet()) {
if (entry.getValue() instanceof List<?> entryList) {
for (Object item : entryList) {
body.add(entry.getKey(), item);
}
} else {
body.add(entry.getKey(), entry.getValue());
}
}
ResponseEntity<byte[]> response = sendWebRequest(url, body);
// If the operation is filter and the response body is null or empty,
// skip
// this
// file
if (operation.startsWith("/api/v1/filter/filter-")
&& (response.getBody() == null
|| response.getBody().length == 0)) {
filtersApplied = true;
log.info("Skipping file due to filtering {}", operation);
continue;
}
if (!HttpStatus.OK.equals(response.getStatusCode())) {
logPrintStream.println("Error: " + response.getBody());
hasErrors = true;
continue;
}
processOutputFiles(operation, response, newOutputFiles);
}
}
if (!hasInputFileType) {
String filename = file.getFilename();
String providedExtension = "no extension";
if (filename != null && filename.contains(".")) {
providedExtension =
filename.substring(filename.lastIndexOf(".")).toLowerCase();
}
logPrintStream.println( // filter out files that don't match the expected input types
"No files with extension " return files.entrySet().stream()
+ String.join(", ", inputFileTypes)
+ " found for operation "
+ operation
+ ". Provided file '"
+ filename
+ "' has extension: "
+ providedExtension);
hasErrors = true;
}
}
} else {
// Filter and collect all files that match the inputFileExtension
List<Resource> matchingFiles;
if (inputFileTypes.contains("ALL")) {
matchingFiles = new ArrayList<>(outputFiles);
} else {
final List<String> finalinputFileTypes = inputFileTypes;
matchingFiles =
outputFiles.stream()
.filter( .filter(
file -> entry -> {
finalinputFileTypes.stream() String filename = entry.getKey();
.anyMatch( String ext =
file.getFilename().toLowerCase() filename.substring(filename.lastIndexOf(".") + 1).toLowerCase();
::endsWith)) return types.contains(ext);
.toList(); })
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
} }
// Check if there are matching files
if (!matchingFiles.isEmpty()) { /**
// Create a new MultiValueMap for the request body * Converts a given map of parameters into a MultiValueMap to represent the request body. This
* is useful for preparing data for a form-data or application/x-www-form-urlencoded request.
*/
private MultiValueMap<String, Object> convertToRequestBody(Map<String, Object> parameters) {
MultiValueMap<String, Object> body = new LinkedMultiValueMap<>(); MultiValueMap<String, Object> body = new LinkedMultiValueMap<>();
// Add all matching files to the body
for (Resource file : matchingFiles) {
body.add("fileInput", file);
}
for (Entry<String, Object> entry : parameters.entrySet()) { for (Entry<String, Object> entry : parameters.entrySet()) {
if (entry.getValue() instanceof List<?> entryList) { if (entry.getValue() instanceof List<?> entryList) {
for (Object item : entryList) { for (Object item : entryList) {
@ -208,56 +226,24 @@ public class PipelineProcessor {
body.add(entry.getKey(), entry.getValue()); body.add(entry.getKey(), entry.getValue());
} }
} }
ResponseEntity<byte[]> response = sendWebRequest(url, body); return body;
// Handle the response
if (HttpStatus.OK.equals(response.getStatusCode())) {
processOutputFiles(operation, response, newOutputFiles);
} else {
// Log error if the response status is not OK
logPrintStream.println(
"Error in multi-input operation: " + response.getBody());
hasErrors = true;
} }
} else {
// Get details about what files were actually provided
List<String> providedExtensions =
outputFiles.stream()
.map(
file -> {
String filename = file.getFilename();
if (filename != null && filename.contains(".")) {
return filename.substring(
filename.lastIndexOf("."))
.toLowerCase();
}
return "no extension";
})
.distinct()
.toList();
logPrintStream.println( /**
"No files with extension " * Replaces occurrences of file names in the provided body with corresponding resource objects
+ String.join(", ", inputFileTypes) * from the given files map.
+ " found for multi-input operation " */
+ operation private void replaceWithRessource(
+ ". Provided files have extensions: " MultiValueMap<String, Object> body, Map<String, Resource> files) {
+ String.join(", ", providedExtensions) Set<String> fileNames = files.keySet();
+ " (total files: " body.forEach(
+ outputFiles.size() (key, values) ->
+ ")"); values.replaceAll(
hasErrors = true; value ->
} (value instanceof String && fileNames.contains(value))
} ? files.get(value) // replace it
logPrintStream.close(); : value // keep it
outputFiles = newOutputFiles; ));
}
if (hasErrors) {
log.error("Errors occurred during processing. Log: {}", logStream.toString());
}
result.setHasErrors(hasErrors);
result.setFiltersApplied(filtersApplied);
result.setOutputFiles(outputFiles);
return result;
} }
/* package */ ResponseEntity<byte[]> sendWebRequest( /* package */ ResponseEntity<byte[]> sendWebRequest(
@ -274,9 +260,11 @@ public class PipelineProcessor {
return restTemplate.exchange(url, HttpMethod.POST, entity, byte[].class); return restTemplate.exchange(url, HttpMethod.POST, entity, byte[].class);
} }
private List<Resource> processOutputFiles( private Map<String, Resource> processOutputFiles(
String operation, ResponseEntity<byte[]> response, List<Resource> newOutputFiles) String operation, ResponseEntity<byte[]> response) throws IOException {
throws IOException { if (response.getBody() == null || response.getBody().length == 0)
return Map.of(); // early exit
// Define filename // Define filename
String newFilename; String newFilename;
if (operation.contains("auto-rename")) { if (operation.contains("auto-rename")) {
@ -286,12 +274,13 @@ public class PipelineProcessor {
newFilename = extractFilename(response); newFilename = extractFilename(response);
} else { } else {
// Otherwise, keep the original filename. // Otherwise, keep the original filename.
newFilename = removeTrailingNaming(extractFilename(response)); newFilename = this.removeTrailingNaming(extractFilename(response));
} }
Map<String, Resource> outputFiles = new HashMap<>();
// Check if the response body is a zip file // Check if the response body is a zip file
if (isZip(response.getBody())) { if (isZip(response.getBody())) {
// Unzip the file and add all the files to the new output files // Unzip the file and add all the files to the new output files
newOutputFiles.addAll(unzip(response.getBody())); unzip(response.getBody()).forEach(file -> outputFiles.put(file.getFilename(), file));
} else { } else {
Resource outputResource = Resource outputResource =
new ByteArrayResource(response.getBody()) { new ByteArrayResource(response.getBody()) {
@ -301,12 +290,12 @@ public class PipelineProcessor {
return newFilename; return newFilename;
} }
}; };
newOutputFiles.add(outputResource); outputFiles.put(newFilename, outputResource);
} }
return newOutputFiles; return outputFiles;
} }
public String extractFilename(ResponseEntity<byte[]> response) { private String extractFilename(ResponseEntity<byte[]> response) {
// Default filename if not found // Default filename if not found
String filename = "default-filename.ext"; String filename = "default-filename.ext";
HttpHeaders headers = response.getHeaders(); HttpHeaders headers = response.getHeaders();
@ -325,12 +314,13 @@ public class PipelineProcessor {
return filename; return filename;
} }
List<Resource> generateInputFiles(File[] files) throws Exception { Map<String, Resource> generateInputFiles(File[] files) throws Exception {
if (files == null || files.length == 0) { if (files == null || files.length == 0) {
log.info("No files"); log.info("No files");
return null; return Map.of(); // early exit
} }
List<Resource> outputFiles = new ArrayList<>();
Map<String, Resource> outputFiles = new HashMap<>();
for (File file : files) { for (File file : files) {
Path normalizedPath = Paths.get(file.getName()).normalize(); Path normalizedPath = Paths.get(file.getName()).normalize();
if (normalizedPath.startsWith("..")) { if (normalizedPath.startsWith("..")) {
@ -349,7 +339,7 @@ public class PipelineProcessor {
return file.getName(); return file.getName();
} }
}; };
outputFiles.add(fileResource); outputFiles.put(fileResource.getFilename(), fileResource);
} else { } else {
log.info("File not found: {}", path); log.info("File not found: {}", path);
} }
@ -358,12 +348,13 @@ public class PipelineProcessor {
return outputFiles; return outputFiles;
} }
List<Resource> generateInputFiles(MultipartFile[] files) throws Exception { Map<String, Resource> generateInputFiles(MultipartFile[] files) throws Exception {
if (files == null || files.length == 0) { if (files == null || files.length == 0) {
log.info("No files"); log.warn("No files");
return null; return Map.of(); // early exit
} }
List<Resource> outputFiles = new ArrayList<>();
Map<String, Resource> outputFiles = new HashMap<>();
for (MultipartFile file : files) { for (MultipartFile file : files) {
Resource fileResource = Resource fileResource =
new ByteArrayResource(file.getBytes()) { new ByteArrayResource(file.getBytes()) {
@ -373,7 +364,7 @@ public class PipelineProcessor {
return Filenames.toSimpleFileName(file.getOriginalFilename()); return Filenames.toSimpleFileName(file.getOriginalFilename());
} }
}; };
outputFiles.add(fileResource); outputFiles.put(fileResource.getFilename(), fileResource);
} }
log.info("Files successfully loaded. Starting processing..."); log.info("Files successfully loaded. Starting processing...");
return outputFiles; return outputFiles;

View File

@ -57,7 +57,7 @@ class PipelineProcessorTest {
} }
}; };
List<Resource> files = List.of(file); Map<String, Resource> files = Map.of(file.getFilename(), file);
when(apiDocService.isMultiInput("/api/v1/filter/filter-page-count")).thenReturn(false); when(apiDocService.isMultiInput("/api/v1/filter/filter-page-count")).thenReturn(false);
when(apiDocService.getExtensionTypes(false, "/api/v1/filter/filter-page-count")) when(apiDocService.getExtensionTypes(false, "/api/v1/filter/filter-page-count"))