mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-03-28 02:31:17 +01:00
FileReadiness (#5985)
This commit is contained in:
@@ -150,6 +150,44 @@ public class ApplicationProperties {
|
||||
@Data
|
||||
public static class AutoPipeline {
|
||||
private String outputFolder;
|
||||
private FileReadiness fileReadiness = new FileReadiness();
|
||||
|
||||
/**
|
||||
* Configuration for the {@link stirling.software.common.util.FileReadinessChecker}.
|
||||
* Controls how the pipeline determines whether a file is fully written and stable before
|
||||
* processing begins.
|
||||
*/
|
||||
@Data
|
||||
public static class FileReadiness {
|
||||
/**
|
||||
* Master toggle. When {@code false} every readiness check is skipped and all files are
|
||||
* considered immediately ready (preserves legacy behaviour).
|
||||
*/
|
||||
private boolean enabled = true;
|
||||
|
||||
/**
|
||||
* How long (in milliseconds) a file must remain unmodified before it is considered
|
||||
* stable. Files modified more recently than this threshold are skipped and retried on
|
||||
* the next scan cycle. Default: 5 000 ms (5 seconds).
|
||||
*/
|
||||
private long settleTimeMillis = 5000;
|
||||
|
||||
/**
|
||||
* How long (in milliseconds) to pause between two consecutive file-size reads when
|
||||
* checking whether a file is still being written. If the size differs between the two
|
||||
* reads the file is considered unstable. This catches active copies on Linux/macOS
|
||||
* where advisory locking alone cannot detect a mid-copy file. Default: 500 ms.
|
||||
*/
|
||||
private long sizeCheckDelayMillis = 500;
|
||||
|
||||
/**
|
||||
* Optional list of file extensions (without the leading dot, case-insensitive) that are
|
||||
* allowed through the readiness check. An empty list means all extensions are accepted.
|
||||
* Example: {@code ["pdf", "tiff"]} will skip any file whose extension is not {@code
|
||||
* pdf} or {@code tiff}.
|
||||
*/
|
||||
private List<String> allowedExtensions = new java.util.ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
@Data
|
||||
@@ -696,8 +734,7 @@ public class ApplicationProperties {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return
|
||||
"""
|
||||
return """
|
||||
Driver {
|
||||
driverName='%s'
|
||||
}
|
||||
|
||||
@@ -341,8 +341,7 @@ public class EmlProcessingUtils {
|
||||
}
|
||||
|
||||
private String getFallbackStyles() {
|
||||
return
|
||||
"""
|
||||
return """
|
||||
/* Minimal fallback - main CSS resource failed to load */
|
||||
body {
|
||||
font-family: var(--font-family, Helvetica, sans-serif);
|
||||
|
||||
@@ -23,7 +23,8 @@ public class EmlToPdf {
|
||||
|
||||
EmlParser.EmailContent emailContent =
|
||||
EmlParser.extractEmailContent(emlBytes, request, customHtmlSanitizer);
|
||||
return EmlProcessingUtils.generateEnhancedEmailHtml(emailContent, request, customHtmlSanitizer);
|
||||
return EmlProcessingUtils.generateEnhancedEmailHtml(
|
||||
emailContent, request, customHtmlSanitizer);
|
||||
}
|
||||
|
||||
public static byte[] convertEmlToPdf(
|
||||
|
||||
@@ -0,0 +1,217 @@
|
||||
package stirling.software.common.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.channels.FileLock;
|
||||
import java.nio.channels.OverlappingFileLockException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.common.model.ApplicationProperties;
|
||||
import stirling.software.common.model.ApplicationProperties.AutoPipeline.FileReadiness;
|
||||
|
||||
/**
|
||||
* Stateless safety checker that decides whether a file is stable and ready for pipeline processing.
|
||||
* Call {@link #isReady(Path)} before moving or processing any file picked up from a watched folder.
|
||||
*
|
||||
* <p>A file is considered ready when ALL of the following hold:
|
||||
*
|
||||
* <ol>
|
||||
* <li>The file exists on disk.
|
||||
* <li>The path refers to a regular file, not a directory.
|
||||
* <li>The file's extension matches the configured allow-list (if one is set).
|
||||
* <li>The file has not been modified within the configured settle window ({@code
|
||||
* settleTimeMillis}), meaning it is no longer being written.
|
||||
* <li>The file size is stable: two reads separated by {@code sizeCheckDelayMillis} return the
|
||||
* same value. This catches active copies on Linux/macOS where advisory file locking alone
|
||||
* cannot detect a mid-copy file.
|
||||
* <li>An exclusive file-system lock can be acquired, confirming no other process holds it.
|
||||
* </ol>
|
||||
*
|
||||
* <p>All behaviour is controlled through {@link FileReadiness} inside {@link
|
||||
* ApplicationProperties.AutoPipeline}. Setting {@code enabled: false} makes every call return
|
||||
* {@code true} so the checker is a no-op drop-in.
|
||||
*/
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class FileReadinessChecker {
|
||||
|
||||
private final ApplicationProperties applicationProperties;
|
||||
|
||||
/**
|
||||
* Returns {@code true} when the file at {@code path} passes every readiness check and is safe
|
||||
* to hand off to the pipeline for processing. Returns {@code false} when any check fails; the
|
||||
* caller should skip the file and retry on the next scan cycle.
|
||||
*/
|
||||
public boolean isReady(Path path) {
|
||||
FileReadiness config = applicationProperties.getAutoPipeline().getFileReadiness();
|
||||
|
||||
if (!config.isEnabled()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!existsAsRegularFile(path)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!isExtensionAllowed(path, config.getAllowedExtensions())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!hasSettled(path, config.getSettleTimeMillis())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!hasSizeStabilized(path, config.getSizeCheckDelayMillis())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isLocked(path)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Individual checks
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
private boolean existsAsRegularFile(Path path) {
|
||||
if (!Files.exists(path)) {
|
||||
log.debug("File does not exist, skipping: {}", path);
|
||||
return false;
|
||||
}
|
||||
if (!Files.isRegularFile(path)) {
|
||||
log.debug("Path is not a regular file (directory or symlink?), skipping: {}", path);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} when {@code allowedExtensions} is empty (no filter) or when the file's
|
||||
* extension (case-insensitive) appears in the list.
|
||||
*/
|
||||
private boolean isExtensionAllowed(Path path, List<String> allowedExtensions) {
|
||||
if (allowedExtensions == null || allowedExtensions.isEmpty()) {
|
||||
return true;
|
||||
}
|
||||
String filename = path.getFileName().toString();
|
||||
String extension =
|
||||
filename.contains(".")
|
||||
? filename.substring(filename.lastIndexOf('.') + 1).toLowerCase(Locale.ROOT)
|
||||
: "";
|
||||
boolean allowed =
|
||||
allowedExtensions.stream().anyMatch(ext -> ext.equalsIgnoreCase(extension));
|
||||
if (!allowed) {
|
||||
log.debug(
|
||||
"File '{}' has extension '{}' which is not in the allowed list {}, skipping",
|
||||
filename,
|
||||
extension,
|
||||
allowedExtensions);
|
||||
}
|
||||
return allowed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} when the file's last-modified timestamp is at least {@code
|
||||
* settleTimeMillis} milliseconds in the past, indicating the write has completed and the file
|
||||
* has "settled".
|
||||
*/
|
||||
private boolean hasSettled(Path path, long settleTimeMillis) {
|
||||
try {
|
||||
long lastModified = Files.getLastModifiedTime(path).toMillis();
|
||||
long ageMillis = System.currentTimeMillis() - lastModified;
|
||||
boolean settled = ageMillis >= settleTimeMillis;
|
||||
if (!settled) {
|
||||
log.debug(
|
||||
"File '{}' was modified {}ms ago (settle threshold: {}ms), not yet ready",
|
||||
path.getFileName(),
|
||||
ageMillis,
|
||||
settleTimeMillis);
|
||||
}
|
||||
return settled;
|
||||
} catch (IOException e) {
|
||||
log.warn(
|
||||
"Could not read last-modified time for '{}', treating as not settled: {}",
|
||||
path,
|
||||
e.getMessage());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} when the file size is the same before and after a short pause of {@code
|
||||
* sizeCheckDelayMillis} milliseconds. A size change indicates another process is still
|
||||
* appending to the file. This is the primary write-detection mechanism on Linux/macOS, where
|
||||
* mandatory file locking is not enforced by the OS.
|
||||
*/
|
||||
private boolean hasSizeStabilized(Path path, long sizeCheckDelayMillis) {
|
||||
try {
|
||||
long sizeBefore = Files.size(path);
|
||||
Thread.sleep(sizeCheckDelayMillis);
|
||||
long sizeAfter = Files.size(path);
|
||||
boolean stable = sizeBefore == sizeAfter;
|
||||
if (!stable) {
|
||||
log.debug(
|
||||
"File '{}' size changed from {} to {} bytes during stability check,"
|
||||
+ " not yet ready",
|
||||
path.getFileName(),
|
||||
sizeBefore,
|
||||
sizeAfter);
|
||||
}
|
||||
return stable;
|
||||
} catch (IOException e) {
|
||||
log.warn(
|
||||
"Could not read file size for '{}', treating as unstable: {}",
|
||||
path,
|
||||
e.getMessage());
|
||||
return false;
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
log.warn(
|
||||
"Size stability check interrupted for '{}', treating as unstable",
|
||||
path.getFileName());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} when an exclusive file-system lock cannot be acquired, which indicates
|
||||
* another process still holds the file open for writing.
|
||||
*
|
||||
* <p>{@link OverlappingFileLockException} is also treated as locked: the JVM already holds a
|
||||
* lock on this file (e.g. from another thread), so it is unsafe to process.
|
||||
*/
|
||||
private boolean isLocked(Path path) {
|
||||
try (RandomAccessFile raf = new RandomAccessFile(path.toFile(), "rw");
|
||||
FileChannel channel = raf.getChannel()) {
|
||||
FileLock lock = channel.tryLock();
|
||||
if (lock == null) {
|
||||
log.debug("File '{}' is locked by another process", path.getFileName());
|
||||
return true;
|
||||
}
|
||||
lock.release();
|
||||
return false;
|
||||
} catch (OverlappingFileLockException e) {
|
||||
log.debug("File '{}' is already locked by this JVM", path.getFileName());
|
||||
return true;
|
||||
} catch (IOException e) {
|
||||
log.debug(
|
||||
"Could not acquire lock on '{}', treating as locked: {}",
|
||||
path.getFileName(),
|
||||
e.getMessage());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,357 @@
|
||||
package stirling.software.common.util;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.mockito.Answers.CALLS_REAL_METHODS;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.channels.FileLock;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.attribute.FileTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.DisplayName;
|
||||
import org.junit.jupiter.api.Nested;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.MockedStatic;
|
||||
import org.mockito.MockitoAnnotations;
|
||||
|
||||
import stirling.software.common.model.ApplicationProperties;
|
||||
import stirling.software.common.model.ApplicationProperties.AutoPipeline.FileReadiness;
|
||||
|
||||
@DisplayName("FileReadinessChecker")
|
||||
class FileReadinessCheckerTest {
|
||||
|
||||
@TempDir Path tempDir;
|
||||
|
||||
@Mock ApplicationProperties applicationProperties;
|
||||
@Mock ApplicationProperties.AutoPipeline autoPipeline;
|
||||
|
||||
/** Real config object — easier to tweak per test than chaining multiple stubs. */
|
||||
FileReadiness config;
|
||||
|
||||
FileReadinessChecker checker;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
MockitoAnnotations.openMocks(this);
|
||||
|
||||
config = new FileReadiness();
|
||||
config.setEnabled(true);
|
||||
config.setSettleTimeMillis(0); // instant settle by default — individual tests override
|
||||
config.setSizeCheckDelayMillis(1); // minimal pause keeps tests fast
|
||||
config.setAllowedExtensions(new ArrayList<>());
|
||||
|
||||
when(applicationProperties.getAutoPipeline()).thenReturn(autoPipeline);
|
||||
when(autoPipeline.getFileReadiness()).thenReturn(config);
|
||||
|
||||
checker = new FileReadinessChecker(applicationProperties);
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Master toggle
|
||||
// =========================================================================
|
||||
|
||||
@Nested
|
||||
@DisplayName("when enabled=false")
|
||||
class WhenDisabled {
|
||||
|
||||
@Test
|
||||
@DisplayName("always returns true regardless of file state")
|
||||
void alwaysReady() throws IOException {
|
||||
config.setEnabled(false);
|
||||
|
||||
// Non-existent path — would normally fail check #1
|
||||
Path ghost = tempDir.resolve("does-not-exist.pdf");
|
||||
assertTrue(checker.isReady(ghost));
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Check #1 + #2: existence and regular-file guard
|
||||
// =========================================================================
|
||||
|
||||
@Nested
|
||||
@DisplayName("existence and file-type checks")
|
||||
class ExistenceChecks {
|
||||
|
||||
@Test
|
||||
@DisplayName("non-existent path → not ready")
|
||||
void fileDoesNotExist() {
|
||||
Path ghost = tempDir.resolve("ghost.pdf");
|
||||
assertFalse(checker.isReady(ghost));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("path is a directory → not ready")
|
||||
void pathIsDirectory() throws IOException {
|
||||
Path dir = tempDir.resolve("subdir");
|
||||
Files.createDirectory(dir);
|
||||
assertFalse(checker.isReady(dir));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("path is a regular file → passes existence checks")
|
||||
void regularFilePassesExistenceCheck() throws IOException {
|
||||
Path file = realFile("test.pdf", "content");
|
||||
setLastModifiedInPast(file, 60_000);
|
||||
assertTrue(checker.isReady(file));
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Check #3: extension filter
|
||||
// =========================================================================
|
||||
|
||||
@Nested
|
||||
@DisplayName("extension filter")
|
||||
class ExtensionFilter {
|
||||
|
||||
@Test
|
||||
@DisplayName("empty allow-list → all extensions accepted")
|
||||
void emptyAllowListAcceptsAll() throws IOException {
|
||||
config.setAllowedExtensions(new ArrayList<>()); // empty = no filter
|
||||
Path file = realFile("report.docx", "data");
|
||||
setLastModifiedInPast(file, 60_000);
|
||||
assertTrue(checker.isReady(file));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("extension in allow-list → passes")
|
||||
void extensionInAllowList() throws IOException {
|
||||
config.setAllowedExtensions(List.of("pdf", "tiff"));
|
||||
Path file = realFile("scan.pdf", "data");
|
||||
setLastModifiedInPast(file, 60_000);
|
||||
assertTrue(checker.isReady(file));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("extension not in allow-list → not ready")
|
||||
void extensionNotInAllowList() throws IOException {
|
||||
config.setAllowedExtensions(List.of("pdf", "tiff"));
|
||||
Path file = realFile("document.docx", "data");
|
||||
setLastModifiedInPast(file, 60_000);
|
||||
assertFalse(checker.isReady(file));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("extension matching is case-insensitive")
|
||||
void extensionMatchIsCaseInsensitive() throws IOException {
|
||||
config.setAllowedExtensions(List.of("PDF"));
|
||||
Path file = realFile("scan.pdf", "data");
|
||||
setLastModifiedInPast(file, 60_000);
|
||||
assertTrue(checker.isReady(file));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("file without extension and non-empty allow-list → not ready")
|
||||
void fileWithNoExtension() throws IOException {
|
||||
config.setAllowedExtensions(List.of("pdf"));
|
||||
Path file = realFile("README", "data");
|
||||
setLastModifiedInPast(file, 60_000);
|
||||
assertFalse(checker.isReady(file));
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Check #4: settle-time (last-modified age)
|
||||
// =========================================================================
|
||||
|
||||
@Nested
|
||||
@DisplayName("settle-time check")
|
||||
class SettleTime {
|
||||
|
||||
@Test
|
||||
@DisplayName("recently modified file → not ready")
|
||||
void recentlyModified_notReady() throws IOException {
|
||||
config.setSettleTimeMillis(60_000); // require 1 minute of quiet
|
||||
Path file = realFile("new.pdf", "data");
|
||||
// last-modified is now (just created) — well within the threshold
|
||||
assertFalse(checker.isReady(file));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("file settled for longer than threshold → ready")
|
||||
void settled_ready() throws IOException {
|
||||
config.setSettleTimeMillis(5_000);
|
||||
Path file = realFile("old.pdf", "data");
|
||||
setLastModifiedInPast(file, 10_000); // 10 s ago — older than 5 s threshold
|
||||
assertTrue(checker.isReady(file));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("settle threshold of 0 ms passes any file")
|
||||
void zeroThreshold_alwaysPasses() throws IOException {
|
||||
config.setSettleTimeMillis(0);
|
||||
Path file = realFile("instant.pdf", "data");
|
||||
// last-modified is right now; 0 ms threshold means anything passes
|
||||
assertTrue(checker.isReady(file));
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Check #5: size stability
|
||||
// =========================================================================
|
||||
|
||||
@Nested
|
||||
@DisplayName("size-stability check")
|
||||
class SizeStability {
|
||||
|
||||
@Test
|
||||
@DisplayName("size unchanged between two reads → ready")
|
||||
void sizeStable_ready() throws IOException {
|
||||
config.setSizeCheckDelayMillis(1);
|
||||
Path file = realFile("stable.pdf", "fixed content");
|
||||
setLastModifiedInPast(file, 60_000);
|
||||
assertTrue(checker.isReady(file));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("size changes between two reads → not ready")
|
||||
void sizeChanging_notReady() throws IOException {
|
||||
config.setSizeCheckDelayMillis(1);
|
||||
Path file = realFile("growing.pdf", "initial");
|
||||
setLastModifiedInPast(file, 60_000);
|
||||
|
||||
// Use MockedStatic to control what Files.size() returns on each call
|
||||
// while leaving all other Files.* methods intact.
|
||||
AtomicInteger sizeCallCount = new AtomicInteger(0);
|
||||
try (MockedStatic<Files> mockedFiles = mockStatic(Files.class, CALLS_REAL_METHODS)) {
|
||||
mockedFiles
|
||||
.when(() -> Files.size(file))
|
||||
.thenAnswer(
|
||||
inv ->
|
||||
sizeCallCount.incrementAndGet() == 1
|
||||
? 100L // first read: 100 bytes
|
||||
: 200L); // second read: 200 bytes — changed!
|
||||
assertFalse(checker.isReady(file));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Check #6: file-lock check
|
||||
// =========================================================================
|
||||
|
||||
@Nested
|
||||
@DisplayName("file-lock check")
|
||||
class FileLockCheck {
|
||||
|
||||
@Test
|
||||
@DisplayName("file held open with exclusive lock by another thread → not ready")
|
||||
void fileLocked_notReady() throws IOException, InterruptedException {
|
||||
Path file = realFile("locked.pdf", "data");
|
||||
setLastModifiedInPast(file, 60_000);
|
||||
|
||||
CountDownLatch lockAcquired = new CountDownLatch(1);
|
||||
CountDownLatch testDone = new CountDownLatch(1);
|
||||
AtomicInteger lockThreadFailed = new AtomicInteger(0);
|
||||
|
||||
Thread lockHolder =
|
||||
new Thread(
|
||||
() -> {
|
||||
try (RandomAccessFile raf =
|
||||
new RandomAccessFile(file.toFile(), "rw");
|
||||
FileChannel channel = raf.getChannel();
|
||||
FileLock lock = channel.lock()) {
|
||||
lockAcquired.countDown();
|
||||
testDone.await(10, TimeUnit.SECONDS);
|
||||
} catch (Exception e) {
|
||||
lockThreadFailed.set(1);
|
||||
lockAcquired.countDown();
|
||||
}
|
||||
});
|
||||
lockHolder.setDaemon(true);
|
||||
lockHolder.start();
|
||||
lockAcquired.await(5, TimeUnit.SECONDS);
|
||||
|
||||
try {
|
||||
if (lockThreadFailed.get() == 0) {
|
||||
// Lock was successfully held — the checker must see it as locked.
|
||||
// On JVM, tryLock() from a second thread in the same process throws
|
||||
// OverlappingFileLockException (or returns null on some platforms), both of
|
||||
// which isLocked() maps to true.
|
||||
assertFalse(checker.isReady(file));
|
||||
}
|
||||
// If locking failed on this platform we simply skip the assertion rather than
|
||||
// failing the build — the logic path is still exercised by other tests.
|
||||
} finally {
|
||||
testDone.countDown();
|
||||
lockHolder.join(5_000);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("file with no external lock and all checks passing → ready")
|
||||
void noLock_ready() throws IOException {
|
||||
Path file = realFile("unlocked.pdf", "data");
|
||||
setLastModifiedInPast(file, 60_000);
|
||||
assertTrue(checker.isReady(file));
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Full happy-path integration
|
||||
// =========================================================================
|
||||
|
||||
@Nested
|
||||
@DisplayName("full happy path")
|
||||
class HappyPath {
|
||||
|
||||
@Test
|
||||
@DisplayName("all checks pass → ready")
|
||||
void allChecksPass_ready() throws IOException {
|
||||
config.setSettleTimeMillis(5_000);
|
||||
config.setSizeCheckDelayMillis(1);
|
||||
config.setAllowedExtensions(List.of("pdf"));
|
||||
|
||||
Path file = realFile("invoice.pdf", "PDF content");
|
||||
setLastModifiedInPast(file, 10_000);
|
||||
|
||||
assertTrue(checker.isReady(file));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("first failing check short-circuits evaluation")
|
||||
void shortCircuitsOnFirstFailure() throws IOException {
|
||||
// Extension filter will reject — settle / size / lock checks must never run
|
||||
config.setAllowedExtensions(List.of("pdf"));
|
||||
config.setSettleTimeMillis(0);
|
||||
config.setSizeCheckDelayMillis(1);
|
||||
|
||||
Path file = realFile("archive.zip", "ZIP data");
|
||||
setLastModifiedInPast(file, 60_000);
|
||||
|
||||
assertFalse(checker.isReady(file));
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Helpers
|
||||
// =========================================================================
|
||||
|
||||
private Path realFile(String name, String content) throws IOException {
|
||||
Path file = tempDir.resolve(name);
|
||||
Files.writeString(file, content);
|
||||
return file;
|
||||
}
|
||||
|
||||
/**
|
||||
* Back-dates the last-modified time of {@code path} by {@code millisAgo} so that settle-time
|
||||
* checks pass without actually waiting.
|
||||
*/
|
||||
private void setLastModifiedInPast(Path path, long millisAgo) throws IOException {
|
||||
Files.setLastModifiedTime(
|
||||
path, FileTime.fromMillis(System.currentTimeMillis() - millisAgo));
|
||||
}
|
||||
}
|
||||
@@ -66,7 +66,8 @@ public class ExtractImagesController {
|
||||
Set<Integer> processedImageHashes = new HashSet<>();
|
||||
|
||||
TempFile zipFile = new TempFile(tempFileManager, ".zip");
|
||||
try (ZipOutputStream zipStream = new ZipOutputStream(Files.newOutputStream(zipFile.getPath()));
|
||||
try (ZipOutputStream zipStream =
|
||||
new ZipOutputStream(Files.newOutputStream(zipFile.getPath()));
|
||||
PDDocument pdfDoc = pdfDocumentFactory.load(file)) {
|
||||
|
||||
zipStream.setLevel(Deflater.BEST_COMPRESSION);
|
||||
@@ -75,8 +76,12 @@ public class ExtractImagesController {
|
||||
for (int pageIndex = 0; pageIndex < totalPages; pageIndex++) {
|
||||
PDPage currentPage = pdfDoc.getPage(pageIndex);
|
||||
extractAndAddImagesToZip(
|
||||
currentPage, imageFormat, baseFilename, pageIndex + 1,
|
||||
processedImageHashes, zipStream);
|
||||
currentPage,
|
||||
imageFormat,
|
||||
baseFilename,
|
||||
pageIndex + 1,
|
||||
processedImageHashes,
|
||||
zipStream);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
zipFile.close();
|
||||
@@ -119,7 +124,12 @@ public class ExtractImagesController {
|
||||
BufferedImage convertedImage = convertImageToFormat(sourceImage, imageFormat);
|
||||
|
||||
String imagePath =
|
||||
baseFilename + "_page_" + pageNumber + "_" + imageCount++ + "."
|
||||
baseFilename
|
||||
+ "_page_"
|
||||
+ pageNumber
|
||||
+ "_"
|
||||
+ imageCount++
|
||||
+ "."
|
||||
+ imageFormat;
|
||||
ByteArrayOutputStream imageBuffer = new ByteArrayOutputStream();
|
||||
ImageIO.write(convertedImage, imageFormat, imageBuffer);
|
||||
|
||||
@@ -36,6 +36,7 @@ import stirling.software.SPDF.model.PipelineResult;
|
||||
import stirling.software.SPDF.service.ApiDocService;
|
||||
import stirling.software.common.configuration.RuntimePathConfig;
|
||||
import stirling.software.common.service.PostHogService;
|
||||
import stirling.software.common.util.FileReadinessChecker;
|
||||
|
||||
import tools.jackson.databind.ObjectMapper;
|
||||
|
||||
@@ -50,6 +51,7 @@ public class PipelineDirectoryProcessor {
|
||||
private final ApiDocService apiDocService;
|
||||
private final PipelineProcessor processor;
|
||||
private final PostHogService postHogService;
|
||||
private final FileReadinessChecker fileReadinessChecker;
|
||||
private final List<String> watchedFoldersDirs;
|
||||
private final String finishedFoldersDir;
|
||||
|
||||
@@ -62,11 +64,13 @@ public class PipelineDirectoryProcessor {
|
||||
ApiDocService apiDocService,
|
||||
PipelineProcessor processor,
|
||||
PostHogService postHogService,
|
||||
FileReadinessChecker fileReadinessChecker,
|
||||
RuntimePathConfig runtimePathConfig) {
|
||||
this.objectMapper = objectMapper;
|
||||
this.apiDocService = apiDocService;
|
||||
this.processor = processor;
|
||||
this.postHogService = postHogService;
|
||||
this.fileReadinessChecker = fileReadinessChecker;
|
||||
this.watchedFoldersDirs = runtimePathConfig.getPipelineWatchedFoldersPaths();
|
||||
this.finishedFoldersDir = runtimePathConfig.getPipelineFinishedFoldersPath();
|
||||
}
|
||||
@@ -269,6 +273,18 @@ public class PipelineDirectoryProcessor {
|
||||
}
|
||||
return isAllowed;
|
||||
})
|
||||
.filter(
|
||||
path -> {
|
||||
if (!fileReadinessChecker.isReady(path)) {
|
||||
log.info(
|
||||
"File '{}' is not yet ready for processing"
|
||||
+ " (still being written or locked),"
|
||||
+ " will retry on next scan cycle",
|
||||
path.getFileName());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
})
|
||||
.map(Path::toAbsolutePath)
|
||||
.filter(path -> true)
|
||||
.map(Path::toFile)
|
||||
|
||||
@@ -178,8 +178,7 @@ public class ReactRoutingController {
|
||||
String escapedBaseUrlJs = JavaScriptUtils.javaScriptEscape(baseUrl);
|
||||
|
||||
String serverUrl = "(window.location.origin + '" + escapedBaseUrlJs + "')";
|
||||
return
|
||||
"""
|
||||
return """
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
@@ -238,8 +237,7 @@ public class ReactRoutingController {
|
||||
String escapedBaseUrlJs = JavaScriptUtils.javaScriptEscape(baseUrl);
|
||||
|
||||
String serverUrl = "(window.location.origin + '" + escapedBaseUrlJs + "')";
|
||||
return
|
||||
"""
|
||||
return """
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
|
||||
@@ -581,10 +581,10 @@ public class PdfJsonFallbackFontService {
|
||||
|
||||
Character.UnicodeScript script = Character.UnicodeScript.of(codePoint);
|
||||
return switch (script) {
|
||||
// HAN script is used by both Simplified and Traditional Chinese
|
||||
// Default to Simplified (mainland China, 1.4B speakers) as it's more common
|
||||
// Traditional Chinese PDFs are detected via font name aliases (MingLiU, PMingLiU,
|
||||
// etc.)
|
||||
// HAN script is used by both Simplified and Traditional Chinese
|
||||
// Default to Simplified (mainland China, 1.4B speakers) as it's more common
|
||||
// Traditional Chinese PDFs are detected via font name aliases (MingLiU, PMingLiU,
|
||||
// etc.)
|
||||
case HAN -> FALLBACK_FONT_CJK_ID;
|
||||
case HIRAGANA, KATAKANA -> FALLBACK_FONT_JP_ID;
|
||||
case HANGUL -> FALLBACK_FONT_KR_ID;
|
||||
|
||||
@@ -237,6 +237,14 @@ system:
|
||||
databaseBackup:
|
||||
cron: "0 0 0 * * ?" # Cron expression for automatic database backups "0 0 0 * * ?" daily at midnight
|
||||
|
||||
autoPipeline:
|
||||
outputFolder: "" # Output folder for processed pipeline files (leave empty for default)
|
||||
fileReadiness:
|
||||
enabled: true # Set to 'false' to skip all readiness checks and process files immediately (legacy behaviour)
|
||||
settleTimeMillis: 5000 # How long (ms) a file must be unmodified before it is considered fully written and stable. Default: 5000 (5 seconds)
|
||||
sizeCheckDelayMillis: 500 # Pause (ms) between two file-size reads used to detect active writes (Linux/macOS mid-copy detection). Default: 500
|
||||
allowedExtensions: [] # Optional extension allow-list (case-insensitive, without the leading dot). Empty list = accept all extensions. Example: ["pdf", "tiff"]
|
||||
|
||||
ui:
|
||||
appNameNavbar: "" # name displayed on the navigation bar
|
||||
logoStyle: classic # Options: 'classic' (default - classic S icon) or 'modern' (minimalist logo)
|
||||
|
||||
@@ -157,8 +157,7 @@ public class SecurityConfiguration {
|
||||
Pattern allowedChars = Pattern.compile("[\\p{IsAssigned}&&[^\\p{IsControl}]]*");
|
||||
|
||||
firewall.setAllowedHeaderValues(
|
||||
headerValue ->
|
||||
headerValue != null && allowedChars.matcher(headerValue).matches());
|
||||
headerValue -> headerValue != null && allowedChars.matcher(headerValue).matches());
|
||||
|
||||
// Apply the same rules to parameter values for consistency.
|
||||
firewall.setAllowedParameterValues(
|
||||
|
||||
@@ -161,7 +161,7 @@ public class EmailService {
|
||||
String subject = "Welcome to Stirling PDF";
|
||||
|
||||
String body =
|
||||
"""
|
||||
"""
|
||||
<html><body style="margin: 0; padding: 0;">
|
||||
<div style="font-family: Arial, sans-serif; background-color: #f8f9fa; padding: 20px;">
|
||||
<div style="max-width: 600px; margin: auto; background-color: #ffffff; border-radius: 8px; overflow: hidden; border: 1px solid #e0e0e0;">
|
||||
@@ -220,7 +220,7 @@ public class EmailService {
|
||||
String subject = "You've been invited to Stirling PDF";
|
||||
|
||||
String body =
|
||||
"""
|
||||
"""
|
||||
<html><body style="margin: 0; padding: 0;">
|
||||
<div style="font-family: Arial, sans-serif; background-color: #f8f9fa; padding: 20px;">
|
||||
<div style="max-width: 600px; margin: auto; background-color: #ffffff; border-radius: 8px; overflow: hidden; border: 1px solid #e0e0e0;">
|
||||
@@ -269,8 +269,7 @@ public class EmailService {
|
||||
String passwordSection =
|
||||
newPassword == null
|
||||
? ""
|
||||
:
|
||||
"""
|
||||
: """
|
||||
<div style=\"background-color: #f8f9fa; border-left: 4px solid #007bff; padding: 15px; margin: 20px 0; border-radius: 4px;\">
|
||||
<p style=\"margin: 0;\"><strong>Temporary Password:</strong> %s</p>
|
||||
</div>
|
||||
@@ -278,7 +277,7 @@ public class EmailService {
|
||||
.formatted(newPassword);
|
||||
|
||||
String body =
|
||||
"""
|
||||
"""
|
||||
<html><body style=\"margin: 0; padding: 0;\">
|
||||
<div style=\"font-family: Arial, sans-serif; background-color: #f8f9fa; padding: 20px;\">
|
||||
<div style=\"max-width: 600px; margin: auto; background-color: #ffffff; border-radius: 8px; overflow: hidden; border: 1px solid #e0e0e0;\">
|
||||
|
||||
Reference in New Issue
Block a user