This commit is contained in:
Anthony Stirling 2025-07-21 13:08:21 +01:00
parent fe4a452a54
commit 73a340cbe2
2 changed files with 137 additions and 114 deletions

View File

@ -23,53 +23,60 @@ public class CleanupAsyncConfig {
exec.setMaxPoolSize(1);
exec.setQueueCapacity(100);
exec.setThreadNamePrefix("cleanup-");
// Set custom rejection handler to log when queue is full
exec.setRejectedExecutionHandler(new RejectedExecutionHandler() {
private volatile long lastRejectionTime = 0;
private volatile int rejectionCount = 0;
@Override
public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) {
long currentTime = System.currentTimeMillis();
rejectionCount++;
// Rate-limit logging to avoid spam
if (currentTime - lastRejectionTime > 60000) { // Log at most once per minute
log.warn("Cleanup task rejected #{} - queue full! Active: {}, Queue size: {}, Pool size: {}",
rejectionCount,
executor.getActiveCount(),
executor.getQueue().size(),
executor.getPoolSize());
lastRejectionTime = currentTime;
}
// Try to discard oldest task and add this one
if (executor.getQueue().poll() != null) {
log.debug("Discarded oldest queued cleanup task to make room");
try {
executor.execute(r);
return;
} catch (Exception e) {
// If still rejected, fall back to caller-runs
exec.setRejectedExecutionHandler(
new RejectedExecutionHandler() {
private volatile long lastRejectionTime = 0;
private volatile int rejectionCount = 0;
@Override
public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) {
long currentTime = System.currentTimeMillis();
rejectionCount++;
// Rate-limit logging to avoid spam
if (currentTime - lastRejectionTime
> 60000) { // Log at most once per minute
log.warn(
"Cleanup task rejected #{} - queue full! Active: {}, Queue size: {}, Pool size: {}",
rejectionCount,
executor.getActiveCount(),
executor.getQueue().size(),
executor.getPoolSize());
lastRejectionTime = currentTime;
}
// Try to discard oldest task and add this one
if (executor.getQueue().poll() != null) {
log.debug("Discarded oldest queued cleanup task to make room");
try {
executor.execute(r);
return;
} catch (Exception e) {
// If still rejected, fall back to caller-runs
}
}
// Last resort: caller-runs with timeout protection
log.warn(
"Executing cleanup task #{} on scheduler thread as last resort",
rejectionCount);
long startTime = System.currentTimeMillis();
try {
r.run();
long duration = System.currentTimeMillis() - startTime;
if (duration > 30000) { // Warn if cleanup blocks scheduler for >30s
log.warn(
"Cleanup task on scheduler thread took {}ms - consider tuning",
duration);
}
} catch (Exception e) {
log.error("Cleanup task failed on scheduler thread", e);
}
}
}
// Last resort: caller-runs with timeout protection
log.warn("Executing cleanup task #{} on scheduler thread as last resort", rejectionCount);
long startTime = System.currentTimeMillis();
try {
r.run();
long duration = System.currentTimeMillis() - startTime;
if (duration > 30000) { // Warn if cleanup blocks scheduler for >30s
log.warn("Cleanup task on scheduler thread took {}ms - consider tuning", duration);
}
} catch (Exception e) {
log.error("Cleanup task failed on scheduler thread", e);
}
}
});
});
exec.initialize();
return exec;
}

View File

@ -49,10 +49,10 @@ public class TempFileCleanupService {
// Maximum recursion depth for directory traversal
private static final int MAX_RECURSION_DEPTH = 5;
// Maximum consecutive failures before aborting batch cleanup
private static final int MAX_CONSECUTIVE_FAILURES = 10;
// Cleanup state management
private final AtomicBoolean cleanupRunning = new AtomicBoolean(false);
private final AtomicLong lastCleanupDuration = new AtomicLong(0);
@ -142,46 +142,68 @@ public class TempFileCleanupService {
public CompletableFuture<Void> scheduledCleanup() {
// Check if cleanup is already running
if (!cleanupRunning.compareAndSet(false, true)) {
log.warn("Cleanup already in progress (running for {}ms), skipping this cycle",
System.currentTimeMillis() - lastCleanupTimestamp.get());
log.warn(
"Cleanup already in progress (running for {}ms), skipping this cycle",
System.currentTimeMillis() - lastCleanupTimestamp.get());
return CompletableFuture.completedFuture(null);
}
// Calculate timeout as 2x cleanup interval
long timeoutMinutes = applicationProperties.getSystem().getTempFileManagement().getCleanupIntervalMinutes() * 2;
return CompletableFuture.supplyAsync(() -> {
long startTime = System.currentTimeMillis();
lastCleanupTimestamp.set(startTime);
long cleanupNumber = cleanupCount.incrementAndGet();
try {
log.info("Starting cleanup #{} with {}min timeout", cleanupNumber, timeoutMinutes);
doScheduledCleanup();
long duration = System.currentTimeMillis() - startTime;
lastCleanupDuration.set(duration);
log.info("Cleanup #{} completed successfully in {}ms", cleanupNumber, duration);
return null;
} catch (Exception e) {
long duration = System.currentTimeMillis() - startTime;
lastCleanupDuration.set(duration);
log.error("Cleanup #{} failed after {}ms", cleanupNumber, duration, e);
return null;
} finally {
cleanupRunning.set(false);
}
}).orTimeout(timeoutMinutes, TimeUnit.MINUTES)
.exceptionally(throwable -> {
if (throwable.getCause() instanceof TimeoutException) {
log.error("Cleanup #{} timed out after {}min - forcing cleanup state reset",
cleanupCount.get(), timeoutMinutes);
cleanupRunning.set(false);
}
return null;
});
long timeoutMinutes =
applicationProperties
.getSystem()
.getTempFileManagement()
.getCleanupIntervalMinutes()
* 2;
CompletableFuture<Void> cleanupFuture =
CompletableFuture.runAsync(
() -> {
long startTime = System.currentTimeMillis();
lastCleanupTimestamp.set(startTime);
long cleanupNumber = cleanupCount.incrementAndGet();
try {
log.info(
"Starting cleanup #{} with {}min timeout",
cleanupNumber,
timeoutMinutes);
doScheduledCleanup();
long duration = System.currentTimeMillis() - startTime;
lastCleanupDuration.set(duration);
log.info(
"Cleanup #{} completed successfully in {}ms",
cleanupNumber,
duration);
} catch (Exception e) {
long duration = System.currentTimeMillis() - startTime;
lastCleanupDuration.set(duration);
log.error(
"Cleanup #{} failed after {}ms",
cleanupNumber,
duration,
e);
} finally {
cleanupRunning.set(false);
}
});
return cleanupFuture
.orTimeout(timeoutMinutes, TimeUnit.MINUTES)
.exceptionally(
throwable -> {
if (throwable.getCause() instanceof TimeoutException) {
log.error(
"Cleanup #{} timed out after {}min - forcing cleanup state reset",
cleanupCount.get(),
timeoutMinutes);
cleanupRunning.set(false);
}
return null;
});
}
/** Internal method that performs the actual cleanup work */
private void doScheduledCleanup() {
long maxAgeMillis = tempFileManager.getMaxAgeMillis();
@ -407,10 +429,12 @@ public class TempFileCleanupService {
} else {
log.warn("Failed to delete temp file: {}", path, e);
}
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
log.error("Aborting directory cleanup after {} consecutive failures in: {}",
consecutiveFailures, directory);
log.error(
"Aborting directory cleanup after {} consecutive failures in: {}",
consecutiveFailures,
directory);
return; // Early exit from cleanup
}
}
@ -418,10 +442,12 @@ public class TempFileCleanupService {
} catch (Exception e) {
consecutiveFailures++;
log.warn("Error processing path: {}", path, e);
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
log.error("Aborting directory cleanup after {} consecutive failures in: {}",
consecutiveFailures, directory);
log.error(
"Aborting directory cleanup after {} consecutive failures in: {}",
consecutiveFailures,
directory);
return; // Early exit from cleanup
}
}
@ -536,10 +562,8 @@ public class TempFileCleanupService {
log.warn("Failed to clean up PDFBox cache file", e);
}
}
/**
* Get cleanup status and metrics for monitoring
*/
/** Get cleanup status and metrics for monitoring */
public String getCleanupStatus() {
if (cleanupRunning.get()) {
long runningTime = System.currentTimeMillis() - lastCleanupTimestamp.get();
@ -549,38 +573,30 @@ public class TempFileCleanupService {
long lastTime = lastCleanupTimestamp.get();
if (lastTime > 0) {
long timeSinceLastRun = System.currentTimeMillis() - lastTime;
return String.format("Last cleanup #%d: %dms duration, %dms ago",
cleanupCount.get(), lastDuration, timeSinceLastRun);
return String.format(
"Last cleanup #%d: %dms duration, %dms ago",
cleanupCount.get(), lastDuration, timeSinceLastRun);
} else {
return "No cleanup runs yet";
}
}
}
/**
* Check if cleanup is currently running
*/
/** Check if cleanup is currently running */
public boolean isCleanupRunning() {
return cleanupRunning.get();
}
/**
* Get cleanup metrics
*/
/** Get cleanup metrics */
public CleanupMetrics getMetrics() {
return new CleanupMetrics(
cleanupCount.get(),
lastCleanupDuration.get(),
lastCleanupTimestamp.get(),
cleanupRunning.get()
);
cleanupCount.get(),
lastCleanupDuration.get(),
lastCleanupTimestamp.get(),
cleanupRunning.get());
}
/** Simple record for cleanup metrics */
public record CleanupMetrics(
long totalRuns,
long lastDurationMs,
long lastRunTimestamp,
boolean currentlyRunning
) {}
long totalRuns, long lastDurationMs, long lastRunTimestamp, boolean currentlyRunning) {}
}