This commit is contained in:
Anthony Stirling 2025-07-21 13:08:21 +01:00
parent fe4a452a54
commit 73a340cbe2
2 changed files with 137 additions and 114 deletions

View File

@ -25,7 +25,8 @@ public class CleanupAsyncConfig {
exec.setThreadNamePrefix("cleanup-"); exec.setThreadNamePrefix("cleanup-");
// Set custom rejection handler to log when queue is full // Set custom rejection handler to log when queue is full
exec.setRejectedExecutionHandler(new RejectedExecutionHandler() { exec.setRejectedExecutionHandler(
new RejectedExecutionHandler() {
private volatile long lastRejectionTime = 0; private volatile long lastRejectionTime = 0;
private volatile int rejectionCount = 0; private volatile int rejectionCount = 0;
@ -35,8 +36,10 @@ public class CleanupAsyncConfig {
rejectionCount++; rejectionCount++;
// Rate-limit logging to avoid spam // Rate-limit logging to avoid spam
if (currentTime - lastRejectionTime > 60000) { // Log at most once per minute if (currentTime - lastRejectionTime
log.warn("Cleanup task rejected #{} - queue full! Active: {}, Queue size: {}, Pool size: {}", > 60000) { // Log at most once per minute
log.warn(
"Cleanup task rejected #{} - queue full! Active: {}, Queue size: {}, Pool size: {}",
rejectionCount, rejectionCount,
executor.getActiveCount(), executor.getActiveCount(),
executor.getQueue().size(), executor.getQueue().size(),
@ -56,13 +59,17 @@ public class CleanupAsyncConfig {
} }
// Last resort: caller-runs with timeout protection // Last resort: caller-runs with timeout protection
log.warn("Executing cleanup task #{} on scheduler thread as last resort", rejectionCount); log.warn(
"Executing cleanup task #{} on scheduler thread as last resort",
rejectionCount);
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
try { try {
r.run(); r.run();
long duration = System.currentTimeMillis() - startTime; long duration = System.currentTimeMillis() - startTime;
if (duration > 30000) { // Warn if cleanup blocks scheduler for >30s if (duration > 30000) { // Warn if cleanup blocks scheduler for >30s
log.warn("Cleanup task on scheduler thread took {}ms - consider tuning", duration); log.warn(
"Cleanup task on scheduler thread took {}ms - consider tuning",
duration);
} }
} catch (Exception e) { } catch (Exception e) {
log.error("Cleanup task failed on scheduler thread", e); log.error("Cleanup task failed on scheduler thread", e);

View File

@ -142,40 +142,62 @@ public class TempFileCleanupService {
public CompletableFuture<Void> scheduledCleanup() { public CompletableFuture<Void> scheduledCleanup() {
// Check if cleanup is already running // Check if cleanup is already running
if (!cleanupRunning.compareAndSet(false, true)) { if (!cleanupRunning.compareAndSet(false, true)) {
log.warn("Cleanup already in progress (running for {}ms), skipping this cycle", log.warn(
"Cleanup already in progress (running for {}ms), skipping this cycle",
System.currentTimeMillis() - lastCleanupTimestamp.get()); System.currentTimeMillis() - lastCleanupTimestamp.get());
return CompletableFuture.completedFuture(null); return CompletableFuture.completedFuture(null);
} }
// Calculate timeout as 2x cleanup interval // Calculate timeout as 2x cleanup interval
long timeoutMinutes = applicationProperties.getSystem().getTempFileManagement().getCleanupIntervalMinutes() * 2; long timeoutMinutes =
applicationProperties
.getSystem()
.getTempFileManagement()
.getCleanupIntervalMinutes()
* 2;
return CompletableFuture.supplyAsync(() -> { CompletableFuture<Void> cleanupFuture =
CompletableFuture.runAsync(
() -> {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
lastCleanupTimestamp.set(startTime); lastCleanupTimestamp.set(startTime);
long cleanupNumber = cleanupCount.incrementAndGet(); long cleanupNumber = cleanupCount.incrementAndGet();
try { try {
log.info("Starting cleanup #{} with {}min timeout", cleanupNumber, timeoutMinutes); log.info(
"Starting cleanup #{} with {}min timeout",
cleanupNumber,
timeoutMinutes);
doScheduledCleanup(); doScheduledCleanup();
long duration = System.currentTimeMillis() - startTime; long duration = System.currentTimeMillis() - startTime;
lastCleanupDuration.set(duration); lastCleanupDuration.set(duration);
log.info("Cleanup #{} completed successfully in {}ms", cleanupNumber, duration); log.info(
return null; "Cleanup #{} completed successfully in {}ms",
cleanupNumber,
duration);
} catch (Exception e) { } catch (Exception e) {
long duration = System.currentTimeMillis() - startTime; long duration = System.currentTimeMillis() - startTime;
lastCleanupDuration.set(duration); lastCleanupDuration.set(duration);
log.error("Cleanup #{} failed after {}ms", cleanupNumber, duration, e); log.error(
return null; "Cleanup #{} failed after {}ms",
cleanupNumber,
duration,
e);
} finally { } finally {
cleanupRunning.set(false); cleanupRunning.set(false);
} }
}).orTimeout(timeoutMinutes, TimeUnit.MINUTES) });
.exceptionally(throwable -> {
return cleanupFuture
.orTimeout(timeoutMinutes, TimeUnit.MINUTES)
.exceptionally(
throwable -> {
if (throwable.getCause() instanceof TimeoutException) { if (throwable.getCause() instanceof TimeoutException) {
log.error("Cleanup #{} timed out after {}min - forcing cleanup state reset", log.error(
cleanupCount.get(), timeoutMinutes); "Cleanup #{} timed out after {}min - forcing cleanup state reset",
cleanupCount.get(),
timeoutMinutes);
cleanupRunning.set(false); cleanupRunning.set(false);
} }
return null; return null;
@ -409,8 +431,10 @@ public class TempFileCleanupService {
} }
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) { if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
log.error("Aborting directory cleanup after {} consecutive failures in: {}", log.error(
consecutiveFailures, directory); "Aborting directory cleanup after {} consecutive failures in: {}",
consecutiveFailures,
directory);
return; // Early exit from cleanup return; // Early exit from cleanup
} }
} }
@ -420,8 +444,10 @@ public class TempFileCleanupService {
log.warn("Error processing path: {}", path, e); log.warn("Error processing path: {}", path, e);
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) { if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
log.error("Aborting directory cleanup after {} consecutive failures in: {}", log.error(
consecutiveFailures, directory); "Aborting directory cleanup after {} consecutive failures in: {}",
consecutiveFailures,
directory);
return; // Early exit from cleanup return; // Early exit from cleanup
} }
} }
@ -537,9 +563,7 @@ public class TempFileCleanupService {
} }
} }
/** /** Get cleanup status and metrics for monitoring */
* Get cleanup status and metrics for monitoring
*/
public String getCleanupStatus() { public String getCleanupStatus() {
if (cleanupRunning.get()) { if (cleanupRunning.get()) {
long runningTime = System.currentTimeMillis() - lastCleanupTimestamp.get(); long runningTime = System.currentTimeMillis() - lastCleanupTimestamp.get();
@ -549,7 +573,8 @@ public class TempFileCleanupService {
long lastTime = lastCleanupTimestamp.get(); long lastTime = lastCleanupTimestamp.get();
if (lastTime > 0) { if (lastTime > 0) {
long timeSinceLastRun = System.currentTimeMillis() - lastTime; long timeSinceLastRun = System.currentTimeMillis() - lastTime;
return String.format("Last cleanup #%d: %dms duration, %dms ago", return String.format(
"Last cleanup #%d: %dms duration, %dms ago",
cleanupCount.get(), lastDuration, timeSinceLastRun); cleanupCount.get(), lastDuration, timeSinceLastRun);
} else { } else {
return "No cleanup runs yet"; return "No cleanup runs yet";
@ -557,30 +582,21 @@ public class TempFileCleanupService {
} }
} }
/** /** Check if cleanup is currently running */
* Check if cleanup is currently running
*/
public boolean isCleanupRunning() { public boolean isCleanupRunning() {
return cleanupRunning.get(); return cleanupRunning.get();
} }
/** /** Get cleanup metrics */
* Get cleanup metrics
*/
public CleanupMetrics getMetrics() { public CleanupMetrics getMetrics() {
return new CleanupMetrics( return new CleanupMetrics(
cleanupCount.get(), cleanupCount.get(),
lastCleanupDuration.get(), lastCleanupDuration.get(),
lastCleanupTimestamp.get(), lastCleanupTimestamp.get(),
cleanupRunning.get() cleanupRunning.get());
);
} }
/** Simple record for cleanup metrics */ /** Simple record for cleanup metrics */
public record CleanupMetrics( public record CleanupMetrics(
long totalRuns, long totalRuns, long lastDurationMs, long lastRunTimestamp, boolean currentlyRunning) {}
long lastDurationMs,
long lastRunTimestamp,
boolean currentlyRunning
) {}
} }