Cache fix issues

2026-02-17 13:52:14 +01:00 · 2025-12-10 23:26:35 +00:00 · 2025-12-10 23:26:35 +00:00 · a73636a597
commit a73636a597
parent d6a83fe6a1
10 changed files with 501 additions and 104 deletions
--- a/app/common/src/main/java/stirling/software/common/model/ApplicationProperties.java
+++ b/app/common/src/main/java/stirling/software/common/model/ApplicationProperties.java
@ -68,6 +68,7 @@ public class ApplicationProperties {

    private AutoPipeline autoPipeline = new AutoPipeline();
    private ProcessExecutor processExecutor = new ProcessExecutor();
+    private PdfEditor pdfEditor = new PdfEditor();

    @Bean
    public PropertySource<?> dynamicYamlPropertySource(ConfigurableEnvironment environment)
@ -100,6 +101,46 @@ public class ApplicationProperties {
        private String outputFolder;
    }

+    /**
+     * Settings for the PDF editor (PDF/JSON conversion). Mirrors the {@code pdfEditor} section of
+     * {@code settings.yml.template}; the field initialisers are the defaults used when a key is
+     * absent.
+     */
+    @Data
+    public static class PdfEditor {
+        private Cache cache = new Cache();
+        private FontNormalization fontNormalization = new FontNormalization();
+        private CffConverter cffConverter = new CffConverter();
+        private Type3 type3 = new Type3();
+        // Location of the fallback font; override to point at a custom fallback font.
+        private String fallbackFont = "classpath:/static/fonts/NotoSans-Regular.ttf";
+
+        /** Limits for the in-memory cache of PDF documents. */
+        @Data
+        public static class Cache {
+            // Max in-memory cache size in bytes; -1 disables the byte cap.
+            private long maxBytes = -1;
+            // Max in-memory cache as a percentage of the JVM max heap; used when maxBytes <= 0.
+            private int maxPercent = 20;
+        }
+
+        /** Toggle for font normalization. */
+        @Data
+        public static class FontNormalization {
+            // Off by default to preserve ToUnicode CMaps (Ghostscript strips Unicode mappings
+            // from CID fonts).
+            private boolean enabled = false;
+        }
+
+        /** Settings for the converter that wraps CFF fonts for browser compatibility. */
+        @Data
+        public static class CffConverter {
+            private boolean enabled = true;
+            // Converter method: "python" (fontTools) or "fontforge" (legacy).
+            private String method = "python";
+            // Python interpreter path.
+            private String pythonCommand = "/opt/venv/bin/python3";
+            // Path to the font wrapping script.
+            private String pythonScript = "/scripts/convert_cff_to_ttf.py";
+            // FontForge executable name or path.
+            private String fontforgeCommand = "fontforge";
+        }
+
+        /** Settings for Type3 font handling. */
+        @Data
+        public static class Type3 {
+            private Library library = new Library();
+
+            /** Settings for the built-in library of converted Type3 font programs. */
+            @Data
+            public static class Library {
+                private boolean enabled = true;
+                // Location of the library index.json; may be overridden with a custom index.
+                private String index = "classpath:/type3/library/index.json";
+            }
+        }
+    }
+
    @Data
    public static class Legal {
        private String termsAndConditions;
--- a/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPdfJsonExceptionHandler.java
+++ b/app/core/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPdfJsonExceptionHandler.java
@ -0,0 +1,44 @@
+package stirling.software.SPDF.controller.api.converters;
+
+import org.springframework.http.HttpStatus;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.ControllerAdvice;
+import org.springframework.web.bind.annotation.ExceptionHandler;
+import org.springframework.web.bind.annotation.ResponseBody;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+import stirling.software.SPDF.exception.CacheUnavailableException;
+
+/**
+ * Translates a {@link CacheUnavailableException} raised by {@link ConvertPdfJsonController} into
+ * an HTTP 410 (Gone) JSON response telling the client to re-upload the document.
+ *
+ * <p>The body has the shape {@code {"error":"cache_unavailable","action":"reupload","message":...}}
+ * and is always sent as {@code application/json} so clients can read the {@code error} field.
+ */
+@ControllerAdvice(assignableTypes = ConvertPdfJsonController.class)
+@Slf4j
+@RequiredArgsConstructor
+public class ConvertPdfJsonExceptionHandler {
+
+    /** Used when the exception carries no message ({@code Map.of} rejects null values). */
+    private static final String DEFAULT_MESSAGE = "Cached document is no longer available";
+
+    private final ObjectMapper objectMapper;
+
+    /**
+     * Builds the 410 response for a missing cache entry.
+     *
+     * @param ex the exception thrown by the conversion service
+     * @return a 410 response whose body is a UTF-8 encoded JSON object
+     */
+    @ExceptionHandler(CacheUnavailableException.class)
+    @ResponseBody
+    public ResponseEntity<byte[]> handleCacheUnavailable(CacheUnavailableException ex) {
+        String message = ex.getMessage() != null ? ex.getMessage() : DEFAULT_MESSAGE;
+        byte[] body;
+        try {
+            body =
+                    objectMapper.writeValueAsBytes(
+                            java.util.Map.of(
+                                    "error", "cache_unavailable",
+                                    "action", "reupload",
+                                    "message", message));
+        } catch (Exception e) {
+            log.warn("Failed to serialize cache_unavailable response: {}", e.getMessage(), e);
+            body = fallbackBody(message);
+        }
+        return ResponseEntity.status(HttpStatus.GONE)
+                .contentType(org.springframework.http.MediaType.APPLICATION_JSON)
+                .body(body);
+    }
+
+    /** Hand-built JSON body used only when Jackson serialization fails. */
+    private static byte[] fallbackBody(String message) {
+        String json =
+                "{\"error\":\"cache_unavailable\",\"action\":\"reupload\",\"message\":\""
+                        + escapeJson(message)
+                        + "\"}";
+        return json.getBytes(java.nio.charset.StandardCharsets.UTF_8);
+    }
+
+    /** Escapes quotes, backslashes and control characters so the value is a valid JSON string. */
+    private static String escapeJson(String value) {
+        StringBuilder escaped = new StringBuilder(value.length() + 16);
+        for (int i = 0; i < value.length(); i++) {
+            char c = value.charAt(i);
+            if (c == '"' || c == '\\') {
+                escaped.append('\\').append(c);
+            } else if (c < 0x20) {
+                escaped.append(String.format("\\u%04x", (int) c));
+            } else {
+                escaped.append(c);
+            }
+        }
+        return escaped.toString();
+    }
+}
--- a/app/core/src/main/java/stirling/software/SPDF/exception/CacheUnavailableException.java
+++ b/app/core/src/main/java/stirling/software/SPDF/exception/CacheUnavailableException.java
@ -0,0 +1,8 @@
+package stirling.software.SPDF.exception;
+
+/**
+ * Signals that no cached document is available for a requested job. An unchecked exception that
+ * carries only a descriptive message.
+ */
+public class CacheUnavailableException extends RuntimeException {
+
+    /**
+     * @param message description of the missing cache entry
+     */
+    public CacheUnavailableException(String message) {
+        super(message);
+    }
+}
--- a/app/core/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java
+++ b/app/core/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java
@ -144,14 +144,21 @@ public class PdfJsonConversionService {
    private final PdfJsonFontService fontService;
    private final Type3FontConversionService type3FontConversionService;
    private final Type3GlyphExtractor type3GlyphExtractor;
+    private final stirling.software.common.model.ApplicationProperties applicationProperties;
    private final Map<String, PDFont> type3NormalizedFontCache = new ConcurrentHashMap<>();
    private final Map<String, Set<Integer>> type3GlyphCoverageCache = new ConcurrentHashMap<>();

-    @Value("${stirling.pdf.json.font-normalization.enabled:true}")
    private boolean fontNormalizationEnabled;
+    private long cacheMaxBytes;
+    private int cacheMaxPercent;

    /** Cache for storing PDDocuments for lazy page loading. Key is jobId. */
    private final Map<String, CachedPdfDocument> documentCache = new ConcurrentHashMap<>();
+    private final java.util.LinkedHashMap<String, CachedPdfDocument> lruCache =
+            new java.util.LinkedHashMap<>(16, 0.75f, true);
+    private final Object cacheLock = new Object();
+    private volatile long currentCacheBytes = 0L;
+    private volatile long cacheBudgetBytes = -1L;

    private volatile boolean ghostscriptAvailable;

@ -161,7 +168,23 @@ public class PdfJsonConversionService {

    @PostConstruct
    private void initializeToolAvailability() {
+        loadConfigurationFromProperties();
        initializeGhostscriptAvailability();
+        initializeCacheBudget();
+    }
+
+    /**
+     * Reads font-normalization and cache limits from the {@code pdfEditor} settings, falling back
+     * to the built-in defaults when that section is missing.
+     */
+    private void loadConfigurationFromProperties() {
+        // Built-in defaults, used when no pdfEditor configuration is present.
+        boolean normalization = false;
+        long maxBytes = -1;
+        int maxPercent = 20;
+
+        stirling.software.common.model.ApplicationProperties.PdfEditor editor =
+                applicationProperties.getPdfEditor();
+        if (editor != null) {
+            normalization = editor.getFontNormalization().isEnabled();
+            maxBytes = editor.getCache().getMaxBytes();
+            maxPercent = editor.getCache().getMaxPercent();
+        }
+
+        fontNormalizationEnabled = normalization;
+        cacheMaxBytes = maxBytes;
+        cacheMaxPercent = maxPercent;
+    }

    private void initializeGhostscriptAvailability() {
@ -202,6 +225,25 @@ public class PdfJsonConversionService {
        }
    }

+    /**
+     * Resolves the in-memory cache budget from configuration.
+     *
+     * <p>An explicit byte limit wins; otherwise the budget is a percentage (capped at 100) of the
+     * JVM max heap. A non-positive result means the cache is unlimited.
+     */
+    private void initializeCacheBudget() {
+        long effective = -1L;
+        if (cacheMaxBytes > 0) {
+            effective = cacheMaxBytes;
+        } else if (cacheMaxPercent > 0) {
+            long maxMem = Runtime.getRuntime().maxMemory();
+            // maxMemory() reports Long.MAX_VALUE when the JVM has no inherent limit; a percentage
+            // of that is meaningless and maxMem * percent would overflow, so stay unlimited.
+            if (maxMem > 0 && maxMem != Long.MAX_VALUE) {
+                long percent = Math.min(cacheMaxPercent, 100);
+                // Same value as (maxMem * percent) / 100, computed without overflowing.
+                effective = (maxMem / 100) * percent + ((maxMem % 100) * percent) / 100;
+            }
+        }
+        cacheBudgetBytes = effective;
+        if (cacheBudgetBytes > 0) {
+            log.info(
+                    "PDF JSON cache budget configured: {} bytes (source: {})",
+                    cacheBudgetBytes,
+                    cacheMaxBytes > 0 ? "max-bytes" : "max-percent");
+        } else {
+            log.info("PDF JSON cache budget: unlimited");
+        }
+    }
+
    public byte[] convertPdfToJson(MultipartFile file) throws IOException {
        return convertPdfToJson(file, null, false);
    }
@ -318,9 +360,9 @@ public class PdfJsonConversionService {

            try (PDDocument document = pdfDocumentFactory.load(workingPath, true)) {
                int totalPages = document.getNumberOfPages();
-                // Only use lazy images for real async jobs where client can access the cache
-                // Synchronous calls with synthetic jobId should do full extraction
-                boolean useLazyImages = totalPages > 5 && isRealJobId;
+                // Always enable lazy mode for real async jobs so cache is available regardless of
+                // page count. Synchronous calls with synthetic jobId still do full extraction.
+                boolean useLazyImages = isRealJobId;
                Map<COSBase, FontModelCacheEntry> fontCache = new IdentityHashMap<>();
                Map<COSBase, EncodedImage> imageCache = new IdentityHashMap<>();
                log.debug(
@ -435,15 +477,16 @@ public class PdfJsonConversionService {
                        cachedPdfBytes = Files.readAllBytes(workingPath);
                    }
                    CachedPdfDocument cached =
-                            new CachedPdfDocument(
-                                    cachedPdfBytes, docMetadata, fonts, pageFontResources);
-                    documentCache.put(jobId, cached);
+                            buildCachedDocument(
+                                    jobId, cachedPdfBytes, docMetadata, fonts, pageFontResources);
+                    putCachedDocument(jobId, cached);
                    log.debug(
-                            "Cached PDF bytes ({} bytes, {} pages, {} fonts) for lazy images, jobId: {}",
-                            cachedPdfBytes.length,
+                            "Cached PDF bytes ({} bytes, {} pages, {} fonts) for lazy images, jobId: {} (diskBacked={})",
+                            cached.getPdfSize(),
                            totalPages,
                            fonts.size(),
-                            jobId);
+                            jobId,
+                            cached.isDiskBacked());
                    scheduleDocumentCleanup(jobId);
                }

@ -2973,6 +3016,130 @@ public class PdfJsonConversionService {
        }
    }

+    // Cache helpers
+    /**
+     * Creates the cache entry for a job, keeping the PDF in memory unless it alone exceeds the
+     * cache budget, in which case the bytes are written to a temp file instead.
+     *
+     * @param jobId job the entry belongs to (used for logging only)
+     * @param pdfBytes PDF content; must not be null
+     * @param metadata document metadata to cache alongside the PDF
+     * @param fonts font map to cache alongside the PDF
+     * @param pageFontResources per-page font resources to cache alongside the PDF
+     * @return an in-memory or disk-backed cache entry
+     * @throws IOException if the PDF has to be spilled to disk and the write fails
+     * @throws IllegalArgumentException if {@code pdfBytes} is null
+     */
+    private CachedPdfDocument buildCachedDocument(
+            String jobId,
+            byte[] pdfBytes,
+            PdfJsonDocumentMetadata metadata,
+            Map<String, PdfJsonFont> fonts,
+            Map<Integer, Map<PDFont, String>> pageFontResources)
+            throws IOException {
+        if (pdfBytes == null) {
+            throw new IllegalArgumentException("pdfBytes must not be null");
+        }
+        long budget = cacheBudgetBytes;
+        if (budget <= 0 || pdfBytes.length <= budget) {
+            return new CachedPdfDocument(
+                    pdfBytes, null, pdfBytes.length, metadata, fonts, pageFontResources);
+        }
+        // A single document larger than the whole budget goes straight to disk.
+        TempFile tempFile = new TempFile(tempFileManager, ".pdfjsoncache");
+        try {
+            Files.write(tempFile.getPath(), pdfBytes);
+        } catch (IOException | RuntimeException ex) {
+            // Do not leak the temp file when the write fails.
+            tempFile.close();
+            throw ex;
+        }
+        log.debug(
+                "Cached PDF spilled to disk ({} bytes exceeds budget {}) for jobId {}",
+                pdfBytes.length,
+                budget,
+                jobId);
+        return new CachedPdfDocument(
+                null, tempFile, pdfBytes.length, metadata, fonts, pageFontResources);
+    }
+
+    /**
+     * Stores (or replaces) the cache entry for a job, updates the in-memory byte accounting and
+     * then enforces the cache budget. A replaced entry has its temp file closed.
+     */
+    private void putCachedDocument(String jobId, CachedPdfDocument cached) {
+        synchronized (cacheLock) {
+            CachedPdfDocument previous = documentCache.put(jobId, cached);
+            if (previous != null) {
+                long remaining = currentCacheBytes - previous.getInMemorySize();
+                currentCacheBytes = Math.max(0L, remaining);
+                lruCache.remove(jobId);
+                closeQuietly(previous.pdfTempFile);
+            }
+            currentCacheBytes += cached.getInMemorySize();
+            lruCache.put(jobId, cached);
+            enforceCacheBudget();
+        }
+    }
+
+    /**
+     * Looks up the cache entry for a job and, on a hit, marks it as most recently used.
+     *
+     * @return the cached entry, or {@code null} when nothing is cached for {@code jobId}
+     */
+    private CachedPdfDocument getCachedDocument(String jobId) {
+        synchronized (cacheLock) {
+            CachedPdfDocument hit = documentCache.get(jobId);
+            if (hit == null) {
+                return null;
+            }
+            // Re-insert so the entry moves to the most-recently-used end of the LRU map.
+            lruCache.remove(jobId);
+            lruCache.put(jobId, hit);
+            return hit;
+        }
+    }
+
+    /**
+     * Brings in-memory cache usage back under the configured budget.
+     *
+     * <p>Least-recently-used entries that hold bytes in memory are evicted first. Disk-backed
+     * entries are left alone because evicting them frees no heap, and the most recently used
+     * entry is never evicted. If the budget is still exceeded afterwards, that newest entry is
+     * spilled to disk. Does nothing when the budget is unlimited.
+     */
+    private void enforceCacheBudget() {
+        if (cacheBudgetBytes <= 0) {
+            return;
+        }
+        synchronized (cacheLock) {
+            // lruCache iterates eldest-first (access order), so the last key is the newest entry.
+            String newestKey = null;
+            for (String key : lruCache.keySet()) {
+                newestKey = key;
+            }
+            java.util.Iterator<java.util.Map.Entry<String, CachedPdfDocument>> it =
+                    lruCache.entrySet().iterator();
+            while (currentCacheBytes > cacheBudgetBytes && it.hasNext()) {
+                java.util.Map.Entry<String, CachedPdfDocument> entry = it.next();
+                CachedPdfDocument candidate = entry.getValue();
+                if (candidate.getInMemorySize() <= 0 || entry.getKey().equals(newestKey)) {
+                    continue;
+                }
+                it.remove();
+                documentCache.remove(entry.getKey(), candidate);
+                currentCacheBytes =
+                        Math.max(0L, currentCacheBytes - candidate.getInMemorySize());
+                candidate.close();
+                log.debug(
+                        "Evicted cached PDF for jobId {} to enforce cache budget", entry.getKey());
+            }
+            if (currentCacheBytes > cacheBudgetBytes && newestKey != null) {
+                spillEntryToDisk(newestKey);
+            }
+        }
+    }
+
+    /**
+     * Replaces the in-memory entry for {@code key} with a disk-backed copy. Must be called while
+     * holding {@code cacheLock}. On failure the entry is left unchanged and a warning is logged.
+     */
+    private void spillEntryToDisk(String key) {
+        CachedPdfDocument doc = lruCache.get(key);
+        if (doc == null || doc.getInMemorySize() <= 0) {
+            return;
+        }
+        TempFile tempFile = null;
+        try {
+            tempFile = new TempFile(tempFileManager, ".pdfjsoncache");
+            Files.write(tempFile.getPath(), doc.getPdfBytes());
+            CachedPdfDocument diskDoc =
+                    new CachedPdfDocument(
+                            null,
+                            tempFile,
+                            doc.getPdfSize(),
+                            doc.getMetadata(),
+                            doc.getFonts(),
+                            doc.getPageFontResources());
+            lruCache.put(key, diskDoc);
+            documentCache.put(key, diskDoc);
+            currentCacheBytes = Math.max(0L, currentCacheBytes - doc.getInMemorySize());
+            log.debug("Spilled cached PDF for jobId {} to disk to satisfy budget", key);
+        } catch (IOException ex) {
+            if (tempFile != null) {
+                // Do not leak the temp file when the spill fails part-way.
+                tempFile.close();
+            }
+            log.warn("Failed to spill cached PDF for jobId {} to disk: {}", key, ex.getMessage());
+        }
+    }
+
+    /**
+     * Drops the cache entry for a job, if present, and releases its backing resources. The entry
+     * is closed after the cache lock has been released.
+     */
+    private void removeCachedDocument(String jobId) {
+        CachedPdfDocument evicted;
+        synchronized (cacheLock) {
+            evicted = documentCache.remove(jobId);
+            if (evicted != null) {
+                long remaining = currentCacheBytes - evicted.getInMemorySize();
+                lruCache.remove(jobId);
+                currentCacheBytes = Math.max(0L, remaining);
+            }
+        }
+        if (evicted == null) {
+            return;
+        }
+        evicted.close();
+    }
+
    private void applyTextState(PDPageContentStream contentStream, PdfJsonTextElement element)
            throws IOException {
        if (element.getCharacterSpacing() != null) {
@ -5311,6 +5478,8 @@ public class PdfJsonConversionService {
     */
    private static class CachedPdfDocument {
        private final byte[] pdfBytes;
+        private final TempFile pdfTempFile;
+        private final long pdfSize;
        private final PdfJsonDocumentMetadata metadata;
        private final Map<String, PdfJsonFont> fonts; // Font map with UIDs for consistency
        private final Map<Integer, Map<PDFont, String>> pageFontResources; // Page font resources
@ -5318,10 +5487,14 @@ public class PdfJsonConversionService {

        public CachedPdfDocument(
                byte[] pdfBytes,
+                TempFile pdfTempFile,
+                long pdfSize,
                PdfJsonDocumentMetadata metadata,
                Map<String, PdfJsonFont> fonts,
                Map<Integer, Map<PDFont, String>> pageFontResources) {
            this.pdfBytes = pdfBytes;
+            this.pdfTempFile = pdfTempFile;
+            this.pdfSize = pdfSize;
            this.metadata = metadata;
            // Create defensive copies to prevent mutation of shared maps
            this.fonts =
@ -5336,8 +5509,14 @@ public class PdfJsonConversionService {
        }

        // Getters return defensive copies to prevent external mutation
-        public byte[] getPdfBytes() {
-            return pdfBytes;
+        public byte[] getPdfBytes() throws IOException {
+            if (pdfBytes != null) {
+                return pdfBytes;
+            }
+            if (pdfTempFile != null) {
+                return Files.readAllBytes(pdfTempFile.getPath());
+            }
+            throw new IOException("Cached PDF backing missing");
        }

        public PdfJsonDocumentMetadata getMetadata() {
@ -5352,6 +5531,18 @@ public class PdfJsonConversionService {
            return new java.util.concurrent.ConcurrentHashMap<>(pageFontResources);
        }

+        /** Returns the PDF size in bytes as recorded when this entry was created. */
+        public long getPdfSize() {
+            return pdfSize;
+        }
+
+        /** Returns the number of PDF bytes held in memory, or 0 when no byte array is held. */
+        public long getInMemorySize() {
+            return pdfBytes != null ? pdfBytes.length : 0L;
+        }
+
+        /** Returns true when the entry holds no in-memory bytes and has a temp file instead. */
+        public boolean isDiskBacked() {
+            return pdfBytes == null && pdfTempFile != null;
+        }
+
        public long getTimestamp() {
            return timestamp;
        }
@ -5363,7 +5554,14 @@ public class PdfJsonConversionService {
        public CachedPdfDocument withUpdatedFonts(
                byte[] nextBytes, Map<String, PdfJsonFont> nextFonts) {
            Map<String, PdfJsonFont> fontsToUse = nextFonts != null ? nextFonts : this.fonts;
-            return new CachedPdfDocument(nextBytes, metadata, fontsToUse, pageFontResources);
+            return new CachedPdfDocument(
+                    nextBytes, null, nextBytes != null ? nextBytes.length : 0, metadata, fontsToUse, pageFontResources);
+        }
+
+        /**
+         * Closes the backing temp file, if there is one; a no-op for in-memory entries.
+         * NOTE(review): presumably closing the TempFile also deletes it from disk - confirm.
+         */
+        public void close() {
+            if (pdfTempFile != null) {
+                pdfTempFile.close();
+            }
+        }
    }

@ -5444,14 +5642,15 @@ public class PdfJsonConversionService {
            // Cache PDF bytes, metadata, and fonts for lazy page loading
            if (jobId != null) {
                CachedPdfDocument cached =
-                        new CachedPdfDocument(pdfBytes, docMetadata, fonts, pageFontResources);
-                documentCache.put(jobId, cached);
+                        buildCachedDocument(jobId, pdfBytes, docMetadata, fonts, pageFontResources);
+                putCachedDocument(jobId, cached);
                log.debug(
-                        "Cached PDF bytes ({} bytes, {} pages, {} fonts) for lazy loading, jobId: {}",
-                        pdfBytes.length,
+                        "Cached PDF bytes ({} bytes, {} pages, {} fonts) for lazy loading, jobId: {} (diskBacked={})",
+                        cached.getPdfSize(),
                        totalPages,
                        fonts.size(),
-                        jobId);
+                        jobId,
+                        cached.isDiskBacked());

                // Schedule cleanup after 30 minutes
                scheduleDocumentCleanup(jobId);
@ -5466,9 +5665,10 @@ public class PdfJsonConversionService {

    /** Extracts a single page from cached PDF bytes. Re-loads the PDF for each request. */
    public byte[] extractSinglePage(String jobId, int pageNumber) throws IOException {
-        CachedPdfDocument cached = documentCache.get(jobId);
+        CachedPdfDocument cached = getCachedDocument(jobId);
        if (cached == null) {
-            throw new IllegalArgumentException("No cached document found for jobId: " + jobId);
+            throw new stirling.software.SPDF.exception.CacheUnavailableException(
+                    "No cached document found for jobId: " + jobId);
        }

        int pageIndex = pageNumber - 1;
@ -5480,8 +5680,8 @@ public class PdfJsonConversionService {
        }

        log.debug(
-                "Loading PDF from bytes ({} bytes) to extract page {} (jobId: {})",
-                cached.getPdfBytes().length,
+                "Loading PDF from {} to extract page {} (jobId: {})",
+                cached.isDiskBacked() ? "disk cache" : "memory cache",
                pageNumber,
                jobId);

@ -5627,9 +5827,10 @@ public class PdfJsonConversionService {
        if (jobId == null || jobId.isBlank()) {
            throw new IllegalArgumentException("jobId is required for incremental export");
        }
-        CachedPdfDocument cached = documentCache.get(jobId);
+        CachedPdfDocument cached = getCachedDocument(jobId);
        if (cached == null) {
-            throw new IllegalArgumentException("No cached document available for jobId: " + jobId);
+            throw new stirling.software.SPDF.exception.CacheUnavailableException(
+                    "No cached document available for jobId: " + jobId);
        }
        if (updates == null || updates.getPages() == null || updates.getPages().isEmpty()) {
            log.debug(
@ -5709,7 +5910,14 @@ public class PdfJsonConversionService {
            document.save(baos);
            byte[] updatedBytes = baos.toByteArray();

-            documentCache.put(jobId, cached.withUpdatedFonts(updatedBytes, mergedFonts));
+            CachedPdfDocument updated =
+                    buildCachedDocument(
+                            jobId,
+                            updatedBytes,
+                            cached.getMetadata(),
+                            mergedFonts,
+                            cached.getPageFontResources());
+            putCachedDocument(jobId, updated);

            // Clear Type3 cache entries for this incremental update
            clearType3CacheEntriesForJob(updateJobId);
@ -5724,11 +5932,13 @@ public class PdfJsonConversionService {

    /** Clears a cached document. */
    public void clearCachedDocument(String jobId) {
-        CachedPdfDocument cached = documentCache.remove(jobId);
+        CachedPdfDocument cached = getCachedDocument(jobId);
+        removeCachedDocument(jobId);
        if (cached != null) {
            log.debug(
-                    "Removed cached PDF bytes ({} bytes) for jobId: {}",
-                    cached.getPdfBytes().length,
+                    "Removed cached PDF ({} bytes, diskBacked={}) for jobId: {}",
+                    cached.getPdfSize(),
+                    cached.isDiskBacked(),
                    jobId);
        }

--- a/app/core/src/main/java/stirling/software/SPDF/service/PdfJsonFallbackFontService.java
+++ b/app/core/src/main/java/stirling/software/SPDF/service/PdfJsonFallbackFontService.java
@ -312,12 +312,26 @@ public class PdfJsonFallbackFontService {
                                    "ttf")));

    private final ResourceLoader resourceLoader;
+    private final stirling.software.common.model.ApplicationProperties applicationProperties;

    @Value("${stirling.pdf.fallback-font:" + DEFAULT_FALLBACK_FONT_LOCATION + "}")
+    private String legacyFallbackFontLocation;
+
    private String fallbackFontLocation;

    private final Map<String, byte[]> fallbackFontCache = new ConcurrentHashMap<>();

+    /**
+     * Resolves the fallback font location.
+     *
+     * <p>Precedence: an explicitly customised {@code pdfEditor} fallback font, then a customised
+     * legacy {@code stirling.pdf.fallback-font} value, then the defaults. The new setting always
+     * carries a non-blank default, so without this ordering an existing legacy override would be
+     * silently ignored.
+     */
+    @jakarta.annotation.PostConstruct
+    private void loadConfig() {
+        stirling.software.common.model.ApplicationProperties.PdfEditor pdfEditor =
+                applicationProperties.getPdfEditor();
+        String configured = pdfEditor != null ? pdfEditor.getFallbackFont() : null;
+        if (isCustomFontLocation(configured)) {
+            fallbackFontLocation = configured;
+        } else if (isCustomFontLocation(legacyFallbackFontLocation)) {
+            fallbackFontLocation = legacyFallbackFontLocation;
+            log.warn(
+                    "Property 'stirling.pdf.fallback-font' is deprecated; use"
+                            + " 'pdfEditor.fallback-font' instead");
+        } else if (configured != null && !configured.isBlank()) {
+            fallbackFontLocation = configured;
+        } else {
+            fallbackFontLocation = legacyFallbackFontLocation;
+        }
+        log.info("Using fallback font location: {}", fallbackFontLocation);
+    }
+
+    /** Returns true when the location is set and differs from the built-in default. */
+    private static boolean isCustomFontLocation(String location) {
+        return location != null
+                && !location.isBlank()
+                && !DEFAULT_FALLBACK_FONT_LOCATION.equals(location);
+    }
+
    public PdfJsonFont buildFallbackFontModel() throws IOException {
        return buildFallbackFontModel(FALLBACK_FONT_ID);
    }
--- a/app/core/src/main/java/stirling/software/SPDF/service/pdfjson/PdfJsonFontService.java
+++ b/app/core/src/main/java/stirling/software/SPDF/service/pdfjson/PdfJsonFontService.java
@ -25,22 +25,18 @@ import stirling.software.common.util.TempFileManager;
 public class PdfJsonFontService {

    private final TempFileManager tempFileManager;
+    private final stirling.software.common.model.ApplicationProperties applicationProperties;

    @Getter
-    @Value("${stirling.pdf.json.cff-converter.enabled:true}")
    private boolean cffConversionEnabled;

    @Getter
-    @Value("${stirling.pdf.json.cff-converter.method:python}")
    private String cffConverterMethod;

-    @Value("${stirling.pdf.json.cff-converter.python-command:/opt/venv/bin/python3}")
    private String pythonCommand;

-    @Value("${stirling.pdf.json.cff-converter.python-script:/scripts/convert_cff_to_ttf.py}")
    private String pythonScript;

-    @Value("${stirling.pdf.json.cff-converter.fontforge-command:fontforge}")
    private String fontforgeCommand;

    private volatile boolean pythonCffConverterAvailable;
@ -48,6 +44,7 @@ public class PdfJsonFontService {

    @PostConstruct
    private void initialiseCffConverterAvailability() {
+        loadConfiguration();
        if (!cffConversionEnabled) {
            log.warn("[FONT-DEBUG] CFF conversion is DISABLED in configuration");
            pythonCffConverterAvailable = false;
@ -77,6 +74,15 @@ public class PdfJsonFontService {
        log.info("[FONT-DEBUG] Selected CFF converter method: {}", cffConverterMethod);
    }

+    /** Copies the CFF converter settings from the {@code pdfEditor} configuration. */
+    private void loadConfiguration() {
+        var converter = applicationProperties.getPdfEditor().getCffConverter();
+        cffConversionEnabled = converter.isEnabled();
+        cffConverterMethod = converter.getMethod();
+        pythonCommand = converter.getPythonCommand();
+        pythonScript = converter.getPythonScript();
+        fontforgeCommand = converter.getFontforgeCommand();
+    }
+
    public byte[] convertCffProgramToTrueType(byte[] fontBytes, String toUnicode) {
        if (!cffConversionEnabled || fontBytes == null || fontBytes.length == 0) {
            log.warn(
--- a/app/core/src/main/java/stirling/software/SPDF/service/pdfjson/type3/Type3LibraryStrategy.java
+++ b/app/core/src/main/java/stirling/software/SPDF/service/pdfjson/type3/Type3LibraryStrategy.java
@ -23,8 +23,8 @@ import stirling.software.SPDF.service.pdfjson.type3.library.Type3FontLibraryPayl
 public class Type3LibraryStrategy implements Type3ConversionStrategy {

    private final Type3FontLibrary fontLibrary;
+    private final stirling.software.common.model.ApplicationProperties applicationProperties;

-    @Value("${stirling.pdf.json.type3.library.enabled:true}")
    private boolean enabled;

    @Override
@ -42,6 +42,12 @@ public class Type3LibraryStrategy implements Type3ConversionStrategy {
        return enabled && fontLibrary != null && fontLibrary.isLoaded();
    }

+    /** Reads the Type3 library enabled flag from the {@code pdfEditor} configuration. */
+    @jakarta.annotation.PostConstruct
+    private void loadConfiguration() {
+        enabled = applicationProperties.getPdfEditor().getType3().getLibrary().isEnabled();
+    }
+
    @Override
    public PdfJsonFontConversionCandidate convert(
            Type3ConversionRequest request, Type3GlyphContext context) throws IOException {
--- a/app/core/src/main/java/stirling/software/SPDF/service/pdfjson/type3/library/Type3FontLibrary.java
+++ b/app/core/src/main/java/stirling/software/SPDF/service/pdfjson/type3/library/Type3FontLibrary.java
@ -34,8 +34,8 @@ public class Type3FontLibrary {

    private final ObjectMapper objectMapper;
    private final ResourceLoader resourceLoader;
+    private final stirling.software.common.model.ApplicationProperties applicationProperties;

-    @Value("${stirling.pdf.json.type3.library.index:classpath:/type3/library/index.json}")
    private String indexLocation;

    private final Map<String, Type3FontLibraryEntry> signatureIndex = new ConcurrentHashMap<>();
@ -44,6 +44,8 @@ public class Type3FontLibrary {

    @jakarta.annotation.PostConstruct
    void initialise() {
+        this.indexLocation =
+                applicationProperties.getPdfEditor().getType3().getLibrary().getIndex();
        Resource resource = resourceLoader.getResource(indexLocation);
        if (!resource.exists()) {
            log.info("[TYPE3] Library index {} not found; Type3 library disabled", indexLocation);
--- a/app/core/src/main/resources/settings.yml.template
+++ b/app/core/src/main/resources/settings.yml.template
@ -178,23 +178,6 @@ system:
  databaseBackup:
    cron: '0 0 0 * * ?' # Cron expression for automatic database backups "0 0 0 * * ?" daily at midnight

-stirling:
-  pdf:
-    fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
-    json:
-      font-normalization:
-        enabled: false # IMPORTANT: Disable to preserve ToUnicode CMaps for correct font rendering. Ghostscript strips Unicode mappings from CID fonts.
-      cff-converter:
-        enabled: true # Wrap CFF/Type1C fonts as OpenType-CFF for browser compatibility
-        method: python # Converter method: 'python' (fontTools, recommended - wraps as OTF), 'fontforge' (legacy - converts to TTF, may hang on CID fonts)
-        python-command: /opt/venv/bin/python3 # Python interpreter path
-        python-script: /scripts/convert_cff_to_ttf.py # Path to font wrapping script
-        fontforge-command: fontforge # Override if FontForge is installed under a different name/path
-      type3:
-        library:
-          enabled: true # Match common Type3 fonts against the built-in library of converted programs
-          index: classpath:/type3/library/index.json # Override to point at a custom index.json (supports http:, file:, classpath:)
-
 ui:
  appNameNavbar: '' # name displayed on the navigation bar
  logoStyle: classic # Options: 'classic' (default - classic S icon) or 'modern' (minimalist logo)
@ -236,3 +219,21 @@ processExecutor:
    qpdfTimeoutMinutes: 30
    ghostscriptTimeoutMinutes: 30
    ocrMyPdfTimeoutMinutes: 30
+
+pdfEditor:
+  fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
+  cache:
+    max-bytes: -1 # Max in-memory cache size in bytes; -1 disables byte cap
+    max-percent: 20 # Max in-memory cache as % of JVM max; used when max-bytes <= 0
+  font-normalization:
+    enabled: false # IMPORTANT: Disable to preserve ToUnicode CMaps for correct font rendering. Ghostscript strips Unicode mappings from CID fonts.
+  cff-converter:
+    enabled: true # Wrap CFF/Type1C fonts as OpenType-CFF for browser compatibility
+    method: python # Converter method: 'python' (fontTools, recommended - wraps as OTF), 'fontforge' (legacy - converts to TTF, may hang on CID fonts)
+    python-command: /opt/venv/bin/python3 # Python interpreter path
+    python-script: /scripts/convert_cff_to_ttf.py # Path to font wrapping script
+    fontforge-command: fontforge # Override if FontForge is installed under a different name/path
+  type3:
+    library:
+      enabled: true # Match common Type3 fonts against the built-in library of converted programs
+      index: classpath:/type3/library/index.json # Override to point at a custom index.json (supports http:, file:, classpath:)
--- a/frontend/src/core/tools/pdfTextEditor/PdfTextEditor.tsx
+++ b/frontend/src/core/tools/pdfTextEditor/PdfTextEditor.tsx
@ -238,6 +238,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
  const originalImagesRef = useRef<PdfJsonImageElement[][]>([]);
  const originalGroupsRef = useRef<TextGroup[][]>([]);
  const imagesByPageRef = useRef<PdfJsonImageElement[][]>([]);
+  const lastLoadedFileRef = useRef<File | null>(null);
  const autoLoadKeyRef = useRef<string | null>(null);
  const sourceFileIdRef = useRef<string | null>(null);
  const loadRequestIdRef = useRef(0);
@ -251,6 +252,8 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
  const pagePreviewsRef = useRef<Map<number, string>>(pagePreviews);
  const previewScaleRef = useRef<Map<number, number>>(new Map());
  const cachedJobIdRef = useRef<string | null>(null);
+  const cacheRecoveryInProgressRef = useRef(false);
+  const recoverCacheAndReloadRef = useRef<() => Promise<boolean>>(async () => false);

  // Keep ref in sync with state for access in async callbacks
  useEffect(() => {
@ -279,6 +282,13 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
    };
  }, []);

+  /**
+   * Reports whether a failed request indicates that the server-side cache entry is gone.
+   *
+   * Matches HTTP 410 responses whose body carries `cache_unavailable` in either the
+   * `error` or the `code` field. Requests issued with `responseType: 'blob'` (the
+   * incremental export uses this) receive their error body as a Blob, which cannot be
+   * read synchronously, so a 410 with a Blob body is also treated as a cache miss.
+   *
+   * @param error the value caught from a failed API call; any shape is tolerated
+   * @returns `true` when the error should trigger cache recovery
+   */
+  const isCacheUnavailableError = useCallback((error: any): boolean => {
+    const response = error?.response;
+    if (response?.status !== 410) {
+      return false;
+    }
+    const data = response.data;
+    if (typeof Blob !== 'undefined' && data instanceof Blob) {
+      // The JSON error payload is wrapped in a Blob and cannot be parsed here.
+      // NOTE(review): assumes 410 is only returned for cache expiry on blob endpoints - confirm.
+      return true;
+    }
+    const code = data?.error || data?.code;
+    return code === 'cache_unavailable';
+  }, []);
+
  const dirtyPages = useMemo(
    () => getDirtyPages(groupsByPage, imagesByPage, originalGroupsRef.current, originalImagesRef.current),
    [groupsByPage, imagesByPage],
@ -316,6 +326,8 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
      loadedImagePagesRef.current = new Set();
      loadingImagePagesRef.current = new Set();
      setSelectedPage(0);
+      setIsLazyMode(false);
+      setCachedJobId(null);
      return;
    }
    const cloned = deepCloneDocument(document);
@ -404,7 +416,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {

  // Load images for a page in lazy mode
  const loadImagesForPage = useCallback(
-    async (pageIndex: number) => {
+    async (pageIndex: number, fromRecovery = false) => {
      if (!isLazyMode) {
        return;
      }
@ -489,6 +501,12 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
        );
      } catch (error) {
        console.error(`[loadImagesForPage] Failed to load images for page ${pageNumber}:`, error);
+        if (!fromRecovery && isCacheUnavailableError(error)) {
+          const recovered = await recoverCacheAndReloadRef.current();
+          if (recovered) {
+            return loadImagesForPage(pageIndex, true);
+          }
+        }
      } finally {
        loadingImagePagesRef.current.delete(pageIndex);
        setLoadingImagePages((prev) => {
@ -498,7 +516,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
        });
      }
    },
-    [isLazyMode, cachedJobId],
+    [isLazyMode, cachedJobId, isCacheUnavailableError],
  );

  const handleLoadFile = useCallback(
@ -507,6 +525,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
        return;
      }

+      lastLoadedFileRef.current = file;
      const requestId = loadRequestIdRef.current + 1;
      loadRequestIdRef.current = requestId;

@ -555,59 +574,35 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
            message: 'Starting conversion...',
          });

-          let jobComplete = false;
-          let attempts = 0;
-          const maxAttempts = 600;
+        let jobComplete = false;
+        let attempts = 0;
+        const maxAttempts = 600;
+        let pollDelay = 500;

-          while (!jobComplete && attempts < maxAttempts) {
-            await new Promise((resolve) => setTimeout(resolve, 1000));
-            attempts += 1;
+        while (!jobComplete && attempts < maxAttempts) {
+          await new Promise((resolve) => setTimeout(resolve, pollDelay));
+          attempts += 1;
+          if (pollDelay < 10000) {
+            pollDelay = Math.min(10000, Math.floor(pollDelay * 1.5));
+          }

            try {
              const statusResponse = await apiClient.get(`/api/v1/general/job/${jobId}`);
              const jobStatus = statusResponse.data;
              console.log(`Job status (attempt ${attempts}):`, jobStatus);

-              if (jobStatus.notes && jobStatus.notes.length > 0) {
-                const lastNote = jobStatus.notes[jobStatus.notes.length - 1];
-                console.log('Latest note:', lastNote);
-                const matchWithCount = lastNote.match(
-                  /\[(\d+)%\]\s+(\w+):\s+(.+?)\s+\((\d+)\/(\d+)\)/,
-                );
-                if (matchWithCount) {
-                  const percent = parseInt(matchWithCount[1], 10);
-                  const stage = matchWithCount[2];
-                  const message = matchWithCount[3];
-                  const current = parseInt(matchWithCount[4], 10);
-                  const total = parseInt(matchWithCount[5], 10);
-                  setConversionProgress({
-                    percent,
-                    stage,
-                    message,
-                    current,
-                    total,
-                  });
-                } else {
-                  const match = lastNote.match(/\[(\d+)%\]\s+(\w+):\s+(.+)/);
-                  if (match) {
-                    const percent = parseInt(match[1], 10);
-                    const stage = match[2];
-                    const message = match[3];
-                    setConversionProgress({
-                      percent,
-                      stage,
-                      message,
-                    });
-                  }
-                }
-              } else if (jobStatus.progress !== undefined) {
-                const percent = Math.min(Math.max(jobStatus.progress, 0), 100);
-                setConversionProgress({
-                  percent,
-                  stage: jobStatus.stage || 'processing',
-                  message: jobStatus.note || 'Converting PDF to JSON...',
-                });
-              }
+              const percent = Math.min(Math.max(jobStatus.progress ?? 0, 0), 100);
+              const stage = jobStatus.stage || 'processing';
+              const message = jobStatus.note || 'Converting PDF to JSON...';
+              const current = jobStatus.current ?? undefined;
+              const total = jobStatus.total ?? undefined;
+              setConversionProgress({
+                percent,
+                stage,
+                message,
+                current,
+                total,
+              });

              if (jobStatus.complete) {
                if (jobStatus.error) {
@ -719,6 +714,8 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
        setLoadedDocument(null);
        resetToDocument(null, groupingMode);
        clearPdfPreview();
+        setIsLazyMode(false);
+        setCachedJobId(null);

        if (isPdf) {
          const errorMsg =
@ -743,6 +740,55 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
    [groupingMode, resetToDocument, t],
  );

+  // Counts automatic recovery attempts against the file they were made for, so that
+  // loading a different file starts with a fresh budget instead of inheriting a spent one.
+  const cacheRecoveryAttemptsRef = useRef<{ file: File | null; count: number }>({
+    file: null,
+    count: 0,
+  });
+
+  /**
+   * Re-runs the load for the most recently loaded file after the server reported that
+   * its cached job is no longer available.
+   *
+   * Resolves to `true` when `handleLoadFile` completed without throwing. Resolves to
+   * `false` when a recovery is already running, no file is remembered, the per-file
+   * attempt limit (2) is exhausted, or the reload threw. Failures other than the
+   * "already running" case are surfaced to the user through `setErrorMessage`.
+   */
+  const recoverCacheAndReload = useCallback(async () => {
+    if (cacheRecoveryInProgressRef.current) {
+      return false;
+    }
+    const file = lastLoadedFileRef.current;
+    if (!file) {
+      // Nothing to reload from; do not consume an attempt for this.
+      setErrorMessage(
+        t(
+          'pdfTextEditor.errors.cacheMissingFile',
+          'Session expired. Please reload the PDF file to continue.',
+        ),
+      );
+      return false;
+    }
+    const tracker = cacheRecoveryAttemptsRef.current;
+    if (tracker.file !== file) {
+      // A different file was loaded since the last recovery: reset the budget.
+      tracker.file = file;
+      tracker.count = 0;
+    }
+    if (tracker.count >= 2) {
+      setErrorMessage(
+        t(
+          'pdfTextEditor.errors.cacheRecoveryLimit',
+          'Cache was unavailable after multiple attempts. Please reload the file manually.',
+        ),
+      );
+      return false;
+    }
+    tracker.count += 1;
+    cacheRecoveryInProgressRef.current = true;
+    try {
+      // NOTE(review): if handleLoadFile handles its own failures without rethrowing,
+      // this still resolves to true after a failed reload - verify against handleLoadFile.
+      await handleLoadFile(file);
+      return true;
+    } catch (error) {
+      console.error('[PdfTextEditor] Cache recovery failed', error);
+      setErrorMessage(
+        t(
+          'pdfTextEditor.errors.cacheReloadFailed',
+          'Cache expired and reload failed. Please reselect the file.',
+        ),
+      );
+      return false;
+    } finally {
+      cacheRecoveryInProgressRef.current = false;
+    }
+  }, [handleLoadFile, t]);
+
+  // Keep the ref pointing at the latest callback so code declared earlier in the
+  // component (e.g. loadImagesForPage) can invoke recovery through the ref.
+  useEffect(() => {
+    recoverCacheAndReloadRef.current = recoverCacheAndReload;
+  }, [recoverCacheAndReload]);
+
  // Wrapper for loading files from the dropzone - adds to workbench first
  const handleLoadFileFromDropzone = useCallback(
    async (file: File) => {
@ -1054,10 +1100,11 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
      if (canUseIncremental) {
        await ensureImagesForPages(dirtyPageIndices);

-        try {
+        let incrementalRetried = false;
+        const attemptIncrementalExport = async () => {
          const payload = buildPayload();
          if (!payload) {
-            return;
+            return false;
          }

          const { document, filename } = payload;
@ -1076,7 +1123,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
          const baseName = sanitizeBaseName(filename).replace(/-edited$/u, '');
          const expectedName = `${baseName || 'document'}.pdf`;
          const response = await apiClient.post(
-            `/api/v1/convert/pdf/text-editor/partial/${cachedJobId}?filename=${encodeURIComponent(expectedName)}`,
+            `/api/v1/convert/pdf/text-editor/partial/${cachedJobIdRef.current}?filename=${encodeURIComponent(expectedName)}`,
            partialDocument,
            {
              responseType: 'blob',
@ -1094,8 +1141,26 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
            onComplete([pdfFile]);
          }
          setErrorMessage(null);
-          return;
+          return true;
+        };
+
+        try {
+          const success = await attemptIncrementalExport();
+          if (success) {
+            return;
+          }
        } catch (incrementalError) {
+          if (!incrementalRetried && isCacheUnavailableError(incrementalError)) {
+            const recovered = await recoverCacheAndReloadRef.current();
+            incrementalRetried = true;
+            if (recovered) {
+              await ensureImagesForPages(dirtyPageIndices);
+              const success = await attemptIncrementalExport();
+              if (success) {
+                return;
+              }
+            }
+          }
          console.warn(
            '[handleGeneratePdf] Incremental export failed, falling back to full export',
            incrementalError,