mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-01-14 20:11:17 +01:00
Cache fix issues
This commit is contained in:
parent
d6a83fe6a1
commit
a73636a597
@ -68,6 +68,7 @@ public class ApplicationProperties {
|
||||
|
||||
private AutoPipeline autoPipeline = new AutoPipeline();
|
||||
private ProcessExecutor processExecutor = new ProcessExecutor();
|
||||
private PdfEditor pdfEditor = new PdfEditor();
|
||||
|
||||
@Bean
|
||||
public PropertySource<?> dynamicYamlPropertySource(ConfigurableEnvironment environment)
|
||||
@ -100,6 +101,46 @@ public class ApplicationProperties {
|
||||
private String outputFolder;
|
||||
}
|
||||
|
||||
@Data
|
||||
public static class PdfEditor {
|
||||
private Cache cache = new Cache();
|
||||
private FontNormalization fontNormalization = new FontNormalization();
|
||||
private CffConverter cffConverter = new CffConverter();
|
||||
private Type3 type3 = new Type3();
|
||||
private String fallbackFont = "classpath:/static/fonts/NotoSans-Regular.ttf";
|
||||
|
||||
@Data
|
||||
public static class Cache {
|
||||
private long maxBytes = -1;
|
||||
private int maxPercent = 20;
|
||||
}
|
||||
|
||||
@Data
|
||||
public static class FontNormalization {
|
||||
private boolean enabled = false;
|
||||
}
|
||||
|
||||
@Data
|
||||
public static class CffConverter {
|
||||
private boolean enabled = true;
|
||||
private String method = "python";
|
||||
private String pythonCommand = "/opt/venv/bin/python3";
|
||||
private String pythonScript = "/scripts/convert_cff_to_ttf.py";
|
||||
private String fontforgeCommand = "fontforge";
|
||||
}
|
||||
|
||||
@Data
|
||||
public static class Type3 {
|
||||
private Library library = new Library();
|
||||
|
||||
@Data
|
||||
public static class Library {
|
||||
private boolean enabled = true;
|
||||
private String index = "classpath:/type3/library/index.json";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Data
|
||||
public static class Legal {
|
||||
private String termsAndConditions;
|
||||
|
||||
@ -0,0 +1,44 @@
|
||||
package stirling.software.SPDF.controller.api.converters;
|
||||
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ControllerAdvice;
|
||||
import org.springframework.web.bind.annotation.ExceptionHandler;
|
||||
import org.springframework.web.bind.annotation.ResponseBody;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.SPDF.exception.CacheUnavailableException;
|
||||
|
||||
@ControllerAdvice(assignableTypes = ConvertPdfJsonController.class)
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
public class ConvertPdfJsonExceptionHandler {
|
||||
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
@ExceptionHandler(CacheUnavailableException.class)
|
||||
@ResponseBody
|
||||
public ResponseEntity<byte[]> handleCacheUnavailable(CacheUnavailableException ex) {
|
||||
try {
|
||||
byte[] body =
|
||||
objectMapper.writeValueAsBytes(
|
||||
java.util.Map.of(
|
||||
"error", "cache_unavailable",
|
||||
"action", "reupload",
|
||||
"message", ex.getMessage()));
|
||||
return ResponseEntity.status(HttpStatus.GONE).body(body);
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to serialize cache_unavailable response: {}", e.getMessage());
|
||||
return ResponseEntity.status(HttpStatus.GONE)
|
||||
.body(
|
||||
("{\"error\":\"cache_unavailable\",\"action\":\"reupload\",\"message\":\""
|
||||
+ ex.getMessage()
|
||||
+ "\"}")
|
||||
.getBytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,8 @@
|
||||
package stirling.software.SPDF.exception;
|
||||
|
||||
/**
 * Unchecked exception raised when a cached document that a request depends on cannot be found,
 * for example when no entry exists for the requested job id.
 */
public class CacheUnavailableException extends RuntimeException {

    /**
     * Creates the exception with a human-readable explanation.
     *
     * @param message description of which cache entry was unavailable
     */
    public CacheUnavailableException(final String message) {
        super(message);
    }
}
|
||||
@ -144,14 +144,21 @@ public class PdfJsonConversionService {
|
||||
private final PdfJsonFontService fontService;
|
||||
private final Type3FontConversionService type3FontConversionService;
|
||||
private final Type3GlyphExtractor type3GlyphExtractor;
|
||||
private final stirling.software.common.model.ApplicationProperties applicationProperties;
|
||||
private final Map<String, PDFont> type3NormalizedFontCache = new ConcurrentHashMap<>();
|
||||
private final Map<String, Set<Integer>> type3GlyphCoverageCache = new ConcurrentHashMap<>();
|
||||
|
||||
@Value("${stirling.pdf.json.font-normalization.enabled:true}")
|
||||
private boolean fontNormalizationEnabled;
|
||||
private long cacheMaxBytes;
|
||||
private int cacheMaxPercent;
|
||||
|
||||
/** Cache for storing PDDocuments for lazy page loading. Key is jobId. */
|
||||
private final Map<String, CachedPdfDocument> documentCache = new ConcurrentHashMap<>();
|
||||
private final java.util.LinkedHashMap<String, CachedPdfDocument> lruCache =
|
||||
new java.util.LinkedHashMap<>(16, 0.75f, true);
|
||||
private final Object cacheLock = new Object();
|
||||
private volatile long currentCacheBytes = 0L;
|
||||
private volatile long cacheBudgetBytes = -1L;
|
||||
|
||||
private volatile boolean ghostscriptAvailable;
|
||||
|
||||
@ -161,7 +168,23 @@ public class PdfJsonConversionService {
|
||||
|
||||
@PostConstruct
|
||||
private void initializeToolAvailability() {
|
||||
loadConfigurationFromProperties();
|
||||
initializeGhostscriptAvailability();
|
||||
initializeCacheBudget();
|
||||
}
|
||||
|
||||
private void loadConfigurationFromProperties() {
|
||||
stirling.software.common.model.ApplicationProperties.PdfEditor cfg =
|
||||
applicationProperties.getPdfEditor();
|
||||
if (cfg != null) {
|
||||
fontNormalizationEnabled = cfg.getFontNormalization().isEnabled();
|
||||
cacheMaxBytes = cfg.getCache().getMaxBytes();
|
||||
cacheMaxPercent = cfg.getCache().getMaxPercent();
|
||||
} else {
|
||||
fontNormalizationEnabled = false;
|
||||
cacheMaxBytes = -1;
|
||||
cacheMaxPercent = 20;
|
||||
}
|
||||
}
|
||||
|
||||
private void initializeGhostscriptAvailability() {
|
||||
@ -202,6 +225,25 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
}
|
||||
|
||||
private void initializeCacheBudget() {
|
||||
long effective = -1L;
|
||||
if (cacheMaxBytes > 0) {
|
||||
effective = cacheMaxBytes;
|
||||
} else if (cacheMaxPercent > 0) {
|
||||
long maxMem = Runtime.getRuntime().maxMemory();
|
||||
effective = Math.max(0L, (maxMem * cacheMaxPercent) / 100);
|
||||
}
|
||||
cacheBudgetBytes = effective;
|
||||
if (cacheBudgetBytes > 0) {
|
||||
log.info(
|
||||
"PDF JSON cache budget configured: {} bytes (source: {})",
|
||||
cacheBudgetBytes,
|
||||
cacheMaxBytes > 0 ? "max-bytes" : "max-percent");
|
||||
} else {
|
||||
log.info("PDF JSON cache budget: unlimited");
|
||||
}
|
||||
}
|
||||
|
||||
public byte[] convertPdfToJson(MultipartFile file) throws IOException {
|
||||
return convertPdfToJson(file, null, false);
|
||||
}
|
||||
@ -318,9 +360,9 @@ public class PdfJsonConversionService {
|
||||
|
||||
try (PDDocument document = pdfDocumentFactory.load(workingPath, true)) {
|
||||
int totalPages = document.getNumberOfPages();
|
||||
// Only use lazy images for real async jobs where client can access the cache
|
||||
// Synchronous calls with synthetic jobId should do full extraction
|
||||
boolean useLazyImages = totalPages > 5 && isRealJobId;
|
||||
// Always enable lazy mode for real async jobs so cache is available regardless of
|
||||
// page count. Synchronous calls with synthetic jobId still do full extraction.
|
||||
boolean useLazyImages = isRealJobId;
|
||||
Map<COSBase, FontModelCacheEntry> fontCache = new IdentityHashMap<>();
|
||||
Map<COSBase, EncodedImage> imageCache = new IdentityHashMap<>();
|
||||
log.debug(
|
||||
@ -435,15 +477,16 @@ public class PdfJsonConversionService {
|
||||
cachedPdfBytes = Files.readAllBytes(workingPath);
|
||||
}
|
||||
CachedPdfDocument cached =
|
||||
new CachedPdfDocument(
|
||||
cachedPdfBytes, docMetadata, fonts, pageFontResources);
|
||||
documentCache.put(jobId, cached);
|
||||
buildCachedDocument(
|
||||
jobId, cachedPdfBytes, docMetadata, fonts, pageFontResources);
|
||||
putCachedDocument(jobId, cached);
|
||||
log.debug(
|
||||
"Cached PDF bytes ({} bytes, {} pages, {} fonts) for lazy images, jobId: {}",
|
||||
cachedPdfBytes.length,
|
||||
"Cached PDF bytes ({} bytes, {} pages, {} fonts) for lazy images, jobId: {} (diskBacked={})",
|
||||
cached.getPdfSize(),
|
||||
totalPages,
|
||||
fonts.size(),
|
||||
jobId);
|
||||
jobId,
|
||||
cached.isDiskBacked());
|
||||
scheduleDocumentCleanup(jobId);
|
||||
}
|
||||
|
||||
@ -2973,6 +3016,130 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
}
|
||||
|
||||
// Cache helpers
|
||||
private CachedPdfDocument buildCachedDocument(
|
||||
String jobId,
|
||||
byte[] pdfBytes,
|
||||
PdfJsonDocumentMetadata metadata,
|
||||
Map<String, PdfJsonFont> fonts,
|
||||
Map<Integer, Map<PDFont, String>> pageFontResources)
|
||||
throws IOException {
|
||||
if (pdfBytes == null) {
|
||||
throw new IllegalArgumentException("pdfBytes must not be null");
|
||||
}
|
||||
long budget = cacheBudgetBytes;
|
||||
// If single document is larger than budget, spill straight to disk
|
||||
if (budget > 0 && pdfBytes.length > budget) {
|
||||
TempFile tempFile = new TempFile(tempFileManager, ".pdfjsoncache");
|
||||
Files.write(tempFile.getPath(), pdfBytes);
|
||||
log.debug(
|
||||
"Cached PDF spilled to disk ({} bytes exceeds budget {}) for jobId {}",
|
||||
pdfBytes.length,
|
||||
budget,
|
||||
jobId);
|
||||
return new CachedPdfDocument(
|
||||
null, tempFile, pdfBytes.length, metadata, fonts, pageFontResources);
|
||||
}
|
||||
return new CachedPdfDocument(
|
||||
pdfBytes, null, pdfBytes.length, metadata, fonts, pageFontResources);
|
||||
}
|
||||
|
||||
private void putCachedDocument(String jobId, CachedPdfDocument cached) {
|
||||
synchronized (cacheLock) {
|
||||
CachedPdfDocument existing = documentCache.put(jobId, cached);
|
||||
if (existing != null) {
|
||||
lruCache.remove(jobId);
|
||||
currentCacheBytes = Math.max(0L, currentCacheBytes - existing.getInMemorySize());
|
||||
closeQuietly(existing.pdfTempFile);
|
||||
}
|
||||
lruCache.put(jobId, cached);
|
||||
currentCacheBytes += cached.getInMemorySize();
|
||||
enforceCacheBudget();
|
||||
}
|
||||
}
|
||||
|
||||
private CachedPdfDocument getCachedDocument(String jobId) {
|
||||
synchronized (cacheLock) {
|
||||
CachedPdfDocument cached = documentCache.get(jobId);
|
||||
if (cached != null) {
|
||||
lruCache.remove(jobId);
|
||||
lruCache.put(jobId, cached);
|
||||
}
|
||||
return cached;
|
||||
}
|
||||
}
|
||||
|
||||
private void enforceCacheBudget() {
|
||||
if (cacheBudgetBytes <= 0) {
|
||||
return;
|
||||
}
|
||||
synchronized (cacheLock) {
|
||||
java.util.Iterator<java.util.Map.Entry<String, CachedPdfDocument>> it =
|
||||
lruCache.entrySet().iterator();
|
||||
while (currentCacheBytes > cacheBudgetBytes && it.hasNext()) {
|
||||
java.util.Map.Entry<String, CachedPdfDocument> entry = it.next();
|
||||
it.remove();
|
||||
CachedPdfDocument removed = entry.getValue();
|
||||
documentCache.remove(entry.getKey(), removed);
|
||||
currentCacheBytes =
|
||||
Math.max(0L, currentCacheBytes - removed.getInMemorySize());
|
||||
removed.close();
|
||||
log.debug(
|
||||
"Evicted cached PDF for jobId {} to enforce cache budget", entry.getKey());
|
||||
}
|
||||
if (currentCacheBytes > cacheBudgetBytes && !lruCache.isEmpty()) {
|
||||
// Spill the most recently used large entry to disk
|
||||
String key =
|
||||
lruCache.entrySet().stream()
|
||||
.reduce((first, second) -> second)
|
||||
.map(java.util.Map.Entry::getKey)
|
||||
.orElse(null);
|
||||
if (key != null) {
|
||||
CachedPdfDocument doc = lruCache.get(key);
|
||||
if (doc != null && doc.getInMemorySize() > 0) {
|
||||
try {
|
||||
CachedPdfDocument diskDoc =
|
||||
buildCachedDocument(
|
||||
key,
|
||||
doc.getPdfBytes(),
|
||||
doc.getMetadata(),
|
||||
doc.getFonts(),
|
||||
doc.getPageFontResources());
|
||||
lruCache.put(key, diskDoc);
|
||||
documentCache.put(key, diskDoc);
|
||||
currentCacheBytes =
|
||||
Math.max(0L, currentCacheBytes - doc.getInMemorySize())
|
||||
+ diskDoc.getInMemorySize();
|
||||
doc.close();
|
||||
log.debug(
|
||||
"Spilled cached PDF for jobId {} to disk to satisfy budget",
|
||||
key);
|
||||
} catch (IOException ex) {
|
||||
log.warn(
|
||||
"Failed to spill cached PDF for jobId {} to disk: {}",
|
||||
key,
|
||||
ex.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void removeCachedDocument(String jobId) {
|
||||
CachedPdfDocument removed = null;
|
||||
synchronized (cacheLock) {
|
||||
removed = documentCache.remove(jobId);
|
||||
if (removed != null) {
|
||||
lruCache.remove(jobId);
|
||||
currentCacheBytes = Math.max(0L, currentCacheBytes - removed.getInMemorySize());
|
||||
}
|
||||
}
|
||||
if (removed != null) {
|
||||
removed.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void applyTextState(PDPageContentStream contentStream, PdfJsonTextElement element)
|
||||
throws IOException {
|
||||
if (element.getCharacterSpacing() != null) {
|
||||
@ -5311,6 +5478,8 @@ public class PdfJsonConversionService {
|
||||
*/
|
||||
private static class CachedPdfDocument {
|
||||
private final byte[] pdfBytes;
|
||||
private final TempFile pdfTempFile;
|
||||
private final long pdfSize;
|
||||
private final PdfJsonDocumentMetadata metadata;
|
||||
private final Map<String, PdfJsonFont> fonts; // Font map with UIDs for consistency
|
||||
private final Map<Integer, Map<PDFont, String>> pageFontResources; // Page font resources
|
||||
@ -5318,10 +5487,14 @@ public class PdfJsonConversionService {
|
||||
|
||||
public CachedPdfDocument(
|
||||
byte[] pdfBytes,
|
||||
TempFile pdfTempFile,
|
||||
long pdfSize,
|
||||
PdfJsonDocumentMetadata metadata,
|
||||
Map<String, PdfJsonFont> fonts,
|
||||
Map<Integer, Map<PDFont, String>> pageFontResources) {
|
||||
this.pdfBytes = pdfBytes;
|
||||
this.pdfTempFile = pdfTempFile;
|
||||
this.pdfSize = pdfSize;
|
||||
this.metadata = metadata;
|
||||
// Create defensive copies to prevent mutation of shared maps
|
||||
this.fonts =
|
||||
@ -5336,8 +5509,14 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
|
||||
// Getters return defensive copies to prevent external mutation
|
||||
public byte[] getPdfBytes() {
|
||||
return pdfBytes;
|
||||
public byte[] getPdfBytes() throws IOException {
|
||||
if (pdfBytes != null) {
|
||||
return pdfBytes;
|
||||
}
|
||||
if (pdfTempFile != null) {
|
||||
return Files.readAllBytes(pdfTempFile.getPath());
|
||||
}
|
||||
throw new IOException("Cached PDF backing missing");
|
||||
}
|
||||
|
||||
public PdfJsonDocumentMetadata getMetadata() {
|
||||
@ -5352,6 +5531,18 @@ public class PdfJsonConversionService {
|
||||
return new java.util.concurrent.ConcurrentHashMap<>(pageFontResources);
|
||||
}
|
||||
|
||||
public long getPdfSize() {
|
||||
return pdfSize;
|
||||
}
|
||||
|
||||
public long getInMemorySize() {
|
||||
return pdfBytes != null ? pdfBytes.length : 0L;
|
||||
}
|
||||
|
||||
public boolean isDiskBacked() {
|
||||
return pdfBytes == null && pdfTempFile != null;
|
||||
}
|
||||
|
||||
public long getTimestamp() {
|
||||
return timestamp;
|
||||
}
|
||||
@ -5363,7 +5554,14 @@ public class PdfJsonConversionService {
|
||||
public CachedPdfDocument withUpdatedFonts(
|
||||
byte[] nextBytes, Map<String, PdfJsonFont> nextFonts) {
|
||||
Map<String, PdfJsonFont> fontsToUse = nextFonts != null ? nextFonts : this.fonts;
|
||||
return new CachedPdfDocument(nextBytes, metadata, fontsToUse, pageFontResources);
|
||||
return new CachedPdfDocument(
|
||||
nextBytes, null, nextBytes != null ? nextBytes.length : 0, metadata, fontsToUse, pageFontResources);
|
||||
}
|
||||
|
||||
public void close() {
|
||||
if (pdfTempFile != null) {
|
||||
pdfTempFile.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -5444,14 +5642,15 @@ public class PdfJsonConversionService {
|
||||
// Cache PDF bytes, metadata, and fonts for lazy page loading
|
||||
if (jobId != null) {
|
||||
CachedPdfDocument cached =
|
||||
new CachedPdfDocument(pdfBytes, docMetadata, fonts, pageFontResources);
|
||||
documentCache.put(jobId, cached);
|
||||
buildCachedDocument(jobId, pdfBytes, docMetadata, fonts, pageFontResources);
|
||||
putCachedDocument(jobId, cached);
|
||||
log.debug(
|
||||
"Cached PDF bytes ({} bytes, {} pages, {} fonts) for lazy loading, jobId: {}",
|
||||
pdfBytes.length,
|
||||
"Cached PDF bytes ({} bytes, {} pages, {} fonts) for lazy loading, jobId: {} (diskBacked={})",
|
||||
cached.getPdfSize(),
|
||||
totalPages,
|
||||
fonts.size(),
|
||||
jobId);
|
||||
jobId,
|
||||
cached.isDiskBacked());
|
||||
|
||||
// Schedule cleanup after 30 minutes
|
||||
scheduleDocumentCleanup(jobId);
|
||||
@ -5466,9 +5665,10 @@ public class PdfJsonConversionService {
|
||||
|
||||
/** Extracts a single page from cached PDF bytes. Re-loads the PDF for each request. */
|
||||
public byte[] extractSinglePage(String jobId, int pageNumber) throws IOException {
|
||||
CachedPdfDocument cached = documentCache.get(jobId);
|
||||
CachedPdfDocument cached = getCachedDocument(jobId);
|
||||
if (cached == null) {
|
||||
throw new IllegalArgumentException("No cached document found for jobId: " + jobId);
|
||||
throw new stirling.software.SPDF.exception.CacheUnavailableException(
|
||||
"No cached document found for jobId: " + jobId);
|
||||
}
|
||||
|
||||
int pageIndex = pageNumber - 1;
|
||||
@ -5480,8 +5680,8 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
|
||||
log.debug(
|
||||
"Loading PDF from bytes ({} bytes) to extract page {} (jobId: {})",
|
||||
cached.getPdfBytes().length,
|
||||
"Loading PDF from {} to extract page {} (jobId: {})",
|
||||
cached.isDiskBacked() ? "disk cache" : "memory cache",
|
||||
pageNumber,
|
||||
jobId);
|
||||
|
||||
@ -5627,9 +5827,10 @@ public class PdfJsonConversionService {
|
||||
if (jobId == null || jobId.isBlank()) {
|
||||
throw new IllegalArgumentException("jobId is required for incremental export");
|
||||
}
|
||||
CachedPdfDocument cached = documentCache.get(jobId);
|
||||
CachedPdfDocument cached = getCachedDocument(jobId);
|
||||
if (cached == null) {
|
||||
throw new IllegalArgumentException("No cached document available for jobId: " + jobId);
|
||||
throw new stirling.software.SPDF.exception.CacheUnavailableException(
|
||||
"No cached document available for jobId: " + jobId);
|
||||
}
|
||||
if (updates == null || updates.getPages() == null || updates.getPages().isEmpty()) {
|
||||
log.debug(
|
||||
@ -5709,7 +5910,14 @@ public class PdfJsonConversionService {
|
||||
document.save(baos);
|
||||
byte[] updatedBytes = baos.toByteArray();
|
||||
|
||||
documentCache.put(jobId, cached.withUpdatedFonts(updatedBytes, mergedFonts));
|
||||
CachedPdfDocument updated =
|
||||
buildCachedDocument(
|
||||
jobId,
|
||||
updatedBytes,
|
||||
cached.getMetadata(),
|
||||
mergedFonts,
|
||||
cached.getPageFontResources());
|
||||
putCachedDocument(jobId, updated);
|
||||
|
||||
// Clear Type3 cache entries for this incremental update
|
||||
clearType3CacheEntriesForJob(updateJobId);
|
||||
@ -5724,11 +5932,13 @@ public class PdfJsonConversionService {
|
||||
|
||||
/** Clears a cached document. */
|
||||
public void clearCachedDocument(String jobId) {
|
||||
CachedPdfDocument cached = documentCache.remove(jobId);
|
||||
CachedPdfDocument cached = getCachedDocument(jobId);
|
||||
removeCachedDocument(jobId);
|
||||
if (cached != null) {
|
||||
log.debug(
|
||||
"Removed cached PDF bytes ({} bytes) for jobId: {}",
|
||||
cached.getPdfBytes().length,
|
||||
"Removed cached PDF ({} bytes, diskBacked={}) for jobId: {}",
|
||||
cached.getPdfSize(),
|
||||
cached.isDiskBacked(),
|
||||
jobId);
|
||||
}
|
||||
|
||||
|
||||
@ -312,12 +312,26 @@ public class PdfJsonFallbackFontService {
|
||||
"ttf")));
|
||||
|
||||
private final ResourceLoader resourceLoader;
|
||||
private final stirling.software.common.model.ApplicationProperties applicationProperties;
|
||||
|
||||
@Value("${stirling.pdf.fallback-font:" + DEFAULT_FALLBACK_FONT_LOCATION + "}")
|
||||
private String legacyFallbackFontLocation;
|
||||
|
||||
private String fallbackFontLocation;
|
||||
|
||||
private final Map<String, byte[]> fallbackFontCache = new ConcurrentHashMap<>();
|
||||
|
||||
@jakarta.annotation.PostConstruct
|
||||
private void loadConfig() {
|
||||
String configured = applicationProperties.getPdfEditor().getFallbackFont();
|
||||
if (configured != null && !configured.isBlank()) {
|
||||
fallbackFontLocation = configured;
|
||||
} else {
|
||||
fallbackFontLocation = legacyFallbackFontLocation;
|
||||
}
|
||||
log.info("Using fallback font location: {}", fallbackFontLocation);
|
||||
}
|
||||
|
||||
public PdfJsonFont buildFallbackFontModel() throws IOException {
|
||||
return buildFallbackFontModel(FALLBACK_FONT_ID);
|
||||
}
|
||||
|
||||
@ -25,22 +25,18 @@ import stirling.software.common.util.TempFileManager;
|
||||
public class PdfJsonFontService {
|
||||
|
||||
private final TempFileManager tempFileManager;
|
||||
private final stirling.software.common.model.ApplicationProperties applicationProperties;
|
||||
|
||||
@Getter
|
||||
@Value("${stirling.pdf.json.cff-converter.enabled:true}")
|
||||
private boolean cffConversionEnabled;
|
||||
|
||||
@Getter
|
||||
@Value("${stirling.pdf.json.cff-converter.method:python}")
|
||||
private String cffConverterMethod;
|
||||
|
||||
@Value("${stirling.pdf.json.cff-converter.python-command:/opt/venv/bin/python3}")
|
||||
private String pythonCommand;
|
||||
|
||||
@Value("${stirling.pdf.json.cff-converter.python-script:/scripts/convert_cff_to_ttf.py}")
|
||||
private String pythonScript;
|
||||
|
||||
@Value("${stirling.pdf.json.cff-converter.fontforge-command:fontforge}")
|
||||
private String fontforgeCommand;
|
||||
|
||||
private volatile boolean pythonCffConverterAvailable;
|
||||
@ -48,6 +44,7 @@ public class PdfJsonFontService {
|
||||
|
||||
@PostConstruct
|
||||
private void initialiseCffConverterAvailability() {
|
||||
loadConfiguration();
|
||||
if (!cffConversionEnabled) {
|
||||
log.warn("[FONT-DEBUG] CFF conversion is DISABLED in configuration");
|
||||
pythonCffConverterAvailable = false;
|
||||
@ -77,6 +74,15 @@ public class PdfJsonFontService {
|
||||
log.info("[FONT-DEBUG] Selected CFF converter method: {}", cffConverterMethod);
|
||||
}
|
||||
|
||||
private void loadConfiguration() {
|
||||
var cfg = applicationProperties.getPdfEditor().getCffConverter();
|
||||
this.cffConversionEnabled = cfg.isEnabled();
|
||||
this.cffConverterMethod = cfg.getMethod();
|
||||
this.pythonCommand = cfg.getPythonCommand();
|
||||
this.pythonScript = cfg.getPythonScript();
|
||||
this.fontforgeCommand = cfg.getFontforgeCommand();
|
||||
}
|
||||
|
||||
public byte[] convertCffProgramToTrueType(byte[] fontBytes, String toUnicode) {
|
||||
if (!cffConversionEnabled || fontBytes == null || fontBytes.length == 0) {
|
||||
log.warn(
|
||||
|
||||
@ -23,8 +23,8 @@ import stirling.software.SPDF.service.pdfjson.type3.library.Type3FontLibraryPayl
|
||||
public class Type3LibraryStrategy implements Type3ConversionStrategy {
|
||||
|
||||
private final Type3FontLibrary fontLibrary;
|
||||
private final stirling.software.common.model.ApplicationProperties applicationProperties;
|
||||
|
||||
@Value("${stirling.pdf.json.type3.library.enabled:true}")
|
||||
private boolean enabled;
|
||||
|
||||
@Override
|
||||
@ -42,6 +42,12 @@ public class Type3LibraryStrategy implements Type3ConversionStrategy {
|
||||
return enabled && fontLibrary != null && fontLibrary.isLoaded();
|
||||
}
|
||||
|
||||
@jakarta.annotation.PostConstruct
|
||||
private void loadConfiguration() {
|
||||
var cfg = applicationProperties.getPdfEditor().getType3().getLibrary();
|
||||
this.enabled = cfg.isEnabled();
|
||||
}
|
||||
|
||||
@Override
|
||||
public PdfJsonFontConversionCandidate convert(
|
||||
Type3ConversionRequest request, Type3GlyphContext context) throws IOException {
|
||||
|
||||
@ -34,8 +34,8 @@ public class Type3FontLibrary {
|
||||
|
||||
private final ObjectMapper objectMapper;
|
||||
private final ResourceLoader resourceLoader;
|
||||
private final stirling.software.common.model.ApplicationProperties applicationProperties;
|
||||
|
||||
@Value("${stirling.pdf.json.type3.library.index:classpath:/type3/library/index.json}")
|
||||
private String indexLocation;
|
||||
|
||||
private final Map<String, Type3FontLibraryEntry> signatureIndex = new ConcurrentHashMap<>();
|
||||
@ -44,6 +44,8 @@ public class Type3FontLibrary {
|
||||
|
||||
@jakarta.annotation.PostConstruct
|
||||
void initialise() {
|
||||
this.indexLocation =
|
||||
applicationProperties.getPdfEditor().getType3().getLibrary().getIndex();
|
||||
Resource resource = resourceLoader.getResource(indexLocation);
|
||||
if (!resource.exists()) {
|
||||
log.info("[TYPE3] Library index {} not found; Type3 library disabled", indexLocation);
|
||||
|
||||
@ -178,23 +178,6 @@ system:
|
||||
databaseBackup:
|
||||
cron: '0 0 0 * * ?' # Cron expression for automatic database backups "0 0 0 * * ?" daily at midnight
|
||||
|
||||
stirling:
|
||||
pdf:
|
||||
fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
|
||||
json:
|
||||
font-normalization:
|
||||
enabled: false # IMPORTANT: Disable to preserve ToUnicode CMaps for correct font rendering. Ghostscript strips Unicode mappings from CID fonts.
|
||||
cff-converter:
|
||||
enabled: true # Wrap CFF/Type1C fonts as OpenType-CFF for browser compatibility
|
||||
method: python # Converter method: 'python' (fontTools, recommended - wraps as OTF), 'fontforge' (legacy - converts to TTF, may hang on CID fonts)
|
||||
python-command: /opt/venv/bin/python3 # Python interpreter path
|
||||
python-script: /scripts/convert_cff_to_ttf.py # Path to font wrapping script
|
||||
fontforge-command: fontforge # Override if FontForge is installed under a different name/path
|
||||
type3:
|
||||
library:
|
||||
enabled: true # Match common Type3 fonts against the built-in library of converted programs
|
||||
index: classpath:/type3/library/index.json # Override to point at a custom index.json (supports http:, file:, classpath:)
|
||||
|
||||
ui:
|
||||
appNameNavbar: '' # name displayed on the navigation bar
|
||||
logoStyle: classic # Options: 'classic' (default - classic S icon) or 'modern' (minimalist logo)
|
||||
@ -236,3 +219,21 @@ processExecutor:
|
||||
qpdfTimeoutMinutes: 30
|
||||
ghostscriptTimeoutMinutes: 30
|
||||
ocrMyPdfTimeoutMinutes: 30
|
||||
|
||||
pdfEditor:
|
||||
fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
|
||||
cache:
|
||||
max-bytes: -1 # Max in-memory cache size in bytes; -1 disables byte cap
|
||||
max-percent: 20 # Max in-memory cache as % of JVM max; used when max-bytes <= 0
|
||||
font-normalization:
|
||||
enabled: false # IMPORTANT: Disable to preserve ToUnicode CMaps for correct font rendering. Ghostscript strips Unicode mappings from CID fonts.
|
||||
cff-converter:
|
||||
enabled: true # Wrap CFF/Type1C fonts as OpenType-CFF for browser compatibility
|
||||
method: python # Converter method: 'python' (fontTools, recommended - wraps as OTF), 'fontforge' (legacy - converts to TTF, may hang on CID fonts)
|
||||
python-command: /opt/venv/bin/python3 # Python interpreter path
|
||||
python-script: /scripts/convert_cff_to_ttf.py # Path to font wrapping script
|
||||
fontforge-command: fontforge # Override if FontForge is installed under a different name/path
|
||||
type3:
|
||||
library:
|
||||
enabled: true # Match common Type3 fonts against the built-in library of converted programs
|
||||
index: classpath:/type3/library/index.json # Override to point at a custom index.json (supports http:, file:, classpath:)
|
||||
|
||||
@ -238,6 +238,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
const originalImagesRef = useRef<PdfJsonImageElement[][]>([]);
|
||||
const originalGroupsRef = useRef<TextGroup[][]>([]);
|
||||
const imagesByPageRef = useRef<PdfJsonImageElement[][]>([]);
|
||||
const lastLoadedFileRef = useRef<File | null>(null);
|
||||
const autoLoadKeyRef = useRef<string | null>(null);
|
||||
const sourceFileIdRef = useRef<string | null>(null);
|
||||
const loadRequestIdRef = useRef(0);
|
||||
@ -251,6 +252,8 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
const pagePreviewsRef = useRef<Map<number, string>>(pagePreviews);
|
||||
const previewScaleRef = useRef<Map<number, number>>(new Map());
|
||||
const cachedJobIdRef = useRef<string | null>(null);
|
||||
const cacheRecoveryInProgressRef = useRef(false);
|
||||
const recoverCacheAndReloadRef = useRef<() => Promise<boolean>>(async () => false);
|
||||
|
||||
// Keep ref in sync with state for access in async callbacks
|
||||
useEffect(() => {
|
||||
@ -279,6 +282,13 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
};
|
||||
}, []);
|
||||
|
||||
// True when the request failed with HTTP 410 and the response body carries the
// 'cache_unavailable' code (read from either `error` or `code`).
const isCacheUnavailableError = useCallback((error: any): boolean => {
  if (error?.response?.status !== 410) {
    return false;
  }
  const payload = error?.response?.data;
  const reportedCode = (payload && (payload.error || payload.code)) ?? undefined;
  return reportedCode === 'cache_unavailable';
}, []);
|
||||
|
||||
const dirtyPages = useMemo(
|
||||
() => getDirtyPages(groupsByPage, imagesByPage, originalGroupsRef.current, originalImagesRef.current),
|
||||
[groupsByPage, imagesByPage],
|
||||
@ -316,6 +326,8 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
loadedImagePagesRef.current = new Set();
|
||||
loadingImagePagesRef.current = new Set();
|
||||
setSelectedPage(0);
|
||||
setIsLazyMode(false);
|
||||
setCachedJobId(null);
|
||||
return;
|
||||
}
|
||||
const cloned = deepCloneDocument(document);
|
||||
@ -404,7 +416,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
|
||||
// Load images for a page in lazy mode
|
||||
const loadImagesForPage = useCallback(
|
||||
async (pageIndex: number) => {
|
||||
async (pageIndex: number, fromRecovery = false) => {
|
||||
if (!isLazyMode) {
|
||||
return;
|
||||
}
|
||||
@ -489,6 +501,12 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
);
|
||||
} catch (error) {
|
||||
console.error(`[loadImagesForPage] Failed to load images for page ${pageNumber}:`, error);
|
||||
if (!fromRecovery && isCacheUnavailableError(error)) {
|
||||
const recovered = await recoverCacheAndReloadRef.current();
|
||||
if (recovered) {
|
||||
return loadImagesForPage(pageIndex, true);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
loadingImagePagesRef.current.delete(pageIndex);
|
||||
setLoadingImagePages((prev) => {
|
||||
@ -498,7 +516,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
});
|
||||
}
|
||||
},
|
||||
[isLazyMode, cachedJobId],
|
||||
[isLazyMode, cachedJobId, isCacheUnavailableError],
|
||||
);
|
||||
|
||||
const handleLoadFile = useCallback(
|
||||
@ -507,6 +525,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
return;
|
||||
}
|
||||
|
||||
lastLoadedFileRef.current = file;
|
||||
const requestId = loadRequestIdRef.current + 1;
|
||||
loadRequestIdRef.current = requestId;
|
||||
|
||||
@ -555,59 +574,35 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
message: 'Starting conversion...',
|
||||
});
|
||||
|
||||
let jobComplete = false;
|
||||
let attempts = 0;
|
||||
const maxAttempts = 600;
|
||||
let jobComplete = false;
|
||||
let attempts = 0;
|
||||
const maxAttempts = 600;
|
||||
let pollDelay = 500;
|
||||
|
||||
while (!jobComplete && attempts < maxAttempts) {
|
||||
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
attempts += 1;
|
||||
while (!jobComplete && attempts < maxAttempts) {
|
||||
await new Promise((resolve) => setTimeout(resolve, pollDelay));
|
||||
attempts += 1;
|
||||
if (pollDelay < 10000) {
|
||||
pollDelay = Math.min(10000, Math.floor(pollDelay * 1.5));
|
||||
}
|
||||
|
||||
try {
|
||||
const statusResponse = await apiClient.get(`/api/v1/general/job/${jobId}`);
|
||||
const jobStatus = statusResponse.data;
|
||||
console.log(`Job status (attempt ${attempts}):`, jobStatus);
|
||||
|
||||
if (jobStatus.notes && jobStatus.notes.length > 0) {
|
||||
const lastNote = jobStatus.notes[jobStatus.notes.length - 1];
|
||||
console.log('Latest note:', lastNote);
|
||||
const matchWithCount = lastNote.match(
|
||||
/\[(\d+)%\]\s+(\w+):\s+(.+?)\s+\((\d+)\/(\d+)\)/,
|
||||
);
|
||||
if (matchWithCount) {
|
||||
const percent = parseInt(matchWithCount[1], 10);
|
||||
const stage = matchWithCount[2];
|
||||
const message = matchWithCount[3];
|
||||
const current = parseInt(matchWithCount[4], 10);
|
||||
const total = parseInt(matchWithCount[5], 10);
|
||||
setConversionProgress({
|
||||
percent,
|
||||
stage,
|
||||
message,
|
||||
current,
|
||||
total,
|
||||
});
|
||||
} else {
|
||||
const match = lastNote.match(/\[(\d+)%\]\s+(\w+):\s+(.+)/);
|
||||
if (match) {
|
||||
const percent = parseInt(match[1], 10);
|
||||
const stage = match[2];
|
||||
const message = match[3];
|
||||
setConversionProgress({
|
||||
percent,
|
||||
stage,
|
||||
message,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (jobStatus.progress !== undefined) {
|
||||
const percent = Math.min(Math.max(jobStatus.progress, 0), 100);
|
||||
setConversionProgress({
|
||||
percent,
|
||||
stage: jobStatus.stage || 'processing',
|
||||
message: jobStatus.note || 'Converting PDF to JSON...',
|
||||
});
|
||||
}
|
||||
const percent = Math.min(Math.max(jobStatus.progress ?? 0, 0), 100);
|
||||
const stage = jobStatus.stage || 'processing';
|
||||
const message = jobStatus.note || 'Converting PDF to JSON...';
|
||||
const current = jobStatus.current ?? undefined;
|
||||
const total = jobStatus.total ?? undefined;
|
||||
setConversionProgress({
|
||||
percent,
|
||||
stage,
|
||||
message,
|
||||
current,
|
||||
total,
|
||||
});
|
||||
|
||||
if (jobStatus.complete) {
|
||||
if (jobStatus.error) {
|
||||
@ -719,6 +714,8 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
setLoadedDocument(null);
|
||||
resetToDocument(null, groupingMode);
|
||||
clearPdfPreview();
|
||||
setIsLazyMode(false);
|
||||
setCachedJobId(null);
|
||||
|
||||
if (isPdf) {
|
||||
const errorMsg =
|
||||
@ -743,6 +740,55 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
[groupingMode, resetToDocument, t],
|
||||
);
|
||||
|
||||
/**
 * Tries to recover from an unavailable cache by re-running handleLoadFile on
 * the file remembered in lastLoadedFileRef.
 *
 * Resolves to true when handleLoadFile finished without throwing. Resolves to
 * false when a recovery is already running, when two attempts have already
 * been made, when no previously loaded file is remembered, or when the reload
 * threw. Every false outcome except "already running" also sets a
 * user-facing error message.
 */
const recoverCacheAndReload = useCallback(async () => {
  // Only one recovery may run at a time; concurrent callers are told "not recovered".
  if (cacheRecoveryInProgressRef.current) {
    return false;
  }

  // The attempt counter is kept as an ad-hoc property on the ref object itself
  // (not on .current), so it is untouched when .current is reassigned.
  const attemptState = recoverCacheAndReloadRef as any;
  if (attemptState.attempts === undefined) {
    attemptState.attempts = 0;
  }

  // Give up once two attempts have been made.
  if (attemptState.attempts >= 2) {
    setErrorMessage(
      t(
        'pdfTextEditor.errors.cacheRecoveryLimit',
        'Cache was unavailable after multiple attempts. Please reload the file manually.',
      ),
    );
    return false;
  }
  // Counted before the file check below, so a missing file still uses up an attempt.
  attemptState.attempts += 1;

  const lastFile = lastLoadedFileRef.current;
  if (!lastFile) {
    setErrorMessage(
      t(
        'pdfTextEditor.errors.cacheMissingFile',
        'Session expired. Please reload the PDF file to continue.',
      ),
    );
    return false;
  }

  cacheRecoveryInProgressRef.current = true;
  try {
    await handleLoadFile(lastFile);
    return true;
  } catch (reloadError) {
    console.error('[PdfTextEditor] Cache recovery failed', reloadError);
    setErrorMessage(
      t(
        'pdfTextEditor.errors.cacheReloadFailed',
        'Cache expired and reload failed. Please reselect the file.',
      ),
    );
    return false;
  } finally {
    // Always release the in-progress guard, whether the reload succeeded or threw.
    cacheRecoveryInProgressRef.current = false;
  }
}, [handleLoadFile, t]);
|
||||
|
||||
// Keep recoverCacheAndReloadRef.current pointing at the latest
// recoverCacheAndReload, so code that calls recoverCacheAndReloadRef.current()
// always invokes the current callback.
useEffect(
  function syncRecoverCacheAndReloadRef() {
    recoverCacheAndReloadRef.current = recoverCacheAndReload;
  },
  [recoverCacheAndReload],
);
|
||||
|
||||
// Wrapper for loading files from the dropzone - adds to workbench first
|
||||
const handleLoadFileFromDropzone = useCallback(
|
||||
async (file: File) => {
|
||||
@ -1054,10 +1100,11 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
if (canUseIncremental) {
|
||||
await ensureImagesForPages(dirtyPageIndices);
|
||||
|
||||
try {
|
||||
let incrementalRetried = false;
|
||||
const attemptIncrementalExport = async () => {
|
||||
const payload = buildPayload();
|
||||
if (!payload) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
const { document, filename } = payload;
|
||||
@ -1076,7 +1123,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
const baseName = sanitizeBaseName(filename).replace(/-edited$/u, '');
|
||||
const expectedName = `${baseName || 'document'}.pdf`;
|
||||
const response = await apiClient.post(
|
||||
`/api/v1/convert/pdf/text-editor/partial/${cachedJobId}?filename=${encodeURIComponent(expectedName)}`,
|
||||
`/api/v1/convert/pdf/text-editor/partial/${cachedJobIdRef.current}?filename=${encodeURIComponent(expectedName)}`,
|
||||
partialDocument,
|
||||
{
|
||||
responseType: 'blob',
|
||||
@ -1094,8 +1141,26 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
onComplete([pdfFile]);
|
||||
}
|
||||
setErrorMessage(null);
|
||||
return;
|
||||
return true;
|
||||
};
|
||||
|
||||
try {
|
||||
const success = await attemptIncrementalExport();
|
||||
if (success) {
|
||||
return;
|
||||
}
|
||||
} catch (incrementalError) {
|
||||
if (!incrementalRetried && isCacheUnavailableError(incrementalError)) {
|
||||
const recovered = await recoverCacheAndReloadRef.current();
|
||||
incrementalRetried = true;
|
||||
if (recovered) {
|
||||
await ensureImagesForPages(dirtyPageIndices);
|
||||
const success = await attemptIncrementalExport();
|
||||
if (success) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
console.warn(
|
||||
'[handleGeneratePdf] Incremental export failed, falling back to full export',
|
||||
incrementalError,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user