mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
fix issues with type0
This commit is contained in:
parent
6db66f1f1a
commit
f645eaff18
BIN
app/core/src/main/resources/static/pdfjs-legacy/standard_fonts/LiberationMono-Regular.ttf
vendored
Normal file
BIN
app/core/src/main/resources/static/pdfjs-legacy/standard_fonts/LiberationMono-Regular.ttf
vendored
Normal file
Binary file not shown.
BIN
app/core/src/main/resources/static/pdfjs-legacy/standard_fonts/LiberationSerif-Regular.ttf
vendored
Normal file
BIN
app/core/src/main/resources/static/pdfjs-legacy/standard_fonts/LiberationSerif-Regular.ttf
vendored
Normal file
Binary file not shown.
@ -311,13 +311,17 @@ public class PdfJsonConversionService {
|
||||
|
||||
try (PDDocument document = pdfDocumentFactory.load(workingPath, true)) {
|
||||
int totalPages = document.getNumberOfPages();
|
||||
boolean useLazyImages = totalPages > 5 && jobId != null;
|
||||
// Only use lazy images for real async jobs where client can access the cache
|
||||
// Synchronous calls with synthetic jobId should do full extraction
|
||||
boolean useLazyImages = totalPages > 5 && isRealJobId;
|
||||
Map<COSBase, FontModelCacheEntry> fontCache = new IdentityHashMap<>();
|
||||
Map<COSBase, EncodedImage> imageCache = new IdentityHashMap<>();
|
||||
log.debug(
|
||||
"Converting PDF to JSON ({} pages) - {} mode",
|
||||
"Converting PDF to JSON ({} pages) - {} mode (jobId: {}, isRealJobId: {})",
|
||||
totalPages,
|
||||
useLazyImages ? "lazy image" : "standard");
|
||||
useLazyImages ? "lazy image" : "standard",
|
||||
jobId,
|
||||
isRealJobId);
|
||||
Map<String, PdfJsonFont> fonts = new LinkedHashMap<>();
|
||||
Map<Integer, List<PdfJsonTextElement>> textByPage = new LinkedHashMap<>();
|
||||
Map<Integer, Map<PDFont, String>> pageFontResources = new HashMap<>();
|
||||
@ -327,7 +331,8 @@ public class PdfJsonConversionService {
|
||||
int pageNumber = 1;
|
||||
for (PDPage page : document.getPages()) {
|
||||
Map<PDFont, String> resourceMap =
|
||||
collectFontsForPage(document, page, pageNumber, fonts, fontCache, jobId);
|
||||
collectFontsForPage(
|
||||
document, page, pageNumber, fonts, fontCache, jobId);
|
||||
pageFontResources.put(pageNumber, resourceMap);
|
||||
log.debug(
|
||||
"PDF→JSON: collected {} font resources on page {}",
|
||||
@ -444,8 +449,9 @@ public class PdfJsonConversionService {
|
||||
byte[] result = objectMapper.writeValueAsBytes(pdfJson);
|
||||
progress.accept(PdfJsonConversionProgress.complete());
|
||||
|
||||
// If document wasn't cached, clear Type3 cache entries immediately
|
||||
// (jobId is always set now, either from request context or synthetic)
|
||||
// Clear Type3 cache entries immediately for non-cached conversions
|
||||
// Cached conversions (useLazyImages=true) are cleaned when cache expires
|
||||
// Synchronous conversions always clear immediately since they don't use lazy mode
|
||||
if (!useLazyImages) {
|
||||
clearType3CacheEntriesForJob(jobId);
|
||||
}
|
||||
@ -718,7 +724,8 @@ public class PdfJsonConversionService {
|
||||
mapping.put(font, fontId);
|
||||
String key = buildFontKey(jobId, pageNumber, fontId);
|
||||
if (!fonts.containsKey(key)) {
|
||||
fonts.put(key, buildFontModel(document, font, fontId, pageNumber, fontCache, jobId));
|
||||
fonts.put(
|
||||
key, buildFontModel(document, font, fontId, pageNumber, fontCache, jobId));
|
||||
}
|
||||
}
|
||||
|
||||
@ -873,15 +880,13 @@ public class PdfJsonConversionService {
|
||||
|| hasPayload(font.getWebProgram())
|
||||
|| hasPayload(font.getProgram());
|
||||
|
||||
// Keep cosDictionary for TrueType and Type0 fonts even with usable program
|
||||
// Subsetted fonts need the ToUnicode CMap from the original dictionary
|
||||
// Only clear cosDictionary for Type3 fonts (which have inline content streams)
|
||||
// All other font types may need ToUnicode CMap or encoding from the dictionary
|
||||
// Conservative approach: better to keep extra data than lose encoding info
|
||||
String subtype = font.getSubtype();
|
||||
boolean needsCosDictionary =
|
||||
subtype != null
|
||||
&& (subtype.equalsIgnoreCase("TrueType")
|
||||
|| subtype.equalsIgnoreCase("Type0"));
|
||||
boolean isType3 = subtype != null && subtype.equalsIgnoreCase("Type3");
|
||||
|
||||
if (hasUsableProgram && !needsCosDictionary) {
|
||||
if (hasUsableProgram && isType3) {
|
||||
font.setCosDictionary(null);
|
||||
}
|
||||
}
|
||||
@ -1380,9 +1385,20 @@ public class PdfJsonConversionService {
|
||||
String key = buildFontKey(null, -1, effectiveId);
|
||||
PDFont font = fontMap.get(key);
|
||||
if (font != null) {
|
||||
log.debug(
|
||||
"[FALLBACK-DEBUG] Reusing cached fallback font {} (key: {})", effectiveId, key);
|
||||
return font;
|
||||
}
|
||||
log.info(
|
||||
"[FALLBACK-DEBUG] Loading fallback font {} (key: {}) via fallbackFontService",
|
||||
effectiveId,
|
||||
key);
|
||||
PDFont loaded = fallbackFontService.loadFallbackPdfFont(document, effectiveId);
|
||||
log.info(
|
||||
"[FALLBACK-DEBUG] Loaded fallback font {} - PDFont class: {}, name: {}",
|
||||
effectiveId,
|
||||
loaded.getClass().getSimpleName(),
|
||||
loaded.getName());
|
||||
fontMap.put(key, loaded);
|
||||
if (fontModels != null
|
||||
&& fontModels.stream().noneMatch(f -> effectiveId.equals(f.getId()))) {
|
||||
@ -2561,10 +2577,16 @@ public class PdfJsonConversionService {
|
||||
&& runFontModel.getType3Glyphs() != null
|
||||
&& !runFontModel.getType3Glyphs().isEmpty();
|
||||
|
||||
if (isNormalizedType3) {
|
||||
// For normalized Type3 fonts, use original text directly
|
||||
// The font has proper Unicode mappings, so PDFBox can encode it
|
||||
// correctly
|
||||
// For fonts with proper Unicode mappings, let PDFBox handle encoding
|
||||
// This includes: normalized Type3 fonts, PDType0Font (composite fonts)
|
||||
boolean useDirectText =
|
||||
isNormalizedType3
|
||||
|| run.font()
|
||||
instanceof
|
||||
org.apache.pdfbox.pdmodel.font.PDType0Font;
|
||||
|
||||
if (useDirectText) {
|
||||
// Pass text directly - PDFBox handles encoding internally
|
||||
contentStream.showText(run.text());
|
||||
} else {
|
||||
// For actual Type3 fonts and other fonts, encode manually
|
||||
@ -2582,6 +2604,14 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
log.debug(
|
||||
"[ENCODE-DEBUG] Encoding text '{}' with font {} (fontId={}, runFontModel={})",
|
||||
run.text(),
|
||||
run.font().getName(),
|
||||
run.fontId(),
|
||||
runFontModel != null
|
||||
? runFontModel.getId()
|
||||
: "null");
|
||||
encoded =
|
||||
encodeTextWithFont(
|
||||
run.font(),
|
||||
@ -2590,9 +2620,13 @@ public class PdfJsonConversionService {
|
||||
run.charCodes());
|
||||
} catch (IOException ex) {
|
||||
log.warn(
|
||||
"Failed to encode text '{}' with font {} on page {}: {}",
|
||||
"Failed to encode text '{}' with font {} (fontId={}, runFontModel={}) on page {}: {}",
|
||||
run.text(),
|
||||
run.font().getName(),
|
||||
run.fontId(),
|
||||
runFontModel != null
|
||||
? runFontModel.getId()
|
||||
: "null",
|
||||
pageNumber,
|
||||
ex.getMessage());
|
||||
continue;
|
||||
@ -2725,7 +2759,11 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
if (targetFont == null || !fallbackFontService.canEncode(targetFont, glyph)) {
|
||||
fallbackApplied = true;
|
||||
String fallbackId = fallbackFontService.resolveFallbackFontId(codePoint);
|
||||
// Try to match fallback font to original font family for visual consistency
|
||||
String originalFontName =
|
||||
baseFontModel != null ? baseFontModel.getBaseName() : null;
|
||||
String fallbackId =
|
||||
fallbackFontService.resolveFallbackFontId(originalFontName, codePoint);
|
||||
targetFont = ensureFallbackFont(document, fontMap, fontModels, fallbackId);
|
||||
targetFontId = fallbackId != null ? fallbackId : FALLBACK_FONT_ID;
|
||||
if (targetFont == null || !fallbackFontService.canEncode(targetFont, glyph)) {
|
||||
@ -3335,7 +3373,8 @@ public class PdfJsonConversionService {
|
||||
// or return null to trigger fallback font
|
||||
} else if (!isType3Font || fontModel == null) {
|
||||
// For non-Type3 fonts without Type3 metadata, use standard encoding
|
||||
return sanitizeEncoded(font.encode(text));
|
||||
byte[] encoded = font.encode(text);
|
||||
return sanitizeEncoded(encoded);
|
||||
}
|
||||
|
||||
// Type3 glyph mapping logic (for actual Type3 fonts AND normalized Type3 fonts)
|
||||
@ -3750,18 +3789,41 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
// Fall through to Standard14 fallback below if nothing else succeeded.
|
||||
} else {
|
||||
// For TrueType and Type0 fonts, prioritize cosDictionary restoration
|
||||
// These fonts often use ToUnicode CMap which is preserved in the dictionary
|
||||
String subtype = fontModel.getSubtype();
|
||||
boolean preferDictionary =
|
||||
subtype != null
|
||||
&& (subtype.equalsIgnoreCase("TrueType")
|
||||
|| subtype.equalsIgnoreCase("Type0"));
|
||||
|
||||
if (preferDictionary) {
|
||||
PDFont restored = restoreFontFromDictionary(document, fontModel);
|
||||
if (restored != null) {
|
||||
log.debug(
|
||||
"Font {} restored from cosDictionary (preferred for subsetted {})",
|
||||
fontModel.getId(),
|
||||
subtype);
|
||||
return restored;
|
||||
}
|
||||
// If dictionary restoration fails, fall back to font program bytes
|
||||
log.debug(
|
||||
"Font {} cosDictionary restoration failed, trying font program bytes",
|
||||
fontModel.getId());
|
||||
}
|
||||
|
||||
PDFont loaded =
|
||||
loadFirstAvailableFont(document, fontModel, orderedCandidates, originalFormat);
|
||||
if (loaded != null) {
|
||||
return loaded;
|
||||
}
|
||||
}
|
||||
|
||||
// Try to restore from COS dictionary if font programs failed
|
||||
if (!isType3Font) {
|
||||
PDFont restored = restoreFontFromDictionary(document, fontModel);
|
||||
if (restored != null) {
|
||||
return restored;
|
||||
// Try to restore from COS dictionary if font programs failed and we haven't tried yet
|
||||
if (!preferDictionary) {
|
||||
PDFont restored = restoreFontFromDictionary(document, fontModel);
|
||||
if (restored != null) {
|
||||
return restored;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -3972,34 +4034,74 @@ public class PdfJsonConversionService {
|
||||
log.debug("[FONT-RESTORE] Font {} has no cosDictionary", fontModel.getId());
|
||||
return null;
|
||||
}
|
||||
COSBase restored = cosMapper.deserializeCosValue(fontModel.getCosDictionary(), document);
|
||||
|
||||
// Deserialize the cosDictionary - cosMapper handles validation internally
|
||||
COSBase restored;
|
||||
try {
|
||||
restored = cosMapper.deserializeCosValue(fontModel.getCosDictionary(), document);
|
||||
} catch (Exception ex) {
|
||||
log.warn(
|
||||
"[FONT-RESTORE] Font {} cosDictionary deserialization failed: {}",
|
||||
fontModel.getId(),
|
||||
ex.getMessage());
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!(restored instanceof COSDictionary cosDictionary)) {
|
||||
log.debug(
|
||||
log.warn(
|
||||
"[FONT-RESTORE] Font {} cosDictionary deserialized to {} instead of COSDictionary",
|
||||
fontModel.getId(),
|
||||
restored != null ? restored.getClass().getSimpleName() : "null");
|
||||
return null;
|
||||
}
|
||||
|
||||
// Validate that dictionary contains required font keys
|
||||
if (!cosDictionary.containsKey(org.apache.pdfbox.cos.COSName.TYPE)
|
||||
|| !cosDictionary.containsKey(org.apache.pdfbox.cos.COSName.SUBTYPE)) {
|
||||
log.warn(
|
||||
"[FONT-RESTORE] Font {} cosDictionary missing required Type or Subtype keys",
|
||||
fontModel.getId());
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
PDFont font = PDFontFactory.createFont(cosDictionary);
|
||||
if (font != null && font.isEmbedded()) {
|
||||
applyAdditionalFontMetadata(document, font, fontModel);
|
||||
log.info(
|
||||
"[FONT-RESTORE] Successfully restored embedded font {} from original dictionary",
|
||||
if (font == null) {
|
||||
log.warn(
|
||||
"[FONT-RESTORE] Font {} PDFontFactory returned null for valid dictionary",
|
||||
fontModel.getId());
|
||||
return font;
|
||||
return null;
|
||||
}
|
||||
log.warn(
|
||||
"[FONT-RESTORE] Restored font {} from dictionary but font was {}embedded; rejecting",
|
||||
|
||||
if (!font.isEmbedded()) {
|
||||
log.warn(
|
||||
"[FONT-RESTORE] Font {} restored from dictionary but is not embedded; rejecting to avoid system font substitution",
|
||||
fontModel.getId());
|
||||
return null;
|
||||
}
|
||||
|
||||
applyAdditionalFontMetadata(document, font, fontModel);
|
||||
log.info(
|
||||
"[FONT-RESTORE] Successfully restored embedded font {} (subtype={}) from original dictionary",
|
||||
fontModel.getId(),
|
||||
font != null && font.isEmbedded() ? "" : "not ");
|
||||
font.getSubType());
|
||||
return font;
|
||||
|
||||
} catch (IOException ex) {
|
||||
log.warn(
|
||||
"[FONT-RESTORE] Failed to restore font {} from stored dictionary: {}",
|
||||
"[FONT-RESTORE] Failed to restore font {} from dictionary ({}): {}",
|
||||
fontModel.getId(),
|
||||
fontModel.getSubtype(),
|
||||
ex.getMessage());
|
||||
return null;
|
||||
} catch (Exception ex) {
|
||||
log.error(
|
||||
"[FONT-RESTORE] Unexpected error restoring font {} from dictionary: {}",
|
||||
fontModel.getId(),
|
||||
ex.getMessage(),
|
||||
ex);
|
||||
return null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private boolean isType1Format(String format) {
|
||||
@ -4948,7 +5050,8 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
String key = buildFontKey(jobId, currentPage, fontId);
|
||||
if (!fonts.containsKey(key)) {
|
||||
fonts.put(key, buildFontModel(document, font, fontId, currentPage, fontCache, jobId));
|
||||
fonts.put(
|
||||
key, buildFontModel(document, font, fontId, currentPage, fontCache, jobId));
|
||||
}
|
||||
return fontId;
|
||||
}
|
||||
@ -5514,8 +5617,8 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear job-specific entries from Type3 font caches. Font UIDs include jobId prefix, so we
|
||||
* can identify and remove them.
|
||||
* Clear job-specific entries from Type3 font caches. Font UIDs include jobId prefix, so we can
|
||||
* identify and remove them.
|
||||
*/
|
||||
private void clearType3CacheEntriesForJob(String jobId) {
|
||||
if (jobId == null || jobId.isEmpty()) {
|
||||
|
||||
@ -36,6 +36,21 @@ public class PdfJsonFallbackFontService {
|
||||
public static final String FALLBACK_FONT_AR_ID = "fallback-noto-arabic";
|
||||
public static final String FALLBACK_FONT_TH_ID = "fallback-noto-thai";
|
||||
|
||||
// Font name aliases map PDF font names to available fallback fonts
|
||||
// This provides better visual consistency when editing PDFs
|
||||
private static final Map<String, String> FONT_NAME_ALIASES =
|
||||
Map.ofEntries(
|
||||
// Liberation fonts are metric-compatible with Microsoft core fonts
|
||||
Map.entry("arial", "fallback-liberation-sans"),
|
||||
Map.entry("helvetica", "fallback-liberation-sans"),
|
||||
Map.entry("arimo", "fallback-liberation-sans"),
|
||||
Map.entry("times", "fallback-liberation-serif"),
|
||||
Map.entry("timesnewroman", "fallback-liberation-serif"),
|
||||
Map.entry("tinos", "fallback-liberation-serif"),
|
||||
Map.entry("courier", "fallback-liberation-mono"),
|
||||
Map.entry("couriernew", "fallback-liberation-mono"),
|
||||
Map.entry("cousine", "fallback-liberation-mono"));
|
||||
|
||||
private static final Map<String, FallbackFontSpec> BUILT_IN_FALLBACK_FONTS =
|
||||
Map.ofEntries(
|
||||
Map.entry(
|
||||
@ -65,6 +80,45 @@ public class PdfJsonFallbackFontService {
|
||||
new FallbackFontSpec(
|
||||
"classpath:/static/fonts/NotoSansThai-Regular.ttf",
|
||||
"NotoSansThai-Regular",
|
||||
"ttf")),
|
||||
// Liberation Sans family
|
||||
Map.entry(
|
||||
"fallback-liberation-sans",
|
||||
new FallbackFontSpec(
|
||||
"classpath:/static/pdfjs-legacy/standard_fonts/LiberationSans-Regular.ttf",
|
||||
"LiberationSans-Regular",
|
||||
"ttf")),
|
||||
Map.entry(
|
||||
"fallback-liberation-sans-bold",
|
||||
new FallbackFontSpec(
|
||||
"classpath:/static/pdfjs-legacy/standard_fonts/LiberationSans-Bold.ttf",
|
||||
"LiberationSans-Bold",
|
||||
"ttf")),
|
||||
Map.entry(
|
||||
"fallback-liberation-sans-italic",
|
||||
new FallbackFontSpec(
|
||||
"classpath:/static/pdfjs-legacy/standard_fonts/LiberationSans-Italic.ttf",
|
||||
"LiberationSans-Italic",
|
||||
"ttf")),
|
||||
Map.entry(
|
||||
"fallback-liberation-sans-bolditalic",
|
||||
new FallbackFontSpec(
|
||||
"classpath:/static/pdfjs-legacy/standard_fonts/LiberationSans-BoldItalic.ttf",
|
||||
"LiberationSans-BoldItalic",
|
||||
"ttf")),
|
||||
// Liberation Serif family
|
||||
Map.entry(
|
||||
"fallback-liberation-serif",
|
||||
new FallbackFontSpec(
|
||||
"classpath:/static/pdfjs-legacy/standard_fonts/LiberationSerif-Regular.ttf",
|
||||
"LiberationSerif-Regular",
|
||||
"ttf")),
|
||||
// Liberation Mono family
|
||||
Map.entry(
|
||||
"fallback-liberation-mono",
|
||||
new FallbackFontSpec(
|
||||
"classpath:/static/pdfjs-legacy/standard_fonts/LiberationMono-Regular.ttf",
|
||||
"LiberationMono-Regular",
|
||||
"ttf")));
|
||||
|
||||
private final ResourceLoader resourceLoader;
|
||||
@ -107,7 +161,9 @@ public class PdfJsonFallbackFontService {
|
||||
}
|
||||
byte[] bytes = loadFallbackFontBytes(fallbackId, spec);
|
||||
try (InputStream stream = new ByteArrayInputStream(bytes)) {
|
||||
return PDType0Font.load(document, stream, true);
|
||||
// Load with embedSubset=false to ensure full glyph coverage
|
||||
// Fallback fonts need all glyphs available for substituting missing characters
|
||||
return PDType0Font.load(document, stream, false);
|
||||
}
|
||||
}
|
||||
|
||||
@ -140,6 +196,53 @@ public class PdfJsonFallbackFontService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve fallback font ID based on the original font name and code point. Attempts to match
|
||||
* font family for visual consistency.
|
||||
*
|
||||
* @param originalFontName the name of the original font (may be null)
|
||||
* @param codePoint the Unicode code point that needs to be rendered
|
||||
* @return fallback font ID
|
||||
*/
|
||||
public String resolveFallbackFontId(String originalFontName, int codePoint) {
|
||||
// First try to match based on original font name for visual consistency
|
||||
if (originalFontName != null && !originalFontName.isEmpty()) {
|
||||
// Normalize font name: remove subset prefix (e.g. "PXAAAC+"), convert to lowercase,
|
||||
// remove spaces
|
||||
String normalized =
|
||||
originalFontName
|
||||
.replaceAll("^[A-Z]{6}\\+", "") // Remove subset prefix
|
||||
.toLowerCase()
|
||||
.replaceAll("\\s+", ""); // Remove spaces (e.g. "Times New Roman" ->
|
||||
// "timesnewroman")
|
||||
|
||||
// Extract base name without weight/style suffixes
|
||||
// Split on common delimiters: hyphen, underscore, comma, plus
|
||||
// Handles: "Arimo_700wght" -> "arimo", "Arial-Bold" -> "arial", "Arial,Bold" -> "arial"
|
||||
String baseName = normalized.split("[-_,+]")[0];
|
||||
|
||||
String aliasedFontId = FONT_NAME_ALIASES.get(baseName);
|
||||
if (aliasedFontId != null) {
|
||||
log.debug(
|
||||
"Matched font '{}' (normalized: '{}', base: '{}') to fallback '{}'",
|
||||
originalFontName,
|
||||
normalized,
|
||||
baseName,
|
||||
aliasedFontId);
|
||||
return aliasedFontId;
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to Unicode-based selection
|
||||
return resolveFallbackFontId(codePoint);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve fallback font ID based on Unicode code point properties.
|
||||
*
|
||||
* @param codePoint the Unicode code point
|
||||
* @return fallback font ID
|
||||
*/
|
||||
public String resolveFallbackFontId(int codePoint) {
|
||||
Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
|
||||
if (block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|
||||
|
||||
@ -13,11 +13,25 @@
|
||||
- Reuse the existing job cache (`documentCache`) to serve these on demand and clean up after timeouts (`PdfJsonConversionService.java:3608-3687`).
|
||||
|
||||
- **Editor UX Safeguards**
|
||||
- Respect `fallbackFontService` indicators; mark groups using fallback glyphs so the UI can warn about possible appearance shifts (`frontend/src/proprietary/components/tools/pdfJsonEditor/PdfJsonEditorView.tsx:1260-1287`).
|
||||
- Mark groups using fallback glyphs so the UI can warn about possible appearance shifts. Font family matching is now implemented (Liberation fonts), but weight matching is still TODO, so bold/italic text using fallbacks may appear lighter than original.
|
||||
- Surface when Type3 conversion was downgraded (e.g., rasterized glyphs) and limit editing to operations that keep the PDF stable.
|
||||
- Reference: `frontend/src/proprietary/components/tools/pdfJsonEditor/PdfJsonEditorView.tsx:1260-1287`
|
||||
|
||||
- **Canonical Font Sharing**
|
||||
- Emit fonts once per unique embedded program. Add a `canonicalFonts` array containing the full payload (program, ToUnicode, metadata) and a compact `fontAliases` mapping `{pageNumber, fontId, canonicalUid}` so text elements can still reference per-page IDs.
|
||||
- Store COS dictionaries only on canonical entries; aliases should keep light fields (e.g., size adjustments) if they differ.
|
||||
- Note: COS dictionaries are currently preserved for every font type except Type3 (other fonts may need the ToUnicode CMap or encoding data from the dictionary). The canonical approach should maintain this preservation while deduplicating font programs.
|
||||
- Update `buildFontMap` to resolve aliases when recreating PDFBox fonts, and adjust the front end to load programs via the canonical UID.
|
||||
- Optional: expose a lazy endpoint for the original COS dictionary if the canonical record strips it, so export still reconstructs untouched fonts.
|
||||
|
||||
- **Font Weight Matching for Fallback Fonts**
|
||||
- Font family matching is now implemented (Arial→LiberationSans, Times→LiberationSerif, Courier→LiberationMono).
|
||||
- However, fallback fonts still use Regular weight for all missing glyphs, regardless of the original font weight (e.g., bold text falls back to regular weight).
|
||||
- TODO: Parse weight from font names (e.g., `Arimo_700wght`, `Arial-Bold`, `TimesNewRoman,SemiBold`) and map to corresponding Liberation font variants:
|
||||
- Regular/Normal → LiberationSans-Regular, LiberationSerif-Regular, LiberationMono-Regular
|
||||
- Bold/700 → LiberationSans-Bold, LiberationSerif-Bold, LiberationMono-Bold
|
||||
- Italic/Oblique → LiberationSans-Italic, LiberationSerif-Italic, LiberationMono-Italic
|
||||
- BoldItalic → LiberationSans-BoldItalic, LiberationSerif-BoldItalic, LiberationMono-BoldItalic
|
||||
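- Add the remaining Liberation font variants to the `BUILT_IN_FALLBACK_FONTS` map with appropriate IDs, following the existing `fallback-liberation-sans-bold` pattern (the Sans Bold/Italic/BoldItalic variants are already registered; the Serif and Mono families only have Regular).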
|
||||
- Update `resolveFallbackFontId(String originalFontName, int codePoint)` in `PdfJsonFallbackFontService.java` to detect weight/style and return the matching variant ID.
|
||||
- Benefits: Better visual consistency when editing text in bold/italic fonts, as missing characters will match the original weight.
|
||||
- Implementation reference: `app/proprietary/src/main/java/stirling/software/SPDF/service/PdfJsonFallbackFontService.java:186-213`
|
||||
|
||||
1431
docs/pdf_json_threading_analysis.md
Normal file
1431
docs/pdf_json_threading_analysis.md
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user