mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
garbage commit font remappings
This commit is contained in:
parent
c7c5613c13
commit
0d9321e6a1
@ -44,6 +44,12 @@ public class PdfJsonFont {
|
|||||||
/** Hint describing the font program type (ttf, otf, cff, pfb, etc.). */
|
/** Hint describing the font program type (ttf, otf, cff, pfb, etc.). */
|
||||||
private String programFormat;
|
private String programFormat;
|
||||||
|
|
||||||
|
/** Web-optimized font program (e.g. converted TrueType) encoded as Base64. */
|
||||||
|
private String webProgram;
|
||||||
|
|
||||||
|
/** Format hint for the webProgram payload. */
|
||||||
|
private String webProgramFormat;
|
||||||
|
|
||||||
/** ToUnicode stream encoded as Base64 when present. */
|
/** ToUnicode stream encoded as Base64 when present. */
|
||||||
private String toUnicode;
|
private String toUnicode;
|
||||||
|
|
||||||
@ -70,4 +76,7 @@ public class PdfJsonFont {
|
|||||||
|
|
||||||
/** Units per em extracted from the font matrix. */
|
/** Units per em extracted from the font matrix. */
|
||||||
private Integer unitsPerEm;
|
private Integer unitsPerEm;
|
||||||
|
|
||||||
|
/** Serialized COS dictionary describing the original font resource. */
|
||||||
|
private PdfJsonCosValue cosDictionary;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -37,4 +37,5 @@ public class PdfJsonTextElement {
|
|||||||
private PdfJsonTextColor fillColor;
|
private PdfJsonTextColor fillColor;
|
||||||
private PdfJsonTextColor strokeColor;
|
private PdfJsonTextColor strokeColor;
|
||||||
private Integer renderingMode;
|
private Integer renderingMode;
|
||||||
|
private Boolean fallbackUsed;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -34,6 +34,7 @@ import java.util.Set;
|
|||||||
import java.util.TimeZone;
|
import java.util.TimeZone;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
import javax.imageio.ImageIO;
|
import javax.imageio.ImageIO;
|
||||||
|
|
||||||
@ -64,6 +65,7 @@ import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
|||||||
import org.apache.pdfbox.pdmodel.common.PDStream;
|
import org.apache.pdfbox.pdmodel.common.PDStream;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
|
import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
|
||||||
|
import org.apache.pdfbox.pdmodel.font.PDFontFactory;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
||||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||||
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
|
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
|
||||||
@ -90,6 +92,8 @@ import org.springframework.web.multipart.MultipartFile;
|
|||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import jakarta.annotation.PostConstruct;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
@ -173,11 +177,61 @@ public class PdfJsonConversionService {
|
|||||||
@Value("${stirling.pdf.json.cff-converter.enabled:true}")
|
@Value("${stirling.pdf.json.cff-converter.enabled:true}")
|
||||||
private boolean cffConversionEnabled;
|
private boolean cffConversionEnabled;
|
||||||
|
|
||||||
|
@Value("${stirling.pdf.json.cff-converter.method:python}")
|
||||||
|
private String cffConverterMethod;
|
||||||
|
|
||||||
|
@Value("${stirling.pdf.json.cff-converter.python-command:/opt/venv/bin/python3}")
|
||||||
|
private String pythonCommand;
|
||||||
|
|
||||||
|
@Value("${stirling.pdf.json.cff-converter.python-script:/scripts/convert_cff_to_ttf.py}")
|
||||||
|
private String pythonScript;
|
||||||
|
|
||||||
@Value("${stirling.pdf.json.cff-converter.fontforge-command:fontforge}")
|
@Value("${stirling.pdf.json.cff-converter.fontforge-command:fontforge}")
|
||||||
private String fontforgeCommand;
|
private String fontforgeCommand;
|
||||||
|
|
||||||
private final Map<String, byte[]> fallbackFontCache = new ConcurrentHashMap<>();
|
private final Map<String, byte[]> fallbackFontCache = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
private volatile boolean ghostscriptAvailable;
|
||||||
|
|
||||||
|
@PostConstruct
|
||||||
|
private void initializeGhostscriptAvailability() {
|
||||||
|
if (!fontNormalizationEnabled) {
|
||||||
|
ghostscriptAvailable = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isGhostscriptGroupEnabled()) {
|
||||||
|
ghostscriptAvailable = false;
|
||||||
|
log.warn(
|
||||||
|
"Ghostscript font normalization disabled: Ghostscript group is not enabled in configuration");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> command = List.of("gs", "-version");
|
||||||
|
try {
|
||||||
|
ProcessExecutorResult result =
|
||||||
|
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
|
||||||
|
.runCommandWithOutputHandling(command);
|
||||||
|
ghostscriptAvailable = result.getRc() == 0;
|
||||||
|
if (!ghostscriptAvailable) {
|
||||||
|
log.warn(
|
||||||
|
"Ghostscript executable not available (exit code {}); font normalization will be skipped",
|
||||||
|
result.getRc());
|
||||||
|
}
|
||||||
|
} catch (InterruptedException ex) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
ghostscriptAvailable = false;
|
||||||
|
log.warn(
|
||||||
|
"Ghostscript availability check interrupted; font normalization will be skipped: {}",
|
||||||
|
ex.getMessage());
|
||||||
|
} catch (IOException ex) {
|
||||||
|
ghostscriptAvailable = false;
|
||||||
|
log.warn(
|
||||||
|
"Ghostscript executable not found or failed to start; font normalization will be skipped: {}",
|
||||||
|
ex.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public byte[] convertPdfToJson(MultipartFile file) throws IOException {
|
public byte[] convertPdfToJson(MultipartFile file) throws IOException {
|
||||||
if (file == null) {
|
if (file == null) {
|
||||||
throw ExceptionUtils.createNullArgumentException("fileInput");
|
throw ExceptionUtils.createNullArgumentException("fileInput");
|
||||||
@ -452,10 +506,22 @@ public class PdfJsonConversionService {
|
|||||||
String encoding = resolveEncoding(font);
|
String encoding = resolveEncoding(font);
|
||||||
PdfJsonFontCidSystemInfo cidInfo = extractCidSystemInfo(font.getCOSObject());
|
PdfJsonFontCidSystemInfo cidInfo = extractCidSystemInfo(font.getCOSObject());
|
||||||
boolean embedded = font.isEmbedded();
|
boolean embedded = font.isEmbedded();
|
||||||
FontProgramData programData = embedded ? extractFontProgram(font) : null;
|
|
||||||
String toUnicode = extractToUnicode(font.getCOSObject());
|
String toUnicode = extractToUnicode(font.getCOSObject());
|
||||||
|
// Build complete CharCode→CID→GID→Unicode mapping for CID fonts
|
||||||
|
String unicodeMapping = buildUnicodeMapping(font, toUnicode);
|
||||||
|
FontProgramData programData = embedded ? extractFontProgram(font, unicodeMapping) : null;
|
||||||
String standard14Name = resolveStandard14Name(font);
|
String standard14Name = resolveStandard14Name(font);
|
||||||
Integer flags = descriptor != null ? descriptor.getFlags() : null;
|
Integer flags = descriptor != null ? descriptor.getFlags() : null;
|
||||||
|
PdfJsonCosValue cosDictionary = serializeCosValue(font.getCOSObject());
|
||||||
|
|
||||||
|
log.debug(
|
||||||
|
"Building font model: id={}, baseName={}, subtype={}, embedded={}, hasProgram={}, hasWebProgram={}",
|
||||||
|
fontId,
|
||||||
|
font.getName(),
|
||||||
|
subtype,
|
||||||
|
embedded,
|
||||||
|
programData != null && programData.getBase64() != null,
|
||||||
|
programData != null && programData.getWebBase64() != null);
|
||||||
|
|
||||||
return PdfJsonFont.builder()
|
return PdfJsonFont.builder()
|
||||||
.id(fontId)
|
.id(fontId)
|
||||||
@ -468,6 +534,8 @@ public class PdfJsonConversionService {
|
|||||||
.embedded(embedded)
|
.embedded(embedded)
|
||||||
.program(programData != null ? programData.getBase64() : null)
|
.program(programData != null ? programData.getBase64() : null)
|
||||||
.programFormat(programData != null ? programData.getFormat() : null)
|
.programFormat(programData != null ? programData.getFormat() : null)
|
||||||
|
.webProgram(programData != null ? programData.getWebBase64() : null)
|
||||||
|
.webProgramFormat(programData != null ? programData.getWebFormat() : null)
|
||||||
.toUnicode(toUnicode)
|
.toUnicode(toUnicode)
|
||||||
.standard14Name(standard14Name)
|
.standard14Name(standard14Name)
|
||||||
.fontDescriptorFlags(flags)
|
.fontDescriptorFlags(flags)
|
||||||
@ -477,6 +545,7 @@ public class PdfJsonConversionService {
|
|||||||
.xHeight(descriptor != null ? descriptor.getXHeight() : null)
|
.xHeight(descriptor != null ? descriptor.getXHeight() : null)
|
||||||
.italicAngle(descriptor != null ? descriptor.getItalicAngle() : null)
|
.italicAngle(descriptor != null ? descriptor.getItalicAngle() : null)
|
||||||
.unitsPerEm(extractUnitsPerEm(font))
|
.unitsPerEm(extractUnitsPerEm(font))
|
||||||
|
.cosDictionary(cosDictionary)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -508,11 +577,13 @@ public class PdfJsonConversionService {
|
|||||||
if (font == null) {
|
if (font == null) {
|
||||||
fallbackNeeded = true;
|
fallbackNeeded = true;
|
||||||
fallbackIds.add(FALLBACK_FONT_ID);
|
fallbackIds.add(FALLBACK_FONT_ID);
|
||||||
|
element.setFallbackUsed(Boolean.TRUE);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!canEncodeFully(font, text)) {
|
if (!canEncodeFully(font, text)) {
|
||||||
fallbackNeeded = true;
|
fallbackNeeded = true;
|
||||||
|
element.setFallbackUsed(Boolean.TRUE);
|
||||||
for (int offset = 0; offset < text.length(); ) {
|
for (int offset = 0; offset < text.length(); ) {
|
||||||
int codePoint = text.codePointAt(offset);
|
int codePoint = text.codePointAt(offset);
|
||||||
offset += Character.charCount(codePoint);
|
offset += Character.charCount(codePoint);
|
||||||
@ -682,11 +753,25 @@ public class PdfJsonConversionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private boolean canRunGhostscript() {
|
private boolean canRunGhostscript() {
|
||||||
|
if (!fontNormalizationEnabled) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!isGhostscriptGroupEnabled()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!ghostscriptAvailable) {
|
||||||
|
log.debug("Skipping Ghostscript normalization; executable not available");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isGhostscriptGroupEnabled() {
|
||||||
try {
|
try {
|
||||||
return endpointConfiguration != null
|
return endpointConfiguration != null
|
||||||
&& endpointConfiguration.isGroupEnabled("Ghostscript");
|
&& endpointConfiguration.isGroupEnabled("Ghostscript");
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
log.debug("Ghostscript availability check failed: {}", ex.getMessage());
|
log.debug("Ghostscript group check failed: {}", ex.getMessage());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -736,12 +821,129 @@ public class PdfJsonConversionService {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] convertCffProgramToTrueType(byte[] fontBytes) {
|
private byte[] convertCffProgramToTrueType(byte[] fontBytes, String toUnicode) {
|
||||||
if (!cffConversionEnabled
|
if (!cffConversionEnabled || fontBytes == null || fontBytes.length == 0) {
|
||||||
|| fontforgeCommand == null
|
return null;
|
||||||
|| fontforgeCommand.isBlank()
|
}
|
||||||
|| fontBytes == null
|
|
||||||
|| fontBytes.length == 0) {
|
// Determine which converter to use
|
||||||
|
if ("python".equalsIgnoreCase(cffConverterMethod)) {
|
||||||
|
return convertCffUsingPython(fontBytes, toUnicode);
|
||||||
|
} else if ("fontforge".equalsIgnoreCase(cffConverterMethod)) {
|
||||||
|
return convertCffUsingFontForge(fontBytes);
|
||||||
|
} else {
|
||||||
|
log.warn("Unknown CFF converter method: {}, falling back to Python", cffConverterMethod);
|
||||||
|
return convertCffUsingPython(fontBytes, toUnicode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] convertCffUsingPython(byte[] fontBytes, String toUnicode) {
|
||||||
|
if (pythonCommand == null
|
||||||
|
|| pythonCommand.isBlank()
|
||||||
|
|| pythonScript == null
|
||||||
|
|| pythonScript.isBlank()) {
|
||||||
|
log.debug("Python converter not configured");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
try (TempFile inputFile = new TempFile(tempFileManager, ".cff");
|
||||||
|
TempFile outputFile = new TempFile(tempFileManager, ".otf");
|
||||||
|
TempFile toUnicodeFile = toUnicode != null ? new TempFile(tempFileManager, ".tounicode") : null) {
|
||||||
|
Files.write(inputFile.getPath(), fontBytes);
|
||||||
|
|
||||||
|
// Write ToUnicode CMap data if available
|
||||||
|
if (toUnicode != null && toUnicodeFile != null) {
|
||||||
|
byte[] toUnicodeBytes = Base64.getDecoder().decode(toUnicode);
|
||||||
|
Files.write(toUnicodeFile.getPath(), toUnicodeBytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> command = new ArrayList<>();
|
||||||
|
command.add(pythonCommand);
|
||||||
|
command.add(pythonScript);
|
||||||
|
command.add(inputFile.getAbsolutePath());
|
||||||
|
command.add(outputFile.getAbsolutePath());
|
||||||
|
// Add optional ToUnicode file path
|
||||||
|
if (toUnicodeFile != null) {
|
||||||
|
command.add(toUnicodeFile.getAbsolutePath());
|
||||||
|
}
|
||||||
|
|
||||||
|
ProcessBuilder builder = new ProcessBuilder(command);
|
||||||
|
builder.redirectErrorStream(true);
|
||||||
|
Process process = builder.start();
|
||||||
|
|
||||||
|
StringBuilder output = new StringBuilder();
|
||||||
|
Thread reader =
|
||||||
|
new Thread(
|
||||||
|
() -> {
|
||||||
|
try (BufferedReader br =
|
||||||
|
new BufferedReader(
|
||||||
|
new InputStreamReader(
|
||||||
|
process.getInputStream(),
|
||||||
|
StandardCharsets.UTF_8))) {
|
||||||
|
String line;
|
||||||
|
while ((line = br.readLine()) != null) {
|
||||||
|
output.append(line).append('\n');
|
||||||
|
}
|
||||||
|
} catch (IOException ignored) {
|
||||||
|
}
|
||||||
|
});
|
||||||
|
reader.start();
|
||||||
|
|
||||||
|
// Wait with timeout (Python fontTools is usually fast, but provide safety margin)
|
||||||
|
boolean finished = process.waitFor(30, TimeUnit.SECONDS);
|
||||||
|
if (!finished) {
|
||||||
|
process.destroyForcibly();
|
||||||
|
reader.interrupt();
|
||||||
|
log.warn(
|
||||||
|
"Python CFF→OTF wrapping timed out after 30 seconds - font may be corrupted");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
int exitCode = process.exitValue();
|
||||||
|
reader.join(5000);
|
||||||
|
|
||||||
|
if (exitCode == 0 && Files.exists(outputFile.getPath())) {
|
||||||
|
byte[] convertedBytes = Files.readAllBytes(outputFile.getPath());
|
||||||
|
if (convertedBytes.length > 0) {
|
||||||
|
String validationError = validateFontTables(convertedBytes);
|
||||||
|
if (validationError != null) {
|
||||||
|
log.warn("Python converter produced invalid font: {}", validationError);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log Python script output for debugging
|
||||||
|
String outputStr = output.toString().trim();
|
||||||
|
if (!outputStr.isEmpty()) {
|
||||||
|
log.debug("Python script output: {}", outputStr);
|
||||||
|
}
|
||||||
|
|
||||||
|
log.debug(
|
||||||
|
"Python CFF→OTF wrapping successful: {} bytes → {} bytes",
|
||||||
|
fontBytes.length,
|
||||||
|
convertedBytes.length);
|
||||||
|
return convertedBytes;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
String outputStr = output.toString().trim();
|
||||||
|
if (!outputStr.isEmpty()) {
|
||||||
|
log.warn("Python CFF→OTF wrapping failed with exit code {}: {}", exitCode, outputStr);
|
||||||
|
} else {
|
||||||
|
log.warn("Python CFF→OTF wrapping failed with exit code {}", exitCode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (InterruptedException ex) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
log.debug("Python CFF conversion interrupted", ex);
|
||||||
|
} catch (IOException ex) {
|
||||||
|
log.debug("Python CFF conversion I/O error", ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] convertCffUsingFontForge(byte[] fontBytes) {
|
||||||
|
if (fontforgeCommand == null || fontforgeCommand.isBlank()) {
|
||||||
|
log.debug("FontForge converter not configured");
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -754,8 +956,18 @@ public class PdfJsonConversionService {
|
|||||||
command.add("-lang=ff");
|
command.add("-lang=ff");
|
||||||
command.add("-c");
|
command.add("-c");
|
||||||
command.add(
|
command.add(
|
||||||
"Open($1); SelectWorthOutputting(); SetFontOrder(2); Reencode(\"unicode\"); "
|
"Open($1); "
|
||||||
+ "Generate($2); Close(); Quit()");
|
+ "ScaleToEm(1000); " // Force 1000 units per em (standard for Type1)
|
||||||
|
+ "SelectWorthOutputting(); "
|
||||||
|
+ "SetFontOrder(2); "
|
||||||
|
+ "Reencode(\"unicode\"); "
|
||||||
|
+ "RoundToInt(); "
|
||||||
|
+ "RemoveOverlap(); "
|
||||||
|
+ "Simplify(); "
|
||||||
|
+ "CorrectDirection(); "
|
||||||
|
+ "Generate($2, \"\", 4+16+32); "
|
||||||
|
+ "Close(); "
|
||||||
|
+ "Quit()");
|
||||||
command.add(inputFile.getAbsolutePath());
|
command.add(inputFile.getAbsolutePath());
|
||||||
command.add(outputFile.getAbsolutePath());
|
command.add(outputFile.getAbsolutePath());
|
||||||
|
|
||||||
@ -780,11 +992,59 @@ public class PdfJsonConversionService {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
reader.start();
|
reader.start();
|
||||||
int exitCode = process.waitFor();
|
|
||||||
reader.join();
|
// Wait with timeout to prevent hanging on problematic fonts
|
||||||
|
boolean finished = process.waitFor(30, TimeUnit.SECONDS);
|
||||||
|
if (!finished) {
|
||||||
|
process.destroyForcibly();
|
||||||
|
reader.interrupt();
|
||||||
|
log.warn("FontForge conversion timed out after 30 seconds - font may be too complex or causing FontForge to hang");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
int exitCode = process.exitValue();
|
||||||
|
reader.join(5000); // Wait max 5 seconds for reader thread
|
||||||
|
|
||||||
if (exitCode == 0 && Files.exists(outputFile.getPath())) {
|
if (exitCode == 0 && Files.exists(outputFile.getPath())) {
|
||||||
return Files.readAllBytes(outputFile.getPath());
|
byte[] convertedBytes = Files.readAllBytes(outputFile.getPath());
|
||||||
|
if (convertedBytes.length > 0) {
|
||||||
|
// Basic validation: check for TrueType magic number and critical tables
|
||||||
|
if (convertedBytes.length >= 4) {
|
||||||
|
int magic =
|
||||||
|
((convertedBytes[0] & 0xFF) << 24)
|
||||||
|
| ((convertedBytes[1] & 0xFF) << 16)
|
||||||
|
| ((convertedBytes[2] & 0xFF) << 8)
|
||||||
|
| (convertedBytes[3] & 0xFF);
|
||||||
|
boolean validTrueType =
|
||||||
|
magic == 0x00010000 || magic == 0x74727565; // 1.0 or 'true'
|
||||||
|
boolean validOpenType = magic == 0x4F54544F; // 'OTTO'
|
||||||
|
|
||||||
|
if (validTrueType || validOpenType) {
|
||||||
|
// Additional validation: check unitsPerEm in head table
|
||||||
|
String validationError = validateFontTables(convertedBytes);
|
||||||
|
if (validationError != null) {
|
||||||
|
log.warn(
|
||||||
|
"FontForge produced invalid font: {}",
|
||||||
|
validationError);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
log.debug(
|
||||||
|
"FontForge CFF→TrueType conversion successful: {} bytes, magic: 0x{}, type: {}",
|
||||||
|
convertedBytes.length,
|
||||||
|
Integer.toHexString(magic),
|
||||||
|
validOpenType ? "OpenType" : "TrueType");
|
||||||
|
return convertedBytes;
|
||||||
|
} else {
|
||||||
|
log.warn(
|
||||||
|
"FontForge produced invalid font: magic number 0x{} (expected TrueType or OpenType)",
|
||||||
|
Integer.toHexString(magic));
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.warn("FontForge produced empty output file");
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
log.warn(
|
log.warn(
|
||||||
@ -801,6 +1061,127 @@ public class PdfJsonConversionService {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates critical OpenType/TrueType font tables to ensure browser compatibility.
|
||||||
|
* @return Error message if invalid, null if valid
|
||||||
|
*/
|
||||||
|
private String validateFontTables(byte[] fontBytes) {
|
||||||
|
try {
|
||||||
|
if (fontBytes.length < 12) {
|
||||||
|
return "Font file too small";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read table directory
|
||||||
|
int numTables = ((fontBytes[4] & 0xFF) << 8) | (fontBytes[5] & 0xFF);
|
||||||
|
if (numTables == 0 || numTables > 100) {
|
||||||
|
return "Invalid table count: " + numTables;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find head table
|
||||||
|
int offset = 12; // Skip sfnt header
|
||||||
|
for (int i = 0; i < numTables && offset + 16 <= fontBytes.length; i++) {
|
||||||
|
String tag = new String(fontBytes, offset, 4, StandardCharsets.US_ASCII);
|
||||||
|
int tableOffset = ((fontBytes[offset + 8] & 0xFF) << 24)
|
||||||
|
| ((fontBytes[offset + 9] & 0xFF) << 16)
|
||||||
|
| ((fontBytes[offset + 10] & 0xFF) << 8)
|
||||||
|
| (fontBytes[offset + 11] & 0xFF);
|
||||||
|
int tableLength = ((fontBytes[offset + 12] & 0xFF) << 24)
|
||||||
|
| ((fontBytes[offset + 13] & 0xFF) << 16)
|
||||||
|
| ((fontBytes[offset + 14] & 0xFF) << 8)
|
||||||
|
| (fontBytes[offset + 15] & 0xFF);
|
||||||
|
|
||||||
|
if ("head".equals(tag)) {
|
||||||
|
if (tableOffset + 18 > fontBytes.length) {
|
||||||
|
return "head table truncated";
|
||||||
|
}
|
||||||
|
// Check unitsPerEm at offset 18 in head table
|
||||||
|
int unitsPerEm = ((fontBytes[tableOffset + 18] & 0xFF) << 8)
|
||||||
|
| (fontBytes[tableOffset + 19] & 0xFF);
|
||||||
|
if (unitsPerEm < 16 || unitsPerEm > 16384) {
|
||||||
|
return "Invalid unitsPerEm: " + unitsPerEm + " (must be 16-16384)";
|
||||||
|
}
|
||||||
|
return null; // Valid
|
||||||
|
}
|
||||||
|
offset += 16;
|
||||||
|
}
|
||||||
|
return "head table not found";
|
||||||
|
} catch (Exception ex) {
|
||||||
|
return "Validation error: " + ex.getMessage();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildUnicodeMapping(PDFont font, String toUnicodeBase64) throws IOException {
|
||||||
|
log.debug("buildUnicodeMapping called for font: {}, hasToUnicode: {}, isCID: {}",
|
||||||
|
font.getName(), toUnicodeBase64 != null, font instanceof PDType0Font);
|
||||||
|
|
||||||
|
if (toUnicodeBase64 == null || toUnicodeBase64.isBlank()) {
|
||||||
|
log.debug("No ToUnicode data for font: {}", font.getName());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For CID fonts (Type0), build complete CharCode→CID→GID→Unicode mapping
|
||||||
|
if (!(font instanceof PDType0Font type0Font)) {
|
||||||
|
// For non-CID fonts, just return ToUnicode as-is
|
||||||
|
log.debug("Non-CID font {}, returning raw ToUnicode", font.getName());
|
||||||
|
return toUnicodeBase64;
|
||||||
|
}
|
||||||
|
|
||||||
|
log.debug("Building JSON mapping for CID font: {}", font.getName());
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Build a map of CharCode → Unicode from ToUnicode
|
||||||
|
Map<Integer, Integer> charCodeToUnicode = new HashMap<>();
|
||||||
|
byte[] toUnicodeBytes = Base64.getDecoder().decode(toUnicodeBase64);
|
||||||
|
String toUnicodeStr = new String(toUnicodeBytes, StandardCharsets.UTF_8);
|
||||||
|
|
||||||
|
// Parse ToUnicode CMap for bfchar and bfrange
|
||||||
|
java.util.regex.Pattern bfcharPattern = java.util.regex.Pattern.compile("<([0-9A-Fa-f]+)>\\s*<([0-9A-Fa-f]+)>");
|
||||||
|
java.util.regex.Matcher matcher = bfcharPattern.matcher(toUnicodeStr);
|
||||||
|
while (matcher.find()) {
|
||||||
|
int charCode = Integer.parseInt(matcher.group(1), 16);
|
||||||
|
int unicode = Integer.parseInt(matcher.group(2), 16);
|
||||||
|
charCodeToUnicode.put(charCode, unicode);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build JSON mapping: CharCode → CID → GID → Unicode
|
||||||
|
StringBuilder json = new StringBuilder();
|
||||||
|
json.append("{\"isCID\":true,\"cidToGidIdentity\":true,\"entries\":[");
|
||||||
|
|
||||||
|
boolean first = true;
|
||||||
|
for (Map.Entry<Integer, Integer> entry : charCodeToUnicode.entrySet()) {
|
||||||
|
int charCode = entry.getKey();
|
||||||
|
int unicode = entry.getValue();
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Get CID from char code
|
||||||
|
int cid = type0Font.codeToCID(charCode);
|
||||||
|
// For Identity-H/V encoding, GID == CID
|
||||||
|
int gid = cid;
|
||||||
|
|
||||||
|
if (!first) {
|
||||||
|
json.append(",");
|
||||||
|
}
|
||||||
|
first = false;
|
||||||
|
json.append(String.format("{\"code\":%d,\"cid\":%d,\"gid\":%d,\"unicode\":%d}",
|
||||||
|
charCode, cid, gid, unicode));
|
||||||
|
} catch (Exception e) {
|
||||||
|
// Skip entries that fail to map
|
||||||
|
log.debug("Failed to map charCode {} in font {}: {}", charCode, font.getName(), e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
json.append("]}");
|
||||||
|
String jsonStr = json.toString();
|
||||||
|
log.debug("Built Unicode mapping for CID font {} with {} entries",
|
||||||
|
font.getName(), charCodeToUnicode.size());
|
||||||
|
return Base64.getEncoder().encodeToString(jsonStr.getBytes(StandardCharsets.UTF_8));
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Failed to build Unicode mapping for font {}: {}", font.getName(), e.getMessage());
|
||||||
|
return toUnicodeBase64; // Fall back to raw ToUnicode
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private PdfJsonFontCidSystemInfo extractCidSystemInfo(COSDictionary fontDictionary) {
|
private PdfJsonFontCidSystemInfo extractCidSystemInfo(COSDictionary fontDictionary) {
|
||||||
if (fontDictionary == null) {
|
if (fontDictionary == null) {
|
||||||
return null;
|
return null;
|
||||||
@ -824,7 +1205,7 @@ public class PdfJsonConversionService {
|
|||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
private FontProgramData extractFontProgram(PDFont font) throws IOException {
|
private FontProgramData extractFontProgram(PDFont font, String toUnicode) throws IOException {
|
||||||
PDFontDescriptor descriptor = font.getFontDescriptor();
|
PDFontDescriptor descriptor = font.getFontDescriptor();
|
||||||
if (descriptor == null) {
|
if (descriptor == null) {
|
||||||
return null;
|
return null;
|
||||||
@ -833,24 +1214,24 @@ public class PdfJsonConversionService {
|
|||||||
PDStream fontFile3 = descriptor.getFontFile3();
|
PDStream fontFile3 = descriptor.getFontFile3();
|
||||||
if (fontFile3 != null) {
|
if (fontFile3 != null) {
|
||||||
String subtype = fontFile3.getCOSObject().getNameAsString(COSName.SUBTYPE);
|
String subtype = fontFile3.getCOSObject().getNameAsString(COSName.SUBTYPE);
|
||||||
return readFontProgram(fontFile3, subtype != null ? subtype : "fontfile3", false);
|
return readFontProgram(fontFile3, subtype != null ? subtype : "fontfile3", false, toUnicode);
|
||||||
}
|
}
|
||||||
|
|
||||||
PDStream fontFile2 = descriptor.getFontFile2();
|
PDStream fontFile2 = descriptor.getFontFile2();
|
||||||
if (fontFile2 != null) {
|
if (fontFile2 != null) {
|
||||||
return readFontProgram(fontFile2, null, true);
|
return readFontProgram(fontFile2, null, true, toUnicode);
|
||||||
}
|
}
|
||||||
|
|
||||||
PDStream fontFile = descriptor.getFontFile();
|
PDStream fontFile = descriptor.getFontFile();
|
||||||
if (fontFile != null) {
|
if (fontFile != null) {
|
||||||
return readFontProgram(fontFile, "type1", false);
|
return readFontProgram(fontFile, "type1", false, toUnicode);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private FontProgramData readFontProgram(
|
private FontProgramData readFontProgram(
|
||||||
PDStream stream, String formatHint, boolean detectTrueType) throws IOException {
|
PDStream stream, String formatHint, boolean detectTrueType, String toUnicode) throws IOException {
|
||||||
try (InputStream inputStream = stream.createInputStream();
|
try (InputStream inputStream = stream.createInputStream();
|
||||||
ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||||
inputStream.transferTo(baos);
|
inputStream.transferTo(baos);
|
||||||
@ -859,8 +1240,21 @@ public class PdfJsonConversionService {
|
|||||||
if (detectTrueType) {
|
if (detectTrueType) {
|
||||||
format = detectTrueTypeFormat(data);
|
format = detectTrueTypeFormat(data);
|
||||||
}
|
}
|
||||||
|
String webBase64 = null;
|
||||||
|
String webFormat = null;
|
||||||
|
if (format != null && isCffFormat(format)) {
|
||||||
|
log.debug("Detected CFF font format: {}, wrapping as OpenType-CFF for web preview", format);
|
||||||
|
byte[] converted = convertCffProgramToTrueType(data, toUnicode);
|
||||||
|
if (converted != null && converted.length > 0) {
|
||||||
|
webBase64 = Base64.getEncoder().encodeToString(converted);
|
||||||
|
webFormat = "otf";
|
||||||
|
log.debug("CFF→OTF wrapping successful: {} bytes → {} bytes", data.length, converted.length);
|
||||||
|
} else {
|
||||||
|
log.debug("CFF→OTF wrapping returned null or empty result");
|
||||||
|
}
|
||||||
|
}
|
||||||
String base64 = Base64.getEncoder().encodeToString(data);
|
String base64 = Base64.getEncoder().encodeToString(data);
|
||||||
return new FontProgramData(base64, format);
|
return new FontProgramData(base64, format, webBase64, webFormat);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1759,8 +2153,12 @@ public class PdfJsonConversionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
PDFont baseFont = primaryFont;
|
PDFont baseFont = primaryFont;
|
||||||
|
boolean fallbackApplied = primaryFont == null;
|
||||||
if (baseFont == null) {
|
if (baseFont == null) {
|
||||||
baseFont = ensureFallbackFont(document, fontMap, fontModels, FALLBACK_FONT_ID);
|
baseFont = ensureFallbackFont(document, fontMap, fontModels, FALLBACK_FONT_ID);
|
||||||
|
if (baseFont != null) {
|
||||||
|
fallbackApplied = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (baseFont == null) {
|
if (baseFont == null) {
|
||||||
log.warn("Unable to resolve a base font for text element; skipping text content");
|
log.warn("Unable to resolve a base font for text element; skipping text content");
|
||||||
@ -1777,6 +2175,7 @@ public class PdfJsonConversionService {
|
|||||||
PDFont targetFont = currentFont;
|
PDFont targetFont = currentFont;
|
||||||
|
|
||||||
if (!canEncode(baseFont, codePoint)) {
|
if (!canEncode(baseFont, codePoint)) {
|
||||||
|
fallbackApplied = true;
|
||||||
String fallbackId = resolveFallbackFontId(codePoint);
|
String fallbackId = resolveFallbackFontId(codePoint);
|
||||||
targetFont = ensureFallbackFont(document, fontMap, fontModels, fallbackId);
|
targetFont = ensureFallbackFont(document, fontMap, fontModels, fallbackId);
|
||||||
if (targetFont == null || !canEncode(targetFont, glyph)) {
|
if (targetFont == null || !canEncode(targetFont, glyph)) {
|
||||||
@ -1823,6 +2222,10 @@ public class PdfJsonConversionService {
|
|||||||
runs.add(new FontRun(currentFont, buffer.toString()));
|
runs.add(new FontRun(currentFont, buffer.toString()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (fallbackApplied) {
|
||||||
|
element.setFallbackUsed(Boolean.TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
return runs;
|
return runs;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2019,10 +2422,14 @@ public class PdfJsonConversionService {
|
|||||||
private static class FontProgramData {
|
private static class FontProgramData {
|
||||||
private final String base64;
|
private final String base64;
|
||||||
private final String format;
|
private final String format;
|
||||||
|
private final String webBase64;
|
||||||
|
private final String webFormat;
|
||||||
|
|
||||||
private FontProgramData(String base64, String format) {
|
private FontProgramData(String base64, String format, String webBase64, String webFormat) {
|
||||||
this.base64 = base64;
|
this.base64 = base64;
|
||||||
this.format = format;
|
this.format = format;
|
||||||
|
this.webBase64 = webBase64;
|
||||||
|
this.webFormat = webFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getBase64() {
|
private String getBase64() {
|
||||||
@ -2032,6 +2439,14 @@ public class PdfJsonConversionService {
|
|||||||
private String getFormat() {
|
private String getFormat() {
|
||||||
return format;
|
return format;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String getWebBase64() {
|
||||||
|
return webBase64;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getWebFormat() {
|
||||||
|
return webFormat;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final class PreflightResult {
|
private static final class PreflightResult {
|
||||||
@ -2371,46 +2786,106 @@ public class PdfJsonConversionService {
|
|||||||
return loadFallbackPdfFont(document);
|
return loadFallbackPdfFont(document);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IMPORTANT: Dictionary restoration is disabled because deserialized dictionaries
|
||||||
|
// don't properly include the font stream references (FontFile/FontFile2/FontFile3).
|
||||||
|
// This results in fonts that structurally exist but can't encode glyphs, causing
|
||||||
|
// fallback to NotoSans. Instead, we ALWAYS use program bytes for reliable encoding.
|
||||||
|
// The cosDictionary field is preserved in the JSON for potential future use, but
|
||||||
|
// for now we rely on direct font program loading.
|
||||||
|
if (false && fontModel.getCosDictionary() != null) {
|
||||||
|
// Dictionary restoration code kept for reference but disabled
|
||||||
|
COSBase restored = deserializeCosValue(fontModel.getCosDictionary(), document);
|
||||||
|
if (restored instanceof COSDictionary cosDictionary) {
|
||||||
|
try {
|
||||||
|
PDFont font = PDFontFactory.createFont(cosDictionary);
|
||||||
|
if (font != null && font.isEmbedded()) {
|
||||||
|
// Verify font can actually encode a basic character
|
||||||
|
try {
|
||||||
|
font.encode("A");
|
||||||
|
applyAdditionalFontMetadata(document, font, fontModel);
|
||||||
|
log.debug("Successfully restored embedded font {} from dictionary", fontModel.getId());
|
||||||
|
return font;
|
||||||
|
} catch (IOException | IllegalArgumentException encodingEx) {
|
||||||
|
log.warn(
|
||||||
|
"Font {} restored from dictionary but failed encoding test: {}; falling back to program bytes",
|
||||||
|
fontModel.getId(),
|
||||||
|
encodingEx.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException ex) {
|
||||||
|
log.warn(
|
||||||
|
"Failed to restore font {} from stored dictionary: {}; falling back to program bytes",
|
||||||
|
fontModel.getId(),
|
||||||
|
ex.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] fontBytes = null;
|
||||||
|
String format = null;
|
||||||
|
|
||||||
|
// For CFF/Type1C fonts, prefer the webProgram (converted TrueType) because:
|
||||||
|
// 1. PDFBox's PDType0Font.load() expects TrueType/OpenType format
|
||||||
|
// 2. Raw CFF program bytes lack the descriptor context needed for reconstruction
|
||||||
|
// 3. FontForge-converted TrueType is reliable for both web preview and PDF export
|
||||||
|
String originalFormat =
|
||||||
|
fontModel.getProgramFormat() != null
|
||||||
|
? fontModel.getProgramFormat().toLowerCase(Locale.ROOT)
|
||||||
|
: null;
|
||||||
|
// For JSON→PDF conversion, always use original font bytes
|
||||||
|
// (PDFBox doesn't support OpenType-CFF; webProgram is only for frontend web preview)
|
||||||
String program = fontModel.getProgram();
|
String program = fontModel.getProgram();
|
||||||
if (program != null && !program.isBlank()) {
|
if (program != null && !program.isBlank()) {
|
||||||
byte[] fontBytes = Base64.getDecoder().decode(program);
|
fontBytes = Base64.getDecoder().decode(program);
|
||||||
String format =
|
format = originalFormat;
|
||||||
fontModel.getProgramFormat() != null
|
log.debug("Using original font program for {} (format: {})", fontModel.getId(), originalFormat);
|
||||||
? fontModel.getProgramFormat().toLowerCase(Locale.ROOT)
|
} else if (fontModel.getWebProgram() != null && !fontModel.getWebProgram().isBlank()) {
|
||||||
: "";
|
// Fallback to webProgram if original program is unavailable
|
||||||
|
fontBytes = Base64.getDecoder().decode(fontModel.getWebProgram());
|
||||||
|
format =
|
||||||
|
fontModel.getWebProgramFormat() != null
|
||||||
|
? fontModel.getWebProgramFormat().toLowerCase(Locale.ROOT)
|
||||||
|
: null;
|
||||||
|
log.debug("Using web-optimized font program for {} (original program unavailable)", fontModel.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fontBytes != null && fontBytes.length > 0) {
|
||||||
try {
|
try {
|
||||||
if (isCffFormat(format)) {
|
|
||||||
byte[] converted = convertCffProgramToTrueType(fontBytes);
|
|
||||||
if (converted != null) {
|
|
||||||
fontBytes = converted;
|
|
||||||
format = "ttf";
|
|
||||||
log.debug(
|
|
||||||
"Converted CFF font {} to TrueType outlines for embedding",
|
|
||||||
fontModel.getId());
|
|
||||||
} else {
|
|
||||||
log.debug(
|
|
||||||
"Unable to convert CFF font {} to TrueType; attempting direct load",
|
|
||||||
fontModel.getId());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (isType1Format(format)) {
|
if (isType1Format(format)) {
|
||||||
try (InputStream stream = new ByteArrayInputStream(fontBytes)) {
|
try (InputStream stream = new ByteArrayInputStream(fontBytes)) {
|
||||||
PDFont font = new PDType1Font(document, stream);
|
PDFont font = new PDType1Font(document, stream);
|
||||||
applyAdditionalFontMetadata(document, font, fontModel);
|
applyAdditionalFontMetadata(document, font, fontModel);
|
||||||
|
log.debug(
|
||||||
|
"Successfully loaded Type1 font {} from program bytes (format: {}, originalFormat: {})",
|
||||||
|
fontModel.getId(),
|
||||||
|
format,
|
||||||
|
originalFormat);
|
||||||
return font;
|
return font;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try (InputStream stream = new ByteArrayInputStream(fontBytes)) {
|
try (InputStream stream = new ByteArrayInputStream(fontBytes)) {
|
||||||
PDFont font = PDType0Font.load(document, stream, true);
|
PDFont font = PDType0Font.load(document, stream, true);
|
||||||
applyAdditionalFontMetadata(document, font, fontModel);
|
applyAdditionalFontMetadata(document, font, fontModel);
|
||||||
|
log.debug(
|
||||||
|
"Successfully loaded Type0 font {} from program bytes (format: {}, originalFormat: {})",
|
||||||
|
fontModel.getId(),
|
||||||
|
format,
|
||||||
|
originalFormat);
|
||||||
return font;
|
return font;
|
||||||
}
|
}
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
log.debug(
|
log.warn(
|
||||||
"Unable to load embedded font program for {}: {}",
|
"Unable to load embedded font program for {} (format: {}, originalFormat: {}): {}; falling back to Standard 14 or default",
|
||||||
fontModel.getId(),
|
fontModel.getId(),
|
||||||
|
format,
|
||||||
|
originalFormat,
|
||||||
ex.getMessage());
|
ex.getMessage());
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
log.warn(
|
||||||
|
"Font {} has no program bytes available (originalFormat: {})",
|
||||||
|
fontModel.getId(),
|
||||||
|
originalFormat);
|
||||||
}
|
}
|
||||||
|
|
||||||
String standardName = fontModel.getStandard14Name();
|
String standardName = fontModel.getStandard14Name();
|
||||||
|
|||||||
@ -173,9 +173,12 @@ stirling:
|
|||||||
fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
|
fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
|
||||||
json:
|
json:
|
||||||
font-normalization:
|
font-normalization:
|
||||||
enabled: true # Run Ghostscript preflight to normalize fonts before PDF→JSON
|
enabled: false # IMPORTANT: Disable to preserve ToUnicode CMaps for correct font rendering. Ghostscript strips Unicode mappings from CID fonts.
|
||||||
cff-converter:
|
cff-converter:
|
||||||
enabled: true # Attempt to transcode CFF/Type1C programs to OTF using FontForge when available
|
enabled: true # Wrap CFF/Type1C fonts as OpenType-CFF for browser compatibility
|
||||||
|
method: python # Converter method: 'python' (fontTools, recommended - wraps as OTF), 'fontforge' (legacy - converts to TTF, may hang on CID fonts)
|
||||||
|
python-command: /opt/venv/bin/python3 # Python interpreter path
|
||||||
|
python-script: /scripts/convert_cff_to_ttf.py # Path to font wrapping script
|
||||||
fontforge-command: fontforge # Override if FontForge is installed under a different name/path
|
fontforge-command: fontforge # Override if FontForge is installed under a different name/path
|
||||||
|
|
||||||
ui:
|
ui:
|
||||||
|
|||||||
@ -5,10 +5,6 @@ services:
|
|||||||
dockerfile: docker/backend/Dockerfile
|
dockerfile: docker/backend/Dockerfile
|
||||||
container_name: stirling-pdf-backend
|
container_name: stirling-pdf-backend
|
||||||
restart: on-failure:5
|
restart: on-failure:5
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
memory: 4G
|
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "curl -f http://localhost:8080/api/v1/info/status | grep -q 'UP'"]
|
test: ["CMD-SHELL", "curl -f http://localhost:8080/api/v1/info/status | grep -q 'UP'"]
|
||||||
interval: 5s
|
interval: 5s
|
||||||
|
|||||||
@ -95,8 +95,9 @@ const decodeBase64ToUint8Array = (value: string): Uint8Array => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const buildFontFamilyName = (font: PdfJsonFont): string => {
|
const buildFontFamilyName = (font: PdfJsonFont): string => {
|
||||||
const base = (font.uid ?? font.id ?? 'font').toString();
|
const preferred = (font.baseName ?? '').trim();
|
||||||
return `pdf-font-${base.replace(/[^a-zA-Z0-9_-]/g, '')}`;
|
const identifier = preferred.length > 0 ? preferred : (font.uid ?? font.id ?? 'font').toString();
|
||||||
|
return `pdf-font-${identifier.replace(/[^a-zA-Z0-9_-]/g, '')}`;
|
||||||
};
|
};
|
||||||
|
|
||||||
const getCaretOffset = (element: HTMLElement): number => {
|
const getCaretOffset = (element: HTMLElement): number => {
|
||||||
@ -313,18 +314,34 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
|
|
||||||
const next = new Map<string, string>();
|
const next = new Map<string, string>();
|
||||||
for (const font of fonts) {
|
for (const font of fonts) {
|
||||||
if (!font?.id || !font.program) {
|
if (!font?.id) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const programSource = font.webProgram && font.webProgram.length > 0 ? font.webProgram : font.program;
|
||||||
|
if (!programSource) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const format = normalizeFontFormat(font.programFormat);
|
const formatSource = font.webProgram && font.webProgram.length > 0 ? font.webProgramFormat : font.programFormat;
|
||||||
const data = decodeBase64ToUint8Array(font.program);
|
const format = normalizeFontFormat(formatSource);
|
||||||
|
const data = decodeBase64ToUint8Array(programSource);
|
||||||
const blob = new Blob([data as BlobPart], { type: getFontMimeType(format) });
|
const blob = new Blob([data as BlobPart], { type: getFontMimeType(format) });
|
||||||
const url = URL.createObjectURL(blob);
|
const url = URL.createObjectURL(blob);
|
||||||
const formatHint = getFontFormatHint(format);
|
const formatHint = getFontFormatHint(format);
|
||||||
const familyName = buildFontFamilyName(font);
|
const familyName = buildFontFamilyName(font);
|
||||||
const source = formatHint ? `url(${url}) format('${formatHint}')` : `url(${url})`;
|
const source = formatHint ? `url(${url}) format('${formatHint}')` : `url(${url})`;
|
||||||
const fontFace = new FontFace(familyName, source);
|
const fontFace = new FontFace(familyName, source);
|
||||||
|
|
||||||
|
console.debug(`[FontLoader] Loading font ${font.id} (${font.baseName}):`, {
|
||||||
|
formatSource,
|
||||||
|
format,
|
||||||
|
formatHint,
|
||||||
|
familyName,
|
||||||
|
dataLength: data.length,
|
||||||
|
hasWebProgram: !!font.webProgram,
|
||||||
|
hasProgram: !!font.program
|
||||||
|
});
|
||||||
|
|
||||||
await fontFace.load();
|
await fontFace.load();
|
||||||
if (disposed) {
|
if (disposed) {
|
||||||
document.fonts.delete(fontFace);
|
document.fonts.delete(fontFace);
|
||||||
@ -334,8 +351,14 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
document.fonts.add(fontFace);
|
document.fonts.add(fontFace);
|
||||||
active.push({ fontFace, url });
|
active.push({ fontFace, url });
|
||||||
next.set(font.id, familyName);
|
next.set(font.id, familyName);
|
||||||
|
console.debug(`[FontLoader] Successfully loaded font ${font.id}`);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Silently ignore font loading failures - embedded PDF fonts often lack web font tables
|
console.warn(`[FontLoader] Failed to load font ${font.id} (${font.baseName}):`, {
|
||||||
|
error: error instanceof Error ? error.message : String(error),
|
||||||
|
formatSource: font.webProgram && font.webProgram.length > 0 ? font.webProgramFormat : font.programFormat,
|
||||||
|
hasWebProgram: !!font.webProgram,
|
||||||
|
hasProgram: !!font.program
|
||||||
|
});
|
||||||
// Fallback to web-safe fonts is already implemented via getFontFamily()
|
// Fallback to web-safe fonts is already implemented via getFontFamily()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -776,7 +799,8 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
const fontFamily = getFontFamily(group.fontId);
|
const fontFamily = getFontFamily(group.fontId);
|
||||||
const lineHeightPx = getLineHeightPx(group.fontId, fontSizePx);
|
const lineHeightPx = getLineHeightPx(group.fontId, fontSizePx);
|
||||||
const lineHeightRatio = fontSizePx > 0 ? Math.max(lineHeightPx / fontSizePx, 1.05) : 1.2;
|
const lineHeightRatio = fontSizePx > 0 ? Math.max(lineHeightPx / fontSizePx, 1.05) : 1.2;
|
||||||
const hasRotation = group.rotation != null && Math.abs(group.rotation) > 0.5;
|
const rotation = group.rotation ?? 0;
|
||||||
|
const hasRotation = Math.abs(rotation) > 0.5;
|
||||||
const baselineLength = group.baselineLength ?? Math.max(group.bounds.right - group.bounds.left, 0);
|
const baselineLength = group.baselineLength ?? Math.max(group.bounds.right - group.bounds.left, 0);
|
||||||
|
|
||||||
let containerLeft = bounds.left;
|
let containerLeft = bounds.left;
|
||||||
@ -795,7 +819,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
containerHeight = Math.max(lineHeightPx, fontSizePx * lineHeightRatio);
|
containerHeight = Math.max(lineHeightPx, fontSizePx * lineHeightRatio);
|
||||||
transformOrigin = 'left bottom';
|
transformOrigin = 'left bottom';
|
||||||
// Negate rotation because Y-axis is flipped from PDF to web coordinates
|
// Negate rotation because Y-axis is flipped from PDF to web coordinates
|
||||||
transform = `rotate(${-group.rotation}deg)`;
|
transform = `rotate(${-rotation}deg)`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract styling from group
|
// Extract styling from group
|
||||||
|
|||||||
@ -9,6 +9,14 @@ export interface PdfJsonTextColor {
|
|||||||
components?: number[] | null;
|
components?: number[] | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface PdfJsonCosValue {
|
||||||
|
type?: string | null;
|
||||||
|
value?: unknown;
|
||||||
|
items?: PdfJsonCosValue[] | null;
|
||||||
|
entries?: Record<string, PdfJsonCosValue | null> | null;
|
||||||
|
stream?: PdfJsonStream | null;
|
||||||
|
}
|
||||||
|
|
||||||
export interface PdfJsonFont {
|
export interface PdfJsonFont {
|
||||||
id?: string;
|
id?: string;
|
||||||
pageNumber?: number | null;
|
pageNumber?: number | null;
|
||||||
@ -20,6 +28,8 @@ export interface PdfJsonFont {
|
|||||||
embedded?: boolean | null;
|
embedded?: boolean | null;
|
||||||
program?: string | null;
|
program?: string | null;
|
||||||
programFormat?: string | null;
|
programFormat?: string | null;
|
||||||
|
webProgram?: string | null;
|
||||||
|
webProgramFormat?: string | null;
|
||||||
toUnicode?: string | null;
|
toUnicode?: string | null;
|
||||||
standard14Name?: string | null;
|
standard14Name?: string | null;
|
||||||
fontDescriptorFlags?: number | null;
|
fontDescriptorFlags?: number | null;
|
||||||
@ -29,6 +39,7 @@ export interface PdfJsonFont {
|
|||||||
xHeight?: number | null;
|
xHeight?: number | null;
|
||||||
italicAngle?: number | null;
|
italicAngle?: number | null;
|
||||||
unitsPerEm?: number | null;
|
unitsPerEm?: number | null;
|
||||||
|
cosDictionary?: PdfJsonCosValue | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface PdfJsonTextElement {
|
export interface PdfJsonTextElement {
|
||||||
@ -52,6 +63,7 @@ export interface PdfJsonTextElement {
|
|||||||
textMatrix?: number[] | null;
|
textMatrix?: number[] | null;
|
||||||
fillColor?: PdfJsonTextColor | null;
|
fillColor?: PdfJsonTextColor | null;
|
||||||
strokeColor?: PdfJsonTextColor | null;
|
strokeColor?: PdfJsonTextColor | null;
|
||||||
|
fallbackUsed?: boolean | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface PdfJsonImageElement {
|
export interface PdfJsonImageElement {
|
||||||
|
|||||||
@ -16,6 +16,48 @@ const MIN_CHAR_WIDTH_FACTOR = 0.35;
|
|||||||
const MAX_CHAR_WIDTH_FACTOR = 1.25;
|
const MAX_CHAR_WIDTH_FACTOR = 1.25;
|
||||||
const EXTRA_GAP_RATIO = 0.8;
|
const EXTRA_GAP_RATIO = 0.8;
|
||||||
|
|
||||||
|
type FontMetrics = {
|
||||||
|
unitsPerEm: number;
|
||||||
|
ascent: number;
|
||||||
|
descent: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
type FontMetricsMap = Map<string, FontMetrics>;
|
||||||
|
|
||||||
|
const countGraphemes = (text: string): number => {
|
||||||
|
if (!text) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return Array.from(text).length;
|
||||||
|
};
|
||||||
|
|
||||||
|
const metricsFor = (metrics: FontMetricsMap | undefined, fontId?: string | null): FontMetrics | undefined => {
|
||||||
|
if (!metrics || !fontId) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
return metrics.get(fontId) ?? undefined;
|
||||||
|
};
|
||||||
|
|
||||||
|
const buildFontMetrics = (document: PdfJsonDocument | null | undefined): FontMetricsMap => {
|
||||||
|
const metrics: FontMetricsMap = new Map();
|
||||||
|
document?.fonts?.forEach((font) => {
|
||||||
|
if (!font) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const unitsPerEm = font.unitsPerEm && font.unitsPerEm > 0 ? font.unitsPerEm : 1000;
|
||||||
|
const ascent = font.ascent ?? unitsPerEm * 0.8;
|
||||||
|
const descent = font.descent ?? -(unitsPerEm * 0.2);
|
||||||
|
const metric: FontMetrics = { unitsPerEm, ascent, descent };
|
||||||
|
if (font.id) {
|
||||||
|
metrics.set(font.id, metric);
|
||||||
|
}
|
||||||
|
if (font.uid) {
|
||||||
|
metrics.set(font.uid, metric);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return metrics;
|
||||||
|
};
|
||||||
|
|
||||||
export const valueOr = (value: number | null | undefined, fallback = 0): number => {
|
export const valueOr = (value: number | null | undefined, fallback = 0): number => {
|
||||||
if (value === null || value === undefined || Number.isNaN(value)) {
|
if (value === null || value === undefined || Number.isNaN(value)) {
|
||||||
return fallback;
|
return fallback;
|
||||||
@ -47,37 +89,87 @@ const getX = (element: PdfJsonTextElement): number => {
|
|||||||
return valueOr(element.x);
|
return valueOr(element.x);
|
||||||
};
|
};
|
||||||
|
|
||||||
const getWidth = (element: PdfJsonTextElement): number => {
|
const getWidth = (element: PdfJsonTextElement, metrics?: FontMetricsMap): number => {
|
||||||
const width = valueOr(element.width, 0);
|
const width = valueOr(element.width, 0);
|
||||||
if (width === 0 && element.text) {
|
if (width > 0) {
|
||||||
const fontSize = valueOr(element.fontSize, 12);
|
return width;
|
||||||
return fontSize * Math.max(element.text.length * 0.45, 0.5);
|
|
||||||
}
|
}
|
||||||
return width;
|
|
||||||
|
const text = element.text ?? '';
|
||||||
|
const glyphCount = Math.max(1, countGraphemes(text));
|
||||||
|
const spacingFallback = Math.max(
|
||||||
|
valueOr(element.spaceWidth, 0),
|
||||||
|
valueOr(element.wordSpacing, 0),
|
||||||
|
valueOr(element.characterSpacing, 0),
|
||||||
|
);
|
||||||
|
|
||||||
|
if (spacingFallback > 0 && text.trim().length === 0) {
|
||||||
|
return spacingFallback;
|
||||||
|
}
|
||||||
|
|
||||||
|
const fontSize = getFontSize(element);
|
||||||
|
const fontMetrics = metricsFor(metrics, element.fontId);
|
||||||
|
if (fontMetrics) {
|
||||||
|
const unitsPerEm = fontMetrics.unitsPerEm > 0 ? fontMetrics.unitsPerEm : 1000;
|
||||||
|
const ascentUnits = fontMetrics.ascent ?? unitsPerEm * 0.8;
|
||||||
|
const descentUnits = Math.abs(fontMetrics.descent ?? -(unitsPerEm * 0.2));
|
||||||
|
const combinedUnits = Math.max(unitsPerEm * 0.8, ascentUnits + descentUnits);
|
||||||
|
const averageAdvanceUnits = Math.max(unitsPerEm * 0.5, combinedUnits / Math.max(1, glyphCount));
|
||||||
|
const fallbackWidth = (averageAdvanceUnits / unitsPerEm) * glyphCount * fontSize;
|
||||||
|
if (fallbackWidth > 0) {
|
||||||
|
return fallbackWidth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return fontSize * glyphCount * 0.5;
|
||||||
};
|
};
|
||||||
|
|
||||||
const getFontSize = (element: PdfJsonTextElement): number => valueOr(element.fontMatrixSize ?? element.fontSize, 12);
|
const getFontSize = (element: PdfJsonTextElement): number => valueOr(element.fontMatrixSize ?? element.fontSize, 12);
|
||||||
|
|
||||||
const getHeight = (element: PdfJsonTextElement): number => {
|
const getHeight = (element: PdfJsonTextElement, metrics?: FontMetricsMap): number => {
|
||||||
const height = valueOr(element.height);
|
const height = valueOr(element.height, 0);
|
||||||
if (height === 0) {
|
if (height > 0) {
|
||||||
return getFontSize(element) * 1.05;
|
return height;
|
||||||
}
|
}
|
||||||
return height;
|
const fontSize = getFontSize(element);
|
||||||
|
const fontMetrics = metricsFor(metrics, element.fontId);
|
||||||
|
if (fontMetrics) {
|
||||||
|
const unitsPerEm = fontMetrics.unitsPerEm > 0 ? fontMetrics.unitsPerEm : 1000;
|
||||||
|
const ascentUnits = fontMetrics.ascent ?? unitsPerEm * 0.8;
|
||||||
|
const descentUnits = Math.abs(fontMetrics.descent ?? -(unitsPerEm * 0.2));
|
||||||
|
const totalUnits = Math.max(unitsPerEm, ascentUnits + descentUnits);
|
||||||
|
if (totalUnits > 0) {
|
||||||
|
return (totalUnits / unitsPerEm) * fontSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fontSize;
|
||||||
};
|
};
|
||||||
|
|
||||||
const getElementBounds = (element: PdfJsonTextElement): BoundingBox => {
|
const getElementBounds = (
|
||||||
|
element: PdfJsonTextElement,
|
||||||
|
metrics?: FontMetricsMap,
|
||||||
|
): BoundingBox => {
|
||||||
const left = getX(element);
|
const left = getX(element);
|
||||||
const width = getWidth(element);
|
const width = getWidth(element, metrics);
|
||||||
const baseline = getBaseline(element);
|
const baseline = getBaseline(element);
|
||||||
const height = getHeight(element);
|
const height = getHeight(element, metrics);
|
||||||
// In PDF coordinates, baseline is where text sits
|
|
||||||
// Typical typography: ~80% of height above baseline (ascenders), ~20% below (descenders)
|
let ascentRatio = 0.8;
|
||||||
// Using codebase's inverted naming: bottom (visual top) > top (visual bottom)
|
let descentRatio = 0.2;
|
||||||
const ascent = height * 0.8;
|
const fontMetrics = metricsFor(metrics, element.fontId);
|
||||||
const descent = height * 0.2;
|
if (fontMetrics) {
|
||||||
const bottom = baseline + ascent; // Visual top of text
|
const unitsPerEm = fontMetrics.unitsPerEm > 0 ? fontMetrics.unitsPerEm : 1000;
|
||||||
const top = baseline - descent; // Visual bottom (includes descenders)
|
const ascentUnits = fontMetrics.ascent ?? unitsPerEm * 0.8;
|
||||||
|
const descentUnits = Math.abs(fontMetrics.descent ?? -(unitsPerEm * 0.2));
|
||||||
|
const totalUnits = Math.max(unitsPerEm, ascentUnits + descentUnits);
|
||||||
|
if (totalUnits > 0) {
|
||||||
|
ascentRatio = ascentUnits / totalUnits;
|
||||||
|
descentRatio = descentUnits / totalUnits;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const bottom = baseline + height * ascentRatio;
|
||||||
|
const top = baseline - height * descentRatio;
|
||||||
return {
|
return {
|
||||||
left,
|
left,
|
||||||
right: left + width,
|
right: left + width,
|
||||||
@ -114,8 +206,12 @@ const getSpacingHint = (element: PdfJsonTextElement): number => {
|
|||||||
return Math.max(characterSpacing, 0);
|
return Math.max(characterSpacing, 0);
|
||||||
};
|
};
|
||||||
|
|
||||||
const estimateCharWidth = (element: PdfJsonTextElement, avgFontSize: number): number => {
|
const estimateCharWidth = (
|
||||||
const rawWidth = getWidth(element);
|
element: PdfJsonTextElement,
|
||||||
|
avgFontSize: number,
|
||||||
|
metrics?: FontMetricsMap,
|
||||||
|
): number => {
|
||||||
|
const rawWidth = getWidth(element, metrics);
|
||||||
const minWidth = avgFontSize * MIN_CHAR_WIDTH_FACTOR;
|
const minWidth = avgFontSize * MIN_CHAR_WIDTH_FACTOR;
|
||||||
const maxWidth = avgFontSize * MAX_CHAR_WIDTH_FACTOR;
|
const maxWidth = avgFontSize * MAX_CHAR_WIDTH_FACTOR;
|
||||||
return Math.min(Math.max(rawWidth, minWidth), maxWidth);
|
return Math.min(Math.max(rawWidth, minWidth), maxWidth);
|
||||||
@ -136,12 +232,16 @@ const mergeBounds = (bounds: BoundingBox[]): BoundingBox => {
|
|||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
const shouldInsertSpace = (prev: PdfJsonTextElement, current: PdfJsonTextElement): boolean => {
|
const shouldInsertSpace = (
|
||||||
const prevRight = getX(prev) + getWidth(prev);
|
prev: PdfJsonTextElement,
|
||||||
|
current: PdfJsonTextElement,
|
||||||
|
metrics?: FontMetricsMap,
|
||||||
|
): boolean => {
|
||||||
|
const prevRight = getX(prev) + getWidth(prev, metrics);
|
||||||
const trailingGap = Math.max(0, getX(current) - prevRight);
|
const trailingGap = Math.max(0, getX(current) - prevRight);
|
||||||
const avgFontSize = (getFontSize(prev) + getFontSize(current)) / 2;
|
const avgFontSize = (getFontSize(prev) + getFontSize(current)) / 2;
|
||||||
const baselineAdvance = Math.max(0, getX(current) - getX(prev));
|
const baselineAdvance = Math.max(0, getX(current) - getX(prev));
|
||||||
const charWidthEstimate = estimateCharWidth(prev, avgFontSize);
|
const charWidthEstimate = estimateCharWidth(prev, avgFontSize, metrics);
|
||||||
const inferredGap = Math.max(0, baselineAdvance - charWidthEstimate);
|
const inferredGap = Math.max(0, baselineAdvance - charWidthEstimate);
|
||||||
const spacingHint = Math.max(
|
const spacingHint = Math.max(
|
||||||
SPACE_MIN_GAP,
|
SPACE_MIN_GAP,
|
||||||
@ -166,7 +266,7 @@ const shouldInsertSpace = (prev: PdfJsonTextElement, current: PdfJsonTextElement
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
const buildGroupText = (elements: PdfJsonTextElement[]): string => {
|
const buildGroupText = (elements: PdfJsonTextElement[], metrics?: FontMetricsMap): string => {
|
||||||
let result = '';
|
let result = '';
|
||||||
elements.forEach((element, index) => {
|
elements.forEach((element, index) => {
|
||||||
const value = element.text ?? '';
|
const value = element.text ?? '';
|
||||||
@ -176,7 +276,7 @@ const buildGroupText = (elements: PdfJsonTextElement[]): string => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const previous = elements[index - 1];
|
const previous = elements[index - 1];
|
||||||
const needsSpace = shouldInsertSpace(previous, element);
|
const needsSpace = shouldInsertSpace(previous, element, metrics);
|
||||||
const startsWithWhitespace = /^\s/u.test(value);
|
const startsWithWhitespace = /^\s/u.test(value);
|
||||||
|
|
||||||
if (needsSpace && !startsWithWhitespace) {
|
if (needsSpace && !startsWithWhitespace) {
|
||||||
@ -314,21 +414,24 @@ const getAnchorPoint = (element: PdfJsonTextElement): { x: number; y: number } =
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
const computeBaselineLength = (elements: PdfJsonTextElement[]): number =>
|
const computeBaselineLength = (
|
||||||
elements.reduce((acc, current) => acc + getWidth(current), 0);
|
elements: PdfJsonTextElement[],
|
||||||
|
metrics?: FontMetricsMap,
|
||||||
|
): number => elements.reduce((acc, current) => acc + getWidth(current, metrics), 0);
|
||||||
|
|
||||||
const createGroup = (
|
const createGroup = (
|
||||||
pageIndex: number,
|
pageIndex: number,
|
||||||
idSuffix: number,
|
idSuffix: number,
|
||||||
elements: PdfJsonTextElement[],
|
elements: PdfJsonTextElement[],
|
||||||
|
metrics?: FontMetricsMap,
|
||||||
): TextGroup => {
|
): TextGroup => {
|
||||||
const clones = elements.map(cloneTextElement);
|
const clones = elements.map(cloneTextElement);
|
||||||
const originalClones = clones.map(cloneTextElement);
|
const originalClones = clones.map(cloneTextElement);
|
||||||
const bounds = mergeBounds(elements.map(getElementBounds));
|
const bounds = mergeBounds(elements.map((element) => getElementBounds(element, metrics)));
|
||||||
const firstElement = elements[0];
|
const firstElement = elements[0];
|
||||||
const rotation = computeGroupRotation(elements);
|
const rotation = computeGroupRotation(elements);
|
||||||
const anchor = rotation !== null ? getAnchorPoint(firstElement) : null;
|
const anchor = rotation !== null ? getAnchorPoint(firstElement) : null;
|
||||||
const baselineLength = computeBaselineLength(elements);
|
const baselineLength = computeBaselineLength(elements, metrics);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
id: `${pageIndex}-${idSuffix}`,
|
id: `${pageIndex}-${idSuffix}`,
|
||||||
@ -343,13 +446,17 @@ const createGroup = (
|
|||||||
baselineLength,
|
baselineLength,
|
||||||
elements: clones,
|
elements: clones,
|
||||||
originalElements: originalClones,
|
originalElements: originalClones,
|
||||||
text: buildGroupText(elements),
|
text: buildGroupText(elements, metrics),
|
||||||
originalText: buildGroupText(elements),
|
originalText: buildGroupText(elements, metrics),
|
||||||
bounds,
|
bounds,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
export const groupPageTextElements = (page: PdfJsonPage | null | undefined, pageIndex: number): TextGroup[] => {
|
export const groupPageTextElements = (
|
||||||
|
page: PdfJsonPage | null | undefined,
|
||||||
|
pageIndex: number,
|
||||||
|
metrics?: FontMetricsMap,
|
||||||
|
): TextGroup[] => {
|
||||||
if (!page?.textElements || page.textElements.length === 0) {
|
if (!page?.textElements || page.textElements.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
@ -393,7 +500,7 @@ export const groupPageTextElements = (page: PdfJsonPage | null | undefined, page
|
|||||||
}
|
}
|
||||||
|
|
||||||
const previous = currentBucket[currentBucket.length - 1];
|
const previous = currentBucket[currentBucket.length - 1];
|
||||||
const gap = getX(element) - (getX(previous) + getWidth(previous));
|
const gap = getX(element) - (getX(previous) + getWidth(previous, metrics));
|
||||||
const avgFontSize = (getFontSize(previous) + getFontSize(element)) / 2;
|
const avgFontSize = (getFontSize(previous) + getFontSize(element)) / 2;
|
||||||
const splitThreshold = Math.max(SPACE_MIN_GAP, avgFontSize * GAP_FACTOR);
|
const splitThreshold = Math.max(SPACE_MIN_GAP, avgFontSize * GAP_FACTOR);
|
||||||
|
|
||||||
@ -412,7 +519,7 @@ export const groupPageTextElements = (page: PdfJsonPage | null | undefined, page
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (shouldSplit) {
|
if (shouldSplit) {
|
||||||
groups.push(createGroup(pageIndex, groupCounter, currentBucket));
|
groups.push(createGroup(pageIndex, groupCounter, currentBucket, metrics));
|
||||||
groupCounter += 1;
|
groupCounter += 1;
|
||||||
currentBucket = [element];
|
currentBucket = [element];
|
||||||
} else {
|
} else {
|
||||||
@ -421,7 +528,7 @@ export const groupPageTextElements = (page: PdfJsonPage | null | undefined, page
|
|||||||
});
|
});
|
||||||
|
|
||||||
if (currentBucket.length > 0) {
|
if (currentBucket.length > 0) {
|
||||||
groups.push(createGroup(pageIndex, groupCounter, currentBucket));
|
groups.push(createGroup(pageIndex, groupCounter, currentBucket, metrics));
|
||||||
groupCounter += 1;
|
groupCounter += 1;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@ -431,7 +538,8 @@ export const groupPageTextElements = (page: PdfJsonPage | null | undefined, page
|
|||||||
|
|
||||||
export const groupDocumentText = (document: PdfJsonDocument | null | undefined): TextGroup[][] => {
|
export const groupDocumentText = (document: PdfJsonDocument | null | undefined): TextGroup[][] => {
|
||||||
const pages = document?.pages ?? [];
|
const pages = document?.pages ?? [];
|
||||||
return pages.map((page, index) => groupPageTextElements(page, index));
|
const metrics = buildFontMetrics(document);
|
||||||
|
return pages.map((page, index) => groupPageTextElements(page, index, metrics));
|
||||||
};
|
};
|
||||||
|
|
||||||
export const extractPageImages = (
|
export const extractPageImages = (
|
||||||
|
|||||||
492
scripts/convert_cff_to_ttf.py
Normal file
492
scripts/convert_cff_to_ttf.py
Normal file
@ -0,0 +1,492 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Wrap raw CFF/Type1C data (extracted from PDFs) as OpenType-CFF for web compatibility.
|
||||||
|
Builds proper Unicode cmap from PDF ToUnicode data.
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from io import BytesIO
|
||||||
|
from fontTools.ttLib import TTFont, newTable
|
||||||
|
from fontTools.cffLib import CFFFontSet
|
||||||
|
from fontTools.ttLib.tables._c_m_a_p import cmap_format_4, cmap_format_12
|
||||||
|
from fontTools.ttLib.tables._n_a_m_e import NameRecord
|
||||||
|
from fontTools.ttLib.tables.O_S_2f_2 import Panose
|
||||||
|
|
||||||
|
def parse_unicode_mapping(mapping_path):
    """Parse a glyph→Unicode mapping file produced alongside an extracted font.

    The file is either:
      * JSON of the form ``{"isCID": true, "entries": [{"gid": ..., "unicode": ...}]}``
        (CID fonts, complete CharCode→CID→GID→Unicode chain), or
      * a raw PDF ToUnicode CMap (non-CID fonts, where char code == GID).

    Returns:
        dict[int, int]: GID → Unicode codepoint (entries with unicode <= 0 are
        dropped). Returns an empty dict on any parse failure — a missing cmap
        only degrades the rebuilt font's text mapping, it must not abort the
        conversion.
    """

    def hex_to_codepoint(hex_str):
        # Destination values in a ToUnicode CMap are UTF-16BE code units:
        # 4 hex digits is a BMP codepoint, but 8+ digits may encode a
        # surrogate pair (e.g. <D835DC00> = U+1D400). A plain int(..., 16)
        # would misread those, so decode as UTF-16BE and take the first
        # codepoint, falling back to the integer value.
        if len(hex_str) > 4 and len(hex_str) % 4 == 0:
            try:
                decoded = bytes.fromhex(hex_str).decode('utf-16-be')
                if decoded:
                    return ord(decoded[0])
            except (ValueError, UnicodeDecodeError):
                pass
        return int(hex_str, 16)

    try:
        with open(mapping_path, 'rb') as f:
            data = f.read().decode('utf-8', errors='ignore')

        # Try parsing as JSON first (CID font with complete mapping).
        if data.strip().startswith('{'):
            import json
            try:
                mapping_data = json.loads(data)
                if mapping_data.get('isCID'):
                    # Build GID → Unicode mapping from entries.
                    gid_to_unicode = {}
                    for entry in mapping_data.get('entries', []):
                        gid = entry['gid']
                        unicode_val = entry['unicode']
                        if unicode_val > 0:
                            gid_to_unicode[gid] = unicode_val
                    print(f"Parsed JSON mapping: {len(gid_to_unicode)} GID→Unicode entries", file=sys.stderr)
                    return gid_to_unicode
            except json.JSONDecodeError:
                pass  # Not JSON after all; fall through to the CMap parser.

        # Fall back to parsing a raw ToUnicode CMap (non-CID fonts).
        # For non-CID fonts, CID/GID is the same as the char code.
        gid_to_unicode = {}
        pair_pattern = r'<([0-9A-Fa-f]+)>\s*<([0-9A-Fa-f]+)>'
        triple_pattern = r'<([0-9A-Fa-f]+)>\s*<([0-9A-Fa-f]+)>\s*<([0-9A-Fa-f]+)>'

        # Scope the pair/triple scans to their CMap sections: a whole-file
        # pair scan would also match the first two groups of every bfrange
        # triple and, across adjacent entries, pair one entry's destination
        # with the next entry's source code.
        bfchar_sections = re.findall(r'beginbfchar(.*?)endbfchar', data, re.DOTALL)
        bfrange_sections = re.findall(r'beginbfrange(.*?)endbfrange', data, re.DOTALL)

        if bfchar_sections or bfrange_sections:
            for section in bfchar_sections:
                for match in re.finditer(pair_pattern, section):
                    gid = int(match.group(1), 16)  # For non-CID, char code == GID
                    unicode_val = hex_to_codepoint(match.group(2))
                    if unicode_val > 0:
                        gid_to_unicode[gid] = unicode_val
            for section in bfrange_sections:
                # NOTE: the array form "<lo> <hi> [<u1> <u2> ...]" is not
                # handled; only the contiguous "<lo> <hi> <start>" form is.
                for match in re.finditer(triple_pattern, section):
                    start_gid = int(match.group(1), 16)
                    end_gid = int(match.group(2), 16)
                    start_unicode = hex_to_codepoint(match.group(3))
                    for i, gid in enumerate(range(start_gid, end_gid + 1)):
                        unicode_val = start_unicode + i
                        if unicode_val > 0:
                            gid_to_unicode[gid] = unicode_val
        else:
            # No begin/end markers at all: keep the old whole-file scan as a
            # best-effort fallback for truncated or pre-extracted fragments.
            for match in re.finditer(pair_pattern, data):
                gid = int(match.group(1), 16)
                unicode_val = hex_to_codepoint(match.group(2))
                if unicode_val > 0:
                    gid_to_unicode[gid] = unicode_val
            for match in re.finditer(triple_pattern, data):
                start_gid = int(match.group(1), 16)
                end_gid = int(match.group(2), 16)
                start_unicode = hex_to_codepoint(match.group(3))
                for i, gid in enumerate(range(start_gid, end_gid + 1)):
                    unicode_val = start_unicode + i
                    if unicode_val > 0:
                        gid_to_unicode[gid] = unicode_val

        print(f"Parsed ToUnicode CMap: {len(gid_to_unicode)} mappings", file=sys.stderr)
        return gid_to_unicode

    except Exception as e:
        print(f"Warning: Failed to parse Unicode mapping: {e}", file=sys.stderr)
        return {}
def wrap_cff_as_otf(input_path, output_path, tounicode_path=None):
    """
    Wrap raw CFF data (from PDF font stream) as OpenType-CFF.

    Builds the minimal sfnt table set (head, hhea, hmtx, maxp, cmap, OS/2,
    name, post) around the existing CFF outlines so browsers will accept the
    font. Metrics are derived from the charstrings where possible; remaining
    values are conservative defaults.

    Args:
        input_path: Path to input CFF data file
        output_path: Path to output OTF font
        tounicode_path: Optional path to ToUnicode CMap file

    Returns:
        True if successful, False otherwise
    """
    try:
        # Read raw CFF data
        with open(input_path, 'rb') as f:
            cff_data = f.read()

        # Parse raw CFF data
        cff_fontset = CFFFontSet()
        cff_fontset.decompile(BytesIO(cff_data), None)

        # Get the first (and usually only) font in the CFF set
        if len(cff_fontset.fontNames) == 0:
            print("ERROR: No fonts found in CFF data", file=sys.stderr)
            return False

        cff_font = cff_fontset[cff_fontset.fontNames[0]]

        # Parse Unicode mapping (JSON or raw ToUnicode CMap) if provided
        gid_to_unicode = {}
        if tounicode_path:
            gid_to_unicode = parse_unicode_mapping(tounicode_path)

        # Create a new OTF font
        otf = TTFont(sfntVersion='OTTO')  # 'OTTO' = CFF-flavored OpenType

        # Get glyph names. '.notdef' must be glyph 0, so it is forced to the
        # front regardless of where the charset puts it.
        if hasattr(cff_font, 'charset') and cff_font.charset is not None:
            glyph_order = ['.notdef'] + [name for name in cff_font.charset if name != '.notdef']
        else:
            # Fallback to CharStrings keys
            charstrings = cff_font.CharStrings
            glyph_order = ['.notdef'] + [name for name in charstrings.keys() if name != '.notdef']

        otf.setGlyphOrder(glyph_order)

        # === Add CFF table (the actual font outlines) ===
        cff_table = newTable('CFF ')
        cff_table.cff = cff_fontset
        otf['CFF '] = cff_table

        # === Calculate metrics from CFF ===
        charstrings = cff_font.CharStrings

        # Get defaults from CFF Private dict
        private_dict = getattr(cff_font, 'Private', None)
        default_width = getattr(private_dict, 'defaultWidthX', 500) if private_dict else 500

        # Calculate bounding box, widths, and LSBs. The seeds below are
        # conservative starting values, widened by real glyph bounds.
        x_min = 0
        y_min = -200
        x_max = 1000
        y_max = 800
        max_advance = 0
        min_lsb = 0
        min_rsb = 0
        max_extent = 0

        widths = {}
        lsbs = {}

        for glyph_name in glyph_order:
            lsb = 0
            width = int(default_width)

            if glyph_name in charstrings:
                try:
                    cs = charstrings[glyph_name]

                    # Get width from charstring
                    if hasattr(cs, 'width'):
                        width = int(cs.width)

                    # Calculate bounds for LSB and bbox
                    try:
                        bounds = cs.calcBounds(None)
                        if bounds:
                            glyph_xmin = int(bounds[0])
                            glyph_ymin = int(bounds[1])
                            glyph_xmax = int(bounds[2])
                            glyph_ymax = int(bounds[3])

                            lsb = glyph_xmin
                            rsb = width - glyph_xmax
                            extent = lsb + glyph_xmax

                            # Update global bounds
                            x_min = min(x_min, glyph_xmin)
                            y_min = min(y_min, glyph_ymin)
                            x_max = max(x_max, glyph_xmax)
                            y_max = max(y_max, glyph_ymax)

                            # Update hhea metrics
                            min_lsb = min(min_lsb, lsb)
                            min_rsb = min(min_rsb, rsb)
                            max_extent = max(max_extent, extent)
                    except:
                        pass  # Some glyphs may not have outlines

                except Exception as e:
                    pass  # Use defaults

            widths[glyph_name] = width
            lsbs[glyph_name] = lsb
            max_advance = max(max_advance, width)

        # Guard against fonts where every width/extent computed as zero.
        if max_advance == 0:
            max_advance = 1000
        if max_extent == 0:
            max_extent = x_max

        units_per_em = 1000  # Standard for Type1/CFF

        # === Create head table ===
        head = newTable('head')
        head.tableVersion = 1.0
        head.fontRevision = 1.0
        head.checkSumAdjustment = 0
        head.magicNumber = 0x5F0F3CF5
        head.flags = 0x000B  # Baseline at y=0, LSB at x=0, integer PPEM
        head.unitsPerEm = units_per_em
        # Fixed timestamps (seconds since 1904) keep output deterministic.
        head.created = 3600000000
        head.modified = 3600000000
        head.xMin = x_min
        head.yMin = y_min
        head.xMax = x_max
        head.yMax = y_max
        head.macStyle = 0
        head.fontDirectionHint = 2
        head.indexToLocFormat = 0
        head.glyphDataFormat = 0
        head.lowestRecPPEM = 8
        otf['head'] = head

        # === Create hhea table with correct metrics ===
        hhea = newTable('hhea')
        hhea.tableVersion = 0x00010000
        hhea.ascent = max(y_max, 800)
        hhea.descent = min(y_min, -200)
        hhea.lineGap = 0
        hhea.advanceWidthMax = max_advance
        hhea.minLeftSideBearing = min_lsb
        hhea.minRightSideBearing = min_rsb
        hhea.xMaxExtent = max_extent
        hhea.caretSlopeRise = 1
        hhea.caretSlopeRun = 0
        hhea.caretOffset = 0
        hhea.reserved0 = 0
        hhea.reserved1 = 0
        hhea.reserved2 = 0
        hhea.reserved3 = 0
        hhea.metricDataFormat = 0
        hhea.numberOfHMetrics = len(glyph_order)
        otf['hhea'] = hhea

        # === Create hmtx table with correct LSBs ===
        hmtx = newTable('hmtx')
        hmtx.metrics = {}
        for glyph_name in glyph_order:
            hmtx.metrics[glyph_name] = (widths.get(glyph_name, default_width), lsbs.get(glyph_name, 0))
        otf['hmtx'] = hmtx

        # === Create maxp table (simpler for CFF) ===
        maxp = newTable('maxp')
        maxp.tableVersion = 0x00005000  # CFF version (0.5)
        maxp.numGlyphs = len(glyph_order)
        otf['maxp'] = maxp

        # === Build Unicode cmap from GID→Unicode mapping ===
        unicode_to_glyph = {}

        if gid_to_unicode:
            # Debug: Show first few glyph names to understand naming convention
            sample_glyphs = glyph_order[:min(10, len(glyph_order))]
            print(f"Sample glyph names: {sample_glyphs}", file=sys.stderr)

            # Debug: Show which GIDs we have mappings for
            sample_gids = sorted(gid_to_unicode.keys())[:10]
            print(f"Sample GIDs from mapping: {sample_gids}", file=sys.stderr)

            # For CID fonts: glyph names are "cid00123" (5-digit zero-padded)
            # For non-CID fonts: glyph names vary but GID == array index
            # NOTE(review): heuristic — assumes fontTools' cidNNNNN naming;
            # verify against fonts whose charset uses other synthetic names.
            is_cid_font = any(gn.startswith('cid') for gn in glyph_order[1:6])  # Check first few non-.notdef glyphs

            for gid, unicode_val in gid_to_unicode.items():
                if unicode_val > 0:
                    if is_cid_font:
                        # Build glyph name as cidNNNNN (5 digits, zero-padded)
                        glyph_name = f"cid{gid:05d}"
                        # Verify this glyph exists in glyph_order
                        if glyph_name in glyph_order:
                            unicode_to_glyph[unicode_val] = glyph_name
                        else:
                            # Try without padding (some fonts use "cid123" not "cid00123")
                            glyph_name_alt = f"cid{gid}"
                            if glyph_name_alt in glyph_order:
                                unicode_to_glyph[unicode_val] = glyph_name_alt
                    else:
                        # Non-CID font: GID is array index
                        if 0 <= gid < len(glyph_order):
                            glyph_name = glyph_order[gid]
                            unicode_to_glyph[unicode_val] = glyph_name

        print(f"Mapped {len(unicode_to_glyph)} Unicode codepoints (isCID={is_cid_font if gid_to_unicode else 'unknown'})", file=sys.stderr)

        # Also try to map from glyph names (uni0041 → U+0041); name-derived
        # entries never override ToUnicode-derived ones.
        for glyph_name in glyph_order:
            if glyph_name.startswith('uni') and len(glyph_name) == 7:
                try:
                    unicode_val = int(glyph_name[3:], 16)
                    if unicode_val not in unicode_to_glyph:
                        unicode_to_glyph[unicode_val] = glyph_name
                except:
                    pass
            elif glyph_name.startswith('u') and len(glyph_name) >= 5:
                try:
                    unicode_val = int(glyph_name[1:], 16)
                    if unicode_val not in unicode_to_glyph:
                        unicode_to_glyph[unicode_val] = glyph_name
                except:
                    pass

        # === Create cmap table ===
        cmap = newTable('cmap')
        cmap.tableVersion = 0
        cmap_tables = []

        # Windows Unicode BMP (format 4) - required
        cmap4_win = cmap_format_4(4)
        cmap4_win.platformID = 3  # Windows
        cmap4_win.platEncID = 1  # Unicode BMP
        cmap4_win.language = 0
        cmap4_win.cmap = {cp: gn for cp, gn in unicode_to_glyph.items() if cp <= 0xFFFF}
        cmap_tables.append(cmap4_win)

        # Windows Unicode UCS-4 (format 12) - for >BMP
        if any(cp > 0xFFFF for cp in unicode_to_glyph):
            cmap12_win = cmap_format_12(12)
            cmap12_win.platformID = 3  # Windows
            cmap12_win.platEncID = 10  # Unicode UCS-4
            cmap12_win.language = 0
            cmap12_win.cmap = dict(unicode_to_glyph)
            cmap_tables.append(cmap12_win)

        # Mac Unicode (format 4) - for compatibility
        cmap4_mac = cmap_format_4(4)
        cmap4_mac.platformID = 1  # Mac
        cmap4_mac.platEncID = 0  # Roman
        cmap4_mac.language = 0
        cmap4_mac.cmap = {cp: gn for cp, gn in unicode_to_glyph.items() if cp <= 0xFFFF}
        cmap_tables.append(cmap4_mac)

        cmap.tables = [t for t in cmap_tables if t.cmap] or [cmap4_win]  # Ensure at least one
        otf['cmap'] = cmap

        print(f"Built cmap with {len(unicode_to_glyph)} Unicode mappings", file=sys.stderr)

        # === Create OS/2 table with correct metrics ===
        os2 = newTable('OS/2')
        os2.version = 4
        os2.xAvgCharWidth = int(sum(widths.values()) / len(widths)) if widths else 500
        os2.usWeightClass = 400  # Normal
        os2.usWidthClass = 5  # Medium
        os2.fsType = 0  # Installable embedding
        os2.ySubscriptXSize = 650
        os2.ySubscriptYSize = 600
        os2.ySubscriptXOffset = 0
        os2.ySubscriptYOffset = 75
        os2.ySuperscriptXSize = 650
        os2.ySuperscriptYSize = 600
        os2.ySuperscriptXOffset = 0
        os2.ySuperscriptYOffset = 350
        os2.yStrikeoutSize = 50
        os2.yStrikeoutPosition = 300
        os2.sFamilyClass = 0

        # PANOSE - use proper object structure (all-zero = "any")
        os2.panose = Panose()
        os2.panose.bFamilyType = 0
        os2.panose.bSerifStyle = 0
        os2.panose.bWeight = 0
        os2.panose.bProportion = 0
        os2.panose.bContrast = 0
        os2.panose.bStrokeVariation = 0
        os2.panose.bArmStyle = 0
        os2.panose.bLetterForm = 0
        os2.panose.bMidline = 0
        os2.panose.bXHeight = 0

        os2.ulUnicodeRange1 = 0
        os2.ulUnicodeRange2 = 0
        os2.ulUnicodeRange3 = 0
        os2.ulUnicodeRange4 = 0
        os2.achVendID = 'SPDF'
        os2.fsSelection = 0x0040  # REGULAR bit

        # Set character index range from actual cmap
        if unicode_to_glyph:
            codepoints = sorted(unicode_to_glyph.keys())
            os2.usFirstCharIndex = codepoints[0]
            os2.usLastCharIndex = codepoints[-1]
        else:
            os2.usFirstCharIndex = 0x20  # space
            os2.usLastCharIndex = 0x7E  # tilde

        # Typo metrics match hhea
        os2.sTypoAscender = hhea.ascent
        os2.sTypoDescender = hhea.descent
        os2.sTypoLineGap = hhea.lineGap

        # Windows metrics (positive values, cover bbox)
        os2.usWinAscent = max(0, y_max)
        os2.usWinDescent = max(0, -y_min)

        os2.ulCodePageRange1 = 0x00000001  # Latin 1
        os2.ulCodePageRange2 = 0
        # NOTE(review): sxHeight/sCapHeight are fixed guesses, not measured
        # from the outlines — confirm acceptable for rendering fidelity.
        os2.sxHeight = 500
        os2.sCapHeight = 700
        os2.usDefaultChar = 0
        os2.usBreakChar = 32
        os2.usMaxContext = 0
        otf['OS/2'] = os2

        # === Create name table with Windows and Mac records ===
        name = newTable('name')
        name.names = []

        # Get font name from CFF if available
        font_name = cff_fontset.fontNames[0] if cff_fontset.fontNames else "Converted"

        name_strings = {
            1: font_name,  # Font Family
            2: "Regular",  # Subfamily
            3: f"Stirling-PDF: {font_name}",  # Unique ID
            4: font_name,  # Full Name
            5: "Version 1.0",  # Version
            6: font_name.replace(' ', '-'),  # PostScript Name
        }

        # Add both Windows and Mac name records
        for name_id, value in name_strings.items():
            # Windows (platform 3, encoding 1, language 0x0409 = en-US)
            rec_win = NameRecord()
            rec_win.nameID = name_id
            rec_win.platformID = 3
            rec_win.platEncID = 1
            rec_win.langID = 0x0409
            rec_win.string = value
            name.names.append(rec_win)

            # Mac (platform 1, encoding 0, language 0)
            rec_mac = NameRecord()
            rec_mac.nameID = name_id
            rec_mac.platformID = 1
            rec_mac.platEncID = 0
            rec_mac.langID = 0
            rec_mac.string = value
            name.names.append(rec_mac)

        otf['name'] = name

        # === Create post table (format 3.0 for smaller web fonts) ===
        post = newTable('post')
        post.formatType = 3.0  # No glyph names (smaller, web-optimized)
        post.italicAngle = 0
        post.underlinePosition = -100
        post.underlineThickness = 50
        post.isFixedPitch = 0
        post.minMemType42 = 0
        post.maxMemType42 = 0
        post.minMemType1 = 0
        post.maxMemType1 = 0
        otf['post'] = post

        # Save the OTF font
        otf.save(output_path)
        otf.close()

        return True

    except Exception as e:
        print(f"ERROR: Conversion failed: {str(e)}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        return False
def main():
    """CLI entry point: validate arguments, then run the CFF→OTF wrap.

    Usage: convert_cff_to_ttf.py <input.cff> <output.otf> [tounicode.cmap]
    Exits 0 on success, 1 on usage error, missing input, or failed conversion.
    """
    argv = sys.argv
    if len(argv) < 3:
        print("Usage: convert_cff_to_ttf.py <input.cff> <output.otf> [tounicode.cmap]", file=sys.stderr)
        sys.exit(1)

    src = Path(argv[1])
    dst = Path(argv[2])
    cmap_file = Path(argv[3]) if len(argv) > 3 else None

    if not src.exists():
        print(f"ERROR: Input file not found: {src}", file=sys.stderr)
        sys.exit(1)

    # A missing ToUnicode file is non-fatal: the font is still converted,
    # just without a rebuilt Unicode cmap.
    if cmap_file is not None and not cmap_file.exists():
        print(f"Warning: ToUnicode file not found: {cmap_file}", file=sys.stderr)
        cmap_file = None

    ok = wrap_cff_as_otf(str(src), str(dst), str(cmap_file) if cmap_file else None)
    sys.exit(0 if ok else 1)


if __name__ == '__main__':
    main()
Loading…
Reference in New Issue
Block a user