diff --git a/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonAnnotation.java b/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonAnnotation.java
new file mode 100644
index 000000000..b994279fe
--- /dev/null
+++ b/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonAnnotation.java
@@ -0,0 +1,61 @@
+package stirling.software.SPDF.model.json;
+
+import java.util.List;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Represents a PDF annotation (comments, highlights, stamps, etc.). Annotations often contain OCR
+ * text layers or other metadata not visible in content streams.
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+@JsonInclude(JsonInclude.Include.NON_NULL)
+public class PdfJsonAnnotation {
+
+    /** Annotation subtype (Text, Highlight, Link, Stamp, Widget, etc.) */
+    private String subtype;
+
+    /** Human-readable text content of the annotation */
+    private String contents;
+
+    /** Annotation rectangle [x1, y1, x2, y2] */
+    private List<Float> rect;
+
+    /** Annotation appearance state */
+    private String appearanceState;
+
+    /** Color components (e.g., [r, g, b] for RGB) */
+    private List<Float> color;
+
+    /** Annotation flags (print, hidden, etc.) */
+    private Integer flags;
+
+    /** For link annotations: destination or action */
+    private String destination;
+
+    /** For text annotations: icon name */
+    private String iconName;
+
+    /** Subject/title of the annotation */
+    private String subject;
+
+    /** Author of the annotation */
+    private String author;
+
+    /** Creation date (ISO 8601 format) */
+    private String creationDate;
+
+    /** Modification date (ISO 8601 format) */
+    private String modificationDate;
+
+    /** Full annotation dictionary for lossless round-tripping */
+    private PdfJsonCosValue rawData;
+}
diff --git a/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonDocument.java b/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonDocument.java
index 3f5bd1f8b..d590b34b9 100644
--- a/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonDocument.java
+++ b/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonDocument.java
@@ -25,4 +25,7 @@ public class PdfJsonDocument {
     @Builder.Default private List<PdfJsonFont> fonts = new ArrayList<>();
 
     @Builder.Default private List<PdfJsonPage> pages = new ArrayList<>();
+
+    /** Form fields (AcroForm) at document level */
+    @Builder.Default private List<PdfJsonFormField> formFields = new ArrayList<>();
 }
diff --git a/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonFont.java b/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonFont.java
index 98d251103..a3496677c 100644
--- a/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonFont.java
+++ b/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonFont.java
@@ -52,4 +52,22 @@ public class PdfJsonFont {
 
     /** Font descriptor flags copied from the source document. */
     private Integer fontDescriptorFlags;
+
+    /** Font ascent in glyph units (typically 1/1000 of an em). */
+    private Float ascent;
+
+    /** Font descent in glyph units (typically negative). */
+    private Float descent;
+
+    /** Capital height when available. */
+    private Float capHeight;
+
+    /** x-height when available. */
+    private Float xHeight;
+
+    /** Italic angle reported by the font descriptor. */
+    private Float italicAngle;
+
+    /** Units per em extracted from the font matrix. */
+    private Integer unitsPerEm;
 }
diff --git a/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonFormField.java b/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonFormField.java
new file mode 100644
index 000000000..2a7c220a8
--- /dev/null
+++ b/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonFormField.java
@@ -0,0 +1,66 @@
+package stirling.software.SPDF.model.json;
+
+import java.util.List;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/** Represents a PDF form field (AcroForm). */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+@JsonInclude(JsonInclude.Include.NON_NULL)
+public class PdfJsonFormField {
+
+    /** Fully qualified field name (e.g., "form1.textfield1") */
+    private String name;
+
+    /** Partial field name (last component) */
+    private String partialName;
+
+    /** Field type (Tx=text, Btn=button, Ch=choice, Sig=signature) */
+    private String fieldType;
+
+    /** Field value as string */
+    private String value;
+
+    /** Default value */
+    private String defaultValue;
+
+    /** Field flags (readonly, required, multiline, etc.) */
+    private Integer flags;
+
+    /** Alternative field name (for accessibility) */
+    private String alternateFieldName;
+
+    /** Mapping name (for export) */
+    private String mappingName;
+
+    /** Page number where field appears (1-indexed) */
+    private Integer pageNumber;
+
+    /** Field rectangle [x1, y1, x2, y2] on the page */
+    private List<Float> rect;
+
+    /** For choice fields: list of options */
+    private List<String> options;
+
+    /** For choice fields: selected indices */
+    private List<Integer> selectedIndices;
+
+    /** For button fields: whether it's checked */
+    private Boolean checked;
+
+    /** Font information for text fields: font name (size is in fontSize) */
+    private String fontName;
+
+    private Float fontSize;
+
+    /** Full field dictionary for lossless round-tripping */
+    private PdfJsonCosValue rawData;
+}
diff --git a/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonPage.java b/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonPage.java
index 8bc7c6d65..fa1417d5c 100644
--- a/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonPage.java
+++ b/app/core/src/main/java/stirling/software/SPDF/model/json/PdfJsonPage.java
@@ -24,6 +24,7 @@ public class PdfJsonPage {
 
     @Builder.Default private List<PdfJsonTextElement> textElements = new ArrayList<>();
     @Builder.Default private List<PdfJsonImageElement> imageElements = new ArrayList<>();
+    @Builder.Default private List<PdfJsonAnnotation> annotations = new ArrayList<>();
 
     /** Serialized representation of the page resources dictionary. */
     private PdfJsonCosValue resources;
diff --git a/app/core/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java b/app/core/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java
index 4e6e03920..31387eeae 100644
--- a/app/core/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java
+++ b/app/core/src/main/java/stirling/software/SPDF/service/PdfJsonConversionService.java
@@ -3,12 +3,16 @@ package stirling.software.SPDF.service;
 import java.awt.geom.AffineTransform;
 import java.awt.geom.Point2D;
 import java.awt.image.BufferedImage;
+import java.io.BufferedReader;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.time.Instant;
 import java.time.format.DateTimeParseException;
 import java.util.ArrayList;
@@ -17,15 +21,19 @@ import java.util.Calendar;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
+import java.util.Set;
 import java.util.TimeZone;
 import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
 
 import javax.imageio.ImageIO;
 
@@ -66,8 +74,13 @@ import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
 import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
 import org.apache.pdfbox.pdmodel.graphics.state.PDTextState;
 import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
+import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
+import org.apache.pdfbox.pdmodel.interactive.form.PDField;
 import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.pdfbox.text.TextPosition;
+import org.apache.pdfbox.util.DateConverter;
 import org.apache.pdfbox.util.Matrix;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.core.io.Resource;
@@ -80,10 +93,13 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 
+import stirling.software.SPDF.config.EndpointConfiguration;
+import stirling.software.SPDF.model.json.PdfJsonAnnotation;
 import stirling.software.SPDF.model.json.PdfJsonCosValue;
 import stirling.software.SPDF.model.json.PdfJsonDocument;
 import stirling.software.SPDF.model.json.PdfJsonFont;
 import stirling.software.SPDF.model.json.PdfJsonFontCidSystemInfo;
+import stirling.software.SPDF.model.json.PdfJsonFormField;
 import stirling.software.SPDF.model.json.PdfJsonImageElement;
 import stirling.software.SPDF.model.json.PdfJsonMetadata;
 import stirling.software.SPDF.model.json.PdfJsonPage;
@@ -92,6 +108,10 @@ import stirling.software.SPDF.model.json.PdfJsonTextColor;
 import stirling.software.SPDF.model.json.PdfJsonTextElement;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.ExceptionUtils;
+import stirling.software.common.util.ProcessExecutor;
+import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
+import stirling.software.common.util.TempFile;
+import stirling.software.common.util.TempFileManager;
 
 @Slf4j
 @Service
@@ -101,62 +121,139 @@ public class PdfJsonConversionService {
     private final CustomPDFDocumentFactory pdfDocumentFactory;
     private final ObjectMapper objectMapper;
     private final ResourceLoader resourceLoader;
+    private final EndpointConfiguration endpointConfiguration;
+    private final TempFileManager tempFileManager;
 
     private static final String FALLBACK_FONT_ID = "fallback-noto-sans";
     private static final String DEFAULT_FALLBACK_FONT_LOCATION =
             "classpath:/static/fonts/NotoSans-Regular.ttf";
+    private static final String FALLBACK_FONT_CJK_ID = "fallback-noto-cjk";
+    private static final String FALLBACK_FONT_JP_ID = "fallback-noto-jp";
+    private static final String FALLBACK_FONT_KR_ID = "fallback-noto-korean";
+    private static final String FALLBACK_FONT_AR_ID = "fallback-noto-arabic";
+    private static final String FALLBACK_FONT_TH_ID = "fallback-noto-thai";
+
+    private static final Map<String, FallbackFontSpec> BUILT_IN_FALLBACK_FONTS =
+            Map.ofEntries(
+                    Map.entry(
+                            FALLBACK_FONT_CJK_ID,
+                            new FallbackFontSpec(
+                                    "classpath:/static/fonts/NotoSansSC-Regular.ttf",
+                                    "NotoSansSC-Regular",
+                                    "ttf")),
+                    Map.entry(
+                            FALLBACK_FONT_JP_ID,
+                            new FallbackFontSpec(
+                                    "classpath:/static/fonts/NotoSansJP-Regular.ttf",
+                                    "NotoSansJP-Regular",
+                                    "ttf")),
+                    Map.entry(
+                            FALLBACK_FONT_KR_ID,
+                            new FallbackFontSpec(
+                                    "classpath:/static/fonts/malgun.ttf", "MalgunGothic", "ttf")),
+                    Map.entry(
+                            FALLBACK_FONT_AR_ID,
+                            new FallbackFontSpec(
+                                    "classpath:/static/fonts/NotoSansArabic-Regular.ttf",
+                                    "NotoSansArabic-Regular",
+                                    "ttf")),
+                    Map.entry(
+                            FALLBACK_FONT_TH_ID,
+                            new FallbackFontSpec(
+                                    "classpath:/static/fonts/NotoSansThai-Regular.ttf",
+                                    "NotoSansThai-Regular",
+                                    "ttf")));
 
     @Value("${stirling.pdf.fallback-font:" + DEFAULT_FALLBACK_FONT_LOCATION + "}")
     private String fallbackFontLocation;
 
-    private byte[] fallbackFontBytes;
+    @Value("${stirling.pdf.json.font-normalization.enabled:true}")
+    private boolean fontNormalizationEnabled;
+
+    @Value("${stirling.pdf.json.cff-converter.enabled:true}")
+    private boolean cffConversionEnabled;
+
+    @Value("${stirling.pdf.json.cff-converter.fontforge-command:fontforge}")
+    private String fontforgeCommand;
+
+    private final Map<String, byte[]> fallbackFontCache = new ConcurrentHashMap<>();
 
     public byte[] convertPdfToJson(MultipartFile file) throws IOException {
         if (file == null) {
             throw ExceptionUtils.createNullArgumentException("fileInput");
         }
-        try (PDDocument document = pdfDocumentFactory.load(file.getInputStream(), true)) {
-            int totalPages = document.getNumberOfPages();
-            log.info("Converting PDF to JSON ({} pages)", totalPages);
-            Map<String, PdfJsonFont> fonts = new LinkedHashMap<>();
-            Map<Integer, List<PdfJsonTextElement>> textByPage = new LinkedHashMap<>();
 
-            Map<Integer, Map<PDFont, String>> pageFontResources = new HashMap<>();
-            int pageNumber = 1;
-            for (PDPage page : document.getPages()) {
-                Map<PDFont, String> resourceMap =
-                        collectFontsForPage(document, page, pageNumber, fonts);
-                pageFontResources.put(pageNumber, resourceMap);
-                log.debug(
-                        "PDF→JSON: collected {} font resources on page {}",
-                        resourceMap.size(),
-                        pageNumber);
-                pageNumber++;
+        TempFile normalizedFile = null;
+        try (TempFile originalFile = new TempFile(tempFileManager, ".pdf")) {
+            file.transferTo(originalFile.getFile());
+            Path workingPath = originalFile.getPath();
+
+            if (fontNormalizationEnabled && canRunGhostscript()) {
+                try {
+                    normalizedFile = normalizePdfFonts(workingPath);
+                    if (normalizedFile != null && normalizedFile.exists()) {
+                        workingPath = normalizedFile.getPath();
+                        log.info("Using Ghostscript-normalized PDF for JSON export");
+                    }
+                } catch (IOException ex) {
+                    log.warn(
+                            "Ghostscript font normalization failed ({}); using original PDF",
+                            ex.getMessage());
+                    closeQuietly(normalizedFile);
+                    normalizedFile = null;
+                }
             }
 
-            TextCollectingStripper stripper =
-                    new TextCollectingStripper(document, fonts, textByPage, pageFontResources);
-            stripper.setSortByPosition(true);
-            stripper.getText(document);
+            try (PDDocument document = pdfDocumentFactory.load(workingPath, true)) {
+                int totalPages = document.getNumberOfPages();
+                log.info("Converting PDF to JSON ({} pages)", totalPages);
+                Map<String, PdfJsonFont> fonts = new LinkedHashMap<>();
+                Map<Integer, List<PdfJsonTextElement>> textByPage = new LinkedHashMap<>();
 
-            Map<Integer, List<PdfJsonImageElement>> imagesByPage = collectImages(document);
+                Map<Integer, Map<PDFont, String>> pageFontResources = new HashMap<>();
+                int pageNumber = 1;
+                for (PDPage page : document.getPages()) {
+                    Map<PDFont, String> resourceMap =
+                            collectFontsForPage(document, page, pageNumber, fonts);
+                    pageFontResources.put(pageNumber, resourceMap);
+                    log.debug(
+                            "PDF→JSON: collected {} font resources on page {}",
+                            resourceMap.size(),
+                            pageNumber);
+                    pageNumber++;
+                }
 
-            PdfJsonDocument pdfJson = new PdfJsonDocument();
-            pdfJson.setMetadata(extractMetadata(document));
-            pdfJson.setXmpMetadata(extractXmpMetadata(document));
-            List<PdfJsonFont> serializedFonts = new ArrayList<>(fonts.values());
-            serializedFonts.sort(
-                    Comparator.comparing(
-                            PdfJsonFont::getUid, Comparator.nullsLast(Comparator.naturalOrder())));
-            pdfJson.setFonts(serializedFonts);
-            pdfJson.setPages(extractPages(document, textByPage, imagesByPage));
+                TextCollectingStripper stripper =
+                        new TextCollectingStripper(document, fonts, textByPage, pageFontResources);
+                stripper.setSortByPosition(true);
+                stripper.getText(document);
 
-            log.info(
-                    "PDF→JSON conversion complete (fonts: {}, pages: {})",
-                    serializedFonts.size(),
-                    pdfJson.getPages().size());
+                Map<Integer, List<PdfJsonImageElement>> imagesByPage = collectImages(document);
+                Map<Integer, List<PdfJsonAnnotation>> annotationsByPage =
+                        collectAnnotations(document);
 
-            return objectMapper.writerWithDefaultPrettyPrinter().writeValueAsBytes(pdfJson);
+                PdfJsonDocument pdfJson = new PdfJsonDocument();
+                pdfJson.setMetadata(extractMetadata(document));
+                pdfJson.setXmpMetadata(extractXmpMetadata(document));
+                List<PdfJsonFont> serializedFonts = new ArrayList<>(fonts.values());
+                serializedFonts.sort(
+                        Comparator.comparing(
+                                PdfJsonFont::getUid,
+                                Comparator.nullsLast(Comparator.naturalOrder())));
+                pdfJson.setFonts(serializedFonts);
+                pdfJson.setPages(
+                        extractPages(document, textByPage, imagesByPage, annotationsByPage));
+                pdfJson.setFormFields(collectFormFields(document));
+
+                log.info(
+                        "PDF→JSON conversion complete (fonts: {}, pages: {})",
+                        serializedFonts.size(),
+                        pdfJson.getPages().size());
+
+                return objectMapper.writerWithDefaultPrettyPrinter().writeValueAsBytes(pdfJson);
+            }
+        } finally {
+            closeQuietly(normalizedFile);
         }
     }
 
@@ -210,16 +307,23 @@ public class PdfJsonConversionService {
                     page.setContents(preservedStreams);
                 }
 
-                List<PdfJsonTextElement> elements =
-                        pageModel.getTextElements() != null
-                                ? pageModel.getTextElements()
-                                : new ArrayList<>();
                 List<PdfJsonImageElement> imageElements =
                         pageModel.getImageElements() != null
                                 ? pageModel.getImageElements()
                                 : new ArrayList<>();
 
-                boolean fallbackAssigned =
+                // Reconstruct image XObjects if content streams are preserved
+                // (images were filtered out during serialization to avoid duplication)
+                if (!preservedStreams.isEmpty() && !imageElements.isEmpty()) {
+                    reconstructImageXObjects(document, page, preservedStreams, imageElements);
+                }
+
+                List<PdfJsonTextElement> elements =
+                        pageModel.getTextElements() != null
+                                ? pageModel.getTextElements()
+                                : new ArrayList<>();
+
+                PreflightResult preflightResult =
                         preflightTextElements(
                                 document, fontMap, fontModels, elements, pageNumberValue);
 
@@ -227,11 +331,14 @@ public class PdfJsonConversionService {
                         "Page {} preflight complete (elements={}, fallbackApplied={})",
                         pageNumberValue,
                         elements.size(),
-                        fallbackAssigned);
+                        preflightResult.usesFallback());
 
-                if (elements.stream().anyMatch(el -> FALLBACK_FONT_ID.equals(el.getFontId()))) {
-                    ensureFallbackResource(page, fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID)));
-                    log.info("Page {} uses fallback font for some elements", pageNumberValue);
+                if (!preflightResult.fallbackFontIds().isEmpty()) {
+                    ensureFallbackResources(page, preflightResult.fallbackFontIds(), fontMap);
+                    log.info(
+                            "Page {} registered fallback fonts: {}",
+                            pageNumberValue,
+                            preflightResult.fallbackFontIds());
                 }
 
                 boolean hasText = !elements.isEmpty();
@@ -239,7 +346,7 @@ public class PdfJsonConversionService {
                 boolean rewriteSucceeded = true;
 
                 if (hasText) {
-                    if (fallbackAssigned) {
+                    if (preflightResult.usesFallback()) {
                         rewriteSucceeded = false;
                     } else if (!preservedStreams.isEmpty()) {
                         log.info("Attempting token rewrite for page {}", pageNumberValue);
@@ -272,12 +379,31 @@ public class PdfJsonConversionService {
                 if (shouldRegenerate) {
                     log.info("Regenerating page content for page {}", pageNumberValue);
                     regeneratePageContent(
-                            document, page, elements, imageElements, fontMap, pageNumberValue);
+                            document,
+                            page,
+                            elements,
+                            imageElements,
+                            fontMap,
+                            fontModels,
+                            pageNumberValue);
                     log.info("Page content regeneration complete for page {}", pageNumberValue);
                 }
+
+                // Restore annotations for this page
+                List<PdfJsonAnnotation> annotations =
+                        pageModel.getAnnotations() != null
+                                ? pageModel.getAnnotations()
+                                : new ArrayList<>();
+                restoreAnnotations(document, page, annotations);
+
                 pageIndex++;
             }
 
+            // Restore form fields
+            List<PdfJsonFormField> formFields =
+                    pdfJson.getFormFields() != null ? pdfJson.getFormFields() : new ArrayList<>();
+            restoreFormFields(document, formFields);
+
             try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
                 document.save(baos);
                 return baos.toByteArray();
@@ -321,6 +447,7 @@ public class PdfJsonConversionService {
 
     private PdfJsonFont buildFontModel(
             PDDocument document, PDFont font, String fontId, int pageNumber) throws IOException {
+        PDFontDescriptor descriptor = font.getFontDescriptor();
         String subtype = font.getCOSObject().getNameAsString(COSName.SUBTYPE);
         String encoding = resolveEncoding(font);
         PdfJsonFontCidSystemInfo cidInfo = extractCidSystemInfo(font.getCOSObject());
@@ -328,8 +455,7 @@ public class PdfJsonConversionService {
         FontProgramData programData = embedded ? extractFontProgram(font) : null;
         String toUnicode = extractToUnicode(font.getCOSObject());
         String standard14Name = resolveStandard14Name(font);
-        Integer flags =
-                font.getFontDescriptor() != null ? font.getFontDescriptor().getFlags() : null;
+        Integer flags = descriptor != null ? descriptor.getFlags() : null;
 
         return PdfJsonFont.builder()
                 .id(fontId)
@@ -345,10 +471,16 @@ public class PdfJsonConversionService {
                 .toUnicode(toUnicode)
                 .standard14Name(standard14Name)
                 .fontDescriptorFlags(flags)
+                .ascent(descriptor != null ? descriptor.getAscent() : null)
+                .descent(descriptor != null ? descriptor.getDescent() : null)
+                .capHeight(descriptor != null ? descriptor.getCapHeight() : null)
+                .xHeight(descriptor != null ? descriptor.getXHeight() : null)
+                .italicAngle(descriptor != null ? descriptor.getItalicAngle() : null)
+                .unitsPerEm(extractUnitsPerEm(font))
                 .build();
     }
 
-    private boolean preflightTextElements(
+    private PreflightResult preflightTextElements(
             PDDocument document,
             Map<String, PDFont> fontMap,
             List<PdfJsonFont> fontModels,
@@ -356,11 +488,12 @@ public class PdfJsonConversionService {
             int pageNumber)
             throws IOException {
         if (elements == null || elements.isEmpty()) {
-            return false;
+            return PreflightResult.empty();
         }
 
-        PDFont fallbackFont = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
-        boolean fallbackApplied = false;
+        Set<String> fallbackIds = new LinkedHashSet<>();
+        boolean fallbackNeeded = false;
+
         for (PdfJsonTextElement element : elements) {
             String text = Objects.toString(element.getText(), "");
             if (text.isEmpty()) {
@@ -368,57 +501,99 @@ public class PdfJsonConversionService {
             }
 
             PDFont font = fontMap.get(buildFontKey(pageNumber, element.getFontId()));
-            boolean encodable = false;
-            if (font != null) {
-                try {
-                    font.encode(text);
-                    encodable = true;
-                } catch (IOException | IllegalArgumentException ex) {
-                    log.debug(
-                            "Font {} missing glyphs for text '{}': {}",
-                            element.getFontId(),
-                            text,
-                            ex.getMessage());
-                }
+            if (font == null && element.getFontId() != null) {
+                font = fontMap.get(buildFontKey(-1, element.getFontId()));
             }
 
-            if (encodable) {
+            if (font == null) {
+                fallbackNeeded = true;
+                fallbackIds.add(FALLBACK_FONT_ID);
                 continue;
             }
 
-            element.setFontId(FALLBACK_FONT_ID);
-            log.info(
-                    "Assigning fallback font to text element on page {} (text='{}')",
-                    pageNumber,
-                    abbreviate(text));
-            if (fallbackFont == null) {
-                fallbackFont = loadFallbackPdfFont(document);
-                fontMap.put(buildFontKey(-1, FALLBACK_FONT_ID), fallbackFont);
-                if (fontModels.stream().noneMatch(f -> FALLBACK_FONT_ID.equals(f.getId()))) {
-                    fontModels.add(buildFallbackFontModel());
+            if (!canEncodeFully(font, text)) {
+                fallbackNeeded = true;
+                for (int offset = 0; offset < text.length(); ) {
+                    int codePoint = text.codePointAt(offset);
+                    offset += Character.charCount(codePoint);
+                    if (!canEncode(font, codePoint)) {
+                        String fallbackId = resolveFallbackFontId(codePoint);
+                        fallbackIds.add(fallbackId != null ? fallbackId : FALLBACK_FONT_ID);
+                    }
                 }
             }
-            fallbackApplied = true;
         }
-        return fallbackApplied;
+
+        for (String fallbackId : fallbackIds) {
+            ensureFallbackFont(document, fontMap, fontModels, fallbackId);
+        }
+
+        if (fallbackNeeded && fallbackIds.isEmpty()) {
+            fallbackIds.add(FALLBACK_FONT_ID);
+            ensureFallbackFont(document, fontMap, fontModels, FALLBACK_FONT_ID);
+        }
+
+        return new PreflightResult(fallbackNeeded, fallbackIds);
     }
 
     private PdfJsonFont buildFallbackFontModel() throws IOException {
-        byte[] bytes = loadFallbackFontBytes();
+        return buildFallbackFontModel(FALLBACK_FONT_ID);
+    }
+
+    private PdfJsonFont buildFallbackFontModel(String fallbackId) throws IOException {
+        FallbackFontSpec spec = getFallbackFontSpec(fallbackId);
+        if (spec == null) {
+            throw new IOException("Unknown fallback font id " + fallbackId);
+        }
+        byte[] bytes = loadFallbackFontBytes(fallbackId, spec);
         String base64 = Base64.getEncoder().encodeToString(bytes);
         return PdfJsonFont.builder()
-                .id(FALLBACK_FONT_ID)
-                .uid(FALLBACK_FONT_ID)
-                .baseName("NotoSans-Regular")
+                .id(fallbackId)
+                .uid(fallbackId)
+                .baseName(spec.baseName())
                 .subtype("TrueType")
                 .embedded(true)
                 .program(base64)
-                .programFormat("ttf")
+                .programFormat(spec.format())
                 .build();
     }
 
-    private void ensureFallbackResource(PDPage page, PDFont fallbackFont) {
-        if (fallbackFont == null) {
+    private FallbackFontSpec getFallbackFontSpec(String fallbackId) {
+        if (FALLBACK_FONT_ID.equals(fallbackId)) {
+            String baseName = inferBaseName(fallbackFontLocation, "NotoSans-Regular");
+            String format = inferFormat(fallbackFontLocation, "ttf");
+            return new FallbackFontSpec(fallbackFontLocation, baseName, format);
+        }
+        return BUILT_IN_FALLBACK_FONTS.get(fallbackId);
+    }
+
+    private String inferBaseName(String location, String defaultName) {
+        if (location == null || location.isBlank()) {
+            return defaultName;
+        }
+        int slash = location.lastIndexOf('/');
+        String fileName = slash >= 0 ? location.substring(slash + 1) : location;
+        int dot = fileName.lastIndexOf('.');
+        if (dot > 0) {
+            fileName = fileName.substring(0, dot);
+        }
+        return fileName.isEmpty() ? defaultName : fileName;
+    }
+
+    private String inferFormat(String location, String defaultFormat) {
+        if (location == null || location.isBlank()) {
+            return defaultFormat;
+        }
+        int dot = location.lastIndexOf('.');
+        if (dot >= 0 && dot < location.length() - 1) {
+            return location.substring(dot + 1).toLowerCase(Locale.ROOT);
+        }
+        return defaultFormat;
+    }
+
+    private void ensureFallbackResources(
+            PDPage page, Set<String> fallbackFontIds, Map<String, PDFont> fontMap) {
+        if (fallbackFontIds == null || fallbackFontIds.isEmpty()) {
             return;
         }
         PDResources resources = page.getResources();
@@ -426,40 +601,204 @@ public class PdfJsonConversionService {
             resources = new PDResources();
             page.setResources(resources);
         }
-        COSName fallbackName = COSName.getPDFName(FALLBACK_FONT_ID);
-        boolean exists = false;
-        for (COSName name : resources.getFontNames()) {
-            if (fallbackName.equals(name)) {
-                exists = true;
-                break;
+        for (String fallbackId : fallbackFontIds) {
+            if (fallbackId == null) {
+                continue;
+            }
+            PDFont fallbackFont = fontMap.get(buildFontKey(-1, fallbackId));
+            if (fallbackFont == null) {
+                continue;
+            }
+            COSName fallbackName = COSName.getPDFName(fallbackId);
+            boolean exists = false;
+            for (COSName name : resources.getFontNames()) {
+                if (fallbackName.equals(name)) {
+                    exists = true;
+                    break;
+                }
+            }
+            if (!exists) {
+                resources.put(fallbackName, fallbackFont);
             }
-        }
-        if (!exists) {
-            resources.put(fallbackName, fallbackFont);
         }
     }
 
+    /**
+     * Loads the default fallback font by delegating to {@link #loadFallbackPdfFont(PDDocument,
+     * String)} with {@code FALLBACK_FONT_ID}.
+     */
     private PDFont loadFallbackPdfFont(PDDocument document) throws IOException {
-        byte[] bytes = loadFallbackFontBytes();
+        return loadFallbackPdfFont(document, FALLBACK_FONT_ID);
+    }
+
+    /**
+     * Loads the fallback font registered under {@code fallbackId} into the given document.
+     *
+     * @param document document the font is loaded into
+     * @param fallbackId fallback font identifier to resolve
+     * @return the font produced by {@code PDType0Font.load}
+     * @throws IOException if the id has no specification or the font bytes cannot be loaded
+     */
+    private PDFont loadFallbackPdfFont(PDDocument document, String fallbackId) throws IOException {
+        FallbackFontSpec spec = getFallbackFontSpec(fallbackId);
+        if (spec == null) {
+            throw new IOException("Unknown fallback font id " + fallbackId);
+        }
+        byte[] bytes = loadFallbackFontBytes(fallbackId, spec);
         try (InputStream stream = new ByteArrayInputStream(bytes)) {
+            // NOTE(review): the trailing `true` is presumably PDFBox's embedSubset flag - confirm.
             return PDType0Font.load(document, stream, true);
         }
     }
 
-    private byte[] loadFallbackFontBytes() throws IOException {
-        if (fallbackFontBytes == null) {
-            Resource resource = resourceLoader.getResource(fallbackFontLocation);
-            if (!resource.exists()) {
-                throw new IOException(
-                        "Fallback font resource not found at " + fallbackFontLocation);
-            }
-            try (InputStream inputStream = resource.getInputStream();
-                    ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
-                inputStream.transferTo(baos);
-                fallbackFontBytes = baos.toByteArray();
-            }
+    /**
+     * Returns the fallback font for {@code fallbackId}, loading and registering it on first use.
+     *
+     * <p>Fonts are looked up and stored in {@code fontMap} under {@code buildFontKey(-1, id)}.
+     * When a font is newly loaded and {@code fontModels} is non-null, a fallback font model is
+     * appended unless a model with the same id is already present.
+     *
+     * @param document document the font is loaded into
+     * @param fontMap font lookup table, updated when a font is loaded
+     * @param fontModels optional list of font models to extend; may be {@code null}
+     * @param fallbackId fallback font identifier; {@code null} selects {@code FALLBACK_FONT_ID}
+     * @return the cached or freshly loaded font
+     * @throws IOException if the font cannot be loaded
+     */
+    private PDFont ensureFallbackFont(
+            PDDocument document,
+            Map<String, PDFont> fontMap,
+            List<PdfJsonFont> fontModels,
+            String fallbackId)
+            throws IOException {
+        String effectiveId = fallbackId != null ? fallbackId : FALLBACK_FONT_ID;
+        String key = buildFontKey(-1, effectiveId);
+        PDFont font = fontMap.get(key);
+        if (font != null) {
+            return font;
         }
-        return fallbackFontBytes;
+        PDFont loaded = loadFallbackPdfFont(document, effectiveId);
+        fontMap.put(key, loaded);
+        // Register a model only once per id so repeated calls do not duplicate entries.
+        if (fontModels != null
+                && fontModels.stream().noneMatch(f -> effectiveId.equals(f.getId()))) {
+            fontModels.add(buildFallbackFontModel(effectiveId));
+        }
+        return loaded;
+    }
+
+    /**
+     * Returns the raw bytes of a fallback font, reading them through the resource loader on the
+     * first request and serving them from {@code fallbackFontCache} afterwards.
+     *
+     * @param fallbackId cache key and identifier used in error messages
+     * @param spec specification whose resource location is read
+     * @return the font program bytes
+     * @throws IOException if {@code spec} is {@code null}, the resource is missing, or reading fails
+     */
+    private byte[] loadFallbackFontBytes(String fallbackId, FallbackFontSpec spec)
+            throws IOException {
+        if (spec == null) {
+            throw new IOException("No fallback font specification for " + fallbackId);
+        }
+        byte[] fontBytes = fallbackFontCache.get(fallbackId);
+        if (fontBytes != null) {
+            return fontBytes;
+        }
+        String location = spec.resourceLocation();
+        Resource fontResource = resourceLoader.getResource(location);
+        if (!fontResource.exists()) {
+            throw new IOException("Fallback font resource not found at " + location);
+        }
+        try (InputStream in = fontResource.getInputStream()) {
+            fontBytes = in.readAllBytes();
+            fallbackFontCache.put(fallbackId, fontBytes);
+            return fontBytes;
+        }
+    }
+
+    /**
+     * Reports whether the "Ghostscript" endpoint group is enabled.
+     *
+     * @return {@code true} only when an endpoint configuration is present and reports the group
+     *     as enabled; {@code false} when it is absent or the check throws
+     */
+    private boolean canRunGhostscript() {
+        if (endpointConfiguration == null) {
+            return false;
+        }
+        try {
+            return endpointConfiguration.isGroupEnabled("Ghostscript");
+        } catch (Exception ex) {
+            log.debug("Ghostscript availability check failed: {}", ex.getMessage());
+            return false;
+        }
+    }
+
+    /**
+     * Rewrites a PDF through Ghostscript's {@code pdfwrite} device with font embedding and
+     * subsetting enabled.
+     *
+     * <p>The temporary output file is handed to the caller only on success; on every other path
+     * (non-zero exit code, missing or empty output, any exception) it is closed here.
+     *
+     * @param sourcePath PDF to normalize; {@code null} or a non-existent path yields {@code null}
+     * @return the temp file holding the normalized PDF (caller must close it), or {@code null}
+     *     when Ghostscript did not produce a usable file
+     * @throws IOException if the process fails with an I/O error or the thread is interrupted
+     */
+    private TempFile normalizePdfFonts(Path sourcePath) throws IOException {
+        if (sourcePath == null || !Files.exists(sourcePath)) {
+            return null;
+        }
+        TempFile outputFile = new TempFile(tempFileManager, ".pdf");
+        boolean keepOutput = false;
+        try {
+            List<String> command =
+                    new ArrayList<>(
+                            List.of(
+                                    "gs",
+                                    "-sDEVICE=pdfwrite",
+                                    "-dCompatibilityLevel=1.7",
+                                    "-dPDFSETTINGS=/prepress",
+                                    "-dEmbedAllFonts=true",
+                                    "-dSubsetFonts=true",
+                                    "-dCompressFonts=true",
+                                    "-dNOPAUSE",
+                                    "-dBATCH",
+                                    "-dQUIET",
+                                    "-o",
+                                    outputFile.getAbsolutePath(),
+                                    "-c",
+                                    "<</NeverEmbed[]>> setdistillerparams",
+                                    "-f",
+                                    sourcePath.toString()));
+            ProcessExecutorResult result =
+                    ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
+                            .runCommandWithOutputHandling(command);
+            keepOutput =
+                    result.getRc() == 0
+                            && Files.exists(outputFile.getPath())
+                            && Files.size(outputFile.getPath()) > 0;
+            if (keepOutput) {
+                return outputFile;
+            }
+            log.warn("Ghostscript normalization exited with code {}", result.getRc());
+            return null;
+        } catch (InterruptedException ex) {
+            Thread.currentThread().interrupt();
+            throw new IOException("Ghostscript normalization interrupted", ex);
+        } finally {
+            // Single cleanup point: also covers unchecked exceptions, which previously leaked
+            // the temp file.
+            if (!keepOutput) {
+                closeQuietly(outputFile);
+            }
+        }
+    }
+
+    /**
+     * Converts a CFF font program to TrueType by running FontForge as an external process.
+     *
+     * <p>The input bytes are written to a temporary {@code .cff} file, FontForge generates a
+     * temporary {@code .ttf} file, and both are removed when the method returns. Process output
+     * (stdout and stderr merged) is drained on a helper thread and only used for the failure log.
+     *
+     * @param fontBytes CFF font program
+     * @return the generated TrueType bytes, or {@code null} when conversion is disabled, no
+     *     FontForge command is configured, the input is empty, or the conversion fails or is
+     *     interrupted
+     */
+    private byte[] convertCffProgramToTrueType(byte[] fontBytes) {
+        if (!cffConversionEnabled
+                || fontforgeCommand == null
+                || fontforgeCommand.isBlank()
+                || fontBytes == null
+                || fontBytes.length == 0) {
+            return null;
+        }
+
+        try (TempFile inputFile = new TempFile(tempFileManager, ".cff");
+                TempFile outputFile = new TempFile(tempFileManager, ".ttf")) {
+            Files.write(inputFile.getPath(), fontBytes);
+
+            List<String> command = new ArrayList<>();
+            command.add(fontforgeCommand);
+            command.add("-lang=ff");
+            command.add("-c");
+            command.add(
+                    "Open($1); SelectWorthOutputting(); SetFontOrder(2); Reencode(\"unicode\"); "
+                            + "Generate($2); Close(); Quit()");
+            command.add(inputFile.getAbsolutePath());
+            command.add(outputFile.getAbsolutePath());
+
+            ProcessBuilder builder = new ProcessBuilder(command);
+            builder.redirectErrorStream(true);
+            Process process = builder.start();
+
+            StringBuilder output = new StringBuilder();
+            Thread reader =
+                    new Thread(
+                            () -> {
+                                try (BufferedReader br =
+                                        new BufferedReader(
+                                                new InputStreamReader(
+                                                        process.getInputStream(),
+                                                        StandardCharsets.UTF_8))) {
+                                    String line;
+                                    while ((line = br.readLine()) != null) {
+                                        output.append(line).append('\n');
+                                    }
+                                } catch (IOException ignored) {
+                                    // Output is diagnostic only; a broken pipe (e.g. after the
+                                    // process is destroyed) must not fail the conversion.
+                                }
+                            });
+            reader.setDaemon(true);
+
+            int exitCode;
+            try {
+                reader.start();
+                exitCode = process.waitFor();
+                reader.join();
+            } finally {
+                // On interruption the process may still be running; stop it before the temp
+                // files it reads and writes are deleted by try-with-resources.
+                if (process.isAlive()) {
+                    process.destroyForcibly();
+                }
+            }
+
+            // An empty output file is not a usable font, so treat it as a failure.
+            if (exitCode == 0
+                    && Files.exists(outputFile.getPath())
+                    && Files.size(outputFile.getPath()) > 0) {
+                return Files.readAllBytes(outputFile.getPath());
+            }
+
+            log.warn(
+                    "FontForge conversion exited with code {}: {}",
+                    exitCode,
+                    output.toString().trim());
+        } catch (InterruptedException ex) {
+            Thread.currentThread().interrupt();
+            log.warn("FontForge conversion interrupted");
+        } catch (IOException ex) {
+            log.warn("FontForge conversion failed: {}", ex.getMessage());
+        }
+
+        return null;
+    }
 
     private PdfJsonFontCidSystemInfo extractCidSystemInfo(COSDictionary fontDictionary) {
@@ -599,7 +938,8 @@ public class PdfJsonConversionService {
     private List<PdfJsonPage> extractPages(
             PDDocument document,
             Map<Integer, List<PdfJsonTextElement>> textByPage,
-            Map<Integer, List<PdfJsonImageElement>> imagesByPage)
+            Map<Integer, List<PdfJsonImageElement>> imagesByPage,
+            Map<Integer, List<PdfJsonAnnotation>> annotationsByPage)
             throws IOException {
         List<PdfJsonPage> pages = new ArrayList<>();
         int pageIndex = 0;
@@ -612,8 +952,13 @@ public class PdfJsonConversionService {
             pageModel.setRotation(page.getRotation());
             pageModel.setTextElements(textByPage.getOrDefault(pageIndex + 1, new ArrayList<>()));
             pageModel.setImageElements(imagesByPage.getOrDefault(pageIndex + 1, new ArrayList<>()));
-            pageModel.setResources(
-                    serializeCosValue(page.getCOSObject().getDictionaryObject(COSName.RESOURCES)));
+            pageModel.setAnnotations(
+                    annotationsByPage.getOrDefault(pageIndex + 1, new ArrayList<>()));
+            // Serialize resources but exclude image XObject streams to avoid duplication with
+            // imageElements
+            COSBase resourcesBase = page.getCOSObject().getDictionaryObject(COSName.RESOURCES);
+            COSBase filteredResources = filterImageXObjectsFromResources(resourcesBase);
+            pageModel.setResources(serializeCosValue(filteredResources));
             pageModel.setContentStreams(extractContentStreams(page));
             pages.add(pageModel);
             pageIndex++;
@@ -634,6 +979,215 @@ public class PdfJsonConversionService {
         return imagesByPage;
     }
 
+    /**
+     * Collects the annotations of every page as JSON models, keyed by 1-based page number.
+     *
+     * <p>Each model carries a few structured fields (subtype, contents, rectangle, appearance
+     * state, color, author, subject, dates) plus the serialized annotation dictionary as raw data.
+     * String-valued entries are read with {@code COSDictionary.getString}, so an entry of an
+     * unexpected COS type is ignored instead of aborting the whole annotation. Pages without
+     * annotations have no entry in the result. An annotation that fails to extract is logged and
+     * skipped.
+     *
+     * @param document source document
+     * @return annotations grouped by page number, in page order
+     * @throws IOException if a page's annotation list cannot be read
+     */
+    private Map<Integer, List<PdfJsonAnnotation>> collectAnnotations(PDDocument document)
+            throws IOException {
+        Map<Integer, List<PdfJsonAnnotation>> annotationsByPage = new LinkedHashMap<>();
+        int pageNumber = 1;
+        for (PDPage page : document.getPages()) {
+            List<PdfJsonAnnotation> annotations = new ArrayList<>();
+            for (PDAnnotation annotation : page.getAnnotations()) {
+                try {
+                    PdfJsonAnnotation ann = new PdfJsonAnnotation();
+                    ann.setSubtype(annotation.getSubtype());
+                    ann.setContents(annotation.getContents());
+
+                    PDRectangle rect = annotation.getRectangle();
+                    if (rect != null) {
+                        ann.setRect(
+                                List.of(
+                                        rect.getLowerLeftX(),
+                                        rect.getLowerLeftY(),
+                                        rect.getUpperRightX(),
+                                        rect.getUpperRightY()));
+                    }
+
+                    COSName appearanceState = annotation.getAppearanceState();
+                    if (appearanceState != null) {
+                        ann.setAppearanceState(appearanceState.getName());
+                    }
+
+                    if (annotation.getColor() != null) {
+                        float[] colorComponents = annotation.getColor().getComponents();
+                        List<Float> colorList = new ArrayList<>(colorComponents.length);
+                        for (float c : colorComponents) {
+                            colorList.add(c);
+                        }
+                        ann.setColor(colorList);
+                    }
+
+                    COSDictionary annotDict = annotation.getCOSObject();
+                    // getString returns null unless the entry is a COSString, so a malformed
+                    // entry leaves the field unset rather than throwing ClassCastException.
+                    ann.setAuthor(annotDict.getString(COSName.T));
+                    ann.setSubject(annotDict.getString(COSName.SUBJ));
+
+                    String rawCreationDate = annotDict.getString(COSName.CREATION_DATE);
+                    if (rawCreationDate != null) {
+                        try {
+                            Calendar creationDate = DateConverter.toCalendar(rawCreationDate);
+                            ann.setCreationDate(formatCalendar(creationDate));
+                        } catch (Exception e) {
+                            log.debug(
+                                    "Failed to parse annotation creation date: {}", e.getMessage());
+                        }
+                    }
+
+                    String rawModificationDate = annotDict.getString(COSName.M);
+                    if (rawModificationDate != null) {
+                        try {
+                            Calendar modDate = DateConverter.toCalendar(rawModificationDate);
+                            ann.setModificationDate(formatCalendar(modDate));
+                        } catch (Exception e) {
+                            log.debug(
+                                    "Failed to parse annotation modification date: {}",
+                                    e.getMessage());
+                        }
+                    }
+
+                    // Store raw dictionary for lossless round-trip
+                    ann.setRawData(serializeCosValue(annotDict));
+
+                    annotations.add(ann);
+                } catch (Exception e) {
+                    log.warn(
+                            "Failed to extract annotation on page {}: {}",
+                            pageNumber,
+                            e.getMessage());
+                }
+            }
+            if (!annotations.isEmpty()) {
+                annotationsByPage.put(pageNumber, annotations);
+            }
+            pageNumber++;
+        }
+        return annotationsByPage;
+    }
+
+    /**
+     * Extracts the fields returned by the document's AcroForm as JSON models.
+     *
+     * <p>Each model carries names, type, value, default value, flags and the serialized field
+     * dictionary as raw data. Position data comes from the field's first widget: the rectangle is
+     * recorded whenever the widget has one, and the 1-based page number only when the widget's
+     * page is found in the document's page tree. A field that fails to extract is logged and
+     * skipped.
+     *
+     * @param document source document
+     * @return extracted fields; empty when the document has no AcroForm
+     */
+    private List<PdfJsonFormField> collectFormFields(PDDocument document) {
+        List<PdfJsonFormField> formFields = new ArrayList<>();
+        PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm();
+        if (acroForm == null) {
+            return formFields;
+        }
+
+        try {
+            for (PDField field : acroForm.getFields()) {
+                try {
+                    PdfJsonFormField formField = new PdfJsonFormField();
+                    formField.setName(field.getFullyQualifiedName());
+                    formField.setPartialName(field.getPartialName());
+                    formField.setFieldType(field.getFieldType());
+                    formField.setValue(field.getValueAsString());
+
+                    // Get default value from COS dictionary
+                    COSBase dv = field.getCOSObject().getDictionaryObject(COSName.DV);
+                    if (dv instanceof COSString dvString) {
+                        formField.setDefaultValue(dvString.getString());
+                    } else if (dv instanceof COSName dvName) {
+                        formField.setDefaultValue(dvName.getName());
+                    }
+
+                    formField.setFlags(field.getFieldFlags());
+                    formField.setAlternateFieldName(field.getAlternateFieldName());
+                    formField.setMappingName(field.getMappingName());
+
+                    // Position is taken from the first widget only.
+                    List<PDAnnotationWidget> widgets = field.getWidgets();
+                    PDAnnotationWidget widget = widgets.isEmpty() ? null : widgets.get(0);
+                    if (widget != null) {
+                        PDPage fieldPage = widget.getPage();
+                        if (fieldPage != null) {
+                            // indexOf is -1 for a page outside the page tree; do not report
+                            // that as page 0.
+                            int pageIndex = document.getPages().indexOf(fieldPage);
+                            if (pageIndex >= 0) {
+                                formField.setPageNumber(pageIndex + 1);
+                            }
+                        }
+
+                        // The rectangle does not depend on the widget's page entry.
+                        PDRectangle rect = widget.getRectangle();
+                        if (rect != null) {
+                            formField.setRect(
+                                    List.of(
+                                            rect.getLowerLeftX(),
+                                            rect.getLowerLeftY(),
+                                            rect.getUpperRightX(),
+                                            rect.getUpperRightY()));
+                        }
+                    }
+
+                    // Store raw dictionary for lossless round-trip
+                    formField.setRawData(serializeCosValue(field.getCOSObject()));
+
+                    formFields.add(formField);
+                } catch (Exception e) {
+                    log.warn(
+                            "Failed to extract form field {}: {}",
+                            field.getFullyQualifiedName(),
+                            e.getMessage());
+                }
+            }
+        } catch (Exception e) {
+            log.warn("Failed to extract form fields: {}", e.getMessage());
+        }
+
+        return formFields;
+    }
+
+    /**
+     * Returns a copy of a page resources dictionary with image XObject streams removed, to avoid
+     * duplicating images that are already captured in imageElements[] with their base64 data.
+     *
+     * <p>An XObject stream counts as an image when its {@code /Subtype} is {@code /Image} and its
+     * {@code /Type} is either absent or {@code /XObject}. Entries are compared without casting, so
+     * entries of an unexpected COS type cannot raise {@code ClassCastException}. The input
+     * dictionary itself is not modified.
+     *
+     * @param resourcesBase the page's resources entry; returned unchanged when it is not a
+     *     dictionary
+     * @return a shallow copy whose XObject entry holds only non-image XObjects, or has been
+     *     removed when none remain
+     */
+    private COSBase filterImageXObjectsFromResources(COSBase resourcesBase) {
+        if (!(resourcesBase instanceof COSDictionary original)) {
+            return resourcesBase;
+        }
+
+        // Shallow copy: only the top level and the XObject entry are replaced below.
+        COSDictionary resources = new COSDictionary(original);
+
+        if (!(resources.getDictionaryObject(COSName.XOBJECT) instanceof COSDictionary xobjects)) {
+            return resources;
+        }
+
+        COSDictionary filteredXObjects = new COSDictionary();
+        for (COSName key : xobjects.keySet()) {
+            COSBase value = xobjects.getDictionaryObject(key);
+            if (value instanceof COSStream stream) {
+                COSBase type = stream.getDictionaryObject(COSName.TYPE);
+                COSBase subtype = stream.getDictionaryObject(COSName.SUBTYPE);
+                // /Type is optional on XObject streams, so a missing /Type still qualifies.
+                boolean typeAllowsImage = type == null || COSName.XOBJECT.equals(type);
+                if (typeAllowsImage && COSName.IMAGE.equals(subtype)) {
+                    continue;
+                }
+            }
+            // Keep non-image XObjects (Form XObjects, etc.)
+            filteredXObjects.setItem(key, value);
+        }
+
+        // If all XObjects were images, remove the XObject entry entirely
+        if (filteredXObjects.keySet().isEmpty()) {
+            resources.removeItem(COSName.XOBJECT);
+        } else {
+            resources.setItem(COSName.XOBJECT, filteredXObjects);
+        }
+
+        return resources;
+    }
+
     private PdfJsonMetadata extractMetadata(PDDocument document) {
         PdfJsonMetadata metadata = new PdfJsonMetadata();
         PDDocumentInformation info = document.getDocumentInformation();
@@ -709,6 +1263,93 @@ public class PdfJsonConversionService {
         }
     }
 
+    /**
+     * Re-attaches annotations to a page from their JSON models.
+     *
+     * <p>Only models whose raw data deserializes to a COS dictionary are restored; any other
+     * model is merely logged at debug level. A failure on one annotation is logged and does not
+     * stop the remaining ones.
+     *
+     * @param document document used when deserializing COS values
+     * @param page page that receives the annotations
+     * @param annotations models to restore; {@code null} or empty is a no-op
+     */
+    private void restoreAnnotations(
+            PDDocument document, PDPage page, List<PdfJsonAnnotation> annotations) {
+        if (annotations == null || annotations.isEmpty()) {
+            return;
+        }
+
+        for (PdfJsonAnnotation model : annotations) {
+            try {
+                COSBase raw =
+                        model.getRawData() == null
+                                ? null
+                                : deserializeCosValue(model.getRawData(), document);
+                if (raw instanceof COSDictionary) {
+                    // Raw COS data gives a lossless round-trip.
+                    PDAnnotation restored = PDAnnotation.createAnnotation((COSDictionary) raw);
+                    page.getAnnotations().add(restored);
+                    log.debug("Restored annotation from raw data: {}", model.getSubtype());
+                } else {
+                    // No reconstruction from the structured fields is attempted here.
+                    log.debug(
+                            "Warning: Annotation {} has no rawData, basic reconstruction may lose information",
+                            model.getSubtype());
+                }
+            } catch (Exception e) {
+                log.warn(
+                        "Failed to restore annotation {}: {}", model.getSubtype(), e.getMessage());
+            }
+        }
+    }
+
+    /**
+     * Re-adds form fields to the document's AcroForm from their JSON models.
+     *
+     * <p>An AcroForm and its fields array are created when missing. Only models whose raw data
+     * deserializes to a COS dictionary are appended to the fields array; any other model is merely
+     * logged at debug level. Failures are logged and never propagated.
+     *
+     * @param document target document
+     * @param formFields models to restore; {@code null} or empty is a no-op
+     */
+    private void restoreFormFields(PDDocument document, List<PdfJsonFormField> formFields) {
+        if (formFields == null || formFields.isEmpty()) {
+            return;
+        }
+
+        try {
+            PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm();
+            if (acroForm == null) {
+                acroForm = new PDAcroForm(document);
+                document.getDocumentCatalog().setAcroForm(acroForm);
+            }
+
+            COSDictionary acroFormDict = acroForm.getCOSObject();
+            COSArray fieldsArray = (COSArray) acroFormDict.getDictionaryObject(COSName.FIELDS);
+            if (fieldsArray == null) {
+                fieldsArray = new COSArray();
+                acroFormDict.setItem(COSName.FIELDS, fieldsArray);
+            }
+
+            for (PdfJsonFormField fieldModel : formFields) {
+                try {
+                    COSBase rawField =
+                            fieldModel.getRawData() == null
+                                    ? null
+                                    : deserializeCosValue(fieldModel.getRawData(), document);
+                    if (rawField instanceof COSDictionary) {
+                        // Raw COS data gives a lossless round-trip: append the dictionary as-is.
+                        fieldsArray.add(rawField);
+                        log.debug("Restored form field from raw data: {}", fieldModel.getName());
+                    } else {
+                        // No reconstruction from the structured fields is attempted here.
+                        log.debug(
+                                "Warning: Form field {} has no rawData, basic reconstruction may lose information",
+                                fieldModel.getName());
+                    }
+                } catch (Exception e) {
+                    log.warn(
+                            "Failed to restore form field {}: {}",
+                            fieldModel.getName(),
+                            e.getMessage());
+                }
+            }
+        } catch (Exception e) {
+            log.warn("Failed to restore form fields: {}", e.getMessage());
+        }
+    }
+
     private void applyPageResources(
             PDDocument document, PDPage page, PdfJsonCosValue resourcesModel) throws IOException {
         if (resourcesModel == null) {
@@ -720,6 +1361,81 @@ public class PdfJsonConversionService {
         }
     }
 
+    /**
+     * Reconstructs image XObjects from imageElements when content streams are preserved. During
+     * serialization, image streams are filtered out from resources to avoid duplication. This
+     * method adds them back by scanning content streams for XObject references and matching them
+     * with imageElements by objectName.
+     *
+     * <p>A referenced name without a matching image element is only reported when the page
+     * resources do not already contain an XObject of that name, so references to XObjects that
+     * are still present (such as Form XObjects) do not produce warnings.
+     *
+     * @param document document that owns the recreated images
+     * @param page page whose resources receive the images; resources are created when missing
+     * @param contentStreams preserved content streams to scan; {@code null} is a no-op
+     * @param imageElements image models to match by object name; {@code null} is a no-op
+     * @throws IOException declared for callers; failures on individual streams or images are
+     *     logged and skipped
+     */
+    private void reconstructImageXObjects(
+            PDDocument document,
+            PDPage page,
+            List<PDStream> contentStreams,
+            List<PdfJsonImageElement> imageElements)
+            throws IOException {
+
+        if (imageElements == null || contentStreams == null) {
+            return;
+        }
+
+        // Build map of objectName -> imageElement
+        Map<String, PdfJsonImageElement> imageMap = new HashMap<>();
+        for (PdfJsonImageElement img : imageElements) {
+            if (img != null && img.getObjectName() != null && !img.getObjectName().isBlank()) {
+                imageMap.put(img.getObjectName(), img);
+            }
+        }
+
+        if (imageMap.isEmpty()) {
+            return;
+        }
+
+        // Scan content streams for XObject references: a name operand directly before "Do"
+        Set<String> referencedXObjects = new HashSet<>();
+        for (PDStream stream : contentStreams) {
+            try {
+                byte[] contentBytes = stream.toByteArray();
+                PDFStreamParser parser = new PDFStreamParser(contentBytes);
+                List<Object> tokens = parser.parse();
+
+                for (int i = 1; i < tokens.size(); i++) {
+                    if (tokens.get(i) instanceof Operator op
+                            && OperatorName.DRAW_OBJECT.equals(op.getName())
+                            && tokens.get(i - 1) instanceof COSName name) {
+                        referencedXObjects.add(name.getName());
+                    }
+                }
+            } catch (Exception e) {
+                log.warn("Failed to parse content stream for image references: {}", e.getMessage());
+            }
+        }
+
+        // Reconstruct referenced image XObjects
+        PDResources resources = page.getResources();
+        if (resources == null) {
+            resources = new PDResources();
+            page.setResources(resources);
+        }
+
+        // XObjects that survived serialization are not expected to have an image element.
+        Set<String> existingXObjects = new HashSet<>();
+        for (COSName existing : resources.getXObjectNames()) {
+            existingXObjects.add(existing.getName());
+        }
+
+        for (String xobjName : referencedXObjects) {
+            PdfJsonImageElement imageElement = imageMap.get(xobjName);
+            if (imageElement == null) {
+                if (!existingXObjects.contains(xobjName)) {
+                    log.warn(
+                            "Content stream references image XObject '{}' but no matching imageElement found",
+                            xobjName);
+                }
+                continue;
+            }
+
+            try {
+                PDImageXObject image = createImageXObject(document, imageElement);
+                if (image != null) {
+                    resources.put(COSName.getPDFName(xobjName), image);
+                    log.debug("Reconstructed image XObject: {}", xobjName);
+                }
+            } catch (Exception e) {
+                log.warn("Failed to reconstruct image XObject '{}': {}", xobjName, e.getMessage());
+            }
+        }
+    }
+
     private List<PDStream> buildContentStreams(
             PDDocument document, List<PdfJsonStream> streamModels) throws IOException {
         List<PDStream> streams = new ArrayList<>();
@@ -959,6 +1675,7 @@ public class PdfJsonConversionService {
             List<PdfJsonTextElement> textElements,
             List<PdfJsonImageElement> imageElements,
             Map<String, PDFont> fontMap,
+            List<PdfJsonFont> fontModels,
             int pageNumber)
             throws IOException {
         List<DrawableElement> drawables = mergeDrawables(textElements, imageElements);
@@ -974,50 +1691,39 @@ public class PdfJsonConversionService {
                         if (element == null) {
                             continue;
                         }
-                        PDFont font = fontMap.get(buildFontKey(pageNumber, element.getFontId()));
-                        if (font == null && FALLBACK_FONT_ID.equals(element.getFontId())) {
-                            font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
-                        }
-                        float fontScale = resolveFontMatrixSize(element);
                         String text = Objects.toString(element.getText(), "");
 
-                        if (font != null) {
-                            try {
-                                font.encode(text);
-                            } catch (IOException | IllegalArgumentException ex) {
-                                log.debug(
-                                        "Edited text contains glyphs missing from font {} ({}), switching to fallback",
-                                        element.getFontId(),
-                                        ex.getMessage());
-                                font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
-                                element.setFontId(FALLBACK_FONT_ID);
-                                if (font == null) {
-                                    font = loadFallbackPdfFont(document);
-                                    fontMap.put(buildFontKey(-1, FALLBACK_FONT_ID), font);
-                                }
-                            }
-                        }
-                        if (font == null) {
-                            element.setFontId(FALLBACK_FONT_ID);
-                            font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
-                            if (font == null) {
-                                font = loadFallbackPdfFont(document);
-                                fontMap.put(buildFontKey(-1, FALLBACK_FONT_ID), font);
-                            }
-                        }
-
                         if (!textOpen) {
                             contentStream.beginText();
                             textOpen = true;
                         }
 
+                        PDFont baseFont =
+                                fontMap.get(buildFontKey(pageNumber, element.getFontId()));
+                        if (baseFont == null && element.getFontId() != null) {
+                            baseFont = fontMap.get(buildFontKey(-1, element.getFontId()));
+                        }
+
+                        float fontScale = resolveFontMatrixSize(element);
+
                         applyTextState(contentStream, element);
-                        contentStream.setFont(font, fontScale);
                         applyRenderingMode(contentStream, element.getRenderingMode());
                         applyTextMatrix(contentStream, element);
-                        String sanitized = sanitizeForFont(font, text);
-                        if (!sanitized.isEmpty()) {
-                            contentStream.showText(sanitized);
+
+                        List<FontRun> runs =
+                                buildFontRuns(
+                                        document, fontMap, fontModels, baseFont, text, element);
+
+                        PDFont activeFont = null;
+                        for (FontRun run : runs) {
+                            if (run == null || run.text().isEmpty()) {
+                                continue;
+                            }
+                            if (run.font() != activeFont) {
+                                contentStream.setFont(run.font(), fontScale);
+                                activeFont = run.font();
+                            }
+                            contentStream.showText(run.text());
                         }
                     }
                     case IMAGE -> {
@@ -1039,39 +1745,156 @@ public class PdfJsonConversionService {
         }
     }
 
-    private String sanitizeForFont(PDFont font, String text) {
+    private List<FontRun> buildFontRuns(
+            PDDocument document,
+            Map<String, PDFont> fontMap,
+            List<PdfJsonFont> fontModels,
+            PDFont primaryFont,
+            String text,
+            PdfJsonTextElement element)
+            throws IOException {
+        List<FontRun> runs = new ArrayList<>();
         if (text == null || text.isEmpty()) {
-            return "";
+            return runs;
+        }
+        // Use the supplied primary font when present, otherwise the generic fallback font.
+        PDFont baseFont = primaryFont;
+        if (baseFont == null) {
+            baseFont = ensureFallbackFont(document, fontMap, fontModels, FALLBACK_FONT_ID);
+        }
+        if (baseFont == null) {
+            log.warn("Unable to resolve a base font for text element; skipping text content");
+            return runs;
+        }
+
+        StringBuilder buffer = new StringBuilder();
+        PDFont currentFont = baseFont;
+        // Walk the text one code point at a time so surrogate pairs are never split.
+        for (int offset = 0; offset < text.length(); ) {
+            int codePoint = text.codePointAt(offset);
+            offset += Character.charCount(codePoint);
+            String glyph = new String(Character.toChars(codePoint));
+            // Start from baseFont: reusing currentFont kept later glyphs in a stale fallback font.
+            PDFont targetFont = baseFont;
+            if (!canEncode(baseFont, codePoint)) {
+                String fallbackId = resolveFallbackFontId(codePoint);
+                targetFont = ensureFallbackFont(document, fontMap, fontModels, fallbackId);
+                if (targetFont == null || !canEncode(targetFont, glyph)) {
+                    String mapped = mapUnsupportedGlyph(codePoint);
+                    if (mapped != null) {
+                        if (canEncode(baseFont, mapped)) {
+                            glyph = mapped;
+                            targetFont = baseFont;
+                        } else if (targetFont != null && canEncode(targetFont, mapped)) {
+                            glyph = mapped;
+                        }
+                    }
+                }
+                if (targetFont == null || !canEncode(targetFont, glyph)) {
+                    glyph = "?";
+                    targetFont =
+                            ensureFallbackFont(document, fontMap, fontModels, FALLBACK_FONT_ID);
+                    if (targetFont == null || !canEncode(targetFont, glyph)) {
+                        log.debug(
+                                "Dropping unsupported glyph U+{} for text element",
+                                Integer.toHexString(codePoint));
+                        continue;
+                    }
+                }
+                if (targetFont != baseFont) {
+                    log.trace(
+                            "Using fallback font '{}' for code point U+{}",
+                            targetFont.getName(),
+                            Integer.toHexString(codePoint));
+                }
+            }
+            // A font switch closes the pending run; the glyph then starts the next run's buffer.
+            if (targetFont != currentFont) {
+                if (buffer.length() > 0) {
+                    runs.add(new FontRun(currentFont, buffer.toString()));
+                    buffer.setLength(0);
+                }
+                currentFont = targetFont;
+            }
+            buffer.append(glyph);
+        }
+
+        if (buffer.length() > 0) {
+            runs.add(new FontRun(currentFont, buffer.toString()));
+        }
+
+        return runs;
+    }
+
+    private Integer extractUnitsPerEm(PDFont font) {
+        // Units-per-em is the rounded reciprocal of the font matrix X scale; 1000 otherwise.
+        if (font == null) {
+            return null;
+        }
+        Matrix fontMatrix = font.getFontMatrix();
+        float xScale = fontMatrix == null ? 0f : fontMatrix.getScaleX();
+        if (xScale == 0f) {
+            return 1000; // no matrix, or a zero X scale
+        }
+        int unitsPerEm = Math.round(Math.abs(1f / xScale));
+        if (unitsPerEm <= 0 || unitsPerEm >= 10_000) {
+            return 1000; // outside the accepted 1..9999 range
+        }
+        return unitsPerEm;
+    }
+
+    private boolean canEncodeFully(PDFont font, String text) {
+        return canEncode(font, text); // pure delegate to canEncode(PDFont, String)
+    }
+
+    private boolean canEncode(PDFont font, int codePoint) { // code point -> String, then delegate
+        return canEncode(font, new String(Character.toChars(codePoint)));
+    }
+
+    private boolean canEncode(PDFont font, String text) {
+        boolean encodable = font != null && text != null && !text.isEmpty(); // guards first
+        if (encodable) {
+            try {
+                font.encode(text); // result discarded; only success or failure matters
+            } catch (IOException | IllegalArgumentException encodingFailure) {
+                encodable = false;
+            }
+        }
+        return encodable;
+    }
+
+    private String resolveFallbackFontId(int codePoint) {
+        Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint); // block test first
+        if (block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
+                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
+                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
+                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
+                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
+                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E
+                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F
+                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
+                || block == Character.UnicodeBlock.BOPOMOFO
+                || block == Character.UnicodeBlock.BOPOMOFO_EXTENDED
+                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
+            return FALLBACK_FONT_CJK_ID;
+        }
+        // Not in one of the blocks above: pick the fallback font id from the Unicode script.
+        Character.UnicodeScript script = Character.UnicodeScript.of(codePoint);
+        switch (script) {
+            case HAN:
+                return FALLBACK_FONT_CJK_ID;
+            case HIRAGANA:
+            case KATAKANA:
+                return FALLBACK_FONT_JP_ID;
+            case HANGUL:
+                return FALLBACK_FONT_KR_ID;
+            case ARABIC:
+                return FALLBACK_FONT_AR_ID;
+            case THAI:
+                return FALLBACK_FONT_TH_ID;
+            default:
+                return FALLBACK_FONT_ID; // every other script uses the generic fallback id
         }
-        StringBuilder builder = new StringBuilder(text.length());
-        text.codePoints()
-                .forEach(
-                        codePoint -> {
-                            String candidate = new String(Character.toChars(codePoint));
-                            try {
-                                font.encode(candidate);
-                                builder.append(candidate);
-                                return;
-                            } catch (IOException | IllegalArgumentException ex) {
-                                String mapped = mapUnsupportedGlyph(codePoint);
-                                if (mapped != null) {
-                                    try {
-                                        font.encode(mapped);
-                                        builder.append(mapped);
-                                        return;
-                                    } catch (IOException | IllegalArgumentException ignore) {
-                                        // fall through to generic replacement
-                                    }
-                                }
-                                log.debug(
-                                        "Replacing unsupported glyph {} ({}) with '?' for font {}",
-                                        candidate,
-                                        String.format("U+%04X", codePoint),
-                                        font.getName());
-                                builder.append('?');
-                            }
-                        });
-        return builder.toString();
     }
 
     private String mapUnsupportedGlyph(int codePoint) {
@@ -1082,6 +1905,17 @@ public class PdfJsonConversionService {
         };
     }
 
+    private void closeQuietly(TempFile tempFile) {
+        if (tempFile != null) {
+            try {
+                tempFile.close();
+            } catch (Exception closeFailure) {
+                // Best-effort cleanup: a failed close is logged at debug level and not rethrown.
+                log.debug("Failed to close temporary file: {}", closeFailure.getMessage());
+            }
+        }
+    }
+
     private void applyTextState(PDPageContentStream contentStream, PdfJsonTextElement element)
             throws IOException {
         if (element.getCharacterSpacing() != null) {
@@ -1200,6 +2034,72 @@ public class PdfJsonConversionService {
         }
     }
 
+    private static final class PreflightResult { // immutable: fallback flag + fallback font ids
+        private static final PreflightResult EMPTY = new PreflightResult(false, Set.of());
+
+        private final boolean usesFallback;
+        private final Set<String> fallbackFontIds;
+        // A null id set is stored as an empty set; otherwise an unmodifiable copy is kept.
+        private PreflightResult(boolean usesFallback, Set<String> fallbackFontIds) {
+            this.usesFallback = usesFallback;
+            this.fallbackFontIds = fallbackFontIds != null ? Set.copyOf(fallbackFontIds) : Set.of();
+        }
+        // Returns the shared EMPTY instance (usesFallback = false, no font ids).
+        private static PreflightResult empty() {
+            return EMPTY;
+        }
+
+        private boolean usesFallback() {
+            return usesFallback;
+        }
+
+        private Set<String> fallbackFontIds() {
+            return fallbackFontIds;
+        }
+    }
+
+    private static final class FallbackFontSpec { // immutable holder of three strings
+        private final String resourceLocation;
+        private final String baseName;
+        private final String format;
+        // Values are stored exactly as given; nothing is validated or normalised here.
+        private FallbackFontSpec(String resourceLocation, String baseName, String format) {
+            this.resourceLocation = resourceLocation;
+            this.baseName = baseName;
+            this.format = format;
+        }
+
+        private String resourceLocation() {
+            return resourceLocation;
+        }
+
+        private String baseName() {
+            return baseName;
+        }
+
+        private String format() {
+            return format;
+        }
+    }
+
+    private static final class FontRun { // a piece of text paired with the font used to show it
+        private final PDFont font;
+        private final String text;
+        // Both values are stored as given; there are no null checks here.
+        private FontRun(PDFont font, String text) {
+            this.font = font;
+            this.text = text;
+        }
+
+        private PDFont font() {
+            return font;
+        }
+
+        private String text() {
+            return text;
+        }
+    }
+
     private boolean rewriteTextOperators(
             PDDocument document, PDPage page, List<PdfJsonTextElement> elements) {
         if (elements == null || elements.isEmpty()) {
@@ -1479,6 +2379,20 @@ public class PdfJsonConversionService {
                             ? fontModel.getProgramFormat().toLowerCase(Locale.ROOT)
                             : "";
             try {
+                if (isCffFormat(format)) {
+                    byte[] converted = convertCffProgramToTrueType(fontBytes);
+                    if (converted != null) {
+                        fontBytes = converted;
+                        format = "ttf";
+                        log.debug(
+                                "Converted CFF font {} to TrueType outlines for embedding",
+                                fontModel.getId());
+                    } else {
+                        log.debug(
+                                "Unable to convert CFF font {} to TrueType; attempting direct load",
+                                fontModel.getId());
+                    }
+                }
                 if (isType1Format(format)) {
                     try (InputStream stream = new ByteArrayInputStream(fontBytes)) {
                         PDFont font = new PDType1Font(document, stream);
@@ -1528,6 +2442,16 @@ public class PdfJsonConversionService {
         return "type1".equals(format) || format.endsWith("pfb");
     }
 
+    private boolean isCffFormat(String format) {
+        if (format != null) {
+            String lower = format.toLowerCase(Locale.ROOT);
+            boolean type1c = lower.contains("type1c");
+            boolean cidType0c = lower.contains("cidfonttype0c");
+            return type1c || cidType0c || lower.equals("cff"); // "cff" must match exactly
+        }
+        return false; // a null format is never treated as CFF
+    }
+
     private void applyAdditionalFontMetadata(
             PDDocument document, PDFont font, PdfJsonFont fontModel) throws IOException {
         if (fontModel.getToUnicode() != null && !fontModel.getToUnicode().isBlank()) {
@@ -1593,9 +2517,13 @@ public class PdfJsonConversionService {
             float b = matrix.get(1);
             float c = matrix.get(2);
             float d = matrix.get(3);
-            float scale = (float) Math.max(Math.hypot(a, c), Math.hypot(b, d));
-            if (scale > 0f) {
-                return scale;
+            float verticalScale = (float) Math.hypot(b, d);
+            if (verticalScale > 0f) {
+                return verticalScale;
+            }
+            float horizontalScale = (float) Math.hypot(a, c);
+            if (horizontalScale > 0f) {
+                return horizontalScale;
             }
         }
         return safeFloat(element.getFontSize(), 12f);
@@ -1945,6 +2873,20 @@ public class PdfJsonConversionService {
             cache.put(cacheKey, image);
         }
 
+        List<Float> transform = element.getTransform();
+        if (transform != null && transform.size() == 6) {
+            Matrix matrix =
+                    new Matrix(
+                            safeFloat(transform.get(0), 1f),
+                            safeFloat(transform.get(1), 0f),
+                            safeFloat(transform.get(2), 0f),
+                            safeFloat(transform.get(3), 1f),
+                            safeFloat(transform.get(4), 0f),
+                            safeFloat(transform.get(5), 0f));
+            contentStream.drawImage(image, matrix);
+            return;
+        }
+
         float width = safeFloat(element.getWidth(), fallbackWidth(element));
         float height = safeFloat(element.getHeight(), fallbackHeight(element));
         if (width <= 0f) {
diff --git a/app/core/src/main/resources/settings.yml.template b/app/core/src/main/resources/settings.yml.template
index 6bf882685..fd389337e 100644
--- a/app/core/src/main/resources/settings.yml.template
+++ b/app/core/src/main/resources/settings.yml.template
@@ -168,6 +168,16 @@ system:
     startupCleanup: true # Clean up old temp files on startup
     cleanupSystemTemp: false # Whether to clean broader system temp directory
 
+stirling:
+  pdf:
+    fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
+    json:
+      font-normalization:
+        enabled: true # Run Ghostscript preflight to normalize fonts before PDF→JSON
+      cff-converter:
+        enabled: true # Attempt to transcode CFF/Type1C programs to OTF using FontForge when available
+        fontforge-command: fontforge # Override if FontForge is installed under a different name/path
+
 ui:
   appName: '' # application's visible name
   homeDescription: '' # short description or tagline shown on the homepage
diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile
index 58655dfdb..b154ec782 100644
--- a/docker/backend/Dockerfile
+++ b/docker/backend/Dockerfile
@@ -83,6 +83,8 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
     gcompat \
     libc6-compat \
     libreoffice \
+    ghostscript \
+    fontforge \
     # pdftohtml
     poppler-utils \
     # OCR MY PDF (unpaper for descew and other advanced features)
@@ -119,4 +121,4 @@ EXPOSE 8080/tcp
 
 # Set user and run command
 ENTRYPOINT ["tini", "--", "/scripts/init.sh"]
-CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/tmp/stirling-pdf -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"]
\ No newline at end of file
+CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/tmp/stirling-pdf -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"]
diff --git a/docker/backend/Dockerfile.fat b/docker/backend/Dockerfile.fat
index bd12e3063..25fa2a0b8 100644
--- a/docker/backend/Dockerfile.fat
+++ b/docker/backend/Dockerfile.fat
@@ -73,6 +73,8 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
     gcompat \
     libc6-compat \
     libreoffice \
+    ghostscript \
+    fontforge \
     # pdftohtml
     poppler-utils \
     # OCR MY PDF (unpaper for descew and other advanced featues)
@@ -109,4 +111,4 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
 EXPOSE 8080/tcp
 # Set user and run command
 ENTRYPOINT ["tini", "--", "/scripts/init.sh"]
-CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/tmp/stirling-pdf -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"]
\ No newline at end of file
+CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/tmp/stirling-pdf -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"]
diff --git a/docker/backend/Dockerfile.ultra-lite b/docker/backend/Dockerfile.ultra-lite
index 0b74e3b0a..e18e4a0b4 100644
--- a/docker/backend/Dockerfile.ultra-lite
+++ b/docker/backend/Dockerfile.ultra-lite
@@ -59,7 +59,9 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
         curl \
         shadow \
         su-exec \
-        openjdk21-jre && \
+        openjdk21-jre \
+        ghostscript \
+        fontforge && \
     # User permissions
     mkdir -p /configs /logs /customFiles /usr/share/fonts/opentype/noto /tmp/stirling-pdf /pipeline/watchedFolders /pipeline/finishedFolders && \
     chmod +x /scripts/*.sh && \
diff --git a/docker/frontend/nginx.conf b/docker/frontend/nginx.conf
index ffe913738..3be5ec900 100644
--- a/docker/frontend/nginx.conf
+++ b/docker/frontend/nginx.conf
@@ -24,7 +24,7 @@ http {
         index index.html index.htm;
         
         # Global settings for file uploads
-        client_max_body_size 100m;
+        client_max_body_size 0; # 0 disables the request body size check (no upload limit)
 
         # Handle client-side routing - support subpaths
         location / {
@@ -48,12 +48,12 @@ http {
             proxy_cache off;
             
             # Timeout settings for large file uploads
-            proxy_connect_timeout 60s;
-            proxy_send_timeout 60s;
-            proxy_read_timeout 60s;
-            
+            proxy_connect_timeout 600s;
+            proxy_send_timeout 600s;
+            proxy_read_timeout 600s;
+
             # Request size limits for file uploads
-            client_max_body_size 100m;
+            client_max_body_size 0; # 0 disables the request body size check (no upload limit)
             proxy_request_buffering off;
         }
 
diff --git a/frontend/public/locales/en-GB/translation.json b/frontend/public/locales/en-GB/translation.json
index d58014625..4ff3a2f4d 100644
--- a/frontend/public/locales/en-GB/translation.json
+++ b/frontend/public/locales/en-GB/translation.json
@@ -4031,6 +4031,7 @@
     "fontSizeValue": "{{size}}pt",
     "noTextOnPage": "No editable text was detected on this page.",
     "emptyGroup": "[Empty Group]",
+    "imageLabel": "Placed image",
     "empty": {
       "title": "No document loaded",
       "subtitle": "Load a PDF or JSON file to begin editing text content."
diff --git a/frontend/src/components/tools/pdfJsonEditor/PdfJsonEditorView.tsx b/frontend/src/components/tools/pdfJsonEditor/PdfJsonEditorView.tsx
index c54e38bb3..f317042c5 100644
--- a/frontend/src/components/tools/pdfJsonEditor/PdfJsonEditorView.tsx
+++ b/frontend/src/components/tools/pdfJsonEditor/PdfJsonEditorView.tsx
@@ -1,10 +1,12 @@
 import React, { useCallback, useEffect, useLayoutEffect, useMemo, useRef, useState } from 'react';
 import {
+  ActionIcon,
   Alert,
   Badge,
   Box,
   Button,
   Card,
+  Collapse,
   Divider,
   FileButton,
   Group,
@@ -21,10 +23,13 @@ import PictureAsPdfIcon from '@mui/icons-material/PictureAsPdfOutlined';
 import AutorenewIcon from '@mui/icons-material/Autorenew';
 import WarningAmberIcon from '@mui/icons-material/WarningAmber';
 import UploadIcon from '@mui/icons-material/Upload';
+import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
+import ExpandLessIcon from '@mui/icons-material/ExpandLess';
 import { Rnd } from 'react-rnd';
 
 import {
   PdfJsonEditorViewData,
+  PdfJsonFont,
   PdfJsonPage,
 } from '../../../tools/pdfJsonEditorTypes';
 import { getImageBounds, pageDimensions } from '../../../tools/pdfJsonEditorUtils';
@@ -32,6 +37,68 @@ import { getImageBounds, pageDimensions } from '../../../tools/pdfJsonEditorUtil
 const MAX_RENDER_WIDTH = 820;
 const MIN_BOX_SIZE = 18;
 
+const normalizeFontFormat = (format?: string | null): string => {
+  if (!format) {
+    return 'ttf';
+  }
+  const lower = format.toLowerCase();
+  // Ordered [needle, result] pairs; 'woff2' must come before 'woff' because it contains it.
+  const rules: Array<[string, string]> = [
+    ['woff2', 'woff2'],
+    ['woff', 'woff'],
+    ['otf', 'otf'],
+    ['cff', 'otf'],
+  ];
+  for (const [needle, result] of rules) {
+    if (lower.includes(needle)) {
+      return result;
+    }
+  }
+  return 'ttf';
+};
+
+const getFontMimeType = (format: string): string => {
+  // Listed formats map to their own MIME type; every other value is reported as TrueType.
+  const known: Record<string, string> = {
+    woff2: 'font/woff2',
+    woff: 'font/woff',
+    otf: 'font/otf',
+  };
+  if (Object.prototype.hasOwnProperty.call(known, format)) {
+    return known[format];
+  }
+  return 'font/ttf';
+};
+
+const getFontFormatHint = (format: string): string | null => {
+  // Value for the CSS format() hint of a font source, or null when the format is not listed.
+  const hints: Record<string, string> = {
+    woff2: 'woff2',
+    woff: 'woff',
+    otf: 'opentype',
+    ttf: 'truetype',
+  };
+  const isListed = Object.prototype.hasOwnProperty.call(hints, format);
+  if (!isListed) {
+    return null;
+  }
+  return hints[format];
+};
+
+const decodeBase64ToUint8Array = (value: string): Uint8Array => {
+  const decoded = window.atob(value); // binary string: one character per decoded byte
+  const result = new Uint8Array(decoded.length);
+  for (let position = decoded.length - 1; position >= 0; position -= 1) {
+    result[position] = decoded.charCodeAt(position);
+  }
+  return result;
+};
+
+const buildFontFamilyName = (font: PdfJsonFont): string => {
+  const safeId = String(font.uid ?? font.id ?? 'font').replace(/[^a-zA-Z0-9_-]/g, '');
+  return `pdf-font-${safeId}`; // only letters, digits, '_' and '-' survive from the id
+};
+
 const getCaretOffset = (element: HTMLElement): number => {
   const selection = window.getSelection();
   if (!selection || selection.rangeCount === 0 || !element.contains(selection.focusNode)) {
@@ -85,11 +152,13 @@ const toCssBounds = (
   bounds: { left: number; right: number; top: number; bottom: number },
 ) => {
   const width = Math.max(bounds.right - bounds.left, 1);
+  // Note: This codebase uses inverted naming where bounds.bottom > bounds.top
+  // bounds.bottom = visually upper edge (larger Y in PDF coords)
+  // bounds.top = visually lower edge (smaller Y in PDF coords)
   const height = Math.max(bounds.bottom - bounds.top, 1);
-  // Add 20% buffer to width to account for padding and font rendering variations
-  const bufferedWidth = width * 1.2;
-  const scaledWidth = Math.max(bufferedWidth * scale, MIN_BOX_SIZE);
+  const scaledWidth = Math.max(width * scale, MIN_BOX_SIZE);
   const scaledHeight = Math.max(height * scale, MIN_BOX_SIZE / 2);
+  // Convert PDF's visually upper edge (bounds.bottom) to CSS top
   const top = Math.max(pageHeight - bounds.bottom, 0) * scale;
 
   return {
@@ -105,6 +174,8 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
   const [activeGroupId, setActiveGroupId] = useState<string | null>(null);
   const [editingGroupId, setEditingGroupId] = useState<string | null>(null);
   const [activeImageId, setActiveImageId] = useState<string | null>(null);
+  const [fontFamilies, setFontFamilies] = useState<Map<string, string>>(new Map());
+  const [textGroupsExpanded, setTextGroupsExpanded] = useState(false);
   const containerRef = useRef<HTMLDivElement | null>(null);
   const editorRefs = useRef<Map<string, HTMLDivElement>>(new Map());
   const caretOffsetsRef = useRef<Map<string, number>>(new Map());
@@ -135,6 +206,10 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
     if (!fontId || !pdfDocument?.fonts) {
       return 'sans-serif';
     }
+    const loadedFamily = fontFamilies.get(fontId);
+    if (loadedFamily) {
+      return `'${loadedFamily}', sans-serif`;
+    }
     const font = pdfDocument.fonts.find((f) => f.id === fontId);
     if (!font) {
       return 'sans-serif';
@@ -161,10 +236,134 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
     return 'Arial, Helvetica, sans-serif';
   };
 
+  const getLineHeightPx = (fontId: string | null | undefined, fontSizePx: number): number => {
+    if (fontSizePx <= 0) {
+      return fontSizePx;
+    }
+    // Without usable metrics (missing, unitsPerEm <= 0, or ascent <= descent) use 1.2 x size.
+    const fallbackHeight = fontSizePx * 1.2;
+    const metrics = fontId ? fontMetrics.get(fontId) : undefined;
+    const emUnits = metrics?.unitsPerEm ?? 0;
+    const extentUnits = metrics ? metrics.ascent - metrics.descent : 0;
+    if (emUnits <= 0 || extentUnits <= 0) {
+      return fallbackHeight;
+    }
+    // Scale the ascent-to-descent extent to pixels, never tighter than 1.05 x size.
+    return Math.max((extentUnits / emUnits) * fontSizePx, fontSizePx * 1.05);
+  };
+
+  const getFontWeight = (fontId: string | null | undefined): number | 'normal' | 'bold' => {
+    if (!fontId || !pdfDocument?.fonts) {
+      return 'normal';
+    }
+    const font = pdfDocument.fonts.find((f) => f.id === fontId);
+    if (!font) {
+      return 'normal';
+    }
+
+    // ForceBold is flag bit 19 in the PDF spec's 1-based numbering: mask 1 << 18 = 262144 (0x40000).
+    const FORCE_BOLD_FLAG = 262144;
+    if (((font.fontDescriptorFlags ?? 0) & FORCE_BOLD_FLAG) !== 0) {
+      return 'bold';
+    }
+
+    // Fonts with missing or zero descriptor flags must still reach this name-based check.
+    const fontName = font.standard14Name || font.baseName || '';
+    if (fontName.toLowerCase().includes('bold')) {
+      return 'bold';
+    }
+
+    return 'normal';
+  };
+
   const pages = pdfDocument?.pages ?? [];
   const currentPage = pages[selectedPage] ?? null;
   const pageGroups = groupsByPage[selectedPage] ?? [];
   const pageImages = imagesByPage[selectedPage] ?? [];
+
+  const fontMetrics = useMemo(() => { // per-font metrics keyed by font id, with defaults
+    const metrics = new Map<string, { unitsPerEm: number; ascent: number; descent: number }>();
+    pdfDocument?.fonts?.forEach((font) => {
+      if (!font?.id) {
+        return; // fonts without an id cannot be looked up later, so they are skipped
+      }
+      const unitsPerEm = font.unitsPerEm && font.unitsPerEm > 0 ? font.unitsPerEm : 1000;
+      const ascent = font.ascent ?? unitsPerEm; // default when absent: one full em
+      const descent = font.descent ?? -(unitsPerEm * 0.2); // default when absent: -20% of an em
+      metrics.set(font.id, { unitsPerEm, ascent, descent });
+    });
+    return metrics;
+  }, [pdfDocument?.fonts]);
+
+  useEffect(() => {
+    if (typeof FontFace === 'undefined') {
+      setFontFamilies(new Map());
+      return undefined;
+    }
+    // Registers each embedded font program as a FontFace; cleanup removes them and frees blob URLs.
+    let disposed = false;
+    const active: { fontFace: FontFace; url?: string }[] = [];
+
+    const registerFonts = async () => {
+      const fonts = pdfDocument?.fonts ?? [];
+      if (fonts.length === 0) {
+        setFontFamilies(new Map());
+        return;
+      }
+
+      const next = new Map<string, string>();
+      for (const font of fonts) {
+        if (!font?.id || !font.program) {
+          continue;
+        }
+        try {
+          const format = normalizeFontFormat(font.programFormat);
+          const data = decodeBase64ToUint8Array(font.program);
+          const blob = new Blob([data as BlobPart], { type: getFontMimeType(format) });
+          const url = URL.createObjectURL(blob);
+          const formatHint = getFontFormatHint(format);
+          const familyName = buildFontFamilyName(font);
+          const source = formatHint ? `url(${url}) format('${formatHint}')` : `url(${url})`;
+          const fontFace = new FontFace(familyName, source);
+          await fontFace.load().catch((loadError: unknown) => { URL.revokeObjectURL(url); throw loadError; });
+          if (disposed) {
+            document.fonts.delete(fontFace);
+            URL.revokeObjectURL(url);
+            continue;
+          }
+          document.fonts.add(fontFace);
+          active.push({ fontFace, url });
+          next.set(font.id, familyName);
+        } catch (error) {
+          // Font loading failures are ignored - embedded PDF fonts often lack web font tables and
+          // getFontFamily() falls back to web-safe fonts. A failed load has already freed its blob URL.
+        }
+      }
+
+      if (!disposed) {
+        setFontFamilies(next);
+      } else {
+        active.forEach(({ fontFace, url }) => {
+          document.fonts.delete(fontFace);
+          if (url) {
+            URL.revokeObjectURL(url);
+          }
+        });
+      }
+    };
+
+    registerFonts();
+
+    return () => {
+      disposed = true;
+      active.forEach(({ fontFace, url }) => {
+        document.fonts.delete(fontFace);
+        if (url) {
+          URL.revokeObjectURL(url);
+        }
+      });
+    };
+  }, [pdfDocument?.fonts]);
   const visibleGroups = useMemo(
     () =>
       pageGroups.filter((group) => {
@@ -419,25 +618,33 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
             <ScrollArea h="100%" offsetScrollbars>
               <Box
                 style={{
-                  margin: '0 auto',
-                  background: '#f3f4f6',
-                  padding: '1.5rem',
-                  borderRadius: '0.75rem',
+                  display: 'flex',
+                  justifyContent: 'center',
+                  alignItems: 'flex-start',
+                  width: '100%',
+                  minHeight: '100%',
                 }}
-                onClick={handleBackgroundClick}
               >
                 <Box
                   style={{
-                    position: 'relative',
-                    width: `${scaledWidth}px`,
-                    height: `${scaledHeight}px`,
-                    backgroundColor: '#ffffff',
-                    boxShadow: '0 0 12px rgba(15, 23, 42, 0.12)',
-                    borderRadius: '0.5rem',
-                    overflow: 'hidden',
+                    background: '#f3f4f6',
+                    padding: '0.5rem',
+                    borderRadius: '0.75rem',
                   }}
-                  ref={containerRef}
+                  onClick={handleBackgroundClick}
                 >
+                  <Box
+                    style={{
+                      position: 'relative',
+                      width: `${scaledWidth}px`,
+                      height: `${scaledHeight}px`,
+                      backgroundColor: '#ffffff',
+                      boxShadow: '0 0 12px rgba(15, 23, 42, 0.12)',
+                      borderRadius: '0.5rem',
+                      overflow: 'hidden',
+                    }}
+                    ref={containerRef}
+                  >
                   {orderedImages.map((image, imageIndex) => {
                     if (!image?.imageData) {
                       return null;
@@ -466,7 +673,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
                           setEditingGroupId(null);
                           setActiveImageId(imageId);
                         }}
-                        onDrag={(event, data) => {
+                        onDrag={(_event, data) => {
                           emitImageTransform(
                             imageId,
                             data.x,
@@ -475,7 +682,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
                             cssHeight,
                           );
                         }}
-                        onDragStop={(event, data) => {
+                        onDragStop={(_event, data) => {
                           emitImageTransform(
                             imageId,
                             data.x,
@@ -489,7 +696,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
                           setActiveGroupId(null);
                           setEditingGroupId(null);
                         }}
-                        onResize={(event, _direction, ref, _delta, position) => {
+                        onResize={(_event, _direction, ref, _delta, position) => {
                           const nextWidth = parseFloat(ref.style.width);
                           const nextHeight = parseFloat(ref.style.height);
                           emitImageTransform(
@@ -500,7 +707,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
                             nextHeight,
                           );
                         }}
-                        onResizeStop={(event, _direction, ref, _delta, position) => {
+                        onResizeStop={(_event, _direction, ref, _delta, position) => {
                           const nextWidth = parseFloat(ref.style.width);
                           const nextHeight = parseFloat(ref.style.height);
                           emitImageTransform(
@@ -567,21 +774,48 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
                       const baseFontSize = group.fontMatrixSize ?? group.fontSize ?? 12;
                       const fontSizePx = Math.max(baseFontSize * scale, 6);
                       const fontFamily = getFontFamily(group.fontId);
+                      const lineHeightPx = getLineHeightPx(group.fontId, fontSizePx);
+                      const lineHeightRatio = fontSizePx > 0 ? Math.max(lineHeightPx / fontSizePx, 1.05) : 1.2;
+                      const hasRotation = group.rotation != null && Math.abs(group.rotation) > 0.5;
+                      const baselineLength = group.baselineLength ?? Math.max(group.bounds.right - group.bounds.left, 0);
 
-                      const visualHeight = Math.max(bounds.height, fontSizePx * 1.2);
+                      let containerLeft = bounds.left;
+                      let containerTop = bounds.top;
+                      let containerWidth = Math.max(bounds.width, fontSizePx);
+                      let containerHeight = Math.max(bounds.height, lineHeightPx);
+                      let transform: string | undefined;
+                      let transformOrigin: React.CSSProperties['transformOrigin'];
+
+                      if (hasRotation) {
+                        const anchorX = group.anchor?.x ?? group.bounds.left;
+                        const anchorY = group.anchor?.y ?? group.bounds.bottom;
+                        containerLeft = anchorX * scale;
+                        containerTop = Math.max(pageHeight - anchorY, 0) * scale;
+                        containerWidth = Math.max(baselineLength * scale, MIN_BOX_SIZE);
+                        containerHeight = Math.max(lineHeightPx, fontSizePx * lineHeightRatio);
+                        transformOrigin = 'left bottom';
+                        // Negate rotation because Y-axis is flipped from PDF to web coordinates
+                        transform = `rotate(${-group.rotation}deg)`;
+                      }
+
+                      // Extract styling from group
+                      const textColor = group.color || '#111827';
+                      const fontWeight = group.fontWeight || getFontWeight(group.fontId);
 
                       const containerStyle: React.CSSProperties = {
                         position: 'absolute',
-                        left: `${bounds.left}px`,
-                        top: `${bounds.top}px`,
-                        width: `${bounds.width}px`,
-                        height: `${visualHeight}px`,
+                        left: `${containerLeft}px`,
+                        top: `${containerTop}px`,
+                        width: `${containerWidth}px`,
+                        height: `${containerHeight}px`,
                         display: 'flex',
                         alignItems: 'flex-start',
                         justifyContent: 'flex-start',
                         pointerEvents: 'auto',
                         cursor: 'text',
                         zIndex: 2_000_000,
+                        transform,
+                        transformOrigin,
                       };
 
                       if (isEditing) {
@@ -628,17 +862,17 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
                                 style={{
                                   width: '100%',
                                   height: '100%',
-                                  padding: '3px 4px',
+                                  padding: 0,
                                   backgroundColor: 'rgba(255,255,255,0.95)',
-                                  color: '#111827',
+                                  color: textColor,
                                   fontSize: `${fontSizePx}px`,
                                   fontFamily,
-                                  lineHeight: 1.25,
+                                  fontWeight,
+                                  lineHeight: lineHeightRatio,
                                   outline: 'none',
                                   border: 'none',
                                   display: 'block',
-                                  whiteSpace: 'pre-wrap',
-                                  overflowWrap: 'anywhere',
+                                  whiteSpace: 'nowrap',
                                   cursor: 'text',
                                   overflow: 'visible',
                                 }}
@@ -660,12 +894,13 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
                               style={{
                                 width: '100%',
                                 minHeight: '100%',
-                                padding: '2px 4px',
-                                whiteSpace: 'pre-wrap',
+                                padding: 0,
+                                whiteSpace: 'nowrap',
                                 fontSize: `${fontSizePx}px`,
                                 fontFamily,
-                                lineHeight: 1.25,
-                                color: '#111827',
+                                fontWeight,
+                                lineHeight: lineHeightRatio,
+                                color: textColor,
                                 display: 'block',
                                 cursor: 'text',
                                 overflow: 'visible',
@@ -682,6 +917,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
                       );
                     })
                   )}
+                  </Box>
                 </Box>
               </Box>
             </ScrollArea>
@@ -689,48 +925,61 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
 
           <Card padding="md" withBorder radius="md">
             <Stack gap="xs">
-              <Text fw={500}>{t('pdfJsonEditor.groupList', 'Detected Text Groups')}</Text>
-              <Divider />
-              <ScrollArea h={180} offsetScrollbars>
-                <Stack gap="sm">
-                  {visibleGroups.map((group) => {
-                    const changed = group.text !== group.originalText;
-                    return (
-                      <Card
-                        key={`list-${group.id}`}
-                        padding="sm"
-                        radius="md"
-                        withBorder
-                        shadow={changed ? 'sm' : 'none'}
-                        onMouseEnter={() => setActiveGroupId(group.id)}
-                        onMouseLeave={() => setActiveGroupId((current) => (current === group.id ? null : current))}
-                        style={{ cursor: 'pointer' }}
-                        onClick={() => {
-                          setActiveGroupId(group.id);
-                          setEditingGroupId(group.id);
-                        }}
-                      >
-                        <Stack gap={4}>
-                          <Group gap="xs">
-                            {changed && <Badge color="yellow" size="xs">{t('pdfJsonEditor.badges.modified', 'Edited')}</Badge>}
-                            {group.fontId && (
-                              <Badge size="xs" variant="outline">{group.fontId}</Badge>
-                            )}
-                            {group.fontSize && (
-                              <Badge size="xs" variant="light">
-                                {t('pdfJsonEditor.fontSizeValue', '{{size}}pt', { size: group.fontSize.toFixed(1) })}
-                              </Badge>
-                            )}
-                          </Group>
-                          <Text size="sm" c="dimmed" lineClamp={2}>
-                            {group.text || t('pdfJsonEditor.emptyGroup', '[Empty Group]')}
-                          </Text>
-                        </Stack>
-                      </Card>
-                    );
-                  })}
+              <Group justify="space-between" align="center">
+                <Text fw={500}>{t('pdfJsonEditor.groupList', 'Detected Text Groups')}</Text>
+                <ActionIcon
+                  variant="subtle"
+                  onClick={() => setTextGroupsExpanded(!textGroupsExpanded)}
+                  aria-label={textGroupsExpanded ? 'Collapse' : 'Expand'}
+                >
+                  {textGroupsExpanded ? <ExpandLessIcon /> : <ExpandMoreIcon />}
+                </ActionIcon>
+              </Group>
+              <Collapse in={textGroupsExpanded}>
+                <Stack gap="xs">
+                  <Divider />
+                  <ScrollArea h={180} offsetScrollbars>
+                    <Stack gap="sm">
+                      {visibleGroups.map((group) => {
+                        const changed = group.text !== group.originalText;
+                        return (
+                          <Card
+                            key={`list-${group.id}`}
+                            padding="sm"
+                            radius="md"
+                            withBorder
+                            shadow={changed ? 'sm' : 'none'}
+                            onMouseEnter={() => setActiveGroupId(group.id)}
+                            onMouseLeave={() => setActiveGroupId((current) => (current === group.id ? null : current))}
+                            style={{ cursor: 'pointer' }}
+                            onClick={() => {
+                              setActiveGroupId(group.id);
+                              setEditingGroupId(group.id);
+                            }}
+                          >
+                            <Stack gap={4}>
+                              <Group gap="xs">
+                                {changed && <Badge color="yellow" size="xs">{t('pdfJsonEditor.badges.modified', 'Edited')}</Badge>}
+                                {group.fontId && (
+                                  <Badge size="xs" variant="outline">{group.fontId}</Badge>
+                                )}
+                                {group.fontSize && (
+                                  <Badge size="xs" variant="light">
+                                    {t('pdfJsonEditor.fontSizeValue', '{{size}}pt', { size: group.fontSize.toFixed(1) })}
+                                  </Badge>
+                                )}
+                              </Group>
+                              <Text size="sm" c="dimmed" lineClamp={2}>
+                                {group.text || t('pdfJsonEditor.emptyGroup', '[Empty Group]')}
+                              </Text>
+                            </Stack>
+                          </Card>
+                        );
+                      })}
+                    </Stack>
+                  </ScrollArea>
                 </Stack>
-              </ScrollArea>
+              </Collapse>
             </Stack>
           </Card>
         </Stack>
diff --git a/frontend/src/tools/pdfJsonEditorTypes.ts b/frontend/src/tools/pdfJsonEditorTypes.ts
index ff991b916..c1da39656 100644
--- a/frontend/src/tools/pdfJsonEditorTypes.ts
+++ b/frontend/src/tools/pdfJsonEditorTypes.ts
@@ -23,6 +23,12 @@ export interface PdfJsonFont {
   toUnicode?: string | null;
   standard14Name?: string | null;
   fontDescriptorFlags?: number | null;
+  ascent?: number | null;
+  descent?: number | null;
+  capHeight?: number | null;
+  xHeight?: number | null;
+  italicAngle?: number | null;
+  unitsPerEm?: number | null;
 }
 
 export interface PdfJsonTextElement {
@@ -117,6 +123,11 @@ export interface TextGroup {
   fontId?: string | null;
   fontSize?: number | null;
   fontMatrixSize?: number | null;
+  color?: string | null;
+  fontWeight?: number | 'normal' | 'bold' | null;
+  rotation?: number | null;
+  anchor?: { x: number; y: number } | null;
+  baselineLength?: number | null;
   elements: PdfJsonTextElement[];
   originalElements: PdfJsonTextElement[];
   text: string;
diff --git a/frontend/src/tools/pdfJsonEditorUtils.ts b/frontend/src/tools/pdfJsonEditorUtils.ts
index 425c6ba44..8085fe759 100644
--- a/frontend/src/tools/pdfJsonEditorUtils.ts
+++ b/frontend/src/tools/pdfJsonEditorUtils.ts
@@ -69,9 +69,15 @@ const getHeight = (element: PdfJsonTextElement): number => {
 const getElementBounds = (element: PdfJsonTextElement): BoundingBox => {
   const left = getX(element);
   const width = getWidth(element);
-  const bottom = getBaseline(element);
+  const baseline = getBaseline(element);
   const height = getHeight(element);
-  const top = bottom - height;
+  // In PDF coordinates, baseline is where text sits
+  // Typical typography: ~80% of height above baseline (ascenders), ~20% below (descenders)
+  // Using codebase's inverted naming: bottom (visual top) > top (visual bottom)
+  const ascent = height * 0.8;
+  const descent = height * 0.2;
+  const bottom = baseline + ascent;  // Visual top of text
+  const top = baseline - descent;    // Visual bottom (includes descenders)
   return {
     left,
     right: left + width,
@@ -181,6 +187,136 @@ const buildGroupText = (elements: PdfJsonTextElement[]): string => {
   return result;
 };
 
+/** Converts the first three 0..1 components to a CSS `rgb()` string; black when fewer than three are given. */
+const rgbToCss = (components: number[]): string => {
+  if (components.length < 3) {
+    return 'rgb(0, 0, 0)';
+  }
+  const toByte = (value: number): number => Math.round(Math.max(0, Math.min(1, value)) * 255);
+  const [red, green, blue] = components.slice(0, 3).map(toByte);
+  return `rgb(${red}, ${green}, ${blue})`;
+};
+
+/**
+ * Converts CMYK components (each clamped to 0..1) to a CSS `rgb()` string using
+ * channel = round(255 * (1 - ink) * (1 - k)). Returns black when fewer than four are given.
+ */
+const cmykToCss = (components: number[]): string => {
+  if (components.length < 4) {
+    return 'rgb(0, 0, 0)';
+  }
+  const clamp01 = (value: number): number => Math.max(0, Math.min(1, value));
+  const [cyan, magenta, yellow, black] = components.slice(0, 4).map(clamp01);
+  const toChannel = (ink: number): number => Math.round(255 * (1 - ink) * (1 - black));
+  return `rgb(${toChannel(cyan)}, ${toChannel(magenta)}, ${toChannel(yellow)})`;
+};
+
+/** Maps a single 0..1 gray level to an equal-channel CSS `rgb()` string; black when no component is given. */
+const grayToCss = (components: number[]): string => {
+  const level = components.length >= 1
+    ? Math.round(Math.max(0, Math.min(1, components[0])) * 255)
+    : 0;
+  return `rgb(${level}, ${level}, ${level})`;
+};
+
+/** Converts an element's fill color to a CSS `rgb()` string, or null when it cannot be interpreted. */
+const extractColor = (element: PdfJsonTextElement): string | null => {
+  const components = element.fillColor?.components;
+  if (!components || components.length === 0) {
+    return null;
+  }
+  const colorSpace = (element.fillColor?.colorSpace ?? '').toLowerCase();
+  // 'srgb' contains 'rgb', so a single substring test covers both spellings.
+  if (colorSpace.includes('rgb')) {
+    return rgbToCss(components);
+  }
+  if (colorSpace.includes('cmyk')) {
+    return cmykToCss(components);
+  }
+  if (colorSpace.includes('gray') || colorSpace.includes('grey')) {
+    return grayToCss(components);
+  }
+  // Unrecognized color space name (e.g. ICCBased): fall back to the component count.
+  // Exactly four components are treated as CMYK; reading them as RGB would ignore K and
+  // render CMYK white [0, 0, 0, 0] as black.
+  if (components.length === 4) {
+    return cmykToCss(components);
+  }
+  if (components.length === 1) {
+    return grayToCss(components);
+  }
+  return components.length >= 3 ? rgbToCss(components) : null;
+};
+
+const RAD_TO_DEG = 180 / Math.PI; // Degrees per radian: multiply a radian value by this to get degrees.
+
+/** Wraps an angle in degrees into the half-open range (-180, 180]. */
+const normalizeAngle = (angle: number): number => {
+  const wrapped = angle % 360;
+  if (wrapped > 180) {
+    return wrapped - 360;
+  }
+  // -180 itself maps to +180, which keeps the lower bound exclusive.
+  return wrapped <= -180 ? wrapped + 360 : wrapped;
+};
+
+/**
+ * Reads the rotation, in degrees, implied by the first two entries of an element's text matrix.
+ * Returns null for a missing or non-6-value matrix, near-zero entries, or an angle under 0.5 degrees.
+ */
+const extractElementRotation = (element: PdfJsonTextElement): number | null => {
+  const textMatrix = element.textMatrix;
+  if (!textMatrix || textMatrix.length !== 6) {
+    return null;
+  }
+  const [m0, m1] = textMatrix;
+  if (Math.abs(m0) < 1e-6 && Math.abs(m1) < 1e-6) {
+    return null;
+  }
+  const degrees = Math.atan2(m1, m0) * RAD_TO_DEG;
+  return Math.abs(degrees) < 0.5 ? null : normalizeAngle(degrees);
+};
+
+/**
+ * Averages the per-element rotations of a group as a circular mean (sum of unit vectors).
+ * Elements for which no rotation is detected are ignored. Returns null when no element
+ * contributes, when the summed vector is effectively zero, or when the mean is under 0.5 degrees.
+ */
+const computeGroupRotation = (elements: PdfJsonTextElement[]): number | null => {
+  let sumX = 0;
+  let sumY = 0;
+  for (const element of elements) {
+    const angle = extractElementRotation(element);
+    if (angle === null) {
+      continue;
+    }
+    const radians = (angle * Math.PI) / 180;
+    sumX += Math.cos(radians);
+    sumY += Math.sin(radians);
+  }
+  if (Math.abs(sumX) < 1e-6 && Math.abs(sumY) < 1e-6) {
+    return null;
+  }
+  const mean = normalizeAngle(Math.atan2(sumY, sumX) * RAD_TO_DEG);
+  return Math.abs(mean) < 0.5 ? null : mean;
+};
+
+/**
+ * Returns an element's anchor point: entries 4 and 5 of its text matrix when the matrix has
+ * exactly 6 values, otherwise the element's own x / y. Every value is passed through valueOr.
+ */
+const getAnchorPoint = (element: PdfJsonTextElement): { x: number; y: number } => {
+  const matrix = element.textMatrix;
+  if (!matrix || matrix.length !== 6) {
+    return { x: valueOr(element.x), y: valueOr(element.y) };
+  }
+  const [, , , , matrixX, matrixY] = matrix;
+  return { x: valueOr(matrixX), y: valueOr(matrixY) };
+};
+
+const computeBaselineLength = (elements: PdfJsonTextElement[]): number => // sum of element widths only
+  elements.map((element) => getWidth(element)).reduce((total, width) => total + width, 0);
+
 const createGroup = (
   pageIndex: number,
   idSuffix: number,
@@ -189,13 +325,22 @@ const createGroup = (
   const clones = elements.map(cloneTextElement);
   const originalClones = clones.map(cloneTextElement);
   const bounds = mergeBounds(elements.map(getElementBounds));
+  const firstElement = elements[0];
+  const rotation = computeGroupRotation(elements);
+  const anchor = rotation !== null ? getAnchorPoint(firstElement) : null;
+  const baselineLength = computeBaselineLength(elements);
 
   return {
     id: `${pageIndex}-${idSuffix}`,
     pageIndex,
-    fontId: elements[0]?.fontId,
-    fontSize: elements[0]?.fontSize,
-    fontMatrixSize: elements[0]?.fontMatrixSize,
+    fontId: firstElement?.fontId,
+    fontSize: firstElement?.fontSize,
+    fontMatrixSize: firstElement?.fontMatrixSize,
+    color: firstElement ? extractColor(firstElement) : null,
+    fontWeight: null, // Will be determined from font descriptor
+    rotation,
+    anchor,
+    baselineLength,
     elements: clones,
     originalElements: originalClones,
     text: buildGroupText(elements),
@@ -253,7 +398,18 @@ export const groupPageTextElements = (page: PdfJsonPage | null | undefined, page
       const splitThreshold = Math.max(SPACE_MIN_GAP, avgFontSize * GAP_FACTOR);
 
       const sameFont = previous.fontId === element.fontId;
-      const shouldSplit = gap > splitThreshold * (sameFont ? 1.4 : 1.0);
+      let shouldSplit = gap > splitThreshold * (sameFont ? 1.4 : 1.0);
+
+      const previousRotation = extractElementRotation(previous);
+      const currentRotation = extractElementRotation(element);
+      if (
+        shouldSplit &&
+        previousRotation !== null &&
+        currentRotation !== null &&
+        Math.abs(normalizeAngle(previousRotation - currentRotation)) < 1
+      ) {
+        shouldSplit = false;
+      }
 
       if (shouldSplit) {
         groups.push(createGroup(pageIndex, groupCounter, currentBucket));