mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
clean fonts
This commit is contained in:
parent
af19a5af23
commit
c7c5613c13
@ -0,0 +1,61 @@
|
|||||||
|
package stirling.software.SPDF.model.json;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a PDF annotation (comments, highlights, stamps, etc.). Annotations often contain OCR
|
||||||
|
* text layers or other metadata not visible in content streams.
|
||||||
|
*/
|
||||||
|
@Data
|
||||||
|
@Builder
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
|
public class PdfJsonAnnotation {
|
||||||
|
|
||||||
|
/** Annotation subtype (Text, Highlight, Link, Stamp, Widget, etc.) */
|
||||||
|
private String subtype;
|
||||||
|
|
||||||
|
/** Human-readable text content of the annotation */
|
||||||
|
private String contents;
|
||||||
|
|
||||||
|
/** Annotation rectangle [x1, y1, x2, y2] */
|
||||||
|
private List<Float> rect;
|
||||||
|
|
||||||
|
/** Annotation appearance characteristics */
|
||||||
|
private String appearanceState;
|
||||||
|
|
||||||
|
/** Color components (e.g., [r, g, b] for RGB) */
|
||||||
|
private List<Float> color;
|
||||||
|
|
||||||
|
/** Annotation flags (print, hidden, etc.) */
|
||||||
|
private Integer flags;
|
||||||
|
|
||||||
|
/** For link annotations: destination or action */
|
||||||
|
private String destination;
|
||||||
|
|
||||||
|
/** For text annotations: icon name */
|
||||||
|
private String iconName;
|
||||||
|
|
||||||
|
/** Subject/title of the annotation */
|
||||||
|
private String subject;
|
||||||
|
|
||||||
|
/** Author of the annotation */
|
||||||
|
private String author;
|
||||||
|
|
||||||
|
/** Creation date (ISO 8601 format) */
|
||||||
|
private String creationDate;
|
||||||
|
|
||||||
|
/** Modification date (ISO 8601 format) */
|
||||||
|
private String modificationDate;
|
||||||
|
|
||||||
|
/** Full annotation dictionary for lossless round-tripping */
|
||||||
|
private PdfJsonCosValue rawData;
|
||||||
|
}
|
||||||
@ -25,4 +25,7 @@ public class PdfJsonDocument {
|
|||||||
@Builder.Default private List<PdfJsonFont> fonts = new ArrayList<>();
|
@Builder.Default private List<PdfJsonFont> fonts = new ArrayList<>();
|
||||||
|
|
||||||
@Builder.Default private List<PdfJsonPage> pages = new ArrayList<>();
|
@Builder.Default private List<PdfJsonPage> pages = new ArrayList<>();
|
||||||
|
|
||||||
|
/** Form fields (AcroForm) at document level */
|
||||||
|
@Builder.Default private List<PdfJsonFormField> formFields = new ArrayList<>();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -52,4 +52,22 @@ public class PdfJsonFont {
|
|||||||
|
|
||||||
/** Font descriptor flags copied from the source document. */
|
/** Font descriptor flags copied from the source document. */
|
||||||
private Integer fontDescriptorFlags;
|
private Integer fontDescriptorFlags;
|
||||||
|
|
||||||
|
/** Font ascent in glyph units (typically 1/1000). */
|
||||||
|
private Float ascent;
|
||||||
|
|
||||||
|
/** Font descent in glyph units (typically negative). */
|
||||||
|
private Float descent;
|
||||||
|
|
||||||
|
/** Capital height when available. */
|
||||||
|
private Float capHeight;
|
||||||
|
|
||||||
|
/** x-height when available. */
|
||||||
|
private Float xHeight;
|
||||||
|
|
||||||
|
/** Italic angle reported by the font descriptor. */
|
||||||
|
private Float italicAngle;
|
||||||
|
|
||||||
|
/** Units per em extracted from the font matrix. */
|
||||||
|
private Integer unitsPerEm;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -0,0 +1,66 @@
|
|||||||
|
package stirling.software.SPDF.model.json;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
/** Represents a PDF form field (AcroForm). */
|
||||||
|
@Data
|
||||||
|
@Builder
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
|
public class PdfJsonFormField {
|
||||||
|
|
||||||
|
/** Fully qualified field name (e.g., "form1.textfield1") */
|
||||||
|
private String name;
|
||||||
|
|
||||||
|
/** Partial field name (last component) */
|
||||||
|
private String partialName;
|
||||||
|
|
||||||
|
/** Field type (Tx=text, Btn=button, Ch=choice, Sig=signature) */
|
||||||
|
private String fieldType;
|
||||||
|
|
||||||
|
/** Field value as string */
|
||||||
|
private String value;
|
||||||
|
|
||||||
|
/** Default value */
|
||||||
|
private String defaultValue;
|
||||||
|
|
||||||
|
/** Field flags (readonly, required, multiline, etc.) */
|
||||||
|
private Integer flags;
|
||||||
|
|
||||||
|
/** Alternative field name (for accessibility) */
|
||||||
|
private String alternateFieldName;
|
||||||
|
|
||||||
|
/** Mapping name (for export) */
|
||||||
|
private String mappingName;
|
||||||
|
|
||||||
|
/** Page number where field appears (1-indexed) */
|
||||||
|
private Integer pageNumber;
|
||||||
|
|
||||||
|
/** Field rectangle [x1, y1, x2, y2] on the page */
|
||||||
|
private List<Float> rect;
|
||||||
|
|
||||||
|
/** For choice fields: list of options */
|
||||||
|
private List<String> options;
|
||||||
|
|
||||||
|
/** For choice fields: selected indices */
|
||||||
|
private List<Integer> selectedIndices;
|
||||||
|
|
||||||
|
/** For button fields: whether it's checked */
|
||||||
|
private Boolean checked;
|
||||||
|
|
||||||
|
/** Font information for text fields */
|
||||||
|
private String fontName;
|
||||||
|
|
||||||
|
private Float fontSize;
|
||||||
|
|
||||||
|
/** Full field dictionary for lossless round-tripping */
|
||||||
|
private PdfJsonCosValue rawData;
|
||||||
|
}
|
||||||
@ -24,6 +24,7 @@ public class PdfJsonPage {
|
|||||||
|
|
||||||
@Builder.Default private List<PdfJsonTextElement> textElements = new ArrayList<>();
|
@Builder.Default private List<PdfJsonTextElement> textElements = new ArrayList<>();
|
||||||
@Builder.Default private List<PdfJsonImageElement> imageElements = new ArrayList<>();
|
@Builder.Default private List<PdfJsonImageElement> imageElements = new ArrayList<>();
|
||||||
|
@Builder.Default private List<PdfJsonAnnotation> annotations = new ArrayList<>();
|
||||||
|
|
||||||
/** Serialized representation of the page resources dictionary. */
|
/** Serialized representation of the page resources dictionary. */
|
||||||
private PdfJsonCosValue resources;
|
private PdfJsonCosValue resources;
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -168,6 +168,16 @@ system:
|
|||||||
startupCleanup: true # Clean up old temp files on startup
|
startupCleanup: true # Clean up old temp files on startup
|
||||||
cleanupSystemTemp: false # Whether to clean broader system temp directory
|
cleanupSystemTemp: false # Whether to clean broader system temp directory
|
||||||
|
|
||||||
|
stirling:
|
||||||
|
pdf:
|
||||||
|
fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
|
||||||
|
json:
|
||||||
|
font-normalization:
|
||||||
|
enabled: true # Run Ghostscript preflight to normalize fonts before PDF→JSON
|
||||||
|
cff-converter:
|
||||||
|
enabled: true # Attempt to transcode CFF/Type1C programs to OTF using FontForge when available
|
||||||
|
fontforge-command: fontforge # Override if FontForge is installed under a different name/path
|
||||||
|
|
||||||
ui:
|
ui:
|
||||||
appName: '' # application's visible name
|
appName: '' # application's visible name
|
||||||
homeDescription: '' # short description or tagline shown on the homepage
|
homeDescription: '' # short description or tagline shown on the homepage
|
||||||
|
|||||||
@ -83,6 +83,8 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
|
|||||||
gcompat \
|
gcompat \
|
||||||
libc6-compat \
|
libc6-compat \
|
||||||
libreoffice \
|
libreoffice \
|
||||||
|
ghostscript \
|
||||||
|
fontforge \
|
||||||
# pdftohtml
|
# pdftohtml
|
||||||
poppler-utils \
|
poppler-utils \
|
||||||
# OCR MY PDF (unpaper for deskew and other advanced features)
|
# OCR MY PDF (unpaper for deskew and other advanced features)
|
||||||
|
|||||||
@ -73,6 +73,8 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
|
|||||||
gcompat \
|
gcompat \
|
||||||
libc6-compat \
|
libc6-compat \
|
||||||
libreoffice \
|
libreoffice \
|
||||||
|
ghostscript \
|
||||||
|
fontforge \
|
||||||
# pdftohtml
|
# pdftohtml
|
||||||
poppler-utils \
|
poppler-utils \
|
||||||
# OCR MY PDF (unpaper for deskew and other advanced features)
|
# OCR MY PDF (unpaper for deskew and other advanced features)
|
||||||
|
|||||||
@ -59,7 +59,9 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
|
|||||||
curl \
|
curl \
|
||||||
shadow \
|
shadow \
|
||||||
su-exec \
|
su-exec \
|
||||||
openjdk21-jre && \
|
openjdk21-jre \
|
||||||
|
ghostscript \
|
||||||
|
fontforge && \
|
||||||
# User permissions
|
# User permissions
|
||||||
mkdir -p /configs /logs /customFiles /usr/share/fonts/opentype/noto /tmp/stirling-pdf /pipeline/watchedFolders /pipeline/finishedFolders && \
|
mkdir -p /configs /logs /customFiles /usr/share/fonts/opentype/noto /tmp/stirling-pdf /pipeline/watchedFolders /pipeline/finishedFolders && \
|
||||||
chmod +x /scripts/*.sh && \
|
chmod +x /scripts/*.sh && \
|
||||||
|
|||||||
@ -24,7 +24,7 @@ http {
|
|||||||
index index.html index.htm;
|
index index.html index.htm;
|
||||||
|
|
||||||
# Global settings for file uploads
|
# Global settings for file uploads
|
||||||
client_max_body_size 100m;
|
client_max_body_size 0;
|
||||||
|
|
||||||
# Handle client-side routing - support subpaths
|
# Handle client-side routing - support subpaths
|
||||||
location / {
|
location / {
|
||||||
@ -48,12 +48,12 @@ http {
|
|||||||
proxy_cache off;
|
proxy_cache off;
|
||||||
|
|
||||||
# Timeout settings for large file uploads
|
# Timeout settings for large file uploads
|
||||||
proxy_connect_timeout 60s;
|
proxy_connect_timeout 600s;
|
||||||
proxy_send_timeout 60s;
|
proxy_send_timeout 600s;
|
||||||
proxy_read_timeout 60s;
|
proxy_read_timeout 600s;
|
||||||
|
|
||||||
# Request size limits for file uploads
|
# Request size limits for file uploads
|
||||||
client_max_body_size 100m;
|
client_max_body_size 0;
|
||||||
proxy_request_buffering off;
|
proxy_request_buffering off;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -4031,6 +4031,7 @@
|
|||||||
"fontSizeValue": "{{size}}pt",
|
"fontSizeValue": "{{size}}pt",
|
||||||
"noTextOnPage": "No editable text was detected on this page.",
|
"noTextOnPage": "No editable text was detected on this page.",
|
||||||
"emptyGroup": "[Empty Group]",
|
"emptyGroup": "[Empty Group]",
|
||||||
|
"imageLabel": "Placed image",
|
||||||
"empty": {
|
"empty": {
|
||||||
"title": "No document loaded",
|
"title": "No document loaded",
|
||||||
"subtitle": "Load a PDF or JSON file to begin editing text content."
|
"subtitle": "Load a PDF or JSON file to begin editing text content."
|
||||||
|
|||||||
@ -1,10 +1,12 @@
|
|||||||
import React, { useCallback, useEffect, useLayoutEffect, useMemo, useRef, useState } from 'react';
|
import React, { useCallback, useEffect, useLayoutEffect, useMemo, useRef, useState } from 'react';
|
||||||
import {
|
import {
|
||||||
|
ActionIcon,
|
||||||
Alert,
|
Alert,
|
||||||
Badge,
|
Badge,
|
||||||
Box,
|
Box,
|
||||||
Button,
|
Button,
|
||||||
Card,
|
Card,
|
||||||
|
Collapse,
|
||||||
Divider,
|
Divider,
|
||||||
FileButton,
|
FileButton,
|
||||||
Group,
|
Group,
|
||||||
@ -21,10 +23,13 @@ import PictureAsPdfIcon from '@mui/icons-material/PictureAsPdfOutlined';
|
|||||||
import AutorenewIcon from '@mui/icons-material/Autorenew';
|
import AutorenewIcon from '@mui/icons-material/Autorenew';
|
||||||
import WarningAmberIcon from '@mui/icons-material/WarningAmber';
|
import WarningAmberIcon from '@mui/icons-material/WarningAmber';
|
||||||
import UploadIcon from '@mui/icons-material/Upload';
|
import UploadIcon from '@mui/icons-material/Upload';
|
||||||
|
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
|
||||||
|
import ExpandLessIcon from '@mui/icons-material/ExpandLess';
|
||||||
import { Rnd } from 'react-rnd';
|
import { Rnd } from 'react-rnd';
|
||||||
|
|
||||||
import {
|
import {
|
||||||
PdfJsonEditorViewData,
|
PdfJsonEditorViewData,
|
||||||
|
PdfJsonFont,
|
||||||
PdfJsonPage,
|
PdfJsonPage,
|
||||||
} from '../../../tools/pdfJsonEditorTypes';
|
} from '../../../tools/pdfJsonEditorTypes';
|
||||||
import { getImageBounds, pageDimensions } from '../../../tools/pdfJsonEditorUtils';
|
import { getImageBounds, pageDimensions } from '../../../tools/pdfJsonEditorUtils';
|
||||||
@ -32,6 +37,68 @@ import { getImageBounds, pageDimensions } from '../../../tools/pdfJsonEditorUtil
|
|||||||
const MAX_RENDER_WIDTH = 820;
|
const MAX_RENDER_WIDTH = 820;
|
||||||
const MIN_BOX_SIZE = 18;
|
const MIN_BOX_SIZE = 18;
|
||||||
|
|
||||||
|
/**
 * Reduces a font program format label to one of the short keys used in this module:
 * 'woff2', 'woff', 'otf' or 'ttf'.
 *
 * Matching is case-insensitive and substring based. Labels containing 'cff' are reported
 * as 'otf'. Missing, empty and unrecognised labels all fall back to 'ttf'.
 */
const normalizeFontFormat = (format?: string | null): string => {
  const label = format ? format.toLowerCase() : '';
  // Ordered from most to least specific: 'woff2' must be tested before 'woff'
  // because every label containing 'woff2' also contains 'woff'.
  const rules: Array<[string, string]> = [
    ['woff2', 'woff2'],
    ['woff', 'woff'],
    ['otf', 'otf'],
    ['cff', 'otf'],
  ];
  const match = rules.find(([needle]) => label.includes(needle));
  return match ? match[1] : 'ttf';
};
|
||||||
|
|
||||||
|
/**
 * Returns the MIME type for a normalized font format key.
 * Any key other than 'woff2', 'woff' or 'otf' is treated as TrueType ('font/ttf').
 */
const getFontMimeType = (format: string): string => {
  const mimeByFormat = new Map<string, string>([
    ['woff2', 'font/woff2'],
    ['woff', 'font/woff'],
    ['otf', 'font/otf'],
  ]);
  return mimeByFormat.get(format) ?? 'font/ttf';
};
|
||||||
|
|
||||||
|
/**
 * Returns the value to place inside a CSS `format(...)` hint for a normalized font
 * format key, or null when the key is not one of 'woff2', 'woff', 'otf' or 'ttf'.
 */
const getFontFormatHint = (format: string): string | null => {
  const hintByFormat = new Map<string, string>([
    ['woff2', 'woff2'],
    ['woff', 'woff'],
    ['otf', 'opentype'],
    ['ttf', 'truetype'],
  ]);
  return hintByFormat.get(format) ?? null;
};
|
||||||
|
|
||||||
|
/**
 * Decodes a base64 string into its raw bytes.
 *
 * `window.atob` yields a binary string with one character per byte, so each character's
 * code is copied straight into the resulting array.
 */
const decodeBase64ToUint8Array = (value: string): Uint8Array => {
  const binaryString = window.atob(value);
  return Uint8Array.from(binaryString, (character) => character.charCodeAt(0));
};
|
||||||
|
|
||||||
|
/**
 * Builds the CSS font-family name under which an embedded PDF font is registered.
 *
 * The name is 'pdf-font-' followed by the font's uid (or its id, or the literal 'font'
 * when both are absent) with every character other than ASCII letters, digits, '_' and
 * '-' removed.
 */
const buildFontFamilyName = (font: PdfJsonFont): string => {
  const identifier = (font.uid ?? font.id ?? 'font').toString();
  const sanitized = identifier.replace(/[^a-zA-Z0-9_-]/g, '');
  return `pdf-font-${sanitized}`;
};
|
||||||
|
|
||||||
const getCaretOffset = (element: HTMLElement): number => {
|
const getCaretOffset = (element: HTMLElement): number => {
|
||||||
const selection = window.getSelection();
|
const selection = window.getSelection();
|
||||||
if (!selection || selection.rangeCount === 0 || !element.contains(selection.focusNode)) {
|
if (!selection || selection.rangeCount === 0 || !element.contains(selection.focusNode)) {
|
||||||
@ -85,11 +152,13 @@ const toCssBounds = (
|
|||||||
bounds: { left: number; right: number; top: number; bottom: number },
|
bounds: { left: number; right: number; top: number; bottom: number },
|
||||||
) => {
|
) => {
|
||||||
const width = Math.max(bounds.right - bounds.left, 1);
|
const width = Math.max(bounds.right - bounds.left, 1);
|
||||||
|
// Note: This codebase uses inverted naming where bounds.bottom > bounds.top
|
||||||
|
// bounds.bottom = visually upper edge (larger Y in PDF coords)
|
||||||
|
// bounds.top = visually lower edge (smaller Y in PDF coords)
|
||||||
const height = Math.max(bounds.bottom - bounds.top, 1);
|
const height = Math.max(bounds.bottom - bounds.top, 1);
|
||||||
// Add 20% buffer to width to account for padding and font rendering variations
|
const scaledWidth = Math.max(width * scale, MIN_BOX_SIZE);
|
||||||
const bufferedWidth = width * 1.2;
|
|
||||||
const scaledWidth = Math.max(bufferedWidth * scale, MIN_BOX_SIZE);
|
|
||||||
const scaledHeight = Math.max(height * scale, MIN_BOX_SIZE / 2);
|
const scaledHeight = Math.max(height * scale, MIN_BOX_SIZE / 2);
|
||||||
|
// Convert PDF's visually upper edge (bounds.bottom) to CSS top
|
||||||
const top = Math.max(pageHeight - bounds.bottom, 0) * scale;
|
const top = Math.max(pageHeight - bounds.bottom, 0) * scale;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@ -105,6 +174,8 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
const [activeGroupId, setActiveGroupId] = useState<string | null>(null);
|
const [activeGroupId, setActiveGroupId] = useState<string | null>(null);
|
||||||
const [editingGroupId, setEditingGroupId] = useState<string | null>(null);
|
const [editingGroupId, setEditingGroupId] = useState<string | null>(null);
|
||||||
const [activeImageId, setActiveImageId] = useState<string | null>(null);
|
const [activeImageId, setActiveImageId] = useState<string | null>(null);
|
||||||
|
const [fontFamilies, setFontFamilies] = useState<Map<string, string>>(new Map());
|
||||||
|
const [textGroupsExpanded, setTextGroupsExpanded] = useState(false);
|
||||||
const containerRef = useRef<HTMLDivElement | null>(null);
|
const containerRef = useRef<HTMLDivElement | null>(null);
|
||||||
const editorRefs = useRef<Map<string, HTMLDivElement>>(new Map());
|
const editorRefs = useRef<Map<string, HTMLDivElement>>(new Map());
|
||||||
const caretOffsetsRef = useRef<Map<string, number>>(new Map());
|
const caretOffsetsRef = useRef<Map<string, number>>(new Map());
|
||||||
@ -135,6 +206,10 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
if (!fontId || !pdfDocument?.fonts) {
|
if (!fontId || !pdfDocument?.fonts) {
|
||||||
return 'sans-serif';
|
return 'sans-serif';
|
||||||
}
|
}
|
||||||
|
const loadedFamily = fontFamilies.get(fontId);
|
||||||
|
if (loadedFamily) {
|
||||||
|
return `'${loadedFamily}', sans-serif`;
|
||||||
|
}
|
||||||
const font = pdfDocument.fonts.find((f) => f.id === fontId);
|
const font = pdfDocument.fonts.find((f) => f.id === fontId);
|
||||||
if (!font) {
|
if (!font) {
|
||||||
return 'sans-serif';
|
return 'sans-serif';
|
||||||
@ -161,10 +236,134 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
return 'Arial, Helvetica, sans-serif';
|
return 'Arial, Helvetica, sans-serif';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Computes the line height in CSS pixels for text set in the given font at the given size.
 *
 * - A non-positive font size is returned unchanged.
 * - When the font has no entry in `fontMetrics`, or its metrics are unusable
 *   (non-positive unitsPerEm or non-positive ascent-minus-descent), 1.2x the font size is used.
 * - Otherwise the height is (ascent - descent) / unitsPerEm scaled by the font size,
 *   but never less than 1.05x the font size.
 */
const getLineHeightPx = (fontId: string | null | undefined, fontSizePx: number): number => {
  if (fontSizePx <= 0) {
    return fontSizePx;
  }

  const fallbackHeight = fontSizePx * 1.2;
  const entry = fontId ? fontMetrics.get(fontId) : undefined;
  if (!entry) {
    return fallbackHeight;
  }

  const { unitsPerEm, ascent, descent } = entry;
  const glyphSpan = ascent - descent;
  if (unitsPerEm <= 0 || glyphSpan <= 0) {
    return fallbackHeight;
  }

  const minimumHeight = fontSizePx * 1.05;
  const metricHeight = (glyphSpan / unitsPerEm) * fontSizePx;
  return Math.max(metricHeight, minimumHeight);
};
|
||||||
|
|
||||||
|
/**
 * Chooses the CSS font-weight for text drawn with the given PDF font.
 *
 * Returns 'bold' when either
 *  - the font descriptor flags have the ForceBold bit set, or
 *  - the font's standard-14 name (or, failing that, its base name) contains "bold",
 *    compared case-insensitively.
 * Returns 'normal' in every other case, including when the font id is missing or no
 * font with that id exists in the document.
 */
const getFontWeight = (fontId: string | null | undefined): number | 'normal' | 'bold' => {
  if (!fontId || !pdfDocument?.fonts) {
    return 'normal';
  }
  const font = pdfDocument.fonts.find((f) => f.id === fontId);
  if (!font) {
    return 'normal';
  }

  // ForceBold has the value 262144 (0x40000, i.e. 1 << 18). The PDF specification
  // numbers flag bits from 1, so it is listed there as bit position 19.
  const FORCE_BOLD_FLAG = 262144;
  // Missing flags are treated as 0 rather than as a reason to bail out: fonts without a
  // descriptor must still reach the name check below.
  const descriptorFlags = font.fontDescriptorFlags ?? 0;
  if ((descriptorFlags & FORCE_BOLD_FLAG) !== 0) {
    return 'bold';
  }

  // Fall back to the font name, which often encodes the weight (e.g. "Helvetica-Bold").
  const fontName = font.standard14Name || font.baseName || '';
  if (fontName.toLowerCase().includes('bold')) {
    return 'bold';
  }

  return 'normal';
};
|
||||||
|
|
||||||
const pages = pdfDocument?.pages ?? [];
|
const pages = pdfDocument?.pages ?? [];
|
||||||
const currentPage = pages[selectedPage] ?? null;
|
const currentPage = pages[selectedPage] ?? null;
|
||||||
const pageGroups = groupsByPage[selectedPage] ?? [];
|
const pageGroups = groupsByPage[selectedPage] ?? [];
|
||||||
const pageImages = imagesByPage[selectedPage] ?? [];
|
const pageImages = imagesByPage[selectedPage] ?? [];
|
||||||
|
|
||||||
|
// Per-font vertical metrics keyed by font id, rebuilt whenever the document's font list changes.
// Defaults applied when the font omits a value: unitsPerEm = 1000 (also used when the
// reported value is not positive), ascent = unitsPerEm, descent = -20% of unitsPerEm.
const fontMetrics = useMemo(() => {
  const table = new Map<string, { unitsPerEm: number; ascent: number; descent: number }>();
  for (const entry of pdfDocument?.fonts ?? []) {
    // Fonts without an id cannot be looked up later, so they are left out.
    if (!entry?.id) {
      continue;
    }
    let unitsPerEm = 1000;
    if (entry.unitsPerEm && entry.unitsPerEm > 0) {
      unitsPerEm = entry.unitsPerEm;
    }
    table.set(entry.id, {
      unitsPerEm,
      ascent: entry.ascent ?? unitsPerEm,
      descent: entry.descent ?? -(unitsPerEm * 0.2),
    });
  }
  return table;
}, [pdfDocument?.fonts]);
|
||||||
|
|
||||||
|
// Registers every embedded font program of the current document with the browser so text
// can be rendered in its original typeface. Each font's base64 program is decoded, wrapped
// in a blob URL, loaded through the FontFace API and added to `document.fonts`; the
// resulting fontId -> CSS family name map is published through `setFontFamilies`.
// Fonts that fail to load are skipped (best effort). Everything registered here is removed
// again when the font list changes or the component unmounts.
useEffect(() => {
  if (typeof FontFace === 'undefined') {
    // No FontFace API available: publish an empty map so callers use fallback families.
    setFontFamilies(new Map());
    return undefined;
  }

  let disposed = false;
  const active: { fontFace: FontFace; url?: string }[] = [];

  // Unregisters every font added by this effect run and frees its blob URL.
  // The list is emptied afterwards so a second call is a no-op.
  const releaseActive = () => {
    active.forEach(({ fontFace, url }) => {
      document.fonts.delete(fontFace);
      if (url) {
        URL.revokeObjectURL(url);
      }
    });
    active.length = 0;
  };

  const registerFonts = async () => {
    const fonts = pdfDocument?.fonts ?? [];
    if (fonts.length === 0) {
      setFontFamilies(new Map());
      return;
    }

    const next = new Map<string, string>();
    for (const font of fonts) {
      if (disposed) {
        // The effect was cleaned up while a previous font was loading; stop doing work.
        break;
      }
      if (!font?.id || !font.program) {
        continue;
      }

      // Tracked outside the try block so the catch handler can free it on failure.
      let url: string | undefined;
      try {
        const format = normalizeFontFormat(font.programFormat);
        const data = decodeBase64ToUint8Array(font.program);
        const blob = new Blob([data as BlobPart], { type: getFontMimeType(format) });
        url = URL.createObjectURL(blob);
        const formatHint = getFontFormatHint(format);
        const familyName = buildFontFamilyName(font);
        const source = formatHint ? `url(${url}) format('${formatHint}')` : `url(${url})`;
        const fontFace = new FontFace(familyName, source);
        await fontFace.load();

        if (disposed) {
          // Cleanup already ran during the await; this font was never added to
          // document.fonts, so only its blob URL needs releasing.
          URL.revokeObjectURL(url);
          break;
        }

        document.fonts.add(fontFace);
        active.push({ fontFace, url });
        next.set(font.id, familyName);
      } catch {
        // Font loading failures are deliberately ignored: embedded PDF fonts often lack the
        // tables browsers require, and getFontFamily() falls back to web-safe fonts.
        // The blob URL must still be released, otherwise every failed font leaks its data.
        if (url) {
          URL.revokeObjectURL(url);
        }
      }
    }

    // When disposed, the cleanup function below has already released everything in `active`
    // and nothing is added after disposal, so there is nothing further to undo here.
    if (!disposed) {
      setFontFamilies(next);
    }
  };

  registerFonts();

  return () => {
    disposed = true;
    releaseActive();
  };
}, [pdfDocument?.fonts]);
|
||||||
const visibleGroups = useMemo(
|
const visibleGroups = useMemo(
|
||||||
() =>
|
() =>
|
||||||
pageGroups.filter((group) => {
|
pageGroups.filter((group) => {
|
||||||
@ -419,9 +618,17 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
<ScrollArea h="100%" offsetScrollbars>
|
<ScrollArea h="100%" offsetScrollbars>
|
||||||
<Box
|
<Box
|
||||||
style={{
|
style={{
|
||||||
margin: '0 auto',
|
display: 'flex',
|
||||||
|
justifyContent: 'center',
|
||||||
|
alignItems: 'flex-start',
|
||||||
|
width: '100%',
|
||||||
|
minHeight: '100%',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<Box
|
||||||
|
style={{
|
||||||
background: '#f3f4f6',
|
background: '#f3f4f6',
|
||||||
padding: '1.5rem',
|
padding: '0.5rem',
|
||||||
borderRadius: '0.75rem',
|
borderRadius: '0.75rem',
|
||||||
}}
|
}}
|
||||||
onClick={handleBackgroundClick}
|
onClick={handleBackgroundClick}
|
||||||
@ -466,7 +673,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
setEditingGroupId(null);
|
setEditingGroupId(null);
|
||||||
setActiveImageId(imageId);
|
setActiveImageId(imageId);
|
||||||
}}
|
}}
|
||||||
onDrag={(event, data) => {
|
onDrag={(_event, data) => {
|
||||||
emitImageTransform(
|
emitImageTransform(
|
||||||
imageId,
|
imageId,
|
||||||
data.x,
|
data.x,
|
||||||
@ -475,7 +682,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
cssHeight,
|
cssHeight,
|
||||||
);
|
);
|
||||||
}}
|
}}
|
||||||
onDragStop={(event, data) => {
|
onDragStop={(_event, data) => {
|
||||||
emitImageTransform(
|
emitImageTransform(
|
||||||
imageId,
|
imageId,
|
||||||
data.x,
|
data.x,
|
||||||
@ -489,7 +696,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
setActiveGroupId(null);
|
setActiveGroupId(null);
|
||||||
setEditingGroupId(null);
|
setEditingGroupId(null);
|
||||||
}}
|
}}
|
||||||
onResize={(event, _direction, ref, _delta, position) => {
|
onResize={(_event, _direction, ref, _delta, position) => {
|
||||||
const nextWidth = parseFloat(ref.style.width);
|
const nextWidth = parseFloat(ref.style.width);
|
||||||
const nextHeight = parseFloat(ref.style.height);
|
const nextHeight = parseFloat(ref.style.height);
|
||||||
emitImageTransform(
|
emitImageTransform(
|
||||||
@ -500,7 +707,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
nextHeight,
|
nextHeight,
|
||||||
);
|
);
|
||||||
}}
|
}}
|
||||||
onResizeStop={(event, _direction, ref, _delta, position) => {
|
onResizeStop={(_event, _direction, ref, _delta, position) => {
|
||||||
const nextWidth = parseFloat(ref.style.width);
|
const nextWidth = parseFloat(ref.style.width);
|
||||||
const nextHeight = parseFloat(ref.style.height);
|
const nextHeight = parseFloat(ref.style.height);
|
||||||
emitImageTransform(
|
emitImageTransform(
|
||||||
@ -567,21 +774,48 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
const baseFontSize = group.fontMatrixSize ?? group.fontSize ?? 12;
|
const baseFontSize = group.fontMatrixSize ?? group.fontSize ?? 12;
|
||||||
const fontSizePx = Math.max(baseFontSize * scale, 6);
|
const fontSizePx = Math.max(baseFontSize * scale, 6);
|
||||||
const fontFamily = getFontFamily(group.fontId);
|
const fontFamily = getFontFamily(group.fontId);
|
||||||
|
const lineHeightPx = getLineHeightPx(group.fontId, fontSizePx);
|
||||||
|
const lineHeightRatio = fontSizePx > 0 ? Math.max(lineHeightPx / fontSizePx, 1.05) : 1.2;
|
||||||
|
const hasRotation = group.rotation != null && Math.abs(group.rotation) > 0.5;
|
||||||
|
const baselineLength = group.baselineLength ?? Math.max(group.bounds.right - group.bounds.left, 0);
|
||||||
|
|
||||||
const visualHeight = Math.max(bounds.height, fontSizePx * 1.2);
|
let containerLeft = bounds.left;
|
||||||
|
let containerTop = bounds.top;
|
||||||
|
let containerWidth = Math.max(bounds.width, fontSizePx);
|
||||||
|
let containerHeight = Math.max(bounds.height, lineHeightPx);
|
||||||
|
let transform: string | undefined;
|
||||||
|
let transformOrigin: React.CSSProperties['transformOrigin'];
|
||||||
|
|
||||||
|
if (hasRotation) {
|
||||||
|
const anchorX = group.anchor?.x ?? group.bounds.left;
|
||||||
|
const anchorY = group.anchor?.y ?? group.bounds.bottom;
|
||||||
|
containerLeft = anchorX * scale;
|
||||||
|
containerTop = Math.max(pageHeight - anchorY, 0) * scale;
|
||||||
|
containerWidth = Math.max(baselineLength * scale, MIN_BOX_SIZE);
|
||||||
|
containerHeight = Math.max(lineHeightPx, fontSizePx * lineHeightRatio);
|
||||||
|
transformOrigin = 'left bottom';
|
||||||
|
// Negate rotation because Y-axis is flipped from PDF to web coordinates
|
||||||
|
transform = `rotate(${-group.rotation}deg)`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract styling from group
|
||||||
|
const textColor = group.color || '#111827';
|
||||||
|
const fontWeight = group.fontWeight || getFontWeight(group.fontId);
|
||||||
|
|
||||||
const containerStyle: React.CSSProperties = {
|
const containerStyle: React.CSSProperties = {
|
||||||
position: 'absolute',
|
position: 'absolute',
|
||||||
left: `${bounds.left}px`,
|
left: `${containerLeft}px`,
|
||||||
top: `${bounds.top}px`,
|
top: `${containerTop}px`,
|
||||||
width: `${bounds.width}px`,
|
width: `${containerWidth}px`,
|
||||||
height: `${visualHeight}px`,
|
height: `${containerHeight}px`,
|
||||||
display: 'flex',
|
display: 'flex',
|
||||||
alignItems: 'flex-start',
|
alignItems: 'flex-start',
|
||||||
justifyContent: 'flex-start',
|
justifyContent: 'flex-start',
|
||||||
pointerEvents: 'auto',
|
pointerEvents: 'auto',
|
||||||
cursor: 'text',
|
cursor: 'text',
|
||||||
zIndex: 2_000_000,
|
zIndex: 2_000_000,
|
||||||
|
transform,
|
||||||
|
transformOrigin,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (isEditing) {
|
if (isEditing) {
|
||||||
@ -628,17 +862,17 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
style={{
|
style={{
|
||||||
width: '100%',
|
width: '100%',
|
||||||
height: '100%',
|
height: '100%',
|
||||||
padding: '3px 4px',
|
padding: 0,
|
||||||
backgroundColor: 'rgba(255,255,255,0.95)',
|
backgroundColor: 'rgba(255,255,255,0.95)',
|
||||||
color: '#111827',
|
color: textColor,
|
||||||
fontSize: `${fontSizePx}px`,
|
fontSize: `${fontSizePx}px`,
|
||||||
fontFamily,
|
fontFamily,
|
||||||
lineHeight: 1.25,
|
fontWeight,
|
||||||
|
lineHeight: lineHeightRatio,
|
||||||
outline: 'none',
|
outline: 'none',
|
||||||
border: 'none',
|
border: 'none',
|
||||||
display: 'block',
|
display: 'block',
|
||||||
whiteSpace: 'pre-wrap',
|
whiteSpace: 'nowrap',
|
||||||
overflowWrap: 'anywhere',
|
|
||||||
cursor: 'text',
|
cursor: 'text',
|
||||||
overflow: 'visible',
|
overflow: 'visible',
|
||||||
}}
|
}}
|
||||||
@ -660,12 +894,13 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
style={{
|
style={{
|
||||||
width: '100%',
|
width: '100%',
|
||||||
minHeight: '100%',
|
minHeight: '100%',
|
||||||
padding: '2px 4px',
|
padding: 0,
|
||||||
whiteSpace: 'pre-wrap',
|
whiteSpace: 'nowrap',
|
||||||
fontSize: `${fontSizePx}px`,
|
fontSize: `${fontSizePx}px`,
|
||||||
fontFamily,
|
fontFamily,
|
||||||
lineHeight: 1.25,
|
fontWeight,
|
||||||
color: '#111827',
|
lineHeight: lineHeightRatio,
|
||||||
|
color: textColor,
|
||||||
display: 'block',
|
display: 'block',
|
||||||
cursor: 'text',
|
cursor: 'text',
|
||||||
overflow: 'visible',
|
overflow: 'visible',
|
||||||
@ -684,12 +919,24 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
)}
|
)}
|
||||||
</Box>
|
</Box>
|
||||||
</Box>
|
</Box>
|
||||||
|
</Box>
|
||||||
</ScrollArea>
|
</ScrollArea>
|
||||||
</Card>
|
</Card>
|
||||||
|
|
||||||
<Card padding="md" withBorder radius="md">
|
<Card padding="md" withBorder radius="md">
|
||||||
<Stack gap="xs">
|
<Stack gap="xs">
|
||||||
|
<Group justify="space-between" align="center">
|
||||||
<Text fw={500}>{t('pdfJsonEditor.groupList', 'Detected Text Groups')}</Text>
|
<Text fw={500}>{t('pdfJsonEditor.groupList', 'Detected Text Groups')}</Text>
|
||||||
|
<ActionIcon
|
||||||
|
variant="subtle"
|
||||||
|
onClick={() => setTextGroupsExpanded(!textGroupsExpanded)}
|
||||||
|
aria-label={textGroupsExpanded ? 'Collapse' : 'Expand'}
|
||||||
|
>
|
||||||
|
{textGroupsExpanded ? <ExpandLessIcon /> : <ExpandMoreIcon />}
|
||||||
|
</ActionIcon>
|
||||||
|
</Group>
|
||||||
|
<Collapse in={textGroupsExpanded}>
|
||||||
|
<Stack gap="xs">
|
||||||
<Divider />
|
<Divider />
|
||||||
<ScrollArea h={180} offsetScrollbars>
|
<ScrollArea h={180} offsetScrollbars>
|
||||||
<Stack gap="sm">
|
<Stack gap="sm">
|
||||||
@ -732,6 +979,8 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
</Stack>
|
</Stack>
|
||||||
</ScrollArea>
|
</ScrollArea>
|
||||||
</Stack>
|
</Stack>
|
||||||
|
</Collapse>
|
||||||
|
</Stack>
|
||||||
</Card>
|
</Card>
|
||||||
</Stack>
|
</Stack>
|
||||||
)}
|
)}
|
||||||
|
|||||||
@ -23,6 +23,12 @@ export interface PdfJsonFont {
|
|||||||
toUnicode?: string | null;
|
toUnicode?: string | null;
|
||||||
standard14Name?: string | null;
|
standard14Name?: string | null;
|
||||||
fontDescriptorFlags?: number | null;
|
fontDescriptorFlags?: number | null;
|
||||||
|
ascent?: number | null;
|
||||||
|
descent?: number | null;
|
||||||
|
capHeight?: number | null;
|
||||||
|
xHeight?: number | null;
|
||||||
|
italicAngle?: number | null;
|
||||||
|
unitsPerEm?: number | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface PdfJsonTextElement {
|
export interface PdfJsonTextElement {
|
||||||
@ -117,6 +123,11 @@ export interface TextGroup {
|
|||||||
fontId?: string | null;
|
fontId?: string | null;
|
||||||
fontSize?: number | null;
|
fontSize?: number | null;
|
||||||
fontMatrixSize?: number | null;
|
fontMatrixSize?: number | null;
|
||||||
|
color?: string | null;
|
||||||
|
fontWeight?: number | 'normal' | 'bold' | null;
|
||||||
|
rotation?: number | null;
|
||||||
|
anchor?: { x: number; y: number } | null;
|
||||||
|
baselineLength?: number | null;
|
||||||
elements: PdfJsonTextElement[];
|
elements: PdfJsonTextElement[];
|
||||||
originalElements: PdfJsonTextElement[];
|
originalElements: PdfJsonTextElement[];
|
||||||
text: string;
|
text: string;
|
||||||
|
|||||||
@ -69,9 +69,15 @@ const getHeight = (element: PdfJsonTextElement): number => {
|
|||||||
const getElementBounds = (element: PdfJsonTextElement): BoundingBox => {
|
const getElementBounds = (element: PdfJsonTextElement): BoundingBox => {
|
||||||
const left = getX(element);
|
const left = getX(element);
|
||||||
const width = getWidth(element);
|
const width = getWidth(element);
|
||||||
const bottom = getBaseline(element);
|
const baseline = getBaseline(element);
|
||||||
const height = getHeight(element);
|
const height = getHeight(element);
|
||||||
const top = bottom - height;
|
// In PDF coordinates, baseline is where text sits
|
||||||
|
// Typical typography: ~80% of height above baseline (ascenders), ~20% below (descenders)
|
||||||
|
// Using codebase's inverted naming: bottom (visual top) > top (visual bottom)
|
||||||
|
const ascent = height * 0.8;
|
||||||
|
const descent = height * 0.2;
|
||||||
|
const bottom = baseline + ascent; // Visual top of text
|
||||||
|
const top = baseline - descent; // Visual bottom (includes descenders)
|
||||||
return {
|
return {
|
||||||
left,
|
left,
|
||||||
right: left + width,
|
right: left + width,
|
||||||
@ -181,6 +187,136 @@ const buildGroupText = (elements: PdfJsonTextElement[]): string => {
|
|||||||
return result;
|
return result;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Builds a CSS `rgb(r, g, b)` string from the first three entries of `components`.
 *
 * Each entry is clamped to the 0..1 range, scaled to 0..255 and rounded.
 * When fewer than three entries are supplied the result is black.
 */
const rgbToCss = (components: number[]): string => {
  if (components.length < 3) {
    return 'rgb(0, 0, 0)';
  }

  // Clamp to [0, 1] before scaling so out-of-range input cannot leave the 0..255 range.
  const toByte = (value: number): number => Math.round(Math.max(0, Math.min(1, value)) * 255);

  const red = toByte(components[0]);
  const green = toByte(components[1]);
  const blue = toByte(components[2]);
  return `rgb(${red}, ${green}, ${blue})`;
};
|
||||||
|
|
||||||
|
/**
 * Builds a CSS `rgb(r, g, b)` string from the first four entries of `components`,
 * read as cyan, magenta, yellow and black.
 *
 * Each entry is clamped to 0..1; every output channel is
 * `round(255 * (1 - ink) * (1 - black))`. Fewer than four entries yields black.
 */
const cmykToCss = (components: number[]): string => {
  if (components.length < 4) {
    return 'rgb(0, 0, 0)';
  }

  const clampUnit = (value: number): number => Math.max(0, Math.min(1, value));

  const cyan = clampUnit(components[0]);
  const magenta = clampUnit(components[1]);
  const yellow = clampUnit(components[2]);
  const black = clampUnit(components[3]);

  // The black term is shared by all three channels.
  const remaining = 1 - black;
  const toChannel = (ink: number): number => Math.round(255 * (1 - ink) * remaining);

  return `rgb(${toChannel(cyan)}, ${toChannel(magenta)}, ${toChannel(yellow)})`;
};
|
||||||
|
|
||||||
|
/**
 * Builds a CSS `rgb(v, v, v)` string from the first entry of `components`.
 *
 * The entry is clamped to 0..1, scaled to 0..255 and rounded, then used for
 * all three channels. An empty array yields black.
 */
const grayToCss = (components: number[]): string => {
  if (components.length < 1) {
    return 'rgb(0, 0, 0)';
  }

  const level = Math.round(Math.max(0, Math.min(1, components[0])) * 255);
  return `rgb(${[level, level, level].join(', ')})`;
};
|
||||||
|
|
||||||
|
/**
 * Derives a CSS colour string from an element's `fillColor`.
 *
 * The colour-space name is matched case-insensitively:
 *  - contains "rgb"            -> `rgbToCss`
 *  - contains "cmyk"           -> `cmykToCss`
 *  - contains "gray" / "grey"  -> `grayToCss`
 *
 * When the name matches none of these, the component count decides:
 * 1 -> gray, 4 -> CMYK, 3 or more than 4 -> RGB (first three values).
 *
 * @returns the CSS colour, or `null` when the element has no fill colour
 *          components or the colour cannot be interpreted.
 */
const extractColor = (element: PdfJsonTextElement): string | null => {
  const fillColor = element.fillColor;
  const components = fillColor?.components;
  if (!fillColor || !components || components.length === 0) {
    return null;
  }

  const colorSpace = (fillColor.colorSpace ?? '').toLowerCase();

  // A name containing "srgb" also contains "rgb", so one check covers both.
  if (colorSpace.includes('rgb')) {
    return rgbToCss(components);
  }
  if (colorSpace.includes('cmyk')) {
    return cmykToCss(components);
  }
  if (colorSpace.includes('gray') || colorSpace.includes('grey')) {
    return grayToCss(components);
  }

  // Unrecognised colour-space name: infer the model from the component count.
  // Four components are treated as CMYK; reading them as RGB would silently
  // drop the black channel and render the wrong colour.
  if (components.length === 4) {
    return cmykToCss(components);
  }
  if (components.length >= 3) {
    return rgbToCss(components);
  }
  if (components.length === 1) {
    return grayToCss(components);
  }

  return null;
};
|
||||||
|
|
||||||
|
// Multiplier that converts an angle in radians to degrees.
const RAD_TO_DEG = 180 / Math.PI;
|
||||||
|
|
||||||
|
/**
 * Wraps an angle in degrees into the half-open interval (-180, 180].
 *
 * @param angle angle in degrees, any magnitude
 * @returns the equivalent angle; exactly -180 maps to 180
 */
const normalizeAngle = (angle: number): number => {
  const wrapped = angle % 360;
  if (wrapped > 180) {
    return wrapped - 360;
  }
  if (wrapped <= -180) {
    return wrapped + 360;
  }
  return wrapped;
};
|
||||||
|
|
||||||
|
/**
 * Reads a rotation angle, in degrees, from the first two entries of an
 * element's `textMatrix` via `atan2(matrix[1], matrix[0])`.
 *
 * @returns the normalised angle, or `null` when the matrix is missing, does
 *          not have exactly six entries, has both leading entries within 1e-6
 *          of zero, or the angle is smaller than 0.5 degrees in magnitude.
 */
const extractElementRotation = (element: PdfJsonTextElement): number | null => {
  const { textMatrix } = element;
  if (!textMatrix || textMatrix.length !== 6) {
    return null;
  }

  const [horizontal, vertical] = textMatrix;

  // With both entries near zero the direction is undefined.
  const isDegenerate = Math.abs(horizontal) < 1e-6 && Math.abs(vertical) < 1e-6;
  if (isDegenerate) {
    return null;
  }

  const degrees = Math.atan2(vertical, horizontal) * RAD_TO_DEG;

  // Angles under half a degree are reported as "no rotation".
  return Math.abs(degrees) < 0.5 ? null : normalizeAngle(degrees);
};
|
||||||
|
|
||||||
|
/**
 * Computes a single rotation, in degrees, for a list of elements.
 *
 * Each element's rotation (from `extractElementRotation`) is turned into a
 * unit vector; the vectors are summed and the direction of the sum is
 * returned. Elements without a rotation are left out of the sum.
 *
 * @returns the normalised angle, or `null` when no element has a rotation,
 *          the summed vector is within 1e-6 of zero on both axes, or the
 *          resulting angle is smaller than 0.5 degrees in magnitude.
 */
const computeGroupRotation = (elements: PdfJsonTextElement[]): number | null => {
  let sumX = 0;
  let sumY = 0;
  let rotatedCount = 0;

  elements.forEach((element) => {
    const angle = extractElementRotation(element);
    if (angle === null) {
      return;
    }
    const radians = (angle * Math.PI) / 180;
    sumX += Math.cos(radians);
    sumY += Math.sin(radians);
    rotatedCount += 1;
  });

  if (rotatedCount === 0) {
    return null;
  }

  // Opposing directions can cancel out, leaving no meaningful mean direction.
  if (Math.abs(sumX) < 1e-6 && Math.abs(sumY) < 1e-6) {
    return null;
  }

  const mean = normalizeAngle(Math.atan2(sumY, sumX) * RAD_TO_DEG);
  if (Math.abs(mean) < 0.5) {
    return null;
  }
  return mean;
};
|
||||||
|
|
||||||
|
/**
 * Returns the anchor point of an element.
 *
 * When the element carries a six-entry `textMatrix`, entries 4 and 5 are used;
 * otherwise the element's own `x` / `y` are used. Both coordinates are passed
 * through `valueOr` before being returned.
 */
const getAnchorPoint = (element: PdfJsonTextElement): { x: number; y: number } => {
  const matrix = element.textMatrix;
  const [rawX, rawY] =
    matrix && matrix.length === 6 ? [matrix[4], matrix[5]] : [element.x, element.y];

  return {
    x: valueOr(rawX),
    y: valueOr(rawY),
  };
};
|
||||||
|
|
||||||
|
/**
 * Adds up `getWidth` of every element in the list, starting from 0.
 * Only element widths are summed; nothing is added for space between elements.
 */
const computeBaselineLength = (elements: PdfJsonTextElement[]): number => {
  let total = 0;
  elements.forEach((element) => {
    total += getWidth(element);
  });
  return total;
};
|
||||||
|
|
||||||
const createGroup = (
|
const createGroup = (
|
||||||
pageIndex: number,
|
pageIndex: number,
|
||||||
idSuffix: number,
|
idSuffix: number,
|
||||||
@ -189,13 +325,22 @@ const createGroup = (
|
|||||||
const clones = elements.map(cloneTextElement);
|
const clones = elements.map(cloneTextElement);
|
||||||
const originalClones = clones.map(cloneTextElement);
|
const originalClones = clones.map(cloneTextElement);
|
||||||
const bounds = mergeBounds(elements.map(getElementBounds));
|
const bounds = mergeBounds(elements.map(getElementBounds));
|
||||||
|
const firstElement = elements[0];
|
||||||
|
const rotation = computeGroupRotation(elements);
|
||||||
|
const anchor = rotation !== null ? getAnchorPoint(firstElement) : null;
|
||||||
|
const baselineLength = computeBaselineLength(elements);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
id: `${pageIndex}-${idSuffix}`,
|
id: `${pageIndex}-${idSuffix}`,
|
||||||
pageIndex,
|
pageIndex,
|
||||||
fontId: elements[0]?.fontId,
|
fontId: firstElement?.fontId,
|
||||||
fontSize: elements[0]?.fontSize,
|
fontSize: firstElement?.fontSize,
|
||||||
fontMatrixSize: elements[0]?.fontMatrixSize,
|
fontMatrixSize: firstElement?.fontMatrixSize,
|
||||||
|
color: firstElement ? extractColor(firstElement) : null,
|
||||||
|
fontWeight: null, // Will be determined from font descriptor
|
||||||
|
rotation,
|
||||||
|
anchor,
|
||||||
|
baselineLength,
|
||||||
elements: clones,
|
elements: clones,
|
||||||
originalElements: originalClones,
|
originalElements: originalClones,
|
||||||
text: buildGroupText(elements),
|
text: buildGroupText(elements),
|
||||||
@ -253,7 +398,18 @@ export const groupPageTextElements = (page: PdfJsonPage | null | undefined, page
|
|||||||
const splitThreshold = Math.max(SPACE_MIN_GAP, avgFontSize * GAP_FACTOR);
|
const splitThreshold = Math.max(SPACE_MIN_GAP, avgFontSize * GAP_FACTOR);
|
||||||
|
|
||||||
const sameFont = previous.fontId === element.fontId;
|
const sameFont = previous.fontId === element.fontId;
|
||||||
const shouldSplit = gap > splitThreshold * (sameFont ? 1.4 : 1.0);
|
let shouldSplit = gap > splitThreshold * (sameFont ? 1.4 : 1.0);
|
||||||
|
|
||||||
|
const previousRotation = extractElementRotation(previous);
|
||||||
|
const currentRotation = extractElementRotation(element);
|
||||||
|
if (
|
||||||
|
shouldSplit &&
|
||||||
|
previousRotation !== null &&
|
||||||
|
currentRotation !== null &&
|
||||||
|
Math.abs(normalizeAngle(previousRotation - currentRotation)) < 1
|
||||||
|
) {
|
||||||
|
shouldSplit = false;
|
||||||
|
}
|
||||||
|
|
||||||
if (shouldSplit) {
|
if (shouldSplit) {
|
||||||
groups.push(createGroup(pageIndex, groupCounter, currentBucket));
|
groups.push(createGroup(pageIndex, groupCounter, currentBucket));
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user