mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
clean fonts
This commit is contained in:
parent
af19a5af23
commit
c7c5613c13
@ -0,0 +1,61 @@
|
||||
package stirling.software.SPDF.model.json;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* Represents a PDF annotation (comments, highlights, stamps, etc.). Annotations often contain OCR
|
||||
* text layers or other metadata not visible in content streams.
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class PdfJsonAnnotation {
|
||||
|
||||
/** Annotation subtype (Text, Highlight, Link, Stamp, Widget, etc.) */
|
||||
private String subtype;
|
||||
|
||||
/** Human-readable text content of the annotation */
|
||||
private String contents;
|
||||
|
||||
/** Annotation rectangle [x1, y1, x2, y2] */
|
||||
private List<Float> rect;
|
||||
|
||||
/** Annotation appearance characteristics */
|
||||
private String appearanceState;
|
||||
|
||||
/** Color components (e.g., [r, g, b] for RGB) */
|
||||
private List<Float> color;
|
||||
|
||||
/** Annotation flags (print, hidden, etc.) */
|
||||
private Integer flags;
|
||||
|
||||
/** For link annotations: destination or action */
|
||||
private String destination;
|
||||
|
||||
/** For text annotations: icon name */
|
||||
private String iconName;
|
||||
|
||||
/** Subject/title of the annotation */
|
||||
private String subject;
|
||||
|
||||
/** Author of the annotation */
|
||||
private String author;
|
||||
|
||||
/** Creation date (ISO 8601 format) */
|
||||
private String creationDate;
|
||||
|
||||
/** Modification date (ISO 8601 format) */
|
||||
private String modificationDate;
|
||||
|
||||
/** Full annotation dictionary for lossless round-tripping */
|
||||
private PdfJsonCosValue rawData;
|
||||
}
|
||||
@ -25,4 +25,7 @@ public class PdfJsonDocument {
|
||||
@Builder.Default private List<PdfJsonFont> fonts = new ArrayList<>();
|
||||
|
||||
@Builder.Default private List<PdfJsonPage> pages = new ArrayList<>();
|
||||
|
||||
/** Form fields (AcroForm) at document level */
|
||||
@Builder.Default private List<PdfJsonFormField> formFields = new ArrayList<>();
|
||||
}
|
||||
|
||||
@ -52,4 +52,22 @@ public class PdfJsonFont {
|
||||
|
||||
/** Font descriptor flags copied from the source document. */
|
||||
private Integer fontDescriptorFlags;
|
||||
|
||||
/** Font ascent in glyph units (typically 1/1000). */
|
||||
private Float ascent;
|
||||
|
||||
/** Font descent in glyph units (typically negative). */
|
||||
private Float descent;
|
||||
|
||||
/** Capital height when available. */
|
||||
private Float capHeight;
|
||||
|
||||
/** x-height when available. */
|
||||
private Float xHeight;
|
||||
|
||||
/** Italic angle reported by the font descriptor. */
|
||||
private Float italicAngle;
|
||||
|
||||
/** Units per em extracted from the font matrix. */
|
||||
private Integer unitsPerEm;
|
||||
}
|
||||
|
||||
@ -0,0 +1,66 @@
|
||||
package stirling.software.SPDF.model.json;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/** Represents a PDF form field (AcroForm). */
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class PdfJsonFormField {
|
||||
|
||||
/** Fully qualified field name (e.g., "form1.textfield1") */
|
||||
private String name;
|
||||
|
||||
/** Partial field name (last component) */
|
||||
private String partialName;
|
||||
|
||||
/** Field type (Tx=text, Btn=button, Ch=choice, Sig=signature) */
|
||||
private String fieldType;
|
||||
|
||||
/** Field value as string */
|
||||
private String value;
|
||||
|
||||
/** Default value */
|
||||
private String defaultValue;
|
||||
|
||||
/** Field flags (readonly, required, multiline, etc.) */
|
||||
private Integer flags;
|
||||
|
||||
/** Alternative field name (for accessibility) */
|
||||
private String alternateFieldName;
|
||||
|
||||
/** Mapping name (for export) */
|
||||
private String mappingName;
|
||||
|
||||
/** Page number where field appears (1-indexed) */
|
||||
private Integer pageNumber;
|
||||
|
||||
/** Field rectangle [x1, y1, x2, y2] on the page */
|
||||
private List<Float> rect;
|
||||
|
||||
/** For choice fields: list of options */
|
||||
private List<String> options;
|
||||
|
||||
/** For choice fields: selected indices */
|
||||
private List<Integer> selectedIndices;
|
||||
|
||||
/** For button fields: whether it's checked */
|
||||
private Boolean checked;
|
||||
|
||||
/** Font information for text fields */
|
||||
private String fontName;
|
||||
|
||||
private Float fontSize;
|
||||
|
||||
/** Full field dictionary for lossless round-tripping */
|
||||
private PdfJsonCosValue rawData;
|
||||
}
|
||||
@ -24,6 +24,7 @@ public class PdfJsonPage {
|
||||
|
||||
@Builder.Default private List<PdfJsonTextElement> textElements = new ArrayList<>();
|
||||
@Builder.Default private List<PdfJsonImageElement> imageElements = new ArrayList<>();
|
||||
@Builder.Default private List<PdfJsonAnnotation> annotations = new ArrayList<>();
|
||||
|
||||
/** Serialized representation of the page resources dictionary. */
|
||||
private PdfJsonCosValue resources;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -168,6 +168,16 @@ system:
|
||||
startupCleanup: true # Clean up old temp files on startup
|
||||
cleanupSystemTemp: false # Whether to clean broader system temp directory
|
||||
|
||||
stirling:
|
||||
pdf:
|
||||
fallback-font: classpath:/static/fonts/NotoSans-Regular.ttf # Override to point at a custom fallback font
|
||||
json:
|
||||
font-normalization:
|
||||
enabled: true # Run Ghostscript preflight to normalize fonts before PDF→JSON
|
||||
cff-converter:
|
||||
enabled: true # Attempt to transcode CFF/Type1C programs to OTF using FontForge when available
|
||||
fontforge-command: fontforge # Override if FontForge is installed under a different name/path
|
||||
|
||||
ui:
|
||||
appName: '' # application's visible name
|
||||
homeDescription: '' # short description or tagline shown on the homepage
|
||||
|
||||
@ -83,6 +83,8 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
|
||||
gcompat \
|
||||
libc6-compat \
|
||||
libreoffice \
|
||||
ghostscript \
|
||||
fontforge \
|
||||
# pdftohtml
|
||||
poppler-utils \
|
||||
# OCR MY PDF (unpaper for deskew and other advanced features)
|
||||
@ -119,4 +121,4 @@ EXPOSE 8080/tcp
|
||||
|
||||
# Set user and run command
|
||||
ENTRYPOINT ["tini", "--", "/scripts/init.sh"]
|
||||
CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/tmp/stirling-pdf -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"]
|
||||
CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/tmp/stirling-pdf -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"]
|
||||
|
||||
@ -73,6 +73,8 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
|
||||
gcompat \
|
||||
libc6-compat \
|
||||
libreoffice \
|
||||
ghostscript \
|
||||
fontforge \
|
||||
# pdftohtml
|
||||
poppler-utils \
|
||||
# OCR MY PDF (unpaper for deskew and other advanced features)
|
||||
@ -109,4 +111,4 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
|
||||
EXPOSE 8080/tcp
|
||||
# Set user and run command
|
||||
ENTRYPOINT ["tini", "--", "/scripts/init.sh"]
|
||||
CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/tmp/stirling-pdf -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"]
|
||||
CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/tmp/stirling-pdf -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"]
|
||||
|
||||
@ -59,7 +59,9 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
|
||||
curl \
|
||||
shadow \
|
||||
su-exec \
|
||||
openjdk21-jre && \
|
||||
openjdk21-jre \
|
||||
ghostscript \
|
||||
fontforge && \
|
||||
# User permissions
|
||||
mkdir -p /configs /logs /customFiles /usr/share/fonts/opentype/noto /tmp/stirling-pdf /pipeline/watchedFolders /pipeline/finishedFolders && \
|
||||
chmod +x /scripts/*.sh && \
|
||||
|
||||
@ -24,7 +24,7 @@ http {
|
||||
index index.html index.htm;
|
||||
|
||||
# Global settings for file uploads
|
||||
client_max_body_size 100m;
|
||||
client_max_body_size 0;
|
||||
|
||||
# Handle client-side routing - support subpaths
|
||||
location / {
|
||||
@ -48,12 +48,12 @@ http {
|
||||
proxy_cache off;
|
||||
|
||||
# Timeout settings for large file uploads
|
||||
proxy_connect_timeout 60s;
|
||||
proxy_send_timeout 60s;
|
||||
proxy_read_timeout 60s;
|
||||
|
||||
proxy_connect_timeout 600s;
|
||||
proxy_send_timeout 600s;
|
||||
proxy_read_timeout 600s;
|
||||
|
||||
# Request size limits for file uploads
|
||||
client_max_body_size 100m;
|
||||
client_max_body_size 0;
|
||||
proxy_request_buffering off;
|
||||
}
|
||||
|
||||
|
||||
@ -4031,6 +4031,7 @@
|
||||
"fontSizeValue": "{{size}}pt",
|
||||
"noTextOnPage": "No editable text was detected on this page.",
|
||||
"emptyGroup": "[Empty Group]",
|
||||
"imageLabel": "Placed image",
|
||||
"empty": {
|
||||
"title": "No document loaded",
|
||||
"subtitle": "Load a PDF or JSON file to begin editing text content."
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
import React, { useCallback, useEffect, useLayoutEffect, useMemo, useRef, useState } from 'react';
|
||||
import {
|
||||
ActionIcon,
|
||||
Alert,
|
||||
Badge,
|
||||
Box,
|
||||
Button,
|
||||
Card,
|
||||
Collapse,
|
||||
Divider,
|
||||
FileButton,
|
||||
Group,
|
||||
@ -21,10 +23,13 @@ import PictureAsPdfIcon from '@mui/icons-material/PictureAsPdfOutlined';
|
||||
import AutorenewIcon from '@mui/icons-material/Autorenew';
|
||||
import WarningAmberIcon from '@mui/icons-material/WarningAmber';
|
||||
import UploadIcon from '@mui/icons-material/Upload';
|
||||
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
|
||||
import ExpandLessIcon from '@mui/icons-material/ExpandLess';
|
||||
import { Rnd } from 'react-rnd';
|
||||
|
||||
import {
|
||||
PdfJsonEditorViewData,
|
||||
PdfJsonFont,
|
||||
PdfJsonPage,
|
||||
} from '../../../tools/pdfJsonEditorTypes';
|
||||
import { getImageBounds, pageDimensions } from '../../../tools/pdfJsonEditorUtils';
|
||||
@ -32,6 +37,68 @@ import { getImageBounds, pageDimensions } from '../../../tools/pdfJsonEditorUtil
|
||||
const MAX_RENDER_WIDTH = 820;
|
||||
const MIN_BOX_SIZE = 18;
|
||||
|
||||
/**
 * Maps a free-form font program format label onto one of the extensions
 * 'woff2', 'woff', 'otf' or 'ttf'.
 *
 * Matching is case-insensitive and substring based; a label containing 'cff'
 * is reported as 'otf'. Missing, empty or unrecognised labels yield 'ttf'.
 */
const normalizeFontFormat = (format?: string | null): string => {
  // Evaluated in order: 'woff2' has to be tested before its prefix 'woff'.
  const rules: Array<[string, string]> = [
    ['woff2', 'woff2'],
    ['woff', 'woff'],
    ['otf', 'otf'],
    ['cff', 'otf'],
  ];
  const label = format ? format.toLowerCase() : '';
  const hit = rules.find(([needle]) => label.includes(needle));
  return hit ? hit[1] : 'ttf';
};
|
||||
|
||||
/**
 * Returns the MIME type used for the Blob that wraps a font program.
 *
 * @param format normalised format ('woff2', 'woff', 'otf', ...)
 * @returns the matching 'font/*' type; any other value maps to 'font/ttf'
 */
const getFontMimeType = (format: string): string => {
  const mimeTypes = new Map<string, string>([
    ['woff2', 'font/woff2'],
    ['woff', 'font/woff'],
    ['otf', 'font/otf'],
  ]);
  return mimeTypes.get(format) ?? 'font/ttf';
};
|
||||
|
||||
/**
 * Returns the value to place inside the CSS `format(...)` hint of a font
 * source for a normalised format, or null when the format has no known hint.
 */
const getFontFormatHint = (format: string): string | null => {
  if (format === 'woff2') {
    return 'woff2';
  }
  if (format === 'woff') {
    return 'woff';
  }
  if (format === 'otf') {
    return 'opentype';
  }
  if (format === 'ttf') {
    return 'truetype';
  }
  return null;
};
|
||||
|
||||
/**
 * Decodes a base64 string into its raw bytes.
 *
 * Uses `window.atob`, whose result holds one character per decoded byte; each
 * character code is copied into the returned array.
 */
const decodeBase64ToUint8Array = (value: string): Uint8Array => {
  const decoded = window.atob(value);
  return Uint8Array.from(decoded, (char) => char.charCodeAt(0));
};
|
||||
|
||||
/**
 * Derives the CSS font-family name under which an embedded PDF font is
 * registered: 'pdf-font-' followed by the font's uid (or its id, or the
 * literal 'font') with every character outside [a-zA-Z0-9_-] removed.
 */
const buildFontFamilyName = (font: PdfJsonFont): string => {
  const identifier = (font.uid ?? font.id ?? 'font').toString();
  const sanitized = identifier.replace(/[^a-zA-Z0-9_-]/g, '');
  return 'pdf-font-' + sanitized;
};
|
||||
|
||||
const getCaretOffset = (element: HTMLElement): number => {
|
||||
const selection = window.getSelection();
|
||||
if (!selection || selection.rangeCount === 0 || !element.contains(selection.focusNode)) {
|
||||
@ -85,11 +152,13 @@ const toCssBounds = (
|
||||
bounds: { left: number; right: number; top: number; bottom: number },
|
||||
) => {
|
||||
const width = Math.max(bounds.right - bounds.left, 1);
|
||||
// Note: This codebase uses inverted naming where bounds.bottom > bounds.top
|
||||
// bounds.bottom = visually upper edge (larger Y in PDF coords)
|
||||
// bounds.top = visually lower edge (smaller Y in PDF coords)
|
||||
const height = Math.max(bounds.bottom - bounds.top, 1);
|
||||
// Add 20% buffer to width to account for padding and font rendering variations
|
||||
const bufferedWidth = width * 1.2;
|
||||
const scaledWidth = Math.max(bufferedWidth * scale, MIN_BOX_SIZE);
|
||||
const scaledWidth = Math.max(width * scale, MIN_BOX_SIZE);
|
||||
const scaledHeight = Math.max(height * scale, MIN_BOX_SIZE / 2);
|
||||
// Convert PDF's visually upper edge (bounds.bottom) to CSS top
|
||||
const top = Math.max(pageHeight - bounds.bottom, 0) * scale;
|
||||
|
||||
return {
|
||||
@ -105,6 +174,8 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
const [activeGroupId, setActiveGroupId] = useState<string | null>(null);
|
||||
const [editingGroupId, setEditingGroupId] = useState<string | null>(null);
|
||||
const [activeImageId, setActiveImageId] = useState<string | null>(null);
|
||||
const [fontFamilies, setFontFamilies] = useState<Map<string, string>>(new Map());
|
||||
const [textGroupsExpanded, setTextGroupsExpanded] = useState(false);
|
||||
const containerRef = useRef<HTMLDivElement | null>(null);
|
||||
const editorRefs = useRef<Map<string, HTMLDivElement>>(new Map());
|
||||
const caretOffsetsRef = useRef<Map<string, number>>(new Map());
|
||||
@ -135,6 +206,10 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
if (!fontId || !pdfDocument?.fonts) {
|
||||
return 'sans-serif';
|
||||
}
|
||||
const loadedFamily = fontFamilies.get(fontId);
|
||||
if (loadedFamily) {
|
||||
return `'${loadedFamily}', sans-serif`;
|
||||
}
|
||||
const font = pdfDocument.fonts.find((f) => f.id === fontId);
|
||||
if (!font) {
|
||||
return 'sans-serif';
|
||||
@ -161,10 +236,134 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
return 'Arial, Helvetica, sans-serif';
|
||||
};
|
||||
|
||||
// Computes the CSS line height (px) for text set in the given font at the
// given pixel size. Uses the font's ascent/descent from `fontMetrics` when
// they are usable and falls back to 1.2 x font size otherwise; a metrics-based
// result is never smaller than 1.05 x font size.
const getLineHeightPx = (fontId: string | null | undefined, fontSizePx: number): number => {
  // Non-positive sizes are handed back unchanged.
  if (fontSizePx <= 0) {
    return fontSizePx;
  }

  const fallbackLineHeight = fontSizePx * 1.2;
  const metrics = fontId ? fontMetrics.get(fontId) : undefined;
  if (!metrics || metrics.unitsPerEm <= 0) {
    return fallbackLineHeight;
  }

  const { ascent, descent, unitsPerEm } = metrics;
  // Descent is subtracted, so a negative descent extends the glyph extent.
  const glyphExtent = ascent - descent;
  if (glyphExtent <= 0) {
    return fallbackLineHeight;
  }

  const metricsLineHeight = (glyphExtent / unitsPerEm) * fontSizePx;
  return Math.max(metricsLineHeight, fontSizePx * 1.05);
};
|
||||
|
||||
// Resolves the CSS font weight for a PDF font id.
// Returns 'bold' when the font descriptor has the ForceBold flag set or when
// the font's name contains "bold" (case-insensitive); otherwise 'normal'.
// Unknown ids, or a document without a font list, resolve to 'normal'.
const getFontWeight = (fontId: string | null | undefined): number | 'normal' | 'bold' => {
  if (!fontId || !pdfDocument?.fonts) {
    return 'normal';
  }
  const font = pdfDocument.fonts.find((f) => f.id === fontId);
  if (!font) {
    return 'normal';
  }

  // ForceBold is bit position 19 (1-based) of the font descriptor flags,
  // i.e. 1 << 18 = 262144 = 0x40000.
  const FORCE_BOLD_FLAG = 262144;
  // Missing flags must not short-circuit the name check below: a font with no
  // descriptor flags can still be identified as bold by its name.
  const descriptorFlags = font.fontDescriptorFlags ?? 0;
  if ((descriptorFlags & FORCE_BOLD_FLAG) !== 0) {
    return 'bold';
  }

  // Also check if font name contains "Bold"
  const fontName = font.standard14Name || font.baseName || '';
  if (fontName.toLowerCase().includes('bold')) {
    return 'bold';
  }

  return 'normal';
};
|
||||
|
||||
const pages = pdfDocument?.pages ?? [];
|
||||
const currentPage = pages[selectedPage] ?? null;
|
||||
const pageGroups = groupsByPage[selectedPage] ?? [];
|
||||
const pageImages = imagesByPage[selectedPage] ?? [];
|
||||
|
||||
// Per-font vertical metrics keyed by font id, recomputed when the document's
// font list changes. Defaults: unitsPerEm = 1000 when absent or not positive,
// ascent = unitsPerEm, descent = -(20% of unitsPerEm).
const fontMetrics = useMemo(() => {
  const byFontId = new Map<string, { unitsPerEm: number; ascent: number; descent: number }>();
  for (const font of pdfDocument?.fonts ?? []) {
    // Fonts without an id cannot be looked up later, so they are skipped.
    if (!font?.id) {
      continue;
    }
    const declaredUnits = font.unitsPerEm;
    const unitsPerEm = declaredUnits != null && declaredUnits > 0 ? declaredUnits : 1000;
    byFontId.set(font.id, {
      unitsPerEm,
      ascent: font.ascent ?? unitsPerEm,
      descent: font.descent ?? -(unitsPerEm * 0.2),
    });
  }
  return byFontId;
}, [pdfDocument?.fonts]);
|
||||
|
||||
// Registers every embedded font program of the current document as a FontFace
// so text can be rendered with the document's own fonts. Each program is
// base64-decoded, wrapped in a Blob URL, loaded, added to `document.fonts`, and
// recorded in `fontFamilies` (font id -> CSS family name). On cleanup, or when
// the font list changes, all registered faces are removed and their Blob URLs
// revoked.
useEffect(() => {
  if (typeof FontFace === 'undefined') {
    setFontFamilies(new Map());
    return undefined;
  }

  let disposed = false;
  const active: { fontFace: FontFace; url?: string }[] = [];

  // Removes every registered face and frees its Blob URL.
  const releaseActive = () => {
    active.forEach(({ fontFace, url }) => {
      document.fonts.delete(fontFace);
      if (url) {
        URL.revokeObjectURL(url);
      }
    });
    active.length = 0;
  };

  const registerFonts = async () => {
    const fonts = pdfDocument?.fonts ?? [];
    if (fonts.length === 0) {
      setFontFamilies(new Map());
      return;
    }

    const next = new Map<string, string>();
    for (const font of fonts) {
      // Once the effect is disposed there is no point in loading further fonts.
      if (disposed) {
        break;
      }
      if (!font?.id || !font.program) {
        continue;
      }
      // Declared outside the try so the failure path can release it.
      let url: string | undefined;
      try {
        const format = normalizeFontFormat(font.programFormat);
        const data = decodeBase64ToUint8Array(font.program);
        const blob = new Blob([data as BlobPart], { type: getFontMimeType(format) });
        url = URL.createObjectURL(blob);
        const formatHint = getFontFormatHint(format);
        const familyName = buildFontFamilyName(font);
        const source = formatHint ? `url(${url}) format('${formatHint}')` : `url(${url})`;
        const fontFace = new FontFace(familyName, source);
        await fontFace.load();
        if (disposed) {
          // The face was never added to document.fonts; only the URL needs freeing.
          URL.revokeObjectURL(url);
          break;
        }
        document.fonts.add(fontFace);
        active.push({ fontFace, url });
        next.set(font.id, familyName);
      } catch (_error) {
        // Font loading failures are expected (embedded PDF fonts often lack web
        // font tables) and getFontFamily() already falls back to web-safe fonts,
        // but the Blob URL must still be released or it leaks per failed font.
        if (url) {
          URL.revokeObjectURL(url);
        }
      }
    }

    if (disposed) {
      releaseActive();
      return;
    }
    setFontFamilies(next);
  };

  registerFonts();

  return () => {
    disposed = true;
    releaseActive();
  };
}, [pdfDocument?.fonts]);
|
||||
const visibleGroups = useMemo(
|
||||
() =>
|
||||
pageGroups.filter((group) => {
|
||||
@ -419,25 +618,33 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
<ScrollArea h="100%" offsetScrollbars>
|
||||
<Box
|
||||
style={{
|
||||
margin: '0 auto',
|
||||
background: '#f3f4f6',
|
||||
padding: '1.5rem',
|
||||
borderRadius: '0.75rem',
|
||||
display: 'flex',
|
||||
justifyContent: 'center',
|
||||
alignItems: 'flex-start',
|
||||
width: '100%',
|
||||
minHeight: '100%',
|
||||
}}
|
||||
onClick={handleBackgroundClick}
|
||||
>
|
||||
<Box
|
||||
style={{
|
||||
position: 'relative',
|
||||
width: `${scaledWidth}px`,
|
||||
height: `${scaledHeight}px`,
|
||||
backgroundColor: '#ffffff',
|
||||
boxShadow: '0 0 12px rgba(15, 23, 42, 0.12)',
|
||||
borderRadius: '0.5rem',
|
||||
overflow: 'hidden',
|
||||
background: '#f3f4f6',
|
||||
padding: '0.5rem',
|
||||
borderRadius: '0.75rem',
|
||||
}}
|
||||
ref={containerRef}
|
||||
onClick={handleBackgroundClick}
|
||||
>
|
||||
<Box
|
||||
style={{
|
||||
position: 'relative',
|
||||
width: `${scaledWidth}px`,
|
||||
height: `${scaledHeight}px`,
|
||||
backgroundColor: '#ffffff',
|
||||
boxShadow: '0 0 12px rgba(15, 23, 42, 0.12)',
|
||||
borderRadius: '0.5rem',
|
||||
overflow: 'hidden',
|
||||
}}
|
||||
ref={containerRef}
|
||||
>
|
||||
{orderedImages.map((image, imageIndex) => {
|
||||
if (!image?.imageData) {
|
||||
return null;
|
||||
@ -466,7 +673,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
setEditingGroupId(null);
|
||||
setActiveImageId(imageId);
|
||||
}}
|
||||
onDrag={(event, data) => {
|
||||
onDrag={(_event, data) => {
|
||||
emitImageTransform(
|
||||
imageId,
|
||||
data.x,
|
||||
@ -475,7 +682,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
cssHeight,
|
||||
);
|
||||
}}
|
||||
onDragStop={(event, data) => {
|
||||
onDragStop={(_event, data) => {
|
||||
emitImageTransform(
|
||||
imageId,
|
||||
data.x,
|
||||
@ -489,7 +696,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
setActiveGroupId(null);
|
||||
setEditingGroupId(null);
|
||||
}}
|
||||
onResize={(event, _direction, ref, _delta, position) => {
|
||||
onResize={(_event, _direction, ref, _delta, position) => {
|
||||
const nextWidth = parseFloat(ref.style.width);
|
||||
const nextHeight = parseFloat(ref.style.height);
|
||||
emitImageTransform(
|
||||
@ -500,7 +707,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
nextHeight,
|
||||
);
|
||||
}}
|
||||
onResizeStop={(event, _direction, ref, _delta, position) => {
|
||||
onResizeStop={(_event, _direction, ref, _delta, position) => {
|
||||
const nextWidth = parseFloat(ref.style.width);
|
||||
const nextHeight = parseFloat(ref.style.height);
|
||||
emitImageTransform(
|
||||
@ -567,21 +774,48 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
const baseFontSize = group.fontMatrixSize ?? group.fontSize ?? 12;
|
||||
const fontSizePx = Math.max(baseFontSize * scale, 6);
|
||||
const fontFamily = getFontFamily(group.fontId);
|
||||
const lineHeightPx = getLineHeightPx(group.fontId, fontSizePx);
|
||||
const lineHeightRatio = fontSizePx > 0 ? Math.max(lineHeightPx / fontSizePx, 1.05) : 1.2;
|
||||
const hasRotation = group.rotation != null && Math.abs(group.rotation) > 0.5;
|
||||
const baselineLength = group.baselineLength ?? Math.max(group.bounds.right - group.bounds.left, 0);
|
||||
|
||||
const visualHeight = Math.max(bounds.height, fontSizePx * 1.2);
|
||||
let containerLeft = bounds.left;
|
||||
let containerTop = bounds.top;
|
||||
let containerWidth = Math.max(bounds.width, fontSizePx);
|
||||
let containerHeight = Math.max(bounds.height, lineHeightPx);
|
||||
let transform: string | undefined;
|
||||
let transformOrigin: React.CSSProperties['transformOrigin'];
|
||||
|
||||
if (hasRotation) {
|
||||
const anchorX = group.anchor?.x ?? group.bounds.left;
|
||||
const anchorY = group.anchor?.y ?? group.bounds.bottom;
|
||||
containerLeft = anchorX * scale;
|
||||
containerTop = Math.max(pageHeight - anchorY, 0) * scale;
|
||||
containerWidth = Math.max(baselineLength * scale, MIN_BOX_SIZE);
|
||||
containerHeight = Math.max(lineHeightPx, fontSizePx * lineHeightRatio);
|
||||
transformOrigin = 'left bottom';
|
||||
// Negate rotation because Y-axis is flipped from PDF to web coordinates
|
||||
transform = `rotate(${-group.rotation}deg)`;
|
||||
}
|
||||
|
||||
// Extract styling from group
|
||||
const textColor = group.color || '#111827';
|
||||
const fontWeight = group.fontWeight || getFontWeight(group.fontId);
|
||||
|
||||
const containerStyle: React.CSSProperties = {
|
||||
position: 'absolute',
|
||||
left: `${bounds.left}px`,
|
||||
top: `${bounds.top}px`,
|
||||
width: `${bounds.width}px`,
|
||||
height: `${visualHeight}px`,
|
||||
left: `${containerLeft}px`,
|
||||
top: `${containerTop}px`,
|
||||
width: `${containerWidth}px`,
|
||||
height: `${containerHeight}px`,
|
||||
display: 'flex',
|
||||
alignItems: 'flex-start',
|
||||
justifyContent: 'flex-start',
|
||||
pointerEvents: 'auto',
|
||||
cursor: 'text',
|
||||
zIndex: 2_000_000,
|
||||
transform,
|
||||
transformOrigin,
|
||||
};
|
||||
|
||||
if (isEditing) {
|
||||
@ -628,17 +862,17 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
style={{
|
||||
width: '100%',
|
||||
height: '100%',
|
||||
padding: '3px 4px',
|
||||
padding: 0,
|
||||
backgroundColor: 'rgba(255,255,255,0.95)',
|
||||
color: '#111827',
|
||||
color: textColor,
|
||||
fontSize: `${fontSizePx}px`,
|
||||
fontFamily,
|
||||
lineHeight: 1.25,
|
||||
fontWeight,
|
||||
lineHeight: lineHeightRatio,
|
||||
outline: 'none',
|
||||
border: 'none',
|
||||
display: 'block',
|
||||
whiteSpace: 'pre-wrap',
|
||||
overflowWrap: 'anywhere',
|
||||
whiteSpace: 'nowrap',
|
||||
cursor: 'text',
|
||||
overflow: 'visible',
|
||||
}}
|
||||
@ -660,12 +894,13 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
style={{
|
||||
width: '100%',
|
||||
minHeight: '100%',
|
||||
padding: '2px 4px',
|
||||
whiteSpace: 'pre-wrap',
|
||||
padding: 0,
|
||||
whiteSpace: 'nowrap',
|
||||
fontSize: `${fontSizePx}px`,
|
||||
fontFamily,
|
||||
lineHeight: 1.25,
|
||||
color: '#111827',
|
||||
fontWeight,
|
||||
lineHeight: lineHeightRatio,
|
||||
color: textColor,
|
||||
display: 'block',
|
||||
cursor: 'text',
|
||||
overflow: 'visible',
|
||||
@ -682,6 +917,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
);
|
||||
})
|
||||
)}
|
||||
</Box>
|
||||
</Box>
|
||||
</Box>
|
||||
</ScrollArea>
|
||||
@ -689,48 +925,61 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
|
||||
<Card padding="md" withBorder radius="md">
|
||||
<Stack gap="xs">
|
||||
<Text fw={500}>{t('pdfJsonEditor.groupList', 'Detected Text Groups')}</Text>
|
||||
<Divider />
|
||||
<ScrollArea h={180} offsetScrollbars>
|
||||
<Stack gap="sm">
|
||||
{visibleGroups.map((group) => {
|
||||
const changed = group.text !== group.originalText;
|
||||
return (
|
||||
<Card
|
||||
key={`list-${group.id}`}
|
||||
padding="sm"
|
||||
radius="md"
|
||||
withBorder
|
||||
shadow={changed ? 'sm' : 'none'}
|
||||
onMouseEnter={() => setActiveGroupId(group.id)}
|
||||
onMouseLeave={() => setActiveGroupId((current) => (current === group.id ? null : current))}
|
||||
style={{ cursor: 'pointer' }}
|
||||
onClick={() => {
|
||||
setActiveGroupId(group.id);
|
||||
setEditingGroupId(group.id);
|
||||
}}
|
||||
>
|
||||
<Stack gap={4}>
|
||||
<Group gap="xs">
|
||||
{changed && <Badge color="yellow" size="xs">{t('pdfJsonEditor.badges.modified', 'Edited')}</Badge>}
|
||||
{group.fontId && (
|
||||
<Badge size="xs" variant="outline">{group.fontId}</Badge>
|
||||
)}
|
||||
{group.fontSize && (
|
||||
<Badge size="xs" variant="light">
|
||||
{t('pdfJsonEditor.fontSizeValue', '{{size}}pt', { size: group.fontSize.toFixed(1) })}
|
||||
</Badge>
|
||||
)}
|
||||
</Group>
|
||||
<Text size="sm" c="dimmed" lineClamp={2}>
|
||||
{group.text || t('pdfJsonEditor.emptyGroup', '[Empty Group]')}
|
||||
</Text>
|
||||
</Stack>
|
||||
</Card>
|
||||
);
|
||||
})}
|
||||
<Group justify="space-between" align="center">
|
||||
<Text fw={500}>{t('pdfJsonEditor.groupList', 'Detected Text Groups')}</Text>
|
||||
<ActionIcon
|
||||
variant="subtle"
|
||||
onClick={() => setTextGroupsExpanded(!textGroupsExpanded)}
|
||||
aria-label={textGroupsExpanded ? 'Collapse' : 'Expand'}
|
||||
>
|
||||
{textGroupsExpanded ? <ExpandLessIcon /> : <ExpandMoreIcon />}
|
||||
</ActionIcon>
|
||||
</Group>
|
||||
<Collapse in={textGroupsExpanded}>
|
||||
<Stack gap="xs">
|
||||
<Divider />
|
||||
<ScrollArea h={180} offsetScrollbars>
|
||||
<Stack gap="sm">
|
||||
{visibleGroups.map((group) => {
|
||||
const changed = group.text !== group.originalText;
|
||||
return (
|
||||
<Card
|
||||
key={`list-${group.id}`}
|
||||
padding="sm"
|
||||
radius="md"
|
||||
withBorder
|
||||
shadow={changed ? 'sm' : 'none'}
|
||||
onMouseEnter={() => setActiveGroupId(group.id)}
|
||||
onMouseLeave={() => setActiveGroupId((current) => (current === group.id ? null : current))}
|
||||
style={{ cursor: 'pointer' }}
|
||||
onClick={() => {
|
||||
setActiveGroupId(group.id);
|
||||
setEditingGroupId(group.id);
|
||||
}}
|
||||
>
|
||||
<Stack gap={4}>
|
||||
<Group gap="xs">
|
||||
{changed && <Badge color="yellow" size="xs">{t('pdfJsonEditor.badges.modified', 'Edited')}</Badge>}
|
||||
{group.fontId && (
|
||||
<Badge size="xs" variant="outline">{group.fontId}</Badge>
|
||||
)}
|
||||
{group.fontSize && (
|
||||
<Badge size="xs" variant="light">
|
||||
{t('pdfJsonEditor.fontSizeValue', '{{size}}pt', { size: group.fontSize.toFixed(1) })}
|
||||
</Badge>
|
||||
)}
|
||||
</Group>
|
||||
<Text size="sm" c="dimmed" lineClamp={2}>
|
||||
{group.text || t('pdfJsonEditor.emptyGroup', '[Empty Group]')}
|
||||
</Text>
|
||||
</Stack>
|
||||
</Card>
|
||||
);
|
||||
})}
|
||||
</Stack>
|
||||
</ScrollArea>
|
||||
</Stack>
|
||||
</ScrollArea>
|
||||
</Collapse>
|
||||
</Stack>
|
||||
</Card>
|
||||
</Stack>
|
||||
|
||||
@ -23,6 +23,12 @@ export interface PdfJsonFont {
|
||||
toUnicode?: string | null;
|
||||
standard14Name?: string | null;
|
||||
fontDescriptorFlags?: number | null;
|
||||
ascent?: number | null;
|
||||
descent?: number | null;
|
||||
capHeight?: number | null;
|
||||
xHeight?: number | null;
|
||||
italicAngle?: number | null;
|
||||
unitsPerEm?: number | null;
|
||||
}
|
||||
|
||||
export interface PdfJsonTextElement {
|
||||
@ -117,6 +123,11 @@ export interface TextGroup {
|
||||
fontId?: string | null;
|
||||
fontSize?: number | null;
|
||||
fontMatrixSize?: number | null;
|
||||
color?: string | null;
|
||||
fontWeight?: number | 'normal' | 'bold' | null;
|
||||
rotation?: number | null;
|
||||
anchor?: { x: number; y: number } | null;
|
||||
baselineLength?: number | null;
|
||||
elements: PdfJsonTextElement[];
|
||||
originalElements: PdfJsonTextElement[];
|
||||
text: string;
|
||||
|
||||
@ -69,9 +69,15 @@ const getHeight = (element: PdfJsonTextElement): number => {
|
||||
const getElementBounds = (element: PdfJsonTextElement): BoundingBox => {
|
||||
const left = getX(element);
|
||||
const width = getWidth(element);
|
||||
const bottom = getBaseline(element);
|
||||
const baseline = getBaseline(element);
|
||||
const height = getHeight(element);
|
||||
const top = bottom - height;
|
||||
// In PDF coordinates, baseline is where text sits
|
||||
// Typical typography: ~80% of height above baseline (ascenders), ~20% below (descenders)
|
||||
// Using codebase's inverted naming: bottom (visual top) > top (visual bottom)
|
||||
const ascent = height * 0.8;
|
||||
const descent = height * 0.2;
|
||||
const bottom = baseline + ascent; // Visual top of text
|
||||
const top = baseline - descent; // Visual bottom (includes descenders)
|
||||
return {
|
||||
left,
|
||||
right: left + width,
|
||||
@ -181,6 +187,136 @@ const buildGroupText = (elements: PdfJsonTextElement[]): string => {
|
||||
return result;
|
||||
};
|
||||
|
||||
/**
 * Converts RGB components in the 0..1 range into a CSS `rgb(r, g, b)` string.
 * Components are clamped to [0, 1] and scaled to 0..255. Fewer than three
 * components yields black.
 */
const rgbToCss = (components: number[]): string => {
  if (components.length < 3) {
    return 'rgb(0, 0, 0)';
  }
  const toByte = (component: number): number =>
    Math.round(Math.max(0, Math.min(1, component)) * 255);
  const red = toByte(components[0]);
  const green = toByte(components[1]);
  const blue = toByte(components[2]);
  return `rgb(${red}, ${green}, ${blue})`;
};
|
||||
|
||||
/**
 * Formats normalized CMYK components as a CSS `rgb(r, g, b)` string.
 *
 * The first four components are clamped to 0..1 and converted with the simple
 * formula `channel = 255 * (1 - ink) * (1 - black)`, then rounded.
 *
 * @param components colour components in C, M, Y, K order; at least four are needed
 * @returns the CSS colour string, or `'rgb(0, 0, 0)'` when fewer than four
 *          components are supplied
 */
const cmykToCss = (components: number[]): string => {
  if (components.length < 4) {
    return 'rgb(0, 0, 0)';
  }

  const clamp01 = (value: number): number => Math.max(0, Math.min(1, value));
  const cyan = clamp01(components[0]);
  const magenta = clamp01(components[1]);
  const yellow = clamp01(components[2]);
  const black = clamp01(components[3]);

  // Each RGB channel is what is left after its complementary ink and black are removed.
  const toChannel = (ink: number): number => Math.round(255 * (1 - ink) * (1 - black));

  return `rgb(${toChannel(cyan)}, ${toChannel(magenta)}, ${toChannel(yellow)})`;
};
|
||||
|
||||
/**
 * Formats a normalized grey level as a CSS `rgb(v, v, v)` string.
 *
 * Only the first component is used; it is clamped to 0..1, scaled to 0..255
 * and rounded.
 *
 * @param components colour components; at least one is needed
 * @returns the CSS colour string, or `'rgb(0, 0, 0)'` for an empty array
 */
const grayToCss = (components: number[]): string => {
  if (components.length < 1) {
    return 'rgb(0, 0, 0)';
  }

  const clamped = Math.max(0, Math.min(1, components[0]));
  const level = Math.round(clamped * 255);
  return `rgb(${level}, ${level}, ${level})`;
};
|
||||
|
||||
/**
 * Resolves a text element's fill colour to a CSS `rgb()` string.
 *
 * The colour space name is matched case-insensitively by substring:
 * names containing "rgb" (which also covers "srgb"), "cmyk", or "gray"/"grey"
 * are converted with the matching helper. When the name is missing or not
 * recognised, the colour model is inferred from the number of components:
 * four components are treated as CMYK, three or more (other than four) as RGB,
 * and a single component as grey.
 *
 * @param element text element whose `fillColor` is inspected
 * @returns the CSS colour string, or `null` when the element has no fill
 *          colour components or the colour model cannot be inferred
 */
const extractColor = (element: PdfJsonTextElement): string | null => {
  const fillColor = element.fillColor;
  if (!fillColor || !fillColor.components || fillColor.components.length === 0) {
    return null;
  }

  const components = fillColor.components;
  const colorSpace = (fillColor.colorSpace ?? '').toLowerCase();

  // 'srgb' contains 'rgb', so a single substring test covers both spellings.
  if (colorSpace.includes('rgb')) {
    return rgbToCss(components);
  }
  if (colorSpace.includes('cmyk')) {
    return cmykToCss(components);
  }
  if (colorSpace.includes('gray') || colorSpace.includes('grey')) {
    return grayToCss(components);
  }

  // Unnamed or unrecognised colour space: infer the model from the component count.
  // Four components are CMYK-like; reading them as RGB would silently drop
  // the black channel and render the wrong colour.
  if (components.length === 4) {
    return cmykToCss(components);
  }
  if (components.length >= 3) {
    return rgbToCss(components);
  }
  if (components.length === 1) {
    return grayToCss(components);
  }

  return null;
};
|
||||
|
||||
// Conversion factor: multiply an angle in radians by this to get degrees.
const RAD_TO_DEG = 180 / Math.PI;
|
||||
|
||||
/**
 * Wraps an angle in degrees into the half-open range (-180, 180].
 *
 * @param angle angle in degrees, any magnitude
 * @returns the equivalent angle; exactly -180 is reported as 180
 */
const normalizeAngle = (angle: number): number => {
  // The remainder keeps the sign of the input, so it lies in (-360, 360).
  const wrapped = angle % 360;
  if (wrapped > 180) {
    return wrapped - 360;
  }
  if (wrapped <= -180) {
    return wrapped + 360;
  }
  return wrapped;
};
|
||||
|
||||
/**
 * Derives an element's rotation in degrees from the first two entries of its
 * six-entry text matrix, using `atan2(matrix[1], matrix[0])`.
 *
 * @param element text element whose `textMatrix` is inspected
 * @returns the rotation normalized via `normalizeAngle`, or `null` when the
 *          matrix is missing, does not have exactly six entries, has both
 *          leading entries near zero, or the angle is under half a degree
 */
const extractElementRotation = (element: PdfJsonTextElement): number | null => {
  const textMatrix = element.textMatrix;
  if (!textMatrix || textMatrix.length !== 6) {
    return null;
  }

  const xComponent = textMatrix[0];
  const yComponent = textMatrix[1];

  // With both entries effectively zero there is no direction to measure.
  const isDegenerate = Math.abs(xComponent) < 1e-6 && Math.abs(yComponent) < 1e-6;
  if (isDegenerate) {
    return null;
  }

  const degrees = Math.atan2(yComponent, xComponent) * RAD_TO_DEG;

  // Angles under half a degree are reported as "no rotation".
  return Math.abs(degrees) < 0.5 ? null : normalizeAngle(degrees);
};
|
||||
|
||||
/**
 * Computes a single representative rotation, in degrees, for a set of elements.
 *
 * Every element that reports a rotation contributes a unit vector at that
 * angle; the result is the direction of the summed vector. Summing vectors
 * rather than raw angles keeps angles on either side of the +/-180 boundary
 * from averaging out to zero.
 *
 * @param elements text elements to inspect
 * @returns the normalized mean rotation, or `null` when no element reports a
 *          rotation, the summed vector is near zero, or the mean is under
 *          half a degree
 */
const computeGroupRotation = (elements: PdfJsonTextElement[]): number | null => {
  let sumX = 0;
  let sumY = 0;
  let rotatedCount = 0;

  elements.forEach((element) => {
    const degrees = extractElementRotation(element);
    if (degrees === null) {
      return;
    }
    const radians = (degrees * Math.PI) / 180;
    sumX += Math.cos(radians);
    sumY += Math.sin(radians);
    rotatedCount += 1;
  });

  if (rotatedCount === 0) {
    return null;
  }

  // Opposing directions cancel out; there is no meaningful mean in that case.
  if (Math.abs(sumX) < 1e-6 && Math.abs(sumY) < 1e-6) {
    return null;
  }

  const meanDegrees = normalizeAngle(Math.atan2(sumY, sumX) * RAD_TO_DEG);
  if (Math.abs(meanDegrees) < 0.5) {
    return null;
  }
  return meanDegrees;
};
|
||||
|
||||
/**
 * Picks the anchor point for an element.
 *
 * When the element has a six-entry text matrix, its last two entries
 * (indices 4 and 5) are used; otherwise the element's own `x` / `y` are used.
 * Both coordinates are passed through `valueOr` (presumably substituting a
 * default for missing values; confirm against its definition).
 *
 * @param element text element to read the position from
 * @returns the anchor coordinates
 */
const getAnchorPoint = (element: PdfJsonTextElement): { x: number; y: number } => {
  const textMatrix = element.textMatrix;
  const hasFullMatrix = !!textMatrix && textMatrix.length === 6;

  if (!hasFullMatrix) {
    return { x: valueOr(element.x), y: valueOr(element.y) };
  }

  return { x: valueOr(textMatrix[4]), y: valueOr(textMatrix[5]) };
};
|
||||
|
||||
/**
 * Sums the widths (as reported by `getWidth`) of all given elements.
 *
 * @param elements text elements to measure
 * @returns the total width; 0 for an empty array
 */
const computeBaselineLength = (elements: PdfJsonTextElement[]): number => {
  let totalWidth = 0;
  elements.forEach((element) => {
    totalWidth += getWidth(element);
  });
  return totalWidth;
};
|
||||
|
||||
const createGroup = (
|
||||
pageIndex: number,
|
||||
idSuffix: number,
|
||||
@ -189,13 +325,22 @@ const createGroup = (
|
||||
const clones = elements.map(cloneTextElement);
|
||||
const originalClones = clones.map(cloneTextElement);
|
||||
const bounds = mergeBounds(elements.map(getElementBounds));
|
||||
const firstElement = elements[0];
|
||||
const rotation = computeGroupRotation(elements);
|
||||
const anchor = rotation !== null ? getAnchorPoint(firstElement) : null;
|
||||
const baselineLength = computeBaselineLength(elements);
|
||||
|
||||
return {
|
||||
id: `${pageIndex}-${idSuffix}`,
|
||||
pageIndex,
|
||||
fontId: elements[0]?.fontId,
|
||||
fontSize: elements[0]?.fontSize,
|
||||
fontMatrixSize: elements[0]?.fontMatrixSize,
|
||||
fontId: firstElement?.fontId,
|
||||
fontSize: firstElement?.fontSize,
|
||||
fontMatrixSize: firstElement?.fontMatrixSize,
|
||||
color: firstElement ? extractColor(firstElement) : null,
|
||||
fontWeight: null, // Will be determined from font descriptor
|
||||
rotation,
|
||||
anchor,
|
||||
baselineLength,
|
||||
elements: clones,
|
||||
originalElements: originalClones,
|
||||
text: buildGroupText(elements),
|
||||
@ -253,7 +398,18 @@ export const groupPageTextElements = (page: PdfJsonPage | null | undefined, page
|
||||
const splitThreshold = Math.max(SPACE_MIN_GAP, avgFontSize * GAP_FACTOR);
|
||||
|
||||
const sameFont = previous.fontId === element.fontId;
|
||||
const shouldSplit = gap > splitThreshold * (sameFont ? 1.4 : 1.0);
|
||||
let shouldSplit = gap > splitThreshold * (sameFont ? 1.4 : 1.0);
|
||||
|
||||
const previousRotation = extractElementRotation(previous);
|
||||
const currentRotation = extractElementRotation(element);
|
||||
if (
|
||||
shouldSplit &&
|
||||
previousRotation !== null &&
|
||||
currentRotation !== null &&
|
||||
Math.abs(normalizeAngle(previousRotation - currentRotation)) < 1
|
||||
) {
|
||||
shouldSplit = false;
|
||||
}
|
||||
|
||||
if (shouldSplit) {
|
||||
groups.push(createGroup(pageIndex, groupCounter, currentBucket));
|
||||
|
||||
Loading…
Reference in New Issue
Block a user