mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
test
This commit is contained in:
parent
5780b3a119
commit
4d9cf45009
@ -31,7 +31,8 @@ public class ConvertPdfJsonController {
|
||||
description =
|
||||
"Extracts PDF text, fonts, and metadata into an editable JSON structure that can be"
|
||||
+ " transformed back into a PDF. Input:PDF Output:JSON Type:SISO")
|
||||
public ResponseEntity<byte[]> convertPdfToJson(@ModelAttribute PDFFile request) throws Exception {
|
||||
public ResponseEntity<byte[]> convertPdfToJson(@ModelAttribute PDFFile request)
|
||||
throws Exception {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
if (inputFile == null) {
|
||||
throw ExceptionUtils.createNullArgumentException("fileInput");
|
||||
@ -44,8 +45,7 @@ public class ConvertPdfJsonController {
|
||||
? Filenames.toSimpleFileName(originalName).replaceFirst("[.][^.]+$", "")
|
||||
: "document";
|
||||
String docName = baseName + ".json";
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
jsonBytes, docName, MediaType.APPLICATION_JSON);
|
||||
return WebResponseUtils.bytesToWebResponse(jsonBytes, docName, MediaType.APPLICATION_JSON);
|
||||
}
|
||||
|
||||
@AutoJobPostMapping(consumes = "multipart/form-data", value = "/json/pdf")
|
||||
@ -55,7 +55,8 @@ public class ConvertPdfJsonController {
|
||||
description =
|
||||
"Rebuilds a PDF from the editable JSON structure generated by the PDF to JSON"
|
||||
+ " endpoint. Input:JSON Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> convertJsonToPdf(@ModelAttribute GeneralFile request) throws Exception {
|
||||
public ResponseEntity<byte[]> convertJsonToPdf(@ModelAttribute GeneralFile request)
|
||||
throws Exception {
|
||||
MultipartFile jsonFile = request.getFileInput();
|
||||
if (jsonFile == null) {
|
||||
throw ExceptionUtils.createNullArgumentException("fileInput");
|
||||
|
||||
@ -0,0 +1,49 @@
|
||||
package stirling.software.SPDF.model.json;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * JSON model for a single PDF COS value.
 *
 * <p>{@link #type} identifies the kind of value; depending on it, the payload is carried by
 * {@link #value} (primitives), {@link #items} (arrays), {@link #entries} (dictionaries) or
 * {@link #stream} (streams). Because of {@code JsonInclude.Include.NON_NULL}, properties that are
 * {@code null} are left out of the serialized JSON.
 */
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class PdfJsonCosValue {
|
||||
|
||||
/** Kinds of COS value this model can represent. */
public enum Type {
|
||||
NULL,
|
||||
BOOLEAN,
|
||||
INTEGER,
|
||||
FLOAT,
|
||||
NAME,
|
||||
STRING,
|
||||
ARRAY,
|
||||
DICTIONARY,
|
||||
STREAM
|
||||
}
|
||||
|
||||
/** Discriminator telling readers which of the payload fields below is populated. */
private Type type;
|
||||
|
||||
/**
 * Holds the decoded value for primitives (boolean, integer, float, name, string). For name
 * values the stored value is the PDF name literal. For string values the content is Base64
 * encoded to safely transport arbitrary binary data.
 */
|
||||
private Object value;
|
||||
|
||||
/** Nested values when this value is an array. */
|
||||
private List<PdfJsonCosValue> items;
|
||||
|
||||
/** Nested values, keyed by entry name, when this value is a dictionary. */
|
||||
private Map<String, PdfJsonCosValue> entries;
|
||||
|
||||
/** Stream payload when {@code type == STREAM}. */
|
||||
private PdfJsonStream stream;
|
||||
}
|
||||
@ -19,6 +19,9 @@ public class PdfJsonDocument {
|
||||
|
||||
private PdfJsonMetadata metadata;
|
||||
|
||||
/** Optional XMP metadata packet stored as Base64. */
|
||||
private String xmpMetadata;
|
||||
|
||||
@Builder.Default private List<PdfJsonFont> fonts = new ArrayList<>();
|
||||
|
||||
@Builder.Default private List<PdfJsonPage> pages = new ArrayList<>();
|
||||
|
||||
@ -14,12 +14,42 @@ import lombok.NoArgsConstructor;
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class PdfJsonFont {
|
||||
|
||||
/** PDF resource name (e.g. F1) used as the primary identifier. */
|
||||
private String id;
|
||||
private String name;
|
||||
|
||||
/** Logical page number that owns this font resource. */
|
||||
private Integer pageNumber;
|
||||
|
||||
/** Stable UID combining page number and resource for diagnostics. */
|
||||
private String uid;
|
||||
|
||||
/** Reported PostScript/Base font name. */
|
||||
private String baseName;
|
||||
|
||||
/** Declared subtype in the COS dictionary. */
|
||||
private String subtype;
|
||||
|
||||
/** Encoding dictionary or name. */
|
||||
private String encoding;
|
||||
|
||||
/** CID system info for Type0 fonts. */
|
||||
private PdfJsonFontCidSystemInfo cidSystemInfo;
|
||||
|
||||
/** True when the original PDF embedded the font program. */
|
||||
private Boolean embedded;
|
||||
|
||||
/** Font program bytes (TTF/OTF/CFF/PFB) encoded as Base64. */
|
||||
private String program;
|
||||
|
||||
/** Hint describing the font program type (ttf, otf, cff, pfb, etc.). */
|
||||
private String programFormat;
|
||||
|
||||
/** ToUnicode stream encoded as Base64 when present. */
|
||||
private String toUnicode;
|
||||
|
||||
/** Mapped Standard 14 font name when available. */
|
||||
private String standard14Name;
|
||||
|
||||
/** Font descriptor flags copied from the source document. */
|
||||
private Integer fontDescriptorFlags;
|
||||
private String base64Data;
|
||||
}
|
||||
|
||||
@ -0,0 +1,20 @@
|
||||
package stirling.software.SPDF.model.json;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * JSON model for a font's CID system info (referenced from {@code PdfJsonFont.cidSystemInfo},
 * which documents it as applying to Type0 fonts). Null properties are omitted from the JSON
 * because of {@code JsonInclude.Include.NON_NULL}.
 *
 * <p>NOTE(review): the three fields presumably mirror the Registry / Ordering / Supplement
 * entries of the PDF CIDSystemInfo dictionary - confirm against the conversion service.
 */
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class PdfJsonFontCidSystemInfo {
|
||||
|
||||
/** Registry name; optional. */
private String registry;
|
||||
/** Ordering name; optional. */
private String ordering;
|
||||
/** Supplement number; optional. */
private Integer supplement;
|
||||
}
|
||||
@ -23,4 +23,10 @@ public class PdfJsonPage {
|
||||
private Integer rotation;
|
||||
|
||||
@Builder.Default private List<PdfJsonTextElement> textElements = new ArrayList<>();
|
||||
|
||||
/** Serialized representation of the page resources dictionary. */
|
||||
private PdfJsonCosValue resources;
|
||||
|
||||
/** Raw content streams associated with the page, preserved for lossless round-tripping. */
|
||||
@Builder.Default private List<PdfJsonStream> contentStreams = new ArrayList<>();
|
||||
}
|
||||
|
||||
@ -0,0 +1,27 @@
|
||||
package stirling.software.SPDF.model.json;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * JSON model for a PDF stream: its describing dictionary plus the stream bytes as Base64 text.
 * Null properties are omitted from the serialized JSON ({@code JsonInclude.Include.NON_NULL}).
 */
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class PdfJsonStream {
|
||||
|
||||
/**
 * A dictionary of entries that describe the stream metadata (Filter, DecodeParms, etc). Each
 * entry is represented using {@link PdfJsonCosValue} so nested structures are supported.
 */
|
||||
private Map<String, PdfJsonCosValue> dictionary;
|
||||
|
||||
/** Raw stream bytes in Base64 form. Data is stored exactly as it appeared in the source PDF. */
|
||||
private String rawData;
|
||||
}
|
||||
@ -20,6 +20,8 @@ public class PdfJsonTextElement {
|
||||
private String text;
|
||||
private String fontId;
|
||||
private Float fontSize;
|
||||
private Float fontMatrixSize;
|
||||
private Float fontSizeInPt;
|
||||
private Float x;
|
||||
private Float y;
|
||||
private Float width;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -7,6 +7,7 @@ logging.level.org.eclipse.jetty=WARN
|
||||
#logging.level.org.opensaml=DEBUG
|
||||
#logging.level.stirling.software.proprietary.security=DEBUG
|
||||
logging.level.com.zaxxer.hikari=WARN
|
||||
logging.level.stirling.software.SPDF.service.PdfJsonConversionService=TRACE
|
||||
spring.jpa.open-in-view=false
|
||||
server.forward-headers-strategy=NATIVE
|
||||
server.error.path=/error
|
||||
|
||||
43
compare_json.py
Normal file
43
compare_json.py
Normal file
@ -0,0 +1,43 @@
|
||||
"""Compare two PDF-JSON exports and report where their page text elements diverge.

Usage: compare_json.py <file1> <file2>

The report stops at the first differing text element. A page-count mismatch and
differences that live outside ``pages[*].textElements`` are reported as well, so
two documents that differ never produce an empty report.

Exit status: 1 for a usage error, 0 once a comparison has been carried out
(whether or not the documents are identical).
"""
import json
import sys
from pathlib import Path


def load(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return the decoded value."""
    with path.open('r', encoding='utf-8') as fh:
        return json.load(fh)


def describe_differences(doc1, doc2):
    """Return the report lines for comparing two decoded documents.

    Args:
        doc1: First decoded document (a dict with an optional ``pages`` list).
        doc2: Second decoded document, same shape.

    Returns:
        A list of strings. ``['Documents identical']`` when both compare equal;
        otherwise one line per finding, ending with the first differing text
        element (if any) and its differing keys.
    """
    if doc1 == doc2:
        return ['Documents identical']

    report = []
    # ``or []`` also covers an explicit JSON null, which ``.get(key, [])`` would not.
    pages1 = doc1.get('pages') or []
    pages2 = doc2.get('pages') or []

    # zip() below only walks the common prefix, so surface a length mismatch explicitly.
    if len(pages1) != len(pages2):
        report.append(f'Page count {len(pages1)} vs {len(pages2)}')

    for page_index, (p1, p2) in enumerate(zip(pages1, pages2), start=1):
        elems1 = p1.get('textElements') or []
        elems2 = p2.get('textElements') or []
        if len(elems1) != len(elems2):
            report.append(f'Page {page_index}: element count {len(elems1)} vs {len(elems2)}')

        diff_found = False
        for elem_index, (e1, e2) in enumerate(zip(elems1, elems2)):
            if e1 == e2:
                continue
            diff_found = True
            report.append(f'Page {page_index} element {elem_index} differs')
            # Union of keys, so a key present on only one side is reported too.
            all_keys = sorted(set(e1) | set(e2))
            for key in all_keys:
                if e1.get(key) != e2.get(key):
                    report.append(f'  {key}: {e1.get(key)!r} -> {e2.get(key)!r}')
            # Only the first differing element is detailed.
            break
        if diff_found:
            break

    if not report:
        # The documents differ, but not in anything inspected above
        # (for example metadata, fonts or page resources).
        report.append('Documents differ outside of page text elements')
    return report


def main(argv):
    """Run the comparison for ``argv`` (as in ``sys.argv``) and return the exit status."""
    if len(argv) != 3:
        print('Usage: compare_json.py <file1> <file2>')
        return 1

    path1, path2 = map(Path, argv[1:])
    for line in describe_differences(load(path1), load(path2)):
        print(line)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|
||||
|
||||
@ -4006,5 +4006,8 @@
|
||||
"finish": "Finish",
|
||||
"startTour": "Start Tour",
|
||||
"startTourDescription": "Take a guided tour of Stirling PDF's key features"
|
||||
},
|
||||
"pdfJsonEditor": {
|
||||
"viewLabel": "JSON Editor"
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,463 @@
|
||||
import React, { useEffect, useMemo, useState } from 'react';
|
||||
import {
|
||||
Alert,
|
||||
Badge,
|
||||
Box,
|
||||
Button,
|
||||
Card,
|
||||
Divider,
|
||||
FileButton,
|
||||
Group,
|
||||
Pagination,
|
||||
ScrollArea,
|
||||
Stack,
|
||||
Text,
|
||||
Title,
|
||||
} from '@mantine/core';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import DescriptionIcon from '@mui/icons-material/DescriptionOutlined';
|
||||
import FileDownloadIcon from '@mui/icons-material/FileDownloadOutlined';
|
||||
import PictureAsPdfIcon from '@mui/icons-material/PictureAsPdfOutlined';
|
||||
import AutorenewIcon from '@mui/icons-material/Autorenew';
|
||||
import WarningAmberIcon from '@mui/icons-material/WarningAmber';
|
||||
import UploadIcon from '@mui/icons-material/Upload';
|
||||
|
||||
import {
|
||||
PdfJsonEditorViewData,
|
||||
PdfJsonPage,
|
||||
} from '../../../tools/pdfJsonEditorTypes';
|
||||
import { pageDimensions } from '../../../tools/pdfJsonEditorUtils';
|
||||
|
||||
// Width budget for the rendered page: the view scale is MAX_RENDER_WIDTH / pageWidth,
// capped at 1.5.
const MAX_RENDER_WIDTH = 820;
|
||||
// Smallest rendered width of a text-group box; half of this is the smallest height.
const MIN_BOX_SIZE = 18;
|
||||
|
||||
// Props for PdfJsonEditorView: a single `data` object carrying both the state to display
// and the callbacks the view invokes.
interface PdfJsonEditorViewProps {
|
||||
data: PdfJsonEditorViewData;
|
||||
}
|
||||
|
||||
/**
 * Converts a text group's bounds into the CSS box used to position it on the rendered page.
 *
 * The raw width/height are clamped to at least 1 before scaling, and the scaled box is never
 * smaller than MIN_BOX_SIZE wide or MIN_BOX_SIZE / 2 tall. The CSS `top` is the distance from
 * `pageHeight` down to `bounds.bottom` (clamped at 0), scaled.
 *
 * @param page Currently unused; kept so existing callers continue to compile.
 * @param pageHeight Page height, in the same units as `bounds`.
 * @param scale Factor applied to convert page units to CSS pixels.
 * @param bounds Edges of the group in page units.
 * @returns `left`, `top`, `width` and `height` in CSS pixels.
 */
const toCssBounds = (
  page: PdfJsonPage | null | undefined,
  pageHeight: number,
  scale: number,
  bounds: { left: number; right: number; top: number; bottom: number },
) => {
  const { left: leftEdge, right: rightEdge, top: topEdge, bottom: bottomEdge } = bounds;

  const rawWidth = Math.max(rightEdge - leftEdge, 1);
  const rawHeight = Math.max(bottomEdge - topEdge, 1);
  const offsetFromPageTop = Math.max(pageHeight - bottomEdge, 0);

  return {
    left: leftEdge * scale,
    top: offsetFromPageTop * scale,
    width: Math.max(rawWidth * scale, MIN_BOX_SIZE),
    height: Math.max(rawHeight * scale, MIN_BOX_SIZE / 2),
  };
};
|
||||
|
||||
/**
 * Workbench view of the PDF JSON editor.
 *
 * Renders a toolbar (load / reset / download / generate), an optional error alert, and - once a
 * document is loaded - the selected page with one absolutely positioned box per text group plus
 * a list of the same groups. Clicking a box or list entry turns that group into an inline
 * contentEditable editor; every input and the final blur are reported through
 * `data.onGroupEdit`.
 *
 * All state other than hover/edit focus is owned by the caller and arrives through `data`.
 */
const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
  const { t } = useTranslation();
  const [activeGroupId, setActiveGroupId] = useState<string | null>(null);
  const [editingGroupId, setEditingGroupId] = useState<string | null>(null);
  // Text the inline editor is mounted with. It is captured once when editing starts and kept
  // constant for the whole edit session: if the editor's children followed `group.text`
  // (which changes on every keystroke via onInput), React would rewrite the text node on each
  // render and the caret would jump back to the start.
  const [editingSeedText, setEditingSeedText] = useState('');
  // Points at the mounted inline editor, if any (at most one group is edited at a time).
  const editorRef = React.useRef<HTMLDivElement | null>(null);

  const {
    document: pdfDocument,
    groupsByPage,
    selectedPage,
    dirtyPages,
    hasDocument,
    fileName,
    errorMessage,
    isGeneratingPdf,
    hasChanges,
    onLoadJson,
    onSelectPage,
    onGroupEdit,
    onReset,
    onDownloadJson,
    onGeneratePdf,
  } = data;

  const pages = pdfDocument?.pages ?? [];
  const currentPage = pages[selectedPage] ?? null;
  // Memoised so the `?? []` fallback does not produce a fresh array (and thereby invalidate
  // `visibleGroups`) on every render.
  const pageGroups = useMemo(() => groupsByPage[selectedPage] ?? [], [groupsByPage, selectedPage]);
  // Groups with no text now and no text originally are hidden, unless one is being edited.
  const visibleGroups = useMemo(
    () =>
      pageGroups.filter((group) => {
        const hasContent = ((group.text ?? '').trim().length > 0) || ((group.originalText ?? '').trim().length > 0);
        return hasContent || editingGroupId === group.id;
      }),
    [editingGroupId, pageGroups]
  );

  const { width: pageWidth, height: pageHeight } = pageDimensions(currentPage);
  const scale = useMemo(() => Math.min(MAX_RENDER_WIDTH / pageWidth, 1.5), [pageWidth]);
  const scaledWidth = pageWidth * scale;
  const scaledHeight = pageHeight * scale;

  // Hover/edit focus never survives a page switch.
  useEffect(() => {
    setActiveGroupId(null);
    setEditingGroupId(null);
  }, [selectedPage]);

  // When a group enters edit mode, focus its editor and place the caret at the end of the text.
  useEffect(() => {
    if (!editingGroupId) {
      return;
    }
    const editor = editorRef.current;
    if (!editor) {
      return;
    }
    editor.focus();
    const selection = window.getSelection();
    if (!selection) {
      return;
    }
    selection.removeAllRanges();
    const range = document.createRange();
    range.selectNodeContents(editor);
    range.collapse(false);
    selection.addRange(range);
  }, [editingGroupId]);

  /** Puts a group into edit mode, seeding the editor with the group's current text. */
  const startEditing = (groupId: string, text: string | null | undefined) => {
    setEditingSeedText(text ?? '');
    setEditingGroupId(groupId);
    setActiveGroupId(groupId);
  };

  /** Pagination is 1-based; the caller's page index is 0-based. */
  const handlePageChange = (pageNumber: number) => {
    setActiveGroupId(null);
    setEditingGroupId(null);
    onSelectPage(pageNumber - 1);
  };

  /** Clicking the page backdrop leaves edit mode and clears the highlight. */
  const handleBackgroundClick = () => {
    setEditingGroupId(null);
    setActiveGroupId(null);
  };

  /**
   * Wraps a group's content in the bordered, hover-aware frame shared by the read-only and
   * editing renderings. Clicks are stopped here so they do not reach the backdrop handler.
   */
  const renderGroupContainer = (
    groupId: string,
    isActive: boolean,
    isChanged: boolean,
    content: React.ReactNode,
    onActivate?: (event: React.MouseEvent) => void,
  ) => (
    <Box
      component="div"
      style={{
        width: '100%',
        height: '100%',
        border: isActive
          ? '2px solid var(--mantine-color-blue-5)'
          : isChanged
            ? '1px solid var(--mantine-color-yellow-5)'
            : '1px solid transparent',
        borderRadius: 6,
        backgroundColor: isChanged || isActive ? 'rgba(250,255,189,0.28)' : 'transparent',
        transition: 'border 120ms ease, background-color 120ms ease',
        pointerEvents: 'auto',
        overflow: 'hidden',
        display: 'flex',
        alignItems: 'flex-start',
        justifyContent: 'flex-start',
        padding: 0,
      }}
      onClick={(event) => {
        event.stopPropagation();
        onActivate?.(event);
      }}
      onMouseEnter={() => setActiveGroupId(groupId)}
      onMouseLeave={() => {
        // Keep the highlight on the group that is being edited.
        if (editingGroupId !== groupId) {
          setActiveGroupId((current) => (current === groupId ? null : current));
        }
      }}
    >
      {content}
    </Box>
  );

  return (
    <Stack gap="xl" className="h-full" style={{ padding: '1.5rem', overflow: 'auto' }}>
      <Card withBorder radius="md" shadow="xs" padding="lg">
        <Stack gap="sm">
          <Group justify="space-between" align="center">
            <Group gap="xs" align="center">
              <DescriptionIcon fontSize="small" />
              <Title order={3}>{t('pdfJsonEditor.title', 'PDF JSON Editor')}</Title>
              {hasChanges && <Badge color="yellow" size="sm">{t('pdfJsonEditor.badges.unsaved', 'Edited')}</Badge>}
            </Group>
            <Group gap="sm">
              <FileButton onChange={onLoadJson} accept="application/json">
                {(props) => (
                  <Button variant="light" leftSection={<UploadIcon fontSize="small" />} {...props}>
                    {t('pdfJsonEditor.actions.load', 'Load JSON')}
                  </Button>
                )}
              </FileButton>
              <Button
                variant="subtle"
                leftSection={<AutorenewIcon fontSize="small" />}
                onClick={onReset}
                disabled={!hasDocument}
              >
                {t('pdfJsonEditor.actions.reset', 'Reset Changes')}
              </Button>
              <Button
                variant="default"
                leftSection={<FileDownloadIcon fontSize="small" />}
                onClick={onDownloadJson}
                disabled={!hasDocument}
              >
                {t('pdfJsonEditor.actions.downloadJson', 'Download JSON')}
              </Button>
              <Button
                leftSection={<PictureAsPdfIcon fontSize="small" />}
                onClick={onGeneratePdf}
                loading={isGeneratingPdf}
                disabled={!hasDocument || !hasChanges}
              >
                {t('pdfJsonEditor.actions.generatePdf', 'Generate PDF')}
              </Button>
            </Group>
          </Group>

          {fileName && (
            <Text size="sm" c="dimmed">
              {t('pdfJsonEditor.currentFile', 'Current file: {{name}}', { name: fileName })}
            </Text>
          )}
        </Stack>
      </Card>

      {errorMessage && (
        <Alert icon={<WarningAmberIcon fontSize="small" />} color="red" radius="md">
          {errorMessage}
        </Alert>
      )}

      {!hasDocument && (
        <Card withBorder radius="md" padding="xl">
          <Stack align="center" gap="md">
            <DescriptionIcon sx={{ fontSize: 48 }} />
            <Text size="lg" fw={600}>
              {t('pdfJsonEditor.empty.title', 'No JSON loaded yet')}
            </Text>
            <Text size="sm" c="dimmed" ta="center" maw={420}>
              {t('pdfJsonEditor.empty.subtitle', 'Use the Load JSON button above to open a file generated by the PDF → JSON converter.')}
            </Text>
          </Stack>
        </Card>
      )}

      {hasDocument && (
        <Stack gap="lg" className="flex-1" style={{ minHeight: 0 }}>
          <Group justify="space-between" align="center">
            <Group gap="sm">
              <Text fw={500}>
                {t('pdfJsonEditor.pageSummary', 'Page {{number}} of {{total}}', {
                  number: selectedPage + 1,
                  total: pages.length,
                })}
              </Text>
              {dirtyPages[selectedPage] && (
                <Badge color="yellow" size="xs">
                  {t('pdfJsonEditor.badges.modified', 'Edited')}
                </Badge>
              )}
            </Group>
            {pages.length > 1 && (
              <Pagination
                value={selectedPage + 1}
                onChange={handlePageChange}
                total={pages.length}
                size="sm"
              />
            )}
          </Group>

          <Card withBorder padding="md" radius="md" shadow="xs" style={{ flex: 1, minHeight: 0 }}>
            <ScrollArea h="100%" offsetScrollbars>
              <Box
                style={{
                  margin: '0 auto',
                  background: '#f3f4f6',
                  padding: '1.5rem',
                  borderRadius: '0.75rem',
                }}
                onClick={handleBackgroundClick}
              >
                <Box
                  style={{
                    position: 'relative',
                    width: `${scaledWidth}px`,
                    height: `${scaledHeight}px`,
                    backgroundColor: '#ffffff',
                    boxShadow: '0 0 12px rgba(15, 23, 42, 0.12)',
                    borderRadius: '0.5rem',
                    overflow: 'hidden',
                  }}
                >
                  {visibleGroups.length === 0 ? (
                    <Group justify="center" align="center" style={{ height: '100%' }}>
                      <Stack gap={4} align="center">
                        <Text size="sm" c="dimmed">
                          {t('pdfJsonEditor.noTextOnPage', 'No editable text was detected on this page.')}
                        </Text>
                      </Stack>
                    </Group>
                  ) : (
                    visibleGroups.map((group) => {
                      const bounds = toCssBounds(currentPage, pageHeight, scale, group.bounds);
                      const changed = group.text !== group.originalText;
                      const isActive = activeGroupId === group.id || editingGroupId === group.id;
                      const isEditing = editingGroupId === group.id;
                      const fontSizePx = Math.max((group.fontSize ?? 12) * scale, 8);

                      // Never render the box shorter than one line of text at this font size.
                      const visualHeight = Math.max(bounds.height, fontSizePx * 1.35);

                      const containerStyle: React.CSSProperties = {
                        position: 'absolute',
                        left: `${bounds.left}px`,
                        top: `${bounds.top}px`,
                        width: `${bounds.width}px`,
                        height: `${visualHeight}px`,
                        display: 'flex',
                        alignItems: 'flex-start',
                        justifyContent: 'flex-start',
                        pointerEvents: 'auto',
                        cursor: 'text',
                      };

                      // `key` is passed directly on the element below: React warns when a
                      // props object containing `key` is spread into JSX.
                      if (isEditing) {
                        return (
                          <Box key={group.id} style={containerStyle}>
                            {renderGroupContainer(
                              group.id,
                              true,
                              changed,
                              <div
                                ref={editorRef}
                                contentEditable
                                suppressContentEditableWarning
                                data-editor-group={group.id}
                                onBlur={(event) => {
                                  // Non-breaking spaces (including the empty-text placeholder)
                                  // are normalised to plain spaces before reporting.
                                  const value = event.currentTarget.innerText.replace(/\u00A0/g, ' ');
                                  onGroupEdit(group.pageIndex, group.id, value);
                                  setEditingGroupId(null);
                                }}
                                onInput={(event) => {
                                  const value = event.currentTarget.innerText.replace(/\u00A0/g, ' ');
                                  onGroupEdit(group.pageIndex, group.id, value);
                                }}
                                style={{
                                  width: '100%',
                                  height: '100%',
                                  padding: '3px 4px',
                                  backgroundColor: 'rgba(255,255,255,0.95)',
                                  color: '#111827',
                                  fontSize: `${fontSizePx}px`,
                                  lineHeight: 1.25,
                                  outline: 'none',
                                  border: 'none',
                                  display: 'block',
                                  whiteSpace: 'pre-wrap',
                                  overflowWrap: 'anywhere',
                                  cursor: 'text',
                                }}
                              >
                                {editingSeedText || '\u00A0'}
                              </div>,
                            )}
                          </Box>
                        );
                      }

                      return (
                        <Box key={group.id} style={containerStyle}>
                          {renderGroupContainer(
                            group.id,
                            isActive,
                            changed,
                            <div
                              style={{
                                width: '100%',
                                minHeight: '100%',
                                padding: '2px 4px',
                                whiteSpace: 'pre-wrap',
                                fontSize: `${fontSizePx}px`,
                                lineHeight: 1.25,
                                color: '#111827',
                                display: 'block',
                                cursor: 'text',
                              }}
                            >
                              <span style={{ pointerEvents: 'none' }}>{group.text || '\u00A0'}</span>
                            </div>,
                            () => startEditing(group.id, group.text),
                          )}
                        </Box>
                      );
                    })
                  )}
                </Box>
              </Box>
            </ScrollArea>
          </Card>

          <Card padding="md" withBorder radius="md">
            <Stack gap="xs">
              <Text fw={500}>{t('pdfJsonEditor.groupList', 'Detected Text Groups')}</Text>
              <Divider />
              <ScrollArea h={180} offsetScrollbars>
                <Stack gap="sm">
                  {visibleGroups.map((group) => {
                    const changed = group.text !== group.originalText;
                    return (
                      <Card
                        key={`list-${group.id}`}
                        padding="sm"
                        radius="md"
                        withBorder
                        shadow={changed ? 'sm' : 'none'}
                        onMouseEnter={() => setActiveGroupId(group.id)}
                        onMouseLeave={() => setActiveGroupId((current) => (current === group.id ? null : current))}
                        style={{ cursor: 'pointer' }}
                        onClick={() => startEditing(group.id, group.text)}
                      >
                        <Stack gap={4}>
                          <Group gap="xs">
                            {changed && <Badge color="yellow" size="xs">{t('pdfJsonEditor.badges.modified', 'Edited')}</Badge>}
                            {group.fontId && (
                              <Badge size="xs" variant="outline">{group.fontId}</Badge>
                            )}
                            {/* Ternary rather than `&&`: a font size of 0 would otherwise be rendered as the text "0". */}
                            {group.fontSize ? (
                              <Badge size="xs" variant="light">
                                {t('pdfJsonEditor.fontSizeValue', '{{size}}pt', { size: group.fontSize.toFixed(1) })}
                              </Badge>
                            ) : null}
                          </Group>
                          <Text size="sm" c="dimmed" lineClamp={2}>
                            {group.text || t('pdfJsonEditor.emptyGroup', '[Empty Group]')}
                          </Text>
                        </Stack>
                      </Card>
                    );
                  })}
                </Stack>
              </ScrollArea>
            </Stack>
          </Card>
        </Stack>
      )}
    </Stack>
  );
};
|
||||
|
||||
export default PdfJsonEditorView;
|
||||
@ -31,7 +31,9 @@ export const CONVERSION_ENDPOINTS = {
|
||||
'pdf-pdfa': '/api/v1/convert/pdf/pdfa',
|
||||
'html-pdf': '/api/v1/convert/html/pdf',
|
||||
'markdown-pdf': '/api/v1/convert/markdown/pdf',
|
||||
'eml-pdf': '/api/v1/convert/eml/pdf'
|
||||
'eml-pdf': '/api/v1/convert/eml/pdf',
|
||||
'pdf-json': '/api/v1/convert/pdf/json',
|
||||
'json-pdf': '/api/v1/convert/json/pdf'
|
||||
} as const;
|
||||
|
||||
export const ENDPOINT_NAMES = {
|
||||
@ -48,7 +50,9 @@ export const ENDPOINT_NAMES = {
|
||||
'pdf-pdfa': 'pdf-to-pdfa',
|
||||
'html-pdf': 'html-to-pdf',
|
||||
'markdown-pdf': 'markdown-to-pdf',
|
||||
'eml-pdf': 'eml-to-pdf'
|
||||
'eml-pdf': 'eml-to-pdf',
|
||||
'pdf-json': 'pdf-to-json',
|
||||
'json-pdf': 'json-to-pdf'
|
||||
} as const;
|
||||
|
||||
|
||||
@ -80,6 +84,7 @@ export const FROM_FORMAT_OPTIONS = [
|
||||
{ value: 'txt', label: 'TXT', group: 'Text' },
|
||||
{ value: 'rtf', label: 'RTF', group: 'Text' },
|
||||
{ value: 'eml', label: 'EML', group: 'Email' },
|
||||
{ value: 'json', label: 'JSON', group: 'Data' },
|
||||
];
|
||||
|
||||
export const TO_FORMAT_OPTIONS = [
|
||||
@ -101,13 +106,14 @@ export const TO_FORMAT_OPTIONS = [
|
||||
{ value: 'webp', label: 'WEBP', group: 'Image' },
|
||||
{ value: 'html', label: 'HTML', group: 'Web' },
|
||||
{ value: 'xml', label: 'XML', group: 'Web' },
|
||||
{ value: 'json', label: 'JSON', group: 'Data' },
|
||||
];
|
||||
|
||||
// Conversion matrix - what each source format can convert to
|
||||
export const CONVERSION_MATRIX: Record<string, string[]> = {
|
||||
'any': ['pdf'], // Mixed files always convert to PDF
|
||||
'image': ['pdf'], // Multiple images always convert to PDF
|
||||
'pdf': ['png', 'jpg', 'gif', 'tiff', 'bmp', 'webp', 'docx', 'odt', 'pptx', 'odp', 'csv', 'txt', 'rtf', 'md', 'html', 'xml', 'pdfa'],
|
||||
'pdf': ['png', 'jpg', 'gif', 'tiff', 'bmp', 'webp', 'docx', 'odt', 'pptx', 'odp', 'csv', 'txt', 'rtf', 'md', 'html', 'xml', 'pdfa', 'json'],
|
||||
'docx': ['pdf'], 'doc': ['pdf'], 'odt': ['pdf'],
|
||||
'xlsx': ['pdf'], 'xls': ['pdf'], 'ods': ['pdf'],
|
||||
'pptx': ['pdf'], 'ppt': ['pdf'], 'odp': ['pdf'],
|
||||
@ -116,7 +122,8 @@ export const CONVERSION_MATRIX: Record<string, string[]> = {
|
||||
'zip': ['pdf'],
|
||||
'md': ['pdf'],
|
||||
'txt': ['pdf'], 'rtf': ['pdf'],
|
||||
'eml': ['pdf']
|
||||
'eml': ['pdf'],
|
||||
'json': ['pdf']
|
||||
};
|
||||
|
||||
// Map extensions to endpoint keys
|
||||
@ -130,7 +137,8 @@ export const EXTENSION_TO_ENDPOINT: Record<string, Record<string, string>> = {
|
||||
'csv': 'pdf-to-csv',
|
||||
'txt': 'pdf-to-text', 'rtf': 'pdf-to-text', 'md': 'pdf-to-markdown',
|
||||
'html': 'pdf-to-html', 'xml': 'pdf-to-xml',
|
||||
'pdfa': 'pdf-to-pdfa'
|
||||
'pdfa': 'pdf-to-pdfa',
|
||||
'json': 'pdf-to-json'
|
||||
},
|
||||
'docx': { 'pdf': 'file-to-pdf' }, 'doc': { 'pdf': 'file-to-pdf' }, 'odt': { 'pdf': 'file-to-pdf' },
|
||||
'xlsx': { 'pdf': 'file-to-pdf' }, 'xls': { 'pdf': 'file-to-pdf' }, 'ods': { 'pdf': 'file-to-pdf' },
|
||||
@ -141,7 +149,8 @@ export const EXTENSION_TO_ENDPOINT: Record<string, Record<string, string>> = {
|
||||
'zip': { 'pdf': 'html-to-pdf' },
|
||||
'md': { 'pdf': 'markdown-to-pdf' },
|
||||
'txt': { 'pdf': 'file-to-pdf' }, 'rtf': { 'pdf': 'file-to-pdf' },
|
||||
'eml': { 'pdf': 'eml-to-pdf' }
|
||||
'eml': { 'pdf': 'eml-to-pdf' },
|
||||
'json': { 'pdf': 'json-to-pdf' }
|
||||
};
|
||||
|
||||
export type ColorType = typeof COLOR_TYPES[keyof typeof COLOR_TYPES];
|
||||
|
||||
@ -5,7 +5,7 @@ export const CONVERT_SUPPORTED_FORMATS = [
|
||||
// OpenDocument
|
||||
'odt', 'ott', 'ods', 'ots', 'odp', 'otp', 'odg', 'otg',
|
||||
// Text formats
|
||||
'txt', 'text', 'xml', 'rtf', 'html', 'lwp', 'md',
|
||||
'txt', 'text', 'xml', 'rtf', 'html', 'lwp', 'md', 'json',
|
||||
// Images
|
||||
'bmp', 'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'pbm', 'pgm', 'ppm', 'ras', 'xbm', 'xpm', 'svg', 'svm', 'wmf', 'webp',
|
||||
// StarOffice
|
||||
|
||||
@ -5,6 +5,7 @@ import SplitPdfPanel from "../tools/Split";
|
||||
import CompressPdfPanel from "../tools/Compress";
|
||||
import OCRPanel from "../tools/OCR";
|
||||
import ConvertPanel from "../tools/Convert";
|
||||
import PdfJsonEditor from "../tools/PdfJsonEditor";
|
||||
import Sanitize from "../tools/Sanitize";
|
||||
import AddPassword from "../tools/AddPassword";
|
||||
import ChangePermissions from "../tools/ChangePermissions";
|
||||
@ -710,6 +711,19 @@ export function useTranslatedToolCatalog(): TranslatedToolCatalog {
|
||||
supportsAutomate: false,
|
||||
automationSettings: null
|
||||
},
|
||||
pdfJsonEditor: {
|
||||
icon: <LocalIcon icon="code-rounded" width="1.5rem" height="1.5rem" />,
|
||||
name: t("home.pdfJsonEditor.title", "PDF JSON Editor"),
|
||||
component: PdfJsonEditor,
|
||||
description: t("home.pdfJsonEditor.desc", "Review and edit Stirling PDF JSON exports with grouped text editing and PDF regeneration"),
|
||||
categoryId: ToolCategoryId.ADVANCED_TOOLS,
|
||||
subcategoryId: SubcategoryId.DEVELOPER_TOOLS,
|
||||
workbench: 'custom:pdfJsonEditor',
|
||||
endpoints: ["json-pdf"],
|
||||
synonyms: getSynonyms(t, "pdfJsonEditor"),
|
||||
supportsAutomate: false,
|
||||
automationSettings: null
|
||||
},
|
||||
devApi: {
|
||||
icon: <LocalIcon icon="open-in-new-rounded" width="1.5rem" height="1.5rem" style={{ color: "#2F7BF6" }} />,
|
||||
name: t("home.devApi.title", "API"),
|
||||
|
||||
289
frontend/src/tools/PdfJsonEditor.tsx
Normal file
289
frontend/src/tools/PdfJsonEditor.tsx
Normal file
@ -0,0 +1,289 @@
|
||||
import { useCallback, useEffect, useMemo, useState, useRef } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import DescriptionIcon from '@mui/icons-material/DescriptionOutlined';
|
||||
|
||||
import { useToolWorkflow } from '../contexts/ToolWorkflowContext';
|
||||
import { useNavigationActions, useNavigationState } from '../contexts/NavigationContext';
|
||||
import { BaseToolProps, ToolComponent } from '../types/tool';
|
||||
import { CONVERSION_ENDPOINTS } from '../constants/convertConstants';
|
||||
import apiClient from '../services/apiClient';
|
||||
import { downloadBlob, downloadTextAsFile } from '../utils/downloadUtils';
|
||||
import { getFilenameFromHeaders } from '../utils/fileResponseUtils';
|
||||
import {
|
||||
PdfJsonDocument,
|
||||
TextGroup,
|
||||
PdfJsonEditorViewData,
|
||||
} from './pdfJsonEditorTypes';
|
||||
import {
|
||||
deepCloneDocument,
|
||||
getDirtyPages,
|
||||
groupDocumentText,
|
||||
restoreGlyphElements,
|
||||
} from './pdfJsonEditorUtils';
|
||||
import PdfJsonEditorView from '../components/tools/pdfJsonEditor/PdfJsonEditorView';
|
||||
|
||||
// Identifier of this tool's custom workbench view.
// NOTE(review): presumably the key passed to the register/unregister/set-data workbench
// helpers obtained from useToolWorkflow() - confirm in the component body.
const VIEW_ID = 'pdfJsonEditorView';
|
||||
// Workbench identifier for this tool; the same literal is used as the `workbench` value of the
// pdfJsonEditor entry in the tool catalog.
const WORKBENCH_ID = 'custom:pdfJsonEditor' as const;
|
||||
|
||||
/**
 * Derives a base file name (without its last extension) from a file name.
 *
 * Falls back to `'document'` when no usable name remains: for a missing or blank input, and
 * also when stripping the extension leaves nothing (e.g. `'.json'`), which previously produced
 * an empty base name.
 *
 * @param name Original file name; may be absent.
 * @returns The name with its final `.ext` removed, or `'document'`.
 */
const sanitizeBaseName = (name?: string | null): string => {
  const fallback = 'document';
  if (!name || name.trim().length === 0) {
    return fallback;
  }
  // Only the last extension is removed: 'archive.tar.gz' -> 'archive.tar'.
  const withoutExtension = name.replace(/\.[^.]+$/u, '');
  return withoutExtension.trim().length > 0 ? withoutExtension : fallback;
};
|
||||
|
||||
/**
 * Shallow runtime check that a parsed JSON value can be treated as a
 * {@link PdfJsonDocument}: it must be a plain (non-array) object whose `pages`
 * entry, when present, is an array. Deeper fields are not validated.
 */
const isPdfJsonDocumentLike = (value: unknown): value is PdfJsonDocument => {
  if (typeof value !== 'object' || value === null || Array.isArray(value)) {
    return false;
  }
  const { pages } = value as { pages?: unknown };
  return pages === undefined || pages === null || Array.isArray(pages);
};

/**
 * Extracts a human-readable message from a failed request.
 *
 * The conversion request is sent with `responseType: 'blob'`, so an error body
 * arrives as a `Blob`; stringifying it directly would produce "[object Blob]".
 * The blob is therefore read as text first. Falls back to the error's own
 * `message`, then to `fallback`.
 */
const resolveRequestErrorMessage = async (error: unknown, fallback: string): Promise<string> => {
  const candidate = (typeof error === 'object' && error !== null ? error : {}) as {
    response?: { data?: unknown };
    message?: unknown;
  };
  const responseData = candidate.response?.data;

  if (typeof Blob !== 'undefined' && responseData instanceof Blob) {
    try {
      const bodyText = (await responseData.text()).trim();
      if (bodyText.length > 0) {
        return bodyText;
      }
    } catch {
      // Unreadable body: fall through to the generic message sources below.
    }
  } else if (typeof responseData === 'string' && responseData.trim().length > 0) {
    return responseData;
  }

  if (typeof candidate.message === 'string' && candidate.message.length > 0) {
    return candidate.message;
  }
  return fallback;
};

/**
 * Tool component behind the PDF JSON editor.
 *
 * It renders nothing itself: it owns the editor state (loaded document, text
 * groups, selected page, errors), registers `PdfJsonEditorView` as a custom
 * workbench view, and pushes a fresh `PdfJsonEditorViewData` object to that
 * view whenever the state or handlers change.
 *
 * @param onComplete - Optional callback that receives the exported JSON or PDF file.
 * @param onError - Optional callback that receives the PDF conversion error message.
 */
const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
  const { t } = useTranslation();
  const {
    registerCustomWorkbenchView,
    unregisterCustomWorkbenchView,
    setCustomWorkbenchViewData,
    clearCustomWorkbenchViewData,
    setLeftPanelView,
  } = useToolWorkflow();
  const { actions: navigationActions } = useNavigationActions();
  const navigationState = useNavigationState();

  const [loadedDocument, setLoadedDocument] = useState<PdfJsonDocument | null>(null);
  const [groupsByPage, setGroupsByPage] = useState<TextGroup[][]>([]);
  const [selectedPage, setSelectedPage] = useState(0);
  const [fileName, setFileName] = useState('');
  const [errorMessage, setErrorMessage] = useState<string | null>(null);
  const [isGeneratingPdf, setIsGeneratingPdf] = useState(false);

  const dirtyPages = useMemo(() => getDirtyPages(groupsByPage), [groupsByPage]);
  const hasChanges = useMemo(() => dirtyPages.some(Boolean), [dirtyPages]);
  const hasDocument = loadedDocument !== null;
  const viewLabel = useMemo(() => t('pdfJsonEditor.viewLabel', 'JSON Editor'), [t]);

  // Rebuilds the editable groups from a document (or clears them) and jumps back to page 0.
  const resetToDocument = useCallback((document: PdfJsonDocument | null) => {
    if (!document) {
      setGroupsByPage([]);
      setSelectedPage(0);
      return;
    }
    const cloned = deepCloneDocument(document);
    const groups = groupDocumentText(cloned);
    setGroupsByPage(groups);
    setSelectedPage(0);
  }, []);

  // Reads and parses the chosen JSON file; any failure clears the editor state
  // and surfaces a translated error message.
  const handleLoadFile = useCallback(
    async (file: File | null) => {
      if (!file) {
        return;
      }
      try {
        const content = await file.text();
        const parsed: unknown = JSON.parse(content);
        // `JSON.parse` also accepts `null`, primitives and arrays; reject those
        // up front instead of storing them as the loaded document.
        if (!isPdfJsonDocumentLike(parsed)) {
          throw new Error('JSON root is not a PDF JSON document object');
        }
        setLoadedDocument(parsed);
        resetToDocument(parsed);
        setFileName(file.name);
        setErrorMessage(null);
      } catch (error) {
        console.error('Failed to parse JSON', error);
        setLoadedDocument(null);
        // Also resets the selected page so it cannot point at a page that no longer exists.
        resetToDocument(null);
        setFileName('');
        setErrorMessage(
          t(
            'pdfJsonEditor.errors.invalidJson',
            'Unable to read the JSON file. Ensure it was generated by the PDF to JSON tool.'
          )
        );
      }
    },
    [resetToDocument, t]
  );

  const handleSelectPage = useCallback((pageIndex: number) => {
    setSelectedPage(pageIndex);
  }, []);

  // Replaces the text of one group on one page, leaving every other group object untouched.
  const handleGroupTextChange = useCallback((pageIndex: number, groupId: string, value: string) => {
    setGroupsByPage((previous) =>
      previous.map((groups, idx) =>
        idx !== pageIndex
          ? groups
          : groups.map((group) => (group.id === groupId ? { ...group, text: value } : group))
      )
    );
  }, []);

  // Discards all edits by regrouping the originally loaded document.
  const handleResetEdits = useCallback(() => {
    if (!loadedDocument) {
      return;
    }
    resetToDocument(loadedDocument);
    setErrorMessage(null);
  }, [loadedDocument, resetToDocument]);

  // Produces the edited document plus the `.json` file name used for export/upload.
  const buildPayload = useCallback(() => {
    if (!loadedDocument) {
      return null;
    }

    const updatedDocument = restoreGlyphElements(loadedDocument, groupsByPage);
    const baseName = sanitizeBaseName(fileName || loadedDocument.metadata?.title || undefined);
    return {
      document: updatedDocument,
      filename: `${baseName}.json`,
    };
  }, [fileName, groupsByPage, loadedDocument]);

  // Downloads the edited document as pretty-printed JSON and reports it via `onComplete`.
  const handleDownloadJson = useCallback(() => {
    const payload = buildPayload();
    if (!payload) {
      return;
    }

    const { document, filename } = payload;
    const serialized = JSON.stringify(document, null, 2);
    downloadTextAsFile(serialized, filename, 'application/json');

    if (onComplete) {
      const exportedFile = new File([serialized], filename, { type: 'application/json' });
      onComplete([exportedFile]);
    }
  }, [buildPayload, onComplete]);

  // Posts the edited JSON to the JSON-to-PDF endpoint and downloads the returned PDF.
  const handleGeneratePdf = useCallback(async () => {
    const payload = buildPayload();
    if (!payload) {
      return;
    }

    const { document, filename } = payload;
    const serialized = JSON.stringify(document, null, 2);
    const jsonFile = new File([serialized], filename, { type: 'application/json' });

    const formData = new FormData();
    formData.append('fileInput', jsonFile);

    try {
      setIsGeneratingPdf(true);
      const response = await apiClient.post(CONVERSION_ENDPOINTS['json-pdf'], formData, {
        responseType: 'blob',
      });

      const contentDisposition = response.headers?.['content-disposition'] ?? '';
      const detectedName = getFilenameFromHeaders(contentDisposition);
      const baseName = sanitizeBaseName(filename).replace(/-edited$/u, '');
      // Prefer the server-provided name; otherwise derive one from the JSON file name.
      const downloadName = detectedName || `${baseName || 'document'}.pdf`;

      downloadBlob(response.data, downloadName);

      if (onComplete) {
        const pdfFile = new File([response.data], downloadName, { type: 'application/pdf' });
        onComplete([pdfFile]);
      }
      setErrorMessage(null);
    } catch (error: unknown) {
      console.error('Failed to convert JSON back to PDF', error);
      const message = await resolveRequestErrorMessage(
        error,
        t('pdfJsonEditor.errors.pdfConversion', 'Unable to convert the edited JSON back into a PDF.')
      );
      setErrorMessage(message);
      onError?.(message);
    } finally {
      setIsGeneratingPdf(false);
    }
  }, [buildPayload, onComplete, onError, t]);

  // Snapshot of state and handlers consumed by the workbench view.
  const viewData = useMemo<PdfJsonEditorViewData>(() => ({
    document: loadedDocument,
    groupsByPage,
    selectedPage,
    dirtyPages,
    hasDocument,
    fileName,
    errorMessage,
    isGeneratingPdf,
    hasChanges,
    onLoadJson: handleLoadFile,
    onSelectPage: handleSelectPage,
    onGroupEdit: handleGroupTextChange,
    onReset: handleResetEdits,
    onDownloadJson: handleDownloadJson,
    onGeneratePdf: handleGeneratePdf,
  }), [
    dirtyPages,
    errorMessage,
    fileName,
    groupsByPage,
    handleDownloadJson,
    handleGeneratePdf,
    handleGroupTextChange,
    handleLoadFile,
    handleResetEdits,
    handleSelectPage,
    hasChanges,
    hasDocument,
    isGeneratingPdf,
    loadedDocument,
    selectedPage,
  ]);

  // Keeps the newest view data reachable from the registration effect without
  // making that effect depend on (and re-run for) every view data change.
  const latestViewDataRef = useRef<PdfJsonEditorViewData>(viewData);
  latestViewDataRef.current = viewData;

  // Registers the custom view and hides the left panel while the tool is
  // mounted; the cleanup removes the view and restores the tool picker.
  useEffect(() => {
    registerCustomWorkbenchView({
      id: VIEW_ID,
      workbenchId: WORKBENCH_ID,
      label: viewLabel,
      icon: <DescriptionIcon fontSize="small" />,
      component: PdfJsonEditorView,
    });
    setLeftPanelView('hidden');
    setCustomWorkbenchViewData(VIEW_ID, latestViewDataRef.current);

    return () => {
      clearCustomWorkbenchViewData(VIEW_ID);
      unregisterCustomWorkbenchView(VIEW_ID);
      setLeftPanelView('toolPicker');
    };
  }, [
    clearCustomWorkbenchViewData,
    registerCustomWorkbenchView,
    setCustomWorkbenchViewData,
    setLeftPanelView,
    viewLabel,
    unregisterCustomWorkbenchView,
  ]);

  // Switches to this tool's workbench whenever the tool is selected but another workbench is showing.
  useEffect(() => {
    if (
      navigationState.selectedTool === 'pdfJsonEditor' &&
      navigationState.workbench !== WORKBENCH_ID
    ) {
      navigationActions.setWorkbench(WORKBENCH_ID);
    }
  }, [navigationActions, navigationState.selectedTool, navigationState.workbench]);

  const lastSentViewDataRef = useRef<PdfJsonEditorViewData | null>(null);

  // Pushes view data to the workbench only when the memoized object actually changed.
  useEffect(() => {
    if (lastSentViewDataRef.current === viewData) {
      return;
    }
    lastSentViewDataRef.current = viewData;
    setCustomWorkbenchViewData(VIEW_ID, viewData);
  }, [setCustomWorkbenchViewData, viewData]);

  // All editing happens in the custom workbench view.
  return null;
};
|
||||
|
||||
// Same component object, viewed through the ToolComponent contract so the
// static members below can be attached to it.
const PdfJsonEditorTool = PdfJsonEditor as ToolComponent;

// The editor is interactive only; asking it for an automation operation is an error.
PdfJsonEditorTool.tool = () => {
  throw new Error('PDF JSON Editor does not support automation operations.');
};

// Default parameters: an empty list of groups.
PdfJsonEditorTool.getDefaultParameters = () => ({
  groups: [],
});

export default PdfJsonEditorTool;
|
||||
110
frontend/src/tools/pdfJsonEditorTypes.ts
Normal file
110
frontend/src/tools/pdfJsonEditorTypes.ts
Normal file
@ -0,0 +1,110 @@
|
||||
/**
 * Shape of the `cidSystemInfo` entry of a {@link PdfJsonFont}.
 * Every field is optional and may be `null`.
 */
export interface PdfJsonFontCidSystemInfo {
  registry?: string | null;
  ordering?: string | null;
  supplement?: number | null;
}
|
||||
|
||||
/**
 * One entry of the document's `fonts` list.
 *
 * Every field is optional; all except `id` may also be `null`.
 * NOTE(review): field meanings presumably mirror the backend JSON model —
 * confirm against the server-side definition.
 */
export interface PdfJsonFont {
  // Identification
  id?: string;
  pageNumber?: number | null;
  uid?: string | null;
  baseName?: string | null;

  // Font dictionary attributes
  subtype?: string | null;
  encoding?: string | null;
  cidSystemInfo?: PdfJsonFontCidSystemInfo | null;
  embedded?: boolean | null;

  // Font data carried as strings
  program?: string | null;
  programFormat?: string | null;
  toUnicode?: string | null;

  // Remaining descriptors
  standard14Name?: string | null;
  fontDescriptorFlags?: number | null;
}
|
||||
|
||||
/**
 * A single positioned piece of text on a page.
 *
 * The editor utilities read the position from `textMatrix` when it has exactly
 * six entries (index 4 as x, index 5 as baseline) and fall back to `x` / `y`
 * otherwise. Every field is optional and may be `null`.
 */
export interface PdfJsonTextElement {
  // Content
  text?: string | null;

  // Font
  fontId?: string | null;
  fontSize?: number | null;
  fontMatrixSize?: number | null;
  fontSizeInPt?: number | null;
  renderingMode?: number | null;

  // Geometry
  x?: number | null;
  y?: number | null;
  width?: number | null;
  height?: number | null;
  textMatrix?: number[] | null;
}
|
||||
|
||||
/**
 * A stream attached to a page (see `PdfJsonPage.contentStreams`): a free-form
 * dictionary plus its data carried as a string.
 */
export interface PdfJsonStream {
  dictionary?: Record<string, unknown> | null;
  rawData?: string | null;
}
|
||||
|
||||
/**
 * One page of a {@link PdfJsonDocument}: its dimensions, rotation, text
 * elements, and the raw resources and content streams that travel with it.
 */
export interface PdfJsonPage {
  // Page attributes
  pageNumber?: number | null;
  width?: number | null;
  height?: number | null;
  rotation?: number | null;

  // Editable text
  textElements?: PdfJsonTextElement[] | null;

  // Data carried along with the page; `resources` is deliberately untyped.
  resources?: unknown;
  contentStreams?: PdfJsonStream[] | null;
}
|
||||
|
||||
/**
 * Document-level metadata. The editor falls back to `title` when it has no
 * file name to build a download name from. Dates are carried as strings.
 */
export interface PdfJsonMetadata {
  // Descriptive fields
  title?: string | null;
  author?: string | null;
  subject?: string | null;
  keywords?: string | null;

  // Tooling
  creator?: string | null;
  producer?: string | null;

  // Dates
  creationDate?: string | null;
  modificationDate?: string | null;

  // Miscellaneous
  trapped?: string | null;
  numberOfPages?: number | null;
}
|
||||
|
||||
/**
 * Root of the editable JSON representation of a PDF: metadata, an XMP
 * metadata string, the font list, and the pages.
 */
export interface PdfJsonDocument {
  metadata?: PdfJsonMetadata | null;
  xmpMetadata?: string | null;
  fonts?: PdfJsonFont[] | null;
  pages?: PdfJsonPage[] | null;
}
|
||||
|
||||
/**
 * Axis-aligned rectangle described by its four edges.
 * When boxes are merged, `left` / `top` take the minimum and
 * `right` / `bottom` the maximum of the inputs.
 */
export interface BoundingBox {
  // Horizontal extent
  left: number;
  right: number;

  // Vertical extent
  top: number;
  bottom: number;
}
|
||||
|
||||
/**
 * A run of neighbouring text elements that the editor presents as one
 * editable string.
 *
 * `text` holds the current (possibly edited) value, while `originalText` and
 * `originalElements` keep the state at grouping time; a group counts as
 * edited when `text !== originalText`.
 */
export interface TextGroup {
  // Identity: `id` is built as `${pageIndex}-${sequenceNumber}`.
  id: string;
  pageIndex: number;

  // Font of the group's first element.
  fontId?: string | null;
  fontSize?: number | null;

  // Working copies and the untouched originals of the member elements.
  elements: PdfJsonTextElement[];
  originalElements: PdfJsonTextElement[];

  // Current and original text.
  text: string;
  originalText: string;

  // Union of the member elements' bounds.
  bounds: BoundingBox;
}
|
||||
|
||||
/**
 * Fallback page width used when a page does not declare a usable width.
 * 612 × 792 corresponds to 8.5 × 11 inches at 72 units per inch.
 */
export const DEFAULT_PAGE_WIDTH = 612;

/** Fallback page height used when a page does not declare a usable height. */
export const DEFAULT_PAGE_HEIGHT = 792;
|
||||
|
||||
/**
 * Everything the editor tool hands to its workbench view: a snapshot of the
 * editor state plus the callbacks the view invokes in response to user actions.
 */
export interface PdfJsonEditorViewData {
  // --- State snapshot ---
  document: PdfJsonDocument | null;
  groupsByPage: TextGroup[][];
  selectedPage: number;
  /** One flag per page; `true` when any group on that page was edited. */
  dirtyPages: boolean[];
  /** `true` when a document is loaded. */
  hasDocument: boolean;
  fileName: string;
  errorMessage: string | null;
  /** `true` while the JSON-to-PDF request is in flight. */
  isGeneratingPdf: boolean;
  /** `true` when at least one page is dirty. */
  hasChanges: boolean;

  // --- Callbacks ---
  onLoadJson: (file: File | null) => Promise<void> | void;
  onSelectPage: (pageIndex: number) => void;
  onGroupEdit: (pageIndex: number, groupId: string, value: string) => void;
  onReset: () => void;
  onDownloadJson: () => void;
  onGeneratePdf: () => void;
}
|
||||
344
frontend/src/tools/pdfJsonEditorUtils.ts
Normal file
344
frontend/src/tools/pdfJsonEditorUtils.ts
Normal file
@ -0,0 +1,344 @@
|
||||
import {
|
||||
BoundingBox,
|
||||
PdfJsonDocument,
|
||||
PdfJsonPage,
|
||||
PdfJsonTextElement,
|
||||
TextGroup,
|
||||
DEFAULT_PAGE_HEIGHT,
|
||||
DEFAULT_PAGE_WIDTH,
|
||||
} from './pdfJsonEditorTypes';
|
||||
|
||||
/** Minimum baseline difference within which two elements still share a line. */
const LINE_TOLERANCE = 2;

/** Fraction of the average font size that a horizontal gap must exceed to count as a break. */
const GAP_FACTOR = 0.6;

/** Lower bound for the gap threshold, applied when the font-size based value is smaller. */
const SPACE_MIN_GAP = 1.5;
|
||||
|
||||
/**
 * Returns `value` unless it is `null`, `undefined` or `NaN`, in which case
 * `fallback` is returned instead.
 *
 * @param value - Possibly missing number.
 * @param fallback - Replacement for a missing value; defaults to `0`.
 * @returns `value` when usable, otherwise `fallback`.
 */
export const valueOr = (value: number | null | undefined, fallback = 0): number =>
  value == null || Number.isNaN(value) ? fallback : value;
|
||||
|
||||
/**
 * Copies a text element so the copy can be modified independently: the
 * top-level fields are copied and `textMatrix` gets its own array. A `null`
 * matrix is normalised to `undefined` on the copy.
 *
 * @param element - Element to copy.
 * @returns A new element object.
 */
export const cloneTextElement = (element: PdfJsonTextElement): PdfJsonTextElement => {
  const matrix = element.textMatrix;
  const copy: PdfJsonTextElement = { ...element };
  copy.textMatrix = matrix ? [...matrix] : matrix ?? undefined;
  return copy;
};
|
||||
|
||||
/**
 * Vertical position of an element: the sixth entry of a six-value
 * `textMatrix` when available, otherwise the element's `y`. Missing values
 * resolve to `0`.
 */
const getBaseline = (element: PdfJsonTextElement): number => {
  const matrix = element.textMatrix;
  const rawBaseline = matrix?.length === 6 ? matrix[5] : element.y;
  return valueOr(rawBaseline);
};
|
||||
|
||||
/**
 * Horizontal position of an element: the fifth entry of a six-value
 * `textMatrix` when available, otherwise the element's `x`. Missing values
 * resolve to `0`.
 */
const getX = (element: PdfJsonTextElement): number => {
  const matrix = element.textMatrix;
  const rawX = matrix?.length === 6 ? matrix[4] : element.x;
  return valueOr(rawX);
};
|
||||
|
||||
/**
 * Width of an element. When no width is recorded (missing or `0`) and the
 * element has text, the width is estimated as
 * `fontSize * max(text.length * 0.45, 0.5)`, with a font size of 12 assumed
 * when none is set.
 */
const getWidth = (element: PdfJsonTextElement): number => {
  const declaredWidth = valueOr(element.width, 0);
  const text = element.text;
  if (declaredWidth !== 0 || !text) {
    return declaredWidth;
  }
  const widthFactor = Math.max(text.length * 0.45, 0.5);
  return valueOr(element.fontSize, 12) * widthFactor;
};
|
||||
|
||||
/** Font size of an element, defaulting to 12 when none is recorded. */
const getFontSize = (element: PdfJsonTextElement): number => {
  return valueOr(element.fontSize, 12);
};
|
||||
|
||||
/**
 * Height of an element; when none is recorded (missing or `0`) it is
 * estimated as 1.05 times the font size.
 */
const getHeight = (element: PdfJsonTextElement): number => {
  const declaredHeight = valueOr(element.height);
  return declaredHeight === 0 ? getFontSize(element) * 1.05 : declaredHeight;
};
|
||||
|
||||
/**
 * Bounding box of a single element: it starts at the element's x position,
 * spans its width, uses the baseline as `bottom`, and places `top` one
 * element height below that value numerically (`bottom - height`).
 */
const getElementBounds = (element: PdfJsonTextElement): BoundingBox => {
  const left = getX(element);
  const bottom = getBaseline(element);
  return {
    left,
    right: left + getWidth(element),
    top: bottom - getHeight(element),
    bottom,
  };
};
|
||||
|
||||
/**
 * Smallest box containing every given box (minimum `left` / `top`, maximum
 * `right` / `bottom`). An empty list yields an all-zero box.
 */
const mergeBounds = (bounds: BoundingBox[]): BoundingBox => {
  if (bounds.length === 0) {
    return { left: 0, right: 0, top: 0, bottom: 0 };
  }

  const union: BoundingBox = { ...bounds[0] };
  for (const box of bounds) {
    union.left = Math.min(union.left, box.left);
    union.right = Math.max(union.right, box.right);
    union.top = Math.min(union.top, box.top);
    union.bottom = Math.max(union.bottom, box.bottom);
  }
  return union;
};
|
||||
|
||||
/**
 * Decides whether the horizontal gap between two consecutive elements is wide
 * enough to be rendered as a space: the gap must exceed the larger of
 * `SPACE_MIN_GAP` and `GAP_FACTOR` times the elements' average font size.
 */
const shouldInsertSpace = (prev: PdfJsonTextElement, current: PdfJsonTextElement): boolean => {
  const gap = getX(current) - (getX(prev) + getWidth(prev));
  const averageFontSize = (getFontSize(prev) + getFontSize(current)) / 2;
  return gap > Math.max(SPACE_MIN_GAP, averageFontSize * GAP_FACTOR);
};
|
||||
|
||||
/**
 * Concatenates the texts of the given elements in order. A single space is
 * inserted before an element when the gap to its predecessor is wide enough
 * and the element's own text does not already start with whitespace.
 */
const buildGroupText = (elements: PdfJsonTextElement[]): string =>
  elements.reduce((text, element, index) => {
    const value = element.text ?? '';
    if (index === 0) {
      return text + value;
    }

    const gapIsWide = shouldInsertSpace(elements[index - 1], element);
    const separator = gapIsWide && !/^\s/u.test(value) ? ' ' : '';
    return text + separator + value;
  }, '');
|
||||
|
||||
/**
 * Builds a {@link TextGroup} from a run of elements.
 *
 * The group receives independent copies of the elements for both `elements`
 * and `originalElements`, takes its font information from the first element,
 * and starts with `text` equal to `originalText`.
 *
 * @param pageIndex - Zero-based index of the page the elements belong to.
 * @param idSuffix - Sequence number that makes the group id unique on its page.
 * @param elements - Elements forming the group, in reading order.
 */
const createGroup = (
  pageIndex: number,
  idSuffix: number,
  elements: PdfJsonTextElement[],
): TextGroup => {
  const [first] = elements;
  const workingCopies = elements.map(cloneTextElement);
  const initialText = buildGroupText(elements);

  return {
    id: `${pageIndex}-${idSuffix}`,
    pageIndex,
    fontId: first?.fontId,
    fontSize: first?.fontSize,
    elements: workingCopies,
    originalElements: workingCopies.map(cloneTextElement),
    text: initialText,
    originalText: initialText,
    bounds: mergeBounds(elements.map(getElementBounds)),
  };
};
|
||||
|
||||
/**
 * Splits a page's text elements into editable groups.
 *
 * Steps:
 * 1. Copy the elements and drop those without text.
 * 2. Sort by baseline (descending) and bucket elements into lines whose
 *    baselines differ by no more than `max(LINE_TOLERANCE, fontSize * 0.12)`.
 * 3. Within each line, order by x position and start a new group whenever the
 *    gap to the previous element exceeds the split threshold (which is 1.4
 *    times larger when both elements share a font).
 *
 * @param page - Page to process; a missing page or a page without text yields `[]`.
 * @param pageIndex - Zero-based page index, recorded on each group and in its id.
 * @returns The groups of the page, line by line.
 */
export const groupPageTextElements = (page: PdfJsonPage | null | undefined, pageIndex: number): TextGroup[] => {
  const sourceElements = page?.textElements;
  if (!sourceElements || sourceElements.length === 0) {
    return [];
  }

  const candidates: PdfJsonTextElement[] = [];
  for (const rawElement of sourceElements) {
    const copy = cloneTextElement(rawElement);
    if (copy.text !== null && copy.text !== undefined) {
      candidates.push(copy);
    }
  }
  candidates.sort((a, b) => getBaseline(b) - getBaseline(a));

  // Each line remembers the baseline of its first element as the reference.
  const lines: { baseline: number; elements: PdfJsonTextElement[] }[] = [];
  for (const element of candidates) {
    const baseline = getBaseline(element);
    const tolerance = Math.max(LINE_TOLERANCE, getFontSize(element) * 0.12);
    const matchingLine = lines.find((line) => Math.abs(line.baseline - baseline) <= tolerance);

    if (matchingLine) {
      matchingLine.elements.push(element);
    } else {
      lines.push({ baseline, elements: [element] });
    }
  }

  const groups: TextGroup[] = [];
  for (const line of lines) {
    line.elements.sort((a, b) => getX(a) - getX(b));

    let bucket: PdfJsonTextElement[] = [];
    for (const element of line.elements) {
      if (bucket.length === 0) {
        bucket.push(element);
        continue;
      }

      const previous = bucket[bucket.length - 1];
      const gap = getX(element) - (getX(previous) + getWidth(previous));
      const averageFontSize = (getFontSize(previous) + getFontSize(element)) / 2;
      const baseThreshold = Math.max(SPACE_MIN_GAP, averageFontSize * GAP_FACTOR);
      const fontFactor = previous.fontId === element.fontId ? 1.4 : 1.0;

      if (gap > baseThreshold * fontFactor) {
        // The number of groups created so far doubles as the next id suffix.
        groups.push(createGroup(pageIndex, groups.length, bucket));
        bucket = [element];
      } else {
        bucket.push(element);
      }
    }

    if (bucket.length > 0) {
      groups.push(createGroup(pageIndex, groups.length, bucket));
    }
  }

  return groups;
};
|
||||
|
||||
/**
 * Groups the text of every page of a document.
 *
 * @param document - Document to process; a missing document or page list yields `[]`.
 * @returns One array of groups per page, in page order.
 */
export const groupDocumentText = (document: PdfJsonDocument | null | undefined): TextGroup[][] =>
  (document?.pages ?? []).map((page, pageIndex) => groupPageTextElements(page, pageIndex));
|
||||
|
||||
/**
 * Creates a deep copy of a document. Uses `structuredClone` when the runtime
 * provides it and falls back to a JSON round trip otherwise.
 *
 * @param document - Document to copy.
 * @returns An independent copy of the document.
 */
export const deepCloneDocument = (document: PdfJsonDocument): PdfJsonDocument => {
  const canStructuredClone = typeof structuredClone === 'function';
  return canStructuredClone ? structuredClone(document) : JSON.parse(JSON.stringify(document));
};
|
||||
|
||||
/**
 * Width and height of a page, substituting `DEFAULT_PAGE_WIDTH` /
 * `DEFAULT_PAGE_HEIGHT` for values that are missing or `NaN`.
 */
export const pageDimensions = (page: PdfJsonPage | null | undefined): { width: number; height: number } => {
  const width = valueOr(page?.width, DEFAULT_PAGE_WIDTH);
  const height = valueOr(page?.height, DEFAULT_PAGE_HEIGHT);
  return { width, height };
};
|
||||
|
||||
/**
 * Collapses a group into one element: a copy of the group's first original
 * element whose text is replaced by the group's current text.
 *
 * @param group - Group to collapse; expected to contain at least one original element.
 * @returns The merged element.
 */
export const createMergedElement = (group: TextGroup): PdfJsonTextElement => {
  const reference = group.originalElements[0];
  const merged: PdfJsonTextElement = { ...cloneTextElement(reference), text: group.text };

  const referenceMatrix = reference.textMatrix;
  if (referenceMatrix?.length === 6) {
    merged.textMatrix = [...referenceMatrix];
  }
  return merged;
};
|
||||
|
||||
/**
 * Writes `text` back onto `elements` in place, keeping each element's
 * original character count where possible.
 *
 * Characters are counted as Unicode code points. Every element except the
 * last receives as many characters as it originally held (at least one); the
 * last element receives whatever remains, so surplus text is never dropped
 * and a shorter text leaves the trailing elements empty.
 *
 * @param text - New text of the group; `undefined` is treated as empty.
 * @param elements - Elements to update; they are mutated.
 */
const distributeTextAcrossElements = (text: string | undefined, elements: PdfJsonTextElement[]): void => {
  if (elements.length === 0) {
    return;
  }

  const targetChars = Array.from(text ?? '');
  const lastIndex = elements.length - 1;
  let cursor = 0;

  elements.forEach((element, index) => {
    if (index === lastIndex) {
      // The final element absorbs the remainder, so nothing is left over afterwards.
      element.text = targetChars.slice(cursor).join('');
      return;
    }

    // An originally empty element still takes one character so text can flow into it.
    const sliceLength = Math.max(Array.from(element.text ?? '').length, 1);
    element.text = targetChars.slice(cursor, cursor + sliceLength).join('');
    cursor = Math.min(cursor + sliceLength, targetChars.length);
  });
};
|
||||
|
||||
/**
 * Returns a copy of `source` in which every edited group is replaced by a
 * single merged element carrying the new text.
 *
 * Pages without groups or without edits are returned exactly as cloned from
 * `source`, so their element order, null-text elements and `contentStreams`
 * value are left untouched. On edited pages the text elements are rebuilt from
 * the groups and a missing `contentStreams` becomes an empty array.
 *
 * @param source - Document to copy; it is not modified.
 * @param groupsByPage - Groups per page, indexed like `source.pages`.
 * @returns The updated document copy.
 */
export const buildUpdatedDocument = (
  source: PdfJsonDocument,
  groupsByPage: TextGroup[][],
): PdfJsonDocument => {
  const updated = deepCloneDocument(source);
  const pages = updated.pages ?? [];

  updated.pages = pages.map((page, pageIndex) => {
    const groups = groupsByPage[pageIndex] ?? [];
    const hasPageChanges = groups.some((group) => group.text !== group.originalText);
    if (!hasPageChanges) {
      // Nothing edited on this page (or no groups at all): keep it as is.
      return page;
    }

    const updatedElements: PdfJsonTextElement[] = groups.flatMap((group) =>
      group.text === group.originalText
        ? group.originalElements.map(cloneTextElement)
        : [createMergedElement(group)],
    );

    return {
      ...page,
      textElements: updatedElements,
      contentStreams: page.contentStreams ?? [],
    };
  });

  return updated;
};
|
||||
|
||||
/**
 * Returns a copy of `source` in which the text of every edited group has been
 * redistributed across that group's original elements, so the number of
 * elements per group stays the same.
 *
 * Pages without groups or without edits are returned exactly as cloned from
 * `source`, so their element order, null-text elements and `contentStreams`
 * value are left untouched. On edited pages the text elements are rebuilt from
 * the groups and a missing `contentStreams` becomes an empty array.
 *
 * @param source - Document to copy; it is not modified.
 * @param groupsByPage - Groups per page, indexed like `source.pages`.
 * @returns The updated document copy.
 */
export const restoreGlyphElements = (
  source: PdfJsonDocument,
  groupsByPage: TextGroup[][],
): PdfJsonDocument => {
  const updated = deepCloneDocument(source);
  const pages = updated.pages ?? [];

  updated.pages = pages.map((page, pageIndex) => {
    const groups = groupsByPage[pageIndex] ?? [];
    const pageChanged = groups.some((group) => group.text !== group.originalText);
    if (!pageChanged) {
      // Nothing edited on this page (or no groups at all): keep it as is.
      return page;
    }

    const rebuiltElements: PdfJsonTextElement[] = [];
    groups.forEach((group) => {
      // Work on copies so the group's stored originals are never mutated.
      const originals = group.originalElements.map(cloneTextElement);
      if (group.text !== group.originalText) {
        distributeTextAcrossElements(group.text, originals);
      }
      rebuiltElements.push(...originals);
    });

    return {
      ...page,
      textElements: rebuiltElements,
      contentStreams: page.contentStreams ?? [],
    };
  });

  return updated;
};
|
||||
|
||||
/**
 * Reports, for each page, whether any of its groups has been edited, meaning
 * the group's `text` differs from its `originalText`.
 *
 * @param groupsByPage - Groups per page.
 * @returns One boolean per page, in the same order.
 */
export const getDirtyPages = (groupsByPage: TextGroup[][]): boolean[] => {
  const isEdited = (group: TextGroup): boolean => group.text !== group.originalText;
  return groupsByPage.map((pageGroups) => pageGroups.some(isEdited));
};
|
||||
@ -46,6 +46,7 @@ export const REGULAR_TOOL_IDS = [
|
||||
'validateSignature',
|
||||
'replaceColor',
|
||||
'showJS',
|
||||
'pdfJsonEditor',
|
||||
'bookletImposition',
|
||||
] as const;
|
||||
|
||||
@ -92,4 +93,3 @@ type Disjoint<A, B> = [A & B] extends [never] ? true : false;
|
||||
type _Check1 = Assert<Disjoint<RegularToolId, SuperToolId>>;
|
||||
type _Check2 = Assert<Disjoint<RegularToolId, LinkToolId>>;
|
||||
type _Check3 = Assert<Disjoint<SuperToolId, LinkToolId>>;
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user