testing more

This commit is contained in:
Anthony Stirling 2025-10-23 12:50:30 +01:00
parent 4d9cf45009
commit 716fb3bbde
8 changed files with 356 additions and 51 deletions

View File

@ -0,0 +1,21 @@
package stirling.software.SPDF.model.json;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * JSON model for a color attached to a text element.
 *
 * <p>Properties that are {@code null} are left out of the serialized JSON.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class PdfJsonTextColor {

    /** Name of the color space the component values belong to. */
    private String colorSpace;

    /** Component values of the color. */
    private List<Float> components;
}

View File

@ -22,10 +22,17 @@ public class PdfJsonTextElement {
private Float fontSize; private Float fontSize;
private Float fontMatrixSize; private Float fontMatrixSize;
private Float fontSizeInPt; private Float fontSizeInPt;
private Float characterSpacing;
private Float wordSpacing;
private Float horizontalScaling;
private Float leading;
private Float rise;
private Float x; private Float x;
private Float y; private Float y;
private Float width; private Float width;
private Float height; private Float height;
@Builder.Default private List<Float> textMatrix = new ArrayList<>(); @Builder.Default private List<Float> textMatrix = new ArrayList<>();
private PdfJsonTextColor fillColor;
private PdfJsonTextColor strokeColor;
private Integer renderingMode; private Integer renderingMode;
} }

View File

@ -51,6 +51,10 @@ import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
import org.apache.pdfbox.pdmodel.font.PDType0Font; import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
import org.apache.pdfbox.pdmodel.graphics.state.PDTextState;
import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode;
import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition; import org.apache.pdfbox.text.TextPosition;
@ -73,6 +77,7 @@ import stirling.software.SPDF.model.json.PdfJsonFontCidSystemInfo;
import stirling.software.SPDF.model.json.PdfJsonMetadata; import stirling.software.SPDF.model.json.PdfJsonMetadata;
import stirling.software.SPDF.model.json.PdfJsonPage; import stirling.software.SPDF.model.json.PdfJsonPage;
import stirling.software.SPDF.model.json.PdfJsonStream; import stirling.software.SPDF.model.json.PdfJsonStream;
import stirling.software.SPDF.model.json.PdfJsonTextColor;
import stirling.software.SPDF.model.json.PdfJsonTextElement; import stirling.software.SPDF.model.json.PdfJsonTextElement;
import stirling.software.common.service.CustomPDFDocumentFactory; import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ExceptionUtils; import stirling.software.common.util.ExceptionUtils;
@ -921,10 +926,7 @@ public class PdfJsonConversionService {
if (font == null && FALLBACK_FONT_ID.equals(element.getFontId())) { if (font == null && FALLBACK_FONT_ID.equals(element.getFontId())) {
font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID)); font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
} }
float fontScale = safeFloat(element.getFontMatrixSize(), 0f); float fontScale = resolveFontMatrixSize(element);
if (fontScale == 0f) {
fontScale = safeFloat(element.getFontSize(), 12f);
}
String text = Objects.toString(element.getText(), ""); String text = Objects.toString(element.getText(), "");
if (font != null) { if (font != null) {
@ -958,6 +960,7 @@ public class PdfJsonConversionService {
textOpen = true; textOpen = true;
} }
applyTextState(contentStream, element);
contentStream.setFont(font, fontScale); contentStream.setFont(font, fontScale);
applyRenderingMode(contentStream, element.getRenderingMode()); applyRenderingMode(contentStream, element.getRenderingMode());
applyTextMatrix(contentStream, element); applyTextMatrix(contentStream, element);
@ -976,6 +979,95 @@ public class PdfJsonConversionService {
font.encode(text); font.encode(text);
} }
/**
 * Writes the optional text-state parameters of {@code element} to the content stream, followed
 * by its fill and stroke colors. A parameter that is {@code null} on the element is not written.
 *
 * @param contentStream stream receiving the text-state operators
 * @param element text element supplying the values
 * @throws IOException if writing to the content stream fails
 */
private void applyTextState(PDPageContentStream contentStream, PdfJsonTextElement element)
        throws IOException {
    Float charSpacing = element.getCharacterSpacing();
    if (charSpacing != null) {
        contentStream.setCharacterSpacing(charSpacing);
    }

    Float wordSpacing = element.getWordSpacing();
    if (wordSpacing != null) {
        contentStream.setWordSpacing(wordSpacing);
    }

    Float horizontalScaling = element.getHorizontalScaling();
    if (horizontalScaling != null) {
        contentStream.setHorizontalScaling(horizontalScaling);
    }

    Float leading = element.getLeading();
    if (leading != null) {
        contentStream.setLeading(leading);
    }

    Float rise = element.getRise();
    if (rise != null) {
        contentStream.setTextRise(rise);
    }

    // Fill (non-stroking) color first, then stroke color.
    applyColor(contentStream, element.getFillColor(), true);
    applyColor(contentStream, element.getStrokeColor(), false);
}
/**
 * Applies a text color to the content stream as either the non-stroking (fill) or the stroking
 * color.
 *
 * <p>Nothing is written when the color or its component list is {@code null}, when any single
 * component is {@code null}, when the named color space is not DeviceGray, DeviceRGB or
 * DeviceCMYK, or when the number of components does not fit the named or inferred color space.
 * In those cases the color already active in the content stream stays in effect.
 *
 * @param contentStream stream receiving the color operator
 * @param color color to apply; may be {@code null}
 * @param nonStroking {@code true} to set the non-stroking color, {@code false} to set the
 *     stroking color
 * @throws IOException if the color space cannot be created or the stream cannot be written
 */
private void applyColor(
        PDPageContentStream contentStream, PdfJsonTextColor color, boolean nonStroking)
        throws IOException {
    if (color == null || color.getComponents() == null) {
        return;
    }

    List<Float> source = color.getComponents();
    float[] components = new float[source.size()];
    for (int i = 0; i < components.length; i++) {
        Float value = source.get(i);
        if (value == null) {
            // The JSON is user-editable; a null entry would otherwise fail on unboxing.
            log.debug("Skipping color with null component at index {}", i);
            return;
        }
        components[i] = value;
    }

    String space = color.getColorSpace();
    if (space == null) {
        // No color space given: infer a device color space from the component count.
        COSName inferred;
        switch (components.length) {
            case 1:
                inferred = COSName.DEVICEGRAY;
                break;
            case 3:
                inferred = COSName.DEVICERGB;
                break;
            case 4:
                inferred = COSName.DEVICECMYK;
                break;
            default:
                // Do not guess: skip, exactly as the named branches do on a count mismatch.
                log.debug(
                        "Skipping color with {} components and no color space",
                        components.length);
                return;
        }
        PDColor pdColor = new PDColor(components, PDColorSpace.create(inferred));
        if (nonStroking) {
            contentStream.setNonStrokingColor(pdColor);
        } else {
            contentStream.setStrokingColor(pdColor);
        }
        return;
    }

    switch (space) {
        case "DeviceRGB":
            if (components.length >= 3) {
                if (nonStroking) {
                    contentStream.setNonStrokingColor(
                            components[0], components[1], components[2]);
                } else {
                    contentStream.setStrokingColor(
                            components[0], components[1], components[2]);
                }
            }
            break;
        case "DeviceCMYK":
            if (components.length >= 4) {
                if (nonStroking) {
                    contentStream.setNonStrokingColor(
                            components[0], components[1], components[2], components[3]);
                } else {
                    contentStream.setStrokingColor(
                            components[0], components[1], components[2], components[3]);
                }
            }
            break;
        case "DeviceGray":
            if (components.length >= 1) {
                if (nonStroking) {
                    contentStream.setNonStrokingColor(components[0]);
                } else {
                    contentStream.setStrokingColor(components[0]);
                }
            }
            break;
        default:
            log.debug("Skipping unsupported color space {}", space);
    }
}
private String abbreviate(String value) { private String abbreviate(String value) {
if (value == null) { if (value == null) {
return ""; return "";
@ -1362,10 +1454,7 @@ public class PdfJsonConversionService {
throws IOException { throws IOException {
List<Float> matrix = element.getTextMatrix(); List<Float> matrix = element.getTextMatrix();
if (matrix != null && matrix.size() == 6) { if (matrix != null && matrix.size() == 6) {
float fontScale = safeFloat(element.getFontMatrixSize(), 0f); float fontScale = resolveFontMatrixSize(element);
if (fontScale == 0f) {
fontScale = safeFloat(element.getFontSize(), 1f);
}
float a = matrix.get(0); float a = matrix.get(0);
float b = matrix.get(1); float b = matrix.get(1);
float c = matrix.get(2); float c = matrix.get(2);
@ -1388,6 +1477,25 @@ public class PdfJsonConversionService {
contentStream.setTextMatrix(new Matrix(1, 0, 0, 1, x, y)); contentStream.setTextMatrix(new Matrix(1, 0, 0, 1, x, y));
} }
/**
 * Determines the font scale to use for a text element.
 *
 * <p>Resolution order: the element's positive {@code fontMatrixSize}; otherwise a positive
 * scale derived from the first four text-matrix entries; otherwise the element's font size,
 * defaulting to 12.
 *
 * @param element text element to inspect
 * @return the resolved font scale
 */
private float resolveFontMatrixSize(PdfJsonTextElement element) {
    Float explicitSize = element.getFontMatrixSize();
    boolean hasExplicitSize = explicitSize != null && explicitSize > 0f;
    if (hasExplicitSize) {
        return explicitSize;
    }

    List<Float> textMatrix = element.getTextMatrix();
    boolean hasLinearPart = textMatrix != null && textMatrix.size() >= 4;
    if (hasLinearPart) {
        float m0 = textMatrix.get(0);
        float m1 = textMatrix.get(1);
        float m2 = textMatrix.get(2);
        float m3 = textMatrix.get(3);
        // Larger of the magnitudes of (m0, m2) and (m1, m3).
        double firstNorm = Math.hypot(m0, m2);
        double secondNorm = Math.hypot(m1, m3);
        float derived = (float) Math.max(firstNorm, secondNorm);
        if (derived > 0f) {
            return derived;
        }
    }

    return safeFloat(element.getFontSize(), 12f);
}
private void applyRenderingMode(PDPageContentStream contentStream, Integer renderingMode) private void applyRenderingMode(PDPageContentStream contentStream, Integer renderingMode)
throws IOException { throws IOException {
if (renderingMode == null) { if (renderingMode == null) {
@ -1480,12 +1588,29 @@ public class PdfJsonConversionService {
element.setText(position.getUnicode()); element.setText(position.getUnicode());
element.setFontId(fontId); element.setFontId(fontId);
element.setFontSize(position.getFontSizeInPt()); element.setFontSize(position.getFontSizeInPt());
element.setFontMatrixSize(position.getFontSize()); element.setFontSizeInPt(position.getFontSizeInPt());
element.setX(position.getXDirAdj()); element.setX(position.getXDirAdj());
element.setY(position.getYDirAdj()); element.setY(position.getYDirAdj());
element.setWidth(position.getWidthDirAdj()); element.setWidth(position.getWidthDirAdj());
element.setHeight(position.getHeightDir()); element.setHeight(position.getHeightDir());
element.setTextMatrix(extractMatrix(position)); element.setTextMatrix(extractMatrix(position));
element.setFontMatrixSize(computeFontMatrixSize(element.getTextMatrix()));
PDGraphicsState graphicsState = getGraphicsState();
if (graphicsState != null) {
PDTextState textState = graphicsState.getTextState();
if (textState != null) {
element.setCharacterSpacing(textState.getCharacterSpacing());
element.setWordSpacing(textState.getWordSpacing());
element.setHorizontalScaling(textState.getHorizontalScaling());
element.setLeading(textState.getLeading());
element.setRise(textState.getRise());
if (textState.getRenderingMode() != null) {
element.setRenderingMode(textState.getRenderingMode().intValue());
}
}
element.setFillColor(toTextColor(graphicsState.getNonStrokingColor()));
element.setStrokeColor(toTextColor(graphicsState.getStrokingColor()));
}
pageElements.add(element); pageElements.add(element);
} }
} }
@ -1505,6 +1630,20 @@ public class PdfJsonConversionService {
return matrix; return matrix;
} }
/**
 * Derives a font scale from the first four entries of a text matrix.
 *
 * @param matrix matrix values; may be {@code null}
 * @return the larger of the magnitudes of (m0, m2) and (m1, m3), or {@code null} when the
 *     matrix is {@code null}, has fewer than four entries, or the result is not positive
 */
private Float computeFontMatrixSize(List<Float> matrix) {
    boolean unusable = matrix == null || matrix.size() < 4;
    if (unusable) {
        return null;
    }

    float m0 = matrix.get(0);
    float m1 = matrix.get(1);
    float m2 = matrix.get(2);
    float m3 = matrix.get(3);

    float firstNorm = (float) Math.hypot(m0, m2);
    float secondNorm = (float) Math.hypot(m1, m3);
    float largest = Math.max(firstNorm, secondNorm);

    if (largest > 0) {
        return largest;
    }
    return null;
}
private String registerFont(PDFont font) throws IOException { private String registerFont(PDFont font) throws IOException {
String fontId = currentFontResources.get(font); String fontId = currentFontResources.get(font);
if (fontId == null || fontId.isBlank()) { if (fontId == null || fontId.isBlank()) {
@ -1516,6 +1655,25 @@ public class PdfJsonConversionService {
} }
return fontId; return fontId;
} }
/**
 * Converts a PDFBox color into its JSON model.
 *
 * @param color color to convert; may be {@code null}
 * @return a model holding the color space name and component values, or {@code null} when the
 *     color or its color space is {@code null}
 */
private PdfJsonTextColor toTextColor(PDColor color) {
    PDColorSpace space = (color == null) ? null : color.getColorSpace();
    if (space == null) {
        return null;
    }

    float[] raw = color.getComponents();
    List<Float> boxed = new ArrayList<>(raw.length);
    for (int i = 0; i < raw.length; i++) {
        boxed.add(raw[i]);
    }

    PdfJsonTextColor.PdfJsonTextColorBuilder builder = PdfJsonTextColor.builder();
    builder.colorSpace(space.getName());
    builder.components(boxed);
    return builder.build();
}
} }
private RenderingMode toRenderingMode(Integer renderingMode) { private RenderingMode toRenderingMode(Integer renderingMode) {

View File

@ -4008,6 +4008,29 @@
"startTourDescription": "Take a guided tour of Stirling PDF's key features" "startTourDescription": "Take a guided tour of Stirling PDF's key features"
}, },
"pdfJsonEditor": { "pdfJsonEditor": {
"viewLabel": "JSON Editor" "viewLabel": "PDF Editor",
"title": "PDF Editor",
"badges": {
"unsaved": "Edited",
"modified": "Edited"
},
"actions": {
"load": "Load File",
"reset": "Reset Changes",
"downloadJson": "Download JSON",
"generatePdf": "Generate PDF"
},
"currentFile": "Current file: {{name}}",
"pageSummary": "Page {{number}} of {{total}}",
"groupList": "Detected Text Groups",
"fontSizeValue": "{{size}}pt",
"noTextOnPage": "No editable text was detected on this page.",
"emptyGroup": "[Empty Group]",
"empty": {
"title": "No document loaded",
"subtitle": "Load a PDF or JSON file to begin editing text content."
},
"converting": "Converting PDF to editable format...",
"conversionFailed": "Failed to convert PDF. Please try again."
} }
} }

View File

@ -36,14 +36,16 @@ interface PdfJsonEditorViewProps {
} }
const toCssBounds = ( const toCssBounds = (
page: PdfJsonPage | null | undefined, _page: PdfJsonPage | null | undefined,
pageHeight: number, pageHeight: number,
scale: number, scale: number,
bounds: { left: number; right: number; top: number; bottom: number }, bounds: { left: number; right: number; top: number; bottom: number },
) => { ) => {
const width = Math.max(bounds.right - bounds.left, 1); const width = Math.max(bounds.right - bounds.left, 1);
const height = Math.max(bounds.bottom - bounds.top, 1); const height = Math.max(bounds.bottom - bounds.top, 1);
const scaledWidth = Math.max(width * scale, MIN_BOX_SIZE); // Add 20% buffer to width to account for padding and font rendering variations
const bufferedWidth = width * 1.2;
const scaledWidth = Math.max(bufferedWidth * scale, MIN_BOX_SIZE);
const scaledHeight = Math.max(height * scale, MIN_BOX_SIZE / 2); const scaledHeight = Math.max(height * scale, MIN_BOX_SIZE / 2);
const top = Math.max(pageHeight - bounds.bottom, 0) * scale; const top = Math.max(pageHeight - bounds.bottom, 0) * scale;
@ -69,6 +71,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
fileName, fileName,
errorMessage, errorMessage,
isGeneratingPdf, isGeneratingPdf,
isConverting,
hasChanges, hasChanges,
onLoadJson, onLoadJson,
onSelectPage, onSelectPage,
@ -78,6 +81,36 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
onGeneratePdf, onGeneratePdf,
} = data; } = data;
/**
 * Picks a CSS font-family stack for a PDF font id.
 *
 * Returns 'sans-serif' when the id is empty, the document has no font list, or no font
 * with that id exists. Otherwise the font's name is matched against a few known
 * keywords and mapped to a web-safe stack.
 */
const getFontFamily = (fontId: string | null | undefined): string => {
  const genericFallback = 'sans-serif';
  const defaultStack = 'Arial, Helvetica, sans-serif';

  const documentFonts = pdfDocument?.fonts;
  if (!fontId || !documentFonts) {
    return genericFallback;
  }

  const matchedFont = documentFonts.find((candidate) => candidate.id === fontId);
  if (!matchedFont) {
    return genericFallback;
  }

  // Map PDF fonts to web-safe fonts based on name.
  // Note: Embedded font data from PDFs often lacks tables required for web rendering (OS/2 table)
  const normalizedName = (matchedFont.standard14Name || matchedFont.baseName || '').toLowerCase();

  // Checked in order; the first entry with a matching keyword wins.
  const stacksByKeyword: Array<{ keywords: string[]; stack: string }> = [
    { keywords: ['times'], stack: '"Times New Roman", Times, serif' },
    { keywords: ['helvetica', 'arial'], stack: defaultStack },
    { keywords: ['courier'], stack: '"Courier New", Courier, monospace' },
    { keywords: ['symbol'], stack: 'Symbol, serif' },
  ];

  for (const { keywords, stack } of stacksByKeyword) {
    if (keywords.some((keyword) => normalizedName.includes(keyword))) {
      return stack;
    }
  }

  return defaultStack;
};
const pages = pdfDocument?.pages ?? []; const pages = pdfDocument?.pages ?? [];
const currentPage = pages[selectedPage] ?? null; const currentPage = pages[selectedPage] ?? null;
const pageGroups = groupsByPage[selectedPage] ?? []; const pageGroups = groupsByPage[selectedPage] ?? [];
@ -141,20 +174,21 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
style={{ style={{
width: '100%', width: '100%',
height: '100%', height: '100%',
border: isActive outline: isActive
? '2px solid var(--mantine-color-blue-5)' ? '2px solid var(--mantine-color-blue-5)'
: isChanged : isChanged
? '1px solid var(--mantine-color-yellow-5)' ? '1px solid var(--mantine-color-yellow-5)'
: '1px solid transparent', : 'none',
outlineOffset: '-1px',
borderRadius: 6, borderRadius: 6,
backgroundColor: isChanged || isActive ? 'rgba(250,255,189,0.28)' : 'transparent', backgroundColor: isChanged || isActive ? 'rgba(250,255,189,0.28)' : 'transparent',
transition: 'border 120ms ease, background-color 120ms ease', transition: 'outline 120ms ease, background-color 120ms ease',
pointerEvents: 'auto', pointerEvents: 'auto',
overflow: 'hidden', overflow: 'visible',
display: 'flex', display: 'flex',
alignItems: 'flex-start', alignItems: 'flex-start',
justifyContent: 'flex-start', justifyContent: 'flex-start',
padding: 0, padding: 0,
}} }}
onClick={(event) => { onClick={(event) => {
event.stopPropagation(); event.stopPropagation();
@ -182,10 +216,15 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
{hasChanges && <Badge color="yellow" size="sm">{t('pdfJsonEditor.badges.unsaved', 'Edited')}</Badge>} {hasChanges && <Badge color="yellow" size="sm">{t('pdfJsonEditor.badges.unsaved', 'Edited')}</Badge>}
</Group> </Group>
<Group gap="sm"> <Group gap="sm">
<FileButton onChange={onLoadJson} accept="application/json"> <FileButton onChange={onLoadJson} accept="application/pdf,application/json,.pdf,.json">
{(props) => ( {(props) => (
<Button variant="light" leftSection={<UploadIcon fontSize="small" />} {...props}> <Button
{t('pdfJsonEditor.actions.load', 'Load JSON')} variant="light"
leftSection={<UploadIcon fontSize="small" />}
loading={isConverting}
{...props}
>
{t('pdfJsonEditor.actions.load', 'Load File')}
</Button> </Button>
)} )}
</FileButton> </FileButton>
@ -193,7 +232,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
variant="subtle" variant="subtle"
leftSection={<AutorenewIcon fontSize="small" />} leftSection={<AutorenewIcon fontSize="small" />}
onClick={onReset} onClick={onReset}
disabled={!hasDocument} disabled={!hasDocument || isConverting}
> >
{t('pdfJsonEditor.actions.reset', 'Reset Changes')} {t('pdfJsonEditor.actions.reset', 'Reset Changes')}
</Button> </Button>
@ -201,7 +240,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
variant="default" variant="default"
leftSection={<FileDownloadIcon fontSize="small" />} leftSection={<FileDownloadIcon fontSize="small" />}
onClick={onDownloadJson} onClick={onDownloadJson}
disabled={!hasDocument} disabled={!hasDocument || isConverting}
> >
{t('pdfJsonEditor.actions.downloadJson', 'Download JSON')} {t('pdfJsonEditor.actions.downloadJson', 'Download JSON')}
</Button> </Button>
@ -209,7 +248,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
leftSection={<PictureAsPdfIcon fontSize="small" />} leftSection={<PictureAsPdfIcon fontSize="small" />}
onClick={onGeneratePdf} onClick={onGeneratePdf}
loading={isGeneratingPdf} loading={isGeneratingPdf}
disabled={!hasDocument || !hasChanges} disabled={!hasDocument || !hasChanges || isConverting}
> >
{t('pdfJsonEditor.actions.generatePdf', 'Generate PDF')} {t('pdfJsonEditor.actions.generatePdf', 'Generate PDF')}
</Button> </Button>
@ -230,15 +269,26 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
</Alert> </Alert>
)} )}
{!hasDocument && ( {!hasDocument && !isConverting && (
<Card withBorder radius="md" padding="xl"> <Card withBorder radius="md" padding="xl">
<Stack align="center" gap="md"> <Stack align="center" gap="md">
<DescriptionIcon sx={{ fontSize: 48 }} /> <DescriptionIcon sx={{ fontSize: 48 }} />
<Text size="lg" fw={600}> <Text size="lg" fw={600}>
{t('pdfJsonEditor.empty.title', 'No JSON loaded yet')} {t('pdfJsonEditor.empty.title', 'No document loaded')}
</Text> </Text>
<Text size="sm" c="dimmed" ta="center" maw={420}> <Text size="sm" c="dimmed" ta="center" maw={420}>
{t('pdfJsonEditor.empty.subtitle', 'Use the Load JSON button above to open a file generated by the PDF → JSON converter.')} {t('pdfJsonEditor.empty.subtitle', 'Load a PDF or JSON file to begin editing text content.')}
</Text>
</Stack>
</Card>
)}
{isConverting && (
<Card withBorder radius="md" padding="xl">
<Stack align="center" gap="md">
<AutorenewIcon sx={{ fontSize: 48 }} className="animate-spin" />
<Text size="lg" fw={600}>
{t('pdfJsonEditor.converting', 'Converting PDF to editable format...')}
</Text> </Text>
</Stack> </Stack>
</Card> </Card>
@ -306,9 +356,11 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
const changed = group.text !== group.originalText; const changed = group.text !== group.originalText;
const isActive = activeGroupId === group.id || editingGroupId === group.id; const isActive = activeGroupId === group.id || editingGroupId === group.id;
const isEditing = editingGroupId === group.id; const isEditing = editingGroupId === group.id;
const fontSizePx = Math.max((group.fontSize ?? 12) * scale, 8); const baseFontSize = group.fontMatrixSize ?? group.fontSize ?? 12;
const fontSizePx = Math.max(baseFontSize * scale, 6);
const fontFamily = getFontFamily(group.fontId);
const visualHeight = Math.max(bounds.height, fontSizePx * 1.35); const visualHeight = Math.max(bounds.height, fontSizePx * 1.2);
const containerStyle: React.CSSProperties = { const containerStyle: React.CSSProperties = {
position: 'absolute', position: 'absolute',
@ -323,14 +375,9 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
cursor: 'text', cursor: 'text',
}; };
const commonProps = {
key: group.id,
style: containerStyle,
};
if (isEditing) { if (isEditing) {
return ( return (
<Box {...commonProps}> <Box key={group.id} style={containerStyle}>
{renderGroupContainer( {renderGroupContainer(
group.id, group.id,
true, true,
@ -355,6 +402,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
backgroundColor: 'rgba(255,255,255,0.95)', backgroundColor: 'rgba(255,255,255,0.95)',
color: '#111827', color: '#111827',
fontSize: `${fontSizePx}px`, fontSize: `${fontSizePx}px`,
fontFamily,
lineHeight: 1.25, lineHeight: 1.25,
outline: 'none', outline: 'none',
border: 'none', border: 'none',
@ -362,6 +410,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
whiteSpace: 'pre-wrap', whiteSpace: 'pre-wrap',
overflowWrap: 'anywhere', overflowWrap: 'anywhere',
cursor: 'text', cursor: 'text',
overflow: 'visible',
}} }}
> >
{group.text || '\u00A0'} {group.text || '\u00A0'}
@ -372,9 +421,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
} }
return ( return (
<Box <Box key={group.id} style={containerStyle}>
{...commonProps}
>
{renderGroupContainer( {renderGroupContainer(
group.id, group.id,
isActive, isActive,
@ -386,10 +433,12 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
padding: '2px 4px', padding: '2px 4px',
whiteSpace: 'pre-wrap', whiteSpace: 'pre-wrap',
fontSize: `${fontSizePx}px`, fontSize: `${fontSizePx}px`,
fontFamily,
lineHeight: 1.25, lineHeight: 1.25,
color: '#111827', color: '#111827',
display: 'block', display: 'block',
cursor: 'text', cursor: 'text',
overflow: 'visible',
}} }}
> >
<span style={{ pointerEvents: 'none' }}>{group.text || '\u00A0'}</span> <span style={{ pointerEvents: 'none' }}>{group.text || '\u00A0'}</span>

View File

@ -50,11 +50,12 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
const [fileName, setFileName] = useState(''); const [fileName, setFileName] = useState('');
const [errorMessage, setErrorMessage] = useState<string | null>(null); const [errorMessage, setErrorMessage] = useState<string | null>(null);
const [isGeneratingPdf, setIsGeneratingPdf] = useState(false); const [isGeneratingPdf, setIsGeneratingPdf] = useState(false);
const [isConverting, setIsConverting] = useState(false);
const dirtyPages = useMemo(() => getDirtyPages(groupsByPage), [groupsByPage]); const dirtyPages = useMemo(() => getDirtyPages(groupsByPage), [groupsByPage]);
const hasChanges = useMemo(() => dirtyPages.some(Boolean), [dirtyPages]); const hasChanges = useMemo(() => dirtyPages.some(Boolean), [dirtyPages]);
const hasDocument = loadedDocument !== null; const hasDocument = loadedDocument !== null;
const viewLabel = useMemo(() => t('pdfJsonEditor.viewLabel', 'JSON Editor'), [t]); const viewLabel = useMemo(() => t('pdfJsonEditor.viewLabel', 'PDF Editor'), [t]);
const resetToDocument = useCallback((document: PdfJsonDocument | null) => { const resetToDocument = useCallback((document: PdfJsonDocument | null) => {
if (!document) { if (!document) {
@ -73,23 +74,55 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
if (!file) { if (!file) {
return; return;
} }
const isPdf = file.type === 'application/pdf' || file.name.toLowerCase().endsWith('.pdf');
try { try {
const content = await file.text(); let parsed: PdfJsonDocument;
const parsed = JSON.parse(content) as PdfJsonDocument;
if (isPdf) {
// Convert PDF to JSON first
setIsConverting(true);
setErrorMessage(null);
const formData = new FormData();
formData.append('fileInput', file);
const response = await apiClient.post(CONVERSION_ENDPOINTS['pdf-json'], formData, {
responseType: 'blob',
});
const jsonText = await response.data.text();
parsed = JSON.parse(jsonText) as PdfJsonDocument;
} else {
// Load JSON directly
const content = await file.text();
parsed = JSON.parse(content) as PdfJsonDocument;
}
setLoadedDocument(parsed); setLoadedDocument(parsed);
resetToDocument(parsed); resetToDocument(parsed);
setFileName(file.name); setFileName(file.name);
setErrorMessage(null); setErrorMessage(null);
} catch (error) { } catch (error) {
console.error('Failed to parse JSON', error); console.error('Failed to load file', error);
setLoadedDocument(null); setLoadedDocument(null);
setGroupsByPage([]); setGroupsByPage([]);
setErrorMessage(
t( if (isPdf) {
'pdfJsonEditor.errors.invalidJson', setErrorMessage(
'Unable to read the JSON file. Ensure it was generated by the PDF to JSON tool.' t('pdfJsonEditor.conversionFailed', 'Failed to convert PDF. Please try again.')
) );
); } else {
setErrorMessage(
t(
'pdfJsonEditor.errors.invalidJson',
'Unable to read the JSON file. Ensure it was generated by the PDF to JSON tool.'
)
);
}
} finally {
setIsConverting(false);
} }
}, },
[resetToDocument, t] [resetToDocument, t]
@ -202,6 +235,7 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
fileName, fileName,
errorMessage, errorMessage,
isGeneratingPdf, isGeneratingPdf,
isConverting,
hasChanges, hasChanges,
onLoadJson: handleLoadFile, onLoadJson: handleLoadFile,
onSelectPage: handleSelectPage, onSelectPage: handleSelectPage,
@ -223,6 +257,7 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
hasChanges, hasChanges,
hasDocument, hasDocument,
isGeneratingPdf, isGeneratingPdf,
isConverting,
loadedDocument, loadedDocument,
selectedPage, selectedPage,
]); ]);

View File

@ -4,6 +4,11 @@ export interface PdfJsonFontCidSystemInfo {
supplement?: number | null; supplement?: number | null;
} }
/** Color attached to a text element: a color space name plus its component values. */
export interface PdfJsonTextColor {
// Name of the color space; optional and nullable.
colorSpace?: string | null;
// Component values of the color; optional and nullable.
components?: number[] | null;
}
export interface PdfJsonFont { export interface PdfJsonFont {
id?: string; id?: string;
pageNumber?: number | null; pageNumber?: number | null;
@ -26,12 +31,19 @@ export interface PdfJsonTextElement {
fontSize?: number | null; fontSize?: number | null;
fontMatrixSize?: number | null; fontMatrixSize?: number | null;
fontSizeInPt?: number | null; fontSizeInPt?: number | null;
characterSpacing?: number | null;
wordSpacing?: number | null;
horizontalScaling?: number | null;
leading?: number | null;
rise?: number | null;
renderingMode?: number | null; renderingMode?: number | null;
x?: number | null; x?: number | null;
y?: number | null; y?: number | null;
width?: number | null; width?: number | null;
height?: number | null; height?: number | null;
textMatrix?: number[] | null; textMatrix?: number[] | null;
fillColor?: PdfJsonTextColor | null;
strokeColor?: PdfJsonTextColor | null;
} }
export interface PdfJsonStream { export interface PdfJsonStream {
@ -81,6 +93,7 @@ export interface TextGroup {
pageIndex: number; pageIndex: number;
fontId?: string | null; fontId?: string | null;
fontSize?: number | null; fontSize?: number | null;
fontMatrixSize?: number | null;
elements: PdfJsonTextElement[]; elements: PdfJsonTextElement[];
originalElements: PdfJsonTextElement[]; originalElements: PdfJsonTextElement[];
text: string; text: string;
@ -100,6 +113,7 @@ export interface PdfJsonEditorViewData {
fileName: string; fileName: string;
errorMessage: string | null; errorMessage: string | null;
isGeneratingPdf: boolean; isGeneratingPdf: boolean;
isConverting: boolean;
hasChanges: boolean; hasChanges: boolean;
onLoadJson: (file: File | null) => Promise<void> | void; onLoadJson: (file: File | null) => Promise<void> | void;
onSelectPage: (pageIndex: number) => void; onSelectPage: (pageIndex: number) => void;

View File

@ -47,7 +47,7 @@ const getWidth = (element: PdfJsonTextElement): number => {
return width; return width;
}; };
const getFontSize = (element: PdfJsonTextElement): number => valueOr(element.fontSize, 12); const getFontSize = (element: PdfJsonTextElement): number => valueOr(element.fontMatrixSize ?? element.fontSize, 12);
const getHeight = (element: PdfJsonTextElement): number => { const getHeight = (element: PdfJsonTextElement): number => {
const height = valueOr(element.height); const height = valueOr(element.height);
@ -129,6 +129,7 @@ const createGroup = (
pageIndex, pageIndex,
fontId: elements[0]?.fontId, fontId: elements[0]?.fontId,
fontSize: elements[0]?.fontSize, fontSize: elements[0]?.fontSize,
fontMatrixSize: elements[0]?.fontMatrixSize,
elements: clones, elements: clones,
originalElements: originalClones, originalElements: originalClones,
text: buildGroupText(elements), text: buildGroupText(elements),
@ -286,7 +287,6 @@ export const buildUpdatedDocument = (
return page; return page;
} }
const hasPageChanges = groups.some((group) => group.text !== group.originalText);
const updatedElements: PdfJsonTextElement[] = groups.flatMap((group) => { const updatedElements: PdfJsonTextElement[] = groups.flatMap((group) => {
if (group.text === group.originalText) { if (group.text === group.originalText) {
return group.originalElements.map(cloneTextElement); return group.originalElements.map(cloneTextElement);
@ -318,12 +318,10 @@ export const restoreGlyphElements = (
} }
const rebuiltElements: PdfJsonTextElement[] = []; const rebuiltElements: PdfJsonTextElement[] = [];
let pageChanged = false;
groups.forEach((group) => { groups.forEach((group) => {
const originals = group.originalElements.map(cloneTextElement); const originals = group.originalElements.map(cloneTextElement);
if (group.text !== group.originalText) { if (group.text !== group.originalText) {
pageChanged = true;
distributeTextAcrossElements(group.text, originals); distributeTextAcrossElements(group.text, originals);
} }
rebuiltElements.push(...originals); rebuiltElements.push(...originals);