mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
image stuff
This commit is contained in:
parent
930c68c8c5
commit
af19a5af23
@ -0,0 +1,37 @@
|
|||||||
|
package stirling.software.SPDF.model.json;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@Builder
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
|
public class PdfJsonImageElement {
|
||||||
|
|
||||||
|
private String id;
|
||||||
|
private String objectName;
|
||||||
|
private Boolean inlineImage;
|
||||||
|
private Integer nativeWidth;
|
||||||
|
private Integer nativeHeight;
|
||||||
|
private Float x;
|
||||||
|
private Float y;
|
||||||
|
private Float width;
|
||||||
|
private Float height;
|
||||||
|
private Float left;
|
||||||
|
private Float right;
|
||||||
|
private Float top;
|
||||||
|
private Float bottom;
|
||||||
|
@Builder.Default private List<Float> transform = new ArrayList<>();
|
||||||
|
private Integer zOrder;
|
||||||
|
private String imageData;
|
||||||
|
private String imageFormat;
|
||||||
|
}
|
||||||
@ -23,6 +23,7 @@ public class PdfJsonPage {
|
|||||||
private Integer rotation;
|
private Integer rotation;
|
||||||
|
|
||||||
@Builder.Default private List<PdfJsonTextElement> textElements = new ArrayList<>();
|
@Builder.Default private List<PdfJsonTextElement> textElements = new ArrayList<>();
|
||||||
|
@Builder.Default private List<PdfJsonImageElement> imageElements = new ArrayList<>();
|
||||||
|
|
||||||
/** Serialized representation of the page resources dictionary. */
|
/** Serialized representation of the page resources dictionary. */
|
||||||
private PdfJsonCosValue resources;
|
private PdfJsonCosValue resources;
|
||||||
|
|||||||
@ -24,6 +24,8 @@ public class PdfJsonTextElement {
|
|||||||
private Float fontSizeInPt;
|
private Float fontSizeInPt;
|
||||||
private Float characterSpacing;
|
private Float characterSpacing;
|
||||||
private Float wordSpacing;
|
private Float wordSpacing;
|
||||||
|
private Float spaceWidth;
|
||||||
|
private Integer zOrder;
|
||||||
private Float horizontalScaling;
|
private Float horizontalScaling;
|
||||||
private Float leading;
|
private Float leading;
|
||||||
private Float rise;
|
private Float rise;
|
||||||
|
|||||||
@ -1,5 +1,8 @@
|
|||||||
package stirling.software.SPDF.service;
|
package stirling.software.SPDF.service;
|
||||||
|
|
||||||
|
import java.awt.geom.AffineTransform;
|
||||||
|
import java.awt.geom.Point2D;
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -22,8 +25,13 @@ import java.util.Map;
|
|||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.TimeZone;
|
import java.util.TimeZone;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine;
|
||||||
import org.apache.pdfbox.contentstream.operator.Operator;
|
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||||
|
import org.apache.pdfbox.contentstream.operator.OperatorName;
|
||||||
import org.apache.pdfbox.cos.COSArray;
|
import org.apache.pdfbox.cos.COSArray;
|
||||||
import org.apache.pdfbox.cos.COSBase;
|
import org.apache.pdfbox.cos.COSBase;
|
||||||
import org.apache.pdfbox.cos.COSBoolean;
|
import org.apache.pdfbox.cos.COSBoolean;
|
||||||
@ -53,6 +61,8 @@ import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
|||||||
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
|
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
|
||||||
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
|
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
|
||||||
import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
|
import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||||
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
|
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
|
||||||
import org.apache.pdfbox.pdmodel.graphics.state.PDTextState;
|
import org.apache.pdfbox.pdmodel.graphics.state.PDTextState;
|
||||||
import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode;
|
import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode;
|
||||||
@ -74,6 +84,7 @@ import stirling.software.SPDF.model.json.PdfJsonCosValue;
|
|||||||
import stirling.software.SPDF.model.json.PdfJsonDocument;
|
import stirling.software.SPDF.model.json.PdfJsonDocument;
|
||||||
import stirling.software.SPDF.model.json.PdfJsonFont;
|
import stirling.software.SPDF.model.json.PdfJsonFont;
|
||||||
import stirling.software.SPDF.model.json.PdfJsonFontCidSystemInfo;
|
import stirling.software.SPDF.model.json.PdfJsonFontCidSystemInfo;
|
||||||
|
import stirling.software.SPDF.model.json.PdfJsonImageElement;
|
||||||
import stirling.software.SPDF.model.json.PdfJsonMetadata;
|
import stirling.software.SPDF.model.json.PdfJsonMetadata;
|
||||||
import stirling.software.SPDF.model.json.PdfJsonPage;
|
import stirling.software.SPDF.model.json.PdfJsonPage;
|
||||||
import stirling.software.SPDF.model.json.PdfJsonStream;
|
import stirling.software.SPDF.model.json.PdfJsonStream;
|
||||||
@ -128,6 +139,8 @@ public class PdfJsonConversionService {
|
|||||||
stripper.setSortByPosition(true);
|
stripper.setSortByPosition(true);
|
||||||
stripper.getText(document);
|
stripper.getText(document);
|
||||||
|
|
||||||
|
Map<Integer, List<PdfJsonImageElement>> imagesByPage = collectImages(document);
|
||||||
|
|
||||||
PdfJsonDocument pdfJson = new PdfJsonDocument();
|
PdfJsonDocument pdfJson = new PdfJsonDocument();
|
||||||
pdfJson.setMetadata(extractMetadata(document));
|
pdfJson.setMetadata(extractMetadata(document));
|
||||||
pdfJson.setXmpMetadata(extractXmpMetadata(document));
|
pdfJson.setXmpMetadata(extractXmpMetadata(document));
|
||||||
@ -136,7 +149,7 @@ public class PdfJsonConversionService {
|
|||||||
Comparator.comparing(
|
Comparator.comparing(
|
||||||
PdfJsonFont::getUid, Comparator.nullsLast(Comparator.naturalOrder())));
|
PdfJsonFont::getUid, Comparator.nullsLast(Comparator.naturalOrder())));
|
||||||
pdfJson.setFonts(serializedFonts);
|
pdfJson.setFonts(serializedFonts);
|
||||||
pdfJson.setPages(extractPages(document, textByPage));
|
pdfJson.setPages(extractPages(document, textByPage, imagesByPage));
|
||||||
|
|
||||||
log.info(
|
log.info(
|
||||||
"PDF→JSON conversion complete (fonts: {}, pages: {})",
|
"PDF→JSON conversion complete (fonts: {}, pages: {})",
|
||||||
@ -201,6 +214,10 @@ public class PdfJsonConversionService {
|
|||||||
pageModel.getTextElements() != null
|
pageModel.getTextElements() != null
|
||||||
? pageModel.getTextElements()
|
? pageModel.getTextElements()
|
||||||
: new ArrayList<>();
|
: new ArrayList<>();
|
||||||
|
List<PdfJsonImageElement> imageElements =
|
||||||
|
pageModel.getImageElements() != null
|
||||||
|
? pageModel.getImageElements()
|
||||||
|
: new ArrayList<>();
|
||||||
|
|
||||||
boolean fallbackAssigned =
|
boolean fallbackAssigned =
|
||||||
preflightTextElements(
|
preflightTextElements(
|
||||||
@ -218,15 +235,13 @@ public class PdfJsonConversionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
boolean hasText = !elements.isEmpty();
|
boolean hasText = !elements.isEmpty();
|
||||||
boolean rewriteSucceeded = false;
|
boolean hasImages = !imageElements.isEmpty();
|
||||||
|
boolean rewriteSucceeded = true;
|
||||||
|
|
||||||
if (!preservedStreams.isEmpty() && hasText) {
|
if (hasText) {
|
||||||
if (fallbackAssigned) {
|
if (fallbackAssigned) {
|
||||||
log.info(
|
|
||||||
"Skipping token rewrite for page {} because fallback font was applied",
|
|
||||||
pageNumberValue);
|
|
||||||
rewriteSucceeded = false;
|
rewriteSucceeded = false;
|
||||||
} else {
|
} else if (!preservedStreams.isEmpty()) {
|
||||||
log.info("Attempting token rewrite for page {}", pageNumberValue);
|
log.info("Attempting token rewrite for page {}", pageNumberValue);
|
||||||
rewriteSucceeded = rewriteTextOperators(document, page, elements);
|
rewriteSucceeded = rewriteTextOperators(document, page, elements);
|
||||||
if (!rewriteSucceeded) {
|
if (!rewriteSucceeded) {
|
||||||
@ -236,18 +251,29 @@ public class PdfJsonConversionService {
|
|||||||
} else {
|
} else {
|
||||||
log.info("Token rewrite succeeded for page {}", pageNumberValue);
|
log.info("Token rewrite succeeded for page {}", pageNumberValue);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
rewriteSucceeded = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!hasText) {
|
boolean shouldRegenerate = preservedStreams.isEmpty();
|
||||||
|
if (hasText && !rewriteSucceeded) {
|
||||||
|
shouldRegenerate = true;
|
||||||
|
}
|
||||||
|
if (hasImages && preservedStreams.isEmpty()) {
|
||||||
|
shouldRegenerate = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(hasText || hasImages)) {
|
||||||
pageIndex++;
|
pageIndex++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!rewriteSucceeded) {
|
if (shouldRegenerate) {
|
||||||
log.info("Regenerating text content for page {}", pageNumberValue);
|
log.info("Regenerating page content for page {}", pageNumberValue);
|
||||||
regenerateTextContent(document, page, elements, fontMap, pageNumberValue);
|
regeneratePageContent(
|
||||||
log.info("Text regeneration complete for page {}", pageNumberValue);
|
document, page, elements, imageElements, fontMap, pageNumberValue);
|
||||||
|
log.info("Page content regeneration complete for page {}", pageNumberValue);
|
||||||
}
|
}
|
||||||
pageIndex++;
|
pageIndex++;
|
||||||
}
|
}
|
||||||
@ -571,7 +597,9 @@ public class PdfJsonConversionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private List<PdfJsonPage> extractPages(
|
private List<PdfJsonPage> extractPages(
|
||||||
PDDocument document, Map<Integer, List<PdfJsonTextElement>> textByPage)
|
PDDocument document,
|
||||||
|
Map<Integer, List<PdfJsonTextElement>> textByPage,
|
||||||
|
Map<Integer, List<PdfJsonImageElement>> imagesByPage)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
List<PdfJsonPage> pages = new ArrayList<>();
|
List<PdfJsonPage> pages = new ArrayList<>();
|
||||||
int pageIndex = 0;
|
int pageIndex = 0;
|
||||||
@ -583,6 +611,7 @@ public class PdfJsonConversionService {
|
|||||||
pageModel.setHeight(mediaBox.getHeight());
|
pageModel.setHeight(mediaBox.getHeight());
|
||||||
pageModel.setRotation(page.getRotation());
|
pageModel.setRotation(page.getRotation());
|
||||||
pageModel.setTextElements(textByPage.getOrDefault(pageIndex + 1, new ArrayList<>()));
|
pageModel.setTextElements(textByPage.getOrDefault(pageIndex + 1, new ArrayList<>()));
|
||||||
|
pageModel.setImageElements(imagesByPage.getOrDefault(pageIndex + 1, new ArrayList<>()));
|
||||||
pageModel.setResources(
|
pageModel.setResources(
|
||||||
serializeCosValue(page.getCOSObject().getDictionaryObject(COSName.RESOURCES)));
|
serializeCosValue(page.getCOSObject().getDictionaryObject(COSName.RESOURCES)));
|
||||||
pageModel.setContentStreams(extractContentStreams(page));
|
pageModel.setContentStreams(extractContentStreams(page));
|
||||||
@ -592,6 +621,19 @@ public class PdfJsonConversionService {
|
|||||||
return pages;
|
return pages;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Map<Integer, List<PdfJsonImageElement>> collectImages(PDDocument document)
|
||||||
|
throws IOException {
|
||||||
|
Map<Integer, List<PdfJsonImageElement>> imagesByPage = new LinkedHashMap<>();
|
||||||
|
int pageNumber = 1;
|
||||||
|
for (PDPage page : document.getPages()) {
|
||||||
|
ImageCollectingEngine engine =
|
||||||
|
new ImageCollectingEngine(page, pageNumber, imagesByPage);
|
||||||
|
engine.processPage(page);
|
||||||
|
pageNumber++;
|
||||||
|
}
|
||||||
|
return imagesByPage;
|
||||||
|
}
|
||||||
|
|
||||||
private PdfJsonMetadata extractMetadata(PDDocument document) {
|
private PdfJsonMetadata extractMetadata(PDDocument document) {
|
||||||
PdfJsonMetadata metadata = new PdfJsonMetadata();
|
PdfJsonMetadata metadata = new PdfJsonMetadata();
|
||||||
PDDocumentInformation info = document.getDocumentInformation();
|
PDDocumentInformation info = document.getDocumentInformation();
|
||||||
@ -911,60 +953,85 @@ public class PdfJsonConversionService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void regenerateTextContent(
|
private void regeneratePageContent(
|
||||||
PDDocument document,
|
PDDocument document,
|
||||||
PDPage page,
|
PDPage page,
|
||||||
List<PdfJsonTextElement> elements,
|
List<PdfJsonTextElement> textElements,
|
||||||
|
List<PdfJsonImageElement> imageElements,
|
||||||
Map<String, PDFont> fontMap,
|
Map<String, PDFont> fontMap,
|
||||||
int pageNumber)
|
int pageNumber)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
List<DrawableElement> drawables = mergeDrawables(textElements, imageElements);
|
||||||
|
Map<String, PDImageXObject> imageCache = new HashMap<>();
|
||||||
|
|
||||||
try (PDPageContentStream contentStream =
|
try (PDPageContentStream contentStream =
|
||||||
new PDPageContentStream(document, page, AppendMode.OVERWRITE, true, true)) {
|
new PDPageContentStream(document, page, AppendMode.OVERWRITE, true, true)) {
|
||||||
boolean textOpen = false;
|
boolean textOpen = false;
|
||||||
for (PdfJsonTextElement element : elements) {
|
for (DrawableElement drawable : drawables) {
|
||||||
PDFont font = fontMap.get(buildFontKey(pageNumber, element.getFontId()));
|
switch (drawable.type()) {
|
||||||
if (font == null && FALLBACK_FONT_ID.equals(element.getFontId())) {
|
case TEXT -> {
|
||||||
font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
|
PdfJsonTextElement element = drawable.textElement();
|
||||||
}
|
if (element == null) {
|
||||||
float fontScale = resolveFontMatrixSize(element);
|
continue;
|
||||||
String text = Objects.toString(element.getText(), "");
|
|
||||||
|
|
||||||
if (font != null) {
|
|
||||||
try {
|
|
||||||
encodeWithTest(font, text);
|
|
||||||
} catch (IOException | IllegalArgumentException ex) {
|
|
||||||
log.debug(
|
|
||||||
"Edited text contains glyphs missing from font {} ({}), switching to fallback",
|
|
||||||
element.getFontId(),
|
|
||||||
ex.getMessage());
|
|
||||||
font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
|
|
||||||
element.setFontId(FALLBACK_FONT_ID);
|
|
||||||
if (font == null) {
|
|
||||||
font = loadFallbackPdfFont(document);
|
|
||||||
fontMap.put(buildFontKey(-1, FALLBACK_FONT_ID), font);
|
|
||||||
}
|
}
|
||||||
encodeWithTest(font, text);
|
PDFont font = fontMap.get(buildFontKey(pageNumber, element.getFontId()));
|
||||||
}
|
if (font == null && FALLBACK_FONT_ID.equals(element.getFontId())) {
|
||||||
} else {
|
font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
|
||||||
element.setFontId(FALLBACK_FONT_ID);
|
}
|
||||||
font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
|
float fontScale = resolveFontMatrixSize(element);
|
||||||
if (font == null) {
|
String text = Objects.toString(element.getText(), "");
|
||||||
font = loadFallbackPdfFont(document);
|
|
||||||
fontMap.put(buildFontKey(-1, FALLBACK_FONT_ID), font);
|
|
||||||
}
|
|
||||||
encodeWithTest(font, text);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!textOpen) {
|
if (font != null) {
|
||||||
contentStream.beginText();
|
try {
|
||||||
textOpen = true;
|
font.encode(text);
|
||||||
}
|
} catch (IOException | IllegalArgumentException ex) {
|
||||||
|
log.debug(
|
||||||
|
"Edited text contains glyphs missing from font {} ({}), switching to fallback",
|
||||||
|
element.getFontId(),
|
||||||
|
ex.getMessage());
|
||||||
|
font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
|
||||||
|
element.setFontId(FALLBACK_FONT_ID);
|
||||||
|
if (font == null) {
|
||||||
|
font = loadFallbackPdfFont(document);
|
||||||
|
fontMap.put(buildFontKey(-1, FALLBACK_FONT_ID), font);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (font == null) {
|
||||||
|
element.setFontId(FALLBACK_FONT_ID);
|
||||||
|
font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
|
||||||
|
if (font == null) {
|
||||||
|
font = loadFallbackPdfFont(document);
|
||||||
|
fontMap.put(buildFontKey(-1, FALLBACK_FONT_ID), font);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
applyTextState(contentStream, element);
|
if (!textOpen) {
|
||||||
contentStream.setFont(font, fontScale);
|
contentStream.beginText();
|
||||||
applyRenderingMode(contentStream, element.getRenderingMode());
|
textOpen = true;
|
||||||
applyTextMatrix(contentStream, element);
|
}
|
||||||
contentStream.showText(text);
|
|
||||||
|
applyTextState(contentStream, element);
|
||||||
|
contentStream.setFont(font, fontScale);
|
||||||
|
applyRenderingMode(contentStream, element.getRenderingMode());
|
||||||
|
applyTextMatrix(contentStream, element);
|
||||||
|
String sanitized = sanitizeForFont(font, text);
|
||||||
|
if (!sanitized.isEmpty()) {
|
||||||
|
contentStream.showText(sanitized);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case IMAGE -> {
|
||||||
|
if (textOpen) {
|
||||||
|
contentStream.endText();
|
||||||
|
textOpen = false;
|
||||||
|
}
|
||||||
|
PdfJsonImageElement element = drawable.imageElement();
|
||||||
|
if (element == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
drawImageElement(contentStream, document, element, imageCache);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (textOpen) {
|
if (textOpen) {
|
||||||
contentStream.endText();
|
contentStream.endText();
|
||||||
@ -972,11 +1039,47 @@ public class PdfJsonConversionService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void encodeWithTest(PDFont font, String text) throws IOException {
|
private String sanitizeForFont(PDFont font, String text) {
|
||||||
if (text == null || text.isEmpty()) {
|
if (text == null || text.isEmpty()) {
|
||||||
return;
|
return "";
|
||||||
}
|
}
|
||||||
font.encode(text);
|
StringBuilder builder = new StringBuilder(text.length());
|
||||||
|
text.codePoints()
|
||||||
|
.forEach(
|
||||||
|
codePoint -> {
|
||||||
|
String candidate = new String(Character.toChars(codePoint));
|
||||||
|
try {
|
||||||
|
font.encode(candidate);
|
||||||
|
builder.append(candidate);
|
||||||
|
return;
|
||||||
|
} catch (IOException | IllegalArgumentException ex) {
|
||||||
|
String mapped = mapUnsupportedGlyph(codePoint);
|
||||||
|
if (mapped != null) {
|
||||||
|
try {
|
||||||
|
font.encode(mapped);
|
||||||
|
builder.append(mapped);
|
||||||
|
return;
|
||||||
|
} catch (IOException | IllegalArgumentException ignore) {
|
||||||
|
// fall through to generic replacement
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.debug(
|
||||||
|
"Replacing unsupported glyph {} ({}) with '?' for font {}",
|
||||||
|
candidate,
|
||||||
|
String.format("U+%04X", codePoint),
|
||||||
|
font.getName());
|
||||||
|
builder.append('?');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String mapUnsupportedGlyph(int codePoint) {
|
||||||
|
return switch (codePoint) {
|
||||||
|
case 0x276E -> "<";
|
||||||
|
case 0x276F -> ">";
|
||||||
|
default -> null;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private void applyTextState(PDPageContentStream contentStream, PdfJsonTextElement element)
|
private void applyTextState(PDPageContentStream contentStream, PdfJsonTextElement element)
|
||||||
@ -1198,7 +1301,7 @@ public class PdfJsonConversionService {
|
|||||||
byte[] encoded = font.encode(replacement);
|
byte[] encoded = font.encode(replacement);
|
||||||
cosString.setValue(encoded);
|
cosString.setValue(encoded);
|
||||||
return true;
|
return true;
|
||||||
} catch (IOException | IllegalArgumentException ex) {
|
} catch (IOException | IllegalArgumentException | UnsupportedOperationException ex) {
|
||||||
log.debug("Failed to encode replacement text: {}", ex.getMessage());
|
log.debug("Failed to encode replacement text: {}", ex.getMessage());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1222,7 +1325,9 @@ public class PdfJsonConversionService {
|
|||||||
try {
|
try {
|
||||||
byte[] encoded = font.encode(replacement);
|
byte[] encoded = font.encode(replacement);
|
||||||
array.set(i, new COSString(encoded));
|
array.set(i, new COSString(encoded));
|
||||||
} catch (IOException | IllegalArgumentException ex) {
|
} catch (IOException
|
||||||
|
| IllegalArgumentException
|
||||||
|
| UnsupportedOperationException ex) {
|
||||||
log.debug("Failed to encode replacement text in TJ array: {}", ex.getMessage());
|
log.debug("Failed to encode replacement text in TJ array: {}", ex.getMessage());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1542,6 +1647,377 @@ public class PdfJsonConversionService {
|
|||||||
return calendar;
|
return calendar;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private class ImageCollectingEngine extends PDFGraphicsStreamEngine {
|
||||||
|
|
||||||
|
private final int pageNumber;
|
||||||
|
private final Map<Integer, List<PdfJsonImageElement>> imagesByPage;
|
||||||
|
|
||||||
|
private COSName currentXObjectName;
|
||||||
|
private int imageCounter = 0;
|
||||||
|
|
||||||
|
protected ImageCollectingEngine(
|
||||||
|
PDPage page, int pageNumber, Map<Integer, List<PdfJsonImageElement>> imagesByPage)
|
||||||
|
throws IOException {
|
||||||
|
super(page);
|
||||||
|
this.pageNumber = pageNumber;
|
||||||
|
this.imagesByPage = imagesByPage;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void processPage(PDPage page) throws IOException {
|
||||||
|
super.processPage(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void drawImage(PDImage pdImage) throws IOException {
|
||||||
|
EncodedImage encoded = encodeImage(pdImage);
|
||||||
|
if (encoded == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
|
||||||
|
Bounds bounds = computeBounds(ctm);
|
||||||
|
List<Float> matrixValues = toMatrixValues(ctm);
|
||||||
|
|
||||||
|
PdfJsonImageElement element =
|
||||||
|
PdfJsonImageElement.builder()
|
||||||
|
.id(UUID.randomUUID().toString())
|
||||||
|
.objectName(
|
||||||
|
currentXObjectName != null
|
||||||
|
? currentXObjectName.getName()
|
||||||
|
: null)
|
||||||
|
.inlineImage(!(pdImage instanceof PDImageXObject))
|
||||||
|
.nativeWidth(pdImage.getWidth())
|
||||||
|
.nativeHeight(pdImage.getHeight())
|
||||||
|
.x(bounds.left)
|
||||||
|
.y(bounds.bottom)
|
||||||
|
.width(bounds.width())
|
||||||
|
.height(bounds.height())
|
||||||
|
.left(bounds.left)
|
||||||
|
.right(bounds.right)
|
||||||
|
.top(bounds.top)
|
||||||
|
.bottom(bounds.bottom)
|
||||||
|
.transform(matrixValues)
|
||||||
|
.zOrder(-1_000_000 + imageCounter)
|
||||||
|
.imageData(encoded.base64())
|
||||||
|
.imageFormat(encoded.format())
|
||||||
|
.build();
|
||||||
|
imageCounter++;
|
||||||
|
imagesByPage.computeIfAbsent(pageNumber, key -> new ArrayList<>()).add(element);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3)
|
||||||
|
throws IOException {
|
||||||
|
// Not needed for image extraction
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void clip(int windingRule) throws IOException {
|
||||||
|
// Not needed for image extraction
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void moveTo(float x, float y) throws IOException {
|
||||||
|
// Not needed for image extraction
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void lineTo(float x, float y) throws IOException {
|
||||||
|
// Not needed for image extraction
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3)
|
||||||
|
throws IOException {
|
||||||
|
// Not needed for image extraction
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Point2D getCurrentPoint() throws IOException {
|
||||||
|
return new Point2D.Float();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void closePath() throws IOException {
|
||||||
|
// Not needed for image extraction
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void endPath() throws IOException {
|
||||||
|
// Not needed for image extraction
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void shadingFill(COSName shadingName) throws IOException {
|
||||||
|
// Not needed for image extraction
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void fillAndStrokePath(int windingRule) throws IOException {
|
||||||
|
// Not needed for image extraction
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void fillPath(int windingRule) throws IOException {
|
||||||
|
// Not needed for image extraction
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void strokePath() throws IOException {
|
||||||
|
// Not needed for image extraction
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void processOperator(Operator operator, List<COSBase> operands)
|
||||||
|
throws IOException {
|
||||||
|
if (OperatorName.DRAW_OBJECT.equals(operator.getName())
|
||||||
|
&& !operands.isEmpty()
|
||||||
|
&& operands.get(0) instanceof COSName name) {
|
||||||
|
currentXObjectName = name;
|
||||||
|
}
|
||||||
|
super.processOperator(operator, operands);
|
||||||
|
currentXObjectName = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Bounds computeBounds(Matrix ctm) {
|
||||||
|
AffineTransform transform = ctm.createAffineTransform();
|
||||||
|
Point2D.Float p0 = new Point2D.Float(0, 0);
|
||||||
|
Point2D.Float p1 = new Point2D.Float(1, 0);
|
||||||
|
Point2D.Float p2 = new Point2D.Float(0, 1);
|
||||||
|
Point2D.Float p3 = new Point2D.Float(1, 1);
|
||||||
|
transform.transform(p0, p0);
|
||||||
|
transform.transform(p1, p1);
|
||||||
|
transform.transform(p2, p2);
|
||||||
|
transform.transform(p3, p3);
|
||||||
|
|
||||||
|
float minX = Math.min(Math.min(p0.x, p1.x), Math.min(p2.x, p3.x));
|
||||||
|
float maxX = Math.max(Math.max(p0.x, p1.x), Math.max(p2.x, p3.x));
|
||||||
|
float minY = Math.min(Math.min(p0.y, p1.y), Math.min(p2.y, p3.y));
|
||||||
|
float maxY = Math.max(Math.max(p0.y, p1.y), Math.max(p2.y, p3.y));
|
||||||
|
|
||||||
|
if (!Float.isFinite(minX) || !Float.isFinite(minY)) {
|
||||||
|
return new Bounds(0f, 0f, 0f, 0f);
|
||||||
|
}
|
||||||
|
return new Bounds(minX, maxX, minY, maxY);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private record Bounds(float left, float right, float bottom, float top) {
|
||||||
|
float width() {
|
||||||
|
return Math.max(0f, right - left);
|
||||||
|
}
|
||||||
|
|
||||||
|
float height() {
|
||||||
|
return Math.max(0f, top - bottom);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private enum DrawableType {
|
||||||
|
TEXT,
|
||||||
|
IMAGE
|
||||||
|
}
|
||||||
|
|
||||||
|
private record DrawableElement(
|
||||||
|
DrawableType type,
|
||||||
|
PdfJsonTextElement textElement,
|
||||||
|
PdfJsonImageElement imageElement,
|
||||||
|
int zOrder,
|
||||||
|
int sequence) {}
|
||||||
|
|
||||||
|
/**
 * Result of encoding an image for JSON transport.
 *
 * @param base64 the encoded image bytes as a Base64 string
 * @param format name of the image format the bytes were written in
 */
private record EncodedImage(
        String base64,
        String format) {
    // Pure data carrier; no additional behaviour.
}
|
||||||
|
|
||||||
|
private List<Float> toMatrixValues(Matrix matrix) {
|
||||||
|
List<Float> values = new ArrayList<>(6);
|
||||||
|
values.add(matrix.getValue(0, 0));
|
||||||
|
values.add(matrix.getValue(0, 1));
|
||||||
|
values.add(matrix.getValue(1, 0));
|
||||||
|
values.add(matrix.getValue(1, 1));
|
||||||
|
values.add(matrix.getValue(2, 0));
|
||||||
|
values.add(matrix.getValue(2, 1));
|
||||||
|
return values;
|
||||||
|
}
|
||||||
|
|
||||||
|
private EncodedImage encodeImage(PDImage image) {
|
||||||
|
try {
|
||||||
|
BufferedImage bufferedImage = image.getImage();
|
||||||
|
if (bufferedImage == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
String format = resolveImageFormat(image);
|
||||||
|
if (format == null || format.isBlank()) {
|
||||||
|
format = "png";
|
||||||
|
}
|
||||||
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||||
|
boolean written = ImageIO.write(bufferedImage, format, baos);
|
||||||
|
if (!written) {
|
||||||
|
if (!"png".equalsIgnoreCase(format)) {
|
||||||
|
baos.reset();
|
||||||
|
if (!ImageIO.write(bufferedImage, "png", baos)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
format = "png";
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new EncodedImage(Base64.getEncoder().encodeToString(baos.toByteArray()), format);
|
||||||
|
} catch (IOException ex) {
|
||||||
|
log.debug("Failed to encode image: {}", ex.getMessage());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String resolveImageFormat(PDImage image) {
|
||||||
|
if (image instanceof PDImageXObject xObject) {
|
||||||
|
String suffix = xObject.getSuffix();
|
||||||
|
if (suffix != null && !suffix.isBlank()) {
|
||||||
|
return suffix.toLowerCase(Locale.ROOT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "png";
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<DrawableElement> mergeDrawables(
|
||||||
|
List<PdfJsonTextElement> textElements, List<PdfJsonImageElement> imageElements) {
|
||||||
|
List<DrawableElement> drawables = new ArrayList<>();
|
||||||
|
int sequence = 0;
|
||||||
|
|
||||||
|
if (imageElements != null) {
|
||||||
|
int imageIndex = 0;
|
||||||
|
for (PdfJsonImageElement imageElement : imageElements) {
|
||||||
|
if (imageElement == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int order =
|
||||||
|
imageElement.getZOrder() != null
|
||||||
|
? imageElement.getZOrder()
|
||||||
|
: Integer.MIN_VALUE / 2 + imageIndex;
|
||||||
|
drawables.add(
|
||||||
|
new DrawableElement(
|
||||||
|
DrawableType.IMAGE, null, imageElement, order, sequence++));
|
||||||
|
imageIndex++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (textElements != null) {
|
||||||
|
int textIndex = 0;
|
||||||
|
for (PdfJsonTextElement textElement : textElements) {
|
||||||
|
if (textElement == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int order =
|
||||||
|
textElement.getZOrder() != null
|
||||||
|
? textElement.getZOrder()
|
||||||
|
: 1_000_000 + textIndex;
|
||||||
|
drawables.add(
|
||||||
|
new DrawableElement(
|
||||||
|
DrawableType.TEXT, textElement, null, order, sequence++));
|
||||||
|
textIndex++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
drawables.sort(
|
||||||
|
Comparator.comparingInt(DrawableElement::zOrder)
|
||||||
|
.thenComparingInt(DrawableElement::sequence));
|
||||||
|
return drawables;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void drawImageElement(
|
||||||
|
PDPageContentStream contentStream,
|
||||||
|
PDDocument document,
|
||||||
|
PdfJsonImageElement element,
|
||||||
|
Map<String, PDImageXObject> cache)
|
||||||
|
throws IOException {
|
||||||
|
if (element == null || element.getImageData() == null || element.getImageData().isBlank()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String cacheKey =
|
||||||
|
element.getId() != null && !element.getId().isBlank()
|
||||||
|
? element.getId()
|
||||||
|
: Integer.toHexString(System.identityHashCode(element));
|
||||||
|
PDImageXObject image = cache.get(cacheKey);
|
||||||
|
if (image == null) {
|
||||||
|
image = createImageXObject(document, element);
|
||||||
|
if (image == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
cache.put(cacheKey, image);
|
||||||
|
}
|
||||||
|
|
||||||
|
float width = safeFloat(element.getWidth(), fallbackWidth(element));
|
||||||
|
float height = safeFloat(element.getHeight(), fallbackHeight(element));
|
||||||
|
if (width <= 0f) {
|
||||||
|
width = Math.max(1f, fallbackWidth(element));
|
||||||
|
}
|
||||||
|
if (height <= 0f) {
|
||||||
|
height = Math.max(1f, fallbackHeight(element));
|
||||||
|
}
|
||||||
|
float left = resolveLeft(element, width);
|
||||||
|
float bottom = resolveBottom(element, height);
|
||||||
|
|
||||||
|
contentStream.drawImage(image, left, bottom, width, height);
|
||||||
|
}
|
||||||
|
|
||||||
|
private PDImageXObject createImageXObject(PDDocument document, PdfJsonImageElement element)
|
||||||
|
throws IOException {
|
||||||
|
byte[] data;
|
||||||
|
try {
|
||||||
|
data = Base64.getDecoder().decode(element.getImageData());
|
||||||
|
} catch (IllegalArgumentException ex) {
|
||||||
|
log.debug("Failed to decode image element: {}", ex.getMessage());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
String name = element.getId() != null ? element.getId() : UUID.randomUUID().toString();
|
||||||
|
return PDImageXObject.createFromByteArray(document, data, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
private float fallbackWidth(PdfJsonImageElement element) {
|
||||||
|
if (element.getRight() != null && element.getLeft() != null) {
|
||||||
|
return Math.max(0f, element.getRight() - element.getLeft());
|
||||||
|
}
|
||||||
|
if (element.getNativeWidth() != null) {
|
||||||
|
return element.getNativeWidth();
|
||||||
|
}
|
||||||
|
return 1f;
|
||||||
|
}
|
||||||
|
|
||||||
|
private float resolveLeft(PdfJsonImageElement element, float width) {
|
||||||
|
if (element.getLeft() != null) {
|
||||||
|
return element.getLeft();
|
||||||
|
}
|
||||||
|
if (element.getX() != null) {
|
||||||
|
return element.getX();
|
||||||
|
}
|
||||||
|
if (element.getRight() != null) {
|
||||||
|
return element.getRight() - width;
|
||||||
|
}
|
||||||
|
return 0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
private float resolveBottom(PdfJsonImageElement element, float height) {
|
||||||
|
if (element.getBottom() != null) {
|
||||||
|
return element.getBottom();
|
||||||
|
}
|
||||||
|
if (element.getY() != null) {
|
||||||
|
return element.getY();
|
||||||
|
}
|
||||||
|
if (element.getTop() != null) {
|
||||||
|
return element.getTop() - height;
|
||||||
|
}
|
||||||
|
return 0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
private float fallbackHeight(PdfJsonImageElement element) {
|
||||||
|
if (element.getTop() != null && element.getBottom() != null) {
|
||||||
|
return Math.max(0f, element.getTop() - element.getBottom());
|
||||||
|
}
|
||||||
|
if (element.getNativeHeight() != null) {
|
||||||
|
return element.getNativeHeight();
|
||||||
|
}
|
||||||
|
return 1f;
|
||||||
|
}
|
||||||
|
|
||||||
private class TextCollectingStripper extends PDFTextStripper {
|
private class TextCollectingStripper extends PDFTextStripper {
|
||||||
|
|
||||||
private final PDDocument document;
|
private final PDDocument document;
|
||||||
@ -1595,6 +2071,7 @@ public class PdfJsonConversionService {
|
|||||||
element.setHeight(position.getHeightDir());
|
element.setHeight(position.getHeightDir());
|
||||||
element.setTextMatrix(extractMatrix(position));
|
element.setTextMatrix(extractMatrix(position));
|
||||||
element.setFontMatrixSize(computeFontMatrixSize(element.getTextMatrix()));
|
element.setFontMatrixSize(computeFontMatrixSize(element.getTextMatrix()));
|
||||||
|
element.setSpaceWidth(position.getWidthOfSpace());
|
||||||
PDGraphicsState graphicsState = getGraphicsState();
|
PDGraphicsState graphicsState = getGraphicsState();
|
||||||
if (graphicsState != null) {
|
if (graphicsState != null) {
|
||||||
PDTextState textState = graphicsState.getTextState();
|
PDTextState textState = graphicsState.getTextState();
|
||||||
@ -1611,6 +2088,7 @@ public class PdfJsonConversionService {
|
|||||||
element.setFillColor(toTextColor(graphicsState.getNonStrokingColor()));
|
element.setFillColor(toTextColor(graphicsState.getNonStrokingColor()));
|
||||||
element.setStrokeColor(toTextColor(graphicsState.getStrokingColor()));
|
element.setStrokeColor(toTextColor(graphicsState.getStrokingColor()));
|
||||||
}
|
}
|
||||||
|
element.setZOrder(1_000_000 + pageElements.size());
|
||||||
pageElements.add(element);
|
pageElements.add(element);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
55
frontend/package-lock.json
generated
55
frontend/package-lock.json
generated
@ -54,6 +54,7 @@
|
|||||||
"react": "^19.1.1",
|
"react": "^19.1.1",
|
||||||
"react-dom": "^19.1.1",
|
"react-dom": "^19.1.1",
|
||||||
"react-i18next": "^15.7.3",
|
"react-i18next": "^15.7.3",
|
||||||
|
"react-rnd": "^10.5.2",
|
||||||
"react-router-dom": "^7.9.1",
|
"react-router-dom": "^7.9.1",
|
||||||
"signature_pad": "^5.0.4",
|
"signature_pad": "^5.0.4",
|
||||||
"tailwindcss": "^4.1.13",
|
"tailwindcss": "^4.1.13",
|
||||||
@ -11036,6 +11037,16 @@
|
|||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/re-resizable": {
|
||||||
|
"version": "6.11.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/re-resizable/-/re-resizable-6.11.2.tgz",
|
||||||
|
"integrity": "sha512-2xI2P3OHs5qw7K0Ud1aLILK6MQxW50TcO+DetD9eIV58j84TqYeHoZcL9H4GXFXXIh7afhH8mv5iUCXII7OW7A==",
|
||||||
|
"license": "MIT",
|
||||||
|
"peerDependencies": {
|
||||||
|
"react": "^16.13.1 || ^17.0.0 || ^18.0.0 || ^19.0.0",
|
||||||
|
"react-dom": "^16.13.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/react": {
|
"node_modules/react": {
|
||||||
"version": "19.1.1",
|
"version": "19.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/react/-/react-19.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/react/-/react-19.1.1.tgz",
|
||||||
@ -11057,6 +11068,29 @@
|
|||||||
"react": "^19.1.1"
|
"react": "^19.1.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/react-draggable": {
|
||||||
|
"version": "4.4.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/react-draggable/-/react-draggable-4.4.6.tgz",
|
||||||
|
"integrity": "sha512-LtY5Xw1zTPqHkVmtM3X8MUOxNDOUhv/khTgBgrUvwaS064bwVvxT+q5El0uUFNx5IEPKXuRejr7UqLwBIg5pdw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"clsx": "^1.1.1",
|
||||||
|
"prop-types": "^15.8.1"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"react": ">= 16.3.0",
|
||||||
|
"react-dom": ">= 16.3.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/react-draggable/node_modules/clsx": {
|
||||||
|
"version": "1.2.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/clsx/-/clsx-1.2.1.tgz",
|
||||||
|
"integrity": "sha512-EcR6r5a8bj6pu3ycsa/E/cKVGuTgZJZdsyUYHOksG/UHIiKfjxzRxYJpyVBwYaQeOvghal9fcc4PidlgzugAQg==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=6"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/react-dropzone": {
|
"node_modules/react-dropzone": {
|
||||||
"version": "14.3.8",
|
"version": "14.3.8",
|
||||||
"resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-14.3.8.tgz",
|
"resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-14.3.8.tgz",
|
||||||
@ -11175,6 +11209,27 @@
|
|||||||
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
|
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
|
||||||
"license": "0BSD"
|
"license": "0BSD"
|
||||||
},
|
},
|
||||||
|
"node_modules/react-rnd": {
|
||||||
|
"version": "10.5.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/react-rnd/-/react-rnd-10.5.2.tgz",
|
||||||
|
"integrity": "sha512-0Tm4x7k7pfHf2snewJA8x7Nwgt3LV+58MVEWOVsFjk51eYruFEa6Wy7BNdxt4/lH0wIRsu7Gm3KjSXY2w7YaNw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"re-resizable": "6.11.2",
|
||||||
|
"react-draggable": "4.4.6",
|
||||||
|
"tslib": "2.6.2"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"react": ">=16.3.0",
|
||||||
|
"react-dom": ">=16.3.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/react-rnd/node_modules/tslib": {
|
||||||
|
"version": "2.6.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
|
||||||
|
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==",
|
||||||
|
"license": "0BSD"
|
||||||
|
},
|
||||||
"node_modules/react-router": {
|
"node_modules/react-router": {
|
||||||
"version": "7.9.1",
|
"version": "7.9.1",
|
||||||
"resolved": "https://registry.npmjs.org/react-router/-/react-router-7.9.1.tgz",
|
"resolved": "https://registry.npmjs.org/react-router/-/react-router-7.9.1.tgz",
|
||||||
|
|||||||
@ -49,6 +49,7 @@
|
|||||||
"posthog-js": "^1.268.0",
|
"posthog-js": "^1.268.0",
|
||||||
"react": "^19.1.1",
|
"react": "^19.1.1",
|
||||||
"react-dom": "^19.1.1",
|
"react-dom": "^19.1.1",
|
||||||
|
"react-rnd": "^10.5.2",
|
||||||
"react-i18next": "^15.7.3",
|
"react-i18next": "^15.7.3",
|
||||||
"react-router-dom": "^7.9.1",
|
"react-router-dom": "^7.9.1",
|
||||||
"signature_pad": "^5.0.4",
|
"signature_pad": "^5.0.4",
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
import React, { useEffect, useMemo, useState } from 'react';
|
import React, { useCallback, useEffect, useLayoutEffect, useMemo, useRef, useState } from 'react';
|
||||||
import {
|
import {
|
||||||
Alert,
|
Alert,
|
||||||
Badge,
|
Badge,
|
||||||
@ -21,16 +21,59 @@ import PictureAsPdfIcon from '@mui/icons-material/PictureAsPdfOutlined';
|
|||||||
import AutorenewIcon from '@mui/icons-material/Autorenew';
|
import AutorenewIcon from '@mui/icons-material/Autorenew';
|
||||||
import WarningAmberIcon from '@mui/icons-material/WarningAmber';
|
import WarningAmberIcon from '@mui/icons-material/WarningAmber';
|
||||||
import UploadIcon from '@mui/icons-material/Upload';
|
import UploadIcon from '@mui/icons-material/Upload';
|
||||||
|
import { Rnd } from 'react-rnd';
|
||||||
|
|
||||||
import {
|
import {
|
||||||
PdfJsonEditorViewData,
|
PdfJsonEditorViewData,
|
||||||
PdfJsonPage,
|
PdfJsonPage,
|
||||||
} from '../../../tools/pdfJsonEditorTypes';
|
} from '../../../tools/pdfJsonEditorTypes';
|
||||||
import { pageDimensions } from '../../../tools/pdfJsonEditorUtils';
|
import { getImageBounds, pageDimensions } from '../../../tools/pdfJsonEditorUtils';
|
||||||
|
|
||||||
const MAX_RENDER_WIDTH = 820;
|
const MAX_RENDER_WIDTH = 820;
|
||||||
const MIN_BOX_SIZE = 18;
|
const MIN_BOX_SIZE = 18;
|
||||||
|
|
||||||
|
/**
 * Returns the caret position inside `element`, measured in characters from the start of its
 * content. When the current selection's focus is not inside the element, the length of the
 * element's text is returned instead.
 */
const getCaretOffset = (element: HTMLElement): number => {
  const selection = window.getSelection();

  if (selection && selection.rangeCount !== 0 && element.contains(selection.focusNode)) {
    // Measure the text between the start of the element and the selection focus.
    const measured = selection.getRangeAt(0).cloneRange();
    measured.selectNodeContents(element);
    measured.setEnd(selection.focusNode as Node, selection.focusOffset);
    return measured.toString().length;
  }

  return element.innerText.length;
};
|
||||||
|
|
||||||
|
/**
 * Places a collapsed caret inside `element` at the given character offset.
 *
 * The offset is clamped to `[0, element.innerText.length]`. Text nodes are walked in document
 * order until the one containing the offset is found; if none is found the caret goes to the
 * end of the element's content. Does nothing when no selection object is available.
 */
const setCaretOffset = (element: HTMLElement, offset: number): void => {
  const selection = window.getSelection();
  if (!selection) {
    return;
  }

  const clampedOffset = Math.max(0, Math.min(offset, element.innerText.length));
  const caret = document.createRange();
  const textNodes = document.createTreeWalker(element, NodeFilter.SHOW_TEXT);

  let charsLeft = clampedOffset;
  let placed = false;
  for (let current = textNodes.nextNode(); current; current = textNodes.nextNode()) {
    const textNode = current as Text;
    const size = textNode.length;
    if (charsLeft <= size) {
      caret.setStart(textNode, charsLeft);
      caret.collapse(true);
      placed = true;
      break;
    }
    charsLeft -= size;
  }

  if (!placed) {
    // Offset lies beyond every text node: park the caret at the end of the content.
    caret.selectNodeContents(element);
    caret.collapse(false);
  }

  selection.removeAllRanges();
  selection.addRange(caret);
};
|
||||||
|
|
||||||
interface PdfJsonEditorViewProps {
|
interface PdfJsonEditorViewProps {
|
||||||
data: PdfJsonEditorViewData;
|
data: PdfJsonEditorViewData;
|
||||||
}
|
}
|
||||||
@ -61,10 +104,15 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
const [activeGroupId, setActiveGroupId] = useState<string | null>(null);
|
const [activeGroupId, setActiveGroupId] = useState<string | null>(null);
|
||||||
const [editingGroupId, setEditingGroupId] = useState<string | null>(null);
|
const [editingGroupId, setEditingGroupId] = useState<string | null>(null);
|
||||||
|
const [activeImageId, setActiveImageId] = useState<string | null>(null);
|
||||||
|
const containerRef = useRef<HTMLDivElement | null>(null);
|
||||||
|
const editorRefs = useRef<Map<string, HTMLDivElement>>(new Map());
|
||||||
|
const caretOffsetsRef = useRef<Map<string, number>>(new Map());
|
||||||
|
|
||||||
const {
|
const {
|
||||||
document: pdfDocument,
|
document: pdfDocument,
|
||||||
groupsByPage,
|
groupsByPage,
|
||||||
|
imagesByPage,
|
||||||
selectedPage,
|
selectedPage,
|
||||||
dirtyPages,
|
dirtyPages,
|
||||||
hasDocument,
|
hasDocument,
|
||||||
@ -76,6 +124,8 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
onLoadJson,
|
onLoadJson,
|
||||||
onSelectPage,
|
onSelectPage,
|
||||||
onGroupEdit,
|
onGroupEdit,
|
||||||
|
onImageTransform,
|
||||||
|
onImageReset,
|
||||||
onReset,
|
onReset,
|
||||||
onDownloadJson,
|
onDownloadJson,
|
||||||
onGeneratePdf,
|
onGeneratePdf,
|
||||||
@ -114,6 +164,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
const pages = pdfDocument?.pages ?? [];
|
const pages = pdfDocument?.pages ?? [];
|
||||||
const currentPage = pages[selectedPage] ?? null;
|
const currentPage = pages[selectedPage] ?? null;
|
||||||
const pageGroups = groupsByPage[selectedPage] ?? [];
|
const pageGroups = groupsByPage[selectedPage] ?? [];
|
||||||
|
const pageImages = imagesByPage[selectedPage] ?? [];
|
||||||
const visibleGroups = useMemo(
|
const visibleGroups = useMemo(
|
||||||
() =>
|
() =>
|
||||||
pageGroups.filter((group) => {
|
pageGroups.filter((group) => {
|
||||||
@ -123,6 +174,14 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
[editingGroupId, pageGroups]
|
[editingGroupId, pageGroups]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Images of the current page sorted by ascending zOrder; images without a zOrder are treated
// as -1_000_000. The source array is copied so the prop data is never mutated.
const orderedImages = useMemo(() => {
  const sorted = pageImages.slice();
  sorted.sort((first, second) => {
    const firstOrder = first?.zOrder ?? -1_000_000;
    const secondOrder = second?.zOrder ?? -1_000_000;
    return firstOrder - secondOrder;
  });
  return sorted;
}, [pageImages]);
|
||||||
|
|
||||||
const { width: pageWidth, height: pageHeight } = pageDimensions(currentPage);
|
const { width: pageWidth, height: pageHeight } = pageDimensions(currentPage);
|
||||||
const scale = useMemo(() => Math.min(MAX_RENDER_WIDTH / pageWidth, 1.5), [pageWidth]);
|
const scale = useMemo(() => Math.min(MAX_RENDER_WIDTH / pageWidth, 1.5), [pageWidth]);
|
||||||
const scaledWidth = pageWidth * scale;
|
const scaledWidth = pageWidth * scale;
|
||||||
@ -131,8 +190,21 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
setActiveGroupId(null);
|
setActiveGroupId(null);
|
||||||
setEditingGroupId(null);
|
setEditingGroupId(null);
|
||||||
|
setActiveImageId(null);
|
||||||
}, [selectedPage]);
|
}, [selectedPage]);
|
||||||
|
|
||||||
|
useLayoutEffect(() => {
|
||||||
|
if (!editingGroupId) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const editor = editorRefs.current.get(editingGroupId);
|
||||||
|
if (!editor) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const offset = caretOffsetsRef.current.get(editingGroupId) ?? editor.innerText.length;
|
||||||
|
setCaretOffset(editor, offset);
|
||||||
|
}, [editingGroupId, groupsByPage, imagesByPage]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!editingGroupId) {
|
if (!editingGroupId) {
|
||||||
return;
|
return;
|
||||||
@ -160,6 +232,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
const handleBackgroundClick = () => {
|
const handleBackgroundClick = () => {
|
||||||
setEditingGroupId(null);
|
setEditingGroupId(null);
|
||||||
setActiveGroupId(null);
|
setActiveGroupId(null);
|
||||||
|
setActiveImageId(null);
|
||||||
};
|
};
|
||||||
|
|
||||||
const renderGroupContainer = (
|
const renderGroupContainer = (
|
||||||
@ -205,6 +278,28 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
</Box>
|
</Box>
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Converts an on-screen rectangle (CSS pixels, origin top-left) into page units with a
// bottom-left origin, clamps it to the page, and reports it through onImageTransform.
const emitImageTransform = useCallback(
  (imageId: string, leftPx: number, topPx: number, widthPx: number, heightPx: number) => {
    const clamp = (value: number, lower: number, upper: number): number =>
      Math.min(Math.max(value, lower), upper);

    // Size in page units, never smaller than 0.01.
    const width = Math.max(widthPx / scale, 0.01);
    const height = Math.max(heightPx / scale, 0.01);

    // Horizontal position: keep the whole width on the page where possible.
    const left = clamp(leftPx / scale, 0, Math.max(pageWidth - width, 0));

    // Vertical position: flip the axis, then keep the top edge between `height` and the page top.
    const top = clamp(pageHeight - topPx / scale, Math.min(height, pageHeight), pageHeight);
    const bottom = Math.max(top - height, 0);

    onImageTransform(selectedPage, imageId, { left, bottom, width, height, transform: [] });
  },
  [onImageTransform, pageHeight, pageWidth, scale, selectedPage],
);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Stack gap="xl" className="h-full" style={{ padding: '1.5rem', overflow: 'auto' }}>
|
<Stack gap="xl" className="h-full" style={{ padding: '1.5rem', overflow: 'auto' }}>
|
||||||
<Card withBorder radius="md" shadow="xs" padding="lg">
|
<Card withBorder radius="md" shadow="xs" padding="lg">
|
||||||
@ -341,8 +436,121 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
borderRadius: '0.5rem',
|
borderRadius: '0.5rem',
|
||||||
overflow: 'hidden',
|
overflow: 'hidden',
|
||||||
}}
|
}}
|
||||||
|
ref={containerRef}
|
||||||
>
|
>
|
||||||
{visibleGroups.length === 0 ? (
|
{orderedImages.map((image, imageIndex) => {
|
||||||
|
if (!image?.imageData) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
const bounds = getImageBounds(image);
|
||||||
|
const width = Math.max(bounds.right - bounds.left, 1);
|
||||||
|
const height = Math.max(bounds.top - bounds.bottom, 1);
|
||||||
|
const cssWidth = Math.max(width * scale, 2);
|
||||||
|
const cssHeight = Math.max(height * scale, 2);
|
||||||
|
const cssLeft = bounds.left * scale;
|
||||||
|
const cssTop = (pageHeight - bounds.top) * scale;
|
||||||
|
const imageId = image.id ?? `page-${selectedPage}-image-${imageIndex}`;
|
||||||
|
const isActive = activeImageId === imageId;
|
||||||
|
const src = `data:image/${image.imageFormat ?? 'png'};base64,${image.imageData}`;
|
||||||
|
const baseZIndex = (image.zOrder ?? -1_000_000) + 1_050_000;
|
||||||
|
const zIndex = isActive ? baseZIndex + 1_000_000 : baseZIndex;
|
||||||
|
|
||||||
|
return (
|
||||||
|
<Rnd
|
||||||
|
key={`image-${imageId}`}
|
||||||
|
bounds="parent"
|
||||||
|
size={{ width: cssWidth, height: cssHeight }}
|
||||||
|
position={{ x: cssLeft, y: cssTop }}
|
||||||
|
onDragStart={() => {
|
||||||
|
setActiveGroupId(null);
|
||||||
|
setEditingGroupId(null);
|
||||||
|
setActiveImageId(imageId);
|
||||||
|
}}
|
||||||
|
onDrag={(event, data) => {
|
||||||
|
emitImageTransform(
|
||||||
|
imageId,
|
||||||
|
data.x,
|
||||||
|
data.y,
|
||||||
|
cssWidth,
|
||||||
|
cssHeight,
|
||||||
|
);
|
||||||
|
}}
|
||||||
|
onDragStop={(event, data) => {
|
||||||
|
emitImageTransform(
|
||||||
|
imageId,
|
||||||
|
data.x,
|
||||||
|
data.y,
|
||||||
|
cssWidth,
|
||||||
|
cssHeight,
|
||||||
|
);
|
||||||
|
}}
|
||||||
|
onResizeStart={() => {
|
||||||
|
setActiveImageId(imageId);
|
||||||
|
setActiveGroupId(null);
|
||||||
|
setEditingGroupId(null);
|
||||||
|
}}
|
||||||
|
onResize={(event, _direction, ref, _delta, position) => {
|
||||||
|
const nextWidth = parseFloat(ref.style.width);
|
||||||
|
const nextHeight = parseFloat(ref.style.height);
|
||||||
|
emitImageTransform(
|
||||||
|
imageId,
|
||||||
|
position.x,
|
||||||
|
position.y,
|
||||||
|
nextWidth,
|
||||||
|
nextHeight,
|
||||||
|
);
|
||||||
|
}}
|
||||||
|
onResizeStop={(event, _direction, ref, _delta, position) => {
|
||||||
|
const nextWidth = parseFloat(ref.style.width);
|
||||||
|
const nextHeight = parseFloat(ref.style.height);
|
||||||
|
emitImageTransform(
|
||||||
|
imageId,
|
||||||
|
position.x,
|
||||||
|
position.y,
|
||||||
|
nextWidth,
|
||||||
|
nextHeight,
|
||||||
|
);
|
||||||
|
}}
|
||||||
|
style={{ zIndex }}
|
||||||
|
>
|
||||||
|
<Box
|
||||||
|
onMouseEnter={() => setActiveImageId(imageId)}
|
||||||
|
onMouseLeave={() => {
|
||||||
|
setActiveImageId((current) => (current === imageId ? null : current));
|
||||||
|
}}
|
||||||
|
onDoubleClick={(event) => {
|
||||||
|
event.stopPropagation();
|
||||||
|
onImageReset(selectedPage, imageId);
|
||||||
|
}}
|
||||||
|
style={{
|
||||||
|
width: '100%',
|
||||||
|
height: '100%',
|
||||||
|
cursor: isActive ? 'grabbing' : 'grab',
|
||||||
|
outline: isActive
|
||||||
|
? '2px solid rgba(59, 130, 246, 0.9)'
|
||||||
|
: '1px solid rgba(148, 163, 184, 0.4)',
|
||||||
|
outlineOffset: '-1px',
|
||||||
|
borderRadius: 4,
|
||||||
|
backgroundColor: 'rgba(255,255,255,0.04)',
|
||||||
|
transition: 'outline 120ms ease',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<img
|
||||||
|
src={src}
|
||||||
|
alt={t('pdfJsonEditor.imageLabel', 'Placed image')}
|
||||||
|
style={{
|
||||||
|
width: '100%',
|
||||||
|
height: '100%',
|
||||||
|
objectFit: 'contain',
|
||||||
|
pointerEvents: 'none',
|
||||||
|
userSelect: 'none',
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</Box>
|
||||||
|
</Rnd>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
{visibleGroups.length === 0 && orderedImages.length === 0 ? (
|
||||||
<Group justify="center" align="center" style={{ height: '100%' }}>
|
<Group justify="center" align="center" style={{ height: '100%' }}>
|
||||||
<Stack gap={4} align="center">
|
<Stack gap={4} align="center">
|
||||||
<Text size="sm" c="dimmed">
|
<Text size="sm" c="dimmed">
|
||||||
@ -373,6 +581,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
justifyContent: 'flex-start',
|
justifyContent: 'flex-start',
|
||||||
pointerEvents: 'auto',
|
pointerEvents: 'auto',
|
||||||
cursor: 'text',
|
cursor: 'text',
|
||||||
|
zIndex: 2_000_000,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (isEditing) {
|
if (isEditing) {
|
||||||
@ -383,17 +592,38 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
|||||||
true,
|
true,
|
||||||
changed,
|
changed,
|
||||||
<div
|
<div
|
||||||
|
ref={(node) => {
|
||||||
|
if (node) {
|
||||||
|
editorRefs.current.set(group.id, node);
|
||||||
|
} else {
|
||||||
|
editorRefs.current.delete(group.id);
|
||||||
|
}
|
||||||
|
}}
|
||||||
contentEditable
|
contentEditable
|
||||||
suppressContentEditableWarning
|
suppressContentEditableWarning
|
||||||
data-editor-group={group.id}
|
data-editor-group={group.id}
|
||||||
onBlur={(event) => {
|
onBlur={(event) => {
|
||||||
const value = event.currentTarget.innerText.replace(/\u00A0/g, ' ');
|
const value = event.currentTarget.innerText.replace(/\u00A0/g, ' ');
|
||||||
|
caretOffsetsRef.current.delete(group.id);
|
||||||
|
editorRefs.current.delete(group.id);
|
||||||
|
setActiveGroupId(null);
|
||||||
onGroupEdit(group.pageIndex, group.id, value);
|
onGroupEdit(group.pageIndex, group.id, value);
|
||||||
setEditingGroupId(null);
|
setEditingGroupId(null);
|
||||||
}}
|
}}
|
||||||
onInput={(event) => {
|
onInput={(event) => {
|
||||||
const value = event.currentTarget.innerText.replace(/\u00A0/g, ' ');
|
const value = event.currentTarget.innerText.replace(/\u00A0/g, ' ');
|
||||||
|
const offset = getCaretOffset(event.currentTarget);
|
||||||
|
caretOffsetsRef.current.set(group.id, offset);
|
||||||
onGroupEdit(group.pageIndex, group.id, value);
|
onGroupEdit(group.pageIndex, group.id, value);
|
||||||
|
requestAnimationFrame(() => {
|
||||||
|
if (editingGroupId !== group.id) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const editor = editorRefs.current.get(group.id);
|
||||||
|
if (editor) {
|
||||||
|
setCaretOffset(editor, caretOffsetsRef.current.get(group.id) ?? editor.innerText.length);
|
||||||
|
}
|
||||||
|
});
|
||||||
}}
|
}}
|
||||||
style={{
|
style={{
|
||||||
width: '100%',
|
width: '100%',
|
||||||
|
|||||||
@ -11,6 +11,7 @@ import { downloadBlob, downloadTextAsFile } from '../utils/downloadUtils';
|
|||||||
import { getFilenameFromHeaders } from '../utils/fileResponseUtils';
|
import { getFilenameFromHeaders } from '../utils/fileResponseUtils';
|
||||||
import {
|
import {
|
||||||
PdfJsonDocument,
|
PdfJsonDocument,
|
||||||
|
PdfJsonImageElement,
|
||||||
TextGroup,
|
TextGroup,
|
||||||
PdfJsonEditorViewData,
|
PdfJsonEditorViewData,
|
||||||
} from './pdfJsonEditorTypes';
|
} from './pdfJsonEditorTypes';
|
||||||
@ -19,6 +20,9 @@ import {
|
|||||||
getDirtyPages,
|
getDirtyPages,
|
||||||
groupDocumentText,
|
groupDocumentText,
|
||||||
restoreGlyphElements,
|
restoreGlyphElements,
|
||||||
|
extractDocumentImages,
|
||||||
|
cloneImageElement,
|
||||||
|
valueOr,
|
||||||
} from './pdfJsonEditorUtils';
|
} from './pdfJsonEditorUtils';
|
||||||
import PdfJsonEditorView from '../components/tools/pdfJsonEditor/PdfJsonEditorView';
|
import PdfJsonEditorView from '../components/tools/pdfJsonEditor/PdfJsonEditorView';
|
||||||
|
|
||||||
@ -46,13 +50,19 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
|
|
||||||
const [loadedDocument, setLoadedDocument] = useState<PdfJsonDocument | null>(null);
|
const [loadedDocument, setLoadedDocument] = useState<PdfJsonDocument | null>(null);
|
||||||
const [groupsByPage, setGroupsByPage] = useState<TextGroup[][]>([]);
|
const [groupsByPage, setGroupsByPage] = useState<TextGroup[][]>([]);
|
||||||
|
const [imagesByPage, setImagesByPage] = useState<PdfJsonImageElement[][]>([]);
|
||||||
const [selectedPage, setSelectedPage] = useState(0);
|
const [selectedPage, setSelectedPage] = useState(0);
|
||||||
const [fileName, setFileName] = useState('');
|
const [fileName, setFileName] = useState('');
|
||||||
const [errorMessage, setErrorMessage] = useState<string | null>(null);
|
const [errorMessage, setErrorMessage] = useState<string | null>(null);
|
||||||
const [isGeneratingPdf, setIsGeneratingPdf] = useState(false);
|
const [isGeneratingPdf, setIsGeneratingPdf] = useState(false);
|
||||||
const [isConverting, setIsConverting] = useState(false);
|
const [isConverting, setIsConverting] = useState(false);
|
||||||
|
|
||||||
const dirtyPages = useMemo(() => getDirtyPages(groupsByPage), [groupsByPage]);
|
const originalImagesRef = useRef<PdfJsonImageElement[][]>([]);
|
||||||
|
|
||||||
|
const dirtyPages = useMemo(
|
||||||
|
() => getDirtyPages(groupsByPage, imagesByPage, originalImagesRef.current),
|
||||||
|
[groupsByPage, imagesByPage],
|
||||||
|
);
|
||||||
const hasChanges = useMemo(() => dirtyPages.some(Boolean), [dirtyPages]);
|
const hasChanges = useMemo(() => dirtyPages.some(Boolean), [dirtyPages]);
|
||||||
const hasDocument = loadedDocument !== null;
|
const hasDocument = loadedDocument !== null;
|
||||||
const viewLabel = useMemo(() => t('pdfJsonEditor.viewLabel', 'PDF Editor'), [t]);
|
const viewLabel = useMemo(() => t('pdfJsonEditor.viewLabel', 'PDF Editor'), [t]);
|
||||||
@ -60,12 +70,17 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
const resetToDocument = useCallback((document: PdfJsonDocument | null) => {
|
const resetToDocument = useCallback((document: PdfJsonDocument | null) => {
|
||||||
if (!document) {
|
if (!document) {
|
||||||
setGroupsByPage([]);
|
setGroupsByPage([]);
|
||||||
|
setImagesByPage([]);
|
||||||
|
originalImagesRef.current = [];
|
||||||
setSelectedPage(0);
|
setSelectedPage(0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const cloned = deepCloneDocument(document);
|
const cloned = deepCloneDocument(document);
|
||||||
const groups = groupDocumentText(cloned);
|
const groups = groupDocumentText(cloned);
|
||||||
|
const images = extractDocumentImages(cloned);
|
||||||
|
originalImagesRef.current = images.map((page) => page.map(cloneImageElement));
|
||||||
setGroupsByPage(groups);
|
setGroupsByPage(groups);
|
||||||
|
setImagesByPage(images);
|
||||||
setSelectedPage(0);
|
setSelectedPage(0);
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
@ -108,6 +123,8 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
console.error('Failed to load file', error);
|
console.error('Failed to load file', error);
|
||||||
setLoadedDocument(null);
|
setLoadedDocument(null);
|
||||||
setGroupsByPage([]);
|
setGroupsByPage([]);
|
||||||
|
setImagesByPage([]);
|
||||||
|
originalImagesRef.current = [];
|
||||||
|
|
||||||
if (isPdf) {
|
if (isPdf) {
|
||||||
setErrorMessage(
|
setErrorMessage(
|
||||||
@ -142,6 +159,80 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
);
|
);
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
// Applies a new placement (left/bottom/width/height in page units) to one image of one page.
// All edge fields (x, y, left, right, top, bottom, width, height) are rewritten consistently.
// If the image was mirrored (negative scale in its transform, or in the original image's
// transform when the current one is null/undefined), a mirrored matrix is rebuilt for the new
// rectangle; otherwise the transform is cleared. The page array is returned unchanged when the
// placement did not actually move, so state identity is preserved.
const handleImageTransform = useCallback(
  (
    pageIndex: number,
    imageId: string,
    next: { left: number; bottom: number; width: number; height: number; transform: number[] },
  ) => {
    setImagesByPage((previous) =>
      previous.map((images, idx) => {
        if (idx !== pageIndex) {
          return images;
        }
        let changed = false;
        const updated = images.map((image) => {
          if ((image.id ?? '') !== imageId) {
            return image;
          }
          const originalTransform =
            image.transform ??
            originalImagesRef.current[idx]?.find((base) => (base.id ?? '') === imageId)?.transform;
          // Only the sign of the scale components matters; treat 0/NaN as positive.
          const scaleXSign =
            originalTransform && originalTransform.length >= 6
              ? Math.sign(originalTransform[0]) || 1
              : 1;
          const scaleYSign =
            originalTransform && originalTransform.length >= 6
              ? Math.sign(originalTransform[3]) || 1
              : 1;
          const right = next.left + next.width;
          const top = next.bottom + next.height;
          const mirrored = scaleXSign < 0 || scaleYSign < 0;
          const updatedImage: PdfJsonImageElement = {
            ...image,
            x: next.left,
            y: next.bottom,
            left: next.left,
            bottom: next.bottom,
            right,
            top,
            width: next.width,
            height: next.height,
            transform: mirrored
              ? [
                  next.width * scaleXSign,
                  0,
                  0,
                  next.height * scaleYSign,
                  // A negative scale extends backwards from the translation point, so a
                  // mirrored axis must be anchored at the far edge to cover the same box.
                  scaleXSign >= 0 ? next.left : right,
                  scaleYSign >= 0 ? next.bottom : top,
                ]
              : null,
          };

          const isSame =
            Math.abs(valueOr(image.left, 0) - next.left) < 1e-4 &&
            Math.abs(valueOr(image.bottom, 0) - next.bottom) < 1e-4 &&
            Math.abs(valueOr(image.width, 0) - next.width) < 1e-4 &&
            Math.abs(valueOr(image.height, 0) - next.height) < 1e-4;

          if (!isSame) {
            changed = true;
          }
          return updatedImage;
        });
        return changed ? updated : images;
      }),
    );
  },
  [],
);
|
||||||
|
|
||||||
|
// Restores a single image on a page to the snapshot held in originalImagesRef.
// Does nothing when no original image with that id exists on the page.
const handleImageReset = useCallback((pageIndex: number, imageId: string) => {
  const hasTargetId = (image: PdfJsonImageElement): boolean => (image.id ?? '') === imageId;

  const baseline = originalImagesRef.current[pageIndex]?.find(hasTargetId);
  if (!baseline) {
    return;
  }

  setImagesByPage((previous) =>
    previous.map((images, idx) =>
      idx === pageIndex
        ? images.map((image) => (hasTargetId(image) ? cloneImageElement(baseline) : image))
        : images,
    ),
  );
}, []);
|
||||||
|
|
||||||
const handleResetEdits = useCallback(() => {
|
const handleResetEdits = useCallback(() => {
|
||||||
if (!loadedDocument) {
|
if (!loadedDocument) {
|
||||||
return;
|
return;
|
||||||
@ -155,13 +246,18 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const updatedDocument = restoreGlyphElements(loadedDocument, groupsByPage);
|
const updatedDocument = restoreGlyphElements(
|
||||||
|
loadedDocument,
|
||||||
|
groupsByPage,
|
||||||
|
imagesByPage,
|
||||||
|
originalImagesRef.current,
|
||||||
|
);
|
||||||
const baseName = sanitizeBaseName(fileName || loadedDocument.metadata?.title || undefined);
|
const baseName = sanitizeBaseName(fileName || loadedDocument.metadata?.title || undefined);
|
||||||
return {
|
return {
|
||||||
document: updatedDocument,
|
document: updatedDocument,
|
||||||
filename: `${baseName}.json`,
|
filename: `${baseName}.json`,
|
||||||
};
|
};
|
||||||
}, [fileName, groupsByPage, loadedDocument]);
|
}, [fileName, groupsByPage, imagesByPage, loadedDocument]);
|
||||||
|
|
||||||
const handleDownloadJson = useCallback(() => {
|
const handleDownloadJson = useCallback(() => {
|
||||||
const payload = buildPayload();
|
const payload = buildPayload();
|
||||||
@ -229,6 +325,7 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
const viewData = useMemo<PdfJsonEditorViewData>(() => ({
|
const viewData = useMemo<PdfJsonEditorViewData>(() => ({
|
||||||
document: loadedDocument,
|
document: loadedDocument,
|
||||||
groupsByPage,
|
groupsByPage,
|
||||||
|
imagesByPage,
|
||||||
selectedPage,
|
selectedPage,
|
||||||
dirtyPages,
|
dirtyPages,
|
||||||
hasDocument,
|
hasDocument,
|
||||||
@ -240,10 +337,14 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
onLoadJson: handleLoadFile,
|
onLoadJson: handleLoadFile,
|
||||||
onSelectPage: handleSelectPage,
|
onSelectPage: handleSelectPage,
|
||||||
onGroupEdit: handleGroupTextChange,
|
onGroupEdit: handleGroupTextChange,
|
||||||
|
onImageTransform: handleImageTransform,
|
||||||
|
onImageReset: handleImageReset,
|
||||||
onReset: handleResetEdits,
|
onReset: handleResetEdits,
|
||||||
onDownloadJson: handleDownloadJson,
|
onDownloadJson: handleDownloadJson,
|
||||||
onGeneratePdf: handleGeneratePdf,
|
onGeneratePdf: handleGeneratePdf,
|
||||||
}), [
|
}), [
|
||||||
|
handleImageTransform,
|
||||||
|
imagesByPage,
|
||||||
dirtyPages,
|
dirtyPages,
|
||||||
errorMessage,
|
errorMessage,
|
||||||
fileName,
|
fileName,
|
||||||
@ -251,6 +352,7 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
|||||||
handleDownloadJson,
|
handleDownloadJson,
|
||||||
handleGeneratePdf,
|
handleGeneratePdf,
|
||||||
handleGroupTextChange,
|
handleGroupTextChange,
|
||||||
|
handleImageReset,
|
||||||
handleLoadFile,
|
handleLoadFile,
|
||||||
handleResetEdits,
|
handleResetEdits,
|
||||||
handleSelectPage,
|
handleSelectPage,
|
||||||
|
|||||||
@ -33,6 +33,8 @@ export interface PdfJsonTextElement {
|
|||||||
fontSizeInPt?: number | null;
|
fontSizeInPt?: number | null;
|
||||||
characterSpacing?: number | null;
|
characterSpacing?: number | null;
|
||||||
wordSpacing?: number | null;
|
wordSpacing?: number | null;
|
||||||
|
spaceWidth?: number | null;
|
||||||
|
zOrder?: number | null;
|
||||||
horizontalScaling?: number | null;
|
horizontalScaling?: number | null;
|
||||||
leading?: number | null;
|
leading?: number | null;
|
||||||
rise?: number | null;
|
rise?: number | null;
|
||||||
@ -46,6 +48,26 @@ export interface PdfJsonTextElement {
|
|||||||
strokeColor?: PdfJsonTextColor | null;
|
strokeColor?: PdfJsonTextColor | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Client-side shape of one image placed on a PDF page.
 *
 * Every member is optional and nullable; readers in this module fall back to
 * defaults (for example `image.id ?? ''`) rather than assuming presence.
 */
export interface PdfJsonImageElement {
  /** Identifier used to match an image against its baseline copy. */
  id?: string | null;
  /** NOTE(review): presumably the resource name of the image object — confirm against the backend model. */
  objectName?: string | null;
  /** NOTE(review): presumably true for images embedded inline in a content stream — confirm. */
  inlineImage?: boolean | null;
  /** NOTE(review): presumably the pixel width of the source image — confirm. */
  nativeWidth?: number | null;
  /** NOTE(review): presumably the pixel height of the source image — confirm. */
  nativeHeight?: number | null;

  /** Horizontal origin; the editor writes the same value as `left`. */
  x?: number | null;
  /** Vertical origin; the editor writes the same value as `bottom`. */
  y?: number | null;
  /** Width of the placed image. */
  width?: number | null;
  /** Height of the placed image. */
  height?: number | null;

  /** Left edge of the bounding box. */
  left?: number | null;
  /** Right edge; the editor writes `left + width`. */
  right?: number | null;
  /** Top edge; the editor writes `bottom + height`. */
  top?: number | null;
  /** Bottom edge of the bounding box. */
  bottom?: number | null;

  /**
   * Placement matrix. The editor reads the signs of entries 0 and 3 when at
   * least six entries are present, and may write `null` instead of a matrix.
   */
  transform?: number[] | null;
  /** Stacking order; compared with strict equality when detecting edits. */
  zOrder?: number | null;
  /** NOTE(review): presumably the encoded image payload — confirm encoding with the backend. */
  imageData?: string | null;
  /** NOTE(review): presumably the format label for `imageData` — confirm. */
  imageFormat?: string | null;
}
|
||||||
|
|
||||||
export interface PdfJsonStream {
|
export interface PdfJsonStream {
|
||||||
dictionary?: Record<string, unknown> | null;
|
dictionary?: Record<string, unknown> | null;
|
||||||
rawData?: string | null;
|
rawData?: string | null;
|
||||||
@ -57,6 +79,7 @@ export interface PdfJsonPage {
|
|||||||
height?: number | null;
|
height?: number | null;
|
||||||
rotation?: number | null;
|
rotation?: number | null;
|
||||||
textElements?: PdfJsonTextElement[] | null;
|
textElements?: PdfJsonTextElement[] | null;
|
||||||
|
imageElements?: PdfJsonImageElement[] | null;
|
||||||
resources?: unknown;
|
resources?: unknown;
|
||||||
contentStreams?: PdfJsonStream[] | null;
|
contentStreams?: PdfJsonStream[] | null;
|
||||||
}
|
}
|
||||||
@ -107,6 +130,7 @@ export const DEFAULT_PAGE_HEIGHT = 792;
|
|||||||
export interface PdfJsonEditorViewData {
|
export interface PdfJsonEditorViewData {
|
||||||
document: PdfJsonDocument | null;
|
document: PdfJsonDocument | null;
|
||||||
groupsByPage: TextGroup[][];
|
groupsByPage: TextGroup[][];
|
||||||
|
imagesByPage: PdfJsonImageElement[][];
|
||||||
selectedPage: number;
|
selectedPage: number;
|
||||||
dirtyPages: boolean[];
|
dirtyPages: boolean[];
|
||||||
hasDocument: boolean;
|
hasDocument: boolean;
|
||||||
@ -118,6 +142,18 @@ export interface PdfJsonEditorViewData {
|
|||||||
onLoadJson: (file: File | null) => Promise<void> | void;
|
onLoadJson: (file: File | null) => Promise<void> | void;
|
||||||
onSelectPage: (pageIndex: number) => void;
|
onSelectPage: (pageIndex: number) => void;
|
||||||
onGroupEdit: (pageIndex: number, groupId: string, value: string) => void;
|
onGroupEdit: (pageIndex: number, groupId: string, value: string) => void;
|
||||||
|
onImageTransform: (
|
||||||
|
pageIndex: number,
|
||||||
|
imageId: string,
|
||||||
|
next: {
|
||||||
|
left: number;
|
||||||
|
bottom: number;
|
||||||
|
width: number;
|
||||||
|
height: number;
|
||||||
|
transform: number[];
|
||||||
|
},
|
||||||
|
) => void;
|
||||||
|
onImageReset: (pageIndex: number, imageId: string) => void;
|
||||||
onReset: () => void;
|
onReset: () => void;
|
||||||
onDownloadJson: () => void;
|
onDownloadJson: () => void;
|
||||||
onGeneratePdf: () => void;
|
onGeneratePdf: () => void;
|
||||||
|
|||||||
@ -3,6 +3,7 @@ import {
|
|||||||
PdfJsonDocument,
|
PdfJsonDocument,
|
||||||
PdfJsonPage,
|
PdfJsonPage,
|
||||||
PdfJsonTextElement,
|
PdfJsonTextElement,
|
||||||
|
PdfJsonImageElement,
|
||||||
TextGroup,
|
TextGroup,
|
||||||
DEFAULT_PAGE_HEIGHT,
|
DEFAULT_PAGE_HEIGHT,
|
||||||
DEFAULT_PAGE_WIDTH,
|
DEFAULT_PAGE_WIDTH,
|
||||||
@ -11,6 +12,9 @@ import {
|
|||||||
const LINE_TOLERANCE = 2;
|
const LINE_TOLERANCE = 2;
|
||||||
const GAP_FACTOR = 0.6;
|
const GAP_FACTOR = 0.6;
|
||||||
const SPACE_MIN_GAP = 1.5;
|
const SPACE_MIN_GAP = 1.5;
|
||||||
|
// Lower clamp for an estimated glyph width, as a fraction of the average font size (used by estimateCharWidth).
const MIN_CHAR_WIDTH_FACTOR = 0.35;
|
||||||
|
// Upper clamp for an estimated glyph width, as a fraction of the average font size (used by estimateCharWidth).
const MAX_CHAR_WIDTH_FACTOR = 1.25;
|
||||||
|
// Fraction of the spacing hint that the inferred gap must exceed for shouldInsertSpace to report a space.
const EXTRA_GAP_RATIO = 0.8;
|
||||||
|
|
||||||
export const valueOr = (value: number | null | undefined, fallback = 0): number => {
|
export const valueOr = (value: number | null | undefined, fallback = 0): number => {
|
||||||
if (value === null || value === undefined || Number.isNaN(value)) {
|
if (value === null || value === undefined || Number.isNaN(value)) {
|
||||||
@ -24,6 +28,11 @@ export const cloneTextElement = (element: PdfJsonTextElement): PdfJsonTextElemen
|
|||||||
textMatrix: element.textMatrix ? [...element.textMatrix] : element.textMatrix ?? undefined,
|
textMatrix: element.textMatrix ? [...element.textMatrix] : element.textMatrix ?? undefined,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * Returns a shallow copy of an image element that owns its own `transform` array,
 * so editing the copy's matrix cannot alter the source element's matrix.
 *
 * A null or undefined transform is normalised to `undefined` on the copy.
 */
export const cloneImageElement = (element: PdfJsonImageElement): PdfJsonImageElement => {
  const copy: PdfJsonImageElement = { ...element };
  copy.transform = element.transform ? [...element.transform] : undefined;
  return copy;
};
|
||||||
|
|
||||||
const getBaseline = (element: PdfJsonTextElement): number => {
|
const getBaseline = (element: PdfJsonTextElement): number => {
|
||||||
if (element.textMatrix && element.textMatrix.length === 6) {
|
if (element.textMatrix && element.textMatrix.length === 6) {
|
||||||
return valueOr(element.textMatrix[5]);
|
return valueOr(element.textMatrix[5]);
|
||||||
@ -71,6 +80,41 @@ const getElementBounds = (element: PdfJsonTextElement): BoundingBox => {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Derives the bounding box of an image element from whichever geometry fields are set.
 *
 * Per axis: the start edge is `left`/`bottom`, falling back to `x`/`y`, then 0.
 * The size is `width`/`height`, falling back to the non-negative distance from the
 * start edge to `right`/`top`. The end edge is `right`/`top`, falling back to
 * start + size. `valueOr` supplies each fallback for null, undefined or NaN inputs.
 */
export const getImageBounds = (element: PdfJsonImageElement): BoundingBox => {
  const resolveSpan = (
    nearEdge: number | null | undefined,
    origin: number | null | undefined,
    extent: number | null | undefined,
    farEdge: number | null | undefined,
  ): [number, number] => {
    const start = valueOr(nearEdge ?? origin, 0);
    const derivedExtent = Math.max(valueOr(farEdge, start) - start, 0);
    const size = valueOr(extent, derivedExtent);
    const fallbackEnd = start + size;
    return [start, valueOr(farEdge ?? fallbackEnd, fallbackEnd)];
  };

  const [left, right] = resolveSpan(element.left, element.x, element.width, element.right);
  const [bottom, top] = resolveSpan(element.bottom, element.y, element.height, element.top);

  return { left, right, bottom, top };
};
|
||||||
|
|
||||||
|
/**
 * Picks the spacing value of a text element to use as a gap threshold hint.
 *
 * Preference order: a positive `spaceWidth`, then a positive `wordSpacing`,
 * otherwise `characterSpacing` clamped so it is never negative.
 */
const getSpacingHint = (element: PdfJsonTextElement): number => {
  const preferred = [valueOr(element.spaceWidth, 0), valueOr(element.wordSpacing, 0)];
  const firstPositive = preferred.find((candidate) => candidate > 0);
  return firstPositive ?? Math.max(valueOr(element.characterSpacing, 0), 0);
};
|
||||||
|
|
||||||
|
/**
 * Estimates a glyph's width by clamping its measured width to a range derived
 * from the average font size (MIN_CHAR_WIDTH_FACTOR to MAX_CHAR_WIDTH_FACTOR).
 */
const estimateCharWidth = (element: PdfJsonTextElement, avgFontSize: number): number => {
  const lowerBound = avgFontSize * MIN_CHAR_WIDTH_FACTOR;
  const upperBound = avgFontSize * MAX_CHAR_WIDTH_FACTOR;
  const measured = getWidth(element);
  return Math.min(upperBound, Math.max(lowerBound, measured));
};
|
||||||
|
|
||||||
const mergeBounds = (bounds: BoundingBox[]): BoundingBox => {
|
const mergeBounds = (bounds: BoundingBox[]): BoundingBox => {
|
||||||
if (bounds.length === 0) {
|
if (bounds.length === 0) {
|
||||||
return { left: 0, right: 0, top: 0, bottom: 0 };
|
return { left: 0, right: 0, top: 0, bottom: 0 };
|
||||||
@ -88,10 +132,32 @@ const mergeBounds = (bounds: BoundingBox[]): BoundingBox => {
|
|||||||
|
|
||||||
/**
 * Decides whether a space belongs between two consecutive text elements.
 *
 * Two signals are checked against a shared threshold (`spacingHint`, the largest of
 * SPACE_MIN_GAP, both elements' spacing hints, and avgFontSize * GAP_FACTOR):
 *  - the visible gap between the previous element's right edge and the current start;
 *  - the gap inferred from the start-to-start advance minus an estimated glyph width,
 *    which only has to exceed EXTRA_GAP_RATIO of the threshold.
 *
 * The earlier trailing-hyphen special case returned `false` right before an
 * unconditional `return false`, so it never changed the result and has been removed.
 *
 * @param prev    element that comes first on the line
 * @param current element that follows it
 * @returns true when either gap signal exceeds its threshold
 */
const shouldInsertSpace = (prev: PdfJsonTextElement, current: PdfJsonTextElement): boolean => {
  const prevX = getX(prev);
  const currentX = getX(current);
  const avgFontSize = (getFontSize(prev) + getFontSize(current)) / 2;

  // Gap actually visible between the two boxes; overlap counts as zero.
  const trailingGap = Math.max(0, currentX - (prevX + getWidth(prev)));

  // Gap implied by the advance when the reported width is unreliable.
  const baselineAdvance = Math.max(0, currentX - prevX);
  const inferredGap = Math.max(0, baselineAdvance - estimateCharWidth(prev, avgFontSize));

  const spacingHint = Math.max(
    SPACE_MIN_GAP,
    getSpacingHint(prev),
    getSpacingHint(current),
    avgFontSize * GAP_FACTOR,
  );

  return trailingGap > spacingHint || inferredGap > spacingHint * EXTRA_GAP_RATIO;
};
|
||||||
|
|
||||||
const buildGroupText = (elements: PdfJsonTextElement[]): string => {
|
const buildGroupText = (elements: PdfJsonTextElement[]): string => {
|
||||||
@ -212,6 +278,27 @@ export const groupDocumentText = (document: PdfJsonDocument | null | undefined):
|
|||||||
return pages.map((page, index) => groupPageTextElements(page, index));
|
return pages.map((page, index) => groupPageTextElements(page, index));
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Copies the image elements of a page, making sure every copy has a usable id.
 *
 * A missing page or image list yields an empty array. Images whose id is absent
 * or whitespace-only receive `page-<pageIndex>-image-<imageIndex>`.
 */
export const extractPageImages = (
  page: PdfJsonPage | null | undefined,
  pageIndex: number,
): PdfJsonImageElement[] => {
  const pageImages = page?.imageElements ?? [];
  return pageImages.map((original, position) => {
    const copy = cloneImageElement(original);
    const needsId = !copy.id || copy.id.trim() === '';
    if (needsId) {
      copy.id = `page-${pageIndex}-image-${position}`;
    }
    return copy;
  });
};
|
||||||
|
|
||||||
|
/**
 * Builds the per-page image lists for a document by running `extractPageImages`
 * on each page in order. A missing document or page list yields an empty array.
 */
export const extractDocumentImages = (
  document: PdfJsonDocument | null | undefined,
): PdfJsonImageElement[][] =>
  (document?.pages ?? []).map((page, pageIndex) => extractPageImages(page, pageIndex));
|
||||||
|
|
||||||
export const deepCloneDocument = (document: PdfJsonDocument): PdfJsonDocument => {
|
export const deepCloneDocument = (document: PdfJsonDocument): PdfJsonDocument => {
|
||||||
if (typeof structuredClone === 'function') {
|
if (typeof structuredClone === 'function') {
|
||||||
return structuredClone(document);
|
return structuredClone(document);
|
||||||
@ -277,14 +364,19 @@ const distributeTextAcrossElements = (text: string | undefined, elements: PdfJso
|
|||||||
export const buildUpdatedDocument = (
|
export const buildUpdatedDocument = (
|
||||||
source: PdfJsonDocument,
|
source: PdfJsonDocument,
|
||||||
groupsByPage: TextGroup[][],
|
groupsByPage: TextGroup[][],
|
||||||
|
imagesByPage: PdfJsonImageElement[][],
|
||||||
): PdfJsonDocument => {
|
): PdfJsonDocument => {
|
||||||
const updated = deepCloneDocument(source);
|
const updated = deepCloneDocument(source);
|
||||||
const pages = updated.pages ?? [];
|
const pages = updated.pages ?? [];
|
||||||
|
|
||||||
updated.pages = pages.map((page, pageIndex) => {
|
updated.pages = pages.map((page, pageIndex) => {
|
||||||
const groups = groupsByPage[pageIndex] ?? [];
|
const groups = groupsByPage[pageIndex] ?? [];
|
||||||
|
const images = imagesByPage[pageIndex] ?? [];
|
||||||
if (!groups.length) {
|
if (!groups.length) {
|
||||||
return page;
|
return {
|
||||||
|
...page,
|
||||||
|
imageElements: images.map(cloneImageElement),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const updatedElements: PdfJsonTextElement[] = groups.flatMap((group) => {
|
const updatedElements: PdfJsonTextElement[] = groups.flatMap((group) => {
|
||||||
@ -297,6 +389,7 @@ export const buildUpdatedDocument = (
|
|||||||
return {
|
return {
|
||||||
...page,
|
...page,
|
||||||
textElements: updatedElements,
|
textElements: updatedElements,
|
||||||
|
imageElements: images.map(cloneImageElement),
|
||||||
contentStreams: page.contentStreams ?? [],
|
contentStreams: page.contentStreams ?? [],
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
@ -307,14 +400,22 @@ export const buildUpdatedDocument = (
|
|||||||
export const restoreGlyphElements = (
|
export const restoreGlyphElements = (
|
||||||
source: PdfJsonDocument,
|
source: PdfJsonDocument,
|
||||||
groupsByPage: TextGroup[][],
|
groupsByPage: TextGroup[][],
|
||||||
|
imagesByPage: PdfJsonImageElement[][],
|
||||||
|
originalImagesByPage: PdfJsonImageElement[][],
|
||||||
): PdfJsonDocument => {
|
): PdfJsonDocument => {
|
||||||
const updated = deepCloneDocument(source);
|
const updated = deepCloneDocument(source);
|
||||||
const pages = updated.pages ?? [];
|
const pages = updated.pages ?? [];
|
||||||
|
|
||||||
updated.pages = pages.map((page, pageIndex) => {
|
updated.pages = pages.map((page, pageIndex) => {
|
||||||
const groups = groupsByPage[pageIndex] ?? [];
|
const groups = groupsByPage[pageIndex] ?? [];
|
||||||
|
const images = imagesByPage[pageIndex] ?? [];
|
||||||
|
const baselineImages = originalImagesByPage[pageIndex] ?? [];
|
||||||
|
|
||||||
if (!groups.length) {
|
if (!groups.length) {
|
||||||
return page;
|
return {
|
||||||
|
...page,
|
||||||
|
imageElements: images.map(cloneImageElement),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const rebuiltElements: PdfJsonTextElement[] = [];
|
const rebuiltElements: PdfJsonTextElement[] = [];
|
||||||
@ -327,16 +428,105 @@ export const restoreGlyphElements = (
|
|||||||
rebuiltElements.push(...originals);
|
rebuiltElements.push(...originals);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const textDirty = groups.some((group) => group.text !== group.originalText);
|
||||||
|
const imageDirty = areImageListsDifferent(images, baselineImages);
|
||||||
|
const nextStreams = textDirty || imageDirty ? [] : page.contentStreams ?? [];
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...page,
|
...page,
|
||||||
textElements: rebuiltElements,
|
textElements: rebuiltElements,
|
||||||
contentStreams: page.contentStreams ?? [],
|
imageElements: images.map(cloneImageElement),
|
||||||
|
contentStreams: nextStreams,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
return updated;
|
return updated;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const getDirtyPages = (groupsByPage: TextGroup[][]): boolean[] => {
|
/**
 * Compares two optional numbers within a tolerance (inclusive).
 * Anything that is not a finite number (null, undefined, NaN, ±Infinity) is treated as 0.
 */
const approxEqual = (a: number | null | undefined, b: number | null | undefined, tolerance = 0.25): boolean => {
  const sanitize = (candidate: number | null | undefined): number =>
    typeof candidate === 'number' && Number.isFinite(candidate) ? candidate : 0;

  const delta = sanitize(a) - sanitize(b);
  return Math.abs(delta) <= tolerance;
};
|
||||||
|
|
||||||
|
/**
 * Element-wise `approxEqual` over two optional number arrays.
 *
 * Two missing arrays are equal; one missing array or a length mismatch is not.
 */
const arrayApproxEqual = (
  first: number[] | null | undefined,
  second: number[] | null | undefined,
  tolerance = 0.25,
): boolean => {
  if (!first || !second) {
    // Equal only when both sides are absent.
    return !first && !second;
  }
  if (first.length !== second.length) {
    return false;
  }

  let position = 0;
  while (position < first.length) {
    if (!approxEqual(first[position], second[position], tolerance)) {
      return false;
    }
    position += 1;
  }
  return true;
};
|
||||||
|
|
||||||
|
/**
 * Reports whether two image elements are equivalent for edit detection.
 *
 * `imageData`, `imageFormat` and `zOrder` must match exactly (null and undefined
 * are interchangeable); the eight geometry fields and the transform matrix are
 * compared with the default `approxEqual` tolerance.
 */
const areImageElementsEqual = (
  current: PdfJsonImageElement,
  original: PdfJsonImageElement,
): boolean => {
  if (current === original) {
    return true;
  }
  if (!current || !original) {
    return false;
  }

  if ((current.imageData ?? null) !== (original.imageData ?? null)) {
    return false;
  }
  if ((current.imageFormat ?? null) !== (original.imageFormat ?? null)) {
    return false;
  }

  const geometryKeys = ['x', 'y', 'width', 'height', 'left', 'right', 'top', 'bottom'] as const;
  for (const key of geometryKeys) {
    if (!approxEqual(current[key], original[key])) {
      return false;
    }
  }

  if ((current.zOrder ?? null) !== (original.zOrder ?? null)) {
    return false;
  }
  return arrayApproxEqual(current.transform, original.transform);
};
|
||||||
|
|
||||||
|
/**
 * Reports whether two image lists differ: either in length, or because some
 * position holds a pair of elements that `areImageElementsEqual` rejects.
 */
export const areImageListsDifferent = (
  current: PdfJsonImageElement[],
  original: PdfJsonImageElement[],
): boolean => {
  const total = current.length;
  if (total !== original.length) {
    return true;
  }

  // Advance past the matching prefix; stopping early means a mismatch was found.
  let cursor = 0;
  while (cursor < total && areImageElementsEqual(current[cursor], original[cursor])) {
    cursor += 1;
  }
  return cursor < total;
};
|
||||||
|
|
||||||
|
/**
 * Computes one dirty flag per page of `groupsByPage`.
 *
 * A page is dirty when any text group's `text` differs from its `originalText`,
 * or when its image list differs from the baseline list at the same index
 * (missing lists count as empty).
 */
export const getDirtyPages = (
  groupsByPage: TextGroup[][],
  imagesByPage: PdfJsonImageElement[][],
  originalImagesByPage: PdfJsonImageElement[][],
): boolean[] =>
  groupsByPage.map((pageGroups, pageIndex) => {
    const hasTextEdits = pageGroups.some((group) => group.text !== group.originalText);
    const hasImageEdits = areImageListsDifferent(
      imagesByPage[pageIndex] ?? [],
      originalImagesByPage[pageIndex] ?? [],
    );
    return hasTextEdits || hasImageEdits;
  });
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user