mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
image stuff
This commit is contained in:
parent
930c68c8c5
commit
af19a5af23
@ -0,0 +1,37 @@
|
||||
package stirling.software.SPDF.model.json;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class PdfJsonImageElement {
|
||||
|
||||
private String id;
|
||||
private String objectName;
|
||||
private Boolean inlineImage;
|
||||
private Integer nativeWidth;
|
||||
private Integer nativeHeight;
|
||||
private Float x;
|
||||
private Float y;
|
||||
private Float width;
|
||||
private Float height;
|
||||
private Float left;
|
||||
private Float right;
|
||||
private Float top;
|
||||
private Float bottom;
|
||||
@Builder.Default private List<Float> transform = new ArrayList<>();
|
||||
private Integer zOrder;
|
||||
private String imageData;
|
||||
private String imageFormat;
|
||||
}
|
||||
@ -23,6 +23,7 @@ public class PdfJsonPage {
|
||||
private Integer rotation;
|
||||
|
||||
@Builder.Default private List<PdfJsonTextElement> textElements = new ArrayList<>();
|
||||
@Builder.Default private List<PdfJsonImageElement> imageElements = new ArrayList<>();
|
||||
|
||||
/** Serialized representation of the page resources dictionary. */
|
||||
private PdfJsonCosValue resources;
|
||||
|
||||
@ -24,6 +24,8 @@ public class PdfJsonTextElement {
|
||||
private Float fontSizeInPt;
|
||||
private Float characterSpacing;
|
||||
private Float wordSpacing;
|
||||
private Float spaceWidth;
|
||||
private Integer zOrder;
|
||||
private Float horizontalScaling;
|
||||
private Float leading;
|
||||
private Float rise;
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
package stirling.software.SPDF.service;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
@ -22,8 +25,13 @@ import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.TimeZone;
|
||||
import java.util.UUID;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine;
|
||||
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||
import org.apache.pdfbox.contentstream.operator.OperatorName;
|
||||
import org.apache.pdfbox.cos.COSArray;
|
||||
import org.apache.pdfbox.cos.COSBase;
|
||||
import org.apache.pdfbox.cos.COSBoolean;
|
||||
@ -53,6 +61,8 @@ import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
|
||||
import org.apache.pdfbox.pdmodel.graphics.state.PDTextState;
|
||||
import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode;
|
||||
@ -74,6 +84,7 @@ import stirling.software.SPDF.model.json.PdfJsonCosValue;
|
||||
import stirling.software.SPDF.model.json.PdfJsonDocument;
|
||||
import stirling.software.SPDF.model.json.PdfJsonFont;
|
||||
import stirling.software.SPDF.model.json.PdfJsonFontCidSystemInfo;
|
||||
import stirling.software.SPDF.model.json.PdfJsonImageElement;
|
||||
import stirling.software.SPDF.model.json.PdfJsonMetadata;
|
||||
import stirling.software.SPDF.model.json.PdfJsonPage;
|
||||
import stirling.software.SPDF.model.json.PdfJsonStream;
|
||||
@ -128,6 +139,8 @@ public class PdfJsonConversionService {
|
||||
stripper.setSortByPosition(true);
|
||||
stripper.getText(document);
|
||||
|
||||
Map<Integer, List<PdfJsonImageElement>> imagesByPage = collectImages(document);
|
||||
|
||||
PdfJsonDocument pdfJson = new PdfJsonDocument();
|
||||
pdfJson.setMetadata(extractMetadata(document));
|
||||
pdfJson.setXmpMetadata(extractXmpMetadata(document));
|
||||
@ -136,7 +149,7 @@ public class PdfJsonConversionService {
|
||||
Comparator.comparing(
|
||||
PdfJsonFont::getUid, Comparator.nullsLast(Comparator.naturalOrder())));
|
||||
pdfJson.setFonts(serializedFonts);
|
||||
pdfJson.setPages(extractPages(document, textByPage));
|
||||
pdfJson.setPages(extractPages(document, textByPage, imagesByPage));
|
||||
|
||||
log.info(
|
||||
"PDF→JSON conversion complete (fonts: {}, pages: {})",
|
||||
@ -201,6 +214,10 @@ public class PdfJsonConversionService {
|
||||
pageModel.getTextElements() != null
|
||||
? pageModel.getTextElements()
|
||||
: new ArrayList<>();
|
||||
List<PdfJsonImageElement> imageElements =
|
||||
pageModel.getImageElements() != null
|
||||
? pageModel.getImageElements()
|
||||
: new ArrayList<>();
|
||||
|
||||
boolean fallbackAssigned =
|
||||
preflightTextElements(
|
||||
@ -218,15 +235,13 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
|
||||
boolean hasText = !elements.isEmpty();
|
||||
boolean rewriteSucceeded = false;
|
||||
boolean hasImages = !imageElements.isEmpty();
|
||||
boolean rewriteSucceeded = true;
|
||||
|
||||
if (!preservedStreams.isEmpty() && hasText) {
|
||||
if (hasText) {
|
||||
if (fallbackAssigned) {
|
||||
log.info(
|
||||
"Skipping token rewrite for page {} because fallback font was applied",
|
||||
pageNumberValue);
|
||||
rewriteSucceeded = false;
|
||||
} else {
|
||||
} else if (!preservedStreams.isEmpty()) {
|
||||
log.info("Attempting token rewrite for page {}", pageNumberValue);
|
||||
rewriteSucceeded = rewriteTextOperators(document, page, elements);
|
||||
if (!rewriteSucceeded) {
|
||||
@ -236,18 +251,29 @@ public class PdfJsonConversionService {
|
||||
} else {
|
||||
log.info("Token rewrite succeeded for page {}", pageNumberValue);
|
||||
}
|
||||
} else {
|
||||
rewriteSucceeded = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasText) {
|
||||
boolean shouldRegenerate = preservedStreams.isEmpty();
|
||||
if (hasText && !rewriteSucceeded) {
|
||||
shouldRegenerate = true;
|
||||
}
|
||||
if (hasImages && preservedStreams.isEmpty()) {
|
||||
shouldRegenerate = true;
|
||||
}
|
||||
|
||||
if (!(hasText || hasImages)) {
|
||||
pageIndex++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!rewriteSucceeded) {
|
||||
log.info("Regenerating text content for page {}", pageNumberValue);
|
||||
regenerateTextContent(document, page, elements, fontMap, pageNumberValue);
|
||||
log.info("Text regeneration complete for page {}", pageNumberValue);
|
||||
if (shouldRegenerate) {
|
||||
log.info("Regenerating page content for page {}", pageNumberValue);
|
||||
regeneratePageContent(
|
||||
document, page, elements, imageElements, fontMap, pageNumberValue);
|
||||
log.info("Page content regeneration complete for page {}", pageNumberValue);
|
||||
}
|
||||
pageIndex++;
|
||||
}
|
||||
@ -571,7 +597,9 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
|
||||
private List<PdfJsonPage> extractPages(
|
||||
PDDocument document, Map<Integer, List<PdfJsonTextElement>> textByPage)
|
||||
PDDocument document,
|
||||
Map<Integer, List<PdfJsonTextElement>> textByPage,
|
||||
Map<Integer, List<PdfJsonImageElement>> imagesByPage)
|
||||
throws IOException {
|
||||
List<PdfJsonPage> pages = new ArrayList<>();
|
||||
int pageIndex = 0;
|
||||
@ -583,6 +611,7 @@ public class PdfJsonConversionService {
|
||||
pageModel.setHeight(mediaBox.getHeight());
|
||||
pageModel.setRotation(page.getRotation());
|
||||
pageModel.setTextElements(textByPage.getOrDefault(pageIndex + 1, new ArrayList<>()));
|
||||
pageModel.setImageElements(imagesByPage.getOrDefault(pageIndex + 1, new ArrayList<>()));
|
||||
pageModel.setResources(
|
||||
serializeCosValue(page.getCOSObject().getDictionaryObject(COSName.RESOURCES)));
|
||||
pageModel.setContentStreams(extractContentStreams(page));
|
||||
@ -592,6 +621,19 @@ public class PdfJsonConversionService {
|
||||
return pages;
|
||||
}
|
||||
|
||||
private Map<Integer, List<PdfJsonImageElement>> collectImages(PDDocument document)
|
||||
throws IOException {
|
||||
Map<Integer, List<PdfJsonImageElement>> imagesByPage = new LinkedHashMap<>();
|
||||
int pageNumber = 1;
|
||||
for (PDPage page : document.getPages()) {
|
||||
ImageCollectingEngine engine =
|
||||
new ImageCollectingEngine(page, pageNumber, imagesByPage);
|
||||
engine.processPage(page);
|
||||
pageNumber++;
|
||||
}
|
||||
return imagesByPage;
|
||||
}
|
||||
|
||||
private PdfJsonMetadata extractMetadata(PDDocument document) {
|
||||
PdfJsonMetadata metadata = new PdfJsonMetadata();
|
||||
PDDocumentInformation info = document.getDocumentInformation();
|
||||
@ -911,17 +953,27 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
}
|
||||
|
||||
private void regenerateTextContent(
|
||||
private void regeneratePageContent(
|
||||
PDDocument document,
|
||||
PDPage page,
|
||||
List<PdfJsonTextElement> elements,
|
||||
List<PdfJsonTextElement> textElements,
|
||||
List<PdfJsonImageElement> imageElements,
|
||||
Map<String, PDFont> fontMap,
|
||||
int pageNumber)
|
||||
throws IOException {
|
||||
List<DrawableElement> drawables = mergeDrawables(textElements, imageElements);
|
||||
Map<String, PDImageXObject> imageCache = new HashMap<>();
|
||||
|
||||
try (PDPageContentStream contentStream =
|
||||
new PDPageContentStream(document, page, AppendMode.OVERWRITE, true, true)) {
|
||||
boolean textOpen = false;
|
||||
for (PdfJsonTextElement element : elements) {
|
||||
for (DrawableElement drawable : drawables) {
|
||||
switch (drawable.type()) {
|
||||
case TEXT -> {
|
||||
PdfJsonTextElement element = drawable.textElement();
|
||||
if (element == null) {
|
||||
continue;
|
||||
}
|
||||
PDFont font = fontMap.get(buildFontKey(pageNumber, element.getFontId()));
|
||||
if (font == null && FALLBACK_FONT_ID.equals(element.getFontId())) {
|
||||
font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
|
||||
@ -931,7 +983,7 @@ public class PdfJsonConversionService {
|
||||
|
||||
if (font != null) {
|
||||
try {
|
||||
encodeWithTest(font, text);
|
||||
font.encode(text);
|
||||
} catch (IOException | IllegalArgumentException ex) {
|
||||
log.debug(
|
||||
"Edited text contains glyphs missing from font {} ({}), switching to fallback",
|
||||
@ -943,16 +995,15 @@ public class PdfJsonConversionService {
|
||||
font = loadFallbackPdfFont(document);
|
||||
fontMap.put(buildFontKey(-1, FALLBACK_FONT_ID), font);
|
||||
}
|
||||
encodeWithTest(font, text);
|
||||
}
|
||||
} else {
|
||||
}
|
||||
if (font == null) {
|
||||
element.setFontId(FALLBACK_FONT_ID);
|
||||
font = fontMap.get(buildFontKey(-1, FALLBACK_FONT_ID));
|
||||
if (font == null) {
|
||||
font = loadFallbackPdfFont(document);
|
||||
fontMap.put(buildFontKey(-1, FALLBACK_FONT_ID), font);
|
||||
}
|
||||
encodeWithTest(font, text);
|
||||
}
|
||||
|
||||
if (!textOpen) {
|
||||
@ -964,7 +1015,23 @@ public class PdfJsonConversionService {
|
||||
contentStream.setFont(font, fontScale);
|
||||
applyRenderingMode(contentStream, element.getRenderingMode());
|
||||
applyTextMatrix(contentStream, element);
|
||||
contentStream.showText(text);
|
||||
String sanitized = sanitizeForFont(font, text);
|
||||
if (!sanitized.isEmpty()) {
|
||||
contentStream.showText(sanitized);
|
||||
}
|
||||
}
|
||||
case IMAGE -> {
|
||||
if (textOpen) {
|
||||
contentStream.endText();
|
||||
textOpen = false;
|
||||
}
|
||||
PdfJsonImageElement element = drawable.imageElement();
|
||||
if (element == null) {
|
||||
continue;
|
||||
}
|
||||
drawImageElement(contentStream, document, element, imageCache);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (textOpen) {
|
||||
contentStream.endText();
|
||||
@ -972,11 +1039,47 @@ public class PdfJsonConversionService {
|
||||
}
|
||||
}
|
||||
|
||||
private void encodeWithTest(PDFont font, String text) throws IOException {
|
||||
private String sanitizeForFont(PDFont font, String text) {
|
||||
if (text == null || text.isEmpty()) {
|
||||
return;
|
||||
return "";
|
||||
}
|
||||
font.encode(text);
|
||||
StringBuilder builder = new StringBuilder(text.length());
|
||||
text.codePoints()
|
||||
.forEach(
|
||||
codePoint -> {
|
||||
String candidate = new String(Character.toChars(codePoint));
|
||||
try {
|
||||
font.encode(candidate);
|
||||
builder.append(candidate);
|
||||
return;
|
||||
} catch (IOException | IllegalArgumentException ex) {
|
||||
String mapped = mapUnsupportedGlyph(codePoint);
|
||||
if (mapped != null) {
|
||||
try {
|
||||
font.encode(mapped);
|
||||
builder.append(mapped);
|
||||
return;
|
||||
} catch (IOException | IllegalArgumentException ignore) {
|
||||
// fall through to generic replacement
|
||||
}
|
||||
}
|
||||
log.debug(
|
||||
"Replacing unsupported glyph {} ({}) with '?' for font {}",
|
||||
candidate,
|
||||
String.format("U+%04X", codePoint),
|
||||
font.getName());
|
||||
builder.append('?');
|
||||
}
|
||||
});
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
private String mapUnsupportedGlyph(int codePoint) {
|
||||
return switch (codePoint) {
|
||||
case 0x276E -> "<";
|
||||
case 0x276F -> ">";
|
||||
default -> null;
|
||||
};
|
||||
}
|
||||
|
||||
private void applyTextState(PDPageContentStream contentStream, PdfJsonTextElement element)
|
||||
@ -1198,7 +1301,7 @@ public class PdfJsonConversionService {
|
||||
byte[] encoded = font.encode(replacement);
|
||||
cosString.setValue(encoded);
|
||||
return true;
|
||||
} catch (IOException | IllegalArgumentException ex) {
|
||||
} catch (IOException | IllegalArgumentException | UnsupportedOperationException ex) {
|
||||
log.debug("Failed to encode replacement text: {}", ex.getMessage());
|
||||
return false;
|
||||
}
|
||||
@ -1222,7 +1325,9 @@ public class PdfJsonConversionService {
|
||||
try {
|
||||
byte[] encoded = font.encode(replacement);
|
||||
array.set(i, new COSString(encoded));
|
||||
} catch (IOException | IllegalArgumentException ex) {
|
||||
} catch (IOException
|
||||
| IllegalArgumentException
|
||||
| UnsupportedOperationException ex) {
|
||||
log.debug("Failed to encode replacement text in TJ array: {}", ex.getMessage());
|
||||
return false;
|
||||
}
|
||||
@ -1542,6 +1647,377 @@ public class PdfJsonConversionService {
|
||||
return calendar;
|
||||
}
|
||||
|
||||
private class ImageCollectingEngine extends PDFGraphicsStreamEngine {
|
||||
|
||||
private final int pageNumber;
|
||||
private final Map<Integer, List<PdfJsonImageElement>> imagesByPage;
|
||||
|
||||
private COSName currentXObjectName;
|
||||
private int imageCounter = 0;
|
||||
|
||||
protected ImageCollectingEngine(
|
||||
PDPage page, int pageNumber, Map<Integer, List<PdfJsonImageElement>> imagesByPage)
|
||||
throws IOException {
|
||||
super(page);
|
||||
this.pageNumber = pageNumber;
|
||||
this.imagesByPage = imagesByPage;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processPage(PDPage page) throws IOException {
|
||||
super.processPage(page);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void drawImage(PDImage pdImage) throws IOException {
|
||||
EncodedImage encoded = encodeImage(pdImage);
|
||||
if (encoded == null) {
|
||||
return;
|
||||
}
|
||||
Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
|
||||
Bounds bounds = computeBounds(ctm);
|
||||
List<Float> matrixValues = toMatrixValues(ctm);
|
||||
|
||||
PdfJsonImageElement element =
|
||||
PdfJsonImageElement.builder()
|
||||
.id(UUID.randomUUID().toString())
|
||||
.objectName(
|
||||
currentXObjectName != null
|
||||
? currentXObjectName.getName()
|
||||
: null)
|
||||
.inlineImage(!(pdImage instanceof PDImageXObject))
|
||||
.nativeWidth(pdImage.getWidth())
|
||||
.nativeHeight(pdImage.getHeight())
|
||||
.x(bounds.left)
|
||||
.y(bounds.bottom)
|
||||
.width(bounds.width())
|
||||
.height(bounds.height())
|
||||
.left(bounds.left)
|
||||
.right(bounds.right)
|
||||
.top(bounds.top)
|
||||
.bottom(bounds.bottom)
|
||||
.transform(matrixValues)
|
||||
.zOrder(-1_000_000 + imageCounter)
|
||||
.imageData(encoded.base64())
|
||||
.imageFormat(encoded.format())
|
||||
.build();
|
||||
imageCounter++;
|
||||
imagesByPage.computeIfAbsent(pageNumber, key -> new ArrayList<>()).add(element);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3)
|
||||
throws IOException {
|
||||
// Not needed for image extraction
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clip(int windingRule) throws IOException {
|
||||
// Not needed for image extraction
|
||||
}
|
||||
|
||||
@Override
|
||||
public void moveTo(float x, float y) throws IOException {
|
||||
// Not needed for image extraction
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lineTo(float x, float y) throws IOException {
|
||||
// Not needed for image extraction
|
||||
}
|
||||
|
||||
@Override
|
||||
public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3)
|
||||
throws IOException {
|
||||
// Not needed for image extraction
|
||||
}
|
||||
|
||||
@Override
|
||||
public Point2D getCurrentPoint() throws IOException {
|
||||
return new Point2D.Float();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void closePath() throws IOException {
|
||||
// Not needed for image extraction
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endPath() throws IOException {
|
||||
// Not needed for image extraction
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shadingFill(COSName shadingName) throws IOException {
|
||||
// Not needed for image extraction
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fillAndStrokePath(int windingRule) throws IOException {
|
||||
// Not needed for image extraction
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fillPath(int windingRule) throws IOException {
|
||||
// Not needed for image extraction
|
||||
}
|
||||
|
||||
@Override
|
||||
public void strokePath() throws IOException {
|
||||
// Not needed for image extraction
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void processOperator(Operator operator, List<COSBase> operands)
|
||||
throws IOException {
|
||||
if (OperatorName.DRAW_OBJECT.equals(operator.getName())
|
||||
&& !operands.isEmpty()
|
||||
&& operands.get(0) instanceof COSName name) {
|
||||
currentXObjectName = name;
|
||||
}
|
||||
super.processOperator(operator, operands);
|
||||
currentXObjectName = null;
|
||||
}
|
||||
|
||||
private Bounds computeBounds(Matrix ctm) {
|
||||
AffineTransform transform = ctm.createAffineTransform();
|
||||
Point2D.Float p0 = new Point2D.Float(0, 0);
|
||||
Point2D.Float p1 = new Point2D.Float(1, 0);
|
||||
Point2D.Float p2 = new Point2D.Float(0, 1);
|
||||
Point2D.Float p3 = new Point2D.Float(1, 1);
|
||||
transform.transform(p0, p0);
|
||||
transform.transform(p1, p1);
|
||||
transform.transform(p2, p2);
|
||||
transform.transform(p3, p3);
|
||||
|
||||
float minX = Math.min(Math.min(p0.x, p1.x), Math.min(p2.x, p3.x));
|
||||
float maxX = Math.max(Math.max(p0.x, p1.x), Math.max(p2.x, p3.x));
|
||||
float minY = Math.min(Math.min(p0.y, p1.y), Math.min(p2.y, p3.y));
|
||||
float maxY = Math.max(Math.max(p0.y, p1.y), Math.max(p2.y, p3.y));
|
||||
|
||||
if (!Float.isFinite(minX) || !Float.isFinite(minY)) {
|
||||
return new Bounds(0f, 0f, 0f, 0f);
|
||||
}
|
||||
return new Bounds(minX, maxX, minY, maxY);
|
||||
}
|
||||
}
|
||||
|
||||
private record Bounds(float left, float right, float bottom, float top) {
|
||||
float width() {
|
||||
return Math.max(0f, right - left);
|
||||
}
|
||||
|
||||
float height() {
|
||||
return Math.max(0f, top - bottom);
|
||||
}
|
||||
}
|
||||
|
||||
private enum DrawableType {
|
||||
TEXT,
|
||||
IMAGE
|
||||
}
|
||||
|
||||
private record DrawableElement(
|
||||
DrawableType type,
|
||||
PdfJsonTextElement textElement,
|
||||
PdfJsonImageElement imageElement,
|
||||
int zOrder,
|
||||
int sequence) {}
|
||||
|
||||
/**
 * Encoded image bytes paired with the format they were written in.
 *
 * @param base64 the Base64 text of the encoded image
 * @param format the image format name
 */
private record EncodedImage(String base64, String format) {}
|
||||
|
||||
private List<Float> toMatrixValues(Matrix matrix) {
|
||||
List<Float> values = new ArrayList<>(6);
|
||||
values.add(matrix.getValue(0, 0));
|
||||
values.add(matrix.getValue(0, 1));
|
||||
values.add(matrix.getValue(1, 0));
|
||||
values.add(matrix.getValue(1, 1));
|
||||
values.add(matrix.getValue(2, 0));
|
||||
values.add(matrix.getValue(2, 1));
|
||||
return values;
|
||||
}
|
||||
|
||||
private EncodedImage encodeImage(PDImage image) {
|
||||
try {
|
||||
BufferedImage bufferedImage = image.getImage();
|
||||
if (bufferedImage == null) {
|
||||
return null;
|
||||
}
|
||||
String format = resolveImageFormat(image);
|
||||
if (format == null || format.isBlank()) {
|
||||
format = "png";
|
||||
}
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
boolean written = ImageIO.write(bufferedImage, format, baos);
|
||||
if (!written) {
|
||||
if (!"png".equalsIgnoreCase(format)) {
|
||||
baos.reset();
|
||||
if (!ImageIO.write(bufferedImage, "png", baos)) {
|
||||
return null;
|
||||
}
|
||||
format = "png";
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return new EncodedImage(Base64.getEncoder().encodeToString(baos.toByteArray()), format);
|
||||
} catch (IOException ex) {
|
||||
log.debug("Failed to encode image: {}", ex.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private String resolveImageFormat(PDImage image) {
|
||||
if (image instanceof PDImageXObject xObject) {
|
||||
String suffix = xObject.getSuffix();
|
||||
if (suffix != null && !suffix.isBlank()) {
|
||||
return suffix.toLowerCase(Locale.ROOT);
|
||||
}
|
||||
}
|
||||
return "png";
|
||||
}
|
||||
|
||||
private List<DrawableElement> mergeDrawables(
|
||||
List<PdfJsonTextElement> textElements, List<PdfJsonImageElement> imageElements) {
|
||||
List<DrawableElement> drawables = new ArrayList<>();
|
||||
int sequence = 0;
|
||||
|
||||
if (imageElements != null) {
|
||||
int imageIndex = 0;
|
||||
for (PdfJsonImageElement imageElement : imageElements) {
|
||||
if (imageElement == null) {
|
||||
continue;
|
||||
}
|
||||
int order =
|
||||
imageElement.getZOrder() != null
|
||||
? imageElement.getZOrder()
|
||||
: Integer.MIN_VALUE / 2 + imageIndex;
|
||||
drawables.add(
|
||||
new DrawableElement(
|
||||
DrawableType.IMAGE, null, imageElement, order, sequence++));
|
||||
imageIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
if (textElements != null) {
|
||||
int textIndex = 0;
|
||||
for (PdfJsonTextElement textElement : textElements) {
|
||||
if (textElement == null) {
|
||||
continue;
|
||||
}
|
||||
int order =
|
||||
textElement.getZOrder() != null
|
||||
? textElement.getZOrder()
|
||||
: 1_000_000 + textIndex;
|
||||
drawables.add(
|
||||
new DrawableElement(
|
||||
DrawableType.TEXT, textElement, null, order, sequence++));
|
||||
textIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
drawables.sort(
|
||||
Comparator.comparingInt(DrawableElement::zOrder)
|
||||
.thenComparingInt(DrawableElement::sequence));
|
||||
return drawables;
|
||||
}
|
||||
|
||||
private void drawImageElement(
|
||||
PDPageContentStream contentStream,
|
||||
PDDocument document,
|
||||
PdfJsonImageElement element,
|
||||
Map<String, PDImageXObject> cache)
|
||||
throws IOException {
|
||||
if (element == null || element.getImageData() == null || element.getImageData().isBlank()) {
|
||||
return;
|
||||
}
|
||||
|
||||
String cacheKey =
|
||||
element.getId() != null && !element.getId().isBlank()
|
||||
? element.getId()
|
||||
: Integer.toHexString(System.identityHashCode(element));
|
||||
PDImageXObject image = cache.get(cacheKey);
|
||||
if (image == null) {
|
||||
image = createImageXObject(document, element);
|
||||
if (image == null) {
|
||||
return;
|
||||
}
|
||||
cache.put(cacheKey, image);
|
||||
}
|
||||
|
||||
float width = safeFloat(element.getWidth(), fallbackWidth(element));
|
||||
float height = safeFloat(element.getHeight(), fallbackHeight(element));
|
||||
if (width <= 0f) {
|
||||
width = Math.max(1f, fallbackWidth(element));
|
||||
}
|
||||
if (height <= 0f) {
|
||||
height = Math.max(1f, fallbackHeight(element));
|
||||
}
|
||||
float left = resolveLeft(element, width);
|
||||
float bottom = resolveBottom(element, height);
|
||||
|
||||
contentStream.drawImage(image, left, bottom, width, height);
|
||||
}
|
||||
|
||||
private PDImageXObject createImageXObject(PDDocument document, PdfJsonImageElement element)
|
||||
throws IOException {
|
||||
byte[] data;
|
||||
try {
|
||||
data = Base64.getDecoder().decode(element.getImageData());
|
||||
} catch (IllegalArgumentException ex) {
|
||||
log.debug("Failed to decode image element: {}", ex.getMessage());
|
||||
return null;
|
||||
}
|
||||
String name = element.getId() != null ? element.getId() : UUID.randomUUID().toString();
|
||||
return PDImageXObject.createFromByteArray(document, data, name);
|
||||
}
|
||||
|
||||
private float fallbackWidth(PdfJsonImageElement element) {
|
||||
if (element.getRight() != null && element.getLeft() != null) {
|
||||
return Math.max(0f, element.getRight() - element.getLeft());
|
||||
}
|
||||
if (element.getNativeWidth() != null) {
|
||||
return element.getNativeWidth();
|
||||
}
|
||||
return 1f;
|
||||
}
|
||||
|
||||
private float resolveLeft(PdfJsonImageElement element, float width) {
|
||||
if (element.getLeft() != null) {
|
||||
return element.getLeft();
|
||||
}
|
||||
if (element.getX() != null) {
|
||||
return element.getX();
|
||||
}
|
||||
if (element.getRight() != null) {
|
||||
return element.getRight() - width;
|
||||
}
|
||||
return 0f;
|
||||
}
|
||||
|
||||
private float resolveBottom(PdfJsonImageElement element, float height) {
|
||||
if (element.getBottom() != null) {
|
||||
return element.getBottom();
|
||||
}
|
||||
if (element.getY() != null) {
|
||||
return element.getY();
|
||||
}
|
||||
if (element.getTop() != null) {
|
||||
return element.getTop() - height;
|
||||
}
|
||||
return 0f;
|
||||
}
|
||||
|
||||
private float fallbackHeight(PdfJsonImageElement element) {
|
||||
if (element.getTop() != null && element.getBottom() != null) {
|
||||
return Math.max(0f, element.getTop() - element.getBottom());
|
||||
}
|
||||
if (element.getNativeHeight() != null) {
|
||||
return element.getNativeHeight();
|
||||
}
|
||||
return 1f;
|
||||
}
|
||||
|
||||
private class TextCollectingStripper extends PDFTextStripper {
|
||||
|
||||
private final PDDocument document;
|
||||
@ -1595,6 +2071,7 @@ public class PdfJsonConversionService {
|
||||
element.setHeight(position.getHeightDir());
|
||||
element.setTextMatrix(extractMatrix(position));
|
||||
element.setFontMatrixSize(computeFontMatrixSize(element.getTextMatrix()));
|
||||
element.setSpaceWidth(position.getWidthOfSpace());
|
||||
PDGraphicsState graphicsState = getGraphicsState();
|
||||
if (graphicsState != null) {
|
||||
PDTextState textState = graphicsState.getTextState();
|
||||
@ -1611,6 +2088,7 @@ public class PdfJsonConversionService {
|
||||
element.setFillColor(toTextColor(graphicsState.getNonStrokingColor()));
|
||||
element.setStrokeColor(toTextColor(graphicsState.getStrokingColor()));
|
||||
}
|
||||
element.setZOrder(1_000_000 + pageElements.size());
|
||||
pageElements.add(element);
|
||||
}
|
||||
}
|
||||
|
||||
55
frontend/package-lock.json
generated
55
frontend/package-lock.json
generated
@ -54,6 +54,7 @@
|
||||
"react": "^19.1.1",
|
||||
"react-dom": "^19.1.1",
|
||||
"react-i18next": "^15.7.3",
|
||||
"react-rnd": "^10.5.2",
|
||||
"react-router-dom": "^7.9.1",
|
||||
"signature_pad": "^5.0.4",
|
||||
"tailwindcss": "^4.1.13",
|
||||
@ -11036,6 +11037,16 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/re-resizable": {
|
||||
"version": "6.11.2",
|
||||
"resolved": "https://registry.npmjs.org/re-resizable/-/re-resizable-6.11.2.tgz",
|
||||
"integrity": "sha512-2xI2P3OHs5qw7K0Ud1aLILK6MQxW50TcO+DetD9eIV58j84TqYeHoZcL9H4GXFXXIh7afhH8mv5iUCXII7OW7A==",
|
||||
"license": "MIT",
|
||||
"peerDependencies": {
|
||||
"react": "^16.13.1 || ^17.0.0 || ^18.0.0 || ^19.0.0",
|
||||
"react-dom": "^16.13.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react": {
|
||||
"version": "19.1.1",
|
||||
"resolved": "https://registry.npmjs.org/react/-/react-19.1.1.tgz",
|
||||
@ -11057,6 +11068,29 @@
|
||||
"react": "^19.1.1"
|
||||
}
|
||||
},
|
||||
"node_modules/react-draggable": {
|
||||
"version": "4.4.6",
|
||||
"resolved": "https://registry.npmjs.org/react-draggable/-/react-draggable-4.4.6.tgz",
|
||||
"integrity": "sha512-LtY5Xw1zTPqHkVmtM3X8MUOxNDOUhv/khTgBgrUvwaS064bwVvxT+q5El0uUFNx5IEPKXuRejr7UqLwBIg5pdw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"clsx": "^1.1.1",
|
||||
"prop-types": "^15.8.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": ">= 16.3.0",
|
||||
"react-dom": ">= 16.3.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-draggable/node_modules/clsx": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/clsx/-/clsx-1.2.1.tgz",
|
||||
"integrity": "sha512-EcR6r5a8bj6pu3ycsa/E/cKVGuTgZJZdsyUYHOksG/UHIiKfjxzRxYJpyVBwYaQeOvghal9fcc4PidlgzugAQg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/react-dropzone": {
|
||||
"version": "14.3.8",
|
||||
"resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-14.3.8.tgz",
|
||||
@ -11175,6 +11209,27 @@
|
||||
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
|
||||
"license": "0BSD"
|
||||
},
|
||||
"node_modules/react-rnd": {
|
||||
"version": "10.5.2",
|
||||
"resolved": "https://registry.npmjs.org/react-rnd/-/react-rnd-10.5.2.tgz",
|
||||
"integrity": "sha512-0Tm4x7k7pfHf2snewJA8x7Nwgt3LV+58MVEWOVsFjk51eYruFEa6Wy7BNdxt4/lH0wIRsu7Gm3KjSXY2w7YaNw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"re-resizable": "6.11.2",
|
||||
"react-draggable": "4.4.6",
|
||||
"tslib": "2.6.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": ">=16.3.0",
|
||||
"react-dom": ">=16.3.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-rnd/node_modules/tslib": {
|
||||
"version": "2.6.2",
|
||||
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
|
||||
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==",
|
||||
"license": "0BSD"
|
||||
},
|
||||
"node_modules/react-router": {
|
||||
"version": "7.9.1",
|
||||
"resolved": "https://registry.npmjs.org/react-router/-/react-router-7.9.1.tgz",
|
||||
|
||||
@ -49,6 +49,7 @@
|
||||
"posthog-js": "^1.268.0",
|
||||
"react": "^19.1.1",
|
||||
"react-dom": "^19.1.1",
|
||||
"react-rnd": "^10.5.2",
|
||||
"react-i18next": "^15.7.3",
|
||||
"react-router-dom": "^7.9.1",
|
||||
"signature_pad": "^5.0.4",
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import React, { useEffect, useMemo, useState } from 'react';
|
||||
import React, { useCallback, useEffect, useLayoutEffect, useMemo, useRef, useState } from 'react';
|
||||
import {
|
||||
Alert,
|
||||
Badge,
|
||||
@ -21,16 +21,59 @@ import PictureAsPdfIcon from '@mui/icons-material/PictureAsPdfOutlined';
|
||||
import AutorenewIcon from '@mui/icons-material/Autorenew';
|
||||
import WarningAmberIcon from '@mui/icons-material/WarningAmber';
|
||||
import UploadIcon from '@mui/icons-material/Upload';
|
||||
import { Rnd } from 'react-rnd';
|
||||
|
||||
import {
|
||||
PdfJsonEditorViewData,
|
||||
PdfJsonPage,
|
||||
} from '../../../tools/pdfJsonEditorTypes';
|
||||
import { pageDimensions } from '../../../tools/pdfJsonEditorUtils';
|
||||
import { getImageBounds, pageDimensions } from '../../../tools/pdfJsonEditorUtils';
|
||||
|
||||
const MAX_RENDER_WIDTH = 820;
|
||||
const MIN_BOX_SIZE = 18;
|
||||
|
||||
/**
 * Measures the caret position inside `element` as a character offset from
 * the start of the element's contents.
 *
 * When there is no selection, the selection has no ranges, or the selection
 * focus lies outside `element`, the length of `element.innerText` is returned
 * instead (i.e. "caret at the end").
 */
const getCaretOffset = (element: HTMLElement): number => {
  const activeSelection = window.getSelection();
  if (!activeSelection || activeSelection.rangeCount === 0) {
    return element.innerText.length;
  }

  const { focusNode, focusOffset } = activeSelection;
  if (!element.contains(focusNode)) {
    return element.innerText.length;
  }

  // Work on a copy of the live range: stretch it from the start of the
  // element to the focus point and count the characters it spans.
  const measured = activeSelection.getRangeAt(0).cloneRange();
  measured.selectNodeContents(element);
  measured.setEnd(focusNode as Node, focusOffset);
  return measured.toString().length;
};
|
||||
|
||||
/**
 * Places a collapsed caret inside `element` at the given character offset.
 *
 * The offset is clamped to `[0, element.innerText.length]`, then the text
 * nodes under `element` are walked in document order until the one holding
 * the offset is found. If no text node can hold it, the caret is collapsed
 * to the end of the element. Does nothing when no selection object exists.
 */
const setCaretOffset = (element: HTMLElement, offset: number): void => {
  const activeSelection = window.getSelection();
  if (!activeSelection) {
    return;
  }

  let charsLeft = Math.min(Math.max(offset, 0), element.innerText.length);
  const caret = document.createRange();

  // Replaces whatever is currently selected with the caret range.
  const applyCaret = (): void => {
    activeSelection.removeAllRanges();
    activeSelection.addRange(caret);
  };

  const textNodes = document.createTreeWalker(element, NodeFilter.SHOW_TEXT);
  for (let visited = textNodes.nextNode(); visited; visited = textNodes.nextNode()) {
    const textNode = visited as Text;
    const available = textNode.length;
    if (charsLeft <= available) {
      caret.setStart(textNode, charsLeft);
      caret.collapse(true);
      applyCaret();
      return;
    }
    charsLeft -= available;
  }

  // Ran out of text nodes: park the caret at the very end of the element.
  caret.selectNodeContents(element);
  caret.collapse(false);
  applyCaret();
};
|
||||
|
||||
interface PdfJsonEditorViewProps {
|
||||
data: PdfJsonEditorViewData;
|
||||
}
|
||||
@ -61,10 +104,15 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
const { t } = useTranslation();
|
||||
const [activeGroupId, setActiveGroupId] = useState<string | null>(null);
|
||||
const [editingGroupId, setEditingGroupId] = useState<string | null>(null);
|
||||
const [activeImageId, setActiveImageId] = useState<string | null>(null);
|
||||
const containerRef = useRef<HTMLDivElement | null>(null);
|
||||
const editorRefs = useRef<Map<string, HTMLDivElement>>(new Map());
|
||||
const caretOffsetsRef = useRef<Map<string, number>>(new Map());
|
||||
|
||||
const {
|
||||
document: pdfDocument,
|
||||
groupsByPage,
|
||||
imagesByPage,
|
||||
selectedPage,
|
||||
dirtyPages,
|
||||
hasDocument,
|
||||
@ -76,6 +124,8 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
onLoadJson,
|
||||
onSelectPage,
|
||||
onGroupEdit,
|
||||
onImageTransform,
|
||||
onImageReset,
|
||||
onReset,
|
||||
onDownloadJson,
|
||||
onGeneratePdf,
|
||||
@ -114,6 +164,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
const pages = pdfDocument?.pages ?? [];
|
||||
const currentPage = pages[selectedPage] ?? null;
|
||||
const pageGroups = groupsByPage[selectedPage] ?? [];
|
||||
const pageImages = imagesByPage[selectedPage] ?? [];
|
||||
const visibleGroups = useMemo(
|
||||
() =>
|
||||
pageGroups.filter((group) => {
|
||||
@ -123,6 +174,14 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
[editingGroupId, pageGroups]
|
||||
);
|
||||
|
||||
const orderedImages = useMemo(
|
||||
() =>
|
||||
[...pageImages].sort(
|
||||
(first, second) => (first?.zOrder ?? -1_000_000) - (second?.zOrder ?? -1_000_000),
|
||||
),
|
||||
[pageImages],
|
||||
);
|
||||
|
||||
const { width: pageWidth, height: pageHeight } = pageDimensions(currentPage);
|
||||
const scale = useMemo(() => Math.min(MAX_RENDER_WIDTH / pageWidth, 1.5), [pageWidth]);
|
||||
const scaledWidth = pageWidth * scale;
|
||||
@ -131,8 +190,21 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
useEffect(() => {
|
||||
setActiveGroupId(null);
|
||||
setEditingGroupId(null);
|
||||
setActiveImageId(null);
|
||||
}, [selectedPage]);
|
||||
|
||||
useLayoutEffect(() => {
|
||||
if (!editingGroupId) {
|
||||
return;
|
||||
}
|
||||
const editor = editorRefs.current.get(editingGroupId);
|
||||
if (!editor) {
|
||||
return;
|
||||
}
|
||||
const offset = caretOffsetsRef.current.get(editingGroupId) ?? editor.innerText.length;
|
||||
setCaretOffset(editor, offset);
|
||||
}, [editingGroupId, groupsByPage, imagesByPage]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!editingGroupId) {
|
||||
return;
|
||||
@ -160,6 +232,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
const handleBackgroundClick = () => {
|
||||
setEditingGroupId(null);
|
||||
setActiveGroupId(null);
|
||||
setActiveImageId(null);
|
||||
};
|
||||
|
||||
const renderGroupContainer = (
|
||||
@ -205,6 +278,28 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
</Box>
|
||||
);
|
||||
|
||||
/**
 * Converts a rendered (CSS pixel) image box into page units and reports it
 * through `onImageTransform` for the currently selected page.
 *
 * Pixel values are divided by `scale`; the vertical axis is flipped against
 * `pageHeight` so the reported box is described by its `left`/`bottom` edge.
 * Width and height are kept at 0.01 or more, and the box is clamped so it
 * stays within `[0, pageWidth] x [0, pageHeight]`. The reported `transform`
 * is always an empty array.
 */
const emitImageTransform = useCallback(
  (imageId: string, leftPx: number, topPx: number, widthPx: number, heightPx: number) => {
    const clamp = (value: number, lower: number, upper: number): number =>
      Math.min(Math.max(value, lower), upper);

    const width = Math.max(widthPx / scale, 0.01);
    const height = Math.max(heightPx / scale, 0.01);

    // Horizontal: keep the whole width on the page where possible.
    const left = clamp(leftPx / scale, 0, Math.max(pageWidth - width, 0));

    // Vertical: CSS top grows downward, page coordinates grow upward.
    const top = clamp(pageHeight - topPx / scale, Math.min(height, pageHeight), pageHeight);
    const bottom = Math.max(top - height, 0);

    onImageTransform(selectedPage, imageId, { left, bottom, width, height, transform: [] });
  },
  [onImageTransform, pageHeight, pageWidth, scale, selectedPage],
);
|
||||
|
||||
return (
|
||||
<Stack gap="xl" className="h-full" style={{ padding: '1.5rem', overflow: 'auto' }}>
|
||||
<Card withBorder radius="md" shadow="xs" padding="lg">
|
||||
@ -341,8 +436,121 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
borderRadius: '0.5rem',
|
||||
overflow: 'hidden',
|
||||
}}
|
||||
ref={containerRef}
|
||||
>
|
||||
{visibleGroups.length === 0 ? (
|
||||
{orderedImages.map((image, imageIndex) => {
|
||||
if (!image?.imageData) {
|
||||
return null;
|
||||
}
|
||||
const bounds = getImageBounds(image);
|
||||
const width = Math.max(bounds.right - bounds.left, 1);
|
||||
const height = Math.max(bounds.top - bounds.bottom, 1);
|
||||
const cssWidth = Math.max(width * scale, 2);
|
||||
const cssHeight = Math.max(height * scale, 2);
|
||||
const cssLeft = bounds.left * scale;
|
||||
const cssTop = (pageHeight - bounds.top) * scale;
|
||||
const imageId = image.id ?? `page-${selectedPage}-image-${imageIndex}`;
|
||||
const isActive = activeImageId === imageId;
|
||||
const src = `data:image/${image.imageFormat ?? 'png'};base64,${image.imageData}`;
|
||||
const baseZIndex = (image.zOrder ?? -1_000_000) + 1_050_000;
|
||||
const zIndex = isActive ? baseZIndex + 1_000_000 : baseZIndex;
|
||||
|
||||
return (
|
||||
<Rnd
|
||||
key={`image-${imageId}`}
|
||||
bounds="parent"
|
||||
size={{ width: cssWidth, height: cssHeight }}
|
||||
position={{ x: cssLeft, y: cssTop }}
|
||||
onDragStart={() => {
|
||||
setActiveGroupId(null);
|
||||
setEditingGroupId(null);
|
||||
setActiveImageId(imageId);
|
||||
}}
|
||||
onDrag={(event, data) => {
|
||||
emitImageTransform(
|
||||
imageId,
|
||||
data.x,
|
||||
data.y,
|
||||
cssWidth,
|
||||
cssHeight,
|
||||
);
|
||||
}}
|
||||
onDragStop={(event, data) => {
|
||||
emitImageTransform(
|
||||
imageId,
|
||||
data.x,
|
||||
data.y,
|
||||
cssWidth,
|
||||
cssHeight,
|
||||
);
|
||||
}}
|
||||
onResizeStart={() => {
|
||||
setActiveImageId(imageId);
|
||||
setActiveGroupId(null);
|
||||
setEditingGroupId(null);
|
||||
}}
|
||||
onResize={(event, _direction, ref, _delta, position) => {
|
||||
const nextWidth = parseFloat(ref.style.width);
|
||||
const nextHeight = parseFloat(ref.style.height);
|
||||
emitImageTransform(
|
||||
imageId,
|
||||
position.x,
|
||||
position.y,
|
||||
nextWidth,
|
||||
nextHeight,
|
||||
);
|
||||
}}
|
||||
onResizeStop={(event, _direction, ref, _delta, position) => {
|
||||
const nextWidth = parseFloat(ref.style.width);
|
||||
const nextHeight = parseFloat(ref.style.height);
|
||||
emitImageTransform(
|
||||
imageId,
|
||||
position.x,
|
||||
position.y,
|
||||
nextWidth,
|
||||
nextHeight,
|
||||
);
|
||||
}}
|
||||
style={{ zIndex }}
|
||||
>
|
||||
<Box
|
||||
onMouseEnter={() => setActiveImageId(imageId)}
|
||||
onMouseLeave={() => {
|
||||
setActiveImageId((current) => (current === imageId ? null : current));
|
||||
}}
|
||||
onDoubleClick={(event) => {
|
||||
event.stopPropagation();
|
||||
onImageReset(selectedPage, imageId);
|
||||
}}
|
||||
style={{
|
||||
width: '100%',
|
||||
height: '100%',
|
||||
cursor: isActive ? 'grabbing' : 'grab',
|
||||
outline: isActive
|
||||
? '2px solid rgba(59, 130, 246, 0.9)'
|
||||
: '1px solid rgba(148, 163, 184, 0.4)',
|
||||
outlineOffset: '-1px',
|
||||
borderRadius: 4,
|
||||
backgroundColor: 'rgba(255,255,255,0.04)',
|
||||
transition: 'outline 120ms ease',
|
||||
}}
|
||||
>
|
||||
<img
|
||||
src={src}
|
||||
alt={t('pdfJsonEditor.imageLabel', 'Placed image')}
|
||||
style={{
|
||||
width: '100%',
|
||||
height: '100%',
|
||||
objectFit: 'contain',
|
||||
pointerEvents: 'none',
|
||||
userSelect: 'none',
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
</Rnd>
|
||||
);
|
||||
})}
|
||||
{visibleGroups.length === 0 && orderedImages.length === 0 ? (
|
||||
<Group justify="center" align="center" style={{ height: '100%' }}>
|
||||
<Stack gap={4} align="center">
|
||||
<Text size="sm" c="dimmed">
|
||||
@ -373,6 +581,7 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
justifyContent: 'flex-start',
|
||||
pointerEvents: 'auto',
|
||||
cursor: 'text',
|
||||
zIndex: 2_000_000,
|
||||
};
|
||||
|
||||
if (isEditing) {
|
||||
@ -383,17 +592,38 @@ const PdfJsonEditorView = ({ data }: PdfJsonEditorViewProps) => {
|
||||
true,
|
||||
changed,
|
||||
<div
|
||||
ref={(node) => {
|
||||
if (node) {
|
||||
editorRefs.current.set(group.id, node);
|
||||
} else {
|
||||
editorRefs.current.delete(group.id);
|
||||
}
|
||||
}}
|
||||
contentEditable
|
||||
suppressContentEditableWarning
|
||||
data-editor-group={group.id}
|
||||
onBlur={(event) => {
|
||||
const value = event.currentTarget.innerText.replace(/\u00A0/g, ' ');
|
||||
caretOffsetsRef.current.delete(group.id);
|
||||
editorRefs.current.delete(group.id);
|
||||
setActiveGroupId(null);
|
||||
onGroupEdit(group.pageIndex, group.id, value);
|
||||
setEditingGroupId(null);
|
||||
}}
|
||||
onInput={(event) => {
|
||||
const value = event.currentTarget.innerText.replace(/\u00A0/g, ' ');
|
||||
const offset = getCaretOffset(event.currentTarget);
|
||||
caretOffsetsRef.current.set(group.id, offset);
|
||||
onGroupEdit(group.pageIndex, group.id, value);
|
||||
requestAnimationFrame(() => {
|
||||
if (editingGroupId !== group.id) {
|
||||
return;
|
||||
}
|
||||
const editor = editorRefs.current.get(group.id);
|
||||
if (editor) {
|
||||
setCaretOffset(editor, caretOffsetsRef.current.get(group.id) ?? editor.innerText.length);
|
||||
}
|
||||
});
|
||||
}}
|
||||
style={{
|
||||
width: '100%',
|
||||
|
||||
@ -11,6 +11,7 @@ import { downloadBlob, downloadTextAsFile } from '../utils/downloadUtils';
|
||||
import { getFilenameFromHeaders } from '../utils/fileResponseUtils';
|
||||
import {
|
||||
PdfJsonDocument,
|
||||
PdfJsonImageElement,
|
||||
TextGroup,
|
||||
PdfJsonEditorViewData,
|
||||
} from './pdfJsonEditorTypes';
|
||||
@ -19,6 +20,9 @@ import {
|
||||
getDirtyPages,
|
||||
groupDocumentText,
|
||||
restoreGlyphElements,
|
||||
extractDocumentImages,
|
||||
cloneImageElement,
|
||||
valueOr,
|
||||
} from './pdfJsonEditorUtils';
|
||||
import PdfJsonEditorView from '../components/tools/pdfJsonEditor/PdfJsonEditorView';
|
||||
|
||||
@ -46,13 +50,19 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
|
||||
const [loadedDocument, setLoadedDocument] = useState<PdfJsonDocument | null>(null);
|
||||
const [groupsByPage, setGroupsByPage] = useState<TextGroup[][]>([]);
|
||||
const [imagesByPage, setImagesByPage] = useState<PdfJsonImageElement[][]>([]);
|
||||
const [selectedPage, setSelectedPage] = useState(0);
|
||||
const [fileName, setFileName] = useState('');
|
||||
const [errorMessage, setErrorMessage] = useState<string | null>(null);
|
||||
const [isGeneratingPdf, setIsGeneratingPdf] = useState(false);
|
||||
const [isConverting, setIsConverting] = useState(false);
|
||||
|
||||
const dirtyPages = useMemo(() => getDirtyPages(groupsByPage), [groupsByPage]);
|
||||
const originalImagesRef = useRef<PdfJsonImageElement[][]>([]);
|
||||
|
||||
const dirtyPages = useMemo(
|
||||
() => getDirtyPages(groupsByPage, imagesByPage, originalImagesRef.current),
|
||||
[groupsByPage, imagesByPage],
|
||||
);
|
||||
const hasChanges = useMemo(() => dirtyPages.some(Boolean), [dirtyPages]);
|
||||
const hasDocument = loadedDocument !== null;
|
||||
const viewLabel = useMemo(() => t('pdfJsonEditor.viewLabel', 'PDF Editor'), [t]);
|
||||
@ -60,12 +70,17 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
/**
 * Rebuilds all editor state from `document`, or clears it when `document`
 * is null. In both cases the selected page is reset to 0.
 *
 * For a real document, a deep clone is grouped into text groups and image
 * lists; a separate copy of the image lists is stored in
 * `originalImagesRef` as the baseline for later comparisons and resets.
 */
const resetToDocument = useCallback((document: PdfJsonDocument | null) => {
  if (document) {
    const workingCopy = deepCloneDocument(document);
    const textGroups = groupDocumentText(workingCopy);
    const pageImages = extractDocumentImages(workingCopy);
    // Keep an independent snapshot so later edits never touch the baseline.
    originalImagesRef.current = pageImages.map((images) => images.map(cloneImageElement));
    setGroupsByPage(textGroups);
    setImagesByPage(pageImages);
  } else {
    setGroupsByPage([]);
    setImagesByPage([]);
    originalImagesRef.current = [];
  }
  setSelectedPage(0);
}, []);
|
||||
|
||||
@ -108,6 +123,8 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
console.error('Failed to load file', error);
|
||||
setLoadedDocument(null);
|
||||
setGroupsByPage([]);
|
||||
setImagesByPage([]);
|
||||
originalImagesRef.current = [];
|
||||
|
||||
if (isPdf) {
|
||||
setErrorMessage(
|
||||
@ -142,6 +159,80 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
);
|
||||
}, []);
|
||||
|
||||
/**
 * Applies a new position/size to the image `imageId` on page `pageIndex`.
 *
 * The image's `x`/`y`, `left`/`bottom`/`right`/`top` and `width`/`height`
 * are rewritten from `next`. The stored `transform` is rebuilt only when the
 * reference transform (the image's own, else the baseline one from
 * `originalImagesRef`) has a negative component at index 0 or 3; otherwise
 * it is set to `null`. `next.transform` itself is not read.
 *
 * If left, bottom, width and height all match the current values within
 * 1e-4, the existing page array is returned unchanged.
 */
const handleImageTransform = useCallback(
  (
    pageIndex: number,
    imageId: string,
    next: { left: number; bottom: number; width: number; height: number; transform: number[] },
  ) => {
    // Sign (+1 / -1) of one matrix component; a missing or short matrix, or
    // a zero/NaN component, counts as +1.
    const axisSign = (matrix: number[] | null | undefined, component: number): number =>
      matrix && matrix.length >= 6 ? Math.sign(matrix[component]) || 1 : 1;

    setImagesByPage((previous) =>
      previous.map((pageImages, idx) => {
        if (idx !== pageIndex) {
          return pageImages;
        }

        let moved = false;
        const rewritten = pageImages.map((image) => {
          if ((image.id ?? '') !== imageId) {
            return image;
          }

          const referenceTransform =
            image.transform ??
            originalImagesRef.current[idx]?.find((base) => (base.id ?? '') === imageId)?.transform;
          const signX = axisSign(referenceTransform, 0);
          const signY = axisSign(referenceTransform, 3);
          const mirrored = signX < 0 || signY < 0;

          const geometryUnchanged = (
            [
              [image.left, next.left],
              [image.bottom, next.bottom],
              [image.width, next.width],
              [image.height, next.height],
            ] as const
          ).every(([before, after]) => Math.abs(valueOr(before, 0) - after) < 1e-4);
          if (!geometryUnchanged) {
            moved = true;
          }

          const placed: PdfJsonImageElement = {
            ...image,
            x: next.left,
            y: next.bottom,
            left: next.left,
            bottom: next.bottom,
            right: next.left + next.width,
            top: next.bottom + next.height,
            width: next.width,
            height: next.height,
            transform: mirrored
              ? [
                  next.width * signX,
                  0,
                  0,
                  next.height * signY,
                  next.left,
                  signY >= 0 ? next.bottom : next.bottom + next.height,
                ]
              : null,
          };
          return placed;
        });

        return moved ? rewritten : pageImages;
      }),
    );
  },
  [],
);
|
||||
|
||||
/**
 * Restores the image `imageId` on page `pageIndex` to a fresh copy of its
 * baseline entry in `originalImagesRef`. Does nothing when no baseline entry
 * with that id exists on that page.
 */
const handleImageReset = useCallback((pageIndex: number, imageId: string) => {
  // Images without an id are compared as the empty string.
  const hasTargetId = (candidate: PdfJsonImageElement): boolean =>
    (candidate.id ?? '') === imageId;

  const baseline = originalImagesRef.current[pageIndex]?.find(hasTargetId);
  if (!baseline) {
    return;
  }

  setImagesByPage((previous) =>
    previous.map((pageImages, idx) =>
      idx === pageIndex
        ? pageImages.map((image) => (hasTargetId(image) ? cloneImageElement(baseline) : image))
        : pageImages,
    ),
  );
}, []);
|
||||
|
||||
const handleResetEdits = useCallback(() => {
|
||||
if (!loadedDocument) {
|
||||
return;
|
||||
@ -155,13 +246,18 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
return null;
|
||||
}
|
||||
|
||||
const updatedDocument = restoreGlyphElements(loadedDocument, groupsByPage);
|
||||
const updatedDocument = restoreGlyphElements(
|
||||
loadedDocument,
|
||||
groupsByPage,
|
||||
imagesByPage,
|
||||
originalImagesRef.current,
|
||||
);
|
||||
const baseName = sanitizeBaseName(fileName || loadedDocument.metadata?.title || undefined);
|
||||
return {
|
||||
document: updatedDocument,
|
||||
filename: `${baseName}.json`,
|
||||
};
|
||||
}, [fileName, groupsByPage, loadedDocument]);
|
||||
}, [fileName, groupsByPage, imagesByPage, loadedDocument]);
|
||||
|
||||
const handleDownloadJson = useCallback(() => {
|
||||
const payload = buildPayload();
|
||||
@ -229,6 +325,7 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
const viewData = useMemo<PdfJsonEditorViewData>(() => ({
|
||||
document: loadedDocument,
|
||||
groupsByPage,
|
||||
imagesByPage,
|
||||
selectedPage,
|
||||
dirtyPages,
|
||||
hasDocument,
|
||||
@ -240,10 +337,14 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
onLoadJson: handleLoadFile,
|
||||
onSelectPage: handleSelectPage,
|
||||
onGroupEdit: handleGroupTextChange,
|
||||
onImageTransform: handleImageTransform,
|
||||
onImageReset: handleImageReset,
|
||||
onReset: handleResetEdits,
|
||||
onDownloadJson: handleDownloadJson,
|
||||
onGeneratePdf: handleGeneratePdf,
|
||||
}), [
|
||||
handleImageTransform,
|
||||
imagesByPage,
|
||||
dirtyPages,
|
||||
errorMessage,
|
||||
fileName,
|
||||
@ -251,6 +352,7 @@ const PdfJsonEditor = ({ onComplete, onError }: BaseToolProps) => {
|
||||
handleDownloadJson,
|
||||
handleGeneratePdf,
|
||||
handleGroupTextChange,
|
||||
handleImageReset,
|
||||
handleLoadFile,
|
||||
handleResetEdits,
|
||||
handleSelectPage,
|
||||
|
||||
@ -33,6 +33,8 @@ export interface PdfJsonTextElement {
|
||||
fontSizeInPt?: number | null;
|
||||
characterSpacing?: number | null;
|
||||
wordSpacing?: number | null;
|
||||
spaceWidth?: number | null;
|
||||
zOrder?: number | null;
|
||||
horizontalScaling?: number | null;
|
||||
leading?: number | null;
|
||||
rise?: number | null;
|
||||
@ -46,6 +48,26 @@ export interface PdfJsonTextElement {
|
||||
strokeColor?: PdfJsonTextColor | null;
|
||||
}
|
||||
|
||||
/**
 * One image placed on a page of the JSON document model.
 * Every member is optional and may also be `null`.
 */
export interface PdfJsonImageElement {
  // --- identity ---
  /** Identifier used to match an image against its baseline copy. */
  id?: string | null;
  objectName?: string | null;
  inlineImage?: boolean | null;

  // --- intrinsic size ---
  nativeWidth?: number | null;
  nativeHeight?: number | null;

  // --- placement on the page ---
  /** Origin; `getImageBounds` falls back to `x`/`y` when `left`/`bottom` are absent. */
  x?: number | null;
  y?: number | null;
  width?: number | null;
  height?: number | null;
  /** Bounding-box edges. */
  left?: number | null;
  right?: number | null;
  top?: number | null;
  bottom?: number | null;
  /** Placement matrix; the editor only inspects it when it has at least six entries. */
  transform?: number[] | null;

  // --- stacking ---
  /** Sort key used by the editor view when ordering and layering images. */
  zOrder?: number | null;

  // --- payload ---
  /** Base64 image bytes, embedded by the view in a `data:image/<imageFormat>;base64,` URI. */
  imageData?: string | null;
  /** Image sub-type for that URI; the view assumes `png` when absent. */
  imageFormat?: string | null;
}
|
||||
|
||||
export interface PdfJsonStream {
|
||||
dictionary?: Record<string, unknown> | null;
|
||||
rawData?: string | null;
|
||||
@ -57,6 +79,7 @@ export interface PdfJsonPage {
|
||||
height?: number | null;
|
||||
rotation?: number | null;
|
||||
textElements?: PdfJsonTextElement[] | null;
|
||||
imageElements?: PdfJsonImageElement[] | null;
|
||||
resources?: unknown;
|
||||
contentStreams?: PdfJsonStream[] | null;
|
||||
}
|
||||
@ -107,6 +130,7 @@ export const DEFAULT_PAGE_HEIGHT = 792;
|
||||
export interface PdfJsonEditorViewData {
|
||||
document: PdfJsonDocument | null;
|
||||
groupsByPage: TextGroup[][];
|
||||
imagesByPage: PdfJsonImageElement[][];
|
||||
selectedPage: number;
|
||||
dirtyPages: boolean[];
|
||||
hasDocument: boolean;
|
||||
@ -118,6 +142,18 @@ export interface PdfJsonEditorViewData {
|
||||
onLoadJson: (file: File | null) => Promise<void> | void;
|
||||
onSelectPage: (pageIndex: number) => void;
|
||||
onGroupEdit: (pageIndex: number, groupId: string, value: string) => void;
|
||||
onImageTransform: (
|
||||
pageIndex: number,
|
||||
imageId: string,
|
||||
next: {
|
||||
left: number;
|
||||
bottom: number;
|
||||
width: number;
|
||||
height: number;
|
||||
transform: number[];
|
||||
},
|
||||
) => void;
|
||||
onImageReset: (pageIndex: number, imageId: string) => void;
|
||||
onReset: () => void;
|
||||
onDownloadJson: () => void;
|
||||
onGeneratePdf: () => void;
|
||||
|
||||
@ -3,6 +3,7 @@ import {
|
||||
PdfJsonDocument,
|
||||
PdfJsonPage,
|
||||
PdfJsonTextElement,
|
||||
PdfJsonImageElement,
|
||||
TextGroup,
|
||||
DEFAULT_PAGE_HEIGHT,
|
||||
DEFAULT_PAGE_WIDTH,
|
||||
@ -11,6 +12,9 @@ import {
|
||||
const LINE_TOLERANCE = 2;
|
||||
const GAP_FACTOR = 0.6;
|
||||
const SPACE_MIN_GAP = 1.5;
|
||||
const MIN_CHAR_WIDTH_FACTOR = 0.35;
|
||||
const MAX_CHAR_WIDTH_FACTOR = 1.25;
|
||||
const EXTRA_GAP_RATIO = 0.8;
|
||||
|
||||
export const valueOr = (value: number | null | undefined, fallback = 0): number => {
|
||||
if (value === null || value === undefined || Number.isNaN(value)) {
|
||||
@ -24,6 +28,11 @@ export const cloneTextElement = (element: PdfJsonTextElement): PdfJsonTextElemen
|
||||
textMatrix: element.textMatrix ? [...element.textMatrix] : element.textMatrix ?? undefined,
|
||||
});
|
||||
|
||||
/**
 * Returns a shallow copy of `element` whose `transform` array, when present,
 * is itself copied so the clone and the source do not share it. A `null` or
 * missing `transform` becomes `undefined` on the copy.
 */
export const cloneImageElement = (element: PdfJsonImageElement): PdfJsonImageElement => {
  const { transform } = element;
  const copiedTransform = transform ? [...transform] : transform ?? undefined;
  return { ...element, transform: copiedTransform };
};
|
||||
|
||||
const getBaseline = (element: PdfJsonTextElement): number => {
|
||||
if (element.textMatrix && element.textMatrix.length === 6) {
|
||||
return valueOr(element.textMatrix[5]);
|
||||
@ -71,6 +80,41 @@ const getElementBounds = (element: PdfJsonTextElement): BoundingBox => {
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * Derives the bounding box of an image element.
 *
 * Per axis: the near edge is `left`/`bottom`, falling back to `x`/`y`, then
 * to 0. The far edge is `right`/`top` when usable; otherwise it is the near
 * edge plus `width`/`height` (or plus the non-negative distance to the far
 * edge when the size is missing).
 */
export const getImageBounds = (element: PdfJsonImageElement): BoundingBox => {
  // Resolves one axis to its [near, far] pair.
  const resolveAxis = (
    near: number | null | undefined,
    nearAlias: number | null | undefined,
    extent: number | null | undefined,
    far: number | null | undefined,
  ): [number, number] => {
    const start = valueOr(near ?? nearAlias, 0);
    const span = valueOr(extent, Math.max(valueOr(far, start) - start, 0));
    const derivedEnd = start + span;
    return [start, valueOr(far ?? derivedEnd, derivedEnd)];
  };

  const [left, right] = resolveAxis(element.left, element.x, element.width, element.right);
  const [bottom, top] = resolveAxis(element.bottom, element.y, element.height, element.top);

  return { left, right, bottom, top };
};
|
||||
|
||||
/**
 * Picks a spacing hint for a text element: the first strictly positive value
 * among `spaceWidth` and `wordSpacing`, otherwise `characterSpacing` floored
 * at 0. Missing values are treated as 0.
 */
const getSpacingHint = (element: PdfJsonTextElement): number => {
  for (const candidate of [element.spaceWidth, element.wordSpacing]) {
    const hint = valueOr(candidate, 0);
    if (hint > 0) {
      return hint;
    }
  }
  return Math.max(valueOr(element.characterSpacing, 0), 0);
};
|
||||
|
||||
/**
 * Returns the element's width clamped to a range proportional to the font
 * size: at least `avgFontSize * MIN_CHAR_WIDTH_FACTOR` and at most
 * `avgFontSize * MAX_CHAR_WIDTH_FACTOR`.
 */
const estimateCharWidth = (element: PdfJsonTextElement, avgFontSize: number): number => {
  const lowerBound = avgFontSize * MIN_CHAR_WIDTH_FACTOR;
  const upperBound = avgFontSize * MAX_CHAR_WIDTH_FACTOR;
  const atLeastLower = Math.max(getWidth(element), lowerBound);
  return Math.min(atLeastLower, upperBound);
};
|
||||
|
||||
const mergeBounds = (bounds: BoundingBox[]): BoundingBox => {
|
||||
if (bounds.length === 0) {
|
||||
return { left: 0, right: 0, top: 0, bottom: 0 };
|
||||
@ -88,10 +132,32 @@ const mergeBounds = (bounds: BoundingBox[]): BoundingBox => {
|
||||
|
||||
/**
 * Decides whether a space should be inserted between two consecutive text
 * elements on the same line.
 *
 * A space is inserted when either
 *  - the gap between the right edge of `prev` and the start of `current`
 *    exceeds the spacing hint, or
 *  - the advance from `prev`'s start to `current`'s start, minus an
 *    estimated character width for `prev`, exceeds `EXTRA_GAP_RATIO` of the
 *    spacing hint.
 *
 * The spacing hint is the largest of `SPACE_MIN_GAP`, both elements' own
 * spacing hints, and `GAP_FACTOR` times the average font size.
 *
 * @param prev    the element that precedes the potential space
 * @param current the element that follows it
 * @returns `true` when a space should separate the two elements
 */
const shouldInsertSpace = (prev: PdfJsonTextElement, current: PdfJsonTextElement): boolean => {
  const prevRight = getX(prev) + getWidth(prev);
  const trailingGap = Math.max(0, getX(current) - prevRight);
  const avgFontSize = (getFontSize(prev) + getFontSize(current)) / 2;

  // Second signal for elements whose reported width is unreliable: compare
  // the start-to-start advance against a clamped width estimate.
  const baselineAdvance = Math.max(0, getX(current) - getX(prev));
  const charWidthEstimate = estimateCharWidth(prev, avgFontSize);
  const inferredGap = Math.max(0, baselineAdvance - charWidthEstimate);

  const spacingHint = Math.max(
    SPACE_MIN_GAP,
    getSpacingHint(prev),
    getSpacingHint(current),
    avgFontSize * GAP_FACTOR,
  );

  // The superseded `gap > threshold` early return is gone, so both checks
  // below are now reachable. The former trailing-hyphen branch returned
  // `false` on both paths and has been folded into the final result.
  return trailingGap > spacingHint || inferredGap > spacingHint * EXTRA_GAP_RATIO;
};
|
||||
|
||||
const buildGroupText = (elements: PdfJsonTextElement[]): string => {
|
||||
@ -212,6 +278,27 @@ export const groupDocumentText = (document: PdfJsonDocument | null | undefined):
|
||||
return pages.map((page, index) => groupPageTextElements(page, index));
|
||||
};
|
||||
|
||||
/**
 * Copies the image elements of `page`, making sure every copy has an id.
 * Images whose id is missing, empty or whitespace-only receive
 * `page-<pageIndex>-image-<imageIndex>`. A missing page or image list yields
 * an empty array.
 */
export const extractPageImages = (
  page: PdfJsonPage | null | undefined,
  pageIndex: number,
): PdfJsonImageElement[] =>
  (page?.imageElements ?? []).map((image, imageIndex) => {
    const copy = cloneImageElement(image);
    const usableId = copy.id?.trim() ?? '';
    if (usableId.length === 0) {
      copy.id = `page-${pageIndex}-image-${imageIndex}`;
    }
    return copy;
  });
|
||||
|
||||
/**
 * Builds the per-page image lists for a document: one array per page, in
 * page order. A missing document or page list yields an empty array.
 */
export const extractDocumentImages = (
  document: PdfJsonDocument | null | undefined,
): PdfJsonImageElement[][] => {
  const pageList = document?.pages;
  if (!pageList) {
    return [];
  }
  return pageList.map((page, pageIndex) => extractPageImages(page, pageIndex));
};
|
||||
|
||||
export const deepCloneDocument = (document: PdfJsonDocument): PdfJsonDocument => {
|
||||
if (typeof structuredClone === 'function') {
|
||||
return structuredClone(document);
|
||||
@ -277,14 +364,19 @@ const distributeTextAcrossElements = (text: string | undefined, elements: PdfJso
|
||||
export const buildUpdatedDocument = (
|
||||
source: PdfJsonDocument,
|
||||
groupsByPage: TextGroup[][],
|
||||
imagesByPage: PdfJsonImageElement[][],
|
||||
): PdfJsonDocument => {
|
||||
const updated = deepCloneDocument(source);
|
||||
const pages = updated.pages ?? [];
|
||||
|
||||
updated.pages = pages.map((page, pageIndex) => {
|
||||
const groups = groupsByPage[pageIndex] ?? [];
|
||||
const images = imagesByPage[pageIndex] ?? [];
|
||||
if (!groups.length) {
|
||||
return page;
|
||||
return {
|
||||
...page,
|
||||
imageElements: images.map(cloneImageElement),
|
||||
};
|
||||
}
|
||||
|
||||
const updatedElements: PdfJsonTextElement[] = groups.flatMap((group) => {
|
||||
@ -297,6 +389,7 @@ export const buildUpdatedDocument = (
|
||||
return {
|
||||
...page,
|
||||
textElements: updatedElements,
|
||||
imageElements: images.map(cloneImageElement),
|
||||
contentStreams: page.contentStreams ?? [],
|
||||
};
|
||||
});
|
||||
@ -307,14 +400,22 @@ export const buildUpdatedDocument = (
|
||||
export const restoreGlyphElements = (
|
||||
source: PdfJsonDocument,
|
||||
groupsByPage: TextGroup[][],
|
||||
imagesByPage: PdfJsonImageElement[][],
|
||||
originalImagesByPage: PdfJsonImageElement[][],
|
||||
): PdfJsonDocument => {
|
||||
const updated = deepCloneDocument(source);
|
||||
const pages = updated.pages ?? [];
|
||||
|
||||
updated.pages = pages.map((page, pageIndex) => {
|
||||
const groups = groupsByPage[pageIndex] ?? [];
|
||||
const images = imagesByPage[pageIndex] ?? [];
|
||||
const baselineImages = originalImagesByPage[pageIndex] ?? [];
|
||||
|
||||
if (!groups.length) {
|
||||
return page;
|
||||
return {
|
||||
...page,
|
||||
imageElements: images.map(cloneImageElement),
|
||||
};
|
||||
}
|
||||
|
||||
const rebuiltElements: PdfJsonTextElement[] = [];
|
||||
@ -327,16 +428,105 @@ export const restoreGlyphElements = (
|
||||
rebuiltElements.push(...originals);
|
||||
});
|
||||
|
||||
const textDirty = groups.some((group) => group.text !== group.originalText);
|
||||
const imageDirty = areImageListsDifferent(images, baselineImages);
|
||||
const nextStreams = textDirty || imageDirty ? [] : page.contentStreams ?? [];
|
||||
|
||||
return {
|
||||
...page,
|
||||
textElements: rebuiltElements,
|
||||
contentStreams: page.contentStreams ?? [],
|
||||
imageElements: images.map(cloneImageElement),
|
||||
contentStreams: nextStreams,
|
||||
};
|
||||
});
|
||||
|
||||
return updated;
|
||||
};
|
||||
|
||||
export const getDirtyPages = (groupsByPage: TextGroup[][]): boolean[] => {
|
||||
return groupsByPage.map((groups) => groups.some((group) => group.text !== group.originalText));
|
||||
/**
 * Compares two numbers with an absolute tolerance (0.25 by default).
 * Anything that is not a finite number (`null`, `undefined`, `NaN`,
 * infinities) is treated as 0 before comparing.
 */
const approxEqual = (a: number | null | undefined, b: number | null | undefined, tolerance = 0.25): boolean => {
  const finiteOrZero = (candidate: number | null | undefined): number =>
    typeof candidate === 'number' && Number.isFinite(candidate) ? candidate : 0;

  const distance = Math.abs(finiteOrZero(a) - finiteOrZero(b));
  return distance <= tolerance;
};
|
||||
|
||||
/**
 * Element-wise `approxEqual` for two numeric arrays.
 * Two absent arrays are equal; one absent array, or arrays of different
 * length, are not. Otherwise every pair must be within `tolerance`.
 */
const arrayApproxEqual = (
  first: number[] | null | undefined,
  second: number[] | null | undefined,
  tolerance = 0.25,
): boolean => {
  if (!first || !second) {
    // Equal only when both sides are absent.
    return !first && !second;
  }
  if (first.length !== second.length) {
    return false;
  }
  const firstMismatch = first.findIndex(
    (value, index) => !approxEqual(value, second[index], tolerance),
  );
  return firstMismatch === -1;
};
|
||||
|
||||
/**
 * Tells whether two image elements describe the same image placement.
 *
 * `imageData`, `imageFormat` and `zOrder` must match exactly (with
 * `undefined` treated like `null`); the position/size members and the
 * `transform` entries are compared with `approxEqual` / `arrayApproxEqual`
 * at their default tolerance. The same reference is always equal; a missing
 * element on either side is never equal.
 */
const areImageElementsEqual = (
  current: PdfJsonImageElement,
  original: PdfJsonImageElement,
): boolean => {
  if (current === original) {
    return true;
  }
  if (!current || !original) {
    return false;
  }

  const exactKeys = ['imageData', 'imageFormat', 'zOrder'] as const;
  const toleranceKeys = ['x', 'y', 'width', 'height', 'left', 'right', 'top', 'bottom'] as const;

  return (
    exactKeys.every((key) => (current[key] ?? null) === (original[key] ?? null)) &&
    toleranceKeys.every((key) => approxEqual(current[key], original[key])) &&
    arrayApproxEqual(current.transform, original.transform)
  );
};
|
||||
|
||||
/**
 * Returns `true` when the two image lists differ: either their lengths
 * differ, or some image is not equal (per `areImageElementsEqual`) to the
 * image at the same index in the other list.
 */
export const areImageListsDifferent = (
  current: PdfJsonImageElement[],
  original: PdfJsonImageElement[],
): boolean => {
  if (current.length !== original.length) {
    return true;
  }
  const firstChanged = current.findIndex(
    (image, index) => !areImageElementsEqual(image, original[index]),
  );
  return firstChanged !== -1;
};
|
||||
|
||||
/**
 * Computes one "dirty" flag per entry of `groupsByPage`.
 * A page is dirty when any of its text groups has a `text` different from
 * its `originalText`, or when its image list differs from the baseline list
 * at the same index (missing lists count as empty).
 */
export const getDirtyPages = (
  groupsByPage: TextGroup[][],
  imagesByPage: PdfJsonImageElement[][],
  originalImagesByPage: PdfJsonImageElement[][],
): boolean[] =>
  groupsByPage.map((groups, pageIndex) => {
    const hasTextEdits = groups.some((group) => group.text !== group.originalText);
    if (hasTextEdits) {
      return true;
    }
    const currentImages = imagesByPage[pageIndex] ?? [];
    const baselineImages = originalImagesByPage[pageIndex] ?? [];
    return areImageListsDifferent(currentImages, baselineImages);
  });
|
||||
|
||||
Loading…
Reference in New Issue
Block a user