mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2025-11-16 01:21:16 +01:00
editor carries over vector
This commit is contained in:
parent
29e8270eea
commit
efaec14e08
@ -401,17 +401,29 @@ public class PdfJsonConversionService {
|
|||||||
boolean rewriteSucceeded = true;
|
boolean rewriteSucceeded = true;
|
||||||
|
|
||||||
if (hasText) {
|
if (hasText) {
|
||||||
if (preflightResult.usesFallback()) {
|
if (!preservedStreams.isEmpty()) {
|
||||||
rewriteSucceeded = false;
|
if (preflightResult.usesFallback()) {
|
||||||
} else if (!preservedStreams.isEmpty()) {
|
|
||||||
log.info("Attempting token rewrite for page {}", pageNumberValue);
|
|
||||||
rewriteSucceeded = rewriteTextOperators(document, page, elements);
|
|
||||||
if (!rewriteSucceeded) {
|
|
||||||
log.info(
|
log.info(
|
||||||
"Token rewrite failed for page {}, regenerating text stream",
|
"Fallback fonts required for page {}; clearing original text tokens",
|
||||||
pageNumberValue);
|
pageNumberValue);
|
||||||
|
rewriteSucceeded =
|
||||||
|
rewriteTextOperators(document, page, elements, true);
|
||||||
|
if (!rewriteSucceeded) {
|
||||||
|
log.info(
|
||||||
|
"Failed to clear original text tokens on page {}; forcing regeneration",
|
||||||
|
pageNumberValue);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
log.info("Token rewrite succeeded for page {}", pageNumberValue);
|
log.info("Attempting token rewrite for page {}", pageNumberValue);
|
||||||
|
rewriteSucceeded =
|
||||||
|
rewriteTextOperators(document, page, elements, false);
|
||||||
|
if (!rewriteSucceeded) {
|
||||||
|
log.info(
|
||||||
|
"Token rewrite failed for page {}, regenerating text stream",
|
||||||
|
pageNumberValue);
|
||||||
|
} else {
|
||||||
|
log.info("Token rewrite succeeded for page {}", pageNumberValue);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
rewriteSucceeded = false;
|
rewriteSucceeded = false;
|
||||||
@ -419,7 +431,7 @@ public class PdfJsonConversionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
boolean shouldRegenerate = preservedStreams.isEmpty();
|
boolean shouldRegenerate = preservedStreams.isEmpty();
|
||||||
if (hasText && !rewriteSucceeded) {
|
if (hasText && (!rewriteSucceeded || preflightResult.usesFallback())) {
|
||||||
shouldRegenerate = true;
|
shouldRegenerate = true;
|
||||||
}
|
}
|
||||||
if (hasImages && preservedStreams.isEmpty()) {
|
if (hasImages && preservedStreams.isEmpty()) {
|
||||||
@ -433,6 +445,17 @@ public class PdfJsonConversionService {
|
|||||||
|
|
||||||
if (shouldRegenerate) {
|
if (shouldRegenerate) {
|
||||||
log.info("Regenerating page content for page {}", pageNumberValue);
|
log.info("Regenerating page content for page {}", pageNumberValue);
|
||||||
|
AppendMode appendMode = AppendMode.OVERWRITE;
|
||||||
|
if (!preservedStreams.isEmpty()) {
|
||||||
|
PDStream vectorStream =
|
||||||
|
extractVectorGraphics(document, preservedStreams, imageElements);
|
||||||
|
if (vectorStream != null) {
|
||||||
|
page.setContents(Collections.singletonList(vectorStream));
|
||||||
|
appendMode = AppendMode.APPEND;
|
||||||
|
} else {
|
||||||
|
page.setContents(new ArrayList<>());
|
||||||
|
}
|
||||||
|
}
|
||||||
regeneratePageContent(
|
regeneratePageContent(
|
||||||
document,
|
document,
|
||||||
page,
|
page,
|
||||||
@ -440,7 +463,8 @@ public class PdfJsonConversionService {
|
|||||||
imageElements,
|
imageElements,
|
||||||
fontMap,
|
fontMap,
|
||||||
fontModels,
|
fontModels,
|
||||||
pageNumberValue);
|
pageNumberValue,
|
||||||
|
appendMode);
|
||||||
log.info("Page content regeneration complete for page {}", pageNumberValue);
|
log.info("Page content regeneration complete for page {}", pageNumberValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2141,6 +2165,116 @@ public class PdfJsonConversionService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private PDStream extractVectorGraphics(
|
||||||
|
PDDocument document,
|
||||||
|
List<PDStream> preservedStreams,
|
||||||
|
List<PdfJsonImageElement> imageElements)
|
||||||
|
throws IOException {
|
||||||
|
if (preservedStreams == null || preservedStreams.isEmpty()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
Set<String> imageObjectNames = new HashSet<>();
|
||||||
|
if (imageElements != null) {
|
||||||
|
for (PdfJsonImageElement element : imageElements) {
|
||||||
|
if (element == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
String objectName = element.getObjectName();
|
||||||
|
if (objectName != null && !objectName.isBlank()) {
|
||||||
|
imageObjectNames.add(objectName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
List<Object> filteredTokens = new ArrayList<>();
|
||||||
|
for (PDStream stream : preservedStreams) {
|
||||||
|
if (stream == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
PDFStreamParser parser = new PDFStreamParser(stream.toByteArray());
|
||||||
|
List<Object> tokens = parser.parse();
|
||||||
|
collectVectorTokens(tokens, filteredTokens, imageObjectNames);
|
||||||
|
} catch (IOException ex) {
|
||||||
|
log.debug(
|
||||||
|
"Failed to parse preserved content stream for vector extraction: {}",
|
||||||
|
ex.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (filteredTokens.isEmpty()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
PDStream vectorStream = new PDStream(document);
|
||||||
|
try (OutputStream outputStream = vectorStream.createOutputStream(COSName.FLATE_DECODE)) {
|
||||||
|
new ContentStreamWriter(outputStream).writeTokens(filteredTokens);
|
||||||
|
}
|
||||||
|
return vectorStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void collectVectorTokens(
|
||||||
|
List<Object> sourceTokens,
|
||||||
|
List<Object> targetTokens,
|
||||||
|
Set<String> imageObjectNames) {
|
||||||
|
if (sourceTokens == null || sourceTokens.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean insideText = false;
|
||||||
|
boolean insideInlineImage = false;
|
||||||
|
|
||||||
|
for (Object token : sourceTokens) {
|
||||||
|
if (token instanceof Operator operator) {
|
||||||
|
String name = operator.getName();
|
||||||
|
if (OperatorName.BEGIN_TEXT.equals(name)) {
|
||||||
|
insideText = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (OperatorName.END_TEXT.equals(name)) {
|
||||||
|
insideText = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (OperatorName.BEGIN_INLINE_IMAGE.equals(name)
|
||||||
|
|| OperatorName.BEGIN_INLINE_IMAGE_DATA.equals(name)) {
|
||||||
|
if (!insideText) {
|
||||||
|
targetTokens.add(operator);
|
||||||
|
}
|
||||||
|
insideInlineImage = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (OperatorName.END_INLINE_IMAGE.equals(name)) {
|
||||||
|
if (!insideText) {
|
||||||
|
targetTokens.add(operator);
|
||||||
|
}
|
||||||
|
insideInlineImage = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (insideText && !insideInlineImage) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (OperatorName.DRAW_OBJECT.equals(name)
|
||||||
|
&& imageObjectNames != null
|
||||||
|
&& !imageObjectNames.isEmpty()
|
||||||
|
&& !targetTokens.isEmpty()) {
|
||||||
|
Object previous = targetTokens.get(targetTokens.size() - 1);
|
||||||
|
if (previous instanceof COSName cosName
|
||||||
|
&& imageObjectNames.contains(cosName.getName())) {
|
||||||
|
targetTokens.remove(targetTokens.size() - 1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
targetTokens.add(operator);
|
||||||
|
} else {
|
||||||
|
if (insideText && !insideInlineImage) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
targetTokens.add(token);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void regeneratePageContent(
|
private void regeneratePageContent(
|
||||||
PDDocument document,
|
PDDocument document,
|
||||||
PDPage page,
|
PDPage page,
|
||||||
@ -2148,13 +2282,15 @@ public class PdfJsonConversionService {
|
|||||||
List<PdfJsonImageElement> imageElements,
|
List<PdfJsonImageElement> imageElements,
|
||||||
Map<String, PDFont> fontMap,
|
Map<String, PDFont> fontMap,
|
||||||
List<PdfJsonFont> fontModels,
|
List<PdfJsonFont> fontModels,
|
||||||
int pageNumber)
|
int pageNumber,
|
||||||
|
AppendMode appendMode)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
List<DrawableElement> drawables = mergeDrawables(textElements, imageElements);
|
List<DrawableElement> drawables = mergeDrawables(textElements, imageElements);
|
||||||
Map<String, PDImageXObject> imageCache = new HashMap<>();
|
Map<String, PDImageXObject> imageCache = new HashMap<>();
|
||||||
|
|
||||||
|
AppendMode mode = appendMode != null ? appendMode : AppendMode.OVERWRITE;
|
||||||
try (PDPageContentStream contentStream =
|
try (PDPageContentStream contentStream =
|
||||||
new PDPageContentStream(document, page, AppendMode.OVERWRITE, true, true)) {
|
new PDPageContentStream(document, page, mode, true, true)) {
|
||||||
boolean textOpen = false;
|
boolean textOpen = false;
|
||||||
for (DrawableElement drawable : drawables) {
|
for (DrawableElement drawable : drawables) {
|
||||||
switch (drawable.type()) {
|
switch (drawable.type()) {
|
||||||
@ -2618,7 +2754,10 @@ public class PdfJsonConversionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private boolean rewriteTextOperators(
|
private boolean rewriteTextOperators(
|
||||||
PDDocument document, PDPage page, List<PdfJsonTextElement> elements) {
|
PDDocument document,
|
||||||
|
PDPage page,
|
||||||
|
List<PdfJsonTextElement> elements,
|
||||||
|
boolean removeOnly) {
|
||||||
if (elements == null || elements.isEmpty()) {
|
if (elements == null || elements.isEmpty()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -2663,7 +2802,8 @@ public class PdfJsonConversionService {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
log.trace("Rewriting Tj operator using font {}", currentFontName);
|
log.trace("Rewriting Tj operator using font {}", currentFontName);
|
||||||
if (!rewriteShowText(cosString, currentFont, currentFontName, cursor)) {
|
if (!rewriteShowText(
|
||||||
|
cosString, currentFont, currentFontName, cursor, removeOnly)) {
|
||||||
log.debug("Failed to rewrite Tj operator; aborting rewrite");
|
log.debug("Failed to rewrite Tj operator; aborting rewrite");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -2674,7 +2814,8 @@ public class PdfJsonConversionService {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
log.trace("Rewriting TJ operator using font {}", currentFontName);
|
log.trace("Rewriting TJ operator using font {}", currentFontName);
|
||||||
if (!rewriteShowTextArray(array, currentFont, currentFontName, cursor)) {
|
if (!rewriteShowTextArray(
|
||||||
|
array, currentFont, currentFontName, cursor, removeOnly)) {
|
||||||
log.debug("Failed to rewrite TJ operator; aborting rewrite");
|
log.debug("Failed to rewrite TJ operator; aborting rewrite");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -2703,7 +2844,11 @@ public class PdfJsonConversionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private boolean rewriteShowText(
|
private boolean rewriteShowText(
|
||||||
COSString cosString, PDFont font, String expectedFontName, TextElementCursor cursor)
|
COSString cosString,
|
||||||
|
PDFont font,
|
||||||
|
String expectedFontName,
|
||||||
|
TextElementCursor cursor,
|
||||||
|
boolean removeOnly)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (font == null) {
|
if (font == null) {
|
||||||
return false;
|
return false;
|
||||||
@ -2713,6 +2858,10 @@ public class PdfJsonConversionService {
|
|||||||
if (consumed == null) {
|
if (consumed == null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (removeOnly) {
|
||||||
|
cosString.setValue(new byte[0]);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
String replacement = mergeText(consumed);
|
String replacement = mergeText(consumed);
|
||||||
try {
|
try {
|
||||||
byte[] encoded = font.encode(replacement);
|
byte[] encoded = font.encode(replacement);
|
||||||
@ -2725,7 +2874,11 @@ public class PdfJsonConversionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private boolean rewriteShowTextArray(
|
private boolean rewriteShowTextArray(
|
||||||
COSArray array, PDFont font, String expectedFontName, TextElementCursor cursor)
|
COSArray array,
|
||||||
|
PDFont font,
|
||||||
|
String expectedFontName,
|
||||||
|
TextElementCursor cursor,
|
||||||
|
boolean removeOnly)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (font == null) {
|
if (font == null) {
|
||||||
return false;
|
return false;
|
||||||
@ -2738,6 +2891,10 @@ public class PdfJsonConversionService {
|
|||||||
if (consumed == null) {
|
if (consumed == null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (removeOnly) {
|
||||||
|
array.set(i, new COSString(new byte[0]));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
String replacement = mergeText(consumed);
|
String replacement = mergeText(consumed);
|
||||||
try {
|
try {
|
||||||
byte[] encoded = font.encode(replacement);
|
byte[] encoded = font.encode(replacement);
|
||||||
|
|||||||
@ -733,15 +733,11 @@ export const restoreGlyphElements = (
|
|||||||
rebuiltElements.push(...group.originalElements.map(cloneTextElement));
|
rebuiltElements.push(...group.originalElements.map(cloneTextElement));
|
||||||
});
|
});
|
||||||
|
|
||||||
const textDirty = groups.some((group) => group.text !== group.originalText);
|
|
||||||
const imageDirty = areImageListsDifferent(images, baselineImages);
|
|
||||||
const nextStreams = textDirty || imageDirty ? [] : page.contentStreams ?? [];
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...page,
|
...page,
|
||||||
textElements: rebuiltElements,
|
textElements: rebuiltElements,
|
||||||
imageElements: images.map(cloneImageElement),
|
imageElements: images.map(cloneImageElement),
|
||||||
contentStreams: nextStreams,
|
contentStreams: page.contentStreams ?? [],
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user