PDF Text editor changes (#5726)

# Description of Changes

  - Reduced lightweight editor JSON size:
      - Omit heavy page resources and contentStreams in lazy/lightweight
        flows.
      - Omit form fields in lazy metadata/editor bootstrapping flows.
      - Strip inline font program blobs from lazy initial payloads.
  - Added page-based font loading:
      - New endpoint to fetch fonts for a specific cached page:
        GET /api/v1/convert/pdf/text-editor/fonts/{jobId}/{pageNumber}
      - Frontend now loads page fonts alongside page data and merges them into
        local doc state.
  - Reduced save payload duplication:
      - Partial export now sends only changed pages (no repeated full-document
        font/metadata payload on each save).
  - Preserved round-trip/export safety:
      - Missing lightweight fields (resources/contentStreams) are interpreted
        as “preserve existing from cached PDF.”
      - Annotation semantics fixed so explicit empty annotation lists can
        clear annotations.
      - Fixed a regression where lazy mode could fall back to full export and
        lose overlays; lazy mode now stays on the cached partial export path
        when dirty pages exist.
  - Logging/noise reduction
  - Transport optimization:
      - Enabled HTTP compression for JSON/problem responses (may be removed
        later, pending testing).
      
      
### Outcome

  - Much smaller JSON payloads for giant PDFs.
  - Fewer duplicated bytes over the wire.
  - Page-scoped loading of heavy font data.
  - Better reliability for preserving overlays/vector/background content
    during export.


## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Translations (if applicable)

- [ ] I ran
[`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.
This commit is contained in:
Anthony Stirling
2026-02-16 17:36:13 +00:00
committed by GitHub
parent d5cf77cf50
commit 772dd4632e
5 changed files with 1062 additions and 141 deletions

View File

@@ -16,6 +16,7 @@ import { pdfWorkerManager } from '@app/services/pdfWorkerManager';
import { Util } from 'pdfjs-dist/legacy/build/pdf.mjs';
import {
PdfJsonDocument,
PdfJsonFont,
PdfJsonImageElement,
PdfJsonPage,
TextGroup,
@@ -450,14 +451,25 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
const start = performance.now();
try {
const response = await apiClient.get(
`/api/v1/convert/pdf/text-editor/page/${cachedJobId}/${pageNumber}`,
{
responseType: 'json',
},
);
const [pageResponse, pageFontsResponse] = await Promise.all([
apiClient.get(
`/api/v1/convert/pdf/text-editor/page/${cachedJobId}/${pageNumber}`,
{
responseType: 'json',
},
),
apiClient.get(
`/api/v1/convert/pdf/text-editor/fonts/${cachedJobId}/${pageNumber}`,
{
responseType: 'json',
},
),
]);
const pageData = response.data as PdfJsonPage;
const pageData = pageResponse.data as PdfJsonPage;
const pageFonts = Array.isArray(pageFontsResponse.data)
? (pageFontsResponse.data as PdfJsonFont[])
: [];
const normalizedImages = (pageData.imageElements ?? []).map(cloneImageElement);
if (imagesByPageRef.current.length <= pageIndex) {
@@ -471,12 +483,31 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
}
const nextPages = [...prevDoc.pages];
const existingPage = nextPages[pageIndex] ?? {};
const fontMap = new Map<string, PdfJsonFont>();
for (const existingFont of prevDoc.fonts ?? []) {
if (!existingFont) {
continue;
}
const existingKey = existingFont.uid || `${existingFont.pageNumber ?? -1}:${existingFont.id ?? ''}`;
fontMap.set(existingKey, existingFont);
}
if (pageFonts.length > 0) {
for (const font of pageFonts) {
if (!font) {
continue;
}
const key = font.uid || `${font.pageNumber ?? -1}:${font.id ?? ''}`;
fontMap.set(key, font);
}
}
const nextFonts = Array.from(fontMap.values());
nextPages[pageIndex] = {
...existingPage,
imageElements: normalizedImages.map(cloneImageElement),
};
return {
...prevDoc,
fonts: nextFonts,
pages: nextPages,
};
});
@@ -1087,8 +1118,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
const canUseIncremental =
isLazyMode &&
cachedJobId &&
dirtyPageIndices.length > 0 &&
dirtyPageIndices.length < totalPages;
dirtyPageIndices.length > 0;
if (canUseIncremental) {
await ensureImagesForPages(dirtyPageIndices);
@@ -1105,10 +1135,8 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
document.pages?.filter((_, index) => dirtyPageSet.has(index)) ?? [];
const partialDocument: PdfJsonDocument = {
metadata: document.metadata,
xmpMetadata: document.xmpMetadata,
fonts: document.fonts,
lazyImages: true,
// Incremental export only needs changed pages.
// Fonts/resources/content streams are resolved from server-side cache.
pages: partialPages,
};
@@ -1135,11 +1163,13 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
setErrorMessage(null);
return;
} catch (incrementalError) {
if (isLazyMode && cachedJobIdRef.current) {
throw new Error('Incremental export failed for cached document. Please reload and retry.');
}
console.warn(
'[handleGeneratePdf] Incremental export failed, falling back to full export',
incrementalError,
);
// Fall through to full export below
}
}
@@ -1272,8 +1302,7 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
const canUseIncremental =
isLazyMode &&
cachedJobId &&
dirtyPageIndices.length > 0 &&
dirtyPageIndices.length < totalPages;
dirtyPageIndices.length > 0;
if (canUseIncremental) {
await ensureImagesForPages(dirtyPageIndices);
@@ -1290,10 +1319,8 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
document.pages?.filter((_, index) => dirtyPageSet.has(index)) ?? [];
const partialDocument: PdfJsonDocument = {
metadata: document.metadata,
xmpMetadata: document.xmpMetadata,
fonts: document.fonts,
lazyImages: true,
// Incremental export only needs changed pages.
// Fonts/resources/content streams are resolved from server-side cache.
pages: partialPages,
};
@@ -1312,6 +1339,9 @@ const PdfTextEditor = ({ onComplete, onError }: BaseToolProps) => {
downloadName = detectedName || expectedName;
pdfBlob = response.data;
} catch (incrementalError) {
if (isLazyMode && cachedJobId) {
throw new Error('Incremental export failed for cached document. Please reload and retry.');
}
console.warn(
'[handleSaveToWorkbench] Incremental export failed, falling back to full export',
incrementalError,

View File

@@ -1209,7 +1209,7 @@ export const buildUpdatedDocument = (
...page,
textElements: updatedElements,
imageElements: images.map(cloneImageElement),
contentStreams: page.contentStreams ?? [],
contentStreams: page.contentStreams ?? null,
};
});
@@ -1282,7 +1282,7 @@ export const restoreGlyphElements = (
...page,
textElements: rebuiltElements,
imageElements: images.map(cloneImageElement),
contentStreams: page.contentStreams ?? [],
contentStreams: page.contentStreams ?? null,
};
});