fix(frontend/pdfjs): ensure CID character rendering via CMaps & stabilize PDF compare/preview (#4762)

# Description of Changes

## What was changed
- Introduced a shared `PDFJS_DEFAULT_OPTIONS` object and applied it
  across frontend modules using PDF.js:
  - Sets `cMapUrl`, `cMapPacked`, and `standardFontDataUrl` so PDF.js can
    correctly load CMaps and standard fonts.
  - Switches all `GlobalWorkerOptions.workerSrc` usages to the dynamic
    `pdfjsPath + 'pdf.worker.mjs'`.
- Exposed `pdfjsPath` globally in `navbar.html` to support deployments
under subpaths/reverse proxies.
- Updated multiple pages and utilities to use the new defaults:
  - `DecryptFiles.js`, `downloader.js`, `merge.js`, Multi-Tool
    (`PdfContainer.js`), and feature pages (`add-image.js`,
    `adjust-contrast.js`, `change-metadata.js`, `crop.js`, `pdf-to-csv.js`,
    `sign.js`, `rotate-pdf.html`, `convert/pdf-to-pdfa.html`,
    `merge-pdfs.html`).
- Comparison tool hardening:
  - Added a robust worker protocol (`type: 'COMPARE' | 'SET_*'`) and safer
    logs.
  - Improved text tokenization, adaptive batch diffing with overlap
    de-duplication, and color fallbacks.
  - Early validation for empty/oversized/invalid PDFs with clearer user
    messages.
- Disabled the PDF.js worker (`disableWorker: true`) in specific templates
where legacy CMap handling caused issues, to prevent rendering
failures.
- UI/UX tweaks: processing state on the compare button, progress hints
during text extraction, and more resilient error handling.
- Fixed relative path to popularity data (`./files/popularity.txt`) to
respect base paths.

## Why the change was made
- PDFs using CID fonts (e.g., CJK and other complex scripts) were
rendering with missing glyphs or falling back incorrectly because CMaps
and standard font data were not being provided to PDF.js. Providing
proper CMap and font resources resolves CID character visibility issues
and related console warnings.
- Some environments (subpath deployments, reverse proxies) broke PDF.js
worker/static asset resolution; centralizing `pdfjsPath` and using it
consistently fixes this.
- The comparison feature struggled with large/complex documents and
lacked robust validation; improvements reduce timeouts, improve
accuracy, and provide clearer feedback.

Closes #4391

---

## Checklist

### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.
This commit is contained in:
Ludy
2025-11-25 10:52:42 +01:00
committed by GitHub
parent 27ccb3e4f1
commit c760d1a93a
17 changed files with 523 additions and 248 deletions

View File

@@ -1,3 +1,9 @@
// Shared options spread into every pdfjsLib.getDocument() call in this file.
// They point PDF.js at the CMap and standard-font directories under `pdfjsPath`
// (expected to be defined globally before this script runs).
const PDFJS_DEFAULT_OPTIONS = {
  cMapUrl: `${pdfjsPath}cmaps/`,
  cMapPacked: true,
  standardFontDataUrl: `${pdfjsPath}standard_fonts/`,
};
function formatProblemDetailsJson(input) {
try {
const obj = typeof input === 'string' ? JSON.parse(input) : input;
@@ -238,7 +244,7 @@ export class DecryptFile {
return {isEncrypted: false, requiresPassword: false};
}
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
const arrayBuffer = await file.arrayBuffer();
const arrayBufferForPdfLib = arrayBuffer.slice(0);
@@ -246,12 +252,14 @@ export class DecryptFile {
if(this.decryptWorker == null){
loadingTask = pdfjsLib.getDocument({
...PDFJS_DEFAULT_OPTIONS,
data: arrayBuffer,
});
this.decryptWorker = loadingTask._worker
}else {
loadingTask = pdfjsLib.getDocument({
...PDFJS_DEFAULT_OPTIONS,
data: arrayBuffer,
worker: this.decryptWorker
});

View File

@@ -1,18 +1,39 @@
importScripts('./diff.js');
let complexMessage = 'One or both of the provided documents are large files, accuracy of comparison may be reduced';
let largeFilesMessage = 'One or Both of the provided documents are too large to process';
// Configuration listener: stores the message texts sent via SET_* messages.
// It is registered before `self.onmessage` is assigned further down.
self.addEventListener('message', ({ data }) => {
  switch (data.type) {
    case 'SET_COMPLEX_MESSAGE':
      complexMessage = data.message;
      break;
    case 'SET_TOO_LARGE_MESSAGE':
      largeFilesMessage = data.message;
      break;
    default:
      // Any other message type is left for the onmessage handler.
      break;
  }
});
self.onmessage = async function (e) {
const { text1, text2, color1, color2 } = e.data;
console.log('Received text for comparison:', { text1, text2 });
const data = e.data;
if (data.type !== 'COMPARE') {
console.log('Worker ignored non-COMPARE message');
return;
}
const { text1, text2, color1, color2 } = data;
console.log('Received text for comparison:', { lengths: { text1: text1.length, text2: text2.length } }); // Safe Log
const startTime = performance.now();
if (text1.trim() === "" || text2.trim() === "") {
// Safe Trim
if (!text1 || !text2 || text1.trim() === "" || text2.trim() === "") {
self.postMessage({ status: 'error', message: 'One or both of the texts are empty.' });
return;
}
const words1 = text1.split(' ');
const words2 = text2.split(' ');
// Whitespace-tolerant word split: collapses runs of spaces/newlines and drops empty tokens
const words1 = text1.trim().split(/\s+/).filter(w => w.length > 0);
const words2 = text2.trim().split(/\s+/).filter(w => w.length > 0);
const MAX_WORD_COUNT = 150000;
const COMPLEX_WORD_COUNT = 50000;
const BATCH_SIZE = 5000; // Define a suitable batch size for processing
@@ -21,44 +42,28 @@ self.onmessage = async function (e) {
const isComplex = words1.length > COMPLEX_WORD_COUNT || words2.length > COMPLEX_WORD_COUNT;
const isTooLarge = words1.length > MAX_WORD_COUNT || words2.length > MAX_WORD_COUNT;
let complexMessage = 'One or both of the provided documents are large files, accuracy of comparison may be reduced';
let tooLargeMessage = 'One or Both of the provided documents are too large to process';
// Listen for messages from the main thread
self.addEventListener('message', (event) => {
if (event.data.type === 'SET_TOO_LARGE_MESSAGE') {
tooLargeMessage = event.data.message;
}
if (event.data.type === 'SET_COMPLEX_MESSAGE') {
complexMessage = event.data.message;
}
});
if (isTooLarge) {
self.postMessage({
status: 'warning',
message: tooLargeMessage,
});
self.postMessage({ status: 'error', message: largeFilesMessage });
return;
} else {
if (isComplex) {
self.postMessage({
status: 'warning',
message: complexMessage,
});
}
// Perform diff operation depending on document size
const differences = isComplex
? await staggeredBatchDiff(words1, words2, color1, color2, BATCH_SIZE, OVERLAP_SIZE)
: diff(words1, words2, color1, color2);
console.log(`Diff operation took ${performance.now() - startTime} milliseconds`);
self.postMessage({ status: 'success', differences });
}
if (isComplex) {
self.postMessage({ status: 'warning', message: complexMessage });
}
// Diff based on size
let differences;
if (isComplex) {
differences = await staggeredBatchDiff(words1, words2, color1 || '#ff0000', color2 || '#008000', BATCH_SIZE, OVERLAP_SIZE);
} else {
differences = diff(words1, words2, color1 || '#ff0000', color2 || '#008000');
}
console.log(`Diff took ${performance.now() - startTime} ms for ${words1.length + words2.length} words`);
self.postMessage({ status: 'success', differences });
};
//Splits text into smaller batches to run through diff checking algorithms. overlaps the batches to help ensure
// Splits text into smaller batches to run through diff checking algorithms. Overlaps the batches so that differences spanning a batch boundary are less likely to be missed.
async function staggeredBatchDiff(words1, words2, color1, color2, batchSize, overlapSize) {
const differences = [];
const totalWords1 = words1.length;
@@ -67,10 +72,9 @@ async function staggeredBatchDiff(words1, words2, color1, color2, batchSize, ove
let previousEnd1 = 0; // Track where the last batch ended in words1
let previousEnd2 = 0; // Track where the last batch ended in words2
// Function to determine if differences are large, differences that are too large indicate potential error in batching
const isLargeDifference = (differences) => {
return differences.length > 50;
};
// Track processed indices to dedupe overlaps
const processed1 = new Set();
const processed2 = new Set();
while (previousEnd1 < totalWords1 || previousEnd2 < totalWords2) {
// Define the next chunk boundaries
@@ -80,66 +84,130 @@ async function staggeredBatchDiff(words1, words2, color1, color2, batchSize, ove
const start2 = previousEnd2;
const end2 = Math.min(start2 + batchSize, totalWords2);
//If difference is too high decrease batch size for more granular check
const dynamicBatchSize = isLargeDifference(differences) ? batchSize / 2 : batchSize;
// Adaptive: If many diffs, smaller batch (max 3x downscale)
const recentDiffs = differences.slice(-100).filter(([c]) => c !== 'black').length;
// If difference is too high decrease batch size for more granular check
const dynamicBatchSize = Math.max(batchSize / Math.min(8, 1 + recentDiffs / 50), batchSize / 8);
// Adjust the size of the current chunk using dynamic batch size
const batchWords1 = words1.slice(start1, end1 + dynamicBatchSize);
const batchWords2 = words2.slice(start2, end2 + dynamicBatchSize);
const extendedEnd1 = Math.min(end1 + dynamicBatchSize, totalWords1);
const extendedEnd2 = Math.min(end2 + dynamicBatchSize, totalWords2);
const batchWords1 = words1.slice(start1, extendedEnd1);
const batchWords2 = words2.slice(start2, extendedEnd2);
// Include overlap from the previous chunk
const overlapWords1 = previousEnd1 > 0 ? words1.slice(Math.max(0, previousEnd1 - overlapSize), previousEnd1) : [];
const overlapWords2 = previousEnd2 > 0 ? words2.slice(Math.max(0, previousEnd2 - overlapSize), previousEnd2) : [];
const overlapStart1 = Math.max(0, previousEnd1 - overlapSize);
const overlapStart2 = Math.max(0, previousEnd2 - overlapSize);
const overlapWords1 = previousEnd1 > 0 ? words1.slice(overlapStart1, previousEnd1) : [];
const overlapWords2 = previousEnd2 > 0 ? words2.slice(overlapStart2, previousEnd2) : [];
// Combine overlaps and current batches for comparison
const combinedWords1 = overlapWords1.concat(batchWords1);
const combinedWords2 = overlapWords2.concat(batchWords2);
const combinedWords1 = [...overlapWords1, ...batchWords1];
const combinedWords2 = [...overlapWords2, ...batchWords2];
// Perform the diff on the combined words
const batchDifferences = diff(combinedWords1, combinedWords2, color1, color2);
differences.push(...batchDifferences);
// Update the previous end indices based on the results of this batch
const combinedIndices1 = [];
for (let i = overlapStart1; i < previousEnd1; i++) {
combinedIndices1.push(i);
}
for (let i = start1; i < extendedEnd1; i++) {
combinedIndices1.push(i);
}
const combinedIndices2 = [];
for (let i = overlapStart2; i < previousEnd2; i++) {
combinedIndices2.push(i);
}
for (let i = start2; i < extendedEnd2; i++) {
combinedIndices2.push(i);
}
let pointer1 = 0;
let pointer2 = 0;
const filteredBatch = [];
batchDifferences.forEach(([color, word]) => {
if (color === color1) {
const globalIndex1 = combinedIndices1[pointer1];
if (globalIndex1 === undefined || !processed1.has(globalIndex1)) {
filteredBatch.push([color, word]);
}
if (globalIndex1 !== undefined) {
processed1.add(globalIndex1);
}
pointer1++;
} else if (color === color2) {
const globalIndex2 = combinedIndices2[pointer2];
if (globalIndex2 === undefined || !processed2.has(globalIndex2)) {
filteredBatch.push([color, word]);
}
if (globalIndex2 !== undefined) {
processed2.add(globalIndex2);
}
pointer2++;
} else {
const globalIndex1 = combinedIndices1[pointer1];
const globalIndex2 = combinedIndices2[pointer2];
const alreadyProcessed = (globalIndex1 !== undefined && processed1.has(globalIndex1)) && (globalIndex2 !== undefined && processed2.has(globalIndex2));
if (!alreadyProcessed) {
filteredBatch.push([color, word]);
}
if (globalIndex1 !== undefined) {
processed1.add(globalIndex1);
}
if (globalIndex2 !== undefined) {
processed2.add(globalIndex2);
}
pointer1++;
pointer2++;
}
});
differences.push(...filteredBatch);
// Mark as processed
for (let k = start1; k < end1; k++) processed1.add(k);
for (let k = start2; k < end2; k++) processed2.add(k);
previousEnd1 = end1;
previousEnd2 = end2;
// Yield for async (avoids blocking)
await new Promise(resolve => setTimeout(resolve, 0));
}
return differences;
}
// Standard diff function for small text comparisons
function diff(words1, words2, color1, color2) {
console.log(`Starting diff between ${words1.length} words and ${words2.length} words`);
const matrix = Array.from({ length: words1.length + 1 }, () => Array(words2.length + 1).fill(0));
console.log(`Diff: ${words1.length} vs ${words2.length} words`);
const oldStr = words1.join(' '); // As string for diff.js
const newStr = words2.join(' ');
// Static method: No 'new' needed, avoids constructor error
const changes = Diff.diffWords(oldStr, newStr, { ignoreWhitespace: true });
for (let i = 1; i <= words1.length; i++) {
for (let j = 1; j <= words2.length; j++) {
matrix[i][j] = words1[i - 1] === words2[j - 1]
? matrix[i - 1][j - 1] + 1
: Math.max(matrix[i][j - 1], matrix[i - 1][j]);
}
}
return backtrack(matrix, words1, words2, color1, color2);
}
// Backtrack function to find differences
function backtrack(matrix, words1, words2, color1, color2) {
let i = words1.length, j = words2.length;
// Map changes to [color, word] format (change.value and added/removed)
const differences = [];
changes.forEach(change => {
const value = change.value;
const op = change.added ? 1 : change.removed ? -1 : 0;
while (i > 0 || j > 0) {
if (i > 0 && j > 0 && words1[i - 1] === words2[j - 1]) {
differences.unshift(['black', words1[i - 1]]);
i--; j--;
} else if (j > 0 && (i === 0 || matrix[i][j] === matrix[i][j - 1])) {
differences.unshift([color2, words2[j - 1]]);
j--;
} else {
differences.unshift([color1, words1[i - 1]]);
i--;
}
}
// Split value into words and process
const words = value.split(/\s+/).filter(w => w.length > 0);
words.forEach(word => {
if (op === 0) { // Equal
differences.push(['black', word]);
} else if (op === 1) { // Insert
differences.push([color2, word]);
} else if (op === -1) { // Delete
differences.push([color1, word]);
}
});
});
return differences;
}

View File

@@ -2,6 +2,12 @@
if (window.isDownloadScriptInitialized) return; // Prevent re-execution
window.isDownloadScriptInitialized = true;
// Shared options spread into every pdfjsLib.getDocument() call in this file.
// They point PDF.js at the CMap and standard-font directories under `pdfjsPath`
// (expected to be defined globally before this script runs).
const PDFJS_DEFAULT_OPTIONS = {
  cMapUrl: `${pdfjsPath}cmaps/`,
  cMapPacked: true,
  standardFontDataUrl: `${pdfjsPath}standard_fonts/`,
};
// Global PDF processing count tracking for survey system
window.incrementPdfProcessingCount = function() {
let pdfProcessingCount = parseInt(localStorage.getItem('pdfProcessingCount') || '0');
@@ -234,8 +240,13 @@
async function getPDFPageCount(file) {
try {
const arrayBuffer = await file.arrayBuffer();
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
const pdf = await pdfjsLib.getDocument({data: arrayBuffer}).promise;
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
const pdf = await pdfjsLib
.getDocument({
...PDFJS_DEFAULT_OPTIONS,
data: arrayBuffer,
})
.promise;
return pdf.numPages;
} catch (error) {
console.error('Error getting PDF page count:', error);
@@ -245,7 +256,7 @@
async function checkAndDecryptFiles(url, files) {
const decryptedFiles = [];
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
// Extract the base URL
const baseUrl = new URL(url);
@@ -271,7 +282,10 @@
}
try {
const arrayBuffer = await file.arrayBuffer();
const loadingTask = pdfjsLib.getDocument({data: arrayBuffer});
const loadingTask = pdfjsLib.getDocument({
...PDFJS_DEFAULT_OPTIONS,
data: arrayBuffer,
});
console.log(`Attempting to load PDF: ${file.name}`);
const pdf = await loadingTask.promise;

View File

@@ -220,7 +220,7 @@ document.addEventListener('DOMContentLoaded', async function () {
});
}
try {
const response = await fetch('/files/popularity.txt');
const response = await fetch('./files/popularity.txt');
if (!response.ok) {
const errorText = await response.text().catch(() => '');
const errorMsg = errorText || response.statusText || 'Request failed';

View File

@@ -1,3 +1,9 @@
// Shared options spread into every pdfjsLib.getDocument() call in this file.
// They point PDF.js at the CMap and standard-font directories under `pdfjsPath`
// (expected to be defined globally before this script runs).
const PDFJS_DEFAULT_OPTIONS = {
  cMapUrl: `${pdfjsPath}cmaps/`,
  cMapPacked: true,
  standardFontDataUrl: `${pdfjsPath}standard_fonts/`,
};
let currentSort = {
field: null,
descending: false,
@@ -73,7 +79,13 @@ async function displayFiles(files) {
async function getPDFPageCount(file) {
const blobUrl = URL.createObjectURL(file);
const pdf = await pdfjsLib.getDocument(blobUrl).promise;
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
const pdf = await pdfjsLib
.getDocument({
...PDFJS_DEFAULT_OPTIONS,
url: blobUrl,
})
.promise;
URL.revokeObjectURL(blobUrl);
return pdf.numPages;
}

View File

@@ -8,6 +8,12 @@ import { AddFilesCommand } from './commands/add-page.js';
import { DecryptFile } from '../DecryptFiles.js';
import { CommandSequence } from './commands/commands-sequence.js';
// Shared options spread into every pdfjsLib.getDocument() call in this file.
// They point PDF.js at the CMap and standard-font directories under `pdfjsPath`
// (expected to be defined globally before this script runs).
const PDFJS_DEFAULT_OPTIONS = {
  cMapUrl: `${pdfjsPath}cmaps/`,
  cMapPacked: true,
  standardFontDataUrl: `${pdfjsPath}standard_fonts/`,
};
const isSvgFile = (file) => {
if (!file) return false;
const type = (file.type || '').toLowerCase();
@@ -479,8 +485,11 @@ class PdfContainer {
}
async toRenderer(objectUrl) {
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
const pdf = await pdfjsLib.getDocument(objectUrl).promise;
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
const pdf = await pdfjsLib.getDocument({
url: objectUrl,
...PDFJS_DEFAULT_OPTIONS,
}).promise;
return {
document: pdf,
pageCount: pdf.numPages,

View File

@@ -1,3 +1,9 @@
// Shared options spread into every pdfjsLib.getDocument() call in this file.
// They point PDF.js at the CMap and standard-font directories under `pdfjsPath`
// (expected to be defined globally before this script runs).
const PDFJS_DEFAULT_OPTIONS = {
  cMapUrl: `${pdfjsPath}cmaps/`,
  cMapPacked: true,
  standardFontDataUrl: `${pdfjsPath}standard_fonts/`,
};
window.goToFirstOrLastPage = goToFirstOrLastPage;
document.getElementById('download-pdf').addEventListener('click', async () => {
@@ -31,8 +37,11 @@ document.querySelector('input[name=pdf-upload]').addEventListener('change', asyn
const file = allFiles[0];
originalFileName = file.name.replace(/\.[^/.]+$/, '');
const pdfData = await file.arrayBuffer();
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
const pdfDoc = await pdfjsLib.getDocument({ data: pdfData }).promise;
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
const pdfDoc = await pdfjsLib.getDocument({
...PDFJS_DEFAULT_OPTIONS,
data: pdfData,
}).promise;
await DraggableUtils.renderPage(pdfDoc, 0);
document.querySelectorAll('.show-on-file-selected').forEach((el) => {

View File

@@ -1,3 +1,9 @@
// Shared options spread into every pdfjsLib.getDocument() call in this file.
// They point PDF.js at the CMap and standard-font directories under `pdfjsPath`
// (expected to be defined globally before this script runs).
const PDFJS_DEFAULT_OPTIONS = {
  cMapUrl: `${pdfjsPath}cmaps/`,
  cMapPacked: true,
  standardFontDataUrl: `${pdfjsPath}standard_fonts/`,
};
var canvas = document.getElementById('contrast-pdf-canvas');
var context = canvas.getContext('2d');
var originalImageData = null;
@@ -9,8 +15,11 @@ async function renderPDFAndSaveOriginalImageData(file) {
var fileReader = new FileReader();
fileReader.onload = async function () {
var data = new Uint8Array(this.result);
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
pdf = await pdfjsLib.getDocument({data: data}).promise;
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
pdf = await pdfjsLib.getDocument({
...PDFJS_DEFAULT_OPTIONS,
data: data,
}).promise;
// Get the number of pages in the PDF
var numPages = pdf.numPages;

View File

@@ -1,3 +1,9 @@
// Shared options spread into every pdfjsLib.getDocument() call in this file.
// They point PDF.js at the CMap and standard-font directories under `pdfjsPath`
// (expected to be defined globally before this script runs).
const PDFJS_DEFAULT_OPTIONS = {
  cMapUrl: `${pdfjsPath}cmaps/`,
  cMapPacked: true,
  standardFontDataUrl: `${pdfjsPath}standard_fonts/`,
};
const deleteAllCheckbox = document.querySelector('#deleteAll');
let inputs = document.querySelectorAll('input');
const customMetadataDiv = document.getElementById('customMetadata');
@@ -43,8 +49,13 @@ fileInput.addEventListener('change', async function () {
customMetadataFormContainer.removeChild(customMetadataFormContainer.firstChild);
}
var url = URL.createObjectURL(file);
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
const pdf = await pdfjsLib.getDocument(url).promise;
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
const pdf = await pdfjsLib
.getDocument({
...PDFJS_DEFAULT_OPTIONS,
url: url,
})
.promise;
const pdfMetadata = await pdf.getMetadata();
lastPDFFile = pdfMetadata?.info;
console.log(pdfMetadata);

View File

@@ -1,3 +1,9 @@
// Shared options spread into every pdfjsLib.getDocument() call in this file.
// They point PDF.js at the CMap and standard-font directories under `pdfjsPath`
// (expected to be defined globally before this script runs).
const PDFJS_DEFAULT_OPTIONS = {
  cMapUrl: `${pdfjsPath}cmaps/`,
  cMapPacked: true,
  standardFontDataUrl: `${pdfjsPath}standard_fonts/`,
};
let pdfCanvas = document.getElementById('cropPdfCanvas');
let overlayCanvas = document.getElementById('overlayCanvas');
let canvasesContainer = document.getElementById('canvasesContainer');
@@ -42,12 +48,17 @@ function renderPageFromFile(file) {
let reader = new FileReader();
reader.onload = function (ev) {
let typedArray = new Uint8Array(reader.result);
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
pdfjsLib.getDocument(typedArray).promise.then(function (pdf) {
pdfDoc = pdf;
totalPages = pdf.numPages;
renderPage(currentPage);
});
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
pdfjsLib
.getDocument({
...PDFJS_DEFAULT_OPTIONS,
data: typedArray,
})
.promise.then(function (pdf) {
pdfDoc = pdf;
totalPages = pdf.numPages;
renderPage(currentPage);
});
};
reader.readAsArrayBuffer(file);
}

View File

@@ -1,3 +1,9 @@
// Shared options spread into every pdfjsLib.getDocument() call in this file.
// They point PDF.js at the CMap and standard-font directories under `pdfjsPath`
// (expected to be defined globally before this script runs).
const PDFJS_DEFAULT_OPTIONS = {
  cMapUrl: `${pdfjsPath}cmaps/`,
  cMapPacked: true,
  standardFontDataUrl: `${pdfjsPath}standard_fonts/`,
};
let pdfCanvas = document.getElementById('cropPdfCanvas');
let overlayCanvas = document.getElementById('overlayCanvas');
let canvasesContainer = document.getElementById('canvasesContainer');
@@ -37,12 +43,17 @@ btn1Object.addEventListener('click', function (e) {
let reader = new FileReader();
reader.onload = function (ev) {
let typedArray = new Uint8Array(reader.result);
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
pdfjsLib.getDocument(typedArray).promise.then(function (pdf) {
pdfDoc = pdf;
totalPages = pdf.numPages;
renderPage(currentPage);
});
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
pdfjsLib
.getDocument({
...PDFJS_DEFAULT_OPTIONS,
data: typedArray,
})
.promise.then(function (pdf) {
pdfDoc = pdf;
totalPages = pdf.numPages;
renderPage(currentPage);
});
};
reader.readAsArrayBuffer(file);
}
@@ -58,12 +69,17 @@ btn2Object.addEventListener('click', function (e) {
let reader = new FileReader();
reader.onload = function (ev) {
let typedArray = new Uint8Array(reader.result);
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
pdfjsLib.getDocument(typedArray).promise.then(function (pdf) {
pdfDoc = pdf;
totalPages = pdf.numPages;
renderPage(currentPage);
});
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
pdfjsLib
.getDocument({
...PDFJS_DEFAULT_OPTIONS,
data: typedArray,
})
.promise.then(function (pdf) {
pdfDoc = pdf;
totalPages = pdf.numPages;
renderPage(currentPage);
});
};
reader.readAsArrayBuffer(file);
}
@@ -75,12 +91,17 @@ function renderPageFromFile(file) {
let reader = new FileReader();
reader.onload = function (ev) {
let typedArray = new Uint8Array(reader.result);
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
pdfjsLib.getDocument(typedArray).promise.then(function (pdf) {
pdfDoc = pdf;
totalPages = pdf.numPages;
renderPage(currentPage);
});
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
pdfjsLib
.getDocument({
...PDFJS_DEFAULT_OPTIONS,
data: typedArray,
})
.promise.then(function (pdf) {
pdfDoc = pdf;
totalPages = pdf.numPages;
renderPage(currentPage);
});
pageNumbers.value = currentPage;
};
reader.readAsArrayBuffer(file);

View File

@@ -1,3 +1,9 @@
// Shared options spread into every pdfjsLib.getDocument() call in this file.
// They point PDF.js at the CMap and standard-font directories under `pdfjsPath`
// (expected to be defined globally before this script runs).
const PDFJS_DEFAULT_OPTIONS = {
  cMapUrl: `${pdfjsPath}cmaps/`,
  cMapPacked: true,
  standardFontDataUrl: `${pdfjsPath}standard_fonts/`,
};
window.toggleSignatureView = toggleSignatureView;
window.previewSignature = previewSignature;
window.addSignatureFromPreview = addSignatureFromPreview;
@@ -70,9 +76,11 @@ document
const file = allFiles[0];
originalFileName = file.name.replace(/\.[^/.]+$/, "");
const pdfData = await file.arrayBuffer();
pdfjsLib.GlobalWorkerOptions.workerSrc =
"./pdfjs-legacy/pdf.worker.mjs";
const pdfDoc = await pdfjsLib.getDocument({ data: pdfData }).promise;
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
const pdfDoc = await pdfjsLib.getDocument({
...PDFJS_DEFAULT_OPTIONS,
data: pdfData,
}).promise;
await DraggableUtils.renderPage(pdfDoc, 0);
document.querySelectorAll(".show-on-file-selected").forEach((el) => {

View File

@@ -41,7 +41,7 @@
<script type="module" th:src="@{'/pdfjs-legacy/pdf.mjs'}"></script>
<script th:inline="javascript">
document.getElementById('fileInput-input').addEventListener('change', async () => {
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
const fileInput = document.getElementById('fileInput-input');
const resultDiv = document.getElementById('result');

View File

@@ -49,6 +49,9 @@
const updateBreakingChanges = /*[[#{update.breakingChanges}]]*/ 'Breaking Changes:';
const updateBreakingChangesDefault = /*[[#{update.breakingChangesDefault}]]*/ 'This version contains breaking changes';
const updateMigrationGuide = /*[[#{update.migrationGuide}]]*/ 'Migration Guide';
// PDF.js path
const pdfjsPath = /*[[@{'/pdfjs-legacy/'}]]*/ './pdfjs-legacy/';
</script>
<script th:src="@{'/js/homecard.js'}"></script>
<script th:src="@{'/js/githubVersion.js'}"></script>

View File

@@ -58,7 +58,7 @@
</script>
<script type="module">
import * as pdfjsLib from './pdfjs-legacy/pdf.mjs';
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
</script>
<script th:src="@{'/js/merge.js'}"></script>
</div>

View File

@@ -79,7 +79,7 @@
</div>
</div>
<button class="btn btn-primary" onclick="comparePDFs()" th:text="#{compare.submit}"></button>
<button class="btn btn-primary" id="compareBtn" onclick="comparePDFs(event)" th:text="#{compare.submit}"></button>
<div class="row">
<div class="col-md-6">
@@ -105,7 +105,8 @@
result2.addEventListener('scroll', function () {
result1.scrollTop = result2.scrollTop;
});
async function comparePDFs() {
async function comparePDFs(event) {
const file1 = document.getElementById("fileInput-input").files[0];
const file2 = document.getElementById("fileInput2-input").files[0];
var color1 = document.getElementById('color-box1').value;
@@ -113,137 +114,216 @@
const complexMessage = /*[[#{compare.complex.message}]]*/ 'One or both of the provided documents are large files, accuracy of comparison may be reduced';
const largeFilesMessage = /*[[#{compare.large.file.message}]]*/ 'One or Both of the provided documents are too large to process';
const noTextMessage = /*[[#{compare.no.text.message}]]*/ 'One or both of the selected PDFs have no text content. Please choose PDFs with text for comparison."';
const noTextMessage = /*[[#{compare.no.text.message}]]*/ 'One or both of the selected PDFs have no text content. Please choose PDFs with text for comparison.';
const invalidPdfMessage = /*[[#{compare.invalid.pdf.message}]]*/ 'One or both files are not valid PDFs. Please check and re-upload.';
const submitText = /*[[#{compare.submit}]]*/ 'Compare';
if (!file1 || !file2) {
console.error("Please select two PDF files to compare");
alert('Please select two PDF files to compare');
return;
}
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
const [pdf1, pdf2] = await Promise.all([
pdfjsLib.getDocument(URL.createObjectURL(file1)).promise,
pdfjsLib.getDocument(URL.createObjectURL(file2)).promise
]);
const extractText = async (pdf) => {
const pages = [];
for (let i = 1; i <= pdf.numPages; i++) {
const page = await pdf.getPage(i);
const content = await page.getTextContent();
const strings = content.items.map(item => item.str);
pages.push(strings.join(" "));
}
return pages.join(" ");
};
const [text1, text2] = await Promise.all([
extractText(pdf1),
extractText(pdf2)
]);
if (text1.trim() === "" || text2.trim() === "") {
alert(noTextMessage);
// Basic checks
if (file1.size === 0 || file2.size === 0) {
alert('One or both files are empty.');
return;
}
if (file1.size > 100 * 1024 * 1024 || file2.size > 100 * 1024 * 1024) {
alert(largeFilesMessage);
return;
}
const resultDiv1 = document.getElementById("result1");
const resultDiv2 = document.getElementById("result2");
const loading = /*[[#{loading}]]*/ 'Loading...';
resultDiv1.innerHTML = loading;
resultDiv2.innerHTML = loading;
// Create a new Worker
const worker = new Worker('./js/compare/pdfWorker.js');
// Post messages to the worker
worker.postMessage({
type: 'SET_COMPLEX_MESSAGE',
message: complexMessage
});
worker.postMessage({
type: 'SET_TOO_LARGE_MESSAGE',
message: largeFilesMessage
});
// Error handling for the worker
worker.onerror = function (error) {
console.error('Worker error:', error);
// PDF.js setup (Legacy-safe: Worker disabled)
const PDFJS_DEFAULT_OPTIONS = {
cMapUrl: pdfjsPath + 'cmaps/',
cMapPacked: true,
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
disableWorker: true // Avoids Legacy CMap errors without changing PDF.js
};
worker.onmessage = function (e) {
const { status, differences, message } = e.data;
if (status === 'error') {
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
resultDiv1.innerHTML = '';
resultDiv2.innerHTML = '';
alert(message);
return;
const button = event.target;
button.disabled = true;
button.textContent = 'Processing...';
try {
// Load ArrayBuffer
const [data1, data2] = await Promise.all([
readFileAsArrayBuffer(file1),
readFileAsArrayBuffer(file2)
]);
// Header validation (prevents InvalidPDFException)
await validatePdfHeader(data1, 'File 1');
await validatePdfHeader(data2, 'File 2');
// Load PDFs
const [pdf1, pdf2] = await Promise.all([
loadPdfWithErrorHandling({ ...PDFJS_DEFAULT_OPTIONS, data: data1 }, 'File 1'),
loadPdfWithErrorHandling({ ...PDFJS_DEFAULT_OPTIONS, data: data2 }, 'File 2')
]);
// Extract text
result1.innerHTML = 'Extracting text from File 1...';
result2.innerHTML = 'Extracting text from File 2...';
const [text1, text2] = await Promise.all([
extractText(pdf1, 'File 1', result1),
extractText(pdf2, 'File 2', result2)
]);
if (text1.trim() === "" || text2.trim() === "") {
throw new Error(noTextMessage);
}
if (status === 'success' && differences) {
console.log('Differences:', differences);
displayDifferences(differences);
}
if (event.data.status === 'warning') {
console.warn(event.data.message);
alert(event.data.message);
}
};
worker.postMessage({ text1, text2, color1, color2 });
// Worker diff
await processWithWorker(text1, text2, color1, color2, complexMessage, largeFilesMessage);
// Render the word-level diff side by side into #result1 and #result2.
// `differences` is a list of [color, word] pairs; `color1` / `color2` are read
// from the enclosing scope and identify deletions and additions respectively.
const displayDifferences = (differences) => {
  const leftPane = document.getElementById("result1");
  const rightPane = document.getElementById("result2");
  leftPane.innerHTML = "";
  rightPane.innerHTML = "";

  // A hidden word still occupies its space, which keeps both panes aligned.
  const hide = (span) => {
    span.style.color = "transparent";
    span.style.userSelect = "none";
  };

  differences.forEach(([color, word]) => {
    const leftWord = document.createElement("span");
    const rightWord = document.createElement("span");

    if (color === color2) {
      // Addition: invisible in the first document, coloured in the second.
      hide(leftWord);
      rightWord.style.color = color;
    } else if (color === color1) {
      // Deletion: coloured in the first document, invisible in the second.
      leftWord.style.color = color;
      hide(rightWord);
    } else {
      // Unchanged: shown with the same colour in both documents.
      leftWord.style.color = color;
      rightWord.style.color = color;
    }

    leftWord.textContent = word;
    rightWord.textContent = word;
    leftPane.appendChild(leftWord);
    rightPane.appendChild(rightWord);

    // Follow each word with a space, or a line break when it ends with a full stop.
    const endsSentence = word.endsWith(".");
    [leftPane, rightPane].forEach((pane) => {
      const separator = document.createElement("span");
      if (endsSentence) {
        separator.innerHTML = "<br>";
      } else {
        separator.textContent = " ";
      }
      pane.appendChild(separator);
    });
  });
};
} catch (error) {
console.error('Comparison failed:', error);
alert(error.message || invalidPdfMessage);
result1.innerHTML = '';
result2.innerHTML = '';
} finally {
button.disabled = false;
button.textContent = submitText;
}
}
/**
 * Read a File/Blob completely into memory using FileReader.
 *
 * @param {Blob} file - The file to read.
 * @returns {Promise<ArrayBuffer>} Resolves with the file contents; rejects with
 *   the reader's error when the read fails.
 */
function readFileAsArrayBuffer(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    // Reject with the reader's error object instead of the raw ProgressEvent,
    // so callers get a value with a usable `name` and `message`.
    reader.onerror = () => reject(reader.error || new Error('Failed to read file.'));
    reader.readAsArrayBuffer(file);
  });
}
/**
 * Client-side sanity check run before the data is handed to PDF.js.
 *
 * @param {ArrayBuffer} data - Raw file contents.
 * @param {string} fileName - Label used in log output and error messages.
 * @returns {Promise<void>} Rejects when the first bytes are not `%PDF-` or the
 *   data is shorter than 100 bytes.
 */
async function validatePdfHeader(data, fileName) {
  // Decode the first eight bytes one code unit at a time.
  const leadingBytes = new Uint8Array(data.slice(0, 8));
  const headerStr = Array.from(leadingBytes, (byte) => String.fromCharCode(byte)).join('');
  console.log(`${fileName} header:`, headerStr);

  const hasPdfSignature = headerStr.startsWith('%PDF-');
  if (!hasPdfSignature) {
    throw new Error(`${fileName} is not a valid PDF (header: ${headerStr}).`);
  }

  const MIN_BYTES = 100;
  if (data.byteLength < MIN_BYTES) {
    throw new Error(`${fileName} is too short.`);
  }
}
/**
 * Open a PDF through pdfjsLib.getDocument(), logging the outcome.
 *
 * @param {object} options - Passed unchanged to pdfjsLib.getDocument().
 * @param {string} fileName - Label used in log output and error messages.
 * @returns {Promise<object>} Resolves with the loaded document. An
 *   `InvalidPDFException` is replaced by a friendlier Error; any other
 *   failure is rethrown unchanged.
 */
function loadPdfWithErrorHandling(options, fileName) {
  const reportLoaded = (doc) => {
    console.log(`${fileName} loaded: ${doc.numPages} pages`);
    return doc;
  };

  const reportFailure = (reason) => {
    console.error(`${fileName} load failed:`, reason);
    if (reason.name !== 'InvalidPDFException') {
      throw reason;
    }
    throw new Error(`${fileName}: Invalid PDF structure. Re-upload.`);
  };

  const loadingTask = pdfjsLib.getDocument(options);
  return loadingTask.promise.then(reportLoaded).catch(reportFailure);
}
/**
 * Collect the text of every page, one page at a time, reporting progress.
 *
 * @param {object} pdf - Loaded document exposing `numPages` and `getPage(n)`.
 * @param {string} fileName - Label shown in the progress message.
 * @param {object} statusElement - Element whose `innerHTML` receives the
 *   percentage after each page.
 * @returns {Promise<string>} Text items joined with single spaces, within and
 *   between pages.
 */
async function extractText(pdf, fileName, statusElement) {
  const { numPages } = pdf;
  const pageTexts = [];

  // Pages are 1-based and are read strictly in order.
  for (let pageNumber = 1; pageNumber <= numPages; pageNumber += 1) {
    const page = await pdf.getPage(pageNumber);
    const { items } = await page.getTextContent();
    pageTexts.push(items.map(({ str }) => str).join(' '));

    const percentDone = Math.round((pageNumber / numPages) * 100);
    statusElement.innerHTML = `${fileName}: ${percentDone}%`;
  }

  return pageTexts.join(' ');
}
/**
 * Diff two extracted texts in a dedicated Web Worker and render the result.
 *
 * @param {string} text1 - Text of the first document.
 * @param {string} text2 - Text of the second document.
 * @param {string} color1 - Highlight colour forwarded to the worker and renderer.
 * @param {string} color2 - Highlight colour forwarded to the worker and renderer.
 * @param {string} complexMessage - Sent to the worker as SET_COMPLEX_MESSAGE.
 * @param {string} largeFilesMessage - Sent to the worker as SET_TOO_LARGE_MESSAGE.
 * @returns {Promise<void>} Resolves once a `success` message has been handled.
 *   Rejects on a worker `error` message, a worker runtime error, a rendering
 *   failure, or when no message arrives within 30 seconds.
 */
async function processWithWorker(text1, text2, color1, color2, complexMessage, largeFilesMessage) {
  return new Promise((resolve, reject) => {
    const worker = new Worker('./js/compare/pdfWorker.js');

    const timeout = setTimeout(() => {
      worker.terminate();
      reject(new Error('Timeout: Files too complex.'));
    }, 30000);

    // Hand the warning texts to the worker before the comparison starts.
    worker.postMessage({ type: 'SET_COMPLEX_MESSAGE', message: complexMessage });
    worker.postMessage({ type: 'SET_TOO_LARGE_MESSAGE', message: largeFilesMessage });

    worker.onerror = (error) => {
      clearTimeout(timeout);
      worker.terminate();
      reject(new Error('Worker error: ' + error.message));
    };

    worker.onmessage = (e) => {
      // The first message of any kind (warnings included) cancels the watchdog.
      // After that, settling the promise is entirely up to this handler.
      clearTimeout(timeout);
      const { status, differences, message } = e.data;

      if (status === 'error') {
        worker.terminate();
        reject(new Error(message));
        return;
      }

      if (status === 'warning') {
        alert(message);
        return;
      }

      if (status === 'success') {
        // `success` is always terminal. With the watchdog already cleared,
        // a success message without `differences` or a rendering exception
        // would otherwise leave the promise pending and the worker alive.
        try {
          if (differences) {
            displayDifferences(differences, color1, color2);
          }
          resolve();
        } catch (renderError) {
          reject(renderError);
        } finally {
          worker.terminate();
        }
      }
    };

    worker.postMessage({ type: 'COMPARE', text1, text2, color1, color2 });
  });
}
/**
 * Render a word-level diff side by side into #result1 and #result2.
 *
 * @param {Array<[string, string]>} differences - [color, word] pairs in order.
 * @param {string} color1 - Colour marking words shown only in the first pane.
 * @param {string} color2 - Colour marking words shown only in the second pane.
 */
function displayDifferences(differences, color1, color2) {
  const leftPane = document.getElementById("result1");
  const rightPane = document.getElementById("result2");
  leftPane.innerHTML = "";
  rightPane.innerHTML = "";

  // A hidden word keeps its width, so both panes stay aligned.
  const HIDDEN = { color: "transparent", userSelect: "none" };

  // Create a <span>, let `configure` fill it in, then attach it to `pane`.
  const appendSpan = (pane, configure) => {
    const span = document.createElement("span");
    configure(span);
    pane.appendChild(span);
  };

  differences.forEach(([color, word]) => {
    let leftStyle;
    let rightStyle;
    if (color === color2) {
      leftStyle = HIDDEN;
      rightStyle = { color };
    } else if (color === color1) {
      leftStyle = { color };
      rightStyle = HIDDEN;
    } else {
      // Anything else is rendered identically in both panes, defaulting to black.
      leftStyle = { color: color || 'black' };
      rightStyle = leftStyle;
    }

    appendSpan(leftPane, (span) => {
      Object.assign(span.style, leftStyle);
      span.textContent = word;
    });
    appendSpan(rightPane, (span) => {
      Object.assign(span.style, rightStyle);
      span.textContent = word;
    });

    // Separator: a line break after a word ending in ".", otherwise one space.
    const endsSentence = word.endsWith(".");
    const fillSeparator = (span) => {
      if (endsSentence) {
        span.innerHTML = "<br>";
      } else {
        span.textContent = " ";
      }
    };
    appendSpan(leftPane, fillSeparator);
    appendSpan(rightPane, fillSeparator);
  });
}
</script>
</div>
</div>

View File

@@ -59,12 +59,24 @@
</div>
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
</div>
<script type="module" th:src="@{'/pdfjs-legacy/pdf.mjs'}"></script>
<script type="module">
import * as pdfjsLib from './pdfjs-legacy/pdf.mjs';
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
window.pdfjsLib = pdfjsLib;
</script>
<script>
// Page elements used by the rotate preview.
const angleInput = document.getElementById("angleInput");
const fileInput = document.getElementById("fileInput-input");
const previewContainer = document.getElementById("previewContainer");

// PDF.js setup (with CMap options to fix font loading warning).
// CMap and standard-font locations are resolved relative to `pdfjsPath`.
const PDFJS_DEFAULT_OPTIONS = {
  cMapUrl: `${pdfjsPath}cmaps/`,
  cMapPacked: true,
  standardFontDataUrl: `${pdfjsPath}standard_fonts/`,
  // Avoids Legacy CMap errors.
  // NOTE(review): confirm the bundled PDF.js build still honours `disableWorker`.
  disableWorker: true
};
fileInput.addEventListener("change", async function () {
console.log("loading pdf");
@@ -74,9 +86,9 @@
if (existingPreview) {
existingPreview.remove();
}
var url = URL.createObjectURL(fileInput.files[0])
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'
const pdf = await pdfjsLib.getDocument(url).promise;
const url = URL.createObjectURL(fileInput.files[0]);
const pdf = await window.pdfjsLib.getDocument({ ...PDFJS_DEFAULT_OPTIONS, url }).promise;
const page = await pdf.getPage(1);
const canvas = document.createElement("canvas");
@@ -91,7 +103,7 @@
}
// render the page onto the canvas
var renderContext = {
const renderContext = {
canvasContext: canvas.getContext("2d"),
viewport: page.getViewport({ scale: 1 })
};