mirror of
https://github.com/Frooodle/Stirling-PDF.git
synced 2026-03-28 02:31:17 +01:00
fix(frontend/pdfjs): ensure CID character rendering via CMaps & stabilize PDF compare/preview (#4762)
# Description of Changes

## What was changed

- Introduced a shared `PDFJS_DEFAULT_OPTIONS` object and applied it across frontend modules using PDF.js:
  - Sets `cMapUrl`, `cMapPacked`, and `standardFontDataUrl` so PDF.js can correctly load CMaps and standard fonts.
  - Switches all `GlobalWorkerOptions.workerSrc` usages to the dynamic `pdfjsPath + 'pdf.worker.mjs'`.
- Exposed `pdfjsPath` globally in `navbar.html` to support deployments under subpaths/reverse proxies.
- Updated multiple pages and utilities to use the new defaults:
  - `DecryptFiles.js`, `downloader.js`, `merge.js`, Multi-Tool (`PdfContainer.js`), and feature pages (`add-image.js`, `adjust-contrast.js`, `change-metadata.js`, `crop.js`, `pdf-to-csv.js`, `sign.js`, `rotate-pdf.html`, `convert/pdf-to-pdfa.html`, `merge-pdfs.html`).
- Comparison tool hardening:
  - Added robust worker protocol (`type: 'COMPARE' | 'SET_*'`) and safer logs.
  - Improved text tokenization, adaptive batch diffing with overlap de-duplication, and color fallbacks.
  - Early validation for empty/oversized/invalid PDFs with clearer user messages.
- Disabled PDF.js worker in specific templates where legacy CMap handling caused issues (`disableWorker: true`) to prevent rendering failures.
- UI/UX tweaks: processing state on the compare button, progress hints during text extraction, and more resilient error handling.
- Fixed relative path to popularity data (`./files/popularity.txt`) to respect base paths.

## Why the change was made

- PDFs using CID fonts (e.g., CJK and other complex scripts) were rendering with missing glyphs or falling back incorrectly because CMaps and standard font data were not being provided to PDF.js. Providing proper CMap and font resources resolves CID character visibility issues and related console warnings.
- Some environments (subpath deployments, reverse proxies) broke PDF.js worker/static asset resolution; centralizing `pdfjsPath` and using it consistently fixes this.
- The comparison feature struggled with large/complex documents and lacked robust validation; improvements reduce timeouts, improve accuracy, and provide clearer feedback.

Closes #4391

---

## Checklist

### General

- [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details.
This commit is contained in:
@@ -1,3 +1,9 @@
|
||||
const PDFJS_DEFAULT_OPTIONS = {
|
||||
cMapUrl: pdfjsPath + 'cmaps/',
|
||||
cMapPacked: true,
|
||||
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
|
||||
};
|
||||
|
||||
function formatProblemDetailsJson(input) {
|
||||
try {
|
||||
const obj = typeof input === 'string' ? JSON.parse(input) : input;
|
||||
@@ -238,7 +244,7 @@ export class DecryptFile {
|
||||
return {isEncrypted: false, requiresPassword: false};
|
||||
}
|
||||
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const arrayBufferForPdfLib = arrayBuffer.slice(0);
|
||||
@@ -246,12 +252,14 @@ export class DecryptFile {
|
||||
|
||||
if(this.decryptWorker == null){
|
||||
loadingTask = pdfjsLib.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
data: arrayBuffer,
|
||||
});
|
||||
this.decryptWorker = loadingTask._worker
|
||||
|
||||
}else {
|
||||
loadingTask = pdfjsLib.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
data: arrayBuffer,
|
||||
worker: this.decryptWorker
|
||||
});
|
||||
|
||||
@@ -1,18 +1,39 @@
|
||||
importScripts('./diff.js');
|
||||
|
||||
let complexMessage = 'One or both of the provided documents are large files, accuracy of comparison may be reduced';
|
||||
let largeFilesMessage = 'One or Both of the provided documents are too large to process';
|
||||
|
||||
// Early: Listener for SET messages (before onmessage)
|
||||
self.addEventListener('message', (event) => {
|
||||
if (event.data.type === 'SET_COMPLEX_MESSAGE') {
|
||||
complexMessage = event.data.message;
|
||||
} else if (event.data.type === 'SET_TOO_LARGE_MESSAGE') {
|
||||
largeFilesMessage = event.data.message;
|
||||
}
|
||||
});
|
||||
|
||||
self.onmessage = async function (e) {
|
||||
const { text1, text2, color1, color2 } = e.data;
|
||||
console.log('Received text for comparison:', { text1, text2 });
|
||||
const data = e.data;
|
||||
if (data.type !== 'COMPARE') {
|
||||
console.log('Worker ignored non-COMPARE message');
|
||||
return;
|
||||
}
|
||||
|
||||
const { text1, text2, color1, color2 } = data;
|
||||
console.log('Received text for comparison:', { lengths: { text1: text1.length, text2: text2.length } }); // Safe Log
|
||||
|
||||
const startTime = performance.now();
|
||||
|
||||
if (text1.trim() === "" || text2.trim() === "") {
|
||||
// Safe Trim
|
||||
if (!text1 || !text2 || text1.trim() === "" || text2.trim() === "") {
|
||||
self.postMessage({ status: 'error', message: 'One or both of the texts are empty.' });
|
||||
return;
|
||||
}
|
||||
|
||||
const words1 = text1.split(' ');
|
||||
const words2 = text2.split(' ');
|
||||
// Robust word split: trims, splits on runs of any whitespace, and drops empty tokens (punctuation stays attached to its word)
|
||||
const words1 = text1.trim().split(/\s+/).filter(w => w.length > 0);
|
||||
const words2 = text2.trim().split(/\s+/).filter(w => w.length > 0);
|
||||
|
||||
const MAX_WORD_COUNT = 150000;
|
||||
const COMPLEX_WORD_COUNT = 50000;
|
||||
const BATCH_SIZE = 5000; // Define a suitable batch size for processing
|
||||
@@ -21,44 +42,28 @@ self.onmessage = async function (e) {
|
||||
const isComplex = words1.length > COMPLEX_WORD_COUNT || words2.length > COMPLEX_WORD_COUNT;
|
||||
const isTooLarge = words1.length > MAX_WORD_COUNT || words2.length > MAX_WORD_COUNT;
|
||||
|
||||
let complexMessage = 'One or both of the provided documents are large files, accuracy of comparison may be reduced';
|
||||
let tooLargeMessage = 'One or Both of the provided documents are too large to process';
|
||||
|
||||
// Listen for messages from the main thread
|
||||
self.addEventListener('message', (event) => {
|
||||
if (event.data.type === 'SET_TOO_LARGE_MESSAGE') {
|
||||
tooLargeMessage = event.data.message;
|
||||
}
|
||||
if (event.data.type === 'SET_COMPLEX_MESSAGE') {
|
||||
complexMessage = event.data.message;
|
||||
}
|
||||
});
|
||||
|
||||
if (isTooLarge) {
|
||||
self.postMessage({
|
||||
status: 'warning',
|
||||
message: tooLargeMessage,
|
||||
});
|
||||
self.postMessage({ status: 'error', message: largeFilesMessage });
|
||||
return;
|
||||
} else {
|
||||
|
||||
if (isComplex) {
|
||||
self.postMessage({
|
||||
status: 'warning',
|
||||
message: complexMessage,
|
||||
});
|
||||
}
|
||||
// Perform diff operation depending on document size
|
||||
const differences = isComplex
|
||||
? await staggeredBatchDiff(words1, words2, color1, color2, BATCH_SIZE, OVERLAP_SIZE)
|
||||
: diff(words1, words2, color1, color2);
|
||||
|
||||
console.log(`Diff operation took ${performance.now() - startTime} milliseconds`);
|
||||
self.postMessage({ status: 'success', differences });
|
||||
}
|
||||
|
||||
if (isComplex) {
|
||||
self.postMessage({ status: 'warning', message: complexMessage });
|
||||
}
|
||||
|
||||
// Diff based on size
|
||||
let differences;
|
||||
if (isComplex) {
|
||||
differences = await staggeredBatchDiff(words1, words2, color1 || '#ff0000', color2 || '#008000', BATCH_SIZE, OVERLAP_SIZE);
|
||||
} else {
|
||||
differences = diff(words1, words2, color1 || '#ff0000', color2 || '#008000');
|
||||
}
|
||||
|
||||
console.log(`Diff took ${performance.now() - startTime} ms for ${words1.length + words2.length} words`);
|
||||
self.postMessage({ status: 'success', differences });
|
||||
};
|
||||
|
||||
//Splits text into smaller batches to run through diff checking algorithms. overlaps the batches to help ensure
|
||||
// Splits text into smaller batches to run through diff checking algorithms. overlaps the batches to help ensure
|
||||
async function staggeredBatchDiff(words1, words2, color1, color2, batchSize, overlapSize) {
|
||||
const differences = [];
|
||||
const totalWords1 = words1.length;
|
||||
@@ -67,10 +72,9 @@ async function staggeredBatchDiff(words1, words2, color1, color2, batchSize, ove
|
||||
let previousEnd1 = 0; // Track where the last batch ended in words1
|
||||
let previousEnd2 = 0; // Track where the last batch ended in words2
|
||||
|
||||
// Function to determine if differences are large, differences that are too large indicate potential error in batching
|
||||
const isLargeDifference = (differences) => {
|
||||
return differences.length > 50;
|
||||
};
|
||||
// Track processed indices to dedupe overlaps
|
||||
const processed1 = new Set();
|
||||
const processed2 = new Set();
|
||||
|
||||
while (previousEnd1 < totalWords1 || previousEnd2 < totalWords2) {
|
||||
// Define the next chunk boundaries
|
||||
@@ -80,66 +84,130 @@ async function staggeredBatchDiff(words1, words2, color1, color2, batchSize, ove
|
||||
const start2 = previousEnd2;
|
||||
const end2 = Math.min(start2 + batchSize, totalWords2);
|
||||
|
||||
//If difference is too high decrease batch size for more granular check
|
||||
const dynamicBatchSize = isLargeDifference(differences) ? batchSize / 2 : batchSize;
|
||||
// Adaptive: If many diffs, smaller batch (max 3x downscale)
|
||||
const recentDiffs = differences.slice(-100).filter(([c]) => c !== 'black').length;
|
||||
// If difference is too high decrease batch size for more granular check
|
||||
const dynamicBatchSize = Math.max(batchSize / Math.min(8, 1 + recentDiffs / 50), batchSize / 8);
|
||||
|
||||
// Adjust the size of the current chunk using dynamic batch size
|
||||
const batchWords1 = words1.slice(start1, end1 + dynamicBatchSize);
|
||||
const batchWords2 = words2.slice(start2, end2 + dynamicBatchSize);
|
||||
const extendedEnd1 = Math.min(end1 + dynamicBatchSize, totalWords1);
|
||||
const extendedEnd2 = Math.min(end2 + dynamicBatchSize, totalWords2);
|
||||
|
||||
const batchWords1 = words1.slice(start1, extendedEnd1);
|
||||
const batchWords2 = words2.slice(start2, extendedEnd2);
|
||||
|
||||
// Include overlap from the previous chunk
|
||||
const overlapWords1 = previousEnd1 > 0 ? words1.slice(Math.max(0, previousEnd1 - overlapSize), previousEnd1) : [];
|
||||
const overlapWords2 = previousEnd2 > 0 ? words2.slice(Math.max(0, previousEnd2 - overlapSize), previousEnd2) : [];
|
||||
const overlapStart1 = Math.max(0, previousEnd1 - overlapSize);
|
||||
const overlapStart2 = Math.max(0, previousEnd2 - overlapSize);
|
||||
const overlapWords1 = previousEnd1 > 0 ? words1.slice(overlapStart1, previousEnd1) : [];
|
||||
const overlapWords2 = previousEnd2 > 0 ? words2.slice(overlapStart2, previousEnd2) : [];
|
||||
|
||||
|
||||
// Combine overlaps and current batches for comparison
|
||||
const combinedWords1 = overlapWords1.concat(batchWords1);
|
||||
const combinedWords2 = overlapWords2.concat(batchWords2);
|
||||
const combinedWords1 = [...overlapWords1, ...batchWords1];
|
||||
const combinedWords2 = [...overlapWords2, ...batchWords2];
|
||||
|
||||
// Perform the diff on the combined words
|
||||
const batchDifferences = diff(combinedWords1, combinedWords2, color1, color2);
|
||||
differences.push(...batchDifferences);
|
||||
|
||||
// Update the previous end indices based on the results of this batch
|
||||
const combinedIndices1 = [];
|
||||
for (let i = overlapStart1; i < previousEnd1; i++) {
|
||||
combinedIndices1.push(i);
|
||||
}
|
||||
for (let i = start1; i < extendedEnd1; i++) {
|
||||
combinedIndices1.push(i);
|
||||
}
|
||||
|
||||
const combinedIndices2 = [];
|
||||
for (let i = overlapStart2; i < previousEnd2; i++) {
|
||||
combinedIndices2.push(i);
|
||||
}
|
||||
for (let i = start2; i < extendedEnd2; i++) {
|
||||
combinedIndices2.push(i);
|
||||
}
|
||||
|
||||
let pointer1 = 0;
|
||||
let pointer2 = 0;
|
||||
|
||||
const filteredBatch = [];
|
||||
batchDifferences.forEach(([color, word]) => {
|
||||
if (color === color1) {
|
||||
const globalIndex1 = combinedIndices1[pointer1];
|
||||
if (globalIndex1 === undefined || !processed1.has(globalIndex1)) {
|
||||
filteredBatch.push([color, word]);
|
||||
}
|
||||
if (globalIndex1 !== undefined) {
|
||||
processed1.add(globalIndex1);
|
||||
}
|
||||
pointer1++;
|
||||
} else if (color === color2) {
|
||||
const globalIndex2 = combinedIndices2[pointer2];
|
||||
if (globalIndex2 === undefined || !processed2.has(globalIndex2)) {
|
||||
filteredBatch.push([color, word]);
|
||||
}
|
||||
if (globalIndex2 !== undefined) {
|
||||
processed2.add(globalIndex2);
|
||||
}
|
||||
pointer2++;
|
||||
} else {
|
||||
const globalIndex1 = combinedIndices1[pointer1];
|
||||
const globalIndex2 = combinedIndices2[pointer2];
|
||||
const alreadyProcessed = (globalIndex1 !== undefined && processed1.has(globalIndex1)) && (globalIndex2 !== undefined && processed2.has(globalIndex2));
|
||||
if (!alreadyProcessed) {
|
||||
filteredBatch.push([color, word]);
|
||||
}
|
||||
if (globalIndex1 !== undefined) {
|
||||
processed1.add(globalIndex1);
|
||||
}
|
||||
if (globalIndex2 !== undefined) {
|
||||
processed2.add(globalIndex2);
|
||||
}
|
||||
pointer1++;
|
||||
pointer2++;
|
||||
}
|
||||
});
|
||||
|
||||
differences.push(...filteredBatch);
|
||||
|
||||
// Mark as processed
|
||||
for (let k = start1; k < end1; k++) processed1.add(k);
|
||||
for (let k = start2; k < end2; k++) processed2.add(k);
|
||||
|
||||
previousEnd1 = end1;
|
||||
previousEnd2 = end2;
|
||||
|
||||
// Yield for async (avoids blocking)
|
||||
await new Promise(resolve => setTimeout(resolve, 0));
|
||||
}
|
||||
|
||||
return differences;
|
||||
}
|
||||
|
||||
|
||||
// Standard diff function for small text comparisons
|
||||
function diff(words1, words2, color1, color2) {
|
||||
console.log(`Starting diff between ${words1.length} words and ${words2.length} words`);
|
||||
const matrix = Array.from({ length: words1.length + 1 }, () => Array(words2.length + 1).fill(0));
|
||||
console.log(`Diff: ${words1.length} vs ${words2.length} words`);
|
||||
const oldStr = words1.join(' '); // As string for diff.js
|
||||
const newStr = words2.join(' ');
|
||||
// Static method: No 'new' needed, avoids constructor error
|
||||
const changes = Diff.diffWords(oldStr, newStr, { ignoreWhitespace: true });
|
||||
|
||||
for (let i = 1; i <= words1.length; i++) {
|
||||
for (let j = 1; j <= words2.length; j++) {
|
||||
matrix[i][j] = words1[i - 1] === words2[j - 1]
|
||||
? matrix[i - 1][j - 1] + 1
|
||||
: Math.max(matrix[i][j - 1], matrix[i - 1][j]);
|
||||
}
|
||||
}
|
||||
return backtrack(matrix, words1, words2, color1, color2);
|
||||
}
|
||||
|
||||
// Backtrack function to find differences
|
||||
function backtrack(matrix, words1, words2, color1, color2) {
|
||||
let i = words1.length, j = words2.length;
|
||||
// Map changes to [color, word] format (change.value and added/removed)
|
||||
const differences = [];
|
||||
changes.forEach(change => {
|
||||
const value = change.value;
|
||||
const op = change.added ? 1 : change.removed ? -1 : 0;
|
||||
|
||||
while (i > 0 || j > 0) {
|
||||
if (i > 0 && j > 0 && words1[i - 1] === words2[j - 1]) {
|
||||
differences.unshift(['black', words1[i - 1]]);
|
||||
i--; j--;
|
||||
} else if (j > 0 && (i === 0 || matrix[i][j] === matrix[i][j - 1])) {
|
||||
differences.unshift([color2, words2[j - 1]]);
|
||||
j--;
|
||||
} else {
|
||||
differences.unshift([color1, words1[i - 1]]);
|
||||
i--;
|
||||
}
|
||||
}
|
||||
// Split value into words and process
|
||||
const words = value.split(/\s+/).filter(w => w.length > 0);
|
||||
words.forEach(word => {
|
||||
if (op === 0) { // Equal
|
||||
differences.push(['black', word]);
|
||||
} else if (op === 1) { // Insert
|
||||
differences.push([color2, word]);
|
||||
} else if (op === -1) { // Delete
|
||||
differences.push([color1, word]);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
return differences;
|
||||
}
|
||||
|
||||
@@ -2,6 +2,12 @@
|
||||
if (window.isDownloadScriptInitialized) return; // Prevent re-execution
|
||||
window.isDownloadScriptInitialized = true;
|
||||
|
||||
const PDFJS_DEFAULT_OPTIONS = {
|
||||
cMapUrl: pdfjsPath + 'cmaps/',
|
||||
cMapPacked: true,
|
||||
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
|
||||
};
|
||||
|
||||
// Global PDF processing count tracking for survey system
|
||||
window.incrementPdfProcessingCount = function() {
|
||||
let pdfProcessingCount = parseInt(localStorage.getItem('pdfProcessingCount') || '0');
|
||||
@@ -234,8 +240,13 @@
|
||||
async function getPDFPageCount(file) {
|
||||
try {
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
const pdf = await pdfjsLib.getDocument({data: arrayBuffer}).promise;
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
const pdf = await pdfjsLib
|
||||
.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
data: arrayBuffer,
|
||||
})
|
||||
.promise;
|
||||
return pdf.numPages;
|
||||
} catch (error) {
|
||||
console.error('Error getting PDF page count:', error);
|
||||
@@ -245,7 +256,7 @@
|
||||
|
||||
async function checkAndDecryptFiles(url, files) {
|
||||
const decryptedFiles = [];
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
|
||||
// Extract the base URL
|
||||
const baseUrl = new URL(url);
|
||||
@@ -271,7 +282,10 @@
|
||||
}
|
||||
try {
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const loadingTask = pdfjsLib.getDocument({data: arrayBuffer});
|
||||
const loadingTask = pdfjsLib.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
data: arrayBuffer,
|
||||
});
|
||||
|
||||
console.log(`Attempting to load PDF: ${file.name}`);
|
||||
const pdf = await loadingTask.promise;
|
||||
|
||||
@@ -220,7 +220,7 @@ document.addEventListener('DOMContentLoaded', async function () {
|
||||
});
|
||||
}
|
||||
try {
|
||||
const response = await fetch('/files/popularity.txt');
|
||||
const response = await fetch('./files/popularity.txt');
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text().catch(() => '');
|
||||
const errorMsg = errorText || response.statusText || 'Request failed';
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
const PDFJS_DEFAULT_OPTIONS = {
|
||||
cMapUrl: pdfjsPath + 'cmaps/',
|
||||
cMapPacked: true,
|
||||
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
|
||||
};
|
||||
|
||||
let currentSort = {
|
||||
field: null,
|
||||
descending: false,
|
||||
@@ -73,7 +79,13 @@ async function displayFiles(files) {
|
||||
|
||||
async function getPDFPageCount(file) {
|
||||
const blobUrl = URL.createObjectURL(file);
|
||||
const pdf = await pdfjsLib.getDocument(blobUrl).promise;
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
const pdf = await pdfjsLib
|
||||
.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
url: blobUrl,
|
||||
})
|
||||
.promise;
|
||||
URL.revokeObjectURL(blobUrl);
|
||||
return pdf.numPages;
|
||||
}
|
||||
|
||||
@@ -8,6 +8,12 @@ import { AddFilesCommand } from './commands/add-page.js';
|
||||
import { DecryptFile } from '../DecryptFiles.js';
|
||||
import { CommandSequence } from './commands/commands-sequence.js';
|
||||
|
||||
const PDFJS_DEFAULT_OPTIONS = {
|
||||
cMapUrl: pdfjsPath + 'cmaps/',
|
||||
cMapPacked: true,
|
||||
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
|
||||
};
|
||||
|
||||
const isSvgFile = (file) => {
|
||||
if (!file) return false;
|
||||
const type = (file.type || '').toLowerCase();
|
||||
@@ -479,8 +485,11 @@ class PdfContainer {
|
||||
}
|
||||
|
||||
async toRenderer(objectUrl) {
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
const pdf = await pdfjsLib.getDocument(objectUrl).promise;
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
const pdf = await pdfjsLib.getDocument({
|
||||
url: objectUrl,
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
}).promise;
|
||||
return {
|
||||
document: pdf,
|
||||
pageCount: pdf.numPages,
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
const PDFJS_DEFAULT_OPTIONS = {
|
||||
cMapUrl: pdfjsPath + 'cmaps/',
|
||||
cMapPacked: true,
|
||||
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
|
||||
};
|
||||
|
||||
window.goToFirstOrLastPage = goToFirstOrLastPage;
|
||||
|
||||
document.getElementById('download-pdf').addEventListener('click', async () => {
|
||||
@@ -31,8 +37,11 @@ document.querySelector('input[name=pdf-upload]').addEventListener('change', asyn
|
||||
const file = allFiles[0];
|
||||
originalFileName = file.name.replace(/\.[^/.]+$/, '');
|
||||
const pdfData = await file.arrayBuffer();
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
const pdfDoc = await pdfjsLib.getDocument({ data: pdfData }).promise;
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
const pdfDoc = await pdfjsLib.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
data: pdfData,
|
||||
}).promise;
|
||||
await DraggableUtils.renderPage(pdfDoc, 0);
|
||||
|
||||
document.querySelectorAll('.show-on-file-selected').forEach((el) => {
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
const PDFJS_DEFAULT_OPTIONS = {
|
||||
cMapUrl: pdfjsPath + 'cmaps/',
|
||||
cMapPacked: true,
|
||||
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
|
||||
};
|
||||
|
||||
var canvas = document.getElementById('contrast-pdf-canvas');
|
||||
var context = canvas.getContext('2d');
|
||||
var originalImageData = null;
|
||||
@@ -9,8 +15,11 @@ async function renderPDFAndSaveOriginalImageData(file) {
|
||||
var fileReader = new FileReader();
|
||||
fileReader.onload = async function () {
|
||||
var data = new Uint8Array(this.result);
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
pdf = await pdfjsLib.getDocument({data: data}).promise;
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
pdf = await pdfjsLib.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
data: data,
|
||||
}).promise;
|
||||
|
||||
// Get the number of pages in the PDF
|
||||
var numPages = pdf.numPages;
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
const PDFJS_DEFAULT_OPTIONS = {
|
||||
cMapUrl: pdfjsPath + 'cmaps/',
|
||||
cMapPacked: true,
|
||||
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
|
||||
};
|
||||
|
||||
const deleteAllCheckbox = document.querySelector('#deleteAll');
|
||||
let inputs = document.querySelectorAll('input');
|
||||
const customMetadataDiv = document.getElementById('customMetadata');
|
||||
@@ -43,8 +49,13 @@ fileInput.addEventListener('change', async function () {
|
||||
customMetadataFormContainer.removeChild(customMetadataFormContainer.firstChild);
|
||||
}
|
||||
var url = URL.createObjectURL(file);
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
const pdf = await pdfjsLib.getDocument(url).promise;
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
const pdf = await pdfjsLib
|
||||
.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
url: url,
|
||||
})
|
||||
.promise;
|
||||
const pdfMetadata = await pdf.getMetadata();
|
||||
lastPDFFile = pdfMetadata?.info;
|
||||
console.log(pdfMetadata);
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
const PDFJS_DEFAULT_OPTIONS = {
|
||||
cMapUrl: pdfjsPath + 'cmaps/',
|
||||
cMapPacked: true,
|
||||
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
|
||||
};
|
||||
|
||||
let pdfCanvas = document.getElementById('cropPdfCanvas');
|
||||
let overlayCanvas = document.getElementById('overlayCanvas');
|
||||
let canvasesContainer = document.getElementById('canvasesContainer');
|
||||
@@ -42,12 +48,17 @@ function renderPageFromFile(file) {
|
||||
let reader = new FileReader();
|
||||
reader.onload = function (ev) {
|
||||
let typedArray = new Uint8Array(reader.result);
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
pdfjsLib.getDocument(typedArray).promise.then(function (pdf) {
|
||||
pdfDoc = pdf;
|
||||
totalPages = pdf.numPages;
|
||||
renderPage(currentPage);
|
||||
});
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
pdfjsLib
|
||||
.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
data: typedArray,
|
||||
})
|
||||
.promise.then(function (pdf) {
|
||||
pdfDoc = pdf;
|
||||
totalPages = pdf.numPages;
|
||||
renderPage(currentPage);
|
||||
});
|
||||
};
|
||||
reader.readAsArrayBuffer(file);
|
||||
}
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
const PDFJS_DEFAULT_OPTIONS = {
|
||||
cMapUrl: pdfjsPath + 'cmaps/',
|
||||
cMapPacked: true,
|
||||
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
|
||||
};
|
||||
|
||||
let pdfCanvas = document.getElementById('cropPdfCanvas');
|
||||
let overlayCanvas = document.getElementById('overlayCanvas');
|
||||
let canvasesContainer = document.getElementById('canvasesContainer');
|
||||
@@ -37,12 +43,17 @@ btn1Object.addEventListener('click', function (e) {
|
||||
let reader = new FileReader();
|
||||
reader.onload = function (ev) {
|
||||
let typedArray = new Uint8Array(reader.result);
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
pdfjsLib.getDocument(typedArray).promise.then(function (pdf) {
|
||||
pdfDoc = pdf;
|
||||
totalPages = pdf.numPages;
|
||||
renderPage(currentPage);
|
||||
});
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
pdfjsLib
|
||||
.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
data: typedArray,
|
||||
})
|
||||
.promise.then(function (pdf) {
|
||||
pdfDoc = pdf;
|
||||
totalPages = pdf.numPages;
|
||||
renderPage(currentPage);
|
||||
});
|
||||
};
|
||||
reader.readAsArrayBuffer(file);
|
||||
}
|
||||
@@ -58,12 +69,17 @@ btn2Object.addEventListener('click', function (e) {
|
||||
let reader = new FileReader();
|
||||
reader.onload = function (ev) {
|
||||
let typedArray = new Uint8Array(reader.result);
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
pdfjsLib.getDocument(typedArray).promise.then(function (pdf) {
|
||||
pdfDoc = pdf;
|
||||
totalPages = pdf.numPages;
|
||||
renderPage(currentPage);
|
||||
});
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
pdfjsLib
|
||||
.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
data: typedArray,
|
||||
})
|
||||
.promise.then(function (pdf) {
|
||||
pdfDoc = pdf;
|
||||
totalPages = pdf.numPages;
|
||||
renderPage(currentPage);
|
||||
});
|
||||
};
|
||||
reader.readAsArrayBuffer(file);
|
||||
}
|
||||
@@ -75,12 +91,17 @@ function renderPageFromFile(file) {
|
||||
let reader = new FileReader();
|
||||
reader.onload = function (ev) {
|
||||
let typedArray = new Uint8Array(reader.result);
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
pdfjsLib.getDocument(typedArray).promise.then(function (pdf) {
|
||||
pdfDoc = pdf;
|
||||
totalPages = pdf.numPages;
|
||||
renderPage(currentPage);
|
||||
});
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
pdfjsLib
|
||||
.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
data: typedArray,
|
||||
})
|
||||
.promise.then(function (pdf) {
|
||||
pdfDoc = pdf;
|
||||
totalPages = pdf.numPages;
|
||||
renderPage(currentPage);
|
||||
});
|
||||
pageNumbers.value = currentPage;
|
||||
};
|
||||
reader.readAsArrayBuffer(file);
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
const PDFJS_DEFAULT_OPTIONS = {
|
||||
cMapUrl: pdfjsPath + 'cmaps/',
|
||||
cMapPacked: true,
|
||||
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
|
||||
};
|
||||
|
||||
window.toggleSignatureView = toggleSignatureView;
|
||||
window.previewSignature = previewSignature;
|
||||
window.addSignatureFromPreview = addSignatureFromPreview;
|
||||
@@ -70,9 +76,11 @@ document
|
||||
const file = allFiles[0];
|
||||
originalFileName = file.name.replace(/\.[^/.]+$/, "");
|
||||
const pdfData = await file.arrayBuffer();
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc =
|
||||
"./pdfjs-legacy/pdf.worker.mjs";
|
||||
const pdfDoc = await pdfjsLib.getDocument({ data: pdfData }).promise;
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
const pdfDoc = await pdfjsLib.getDocument({
|
||||
...PDFJS_DEFAULT_OPTIONS,
|
||||
data: pdfData,
|
||||
}).promise;
|
||||
await DraggableUtils.renderPage(pdfDoc, 0);
|
||||
|
||||
document.querySelectorAll(".show-on-file-selected").forEach((el) => {
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
<script type="module" th:src="@{'/pdfjs-legacy/pdf.mjs'}"></script>
|
||||
<script th:inline="javascript">
|
||||
document.getElementById('fileInput-input').addEventListener('change', async () => {
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
const fileInput = document.getElementById('fileInput-input');
|
||||
const resultDiv = document.getElementById('result');
|
||||
|
||||
|
||||
@@ -49,6 +49,9 @@
|
||||
const updateBreakingChanges = /*[[#{update.breakingChanges}]]*/ 'Breaking Changes:';
|
||||
const updateBreakingChangesDefault = /*[[#{update.breakingChangesDefault}]]*/ 'This version contains breaking changes';
|
||||
const updateMigrationGuide = /*[[#{update.migrationGuide}]]*/ 'Migration Guide';
|
||||
|
||||
// PDF.js path
|
||||
const pdfjsPath = /*[[@{'/pdfjs-legacy/'}]]*/ './pdfjs-legacy/';
|
||||
</script>
|
||||
<script th:src="@{'/js/homecard.js'}"></script>
|
||||
<script th:src="@{'/js/githubVersion.js'}"></script>
|
||||
|
||||
@@ -58,7 +58,7 @@
|
||||
</script>
|
||||
<script type="module">
|
||||
import * as pdfjsLib from './pdfjs-legacy/pdf.mjs';
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
</script>
|
||||
<script th:src="@{'/js/merge.js'}"></script>
|
||||
</div>
|
||||
|
||||
@@ -79,7 +79,7 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button class="btn btn-primary" onclick="comparePDFs()" th:text="#{compare.submit}"></button>
|
||||
<button class="btn btn-primary" id="compareBtn" onclick="comparePDFs(event)" th:text="#{compare.submit}"></button>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-md-6">
|
||||
@@ -105,7 +105,8 @@
|
||||
result2.addEventListener('scroll', function () {
|
||||
result1.scrollTop = result2.scrollTop;
|
||||
});
|
||||
async function comparePDFs() {
|
||||
|
||||
async function comparePDFs(event) {
|
||||
const file1 = document.getElementById("fileInput-input").files[0];
|
||||
const file2 = document.getElementById("fileInput2-input").files[0];
|
||||
var color1 = document.getElementById('color-box1').value;
|
||||
@@ -113,137 +114,216 @@
|
||||
|
||||
const complexMessage = /*[[#{compare.complex.message}]]*/ 'One or both of the provided documents are large files, accuracy of comparison may be reduced';
|
||||
const largeFilesMessage = /*[[#{compare.large.file.message}]]*/ 'One or Both of the provided documents are too large to process';
|
||||
const noTextMessage = /*[[#{compare.no.text.message}]]*/ 'One or both of the selected PDFs have no text content. Please choose PDFs with text for comparison."';
|
||||
const noTextMessage = /*[[#{compare.no.text.message}]]*/ 'One or both of the selected PDFs have no text content. Please choose PDFs with text for comparison.';
|
||||
const invalidPdfMessage = /*[[#{compare.invalid.pdf.message}]]*/ 'One or both files are not valid PDFs. Please check and re-upload.';
|
||||
const submitText = /*[[#{compare.submit}]]*/ 'Compare';
|
||||
|
||||
if (!file1 || !file2) {
|
||||
console.error("Please select two PDF files to compare");
|
||||
alert('Please select two PDF files to compare');
|
||||
return;
|
||||
}
|
||||
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs';
|
||||
|
||||
const [pdf1, pdf2] = await Promise.all([
|
||||
pdfjsLib.getDocument(URL.createObjectURL(file1)).promise,
|
||||
pdfjsLib.getDocument(URL.createObjectURL(file2)).promise
|
||||
]);
|
||||
|
||||
const extractText = async (pdf) => {
|
||||
const pages = [];
|
||||
for (let i = 1; i <= pdf.numPages; i++) {
|
||||
const page = await pdf.getPage(i);
|
||||
const content = await page.getTextContent();
|
||||
const strings = content.items.map(item => item.str);
|
||||
pages.push(strings.join(" "));
|
||||
}
|
||||
return pages.join(" ");
|
||||
};
|
||||
|
||||
const [text1, text2] = await Promise.all([
|
||||
extractText(pdf1),
|
||||
extractText(pdf2)
|
||||
]);
|
||||
|
||||
if (text1.trim() === "" || text2.trim() === "") {
|
||||
alert(noTextMessage);
|
||||
// Basic checks
|
||||
if (file1.size === 0 || file2.size === 0) {
|
||||
alert('One or both files are empty.');
|
||||
return;
|
||||
}
|
||||
if (file1.size > 100 * 1024 * 1024 || file2.size > 100 * 1024 * 1024) {
|
||||
alert(largeFilesMessage);
|
||||
return;
|
||||
}
|
||||
|
||||
const resultDiv1 = document.getElementById("result1");
|
||||
const resultDiv2 = document.getElementById("result2");
|
||||
const loading = /*[[#{loading}]]*/ 'Loading...';
|
||||
|
||||
resultDiv1.innerHTML = loading;
|
||||
resultDiv2.innerHTML = loading;
|
||||
|
||||
// Create a new Worker
|
||||
const worker = new Worker('./js/compare/pdfWorker.js');
|
||||
|
||||
|
||||
// Post messages to the worker
|
||||
worker.postMessage({
|
||||
type: 'SET_COMPLEX_MESSAGE',
|
||||
message: complexMessage
|
||||
});
|
||||
|
||||
worker.postMessage({
|
||||
type: 'SET_TOO_LARGE_MESSAGE',
|
||||
message: largeFilesMessage
|
||||
});
|
||||
|
||||
// Error handling for the worker
|
||||
worker.onerror = function (error) {
|
||||
console.error('Worker error:', error);
|
||||
// PDF.js setup (Legacy-safe: Worker disabled)
|
||||
const PDFJS_DEFAULT_OPTIONS = {
|
||||
cMapUrl: pdfjsPath + 'cmaps/',
|
||||
cMapPacked: true,
|
||||
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
|
||||
disableWorker: true // Avoids Legacy CMap errors without changing PDF.js
|
||||
};
|
||||
worker.onmessage = function (e) {
|
||||
const { status, differences, message } = e.data;
|
||||
if (status === 'error') {
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
|
||||
resultDiv1.innerHTML = '';
|
||||
resultDiv2.innerHTML = '';
|
||||
alert(message);
|
||||
return;
|
||||
const button = event.target;
|
||||
button.disabled = true;
|
||||
button.textContent = 'Processing...';
|
||||
|
||||
try {
|
||||
// Load ArrayBuffer
|
||||
const [data1, data2] = await Promise.all([
|
||||
readFileAsArrayBuffer(file1),
|
||||
readFileAsArrayBuffer(file2)
|
||||
]);
|
||||
|
||||
// Header validation (prevents InvalidPDFException)
|
||||
await validatePdfHeader(data1, 'File 1');
|
||||
await validatePdfHeader(data2, 'File 2');
|
||||
|
||||
// Load PDFs
|
||||
const [pdf1, pdf2] = await Promise.all([
|
||||
loadPdfWithErrorHandling({ ...PDFJS_DEFAULT_OPTIONS, data: data1 }, 'File 1'),
|
||||
loadPdfWithErrorHandling({ ...PDFJS_DEFAULT_OPTIONS, data: data2 }, 'File 2')
|
||||
]);
|
||||
|
||||
// Extract text
|
||||
result1.innerHTML = 'Extracting text from File 1...';
|
||||
result2.innerHTML = 'Extracting text from File 2...';
|
||||
const [text1, text2] = await Promise.all([
|
||||
extractText(pdf1, 'File 1', result1),
|
||||
extractText(pdf2, 'File 2', result2)
|
||||
]);
|
||||
|
||||
if (text1.trim() === "" || text2.trim() === "") {
|
||||
throw new Error(noTextMessage);
|
||||
}
|
||||
if (status === 'success' && differences) {
|
||||
console.log('Differences:', differences);
|
||||
displayDifferences(differences);
|
||||
}
|
||||
if (event.data.status === 'warning') {
|
||||
console.warn(event.data.message);
|
||||
alert(event.data.message);
|
||||
}
|
||||
};
|
||||
|
||||
worker.postMessage({ text1, text2, color1, color2 });
|
||||
// Worker diff
|
||||
await processWithWorker(text1, text2, color1, color2, complexMessage, largeFilesMessage);
|
||||
|
||||
const displayDifferences = (differences) => {
|
||||
const resultDiv1 = document.getElementById("result1");
|
||||
const resultDiv2 = document.getElementById("result2");
|
||||
resultDiv1.innerHTML = "";
|
||||
resultDiv2.innerHTML = "";
|
||||
|
||||
differences.forEach(([color, word]) => {
|
||||
const span1 = document.createElement("span");
|
||||
const span2 = document.createElement("span");
|
||||
|
||||
if (color === color2) {
|
||||
span1.style.color = "transparent";
|
||||
span1.style.userSelect = "none";
|
||||
span2.style.color = color;
|
||||
}
|
||||
// If it's a deletion, show it in the first document and transparent in the second
|
||||
else if (color === color1) {
|
||||
span1.style.color = color;
|
||||
span2.style.color = "transparent";
|
||||
span2.style.userSelect = "none";
|
||||
}
|
||||
// If it's unchanged, show it in black in both
|
||||
else {
|
||||
span1.style.color = color;
|
||||
span2.style.color = color;
|
||||
}
|
||||
|
||||
span1.textContent = word;
|
||||
span2.textContent = word;
|
||||
resultDiv1.appendChild(span1);
|
||||
resultDiv2.appendChild(span2);
|
||||
|
||||
// Add space after each word, or a new line if the word ends with a full stop
|
||||
const spaceOrNewline1 = document.createElement("span");
|
||||
const spaceOrNewline2 = document.createElement("span");
|
||||
if (word.endsWith(".")) {
|
||||
spaceOrNewline1.innerHTML = "<br>";
|
||||
spaceOrNewline2.innerHTML = "<br>";
|
||||
} else {
|
||||
spaceOrNewline1.textContent = " ";
|
||||
spaceOrNewline2.textContent = " ";
|
||||
}
|
||||
resultDiv1.appendChild(spaceOrNewline1);
|
||||
resultDiv2.appendChild(spaceOrNewline2);
|
||||
});
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Comparison failed:', error);
|
||||
alert(error.message || invalidPdfMessage);
|
||||
result1.innerHTML = '';
|
||||
result2.innerHTML = '';
|
||||
} finally {
|
||||
button.disabled = false;
|
||||
button.textContent = submitText;
|
||||
}
|
||||
}
|
||||
|
||||
// FileReader helper
|
||||
/**
 * Reads a user-selected file fully into memory using FileReader.
 *
 * @param {Blob} file - The File/Blob to read.
 * @returns {Promise<ArrayBuffer>} Resolves with the file contents. Rejects with
 *   the reader's own error (or a generic Error if the reader exposes none), so
 *   callers can always rely on `error.message`.
 */
function readFileAsArrayBuffer(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    // FileReader hands its handlers a ProgressEvent, not an Error. Rejecting
    // with that event leaves callers without a usable message, so surface the
    // underlying failure stored on the reader instead.
    const fail = (fallbackMessage) => {
      reject(reader.error || new Error(fallbackMessage));
    };

    reader.onload = () => resolve(reader.result);
    reader.onerror = () => fail('Failed to read file.');
    // Without this handler an aborted read would leave the promise pending forever.
    reader.onabort = () => fail('File read was aborted.');

    reader.readAsArrayBuffer(file);
  });
}
|
||||
|
||||
// Header validation (PDF.js-specific, but client-side)
|
||||
/**
 * Cheap client-side sanity check run before the bytes are handed to PDF.js.
 *
 * The `%PDF-` signature is searched for within the first 1024 bytes rather than
 * only at offset 0: real-world PDFs sometimes carry a few leading bytes
 * (BOM, newline, transport junk) and PDF readers tolerate that, so insisting on
 * offset 0 would reject files that load fine.
 *
 * @param {ArrayBuffer} data - Raw file contents.
 * @param {string} fileName - Label used in log and error messages (e.g. 'File 1').
 * @returns {Promise<void>} Resolves when the data looks like a PDF.
 * @throws {Error} If no `%PDF-` signature is found near the start, or the data
 *   is shorter than 100 bytes.
 */
async function validatePdfHeader(data, fileName) {
  const HEADER_SEARCH_LIMIT = 1024;
  const PDF_SIGNATURE = '%PDF-';

  // Decode the leading bytes one char per byte; the signature is plain ASCII.
  const leadingBytes = new Uint8Array(data.slice(0, HEADER_SEARCH_LIMIT));
  const leadingText = String.fromCharCode(...leadingBytes);

  // Only the first 8 characters are logged/reported, as before.
  const headerStr = leadingText.slice(0, 8);
  console.log(`${fileName} header:`, headerStr);

  if (!leadingText.includes(PDF_SIGNATURE)) {
    throw new Error(`${fileName} is not a valid PDF (header: ${headerStr}).`);
  }
  if (data.byteLength < 100) {
    throw new Error(`${fileName} is too short.`);
  }
}
|
||||
|
||||
// PDF loading with catch
|
||||
/**
 * Opens a PDF through the global `pdfjsLib` and converts PDF.js'
 * `InvalidPDFException` into an Error with a user-presentable message.
 *
 * @param {object} options - Options object forwarded to `pdfjsLib.getDocument`.
 * @param {string} fileName - Label used in log and error messages (e.g. 'File 1').
 * @returns {Promise<object>} Resolves with the loaded PDF document.
 * @throws {Error} A friendly Error (with the original failure attached as
 *   `cause`) for `InvalidPDFException`; any other failure is rethrown unchanged.
 */
async function loadPdfWithErrorHandling(options, fileName) {
  try {
    const pdf = await pdfjsLib.getDocument(options).promise;
    console.log(`${fileName} loaded: ${pdf.numPages} pages`);
    return pdf;
  } catch (err) {
    console.error(`${fileName} load failed:`, err);
    // Guard against a null/undefined rejection value before reading `.name`.
    if (err && err.name === 'InvalidPDFException') {
      // Keep the original exception reachable for debugging; engines without
      // `cause` support simply ignore the second argument.
      throw new Error(`${fileName}: Invalid PDF structure. Re-upload.`, { cause: err });
    }
    throw err;
  }
}
|
||||
|
||||
// Text extraction
|
||||
/**
 * Extracts the text of every page of a loaded PDF, reporting progress as it goes.
 *
 * Pages are processed sequentially, in order. The text items of a page are
 * joined with single spaces, and the pages are joined with single spaces too.
 *
 * @param {object} pdf - Loaded PDF document exposing `numPages` and `getPage(n)`.
 * @param {string} fileName - Label shown in the progress text (e.g. 'File 1').
 * @param {object} [statusElement] - Element that receives "<fileName>: <percent>%"
 *   after each page. Progress reporting is skipped when omitted.
 * @returns {Promise<string>} The concatenated text of all pages ('' for 0 pages).
 */
async function extractText(pdf, fileName, statusElement) {
  const totalPages = pdf.numPages;
  const pages = [];

  for (let pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
    const page = await pdf.getPage(pageNumber);
    const content = await page.getTextContent();
    pages.push(content.items.map((item) => item.str).join(' '));

    if (statusElement) {
      const percent = Math.round((pageNumber / totalPages) * 100);
      // The progress line is plain text, so it is written via textContent and
      // never parsed as markup.
      statusElement.textContent = `${fileName}: ${percent}%`;
    }
  }

  return pages.join(' ');
}
|
||||
|
||||
// Worker processing
|
||||
/**
 * Runs the text diff in a dedicated Web Worker and renders the result.
 *
 * Protocol (messages posted to the worker):
 *   { type: 'SET_COMPLEX_MESSAGE', message }, { type: 'SET_TOO_LARGE_MESSAGE', message },
 *   then { type: 'COMPARE', text1, text2, color1, color2 }.
 * Replies are `{ status, differences, message }` where status is
 * 'error' (terminal), 'warning' (informational) or 'success' (terminal).
 *
 * @param {string} text1 - Extracted text of the first document.
 * @param {string} text2 - Extracted text of the second document.
 * @param {string} color1 - Highlight colour for content only in document 1.
 * @param {string} color2 - Highlight colour for content only in document 2.
 * @param {string} complexMessage - Localised "documents are complex" warning text.
 * @param {string} largeFilesMessage - Localised "documents are too large" text.
 * @param {number} [timeoutMs=30000] - Maximum time to wait for a terminal reply.
 * @returns {Promise<void>} Resolves once the differences have been displayed.
 *   Rejects on a worker error, an 'error' reply, a rendering failure, or timeout.
 */
function processWithWorker(text1, text2, color1, color2, complexMessage, largeFilesMessage, timeoutMs = 30000) {
  return new Promise((resolve, reject) => {
    const worker = new Worker('./js/compare/pdfWorker.js');
    let settled = false;

    // Single exit path: guarantees the timer is cleared and the worker is
    // terminated exactly once, whichever way the comparison ends.
    function finish(settle, value) {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      worker.terminate();
      settle(value);
    }

    const timer = setTimeout(() => {
      finish(reject, new Error('Timeout: Files too complex.'));
    }, timeoutMs);

    worker.onerror = (error) => {
      finish(reject, new Error(`Worker error: ${error.message}`));
    };

    worker.onmessage = (e) => {
      const { status, differences, message } = e.data;

      if (status === 'error') {
        finish(reject, new Error(message));
        return;
      }

      if (status === 'warning') {
        // A warning is not a terminal reply: keep the timeout armed so a worker
        // that warns and then stalls cannot leave the caller waiting forever.
        alert(message);
        return;
      }

      if (status === 'success') {
        try {
          if (differences) {
            displayDifferences(differences, color1, color2);
          }
        } catch (renderError) {
          finish(reject, renderError);
          return;
        }
        // Settle even when no differences payload is attached; otherwise the
        // promise would stay pending and the worker would never be released.
        finish(resolve);
      }
    };

    worker.postMessage({ type: 'SET_COMPLEX_MESSAGE', message: complexMessage });
    worker.postMessage({ type: 'SET_TOO_LARGE_MESSAGE', message: largeFilesMessage });
    worker.postMessage({ type: 'COMPARE', text1, text2, color1, color2 });
  });
}
|
||||
|
||||
// Display differences
|
||||
/**
 * Renders the diff result side by side into #result1 and #result2.
 *
 * Each entry of `differences` is a `[color, word]` pair. A word tagged with
 * `color2` is shown only in the second pane, and one tagged with `color1` only
 * in the first pane. In the other pane the same word is still emitted but made
 * transparent and unselectable, so both panes keep identical layout. Any other
 * colour is shown in both panes, falling back to 'black' when empty.
 * A line break follows words ending in "."; otherwise a single space.
 *
 * @param {Array<[string, string]>} differences - Ordered `[color, word]` pairs.
 * @param {string} color1 - Colour marking content only in the first document.
 * @param {string} color2 - Colour marking content only in the second document.
 */
function displayDifferences(differences, color1, color2) {
  const leftPane = document.getElementById("result1");
  const rightPane = document.getElementById("result2");
  leftPane.innerHTML = "";
  rightPane.innerHTML = "";

  // Builds the span for one word; hidden words keep their width but are invisible.
  const buildWordSpan = (word, visibleColor, hidden) => {
    const span = document.createElement("span");
    if (hidden) {
      span.style.color = "transparent";
      span.style.userSelect = "none";
    } else {
      span.style.color = visibleColor;
    }
    span.textContent = word;
    return span;
  };

  // Builds the separator that follows a word: a line break or a single space.
  const buildSeparator = (lineBreak) => {
    const separator = document.createElement("span");
    if (lineBreak) {
      separator.innerHTML = "<br>";
    } else {
      separator.textContent = " ";
    }
    return separator;
  };

  differences.forEach(([color, word]) => {
    // color2 takes precedence when both colours happen to be equal.
    const onlyInSecond = color === color2;
    const onlyInFirst = !onlyInSecond && color === color1;
    const visibleColor = onlyInSecond || onlyInFirst ? color : color || 'black';

    leftPane.appendChild(buildWordSpan(word, visibleColor, onlyInSecond));
    rightPane.appendChild(buildWordSpan(word, visibleColor, onlyInFirst));

    const endsSentence = word.endsWith(".");
    leftPane.appendChild(buildSeparator(endsSentence));
    rightPane.appendChild(buildSeparator(endsSentence));
  });
}
|
||||
</script>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -59,12 +59,24 @@
|
||||
</div>
|
||||
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
|
||||
</div>
|
||||
<script type="module" th:src="@{'/pdfjs-legacy/pdf.mjs'}"></script>
|
||||
<script type="module">
|
||||
import * as pdfjsLib from './pdfjs-legacy/pdf.mjs';
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs';
|
||||
window.pdfjsLib = pdfjsLib;
|
||||
</script>
|
||||
<script>
|
||||
const angleInput = document.getElementById("angleInput");
|
||||
const fileInput = document.getElementById("fileInput-input");
|
||||
const previewContainer = document.getElementById("previewContainer");
|
||||
// const preview = document.getElementById("pdf-preview");
|
||||
|
||||
// PDF.js setup (with CMap options to fix font loading warning)
|
||||
const PDFJS_DEFAULT_OPTIONS = {
|
||||
cMapUrl: pdfjsPath + 'cmaps/',
|
||||
cMapPacked: true,
|
||||
standardFontDataUrl: pdfjsPath + 'standard_fonts/',
|
||||
disableWorker: true // Avoids Legacy CMap errors
|
||||
};
|
||||
|
||||
fileInput.addEventListener("change", async function () {
|
||||
console.log("loading pdf");
|
||||
|
||||
@@ -74,9 +86,9 @@
|
||||
if (existingPreview) {
|
||||
existingPreview.remove();
|
||||
}
|
||||
var url = URL.createObjectURL(fileInput.files[0])
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'
|
||||
const pdf = await pdfjsLib.getDocument(url).promise;
|
||||
|
||||
const url = URL.createObjectURL(fileInput.files[0]);
|
||||
const pdf = await window.pdfjsLib.getDocument({ ...PDFJS_DEFAULT_OPTIONS, url }).promise;
|
||||
const page = await pdf.getPage(1);
|
||||
|
||||
const canvas = document.createElement("canvas");
|
||||
@@ -91,7 +103,7 @@
|
||||
}
|
||||
|
||||
// render the page onto the canvas
|
||||
var renderContext = {
|
||||
const renderContext = {
|
||||
canvasContext: canvas.getContext("2d"),
|
||||
viewport: page.getViewport({ scale: 1 })
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user