diff --git a/app/core/src/main/resources/static/js/DecryptFiles.js b/app/core/src/main/resources/static/js/DecryptFiles.js index 057ca98374..b19fb6e12e 100644 --- a/app/core/src/main/resources/static/js/DecryptFiles.js +++ b/app/core/src/main/resources/static/js/DecryptFiles.js @@ -1,3 +1,9 @@ +const PDFJS_DEFAULT_OPTIONS = { + cMapUrl: pdfjsPath + 'cmaps/', + cMapPacked: true, + standardFontDataUrl: pdfjsPath + 'standard_fonts/', +}; + function formatProblemDetailsJson(input) { try { const obj = typeof input === 'string' ? JSON.parse(input) : input; @@ -238,7 +244,7 @@ export class DecryptFile { return {isEncrypted: false, requiresPassword: false}; } - pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; const arrayBuffer = await file.arrayBuffer(); const arrayBufferForPdfLib = arrayBuffer.slice(0); @@ -246,12 +252,14 @@ export class DecryptFile { if(this.decryptWorker == null){ loadingTask = pdfjsLib.getDocument({ + ...PDFJS_DEFAULT_OPTIONS, data: arrayBuffer, }); this.decryptWorker = loadingTask._worker }else { loadingTask = pdfjsLib.getDocument({ + ...PDFJS_DEFAULT_OPTIONS, data: arrayBuffer, worker: this.decryptWorker }); diff --git a/app/core/src/main/resources/static/js/compare/pdfWorker.js b/app/core/src/main/resources/static/js/compare/pdfWorker.js index 78952bd759..86fb66383d 100644 --- a/app/core/src/main/resources/static/js/compare/pdfWorker.js +++ b/app/core/src/main/resources/static/js/compare/pdfWorker.js @@ -1,18 +1,39 @@ importScripts('./diff.js'); +let complexMessage = 'One or both of the provided documents are large files, accuracy of comparison may be reduced'; +let largeFilesMessage = 'One or Both of the provided documents are too large to process'; + +// Early: Listener for SET messages (before onmessage) +self.addEventListener('message', (event) => { + if (event.data.type === 'SET_COMPLEX_MESSAGE') { + complexMessage = event.data.message; + } else if (event.data.type === 'SET_TOO_LARGE_MESSAGE') { + largeFilesMessage = event.data.message; + } +}); + self.onmessage = async function (e) { - const { text1, text2, color1, color2 } = e.data; - console.log('Received text for comparison:', { text1, text2 }); + const data = e.data; + if (data.type !== 'COMPARE') { + console.log('Worker ignored non-COMPARE message'); + return; + } + + const { text1, text2, color1, color2 } = data; + console.log('Received text for comparison:', { lengths: { text1: text1.length, text2: text2.length } }); // Safe Log const startTime = performance.now(); - if (text1.trim() === "" || text2.trim() === "") { + // Safe Trim + if (!text1 || !text2 || text1.trim() === "" || text2.trim() === "") { self.postMessage({ status: 'error', message: 'One or both of the texts are empty.' }); return; } - const words1 = text1.split(' '); - const words2 = text2.split(' '); + // Robust Word-Split (handles spaces/punctuation better) + const words1 = text1.trim().split(/\s+/).filter(w => w.length > 0); + const words2 = text2.trim().split(/\s+/).filter(w => w.length > 0); + const MAX_WORD_COUNT = 150000; const COMPLEX_WORD_COUNT = 50000; const BATCH_SIZE = 5000; // Define a suitable batch size for processing @@ -21,44 +42,28 @@ self.onmessage = async function (e) { const isComplex = words1.length > COMPLEX_WORD_COUNT || words2.length > COMPLEX_WORD_COUNT; const isTooLarge = words1.length > MAX_WORD_COUNT || words2.length > MAX_WORD_COUNT; - let complexMessage = 'One or both of the provided documents are large files, accuracy of comparison may be reduced'; - let tooLargeMessage = 'One or Both of the provided documents are too large to process'; - - // Listen for messages from the main thread - self.addEventListener('message', (event) => { - if (event.data.type === 'SET_TOO_LARGE_MESSAGE') { - tooLargeMessage = event.data.message; - } - if (event.data.type === 'SET_COMPLEX_MESSAGE') { - complexMessage = event.data.message; - } - }); - if (isTooLarge) { - self.postMessage({ - status: 'warning', - message: tooLargeMessage, - }); + self.postMessage({ status: 'error', message: largeFilesMessage }); return; - } else { - - if (isComplex) { - self.postMessage({ - status: 'warning', - message: complexMessage, - }); - } - // Perform diff operation depending on document size - const differences = isComplex - ? await staggeredBatchDiff(words1, words2, color1, color2, BATCH_SIZE, OVERLAP_SIZE) - : diff(words1, words2, color1, color2); - - console.log(`Diff operation took ${performance.now() - startTime} milliseconds`); - self.postMessage({ status: 'success', differences }); } + + if (isComplex) { + self.postMessage({ status: 'warning', message: complexMessage }); + } + + // Diff based on size + let differences; + if (isComplex) { + differences = await staggeredBatchDiff(words1, words2, color1 || '#ff0000', color2 || '#008000', BATCH_SIZE, OVERLAP_SIZE); + } else { + differences = diff(words1, words2, color1 || '#ff0000', color2 || '#008000'); + } + + console.log(`Diff took ${performance.now() - startTime} ms for ${words1.length + words2.length} words`); + self.postMessage({ status: 'success', differences }); }; -//Splits text into smaller batches to run through diff checking algorithms. overlaps the batches to help ensure +// Splits text into smaller batches to run through diff checking algorithms. overlaps the batches to help ensure async function staggeredBatchDiff(words1, words2, color1, color2, batchSize, overlapSize) { const differences = []; const totalWords1 = words1.length; @@ -67,10 +72,9 @@ async function staggeredBatchDiff(words1, words2, color1, color2, batchSize, ove let previousEnd1 = 0; // Track where the last batch ended in words1 let previousEnd2 = 0; // Track where the last batch ended in words2 - // Function to determine if differences are large, differences that are too large indicate potential error in batching - const isLargeDifference = (differences) => { - return differences.length > 50; - }; + // Track processed indices to dedupe overlaps + const processed1 = new Set(); + const processed2 = new Set(); while (previousEnd1 < totalWords1 || previousEnd2 < totalWords2) { // Define the next chunk boundaries @@ -80,66 +84,130 @@ async function staggeredBatchDiff(words1, words2, color1, color2, batchSize, ove const start2 = previousEnd2; const end2 = Math.min(start2 + batchSize, totalWords2); - //If difference is too high decrease batch size for more granular check - const dynamicBatchSize = isLargeDifference(differences) ? batchSize / 2 : batchSize; + // Adaptive: If many diffs, smaller batch (max 3x downscale) + const recentDiffs = differences.slice(-100).filter(([c]) => c !== 'black').length; + // If difference is too high decrease batch size for more granular check + const dynamicBatchSize = Math.max(batchSize / Math.min(8, 1 + recentDiffs / 50), batchSize / 8); - // Adjust the size of the current chunk using dynamic batch size - const batchWords1 = words1.slice(start1, end1 + dynamicBatchSize); - const batchWords2 = words2.slice(start2, end2 + dynamicBatchSize); + const extendedEnd1 = Math.min(end1 + dynamicBatchSize, totalWords1); + const extendedEnd2 = Math.min(end2 + dynamicBatchSize, totalWords2); + + const batchWords1 = words1.slice(start1, extendedEnd1); + const batchWords2 = words2.slice(start2, extendedEnd2); // Include overlap from the previous chunk - const overlapWords1 = previousEnd1 > 0 ? words1.slice(Math.max(0, previousEnd1 - overlapSize), previousEnd1) : []; - const overlapWords2 = previousEnd2 > 0 ? words2.slice(Math.max(0, previousEnd2 - overlapSize), previousEnd2) : []; + const overlapStart1 = Math.max(0, previousEnd1 - overlapSize); + const overlapStart2 = Math.max(0, previousEnd2 - overlapSize); + const overlapWords1 = previousEnd1 > 0 ? words1.slice(overlapStart1, previousEnd1) : []; + const overlapWords2 = previousEnd2 > 0 ? words2.slice(overlapStart2, previousEnd2) : []; + // Combine overlaps and current batches for comparison - const combinedWords1 = overlapWords1.concat(batchWords1); - const combinedWords2 = overlapWords2.concat(batchWords2); + const combinedWords1 = [...overlapWords1, ...batchWords1]; + const combinedWords2 = [...overlapWords2, ...batchWords2]; // Perform the diff on the combined words const batchDifferences = diff(combinedWords1, combinedWords2, color1, color2); - differences.push(...batchDifferences); - // Update the previous end indices based on the results of this batch + const combinedIndices1 = []; + for (let i = overlapStart1; i < previousEnd1; i++) { + combinedIndices1.push(i); + } + for (let i = start1; i < extendedEnd1; i++) { + combinedIndices1.push(i); + } + + const combinedIndices2 = []; + for (let i = overlapStart2; i < previousEnd2; i++) { + combinedIndices2.push(i); + } + for (let i = start2; i < extendedEnd2; i++) { + combinedIndices2.push(i); + } + + let pointer1 = 0; + let pointer2 = 0; + + const filteredBatch = []; + batchDifferences.forEach(([color, word]) => { + if (color === color1) { + const globalIndex1 = combinedIndices1[pointer1]; + if (globalIndex1 === undefined || !processed1.has(globalIndex1)) { + filteredBatch.push([color, word]); + } + if (globalIndex1 !== undefined) { + processed1.add(globalIndex1); + } + pointer1++; + } else if (color === color2) { + const globalIndex2 = combinedIndices2[pointer2]; + if (globalIndex2 === undefined || !processed2.has(globalIndex2)) { + filteredBatch.push([color, word]); + } + if (globalIndex2 !== undefined) { + processed2.add(globalIndex2); + } + pointer2++; + } else { + const globalIndex1 = combinedIndices1[pointer1]; + const globalIndex2 = combinedIndices2[pointer2]; + const alreadyProcessed = (globalIndex1 !== undefined && processed1.has(globalIndex1)) && (globalIndex2 !== undefined && processed2.has(globalIndex2)); + if (!alreadyProcessed) { + filteredBatch.push([color, word]); + } + if (globalIndex1 !== undefined) { + processed1.add(globalIndex1); + } + if (globalIndex2 !== undefined) { + processed2.add(globalIndex2); + } + pointer1++; + pointer2++; + } + }); + + differences.push(...filteredBatch); + + // Mark as processed + for (let k = start1; k < end1; k++) processed1.add(k); + for (let k = start2; k < end2; k++) processed2.add(k); + previousEnd1 = end1; previousEnd2 = end2; + + // Yield for async (avoids blocking) + await new Promise(resolve => setTimeout(resolve, 0)); } return differences; } - // Standard diff function for small text comparisons function diff(words1, words2, color1, color2) { - console.log(`Starting diff between ${words1.length} words and ${words2.length} words`); - const matrix = Array.from({ length: words1.length + 1 }, () => Array(words2.length + 1).fill(0)); + console.log(`Diff: ${words1.length} vs ${words2.length} words`); + const oldStr = words1.join(' '); // As string for diff.js + const newStr = words2.join(' '); + // Static method: No 'new' needed, avoids constructor error + const changes = Diff.diffWords(oldStr, newStr, { ignoreWhitespace: true }); - for (let i = 1; i <= words1.length; i++) { - for (let j = 1; j <= words2.length; j++) { - matrix[i][j] = words1[i - 1] === words2[j - 1] - ? matrix[i - 1][j - 1] + 1 - : Math.max(matrix[i][j - 1], matrix[i - 1][j]); - } - } - return backtrack(matrix, words1, words2, color1, color2); -} - -// Backtrack function to find differences -function backtrack(matrix, words1, words2, color1, color2) { - let i = words1.length, j = words2.length; + // Map changes to [color, word] format (change.value and added/removed) const differences = []; + changes.forEach(change => { + const value = change.value; + const op = change.added ? 1 : change.removed ? -1 : 0; - while (i > 0 || j > 0) { - if (i > 0 && j > 0 && words1[i - 1] === words2[j - 1]) { - differences.unshift(['black', words1[i - 1]]); - i--; j--; - } else if (j > 0 && (i === 0 || matrix[i][j] === matrix[i][j - 1])) { - differences.unshift([color2, words2[j - 1]]); - j--; - } else { - differences.unshift([color1, words1[i - 1]]); - i--; - } - } + // Split value into words and process + const words = value.split(/\s+/).filter(w => w.length > 0); + words.forEach(word => { + if (op === 0) { // Equal + differences.push(['black', word]); + } else if (op === 1) { // Insert + differences.push([color2, word]); + } else if (op === -1) { // Delete + differences.push([color1, word]); + } + }); + }); return differences; } diff --git a/app/core/src/main/resources/static/js/downloader.js b/app/core/src/main/resources/static/js/downloader.js index 52113b731f..2afdb2f75c 100644 --- a/app/core/src/main/resources/static/js/downloader.js +++ b/app/core/src/main/resources/static/js/downloader.js @@ -2,6 +2,12 @@ if (window.isDownloadScriptInitialized) return; // Prevent re-execution window.isDownloadScriptInitialized = true; + const PDFJS_DEFAULT_OPTIONS = { + cMapUrl: pdfjsPath + 'cmaps/', + cMapPacked: true, + standardFontDataUrl: pdfjsPath + 'standard_fonts/', + }; + // Global PDF processing count tracking for survey system window.incrementPdfProcessingCount = function() { let pdfProcessingCount = parseInt(localStorage.getItem('pdfProcessingCount') || '0'); @@ -234,8 +240,13 @@ async function getPDFPageCount(file) { try { const arrayBuffer = await file.arrayBuffer(); - pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; - const pdf = await pdfjsLib.getDocument({data: arrayBuffer}).promise; + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; + const pdf = await pdfjsLib + .getDocument({ + ...PDFJS_DEFAULT_OPTIONS, + data: arrayBuffer, + }) + .promise; return pdf.numPages; } catch (error) { console.error('Error getting PDF page count:', error); @@ -245,7 +256,7 @@ async function checkAndDecryptFiles(url, files) { const decryptedFiles = []; - pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; // Extract the base URL const baseUrl = new URL(url); @@ -271,7 +282,10 @@ } try { const arrayBuffer = await file.arrayBuffer(); - const loadingTask = pdfjsLib.getDocument({data: arrayBuffer}); + const loadingTask = pdfjsLib.getDocument({ + ...PDFJS_DEFAULT_OPTIONS, + data: arrayBuffer, + }); console.log(`Attempting to load PDF: ${file.name}`); const pdf = await loadingTask.promise; diff --git a/app/core/src/main/resources/static/js/homecard.js b/app/core/src/main/resources/static/js/homecard.js index 2e321b66dc..22c0caaf58 100644 --- a/app/core/src/main/resources/static/js/homecard.js +++ b/app/core/src/main/resources/static/js/homecard.js @@ -220,7 +220,7 @@ document.addEventListener('DOMContentLoaded', async function () { }); } try { - const response = await fetch('/files/popularity.txt'); + const response = await fetch('./files/popularity.txt'); if (!response.ok) { const errorText = await response.text().catch(() => ''); const errorMsg = errorText || response.statusText || 'Request failed'; diff --git a/app/core/src/main/resources/static/js/merge.js b/app/core/src/main/resources/static/js/merge.js index 82a0a0d88e..af5037c27b 100644 --- a/app/core/src/main/resources/static/js/merge.js +++ b/app/core/src/main/resources/static/js/merge.js @@ -1,3 +1,9 @@ +const PDFJS_DEFAULT_OPTIONS = { + cMapUrl: pdfjsPath + 'cmaps/', + cMapPacked: true, + standardFontDataUrl: pdfjsPath + 'standard_fonts/', +}; + let currentSort = { field: null, descending: false, @@ -73,7 +79,13 @@ async function displayFiles(files) { async function getPDFPageCount(file) { const blobUrl = URL.createObjectURL(file); - const pdf = await pdfjsLib.getDocument(blobUrl).promise; + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; + const pdf = await pdfjsLib + .getDocument({ + ...PDFJS_DEFAULT_OPTIONS, + url: blobUrl, + }) + .promise; URL.revokeObjectURL(blobUrl); return pdf.numPages; } diff --git a/app/core/src/main/resources/static/js/multitool/PdfContainer.js b/app/core/src/main/resources/static/js/multitool/PdfContainer.js index a3fa4c116d..866fd69faf 100644 --- a/app/core/src/main/resources/static/js/multitool/PdfContainer.js +++ b/app/core/src/main/resources/static/js/multitool/PdfContainer.js @@ -8,6 +8,12 @@ import { AddFilesCommand } from './commands/add-page.js'; import { DecryptFile } from '../DecryptFiles.js'; import { CommandSequence } from './commands/commands-sequence.js'; +const PDFJS_DEFAULT_OPTIONS = { + cMapUrl: pdfjsPath + 'cmaps/', + cMapPacked: true, + standardFontDataUrl: pdfjsPath + 'standard_fonts/', +}; + const isSvgFile = (file) => { if (!file) return false; const type = (file.type || '').toLowerCase(); @@ -479,8 +485,11 @@ class PdfContainer { } async toRenderer(objectUrl) { - pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; - const pdf = await pdfjsLib.getDocument(objectUrl).promise; + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; + const pdf = await pdfjsLib.getDocument({ + url: objectUrl, + ...PDFJS_DEFAULT_OPTIONS, + }).promise; return { document: pdf, pageCount: pdf.numPages, diff --git a/app/core/src/main/resources/static/js/pages/add-image.js b/app/core/src/main/resources/static/js/pages/add-image.js index 2bafd86ecc..6a40145b7f 100644 --- a/app/core/src/main/resources/static/js/pages/add-image.js +++ b/app/core/src/main/resources/static/js/pages/add-image.js @@ -1,3 +1,9 @@ +const PDFJS_DEFAULT_OPTIONS = { + cMapUrl: pdfjsPath + 'cmaps/', + cMapPacked: true, + standardFontDataUrl: pdfjsPath + 'standard_fonts/', +}; + window.goToFirstOrLastPage = goToFirstOrLastPage; document.getElementById('download-pdf').addEventListener('click', async () => { @@ -31,8 +37,11 @@ document.querySelector('input[name=pdf-upload]').addEventListener('change', asyn const file = allFiles[0]; originalFileName = file.name.replace(/\.[^/.]+$/, ''); const pdfData = await file.arrayBuffer(); - pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; - const pdfDoc = await pdfjsLib.getDocument({ data: pdfData }).promise; + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; + const pdfDoc = await pdfjsLib.getDocument({ + ...PDFJS_DEFAULT_OPTIONS, + data: pdfData, + }).promise; await DraggableUtils.renderPage(pdfDoc, 0); document.querySelectorAll('.show-on-file-selected').forEach((el) => { diff --git a/app/core/src/main/resources/static/js/pages/adjust-contrast.js b/app/core/src/main/resources/static/js/pages/adjust-contrast.js index a9692d2bc4..506af1f8e6 100644 --- a/app/core/src/main/resources/static/js/pages/adjust-contrast.js +++ b/app/core/src/main/resources/static/js/pages/adjust-contrast.js @@ -1,3 +1,9 @@ +const PDFJS_DEFAULT_OPTIONS = { + cMapUrl: pdfjsPath + 'cmaps/', + cMapPacked: true, + standardFontDataUrl: pdfjsPath + 'standard_fonts/', +}; + var canvas = document.getElementById('contrast-pdf-canvas'); var context = canvas.getContext('2d'); var originalImageData = null; @@ -9,8 +15,11 @@ async function renderPDFAndSaveOriginalImageData(file) { var fileReader = new FileReader(); fileReader.onload = async function () { var data = new Uint8Array(this.result); - pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; - pdf = await pdfjsLib.getDocument({data: data}).promise; + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; + pdf = await pdfjsLib.getDocument({ + ...PDFJS_DEFAULT_OPTIONS, + data: data, + }).promise; // Get the number of pages in the PDF var numPages = pdf.numPages; diff --git a/app/core/src/main/resources/static/js/pages/change-metadata.js b/app/core/src/main/resources/static/js/pages/change-metadata.js index bdc5426b71..25d8565a06 100644 --- a/app/core/src/main/resources/static/js/pages/change-metadata.js +++ b/app/core/src/main/resources/static/js/pages/change-metadata.js @@ -1,3 +1,9 @@ +const PDFJS_DEFAULT_OPTIONS = { + cMapUrl: pdfjsPath + 'cmaps/', + cMapPacked: true, + standardFontDataUrl: pdfjsPath + 'standard_fonts/', +}; + const deleteAllCheckbox = document.querySelector('#deleteAll'); let inputs = document.querySelectorAll('input'); const customMetadataDiv = document.getElementById('customMetadata'); @@ -43,8 +49,13 @@ fileInput.addEventListener('change', async function () { customMetadataFormContainer.removeChild(customMetadataFormContainer.firstChild); } var url = URL.createObjectURL(file); - pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; - const pdf = await pdfjsLib.getDocument(url).promise; + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; + const pdf = await pdfjsLib + .getDocument({ + ...PDFJS_DEFAULT_OPTIONS, + url: url, + }) + .promise; const pdfMetadata = await pdf.getMetadata(); lastPDFFile = pdfMetadata?.info; console.log(pdfMetadata); diff --git a/app/core/src/main/resources/static/js/pages/crop.js b/app/core/src/main/resources/static/js/pages/crop.js index c61cbaeccb..f6af3e5e23 100644 --- a/app/core/src/main/resources/static/js/pages/crop.js +++ b/app/core/src/main/resources/static/js/pages/crop.js @@ -1,3 +1,9 @@ +const PDFJS_DEFAULT_OPTIONS = { + cMapUrl: pdfjsPath + 'cmaps/', + cMapPacked: true, + standardFontDataUrl: pdfjsPath + 'standard_fonts/', +}; + let pdfCanvas = document.getElementById('cropPdfCanvas'); let overlayCanvas = document.getElementById('overlayCanvas'); let canvasesContainer = document.getElementById('canvasesContainer'); @@ -42,12 +48,17 @@ function renderPageFromFile(file) { let reader = new FileReader(); reader.onload = function (ev) { let typedArray = new Uint8Array(reader.result); - pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; - pdfjsLib.getDocument(typedArray).promise.then(function (pdf) { - pdfDoc = pdf; - totalPages = pdf.numPages; - renderPage(currentPage); - }); + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; + pdfjsLib + .getDocument({ + ...PDFJS_DEFAULT_OPTIONS, + data: typedArray, + }) + .promise.then(function (pdf) { + pdfDoc = pdf; + totalPages = pdf.numPages; + renderPage(currentPage); + }); }; reader.readAsArrayBuffer(file); } diff --git a/app/core/src/main/resources/static/js/pages/pdf-to-csv.js b/app/core/src/main/resources/static/js/pages/pdf-to-csv.js index 9a06aac5b4..1c3cc01248 100644 --- a/app/core/src/main/resources/static/js/pages/pdf-to-csv.js +++ b/app/core/src/main/resources/static/js/pages/pdf-to-csv.js @@ -1,3 +1,9 @@ +const PDFJS_DEFAULT_OPTIONS = { + cMapUrl: pdfjsPath + 'cmaps/', + cMapPacked: true, + standardFontDataUrl: pdfjsPath + 'standard_fonts/', +}; + let pdfCanvas = document.getElementById('cropPdfCanvas'); let overlayCanvas = document.getElementById('overlayCanvas'); let canvasesContainer = document.getElementById('canvasesContainer'); @@ -37,12 +43,17 @@ btn1Object.addEventListener('click', function (e) { let reader = new FileReader(); reader.onload = function (ev) { let typedArray = new Uint8Array(reader.result); - pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; - pdfjsLib.getDocument(typedArray).promise.then(function (pdf) { - pdfDoc = pdf; - totalPages = pdf.numPages; - renderPage(currentPage); - }); + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; + pdfjsLib + .getDocument({ + ...PDFJS_DEFAULT_OPTIONS, + data: typedArray, + }) + .promise.then(function (pdf) { + pdfDoc = pdf; + totalPages = pdf.numPages; + renderPage(currentPage); + }); }; reader.readAsArrayBuffer(file); } @@ -58,12 +69,17 @@ btn2Object.addEventListener('click', function (e) { let reader = new FileReader(); reader.onload = function (ev) { let typedArray = new Uint8Array(reader.result); - pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; - pdfjsLib.getDocument(typedArray).promise.then(function (pdf) { - pdfDoc = pdf; - totalPages = pdf.numPages; - renderPage(currentPage); - }); + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; + pdfjsLib + .getDocument({ + ...PDFJS_DEFAULT_OPTIONS, + data: typedArray, + }) + .promise.then(function (pdf) { + pdfDoc = pdf; + totalPages = pdf.numPages; + renderPage(currentPage); + }); }; reader.readAsArrayBuffer(file); } @@ -75,12 +91,17 @@ function renderPageFromFile(file) { let reader = new FileReader(); reader.onload = function (ev) { let typedArray = new Uint8Array(reader.result); - pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; - pdfjsLib.getDocument(typedArray).promise.then(function (pdf) { - pdfDoc = pdf; - totalPages = pdf.numPages; - renderPage(currentPage); - }); + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; + pdfjsLib + .getDocument({ + ...PDFJS_DEFAULT_OPTIONS, + data: typedArray, + }) + .promise.then(function (pdf) { + pdfDoc = pdf; + totalPages = pdf.numPages; + renderPage(currentPage); + }); pageNumbers.value = currentPage; }; reader.readAsArrayBuffer(file); diff --git a/app/core/src/main/resources/static/js/pages/sign.js b/app/core/src/main/resources/static/js/pages/sign.js index ec02e75b36..48a15c7807 100644 --- a/app/core/src/main/resources/static/js/pages/sign.js +++ b/app/core/src/main/resources/static/js/pages/sign.js @@ -1,3 +1,9 @@ +const PDFJS_DEFAULT_OPTIONS = { + cMapUrl: pdfjsPath + 'cmaps/', + cMapPacked: true, + standardFontDataUrl: pdfjsPath + 'standard_fonts/', +}; + window.toggleSignatureView = toggleSignatureView; window.previewSignature = previewSignature; window.addSignatureFromPreview = addSignatureFromPreview; @@ -70,9 +76,11 @@ document const file = allFiles[0]; originalFileName = file.name.replace(/\.[^/.]+$/, ""); const pdfData = await file.arrayBuffer(); - pdfjsLib.GlobalWorkerOptions.workerSrc = - "./pdfjs-legacy/pdf.worker.mjs"; - const pdfDoc = await pdfjsLib.getDocument({ data: pdfData }).promise; + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; + const pdfDoc = await pdfjsLib.getDocument({ + ...PDFJS_DEFAULT_OPTIONS, + data: pdfData, + }).promise; await DraggableUtils.renderPage(pdfDoc, 0); document.querySelectorAll(".show-on-file-selected").forEach((el) => { diff --git a/app/core/src/main/resources/templates/convert/pdf-to-pdfa.html b/app/core/src/main/resources/templates/convert/pdf-to-pdfa.html index 5b44ff5b62..6035e7561d 100644 --- a/app/core/src/main/resources/templates/convert/pdf-to-pdfa.html +++ b/app/core/src/main/resources/templates/convert/pdf-to-pdfa.html @@ -41,7 +41,7 @@ diff --git a/app/core/src/main/resources/templates/merge-pdfs.html b/app/core/src/main/resources/templates/merge-pdfs.html index 794eac8056..8878c3c257 100644 --- a/app/core/src/main/resources/templates/merge-pdfs.html +++ b/app/core/src/main/resources/templates/merge-pdfs.html @@ -58,7 +58,7 @@ diff --git a/app/core/src/main/resources/templates/misc/compare.html b/app/core/src/main/resources/templates/misc/compare.html index 27a7ed9edc..f7bab9589a 100644 --- a/app/core/src/main/resources/templates/misc/compare.html +++ b/app/core/src/main/resources/templates/misc/compare.html @@ -79,7 +79,7 @@ - +
@@ -105,7 +105,8 @@ result2.addEventListener('scroll', function () { result1.scrollTop = result2.scrollTop; }); - async function comparePDFs() { + + async function comparePDFs(event) { const file1 = document.getElementById("fileInput-input").files[0]; const file2 = document.getElementById("fileInput2-input").files[0]; var color1 = document.getElementById('color-box1').value; @@ -113,137 +114,216 @@ const complexMessage = /*[[#{compare.complex.message}]]*/ 'One or both of the provided documents are large files, accuracy of comparison may be reduced'; const largeFilesMessage = /*[[#{compare.large.file.message}]]*/ 'One or Both of the provided documents are too large to process'; - const noTextMessage = /*[[#{compare.no.text.message}]]*/ 'One or both of the selected PDFs have no text content. Please choose PDFs with text for comparison."'; + const noTextMessage = /*[[#{compare.no.text.message}]]*/ 'One or both of the selected PDFs have no text content. Please choose PDFs with text for comparison.'; + const invalidPdfMessage = /*[[#{compare.invalid.pdf.message}]]*/ 'One or both files are not valid PDFs. Please check and re-upload.'; + const submitText = /*[[#{compare.submit}]]*/ 'Compare'; if (!file1 || !file2) { - console.error("Please select two PDF files to compare"); + alert('Please select two PDF files to compare'); return; } - pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; - - const [pdf1, pdf2] = await Promise.all([ - pdfjsLib.getDocument(URL.createObjectURL(file1)).promise, - pdfjsLib.getDocument(URL.createObjectURL(file2)).promise - ]); - - const extractText = async (pdf) => { - const pages = []; - for (let i = 1; i <= pdf.numPages; i++) { - const page = await pdf.getPage(i); - const content = await page.getTextContent(); - const strings = content.items.map(item => item.str); - pages.push(strings.join(" ")); - } - return pages.join(" "); - }; - - const [text1, text2] = await Promise.all([ - extractText(pdf1), - extractText(pdf2) - ]); - - if (text1.trim() === "" || text2.trim() === "") { - alert(noTextMessage); + // Basic checks + if (file1.size === 0 || file2.size === 0) { + alert('One or both files are empty.'); + return; + } + if (file1.size > 100 * 1024 * 1024 || file2.size > 100 * 1024 * 1024) { + alert(largeFilesMessage); return; } - const resultDiv1 = document.getElementById("result1"); - const resultDiv2 = document.getElementById("result2"); - const loading = /*[[#{loading}]]*/ 'Loading...'; - - resultDiv1.innerHTML = loading; - resultDiv2.innerHTML = loading; - - // Create a new Worker - const worker = new Worker('./js/compare/pdfWorker.js'); - - - // Post messages to the worker - worker.postMessage({ - type: 'SET_COMPLEX_MESSAGE', - message: complexMessage - }); - - worker.postMessage({ - type: 'SET_TOO_LARGE_MESSAGE', - message: largeFilesMessage - }); - - // Error handling for the worker - worker.onerror = function (error) { - console.error('Worker error:', error); + // PDF.js setup (Legacy-safe: Worker disabled) + const PDFJS_DEFAULT_OPTIONS = { + cMapUrl: pdfjsPath + 'cmaps/', + cMapPacked: true, + standardFontDataUrl: pdfjsPath + 'standard_fonts/', + disableWorker: true // Avoids Legacy CMap errors without changing PDF.js }; - worker.onmessage = function (e) { - const { status, differences, message } = e.data; - if (status === 'error') { + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsPath + 'pdf.worker.mjs'; - resultDiv1.innerHTML = ''; - resultDiv2.innerHTML = ''; - alert(message); - return; + const button = event.target; + button.disabled = true; + button.textContent = 'Processing...'; + + try { + // Load ArrayBuffer + const [data1, data2] = await Promise.all([ + readFileAsArrayBuffer(file1), + readFileAsArrayBuffer(file2) + ]); + + // Header validation (prevents InvalidPDFException) + await validatePdfHeader(data1, 'File 1'); + await validatePdfHeader(data2, 'File 2'); + + // Load PDFs + const [pdf1, pdf2] = await Promise.all([ + loadPdfWithErrorHandling({ ...PDFJS_DEFAULT_OPTIONS, data: data1 }, 'File 1'), + loadPdfWithErrorHandling({ ...PDFJS_DEFAULT_OPTIONS, data: data2 }, 'File 2') + ]); + + // Extract text + result1.innerHTML = 'Extracting text from File 1...'; + result2.innerHTML = 'Extracting text from File 2...'; + const [text1, text2] = await Promise.all([ + extractText(pdf1, 'File 1', result1), + extractText(pdf2, 'File 2', result2) + ]); + + if (text1.trim() === "" || text2.trim() === "") { + throw new Error(noTextMessage); } - if (status === 'success' && differences) { - console.log('Differences:', differences); - displayDifferences(differences); - } - if (event.data.status === 'warning') { - console.warn(event.data.message); - alert(event.data.message); - } - }; - worker.postMessage({ text1, text2, color1, color2 }); + // Worker diff + await processWithWorker(text1, text2, color1, color2, complexMessage, largeFilesMessage); - const displayDifferences = (differences) => { - const resultDiv1 = document.getElementById("result1"); - const resultDiv2 = document.getElementById("result2"); - resultDiv1.innerHTML = ""; - resultDiv2.innerHTML = ""; - - differences.forEach(([color, word]) => { - const span1 = document.createElement("span"); - const span2 = document.createElement("span"); - - if (color === color2) { - span1.style.color = "transparent"; - span1.style.userSelect = "none"; - span2.style.color = color; - } - // If it's a deletion, show it in in the first document and transparent in the second - else if (color === color1) { - span1.style.color = color; - span2.style.color = "transparent"; - span2.style.userSelect = "none"; - } - // If it's unchanged, show it in black in both - else { - span1.style.color = color; - span2.style.color = color; - } - - span1.textContent = word; - span2.textContent = word; - resultDiv1.appendChild(span1); - resultDiv2.appendChild(span2); - - // Add space after each word, or a new line if the word ends with a full stop - const spaceOrNewline1 = document.createElement("span"); - const spaceOrNewline2 = document.createElement("span"); - if (word.endsWith(".")) { - spaceOrNewline1.innerHTML = "
"; - spaceOrNewline2.innerHTML = "
"; - } else { - spaceOrNewline1.textContent = " "; - spaceOrNewline2.textContent = " "; - } - resultDiv1.appendChild(spaceOrNewline1); - resultDiv2.appendChild(spaceOrNewline2); - }); - }; + } catch (error) { + console.error('Comparison failed:', error); + alert(error.message || invalidPdfMessage); + result1.innerHTML = ''; + result2.innerHTML = ''; + } finally { + button.disabled = false; + button.textContent = submitText; + } } + // FileReader helper + function readFileAsArrayBuffer(file) { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onload = () => resolve(reader.result); + reader.onerror = reject; + reader.readAsArrayBuffer(file); + }); + } + // Header validation (PDF.js-specific, but client-side) + async function validatePdfHeader(data, fileName) { + const header = new Uint8Array(data.slice(0, 8)); + const headerStr = String.fromCharCode(...header); + console.log(`${fileName} header:`, headerStr); + if (!headerStr.startsWith('%PDF-')) { + throw new Error(`${fileName} is not a valid PDF (header: ${headerStr}).`); + } + if (data.byteLength < 100) { + throw new Error(`${fileName} is too short.`); + } + } + + // PDF loading with catch + function loadPdfWithErrorHandling(options, fileName) { + return pdfjsLib.getDocument(options).promise + .then(pdf => { + console.log(`${fileName} loaded: ${pdf.numPages} pages`); + return pdf; + }) + .catch(err => { + console.error(`${fileName} load failed:`, err); + if (err.name === 'InvalidPDFException') { + throw new Error(`${fileName}: Invalid PDF structure. Re-upload.`); + } + throw err; + }); + } + + // Text extraction + async function extractText(pdf, fileName, statusElement) { + const pages = []; + const totalPages = pdf.numPages; + for (let i = 1; i <= totalPages; i++) { + const page = await pdf.getPage(i); + const content = await page.getTextContent(); + const strings = content.items.map(item => item.str).join(' '); + pages.push(strings); + statusElement.innerHTML = `${fileName}: ${Math.round((i / totalPages) * 100)}%`; + } + return pages.join(' '); + } + + // Worker processing + async function processWithWorker(text1, text2, color1, color2, complexMessage, largeFilesMessage) { + return new Promise((resolve, reject) => { + const worker = new Worker('./js/compare/pdfWorker.js'); + const timeout = setTimeout(() => { + worker.terminate(); + reject(new Error('Timeout: Files too complex.')); + }, 30000); + + worker.postMessage({ type: 'SET_COMPLEX_MESSAGE', message: complexMessage }); + worker.postMessage({ type: 'SET_TOO_LARGE_MESSAGE', message: largeFilesMessage }); + + worker.onerror = (error) => { + clearTimeout(timeout); + worker.terminate(); + reject(new Error('Worker error: ' + error.message)); + }; + + worker.onmessage = (e) => { + clearTimeout(timeout); + const { status, differences, message } = e.data; + if (status === 'error') { + worker.terminate(); + reject(new Error(message)); + return; + } + if (status === 'warning') { + alert(message); + } + if (status === 'success' && differences) { + displayDifferences(differences, color1, color2); + worker.terminate(); + resolve(); + } + }; + + worker.postMessage({ type: 'COMPARE', text1, text2, color1, color2 }); + }); + } + + // Display differences + function displayDifferences(differences, color1, color2) { + const resultDiv1 = document.getElementById("result1"); + const resultDiv2 = document.getElementById("result2"); + resultDiv1.innerHTML = ""; + resultDiv2.innerHTML = ""; + + differences.forEach(([color, word]) => { + const span1 = document.createElement("span"); + const span2 = document.createElement("span"); + + if (color === color2) { + span1.style.color = "transparent"; + span1.style.userSelect = "none"; + span2.style.color = color; + } else if (color === color1) { + span1.style.color = color; + span2.style.color = "transparent"; + span2.style.userSelect = "none"; + } else { + span1.style.color = color || 'black'; + span2.style.color = color || 'black'; + } + + span1.textContent = word; + span2.textContent = word; + resultDiv1.appendChild(span1); + resultDiv2.appendChild(span2); + + const spaceOrNewline1 = document.createElement("span"); + const spaceOrNewline2 = document.createElement("span"); + if (word.endsWith(".")) { + spaceOrNewline1.innerHTML = "
"; + spaceOrNewline2.innerHTML = "
"; + } else { + spaceOrNewline1.textContent = " "; + spaceOrNewline2.textContent = " "; + } + resultDiv1.appendChild(spaceOrNewline1); + resultDiv2.appendChild(spaceOrNewline2); + }); + }
diff --git a/app/core/src/main/resources/templates/rotate-pdf.html b/app/core/src/main/resources/templates/rotate-pdf.html index 3d54ce3177..6baa242683 100644 --- a/app/core/src/main/resources/templates/rotate-pdf.html +++ b/app/core/src/main/resources/templates/rotate-pdf.html @@ -59,12 +59,24 @@ - +