mirror of
				https://github.com/Frooodle/Stirling-PDF.git
				synced 2025-10-25 11:17:28 +02:00 
			
		
		
		
	Feature/298 improve compare performance (#2124)
* Implement Diff.js * Compare feature - add service worker and improve efficiency for large files * Compare - messages updated to be compatible with language packs * Compare - Acknowledge Diff.js usage * Add message warning there is no text in uploaded pdf to messages file --------- Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									4922ab700e
								
							
						
					
					
						commit
						a9ce0e80ee
					
				| @ -79,6 +79,7 @@ info=Info | ||||
| pro=Pro | ||||
| page=Page | ||||
| pages=Pages | ||||
| loading=Loading... | ||||
| 
 | ||||
| legal.privacy=Privacy Policy | ||||
| legal.terms=Terms and Conditions | ||||
| @ -782,6 +783,9 @@ compare.highlightColor.2=Highlight Color 2: | ||||
| compare.document.1=Document 1 | ||||
| compare.document.2=Document 2 | ||||
| compare.submit=Compare | ||||
| compare.complex.message=One or both of the provided documents are large files, accuracy of comparison may be reduced | ||||
| compare.large.file.message=One or Both of the provided documents are too large to process | ||||
| compare.no.text.message=One or both of the selected PDFs have no text content. Please choose PDFs with text for comparison. | ||||
| 
 | ||||
| #BookToPDF | ||||
| BookToPDF.title=Books and Comics to PDF | ||||
| @ -1220,5 +1224,3 @@ splitByChapters.desc.2=Bookmark Level: Choose the level of bookmarks to use for | ||||
| splitByChapters.desc.3=Include Metadata: If checked, the original PDF's metadata will be included in each split PDF. | ||||
| splitByChapters.desc.4=Allow Duplicates: If checked, allows multiple bookmarks on the same page to create separate PDFs. | ||||
| splitByChapters.submit=Split PDF | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										2106
									
								
								src/main/resources/static/js/compare/diff.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2106
									
								
								src/main/resources/static/js/compare/diff.js
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										145
									
								
								src/main/resources/static/js/compare/pdfWorker.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										145
									
								
								src/main/resources/static/js/compare/pdfWorker.js
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,145 @@ | ||||
| importScripts('./diff.js'); | ||||
| 
 | ||||
// Warning texts reported back to the page. The page may replace them with
// localized versions by posting SET_COMPLEX_MESSAGE / SET_TOO_LARGE_MESSAGE
// before the comparison request, so they are kept at worker scope where they
// persist from one message to the next.
let complexMessage = 'One or both of the provided documents are large files, accuracy of comparison may be reduced';
let tooLargeMessage = 'One or Both of the provided documents are too large to process';

/**
 * Worker entry point. Handles two kinds of messages:
 *
 *  - `{ type: 'SET_COMPLEX_MESSAGE' | 'SET_TOO_LARGE_MESSAGE', message }`
 *    stores a (localized) warning text and produces no reply.
 *  - `{ text1, text2, color1, color2 }` compares the two texts word by word
 *    and replies with one or more of:
 *      `{ status: 'error', message }`       one of the texts is missing or blank
 *      `{ status: 'warning', message }`     documents are large / too large
 *      `{ status: 'success', differences }` list of `[color, word]` pairs
 *
 * @param {MessageEvent} e - message posted by the page.
 */
self.onmessage = async function (e) {
  const data = e.data || {};

  // Configuration messages only update the stored texts. They must be handled
  // before anything touches text1/text2, which they do not carry.
  if (data.type === 'SET_TOO_LARGE_MESSAGE') {
    tooLargeMessage = data.message;
    return;
  }
  if (data.type === 'SET_COMPLEX_MESSAGE') {
    complexMessage = data.message;
    return;
  }

  const { text1, text2, color1, color2 } = data;
  const startTime = performance.now();

  if (typeof text1 !== 'string' || typeof text2 !== 'string' || text1.trim() === '' || text2.trim() === '') {
    self.postMessage({ status: 'error', message: 'One or both of the texts are empty.' });
    return;
  }

  const words1 = text1.split(' ');
  const words2 = text2.split(' ');
  // Log sizes only: dumping both full documents to the console is slow and noisy.
  console.log(`Received text for comparison: ${words1.length} and ${words2.length} words`);

  const MAX_WORD_COUNT = 150000;
  const COMPLEX_WORD_COUNT = 50000;
  const BATCH_SIZE = 5000; // Define a suitable batch size for processing
  const OVERLAP_SIZE = 200; // Number of words to overlap - bigger increases accuracy but affects performance

  const isComplex = words1.length > COMPLEX_WORD_COUNT || words2.length > COMPLEX_WORD_COUNT;
  const isTooLarge = words1.length > MAX_WORD_COUNT || words2.length > MAX_WORD_COUNT;

  if (isTooLarge) {
    self.postMessage({
      status: 'warning',
      message: tooLargeMessage,
    });
    return;
  }

  if (isComplex) {
    self.postMessage({
      status: 'warning',
      message: complexMessage,
    });
  }

  // Perform diff operation depending on document size
  const differences = isComplex
    ? await staggeredBatchDiff(words1, words2, color1, color2, BATCH_SIZE, OVERLAP_SIZE)
    : diff(words1, words2, color1, color2);

  console.log(`Diff operation took ${performance.now() - startTime} milliseconds`);
  self.postMessage({ status: 'success', differences });
};
| 
 | ||||
/**
 * Diffs two large word lists in bounded-size windows so the LCS matrix built
 * by diff() never grows beyond batchSize x batchSize cells.
 *
 * Each round diffs the next batchSize words of both documents. Unless it is
 * the final window, the last overlapSize entries of that result are held back
 * and diffed again at the start of the next window; the cut is moved back to
 * just after an unchanged word so both documents restart from a point where
 * they agree. This lets the windows re-align after insertions or deletions
 * instead of reporting a spurious change at every batch boundary, and it
 * guarantees that every word of each document appears in the result exactly once.
 *
 * @param {string[]} words1 - words of the first document.
 * @param {string[]} words2 - words of the second document.
 * @param {string} color1 - color tag for words only present in words1.
 * @param {string} color2 - color tag for words only present in words2.
 * @param {number} batchSize - number of words taken from each document per window.
 * @param {number} overlapSize - number of trailing result entries re-examined by the next window.
 * @returns {Promise<Array<[string, string]>>} `[color, word]` pairs in document order.
 */
async function staggeredBatchDiff(words1, words2, color1, color2, batchSize, overlapSize) {
  // Private markers handed to diff() in place of the real colors, so entries can
  // be classified reliably even when color1 and color2 are the same value.
  const REMOVED = Symbol('removed');
  const ADDED = Symbol('added');

  const differences = [];
  const totalWords1 = words1.length;
  const totalWords2 = words2.length;
  const windowSize = Math.max(1, Math.floor(batchSize) || 1);
  const holdBack = Math.max(0, Math.floor(overlapSize) || 0);

  let position1 = 0; // Number of words of words1 already committed to the result
  let position2 = 0; // Number of words of words2 already committed to the result

  while (position1 < totalWords1 || position2 < totalWords2) {
    const end1 = Math.min(position1 + windowSize, totalWords1);
    const end2 = Math.min(position2 + windowSize, totalWords2);

    const windowDiff = diff(words1.slice(position1, end1), words2.slice(position2, end2), REMOVED, ADDED);
    if (windowDiff.length === 0) {
      // Cannot happen with a diff() that reports every input word; guards against looping forever.
      throw new Error('diff() returned no entries for a non-empty window');
    }

    // Decide how many entries of this window are final.
    const isFinalWindow = end1 === totalWords1 && end2 === totalWords2;
    let commitCount = windowDiff.length;
    if (!isFinalWindow) {
      let cut = windowDiff.length - holdBack;
      while (cut > 0 && (windowDiff[cut - 1][0] === REMOVED || windowDiff[cut - 1][0] === ADDED)) {
        cut--;
      }
      // Without an unchanged word to anchor on, commit the whole window to keep making progress.
      if (cut > 0) {
        commitCount = cut;
      }
    }

    for (let k = 0; k < commitCount; k++) {
      const entry = windowDiff[k];
      if (entry[0] === REMOVED) {
        differences.push([color1, entry[1]]);
        position1++;
      } else if (entry[0] === ADDED) {
        differences.push([color2, entry[1]]);
        position2++;
      } else {
        // Unchanged word: consumes one word from each document.
        differences.push(entry);
        position1++;
        position2++;
      }
    }
  }

  return differences;
}
| 
 | ||||
| 
 | ||||
/**
 * Word-level diff for small inputs: fills the longest-common-subsequence
 * table for the two word lists and lets backtrack() turn it into a list of
 * differences.
 *
 * @param {string[]} words1 - words of the first document.
 * @param {string[]} words2 - words of the second document.
 * @param {string} color1 - color passed through to backtrack() for words1-only words.
 * @param {string} color2 - color passed through to backtrack() for words2-only words.
 * @returns whatever backtrack() returns for the filled table.
 */
function diff(words1, words2, color1, color2) {
  const rowCount = words1.length;
  const columnCount = words2.length;
  console.log(`Starting diff between ${words1.length} words and ${words2.length} words`);

  // table[r][c] = LCS length of the first r words of words1 and the first c words of words2
  const table = [];
  for (let row = 0; row <= rowCount; row++) {
    table.push(new Array(columnCount + 1).fill(0));
  }

  for (let row = 1; row <= rowCount; row++) {
    const current = table[row];
    const above = table[row - 1];
    const word = words1[row - 1];
    for (let column = 1; column <= columnCount; column++) {
      if (word === words2[column - 1]) {
        current[column] = above[column - 1] + 1;
      } else {
        current[column] = Math.max(current[column - 1], above[column]);
      }
    }
  }

  return backtrack(table, words1, words2, color1, color2);
}
| 
 | ||||
/**
 * Walks a filled LCS table from its bottom-right corner back to the origin
 * and returns the resulting word-level diff in document order.
 *
 * @param {number[][]} matrix - LCS table, (words1.length + 1) x (words2.length + 1).
 * @param {string[]} words1 - words of the first document.
 * @param {string[]} words2 - words of the second document.
 * @param {string} color1 - color tag for words only present in words1.
 * @param {string} color2 - color tag for words only present in words2.
 * @returns {Array<[string, string]>} `[color, word]` pairs; unchanged words are tagged 'black'.
 */
function backtrack(matrix, words1, words2, color1, color2) {
  let i = words1.length;
  let j = words2.length;

  // Entries are discovered last-to-first. Appending and reversing once at the
  // end is linear, whereas unshift() on every step is quadratic for long inputs.
  const reversed = [];

  while (i > 0 || j > 0) {
    if (i > 0 && j > 0 && words1[i - 1] === words2[j - 1]) {
      reversed.push(['black', words1[i - 1]]);
      i--;
      j--;
    } else if (j > 0 && (i === 0 || matrix[i][j] === matrix[i][j - 1])) {
      // Dropping the word from words2 keeps the LCS length: it was added.
      reversed.push([color2, words2[j - 1]]);
      j--;
    } else {
      reversed.push([color1, words1[i - 1]]);
      i--;
    }
  }

  return reversed.reverse();
}
| @ -1,236 +1,256 @@ | ||||
| <!DOCTYPE html> | ||||
| <html th:lang="${#locale.language}" th:dir="#{language.direction}" th:data-language="${#locale.toString()}" xmlns:th="https://www.thymeleaf.org"> | ||||
|   <head> | ||||
| <html th:lang="${#locale.language}" th:dir="#{language.direction}" th:data-language="${#locale.toString()}" | ||||
|   xmlns:th="https://www.thymeleaf.org"> | ||||
| 
 | ||||
| <head> | ||||
|   <th:block th:insert="~{fragments/common :: head(title=#{compare.title}, header=#{compare.header})}"></th:block> | ||||
|     <style> | ||||
|       .result-column { | ||||
|         border: 1px solid #ccc; | ||||
|         padding: 15px; | ||||
|         margin-bottom: 15px; | ||||
|         overflow-y: auto; | ||||
|         height: calc(100vh - 400px); | ||||
|         white-space: pre-wrap; | ||||
|       } | ||||
|       .flex-container { | ||||
|         display: flex; | ||||
|         flex-direction: row; | ||||
|       } | ||||
|       .color-selector { | ||||
|         display: flex; | ||||
|         flex-direction: row; | ||||
|         align-items: center; | ||||
|         width: 50%; | ||||
|         max-height: 100px; | ||||
|         margin-bottom: 2rem; | ||||
|       } | ||||
|       #color-box1, #color-box2 { | ||||
|         -webkit-appearance: none; | ||||
|         -moz-appearance: none; | ||||
|         appearance: none; | ||||
|         border: none; | ||||
|         background-color: transparent; | ||||
|       } | ||||
|       .spacer1 { | ||||
|         padding-right: calc(var(--bs-gutter-x) * .5); | ||||
|       } | ||||
|       .spacer2 { | ||||
|         padding-left: calc(var(--bs-gutter-x) * .5); | ||||
|       } | ||||
|     </style> | ||||
|   </head> | ||||
|   <style> | ||||
|     .result-column { | ||||
|       border: 1px solid #ccc; | ||||
|       padding: 15px; | ||||
|       margin-bottom: 15px; | ||||
|       overflow-y: auto; | ||||
|       height: calc(100vh - 400px); | ||||
|       white-space: pre-wrap; | ||||
|     } | ||||
| 
 | ||||
|   <body> | ||||
|     <div id="page-container"> | ||||
|       <div id="content-wrap"> | ||||
|         <th:block th:insert="~{fragments/navbar.html :: navbar}"></th:block> | ||||
|         <br><br> | ||||
|         <div class="container"> | ||||
|           <div class="row justify-content-center"> | ||||
|             <div class="col-md-9 bg-card"> | ||||
|               <div class="tool-header"> | ||||
|                 <span class="material-symbols-rounded tool-header-icon other">compare</span> | ||||
|                 <span class="tool-header-text" th:text="#{compare.header}"></span> | ||||
|               </div> | ||||
|               <div th:replace="~{fragments/common :: fileSelector(name='fileInput', multipleInputsForSingleRequest=false, accept='application/pdf', remoteCall='false')}"></div> | ||||
|               <div th:replace="~{fragments/common :: fileSelector(name='fileInput2', multipleInputsForSingleRequest=false, accept='application/pdf', remoteCall='false')}"></div> | ||||
|     .flex-container { | ||||
|       display: flex; | ||||
|       flex-direction: row; | ||||
|     } | ||||
| 
 | ||||
|               <div class="row"> | ||||
|                 <div class="flex-container"> | ||||
|                   <div class="color-selector spacer1"> | ||||
|                     <label th:text="#{compare.highlightColor.1}"></label> | ||||
|                     <label for="color-box1"></label><input type="color" id="color-box1" value="#ff0000"> | ||||
|                   </div> | ||||
|                   <div class="color-selector spacer2"> | ||||
|                     <label th:text="#{compare.highlightColor.2}"></label> | ||||
|                     <label for="color-box2"></label><input type="color" id="color-box2" value="#008000"> | ||||
|                   </div> | ||||
|                 </div> | ||||
|               </div> | ||||
|     .color-selector { | ||||
|       display: flex; | ||||
|       flex-direction: row; | ||||
|       align-items: center; | ||||
|       width: 50%; | ||||
|       max-height: 100px; | ||||
|       margin-bottom: 2rem; | ||||
|     } | ||||
| 
 | ||||
|               <button class="btn btn-primary"  onclick="comparePDFs()" th:text="#{compare.submit}"></button> | ||||
|     #color-box1, | ||||
|     #color-box2 { | ||||
|       -webkit-appearance: none; | ||||
|       -moz-appearance: none; | ||||
|       appearance: none; | ||||
|       border: none; | ||||
|       background-color: transparent; | ||||
|     } | ||||
| 
 | ||||
|               <div class="row"> | ||||
|                 <div class="col-md-6"> | ||||
|                   <h3 th:text="#{compare.document.1}"></h3> | ||||
|                   <div id="result1" class="result-column"></div> | ||||
|                 </div> | ||||
|                 <div class="col-md-6"> | ||||
|                   <h3 th:text="#{compare.document.2}"></h3> | ||||
|                   <div id="result2" class="result-column"></div> | ||||
|                 </div> | ||||
|               </div> | ||||
|               <script type="module" th:src="@{'/pdfjs-legacy/pdf.mjs'}"></script> | ||||
|               <script> | ||||
|                 // get the elements | ||||
|                 var result1 = document.getElementById('result1'); | ||||
|                 var result2 = document.getElementById('result2'); | ||||
|     .spacer1 { | ||||
|       padding-right: calc(var(--bs-gutter-x) * .5); | ||||
|     } | ||||
| 
 | ||||
|                 // add event listeners | ||||
|                 result1.addEventListener('scroll', function() { | ||||
|                   result2.scrollTop = result1.scrollTop; | ||||
|                 }); | ||||
|     .spacer2 { | ||||
|       padding-left: calc(var(--bs-gutter-x) * .5); | ||||
|     } | ||||
|   </style> | ||||
| </head> | ||||
| 
 | ||||
|                 result2.addEventListener('scroll', function() { | ||||
|                   result1.scrollTop = result2.scrollTop; | ||||
|                 }); | ||||
| 
 | ||||
|                 async function comparePDFs() { | ||||
|                   const file1 = document.getElementById("fileInput-input").files[0]; | ||||
|                   const file2 = document.getElementById("fileInput2-input").files[0]; | ||||
|                   var color1 = document.getElementById('color-box1').value; | ||||
|                   var color2 = document.getElementById('color-box2').value; | ||||
| 
 | ||||
|                   if (!file1 || !file2) { | ||||
|                     console.error("Please select two PDF files to compare"); | ||||
|                     return; | ||||
|                   } | ||||
|                   pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs' | ||||
|                   const [pdf1, pdf2] = await Promise.all([ | ||||
|                     pdfjsLib.getDocument(URL.createObjectURL(file1)).promise, | ||||
|                     pdfjsLib.getDocument(URL.createObjectURL(file2)).promise | ||||
|                   ]); | ||||
| 
 | ||||
|                   const extractText = async (pdf) => { | ||||
|                     const pages = []; | ||||
|                     for (let i = 1; i <= pdf.numPages; i++) { | ||||
|                       const page = await pdf.getPage(i); | ||||
|                       const content = await page.getTextContent(); | ||||
|                       const strings = content.items.map(item => item.str); | ||||
|                       pages.push(strings.join(" ")); | ||||
|                     } | ||||
|                     return pages.join(" "); | ||||
|                   }; | ||||
| 
 | ||||
|                   const [text1, text2] = await Promise.all([ | ||||
|                     extractText(pdf1), | ||||
|                     extractText(pdf2) | ||||
|                   ]); | ||||
| 
 | ||||
|                   if (text1.trim() === "" || text2.trim() === "") { | ||||
|                     alert("One or both of the selected PDFs have no text content. Please choose PDFs with text for comparison."); | ||||
|                     return; | ||||
|                   } | ||||
|                   const diff = (text1, text2) => { | ||||
|                     const words1 = text1.split(' '); | ||||
|                     const words2 = text2.split(' '); | ||||
| 
 | ||||
|                     // Create a 2D array to hold our "matrix" | ||||
|                     const matrix = Array(words1.length + 1).fill(null).map(() => Array(words2.length + 1).fill(0)); | ||||
| 
 | ||||
|                     // Perform standard LCS algorithm | ||||
|                     for (let i = 1; i <= words1.length; i++) { | ||||
|                       for (let j = 1; j <= words2.length; j++) { | ||||
|                         if (words1[i - 1] === words2[j - 1]) { | ||||
|                           matrix[i][j] = matrix[i - 1][j - 1] + 1; | ||||
|                         } else { | ||||
|                           matrix[i][j] = Math.max(matrix[i][j - 1], matrix[i - 1][j]); | ||||
|                         } | ||||
|                       } | ||||
|                     } | ||||
| 
 | ||||
|                     let i = words1.length; | ||||
|                     let j = words2.length; | ||||
|                     const differences = []; | ||||
| 
 | ||||
|                     // Backtrack through the matrix to create the diff | ||||
|                     while (i > 0 || j > 0) { | ||||
|                       if (i > 0 && j > 0 && words1[i - 1] === words2[j - 1]) { | ||||
|                         differences.unshift(['black', words1[i - 1]]); | ||||
|                         i--; | ||||
|                         j--; | ||||
|                       } else if (j > 0 && (i === 0 || matrix[i][j - 1] >= matrix[i - 1][j])) { | ||||
|                         differences.unshift([color2, words2[j - 1]]); | ||||
|                         j--; | ||||
|                       } else if (i > 0 && (j === 0 || matrix[i][j - 1] < matrix[i - 1][j])) { | ||||
|                         differences.unshift([color1, words1[i - 1]]); | ||||
|                         i--; | ||||
|                       } | ||||
|                     } | ||||
|                     console.log(differences); | ||||
| 
 | ||||
| 
 | ||||
|                     return differences; | ||||
|                   }; | ||||
| 
 | ||||
|                   const differences = diff(text1, text2); | ||||
| 
 | ||||
|                   const displayDifferences = (differences) => { | ||||
|                     const resultDiv1 = document.getElementById("result1"); | ||||
|                     const resultDiv2 = document.getElementById("result2"); | ||||
|                     resultDiv1.innerHTML = ""; | ||||
|                     resultDiv2.innerHTML = ""; | ||||
| 
 | ||||
|                     differences.forEach(([color, word]) => { | ||||
|                       const span1 = document.createElement("span"); | ||||
|                       const span2 = document.createElement("span"); | ||||
| 
 | ||||
|                       // If it's an addition, show it in color2 in the second document and transparent in the first | ||||
|                       if (color === color2) { | ||||
|                         span1.style.color = "transparent"; | ||||
|                         span1.style.userSelect = "none"; | ||||
|                         span2.style.color = color; | ||||
|                       } | ||||
|                       // If it's a deletion, show it in color1 in the first document and transparent in the second | ||||
|                       else if (color === color1) { | ||||
|                         span1.style.color = color; | ||||
|                         span2.style.color = "transparent"; | ||||
|                         span2.style.userSelect = "none"; | ||||
|                       } | ||||
|                       // If it's unchanged, show it in black in both | ||||
|                       else { | ||||
|                         span1.style.color = color; | ||||
|                         span2.style.color = color; | ||||
|                       } | ||||
| 
 | ||||
|                       span1.textContent = word; | ||||
|                       span2.textContent = word; | ||||
|                       resultDiv1.appendChild(span1); | ||||
|                       resultDiv2.appendChild(span2); | ||||
| 
 | ||||
|                       // Add space after each word, or a new line if the word ends with a full stop | ||||
|                       const spaceOrNewline1 = document.createElement("span"); | ||||
|                       const spaceOrNewline2 = document.createElement("span"); | ||||
|                       if (word.endsWith(".")) { | ||||
|                         spaceOrNewline1.innerHTML = "<br>"; | ||||
|                         spaceOrNewline2.innerHTML = "<br>"; | ||||
|                       } else { | ||||
|                         spaceOrNewline1.textContent = " "; | ||||
|                         spaceOrNewline2.textContent = " "; | ||||
|                       } | ||||
|                       resultDiv1.appendChild(spaceOrNewline1); | ||||
|                       resultDiv2.appendChild(spaceOrNewline2); | ||||
|                     }); | ||||
|                   }; | ||||
| 
 | ||||
|                   console.log('Differences:', differences); | ||||
|                   displayDifferences(differences); | ||||
|                 } | ||||
|               </script> | ||||
| <body> | ||||
|   <div id="page-container"> | ||||
|     <div id="content-wrap"> | ||||
|       <th:block th:insert="~{fragments/navbar.html :: navbar}"></th:block> | ||||
|       <br><br> | ||||
|       <div class="container"> | ||||
|         <div class="row justify-content-center"> | ||||
|           <div class="col-md-9 bg-card"> | ||||
|             <div class="tool-header"> | ||||
|               <span class="material-symbols-rounded tool-header-icon other">compare</span> | ||||
|               <span class="tool-header-text" th:text="#{compare.header}"></span> | ||||
|             </div> | ||||
|             <div | ||||
|               th:replace="~{fragments/common :: fileSelector(name='fileInput', disableMultipleFiles=true, multipleInputsForSingleRequest=false, accept='application/pdf', remoteCall='false')}"> | ||||
|             </div> | ||||
|             <div | ||||
|               th:replace="~{fragments/common :: fileSelector(name='fileInput2', disableMultipleFiles=true, multipleInputsForSingleRequest=false, accept='application/pdf', remoteCall='false')}"> | ||||
|             </div> | ||||
| 
 | ||||
|             <div class="row"> | ||||
|               <div class="flex-container"> | ||||
|                 <div class="color-selector spacer1"> | ||||
|                   <label th:text="#{compare.highlightColor.1}"></label> | ||||
|                   <label for="color-box1"></label><input type="color" id="color-box1" value="#ff0000"> | ||||
|                 </div> | ||||
|                 <div class="color-selector spacer2"> | ||||
|                   <label th:text="#{compare.highlightColor.2}"></label> | ||||
|                   <label for="color-box2"></label><input type="color" id="color-box2" value="#008000"> | ||||
|                 </div> | ||||
|               </div> | ||||
|             </div> | ||||
| 
 | ||||
|             <button class="btn btn-primary" onclick="comparePDFs()" th:text="#{compare.submit}"></button> | ||||
| 
 | ||||
|             <div class="row"> | ||||
|               <div class="col-md-6"> | ||||
|                 <h3 th:text="#{compare.document.1}"></h3> | ||||
|                 <div id="result1" class="result-column"></div> | ||||
|               </div> | ||||
|               <div class="col-md-6"> | ||||
|                 <h3 th:text="#{compare.document.2}"></h3> | ||||
|                 <div id="result2" class="result-column"></div> | ||||
|               </div> | ||||
|             </div> | ||||
|             <script type="module" th:src="@{'/pdfjs-legacy/pdf.mjs'}"></script> | ||||
|             <script th:inline="javascript"> | ||||
|               // get the elements | ||||
|               var result1 = document.getElementById('result1'); | ||||
|               var result2 = document.getElementById('result2'); | ||||
| 
 | ||||
|               // add event listeners | ||||
|               result1.addEventListener('scroll', function () { | ||||
|                 result2.scrollTop = result1.scrollTop; | ||||
|               }); | ||||
| 
 | ||||
|               result2.addEventListener('scroll', function () { | ||||
|                 result1.scrollTop = result2.scrollTop; | ||||
|               }); | ||||
|               async function comparePDFs() { | ||||
|                 const file1 = document.getElementById("fileInput-input").files[0]; | ||||
|                 const file2 = document.getElementById("fileInput2-input").files[0]; | ||||
|                 var color1 = document.getElementById('color-box1').value; | ||||
|                 var color2 = document.getElementById('color-box2').value; | ||||
| 
 | ||||
|                 const complexMessage = /*[[#{compare.complex.message}]]*/ 'One or both of the provided documents are large files, accuracy of comparison may be reduced'; | ||||
|                 const largeFilesMessage = /*[[#{compare.large.file.message}]]*/ 'One or Both of the provided documents are too large to process'; | ||||
|                 const noTextMessage = /*[[#{compare.no.text.message}]]*/ 'One or both of the selected PDFs have no text content. Please choose PDFs with text for comparison."'; | ||||
| 
 | ||||
|                 if (!file1 || !file2) { | ||||
|                   console.error("Please select two PDF files to compare"); | ||||
|                   return; | ||||
|                 } | ||||
| 
 | ||||
|                 pdfjsLib.GlobalWorkerOptions.workerSrc = './pdfjs-legacy/pdf.worker.mjs'; | ||||
| 
 | ||||
|                 const [pdf1, pdf2] = await Promise.all([ | ||||
|                   pdfjsLib.getDocument(URL.createObjectURL(file1)).promise, | ||||
|                   pdfjsLib.getDocument(URL.createObjectURL(file2)).promise | ||||
|                 ]); | ||||
| 
 | ||||
|                 const extractText = async (pdf) => { | ||||
|                   const pages = []; | ||||
|                   for (let i = 1; i <= pdf.numPages; i++) { | ||||
|                     const page = await pdf.getPage(i); | ||||
|                     const content = await page.getTextContent(); | ||||
|                     const strings = content.items.map(item => item.str); | ||||
|                     pages.push(strings.join(" ")); | ||||
|                   } | ||||
|                   return pages.join(" "); | ||||
|                 }; | ||||
| 
 | ||||
|                 const [text1, text2] = await Promise.all([ | ||||
|                   extractText(pdf1), | ||||
|                   extractText(pdf2) | ||||
|                 ]); | ||||
| 
 | ||||
|                 if (text1.trim() === "" || text2.trim() === "") { | ||||
|                   alert(noTextMessage); | ||||
|                   return; | ||||
|                 } | ||||
| 
 | ||||
|                 const resultDiv1 = document.getElementById("result1"); | ||||
|                 const resultDiv2 = document.getElementById("result2"); | ||||
|                 const loading = /*[[#{loading}]]*/ 'Loading...'; | ||||
| 
 | ||||
|                 resultDiv1.innerHTML = loading; | ||||
|                 resultDiv2.innerHTML = loading; | ||||
| 
 | ||||
|                 // Create a new Worker | ||||
|                 const worker = new Worker('/js/compare/pdfWorker.js'); | ||||
| 
 | ||||
| 
 | ||||
|                 // Post messages to the worker | ||||
|                 worker.postMessage({ | ||||
|                   type: 'SET_COMPLEX_MESSAGE', | ||||
|                   message: complexMessage | ||||
|                 }); | ||||
| 
 | ||||
|                 worker.postMessage({ | ||||
|                   type: 'SET_TOO_LARGE_MESSAGE', | ||||
|                   message: largeFilesMessage | ||||
|                 }); | ||||
| 
 | ||||
|                 // Error handling for the worker | ||||
|                 worker.onerror = function (error) { | ||||
|                   console.error('Worker error:', error); | ||||
|                 }; | ||||
|                 worker.onmessage = function (e) { | ||||
|                   const { status, differences, message } = e.data; | ||||
|                   if (status === 'error') { | ||||
| 
 | ||||
|                     resultDiv1.innerHTML = ''; | ||||
|                     resultDiv2.innerHTML = ''; | ||||
|                     alert(message); | ||||
|                     return; | ||||
|                   } | ||||
|                   if (status === 'success' && differences) { | ||||
|                     console.log('Differences:', differences); | ||||
|                     displayDifferences(differences); | ||||
|                   } | ||||
|                   if (event.data.status === 'warning') { | ||||
|                     console.warn(event.data.message); | ||||
|                     alert(event.data.message); | ||||
|                   } | ||||
|                 }; | ||||
| 
 | ||||
|                 worker.postMessage({ text1, text2, color1, color2 }); | ||||
| 
 | ||||
|                 const displayDifferences = (differences) => { | ||||
|                   const resultDiv1 = document.getElementById("result1"); | ||||
|                   const resultDiv2 = document.getElementById("result2"); | ||||
|                   resultDiv1.innerHTML = ""; | ||||
|                   resultDiv2.innerHTML = ""; | ||||
| 
 | ||||
|                   differences.forEach(([color, word]) => { | ||||
|                     const span1 = document.createElement("span"); | ||||
|                     const span2 = document.createElement("span"); | ||||
| 
 | ||||
|                     if (color === color2) { | ||||
|                       span1.style.color = "transparent"; | ||||
|                       span1.style.userSelect = "none"; | ||||
|                       span2.style.color = color; | ||||
|                     } | ||||
|                     // If it's a deletion, show it in  in the first document and transparent in the second | ||||
|                     else if (color === color1) { | ||||
|                       span1.style.color = color; | ||||
|                       span2.style.color = "transparent"; | ||||
|                       span2.style.userSelect = "none"; | ||||
|                     } | ||||
|                     // If it's unchanged, show it in black in both | ||||
|                     else { | ||||
|                       span1.style.color = color; | ||||
|                       span2.style.color = color; | ||||
|                     } | ||||
| 
 | ||||
|                     span1.textContent = word; | ||||
|                     span2.textContent = word; | ||||
|                     resultDiv1.appendChild(span1); | ||||
|                     resultDiv2.appendChild(span2); | ||||
| 
 | ||||
|                     // Add space after each word, or a new line if the word ends with a full stop | ||||
|                     const spaceOrNewline1 = document.createElement("span"); | ||||
|                     const spaceOrNewline2 = document.createElement("span"); | ||||
|                     if (word.endsWith(".")) { | ||||
|                       spaceOrNewline1.innerHTML = "<br>"; | ||||
|                       spaceOrNewline2.innerHTML = "<br>"; | ||||
|                     } else { | ||||
|                       spaceOrNewline1.textContent = " "; | ||||
|                       spaceOrNewline2.textContent = " "; | ||||
|                     } | ||||
|                     resultDiv1.appendChild(spaceOrNewline1); | ||||
|                     resultDiv2.appendChild(spaceOrNewline2); | ||||
|                   }); | ||||
|                 }; | ||||
|               } | ||||
| 
 | ||||
| 
 | ||||
|             </script> | ||||
|           </div> | ||||
|         </div> | ||||
|       </div> | ||||
|       <th:block th:insert="~{fragments/footer.html :: footer}"></th:block> | ||||
|     </div> | ||||
|   </body> | ||||
|     <th:block th:insert="~{fragments/footer.html :: footer}"></th:block> | ||||
|   </div> | ||||
| </body> | ||||
| 
 | ||||
| </html> | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user