Stirling 2.0 (#3928)
# Description of Changes

- File context for managing files between tools and views
- Optimisation for large files
- Updated Split to work with the new file system and match Matt's stepped design more closely

---

## Checklist

### General

- [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details.

---------

Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
frontend/src/utils/fileHash.ts (new file, 127 lines added)
@@ -0,0 +1,127 @@
/**
 * File hashing utilities for cache key generation
 */

export class FileHasher {
  private static readonly CHUNK_SIZE = 64 * 1024; // 64KB chunks for hashing

  /**
   * Generate a content-based hash for a file
   * Uses first + last + middle chunks to create a reasonably unique hash
   * without reading the entire file (which would be expensive for large files)
   */
  static async generateContentHash(file: File): Promise<string> {
    const chunks = await this.getFileChunks(file);
    const combined = await this.combineChunks(chunks);
    return await this.hashArrayBuffer(combined);
  }

  /**
   * Generate a fast hash based on file metadata
   * Faster but less collision-resistant than content hash
   */
  static generateMetadataHash(file: File): string {
    const data = `${file.name}-${file.size}-${file.lastModified}-${file.type}`;
    return this.simpleHash(data);
  }

  /**
   * Generate a hybrid hash that balances speed and uniqueness
   * Uses metadata + small content sample
   */
  static async generateHybridHash(file: File): Promise<string> {
    const metadataHash = this.generateMetadataHash(file);

    // For small files, use full content hash
    if (file.size <= 1024 * 1024) { // 1MB
      const contentHash = await this.generateContentHash(file);
      return `${metadataHash}-${contentHash}`;
    }

    // For large files, use first chunk only
    const firstChunk = file.slice(0, this.CHUNK_SIZE);
    const firstChunkBuffer = await firstChunk.arrayBuffer();
    const firstChunkHash = await this.hashArrayBuffer(firstChunkBuffer);

    return `${metadataHash}-${firstChunkHash}`;
  }

  private static async getFileChunks(file: File): Promise<ArrayBuffer[]> {
    const chunks: ArrayBuffer[] = [];

    // First chunk
    if (file.size > 0) {
      const firstChunk = file.slice(0, Math.min(this.CHUNK_SIZE, file.size));
      chunks.push(await firstChunk.arrayBuffer());
    }

    // Middle chunk (if file is large enough)
    if (file.size > this.CHUNK_SIZE * 2) {
      const middleStart = Math.floor(file.size / 2) - Math.floor(this.CHUNK_SIZE / 2);
      const middleEnd = middleStart + this.CHUNK_SIZE;
      const middleChunk = file.slice(middleStart, middleEnd);
      chunks.push(await middleChunk.arrayBuffer());
    }

    // Last chunk (if file is large enough and different from first)
    if (file.size > this.CHUNK_SIZE) {
      const lastStart = Math.max(file.size - this.CHUNK_SIZE, this.CHUNK_SIZE);
      const lastChunk = file.slice(lastStart);
      chunks.push(await lastChunk.arrayBuffer());
    }

    return chunks;
  }

  private static async combineChunks(chunks: ArrayBuffer[]): Promise<ArrayBuffer> {
    const totalLength = chunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
    const combined = new Uint8Array(totalLength);

    let offset = 0;
    for (const chunk of chunks) {
      combined.set(new Uint8Array(chunk), offset);
      offset += chunk.byteLength;
    }

    return combined.buffer;
  }

  private static async hashArrayBuffer(buffer: ArrayBuffer): Promise<string> {
    // Use Web Crypto API for proper hashing
    if (crypto.subtle) {
      const hashBuffer = await crypto.subtle.digest('SHA-256', buffer);
      const hashArray = Array.from(new Uint8Array(hashBuffer));
      return hashArray.map(b => b.toString(16).padStart(2, '0')).join('');
    }

    // Fallback for environments without crypto.subtle
    return this.simpleHash(Array.from(new Uint8Array(buffer)).join(''));
  }

  private static simpleHash(str: string): string {
    let hash = 0;
    if (str.length === 0) return hash.toString();

    for (let i = 0; i < str.length; i++) {
      const char = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + char;
      hash = hash & hash; // Convert to 32-bit integer
    }

    return Math.abs(hash).toString(16);
  }

  /**
   * Validate that a file matches its expected hash
   * Useful for detecting file corruption or changes
   */
  static async validateFileHash(file: File, expectedHash: string): Promise<boolean> {
    try {
      const actualHash = await this.generateHybridHash(file);
      return actualHash === expectedHash;
    } catch (error) {
      console.error('Hash validation failed:', error);
      return false;
    }
  }
}
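For context, a minimal sketch of how a hybrid hash like this could serve as a cache key when a file enters the workspace. The `thumbnailCache` map, the `getCachedThumbnail` helper, and the `render` callback are illustrative assumptions, not code from this PR; only `FileHasher.generateHybridHash` comes from the file above.

```typescript
import { FileHasher } from './fileHash'; // import path assumes a sibling module (illustrative)

// Hypothetical cache keyed by hybrid hash; not part of this PR.
const thumbnailCache = new Map<string, string>();

async function getCachedThumbnail(
  file: File,
  render: (f: File) => Promise<string>
): Promise<string> {
  // Cheap even for large files: metadata plus the first 64KB chunk.
  const key = await FileHasher.generateHybridHash(file);

  const cached = thumbnailCache.get(key);
  if (cached) return cached;

  const thumbnail = await render(file);
  thumbnailCache.set(key, thumbnail);
  return thumbnail;
}
```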
@@ -1,5 +1,19 @@
import { getDocument } from "pdfjs-dist";

/**
 * Calculate thumbnail scale based on file size
 * Smaller files get higher quality, larger files get lower quality
 */
export function calculateScaleFromFileSize(fileSize: number): number {
  const MB = 1024 * 1024;

  if (fileSize < 1 * MB) return 0.6;  // < 1MB: High quality
  if (fileSize < 5 * MB) return 0.4;  // 1-5MB: Medium-high quality
  if (fileSize < 15 * MB) return 0.3; // 5-15MB: Medium quality
  if (fileSize < 30 * MB) return 0.2; // 15-30MB: Low-medium quality
  return 0.15;                        // 30MB+: Low quality
}

/**
 * Generate thumbnail for a PDF file during upload
 * Returns base64 data URL or undefined if generation fails
@@ -14,6 +28,10 @@ export async function generateThumbnailForFile(file: File): Promise<string | und
  try {
    console.log('Generating thumbnail for', file.name);

    // Calculate quality scale based on file size
    const scale = calculateScaleFromFileSize(file.size);
    console.log(`Using scale ${scale} for ${file.name} (${(file.size / 1024 / 1024).toFixed(1)}MB)`);

    // Only read first 2MB for thumbnail generation to save memory
    const chunkSize = 2 * 1024 * 1024; // 2MB
    const chunk = file.slice(0, Math.min(chunkSize, file.size));
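For reference, a few sample calls showing the scale each size bracket from `calculateScaleFromFileSize` produces (the sizes are arbitrary examples, not values used in the PR):

```typescript
const MB = 1024 * 1024;

calculateScaleFromFileSize(0.5 * MB); // 0.6  (< 1MB: high quality)
calculateScaleFromFileSize(10 * MB);  // 0.3  (5-15MB: medium quality)
calculateScaleFromFileSize(50 * MB);  // 0.15 (30MB+: low quality)
```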
@@ -26,7 +44,7 @@ export async function generateThumbnailForFile(file: File): Promise<string | und
    }).promise;

    const page = await pdf.getPage(1);
-   const viewport = page.getViewport({ scale: 0.2 }); // Smaller scale for memory efficiency
+   const viewport = page.getViewport({ scale }); // Dynamic scale based on file size
    const canvas = document.createElement("canvas");
    canvas.width = viewport.width;
    canvas.height = viewport.height;
@@ -45,7 +63,45 @@ export async function generateThumbnailForFile(file: File): Promise<string | und
    return thumbnail;
  } catch (error) {
    console.warn('Failed to generate thumbnail for', file.name, error);
    if (error instanceof Error) {
      if (error.name === 'InvalidPDFException') {
        console.warn(`PDF structure issue for ${file.name} - using fallback thumbnail`);
        // Return a placeholder or try with full file instead of chunk
        try {
          const fullArrayBuffer = await file.arrayBuffer();
          const pdf = await getDocument({
            data: fullArrayBuffer,
            disableAutoFetch: true,
            disableStream: true,
            verbosity: 0 // Reduce PDF.js warnings
          }).promise;

          const page = await pdf.getPage(1);
          const viewport = page.getViewport({ scale });
          const canvas = document.createElement("canvas");
          canvas.width = viewport.width;
          canvas.height = viewport.height;
          const context = canvas.getContext("2d");

          if (!context) {
            throw new Error('Could not get canvas context');
          }

          await page.render({ canvasContext: context, viewport }).promise;
          const thumbnail = canvas.toDataURL();

          pdf.destroy();
          return thumbnail;
        } catch (fallbackError) {
          console.warn('Fallback thumbnail generation also failed for', file.name, fallbackError);
          return undefined;
        }
      } else {
        console.warn('Failed to generate thumbnail for', file.name, error);
        return undefined;
      }
    }
    console.warn('Unknown error generating thumbnail for', file.name, error);
    return undefined;
  }
}
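And a minimal usage sketch for the upload path. The import path `./thumbnailUtils`, the `showUploadPreview` helper, and the `upload-preview` element id are assumptions for illustration only; the diff only guarantees that `generateThumbnailForFile` resolves to a base64 data URL or `undefined`.

```typescript
// Illustrative only: wire the generated thumbnail into an <img> element.
// The import path and element id are assumptions, not part of this PR.
import { generateThumbnailForFile } from './thumbnailUtils';

async function showUploadPreview(file: File): Promise<void> {
  const thumbnail = await generateThumbnailForFile(file); // base64 data URL or undefined

  const img = document.getElementById('upload-preview') as HTMLImageElement | null;
  if (img && thumbnail) {
    img.src = thumbnail; // data URLs can be assigned directly to img.src
  }
}
```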