mirror of
				https://github.com/advplyr/audiobookshelf.git
				synced 2025-10-27 11:18:14 +01:00 
			
		
		
		
	Merge pull request #3435 from mikiher/comic-book-extractors
Move to node-unrar-js for cbr and node-stream-zip for cbz
This commit is contained in:
		
						commit
						8f96d20a23
					
				
							
								
								
									
										9
									
								
								package-lock.json
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										9
									
								
								package-lock.json
									
									
									
										generated
									
									
									
								
							| @ -16,6 +16,7 @@ | ||||
|         "graceful-fs": "^4.2.10", | ||||
|         "htmlparser2": "^8.0.1", | ||||
|         "lru-cache": "^10.0.3", | ||||
|         "node-unrar-js": "^2.0.2", | ||||
|         "nodemailer": "^6.9.13", | ||||
|         "openid-client": "^5.6.1", | ||||
|         "p-throttle": "^4.1.1", | ||||
| @ -3565,6 +3566,14 @@ | ||||
|       "integrity": "sha512-uYr7J37ae/ORWdZeQ1xxMJe3NtdmqMC/JZK+geofDrkLUApKRHPd18/TxtBOJ4A0/+uUIliorNrfYV6s1b02eQ==", | ||||
|       "dev": true | ||||
|     }, | ||||
|     "node_modules/node-unrar-js": { | ||||
|       "version": "2.0.2", | ||||
|       "resolved": "https://registry.npmjs.org/node-unrar-js/-/node-unrar-js-2.0.2.tgz", | ||||
|       "integrity": "sha512-hLNmoJzqaKJnod8yiTVGe9hnlNRHotUi0CreSv/8HtfRi/3JnRC8DvsmKfeGGguRjTEulhZK6zXX5PXoVuDZ2w==", | ||||
|       "engines": { | ||||
|         "node": ">=10.0.0" | ||||
|       } | ||||
|     }, | ||||
|     "node_modules/nodemailer": { | ||||
|       "version": "6.9.13", | ||||
|       "resolved": "https://registry.npmjs.org/nodemailer/-/nodemailer-6.9.13.tgz", | ||||
|  | ||||
| @ -43,6 +43,7 @@ | ||||
|     "graceful-fs": "^4.2.10", | ||||
|     "htmlparser2": "^8.0.1", | ||||
|     "lru-cache": "^10.0.3", | ||||
|     "node-unrar-js": "^2.0.2", | ||||
|     "nodemailer": "^6.9.13", | ||||
|     "openid-client": "^5.6.1", | ||||
|     "p-throttle": "^4.1.1", | ||||
|  | ||||
							
								
								
									
										207
									
								
								server/utils/comicBookExtractors.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										207
									
								
								server/utils/comicBookExtractors.js
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,207 @@ | ||||
| const Path = require('path') | ||||
| const os = require('os') | ||||
| const unrar = require('node-unrar-js') | ||||
| const Logger = require('../Logger') | ||||
| const fs = require('../libs/fsExtra') | ||||
| const StreamZip = require('../libs/nodeStreamZip') | ||||
| const Archive = require('../libs/libarchive/archive') | ||||
| const { isWritable } = require('./fileUtils') | ||||
| 
 | ||||
/**
 * Base class for comic book archive extractors.
 *
 * Subclasses are expected to override open, getFilePaths, extractToFile,
 * extractToBuffer and close; the implementations here only throw.
 */
class AbstractComicBookExtractor {
  /**
   * @param {string} comicPath path of the comic book file
   */
  constructor(comicPath) {
    this.comicPath = comicPath
  }

  /**
   * Read the whole comic book file into memory.
   *
   * @returns {Promise<Buffer|null>} file contents, or null when the path does not exist or reading fails
   */
  async getBuffer() {
    if (!(await fs.pathExists(this.comicPath))) {
      Logger.error(`[parseComicMetadata] Comic path does not exist "${this.comicPath}"`)
      return null
    }
    try {
      // The await is required: returning the bare promise would let a rejected
      // read bypass this try/catch and reject getBuffer() instead of returning null.
      return await fs.readFile(this.comicPath)
    } catch (error) {
      Logger.error(`[parseComicMetadata] Failed to read comic at "${this.comicPath}"`, error)
      return null
    }
  }

  /**
   * Open the archive. Must be overridden.
   *
   * @returns {Promise<void>}
   */
  async open() {
    throw new Error('Not implemented')
  }

  /**
   * List the file paths contained in the archive. Must be overridden.
   *
   * @returns {Promise<string[]|null>}
   */
  async getFilePaths() {
    throw new Error('Not implemented')
  }

  /**
   * Extract one archive entry to a file on disk. Must be overridden.
   *
   * @param {string} filePath path of the entry inside the archive
   * @param {string} outputFilePath destination path on disk
   * @returns {Promise<boolean>}
   */
  async extractToFile(filePath, outputFilePath) {
    throw new Error('Not implemented')
  }

  /**
   * Extract one archive entry into memory. Must be overridden.
   *
   * @param {string} filePath path of the entry inside the archive
   * @returns {Promise<Buffer|null>}
   */
  async extractToBuffer(filePath) {
    throw new Error('Not implemented')
  }

  /**
   * Release the archive. Must be overridden.
   */
  close() {
    throw new Error('Not implemented')
  }
}
| 
 | ||||
/**
 * Extractor for .cbr comic books, backed by node-unrar-js.
 *
 * Entries are extracted into a temp directory first and then read or moved
 * from there; leftover empty directories are cleaned up afterwards.
 */
class CbrComicBookExtractor extends AbstractComicBookExtractor {
  /**
   * @param {string} comicPath path of the comic book file
   */
  constructor(comicPath) {
    super(comicPath)
    this.archive = null
    this.tmpDir = null
  }

  /**
   * Prepare the temp directory and create the unrar extractor for the comic.
   *
   * @returns {Promise<void>}
   * @throws {Error} when the temp directory is not writable
   */
  async open() {
    this.tmpDir = global.MetadataPath ? Path.join(global.MetadataPath, 'tmp') : os.tmpdir()
    await fs.ensureDir(this.tmpDir)
    if (!(await isWritable(this.tmpDir))) throw new Error(`[CbrComicBookExtractor] Temp directory "${this.tmpDir}" is not writable`)
    this.archive = await unrar.createExtractorFromFile({ filepath: this.comicPath, targetPath: this.tmpDir })
    Logger.debug(`[CbrComicBookExtractor] Opened comic book "${this.comicPath}". Using temp directory "${this.tmpDir}" for extraction.`)
  }

  /**
   * List the names of all non-directory entries in the archive.
   *
   * @returns {Promise<string[]|null>} entry names, or null when the archive is not open
   */
  async getFilePaths() {
    if (!this.archive) return null
    const list = this.archive.getFileList()
    const fileHeaders = [...list.fileHeaders]
    const filePaths = fileHeaders.filter((fh) => !fh.flags.directory).map((fh) => fh.name)
    Logger.debug(`[CbrComicBookExtractor] Found ${filePaths.length} files in comic book "${this.comicPath}"`)
    return filePaths
  }

  /**
   * Remove the now-empty directories that extraction created for an entry,
   * walking upwards from the entry's parent directory.
   *
   * The walk never touches the temp directory itself or anything outside of it,
   * so an entry name containing ".." or an absolute path cannot steer the cleanup
   * elsewhere.
   *
   * @param {string} file entry name relative to the temp directory
   * @returns {Promise<void>}
   */
  async removeEmptyParentDirs(file) {
    let dir = Path.dirname(file)
    while (dir !== '.') {
      const fullDirPath = Path.join(this.tmpDir, dir)
      const relativeToTmp = Path.relative(this.tmpDir, fullDirPath)
      // Empty means "the temp directory itself"; ".." or an absolute result means "outside of it".
      const isOutsideTmp = relativeToTmp === '..' || relativeToTmp.startsWith(`..${Path.sep}`) || Path.isAbsolute(relativeToTmp)
      if (!relativeToTmp || isOutsideTmp) break
      const files = await fs.readdir(fullDirPath)
      if (files.length > 0) break
      await fs.remove(fullDirPath)
      dir = Path.dirname(dir)
    }
  }

  /**
   * Extract a single entry into the temp directory.
   *
   * @param {string} file entry name inside the archive
   * @returns {string|null} name of the extracted entry relative to the temp directory, or null when nothing was extracted
   */
  extractToTmpDir(file) {
    const extracted = this.archive.extract({ files: [file] })
    // Spreading consumes the whole iterable, as the original code did.
    // NOTE(review): node-unrar-js documents `files` as lazy, so this is presumably what performs the extraction — confirm.
    const [firstFile] = [...extracted.files]
    const extractedName = firstFile?.fileHeader?.name
    if (!extractedName) {
      Logger.error(`[CbrComicBookExtractor] File "${file}" not found in comic book "${this.comicPath}"`)
      return null
    }
    return extractedName
  }

  /**
   * Extract a single entry and return its contents.
   *
   * @param {string} file entry name inside the archive
   * @returns {Promise<Buffer|null>} entry contents, or null when the archive is not open or the entry was not extracted
   */
  async extractToBuffer(file) {
    if (!this.archive) return null
    const extractedName = this.extractToTmpDir(file)
    if (!extractedName) return null
    const filePath = Path.join(this.tmpDir, extractedName)
    const fileData = await fs.readFile(filePath)
    await fs.remove(filePath)
    await this.removeEmptyParentDirs(extractedName)
    Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${fileData.length}`)
    return fileData
  }

  /**
   * Extract a single entry and move it to the given output path, overwriting any existing file.
   *
   * @param {string} file entry name inside the archive
   * @param {string} outputFilePath destination path on disk
   * @returns {Promise<boolean>} true on success, false when the archive is not open or the entry was not extracted
   */
  async extractToFile(file, outputFilePath) {
    if (!this.archive) return false
    const extractedName = this.extractToTmpDir(file)
    if (!extractedName) return false
    const extractedFilePath = Path.join(this.tmpDir, extractedName)
    await fs.move(extractedFilePath, outputFilePath, { overwrite: true })
    await this.removeEmptyParentDirs(extractedName)
    Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
    return true
  }

  /**
   * Only logs; no resource is released here.
   */
  close() {
    Logger.debug(`[CbrComicBookExtractor] Closed comic book "${this.comicPath}"`)
  }
}
| 
 | ||||
/**
 * Extractor for .cbz comic books that reads the whole file into memory and
 * opens it with the bundled libarchive wrapper.
 */
class CbzComicBookExtractor extends AbstractComicBookExtractor {
  /**
   * @param {string} comicPath path of the comic book file
   */
  constructor(comicPath) {
    super(comicPath)
    this.archive = null
  }

  /**
   * Read the comic file and open it as an archive.
   *
   * @returns {Promise<void>}
   * @throws {Error} when the comic file could not be read
   */
  async open() {
    const buffer = await this.getBuffer()
    // getBuffer() returns null on failure (it has already logged why);
    // fail here with a clear message instead of handing null to Archive.open.
    if (!buffer) throw new Error(`[CbzComicBookExtractor] Failed to read comic book "${this.comicPath}"`)
    this.archive = await Archive.open(buffer)
    Logger.debug(`[CbzComicBookExtractor] Opened comic book "${this.comicPath}"`)
  }

  /**
   * List the paths of the entries in the archive.
   *
   * @returns {Promise<string[]|null>} entry paths, or null when the archive is not open
   */
  async getFilePaths() {
    if (!this.archive) return null
    const list = await this.archive.getFilesArray()
    const fileNames = list.map((fo) => fo.file._path)
    Logger.debug(`[CbzComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`)
    return fileNames
  }

  /**
   * Extract a single entry and return its contents.
   *
   * @param {string} file entry path inside the archive
   * @returns {Promise<*>} the entry's fileData, undefined when nothing was extracted, or null when the archive is not open
   */
  async extractToBuffer(file) {
    if (!this.archive) return null
    const extracted = await this.archive.extractSingleFile(file)
    // fileData may be missing even when an entry object is returned, so guard the length lookup too.
    Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted?.fileData?.length}`)
    return extracted?.fileData
  }

  /**
   * Extract a single entry and write it to the given output path.
   *
   * @param {string} file entry path inside the archive
   * @param {string} outputFilePath destination path on disk
   * @returns {Promise<boolean>} true on success, false when no data could be extracted
   */
  async extractToFile(file, outputFilePath) {
    const data = await this.extractToBuffer(file)
    if (!data) return false
    await fs.writeFile(outputFilePath, data)
    Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
    return true
  }

  /**
   * Close the archive if one was opened.
   */
  close() {
    this.archive?.close()
    Logger.debug(`[CbzComicBookExtractor] Closed comic book "${this.comicPath}"`)
  }
}
| 
 | ||||
/**
 * Extractor for .cbz comic books backed by the bundled node-stream-zip async API,
 * which is given the file path rather than an in-memory buffer.
 */
class CbzStreamZipComicBookExtractor extends AbstractComicBookExtractor {
  /**
   * @param {string} comicPath path of the comic book file
   */
  constructor(comicPath) {
    super(comicPath)
    this.archive = null
  }

  /**
   * Create the zip reader for the comic file.
   *
   * @returns {Promise<void>}
   */
  async open() {
    this.archive = new StreamZip.async({ file: this.comicPath })
    Logger.debug(`[CbzStreamZipComicBookExtractor] Opened comic book "${this.comicPath}"`)
  }

  /**
   * List the names of all non-directory entries in the archive.
   *
   * @returns {Promise<string[]|null>} entry names, or null when the archive is not open
   */
  async getFilePaths() {
    if (!this.archive) return null
    const entries = await this.archive.entries()
    const fileNames = Object.keys(entries).filter((entry) => !entries[entry].isDirectory)
    Logger.debug(`[CbzStreamZipComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`)
    return fileNames
  }

  /**
   * Extract a single entry and return its contents.
   *
   * @param {string} file entry name inside the archive
   * @returns {Promise<Buffer|null>} entry contents, or null when the archive is not open
   */
  async extractToBuffer(file) {
    if (!this.archive) return null
    const extracted = await this.archive.entryData(file)
    Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted?.length}`)
    return extracted
  }

  /**
   * Extract a single entry to the given output path.
   *
   * @param {string} file entry name inside the archive
   * @param {string} outputFilePath destination path on disk
   * @returns {Promise<boolean>} true on success, false when the archive is not open or extraction fails
   */
  async extractToFile(file, outputFilePath) {
    if (!this.archive) return false
    try {
      await this.archive.extract(file, outputFilePath)
      Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
      return true
    } catch (error) {
      Logger.error(`[CbzStreamZipComicBookExtractor] Failed to extract file "${file}" to "${outputFilePath}"`, error)
      return false
    }
  }

  /**
   * Close the zip reader if one was opened.
   *
   * close() stays synchronous for callers, so the result of the underlying
   * close is given a rejection handler rather than being left floating.
   * NOTE(review): assumes the bundled StreamZip.async close() returns a promise,
   * as upstream node-stream-zip does — confirm against ../libs/nodeStreamZip.
   */
  close() {
    Promise.resolve(this.archive?.close()).catch((error) => {
      Logger.error(`[CbzStreamZipComicBookExtractor] Failed to close comic book "${this.comicPath}"`, error)
    })
    Logger.debug(`[CbzStreamZipComicBookExtractor] Closed comic book "${this.comicPath}"`)
  }
}
| 
 | ||||
/**
 * Pick the extractor implementation for a comic book based on its file extension.
 * The extension is compared case-insensitively.
 *
 * @param {string} comicPath path of the comic book file
 * @returns {AbstractComicBookExtractor} a CbrComicBookExtractor for ".cbr", a CbzStreamZipComicBookExtractor for ".cbz"
 * @throws {Error} when the extension is neither ".cbr" nor ".cbz"
 */
function createComicBookExtractor(comicPath) {
  const ext = Path.extname(comicPath).toLowerCase()
  switch (ext) {
    case '.cbr':
      return new CbrComicBookExtractor(comicPath)
    case '.cbz':
      return new CbzStreamZipComicBookExtractor(comicPath)
    default:
      throw new Error(`Unsupported comic book format "${ext}"`)
  }
}
| module.exports = { createComicBookExtractor } | ||||
| @ -1,28 +1,9 @@ | ||||
| const Path = require('path') | ||||
| const globals = require('../globals') | ||||
| const fs = require('../../libs/fsExtra') | ||||
| const Logger = require('../../Logger') | ||||
| const Archive = require('../../libs/libarchive/archive') | ||||
| const { xmlToJSON } = require('../index') | ||||
| const parseComicInfoMetadata = require('./parseComicInfoMetadata') | ||||
| 
 | ||||
| /** | ||||
|  * | ||||
|  * @param {string} filepath | ||||
|  * @returns {Promise<Buffer>} | ||||
|  */ | ||||
| async function getComicFileBuffer(filepath) { | ||||
|   if (!(await fs.pathExists(filepath))) { | ||||
|     Logger.error(`[parseComicMetadata] Comic path does not exist "${filepath}"`) | ||||
|     return null | ||||
|   } | ||||
|   try { | ||||
|     return fs.readFile(filepath) | ||||
|   } catch (error) { | ||||
|     Logger.error(`[parseComicMetadata] Failed to read comic at "${filepath}"`, error) | ||||
|     return null | ||||
|   } | ||||
| } | ||||
| const globals = require('../globals') | ||||
| const { xmlToJSON } = require('../index') | ||||
| const { createComicBookExtractor } = require('../comicBookExtractors.js') | ||||
| 
 | ||||
| /** | ||||
|  * Extract the cover image from a comic; returns true on success | ||||
| @ -33,22 +14,11 @@ async function getComicFileBuffer(filepath) { | ||||
|  * @returns {Promise<boolean>} | ||||
|  */ | ||||
| async function extractCoverImage(comicPath, comicImageFilepath, outputCoverPath) { | ||||
|   const comicFileBuffer = await getComicFileBuffer(comicPath) | ||||
|   if (!comicFileBuffer) return null | ||||
| 
 | ||||
|   let archive = null | ||||
|   try { | ||||
|     archive = await Archive.open(comicFileBuffer) | ||||
|     const fileEntry = await archive.extractSingleFile(comicImageFilepath) | ||||
| 
 | ||||
|     if (!fileEntry?.fileData) { | ||||
|       Logger.error(`[parseComicMetadata] Invalid file entry data for comicPath "${comicPath}"/${comicImageFilepath}`) | ||||
|       return false | ||||
|     } | ||||
| 
 | ||||
|     await fs.writeFile(outputCoverPath, fileEntry.fileData) | ||||
| 
 | ||||
|     return true | ||||
|     archive = createComicBookExtractor(comicPath) | ||||
|     await archive.open() | ||||
|     return await archive.extractToFile(comicImageFilepath, outputCoverPath) | ||||
|   } catch (error) { | ||||
|     Logger.error(`[parseComicMetadata] Failed to extract image "${comicImageFilepath}" from comicPath "${comicPath}" into "${outputCoverPath}"`, error) | ||||
|     return false | ||||
| @ -67,30 +37,28 @@ module.exports.extractCoverImage = extractCoverImage | ||||
|  */ | ||||
| async function parse(ebookFile) { | ||||
|   const comicPath = ebookFile.metadata.path | ||||
|   Logger.debug(`Parsing metadata from comic at "${comicPath}"`) | ||||
| 
 | ||||
|   const comicFileBuffer = await getComicFileBuffer(comicPath) | ||||
|   if (!comicFileBuffer) return null | ||||
| 
 | ||||
|   Logger.debug(`[parseComicMetadata] Parsing comic metadata at "${comicPath}"`) | ||||
|   let archive = null | ||||
|   try { | ||||
|     archive = await Archive.open(comicFileBuffer) | ||||
|     archive = createComicBookExtractor(comicPath) | ||||
|     await archive.open() | ||||
| 
 | ||||
|     const fileObjects = await archive.getFilesArray() | ||||
|     const filePaths = await archive.getFilePaths() | ||||
| 
 | ||||
|     fileObjects.sort((a, b) => { | ||||
|       return a.file.name.localeCompare(b.file.name, undefined, { | ||||
|     // Sort the file paths in a natural order to get the first image
 | ||||
|     filePaths.sort((a, b) => { | ||||
|       return a.localeCompare(b, undefined, { | ||||
|         numeric: true, | ||||
|         sensitivity: 'base' | ||||
|       }) | ||||
|     }) | ||||
| 
 | ||||
|     let metadata = null | ||||
|     const comicInfo = fileObjects.find((fo) => fo.file.name === 'ComicInfo.xml') | ||||
|     if (comicInfo) { | ||||
|       const comicInfoEntry = await comicInfo.file.extract() | ||||
|       if (comicInfoEntry?.fileData) { | ||||
|         const comicInfoStr = new TextDecoder().decode(comicInfoEntry.fileData) | ||||
|     const comicInfoPath = filePaths.find((filePath) => filePath === 'ComicInfo.xml') | ||||
|     if (comicInfoPath) { | ||||
|       const comicInfoData = await archive.extractToBuffer(comicInfoPath) | ||||
|       if (comicInfoData) { | ||||
|         const comicInfoStr = new TextDecoder().decode(comicInfoData) | ||||
|         const comicInfoJson = await xmlToJSON(comicInfoStr) | ||||
|         if (comicInfoJson) { | ||||
|           metadata = parseComicInfoMetadata.parse(comicInfoJson) | ||||
| @ -104,9 +72,9 @@ async function parse(ebookFile) { | ||||
|       metadata | ||||
|     } | ||||
| 
 | ||||
|     const firstImage = fileObjects.find((fo) => globals.SupportedImageTypes.includes(Path.extname(fo.file.name).toLowerCase().slice(1))) | ||||
|     if (firstImage?.file?._path) { | ||||
|       payload.ebookCoverPath = firstImage.file._path | ||||
|     const firstImagePath = filePaths.find((filePath) => globals.SupportedImageTypes.includes(Path.extname(filePath).toLowerCase().slice(1))) | ||||
|     if (firstImagePath) { | ||||
|       payload.ebookCoverPath = firstImagePath | ||||
|     } else { | ||||
|       Logger.warn(`[parseComicMetadata] Cover image not found in comic at "${comicPath}"`) | ||||
|     } | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user