Merge pull request #3435 from mikiher/comic-book-extractors

Move to node-unrar-js for cbr and node-stream-zip for cbz
This commit is contained in:
advplyr 2024-09-18 14:52:32 -05:00 committed by GitHub
commit 8f96d20a23
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 238 additions and 53 deletions

9
package-lock.json generated
View File

@ -16,6 +16,7 @@
"graceful-fs": "^4.2.10",
"htmlparser2": "^8.0.1",
"lru-cache": "^10.0.3",
"node-unrar-js": "^2.0.2",
"nodemailer": "^6.9.13",
"openid-client": "^5.6.1",
"p-throttle": "^4.1.1",
@ -3565,6 +3566,14 @@
"integrity": "sha512-uYr7J37ae/ORWdZeQ1xxMJe3NtdmqMC/JZK+geofDrkLUApKRHPd18/TxtBOJ4A0/+uUIliorNrfYV6s1b02eQ==",
"dev": true
},
"node_modules/node-unrar-js": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/node-unrar-js/-/node-unrar-js-2.0.2.tgz",
"integrity": "sha512-hLNmoJzqaKJnod8yiTVGe9hnlNRHotUi0CreSv/8HtfRi/3JnRC8DvsmKfeGGguRjTEulhZK6zXX5PXoVuDZ2w==",
"engines": {
"node": ">=10.0.0"
}
},
"node_modules/nodemailer": {
"version": "6.9.13",
"resolved": "https://registry.npmjs.org/nodemailer/-/nodemailer-6.9.13.tgz",

View File

@ -43,6 +43,7 @@
"graceful-fs": "^4.2.10",
"htmlparser2": "^8.0.1",
"lru-cache": "^10.0.3",
"node-unrar-js": "^2.0.2",
"nodemailer": "^6.9.13",
"openid-client": "^5.6.1",
"p-throttle": "^4.1.1",

View File

@ -0,0 +1,207 @@
const Path = require('path')
const os = require('os')
const unrar = require('node-unrar-js')
const Logger = require('../Logger')
const fs = require('../libs/fsExtra')
const StreamZip = require('../libs/nodeStreamZip')
const Archive = require('../libs/libarchive/archive')
const { isWritable } = require('./fileUtils')
/**
 * Base class for comic book archive extractors.
 *
 * Holds the path of the comic book and declares the operations that concrete
 * extractors override. Every operation except getBuffer() throws
 * "Not implemented" here.
 */
class AbstractComicBookExtractor {
  /**
   * @param {string} comicPath path of the comic book archive
   */
  constructor(comicPath) {
    this.comicPath = comicPath
  }

  /**
   * Read the whole comic book file into memory.
   *
   * @returns {Promise<Buffer|null>} file contents, or null when the path does not exist or the read fails
   */
  async getBuffer() {
    if (!(await fs.pathExists(this.comicPath))) {
      Logger.error(`[parseComicMetadata] Comic path does not exist "${this.comicPath}"`)
      return null
    }

    try {
      // The await must happen inside the try block: returning the bare promise
      // would let a rejected read skip the catch below and reach the caller.
      return await fs.readFile(this.comicPath)
    } catch (error) {
      Logger.error(`[parseComicMetadata] Failed to read comic at "${this.comicPath}"`, error)
      return null
    }
  }

  /**
   * Open the archive. Must be overridden.
   */
  async open() {
    throw new Error('Not implemented')
  }

  /**
   * List the file paths contained in the archive. Must be overridden.
   */
  async getFilePaths() {
    throw new Error('Not implemented')
  }

  /**
   * Extract one archive entry to a file on disk. Must be overridden.
   *
   * @param {string} filePath path of the entry inside the archive
   * @param {string} outputFilePath destination path on disk
   */
  async extractToFile(filePath, outputFilePath) {
    throw new Error('Not implemented')
  }

  /**
   * Extract one archive entry into memory. Must be overridden.
   *
   * @param {string} filePath path of the entry inside the archive
   */
  async extractToBuffer(filePath) {
    throw new Error('Not implemented')
  }

  /**
   * Release the archive. Must be overridden.
   */
  close() {
    throw new Error('Not implemented')
  }
}
/**
 * Extractor for .cbr comic books, backed by node-unrar-js.
 *
 * Entries are extracted into a temp directory and are then either read into a
 * buffer or moved to their destination; the extracted copy and any directories
 * left empty by it are removed afterwards.
 */
class CbrComicBookExtractor extends AbstractComicBookExtractor {
  /**
   * @param {string} comicPath path of the .cbr file
   */
  constructor(comicPath) {
    super(comicPath)
    this.archive = null
    this.tmpDir = null
  }

  /**
   * Pick the temp directory (`<MetadataPath>/tmp` when global.MetadataPath is
   * set, the OS temp directory otherwise), make sure it exists and is writable,
   * and create the unrar extractor targeting it.
   *
   * @throws {Error} when the temp directory is not writable
   */
  async open() {
    this.tmpDir = global.MetadataPath ? Path.join(global.MetadataPath, 'tmp') : os.tmpdir()
    await fs.ensureDir(this.tmpDir)
    if (!(await isWritable(this.tmpDir))) throw new Error(`[CbrComicBookExtractor] Temp directory "${this.tmpDir}" is not writable`)
    this.archive = await unrar.createExtractorFromFile({ filepath: this.comicPath, targetPath: this.tmpDir })
    Logger.debug(`[CbrComicBookExtractor] Opened comic book "${this.comicPath}". Using temp directory "${this.tmpDir}" for extraction.`)
  }

  /**
   * List the names of all non-directory entries in the archive.
   *
   * @returns {Promise<string[]|null>} entry names, or null when the archive is not open
   */
  async getFilePaths() {
    if (!this.archive) return null
    const list = this.archive.getFileList()
    const fileHeaders = [...list.fileHeaders]
    const filePaths = fileHeaders.filter((fh) => !fh.flags.directory).map((fh) => fh.name)
    Logger.debug(`[CbrComicBookExtractor] Found ${filePaths.length} files in comic book "${this.comicPath}"`)
    return filePaths
  }

  /**
   * Remove the directories inside the temp directory that were created for an
   * extracted entry, walking upwards and stopping at the first directory that
   * still has content.
   *
   * @param {string} file entry name, relative to the temp directory
   */
  async removeEmptyParentDirs(file) {
    let dir = Path.dirname(file)
    // Path.dirname() settles on '.' for relative names and on the root for
    // absolute ones; stopping on either keeps the walk finite and keeps it
    // from ever reaching the temp directory itself.
    while (dir !== '.' && dir !== Path.dirname(dir)) {
      const fullDirPath = Path.join(this.tmpDir, dir)
      const files = await fs.readdir(fullDirPath)
      if (files.length > 0) break
      await fs.remove(fullDirPath)
      dir = Path.dirname(dir)
    }
  }

  /**
   * Extract a single entry into the temp directory.
   *
   * @param {string} file entry name inside the archive
   * @returns {string|null} name of the extracted entry relative to the temp directory, or null when nothing was extracted
   */
  extractToTmpDir(file) {
    const extracted = this.archive.extract({ files: [file] })
    // Spreading consumes the whole iterable returned by the extractor
    const files = [...extracted.files]
    return files[0]?.fileHeader?.name ?? null
  }

  /**
   * Extract a single entry and return its contents.
   *
   * @param {string} file entry name inside the archive
   * @returns {Promise<Buffer|null>} entry contents, or null when the archive is not open or has no such entry
   */
  async extractToBuffer(file) {
    if (!this.archive) return null
    const entryName = this.extractToTmpDir(file)
    if (entryName === null) {
      Logger.error(`[CbrComicBookExtractor] File "${file}" not found in comic book "${this.comicPath}"`)
      return null
    }
    const filePath = Path.join(this.tmpDir, entryName)
    const fileData = await fs.readFile(filePath)
    await fs.remove(filePath)
    await this.removeEmptyParentDirs(entryName)
    Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${fileData.length}`)
    return fileData
  }

  /**
   * Extract a single entry and move it to outputFilePath, overwriting any existing file.
   *
   * @param {string} file entry name inside the archive
   * @param {string} outputFilePath destination path
   * @returns {Promise<boolean>} true on success, false when the archive is not open or has no such entry
   */
  async extractToFile(file, outputFilePath) {
    if (!this.archive) return false
    const entryName = this.extractToTmpDir(file)
    if (entryName === null) {
      Logger.error(`[CbrComicBookExtractor] File "${file}" not found in comic book "${this.comicPath}"`)
      return false
    }
    const extractedFilePath = Path.join(this.tmpDir, entryName)
    await fs.move(extractedFilePath, outputFilePath, { overwrite: true })
    await this.removeEmptyParentDirs(entryName)
    Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
    return true
  }

  /**
   * Only logs; this class holds no handle that needs releasing here.
   */
  close() {
    Logger.debug(`[CbrComicBookExtractor] Closed comic book "${this.comicPath}"`)
  }
}
/**
 * Extractor for .cbz comic books, backed by the bundled libarchive wrapper.
 * The whole comic file is read into memory before the archive is opened.
 */
class CbzComicBookExtractor extends AbstractComicBookExtractor {
  /**
   * @param {string} comicPath path of the .cbz file
   */
  constructor(comicPath) {
    super(comicPath)
    this.archive = null
  }

  /**
   * Read the comic file and open it as an archive.
   *
   * @throws {Error} when the comic file could not be read
   */
  async open() {
    const buffer = await this.getBuffer()
    // getBuffer() returns null (after logging) when the file is missing or unreadable;
    // fail here with a clear message rather than handing null to the archive library.
    if (!buffer) throw new Error(`[CbzComicBookExtractor] Failed to read comic book "${this.comicPath}"`)
    this.archive = await Archive.open(buffer)
    Logger.debug(`[CbzComicBookExtractor] Opened comic book "${this.comicPath}"`)
  }

  /**
   * List the paths of the files in the archive.
   *
   * @returns {Promise<string[]|null>} file paths, or null when the archive is not open
   */
  async getFilePaths() {
    if (!this.archive) return null
    const list = await this.archive.getFilesArray()
    const fileNames = list.map((fo) => fo.file._path)
    Logger.debug(`[CbzComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`)
    return fileNames
  }

  /**
   * Extract a single entry and return its data.
   *
   * @param {string} file path of the entry inside the archive
   * @returns {Promise<*>} the entry's fileData, undefined when the entry has none, or null when the archive is not open
   */
  async extractToBuffer(file) {
    if (!this.archive) return null
    const extracted = await this.archive.extractSingleFile(file)
    // fileData may be absent even when an entry object is returned, so guard both levels
    Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted?.fileData?.length}`)
    return extracted?.fileData
  }

  /**
   * Extract a single entry and write it to outputFilePath.
   *
   * @param {string} file path of the entry inside the archive
   * @param {string} outputFilePath destination path
   * @returns {Promise<boolean>} true when the file was written, false when no data could be extracted
   */
  async extractToFile(file, outputFilePath) {
    const data = await this.extractToBuffer(file)
    if (!data) return false
    await fs.writeFile(outputFilePath, data)
    Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
    return true
  }

  /**
   * Close the archive if one is open.
   */
  close() {
    this.archive?.close()
    Logger.debug(`[CbzComicBookExtractor] Closed comic book "${this.comicPath}"`)
  }
}
/**
 * Extractor for .cbz comic books, backed by the async API of the bundled
 * node-stream-zip library. The archive is opened by file path rather than
 * from a buffer read by this class.
 */
class CbzStreamZipComicBookExtractor extends AbstractComicBookExtractor {
  /**
   * @param {string} comicPath path of the .cbz file
   */
  constructor(comicPath) {
    super(comicPath)
    this.archive = null
  }

  /**
   * Create the zip reader for the comic file.
   */
  async open() {
    this.archive = new StreamZip.async({ file: this.comicPath })
    Logger.debug(`[CbzStreamZipComicBookExtractor] Opened comic book "${this.comicPath}"`)
  }

  /**
   * List the names of all non-directory entries in the archive.
   *
   * @returns {Promise<string[]|null>} entry names, or null when the archive is not open
   */
  async getFilePaths() {
    if (!this.archive) return null
    const entries = await this.archive.entries()
    const fileNames = Object.keys(entries).filter((entry) => !entries[entry].isDirectory)
    Logger.debug(`[CbzStreamZipComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`)
    return fileNames
  }

  /**
   * Extract a single entry and return its contents.
   *
   * @param {string} file name of the entry inside the archive
   * @returns {Promise<Buffer|null>} entry contents, or null when the archive is not open or extraction fails
   */
  async extractToBuffer(file) {
    if (!this.archive) return null
    try {
      const extracted = await this.archive.entryData(file)
      Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted.length}`)
      return extracted
    } catch (error) {
      // Mirror extractToFile(): report the failure through the return value
      Logger.error(`[CbzStreamZipComicBookExtractor] Failed to extract file "${file}" to buffer`, error)
      return null
    }
  }

  /**
   * Extract a single entry to outputFilePath.
   *
   * @param {string} file name of the entry inside the archive
   * @param {string} outputFilePath destination path
   * @returns {Promise<boolean>} true on success, false when the archive is not open or extraction fails
   */
  async extractToFile(file, outputFilePath) {
    if (!this.archive) return false
    try {
      await this.archive.extract(file, outputFilePath)
      Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
      return true
    } catch (error) {
      Logger.error(`[CbzStreamZipComicBookExtractor] Failed to extract file "${file}" to "${outputFilePath}"`, error)
      return false
    }
  }

  /**
   * Close the archive if one is open.
   *
   * This method is synchronous, so the result of the underlying close() is not
   * awaited. If that result is a promise, a rejection handler is attached so a
   * failed close is logged instead of surfacing as an unhandled rejection.
   */
  close() {
    const closing = this.archive?.close()
    Promise.resolve(closing).catch((error) => {
      Logger.error(`[CbzStreamZipComicBookExtractor] Failed to close comic book "${this.comicPath}"`, error)
    })
    Logger.debug(`[CbzStreamZipComicBookExtractor] Closed comic book "${this.comicPath}"`)
  }
}
/**
 * Build the extractor that matches a comic book's file extension.
 * The extension is compared case-insensitively.
 *
 * @param {string} comicPath path of the comic book file
 * @returns {CbrComicBookExtractor|CbzStreamZipComicBookExtractor} extractor for ".cbr" or ".cbz" respectively
 * @throws {Error} when the extension is neither ".cbr" nor ".cbz"
 */
function createComicBookExtractor(comicPath) {
  const extension = Path.extname(comicPath).toLowerCase()

  switch (extension) {
    case '.cbr':
      return new CbrComicBookExtractor(comicPath)
    case '.cbz':
      return new CbzStreamZipComicBookExtractor(comicPath)
    default:
      throw new Error(`Unsupported comic book format "${extension}"`)
  }
}
module.exports = { createComicBookExtractor }

View File

@ -1,28 +1,9 @@
const Path = require('path')
const globals = require('../globals')
const fs = require('../../libs/fsExtra')
const Logger = require('../../Logger')
const Archive = require('../../libs/libarchive/archive')
const { xmlToJSON } = require('../index')
const parseComicInfoMetadata = require('./parseComicInfoMetadata')
/**
 * Read a comic file into memory.
 *
 * @param {string} filepath
 * @returns {Promise<Buffer|null>} file contents, or null when the path does not exist or the read fails
 */
async function getComicFileBuffer(filepath) {
  if (!(await fs.pathExists(filepath))) {
    Logger.error(`[parseComicMetadata] Comic path does not exist "${filepath}"`)
    return null
  }

  try {
    // Await inside the try block so that a rejected read is handled by the catch below
    return await fs.readFile(filepath)
  } catch (error) {
    Logger.error(`[parseComicMetadata] Failed to read comic at "${filepath}"`, error)
    return null
  }
}
const globals = require('../globals')
const { xmlToJSON } = require('../index')
const { createComicBookExtractor } = require('../comicBookExtractors.js')
/**
* Extract cover image from comic return true if success
@ -33,22 +14,11 @@ async function getComicFileBuffer(filepath) {
* @returns {Promise<boolean>}
*/
async function extractCoverImage(comicPath, comicImageFilepath, outputCoverPath) {
const comicFileBuffer = await getComicFileBuffer(comicPath)
if (!comicFileBuffer) return null
let archive = null
try {
archive = await Archive.open(comicFileBuffer)
const fileEntry = await archive.extractSingleFile(comicImageFilepath)
if (!fileEntry?.fileData) {
Logger.error(`[parseComicMetadata] Invalid file entry data for comicPath "${comicPath}"/${comicImageFilepath}`)
return false
}
await fs.writeFile(outputCoverPath, fileEntry.fileData)
return true
archive = createComicBookExtractor(comicPath)
await archive.open()
return await archive.extractToFile(comicImageFilepath, outputCoverPath)
} catch (error) {
Logger.error(`[parseComicMetadata] Failed to extract image "${comicImageFilepath}" from comicPath "${comicPath}" into "${outputCoverPath}"`, error)
return false
@ -67,30 +37,28 @@ module.exports.extractCoverImage = extractCoverImage
*/
async function parse(ebookFile) {
const comicPath = ebookFile.metadata.path
Logger.debug(`Parsing metadata from comic at "${comicPath}"`)
const comicFileBuffer = await getComicFileBuffer(comicPath)
if (!comicFileBuffer) return null
Logger.debug(`[parseComicMetadata] Parsing comic metadata at "${comicPath}"`)
let archive = null
try {
archive = await Archive.open(comicFileBuffer)
archive = createComicBookExtractor(comicPath)
await archive.open()
const fileObjects = await archive.getFilesArray()
const filePaths = await archive.getFilePaths()
fileObjects.sort((a, b) => {
return a.file.name.localeCompare(b.file.name, undefined, {
// Sort the file paths in a natural order to get the first image
filePaths.sort((a, b) => {
return a.localeCompare(b, undefined, {
numeric: true,
sensitivity: 'base'
})
})
let metadata = null
const comicInfo = fileObjects.find((fo) => fo.file.name === 'ComicInfo.xml')
if (comicInfo) {
const comicInfoEntry = await comicInfo.file.extract()
if (comicInfoEntry?.fileData) {
const comicInfoStr = new TextDecoder().decode(comicInfoEntry.fileData)
const comicInfoPath = filePaths.find((filePath) => filePath === 'ComicInfo.xml')
if (comicInfoPath) {
const comicInfoData = await archive.extractToBuffer(comicInfoPath)
if (comicInfoData) {
const comicInfoStr = new TextDecoder().decode(comicInfoData)
const comicInfoJson = await xmlToJSON(comicInfoStr)
if (comicInfoJson) {
metadata = parseComicInfoMetadata.parse(comicInfoJson)
@ -104,9 +72,9 @@ async function parse(ebookFile) {
metadata
}
const firstImage = fileObjects.find((fo) => globals.SupportedImageTypes.includes(Path.extname(fo.file.name).toLowerCase().slice(1)))
if (firstImage?.file?._path) {
payload.ebookCoverPath = firstImage.file._path
const firstImagePath = filePaths.find((filePath) => globals.SupportedImageTypes.includes(Path.extname(filePath).toLowerCase().slice(1)))
if (firstImagePath) {
payload.ebookCoverPath = firstImagePath
} else {
Logger.warn(`[parseComicMetadata] Cover image not found in comic at "${comicPath}"`)
}