mirror of
				https://github.com/advplyr/audiobookshelf.git
				synced 2025-10-27 11:18:14 +01:00 
			
		
		
		
	Merge pull request #3435 from mikiher/comic-book-extractors
Move to node-unrar-js for cbr and node-stream-zip for cbz
This commit is contained in:
		
						commit
						8f96d20a23
					
				
							
								
								
									
										9
									
								
								package-lock.json
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										9
									
								
								package-lock.json
									
									
									
										generated
									
									
									
								
							@ -16,6 +16,7 @@
 | 
			
		||||
        "graceful-fs": "^4.2.10",
 | 
			
		||||
        "htmlparser2": "^8.0.1",
 | 
			
		||||
        "lru-cache": "^10.0.3",
 | 
			
		||||
        "node-unrar-js": "^2.0.2",
 | 
			
		||||
        "nodemailer": "^6.9.13",
 | 
			
		||||
        "openid-client": "^5.6.1",
 | 
			
		||||
        "p-throttle": "^4.1.1",
 | 
			
		||||
@ -3565,6 +3566,14 @@
 | 
			
		||||
      "integrity": "sha512-uYr7J37ae/ORWdZeQ1xxMJe3NtdmqMC/JZK+geofDrkLUApKRHPd18/TxtBOJ4A0/+uUIliorNrfYV6s1b02eQ==",
 | 
			
		||||
      "dev": true
 | 
			
		||||
    },
 | 
			
		||||
    "node_modules/node-unrar-js": {
 | 
			
		||||
      "version": "2.0.2",
 | 
			
		||||
      "resolved": "https://registry.npmjs.org/node-unrar-js/-/node-unrar-js-2.0.2.tgz",
 | 
			
		||||
      "integrity": "sha512-hLNmoJzqaKJnod8yiTVGe9hnlNRHotUi0CreSv/8HtfRi/3JnRC8DvsmKfeGGguRjTEulhZK6zXX5PXoVuDZ2w==",
 | 
			
		||||
      "engines": {
 | 
			
		||||
        "node": ">=10.0.0"
 | 
			
		||||
      }
 | 
			
		||||
    },
 | 
			
		||||
    "node_modules/nodemailer": {
 | 
			
		||||
      "version": "6.9.13",
 | 
			
		||||
      "resolved": "https://registry.npmjs.org/nodemailer/-/nodemailer-6.9.13.tgz",
 | 
			
		||||
 | 
			
		||||
@ -43,6 +43,7 @@
 | 
			
		||||
    "graceful-fs": "^4.2.10",
 | 
			
		||||
    "htmlparser2": "^8.0.1",
 | 
			
		||||
    "lru-cache": "^10.0.3",
 | 
			
		||||
    "node-unrar-js": "^2.0.2",
 | 
			
		||||
    "nodemailer": "^6.9.13",
 | 
			
		||||
    "openid-client": "^5.6.1",
 | 
			
		||||
    "p-throttle": "^4.1.1",
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										207
									
								
								server/utils/comicBookExtractors.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										207
									
								
								server/utils/comicBookExtractors.js
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,207 @@
 | 
			
		||||
const Path = require('path')
 | 
			
		||||
const os = require('os')
 | 
			
		||||
const unrar = require('node-unrar-js')
 | 
			
		||||
const Logger = require('../Logger')
 | 
			
		||||
const fs = require('../libs/fsExtra')
 | 
			
		||||
const StreamZip = require('../libs/nodeStreamZip')
 | 
			
		||||
const Archive = require('../libs/libarchive/archive')
 | 
			
		||||
const { isWritable } = require('./fileUtils')
 | 
			
		||||
 | 
			
		||||
/**
 * Base class for comic book archive extractors.
 *
 * Holds the path of the comic on disk and declares the operations every
 * concrete extractor provides. All operations except getBuffer() throw
 * 'Not implemented' here and are expected to be overridden.
 */
class AbstractComicBookExtractor {
  /**
   * @param {string} comicPath - path of the comic book archive on disk
   */
  constructor(comicPath) {
    this.comicPath = comicPath
  }

  /**
   * Read the whole comic file into memory.
   *
   * @returns {Promise<Buffer|null>} file contents, or null when the path does not exist or the read fails
   */
  async getBuffer() {
    if (!(await fs.pathExists(this.comicPath))) {
      Logger.error(`[parseComicMetadata] Comic path does not exist "${this.comicPath}"`)
      return null
    }
    try {
      // The await must happen inside the try block: returning the bare promise
      // would let a rejected read skip the catch below and reject the caller instead.
      return await fs.readFile(this.comicPath)
    } catch (error) {
      Logger.error(`[parseComicMetadata] Failed to read comic at "${this.comicPath}"`, error)
      return null
    }
  }

  /**
   * Open the archive so entries can be listed and extracted.
   *
   * @returns {Promise<void>}
   */
  async open() {
    throw new Error('Not implemented')
  }

  /**
   * List the paths of the file entries inside the archive.
   *
   * @returns {Promise<string[]|null>}
   */
  async getFilePaths() {
    throw new Error('Not implemented')
  }

  /**
   * Extract a single archive entry to a file on disk.
   *
   * @param {string} filePath - entry path inside the archive
   * @param {string} outputFilePath - destination path on disk
   * @returns {Promise<boolean>}
   */
  async extractToFile(filePath, outputFilePath) {
    throw new Error('Not implemented')
  }

  /**
   * Extract a single archive entry into memory.
   *
   * @param {string} filePath - entry path inside the archive
   * @returns {Promise<Buffer|null>}
   */
  async extractToBuffer(filePath) {
    throw new Error('Not implemented')
  }

  /**
   * Release any resources held by the extractor.
   */
  close() {
    throw new Error('Not implemented')
  }
}
 | 
			
		||||
 | 
			
		||||
/**
 * Extractor for .cbr (RAR) comic books backed by node-unrar-js.
 *
 * Entries are extracted to a temp directory on disk and then either read
 * back into memory or moved to their final destination.
 */
class CbrComicBookExtractor extends AbstractComicBookExtractor {
  /**
   * @param {string} comicPath - path of the .cbr file on disk
   */
  constructor(comicPath) {
    super(comicPath)
    this.archive = null
    this.tmpDir = null
  }

  /**
   * Prepare the temp directory and create the unrar extractor for the comic.
   *
   * @returns {Promise<void>}
   * @throws {Error} when the temp directory is not writable
   */
  async open() {
    // Prefer <MetadataPath>/tmp when the server has set it, otherwise fall back to the OS temp dir
    this.tmpDir = global.MetadataPath ? Path.join(global.MetadataPath, 'tmp') : os.tmpdir()
    await fs.ensureDir(this.tmpDir)
    if (!(await isWritable(this.tmpDir))) throw new Error(`[CbrComicBookExtractor] Temp directory "${this.tmpDir}" is not writable`)
    this.archive = await unrar.createExtractorFromFile({ filepath: this.comicPath, targetPath: this.tmpDir })
    Logger.debug(`[CbrComicBookExtractor] Opened comic book "${this.comicPath}". Using temp directory "${this.tmpDir}" for extraction.`)
  }

  /**
   * List the names of all non-directory entries in the archive.
   *
   * @returns {Promise<string[]|null>} entry names, or null when the archive is not open
   */
  async getFilePaths() {
    if (!this.archive) return null
    const list = this.archive.getFileList()
    const fileHeaders = [...list.fileHeaders]
    const filePaths = fileHeaders.filter((fh) => !fh.flags.directory).map((fh) => fh.name)
    Logger.debug(`[CbrComicBookExtractor] Found ${filePaths.length} files in comic book "${this.comicPath}"`)
    return filePaths
  }

  /**
   * Remove the now-empty directories that extraction created for an entry,
   * walking up from the entry's folder and stopping at the first non-empty one.
   *
   * @param {string} file - entry name (path relative to the temp directory)
   * @returns {Promise<void>}
   */
  async removeEmptyParentDirs(file) {
    let dir = Path.dirname(file)
    // Path.dirname() settles on '.' for relative names and on the root for absolute ones.
    // Stop at either: joining the root onto tmpDir yields tmpDir itself, which must never be removed.
    while (dir !== '.' && dir !== Path.dirname(dir)) {
      const fullDirPath = Path.join(this.tmpDir, dir)
      const files = await fs.readdir(fullDirPath)
      if (files.length > 0) break
      await fs.remove(fullDirPath)
      dir = Path.dirname(dir)
    }
  }

  /**
   * Extract a single entry into memory via the temp directory.
   *
   * @param {string} file - entry name inside the archive
   * @returns {Promise<Buffer|null>} entry contents, or null when the archive is not open or the entry was not found
   */
  async extractToBuffer(file) {
    if (!this.archive) return null
    const extracted = this.archive.extract({ files: [file] })
    // Per the node-unrar-js docs the files iterator is lazy; consuming it is what writes the entry to tmpDir
    const files = [...extracted.files]
    if (!files.length) {
      // Nothing matched the requested name; report it instead of failing on files[0]
      Logger.warn(`[CbrComicBookExtractor] File "${file}" not found in comic book "${this.comicPath}"`)
      return null
    }
    const extractedName = files[0].fileHeader.name
    const filePath = Path.join(this.tmpDir, extractedName)
    const fileData = await fs.readFile(filePath)
    await fs.remove(filePath)
    await this.removeEmptyParentDirs(extractedName)
    Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${fileData.length}`)
    return fileData
  }

  /**
   * Extract a single entry to a file on disk via the temp directory.
   *
   * @param {string} file - entry name inside the archive
   * @param {string} outputFilePath - destination path (overwritten if it exists)
   * @returns {Promise<boolean>} true on success, false when the archive is not open or the entry was not found
   */
  async extractToFile(file, outputFilePath) {
    if (!this.archive) return false
    const extracted = this.archive.extract({ files: [file] })
    // Per the node-unrar-js docs the files iterator is lazy; consuming it is what writes the entry to tmpDir
    const files = [...extracted.files]
    if (!files.length) {
      // Nothing matched the requested name; report it instead of failing on files[0]
      Logger.warn(`[CbrComicBookExtractor] File "${file}" not found in comic book "${this.comicPath}"`)
      return false
    }
    const extractedName = files[0].fileHeader.name
    const extractedFilePath = Path.join(this.tmpDir, extractedName)
    await fs.move(extractedFilePath, outputFilePath, { overwrite: true })
    await this.removeEmptyParentDirs(extractedName)
    Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
    return true
  }

  /**
   * Nothing is released here; only the close is logged.
   */
  close() {
    Logger.debug(`[CbrComicBookExtractor] Closed comic book "${this.comicPath}"`)
  }
}
 | 
			
		||||
 | 
			
		||||
/**
 * Extractor for .cbz comic books backed by the bundled libarchive wrapper.
 *
 * The whole comic file is read into memory and handed to Archive.open().
 */
class CbzComicBookExtractor extends AbstractComicBookExtractor {
  /**
   * @param {string} comicPath - path of the .cbz file on disk
   */
  constructor(comicPath) {
    super(comicPath)
    this.archive = null
  }

  /**
   * Read the comic into memory and open it as an archive.
   *
   * @returns {Promise<void>}
   * @throws {Error} when the comic file could not be read
   */
  async open() {
    const buffer = await this.getBuffer()
    // getBuffer() reports a missing or unreadable file as null (already logged there);
    // fail with a clear message rather than passing null on to Archive.open()
    if (!buffer) throw new Error(`[CbzComicBookExtractor] Failed to read comic book "${this.comicPath}"`)
    this.archive = await Archive.open(buffer)
    Logger.debug(`[CbzComicBookExtractor] Opened comic book "${this.comicPath}"`)
  }

  /**
   * List the paths of the entries in the archive.
   *
   * @returns {Promise<string[]|null>} entry paths, or null when the archive is not open
   */
  async getFilePaths() {
    if (!this.archive) return null
    const list = await this.archive.getFilesArray()
    const fileNames = list.map((fo) => fo.file._path)
    Logger.debug(`[CbzComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`)
    return fileNames
  }

  /**
   * Extract a single entry into memory.
   *
   * @param {string} file - entry path inside the archive
   * @returns {Promise<*>} the entry's fileData, undefined when the entry has none, or null when the archive is not open
   */
  async extractToBuffer(file) {
    if (!this.archive) return null
    const extracted = await this.archive.extractSingleFile(file)
    // Chain through fileData as well so the log line cannot throw when an entry carries no data
    Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted?.fileData?.length}`)
    return extracted?.fileData
  }

  /**
   * Extract a single entry and write it to disk.
   *
   * @param {string} file - entry path inside the archive
   * @param {string} outputFilePath - destination path on disk
   * @returns {Promise<boolean>} true on success, false when no data could be extracted
   */
  async extractToFile(file, outputFilePath) {
    const data = await this.extractToBuffer(file)
    if (!data) return false
    await fs.writeFile(outputFilePath, data)
    Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
    return true
  }

  /**
   * Close the underlying archive if one was opened.
   */
  close() {
    this.archive?.close()
    Logger.debug(`[CbzComicBookExtractor] Closed comic book "${this.comicPath}"`)
  }
}
 | 
			
		||||
 | 
			
		||||
/**
 * Extractor for .cbz comic books backed by the bundled node-stream-zip,
 * which reads entries from the file on disk instead of loading the whole
 * comic into memory.
 */
class CbzStreamZipComicBookExtractor extends AbstractComicBookExtractor {
  /**
   * @param {string} comicPath - path of the .cbz file on disk
   */
  constructor(comicPath) {
    super(comicPath)
    this.archive = null
  }

  /**
   * Create the async zip handle for the comic.
   *
   * @returns {Promise<void>}
   */
  async open() {
    this.archive = new StreamZip.async({ file: this.comicPath })
    Logger.debug(`[CbzStreamZipComicBookExtractor] Opened comic book "${this.comicPath}"`)
  }

  /**
   * List the names of all non-directory entries in the archive.
   *
   * @returns {Promise<string[]|null>} entry names, or null when the archive is not open
   */
  async getFilePaths() {
    if (!this.archive) return null
    const entries = await this.archive.entries()
    const fileNames = Object.keys(entries).filter((entry) => !entries[entry].isDirectory)
    Logger.debug(`[CbzStreamZipComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`)
    return fileNames
  }

  /**
   * Extract a single entry into memory.
   *
   * @param {string} file - entry name inside the archive
   * @returns {Promise<Buffer|null>} entry contents, or null when the archive is not open
   */
  async extractToBuffer(file) {
    if (!this.archive) return null
    const extracted = await this.archive.entryData(file)
    Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted.length}`)
    return extracted
  }

  /**
   * Extract a single entry to a file on disk.
   *
   * @param {string} file - entry name inside the archive
   * @param {string} outputFilePath - destination path on disk
   * @returns {Promise<boolean>} true on success, false when the archive is not open or extraction fails
   */
  async extractToFile(file, outputFilePath) {
    if (!this.archive) return false
    try {
      await this.archive.extract(file, outputFilePath)
      Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
      return true
    } catch (error) {
      Logger.error(`[CbzStreamZipComicBookExtractor] Failed to extract file "${file}" to "${outputFilePath}"`, error)
      return false
    }
  }

  /**
   * Close the zip handle if one was opened.
   *
   * close() stays synchronous like the other extractors, so the close is
   * started here and any failure is logged rather than awaited.
   */
  close() {
    // node-stream-zip's async API returns a promise from close(); attach a handler
    // so a failed close is logged instead of becoming an unhandled rejection.
    Promise.resolve(this.archive?.close()).catch((error) => {
      Logger.error(`[CbzStreamZipComicBookExtractor] Failed to close comic book "${this.comicPath}"`, error)
    })
    Logger.debug(`[CbzStreamZipComicBookExtractor] Closed comic book "${this.comicPath}"`)
  }
}
 | 
			
		||||
 | 
			
		||||
/**
 * Pick the extractor implementation that matches the comic's file extension.
 * The extension is compared case-insensitively.
 *
 * @param {string} comicPath - path of the comic book file
 * @returns {CbrComicBookExtractor|CbzStreamZipComicBookExtractor} an unopened extractor for the comic
 * @throws {Error} when the extension is neither .cbr nor .cbz
 */
function createComicBookExtractor(comicPath) {
  const extension = Path.extname(comicPath).toLowerCase()
  switch (extension) {
    case '.cbr':
      return new CbrComicBookExtractor(comicPath)
    case '.cbz':
      return new CbzStreamZipComicBookExtractor(comicPath)
    default:
      throw new Error(`Unsupported comic book format "${extension}"`)
  }
}
 | 
			
		||||
module.exports = { createComicBookExtractor }
 | 
			
		||||
@ -1,28 +1,9 @@
 | 
			
		||||
const Path = require('path')
 | 
			
		||||
const globals = require('../globals')
 | 
			
		||||
const fs = require('../../libs/fsExtra')
 | 
			
		||||
const Logger = require('../../Logger')
 | 
			
		||||
const Archive = require('../../libs/libarchive/archive')
 | 
			
		||||
const { xmlToJSON } = require('../index')
 | 
			
		||||
const parseComicInfoMetadata = require('./parseComicInfoMetadata')
 | 
			
		||||
 | 
			
		||||
/**
 * Read a comic file into memory.
 *
 * @param {string} filepath
 * @returns {Promise<Buffer|null>} file contents, or null when the path does not exist or the read fails
 */
async function getComicFileBuffer(filepath) {
  if (!(await fs.pathExists(filepath))) {
    Logger.error(`[parseComicMetadata] Comic path does not exist "${filepath}"`)
    return null
  }
  try {
    // The await must happen inside the try block: returning the bare promise
    // would let a rejected read skip the catch below and reject the caller instead.
    return await fs.readFile(filepath)
  } catch (error) {
    Logger.error(`[parseComicMetadata] Failed to read comic at "${filepath}"`, error)
    return null
  }
}
 | 
			
		||||
const globals = require('../globals')
 | 
			
		||||
const { xmlToJSON } = require('../index')
 | 
			
		||||
const { createComicBookExtractor } = require('../comicBookExtractors.js')
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Extract cover image from comic return true if success
 | 
			
		||||
@ -33,22 +14,11 @@ async function getComicFileBuffer(filepath) {
 | 
			
		||||
 * @returns {Promise<boolean>}
 | 
			
		||||
 */
 | 
			
		||||
async function extractCoverImage(comicPath, comicImageFilepath, outputCoverPath) {
 | 
			
		||||
  const comicFileBuffer = await getComicFileBuffer(comicPath)
 | 
			
		||||
  if (!comicFileBuffer) return null
 | 
			
		||||
 | 
			
		||||
  let archive = null
 | 
			
		||||
  try {
 | 
			
		||||
    archive = await Archive.open(comicFileBuffer)
 | 
			
		||||
    const fileEntry = await archive.extractSingleFile(comicImageFilepath)
 | 
			
		||||
 | 
			
		||||
    if (!fileEntry?.fileData) {
 | 
			
		||||
      Logger.error(`[parseComicMetadata] Invalid file entry data for comicPath "${comicPath}"/${comicImageFilepath}`)
 | 
			
		||||
      return false
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    await fs.writeFile(outputCoverPath, fileEntry.fileData)
 | 
			
		||||
 | 
			
		||||
    return true
 | 
			
		||||
    archive = createComicBookExtractor(comicPath)
 | 
			
		||||
    await archive.open()
 | 
			
		||||
    return await archive.extractToFile(comicImageFilepath, outputCoverPath)
 | 
			
		||||
  } catch (error) {
 | 
			
		||||
    Logger.error(`[parseComicMetadata] Failed to extract image "${comicImageFilepath}" from comicPath "${comicPath}" into "${outputCoverPath}"`, error)
 | 
			
		||||
    return false
 | 
			
		||||
@ -67,30 +37,28 @@ module.exports.extractCoverImage = extractCoverImage
 | 
			
		||||
 */
 | 
			
		||||
async function parse(ebookFile) {
 | 
			
		||||
  const comicPath = ebookFile.metadata.path
 | 
			
		||||
  Logger.debug(`Parsing metadata from comic at "${comicPath}"`)
 | 
			
		||||
 | 
			
		||||
  const comicFileBuffer = await getComicFileBuffer(comicPath)
 | 
			
		||||
  if (!comicFileBuffer) return null
 | 
			
		||||
 | 
			
		||||
  Logger.debug(`[parseComicMetadata] Parsing comic metadata at "${comicPath}"`)
 | 
			
		||||
  let archive = null
 | 
			
		||||
  try {
 | 
			
		||||
    archive = await Archive.open(comicFileBuffer)
 | 
			
		||||
    archive = createComicBookExtractor(comicPath)
 | 
			
		||||
    await archive.open()
 | 
			
		||||
 | 
			
		||||
    const fileObjects = await archive.getFilesArray()
 | 
			
		||||
    const filePaths = await archive.getFilePaths()
 | 
			
		||||
 | 
			
		||||
    fileObjects.sort((a, b) => {
 | 
			
		||||
      return a.file.name.localeCompare(b.file.name, undefined, {
 | 
			
		||||
    // Sort the file paths in a natural order to get the first image
 | 
			
		||||
    filePaths.sort((a, b) => {
 | 
			
		||||
      return a.localeCompare(b, undefined, {
 | 
			
		||||
        numeric: true,
 | 
			
		||||
        sensitivity: 'base'
 | 
			
		||||
      })
 | 
			
		||||
    })
 | 
			
		||||
 | 
			
		||||
    let metadata = null
 | 
			
		||||
    const comicInfo = fileObjects.find((fo) => fo.file.name === 'ComicInfo.xml')
 | 
			
		||||
    if (comicInfo) {
 | 
			
		||||
      const comicInfoEntry = await comicInfo.file.extract()
 | 
			
		||||
      if (comicInfoEntry?.fileData) {
 | 
			
		||||
        const comicInfoStr = new TextDecoder().decode(comicInfoEntry.fileData)
 | 
			
		||||
    const comicInfoPath = filePaths.find((filePath) => filePath === 'ComicInfo.xml')
 | 
			
		||||
    if (comicInfoPath) {
 | 
			
		||||
      const comicInfoData = await archive.extractToBuffer(comicInfoPath)
 | 
			
		||||
      if (comicInfoData) {
 | 
			
		||||
        const comicInfoStr = new TextDecoder().decode(comicInfoData)
 | 
			
		||||
        const comicInfoJson = await xmlToJSON(comicInfoStr)
 | 
			
		||||
        if (comicInfoJson) {
 | 
			
		||||
          metadata = parseComicInfoMetadata.parse(comicInfoJson)
 | 
			
		||||
@ -104,9 +72,9 @@ async function parse(ebookFile) {
 | 
			
		||||
      metadata
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const firstImage = fileObjects.find((fo) => globals.SupportedImageTypes.includes(Path.extname(fo.file.name).toLowerCase().slice(1)))
 | 
			
		||||
    if (firstImage?.file?._path) {
 | 
			
		||||
      payload.ebookCoverPath = firstImage.file._path
 | 
			
		||||
    const firstImagePath = filePaths.find((filePath) => globals.SupportedImageTypes.includes(Path.extname(filePath).toLowerCase().slice(1)))
 | 
			
		||||
    if (firstImagePath) {
 | 
			
		||||
      payload.ebookCoverPath = firstImagePath
 | 
			
		||||
    } else {
 | 
			
		||||
      Logger.warn(`[parseComicMetadata] Cover image not found in comic at "${comicPath}"`)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user