2024-01-08 00:51:07 +01:00
|
|
|
const Path = require('path')
|
|
|
|
const Logger = require('../../Logger')
|
|
|
|
const StreamZip = require('../../libs/nodeStreamZip')
|
|
|
|
const parseOpfMetadata = require('./parseOpfMetadata')
|
|
|
|
const { xmlToJSON } = require('../index')
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Read one entry out of an epub archive and decode it as UTF-8 text.
 *
 * Failures to read the entry or to close the archive are logged instead of
 * thrown. When the entry could not be read the promise resolves with undefined.
 *
 * @param {string} epubPath
 * @param {string} filepath
 * @returns {Promise<string>}
 */
async function extractFileFromEpub(epubPath, filepath) {
  const archive = new StreamZip.async({ file: epubPath })

  let entryBuffer
  try {
    entryBuffer = await archive.entryData(filepath)
  } catch (error) {
    Logger.error(`[parseEpubMetadata] Failed to extract ${filepath} from epub at "${epubPath}"`, error)
  }
  const text = entryBuffer?.toString('utf8')

  // Always attempt to close the archive, even when the entry could not be read
  try {
    await archive.close()
  } catch (error) {
    Logger.error(`[parseEpubMetadata] Failed to close zip`, error)
  }

  return text
}
|
|
|
|
|
|
|
|
/**
 * Load an XML entry from an epub and convert it to JSON
 *
 * @param {string} epubPath
 * @param {string} xmlFilepath
 * @returns {Promise<Object>} null when the entry could not be read or is empty
 */
async function extractXmlToJson(epubPath, xmlFilepath) {
  const xml = await extractFileFromEpub(epubPath, xmlFilepath)
  return xml ? xmlToJSON(xml) : null
}
|
|
|
|
|
|
|
|
/**
 * Extract cover image from epub, return true if success
 *
 * Never rejects because of the archive itself: a failed extraction and a failed
 * close are both logged, and the result only reflects whether the image was
 * written to outputCoverPath.
 *
 * @param {string} epubPath
 * @param {string} epubImageFilepath
 * @param {string} outputCoverPath
 * @returns {Promise<boolean>}
 */
async function extractCoverImage(epubPath, epubImageFilepath, outputCoverPath) {
  const zip = new StreamZip.async({ file: epubPath })

  const success = await zip
    .extract(epubImageFilepath, outputCoverPath)
    .then(() => true)
    .catch((error) => {
      Logger.error(`[parseEpubMetadata] Failed to extract image ${epubImageFilepath} from epub at "${epubPath}"`, error)
      return false
    })

  // close() can reject too (e.g. when the archive could not be opened at all).
  // That must not turn the boolean result into a rejected promise.
  await zip.close().catch((error) => {
    Logger.error(`[parseEpubMetadata] Failed to close zip`, error)
  })

  return success
}
|
|
|
|
module.exports.extractCoverImage = extractCoverImage
|
|
|
|
|
|
|
|
/**
 * Pick the manifest item that holds the cover image of a package document.
 *
 * Order of preference:
 *  1. EPUB 3: manifest item whose `properties` include `cover-image`
 *  2. EPUB 2: manifest item referenced by `<meta name="cover" content="item-id"/>`
 *  3. the first image in the manifest
 * A declared cover is only used when it has an `image/*` media type.
 *
 * @param {Object} packageJson package document (opf) converted to JSON
 * @returns {Object|undefined} manifest item, undefined when the manifest has no image
 */
function findCoverManifestItem(packageJson) {
  const items = packageJson.package?.manifest?.[0]?.item
  if (!Array.isArray(items)) return undefined

  const isImage = (item) => {
    const mediaType = item?.$?.['media-type']
    return typeof mediaType === 'string' && mediaType.startsWith('image/')
  }

  // `properties` is a space separated list
  const hasCoverProperty = (item) =>
    String(item.$.properties ?? '')
      .split(/\s+/)
      .includes('cover-image')
  const epub3Cover = items.find((item) => isImage(item) && hasCoverProperty(item))
  if (epub3Cover) return epub3Cover

  let metadataNode = packageJson.package?.metadata
  if (Array.isArray(metadataNode)) metadataNode = metadataNode[0]
  const metas = metadataNode?.meta
  const coverId = Array.isArray(metas) ? metas.find((meta) => meta?.$?.name === 'cover')?.$?.content : undefined
  if (coverId) {
    const declaredCover = items.find((item) => isImage(item) && item.$.id === coverId)
    if (declaredCover) return declaredCover
  }

  return items.find(isImage)
}

/**
 * Parse metadata from epub
 *
 * Reads META-INF/container.xml to locate the package document (opf file),
 * parses its metadata and looks up the path of the cover image inside the epub.
 *
 * @param {import('../../models/Book').EBookFileObject} ebookFile
 * @returns {Promise<import('./parseEbookMetadata').EBookFileScanData|null>} null when the
 *  container or package document cannot be read or its metadata cannot be parsed
 */
async function parse(ebookFile) {
  const epubPath = ebookFile.metadata.path

  Logger.debug(`Parsing metadata from epub at "${epubPath}"`)
  // Entrypoint of the epub that contains the filepath to the package document (opf file)
  const containerJson = await extractXmlToJson(epubPath, 'META-INF/container.xml')
  if (!containerJson) {
    return null
  }

  // Get package document opf filepath from container.xml
  const packageDocPath = containerJson.container?.rootfiles?.[0]?.rootfile?.[0]?.$?.['full-path']
  if (!packageDocPath) {
    Logger.error(`Failed to get package doc path in Container.xml`, JSON.stringify(containerJson, null, 2))
    return null
  }

  // Extract package document to JSON
  const packageJson = await extractXmlToJson(epubPath, packageDocPath)
  if (!packageJson) {
    return null
  }

  // Parse metadata from package document opf file
  const opfMetadata = parseOpfMetadata.parseOpfMetadataJson(packageJson)
  if (!opfMetadata) {
    Logger.error(`Unable to parse metadata in package doc with json`, JSON.stringify(packageJson, null, 2))
    return null
  }

  const payload = {
    path: epubPath,
    ebookFormat: 'epub',
    metadata: opfMetadata
  }

  // Attempt to find filepath to cover image.
  // Manifest hrefs are relative to the package document, so resolve against its directory.
  const coverImagePath = findCoverManifestItem(packageJson)?.$?.href
  if (coverImagePath) {
    const packageDirname = Path.dirname(packageDocPath)
    payload.ebookCoverPath = Path.posix.join(packageDirname, coverImagePath)
  } else {
    Logger.warn(`Cover image not found in manifest for epub at "${epubPath}"`)
  }

  return payload
}
|
|
|
|
module.exports.parse = parse
|