diff --git a/client/components/modals/libraries/LibraryScannerSettings.vue b/client/components/modals/libraries/LibraryScannerSettings.vue index 8ec73dd0..43938f9c 100644 --- a/client/components/modals/libraries/LibraryScannerSettings.vue +++ b/client/components/modals/libraries/LibraryScannerSettings.vue @@ -63,7 +63,7 @@ export default { }, audioMetatags: { id: 'audioMetatags', - name: 'Audio file meta tags', + name: 'Audio file meta tags OR ebook metadata', include: true }, nfoFile: { diff --git a/server/managers/CoverManager.js b/server/managers/CoverManager.js index 3cf97f33..9b4aa32d 100644 --- a/server/managers/CoverManager.js +++ b/server/managers/CoverManager.js @@ -7,6 +7,8 @@ const imageType = require('../libs/imageType') const globals = require('../utils/globals') const { downloadImageFile, filePathToPOSIX, checkPathIsFile } = require('../utils/fileUtils') const { extractCoverArt } = require('../utils/ffmpegHelpers') +const parseEbookMetadata = require('../utils/parsers/parseEbookMetadata') + const CacheManager = require('../managers/CacheManager') class CoverManager { @@ -234,6 +236,7 @@ class CoverManager { /** * Extract cover art from audio file and save for library item + * * @param {import('../models/Book').AudioFileObject[]} audioFiles * @param {string} libraryItemId * @param {string} [libraryItemPath] null for isFile library items @@ -268,6 +271,44 @@ class CoverManager { return null } + /** + * Extract cover art from ebook and save for library item + * + * @param {import('../utils/parsers/parseEbookMetadata').EBookFileScanData} ebookFileScanData + * @param {string} libraryItemId + * @param {string} [libraryItemPath] null for isFile library items + * @returns {Promise} returns cover path + */ + async saveEbookCoverArt(ebookFileScanData, libraryItemId, libraryItemPath) { + if (!ebookFileScanData?.ebookCoverPath) return null + + let coverDirPath = null + if (global.ServerSettings.storeCoverWithItem && libraryItemPath) { + coverDirPath = 
libraryItemPath + } else { + coverDirPath = Path.posix.join(global.MetadataPath, 'items', libraryItemId) + } + await fs.ensureDir(coverDirPath) + + let extname = Path.extname(ebookFileScanData.ebookCoverPath) || '.jpg' + if (extname === '.jpeg') extname = '.jpg' + const coverFilename = `cover${extname}` + const coverFilePath = Path.join(coverDirPath, coverFilename) + + // TODO: Overwrite if exists? + const coverAlreadyExists = await fs.pathExists(coverFilePath) + if (coverAlreadyExists) { + Logger.warn(`[CoverManager] Extract embedded cover art but cover already exists for "${coverFilePath}" - overwriting`) + } + + const success = await parseEbookMetadata.extractCoverImage(ebookFileScanData, coverFilePath) + if (success) { + await CacheManager.purgeCoverCache(libraryItemId) + return coverFilePath + } + return null + } + /** * * @param {string} url diff --git a/server/scanner/AbsMetadataFileScanner.js b/server/scanner/AbsMetadataFileScanner.js index 1f9d2823..e554dfb4 100644 --- a/server/scanner/AbsMetadataFileScanner.js +++ b/server/scanner/AbsMetadataFileScanner.js @@ -36,6 +36,8 @@ class AbsMetadataFileScanner { for (const key in abMetadata) { // TODO: When to override with null or empty arrays? 
if (abMetadata[key] === undefined || abMetadata[key] === null) continue + if (key === 'authors' && !abMetadata.authors?.length) continue + if (key === 'genres' && !abMetadata.genres?.length) continue if (key === 'tags' && !abMetadata.tags?.length) continue if (key === 'chapters' && !abMetadata.chapters?.length) continue diff --git a/server/scanner/BookScanner.js b/server/scanner/BookScanner.js index 6c93dddf..b40e9323 100644 --- a/server/scanner/BookScanner.js +++ b/server/scanner/BookScanner.js @@ -3,8 +3,8 @@ const Path = require('path') const sequelize = require('sequelize') const { LogLevel } = require('../utils/constants') const { getTitleIgnorePrefix, areEquivalent } = require('../utils/index') -const abmetadataGenerator = require('../utils/generators/abmetadataGenerator') const parseNameString = require('../utils/parsers/parseNameString') +const parseEbookMetadata = require('../utils/parsers/parseEbookMetadata') const globals = require('../utils/globals') const AudioFileScanner = require('./AudioFileScanner') const Database = require('../Database') @@ -170,7 +170,9 @@ class BookScanner { hasMediaChanges = true } - const bookMetadata = await this.getBookMetadataFromScanData(media.audioFiles, libraryItemData, libraryScan, librarySettings, existingLibraryItem.id) + const ebookFileScanData = await parseEbookMetadata.parse(media.ebookFile) + + const bookMetadata = await this.getBookMetadataFromScanData(media.audioFiles, ebookFileScanData, libraryItemData, libraryScan, librarySettings, existingLibraryItem.id) let authorsUpdated = false const bookAuthorsRemoved = [] let seriesUpdated = false @@ -317,24 +319,34 @@ class BookScanner { }) } - // If no cover then extract cover from audio file if available OR search for cover if enabled in server settings + // If no cover then extract cover from audio file OR from ebook + const libraryItemDir = existingLibraryItem.isFile ? 
null : existingLibraryItem.path if (!media.coverPath) { - const libraryItemDir = existingLibraryItem.isFile ? null : existingLibraryItem.path - const extractedCoverPath = await CoverManager.saveEmbeddedCoverArt(media.audioFiles, existingLibraryItem.id, libraryItemDir) + let extractedCoverPath = await CoverManager.saveEmbeddedCoverArt(media.audioFiles, existingLibraryItem.id, libraryItemDir) if (extractedCoverPath) { libraryScan.addLog(LogLevel.DEBUG, `Updating book "${bookMetadata.title}" extracted embedded cover art from audio file to path "${extractedCoverPath}"`) media.coverPath = extractedCoverPath hasMediaChanges = true - } else if (Database.serverSettings.scannerFindCovers) { - const authorName = media.authors.map(au => au.name).filter(au => au).join(', ') - const coverPath = await this.searchForCover(existingLibraryItem.id, libraryItemDir, media.title, authorName, libraryScan) - if (coverPath) { - media.coverPath = coverPath + } else if (ebookFileScanData?.ebookCoverPath) { + extractedCoverPath = await CoverManager.saveEbookCoverArt(ebookFileScanData, existingLibraryItem.id, libraryItemDir) + if (extractedCoverPath) { + libraryScan.addLog(LogLevel.DEBUG, `Updating book "${bookMetadata.title}" extracted embedded cover art from ebook file to path "${extractedCoverPath}"`) + media.coverPath = extractedCoverPath hasMediaChanges = true } } } + // If no cover then search for cover if enabled in server settings + if (!media.coverPath && Database.serverSettings.scannerFindCovers) { + const authorName = media.authors.map(au => au.name).filter(au => au).join(', ') + const coverPath = await this.searchForCover(existingLibraryItem.id, libraryItemDir, media.title, authorName, libraryScan) + if (coverPath) { + media.coverPath = coverPath + hasMediaChanges = true + } + } + existingLibraryItem.media = media let libraryItemUpdated = false @@ -408,12 +420,14 @@ class BookScanner { return null } + let ebookFileScanData = null if (ebookLibraryFile) { ebookLibraryFile = 
ebookLibraryFile.toJSON() ebookLibraryFile.ebookFormat = ebookLibraryFile.metadata.ext.slice(1).toLowerCase() + ebookFileScanData = await parseEbookMetadata.parse(ebookLibraryFile) } - const bookMetadata = await this.getBookMetadataFromScanData(scannedAudioFiles, libraryItemData, libraryScan, librarySettings) + const bookMetadata = await this.getBookMetadataFromScanData(scannedAudioFiles, ebookFileScanData, libraryItemData, libraryScan, librarySettings) bookMetadata.explicit = !!bookMetadata.explicit // Ensure boolean bookMetadata.abridged = !!bookMetadata.abridged // Ensure boolean @@ -481,19 +495,28 @@ class BookScanner { } } - // If cover was not found in folder then check embedded covers in audio files OR search for cover + // If cover was not found in folder then check embedded covers in audio files OR ebook file + const libraryItemDir = libraryItemObj.isFile ? null : libraryItemObj.path if (!bookObject.coverPath) { - const libraryItemDir = libraryItemObj.isFile ? null : libraryItemObj.path - // Extract and save embedded cover art - const extractedCoverPath = await CoverManager.saveEmbeddedCoverArt(scannedAudioFiles, libraryItemObj.id, libraryItemDir) + let extractedCoverPath = await CoverManager.saveEmbeddedCoverArt(scannedAudioFiles, libraryItemObj.id, libraryItemDir) if (extractedCoverPath) { + libraryScan.addLog(LogLevel.DEBUG, `Extracted embedded cover from audio file at "${extractedCoverPath}" for book "${bookObject.title}"`) bookObject.coverPath = extractedCoverPath - } else if (Database.serverSettings.scannerFindCovers) { - const authorName = bookMetadata.authors.join(', ') - bookObject.coverPath = await this.searchForCover(libraryItemObj.id, libraryItemDir, bookObject.title, authorName, libraryScan) + } else if (ebookFileScanData?.ebookCoverPath) { + extractedCoverPath = await CoverManager.saveEbookCoverArt(ebookFileScanData, libraryItemObj.id, libraryItemDir) + if (extractedCoverPath) { + libraryScan.addLog(LogLevel.DEBUG, `Extracted embedded cover 
from ebook file at "${extractedCoverPath}" for book "${bookObject.title}"`) + bookObject.coverPath = extractedCoverPath + } } } + // If cover not found then search for cover if enabled in settings + if (!bookObject.coverPath && Database.serverSettings.scannerFindCovers) { + const authorName = bookMetadata.authors.join(', ') + bookObject.coverPath = await this.searchForCover(libraryItemObj.id, libraryItemDir, bookObject.title, authorName, libraryScan) + } + libraryItemObj.book = bookObject const libraryItem = await Database.libraryItemModel.create(libraryItemObj, { include: { @@ -570,13 +593,14 @@ class BookScanner { /** * * @param {import('../models/Book').AudioFileObject[]} audioFiles + * @param {import('../utils/parsers/parseEbookMetadata').EBookFileScanData} ebookFileScanData * @param {import('./LibraryItemScanData')} libraryItemData * @param {LibraryScan} libraryScan * @param {import('../models/Library').LibrarySettingsObject} librarySettings * @param {string} [existingLibraryItemId] * @returns {Promise} */ - async getBookMetadataFromScanData(audioFiles, libraryItemData, libraryScan, librarySettings, existingLibraryItemId = null) { + async getBookMetadataFromScanData(audioFiles, ebookFileScanData, libraryItemData, libraryScan, librarySettings, existingLibraryItemId = null) { // First set book metadata from folder/file names const bookMetadata = { title: libraryItemData.mediaMetadata.title, // required @@ -599,7 +623,7 @@ class BookScanner { coverPath: undefined } - const bookMetadataSourceHandler = new BookScanner.BookMetadataSourceHandler(bookMetadata, audioFiles, libraryItemData, libraryScan, existingLibraryItemId) + const bookMetadataSourceHandler = new BookScanner.BookMetadataSourceHandler(bookMetadata, audioFiles, ebookFileScanData, libraryItemData, libraryScan, existingLibraryItemId) const metadataPrecedence = librarySettings.metadataPrecedence || ['folderStructure', 'audioMetatags', 'nfoFile', 'txtFiles', 'opfFile', 'absMetadata'] 
libraryScan.addLog(LogLevel.DEBUG, `"${bookMetadata.title}" Getting metadata with precedence [${metadataPrecedence.join(', ')}]`) for (const metadataSource of metadataPrecedence) { @@ -627,13 +651,15 @@ class BookScanner { * * @param {Object} bookMetadata * @param {import('../models/Book').AudioFileObject[]} audioFiles + * @param {import('../utils/parsers/parseEbookMetadata').EBookFileScanData} ebookFileScanData * @param {import('./LibraryItemScanData')} libraryItemData * @param {LibraryScan} libraryScan * @param {string} existingLibraryItemId */ - constructor(bookMetadata, audioFiles, libraryItemData, libraryScan, existingLibraryItemId) { + constructor(bookMetadata, audioFiles, ebookFileScanData, libraryItemData, libraryScan, existingLibraryItemId) { this.bookMetadata = bookMetadata this.audioFiles = audioFiles + this.ebookFileScanData = ebookFileScanData this.libraryItemData = libraryItemData this.libraryScan = libraryScan this.existingLibraryItemId = existingLibraryItemId @@ -647,13 +673,42 @@ class BookScanner { } /** - * Metadata from audio file meta tags + * Metadata from audio file meta tags OR metadata from ebook file */ audioMetatags() { - if (!this.audioFiles.length) return - // Modifies bookMetadata with metadata mapped from audio file meta tags - const bookTitle = this.bookMetadata.title || this.libraryItemData.mediaMetadata.title - AudioFileScanner.setBookMetadataFromAudioMetaTags(bookTitle, this.audioFiles, this.bookMetadata, this.libraryScan) + if (this.audioFiles.length) { + // Modifies bookMetadata with metadata mapped from audio file meta tags + const bookTitle = this.bookMetadata.title || this.libraryItemData.mediaMetadata.title + AudioFileScanner.setBookMetadataFromAudioMetaTags(bookTitle, this.audioFiles, this.bookMetadata, this.libraryScan) + } else if (this.ebookFileScanData) { + const ebookMetdataObject = this.ebookFileScanData.metadata + for (const key in ebookMetdataObject) { + if (key === 'tags') { + if (ebookMetdataObject.tags.length) { 
+ this.bookMetadata.tags = ebookMetdataObject.tags + } + } else if (key === 'genres') { + if (ebookMetdataObject.genres.length) { + this.bookMetadata.genres = ebookMetdataObject.genres + } + } else if (key === 'authors') { + if (ebookMetdataObject.authors?.length) { + this.bookMetadata.authors = ebookMetdataObject.authors + } + } else if (key === 'narrators') { + if (ebookMetdataObject.narrators?.length) { + this.bookMetadata.narrators = ebookMetdataObject.narrators + } + } else if (key === 'series') { + if (ebookMetdataObject.series?.length) { + this.bookMetadata.series = ebookMetdataObject.series + } + } else if (ebookMetdataObject[key] && key !== 'sequence') { + this.bookMetadata[key] = ebookMetdataObject[key] + } + } + } + return null } /** diff --git a/server/scanner/PodcastScanner.js b/server/scanner/PodcastScanner.js index b56c4db6..07dcbb11 100644 --- a/server/scanner/PodcastScanner.js +++ b/server/scanner/PodcastScanner.js @@ -2,7 +2,6 @@ const uuidv4 = require("uuid").v4 const Path = require('path') const { LogLevel } = require('../utils/constants') const { getTitleIgnorePrefix } = require('../utils/index') -const abmetadataGenerator = require('../utils/generators/abmetadataGenerator') const AudioFileScanner = require('./AudioFileScanner') const Database = require('../Database') const { filePathToPOSIX, getFileTimestampsWithIno } = require('../utils/fileUtils') diff --git a/server/utils/parsers/parseEbookMetadata.js b/server/utils/parsers/parseEbookMetadata.js new file mode 100644 index 00000000..6e97c1da --- /dev/null +++ b/server/utils/parsers/parseEbookMetadata.js @@ -0,0 +1,42 @@ +const parseEpubMetadata = require('./parseEpubMetadata') + +/** + * @typedef EBookFileScanData + * @property {string} path + * @property {string} ebookFormat + * @property {string} ebookCoverPath internal image path + * @property {import('../../scanner/BookScanner').BookMetadataObject} metadata + */ + +/** + * Parse metadata from ebook file + * + * @param 
{import('../../models/Book').EBookFileObject} ebookFile + * @returns {Promise} + */ +async function parse(ebookFile) { + if (!ebookFile) return null + + if (ebookFile.ebookFormat === 'epub') { + return parseEpubMetadata.parse(ebookFile.metadata.path) + } + return null +} +module.exports.parse = parse + +/** + * Extract cover from ebook file + * + * @param {EBookFileScanData} ebookFileScanData + * @param {string} outputCoverPath + * @returns {Promise} + */ +async function extractCoverImage(ebookFileScanData, outputCoverPath) { + if (!ebookFileScanData?.ebookCoverPath) return false + + if (ebookFileScanData.ebookFormat === 'epub') { + return parseEpubMetadata.extractCoverImage(ebookFileScanData.path, ebookFileScanData.ebookCoverPath, outputCoverPath) + } + return false +} +module.exports.extractCoverImage = extractCoverImage \ No newline at end of file diff --git a/server/utils/parsers/parseEpubMetadata.js b/server/utils/parsers/parseEpubMetadata.js new file mode 100644 index 00000000..7238b0bf --- /dev/null +++ b/server/utils/parsers/parseEpubMetadata.js @@ -0,0 +1,109 @@ +const Path = require('path') +const Logger = require('../../Logger') +const StreamZip = require('../../libs/nodeStreamZip') +const parseOpfMetadata = require('./parseOpfMetadata') +const { xmlToJSON } = require('../index') + + +/** + * Extract file from epub and return string content + * + * @param {string} epubPath + * @param {string} filepath + * @returns {Promise} + */ +async function extractFileFromEpub(epubPath, filepath) { + const zip = new StreamZip.async({ file: epubPath }) + const data = await zip.entryData(filepath).catch((error) => { + Logger.error(`[parseEpubMetadata] Failed to extract ${filepath} from epub at "${epubPath}"`, error) + }) + const filedata = data?.toString('utf8') + await zip.close() + return filedata +} + +/** + * Extract an XML file from epub and return JSON + * + * @param {string} epubPath + * @param {string} xmlFilepath + * @returns {Promise} + */ +async function 
extractXmlToJson(epubPath, xmlFilepath) { + const filedata = await extractFileFromEpub(epubPath, xmlFilepath) + if (!filedata) return null + return xmlToJSON(filedata) +} + +/** + * Extract cover image from epub, returns true on success + * + * @param {string} epubPath + * @param {string} epubImageFilepath + * @param {string} outputCoverPath + * @returns {Promise} + */ +async function extractCoverImage(epubPath, epubImageFilepath, outputCoverPath) { + const zip = new StreamZip.async({ file: epubPath }) + + const success = await zip.extract(epubImageFilepath, outputCoverPath).then(() => true).catch((error) => { + Logger.error(`[parseEpubMetadata] Failed to extract image ${epubImageFilepath} from epub at "${epubPath}"`, error) + return false + }) + + await zip.close() + + return success +} +module.exports.extractCoverImage = extractCoverImage + +/** + * Parse metadata from epub + * + * @param {string} epubPath + * @returns {Promise} + */ +async function parse(epubPath) { + Logger.debug(`Parsing metadata from epub at "${epubPath}"`) + // Entrypoint of the epub that contains the filepath to the package document (opf file) + const containerJson = await extractXmlToJson(epubPath, 'META-INF/container.xml') + + // Get package document opf filepath from container.xml (containerJson is null when container.xml could not be extracted) + const packageDocPath = containerJson?.container?.rootfiles?.[0]?.rootfile?.[0]?.$?.['full-path'] + if (!packageDocPath) { + Logger.error(`Failed to get package doc path in Container.xml`, JSON.stringify(containerJson, null, 2)) + return null + } + + // Extract package document to JSON + const packageJson = await extractXmlToJson(epubPath, packageDocPath) + if (!packageJson) { + return null + } + + // Parse metadata from package document opf file + const opfMetadata = parseOpfMetadata.parseOpfMetadataJson(packageJson) + if (!opfMetadata) { + Logger.error(`Unable to parse metadata in package doc with json`, JSON.stringify(packageJson, null, 2)) + return null + } + + const payload = { + path: epubPath, +
ebookFormat: 'epub', + metadata: opfMetadata + } + + // Attempt to find filepath to cover image + const manifestFirstImage = packageJson.package?.manifest?.[0]?.item?.find(item => item.$?.['media-type']?.startsWith('image/')) + let coverImagePath = manifestFirstImage?.$?.href + if (coverImagePath) { + const packageDirname = Path.posix.dirname(packageDocPath) + payload.ebookCoverPath = Path.posix.join(packageDirname, coverImagePath) + } else { + Logger.warn(`Cover image not found in manifest for epub at "${epubPath}"`) + } + + return payload +} +module.exports.parse = parse \ No newline at end of file diff --git a/server/utils/parsers/parseOpfMetadata.js b/server/utils/parsers/parseOpfMetadata.js index b51ceea5..3087497a 100644 --- a/server/utils/parsers/parseOpfMetadata.js +++ b/server/utils/parsers/parseOpfMetadata.js @@ -136,11 +136,7 @@ function stripPrefix(str) { return str.split(':').pop() } -module.exports.parseOpfMetadataXML = async (xml) => { - const json = await xmlToJSON(xml) - - if (!json) return null - +module.exports.parseOpfMetadataJson = (json) => { // Handle or with prefix const packageKey = Object.keys(json).find(key => stripPrefix(key) === 'package') if (!packageKey) return null @@ -167,7 +163,7 @@ module.exports.parseOpfMetadataXML = async (xml) => { const creators = parseCreators(metadata) const authors = (fetchCreators(creators, 'aut') || []).map(au => au?.trim()).filter(au => au) const narrators = (fetchNarrators(creators, metadata) || []).map(nrt => nrt?.trim()).filter(nrt => nrt) - const data = { + return { title: fetchTitle(metadata), subtitle: fetchSubtitle(metadata), authors, @@ -182,5 +178,10 @@ module.exports.parseOpfMetadataXML = async (xml) => { series: fetchSeries(metadataMeta), tags: fetchTags(metadata) } - return data +} + +module.exports.parseOpfMetadataXML = async (xml) => { + const json = await xmlToJSON(xml) + if (!json) return null + return module.exports.parseOpfMetadataJson(json) } \ No newline at end of file