2022-05-29 18:46:45 +02:00
|
|
|
const { xmlToJSON } = require('../index')
|
|
|
|
const htmlSanitizer = require('../htmlSanitizer')
|
2021-11-09 03:05:12 +01:00
|
|
|
|
2024-07-28 21:54:17 +02:00
|
|
|
/**
 * @typedef MetadataCreatorObject
 * @property {string} value - text content of the dc:creator element
 * @property {string|null} role - value of the `<prefix>:role` attribute, null when absent
 * @property {string|null} fileAs - value of the `<prefix>:file-as` attribute, null when absent
 */

/**
 * Convert the `dc:creator` entries of a parsed OPF metadata object into plain creator objects.
 *
 * The attribute prefix is taken from the first `xmlns:*` attribute found on the element
 * itself and falls back to `opf` when the element declares none.
 * Entries that are not objects, or that have no attributes or no text content, are skipped
 * so the returned array only ever contains creator objects.
 *
 * @example
 * <dc:creator xmlns:ns0="http://www.idpf.org/2007/opf" ns0:role="aut" ns0:file-as="Steinbeck, John">John Steinbeck</dc:creator>
 * <dc:creator opf:role="aut" opf:file-as="Orwell, George">George Orwell</dc:creator>
 *
 * @param {Object} metadata
 * @returns {MetadataCreatorObject[]|null} null when there is no `dc:creator` entry at all
 */
function parseCreators(metadata) {
  const creatorEntries = metadata['dc:creator']
  if (!creatorEntries?.length) return null

  const creators = []
  for (const c of creatorEntries) {
    // Without attributes there is no role to classify by, and without text there is no name
    if (!c || typeof c !== 'object' || !c['$'] || !c['_']) continue

    const namespace =
      Object.keys(c['$'])
        .find((key) => key.startsWith('xmlns:'))
        ?.split(':')[1] || 'opf'

    creators.push({
      value: c['_'],
      role: c['$'][`${namespace}:role`] || null,
      fileAs: c['$'][`${namespace}:file-as`] || null
    })
  }
  return creators
}
|
|
|
|
|
2022-08-13 00:30:05 +02:00
|
|
|
/**
 * Collect the distinct, non-empty creator values that carry the given role.
 *
 * @param {Object[]} creators - entries exposing `role` and `value` properties
 * @param {string} role - role to match using strict equality (e.g. 'aut')
 * @returns {string[]|null} unique values in first-seen order, or null when `creators` is missing or empty
 */
function fetchCreators(creators, role) {
  if (!creators?.length) return null

  const uniqueValues = new Set()
  creators.forEach((creator) => {
    if (creator.role === role && creator.value) uniqueValues.add(creator.value)
  })
  return Array.from(uniqueValues)
}
|
|
|
|
|
2021-11-10 00:54:28 +01:00
|
|
|
/**
 * Return the string value of the first entry stored under `tag`.
 *
 * The first entry may be an object whose text lives in `_`, or a bare string:
 *
 *   "dc:title": [
 *     {
 *       "_": "The Quest for Character",
 *       "$": {
 *         "opf:file-as": "Quest for Character What the Story of Socrates and Alcibiades"
 *       }
 *     }
 *   ]
 *
 *   OR
 *
 *   "dc:title": [
 *     "The Quest for Character"
 *   ]
 *
 * @param {Object} metadata
 * @param {string} tag
 * @returns {string|null} null when the tag is missing, empty, or its first entry holds no string
 */
function fetchTagString(metadata, tag) {
  const entries = metadata[tag]
  if (!entries?.length) return null

  const first = entries[0]
  const text = typeof first === 'object' ? first._ : first
  return typeof text === 'string' ? text : null
}
|
|
|
|
|
2021-11-09 03:05:12 +01:00
|
|
|
/**
 * Extract the publication year from the first `dc:date` entry.
 *
 * Only the part before the first `-` is considered, and it must consist of exactly
 * four decimal digits. A coercing numeric check is deliberately avoided: it would also
 * accept strings such as `12e3`, `0x1F` or four blanks, which are not years.
 *
 * @param {Object} metadata
 * @returns {string|null} the four-digit year as a string, or null when absent or malformed
 */
function fetchDate(metadata) {
  const date = fetchTagString(metadata, 'dc:date')
  if (!date) return null

  const year = date.split('-')[0]
  return /^\d{4}$/.test(year) ? year : null
}
|
|
|
|
|
|
|
|
/**
 * Read the publisher from the first `dc:publisher` entry.
 *
 * @param {Object} metadata
 * @returns {string|null}
 */
function fetchPublisher(metadata) {
  const publisher = fetchTagString(metadata, 'dc:publisher')
  return publisher
}
|
|
|
|
|
2024-07-28 21:54:17 +02:00
|
|
|
/**
 * Find the first `dc:identifier` entry whose `<prefix>:scheme` attribute equals `scheme`
 * and return its text content.
 *
 * The prefix comes from the first `xmlns:*` attribute on the entry, defaulting to `opf`.
 * Entries without attributes never match.
 *
 * @example
 * <dc:identifier xmlns:ns4="http://www.idpf.org/2007/opf" ns4:scheme="ISBN">9781440633904</dc:identifier>
 * <dc:identifier opf:scheme="ISBN">9780141187761</dc:identifier>
 *
 * @param {Object} metadata
 * @param {string} scheme
 * @returns {string|null} null when nothing matches or the matching entry has no text
 */
function fetchIdentifier(metadata, scheme) {
  const identifiers = metadata['dc:identifier']
  if (!identifiers?.length) return null

  // Resolve the scheme attribute under whichever prefix this element declares
  const schemeOf = (attributes) => {
    const declaration = Object.keys(attributes).find((key) => key.startsWith('xmlns:'))
    const prefix = declaration?.split(':')[1] || 'opf'
    return attributes[`${prefix}:scheme`]
  }

  const match = identifiers.find((identifier) => {
    const attributes = identifier['$']
    return attributes ? schemeOf(attributes) === scheme : false
  })
  return match?.['_'] || null
}
|
|
|
|
|
2021-11-09 03:05:12 +01:00
|
|
|
/**
 * Look up the `dc:identifier` whose scheme is `ISBN`.
 *
 * @param {Object} metadata
 * @returns {string|null}
 */
function fetchISBN(metadata) {
  const isbn = fetchIdentifier(metadata, 'ISBN')
  return isbn
}
|
|
|
|
|
2023-01-02 17:47:13 +01:00
|
|
|
/**
 * Look up the `dc:identifier` whose scheme is `ASIN`.
 *
 * @param {Object} metadata
 * @returns {string|null}
 */
function fetchASIN(metadata) {
  const asin = fetchIdentifier(metadata, 'ASIN')
  return asin
}
|
|
|
|
|
2021-11-09 03:05:12 +01:00
|
|
|
/**
 * Read the title from the first `dc:title` entry.
 *
 * @param {Object} metadata
 * @returns {string|null}
 */
function fetchTitle(metadata) {
  const title = fetchTagString(metadata, 'dc:title')
  return title
}
|
|
|
|
|
2023-01-02 17:47:13 +01:00
|
|
|
/**
 * Read the subtitle from the first `dc:subtitle` entry.
 *
 * @param {Object} metadata
 * @returns {string|null}
 */
function fetchSubtitle(metadata) {
  const subtitle = fetchTagString(metadata, 'dc:subtitle')
  return subtitle
}
|
|
|
|
|
2021-11-10 00:54:28 +01:00
|
|
|
/**
 * Read the first `dc:description` entry and reduce it to plain text.
 *
 * Only plain text is allowed. calibre stores the angle brackets of HTML descriptions as
 * character entities (lt / gt), so those are turned back into real brackets first; the
 * result is then handed to `htmlSanitizer.stripAllTags` (presumably removing the markup).
 *
 * @param {Object} metadata
 * @returns {string|null} null when there is no description
 */
function fetchDescription(metadata) {
  const description = fetchTagString(metadata, 'dc:description')
  if (!description) return null

  // Decode both bracket entities in a single pass
  const unescaped = description.replace(/&(lt|gt);/g, (_match, entity) => (entity === 'lt' ? '<' : '>'))
  return htmlSanitizer.stripAllTags(unescaped)
}
|
|
|
|
|
|
|
|
/**
 * List the distinct non-empty string entries of `dc:subject`.
 *
 * @param {Object} metadata
 * @returns {string[]} unique genres in first-seen order; empty when the tag is missing or empty
 */
function fetchGenres(metadata) {
  const subjects = metadata['dc:subject']
  if (!subjects?.length) return []

  const isNonEmptyString = (subject) => typeof subject === 'string' && subject !== ''
  return Array.from(new Set(subjects.filter(isNonEmptyString)))
}
|
|
|
|
|
|
|
|
/**
 * Read the language from the first `dc:language` entry.
 *
 * @param {Object} metadata
 * @returns {string|null}
 */
function fetchLanguage(metadata) {
  const language = fetchTagString(metadata, 'dc:language')
  return language
}
|
|
|
|
|
2022-05-19 02:25:18 +02:00
|
|
|
/**
 * Build the list of series from calibre `<meta>` entries.
 *
 * Every entry named `calibre:series` with non-blank content yields one series. Its
 * sequence is the trimmed content of the entry immediately after it when that entry is
 * named `calibre:series_index`, otherwise null.
 *
 * @param {Object[]} metadataMeta - meta entries exposing `$.name` and `$.content`
 * @returns {{name: string, sequence: string|null}[]} series with unique names, first occurrence kept
 */
function fetchSeries(metadataMeta) {
  if (!metadataMeta) return []

  const SERIES_NAME = 'calibre:series'
  const SERIES_INDEX_NAME = 'calibre:series_index'

  const collected = []
  for (let pos = 0; pos < metadataMeta.length; pos += 1) {
    const attrs = metadataMeta[pos].$
    if (attrs?.name !== SERIES_NAME) continue

    const name = attrs.content?.trim()
    if (!name) continue

    const nextAttrs = metadataMeta[pos + 1]?.$
    const adjacentIndex = nextAttrs?.name === SERIES_INDEX_NAME ? nextAttrs.content?.trim() : null
    collected.push({ name, sequence: adjacentIndex || null })
  }

  // If one series was found with no series_index then check if any series_index meta can be found
  // this is to support when calibre:series_index is not directly underneath calibre:series
  const [onlySeries] = collected
  if (collected.length === 1 && !onlySeries.sequence) {
    const indexMeta = metadataMeta.find((m) => m.$?.name === SERIES_INDEX_NAME && m.$.content?.trim())
    if (indexMeta) onlySeries.sequence = indexMeta.$.content.trim()
  }

  // Remove duplicates, keeping the first series seen for each name
  const seenNames = new Set()
  return collected.filter(({ name }) => {
    if (seenNames.has(name)) return false
    seenNames.add(name)
    return true
  })
}
|
|
|
|
|
2022-02-01 03:32:03 +01:00
|
|
|
/**
 * Determine the narrators of a book.
 *
 * Creators with role `nrt` win. When there are none, the calibre custom column stored
 * under `metadata.meta['calibre:user_metadata:#narrators']` is parsed as JSON and its
 * `#value#` property is used instead.
 *
 * @param {Object[]|null} creators - creator objects exposing `role` and `value`
 * @param {Object} metadata - metadata object; `metadata.meta` maps meta names to value arrays
 * @returns {string[]|null} narrator names, or null when none can be determined
 */
function fetchNarrators(creators, metadata) {
  const narrators = fetchCreators(creators, 'nrt')
  if (narrators?.length) return narrators

  // A missing custom column is the normal case, so handle it without raising
  const rawUserMetadata = metadata?.meta ? fetchTagString(metadata.meta, 'calibre:user_metadata:#narrators') : null
  if (!rawUserMetadata) return null

  let userMetadata
  try {
    // The stored JSON may have its double quotes encoded as the quot character entity
    userMetadata = JSON.parse(rawUserMetadata.replace(/&(?:quot);/g, '"'))
  } catch {
    // Malformed JSON is treated the same as a missing column
    return null
  }

  // Normalise to an array of strings so callers can safely map over the result
  const value = userMetadata?.['#value#']
  const candidates = Array.isArray(value) ? value : [value]
  const names = candidates.filter((name) => typeof name === 'string')
  return names.length ? names : null
}
|
|
|
|
|
2022-05-19 02:25:18 +02:00
|
|
|
/**
 * List the distinct non-empty string entries of `dc:tag`.
 *
 * @param {Object} metadata
 * @returns {string[]} unique tags in first-seen order; empty when the tag is missing or empty
 */
function fetchTags(metadata) {
  const tagEntries = metadata['dc:tag']
  if (!tagEntries?.length) return []

  const isNonEmptyString = (entry) => typeof entry === 'string' && entry !== ''
  return Array.from(new Set(tagEntries.filter(isNonEmptyString)))
}
|
|
|
|
|
|
|
|
/**
 * Drop everything up to and including the last `:` of a name.
 *
 * @param {string} str
 * @returns {string} the part after the last colon, the whole string when it has no colon, '' for falsy input
 */
function stripPrefix(str) {
  if (!str) return ''

  const lastColon = str.lastIndexOf(':')
  return str.slice(lastColon + 1)
}
|
|
|
|
|
2024-01-08 00:51:07 +01:00
|
|
|
/**
 * Extract book metadata from an OPF document that has already been converted to JSON.
 *
 * The input object is not modified: the `<meta>` entries are indexed on a shallow copy
 * of the metadata node.
 *
 * @param {Object} json - parsed OPF document whose root key is `package` or `<prefix>:package`
 * @returns {Object|null} object with title, subtitle, authors, narrators, publishedYear,
 *   publisher, isbn, asin, description, genres, language, series and tags; null when the
 *   document has no package or no metadata node
 */
module.exports.parseOpfMetadataJson = (json) => {
  if (!json || typeof json !== 'object') return null

  // Handle <package ...> or with prefix <ns0:package ...>
  const packageKey = Object.keys(json).find((key) => stripPrefix(key) === 'package')
  if (!packageKey) return null

  const packageNode = json[packageKey]
  if (!packageNode) return null

  // Only a key that actually contains a colon carries a namespace prefix
  const prefix = packageKey.includes(':') ? packageKey.split(':').shift() : ''

  let metadata = (prefix && packageNode[`${prefix}:metadata`]) || packageNode.metadata
  if (!metadata) return null

  if (Array.isArray(metadata)) {
    if (!metadata.length) return null
    metadata = metadata[0]
  }
  // An empty metadata element may not be an object; treat it as having no fields
  if (!metadata || typeof metadata !== 'object') metadata = {}

  const metadataMeta = (prefix && metadata[`${prefix}:meta`]) || metadata.meta

  // Index the meta entries by name, each content wrapped in a one-element array,
  // which is the shape fetchTagString reads
  const metaByName = {}
  if (Array.isArray(metadataMeta)) {
    metadataMeta.forEach((meta) => {
      if (meta && meta['$'] && meta['$'].name) {
        metaByName[meta['$'].name] = [meta['$'].content || '']
      }
    })
  }

  // Work on a shallow copy so the caller's parsed document keeps its original meta entries
  const indexedMetadata = { ...metadata, meta: metaByName }

  const creators = parseCreators(indexedMetadata)
  const authors = (fetchCreators(creators, 'aut') || []).map((au) => au?.trim()).filter((au) => au)
  const narrators = (fetchNarrators(creators, indexedMetadata) || []).map((nrt) => nrt?.trim()).filter((nrt) => nrt)

  return {
    title: fetchTitle(indexedMetadata),
    subtitle: fetchSubtitle(indexedMetadata),
    authors,
    narrators,
    publishedYear: fetchDate(indexedMetadata),
    publisher: fetchPublisher(indexedMetadata),
    isbn: fetchISBN(indexedMetadata),
    asin: fetchASIN(indexedMetadata),
    description: fetchDescription(indexedMetadata),
    genres: fetchGenres(indexedMetadata),
    language: fetchLanguage(indexedMetadata),
    series: fetchSeries(metadataMeta),
    tags: fetchTags(indexedMetadata)
  }
}
|
|
|
|
|
|
|
|
/**
 * Convert an OPF XML string to JSON with `xmlToJSON` and extract the book metadata from it.
 *
 * @param {string} xml
 * @returns {Promise<Object|null>} the result of `parseOpfMetadataJson`, or null when the
 *   conversion yields nothing
 */
module.exports.parseOpfMetadataXML = async (xml) => {
  const json = await xmlToJSON(xml)
  if (!json) return null

  // Reference the export explicitly instead of relying on module-scope `this`
  return module.exports.parseOpfMetadataJson(json)
}
|