2021-11-09 03:05:12 +01:00
|
|
|
const { xmlToJSON } = require('./index')
|
2021-12-04 22:07:43 +01:00
|
|
|
const { stripHtml } = require("string-strip-html")
|
2021-11-09 03:05:12 +01:00
|
|
|
|
|
|
|
/**
 * Collect the usable `dc:creator` entries of an OPF metadata object.
 *
 * An entry is usable when it is an object carrying both a text value (`_`)
 * and an attribute map (`$`). Anything else (plain strings, null, objects
 * without text) is left out of the result instead of being represented by a
 * `false` placeholder.
 *
 * @param {Object} metadata - parsed OPF metadata element
 * @returns {Array<{value: string, role: (string|null), fileAs: (string|null)}>|null}
 *   the usable creators, or null when `dc:creator` is missing or empty
 */
function parseCreators(metadata) {
  const creators = metadata['dc:creator']
  if (!Array.isArray(creators) || !creators.length) return null

  return creators
    // `typeof null` is 'object', so null has to be excluded explicitly
    .filter(c => c !== null && typeof c === 'object' && c['$'] && c['_'])
    .map(c => ({
      value: c['_'],
      role: c['$']['opf:role'] || null,
      fileAs: c['$']['opf:file-as'] || null
    }))
}
|
|
|
|
|
|
|
|
/**
 * Look up the value of the first creator that has the given role.
 *
 * @param {Array|null} creators - creator list (entries expose `role` and `value`)
 * @param {string} role - role code to look for, e.g. 'aut'
 * @returns {*} the `value` of the first matching creator, or null when the
 *   list is missing/empty or no entry has that role
 */
function fetchCreator(creators, role) {
  const hasCreators = Boolean(creators && creators.length)
  if (!hasCreators) return null

  const hasRole = (entry) => entry.role === role
  const match = creators.find(hasRole)
  if (!match) return null
  return match.value
}
|
|
|
|
|
2021-11-10 00:54:28 +01:00
|
|
|
/**
 * Read the first value stored under `tag` as a string.
 *
 * The first entry may be a plain string, or an object of the `{ _: text, $: attrs }`
 * shape that parseCreators and fetchISBN read in this file (an element that
 * carries attributes). In the latter case the `_` text is used.
 *
 * @param {Object} metadata - object mapping tag names to arrays of values
 * @param {string} tag - tag name to look up, e.g. 'dc:title'
 * @returns {string|null} the string value, or null when the tag is missing,
 *   empty, or its first entry holds no string
 */
function fetchTagString(metadata, tag) {
  if (!metadata || !metadata[tag] || !metadata[tag].length) return null

  let value = metadata[tag][0]
  // `typeof null` is 'object', so exclude null before reading `_`
  if (value !== null && typeof value === 'object') value = value['_']

  return typeof value === 'string' ? value : null
}
|
|
|
|
|
2021-11-09 03:05:12 +01:00
|
|
|
/**
 * Extract the publication year from the `dc:date` tag.
 *
 * The year is the part of the date before the first '-'. It is accepted only
 * when it consists of exactly four ASCII digits; a coercing `isNaN` check
 * would also let values such as "12e3" or "0x1F" through.
 *
 * @param {Object} metadata - parsed OPF metadata element
 * @returns {string|null} the four-digit year as a string, or null when the
 *   tag is missing or does not start with a four-digit year
 */
function fetchDate(metadata) {
  const date = fetchTagString(metadata, 'dc:date')
  if (!date) return null

  // split() always yields at least one element, so index 0 is safe
  const year = date.split('-')[0]
  return /^\d{4}$/.test(year) ? year : null
}
|
|
|
|
|
|
|
|
/**
 * Read the publisher from the `dc:publisher` tag.
 *
 * @param {Object} metadata - parsed OPF metadata element
 * @returns {string|null} whatever fetchTagString yields for 'dc:publisher'
 */
function fetchPublisher(metadata) {
  const publisher = fetchTagString(metadata, 'dc:publisher')
  return publisher
}
|
|
|
|
|
|
|
|
/**
 * Find the ISBN among the `dc:identifier` entries.
 *
 * The ISBN entry is the first identifier whose attribute map (`$`) has
 * `opf:scheme` equal to 'ISBN'; its text (`_`) is the result.
 *
 * @param {Object} metadata - parsed OPF metadata element
 * @returns {string|null} the ISBN text, or null when there are no
 *   identifiers, none has the ISBN scheme, or the match has no text
 */
function fetchISBN(metadata) {
  const identifiers = metadata['dc:identifier']
  if (!(identifiers && identifiers.length)) return null

  const isIsbn = (identifier) => {
    const attributes = identifier['$']
    return attributes && attributes['opf:scheme'] === 'ISBN'
  }

  const isbnEntry = identifiers.find(isIsbn)
  if (!isbnEntry) return null
  return isbnEntry['_'] || null
}
|
|
|
|
|
|
|
|
/**
 * Read the title from the `dc:title` tag.
 *
 * @param {Object} metadata - parsed OPF metadata element
 * @returns {string|null} whatever fetchTagString yields for 'dc:title'
 */
function fetchTitle(metadata) {
  const title = fetchTagString(metadata, 'dc:title')
  return title
}
|
|
|
|
|
|
|
|
/**
 * Read the description from the `dc:description` tag as plain text.
 *
 * @param {Object} metadata - parsed OPF metadata element
 * @returns {string|null} the description with any HTML removed by stripHtml,
 *   or null when the tag is missing or empty
 */
function fetchDescription(metadata) {
  const rawDescription = fetchTagString(metadata, 'dc:description')
  if (!rawDescription) return null

  // calibre stores < and > as &lt; and &gt;, so decode them before looking for markup
  const description = rawDescription.replace(/&lt;/g, '<').replace(/&gt;/g, '>')

  // only plain text is allowed: a doctype, a tag, or a character entity means
  // the text is treated as HTML and stripped
  const looksLikeHtml = /<!DOCTYPE html>|<\/?\s*[a-z-][^>]*\s*>|(\&(?:[\w\d]+|#\d+|#x[a-f\d]+);)/.test(description)
  return looksLikeHtml ? stripHtml(description).result : description
}
|
|
|
|
|
|
|
|
/**
 * Collect the genres listed under `dc:subject`.
 *
 * Only non-empty string entries are kept; every other entry is dropped.
 *
 * @param {Object} metadata - parsed OPF metadata element
 * @returns {string[]} the genre strings, or an empty array when the tag is
 *   missing or empty
 */
function fetchGenres(metadata) {
  const subjects = metadata['dc:subject']
  if (!(subjects && subjects.length)) return []

  const isGenre = (subject) => typeof subject === 'string' && subject !== ''
  return subjects.filter(isGenre)
}
|
|
|
|
|
|
|
|
/**
 * Read the language from the `dc:language` tag.
 *
 * @param {Object} metadata - parsed OPF metadata element
 * @returns {string|null} whatever fetchTagString yields for 'dc:language'
 */
function fetchLanguage(metadata) {
  const language = fetchTagString(metadata, 'dc:language')
  return language
}
|
|
|
|
|
2021-12-04 22:07:31 +01:00
|
|
|
/**
 * Read the series name from the `calibre:series` entry of `metadata.meta`.
 *
 * @param {Object} metadata - parsed OPF metadata element
 * @returns {string|null} null when `metadata.meta` is undefined, otherwise
 *   whatever fetchTagString yields for "calibre:series"
 */
function fetchSeries(metadata) {
  const { meta } = metadata
  return meta === undefined ? null : fetchTagString(meta, "calibre:series")
}
|
|
|
|
|
|
|
|
/**
 * Read the volume number from the `calibre:series_index` entry of `metadata.meta`.
 *
 * @param {Object} metadata - parsed OPF metadata element
 * @returns {string|null} null when `metadata.meta` is undefined, otherwise
 *   whatever fetchTagString yields for "calibre:series_index"
 */
function fetchVolumeNumber(metadata) {
  const { meta } = metadata
  return meta === undefined ? null : fetchTagString(meta, "calibre:series_index")
}
|
|
|
|
|
2022-02-01 03:32:03 +01:00
|
|
|
/**
 * Determine the narrator(s) of the book.
 *
 * A creator with role 'nrt' wins. Otherwise the calibre custom column
 * `calibre:user_metadata:#narrators` in `metadata.meta` is consulted: its
 * content is JSON whose "#value#" list is joined with ", ".
 *
 * @param {Array|null} creators - creator list as accepted by fetchCreator
 * @param {Object} metadata - parsed OPF metadata element
 * @returns {string|null} the narrator string, or null when none is found or
 *   the calibre entry cannot be parsed
 */
function fetchNarrators(creators, metadata) {
  const roleNrt = fetchCreator(creators, 'nrt')
  if (roleNrt != null || typeof metadata.meta === 'undefined') return roleNrt

  const rawNarrators = fetchTagString(metadata.meta, 'calibre:user_metadata:#narrators')
  if (!rawNarrators) return null

  try {
    // parseOpfMetadataXML fills metadata.meta from the raw XML text, so the
    // JSON quotes are still &quot; entities and must be decoded before parsing
    const narratorsJSON = JSON.parse(rawNarrators.replace(/&quot;/g, '"'))
    const names = narratorsJSON['#value#']
    return Array.isArray(names) ? names.join(', ') : null
  } catch {
    // best effort: malformed calibre JSON is treated as "no narrator"
    return null
  }
}
|
|
|
|
|
2021-11-09 03:05:12 +01:00
|
|
|
/**
 * Parse the XML of an OPF file into a flat book-metadata object.
 *
 * @param {string} xml - OPF document text
 * @returns {Promise<Object|null>} an object with the keys title, author,
 *   narrator, publishYear, publisher, isbn, description, genres, language,
 *   series and volumeNumber; null when the document has no package metadata
 */
module.exports.parseOpfMetadataXML = async (xml) => {
  const json = await xmlToJSON(xml)
  if (!json || !json.package || !json.package.metadata) return null

  let metadata = json.package.metadata
  if (Array.isArray(metadata)) {
    if (!metadata.length) return null
    metadata = metadata[0]
  }

  if (typeof metadata.meta !== 'undefined') {
    // Replace the parsed <meta> nodes with a name -> [content] map read from
    // the XML text, which is the shape fetchTagString expects.
    // [^"]+ keeps each capture inside its own attribute; a greedy .+ could
    // swallow several attributes or tags that share a line.
    const metaTagPattern = /<meta\s+name="(?<name>[^"]+)"\s+content="(?<content>[^"]+)"\s*\/>/g
    const meta = {}
    for (const match of xml.matchAll(metaTagPattern)) {
      meta[match.groups.name] = [match.groups.content]
    }
    metadata.meta = meta
  }

  const creators = parseCreators(metadata)

  return {
    title: fetchTitle(metadata),
    author: fetchCreator(creators, 'aut'),
    narrator: fetchNarrators(creators, metadata),
    publishYear: fetchDate(metadata),
    publisher: fetchPublisher(metadata),
    isbn: fetchISBN(metadata),
    description: fetchDescription(metadata),
    genres: fetchGenres(metadata),
    language: fetchLanguage(metadata),
    series: fetchSeries(metadata),
    volumeNumber: fetchVolumeNumber(metadata)
  }
}
|