Merge pull request #4113 from advplyr/parsing-opf-v3

Update opf parser to support refines meta elements
This commit is contained in:
advplyr 2025-03-14 17:39:20 -05:00 committed by GitHub
commit 607f143861
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 94 additions and 61 deletions

View File

@ -2,7 +2,7 @@ const { parseOpfMetadataXML } = require('../utils/parsers/parseOpfMetadata')
const { readTextFile } = require('../utils/fileUtils')
class OpfFileScanner {
constructor() { }
constructor() {}
/**
* Parse metadata from .opf file found in library scan and update bookMetadata
@ -15,11 +15,13 @@ class OpfFileScanner {
const opfMetadata = xmlText ? await parseOpfMetadataXML(xmlText) : null
if (opfMetadata) {
for (const key in opfMetadata) {
if (key === 'tags') { // Add tags only if tags are empty
if (key === 'tags') {
// Add tags only if tags are empty
if (opfMetadata.tags.length) {
bookMetadata.tags = opfMetadata.tags
}
} else if (key === 'genres') { // Add genres only if genres are empty
} else if (key === 'genres') {
// Add genres only if genres are empty
if (opfMetadata.genres.length) {
bookMetadata.genres = opfMetadata.genres
}

View File

@ -22,11 +22,22 @@ function parseCreators(metadata) {
Object.keys(c['$'])
.find((key) => key.startsWith('xmlns:'))
?.split(':')[1] || 'opf'
return {
const creator = {
value: c['_'],
role: c['$'][`${namespace}:role`] || null,
fileAs: c['$'][`${namespace}:file-as`] || null
}
const id = c['$']['id']
if (id && metadata.meta.refines?.some((r) => r.refines === `#${id}`)) {
const creatorMeta = metadata.meta.refines.filter((r) => r.refines === `#${id}`)
if (creatorMeta) {
creator.role = creatorMeta.find((r) => r.property === 'role')?.value || creator.role || null
creator.fileAs = creatorMeta.find((r) => r.property === 'file-as')?.value || creator.fileAs || null
}
}
return creator
})
}
@ -187,7 +198,6 @@ module.exports.parseOpfMetadataJson = (json) => {
const prefix = packageKey.split(':').shift()
let metadata = prefix ? json[packageKey][`${prefix}:metadata`] || json[packageKey].metadata : json[packageKey].metadata
if (!metadata) return null
if (Array.isArray(metadata)) {
if (!metadata.length) return null
metadata = metadata[0]
@ -198,12 +208,22 @@ module.exports.parseOpfMetadataJson = (json) => {
metadata.meta = {}
if (metadataMeta?.length) {
metadataMeta.forEach((meta) => {
if (meta && meta['$'] && meta['$'].name) {
if (meta?.['$']?.name) {
metadata.meta[meta['$'].name] = [meta['$'].content || '']
} else if (meta?.['$']?.refines) {
// https://www.w3.org/TR/epub-33/#sec-meta-elem
if (!metadata.meta.refines) {
metadata.meta.refines = []
}
metadata.meta.refines.push({
value: meta._,
refines: meta['$'].refines,
property: meta['$'].property
})
}
})
}
const creators = parseCreators(metadata)
const authors = (fetchCreators(creators, 'aut') || []).map((au) => au?.trim()).filter((au) => au)
const narrators = (fetchNarrators(creators, metadata) || []).map((nrt) => nrt?.trim()).filter((nrt) => nrt)
@ -227,5 +247,6 @@ module.exports.parseOpfMetadataJson = (json) => {
/**
 * Parse OPF metadata from XML text.
 *
 * Converts the XML to JSON with xmlToJSON and hands the result to
 * parseOpfMetadataJson.
 *
 * @param {string} xml - contents of an .opf document
 * @returns {Promise<Object|null>} null when xmlToJSON yields a falsy value,
 *   otherwise whatever parseOpfMetadataJson returns (which may itself be null)
 */
module.exports.parseOpfMetadataXML = async (xml) => {
  const json = await xmlToJSON(xml)
  if (!json) return null

  // Reference the export explicitly. `this` inside a module-level arrow function
  // only resolves to the exports object because of how the CommonJS wrapper
  // invokes the module, which is easy to break and hard to read.
  return module.exports.parseOpfMetadataJson(json)
}

View File

@ -14,7 +14,7 @@ describe('parseOpfMetadata - test series', async () => {
</package>
`
const parsedOpf = await parseOpfMetadataXML(opf)
expect(parsedOpf.series).to.deep.equal([{ "name": "Serie", "sequence": "1" }])
expect(parsedOpf.series).to.deep.equal([{ name: 'Serie', sequence: '1' }])
})
it('test more then 1 series - in correct order', async () => {
@ -33,9 +33,9 @@ describe('parseOpfMetadata - test series', async () => {
`
const parsedOpf = await parseOpfMetadataXML(opf)
expect(parsedOpf.series).to.deep.equal([
{ "name": "Serie 1", "sequence": "1" },
{ "name": "Serie 2", "sequence": "2" },
{ "name": "Serie 3", "sequence": "3" },
{ name: 'Serie 1', sequence: '1' },
{ name: 'Serie 2', sequence: '2' },
{ name: 'Serie 3', sequence: '3' }
])
})
@ -54,8 +54,8 @@ describe('parseOpfMetadata - test series', async () => {
`
const parsedOpf = await parseOpfMetadataXML(opf)
expect(parsedOpf.series).to.deep.equal([
{ "name": "Serie 1", "sequence": "1" },
{ "name": "Serie 3", "sequence": null },
{ name: 'Serie 1', sequence: '1' },
{ name: 'Serie 3', sequence: null }
])
})
@ -75,9 +75,9 @@ describe('parseOpfMetadata - test series', async () => {
`
const parsedOpf = await parseOpfMetadataXML(opf)
expect(parsedOpf.series).to.deep.equal([
{ "name": "Serie 1", "sequence": null },
{ "name": "Serie 2", "sequence": "abc" },
{ "name": "Serie 3", "sequence": null },
{ name: 'Serie 1', sequence: null },
{ name: 'Serie 2', sequence: 'abc' },
{ name: 'Serie 3', sequence: null }
])
})
@ -106,9 +106,7 @@ describe('parseOpfMetadata - test series', async () => {
</ns0:package>
`
const parsedOpf = await parseOpfMetadataXML(opf)
expect(parsedOpf.series).to.deep.equal([
{ "name": "Serie 1", "sequence": null }
])
expect(parsedOpf.series).to.deep.equal([{ name: 'Serie 1', sequence: null }])
})
it('test series and series index not directly underneath', async () => {
@ -123,8 +121,20 @@ describe('parseOpfMetadata - test series', async () => {
</package>
`
const parsedOpf = await parseOpfMetadataXML(opf)
expect(parsedOpf.series).to.deep.equal([
{ "name": "Serie 1", "sequence": "1" }
])
expect(parsedOpf.series).to.deep.equal([{ name: 'Serie 1', sequence: '1' }])
})
// The <dc:creator> below has no role attribute of its own: its "aut" role and
// file-as value are supplied only by <meta refines="#create1"> elements.
// The creator name is still expected to show up in the parsed authors.
it('test author is parsed from refines meta', async () => {
  const opfXml = `
<package version="3.0" unique-identifier="uuid_id" prefix="rendition: http://www.idpf.org/vocab/rendition/#" xmlns="http://www.idpf.org/2007/opf">
<metadata>
<dc:creator id="create1">Nevil Shute</dc:creator>
<meta refines="#create1" property="role" scheme="marc:relators">aut</meta>
<meta refines="#create1" property="file-as">Shute, Nevil</meta>
</metadata>
</package>
`
  const { authors } = await parseOpfMetadataXML(opfXml)
  expect(authors).to.deep.equal(['Nevil Shute'])
})
})