audiobookshelf/server/utils/parsers/parseOverdriveMediaMarkers.js

148 lines
5.6 KiB
JavaScript
Raw Normal View History

const Logger = require('../../Logger')
// given a list of audio files, extract all of the Overdrive Media Markers metaTags, and return an array of them as XML
2022-06-13 05:28:09 +02:00
function extractOverdriveMediaMarkers(includedAudioFiles) {
Logger.debug('[parseOverdriveMediaMarkers] Extracting overdrive media markers')
var markers = includedAudioFiles.map((af) => af.metaTags.tagOverdriveMediaMarker).filter(notUndefined => notUndefined !== undefined).filter(elem => { return elem !== null }) || []
2022-06-13 05:28:09 +02:00
return markers
}
// given the array of Overdrive Media Markers from generateOverdriveMediaMarkers()
// parse and clean them in to something a bit more usable
function cleanOverdriveMediaMarkers(overdriveMediaMarkers) {
2022-06-13 05:28:09 +02:00
Logger.debug('[parseOverdriveMediaMarkers] Cleaning up overdrive media markers')
/*
returns an array of arrays of objects. Each inner array corresponds to an audio track, with it's objects being a chapter:
[
[
{
"Name": "Chapter 1",
"Time": "0:00.000"
},
{
"Name": "Chapter 2",
"Time": "15:51.000"
},
{ etc }
]
]
*/
var parseString = require('xml2js').parseString; // function to convert xml to JSON
var parsedOverdriveMediaMarkers = []
overdriveMediaMarkers.forEach(function (item, index) {
var parsed_result
parseString(item, function (err, result) {
2022-06-13 05:28:09 +02:00
/*
result.Markers.Marker is the result of parsing the XML for the MediaMarker tags for the MP3 file (Part##.mp3)
it is shaped like this, and needs further cleaning below:
[
{
"Name": [
"Chapter 1: "
],
"Time": [
"0:00.000"
]
},
{
ANOTHER CHAPTER
},
]
*/
// The values for Name and Time in results.Markers.Marker are returned as Arrays from parseString and should be strings
parsed_result = objectValuesArrayToString(result.Markers.Marker)
})
parsedOverdriveMediaMarkers.push(parsed_result)
})
2022-06-13 05:28:09 +02:00
return removeExtraChapters(parsedOverdriveMediaMarkers)
}
// given an array of objects, convert any values that are arrays to strings
function objectValuesArrayToString(arrayOfObjects) {
Logger.debug('[parseOverdriveMediaMarkers] Converting Marker object values from arrays to strings')
arrayOfObjects.forEach((item) => {
Object.keys(item).forEach(key => {
item[key] = item[key].toString()
})
})
return arrayOfObjects
}
// Overdrive sometimes has weird chapters and subchapters defined
// These aren't necessary, so lets remove them
function removeExtraChapters(parsedOverdriveMediaMarkers) {
Logger.debug('[parseOverdriveMediaMarkers] Removing any unnecessary chapters')
const weirdChapterFilterRegex = /([(]\d|[cC]ontinued)/
var cleaned = []
parsedOverdriveMediaMarkers.forEach(function (item) {
cleaned.push(item.filter(chapter => !weirdChapterFilterRegex.test(chapter.Name)))
})
return cleaned
}
// Given a set of chapters from generateParsedChapters, add the end time to each one
function addChapterEndTimes(chapters, totalAudioDuration) {
Logger.debug('[parseOverdriveMediaMarkers] Adding chapter end times')
chapters.forEach((chapter, chapter_index) => {
if (chapter_index < chapters.length - 1) {
chapter.end = chapters[chapter_index + 1].start
} else {
chapter.end = totalAudioDuration
}
})
return chapters
}
// The function that actually generates the Chapters object that we update ABS with
function generateParsedChapters(includedAudioFiles, cleanedOverdriveMediaMarkers) {
2022-06-13 05:28:09 +02:00
Logger.debug('[parseOverdriveMediaMarkers] Generating new chapters for ABS')
// logic ported over from benonymity's OverdriveChapterizer:
// https://github.com/benonymity/OverdriveChapterizer/blob/main/chapters.py
2022-06-13 05:28:09 +02:00
var parsedChapters = []
var length = 0.0
var index = 0
var time = 0.0
2022-06-13 05:28:09 +02:00
// cleanedOverdriveMediaMarkers is an array of array of objects, where the inner array matches to the included audio files tracks
// this allows us to leverage the individual track durations when calculating the start times of chapters in tracks after the first (using length)
includedAudioFiles.forEach((track, track_index) => {
cleanedOverdriveMediaMarkers[track_index].forEach((chapter) => {
time = chapter.Time.split(":")
time = length + parseFloat(time[0]) * 60 + parseFloat(time[1])
2022-06-13 05:28:09 +02:00
var newChapterData = {
id: index++,
start: time,
title: chapter.Name
}
parsedChapters.push(newChapterData)
})
length += track.duration
})
2022-06-13 05:28:09 +02:00
parsedChapters = addChapterEndTimes(parsedChapters, length) // we need all the start times sorted out before we can add the end times
return parsedChapters
}
module.exports.overdriveMediaMarkersExist = (includedAudioFiles) => {
2022-06-13 05:28:09 +02:00
return extractOverdriveMediaMarkers(includedAudioFiles).length > 1
}
module.exports.parseOverdriveMediaMarkersAsChapters = (includedAudioFiles) => {
Logger.info('[parseOverdriveMediaMarkers] Parsing of Overdrive Media Markers started')
2022-06-13 05:28:09 +02:00
var overdriveMediaMarkers = extractOverdriveMediaMarkers(includedAudioFiles)
var cleanedOverdriveMediaMarkers = cleanOverdriveMediaMarkers(overdriveMediaMarkers)
var parsedChapters = generateParsedChapters(includedAudioFiles, cleanedOverdriveMediaMarkers)
2022-06-13 05:28:09 +02:00
return parsedChapters
2022-06-13 05:28:09 +02:00
}