diff --git a/server/utils/parsers/parseOverdriveMediaMarkers.js b/server/utils/parsers/parseOverdriveMediaMarkers.js index da323924..a65e15c3 100644 --- a/server/utils/parsers/parseOverdriveMediaMarkers.js +++ b/server/utils/parsers/parseOverdriveMediaMarkers.js @@ -1,113 +1,148 @@ const Logger = require('../../Logger') // given a list of audio files, extract all of the Overdrive Media Markers metaTags, and return an array of them as XML -function overdriveMediaMarkers(includedAudioFiles) { +function extractOverdriveMediaMarkers(includedAudioFiles) { + Logger.debug('[parseOverdriveMediaMarkers] Extracting overdrive media markers') var markers = includedAudioFiles.map((af) => af.metaTags.tagOverdriveMediaMarker).filter(notUndefined => notUndefined !== undefined).filter(elem => { return elem !== null }) || [] + return markers } // given the array of Overdrive Media Markers from generateOverdriveMediaMarkers() // parse and clean them in to something a bit more usable function cleanOverdriveMediaMarkers(overdriveMediaMarkers) { + Logger.debug('[parseOverdriveMediaMarkers] Cleaning up overdrive media markers') + /* + returns an array of arrays of objects. Each inner array corresponds to an audio track, with it's objects being a chapter: + [ + [ + { + "Name": "Chapter 1", + "Time": "0:00.000" + }, + { + "Name": "Chapter 2", + "Time": "15:51.000" + }, + { etc } + ] + ] + */ + var parseString = require('xml2js').parseString; // function to convert xml to JSON var parsedOverdriveMediaMarkers = [] - // go from an array of arrays of objects to an array of objects - // end result looks like: - // [ - // { - // "Name": "Chapter 1", - // "Time": "0:00.000" - // }, - // { - // "Name": "Chapter 2", - // "Time": "15:51.000" - // }, - // { redacted } - // ] overdriveMediaMarkers.forEach(function (item, index) { var parsed_result parseString(item, function (err, result) { - // result.Markers.Marker is the result of parsing the XML for the MediaMarker tags for the MP3 file (Part##.mp3) - // it is shaped like this: - // [ - // { - // "Name": [ - // "Chapter 1: " - // ], - // "Time": [ - // "0:00.000" - // ] - // }, - // { - // "Name": [ - // "Chapter 2: " - // ], - // "Time": [ - // "15:51.000" - // ] - // } - // ] + /* + result.Markers.Marker is the result of parsing the XML for the MediaMarker tags for the MP3 file (Part##.mp3) + it is shaped like this, and needs further cleaning below: + [ + { + "Name": [ + "Chapter 1: " + ], + "Time": [ + "0:00.000" + ] + }, + { + ANOTHER CHAPTER + }, + ] + */ - parsed_result = result.Markers.Marker - - // The values for Name and Time in parsed_results are returned as Arrays from parseString - // update them to be strings - parsed_result.forEach((item, index) => { - Object.keys(item).forEach(key => { - item[key] = item[key].toString() - }) - }) + // The values for Name and Time in results.Markers.Marker are returned as Arrays from parseString and should be strings + parsed_result = objectValuesArrayToString(result.Markers.Marker) }) parsedOverdriveMediaMarkers.push(parsed_result) }) - return parsedOverdriveMediaMarkers + return removeExtraChapters(parsedOverdriveMediaMarkers) +} + +// given an array of objects, convert any values that are arrays to strings +function objectValuesArrayToString(arrayOfObjects) { + Logger.debug('[parseOverdriveMediaMarkers] Converting Marker object values from arrays to strings') + arrayOfObjects.forEach((item) => { + Object.keys(item).forEach(key => { + item[key] = item[key].toString() + }) + }) + + return arrayOfObjects +} + +// Overdrive sometimes has weird chapters and subchapters defined +// These aren't necessary, so lets remove them +function removeExtraChapters(parsedOverdriveMediaMarkers) { + Logger.debug('[parseOverdriveMediaMarkers] Removing any unnecessary chapters') + const weirdChapterFilterRegex = /([(]\d|[cC]ontinued)/ + var cleaned = [] + parsedOverdriveMediaMarkers.forEach(function (item) { + cleaned.push(item.filter(chapter => !weirdChapterFilterRegex.test(chapter.Name))) + }) + + return cleaned +} + +// Given a set of chapters from generateParsedChapters, add the end time to each one +function addChapterEndTimes(chapters, totalAudioDuration) { + Logger.debug('[parseOverdriveMediaMarkers] Adding chapter end times') + chapters.forEach((chapter, chapter_index) => { + if (chapter_index < chapters.length - 1) { + chapter.end = chapters[chapter_index + 1].start + } else { + chapter.end = totalAudioDuration + } + }) + + return chapters } // The function that actually generates the Chapters object that we update ABS with function generateParsedChapters(includedAudioFiles, cleanedOverdriveMediaMarkers) { - // actually generate the chapter object + Logger.debug('[parseOverdriveMediaMarkers] Generating new chapters for ABS') // logic ported over from benonymity's OverdriveChapterizer: // https://github.com/benonymity/OverdriveChapterizer/blob/main/chapters.py + var parsedChapters = [] var length = 0.0 var index = 0 var time = 0.0 - var newChapters = [] - const weirdChapterFilterRegex = /([(]\d|[cC]ontinued)/ + + // cleanedOverdriveMediaMarkers is an array of array of objects, where the inner array matches to the included audio files tracks + // this allows us to leverage the individual track durations when calculating the start times of chapters in tracks after the first (using length) includedAudioFiles.forEach((track, track_index) => { cleanedOverdriveMediaMarkers[track_index].forEach((chapter) => { - Logger.debug(`[parseOverdriveMediaMarkers] Attempting regex check for ${chapter.Name}...`) - if (weirdChapterFilterRegex.test(chapter.Name)) { - Logger.debug(`[parseOverdriveMediaMarkers] Regex matched. Skipping ${chapter.Name}!`) - return - } time = chapter.Time.split(":") time = length + parseFloat(time[0]) * 60 + parseFloat(time[1]) - newChapters.push( - { - id: index++, - start: time, - end: length, - title: chapter.Name - } - ) + var newChapterData = { + id: index++, + start: time, + title: chapter.Name + } + parsedChapters.push(newChapterData) }) length += track.duration }) - return newChapters + parsedChapters = addChapterEndTimes(parsedChapters, length) // we need all the start times sorted out before we can add the end times + + return parsedChapters } module.exports.overdriveMediaMarkersExist = (includedAudioFiles) => { - return overdriveMediaMarkers(includedAudioFiles).length > 1 + return extractOverdriveMediaMarkers(includedAudioFiles).length > 1 } module.exports.parseOverdriveMediaMarkersAsChapters = (includedAudioFiles) => { Logger.info('[parseOverdriveMediaMarkers] Parsing of Overdrive Media Markers started') - var cleanedOverdriveMediaMarkers = cleanOverdriveMediaMarkers(overdriveMediaMarkers(includedAudioFiles)) + + var overdriveMediaMarkers = extractOverdriveMediaMarkers(includedAudioFiles) + var cleanedOverdriveMediaMarkers = cleanOverdriveMediaMarkers(overdriveMediaMarkers) var parsedChapters = generateParsedChapters(includedAudioFiles, cleanedOverdriveMediaMarkers) - + return parsedChapters -} +} \ No newline at end of file