From 6ed66fea16f0551d6b8ff4cfd5209f392591fe89 Mon Sep 17 00:00:00 2001 From: advplyr Date: Mon, 31 Mar 2025 17:57:39 -0500 Subject: [PATCH] Update podcast rss feed parser to use psc chapters on episodes --- server/models/Feed.js | 1 + server/models/FeedEpisode.js | 18 +++++++++++ server/models/PodcastEpisode.js | 4 +++ server/utils/index.js | 26 ++++++++++++++++ server/utils/podcastUtils.js | 55 +++++++++++++++++++++++++++++++-- 5 files changed, 102 insertions(+), 2 deletions(-) diff --git a/server/models/Feed.js b/server/models/Feed.js index 577dedfb..0ec0ef68 100644 --- a/server/models/Feed.js +++ b/server/models/Feed.js @@ -607,6 +607,7 @@ class Feed extends Model { custom_namespaces: { itunes: 'http://www.itunes.com/dtds/podcast-1.0.dtd', podcast: 'https://podcastindex.org/namespace/1.0', + psc: 'http://podlove.org/simple-chapters', googleplay: 'http://www.google.com/schemas/play-podcasts/1.0' }, custom_elements: customElements diff --git a/server/models/FeedEpisode.js b/server/models/FeedEpisode.js index 4dcdca45..0eabb5b3 100644 --- a/server/models/FeedEpisode.js +++ b/server/models/FeedEpisode.js @@ -325,6 +325,24 @@ class FeedEpisode extends Model { customElements.push({ 'itunes:summary': { _cdata: this.description } }) } + customElements.push({ + 'psc:chapters': [ + { + _attr: { + version: '1.2' + }, + 'psc:chapter': [ + { + _attr: { + title: 'Test', + start: '00:00:00' + } + } + ] + } + ] + }) + return { title: this.title, description: this.description || '', diff --git a/server/models/PodcastEpisode.js b/server/models/PodcastEpisode.js index e6d62916..27e702a1 100644 --- a/server/models/PodcastEpisode.js +++ b/server/models/PodcastEpisode.js @@ -80,9 +80,13 @@ class PodcastEpisode extends Model { if (rssPodcastEpisode.guid) { podcastEpisode.extraData.guid = rssPodcastEpisode.guid } + if (audioFile.chapters?.length) { podcastEpisode.chapters = audioFile.chapters.map((ch) => ({ ...ch })) + } else if (rssPodcastEpisode.chapters?.length) { + podcastEpisode.chapters = rssPodcastEpisode.chapters.map((ch) => ({ ...ch })) } + return this.create(podcastEpisode) } diff --git a/server/utils/index.js b/server/utils/index.js index a8c0ff54..9f7d961c 100644 --- a/server/utils/index.js +++ b/server/utils/index.js @@ -243,3 +243,29 @@ module.exports.isValidASIN = (str) => { if (!str || typeof str !== 'string') return false return /^[A-Z0-9]{10}$/.test(str) } + +/** + * Convert timestamp to seconds + * @example "01:00:00" => 3600 + * @example "01:00" => 60 + * @example "01" => 1 + * + * @param {string} timestamp + * @returns {number} + */ +module.exports.timestampToSeconds = (timestamp) => { + if (typeof timestamp !== 'string') { + return null + } + const parts = timestamp.split(':').map(Number) + if (parts.some(isNaN)) { + return null + } else if (parts.length === 1) { + return parts[0] + } else if (parts.length === 2) { + return parts[0] * 60 + parts[1] + } else if (parts.length === 3) { + return parts[0] * 3600 + parts[1] * 60 + parts[2] + } + return null +} diff --git a/server/utils/podcastUtils.js b/server/utils/podcastUtils.js index 53ed8e7e..a7ecce8a 100644 --- a/server/utils/podcastUtils.js +++ b/server/utils/podcastUtils.js @@ -1,9 +1,17 @@ const axios = require('axios') const ssrfFilter = require('ssrf-req-filter') const Logger = require('../Logger') -const { xmlToJSON, levenshteinDistance } = require('./index') +const { xmlToJSON, levenshteinDistance, timestampToSeconds } = require('./index') const htmlSanitizer = require('../utils/htmlSanitizer') +/** + * @typedef RssPodcastChapter + * @property {number} id + * @property {string} title + * @property {number} start + * @property {number} end + */ + /** * @typedef RssPodcastEpisode * @property {string} title @@ -22,6 +30,7 @@ const htmlSanitizer = require('../utils/htmlSanitizer') * @property {string} guid * @property {string} chaptersUrl * @property {string} chaptersType + * @property {RssPodcastChapter[]} chapters */ /** @@ -205,12 +214,53 @@ function extractEpisodeData(item) { const cleanKey = key.split(':').pop() episode[cleanKey] = extractFirstArrayItemString(item, key) }) + + // Extract psc:chapters if duration is set + let episodeDuration = !isNaN(episode.duration) ? timestampToSeconds(episode.duration) : null + if (item['psc:chapters']?.[0]?.['psc:chapter']?.length && episodeDuration) { + // Example chapter: + // {"id":0,"start":0,"end":43.004286,"title":"chapter 1"} + + const cleanedChapters = item['psc:chapters'][0]['psc:chapter'].map((chapter, index) => { + if (!chapter['$']?.title || !chapter['$']?.start || typeof chapter['$']?.start !== 'string' || typeof chapter['$']?.title !== 'string') { + return null + } + + const start = timestampToSeconds(chapter['$'].start) + if (start === null) { + return null + } + + return { + id: index, + title: chapter['$'].title, + start + } + }) + + if (cleanedChapters.some((chapter) => !chapter)) { + Logger.warn(`[podcastUtils] Invalid chapter data for ${episode.enclosure.url}`) + } else { + episode.chapters = cleanedChapters.map((chapter, index) => { + const nextChapter = cleanedChapters[index + 1] + const end = nextChapter ? nextChapter.start : episodeDuration + return { + id: chapter.id, + title: chapter.title, + start: chapter.start, + end + } + }) + } + } + return episode } function cleanEpisodeData(data) { const pubJsDate = data.pubDate ? new Date(data.pubDate) : null const publishedAt = pubJsDate && !isNaN(pubJsDate) ? pubJsDate.valueOf() : null + return { title: data.title, subtitle: data.subtitle || '', @@ -227,7 +277,8 @@ function cleanEpisodeData(data) { enclosure: data.enclosure, guid: data.guid || null, chaptersUrl: data.chaptersUrl || null, - chaptersType: data.chaptersType || null + chaptersType: data.chaptersType || null, + chapters: data.chapters || [] } }