Update podcast rss feed parser to use psc chapters on episodes

This commit is contained in:
advplyr 2025-03-31 17:57:39 -05:00
parent ddcda197b4
commit 6ed66fea16
5 changed files with 102 additions and 2 deletions

View File

@ -607,6 +607,7 @@ class Feed extends Model {
custom_namespaces: {
itunes: 'http://www.itunes.com/dtds/podcast-1.0.dtd',
podcast: 'https://podcastindex.org/namespace/1.0',
psc: 'http://podlove.org/simple-chapters',
googleplay: 'http://www.google.com/schemas/play-podcasts/1.0'
},
custom_elements: customElements

View File

@ -325,6 +325,24 @@ class FeedEpisode extends Model {
customElements.push({ 'itunes:summary': { _cdata: this.description } })
}
customElements.push({
'psc:chapters': [
{
_attr: {
version: '1.2'
},
'psc:chapter': [
{
_attr: {
title: 'Test',
start: '00:00:00'
}
}
]
}
]
})
return {
title: this.title,
description: this.description || '',

View File

@ -80,9 +80,13 @@ class PodcastEpisode extends Model {
if (rssPodcastEpisode.guid) {
podcastEpisode.extraData.guid = rssPodcastEpisode.guid
}
if (audioFile.chapters?.length) {
podcastEpisode.chapters = audioFile.chapters.map((ch) => ({ ...ch }))
} else if (rssPodcastEpisode.chapters?.length) {
podcastEpisode.chapters = rssPodcastEpisode.chapters.map((ch) => ({ ...ch }))
}
return this.create(podcastEpisode)
}

View File

@ -243,3 +243,29 @@ module.exports.isValidASIN = (str) => {
if (!str || typeof str !== 'string') return false
return /^[A-Z0-9]{10}$/.test(str)
}
/**
 * Convert a colon-separated timestamp to seconds.
 *
 * Accepts "SS", "MM:SS" or "HH:MM:SS". Every part must be a plain
 * non-negative decimal number (an optional fraction is allowed, e.g. "30.5").
 * Anything else - a non-string, an empty part such as "" or "1:", a sign,
 * hex/exponent notation, or more than three parts - yields null.
 *
 * @example "01:00:00" => 3600
 * @example "01:00" => 60
 * @example "01" => 1
 * @example "00:01:30.5" => 90.5
 * @example "" => null
 * @example "1:" => null
 *
 * @param {string} timestamp
 * @returns {number|null} total seconds, or null when the timestamp is invalid
 */
module.exports.timestampToSeconds = (timestamp) => {
  if (typeof timestamp !== 'string') {
    return null
  }

  const parts = timestamp.split(':').map((part) => part.trim())
  if (parts.length > 3) {
    return null
  }

  // Validate the text of each part rather than the result of Number():
  // Number('') is 0 and Number() also accepts '-5', '0x10', '1e3' and 'Infinity',
  // which would turn malformed timestamps into plausible-looking values.
  const isPlainNumber = (part) => /^\d+(\.\d+)?$/.test(part)
  if (!parts.every(isPlainNumber)) {
    return null
  }

  // Fold left to right so each earlier part is worth 60x the next one:
  // [h, m, s] => (h * 60 + m) * 60 + s
  return parts.reduce((total, part) => total * 60 + Number(part), 0)
}

View File

@ -1,9 +1,17 @@
const axios = require('axios')
const ssrfFilter = require('ssrf-req-filter')
const Logger = require('../Logger')
const { xmlToJSON, levenshteinDistance } = require('./index')
const { xmlToJSON, levenshteinDistance, timestampToSeconds } = require('./index')
const htmlSanitizer = require('../utils/htmlSanitizer')
/**
* @typedef RssPodcastChapter
* @property {number} id
* @property {string} title
* @property {number} start
* @property {number} end
*/
/**
* @typedef RssPodcastEpisode
* @property {string} title
@ -22,6 +30,7 @@ const htmlSanitizer = require('../utils/htmlSanitizer')
* @property {string} guid
* @property {string} chaptersUrl
* @property {string} chaptersType
* @property {RssPodcastChapter[]} chapters
*/
/**
@ -205,12 +214,53 @@ function extractEpisodeData(item) {
const cleanKey = key.split(':').pop()
episode[cleanKey] = extractFirstArrayItemString(item, key)
})
// Extract psc:chapters if duration is set
let episodeDuration = !isNaN(episode.duration) ? timestampToSeconds(episode.duration) : null
if (item['psc:chapters']?.[0]?.['psc:chapter']?.length && episodeDuration) {
// Example chapter:
// {"id":0,"start":0,"end":43.004286,"title":"chapter 1"}
const cleanedChapters = item['psc:chapters'][0]['psc:chapter'].map((chapter, index) => {
if (!chapter['$']?.title || !chapter['$']?.start || typeof chapter['$']?.start !== 'string' || typeof chapter['$']?.title !== 'string') {
return null
}
const start = timestampToSeconds(chapter['$'].start)
if (start === null) {
return null
}
return {
id: index,
title: chapter['$'].title,
start
}
})
if (cleanedChapters.some((chapter) => !chapter)) {
Logger.warn(`[podcastUtils] Invalid chapter data for ${episode.enclosure.url}`)
} else {
episode.chapters = cleanedChapters.map((chapter, index) => {
const nextChapter = cleanedChapters[index + 1]
const end = nextChapter ? nextChapter.start : episodeDuration
return {
id: chapter.id,
title: chapter.title,
start: chapter.start,
end
}
})
}
}
return episode
}
function cleanEpisodeData(data) {
const pubJsDate = data.pubDate ? new Date(data.pubDate) : null
const publishedAt = pubJsDate && !isNaN(pubJsDate) ? pubJsDate.valueOf() : null
return {
title: data.title,
subtitle: data.subtitle || '',
@ -227,7 +277,8 @@ function cleanEpisodeData(data) {
enclosure: data.enclosure,
guid: data.guid || null,
chaptersUrl: data.chaptersUrl || null,
chaptersType: data.chaptersType || null
chaptersType: data.chaptersType || null,
chapters: data.chapters || []
}
}