audiobookshelf/server/utils/podcastUtils.js

172 lines
5.1 KiB
JavaScript

const Logger = require('../Logger')
const { xmlToJSON } = require('./index')
const htmlSanitizer = require('../utils/htmlSanitizer')
/**
 * Read the first element stored under `key` on a parsed XML node.
 *
 * @param {Object} json - Object whose properties are expected to hold arrays.
 * @param {string} key - Name of the property to read.
 * @returns {*} `json[key][0]`, or `null` when the property is falsy or has a falsy `length`.
 */
function extractFirstArrayItem(json, key) {
  const values = json[key]
  const hasValues = Boolean(values && values.length)
  return hasValues ? values[0] : null
}
/**
 * Resolve the podcast cover image URL from a parsed RSS channel.
 *
 * Lookup order:
 *  1. `channel.image.url[0]` (`image` held as a plain object with a `url` array)
 *  2. the `href` attribute of the first `itunes:image` element
 *  3. `channel.image[0].url[0]` (`image` wrapped in an array like every other
 *     child element this module reads, e.g. `channel['itunes:image'][0]`)
 *
 * Step 3 is consulted last on purpose: it only ever turns a previous `null`
 * result into a URL, so feeds that already resolved to their `itunes:image`
 * keep resolving to exactly the same value.
 *
 * NOTE(review): the array-wrapped shape is inferred from how the rest of this
 * module indexes elements — confirm against the xmlToJSON parser options.
 *
 * @param {Object} channel - Parsed `<channel>` node.
 * @returns {string|null} Image URL, or `null` when none could be found.
 */
function extractImage(channel) {
  const directUrls = channel.image && channel.image.url
  if (directUrls && directUrls.length) {
    return directUrls[0] || null
  }

  const itunesImages = channel['itunes:image']
  if (itunesImages && itunesImages.length && itunesImages[0]['$'] && itunesImages[0]['$'].href) {
    return itunesImages[0]['$'].href
  }

  // Fallback for feeds that only provide the plain RSS <image><url> element.
  if (Array.isArray(channel.image) && channel.image.length) {
    const imageNode = channel.image[0]
    const wrappedUrls = imageNode && imageNode.url
    if (wrappedUrls && wrappedUrls.length && typeof wrappedUrls[0] === 'string' && wrappedUrls[0]) {
      return wrappedUrls[0]
    }
  }

  return null
}
/**
 * Flatten the (possibly nested) `itunes:category` elements of a node into a
 * list of names. A category with named sub-categories contributes one
 * `Parent:Child` entry per sub-category; otherwise it contributes its own name.
 * Entries without a `text` attribute are skipped.
 *
 * @param {Object} channel - Node that may carry an `itunes:category` array
 *   (the channel itself, or a category node during recursion).
 * @returns {string[]} Category names in document order; empty when there are none.
 */
function extractCategories(channel) {
  const rawCategories = channel['itunes:category']
  if (!rawCategories || !rawCategories.length) return []

  return rawCategories.reduce((names, node) => {
    const attrs = node['$']
    const label = attrs && attrs.text
    if (!label) return names

    // Recurse only when the node actually has nested categories.
    const nested = node['itunes:category'] ? extractCategories(node) : []
    if (nested.length === 0) {
      names.push(label)
    } else {
      nested.forEach((childName) => names.push(`${label}:${childName}`))
    }
    return names
  }, [])
}
/**
 * Build the podcast-level metadata object from a parsed RSS channel.
 *
 * Feed URL resolution:
 *  - `itunes:new-feed-url` wins when present;
 *  - otherwise the `href` of the `atom:link` whose `rel` is `"self"`;
 *  - otherwise the `href` of the first `atom:link` (the previous behaviour),
 *    so feeds that never declared a `rel="self"` link resolve as before.
 * Feeds may list several `atom:link` elements (e.g. `rel="hub"`, `rel="next"`),
 * so the first one is not necessarily the feed's own address.
 *
 * The description is passed through `htmlSanitizer.sanitize` for `description`
 * and through `htmlSanitizer.stripAllTags` for `descriptionPlain`.
 *
 * @param {Object} channel - Parsed `<channel>` node.
 * @returns {Object} Object with `image`, `categories`, `feedUrl`, `description`,
 *   `descriptionPlain`, `title`, `language`, `explicit`, `author`, `pubDate`
 *   and `link`; fields missing from the feed are `null` (`categories` is `[]`).
 */
function extractPodcastMetadata(channel) {
  const metadata = {
    image: extractImage(channel),
    categories: extractCategories(channel),
    feedUrl: null,
    description: null,
    descriptionPlain: null
  }

  const atomLinks = channel['atom:link']
  if (channel['itunes:new-feed-url']) {
    metadata.feedUrl = extractFirstArrayItem(channel, 'itunes:new-feed-url')
  } else if (Array.isArray(atomLinks) && atomLinks.length) {
    const selfLink = atomLinks.find((link) => link && link['$'] && link['$'].rel === 'self' && link['$'].href)
    const chosenLink = selfLink || atomLinks[0]
    metadata.feedUrl = (chosenLink && chosenLink['$'] && chosenLink['$'].href) || null
  }

  if (channel['description']) {
    const rawDescription = extractFirstArrayItem(channel, 'description') || ''
    metadata.description = htmlSanitizer.sanitize(rawDescription)
    metadata.descriptionPlain = htmlSanitizer.stripAllTags(rawDescription)
  }

  // Namespaced keys are stored without their prefix, e.g. `itunes:author` -> `author`.
  const arrayFields = ['title', 'language', 'itunes:explicit', 'itunes:author', 'pubDate', 'link']
  arrayFields.forEach((key) => {
    const cleanKey = key.split(':').pop()
    metadata[cleanKey] = extractFirstArrayItem(channel, key)
  })

  return metadata
}
/**
 * Pull the raw episode fields out of a parsed RSS `<item>` node.
 *
 * @param {Object} item - Parsed `<item>` node.
 * @returns {Object|null} Object holding a copy of the enclosure attributes plus
 *   `description` / `descriptionPlain` (only when the item provides them) and
 *   `title`, `pubDate`, `episodeType`, `season`, `episode`, `author`,
 *   `duration`, `explicit`, `subtitle` (each `null` when absent). Returns
 *   `null`, after logging an error, when the item has no enclosure url.
 */
function extractEpisodeData(item) {
  // Episode must have url
  const enclosures = item.enclosure
  const hasEnclosureUrl = Boolean(enclosures && enclosures.length && enclosures[0]['$'] && enclosures[0]['$'].url)
  if (!hasEnclosureUrl) {
    Logger.error(`[podcastUtils] Invalid podcast episode data`)
    return null
  }

  const episode = {
    enclosure: { ...enclosures[0]['$'] }
  }

  // Full description with html
  if (item['content:encoded']) {
    const encodedHtml = (extractFirstArrayItem(item, 'content:encoded') || '').trim()
    episode.description = htmlSanitizer.sanitize(encodedHtml)
  }

  // Supposed to be the plaintext description but not always followed
  if (item['description']) {
    const descriptionText = extractFirstArrayItem(item, 'description') || ''
    // Only used as the html description when content:encoded gave nothing.
    if (!episode.description) {
      episode.description = htmlSanitizer.sanitize(descriptionText)
    }
    episode.descriptionPlain = htmlSanitizer.stripAllTags(descriptionText)
  }

  const itemKeys = ['title', 'pubDate', 'itunes:episodeType', 'itunes:season', 'itunes:episode', 'itunes:author', 'itunes:duration', 'itunes:explicit', 'itunes:subtitle']
  for (const itemKey of itemKeys) {
    // Drop any namespace prefix: `itunes:season` is stored as `season`.
    const segments = itemKey.split(':')
    const fieldName = segments[segments.length - 1]
    episode[fieldName] = extractFirstArrayItem(item, itemKey)
  }

  return episode
}
/**
 * Normalise raw episode data into the shape stored for an episode.
 *
 * Every optional text field falls back to an empty string. `publishedAt` is
 * the epoch-millisecond value of `pubDate`, or `null` when `pubDate` is missing
 * or cannot be parsed. Without the guard, `new Date(null)` yields `0`
 * (1970-01-01) and an unparseable string yields `NaN`.
 *
 * @param {Object} data - Raw episode fields (e.g. the result of extractEpisodeData).
 * @returns {Object} Object with `title`, `subtitle`, `description`,
 *   `descriptionPlain`, `pubDate`, `episodeType`, `season`, `episode`,
 *   `author`, `duration`, `explicit`, `publishedAt` and `enclosure`.
 */
function cleanEpisodeData(data) {
  const parsedPubDate = data.pubDate ? new Date(data.pubDate) : null
  const pubTimestamp = parsedPubDate ? parsedPubDate.valueOf() : NaN
  const publishedAt = Number.isNaN(pubTimestamp) ? null : pubTimestamp

  return {
    title: data.title,
    subtitle: data.subtitle || '',
    description: data.description || '',
    descriptionPlain: data.descriptionPlain || '',
    pubDate: data.pubDate || '',
    episodeType: data.episodeType || '',
    season: data.season || '',
    episode: data.episode || '',
    author: data.author || '',
    duration: data.duration || '',
    explicit: data.explicit || '',
    publishedAt,
    enclosure: data.enclosure
  }
}
/**
 * Convert parsed RSS `<item>` nodes into cleaned episode objects.
 * Items for which extractEpisodeData returns a falsy value are left out.
 *
 * @param {Object[]} items - Parsed `<item>` nodes of a channel.
 * @returns {Object[]} Results of cleanEpisodeData, in feed order.
 */
function extractPodcastEpisodes(items) {
  return items.reduce((episodes, item) => {
    const rawEpisode = extractEpisodeData(item)
    if (rawEpisode) {
      episodes.push(cleanEpisodeData(rawEpisode))
    }
    return episodes
  }, [])
}
/**
 * Turn the parsed `<rss>` node into `{ metadata, episodes }`.
 *
 * Only the first channel is read. A feed with no channel, or a channel with no
 * items, is logged as an error and rejected.
 *
 * NOTE(review): the log tag below is `[podcastUtil]` while other functions in
 * this file log as `[podcastUtils]`; left untouched here.
 *
 * @param {Object} rssJson - Parsed `<rss>` node.
 * @returns {{metadata: Object, episodes: Object[]}|null} The podcast, or `null`
 *   when the feed has no channel or no items.
 */
function cleanPodcastJson(rssJson) {
  const channels = rssJson.channel
  if (!(channels && channels.length)) {
    Logger.error(`[podcastUtil] Invalid podcast no channel object`)
    return null
  }

  const channel = channels[0]
  const items = channel.item
  if (!(items && items.length)) {
    Logger.error(`[podcastUtil] Invalid podcast no episodes`)
    return null
  }

  return {
    metadata: extractPodcastMetadata(channel),
    episodes: extractPodcastEpisodes(items)
  }
}
/**
 * Parse a podcast RSS feed from its XML text.
 *
 * @param {string} xml - Raw feed XML.
 * @param {boolean} [includeRaw=false] - When truthy, the full parsed JSON is
 *   returned alongside the podcast as `rawJson`.
 * @returns {Promise<{podcast: Object, rawJson?: Object}|null>} Resolves to
 *   `null` when `xml` is falsy, when the parsed result has no `rss` root, or
 *   when cleanPodcastJson rejects the feed.
 */
module.exports.parsePodcastRssFeedXml = async (xml, includeRaw = false) => {
  if (!xml) return null

  const json = await xmlToJSON(xml)
  if (!(json && json.rss)) {
    Logger.error('[podcastUtils] Invalid XML or RSS feed')
    return null
  }

  const podcast = cleanPodcastJson(json.rss)
  if (!podcast) return null

  const result = { podcast }
  if (includeRaw) {
    result.rawJson = json
  }
  return result
}