diff --git a/server/controllers/PodcastController.js b/server/controllers/PodcastController.js index 097f26b2..36c82d01 100644 --- a/server/controllers/PodcastController.js +++ b/server/controllers/PodcastController.js @@ -1,10 +1,9 @@ const axios = require('axios') const fs = require('../libs/fsExtra') -const Path = require('path') const Logger = require('../Logger') -const { parsePodcastRssFeedXml } = require('../utils/podcastUtils') +const { getPodcastFeed, findMatchingEpisodes } = require('../utils/podcastUtils') const LibraryItem = require('../objects/LibraryItem') -const { getFileTimestampsWithIno, sanitizeFilename } = require('../utils/fileUtils') +const { getFileTimestampsWithIno } = require('../utils/fileUtils') const filePerms = require('../utils/filePerms') class PodcastController { @@ -91,32 +90,17 @@ class PodcastController { } } - getPodcastFeed(req, res) { + async getPodcastFeed(req, res) { var url = req.body.rssFeed if (!url) { return res.status(400).send('Bad request') } - var includeRaw = req.query.raw == 1 // Include raw json - axios.get(url).then(async (data) => { - if (!data || !data.data) { - Logger.error('Invalid podcast feed request response') - return res.status(500).send('Bad response from feed request') - } - Logger.debug(`[PodcastController] Podcast feed size ${(data.data.length / 1024 / 1024).toFixed(2)}MB`) - var payload = await parsePodcastRssFeedXml(data.data, false, includeRaw) - if (!payload) { - return res.status(500).send('Invalid podcast RSS feed') - } - - // RSS feed may be a private RSS feed - payload.podcast.metadata.feedUrl = url - - res.json(payload) - }).catch((error) => { - console.error('Failed', error) - res.status(500).send(error) - }) + const podcast = await getPodcastFeed(url) + if (!podcast) { + return res.status(404).send('Podcast RSS feed request failed or invalid response data') + } + res.json({ podcast }) } async getOPMLFeeds(req, res) { @@ -177,9 +161,7 @@ class PodcastController { if (!searchTitle) { return res.sendStatus(500) } - searchTitle = searchTitle.toLowerCase().trim() - - const episodes = await this.podcastManager.findEpisode(rssFeedUrl, searchTitle) + const episodes = await findMatchingEpisodes(rssFeedUrl, searchTitle) res.json({ episodes: episodes || [] }) diff --git a/server/managers/PodcastManager.js b/server/managers/PodcastManager.js index 1402c0b4..f9c1b819 100644 --- a/server/managers/PodcastManager.js +++ b/server/managers/PodcastManager.js @@ -1,7 +1,6 @@ const fs = require('../libs/fsExtra') -const axios = require('axios') -const { parsePodcastRssFeedXml } = require('../utils/podcastUtils') +const { getPodcastFeed } = require('../utils/podcastUtils') const Logger = require('../Logger') const { downloadFile, removeFile } = require('../utils/fileUtils') @@ -226,7 +225,7 @@ class PodcastManager { Logger.error(`[PodcastManager] checkPodcastForNewEpisodes no feed url for ${podcastLibraryItem.media.metadata.title} (ID: ${podcastLibraryItem.id})`) return false } - var feed = await this.getPodcastFeed(podcastLibraryItem.media.metadata.feedUrl) + var feed = await getPodcastFeed(podcastLibraryItem.media.metadata.feedUrl) if (!feed || !feed.episodes) { Logger.error(`[PodcastManager] checkPodcastForNewEpisodes invalid feed payload for ${podcastLibraryItem.media.metadata.title} (ID: ${podcastLibraryItem.id})`, feed) return false @@ -262,7 +261,7 @@ class PodcastManager { } async findEpisode(rssFeedUrl, searchTitle) { - const feed = await this.getPodcastFeed(rssFeedUrl).catch(() => { + const feed = await getPodcastFeed(rssFeedUrl).catch(() => { return null }) if (!feed || !feed.episodes) { @@ -292,25 +291,6 @@ class PodcastManager { return matches.sort((a, b) => a.levenshtein - b.levenshtein) } - getPodcastFeed(feedUrl, excludeEpisodeMetadata = false) { - Logger.debug(`[PodcastManager] getPodcastFeed for "${feedUrl}"`) - return axios.get(feedUrl, { timeout: 5000 }).then(async (data) => { - if (!data || !data.data) { - Logger.error('Invalid podcast feed request response') - return false - } - Logger.debug(`[PodcastManager] getPodcastFeed for "${feedUrl}" success - parsing xml`) - var payload = await parsePodcastRssFeedXml(data.data, excludeEpisodeMetadata) - if (!payload) { - return false - } - return payload.podcast - }).catch((error) => { - Logger.error('[PodcastManager] getPodcastFeed Error', error) - return false - }) - } - async getOPMLFeeds(opmlText) { var extractedFeeds = opmlParser.parse(opmlText) if (!extractedFeeds || !extractedFeeds.length) { @@ -323,7 +303,7 @@ class PodcastManager { var rssFeedData = [] for (let feed of extractedFeeds) { - var feedData = await this.getPodcastFeed(feed.feedUrl, true) + var feedData = await getPodcastFeed(feed.feedUrl, true) if (feedData) { feedData.metadata.feedUrl = feed.feedUrl rssFeedData.push(feedData) diff --git a/server/objects/entities/PodcastEpisode.js b/server/objects/entities/PodcastEpisode.js index e9717e2f..47b1b4bf 100644 --- a/server/objects/entities/PodcastEpisode.js +++ b/server/objects/entities/PodcastEpisode.js @@ -107,6 +107,9 @@ class PodcastEpisode { if (this.episode) return `${this.episode} - ${this.title}` return this.title } + get enclosureUrl() { + return this.enclosure ? this.enclosure.url : null + } setData(data, index = 1) { this.id = getId('ep') diff --git a/server/scanner/Scanner.js b/server/scanner/Scanner.js index 250b6f6d..acfcdd9e 100644 --- a/server/scanner/Scanner.js +++ b/server/scanner/Scanner.js @@ -7,6 +7,7 @@ const { groupFilesIntoLibraryItemPaths, getLibraryItemFileData, scanFolder } = r const { comparePaths } = require('../utils/index') const { getIno } = require('../utils/fileUtils') const { ScanResult, LogLevel } = require('../utils/constants') +const { findMatchingEpisodesInFeed, getPodcastFeed } = require('../utils/podcastUtils') const MediaFileScanner = require('./MediaFileScanner') const BookFinder = require('../finders/BookFinder') @@ -684,7 +685,7 @@ class Scanner { var updatePayload = {} var hasUpdated = false - if (libraryItem.mediaType === 'book') { + if (libraryItem.isBook) { var searchISBN = options.isbn || libraryItem.media.metadata.isbn var searchASIN = options.asin || libraryItem.media.metadata.asin @@ -708,7 +709,7 @@ class Scanner { } updatePayload = await this.quickMatchBookBuildUpdatePayload(libraryItem, matchData, options) - } else { // Podcast quick match + } else if (libraryItem.isPodcast) { // Podcast quick match var results = await this.podcastFinder.search(searchTitle) if (!results.length) { return { @@ -739,6 +740,10 @@ class Scanner { } if (hasUpdated) { + if (libraryItem.isPodcast && libraryItem.media.metadata.feedUrl) { // Quick match all unmatched podcast episodes + await this.quickMatchPodcastEpisodes(libraryItem, options) + } + await this.db.updateLibraryItem(libraryItem) this.emitter('item_updated', libraryItem.toJSONExpanded()) } @@ -762,6 +767,7 @@ class Scanner { itunesArtistId: matchData.artistId || null, releaseDate: matchData.releaseDate || null, imageUrl: matchData.cover || null, + feedUrl: matchData.feedUrl || null, description: matchData.descriptionPlain || null } @@ -769,9 +775,10 @@ class Scanner { if (matchDataTransformed[key]) { if (key === 'genres') { if ((!libraryItem.media.metadata.genres || options.overrideDetails)) { + // TODO: Genres array or string? updatePayload.metadata[key] = matchDataTransformed[key].split(',').map(v => v.trim()).filter(v => !!v) } - } else if (!libraryItem.media.metadata[key] || options.overrideDetails) { + } else if (libraryItem.media.metadata[key] !== matchDataTransformed[key] && (!libraryItem.media.metadata[key] || options.overrideDetails)) { updatePayload.metadata[key] = matchDataTransformed[key] } } @@ -854,6 +861,61 @@ class Scanner { return updatePayload } + async quickMatchPodcastEpisodes(libraryItem, options = {}) { + const episodesToQuickMatch = libraryItem.media.episodes.filter(ep => !ep.enclosureUrl) // Only quick match episodes without enclosure + if (!episodesToQuickMatch.length) return false + + const feed = await getPodcastFeed(libraryItem.media.metadata.feedUrl) + if (!feed) { + Logger.error(`[Scanner] quickMatchPodcastEpisodes: Unable to quick match episodes feed not found for "${libraryItem.media.metadata.feedUrl}"`) + return false + } + + var episodesWereUpdated = false + for (const episode of episodesToQuickMatch) { + const episodeMatches = findMatchingEpisodesInFeed(feed, episode.title) + if (episodeMatches && episodeMatches.length) { + const wasUpdated = this.updateEpisodeWithMatch(libraryItem, episode, episodeMatches[0].episode, options) + if (wasUpdated) episodesWereUpdated = true + } + } + return episodesWereUpdated + } + + updateEpisodeWithMatch(libraryItem, episode, episodeToMatch, options = {}) { + Logger.debug(`[Scanner] quickMatchPodcastEpisodes: Found episode match for "${episode.title}" => ${episodeToMatch.title}`) + const matchDataTransformed = { + title: episodeToMatch.title || '', + subtitle: episodeToMatch.subtitle || '', + description: episodeToMatch.description || '', + enclosure: episodeToMatch.enclosure || null, + episode: episodeToMatch.episode || '', + episodeType: episodeToMatch.episodeType || '', + season: episodeToMatch.season || '', + pubDate: episodeToMatch.pubDate || '', + publishedAt: episodeToMatch.publishedAt + } + const updatePayload = {} + for (const key in matchDataTransformed) { + if (matchDataTransformed[key]) { + if (key === 'enclosure') { + if (!episode.enclosure || JSON.stringify(episode.enclosure) !== JSON.stringify(matchDataTransformed.enclosure)) { + updatePayload[key] = { + ...matchDataTransformed.enclosure + } + } + } else if (episode[key] !== matchDataTransformed[key] && (!episode[key] || options.overrideDetails)) { + updatePayload[key] = matchDataTransformed[key] + } + } + } + + if (Object.keys(updatePayload).length) { + return libraryItem.media.updateEpisode(episode.id, updatePayload) + } + return false + } + async matchLibraryItems(library) { if (library.mediaType === 'podcast') { Logger.error(`[Scanner] matchLibraryItems: Match all not supported for podcasts yet`) diff --git a/server/utils/podcastUtils.js b/server/utils/podcastUtils.js index 93ed5349..a73fd768 100644 --- a/server/utils/podcastUtils.js +++ b/server/utils/podcastUtils.js @@ -1,5 +1,6 @@ const Logger = require('../Logger') -const { xmlToJSON } = require('./index') +const axios = require('axios') +const { xmlToJSON, levenshteinDistance } = require('./index') const htmlSanitizer = require('../utils/htmlSanitizer') function extractFirstArrayItem(json, key) { @@ -173,4 +174,65 @@ module.exports.parsePodcastRssFeedXml = async (xml, excludeEpisodeMetadata = fal podcast } } +} + +module.exports.getPodcastFeed = (feedUrl, excludeEpisodeMetadata = false) => { + Logger.debug(`[podcastUtils] getPodcastFeed for "${feedUrl}"`) + return axios.get(feedUrl, { timeout: 6000 }).then(async (data) => { + if (!data || !data.data) { + Logger.error(`[podcastUtils] getPodcastFeed: Invalid podcast feed request response (${feedUrl})`) + return false + } + Logger.debug(`[podcastUtils] getPodcastFeed for "${feedUrl}" success - parsing xml`) + var payload = await this.parsePodcastRssFeedXml(data.data, excludeEpisodeMetadata) + if (!payload) { + return false + } + + // RSS feed may be a private RSS feed + payload.podcast.metadata.feedUrl = feedUrl + + return payload.podcast + }).catch((error) => { + Logger.error('[podcastUtils] getPodcastFeed Error', error) + return false + }) +} + +// Return array of episodes ordered by closest match (Levenshtein distance of 6 or less) +module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => { + const feed = await this.getPodcastFeed(feedUrl).catch(() => { + return null + }) + + return this.findMatchingEpisodesInFeed(feed, searchTitle) +} + +module.exports.findMatchingEpisodesInFeed = (feed, searchTitle) => { + searchTitle = searchTitle.toLowerCase().trim() + if (!feed || !feed.episodes) { + return null + } + + const matches = [] + feed.episodes.forEach(ep => { + if (!ep.title) return + + const epTitle = ep.title.toLowerCase().trim() + if (epTitle === searchTitle) { + matches.push({ + episode: ep, + levenshtein: 0 + }) + } else { + const levenshtein = levenshteinDistance(searchTitle, epTitle, true) + if (levenshtein <= 6 && epTitle.length > levenshtein) { + matches.push({ + episode: ep, + levenshtein + }) + } + } + }) + return matches.sort((a, b) => a.levenshtein - b.levenshtein) } \ No newline at end of file