Add:Podcast quickmatch attempts quick matching unmatched episodes #983

This commit is contained in:
advplyr 2022-09-15 18:35:56 -05:00
parent b91b320006
commit 70ef09f451
5 changed files with 144 additions and 55 deletions

View File

@ -1,10 +1,9 @@
const axios = require('axios')
const fs = require('../libs/fsExtra')
const Path = require('path')
const Logger = require('../Logger')
const { parsePodcastRssFeedXml } = require('../utils/podcastUtils')
const { getPodcastFeed, findMatchingEpisodes } = require('../utils/podcastUtils')
const LibraryItem = require('../objects/LibraryItem')
const { getFileTimestampsWithIno, sanitizeFilename } = require('../utils/fileUtils')
const { getFileTimestampsWithIno } = require('../utils/fileUtils')
const filePerms = require('../utils/filePerms')
class PodcastController {
@ -91,32 +90,17 @@ class PodcastController {
}
}
getPodcastFeed(req, res) {
async getPodcastFeed(req, res) {
var url = req.body.rssFeed
if (!url) {
return res.status(400).send('Bad request')
}
var includeRaw = req.query.raw == 1 // Include raw json
axios.get(url).then(async (data) => {
if (!data || !data.data) {
Logger.error('Invalid podcast feed request response')
return res.status(500).send('Bad response from feed request')
}
Logger.debug(`[PodcastController] Podcast feed size ${(data.data.length / 1024 / 1024).toFixed(2)}MB`)
var payload = await parsePodcastRssFeedXml(data.data, false, includeRaw)
if (!payload) {
return res.status(500).send('Invalid podcast RSS feed')
}
// RSS feed may be a private RSS feed
payload.podcast.metadata.feedUrl = url
res.json(payload)
}).catch((error) => {
console.error('Failed', error)
res.status(500).send(error)
})
const podcast = await getPodcastFeed(url)
if (!podcast) {
return res.status(404).send('Podcast RSS feed request failed or invalid response data')
}
res.json({ podcast })
}
async getOPMLFeeds(req, res) {
@ -177,9 +161,7 @@ class PodcastController {
if (!searchTitle) {
return res.sendStatus(500)
}
searchTitle = searchTitle.toLowerCase().trim()
const episodes = await this.podcastManager.findEpisode(rssFeedUrl, searchTitle)
const episodes = await findMatchingEpisodes(rssFeedUrl, searchTitle)
res.json({
episodes: episodes || []
})

View File

@ -1,7 +1,6 @@
const fs = require('../libs/fsExtra')
const axios = require('axios')
const { parsePodcastRssFeedXml } = require('../utils/podcastUtils')
const { getPodcastFeed } = require('../utils/podcastUtils')
const Logger = require('../Logger')
const { downloadFile, removeFile } = require('../utils/fileUtils')
@ -226,7 +225,7 @@ class PodcastManager {
Logger.error(`[PodcastManager] checkPodcastForNewEpisodes no feed url for ${podcastLibraryItem.media.metadata.title} (ID: ${podcastLibraryItem.id})`)
return false
}
var feed = await this.getPodcastFeed(podcastLibraryItem.media.metadata.feedUrl)
var feed = await getPodcastFeed(podcastLibraryItem.media.metadata.feedUrl)
if (!feed || !feed.episodes) {
Logger.error(`[PodcastManager] checkPodcastForNewEpisodes invalid feed payload for ${podcastLibraryItem.media.metadata.title} (ID: ${podcastLibraryItem.id})`, feed)
return false
@ -262,7 +261,7 @@ class PodcastManager {
}
async findEpisode(rssFeedUrl, searchTitle) {
const feed = await this.getPodcastFeed(rssFeedUrl).catch(() => {
const feed = await getPodcastFeed(rssFeedUrl).catch(() => {
return null
})
if (!feed || !feed.episodes) {
@ -292,25 +291,6 @@ class PodcastManager {
return matches.sort((a, b) => a.levenshtein - b.levenshtein)
}
getPodcastFeed(feedUrl, excludeEpisodeMetadata = false) {
Logger.debug(`[PodcastManager] getPodcastFeed for "${feedUrl}"`)
return axios.get(feedUrl, { timeout: 5000 }).then(async (data) => {
if (!data || !data.data) {
Logger.error('Invalid podcast feed request response')
return false
}
Logger.debug(`[PodcastManager] getPodcastFeed for "${feedUrl}" success - parsing xml`)
var payload = await parsePodcastRssFeedXml(data.data, excludeEpisodeMetadata)
if (!payload) {
return false
}
return payload.podcast
}).catch((error) => {
Logger.error('[PodcastManager] getPodcastFeed Error', error)
return false
})
}
async getOPMLFeeds(opmlText) {
var extractedFeeds = opmlParser.parse(opmlText)
if (!extractedFeeds || !extractedFeeds.length) {
@ -323,7 +303,7 @@ class PodcastManager {
var rssFeedData = []
for (let feed of extractedFeeds) {
var feedData = await this.getPodcastFeed(feed.feedUrl, true)
var feedData = await getPodcastFeed(feed.feedUrl, true)
if (feedData) {
feedData.metadata.feedUrl = feed.feedUrl
rssFeedData.push(feedData)

View File

@ -107,6 +107,9 @@ class PodcastEpisode {
if (this.episode) return `${this.episode} - ${this.title}`
return this.title
}
get enclosureUrl() {
return this.enclosure ? this.enclosure.url : null
}
setData(data, index = 1) {
this.id = getId('ep')

View File

@ -7,6 +7,7 @@ const { groupFilesIntoLibraryItemPaths, getLibraryItemFileData, scanFolder } = r
const { comparePaths } = require('../utils/index')
const { getIno } = require('../utils/fileUtils')
const { ScanResult, LogLevel } = require('../utils/constants')
const { findMatchingEpisodesInFeed, getPodcastFeed } = require('../utils/podcastUtils')
const MediaFileScanner = require('./MediaFileScanner')
const BookFinder = require('../finders/BookFinder')
@ -684,7 +685,7 @@ class Scanner {
var updatePayload = {}
var hasUpdated = false
if (libraryItem.mediaType === 'book') {
if (libraryItem.isBook) {
var searchISBN = options.isbn || libraryItem.media.metadata.isbn
var searchASIN = options.asin || libraryItem.media.metadata.asin
@ -708,7 +709,7 @@ class Scanner {
}
updatePayload = await this.quickMatchBookBuildUpdatePayload(libraryItem, matchData, options)
} else { // Podcast quick match
} else if (libraryItem.isPodcast) { // Podcast quick match
var results = await this.podcastFinder.search(searchTitle)
if (!results.length) {
return {
@ -739,6 +740,10 @@ class Scanner {
}
if (hasUpdated) {
if (libraryItem.isPodcast && libraryItem.media.metadata.feedUrl) { // Quick match all unmatched podcast episodes
await this.quickMatchPodcastEpisodes(libraryItem, options)
}
await this.db.updateLibraryItem(libraryItem)
this.emitter('item_updated', libraryItem.toJSONExpanded())
}
@ -762,6 +767,7 @@ class Scanner {
itunesArtistId: matchData.artistId || null,
releaseDate: matchData.releaseDate || null,
imageUrl: matchData.cover || null,
feedUrl: matchData.feedUrl || null,
description: matchData.descriptionPlain || null
}
@ -769,9 +775,10 @@ class Scanner {
if (matchDataTransformed[key]) {
if (key === 'genres') {
if ((!libraryItem.media.metadata.genres || options.overrideDetails)) {
// TODO: Genres array or string?
updatePayload.metadata[key] = matchDataTransformed[key].split(',').map(v => v.trim()).filter(v => !!v)
}
} else if (!libraryItem.media.metadata[key] || options.overrideDetails) {
} else if (libraryItem.media.metadata[key] !== matchDataTransformed[key] && (!libraryItem.media.metadata[key] || options.overrideDetails)) {
updatePayload.metadata[key] = matchDataTransformed[key]
}
}
@ -854,6 +861,61 @@ class Scanner {
return updatePayload
}
async quickMatchPodcastEpisodes(libraryItem, options = {}) {
const episodesToQuickMatch = libraryItem.media.episodes.filter(ep => !ep.enclosureUrl) // Only quick match episodes without enclosure
if (!episodesToQuickMatch.length) return false
const feed = await getPodcastFeed(libraryItem.media.metadata.feedUrl)
if (!feed) {
Logger.error(`[Scanner] quickMatchPodcastEpisodes: Unable to quick match episodes feed not found for "${libraryItem.media.metadata.feedUrl}"`)
return false
}
var episodesWereUpdated = false
for (const episode of episodesToQuickMatch) {
const episodeMatches = findMatchingEpisodesInFeed(feed, episode.title)
if (episodeMatches && episodeMatches.length) {
const wasUpdated = this.updateEpisodeWithMatch(libraryItem, episode, episodeMatches[0].episode, options)
if (wasUpdated) episodesWereUpdated = true
}
}
return episodesWereUpdated
}
updateEpisodeWithMatch(libraryItem, episode, episodeToMatch, options = {}) {
Logger.debug(`[Scanner] quickMatchPodcastEpisodes: Found episode match for "${episode.title}" => ${episodeToMatch.title}`)
const matchDataTransformed = {
title: episodeToMatch.title || '',
subtitle: episodeToMatch.subtitle || '',
description: episodeToMatch.description || '',
enclosure: episodeToMatch.enclosure || null,
episode: episodeToMatch.episode || '',
episodeType: episodeToMatch.episodeType || '',
season: episodeToMatch.season || '',
pubDate: episodeToMatch.pubDate || '',
publishedAt: episodeToMatch.publishedAt
}
const updatePayload = {}
for (const key in matchDataTransformed) {
if (matchDataTransformed[key]) {
if (key === 'enclosure') {
if (!episode.enclosure || JSON.stringify(episode.enclosure) !== JSON.stringify(matchDataTransformed.enclosure)) {
updatePayload[key] = {
...matchDataTransformed.enclosure
}
}
} else if (episode[key] !== matchDataTransformed[key] && (!episode[key] || options.overrideDetails)) {
updatePayload[key] = matchDataTransformed[key]
}
}
}
if (Object.keys(updatePayload).length) {
return libraryItem.media.updateEpisode(episode.id, updatePayload)
}
return false
}
async matchLibraryItems(library) {
if (library.mediaType === 'podcast') {
Logger.error(`[Scanner] matchLibraryItems: Match all not supported for podcasts yet`)

View File

@ -1,5 +1,6 @@
const Logger = require('../Logger')
const { xmlToJSON } = require('./index')
const axios = require('axios')
const { xmlToJSON, levenshteinDistance } = require('./index')
const htmlSanitizer = require('../utils/htmlSanitizer')
function extractFirstArrayItem(json, key) {
@ -173,4 +174,65 @@ module.exports.parsePodcastRssFeedXml = async (xml, excludeEpisodeMetadata = fal
podcast
}
}
}
/**
 * Request a podcast RSS feed over HTTP and parse it.
 *
 * @param {string} feedUrl - URL of the RSS feed to request
 * @param {boolean} [excludeEpisodeMetadata=false] - forwarded unchanged to parsePodcastRssFeedXml
 * @returns {Promise<Object|false>} the parsed `podcast` object with `metadata.feedUrl` set to
 *   `feedUrl`, or false when the response has no data, the parser returns nothing, or the
 *   request/parse fails
 */
module.exports.getPodcastFeed = (feedUrl, excludeEpisodeMetadata = false) => {
  Logger.debug(`[podcastUtils] getPodcastFeed for "${feedUrl}"`)

  // Turns a successful HTTP response into the parsed podcast (or false)
  const handleResponse = async (response) => {
    const hasBody = response && response.data
    if (!hasBody) {
      Logger.error(`[podcastUtils] getPodcastFeed: Invalid podcast feed request response (${feedUrl})`)
      return false
    }

    Logger.debug(`[podcastUtils] getPodcastFeed for "${feedUrl}" success - parsing xml`)
    // `this` is the lexical `this` of these arrow functions; assuming the file is loaded as
    // CommonJS, that is module.exports, which is where parsePodcastRssFeedXml is attached
    const parsed = await this.parsePodcastRssFeedXml(response.data, excludeEpisodeMetadata)
    if (!parsed) return false

    // RSS feed may be a private RSS feed, so keep the URL that was actually requested
    const { podcast } = parsed
    podcast.metadata.feedUrl = feedUrl
    return podcast
  }

  // Covers both a failed request and an error raised while handling the response
  const handleFailure = (error) => {
    Logger.error('[podcastUtils] getPodcastFeed Error', error)
    return false
  }

  return axios.get(feedUrl, { timeout: 6000 }).then(handleResponse).catch(handleFailure)
}
/**
 * Request the feed at `feedUrl` and return its episodes matching `searchTitle`,
 * ordered by closest match (Levenshtein distance of 6 or less).
 *
 * @param {string} feedUrl - URL of the RSS feed to search
 * @param {string} searchTitle - episode title to look for
 * @returns {Promise<Object[]|null>} whatever findMatchingEpisodesInFeed returns for the fetched feed
 */
module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => {
  const feedRequest = this.getPodcastFeed(feedUrl)
  // A rejected request is treated like a missing feed
  const feed = await feedRequest.catch(() => null)
  return this.findMatchingEpisodesInFeed(feed, searchTitle)
}
/**
 * Find the episodes of an already-fetched feed whose title matches a search title.
 *
 * Titles are compared lowercased and trimmed. An identical title gets a distance of 0.
 * Otherwise the distance is `levenshteinDistance(searchTitle, epTitle, true)`, and the
 * episode is kept only when that distance is 6 or less and smaller than the length of
 * the episode title.
 *
 * @param {Object} feed - podcast feed; reads its `episodes` array
 * @param {string} searchTitle - episode title to look for
 * @returns {{ episode: Object, levenshtein: number }[]|null} matches sorted by ascending
 *   distance, or null when the feed has no episodes list or `searchTitle` is not a string
 */
module.exports.findMatchingEpisodesInFeed = (feed, searchTitle) => {
  if (!feed || !feed.episodes) {
    return null
  }
  // Validate before calling string methods: a missing title (e.g. an episode without one)
  // would otherwise throw a TypeError and abort the caller's whole matching run
  if (typeof searchTitle !== 'string') {
    return null
  }

  const maxDistance = 6
  const normalizedSearchTitle = searchTitle.toLowerCase().trim()

  const matches = []
  for (const ep of feed.episodes) {
    if (!ep || !ep.title) continue

    const epTitle = ep.title.toLowerCase().trim()
    if (epTitle === normalizedSearchTitle) {
      matches.push({
        episode: ep,
        levenshtein: 0
      })
      continue
    }

    const levenshtein = levenshteinDistance(normalizedSearchTitle, epTitle, true)
    // The title must be longer than the distance - presumably so that very short titles
    // are not accepted as a match for almost any search (TODO confirm intent)
    if (levenshtein <= maxDistance && epTitle.length > levenshtein) {
      matches.push({
        episode: ep,
        levenshtein
      })
    }
  }
  return matches.sort((a, b) => a.levenshtein - b.levenshtein)
}