mirror of
				https://github.com/advplyr/audiobookshelf.git
				synced 2025-10-27 11:18:14 +01:00 
			
		
		
		
	Add:Podcast quickmatch attempts quick matching unmatched episodes #983
This commit is contained in:
		
							parent
							
								
									b91b320006
								
							
						
					
					
						commit
						70ef09f451
					
				| @ -1,10 +1,9 @@ | |||||||
| const axios = require('axios') | const axios = require('axios') | ||||||
| const fs = require('../libs/fsExtra') | const fs = require('../libs/fsExtra') | ||||||
| const Path = require('path') |  | ||||||
| const Logger = require('../Logger') | const Logger = require('../Logger') | ||||||
| const { parsePodcastRssFeedXml } = require('../utils/podcastUtils') | const { getPodcastFeed, findMatchingEpisodes } = require('../utils/podcastUtils') | ||||||
| const LibraryItem = require('../objects/LibraryItem') | const LibraryItem = require('../objects/LibraryItem') | ||||||
| const { getFileTimestampsWithIno, sanitizeFilename } = require('../utils/fileUtils') | const { getFileTimestampsWithIno } = require('../utils/fileUtils') | ||||||
| const filePerms = require('../utils/filePerms') | const filePerms = require('../utils/filePerms') | ||||||
| 
 | 
 | ||||||
| class PodcastController { | class PodcastController { | ||||||
| @ -91,32 +90,17 @@ class PodcastController { | |||||||
|     } |     } | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   getPodcastFeed(req, res) { |   async getPodcastFeed(req, res) { | ||||||
|     var url = req.body.rssFeed |     var url = req.body.rssFeed | ||||||
|     if (!url) { |     if (!url) { | ||||||
|       return res.status(400).send('Bad request') |       return res.status(400).send('Bad request') | ||||||
|     } |     } | ||||||
|     var includeRaw = req.query.raw == 1 // Include raw json
 |  | ||||||
| 
 | 
 | ||||||
|     axios.get(url).then(async (data) => { |     const podcast = await getPodcastFeed(url) | ||||||
|       if (!data || !data.data) { |     if (!podcast) { | ||||||
|         Logger.error('Invalid podcast feed request response') |       return res.status(404).send('Podcast RSS feed request failed or invalid response data') | ||||||
|         return res.status(500).send('Bad response from feed request') |     } | ||||||
|       } |     res.json({ podcast }) | ||||||
|       Logger.debug(`[PodcastController] Podcast feed size ${(data.data.length / 1024 / 1024).toFixed(2)}MB`) |  | ||||||
|       var payload = await parsePodcastRssFeedXml(data.data, false, includeRaw) |  | ||||||
|       if (!payload) { |  | ||||||
|         return res.status(500).send('Invalid podcast RSS feed') |  | ||||||
|       } |  | ||||||
| 
 |  | ||||||
|       // RSS feed may be a private RSS feed
 |  | ||||||
|       payload.podcast.metadata.feedUrl = url |  | ||||||
| 
 |  | ||||||
|       res.json(payload) |  | ||||||
|     }).catch((error) => { |  | ||||||
|       console.error('Failed', error) |  | ||||||
|       res.status(500).send(error) |  | ||||||
|     }) |  | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   async getOPMLFeeds(req, res) { |   async getOPMLFeeds(req, res) { | ||||||
| @ -177,9 +161,7 @@ class PodcastController { | |||||||
|     if (!searchTitle) { |     if (!searchTitle) { | ||||||
|       return res.sendStatus(500) |       return res.sendStatus(500) | ||||||
|     } |     } | ||||||
|     searchTitle = searchTitle.toLowerCase().trim() |     const episodes = await findMatchingEpisodes(rssFeedUrl, searchTitle) | ||||||
| 
 |  | ||||||
|     const episodes = await this.podcastManager.findEpisode(rssFeedUrl, searchTitle) |  | ||||||
|     res.json({ |     res.json({ | ||||||
|       episodes: episodes || [] |       episodes: episodes || [] | ||||||
|     }) |     }) | ||||||
|  | |||||||
| @ -1,7 +1,6 @@ | |||||||
| const fs = require('../libs/fsExtra') | const fs = require('../libs/fsExtra') | ||||||
| const axios = require('axios') |  | ||||||
| 
 | 
 | ||||||
| const { parsePodcastRssFeedXml } = require('../utils/podcastUtils') | const { getPodcastFeed } = require('../utils/podcastUtils') | ||||||
| const Logger = require('../Logger') | const Logger = require('../Logger') | ||||||
| 
 | 
 | ||||||
| const { downloadFile, removeFile } = require('../utils/fileUtils') | const { downloadFile, removeFile } = require('../utils/fileUtils') | ||||||
| @ -226,7 +225,7 @@ class PodcastManager { | |||||||
|       Logger.error(`[PodcastManager] checkPodcastForNewEpisodes no feed url for ${podcastLibraryItem.media.metadata.title} (ID: ${podcastLibraryItem.id})`) |       Logger.error(`[PodcastManager] checkPodcastForNewEpisodes no feed url for ${podcastLibraryItem.media.metadata.title} (ID: ${podcastLibraryItem.id})`) | ||||||
|       return false |       return false | ||||||
|     } |     } | ||||||
|     var feed = await this.getPodcastFeed(podcastLibraryItem.media.metadata.feedUrl) |     var feed = await getPodcastFeed(podcastLibraryItem.media.metadata.feedUrl) | ||||||
|     if (!feed || !feed.episodes) { |     if (!feed || !feed.episodes) { | ||||||
|       Logger.error(`[PodcastManager] checkPodcastForNewEpisodes invalid feed payload for ${podcastLibraryItem.media.metadata.title} (ID: ${podcastLibraryItem.id})`, feed) |       Logger.error(`[PodcastManager] checkPodcastForNewEpisodes invalid feed payload for ${podcastLibraryItem.media.metadata.title} (ID: ${podcastLibraryItem.id})`, feed) | ||||||
|       return false |       return false | ||||||
| @ -262,7 +261,7 @@ class PodcastManager { | |||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   async findEpisode(rssFeedUrl, searchTitle) { |   async findEpisode(rssFeedUrl, searchTitle) { | ||||||
|     const feed = await this.getPodcastFeed(rssFeedUrl).catch(() => { |     const feed = await getPodcastFeed(rssFeedUrl).catch(() => { | ||||||
|       return null |       return null | ||||||
|     }) |     }) | ||||||
|     if (!feed || !feed.episodes) { |     if (!feed || !feed.episodes) { | ||||||
| @ -292,25 +291,6 @@ class PodcastManager { | |||||||
|     return matches.sort((a, b) => a.levenshtein - b.levenshtein) |     return matches.sort((a, b) => a.levenshtein - b.levenshtein) | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   getPodcastFeed(feedUrl, excludeEpisodeMetadata = false) { |  | ||||||
|     Logger.debug(`[PodcastManager] getPodcastFeed for "${feedUrl}"`) |  | ||||||
|     return axios.get(feedUrl, { timeout: 5000 }).then(async (data) => { |  | ||||||
|       if (!data || !data.data) { |  | ||||||
|         Logger.error('Invalid podcast feed request response') |  | ||||||
|         return false |  | ||||||
|       } |  | ||||||
|       Logger.debug(`[PodcastManager] getPodcastFeed for "${feedUrl}" success - parsing xml`) |  | ||||||
|       var payload = await parsePodcastRssFeedXml(data.data, excludeEpisodeMetadata) |  | ||||||
|       if (!payload) { |  | ||||||
|         return false |  | ||||||
|       } |  | ||||||
|       return payload.podcast |  | ||||||
|     }).catch((error) => { |  | ||||||
|       Logger.error('[PodcastManager] getPodcastFeed Error', error) |  | ||||||
|       return false |  | ||||||
|     }) |  | ||||||
|   } |  | ||||||
| 
 |  | ||||||
|   async getOPMLFeeds(opmlText) { |   async getOPMLFeeds(opmlText) { | ||||||
|     var extractedFeeds = opmlParser.parse(opmlText) |     var extractedFeeds = opmlParser.parse(opmlText) | ||||||
|     if (!extractedFeeds || !extractedFeeds.length) { |     if (!extractedFeeds || !extractedFeeds.length) { | ||||||
| @ -323,7 +303,7 @@ class PodcastManager { | |||||||
|     var rssFeedData = [] |     var rssFeedData = [] | ||||||
| 
 | 
 | ||||||
|     for (let feed of extractedFeeds) { |     for (let feed of extractedFeeds) { | ||||||
|       var feedData = await this.getPodcastFeed(feed.feedUrl, true) |       var feedData = await getPodcastFeed(feed.feedUrl, true) | ||||||
|       if (feedData) { |       if (feedData) { | ||||||
|         feedData.metadata.feedUrl = feed.feedUrl |         feedData.metadata.feedUrl = feed.feedUrl | ||||||
|         rssFeedData.push(feedData) |         rssFeedData.push(feedData) | ||||||
|  | |||||||
| @ -107,6 +107,9 @@ class PodcastEpisode { | |||||||
|     if (this.episode) return `${this.episode} - ${this.title}` |     if (this.episode) return `${this.episode} - ${this.title}` | ||||||
|     return this.title |     return this.title | ||||||
|   } |   } | ||||||
|  |   get enclosureUrl() { | ||||||
|  |     return this.enclosure ? this.enclosure.url : null | ||||||
|  |   } | ||||||
| 
 | 
 | ||||||
|   setData(data, index = 1) { |   setData(data, index = 1) { | ||||||
|     this.id = getId('ep') |     this.id = getId('ep') | ||||||
|  | |||||||
| @ -7,6 +7,7 @@ const { groupFilesIntoLibraryItemPaths, getLibraryItemFileData, scanFolder } = r | |||||||
| const { comparePaths } = require('../utils/index') | const { comparePaths } = require('../utils/index') | ||||||
| const { getIno } = require('../utils/fileUtils') | const { getIno } = require('../utils/fileUtils') | ||||||
| const { ScanResult, LogLevel } = require('../utils/constants') | const { ScanResult, LogLevel } = require('../utils/constants') | ||||||
|  | const { findMatchingEpisodesInFeed, getPodcastFeed } = require('../utils/podcastUtils') | ||||||
| 
 | 
 | ||||||
| const MediaFileScanner = require('./MediaFileScanner') | const MediaFileScanner = require('./MediaFileScanner') | ||||||
| const BookFinder = require('../finders/BookFinder') | const BookFinder = require('../finders/BookFinder') | ||||||
| @ -684,7 +685,7 @@ class Scanner { | |||||||
|     var updatePayload = {} |     var updatePayload = {} | ||||||
|     var hasUpdated = false |     var hasUpdated = false | ||||||
| 
 | 
 | ||||||
|     if (libraryItem.mediaType === 'book') { |     if (libraryItem.isBook) { | ||||||
|       var searchISBN = options.isbn || libraryItem.media.metadata.isbn |       var searchISBN = options.isbn || libraryItem.media.metadata.isbn | ||||||
|       var searchASIN = options.asin || libraryItem.media.metadata.asin |       var searchASIN = options.asin || libraryItem.media.metadata.asin | ||||||
| 
 | 
 | ||||||
| @ -708,7 +709,7 @@ class Scanner { | |||||||
|       } |       } | ||||||
| 
 | 
 | ||||||
|       updatePayload = await this.quickMatchBookBuildUpdatePayload(libraryItem, matchData, options) |       updatePayload = await this.quickMatchBookBuildUpdatePayload(libraryItem, matchData, options) | ||||||
|     } else { // Podcast quick match
 |     } else if (libraryItem.isPodcast) { // Podcast quick match
 | ||||||
|       var results = await this.podcastFinder.search(searchTitle) |       var results = await this.podcastFinder.search(searchTitle) | ||||||
|       if (!results.length) { |       if (!results.length) { | ||||||
|         return { |         return { | ||||||
| @ -739,6 +740,10 @@ class Scanner { | |||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (hasUpdated) { |     if (hasUpdated) { | ||||||
|  |       if (libraryItem.isPodcast && libraryItem.media.metadata.feedUrl) { // Quick match all unmatched podcast episodes
 | ||||||
|  |         await this.quickMatchPodcastEpisodes(libraryItem, options) | ||||||
|  |       } | ||||||
|  | 
 | ||||||
|       await this.db.updateLibraryItem(libraryItem) |       await this.db.updateLibraryItem(libraryItem) | ||||||
|       this.emitter('item_updated', libraryItem.toJSONExpanded()) |       this.emitter('item_updated', libraryItem.toJSONExpanded()) | ||||||
|     } |     } | ||||||
| @ -762,6 +767,7 @@ class Scanner { | |||||||
|       itunesArtistId: matchData.artistId || null, |       itunesArtistId: matchData.artistId || null, | ||||||
|       releaseDate: matchData.releaseDate || null, |       releaseDate: matchData.releaseDate || null, | ||||||
|       imageUrl: matchData.cover || null, |       imageUrl: matchData.cover || null, | ||||||
|  |       feedUrl: matchData.feedUrl || null, | ||||||
|       description: matchData.descriptionPlain || null |       description: matchData.descriptionPlain || null | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| @ -769,9 +775,10 @@ class Scanner { | |||||||
|       if (matchDataTransformed[key]) { |       if (matchDataTransformed[key]) { | ||||||
|         if (key === 'genres') { |         if (key === 'genres') { | ||||||
|           if ((!libraryItem.media.metadata.genres || options.overrideDetails)) { |           if ((!libraryItem.media.metadata.genres || options.overrideDetails)) { | ||||||
|  |             // TODO: Genres array or string?
 | ||||||
|             updatePayload.metadata[key] = matchDataTransformed[key].split(',').map(v => v.trim()).filter(v => !!v) |             updatePayload.metadata[key] = matchDataTransformed[key].split(',').map(v => v.trim()).filter(v => !!v) | ||||||
|           } |           } | ||||||
|         } else if (!libraryItem.media.metadata[key] || options.overrideDetails) { |         } else if (libraryItem.media.metadata[key] !== matchDataTransformed[key] && (!libraryItem.media.metadata[key] || options.overrideDetails)) { | ||||||
|           updatePayload.metadata[key] = matchDataTransformed[key] |           updatePayload.metadata[key] = matchDataTransformed[key] | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
| @ -854,6 +861,61 @@ class Scanner { | |||||||
|     return updatePayload |     return updatePayload | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|  |   async quickMatchPodcastEpisodes(libraryItem, options = {}) { | ||||||
|  |     const episodesToQuickMatch = libraryItem.media.episodes.filter(ep => !ep.enclosureUrl) // Only quick match episodes without enclosure
 | ||||||
|  |     if (!episodesToQuickMatch.length) return false | ||||||
|  | 
 | ||||||
|  |     const feed = await getPodcastFeed(libraryItem.media.metadata.feedUrl) | ||||||
|  |     if (!feed) { | ||||||
|  |       Logger.error(`[Scanner] quickMatchPodcastEpisodes: Unable to quick match episodes feed not found for "${libraryItem.media.metadata.feedUrl}"`) | ||||||
|  |       return false | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     var episodesWereUpdated = false | ||||||
|  |     for (const episode of episodesToQuickMatch) { | ||||||
|  |       const episodeMatches = findMatchingEpisodesInFeed(feed, episode.title) | ||||||
|  |       if (episodeMatches && episodeMatches.length) { | ||||||
|  |         const wasUpdated = this.updateEpisodeWithMatch(libraryItem, episode, episodeMatches[0].episode, options) | ||||||
|  |         if (wasUpdated) episodesWereUpdated = true | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     return episodesWereUpdated | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   updateEpisodeWithMatch(libraryItem, episode, episodeToMatch, options = {}) { | ||||||
|  |     Logger.debug(`[Scanner] quickMatchPodcastEpisodes: Found episode match for "${episode.title}" => ${episodeToMatch.title}`) | ||||||
|  |     const matchDataTransformed = { | ||||||
|  |       title: episodeToMatch.title || '', | ||||||
|  |       subtitle: episodeToMatch.subtitle || '', | ||||||
|  |       description: episodeToMatch.description || '', | ||||||
|  |       enclosure: episodeToMatch.enclosure || null, | ||||||
|  |       episode: episodeToMatch.episode || '', | ||||||
|  |       episodeType: episodeToMatch.episodeType || '', | ||||||
|  |       season: episodeToMatch.season || '', | ||||||
|  |       pubDate: episodeToMatch.pubDate || '', | ||||||
|  |       publishedAt: episodeToMatch.publishedAt | ||||||
|  |     } | ||||||
|  |     const updatePayload = {} | ||||||
|  |     for (const key in matchDataTransformed) { | ||||||
|  |       if (matchDataTransformed[key]) { | ||||||
|  |         if (key === 'enclosure') { | ||||||
|  |           if (!episode.enclosure || JSON.stringify(episode.enclosure) !== JSON.stringify(matchDataTransformed.enclosure)) { | ||||||
|  |             updatePayload[key] = { | ||||||
|  |               ...matchDataTransformed.enclosure | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         } else if (episode[key] !== matchDataTransformed[key] && (!episode[key] || options.overrideDetails)) { | ||||||
|  |           updatePayload[key] = matchDataTransformed[key] | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (Object.keys(updatePayload).length) { | ||||||
|  |       return libraryItem.media.updateEpisode(episode.id, updatePayload) | ||||||
|  |     } | ||||||
|  |     return false | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|   async matchLibraryItems(library) { |   async matchLibraryItems(library) { | ||||||
|     if (library.mediaType === 'podcast') { |     if (library.mediaType === 'podcast') { | ||||||
|       Logger.error(`[Scanner] matchLibraryItems: Match all not supported for podcasts yet`) |       Logger.error(`[Scanner] matchLibraryItems: Match all not supported for podcasts yet`) | ||||||
|  | |||||||
| @ -1,5 +1,6 @@ | |||||||
| const Logger = require('../Logger') | const Logger = require('../Logger') | ||||||
| const { xmlToJSON } = require('./index') | const axios = require('axios') | ||||||
|  | const { xmlToJSON, levenshteinDistance } = require('./index') | ||||||
| const htmlSanitizer = require('../utils/htmlSanitizer') | const htmlSanitizer = require('../utils/htmlSanitizer') | ||||||
| 
 | 
 | ||||||
| function extractFirstArrayItem(json, key) { | function extractFirstArrayItem(json, key) { | ||||||
| @ -173,4 +174,65 @@ module.exports.parsePodcastRssFeedXml = async (xml, excludeEpisodeMetadata = fal | |||||||
|       podcast |       podcast | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | module.exports.getPodcastFeed = (feedUrl, excludeEpisodeMetadata = false) => { | ||||||
|  |   Logger.debug(`[podcastUtils] getPodcastFeed for "${feedUrl}"`) | ||||||
|  |   return axios.get(feedUrl, { timeout: 6000 }).then(async (data) => { | ||||||
|  |     if (!data || !data.data) { | ||||||
|  |       Logger.error(`[podcastUtils] getPodcastFeed: Invalid podcast feed request response (${feedUrl})`) | ||||||
|  |       return false | ||||||
|  |     } | ||||||
|  |     Logger.debug(`[podcastUtils] getPodcastFeed for "${feedUrl}" success - parsing xml`) | ||||||
|  |     var payload = await this.parsePodcastRssFeedXml(data.data, excludeEpisodeMetadata) | ||||||
|  |     if (!payload) { | ||||||
|  |       return false | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // RSS feed may be a private RSS feed
 | ||||||
|  |     payload.podcast.metadata.feedUrl = feedUrl | ||||||
|  | 
 | ||||||
|  |     return payload.podcast | ||||||
|  |   }).catch((error) => { | ||||||
|  |     Logger.error('[podcastUtils] getPodcastFeed Error', error) | ||||||
|  |     return false | ||||||
|  |   }) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Return array of episodes ordered by closest match (Levenshtein distance of 6 or less)
 | ||||||
|  | module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => { | ||||||
|  |   const feed = await this.getPodcastFeed(feedUrl).catch(() => { | ||||||
|  |     return null | ||||||
|  |   }) | ||||||
|  | 
 | ||||||
|  |   return this.findMatchingEpisodesInFeed(feed, searchTitle) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | module.exports.findMatchingEpisodesInFeed = (feed, searchTitle) => { | ||||||
|  |   searchTitle = searchTitle.toLowerCase().trim() | ||||||
|  |   if (!feed || !feed.episodes) { | ||||||
|  |     return null | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   const matches = [] | ||||||
|  |   feed.episodes.forEach(ep => { | ||||||
|  |     if (!ep.title) return | ||||||
|  | 
 | ||||||
|  |     const epTitle = ep.title.toLowerCase().trim() | ||||||
|  |     if (epTitle === searchTitle) { | ||||||
|  |       matches.push({ | ||||||
|  |         episode: ep, | ||||||
|  |         levenshtein: 0 | ||||||
|  |       }) | ||||||
|  |     } else { | ||||||
|  |       const levenshtein = levenshteinDistance(searchTitle, epTitle, true) | ||||||
|  |       if (levenshtein <= 6 && epTitle.length > levenshtein) { | ||||||
|  |         matches.push({ | ||||||
|  |           episode: ep, | ||||||
|  |           levenshtein | ||||||
|  |         }) | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   }) | ||||||
|  |   return matches.sort((a, b) => a.levenshtein - b.levenshtein) | ||||||
| } | } | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user