Merge pull request #2400 from mikiher/bookfinder-improvements

A few BookFinder improvements (including a fix for #2238)
This commit is contained in:
advplyr 2023-12-10 10:36:21 -06:00 committed by GitHub
commit 6abc0819d9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 77 additions and 19 deletions

View File

@ -332,6 +332,7 @@ export default {
if (this.isPodcast) return `term=${encodeURIComponent(this.searchTitle)}`
var searchQuery = `provider=${this.provider}&fallbackTitleOnly=1&title=${encodeURIComponent(this.searchTitle)}`
if (this.searchAuthor) searchQuery += `&author=${encodeURIComponent(this.searchAuthor)}`
if (this.libraryItemId) searchQuery += `&id=${this.libraryItemId}`
return searchQuery
},
submitSearch() {

View File

@ -3,15 +3,18 @@ const BookFinder = require('../finders/BookFinder')
const PodcastFinder = require('../finders/PodcastFinder')
const AuthorFinder = require('../finders/AuthorFinder')
const MusicFinder = require('../finders/MusicFinder')
const Database = require("../Database")
class SearchController {
constructor() { }
/**
 * Handle a book search request and respond with the search results as JSON.
 *
 * Reads `id`, `provider` (falls back to 'google'), `title` and `author`
 * (both fall back to '') from the query string, looks the library item up
 * by `id`, and forwards everything to BookFinder.search.
 *
 * @param {Object} req - request; only `req.query` is read here
 * @param {Object} res - response; only `res.json` is called here
 */
async findBooks(req, res) {
const id = req.query.id
// NOTE(review): `id` is passed through unchecked; what getOldById returns for a missing id is not visible here — confirm
const libraryItem = await Database.libraryItemModel.getOldById(id)
const provider = req.query.provider || 'google'
const title = req.query.title || ''
const author = req.query.author || ''
// NOTE(review): two consecutive `const results` declarations — these look like the before/after
// sides of a diff hunk; only the second passes libraryItem as the new first argument — confirm which is live
const results = await BookFinder.search(provider, title, author)
const results = await BookFinder.search(libraryItem, provider, title, author)
res.json(results)
}

View File

@ -167,6 +167,7 @@ class BookFinder {
[/ (2nd|3rd|\d+th)\s+ed(\.|ition)?/g, ''], // Remove edition
[/(^| |\.)(m4b|m4a|mp3)( |$)/g, ''], // Remove file-type
[/ a novel.*$/g, ''], // Remove "a novel"
[/(^| )(un)?abridged( |$)/g, ' '], // Remove "unabridged/abridged"
[/^\d+ | \d+$/g, ''], // Remove preceding/trailing numbers
]
@ -298,6 +299,7 @@ class BookFinder {
/**
* Search for books including fuzzy searches
*
* @param {Object} libraryItem
* @param {string} provider
* @param {string} title
* @param {string} author
@ -306,7 +308,7 @@ class BookFinder {
* @param {{titleDistance:number, authorDistance:number, maxFuzzySearches:number}} options
* @returns {Promise<Object[]>}
*/
async search(provider, title, author, isbn, asin, options = {}) {
async search(libraryItem, provider, title, author, isbn, asin, options = {}) {
let books = []
const maxTitleDistance = !isNaN(options.titleDistance) ? Number(options.titleDistance) : 4
const maxAuthorDistance = !isNaN(options.authorDistance) ? Number(options.authorDistance) : 4
@ -335,6 +337,7 @@ class BookFinder {
for (const titlePart of titleParts)
authorCandidates.add(titlePart)
authorCandidates = await authorCandidates.getCandidates()
loop_author:
for (const authorCandidate of authorCandidates) {
let titleCandidates = new BookFinder.TitleCandidates(authorCandidate)
for (const titlePart of titleParts)
@ -342,13 +345,27 @@ class BookFinder {
titleCandidates = titleCandidates.getCandidates()
for (const titleCandidate of titleCandidates) {
if (titleCandidate == title && authorCandidate == author) continue // We already tried this
if (++numFuzzySearches > maxFuzzySearches) return books
if (++numFuzzySearches > maxFuzzySearches) break loop_author
books = await this.runSearch(titleCandidate, authorCandidate, provider, asin, maxTitleDistance, maxAuthorDistance)
if (books.length) return books
if (books.length) break loop_author
}
}
}
if (books.length) {
const resultsHaveDuration = provider.startsWith('audible')
if (resultsHaveDuration && libraryItem?.media?.duration) {
const libraryItemDurationMinutes = libraryItem.media.duration / 60
// If provider results have duration, sort by ascending duration difference from libraryItem
books.sort((a, b) => {
const aDuration = a.duration || Number.POSITIVE_INFINITY
const bDuration = b.duration || Number.POSITIVE_INFINITY
const aDurationDiff = Math.abs(aDuration - libraryItemDurationMinutes)
const bDurationDiff = Math.abs(bDuration - libraryItemDurationMinutes)
return aDurationDiff - bDurationDiff
})
}
}
return books
}
@ -392,12 +409,12 @@ class BookFinder {
if (provider === 'all') {
for (const providerString of this.providers) {
const providerResults = await this.search(providerString, title, author, options)
const providerResults = await this.search(null, providerString, title, author, options)
Logger.debug(`[BookFinder] Found ${providerResults.length} covers from ${providerString}`)
searchResults.push(...providerResults)
}
} else {
searchResults = await this.search(provider, title, author, options)
searchResults = await this.search(null, provider, title, author, options)
}
Logger.debug(`[BookFinder] FindCovers search results: ${searchResults.length}`)
@ -455,12 +472,14 @@ function cleanTitleForCompares(title) {
/**
 * Normalize an author name so that two spellings of the same author compare equal.
 *
 * Steps, in order: collapse redundant spaces, replace accented characters,
 * lowercase, put a space between dotted initials ("j.k." -> "j. k."),
 * drop middle initials that sit between two longer name parts, and strip
 * a trailing/embedded " et al" (with or without the period).
 *
 * @param {string} author
 * @returns {string} normalized author, or '' when author is falsy
 */
function cleanAuthorForCompares(author) {
  if (!author) return ''

  const normalized = replaceAccentedChars(stripRedundantSpaces(author)).toLowerCase()

  return normalized
    // separate initials
    .replace(/([a-z])\.([a-z])/g, '$1. $2')
    // remove middle initials
    .replace(/(?<=\w\w)(\s+[a-z]\.?)+(?=\s+\w\w)/g, '')
    // remove et al.
    .replace(/ et al\.?(?= |$)/g, '')
}

View File

@ -37,7 +37,7 @@ class Scanner {
var searchISBN = options.isbn || libraryItem.media.metadata.isbn
var searchASIN = options.asin || libraryItem.media.metadata.asin
var results = await BookFinder.search(provider, searchTitle, searchAuthor, searchISBN, searchASIN, { maxFuzzySearches: 2 })
var results = await BookFinder.search(libraryItem, provider, searchTitle, searchAuthor, searchISBN, searchASIN, { maxFuzzySearches: 2 })
if (!results.length) {
return {
warning: `No ${provider} match found`

View File

@ -35,6 +35,8 @@ describe('TitleCandidates', () => {
['adds candidate + variant, removing edition 2', 'anna karenina 4th ed.', ['anna karenina', 'anna karenina 4th ed.']],
['adds candidate + variant, removing fie type', 'anna karenina.mp3', ['anna karenina', 'anna karenina.mp3']],
['adds candidate + variant, removing "a novel"', 'anna karenina a novel', ['anna karenina', 'anna karenina a novel']],
['adds candidate + variant, removing "abridged"', 'abridged anna karenina', ['anna karenina', 'abridged anna karenina']],
['adds candidate + variant, removing "unabridged"', 'anna karenina unabridged', ['anna karenina', 'anna karenina unabridged']],
['adds candidate + variant, removing preceding/trailing numbers', '1 anna karenina 2', ['anna karenina', '1 anna karenina 2']],
['does not add empty candidate', '', []],
['does not add spaces-only candidate', ' ', []],
@ -109,6 +111,7 @@ describe('AuthorCandidates', () => {
['adds recognized author if edit distance from candidate is small', 'nicolai gogol', ['nikolai gogol']],
['does not add candidate if edit distance from any recognized author is large', 'nikolai google', []],
['adds normalized recognized candidate (contains redundant spaces)', 'nikolai gogol', ['nikolai gogol']],
['adds normalized recognized candidate (et al removed)', 'nikolai gogol et al.', ['nikolai gogol']],
['adds normalized recognized candidate (normalized initials)', 'j.k. rowling', ['j. k. rowling']],
].forEach(([name, author, expected]) => it(name, async () => {
authorCandidates.add(author)
@ -222,14 +225,14 @@ describe('search', () => {
describe('search title is empty', () => {
it('returns empty result', async () => {
expect(await bookFinder.search('', '', a)).to.deep.equal([])
expect(await bookFinder.search(null, '', '', a)).to.deep.equal([])
sinon.assert.callCount(bookFinder.runSearch, 0)
})
})
describe('search title is a recognized title and search author is a recognized author', () => {
it('returns non-empty result (no fuzzy searches)', async () => {
expect(await bookFinder.search('', t, a)).to.deep.equal(r)
expect(await bookFinder.search(null, '', t, a)).to.deep.equal(r)
sinon.assert.callCount(bookFinder.runSearch, 1)
})
})
@ -251,7 +254,7 @@ describe('search', () => {
[`2022_${t}_HQ`],
].forEach(([searchTitle]) => {
it(`search('${searchTitle}', '${a}') returns non-empty result (with 1 fuzzy search)`, async () => {
expect(await bookFinder.search('', searchTitle, a)).to.deep.equal(r)
expect(await bookFinder.search(null, '', searchTitle, a)).to.deep.equal(r)
sinon.assert.callCount(bookFinder.runSearch, 2)
})
});
@ -261,7 +264,7 @@ describe('search', () => {
[`${a} - series 01 - ${t}`],
].forEach(([searchTitle]) => {
it(`search('${searchTitle}', '${a}') returns non-empty result (with 2 fuzzy searches)`, async () => {
expect(await bookFinder.search('', searchTitle, a)).to.deep.equal(r)
expect(await bookFinder.search(null, '', searchTitle, a)).to.deep.equal(r)
sinon.assert.callCount(bookFinder.runSearch, 3)
})
});
@ -271,7 +274,7 @@ describe('search', () => {
[`${t} junk`],
].forEach(([searchTitle]) => {
it(`search('${searchTitle}', '${a}') returns an empty result`, async () => {
expect(await bookFinder.search('', searchTitle, a)).to.deep.equal([])
expect(await bookFinder.search(null, '', searchTitle, a)).to.deep.equal([])
})
})
@ -280,7 +283,7 @@ describe('search', () => {
[`${t} - ${a}`],
].forEach(([searchTitle]) => {
it(`search('${searchTitle}', '${a}') returns an empty result (with no fuzzy searches)`, async () => {
expect(await bookFinder.search('', searchTitle, a, null, null, { maxFuzzySearches: 0 })).to.deep.equal([])
expect(await bookFinder.search(null, '', searchTitle, a, null, null, { maxFuzzySearches: 0 })).to.deep.equal([])
sinon.assert.callCount(bookFinder.runSearch, 1)
})
})
@ -292,7 +295,7 @@ describe('search', () => {
[`${a} - series 01 - ${t}`],
].forEach(([searchTitle]) => {
it(`search('${searchTitle}', '${a}') returns an empty result (1 fuzzy search)`, async () => {
expect(await bookFinder.search('', searchTitle, a, null, null, { maxFuzzySearches: 1 })).to.deep.equal([])
expect(await bookFinder.search(null, '', searchTitle, a, null, null, { maxFuzzySearches: 1 })).to.deep.equal([])
sinon.assert.callCount(bookFinder.runSearch, 2)
})
})
@ -305,7 +308,7 @@ describe('search', () => {
[`${a} - ${t}`],
].forEach(([searchTitle]) => {
it(`search('${searchTitle}', '') returns a non-empty result (1 fuzzy search)`, async () => {
expect(await bookFinder.search('', searchTitle, '')).to.deep.equal(r)
expect(await bookFinder.search(null, '', searchTitle, '')).to.deep.equal(r)
sinon.assert.callCount(bookFinder.runSearch, 2)
})
});
@ -316,7 +319,7 @@ describe('search', () => {
[`${u} - ${t}`]
].forEach(([searchTitle]) => {
it(`search('${searchTitle}', '') returns an empty result`, async () => {
expect(await bookFinder.search('', searchTitle, '')).to.deep.equal([])
expect(await bookFinder.search(null, '', searchTitle, '')).to.deep.equal([])
})
})
})
@ -327,7 +330,7 @@ describe('search', () => {
[`${u} - ${t}`]
].forEach(([searchTitle]) => {
it(`search('${searchTitle}', '${u}') returns a non-empty result (1 fuzzy search)`, async () => {
expect(await bookFinder.search('', searchTitle, u)).to.deep.equal(r)
expect(await bookFinder.search(null, '', searchTitle, u)).to.deep.equal(r)
sinon.assert.callCount(bookFinder.runSearch, 2)
})
});
@ -336,9 +339,41 @@ describe('search', () => {
[`${t}`]
].forEach(([searchTitle]) => {
it(`search('${searchTitle}', '${u}') returns a non-empty result (no fuzzy search)`, async () => {
expect(await bookFinder.search('', searchTitle, u)).to.deep.equal(r)
expect(await bookFinder.search(null, '', searchTitle, u)).to.deep.equal(r)
sinon.assert.callCount(bookFinder.runSearch, 1)
})
})
})
// Checks that search() reorders provider results by how close each result's
// duration is to the library item's duration, and leaves them untouched when
// the library item has no usable duration.
describe('search provider results have duration', () => {
// search() divides media.duration by 60 before comparing, so this item compares as 1000
const libraryItem = { media: { duration: 60 * 1000 } }
// search() only applies the duration sort when the provider name starts with 'audible'
const provider = 'audible'
const unsorted = [{ duration: 3000 }, { duration: 2000 }, { duration: 1000 }, { duration: 500 }]
// Absolute differences from 1000 are 0, 500, 1000, 2000 respectively
const sorted = [{ duration: 1000 }, { duration: 500 }, { duration: 2000 }, { duration: 3000 }]
// NOTE(review): search() sorts with Array.prototype.sort, which reorders in place. If the stub hands back
// this same `unsorted` array on every call, the first test would reorder it and the "returns unsorted"
// tests below would compare the array against itself — confirm how the stub/fixture is reset between tests
runSearchStub.withArgs(t, a, provider).resolves(unsorted)
it('returns results sorted by library item duration diff', async () => {
expect(await bookFinder.search(libraryItem, provider, t, a)).to.deep.equal(sorted)
})
it('returns unsorted results if library item is null', async () => {
expect(await bookFinder.search(null, provider, t, a)).to.deep.equal(unsorted)
})
it('returns unsorted results if library item duration is undefined', async () => {
expect(await bookFinder.search({ media: {} }, provider, t, a)).to.deep.equal(unsorted)
})
it('returns unsorted results if library item media is undefined', async () => {
expect(await bookFinder.search({ }, provider, t, a)).to.deep.equal(unsorted)
})
it ('should return a result last if it has no duration', async () => {
// Local fixtures shadow the outer ones; search() treats a missing duration as positive infinity, so {} ends up last
const unsorted = [{}, { duration: 3000 }, { duration: 2000 }, { duration: 1000 }, { duration: 500 }]
const sorted = [{ duration: 1000 }, { duration: 500 }, { duration: 2000 }, { duration: 3000 }, {}]
runSearchStub.withArgs(t, a, provider).resolves(unsorted)
expect(await bookFinder.search(libraryItem, provider, t, a)).to.deep.equal(sorted)
})
})
})