From 46b0b3a6efb7f31ac7d67ee5fff6dcbd2ff28542 Mon Sep 17 00:00:00 2001 From: mikiher Date: Sun, 1 Oct 2023 08:42:47 +0000 Subject: [PATCH] [cleanup] Refactor candidates logic to separate class --- server/finders/BookFinder.js | 113 ++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 47 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index debac709..b30510f2 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -183,35 +183,67 @@ class BookFinder { return books } - addTitleCandidate(title, candidates) { - // Main variant - const cleanTitle = this.cleanTitleForCompares(title).trim() - if (!cleanTitle) return - candidates.add(cleanTitle) + static TitleCandidates = class { - let candidate = cleanTitle + constructor(bookFinder, cleanAuthor) { + this.bookFinder = bookFinder + this.candidates = new Set() + this.cleanAuthor = cleanAuthor + } - // Remove subtitle - candidate = candidate.replace(/([,:;_]| by ).*/g, "").trim() - if (candidate) - candidates.add(candidate) + add(title) { + const titleTransformers = [ + [/([,:;_]| by ).*/g, ''], // Remove subtitle + [/^\d+ | \d+$/g, ''], // Remove preceding/trailing numbers + [/(^| )\d+k(bps)?( |$)/, ' '], // Remove bitrate + [/ (2nd|3rd|\d+th)\s+ed(\.|ition)?/g, ''] // Remove edition + ] - // Remove preceding/trailing numbers - candidate = candidate.replace(/^\d+ | \d+$/g, "").trim() - if (candidate) - candidates.add(candidate) + // Main variant + const cleanTitle = this.bookFinder.cleanTitleForCompares(title).trim() + if (!cleanTitle) return + this.candidates.add(cleanTitle) - // Remove bitrate - candidate = candidate.replace(/(^| )\d+k(bps)?( |$)/, " ").trim() - if (candidate) - candidates.add(candidate) + let candidate = cleanTitle - // Remove edition - candidate = candidate.replace(/ (2nd|3rd|\d+th)\s+ed(\.|ition)?/, "").trim() - if (candidate) - candidates.add(candidate) + for (const transformer of titleTransformers) { + candidate = candidate.replace(transformer[0], transformer[1]).trim() + if (candidate) { + this.candidates.add(candidate) + } + } + } + + get size() { + return this.candidates.size + } + + getCandidates() { + var candidates = [...this.candidates] + candidates.sort((a, b) => { + // Candidates that include the author are likely low quality + const includesAuthorDiff = !b.includes(this.cleanAuthor) - !a.includes(this.cleanAuthor) + if (includesAuthorDiff) return includesAuthorDiff + // Candidates that include only digits are also likely low quality + const onlyDigits = /^\d+$/ + const includesOnlyDigitsDiff = !onlyDigits.test(b) - !onlyDigits.test(a) + if (includesOnlyDigitsDiff) return includesOnlyDigitsDiff + // Start with longer candidaets, as they are likely more specific + const lengthDiff = b.length - a.length + if (lengthDiff) return lengthDiff + return b.localeCompare(a) + }) + Logger.debug(`[${this.constructor.name}] Found ${candidates.length} fuzzy title candidates`) + Logger.debug(candidates) + return candidates + } + + delete(title) { + return this.candidates.delete(title) + } } + /** * Search for books including fuzzy searches * @@ -240,46 +272,33 @@ class BookFinder { title = title.trim().toLowerCase() author = author.trim().toLowerCase() + const cleanAuthor = this.cleanAuthorForCompares(author) + // Now run up to maxFuzzySearches fuzzy searches - let candidates = new Set() - let cleanedAuthor = this.cleanAuthorForCompares(author) - this.addTitleCandidate(title, candidates) + let titleCandidates = new BookFinder.TitleCandidates(this, cleanAuthor) + titleCandidates.add(title) // remove parentheses and their contents, and replace with a separator const cleanTitle = title.replace(/\[.*?\]|\(.*?\)|{.*?}/g, " - ") // Split title into hypen-separated parts const titleParts = cleanTitle.split(/ - | -|- /) for (const titlePart of titleParts) { - this.addTitleCandidate(titlePart, candidates) + titleCandidates.add(titlePart) } // We already searched for original title - if (author == cleanedAuthor) candidates.delete(title) - if (candidates.size > 0) { - candidates = [...candidates] - candidates.sort((a, b) => { - // Candidates that include the author are likely low quality - const includesAuthorDiff = !b.includes(cleanedAuthor) - !a.includes(cleanedAuthor) - if (includesAuthorDiff) return includesAuthorDiff - // Candidates that include only digits are also likely low quality - const onlyDigits = /^\d+$/ - const includesOnlyDigitsDiff = !onlyDigits.test(b) - !onlyDigits.test(a) - if (includesOnlyDigitsDiff) return includesOnlyDigitsDiff - // Start with longer candidaets, as they are likely more specific - const lengthDiff = b.length - a.length - if (lengthDiff) return lengthDiff - return b.localeCompare(a) - }) - Logger.debug(`[BookFinder] Found ${candidates.length} fuzzy title candidates`, candidates) - for (const candidate of candidates) { + if (author == cleanAuthor) titleCandidates.delete(title) + if (titleCandidates.size > 0) { + titleCandidates = titleCandidates.getCandidates() + for (const titleCandidate of titleCandidates) { if (++numFuzzySearches > maxFuzzySearches) return books - books = await this.runSearch(candidate, cleanedAuthor, provider, asin, maxTitleDistance, maxAuthorDistance) + books = await this.runSearch(titleCandidate, cleanAuthor, provider, asin, maxTitleDistance, maxAuthorDistance) if (books.length) break } if (!books.length) { // Now try searching without the author - for (const candidate of candidates) { + for (const titleCandidate of titleCandidates) { if (++numFuzzySearches > maxFuzzySearches) return books - books = await this.runSearch(candidate, '', provider, asin, maxTitleDistance, maxAuthorDistance) + books = await this.runSearch(titleCandidate, '', provider, asin, maxTitleDistance, maxAuthorDistance) if (books.length) break } }