From dd664da8710c3d13e892b8523ab93237638ce90d Mon Sep 17 00:00:00 2001 From: Cassie Esposito Date: Thu, 19 May 2022 22:10:53 -0700 Subject: [PATCH 1/6] Separated individual element parsing functions out of function getBookDataFromDir --- server/utils/scandir.js | 125 +++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 59 deletions(-) diff --git a/server/utils/scandir.js b/server/utils/scandir.js index faeab015..55072954 100644 --- a/server/utils/scandir.js +++ b/server/utils/scandir.js @@ -212,17 +212,38 @@ function getBookDataFromDir(folderPath, relPath, parseSubtitle = false) { relPath = relPath.replace(/\\/g, '/') var splitDir = relPath.split('/') - // Audio files will always be in the directory named for the title - var [title, narrators] = getTitleAndNarrator(splitDir.pop()) + var title = splitDir.pop() // Audio files will always be in the directory named for the title + series = (splitDir.length > 1) ? splitDir.pop() : null // If there are at least 2 more directories, next furthest will be the series + author = (splitDir.length > 0) ? splitDir.pop() : null // There could be many more directories, but only the top 3 are used for naming /author/series/title/ - var series = null - var author = null - // If there are at least 2 more directories, next furthest will be the series - if (splitDir.length > 1) series = splitDir.pop() - if (splitDir.length > 0) author = splitDir.pop() - // There could be many more directories, but only the top 3 are used for naming /author/series/title/ + // The title directory may contain various other pieces of metadata, these functions extract it. + var [title, narrators] = getNarrator(title) + if (series) { var [series, title, sequence] = getSeries(series, title) } + var [title, publishedYear] = getPublishedYear(title) + if (parseSubtitle) { var [title, subtitle] = getSubtitle(title) } + return { + mediaMetadata: { + author, + title, + subtitle, + series, + sequence, + publishedYear, + narrators, + }, + relPath: relPath, // relative audiobook path i.e. /Author Name/Book Name/.. + path: Path.posix.join(folderPath, relPath) // i.e. /audiobook/Author Name/Book Name/.. + } +} +function getNarrator(folder) { + let pattern = /^(?.*)\{(?<narrators>.*)\} *$/ + let match = folder.match(pattern) + return match ? [match.groups.title.trimEnd(), match.groups.narrators] : [folder, null] +} + +function getSeries(series, title) { // If in a series directory check for volume number match /* ACCEPTS Book 2 - Title Here - Subtitle Here @@ -236,33 +257,47 @@ function getBookDataFromDir(folderPath, relPath, parseSubtitle = false) { 0.5 - Book Title */ var volumeNumber = null - if (series) { - // Added 1.7.1: If title starts with a # that is 3 digits or less (or w/ 2 decimal), then use as volume number - var volumeMatch = title.match(/^(\d{1,3}(?:\.\d{1,2})?) - ./) - if (volumeMatch && volumeMatch.length > 1) { - volumeNumber = volumeMatch[1] - title = title.replace(`${volumeNumber} - `, '') - } else { - // Match volumes with decimal (OLD: /(-? ?)\b((?:Book|Vol.?|Volume) (\d{1,3}))\b( ?-?)/i) - var volumeMatch = title.match(/(-? ?)\b((?:Book|Vol.?|Volume) (\d{0,3}(?:\.\d{1,2})?))\b( ?-?)/i) - if (volumeMatch && volumeMatch.length > 3 && volumeMatch[2] && volumeMatch[3]) { - volumeNumber = volumeMatch[3] - var replaceChunk = volumeMatch[2] - // "1980 - Book 2-Title Here" - // Group 1 would be "- " - // Group 3 would be "-" - // Only remove the first group - if (volumeMatch[1]) { - replaceChunk = volumeMatch[1] + replaceChunk - } else if (volumeMatch[4]) { - replaceChunk += volumeMatch[4] - } - title = title.replace(replaceChunk, '').trim() + // Added 1.7.1: If title starts with a # that is 3 digits or less (or w/ 2 decimal), then use as volume number + var volumeMatch = title.match(/^(\d{1,3}(?:\.\d{1,2})?) - ./) + if (volumeMatch && volumeMatch.length > 1) { + volumeNumber = volumeMatch[1] + title = title.replace(`${volumeNumber} - `, '') + } else { + // Match volumes with decimal (OLD: /(-? ?)\b((?:Book|Vol.?|Volume) (\d{1,3}))\b( ?-?)/i) + var volumeMatch = title.match(/(-? ?)\b((?:Book|Vol.?|Volume) (\d{0,3}(?:\.\d{1,2})?))\b( ?-?)/i) + if (volumeMatch && volumeMatch.length > 3 && volumeMatch[2] && volumeMatch[3]) { + volumeNumber = volumeMatch[3] + var replaceChunk = volumeMatch[2] + + // "1980 - Book 2-Title Here" + // Group 1 would be "- " + // Group 3 would be "-" + // Only remove the first group + if (volumeMatch[1]) { + replaceChunk = volumeMatch[1] + replaceChunk + } else if (volumeMatch[4]) { + replaceChunk += volumeMatch[4] } + title = title.replace(replaceChunk, '').trim() } } + return [series, title, volumeNumber] +} +function getSubtitle(title) { + // Subtitle can be parsed from the title if user enabled + // Subtitle is everything after " - " + var subtitle = null + if (title.includes(' - ')) { + var splitOnSubtitle = title.split(' - ') + title = splitOnSubtitle.shift() + subtitle = splitOnSubtitle.join(' - ') + } + return [title, subtitle] +} + +function getPublishedYear(title) { var publishedYear = null // If Title is of format 1999 OR (1999) - Title, then use 1999 as publish year var publishYearMatch = title.match(/^(\(?[0-9]{4}\)?) - (.+)/) @@ -276,35 +311,7 @@ function getBookDataFromDir(folderPath, relPath, parseSubtitle = false) { title = publishYearMatch[2] } } - - // Subtitle can be parsed from the title if user enabled - // Subtitle is everything after " - " - var subtitle = null - if (parseSubtitle && title.includes(' - ')) { - var splitOnSubtitle = title.split(' - ') - title = splitOnSubtitle.shift() - subtitle = splitOnSubtitle.join(' - ') - } - - return { - mediaMetadata: { - author, - title, - subtitle, - series, - sequence: volumeNumber, - publishedYear, - narrators, - }, - relPath: relPath, // relative audiobook path i.e. /Author Name/Book Name/.. - path: Path.posix.join(folderPath, relPath) // i.e. /audiobook/Author Name/Book Name/.. - } -} - -function getTitleAndNarrator(folder) { - let pattern = /^(?<title>.*)\{(?<narrators>.*)\} *$/ - let match = folder.match(pattern) - return match ? [match.groups.title.trimEnd(), match.groups.narrators] : [folder, null] + return [title, publishedYear] } function getPodcastDataFromDir(folderPath, relPath) { From 13d21e90f82131960c1160f13414e07c894090ca Mon Sep 17 00:00:00 2001 From: Cassie Esposito <dev@timevault.org> Date: Thu, 19 May 2022 22:31:55 -0700 Subject: [PATCH 2/6] Cleaned function getSubtitle --- server/utils/scandir.js | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/server/utils/scandir.js b/server/utils/scandir.js index 55072954..2adbb600 100644 --- a/server/utils/scandir.js +++ b/server/utils/scandir.js @@ -220,7 +220,7 @@ function getBookDataFromDir(folderPath, relPath, parseSubtitle = false) { var [title, narrators] = getNarrator(title) if (series) { var [series, title, sequence] = getSeries(series, title) } var [title, publishedYear] = getPublishedYear(title) - if (parseSubtitle) { var [title, subtitle] = getSubtitle(title) } + if (parseSubtitle) { var [title, subtitle] = getSubtitle(title) } // Subtitle can be parsed from the title if user enabled return { mediaMetadata: { @@ -286,15 +286,9 @@ function getSeries(series, title) { } function getSubtitle(title) { - // Subtitle can be parsed from the title if user enabled // Subtitle is everything after " - " - var subtitle = null - if (title.includes(' - ')) { - var splitOnSubtitle = title.split(' - ') - title = splitOnSubtitle.shift() - subtitle = splitOnSubtitle.join(' - ') - } - return [title, subtitle] + var splitTitle = title.split(' - ') + return [splitTitle.shift(), splitTitle.join(' - ')] } function getPublishedYear(title) { From f1f02b185e9fe48cdea36ec076858dbd74f5d9ec Mon Sep 17 00:00:00 2001 From: Cassie Esposito <dev@timevault.org> Date: Thu, 19 May 2022 22:55:00 -0700 Subject: [PATCH 3/6] Cleaned function getPublishedYear --- server/utils/scandir.js | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/server/utils/scandir.js b/server/utils/scandir.js index 2adbb600..9bb239aa 100644 --- a/server/utils/scandir.js +++ b/server/utils/scandir.js @@ -293,18 +293,14 @@ function getSubtitle(title) { function getPublishedYear(title) { var publishedYear = null - // If Title is of format 1999 OR (1999) - Title, then use 1999 as publish year - var publishYearMatch = title.match(/^(\(?[0-9]{4}\)?) - (.+)/) - if (publishYearMatch && publishYearMatch.length > 2 && publishYearMatch[1]) { - // Strip parentheses - if (publishYearMatch[1].startsWith('(') && publishYearMatch[1].endsWith(')')) { - publishYearMatch[1] = publishYearMatch[1].slice(1, -1) - } - if (!isNaN(publishYearMatch[1])) { - publishedYear = publishYearMatch[1] - title = publishYearMatch[2] - } + + pattern = /^\(?([0-9]{4})\)? - (.+)/ //Matches #### - title or (####) - title + var match = title.match(pattern) + if (match) { + publishedYear = match[1] + title = match[2] } + return [title, publishedYear] } From 2d8c840ad6833b33fba15996f0f19ae21927f13f Mon Sep 17 00:00:00 2001 From: Cassie Esposito <dev@timevault.org> Date: Fri, 20 May 2022 01:03:36 -0700 Subject: [PATCH 4/6] Cleaned up function getSequence, became more forgiving of whitespace around metadata elements --- server/utils/scandir.js | 76 ++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 43 deletions(-) diff --git a/server/utils/scandir.js b/server/utils/scandir.js index 9bb239aa..bf1e53dc 100644 --- a/server/utils/scandir.js +++ b/server/utils/scandir.js @@ -5,6 +5,7 @@ const { recurseFiles, getFileTimestampsWithIno } = require('./fileUtils') const globals = require('./globals') const LibraryFile = require('../objects/files/LibraryFile') const { response } = require('express') +const e = require('express') function isMediaFile(mediaType, ext) { // if (!path) return false @@ -218,7 +219,7 @@ function getBookDataFromDir(folderPath, relPath, parseSubtitle = false) { // The title directory may contain various other pieces of metadata, these functions extract it. var [title, narrators] = getNarrator(title) - if (series) { var [series, title, sequence] = getSeries(series, title) } + if (series) { var [title, sequence] = getSequence(title) } var [title, publishedYear] = getPublishedYear(title) if (parseSubtitle) { var [title, subtitle] = getSubtitle(title) } // Subtitle can be parsed from the title if user enabled @@ -243,58 +244,41 @@ function getNarrator(folder) { return match ? [match.groups.title.trimEnd(), match.groups.narrators] : [folder, null] } -function getSeries(series, title) { - // If in a series directory check for volume number match - /* ACCEPTS - Book 2 - Title Here - Subtitle Here - Title Here - Subtitle Here - Vol 12 - Title Here - volume 9 - Subtitle Here - Vol. 3 Title Here - Subtitle Here - 1980 - Book 2-Title Here - Title Here-Volume 999-Subtitle Here - 2 - Book Title - 100 - Book Title - 0.5 - Book Title - */ - var volumeNumber = null +function getSequence(title) { + // Valid ways of including a volume number: + // Book 2 - Title Here - Subtitle Here + // Title Here - Subtitle Here - Vol 12 + // Title Here - volume 9 - Subtitle Here + // Vol. 3 Title Here - Subtitle Here + // 1980 - Book 2-Title Here + // Title Here-Volume 999-Subtitle Here + // 2 - Book Title + // 100 - Book Title + // 0.5 - Book Title - // Added 1.7.1: If title starts with a # that is 3 digits or less (or w/ 2 decimal), then use as volume number - var volumeMatch = title.match(/^(\d{1,3}(?:\.\d{1,2})?) - ./) - if (volumeMatch && volumeMatch.length > 1) { - volumeNumber = volumeMatch[1] - title = title.replace(`${volumeNumber} - `, '') - } else { - // Match volumes with decimal (OLD: /(-? ?)\b((?:Book|Vol.?|Volume) (\d{1,3}))\b( ?-?)/i) - var volumeMatch = title.match(/(-? ?)\b((?:Book|Vol.?|Volume) (\d{0,3}(?:\.\d{1,2})?))\b( ?-?)/i) - if (volumeMatch && volumeMatch.length > 3 && volumeMatch[2] && volumeMatch[3]) { - volumeNumber = volumeMatch[3] - var replaceChunk = volumeMatch[2] + // Matches a valid volume string, capturing each section for later processing. + let pattern = /^(vol\.? |volume |book )?(\d{1,3}(?:\.\d{1,2})?)(.*)/i - // "1980 - Book 2-Title Here" - // Group 1 would be "- " - // Group 3 would be "-" - // Only remove the first group - if (volumeMatch[1]) { - replaceChunk = volumeMatch[1] + replaceChunk - } else if (volumeMatch[4]) { - replaceChunk += volumeMatch[4] - } - title = title.replace(replaceChunk, '').trim() + let volumeNumber = null + let parts = title.split('-') + for (let i = 0; i < parts.length; i++) { + let match = parts[i].trim().match(pattern) + if (match && !(match[3].trim() && !match[1])) { // "101 Dalmations" shouldn't match + volumeNumber = match[2] + parts[i] = match[3] + if (!parts[i].trim()) { parts.splice(i, 1) } + break } } - return [series, title, volumeNumber] -} + title = parts.join(' - ') -function getSubtitle(title) { - // Subtitle is everything after " - " - var splitTitle = title.split(' - ') - return [splitTitle.shift(), splitTitle.join(' - ')] + return [title, volumeNumber] } function getPublishedYear(title) { var publishedYear = null - pattern = /^\(?([0-9]{4})\)? - (.+)/ //Matches #### - title or (####) - title + pattern = /^ *\(?([0-9]{4})\)? *- *(.+)/ //Matches #### - title or (####) - title var match = title.match(pattern) if (match) { publishedYear = match[1] @@ -304,6 +288,12 @@ function getPublishedYear(title) { return [title, publishedYear] } +function getSubtitle(title) { + // Subtitle is everything after " - " + var splitTitle = title.split(' - ') + return [splitTitle.shift().trim(), splitTitle.join(' - ').trim()] +} + function getPodcastDataFromDir(folderPath, relPath) { relPath = relPath.replace(/\\/g, '/') var splitDir = relPath.split('/') From 1ab933c8b01574013fc21cd937a2f4f34c57cea0 Mon Sep 17 00:00:00 2001 From: Cassie Esposito <dev@timevault.org> Date: Tue, 24 May 2022 16:21:58 -0700 Subject: [PATCH 5/6] Refactored getSequence. Slight behavior changes introduced. All components of the bottom level directory except volume which can no longer use '-' for separation, but 'Vol 4 Title' is still valid and '4. Title' or 'Vol 4.' are also now valid. --- server/utils/scandir.js | 77 ++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 36 deletions(-) diff --git a/server/utils/scandir.js b/server/utils/scandir.js index ec25f6bc..e1ec24b9 100644 --- a/server/utils/scandir.js +++ b/server/utils/scandir.js @@ -209,15 +209,15 @@ function getBookDataFromDir(folderPath, relPath, parseSubtitle = false) { relPath = relPath.replace(/\\/g, '/') var splitDir = relPath.split('/') - var title = splitDir.pop() // Audio files will always be in the directory named for the title + var folder = splitDir.pop() // Audio files will always be in the directory named for the title series = (splitDir.length > 1) ? splitDir.pop() : null // If there are at least 2 more directories, next furthest will be the series author = (splitDir.length > 0) ? splitDir.pop() : null // There could be many more directories, but only the top 3 are used for naming /author/series/title/ - // The title directory may contain various other pieces of metadata, these functions extract it. - var [title, narrators] = getNarrator(title) - if (series) { var [title, sequence] = getSequence(title) } - var [title, publishedYear] = getPublishedYear(title) - if (parseSubtitle) { var [title, subtitle] = getSubtitle(title) } // Subtitle can be parsed from the title if user enabled + // The may contain various other pieces of metadata, these functions extract it. + var [folder, narrators] = getNarrator(folder) + if (series) { var [folder, sequence] = getSequence(folder) } + var [folder, publishedYear] = getPublishedYear(folder) + if (parseSubtitle) { var [title, subtitle] = getSubtitle(folder) } // Subtitle can be parsed from the title if user enabled return { mediaMetadata: { @@ -235,59 +235,64 @@ function getBookDataFromDir(folderPath, relPath, parseSubtitle = false) { } function getNarrator(folder) { - let pattern = /^(?<title>.*)\{(?<narrators>.*)\} *$/ + let pattern = /^(?<title>.*) \{(?<narrators>.*)\}$/ let match = folder.match(pattern) - return match ? [match.groups.title.trimEnd(), match.groups.narrators] : [folder, null] + return match ? [match.groups.title, match.groups.narrators] : [folder, null] } -function getSequence(title) { +function getSequence(folder) { // Valid ways of including a volume number: - // Book 2 - Title Here - Subtitle Here - // Title Here - Subtitle Here - Vol 12 - // Title Here - volume 9 - Subtitle Here - // Vol. 3 Title Here - Subtitle Here - // 1980 - Book 2-Title Here - // Title Here-Volume 999-Subtitle Here - // 2 - Book Title - // 100 - Book Title - // 0.5 - Book Title + // [ + // 'Book 2 - Title - Subtitle', + // 'Title - Subtitle - Vol 12', + // 'Title - volume 9 - Subtitle', + // 'Vol. 3 Title Here - Subtitle', + // '1980 - Book 2 - Title', + // 'Volume 12. Title - Subtitle', + // '100 - Book Title', + // '2 - Book Title', + // '6. Title', + // '0.5 - Book Title' + // ] - // Matches a valid volume string, capturing each section for later processing. - let pattern = /^(vol\.? |volume |book )?(\d{1,3}(?:\.\d{1,2})?)(.*)/i + // Matches a valid volume string. Also matches a book whose title starts with a 1 to 3 digit number. Will handle that later. + let pattern = /^(?<volumeLabel>vol\.? |volume |book )?(?<sequence>\d{1,3}(?:\.\d{1,2})?)(?<trailingDot>\.?)(?: (?<suffix>.*))?/i let volumeNumber = null - let parts = title.split('-') + let parts = folder.split(' - ') for (let i = 0; i < parts.length; i++) { - let match = parts[i].trim().match(pattern) - if (match && !(match[3].trim() && !match[1])) { // "101 Dalmations" shouldn't match - volumeNumber = match[2] - parts[i] = match[3] - if (!parts[i].trim()) { parts.splice(i, 1) } + let match = parts[i].match(pattern) + + // This excludes '101 Dalmations' but includes '101. Dalmations' + if (match && !(match.groups.suffix && !(match.groups.volumeLabel || match.groups.trailingDot))) { + volumeNumber = match.groups.sequence + parts[i] = match.groups.suffix + if (!parts[i]) { parts.splice(i, 1) } break } } - title = parts.join(' - ') - return [title, volumeNumber] + folder = parts.join(' - ') + return [folder, volumeNumber] } -function getPublishedYear(title) { +function getPublishedYear(folder) { var publishedYear = null - pattern = /^ *\(?([0-9]{4})\)? *- *(.+)/ //Matches #### - title or (####) - title - var match = title.match(pattern) + pattern = /^ *\(?([0-9]{4})\)? * - *(.+)/ //Matches #### - title or (####) - title + var match = folder.match(pattern) if (match) { publishedYear = match[1] - title = match[2] + folder = match[2] } - return [title, publishedYear] + return [folder, publishedYear] } -function getSubtitle(title) { +function getSubtitle(folder) { // Subtitle is everything after " - " - var splitTitle = title.split(' - ') - return [splitTitle.shift().trim(), splitTitle.join(' - ').trim()] + var splitTitle = folder.split(' - ') + return [splitTitle.shift(), splitTitle.join(' - ')] } function getPodcastDataFromDir(folderPath, relPath) { From 6d947bbc294acf7bb9b22eda3960feaf09f7ef18 Mon Sep 17 00:00:00 2001 From: Cassie Esposito <dev@timevault.org> Date: Tue, 24 May 2022 17:06:44 -0700 Subject: [PATCH 6/6] Converted indentation from 4 spaces to 2 --- server/utils/scandir.js | 574 ++++++++++++++++++++-------------------- 1 file changed, 287 insertions(+), 287 deletions(-) diff --git a/server/utils/scandir.js b/server/utils/scandir.js index e1ec24b9..25258d71 100644 --- a/server/utils/scandir.js +++ b/server/utils/scandir.js @@ -6,370 +6,370 @@ const globals = require('./globals') const LibraryFile = require('../objects/files/LibraryFile') function isMediaFile(mediaType, ext) { - // if (!path) return false - // var ext = Path.extname(path) - if (!ext) return false - var extclean = ext.slice(1).toLowerCase() - if (mediaType === 'podcast') return globals.SupportedAudioTypes.includes(extclean) - return globals.SupportedAudioTypes.includes(extclean) || globals.SupportedEbookTypes.includes(extclean) + // if (!path) return false + // var ext = Path.extname(path) + if (!ext) return false + var extclean = ext.slice(1).toLowerCase() + if (mediaType === 'podcast') return globals.SupportedAudioTypes.includes(extclean) + return globals.SupportedAudioTypes.includes(extclean) || globals.SupportedEbookTypes.includes(extclean) } // TODO: Function needs to be re-done // Input: array of relative file paths // Output: map of files grouped into potential item dirs function groupFilesIntoLibraryItemPaths(mediaType, paths) { - // Step 1: Clean path, Remove leading "/", Filter out non-media files in root dir - var pathsFiltered = paths.map(path => { - return path.startsWith('/') ? path.slice(1) : path - }).filter(path => { - let parsedPath = Path.parse(path) - return parsedPath.dir || (mediaType === 'book' && isMediaFile(mediaType, parsedPath.ext)) - }) + // Step 1: Clean path, Remove leading "/", Filter out non-media files in root dir + var pathsFiltered = paths.map(path => { + return path.startsWith('/') ? path.slice(1) : path + }).filter(path => { + let parsedPath = Path.parse(path) + return parsedPath.dir || (mediaType === 'book' && isMediaFile(mediaType, parsedPath.ext)) + }) - // Step 2: Sort by least number of directories - pathsFiltered.sort((a, b) => { - var pathsA = Path.dirname(a).split('/').length - var pathsB = Path.dirname(b).split('/').length - return pathsA - pathsB - }) + // Step 2: Sort by least number of directories + pathsFiltered.sort((a, b) => { + var pathsA = Path.dirname(a).split('/').length + var pathsB = Path.dirname(b).split('/').length + return pathsA - pathsB + }) - // Step 3: Group files in dirs - var itemGroup = {} - pathsFiltered.forEach((path) => { - var dirparts = Path.dirname(path).split('/').filter(p => !!p && p !== '.') // dirname returns . if no directory - var numparts = dirparts.length - var _path = '' + // Step 3: Group files in dirs + var itemGroup = {} + pathsFiltered.forEach((path) => { + var dirparts = Path.dirname(path).split('/').filter(p => !!p && p !== '.') // dirname returns . if no directory + var numparts = dirparts.length + var _path = '' - if (!numparts) { - // Media file in root - itemGroup[path] = path - } else { - // Iterate over directories in path - for (let i = 0; i < numparts; i++) { - var dirpart = dirparts.shift() - _path = Path.posix.join(_path, dirpart) + if (!numparts) { + // Media file in root + itemGroup[path] = path + } else { + // Iterate over directories in path + for (let i = 0; i < numparts; i++) { + var dirpart = dirparts.shift() + _path = Path.posix.join(_path, dirpart) - if (itemGroup[_path]) { // Directory already has files, add file - var relpath = Path.posix.join(dirparts.join('/'), Path.basename(path)) - itemGroup[_path].push(relpath) - return - } else if (!dirparts.length) { // This is the last directory, create group - itemGroup[_path] = [Path.basename(path)] - return - } else if (dirparts.length === 1 && /^cd\d{1,3}$/i.test(dirparts[0])) { // Next directory is the last and is a CD dir, create group - itemGroup[_path] = [Path.posix.join(dirparts[0], Path.basename(path))] - return - } - } + if (itemGroup[_path]) { // Directory already has files, add file + var relpath = Path.posix.join(dirparts.join('/'), Path.basename(path)) + itemGroup[_path].push(relpath) + return + } else if (!dirparts.length) { // This is the last directory, create group + itemGroup[_path] = [Path.basename(path)] + return + } else if (dirparts.length === 1 && /^cd\d{1,3}$/i.test(dirparts[0])) { // Next directory is the last and is a CD dir, create group + itemGroup[_path] = [Path.posix.join(dirparts[0], Path.basename(path))] + return } - }) - return itemGroup + } + } + }) + return itemGroup } module.exports.groupFilesIntoLibraryItemPaths = groupFilesIntoLibraryItemPaths // Input: array of relative file items (see recurseFiles) // Output: map of files grouped into potential libarary item dirs function groupFileItemsIntoLibraryItemDirs(mediaType, fileItems) { - // Step 1: Filter out non-book-media files in root dir (with depth of 0) - var itemsFiltered = fileItems.filter(i => { - return i.deep > 0 || (mediaType === 'book' && isMediaFile(mediaType, i.extension)) - }) + // Step 1: Filter out non-book-media files in root dir (with depth of 0) + var itemsFiltered = fileItems.filter(i => { + return i.deep > 0 || (mediaType === 'book' && isMediaFile(mediaType, i.extension)) + }) - // Step 2: Seperate media files and other files - // - Directories without a media file will not be included - var mediaFileItems = [] - var otherFileItems = [] - itemsFiltered.forEach(item => { - if (isMediaFile(mediaType, item.extension)) mediaFileItems.push(item) - else otherFileItems.push(item) - }) + // Step 2: Seperate media files and other files + // - Directories without a media file will not be included + var mediaFileItems = [] + var otherFileItems = [] + itemsFiltered.forEach(item => { + if (isMediaFile(mediaType, item.extension)) mediaFileItems.push(item) + else otherFileItems.push(item) + }) - // Step 3: Group audio files in library items - var libraryItemGroup = {} - mediaFileItems.forEach((item) => { - var dirparts = item.reldirpath.split('/').filter(p => !!p) - var numparts = dirparts.length - var _path = '' + // Step 3: Group audio files in library items + var libraryItemGroup = {} + mediaFileItems.forEach((item) => { + var dirparts = item.reldirpath.split('/').filter(p => !!p) + var numparts = dirparts.length + var _path = '' - if (!dirparts.length) { - // Media file in root - libraryItemGroup[item.name] = item.name - } else { - // Iterate over directories in path - for (let i = 0; i < numparts; i++) { - var dirpart = dirparts.shift() - _path = Path.posix.join(_path, dirpart) + if (!dirparts.length) { + // Media file in root + libraryItemGroup[item.name] = item.name + } else { + // Iterate over directories in path + for (let i = 0; i < numparts; i++) { + var dirpart = dirparts.shift() + _path = Path.posix.join(_path, dirpart) - if (libraryItemGroup[_path]) { // Directory already has files, add file - var relpath = Path.posix.join(dirparts.join('/'), item.name) - libraryItemGroup[_path].push(relpath) - return - } else if (!dirparts.length) { // This is the last directory, create group - libraryItemGroup[_path] = [item.name] - return - } else if (dirparts.length === 1 && /^cd\d{1,3}$/i.test(dirparts[0])) { // Next directory is the last and is a CD dir, create group - libraryItemGroup[_path] = [Path.posix.join(dirparts[0], item.name)] - return - } - } + if (libraryItemGroup[_path]) { // Directory already has files, add file + var relpath = Path.posix.join(dirparts.join('/'), item.name) + libraryItemGroup[_path].push(relpath) + return + } else if (!dirparts.length) { // This is the last directory, create group + libraryItemGroup[_path] = [item.name] + return + } else if (dirparts.length === 1 && /^cd\d{1,3}$/i.test(dirparts[0])) { // Next directory is the last and is a CD dir, create group + libraryItemGroup[_path] = [Path.posix.join(dirparts[0], item.name)] + return } - }) + } + } + }) - // Step 4: Add other files into library item groups - otherFileItems.forEach((item) => { - var dirparts = item.reldirpath.split('/') - var numparts = dirparts.length - var _path = '' + // Step 4: Add other files into library item groups + otherFileItems.forEach((item) => { + var dirparts = item.reldirpath.split('/') + var numparts = dirparts.length + var _path = '' - // Iterate over directories in path - for (let i = 0; i < numparts; i++) { - var dirpart = dirparts.shift() - _path = Path.posix.join(_path, dirpart) - if (libraryItemGroup[_path]) { // Directory is audiobook group - var relpath = Path.posix.join(dirparts.join('/'), item.name) - libraryItemGroup[_path].push(relpath) - return - } - } - }) - return libraryItemGroup + // Iterate over directories in path + for (let i = 0; i < numparts; i++) { + var dirpart = dirparts.shift() + _path = Path.posix.join(_path, dirpart) + if (libraryItemGroup[_path]) { // Directory is audiobook group + var relpath = Path.posix.join(dirparts.join('/'), item.name) + libraryItemGroup[_path].push(relpath) + return + } + } + }) + return libraryItemGroup } function cleanFileObjects(libraryItemPath, files) { - return Promise.all(files.map(async(file) => { - var filePath = Path.posix.join(libraryItemPath, file) - var newLibraryFile = new LibraryFile() - await newLibraryFile.setDataFromPath(filePath, file) - return newLibraryFile - })) + return Promise.all(files.map(async(file) => { + var filePath = Path.posix.join(libraryItemPath, file) + var newLibraryFile = new LibraryFile() + await newLibraryFile.setDataFromPath(filePath, file) + return newLibraryFile + })) } // Scan folder async function scanFolder(libraryMediaType, folder, serverSettings = {}) { - var folderPath = folder.fullPath.replace(/\\/g, '/') + var folderPath = folder.fullPath.replace(/\\/g, '/') - var pathExists = await fs.pathExists(folderPath) - if (!pathExists) { - Logger.error(`[scandir] Invalid folder path does not exist "${folderPath}"`) - return [] + var pathExists = await fs.pathExists(folderPath) + if (!pathExists) { + Logger.error(`[scandir] Invalid folder path does not exist "${folderPath}"`) + return [] + } + + var fileItems = await recurseFiles(folderPath) + var libraryItemGrouping = groupFileItemsIntoLibraryItemDirs(libraryMediaType, fileItems) + + if (!Object.keys(libraryItemGrouping).length) { + Logger.error(`Root path has no media folders: ${folderPath}`) + return [] + } + + var isFile = false // item is not in a folder + var items = [] + for (const libraryItemPath in libraryItemGrouping) { + var libraryItemData = null + var fileObjs = [] + if (libraryItemPath === libraryItemGrouping[libraryItemPath]) { + // Media file in root only get title + libraryItemData = { + mediaMetadata: { + title: Path.basename(libraryItemPath, Path.extname(libraryItemPath)) + }, + path: Path.posix.join(folderPath, libraryItemPath), + relPath: libraryItemPath + } + fileObjs = await cleanFileObjects(folderPath, [libraryItemPath]) + isFile = true + } else { + libraryItemData = getDataFromMediaDir(libraryMediaType, folderPath, libraryItemPath, serverSettings) + fileObjs = await cleanFileObjects(libraryItemData.path, libraryItemGrouping[libraryItemPath]) } - var fileItems = await recurseFiles(folderPath) - var libraryItemGrouping = groupFileItemsIntoLibraryItemDirs(libraryMediaType, fileItems) - - if (!Object.keys(libraryItemGrouping).length) { - Logger.error(`Root path has no media folders: ${folderPath}`) - return [] - } - - var isFile = false // item is not in a folder - var items = [] - for (const libraryItemPath in libraryItemGrouping) { - var libraryItemData = null - var fileObjs = [] - if (libraryItemPath === libraryItemGrouping[libraryItemPath]) { - // Media file in root only get title - libraryItemData = { - mediaMetadata: { - title: Path.basename(libraryItemPath, Path.extname(libraryItemPath)) - }, - path: Path.posix.join(folderPath, libraryItemPath), - relPath: libraryItemPath - } - fileObjs = await cleanFileObjects(folderPath, [libraryItemPath]) - isFile = true - } else { - libraryItemData = getDataFromMediaDir(libraryMediaType, folderPath, libraryItemPath, serverSettings) - fileObjs = await cleanFileObjects(libraryItemData.path, libraryItemGrouping[libraryItemPath]) - } - - var libraryItemFolderStats = await getFileTimestampsWithIno(libraryItemData.path) - items.push({ - folderId: folder.id, - libraryId: folder.libraryId, - ino: libraryItemFolderStats.ino, - mtimeMs: libraryItemFolderStats.mtimeMs || 0, - ctimeMs: libraryItemFolderStats.ctimeMs || 0, - birthtimeMs: libraryItemFolderStats.birthtimeMs || 0, - path: libraryItemData.path, - relPath: libraryItemData.relPath, - isFile, - media: { - metadata: libraryItemData.mediaMetadata || null - }, - libraryFiles: fileObjs - }) - } - return items + var libraryItemFolderStats = await getFileTimestampsWithIno(libraryItemData.path) + items.push({ + folderId: folder.id, + libraryId: folder.libraryId, + ino: libraryItemFolderStats.ino, + mtimeMs: libraryItemFolderStats.mtimeMs || 0, + ctimeMs: libraryItemFolderStats.ctimeMs || 0, + birthtimeMs: libraryItemFolderStats.birthtimeMs || 0, + path: libraryItemData.path, + relPath: libraryItemData.relPath, + isFile, + media: { + metadata: libraryItemData.mediaMetadata || null + }, + libraryFiles: fileObjs + }) + } + return items } module.exports.scanFolder = scanFolder // Input relative filepath, output all details that can be parsed function getBookDataFromDir(folderPath, relPath, parseSubtitle = false) { - relPath = relPath.replace(/\\/g, '/') - var splitDir = relPath.split('/') + relPath = relPath.replace(/\\/g, '/') + var splitDir = relPath.split('/') - var folder = splitDir.pop() // Audio files will always be in the directory named for the title - series = (splitDir.length > 1) ? splitDir.pop() : null // If there are at least 2 more directories, next furthest will be the series - author = (splitDir.length > 0) ? splitDir.pop() : null // There could be many more directories, but only the top 3 are used for naming /author/series/title/ + var folder = splitDir.pop() // Audio files will always be in the directory named for the title + series = (splitDir.length > 1) ? splitDir.pop() : null // If there are at least 2 more directories, next furthest will be the series + author = (splitDir.length > 0) ? splitDir.pop() : null // There could be many more directories, but only the top 3 are used for naming /author/series/title/ - // The may contain various other pieces of metadata, these functions extract it. - var [folder, narrators] = getNarrator(folder) - if (series) { var [folder, sequence] = getSequence(folder) } - var [folder, publishedYear] = getPublishedYear(folder) - if (parseSubtitle) { var [title, subtitle] = getSubtitle(folder) } // Subtitle can be parsed from the title if user enabled + // The may contain various other pieces of metadata, these functions extract it. + var [folder, narrators] = getNarrator(folder) + if (series) { var [folder, sequence] = getSequence(folder) } + var [folder, publishedYear] = getPublishedYear(folder) + if (parseSubtitle) { var [title, subtitle] = getSubtitle(folder) } // Subtitle can be parsed from the title if user enabled - return { - mediaMetadata: { - author, - title, - subtitle, - series, - sequence, - publishedYear, - narrators, - }, - relPath: relPath, // relative audiobook path i.e. /Author Name/Book Name/.. - path: Path.posix.join(folderPath, relPath) // i.e. /audiobook/Author Name/Book Name/.. - } + return { + mediaMetadata: { + author, + title, + subtitle, + series, + sequence, + publishedYear, + narrators, + }, + relPath: relPath, // relative audiobook path i.e. /Author Name/Book Name/.. + path: Path.posix.join(folderPath, relPath) // i.e. /audiobook/Author Name/Book Name/.. + } } function getNarrator(folder) { - let pattern = /^(?<title>.*) \{(?<narrators>.*)\}$/ - let match = folder.match(pattern) - return match ? [match.groups.title, match.groups.narrators] : [folder, null] + let pattern = /^(?<title>.*) \{(?<narrators>.*)\}$/ + let match = folder.match(pattern) + return match ? [match.groups.title, match.groups.narrators] : [folder, null] } function getSequence(folder) { - // Valid ways of including a volume number: - // [ - // 'Book 2 - Title - Subtitle', - // 'Title - Subtitle - Vol 12', - // 'Title - volume 9 - Subtitle', - // 'Vol. 3 Title Here - Subtitle', - // '1980 - Book 2 - Title', - // 'Volume 12. Title - Subtitle', - // '100 - Book Title', - // '2 - Book Title', - // '6. Title', - // '0.5 - Book Title' - // ] + // Valid ways of including a volume number: + // [ + // 'Book 2 - Title - Subtitle', + // 'Title - Subtitle - Vol 12', + // 'Title - volume 9 - Subtitle', + // 'Vol. 3 Title Here - Subtitle', + // '1980 - Book 2 - Title', + // 'Volume 12. Title - Subtitle', + // '100 - Book Title', + // '2 - Book Title', + // '6. Title', + // '0.5 - Book Title' + // ] - // Matches a valid volume string. Also matches a book whose title starts with a 1 to 3 digit number. Will handle that later. - let pattern = /^(?<volumeLabel>vol\.? |volume |book )?(?<sequence>\d{1,3}(?:\.\d{1,2})?)(?<trailingDot>\.?)(?: (?<suffix>.*))?/i + // Matches a valid volume string. Also matches a book whose title starts with a 1 to 3 digit number. Will handle that later. + let pattern = /^(?<volumeLabel>vol\.? |volume |book )?(?<sequence>\d{1,3}(?:\.\d{1,2})?)(?<trailingDot>\.?)(?: (?<suffix>.*))?/i - let volumeNumber = null - let parts = folder.split(' - ') - for (let i = 0; i < parts.length; i++) { - let match = parts[i].match(pattern) + let volumeNumber = null + let parts = folder.split(' - ') + for (let i = 0; i < parts.length; i++) { + let match = parts[i].match(pattern) - // This excludes '101 Dalmations' but includes '101. Dalmations' - if (match && !(match.groups.suffix && !(match.groups.volumeLabel || match.groups.trailingDot))) { - volumeNumber = match.groups.sequence - parts[i] = match.groups.suffix - if (!parts[i]) { parts.splice(i, 1) } - break - } + // This excludes '101 Dalmations' but includes '101. Dalmations' + if (match && !(match.groups.suffix && !(match.groups.volumeLabel || match.groups.trailingDot))) { + volumeNumber = match.groups.sequence + parts[i] = match.groups.suffix + if (!parts[i]) { parts.splice(i, 1) } + break } + } - folder = parts.join(' - ') - return [folder, volumeNumber] + folder = parts.join(' - ') + return [folder, volumeNumber] } function getPublishedYear(folder) { - var publishedYear = null + var publishedYear = null - pattern = /^ *\(?([0-9]{4})\)? * - *(.+)/ //Matches #### - title or (####) - title - var match = folder.match(pattern) - if (match) { - publishedYear = match[1] - folder = match[2] - } + pattern = /^ *\(?([0-9]{4})\)? * - *(.+)/ //Matches #### - title or (####) - title + var match = folder.match(pattern) + if (match) { + publishedYear = match[1] + folder = match[2] + } - return [folder, publishedYear] + return [folder, publishedYear] } function getSubtitle(folder) { - // Subtitle is everything after " - " - var splitTitle = folder.split(' - ') - return [splitTitle.shift(), splitTitle.join(' - ')] + // Subtitle is everything after " - " + var splitTitle = folder.split(' - ') + return [splitTitle.shift(), splitTitle.join(' - ')] } function getPodcastDataFromDir(folderPath, relPath) { - relPath = relPath.replace(/\\/g, '/') - var splitDir = relPath.split('/') + relPath = relPath.replace(/\\/g, '/') + var splitDir = relPath.split('/') - // Audio files will always be in the directory named for the title - var title = splitDir.pop() - return { - mediaMetadata: { - title - }, - relPath: relPath, // relative audiobook path i.e. /Author Name/Book Name/.. - path: Path.posix.join(folderPath, relPath) // i.e. /audiobook/Author Name/Book Name/.. - } + // Audio files will always be in the directory named for the title + var title = splitDir.pop() + return { + mediaMetadata: { + title + }, + relPath: relPath, // relative audiobook path i.e. /Author Name/Book Name/.. + path: Path.posix.join(folderPath, relPath) // i.e. /audiobook/Author Name/Book Name/.. + } } function getDataFromMediaDir(libraryMediaType, folderPath, relPath, serverSettings) { - if (libraryMediaType === 'podcast') { - return getPodcastDataFromDir(folderPath, relPath) - } else { - var parseSubtitle = !!serverSettings.scannerParseSubtitle - return getBookDataFromDir(folderPath, relPath, parseSubtitle) - } + if (libraryMediaType === 'podcast') { + return getPodcastDataFromDir(folderPath, relPath) + } else { + var parseSubtitle = !!serverSettings.scannerParseSubtitle + return getBookDataFromDir(folderPath, relPath, parseSubtitle) + } } // Called from Scanner.js async function getLibraryItemFileData(libraryMediaType, folder, libraryItemPath, isSingleMediaItem, serverSettings = {}) { - libraryItemPath = libraryItemPath.replace(/\\/g, '/') - var folderFullPath = folder.fullPath.replace(/\\/g, '/') + libraryItemPath = libraryItemPath.replace(/\\/g, '/') + var folderFullPath = folder.fullPath.replace(/\\/g, '/') - var libraryItemDir = libraryItemPath.replace(folderFullPath, '').slice(1) - var libraryItemData = {} + var libraryItemDir = libraryItemPath.replace(folderFullPath, '').slice(1) + var libraryItemData = {} - var fileItems = [] + var fileItems = [] - if (isSingleMediaItem) { // Single media item in root of folder - fileItems = [{ - fullpath: libraryItemPath, - path: libraryItemDir // actually the relPath (only filename here) - }] - libraryItemData = { - path: libraryItemPath, // full path - relPath: libraryItemDir, // only filename - mediaMetadata: { - title: Path.basename(libraryItemDir, Path.extname(libraryItemDir)) - } - } - } else { - fileItems = await recurseFiles(libraryItemPath) - libraryItemData = getDataFromMediaDir(libraryMediaType, folderFullPath, libraryItemDir, serverSettings) + if (isSingleMediaItem) { // Single media item in root of folder + fileItems = [{ + fullpath: libraryItemPath, + path: libraryItemDir // actually the relPath (only filename here) + }] + libraryItemData = { + path: libraryItemPath, // full path + relPath: libraryItemDir, // only filename + mediaMetadata: { + title: Path.basename(libraryItemDir, Path.extname(libraryItemDir)) + } } + } else { + fileItems = await recurseFiles(libraryItemPath) + libraryItemData = getDataFromMediaDir(libraryMediaType, folderFullPath, libraryItemDir, serverSettings) + } - var libraryItemDirStats = await getFileTimestampsWithIno(libraryItemData.path) - var libraryItem = { - ino: libraryItemDirStats.ino, - mtimeMs: libraryItemDirStats.mtimeMs || 0, - ctimeMs: libraryItemDirStats.ctimeMs || 0, - birthtimeMs: libraryItemDirStats.birthtimeMs || 0, - folderId: folder.id, - libraryId: folder.libraryId, - path: libraryItemData.path, - relPath: libraryItemData.relPath, - isFile: isSingleMediaItem, - media: { - metadata: libraryItemData.mediaMetadata || null - }, - libraryFiles: [] - } + var libraryItemDirStats = await getFileTimestampsWithIno(libraryItemData.path) + var libraryItem = { + ino: libraryItemDirStats.ino, + mtimeMs: libraryItemDirStats.mtimeMs || 0, + ctimeMs: libraryItemDirStats.ctimeMs || 0, + birthtimeMs: libraryItemDirStats.birthtimeMs || 0, + folderId: folder.id, + libraryId: folder.libraryId, + path: libraryItemData.path, + relPath: libraryItemData.relPath, + isFile: isSingleMediaItem, + media: { + metadata: libraryItemData.mediaMetadata || null + }, + libraryFiles: [] + } - for (let i = 0; i < fileItems.length; i++) { - var fileItem = fileItems[i] - var newLibraryFile = new LibraryFile() - // fileItem.path is the relative path - await newLibraryFile.setDataFromPath(fileItem.fullpath, fileItem.path) - libraryItem.libraryFiles.push(newLibraryFile) - } - return libraryItem + for (let i = 0; i < fileItems.length; i++) { + var fileItem = fileItems[i] + var newLibraryFile = new LibraryFile() + // fileItem.path is the relative path + await newLibraryFile.setDataFromPath(fileItem.fullpath, fileItem.path) + libraryItem.libraryFiles.push(newLibraryFile) + } + return libraryItem } module.exports.getLibraryItemFileData = getLibraryItemFileData \ No newline at end of file