const Path = require('path') const h = require('htmlparser2') const ds = require('dom-serializer') const Logger = require('../../Logger') const StreamZip = require('../../libs/nodeStreamZip') const css = require('../../libs/css') const { xmlToJSON } = require('../index.js') module.exports.parse = async (ebookFile, libraryItemId, token, isDev) => { const zip = new StreamZip.async({ file: ebookFile.metadata.path }) const containerXml = await zip.entryData('META-INF/container.xml') const containerJson = await xmlToJSON(containerXml.toString('utf8')) const packageOpfPath = containerJson.container.rootfiles[0].rootfile[0].$['full-path'] const packageOpfDir = Path.dirname(packageOpfPath) const packageDoc = await zip.entryData(packageOpfPath) const packageJson = await xmlToJSON(packageDoc.toString('utf8')) const pages = [] let manifestItems = packageJson.package.manifest[0].item.map(item => item.$) const spineItems = packageJson.package.spine[0].itemref.map(ref => ref.$.idref) for (const spineItem of spineItems) { const mi = manifestItems.find(i => i.id === spineItem) if (mi) { manifestItems = manifestItems.filter(_mi => _mi.id !== mi.id) // Remove from manifest items mi.path = Path.posix.join(packageOpfDir, mi.href) pages.push(mi) } else { Logger.error('[parseEpub] Invalid spine item', spineItem) } } const stylesheets = [] const resources = [] for (const manifestItem of manifestItems) { manifestItem.path = Path.posix.join(packageOpfDir, manifestItem.href) if (manifestItem['media-type'] === 'text/css') { const stylesheetData = await zip.entryData(manifestItem.path) const modifiedCss = this.parseStylesheet(stylesheetData.toString('utf8'), manifestItem.path, libraryItemId, token, isDev) if (modifiedCss) { manifestItem.style = modifiedCss stylesheets.push(manifestItem) } else { Logger.error(`[parseEpub] Invalid stylesheet "${manifestItem.path}"`) } } else { resources.push(manifestItem) } } await zip.close() return { filepath: ebookFile.metadata.path, epubVersion: packageJson.package.$.version, packageDir: packageOpfDir, resources, stylesheets, pages } } module.exports.parsePage = async (pagePath, bookData, libraryItemId, token, isDev) => { const pageDir = Path.dirname(pagePath) const zip = new StreamZip.async({ file: bookData.filepath }) const pageData = await zip.entryData(pagePath) await zip.close() const rawHtml = pageData.toString('utf8') const results = {} const dh = new h.DomHandler((err, dom) => { if (err) return results.error = err // Get stylesheets const isStylesheetLink = (elem) => elem.type == 'tag' && elem.name.toLowerCase() === 'link' && elem.attribs.rel === 'stylesheet' && elem.attribs.type === 'text/css' const stylesheets = h.DomUtils.findAll(isStylesheetLink, dom) // Get body tag const isBodyTag = (elem) => elem.type == 'tag' && elem.name.toLowerCase() == 'body' const body = h.DomUtils.findOne(isBodyTag, dom) // Get all svg elements const isSvgTag = (name) => ['svg'].includes((name || '').toLowerCase()) const svgElements = h.DomUtils.getElementsByTagName(isSvgTag, body.children) svgElements.forEach((el) => { if (el.attribs.class) el.attribs.class += ' abs-svg-scale' else el.attribs.class = 'abs-svg-scale' }) // Get all img elements const isImageTag = (name) => ['img', 'image'].includes((name || '').toLowerCase()) const imgElements = h.DomUtils.getElementsByTagName(isImageTag, body.children) imgElements.forEach(el => { if (!el.attribs.src && !el.attribs['xlink:href']) { Logger.warn('[parseEpub] parsePage: Invalid img element attribs', el.attribs) return } if (el.attribs.class) el.attribs.class += ' abs-image-scale' else el.attribs.class = 'abs-image-scale' const srcKey = el.attribs.src ? 'src' : 'xlink:href' const src = encodeURIComponent(Path.posix.join(pageDir, el.attribs[srcKey])) const basePath = isDev ? 'http://localhost:3333' : '' el.attribs[srcKey] = `${basePath}/api/ebooks/${libraryItemId}/resource?path=${src}&token=${token}` }) let finalHtml = '