Strip HTML from OPF descriptions

This commit is contained in:
Igor Kaldowski 2021-12-04 21:07:43 +00:00
parent 6e621ecce1
commit 670a461752

View File

@@ -1,4 +1,5 @@
const { xmlToJSON } = require('./index') const { xmlToJSON } = require('./index')
const { stripHtml } = require("string-strip-html")
function parseCreators(metadata) { function parseCreators(metadata) {
if (!metadata['dc:creator']) return null if (!metadata['dc:creator']) return null
@@ -56,7 +57,7 @@ function fetchDescription(metadata) {
// check if description is HTML or plain text. only plain text allowed // check if description is HTML or plain text. only plain text allowed
// calibre stores < and > as &lt; and &gt; // calibre stores < and > as &lt; and &gt;
description = description.replace(/&lt;/g, '<').replace(/&gt;/g, '>') description = description.replace(/&lt;/g, '<').replace(/&gt;/g, '>')
if (description.match(/<!DOCTYPE html>|<\/?\s*[a-z-][^>]*\s*>|(\&(?:[\w\d]+|#\d+|#x[a-f\d]+);)/)) return null if (description.match(/<!DOCTYPE html>|<\/?\s*[a-z-][^>]*\s*>|(\&(?:[\w\d]+|#\d+|#x[a-f\d]+);)/)) return stripHtml(description).result
return description return description
} }