Merge pull request #64 from ItsVipra/55-sanitize-html-special-characters

Sanitize html special characters
This commit is contained in:
Vivien 2023-08-01 11:00:21 +02:00 committed by GitHub
commit d2a992eea2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 28 additions and 1 deletions

View file

@ -85,3 +85,26 @@ export function insertAfter(insertion, target) {
//docs: https://developer.mozilla.org/en-US/docs/Web/API/Node/insertBefore#example_2
target.parentElement.insertBefore(insertion, target.nextSibling);
}
/**
 * Turns HTML text into human-readable text by decoding HTML entities.
 *
 * When a DOMParser is available (browser), the input is parsed as an HTML
 * document and its text content is returned. Otherwise (e.g. when running
 * under Node) a small table of common entities is decoded by hand.
 * @param {string} input HTML Text
 * @returns {string} The input with HTML entities replaced by the characters they stand for.
 */
export function htmlDecode(input) {
	if (typeof window === "undefined" || !window.DOMParser) {
		// Fallback table: entity -> literal text. "&nbsp;" is dropped entirely.
		const replacements = {
			"&amp;": "&",
			"&quot;": '"',
			"&lt;": "<",
			"&gt;": ">",
			"&nbsp;": "",
		};
		// Decode in a single pass so already-decoded output is never re-scanned;
		// otherwise "&amp;lt;" would be double-decoded to "<" instead of "&lt;".
		// The keys contain no regex metacharacters, so they can be joined as-is.
		const entityPattern = new RegExp(Object.keys(replacements).join("|"), "g");
		return input.replace(entityPattern, (entity) => replacements[entity]);
	}
	const doc = new DOMParser().parseFromString(input, "text/html");
	return doc.documentElement.textContent;
}

View file

@ -1,4 +1,5 @@
import sanitizeHtml from "sanitize-html";
import { htmlDecode } from "./domhelpers.js";
const fieldMatchers = [/\bpro.*nouns?\b/i, /\bpronomen\b/i, /(i )?go(es)? by/i];
const knownPronounUrls = [
@ -179,9 +180,12 @@ function sanitizePronouns(str) {
// Remove trailing characters that are used as separators.
str = str.replace(/[-| :/]+$/, "");
// Finally, remove leading and trailing whitespace.
// Remove leading and trailing whitespace.
str = str.trim();
// Finally, turn escaped characters (e.g. &amp;, &gt;) back into their original form.
str = htmlDecode(str);
// If the result is empty, return null, otherwise return the string.
return str === "" ? null : str;
}