Add bonuspunkt's parser

Fixes #15.
Fixes #199.
Fixes #583.
Fixes #654.
Fixes #928.
Fixes #1001.
This commit is contained in:
Bonuspunkt 2017-03-18 10:35:17 +02:00 committed by Pavel Djundik
parent ef8a2bab18
commit 0b85ded53f
6 changed files with 348 additions and 111 deletions

View file

@ -0,0 +1,10 @@
"use strict";
/**
 * Tells whether two ranges share at least one position.
 *
 * Both arguments are objects with numeric `start` and `end` properties; the
 * comparisons treat `start` as inclusive and `end` as exclusive.
 *
 * @param {{start: number, end: number}} a - First range.
 * @param {{start: number, end: number}} b - Second range.
 * @returns {boolean} `true` when one range starts or ends inside the other.
 */
function anyIntersection(a, b) {
	// Each flag covers one way an endpoint of a range can fall inside the other.
	const bStartsInsideA = a.start <= b.start && b.start < a.end;
	const bEndsInsideA = a.start < b.end && b.end <= a.end;
	const aStartsInsideB = b.start <= a.start && a.start < b.end;
	const aEndsInsideB = b.start < a.end && a.end <= b.end;

	return bStartsInsideA || bEndsInsideA || aStartsInsideB || aEndsInsideB;
}
module.exports = anyIntersection;

View file

@ -0,0 +1,29 @@
"use strict";
/**
 * Computes the ranges of `text` that are not covered by any existing entry.
 *
 * `existingEntries` is expected to be ordered by `start`. Entries may overlap
 * or be nested inside one another: the cursor never moves backwards, so a
 * gap is only reported for positions no entry covers.
 *
 * @param {Array<{start: number, end: number}>} existingEntries - Ranges that
 *   are already accounted for, ordered by `start`.
 * @param {string} text - The text the ranges refer to; only its length is used.
 * @returns {Array<{start: number, end: number}>} The uncovered ranges, in order.
 */
function fill(existingEntries, text) {
	const result = [];
	let position = 0;

	existingEntries.forEach(entry => {
		if (entry.start > position) {
			result.push({
				start: position,
				end: entry.start
			});
		}

		// An entry nested inside an earlier one ends before `position`;
		// keep the furthest end seen so covered text is not reported as a gap.
		position = Math.max(position, entry.end);
	});

	// Trailing text after the last entry
	if (position < text.length) {
		result.push({
			start: position,
			end: text.length
		});
	}

	return result;
}
module.exports = fill;

View file

@ -0,0 +1,32 @@
"use strict";
const escapeRegExp = require("lodash/escapeRegExp");
// NOTE: channel prefixes should be RPL_ISUPPORT.CHANTYPES
// NOTE: userModes should be RPL_ISUPPORT.PREFIX
/**
 * Locates channel names inside a plain-text message.
 *
 * A channel is recognised at the start of the text or after whitespace,
 * optionally preceded by any number of user mode symbols, and runs until the
 * next space or BEL character.
 *
 * @param {string} text - Message text to scan.
 * @param {string[]} channelPrefixes - Characters that may start a channel name.
 * @param {string[]} userModes - Mode symbols allowed in front of the channel.
 * @returns {Array<{start: number, end: number, channel: string}>} One entry
 *   per channel found; `start`/`end` delimit the channel name itself, without
 *   the leading whitespace or mode symbols.
 */
function findChannels(text, channelPrefixes, userModes) {
	// The symbols are placed inside a `[...]` character class. escapeRegExp
	// leaves "-" untouched, which would turn e.g. ["+", "-", "@"] into the
	// range "+" to "@", so hyphens are escaped explicitly.
	const toCharacterClass = symbols => symbols
		.map(escapeRegExp)
		.join("")
		.replace(/-/g, "\\-");

	const userModePattern = toCharacterClass(userModes);
	const channelPrefixPattern = toCharacterClass(channelPrefixes);
	const channelPattern = `(?:^|\\s)[${ userModePattern }]*([${ channelPrefixPattern }][^ \u0007]+)`;
	const channelRegExp = new RegExp(channelPattern, "g");
	const result = [];
	let match;

	while ((match = channelRegExp.exec(text)) !== null) {
		// match[0] also holds the leading whitespace and mode symbols;
		// the channel (match[1]) is always its tail.
		const end = match.index + match[0].length;

		result.push({
			start: end - match[1].length,
			end: end,
			channel: match[1]
		});
	}

	return result;
}
module.exports = findChannels;

View file

@ -0,0 +1,47 @@
"use strict";
const anyIntersection = require("./anyIntersection");
const fill = require("./fill");
// Use the native Object.assign when the runtime provides it; otherwise fall
// back to a minimal shallow copy of own enumerable string keys.
const Object_assign = typeof Object.assign === "function"
	? Object.assign
	: function(target) {
		const sources = Array.prototype.slice.call(arguments, 1);

		sources.forEach(function(source) {
			Object.keys(source).forEach(function(property) {
				target[property] = source[property];
			});
		});

		return target;
	};
/**
 * Returns a copy of `fragment` restricted to the range of `textPart`.
 *
 * The copy keeps every property of the fragment but has `start`, `end` and
 * `text` narrowed to the portion lying within the text part. The original
 * fragment is left untouched.
 *
 * @param {{start: number, end: number}} textPart - Range to clip to.
 * @param {{start: number, end: number, text: string}} fragment - Fragment to clip.
 * @returns {Object} A new, clipped fragment object.
 */
function assign(textPart, fragment) {
	const offset = fragment.start;
	const clippedStart = Math.max(offset, textPart.start);
	const clippedEnd = Math.min(fragment.end, textPart.end);

	// Positions are absolute; the slice indexes are relative to the fragment.
	const clipped = {
		start: clippedStart,
		end: clippedEnd,
		text: fragment.text.slice(clippedStart - offset, clippedEnd - offset)
	};

	return Object_assign({}, fragment, clipped);
}
/**
 * Combines structural text parts with style fragments.
 *
 * The given text parts are completed with filler parts for the uncovered text
 * (via `fill`), ordered by `start`, and each part receives a `fragments`
 * array holding the style fragments that intersect it, clipped to the part.
 *
 * Note: the `fragments` property is written onto the objects of `textParts`
 * themselves.
 *
 * @param {Array<{start: number, end: number}>} textParts - Parts found in the text.
 * @param {Array<{start: number, end: number, text: string}>} styleFragments -
 *   Style fragments covering the text.
 * @returns {Array<Object>} All parts, ordered by `start`, each with `fragments`.
 */
function merge(textParts, styleFragments) {
	const cleanText = styleFragments.map(fragment => fragment.text).join("");
	const fillerParts = fill(textParts, cleanText);

	const orderedParts = textParts.concat(fillerParts);
	orderedParts.sort((left, right) => left.start - right.start);

	orderedParts.forEach(part => {
		const overlapping = styleFragments.filter(fragment => anyIntersection(part, fragment));
		part.fragments = overlapping.map(fragment => assign(part, fragment));
	});

	return orderedParts;
}
module.exports = merge;

View file

@ -0,0 +1,131 @@
"use strict";
// Control characters that toggle or reset text styles.
const BOLD = "\x02";
const COLOR = "\x03";
const RESET = "\x0f";
const REVERSE = "\x16";
const ITALIC = "\x1d";
const UNDERLINE = "\x1f";

// Arguments of a color code: one or two digits for the foreground, optionally
// followed by a comma and one or two digits for the background.
const colorRx = /^(\d{1,2})(?:,(\d{1,2}))?/;

// Longest string `colorRx` can match ("NN,NN").
const colorCodeMaxLength = 5;

// Any control character left in a text run; removed from emitted text.
const controlCodesRx = /[\u0000-\u001F]/g;

/**
 * Splits a message into runs of text that share the same style.
 *
 * The text is scanned character by character. Every style control code closes
 * the current run (if it contains any visible text) and updates the style
 * state used for the following run. Control characters are never part of the
 * emitted text.
 *
 * @param {string} text - Raw message, possibly containing style control codes.
 * @returns {Array<{bold: boolean, textColor: (number|undefined),
 *   bgColor: (number|undefined), reverse: boolean, italic: boolean,
 *   underline: boolean, text: string}>} The styled runs, in order.
 */
function parseStyle(text) {
	const result = [];
	let start = 0;
	let position = 0;

	let colorCodes, bold, textColor, bgColor, reverse, italic, underline;

	const resetStyle = () => {
		bold = false;
		textColor = undefined;
		bgColor = undefined;
		reverse = false;
		italic = false;
		underline = false;
	};
	resetStyle();

	// Pushes the text accumulated since `start` with the current style and
	// moves `start` past the control character at `position`.
	const emitFragment = () => {
		const textPart = text.slice(start, position);
		start = position + 1;

		const processedText = textPart.replace(controlCodesRx, "");
		if (!processedText.length) {
			return;
		}

		result.push({
			bold,
			textColor,
			bgColor,
			reverse,
			italic,
			underline,
			text: processedText
		});
	};

	while (position < text.length) {
		switch (text[position]) {
		case RESET:
			emitFragment();
			resetStyle();
			break;

		case BOLD:
			emitFragment();
			bold = !bold;
			break;

		case COLOR:
			emitFragment();

			// Only the few characters right after the control code can hold
			// its arguments; no need to copy the rest of the message.
			colorCodes = text
				.slice(position + 1, position + 1 + colorCodeMaxLength)
				.match(colorRx);

			if (colorCodes) {
				textColor = Number(colorCodes[1]);

				// A foreground-only code leaves the current background as is.
				if (colorCodes[2] !== undefined) {
					bgColor = Number(colorCodes[2]);
				}

				// Skip the digits so they do not end up in the text.
				position += colorCodes[0].length;
			} else {
				// A bare color code switches both colors off.
				textColor = undefined;
				bgColor = undefined;
			}

			start = position + 1;
			break;

		case REVERSE:
			emitFragment();
			reverse = !reverse;
			break;

		case ITALIC:
			emitFragment();
			italic = !italic;
			break;

		case UNDERLINE:
			emitFragment();
			underline = !underline;
			break;
		}

		position += 1;
	}

	// Whatever is left after the last control code
	emitFragment();

	return result;
}
// Style attributes that must all match for two neighbouring fragments to be
// considered identically styled.
const properties = ["bold", "textColor", "bgColor", "italic", "underline", "reverse"];

/**
 * Parses the styles of a message and normalises the resulting fragments.
 *
 * Empty fragments are dropped, consecutive fragments with identical style are
 * joined into one, and every remaining fragment gets `start`/`end` offsets
 * relative to the concatenated fragment texts.
 *
 * @param {string} text - Raw message handed to `parseStyle`.
 * @returns {Array<Object>} Merged fragments with `start` and `end` set.
 */
function prepare(text) {
	const merged = [];

	parseStyle(text).forEach(fragment => {
		if (!fragment.text.length) {
			return;
		}

		const previous = merged[merged.length - 1];
		const sameStyle = Boolean(previous) &&
			properties.every(key => fragment[key] === previous[key]);

		if (sameStyle) {
			// Same look as the previous run: extend it instead of adding one
			previous.text += fragment.text;
		} else {
			merged.push(fragment);
		}
	});

	// Each fragment begins where the previous one ended.
	let offset = 0;
	merged.forEach(fragment => {
		fragment.start = offset;
		offset += fragment.text.length;
		fragment.end = offset;
	});

	return merged;
}
module.exports = prepare;

View file

@ -2,125 +2,113 @@
const Handlebars = require("handlebars/runtime");
const URI = require("urijs");
const parseStyle = require("./ircmessageparser/parseStyle");
const findChannels = require("./ircmessageparser/findChannels");
const merge = require("./ircmessageparser/merge");
module.exports = function(text) {
text = Handlebars.Utils.escapeExpression(text);
text = colors(text);
text = channels(text);
text = uri(text);
return text;
};
// Schemes recognised at the end of whatever scheme the URI parser reports.
// The list is searched front to back with `endsWith`, and the first hit wins,
// so a scheme that is the tail of another one must come AFTER it: "sftp"
// before "ftp" (otherwise sftp:// links are rewritten to ftp://), and
// "svn+ssh" before "ssh".
const commonSchemes = [
	"http", "https",
	"sftp", "ftp",
	"smb", "file",
	"irc", "ircs",
	"svn", "git",
	"steam", "mumble", "ts3server",
	"svn+ssh", "ssh",
];
/**
 * Wraps every URL that `URI.withinString` reports in `text` in an anchor tag.
 *
 * URLs starting with "javascript:" are returned unchanged. Anything from the
 * first "<" onwards is kept out of the anchor and appended after it. A
 * leading "www" gets "http://" prepended in the href only.
 *
 * @param {string} text - Text to linkify.
 * @returns {string} The value returned by `URI.withinString`.
 */
function uri(text) {
	return URI.withinString(text, function(url) {
		if (url.startsWith("javascript:")) {
			return url;
		}

		const pieces = url.split("<");
		const target = pieces[0];
		const trailing = pieces.slice(1);
		const anchor = "<a href='" + target.replace(/^www/, "http://www") + "' target='_blank' rel='noopener'>" + target + "</a>";

		if (trailing.length === 0) {
			return anchor;
		}

		return anchor + "<" + trailing.join("<");
	});
}
function findLinks(text) {
let result = [];
let lastPosition = 0;
/**
 * Wraps channel names in a clickable span.
 *
 * Channel names are strings of up to fifty (50) characters. The only
 * restriction on a channel name is that it SHALL NOT contain any spaces
 * (' '), a control G (^G or ASCII 7), or a comma (',').
 * The channel prefix '&' is matched as '&amp;' because this parser runs
 * after the entities in the message have been escaped.
 */
function channels(text) {
	const channelRx = /(^|\s|\x07|,)((?:#|&amp;)[^\x07\s,]{1,49})/g;
	// $1 is the delimiter in front of the channel, $2 the channel itself.
	const markup = '$1<span class="inline-channel" role="button" tabindex="0" data-chan="$2">$2</span>';

	return text.replace(channelRx, markup);
}
/**
* MIRC compliant colour and style parser
* Unfortunately, this is a non-trivial operation
* See this branch for source and tests
* https://github.com/megawac/irc-style-parser/tree/shout
*/
// Matches any control character in the \x00-\x1F range.
const styleCheck_Re = /[\x00-\x1F]/;
// Leading colour arguments: foreground digits, optional ",background" digits.
const back_re = /^([0-9]{1,2})(,([0-9]{1,2}))?/;
// ^C (\x03)
const colourKey = "\x03";
// breaks all open styles ^O (\x0F)
const styleBreak = "\x0F";
/**
 * Renders a span carrying the given class attribute around the given text.
 *
 * @param {{style: string, text: string}} settings - Class value and content.
 * @returns {string} The span markup; neither value is escaped here.
 */
function styleTemplate(settings) {
	const openingTag = "<span class='" + settings.style + "'>";
	return openingTag + settings.text + "</span>";
}
// Toggle-style codes: for each one, the CSS class to apply and a regular
// expression matching the text between the code and its next occurrence (or
// the end of the string).
const styles = [
	{name: "normal", key: "\x00", className: ""},
	{name: "underline", key: "\x1F"},
	{name: "bold", key: "\x02"},
	{name: "italic", key: "\x1D"}
].map(function(entry) {
	// encodeURI turns the control character into "%NN"; swapping the "%" for
	// "\x" yields the matching regular expression escape.
	const escapedKey = encodeURI(entry.key).replace("%", "\\x");

	return {
		name: entry.name,
		// An empty or missing class name falls back to "irc-<name>"
		style: entry.className || ("irc-" + entry.name),
		key: entry.key,
		keyregex: new RegExp(escapedKey + "(.*?)(" + escapedKey + "|$)")
	};
});
function colors(line) {
// http://www.mirc.com/colors.html
// http://www.aviran.org/stripremove-irc-client-control-characters/
// https://github.com/perl6/mu/blob/master/examples/rules/Grammar-IRC.pm
// regexs are cruel to parse this thing
// already done?
if (!styleCheck_Re.test(line)) {
return line;
}
// split up by the irc style break character ^O
if (line.indexOf(styleBreak) >= 0) {
return line.split(styleBreak).map(colors).join("");
}
var result = line;
var parseArr = result.split(colourKey);
var text, match, colour, background = "";
for (var i = 0; i < parseArr.length; i++) {
text = parseArr[i];
match = text.match(back_re);
if (!match) {
// ^C (no colour) ending. Escape current colour and carry on
background = "";
continue;
}
colour = "irc-fg" + +match[1];
// set the background colour
if (match[3]) {
background = " irc-bg" + +match[3];
}
// update the parsed text result
result = result.replace(colourKey + text, styleTemplate({
style: colour + background,
text: text.slice(match[0].length)
}));
}
// Matching styles (italics/bold/underline)
// if only colours were this easy...
styles.forEach(function(style) {
if (result.indexOf(style.key) < 0) {
URI.withinString(text, function(url, start, end) {
// v-- fix: url was modified and does not match input string -> can't be mapped
if (text.indexOf(url, lastPosition) < 0) {
return;
}
// ^-- /fix: url was modified and does not match input string -> can't be mapped
result = result.replace(style.keyregex, function(matchedTrash, matchedText) {
return styleTemplate({
style: style.style,
text: matchedText
});
// v-- fix: use preferred scheme
const parsed = URI(url);
const parsedScheme = parsed.scheme().toLowerCase();
const matchedScheme = commonSchemes.find(scheme => parsedScheme.endsWith(scheme));
if (matchedScheme) {
const prefix = parsedScheme.length - matchedScheme.length;
start += prefix;
url = url.slice(prefix);
}
// ^-- /fix: use preferred scheme
// URL matched, but does not start with a protocol, add it
if (!parsedScheme.length) {
url = "http://" + url;
}
result.push({
start: start,
end: end,
link: url
});
});
return result;
}
/**
 * Renders one style fragment as HTML.
 *
 * The fragment text is escaped with Handlebars. When the fragment is bold,
 * italic, underlined or coloured, the escaped text is wrapped in a span
 * carrying the matching `irc-*` classes; otherwise it is returned bare.
 * The fragment's `reverse` flag is not consulted.
 *
 * @param {Object} fragment - Style fragment with a `text` property.
 * @returns {string} HTML for the fragment.
 */
function createFragment(fragment) {
	const classes = [];

	if (fragment.bold) {
		classes.push("irc-bold");
	}

	// Colour 0 is a valid colour, hence the explicit comparison.
	if (fragment.textColor !== undefined) {
		classes.push("irc-fg" + fragment.textColor);
	}

	if (fragment.bgColor !== undefined) {
		classes.push("irc-bg" + fragment.bgColor);
	}

	if (fragment.italic) {
		classes.push("irc-italic");
	}

	if (fragment.underline) {
		classes.push("irc-underline");
	}

	const escapedText = Handlebars.Utils.escapeExpression(fragment.text);

	if (classes.length === 0) {
		return escapedText;
	}

	return "<span class='" + classes.join(" ") + "'>" + escapedText + "</span>";
}
module.exports = function parse(text) {
const styleFragments = parseStyle(text);
const cleanText = styleFragments.map(fragment => fragment.text).join("");
const channelPrefixes = ["#", "&"]; // RPL_ISUPPORT.CHANTYPES
const userModes = ["!", "@", "%", "+"]; // RPL_ISUPPORT.PREFIX
const channelParts = findChannels(cleanText, channelPrefixes, userModes);
const linkParts = findLinks(cleanText);
const parts = channelParts
.concat(linkParts)
.sort((a, b) => a.start - b.start);
return merge(parts, styleFragments).map(textPart => {
const fragments = textPart.fragments.map(createFragment).join("");
if (textPart.link) {
const escapedLink = Handlebars.Utils.escapeExpression(textPart.link);
return (
"<a href='" + escapedLink + "' target='_blank' rel='noopener'>" +
fragments +
"</a>");
} else if (textPart.channel) {
const escapedChannel = Handlebars.Utils.escapeExpression(textPart.channel);
return (
"<span class='inline-channel' role='button' tabindex='0' data-chan='" + escapedChannel + "'>" +
fragments +
"</span>");
}
return fragments;
}).join("");
};