Updated URL regexes to match more unescaped special characters

This commit is contained in:
n1474335 2017-08-15 16:44:45 +00:00
parent cf1ba60a10
commit a19b02aa8c
2 changed files with 3 additions and 3 deletions

View file

@@ -170,9 +170,9 @@ const Extract = {
protocol = "[A-Z]+://", protocol = "[A-Z]+://",
hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+", hostname = "[-\\w]+(?:\\.\\w[-\\w]*)+",
port = ":\\d+", port = ":\\d+",
path = "/[^.!,?;\"'<>()\\[\\]{}\\s\\x7F-\\xFF]*"; path = "/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*";
path += "(?:[.!,?]+[^.!,?;\"'<>()\\[\\]{}\\s\\x7F-\\xFF]+)*"; path += "(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*";
const regex = new RegExp(protocol + hostname + "(?:" + port + const regex = new RegExp(protocol + hostname + "(?:" + port +
")?(?:" + path + ")?", "ig"); ")?(?:" + path + ")?", "ig");
return Extract._search(input, regex, null, displayTotal); return Extract._search(input, regex, null, displayTotal);

View file

@@ -36,7 +36,7 @@ const StrUtils = {
}, },
{ {
name: "URL", name: "URL",
value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?;\"\\x27<>()\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?;\"\\x27<>()\\[\\]{}\\s\\x7F-\\xFF]+)*)?" value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"\<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"\<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?"
}, },
{ {
name: "Domain", name: "Domain",