mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-14 05:53:59 +00:00
parent
f56c317bd0
commit
e160cde606
7 changed files with 107 additions and 3 deletions
|
@ -196,6 +196,8 @@ static int handle_flag_1(wchar_t **argv, parser_t &parser, io_streams_t &streams
|
|||
opts->escape_style = STRING_STYLE_URL;
|
||||
} else if (wcscmp(w.woptarg, L"var") == 0) {
|
||||
opts->escape_style = STRING_STYLE_VAR;
|
||||
} else if (wcscmp(w.woptarg, L"pcre2") == 0) {
|
||||
opts->escape_style = STRING_STYLE_PCRE2;
|
||||
} else {
|
||||
string_error(streams, _(L"%ls: Invalid escape style '%ls'\n"), cmd, w.woptarg);
|
||||
return STATUS_INVALID_ARGS;
|
||||
|
|
|
@ -1096,6 +1096,42 @@ static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring
|
|||
}
|
||||
}
|
||||
|
||||
/// Escapes a string for use in a regex string. Not safe for use with `eval` as only
|
||||
/// characters reserved by PCRE2 are escaped, i.e. it relies on fish's automatic escaping
|
||||
/// of subshell output in subsequent concatenation or for use as an argument.
|
||||
/// \param in is the raw string to be searched for literally when substituted in a PCRE2 expression.
|
||||
static wcstring escape_string_pcre2(const wcstring &in) {
|
||||
wcstring out;
|
||||
out.reserve(in.size() * 1.3); // a wild guess
|
||||
|
||||
for (auto c : in) {
|
||||
switch (c) {
|
||||
case L'.':
|
||||
case L'^':
|
||||
case L'$':
|
||||
case L'*':
|
||||
case L'+':
|
||||
case L'(':
|
||||
case L')':
|
||||
case L'?':
|
||||
case L'[':
|
||||
case L'{':
|
||||
case L'}':
|
||||
case L'\\':
|
||||
case L'|':
|
||||
// these two only *need* to be escaped within a character class, and technically it makes
|
||||
// no sense to ever use process substitution output to compose a character class, but...
|
||||
case L'-':
|
||||
case L']':
|
||||
out.push_back('\\');
|
||||
default:
|
||||
out.push_back(c);
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
wcstring escape_string(const wchar_t *in, escape_flags_t flags, escape_string_style_t style) {
|
||||
wcstring result;
|
||||
|
||||
|
@ -1112,6 +1148,10 @@ wcstring escape_string(const wchar_t *in, escape_flags_t flags, escape_string_st
|
|||
escape_string_var(in, result);
|
||||
break;
|
||||
}
|
||||
case STRING_STYLE_PCRE2: {
|
||||
result = escape_string_pcre2(in);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -1133,6 +1173,10 @@ wcstring escape_string(const wcstring &in, escape_flags_t flags, escape_string_s
|
|||
escape_string_var(in, result);
|
||||
break;
|
||||
}
|
||||
case STRING_STYLE_PCRE2: {
|
||||
result = escape_string_pcre2(in);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -1617,6 +1661,11 @@ bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t es
|
|||
success = unescape_string_var(input, output);
|
||||
break;
|
||||
}
|
||||
case STRING_STYLE_PCRE2: {
|
||||
// unescaping PCRE2 is not needed/supported, the PCRE2 engine is responsible for that
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!success) output->clear();
|
||||
return success;
|
||||
|
@ -1638,6 +1687,11 @@ bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t e
|
|||
success = unescape_string_var(input.c_str(), output);
|
||||
break;
|
||||
}
|
||||
case STRING_STYLE_PCRE2: {
|
||||
// unescaping PCRE2 is not needed/supported, the PCRE2 engine is responsible for that
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!success) output->clear();
|
||||
return success;
|
||||
|
|
|
@ -118,7 +118,12 @@ static_assert(false, "Neither NAME_MAX nor MAXNAMELEN is defined!");
|
|||
#endif
|
||||
#endif
|
||||
|
||||
enum escape_string_style_t { STRING_STYLE_SCRIPT, STRING_STYLE_URL, STRING_STYLE_VAR };
|
||||
// Escaping styles accepted by the escape_string() and unescape_string() overloads.
enum escape_string_style_t {
|
||||
STRING_STYLE_SCRIPT,
|
||||
STRING_STYLE_URL,
|
||||
// Chosen when the style argument is "var".
STRING_STYLE_VAR,
|
||||
// Chosen when the style argument is "pcre2". Escape-only: unescape_string() reports failure for
// this style because the PCRE2 engine is responsible for interpreting the escapes.
STRING_STYLE_PCRE2,
|
||||
};
|
||||
|
||||
// Flags for unescape_string functions.
|
||||
enum {
|
||||
|
|
|
@ -4349,6 +4349,33 @@ static void test_wcstring_tok() {
|
|||
}
|
||||
}
|
||||
|
||||
static void test_pcre2_escape() {
|
||||
say(L"Testing escaping strings as pcre2 literals");
|
||||
// plain text should not be needlessly escaped
|
||||
auto input = L"hello world!";
|
||||
auto escaped = escape_string(input, 0, STRING_STYLE_PCRE2);
|
||||
if (escaped != input) {
|
||||
err(L"Input string %ls unnecessarily PCRE2 escaped as %ls", input, escaped.c_str());
|
||||
}
|
||||
|
||||
// all the following are intended to be ultimately matched literally - even if they don't look
|
||||
// like that's the intent - so we escape them.
|
||||
const wchar_t * tests[][2] = {
|
||||
L".ext", L"\\.ext",
|
||||
L"{word}", L"\\{word\\}",
|
||||
L"hola-mundo", L"hola\\-mundo",
|
||||
L"$17.42 is your total?", L"\\$17\\.42 is your total\\?",
|
||||
L"not really escaped\\?", L"not really escaped\\\\\\?",
|
||||
};
|
||||
|
||||
for (auto &test : tests) {
|
||||
auto escaped = escape_string(test[0], 0, STRING_STYLE_PCRE2);
|
||||
if (escaped != test[1]) {
|
||||
err(L"pcre2_escape error: pcre2_escape(%ls) -> %ls, expected %ls", test[0], escaped.c_str(), test[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int builtin_string(parser_t &parser, io_streams_t &streams, wchar_t **argv);
|
||||
static void run_one_string_test(const wchar_t *const *argv, int expected_rc,
|
||||
const wchar_t *expected_out) {
|
||||
|
@ -4961,6 +4988,7 @@ int main(int argc, char **argv) {
|
|||
if (should_test_function("utf8")) test_utf8();
|
||||
if (should_test_function("feature_flags")) test_feature_flags();
|
||||
if (should_test_function("escape_sequences")) test_escape_sequences();
|
||||
if (should_test_function("pcre2_escape")) test_pcre2_escape();
|
||||
if (should_test_function("lru")) test_lru();
|
||||
if (should_test_function("expand")) test_expand();
|
||||
if (should_test_function("fuzzy_match")) test_fuzzy_match();
|
||||
|
|
|
@ -92,6 +92,9 @@
|
|||
####################
|
||||
# string escape with multibyte chars
|
||||
|
||||
####################
|
||||
# string escape for literal pcre2 searching
|
||||
|
||||
####################
|
||||
# set x (string unescape (echo \x07 | string escape))
|
||||
|
||||
|
@ -182,7 +185,7 @@ string match: ^
|
|||
####################
|
||||
# string invalidarg
|
||||
string: Subcommand 'invalidarg' is not valid
|
||||
Standard input (line 205):
|
||||
Standard input (line 211):
|
||||
string invalidarg; and echo "unexpected exit 0"
|
||||
^
|
||||
|
||||
|
@ -267,7 +270,7 @@ string repeat: Expected argument
|
|||
####################
|
||||
# string repeat -l fakearg 2>&1
|
||||
string repeat: Unknown option '-l'
|
||||
Standard input (line 281):
|
||||
Standard input (line 287):
|
||||
string repeat -l fakearg
|
||||
^
|
||||
|
||||
|
|
|
@ -101,6 +101,12 @@ string escape --style=var 中
|
|||
string escape --style=var aöb | string unescape --style=var
|
||||
string escape --style=var 中 | string unescape --style=var
|
||||
|
||||
# test regex escaping
|
||||
logmsg 'string escape for literal pcre2 searching'
|
||||
string escape --style=pcre2 ".ext"
|
||||
string escape --style=pcre2 "bonjour, amigo"
|
||||
string escape --style=pcre2 "^this is a literal string"
|
||||
|
||||
# The following tests verify that we can correctly unescape the same strings
|
||||
# we tested escaping above.
|
||||
|
||||
|
|
|
@ -140,6 +140,12 @@ _E4_B8_AD_
|
|||
aöb
|
||||
中
|
||||
|
||||
####################
|
||||
# string escape for literal pcre2 searching
|
||||
\.ext
|
||||
bonjour, amigo
|
||||
\^this is a literal string
|
||||
|
||||
####################
|
||||
# set x (string unescape (echo \x07 | string escape))
|
||||
success
|
||||
|
|
Loading…
Reference in a new issue