mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-12 21:18:53 +00:00
parent
60bca14b37
commit
f3cb625802
8 changed files with 319 additions and 34 deletions
|
@ -5,6 +5,7 @@
|
|||
- New `status is-breakpoint` command that is true when a prompt is displayed in response to a `breakpoint` command (#1310).
|
||||
- Invalid array indexes are now silently ignored (#826, #4127).
|
||||
- `string escape` has a new `--style=xxx` flag where `xxx` can be `script`, `var`, or `url` (#4150).
|
||||
- `string unescape` has been implemented to reverse the effects of `string escape` (#3543)
|
||||
|
||||
## Other significant changes
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ string sub [(-s | --start) START] [(-l | --length) LENGTH] [(-q | --quiet)]
|
|||
[STRING...]
|
||||
string trim [(-l | --left)] [(-r | --right)] [(-c | --chars CHARS)]
|
||||
[(-q | --quiet)] [STRING...]
|
||||
string unescape [--style=xxx] [STRING...]
|
||||
string upper [(-q | --quiet)] [STRING...]
|
||||
\endfish
|
||||
|
||||
|
@ -42,6 +43,8 @@ The second is `--style=var` which ensures the string can be used as a variable n
|
|||
|
||||
The third is `--style=url` which ensures the string can be used as a URL by hex encoding any character which is not legal in a URL. The string is first converted to UTF-8 before being encoded.
|
||||
|
||||
`string unescape` performs the inverse of the `string escape` command. If the string to be unescaped is not properly formatted it is ignored. For example, doing `string unescape --style=var (string escape --style=var $str)` will return the original string.
|
||||
|
||||
\subsection string-join "join" subcommand
|
||||
|
||||
`string join` joins its STRING arguments into a single string separated by SEP, which can be an empty string. Exit status: 0 if at least one join was performed, or 1 otherwise.
|
||||
|
|
|
@ -484,6 +484,64 @@ static int string_escape_var(options_t &opts, int optind, wchar_t **argv, io_str
|
|||
return nesc > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
|
||||
}
|
||||
|
||||
/// Unescape a string encoded so it can be used in fish script.
|
||||
static int string_unescape_script(options_t &opts, int optind, wchar_t **argv,
|
||||
io_streams_t &streams) {
|
||||
UNUSED(opts);
|
||||
wcstring storage;
|
||||
int nesc = 0;
|
||||
unescape_flags_t flags = 0;
|
||||
|
||||
while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) {
|
||||
wcstring result;
|
||||
if (unescape_string(arg, &result, flags, STRING_STYLE_SCRIPT)) {
|
||||
streams.out.append(result);
|
||||
streams.out.append(L'\n');
|
||||
nesc++;
|
||||
}
|
||||
}
|
||||
|
||||
return nesc > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
|
||||
}
|
||||
|
||||
/// Unescape an encoded URL.
|
||||
static int string_unescape_url(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) {
|
||||
UNUSED(opts);
|
||||
wcstring storage;
|
||||
int nesc = 0;
|
||||
unescape_flags_t flags = 0;
|
||||
|
||||
while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) {
|
||||
wcstring result;
|
||||
if (unescape_string(arg, &result, flags, STRING_STYLE_URL)) {
|
||||
streams.out.append(result);
|
||||
streams.out.append(L'\n');
|
||||
nesc++;
|
||||
}
|
||||
}
|
||||
|
||||
return nesc > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
|
||||
}
|
||||
|
||||
/// Unescape an encoded var name.
|
||||
static int string_unescape_var(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) {
|
||||
UNUSED(opts);
|
||||
wcstring storage;
|
||||
int nesc = 0;
|
||||
unescape_flags_t flags = 0;
|
||||
|
||||
while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) {
|
||||
wcstring result;
|
||||
if (unescape_string(arg, &result, flags, STRING_STYLE_VAR)) {
|
||||
streams.out.append(result);
|
||||
streams.out.append(L'\n');
|
||||
nesc++;
|
||||
}
|
||||
}
|
||||
|
||||
return nesc > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
|
||||
}
|
||||
|
||||
static int string_escape(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
|
||||
options_t opts;
|
||||
opts.no_quoted_valid = true;
|
||||
|
@ -507,6 +565,29 @@ static int string_escape(parser_t &parser, io_streams_t &streams, int argc, wcha
|
|||
DIE("should never reach this statement");
|
||||
}
|
||||
|
||||
/// Entry point for the `string unescape` subcommand. Parses the options (the no-quoted and style
/// options are marked as valid) and then dispatches the remaining arguments to the helper that
/// matches the selected escape style. A failure status from option parsing is returned unchanged.
static int string_unescape(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
    options_t opts;
    opts.no_quoted_valid = true;
    opts.style_valid = true;

    int optind;
    const int parse_status = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
    if (parse_status != STATUS_CMD_OK) {
        return parse_status;
    }

    switch (opts.escape_style) {
        case STRING_STYLE_SCRIPT:
            return string_unescape_script(opts, optind, argv, streams);
        case STRING_STYLE_URL:
            return string_unescape_url(opts, optind, argv, streams);
        case STRING_STYLE_VAR:
            return string_unescape_var(opts, optind, argv, streams);
    }

    // Every style handled above returns, so getting here means the style value is not one of them.
    DIE("should never reach this statement");
}
|
||||
|
||||
static int string_join(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
|
||||
options_t opts;
|
||||
opts.quiet_valid = true;
|
||||
|
@ -1277,11 +1358,19 @@ static const struct string_subcommand {
|
|||
wchar_t **argv); //!OCLINT(unused param)
|
||||
}
|
||||
|
||||
string_subcommands[] = {
|
||||
{L"escape", &string_escape}, {L"join", &string_join}, {L"length", &string_length},
|
||||
{L"match", &string_match}, {L"replace", &string_replace}, {L"split", &string_split},
|
||||
{L"sub", &string_sub}, {L"trim", &string_trim}, {L"lower", &string_lower},
|
||||
{L"upper", &string_upper}, {L"repeat", &string_repeat}, {NULL, NULL}};
|
||||
string_subcommands[] = {{L"escape", &string_escape},
|
||||
{L"join", &string_join},
|
||||
{L"length", &string_length},
|
||||
{L"match", &string_match},
|
||||
{L"replace", &string_replace},
|
||||
{L"split", &string_split},
|
||||
{L"sub", &string_sub},
|
||||
{L"trim", &string_trim},
|
||||
{L"lower", &string_lower},
|
||||
{L"upper", &string_upper},
|
||||
{L"repeat", &string_repeat},
|
||||
{L"unescape", &string_unescape},
|
||||
{NULL, NULL}};
|
||||
|
||||
/// The string builtin, for manipulating strings.
|
||||
int builtin_string(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
|
||||
|
|
159
src/common.cpp
159
src/common.cpp
|
@ -75,6 +75,38 @@ static void debug_shared(const wchar_t msg_level, const wcstring &msg);
|
|||
|
||||
bool has_working_tty_timestamps = true;
|
||||
|
||||
/// Map a digit character onto its numeric value in the requested base. Decimal digits map to
/// 0-9 and ASCII letters of either case map to 10-35.
///
/// Returns the value, or -1 if `d` is not such a character or its value does not fit in `base`.
long convert_digit(wchar_t d, int base) {
    long value;
    if (d >= L'0' && d <= L'9') {
        value = d - L'0';
    } else if (d >= L'a' && d <= L'z') {
        value = d + 10 - L'a';
    } else if (d >= L'A' && d <= L'Z') {
        value = d + 10 - L'A';
    } else {
        return -1;  // not a digit in any base
    }

    // A digit is only valid if it is strictly below the base.
    return value < base ? value : -1;
}
|
||||
|
||||
/// Test whether the char is a valid hex digit as used by the `escape_string_*()` functions, i.e.
/// one of `0`-`9` or uppercase `A`-`F`.
///
/// This uses explicit range checks instead of `strchr()` because `strchr()` converts its argument
/// to `char` (so values above 255 can alias an ASCII digit) and also matches the terminating NUL of
/// the digit string (so 0 would be reported as a hex digit).
static bool is_hex_digit(int c) {
    return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F');
}
|
||||
|
||||
/// This is a specialization of `convert_digit()` that only handles base 16 and only uppercase.
///
/// Returns the value (0-15) of the hex digit `d`, or -1 if `d` is not one of `0`-`9` / `A`-`F`.
long convert_hex_digit(wchar_t d) {
    if (d >= L'0' && d <= L'9') {
        return d - L'0';
    }
    // Only A-F are hex digits; accepting the whole A-Z range would produce values above 15.
    if (d >= L'A' && d <= L'F') {
        return 10 + d - L'A';
    }

    return -1;
}
|
||||
|
||||
#ifdef HAVE_BACKTRACE_SYMBOLS
|
||||
// This function produces a stack backtrace with demangled function & method names. It is based on
|
||||
// https://gist.github.com/fmela/591333 but adapted to the style of the fish project.
|
||||
|
@ -765,12 +797,41 @@ static void escape_string_url(const wchar_t *orig_in, wcstring &out) {
|
|||
}
|
||||
}
|
||||
|
||||
static bool is_hex_digit(int c) { return strchr("0123456789abcdefABCDEF", c) != NULL; }
|
||||
/// Reverse the effects of `escape_string_url()`. By definition the string has consist of just ASCII
|
||||
/// chars.
|
||||
static bool unescape_string_url(const wchar_t *in, wcstring *out) {
|
||||
std::string result;
|
||||
result.reserve(out->size());
|
||||
for (wchar_t c = *in; c; c = *++in) {
|
||||
if (c > 0x7F) return false; // invalid character means we can't decode the string
|
||||
if (c == '%') {
|
||||
int c1 = in[1];
|
||||
if (c1 == 0) return false; // found unexpected end of string
|
||||
if (c1 == '%') {
|
||||
result.push_back('%');
|
||||
in++;
|
||||
} else {
|
||||
int c2 = in[2];
|
||||
if (c2 == 0) return false; // string ended prematurely
|
||||
long d1 = convert_digit(c1, 16);
|
||||
if (d1 < 0) return false;
|
||||
long d2 = convert_digit(c2, 16);
|
||||
if (d2 < 0) return false;
|
||||
result.push_back(16 * d1 + d2);
|
||||
in += 2;
|
||||
}
|
||||
} else {
|
||||
result.push_back(c);
|
||||
}
|
||||
}
|
||||
|
||||
*out = str2wcstring(result);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Escape a string in a fashion suitable for using as a fish var name. Store the result in out_str.
|
||||
static void escape_string_var(const wchar_t *orig_in, wcstring &out) {
|
||||
bool prev_was_hex_encoded = false;
|
||||
bool maybe_encode_next_char = false;
|
||||
const std::string &in = wcs2string(orig_in);
|
||||
for (auto c1 : in) {
|
||||
// This silliness is so we get the correct result whether chars are signed or unsigned.
|
||||
|
@ -799,6 +860,46 @@ static void escape_string_var(const wchar_t *orig_in, wcstring &out) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Reverse the effects of `escape_string_var()`. By definition the string has consist of just ASCII
|
||||
/// chars.
|
||||
static bool unescape_string_var(const wchar_t *in, wcstring *out) {
|
||||
std::string result;
|
||||
result.reserve(out->size());
|
||||
bool prev_was_hex_encoded = false;
|
||||
for (wchar_t c = *in; c; c = *++in) {
|
||||
if (c > 0x7F) return false; // invalid character means we can't decode the string
|
||||
if (c == '_') {
|
||||
int c1 = in[1];
|
||||
if (c1 == 0) {
|
||||
if (prev_was_hex_encoded) break;
|
||||
return false; // found unexpected escape char at end of string
|
||||
}
|
||||
if (c1 == '_') {
|
||||
result.push_back('_');
|
||||
in++;
|
||||
} else if (is_hex_digit(c1)) {
|
||||
int c2 = in[2];
|
||||
if (c2 == 0) return false; // string ended prematurely
|
||||
long d1 = convert_hex_digit(c1);
|
||||
if (d1 < 0) return false;
|
||||
long d2 = convert_hex_digit(c2);
|
||||
if (d2 < 0) return false;
|
||||
result.push_back(16 * d1 + d2);
|
||||
in += 2;
|
||||
prev_was_hex_encoded = true;
|
||||
}
|
||||
// No "else" clause because if the first char after an underscore is not another
|
||||
// underscore or a valid hex character then the underscore is there to improve
|
||||
// readability after we've encoded a character not valid in a var name.
|
||||
} else {
|
||||
result.push_back(c);
|
||||
}
|
||||
}
|
||||
|
||||
*out = str2wcstring(result);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Escape a string in a fashion suitable for using in fish script. Store the result in out_str.
|
||||
static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring &out,
|
||||
escape_flags_t flags) {
|
||||
|
@ -1390,14 +1491,44 @@ bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special) {
|
|||
return success;
|
||||
}
|
||||
|
||||
bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special) {
|
||||
bool success = unescape_string_internal(input, wcslen(input), output, escape_special);
|
||||
bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special,
|
||||
escape_string_style_t style) {
|
||||
bool success;
|
||||
switch (style) {
|
||||
case STRING_STYLE_SCRIPT: {
|
||||
success = unescape_string_internal(input, wcslen(input), output, escape_special);
|
||||
break;
|
||||
}
|
||||
case STRING_STYLE_URL: {
|
||||
success = unescape_string_url(input, output);
|
||||
break;
|
||||
}
|
||||
case STRING_STYLE_VAR: {
|
||||
success = unescape_string_var(input, output);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!success) output->clear();
|
||||
return success;
|
||||
}
|
||||
|
||||
bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special) {
|
||||
bool success = unescape_string_internal(input.c_str(), input.size(), output, escape_special);
|
||||
bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special,
|
||||
escape_string_style_t style) {
|
||||
bool success;
|
||||
switch (style) {
|
||||
case STRING_STYLE_SCRIPT: {
|
||||
success = unescape_string_internal(input.c_str(), input.size(), output, escape_special);
|
||||
break;
|
||||
}
|
||||
case STRING_STYLE_URL: {
|
||||
success = unescape_string_url(input.c_str(), output);
|
||||
break;
|
||||
}
|
||||
case STRING_STYLE_VAR: {
|
||||
success = unescape_string_var(input.c_str(), output);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!success) output->clear();
|
||||
return success;
|
||||
}
|
||||
|
@ -2023,22 +2154,6 @@ char **make_null_terminated_array(const std::vector<std::string> &lst) {
|
|||
return make_null_terminated_array_helper(lst);
|
||||
}
|
||||
|
||||
long convert_digit(wchar_t d, int base) {
|
||||
long res = -1;
|
||||
if ((d <= L'9') && (d >= L'0')) {
|
||||
res = d - L'0';
|
||||
} else if ((d <= L'z') && (d >= L'a')) {
|
||||
res = d + 10 - L'a';
|
||||
} else if ((d <= L'Z') && (d >= L'A')) {
|
||||
res = d + 10 - L'A';
|
||||
}
|
||||
if (res >= base) {
|
||||
res = -1;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Test if the specified character is in a range that fish uses internally to store special tokens.
|
||||
///
|
||||
/// NOTE: This is used when tokenizing the input. It is also used when reading input, before
|
||||
|
|
11
src/common.h
11
src/common.h
|
@ -715,10 +715,13 @@ size_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_i
|
|||
/// indicates the string was unmodified.
|
||||
bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special);
|
||||
|
||||
/// Unescapes a string, returning the unescaped value by reference. On failure, the output is set to
|
||||
/// an empty string.
|
||||
bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special);
|
||||
bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special);
|
||||
/// Reverse the effects of calling `escape_string`. Returns the unescaped value by reference. On
|
||||
/// failure, the output is set to an empty string.
|
||||
bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special,
|
||||
escape_string_style_t style = STRING_STYLE_SCRIPT);
|
||||
|
||||
bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special,
|
||||
escape_string_style_t style = STRING_STYLE_SCRIPT);
|
||||
|
||||
/// Returns the width of the terminal window, so that not all functions that use these values
|
||||
/// continually have to keep track of it separately.
|
||||
|
|
|
@ -5,7 +5,7 @@ string match: ^
|
|||
|
||||
# string invalidarg
|
||||
string: Subcommand 'invalidarg' is not valid
|
||||
Standard input (line 215):
|
||||
Standard input (line 258):
|
||||
string invalidarg; and echo "unexpected exit 0" >&2
|
||||
^
|
||||
|
||||
|
@ -29,6 +29,6 @@ string repeat: Expected argument
|
|||
|
||||
# string repeat -l fakearg 2>&1
|
||||
string repeat: Unknown option '-l'
|
||||
Standard input (line 316):
|
||||
Standard input (line 359):
|
||||
string repeat -l fakearg
|
||||
^
|
||||
|
|
|
@ -126,6 +126,49 @@ echo
|
|||
echo '# string escape --style=var -- -'
|
||||
string escape --style=var -- -
|
||||
|
||||
# The following tests verify that we can correctly unescape the same strings
|
||||
# we tested escaping above.
|
||||
|
||||
echo
|
||||
echo '# set x (string unescape (echo \x07 | string escape))'
|
||||
set x (string unescape (echo \x07 | string escape))
|
||||
test $x = \x07
|
||||
and echo success
|
||||
|
||||
echo
|
||||
echo '# string unescape --style=script (string escape --style=script \'a b#c"\\\'d\')'
|
||||
string unescape --style=script (string escape --style=script 'a b#c"\'d')
|
||||
|
||||
echo
|
||||
echo '# string unescape --style=url (string escape --style=url \'a b#c"\\\'d\')'
|
||||
string unescape --style=url (string escape --style=url 'a b#c"\'d')
|
||||
|
||||
echo
|
||||
echo '# string unescape --style=url (string escape --style=url \na\nb%c~d\n)'
|
||||
string unescape --style=url (string escape --style=url \na\nb%c~d\n)
|
||||
|
||||
echo
|
||||
echo '# string unescape --style=var (string escape --style=var \'a b#c"\\\'d\')'
|
||||
string unescape --style=var (string escape --style=var 'a b#c"\'d')
|
||||
|
||||
echo
|
||||
echo '# string unescape --style=var (string escape --style=var a\nghi_)'
|
||||
string unescape --style=var (string escape --style=var a\nghi_)
|
||||
|
||||
echo
|
||||
echo '# string unescape --style=var (string escape --style=var \'abc\')'
|
||||
string unescape --style=var (string escape --style=var 'abc')
|
||||
|
||||
echo
|
||||
echo '# string unescape --style=var (string escape --style=var \'_a_b_c_\')'
|
||||
string unescape --style=var (string escape --style=var '_a_b_c_')
|
||||
|
||||
echo
|
||||
echo '# string unescape --style=var (string escape --style=var -- -)'
|
||||
string unescape --style=var -- (string escape --style=var -- -)
|
||||
|
||||
# The following tests verify that we can correctly match strings.
|
||||
|
||||
echo
|
||||
echo '# string match "?" a'
|
||||
string match "?" a
|
||||
|
|
|
@ -84,7 +84,7 @@ a%20b%23c%22%27d
|
|||
%0Aa%0Ab%25c~d%0A
|
||||
|
||||
# string escape --style=var 'a b#c"\'d'
|
||||
a_20_62_23_63_22_27_64_
|
||||
a_20_b_23_c_22_27_d
|
||||
|
||||
# string escape --style=script a\nghi_
|
||||
a_0A_ghi__
|
||||
|
@ -98,6 +98,37 @@ __a__b__c__
|
|||
# string escape --style=var -- -
|
||||
_2D_
|
||||
|
||||
# set x (string unescape (echo \x07 | string escape))
|
||||
success
|
||||
|
||||
# string unescape --style=script (string escape --style=script 'a b#c"\'d')
|
||||
a b#c"'d
|
||||
|
||||
# string unescape --style=url (string escape --style=url 'a b#c"\'d')
|
||||
a b#c"'d
|
||||
|
||||
# string unescape --style=url (string escape --style=url \na\nb%c~d\n)
|
||||
|
||||
a
|
||||
b%c~d
|
||||
|
||||
|
||||
# string unescape --style=var (string escape --style=var 'a b#c"\'d')
|
||||
a b#c"'d
|
||||
|
||||
# string unescape --style=var (string escape --style=var a\nghi_)
|
||||
a
|
||||
ghi_
|
||||
|
||||
# string unescape --style=var (string escape --style=var 'abc')
|
||||
abc
|
||||
|
||||
# string unescape --style=var (string escape --style=var '_a_b_c_')
|
||||
_a_b_c_
|
||||
|
||||
# string unescape --style=var (string escape --style=var -- -)
|
||||
-
|
||||
|
||||
# string match "?" a
|
||||
a
|
||||
|
||||
|
|
Loading…
Reference in a new issue