diff --git a/CHANGELOG.md b/CHANGELOG.md
index adfb6a955..2d160c4d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
- The `COLUMNS` and `LINES` env vars are now correctly set the first time `fish_prompt` is run (#4141).
- New `status is-breakpoint` command that is true when a prompt is displayed in response to a `breakpoint` command (#1310).
- Invalid array indexes are now silently ignored (#826, #4127).
+- `string escape` has a new `--style=xxx` flag where `xxx` can be `script`, `var`, or `url` (#4150).
## Other significant changes
diff --git a/doc_src/string.txt b/doc_src/string.txt
index f893f7fb6..f4d9c6f9b 100644
--- a/doc_src/string.txt
+++ b/doc_src/string.txt
@@ -2,7 +2,7 @@
\subsection string-synopsis Synopsis
\fish{synopsis}
-string escape [(-n | --no-quoted)] [STRING...]
+string escape [(-n | --no-quoted)] [--style=xxx] [STRING...]
string join [(-q | --quiet)] SEP [STRING...]
string length [(-q | --quiet)] [STRING...]
string lower [(-q | --quiet)] [STRING...]
@@ -36,7 +36,11 @@ The following subcommands are available.
\subsection string-escape "escape" subcommand
-`string escape` escapes each STRING such that it can be passed back to `eval` to produce the original argument again. By default, all special characters are escaped, and quotes are used to simplify the output when possible. If `-n` or `--no-quoted` is given, the simplifying quoted format is not used. Exit status: 0 if at least one string was escaped, or 1 otherwise.
+`string escape` escapes each STRING in one of three ways. The first is `--style=script`. This is the default. It alters the string such that it can be passed back to `eval` to produce the original argument again. By default, all special characters are escaped, and quotes are used to simplify the output when possible. If `-n` or `--no-quoted` is given, the simplifying quoted format is not used. Exit status: 0 if at least one string was escaped, or 1 otherwise.
+
+The second is `--style=var` which ensures the string can be used as a variable name by hex encoding any non-alphanumeric characters. The string is first converted to UTF-8 before being encoded.
+
+The third is `--style=url` which ensures the string can be used as a URL by hex encoding any character which is not legal in a URL. The string is first converted to UTF-8 before being encoded.
\subsection string-join "join" subcommand
@@ -159,6 +163,11 @@ In general, special characters are special by default, so `a+` matches one or mo
cg
\endfish
+\fish{cli-dark}
+>_ string escape --style=var 'a1 b2'\u6161
+a1_20_62_32_E6_85_A1_
+\endfish
+
\subsection string-example-match-glob Match Glob Examples
\fish{cli-dark}
diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp
index d2f8a6c26..fa58bd728 100644
--- a/src/builtin_string.cpp
+++ b/src/builtin_string.cpp
@@ -116,6 +116,7 @@ typedef struct { //!OCLINT(too many fields)
bool regex_valid = false;
bool right_valid = false;
bool start_valid = false;
+ bool style_valid = false;
bool all = false;
bool entire = false;
@@ -138,8 +139,34 @@ typedef struct { //!OCLINT(too many fields)
const wchar_t *chars_to_trim = L" \f\n\r\t";
const wchar_t *arg1 = NULL;
const wchar_t *arg2 = NULL;
+
+ escape_string_style_t escape_style = STRING_STYLE_SCRIPT;
} options_t;
+/// This handles the `--style=xxx` flag.
+static int handle_flag_1(wchar_t **argv, parser_t &parser, io_streams_t &streams, wgetopter_t &w,
+ options_t *opts) {
+ const wchar_t *cmd = argv[0];
+
+ if (opts->style_valid) {
+ if (wcscmp(w.woptarg, L"script") == 0) {
+ opts->escape_style = STRING_STYLE_SCRIPT;
+ } else if (wcscmp(w.woptarg, L"url") == 0) {
+ opts->escape_style = STRING_STYLE_URL;
+ } else if (wcscmp(w.woptarg, L"var") == 0) {
+ opts->escape_style = STRING_STYLE_VAR;
+ }
+ else {
+ string_error(streams, _(L"%ls: Invalid escape style '%ls'\n"), cmd, w.woptarg);
+ return STATUS_INVALID_ARGS;
+ }
+ return STATUS_CMD_OK;
+ }
+
+ string_unknown_option(parser, streams, cmd, argv[w.woptind - 1]);
+ return STATUS_INVALID_ARGS;
+}
+
static int handle_flag_N(wchar_t **argv, parser_t &parser, io_streams_t &streams, wgetopter_t &w,
options_t *opts) {
if (opts->no_newline_valid) {
@@ -349,13 +376,14 @@ static const struct woption long_options[] = {
{L"max", required_argument, NULL, 'm'}, {L"no-newline", no_argument, NULL, 'N'},
{L"no-quoted", no_argument, NULL, 'n'}, {L"quiet", no_argument, NULL, 'q'},
{L"regex", no_argument, NULL, 'r'}, {L"right", no_argument, NULL, 'r'},
- {L"start", required_argument, NULL, 's'}, {NULL, 0, NULL, 0}};
+ {L"start", required_argument, NULL, 's'}, {L"style", required_argument, NULL, 1},
+ {NULL, 0, NULL, 0}};
static std::map flag_to_function = {
{'N', handle_flag_N}, {'a', handle_flag_a}, {'c', handle_flag_c}, {'e', handle_flag_e},
{'f', handle_flag_f}, {'i', handle_flag_i}, {'l', handle_flag_l}, {'m', handle_flag_m},
{'n', handle_flag_n}, {'q', handle_flag_q}, {'r', handle_flag_r}, {'s', handle_flag_s},
- {'v', handle_flag_v}};
+ {'v', handle_flag_v}, {1, handle_flag_1}};
/// Parse the arguments for flags recognized by a specific string subcommand.
static int parse_opts(options_t *opts, int *optind, int n_req_args, int argc, wchar_t **argv,
@@ -408,21 +436,15 @@ static int parse_opts(options_t *opts, int *optind, int n_req_args, int argc, wc
return STATUS_CMD_OK;
}
-static int string_escape(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
- options_t opts;
- opts.no_quoted_valid = true;
- int optind;
- int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
- if (retval != STATUS_CMD_OK) return retval;
-
+/// Escape a string so that it can be used in a fish script without further word splitting.
+static int string_escape_script(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) {
+ wcstring storage;
+ int nesc = 0;
escape_flags_t flags = ESCAPE_ALL;
if (opts.no_quoted) flags |= ESCAPE_NO_QUOTED;
- int nesc = 0;
- wcstring storage;
- const wchar_t *arg;
- while ((arg = string_get_arg(&optind, argv, &storage, streams)) != 0) {
- streams.out.append(escape_string(arg, flags));
+ while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) {
+ streams.out.append(escape_string(arg, flags, STRING_STYLE_SCRIPT));
streams.out.append(L'\n');
nesc++;
}
@@ -430,6 +452,61 @@ static int string_escape(parser_t &parser, io_streams_t &streams, int argc, wcha
return nesc > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
}
+/// Escape a string so that it can be used as a URL.
+static int string_escape_url(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) {
+ UNUSED(opts);
+ wcstring storage;
+ int nesc = 0;
+ escape_flags_t flags = 0;
+
+ while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) {
+ streams.out.append(escape_string(arg, flags, STRING_STYLE_URL));
+ streams.out.append(L'\n');
+ nesc++;
+ }
+
+ return nesc > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
+}
+
+/// Escape a string so that it can be used as a fish var name.
+static int string_escape_var(options_t &opts, int optind, wchar_t **argv, io_streams_t &streams) {
+ UNUSED(opts);
+ wcstring storage;
+ int nesc = 0;
+ escape_flags_t flags = 0;
+
+ while (const wchar_t *arg = string_get_arg(&optind, argv, &storage, streams)) {
+ streams.out.append(escape_string(arg, flags, STRING_STYLE_VAR));
+ streams.out.append(L'\n');
+ nesc++;
+ }
+
+ return nesc > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
+}
+
+static int string_escape(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
+ options_t opts;
+ opts.no_quoted_valid = true;
+ opts.style_valid = true;
+ int optind;
+ int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
+ if (retval != STATUS_CMD_OK) return retval;
+
+ switch (opts.escape_style) {
+ case STRING_STYLE_SCRIPT: {
+ return string_escape_script(opts, optind, argv, streams);
+ }
+ case STRING_STYLE_URL: {
+ return string_escape_url(opts, optind, argv, streams);
+ }
+ case STRING_STYLE_VAR: {
+ return string_escape_var(opts, optind, argv, streams);
+ }
+ }
+
+ DIE("should never reach this statement");
+}
+
static int string_join(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
options_t opts;
opts.quiet_valid = true;
diff --git a/src/common.cpp b/src/common.cpp
index d57654e7c..fb51791de 100644
--- a/src/common.cpp
+++ b/src/common.cpp
@@ -1,6 +1,7 @@
// Various functions, mostly string utilities, that are used by most parts of fish.
#include "config.h"
+#include
#include
#include
#include
@@ -745,11 +746,62 @@ wcstring reformat_for_screen(const wcstring &msg) {
return buff;
}
-/// Escape a string, storing the result in out_str.
-static void escape_string_internal(const wchar_t *orig_in, size_t in_len, wcstring *out_str,
- escape_flags_t flags) {
- assert(orig_in != NULL);
+/// Escape a string in a fashion suitable for using as a URL. Store the result in out.
+static void escape_string_url(const wchar_t *orig_in, wcstring &out) {
+ const std::string &in = wcs2string(orig_in);
+ for (auto c1 : in) {
+ // This silliness is so we get the correct result whether chars are signed or unsigned.
+ unsigned int c2 = (unsigned int)c1 & 0xFF;
+ if (!(c2 & 0x80) &&
+ (isalnum(c2) || c2 == '/' || c2 == '.' || c2 == '~' || c2 == '-' || c2 == '_')) {
+ // The above characters don't need to be encoded.
+ out.push_back((wchar_t)c2);
+ } else {
+ // All other chars need to have their UTF-8 representation encoded in hex.
+ wchar_t buf[4];
+ swprintf(buf, sizeof buf / sizeof buf[0], L"%%%02X", c2);
+ out.append(buf);
+ }
+ }
+}
+static bool is_hex_digit(int c) { return c != 0 && strchr("0123456789abcdefABCDEF", c) != NULL; }
+
+/// Escape a string in a fashion suitable for using as a fish var name. Store the result in out.
+static void escape_string_var(const wchar_t *orig_in, wcstring &out) {
+ bool prev_was_hex_encoded = false;
+ // Walk the narrow (multibyte) form of the string one byte at a time.
+ const std::string &in = wcs2string(orig_in);
+ for (auto c1 : in) {
+ // This silliness is so we get the correct result whether chars are signed or unsigned.
+ unsigned int c2 = (unsigned int)c1 & 0xFF;
+ if (!(c2 & 0x80) && isalnum(c2) && (!prev_was_hex_encoded || !is_hex_digit(c2))) {
+ // ASCII alphanumerics don't need to be encoded.
+ if (prev_was_hex_encoded) {
+ out.push_back(L'_');
+ prev_was_hex_encoded = false;
+ }
+ out.push_back((wchar_t)c2);
+ } else if (c2 == '_') {
+ // Underscores are encoded by doubling them.
+ out.append(L"__");
+ prev_was_hex_encoded = false;
+ } else {
+ // All other chars need to have their UTF-8 representation encoded in hex.
+ wchar_t buf[4];
+ swprintf(buf, sizeof buf / sizeof buf[0], L"_%02X", c2);
+ out.append(buf);
+ prev_was_hex_encoded = true;
+ }
+ }
+ if (prev_was_hex_encoded) {
+ out.push_back(L'_');
+ }
+}
+
+/// Escape a string in a fashion suitable for using in fish script. Store the result in out.
+static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring &out,
+ escape_flags_t flags) {
const wchar_t *in = orig_in;
bool escape_all = static_cast(flags & ESCAPE_ALL);
bool no_quoted = static_cast(flags & ESCAPE_NO_QUOTED);
@@ -758,9 +810,6 @@ static void escape_string_internal(const wchar_t *orig_in, size_t in_len, wcstri
int need_escape = 0;
int need_complex_escape = 0;
- // Avoid dereferencing all over the place.
- wcstring &out = *out_str;
-
if (!no_quoted && in_len == 0) {
out.assign(L"''");
return;
@@ -903,15 +952,45 @@ static void escape_string_internal(const wchar_t *orig_in, size_t in_len, wcstri
}
}
-wcstring escape_string(const wchar_t *in, escape_flags_t flags) {
+wcstring escape_string(const wchar_t *in, escape_flags_t flags, escape_string_style_t style) {
wcstring result;
- escape_string_internal(in, wcslen(in), &result, flags);
+
+ switch (style) {
+ case STRING_STYLE_SCRIPT: {
+ escape_string_script(in, wcslen(in), result, flags);
+ break;
+ }
+ case STRING_STYLE_URL: {
+ escape_string_url(in, result);
+ break;
+ }
+ case STRING_STYLE_VAR: {
+ escape_string_var(in, result);
+ break;
+ }
+ }
+
return result;
}
-wcstring escape_string(const wcstring &in, escape_flags_t flags) {
+wcstring escape_string(const wcstring &in, escape_flags_t flags, escape_string_style_t style) {
wcstring result;
- escape_string_internal(in.c_str(), in.size(), &result, flags);
+
+ switch (style) {
+ case STRING_STYLE_SCRIPT: {
+ escape_string_script(in.c_str(), in.size(), result, flags);
+ break;
+ }
+ case STRING_STYLE_URL: {
+ escape_string_url(in.c_str(), result);
+ break;
+ }
+ case STRING_STYLE_VAR: {
+ escape_string_var(in.c_str(), result);
+ break;
+ }
+ }
+
return result;
}
diff --git a/src/common.h b/src/common.h
index a1c2e0841..e1ed13772 100644
--- a/src/common.h
+++ b/src/common.h
@@ -89,6 +89,12 @@ typedef std::vector wcstring_list_t;
#define INPUT_COMMON_BASE (wchar_t)0xF700
#define INPUT_COMMON_END (INPUT_COMMON_BASE + 64)
+enum escape_string_style_t {
+ STRING_STYLE_SCRIPT,
+ STRING_STYLE_URL,
+ STRING_STYLE_VAR
+};
+
// Flags for unescape_string functions.
enum {
UNESCAPE_DEFAULT = 0, // default behavior
@@ -97,15 +103,14 @@ enum {
};
typedef unsigned int unescape_flags_t;
-// Flags for the escape_string() and escape_string() functions.
+// Flags for both escape_string() overloads. These are only applicable when the escape style is
+// "script" (i.e., STRING_STYLE_SCRIPT).
enum {
/// Escape all characters, including magic characters like the semicolon.
ESCAPE_ALL = 1 << 0,
-
/// Do not try to use 'simplified' quoted escapes, and do not use empty quotes as the empty
/// string.
ESCAPE_NO_QUOTED = 1 << 1,
-
/// Do not escape tildes.
ESCAPE_NO_TILDE = 1 << 2
};
@@ -692,8 +697,10 @@ ssize_t read_loop(int fd, void *buff, size_t count);
/// \param in The string to be escaped
/// \param flags Flags to control the escaping
/// \return The escaped string
-wcstring escape_string(const wchar_t *in, escape_flags_t flags);
-wcstring escape_string(const wcstring &in, escape_flags_t flags);
+wcstring escape_string(const wchar_t *in, escape_flags_t flags,
+ escape_string_style_t style=STRING_STYLE_SCRIPT);
+wcstring escape_string(const wcstring &in, escape_flags_t flags,
+ escape_string_style_t style=STRING_STYLE_SCRIPT);
/// Expand backslashed escapes and substitute them with their unescaped counterparts. Also
/// optionally change the wildcards, the tilde character and a few more into constants which are
diff --git a/tests/string.err b/tests/string.err
index 33e9e847f..0c14841dc 100644
--- a/tests/string.err
+++ b/tests/string.err
@@ -5,7 +5,7 @@ string match: ^
# string invalidarg
string: Subcommand 'invalidarg' is not valid
-Standard input (line 183):
+Standard input (line 215):
string invalidarg; and echo "unexpected exit 0" >&2
^
@@ -29,6 +29,6 @@ string repeat: Expected argument
# string repeat -l fakearg 2>&1
string repeat: Unknown option '-l'
-Standard input (line 284):
+Standard input (line 316):
string repeat -l fakearg
^
diff --git a/tests/string.in b/tests/string.in
index ef4fa1940..297c77679 100644
--- a/tests/string.in
+++ b/tests/string.in
@@ -94,6 +94,38 @@ echo
echo '# echo \x07 | string escape'
echo \x07 | string escape
+echo
+echo '# string escape --style=script \'a b#c"\\\'d\''
+string escape --style=script 'a b#c"\'d'
+
+echo
+echo '# string escape --style=url \'a b#c"\\\'d\''
+string escape --style=url 'a b#c"\'d'
+
+echo
+echo '# string escape --style=url \\na\\nb%c~d\\n'
+string escape --style=url \na\nb%c~d\n
+
+echo
+echo '# string escape --style=var \'a b#c"\\\'d\''
+string escape --style=var 'a b#c"\'d'
+
+echo
+echo '# string escape --style=script a\nghi_'
+string escape --style=var a\nghi_
+
+echo
+echo '# string escape --style=var \'abc\''
+string escape --style=var 'abc'
+
+echo
+echo '# string escape --style=var \'_a_b_c_\''
+string escape --style=var '_a_b_c_'
+
+echo
+echo '# string escape --style=var -- -'
+string escape --style=var -- -
+
echo
echo '# string match "?" a'
string match "?" a
diff --git a/tests/string.out b/tests/string.out
index 8425591d8..1c24eec48 100644
--- a/tests/string.out
+++ b/tests/string.out
@@ -74,6 +74,30 @@ zan
# echo \x07 | string escape
\cg
+# string escape --style=script 'a b#c"\'d'
+a\ b\#c\"\'d
+
+# string escape --style=url 'a b#c"\'d'
+a%20b%23c%22%27d
+
+# string escape --style=url \na\nb%c~d\n
+%0Aa%0Ab%25c~d%0A
+
+# string escape --style=var 'a b#c"\'d'
+a_20_62_23_63_22_27_64_
+
+# string escape --style=script a\nghi_
+a_0A_ghi__
+
+# string escape --style=var 'abc'
+abc
+
+# string escape --style=var '_a_b_c_'
+__a__b__c__
+
+# string escape --style=var -- -
+_2D_
+
# string match "?" a
a