Fix string escape var and url styles

Turns out I broke these in my zeal to remove wcs2string.

This reverts commit 583d771b10.

Fixes #5322.
This commit is contained in:
Fabian Homborg 2018-11-07 12:37:47 +01:00
parent bfd50863b8
commit 460bc00698
4 changed files with 37 additions and 8 deletions

View file

@ -820,7 +820,8 @@ wcstring reformat_for_screen(const wcstring &msg) {
/// Escape a string in a fashion suitable for using as a URL. Store the result in out_str. /// Escape a string in a fashion suitable for using as a URL. Store the result in out_str.
static void escape_string_url(const wcstring &in, wcstring &out) { static void escape_string_url(const wcstring &in, wcstring &out) {
for (auto &c1 : in) { const std::string narrow = wcs2string(in);
for (auto &c1 : narrow) {
// This silliness is so we get the correct result whether chars are signed or unsigned. // This silliness is so we get the correct result whether chars are signed or unsigned.
unsigned int c2 = (unsigned int)c1 & 0xFF; unsigned int c2 = (unsigned int)c1 & 0xFF;
if (!(c2 & 0x80) && if (!(c2 & 0x80) &&
@ -871,22 +872,25 @@ static bool unescape_string_url(const wchar_t *in, wcstring *out) {
/// Escape a string in a fashion suitable for using as a fish var name. Store the result in out_str. /// Escape a string in a fashion suitable for using as a fish var name. Store the result in out_str.
static void escape_string_var(const wcstring &in, wcstring &out) { static void escape_string_var(const wcstring &in, wcstring &out) {
bool prev_was_hex_encoded = false; bool prev_was_hex_encoded = false;
for (auto c1 : in) { const std::string narrow = wcs2string(in);
if (c1 >= 0 && c1 <= 127 && isalnum(c1) && (!prev_was_hex_encoded || !is_hex_digit(c1))) { for (auto c1 : narrow) {
// This silliness is so we get the correct result whether chars are signed or unsigned.
unsigned int c2 = (unsigned int)c1 & 0xFF;
if (!(c2 & 0x80) && isalnum(c2) && (!prev_was_hex_encoded || !is_hex_digit(c2))) {
// ASCII alphanumerics don't need to be encoded. // ASCII alphanumerics don't need to be encoded.
if (prev_was_hex_encoded) { if (prev_was_hex_encoded) {
out.push_back(L'_'); out.push_back(L'_');
prev_was_hex_encoded = false; prev_was_hex_encoded = false;
} }
out.push_back(c1); out.push_back((wchar_t)c2);
} else if (c1 == L'_') { } else if (c2 == '_') {
// Underscores are encoded by doubling them. // Underscores are encoded by doubling them.
out.append(L"__"); out.append(L"__");
prev_was_hex_encoded = false; prev_was_hex_encoded = false;
} else { } else {
// All other chars need to have their UTF-8 representation encoded in hex. // All other chars need to have their UTF-8 representation encoded in hex.
wchar_t buf[4]; wchar_t buf[4];
swprintf(buf, sizeof buf / sizeof buf[0], L"_%02X", c1); swprintf(buf, sizeof buf / sizeof buf[0], L"_%02X", c2);
out.append(buf); out.append(buf);
prev_was_hex_encoded = true; prev_was_hex_encoded = true;
} }

View file

@ -89,6 +89,9 @@
#################### ####################
# string escape --style=var -- - # string escape --style=var -- -
####################
# string escape with multibyte chars
#################### ####################
# set x (string unescape (echo \x07 | string escape)) # set x (string unescape (echo \x07 | string escape))
@ -179,7 +182,7 @@ string match: ^
#################### ####################
# string invalidarg # string invalidarg
string: Subcommand 'invalidarg' is not valid string: Subcommand 'invalidarg' is not valid
Standard input (line 194): Standard input (line 205):
string invalidarg; and echo "unexpected exit 0" string invalidarg; and echo "unexpected exit 0"
^ ^
@ -264,7 +267,7 @@ string repeat: Expected argument
#################### ####################
# string repeat -l fakearg 2>&1 # string repeat -l fakearg 2>&1
string repeat: Unknown option '-l' string repeat: Unknown option '-l'
Standard input (line 270): Standard input (line 281):
string repeat -l fakearg string repeat -l fakearg
^ ^

View file

@ -90,6 +90,17 @@ string escape --style=var '_a_b_c_'
logmsg 'string escape --style=var -- -' logmsg 'string escape --style=var -- -'
string escape --style=var -- - string escape --style=var -- -
logmsg 'string escape with multibyte chars'
string escape --style=url aöb
string escape --style=url 中
string escape --style=url aöb | string unescape --style=url
string escape --style=url 中 | string unescape --style=url
string escape --style=var aöb
string escape --style=var 中
string escape --style=var aöb | string unescape --style=var
string escape --style=var 中 | string unescape --style=var
# The following tests verify that we can correctly unescape the same strings # The following tests verify that we can correctly unescape the same strings
# we tested escaping above. # we tested escaping above.

View file

@ -129,6 +129,17 @@ __a__b__c__
# string escape --style=var -- - # string escape --style=var -- -
_2D_ _2D_
####################
# string escape with multibyte chars
a%C3%B6b
%E4%B8%AD
aöb
中
a_C3_B6_b
_E4_B8_AD_
aöb
中
#################### ####################
# set x (string unescape (echo \x07 | string escape)) # set x (string unescape (echo \x07 | string escape))
success success