From 460bc00698ec31ef95c523011d64b2ea38898cf3 Mon Sep 17 00:00:00 2001 From: Fabian Homborg Date: Wed, 7 Nov 2018 12:37:47 +0100 Subject: [PATCH] Fix string escape var and url styles Turns out I broke these in my zeal to remove wcs2string. This reverts commit 583d771b106970863230f8df68d2c36b601db8b9. Fixes #5322. --- src/common.cpp | 16 ++++++++++------ tests/string.err | 7 +++++-- tests/string.in | 11 +++++++++++ tests/string.out | 11 +++++++++++ 4 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/common.cpp b/src/common.cpp index 038e6ead6..7160ef978 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -820,7 +820,8 @@ wcstring reformat_for_screen(const wcstring &msg) { /// Escape a string in a fashion suitable for using as a URL. Store the result in out_str. static void escape_string_url(const wcstring &in, wcstring &out) { - for (auto &c1 : in) { + const std::string narrow = wcs2string(in); + for (auto &c1 : narrow) { // This silliness is so we get the correct result whether chars are signed or unsigned. unsigned int c2 = (unsigned int)c1 & 0xFF; if (!(c2 & 0x80) && @@ -871,22 +872,25 @@ static bool unescape_string_url(const wchar_t *in, wcstring *out) { /// Escape a string in a fashion suitable for using as a fish var name. Store the result in out_str. static void escape_string_var(const wcstring &in, wcstring &out) { bool prev_was_hex_encoded = false; - for (auto c1 : in) { - if (c1 >= 0 && c1 <= 127 && isalnum(c1) && (!prev_was_hex_encoded || !is_hex_digit(c1))) { + const std::string narrow = wcs2string(in); + for (auto c1 : narrow) { + // This silliness is so we get the correct result whether chars are signed or unsigned. + unsigned int c2 = (unsigned int)c1 & 0xFF; + if (!(c2 & 0x80) && isalnum(c2) && (!prev_was_hex_encoded || !is_hex_digit(c2))) { // ASCII alphanumerics don't need to be encoded. if (prev_was_hex_encoded) { out.push_back(L'_'); prev_was_hex_encoded = false; } - out.push_back(c1); - } else if (c1 == L'_') { + out.push_back((wchar_t)c2); + } else if (c2 == '_') { // Underscores are encoded by doubling them. out.append(L"__"); prev_was_hex_encoded = false; } else { // All other chars need to have their UTF-8 representation encoded in hex. wchar_t buf[4]; - swprintf(buf, sizeof buf / sizeof buf[0], L"_%02X", c1); + swprintf(buf, sizeof buf / sizeof buf[0], L"_%02X", c2); out.append(buf); prev_was_hex_encoded = true; } diff --git a/tests/string.err b/tests/string.err index 890f6a00c..b016d450d 100644 --- a/tests/string.err +++ b/tests/string.err @@ -89,6 +89,9 @@ #################### # string escape --style=var -- - +#################### +# string escape with multibyte chars + #################### # set x (string unescape (echo \x07 | string escape)) @@ -179,7 +182,7 @@ string match: ^ #################### # string invalidarg string: Subcommand 'invalidarg' is not valid -Standard input (line 194): +Standard input (line 205): string invalidarg; and echo "unexpected exit 0" ^ @@ -264,7 +267,7 @@ string repeat: Expected argument #################### # string repeat -l fakearg 2>&1 string repeat: Unknown option '-l' -Standard input (line 270): +Standard input (line 281): string repeat -l fakearg ^ diff --git a/tests/string.in b/tests/string.in index f267a1952..fc6be1bbf 100644 --- a/tests/string.in +++ b/tests/string.in @@ -90,6 +90,17 @@ string escape --style=var '_a_b_c_' logmsg 'string escape --style=var -- -' string escape --style=var -- - +logmsg 'string escape with multibyte chars' +string escape --style=url aöb +string escape --style=url 中 +string escape --style=url aöb | string unescape --style=url +string escape --style=url 中 | string unescape --style=url + +string escape --style=var aöb +string escape --style=var 中 +string escape --style=var aöb | string unescape --style=var +string escape --style=var 中 | string unescape --style=var + # The following tests verify that we can correctly unescape the same strings # we tested escaping above. diff --git a/tests/string.out b/tests/string.out index d9de63114..a10e6010d 100644 --- a/tests/string.out +++ b/tests/string.out @@ -129,6 +129,17 @@ __a__b__c__ # string escape --style=var -- - _2D_ +#################### +# string escape with multibyte chars +a%C3%B6b +%E4%B8%AD +aöb +中 +a_C3_B6_b +_E4_B8_AD_ +aöb +中 + #################### # set x (string unescape (echo \x07 | string escape)) success