mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-28 04:35:09 +00:00
Decode multibyte escapes immediately
We forgot to decode (i.e. turn into nice wchar_t codepoints) "byte_literal" escape sequences. This meant that e.g.

```fish
string match ö \Xc3\Xb6
math 5 \X2b 5
```

didn't work, even though `math 5 \x2b 5` did; the `\X` form would print the wonderful error:

```
math: Error: Missing operator
'5 + 5'
   ^
```

So, instead, we decode eagerly.
This commit is contained in:
parent
62794446b7
commit
396e276286
2 changed files with 173 additions and 140 deletions
|
@ -1149,6 +1149,10 @@ maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, boo
|
||||||
bool errored = false;
|
bool errored = false;
|
||||||
size_t in_pos = 1; // in_pos always tracks the next character to read (and therefore the number
|
size_t in_pos = 1; // in_pos always tracks the next character to read (and therefore the number
|
||||||
// of characters read so far)
|
// of characters read so far)
|
||||||
|
|
||||||
|
// For multibyte \X sequences.
|
||||||
|
std::string byte_buff;
|
||||||
|
while (!errored) {
|
||||||
const wchar_t c = input[in_pos++];
|
const wchar_t c = input[in_pos++];
|
||||||
switch (c) {
|
switch (c) {
|
||||||
// A null character after a backslash is an error.
|
// A null character after a backslash is an error.
|
||||||
|
@ -1207,8 +1211,8 @@ maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, boo
|
||||||
default: {
|
default: {
|
||||||
base = 8;
|
base = 8;
|
||||||
chars = 3;
|
chars = 3;
|
||||||
// Note that in_pos currently is just after the first post-backslash character;
|
// Note that in_pos currently is just after the first post-backslash
|
||||||
// we want to start our escape from there.
|
// character; we want to start our escape from there.
|
||||||
assert(in_pos > 0);
|
assert(in_pos > 0);
|
||||||
in_pos--;
|
in_pos--;
|
||||||
break;
|
break;
|
||||||
|
@ -1228,8 +1232,20 @@ maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, boo
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!errored && res <= max_val) {
|
if (!errored && res <= max_val) {
|
||||||
result_char_or_none =
|
if (byte_literal) {
|
||||||
static_cast<wchar_t>((byte_literal ? ENCODE_DIRECT_BASE : 0) + res);
|
// Multibyte encodings necessitate that we keep adjacent byte escapes.
|
||||||
|
// - `\Xc3\Xb6` is "ö", but only together.
|
||||||
|
// (this assumes a valid codepoint can't consist of multiple bytes
|
||||||
|
// that are valid on their own, which is true for UTF-8)
|
||||||
|
byte_buff.push_back(static_cast<char>(res));
|
||||||
|
result_char_or_none = none();
|
||||||
|
if (input[in_pos] == L'\\' && input[in_pos + 1] == L'X') {
|
||||||
|
in_pos++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
result_char_or_none = static_cast<wchar_t>(res);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
errored = true;
|
errored = true;
|
||||||
}
|
}
|
||||||
|
@ -1300,10 +1316,18 @@ maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, boo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!errored && result_char_or_none.has_value()) {
|
if (errored) return none();
|
||||||
|
|
||||||
|
if (!byte_buff.empty()) {
|
||||||
|
result->append(str2wcstring(byte_buff));
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result_char_or_none.has_value()) {
|
||||||
result->push_back(*result_char_or_none);
|
result->push_back(*result_char_or_none);
|
||||||
}
|
}
|
||||||
if (errored) return none();
|
|
||||||
|
|
||||||
return in_pos;
|
return in_pos;
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,3 +88,12 @@ env LC_ALL=C $fish -c 'echo -n Y\u00FCY' | display_bytes
|
||||||
env LC_ALL=C $fish -c 'echo -n T\u01FDT' | display_bytes
|
env LC_ALL=C $fish -c 'echo -n T\u01FDT' | display_bytes
|
||||||
#CHECK: 0000000 124 077 124
|
#CHECK: 0000000 124 077 124
|
||||||
#CHECK: 0000003
|
#CHECK: 0000003
|
||||||
|
|
||||||
|
string match ö \Xc3\Xb6
|
||||||
|
#CHECK: ö
|
||||||
|
|
||||||
|
math 5 \X2b 5
|
||||||
|
#CHECK: 10
|
||||||
|
|
||||||
|
math 7 \x2b 7
|
||||||
|
#CHECK: 14
|
||||||
|
|
Loading…
Reference in a new issue