Encode all ENCODE_DIRECT codepoints with encode_direct

forward-port of 09986f5563
This commit is contained in:
Fabian Boehm 2023-12-10 09:29:42 +01:00
parent e380654fff
commit eb196c8330
3 changed files with 13 additions and 6 deletions

View file

@@ -979,6 +979,9 @@ pub fn read_unquoted_escape(
}
if let Some(c) = result_char_or_none {
if fish_reserved_codepoint(c) {
return None;
}
result.push(c);
}
@@ -1134,9 +1137,7 @@ pub fn str2wcstring(inp: &[u8]) -> WString {
Some(codepoint) => {
c = codepoint;
// Determine whether to encode this character with our crazy scheme.
(c >= ENCODE_DIRECT_BASE && c < ENCODE_DIRECT_END)
||
c == INTERNAL_SEPARATOR
fish_reserved_codepoint(c)
||
// Incomplete sequence.
ret == 0_usize.wrapping_sub(2)

View file

@@ -328,12 +328,10 @@ static wcstring str2wcs_internal(const char *in, const size_t in_len) {
} else {
ret = std::mbrtowc(&wc, &in[in_pos], in_len - in_pos, &state);
// Determine whether to encode this character with our crazy scheme.
if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
if (fish_reserved_codepoint(wc)) {
use_encode_direct = true;
} else if ((wc >= 0xD800 && wc <= 0xDFFF) || static_cast<uint32_t>(wc) >= 0x110000) {
use_encode_direct = true;
} else if (wc == INTERNAL_SEPARATOR) {
use_encode_direct = true;
} else if (ret == static_cast<size_t>(-2)) {
// Incomplete sequence.
use_encode_direct = true;

View file

@@ -612,6 +612,14 @@ $fish -c begin
echo $status
# CHECK: 127
$fish -c 'echo \ufdd2"fart"'
# CHECKERR: fish: Invalid token '\ufdd2"fart"'
# CHECKERR: echo \ufdd2"fart"
# CHECKERR: ^~~~~~~~~~~^
echo (printf '\ufdd2foo') | string escape
# CHECK: \Xef\Xb7\X92foo
printf '%s\n' "#!/bin/sh" 'echo $0' > $tmpdir/argv0.sh
chmod +x $tmpdir/argv0.sh
cd $tmpdir