str2wcs: encode invalid Unicode characters in the private use area

Rust's `char` cannot represent invalid code points (surrogates U+D800..U+DFFF and
values above U+10FFFF), so let's ease the transition by treating them like byte
sequences that do not map to any code point.
See https://github.com/fish-shell/fish-shell/pull/9688#discussion_r1155089596
This commit is contained in:
Johannes Altmanninger 2023-04-01 12:23:28 +02:00
parent 746019e4ad
commit 3b15e995e7

View file

@@ -338,6 +338,8 @@ static wcstring str2wcs_internal(const char *in, const size_t in_len) {
// Determine whether to encode this character with our crazy scheme.
if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
use_encode_direct = true;
} else if ((wc >= 0xD800 && wc <= 0xDFFF) || static_cast<uint32_t>(wc) >= 0x110000) {
use_encode_direct = true;
} else if (wc == INTERNAL_SEPARATOR) {
use_encode_direct = true;
} else if (ret == static_cast<size_t>(-2)) {