Don't leak encoding of invalid codepoints into uvar file

When we read bytes like \xfc that don't produce a Unicode code point,
we encode them in a Unicode private use area.
This encoding should be transparent to the user.

We accidentally add it to uvar files as \uf6fc in this case.  When reading
it back, read_unquoted_escape() will fail at the "fish_reserved_codepoint(c)"
check. This check is to avoid external input being misinterpreted
as one of our in-band signalling characters like ANY_CHAR (for *).

For encoded raw bytes, this check probably doesn't really matter in terms of
security because the only thing we do with these bytes is convert them back
to raw. So we could allow unescaping them at this point, thus supporting
old uvar files.

However that seems like the wrong direction. PUA encoding should never leak.
So let's instead make sure to serialize it as \xfc instead of \f6fc going
forward.

Fixes #10313
This commit is contained in:
Johannes Altmanninger 2024-04-14 07:43:14 +02:00
parent 2329a3adb9
commit e01fc62d69
2 changed files with 13 additions and 1 deletions

View file

@ -10,7 +10,7 @@ use crate::fds::{open_cloexec, wopen_cloexec};
use crate::flog::{FLOG, FLOGF};
use crate::path::path_get_config;
use crate::path::{path_get_config_remoteness, DirRemoteness};
use crate::wchar::prelude::*;
use crate::wchar::{decode_byte_from_char, prelude::*};
use crate::wcstringutil::{join_strings, split_string, string_suffixes_string, LineIterator};
use crate::wutil::{
file_id_for_fd, file_id_for_path, file_id_for_path_narrow, wdirname, wrealpath, wrename, wstat,
@ -904,6 +904,8 @@ fn full_escape(input: &wstr) -> WString {
out.push(c);
} else if c.is_ascii() {
sprintf!(=> &mut out, "\\x%.2x", u32::from(c));
} else if let Some(encoded_byte) = decode_byte_from_char(c) {
sprintf!(=> &mut out, "\\x%.2x", u32::from(encoded_byte));
} else if u32::from(c) < 65536 {
sprintf!(=> &mut out, "\\u%.4x", u32::from(c));
} else {

View file

@ -1,5 +1,7 @@
use crate::common::char_offset;
use crate::common::wcs2osstring;
use crate::common::ScopeGuard;
use crate::common::ENCODE_DIRECT_BASE;
use crate::env::{EnvVar, EnvVarFlags, VarTable};
use crate::env_universal_common::{CallbackDataList, EnvUniversal, UvarFormat};
use crate::parser::Parser;
@ -109,6 +111,13 @@ fn test_universal_output() {
flag_pathvar,
),
);
vars.insert(
L!("varF").to_owned(),
EnvVar::new_vec(
vec![WString::from_chars([char_offset(ENCODE_DIRECT_BASE, 0xfc)])],
EnvVarFlags::empty(),
),
);
let text = EnvUniversal::serialize_with_vars(&vars);
let expected = concat!(
@ -119,6 +128,7 @@ fn test_universal_output() {
"SETUVAR varC:ValC1\n",
"SETUVAR --export --path varD:ValD1\n",
"SETUVAR --path varE:ValE1\\x1eValE2\n",
"SETUVAR varF:\\xfc\n",
)
.as_bytes();
assert_eq!(text, expected);