From e01fc62d6956dcdd8ea289d05ea4e085d0da8df2 Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger Date: Sun, 14 Apr 2024 07:43:14 +0200 Subject: [PATCH] Don't leak encoding of invalid codepoints into uvar file When we read bytes like \xfc that don't produce a Unicode code point, we encode them in a Unicode private use area. This encoding should be transparent to the user. We accidentally add it to uvar files as \uf6fc in this case. When reading it back, read_unquoted_escape() will fail at the "fish_reserved_codepoint(c)" check. This check is to avoid external input being misinterpreted as one of our in-band signalling characters like ANY_CHAR (for *). For encoded raw bytes, this check probably doesn't really matter in terms of security because the only thing we do with these bytes is convert them back to raw. So we could allow unescaping them at this point, thus supporting old uvar files. However that seems like the wrong direction. PUA encoding should never leak. So let's instead make sure to serialize it as \xfc instead of \f6fc going forward. Fixes #10313 --- src/env_universal_common.rs | 4 +++- src/tests/env_universal_common.rs | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/env_universal_common.rs b/src/env_universal_common.rs index 2f9f98423..497956de3 100644 --- a/src/env_universal_common.rs +++ b/src/env_universal_common.rs @@ -10,7 +10,7 @@ use crate::fds::{open_cloexec, wopen_cloexec}; use crate::flog::{FLOG, FLOGF}; use crate::path::path_get_config; use crate::path::{path_get_config_remoteness, DirRemoteness}; -use crate::wchar::prelude::*; +use crate::wchar::{decode_byte_from_char, prelude::*}; use crate::wcstringutil::{join_strings, split_string, string_suffixes_string, LineIterator}; use crate::wutil::{ file_id_for_fd, file_id_for_path, file_id_for_path_narrow, wdirname, wrealpath, wrename, wstat, @@ -904,6 +904,8 @@ fn full_escape(input: &wstr) -> WString { out.push(c); } else if c.is_ascii() { sprintf!(=> &mut out, "\\x%.2x", u32::from(c)); + } else if let Some(encoded_byte) = decode_byte_from_char(c) { + sprintf!(=> &mut out, "\\x%.2x", u32::from(encoded_byte)); } else if u32::from(c) < 65536 { sprintf!(=> &mut out, "\\u%.4x", u32::from(c)); } else { diff --git a/src/tests/env_universal_common.rs b/src/tests/env_universal_common.rs index 1ecfc25d2..13aee0607 100644 --- a/src/tests/env_universal_common.rs +++ b/src/tests/env_universal_common.rs @@ -1,5 +1,7 @@ +use crate::common::char_offset; use crate::common::wcs2osstring; use crate::common::ScopeGuard; +use crate::common::ENCODE_DIRECT_BASE; use crate::env::{EnvVar, EnvVarFlags, VarTable}; use crate::env_universal_common::{CallbackDataList, EnvUniversal, UvarFormat}; use crate::parser::Parser; @@ -109,6 +111,13 @@ fn test_universal_output() { flag_pathvar, ), ); + vars.insert( + L!("varF").to_owned(), + EnvVar::new_vec( + vec![WString::from_chars([char_offset(ENCODE_DIRECT_BASE, 0xfc)])], + EnvVarFlags::empty(), + ), + ); let text = EnvUniversal::serialize_with_vars(&vars); let expected = concat!( @@ -119,6 +128,7 @@ fn test_universal_output() { "SETUVAR varC:ValC1\n", "SETUVAR --export --path varD:ValD1\n", "SETUVAR --path varE:ValE1\\x1eValE2\n", + "SETUVAR varF:\\xfc\n", ) .as_bytes(); assert_eq!(text, expected);