Warn about unescape_string_xxx() behavior (and tweak slightly)

The type system no longer guarantees that the input string is nul-terminated, meaning accessing beyond the range-checked `i` a char-at-a-time is no longer safe. (In C++, we would either be using a plain C string which is always nul-terminated or we would be using (w)string::cstr() which similarly grants access to its nul-terminated buffer.) Aside from that, there's no need to explicitly check `if c2 == '\0'` because '\0' is not a valid hex digit so the `?` tacked on to `convert_hex_digit(c2)?` will abort and return `None` anyway. convert_hex_digit() is not appreciably faster than char::to_digit(16) and makes the code less maintainable since it encodes certain assumptions; since it's also not used consistently just drop it in favor of the std fn. Since the output string (per the decode logic) is always shorter than or equal to the input string, just reserve the input string size upfront to prevent vec reallocations.
2024-11-14 00:47:30 +00:00 · 2023-04-23 13:47:40 -05:00 · 2023-04-23 13:47:40 -05:00 · 76dc849fca
commit 76dc849fca
parent f9c92753c4
1 changed files with 18 additions and 27 deletions
--- a/fish-rust/src/common.rs
+++ b/fish-rust/src/common.rs
@ -687,10 +687,14 @@ fn unescape_string_internal(input: &wstr, flags: UnescapeFlags) -> Option<WStrin
    Some(result)
 }

-/// Reverse the effects of `escape_string_url()`. By definition the string has consist of just ASCII
-/// chars.
+/// Reverse the effects of `escape_string_url()`. By definition the consists of just ASCII chars.
+///
+/// XXX: The C++ counterpart to this function didn't panic if passed a truncated or malformed
+/// escaped string because it relied on always being able to read at least one more char until a NUL
+/// is encountered. As currently written/ported, it can panic if the passed utf-32 char slice is
+/// truncated or malformed since that is no longer guaranteed to be the case!
 fn unescape_string_url(input: &wstr) -> Option<WString> {
-    let mut result: Vec<u8> = vec![];
+    let mut result: Vec<u8> = Vec::with_capacity(input.len());
    let mut i = 0;
    while i < input.len() {
        let c = input.char_at(i);
@ -705,12 +709,9 @@ fn unescape_string_url(input: &wstr) -> Option<WString> {
                result.push(b'%');
                i += 1;
            } else {
-                let c2 = input.char_at(i + 2);
-                if c2 == '\0' {
-                    return None; // string ended prematurely
-                }
                let d1 = c1.to_digit(16)?;
-                let d2 = c2.to_digit(16)?;
+                let c2 = input.char_at(i + 2);
+                let d2 = c2.to_digit(16)?; // also fails if '\0' i.e. premature end
                result.push((16 * d1 + d2) as u8);
                i += 2;
            }
@ -723,10 +724,15 @@ fn unescape_string_url(input: &wstr) -> Option<WString> {
    Some(str2wcstring(&result))
 }

-/// Reverse the effects of `escape_string_var()`. By definition the string has consist of just ASCII
+/// Reverse the effects of `escape_string_var()`. By definition the string consists of just ASCII
 /// chars.
+///
+/// XXX: The C++ counterpart to this function didn't panic if passed a truncated or malformed
+/// escaped string because it relied on always being able to read at least one more char until a NUL
+/// is encountered. As currently written/ported, it can panic if the passed utf-32 char slice is
+/// truncated or malformed since that is no longer guaranteed to be the case!
 fn unescape_string_var(input: &wstr) -> Option<WString> {
-    let mut result: Vec<u8> = vec![];
+    let mut result: Vec<u8> = Vec::with_capacity(input.len());
    let mut prev_was_hex_encoded = false;
    let mut i = 0;
    while i < input.len() {
@ -746,12 +752,9 @@ fn unescape_string_var(input: &wstr) -> Option<WString> {
                result.push(b'_');
                i += 1;
            } else if ('0'..='9').contains(&c1) || ('A'..='F').contains(&c1) {
+                let d1 = c1.to_digit(16)?;
                let c2 = input.char_at(i + 2);
-                if c2 == '\0' {
-                    return None; // string ended prematurely
-                }
-                let d1 = convert_hex_digit(c1)?;
-                let d2 = convert_hex_digit(c2)?;
+                let d2 = c2.to_digit(16)?; // also fails if '\0' i.e. premature end
                result.push((16 * d1 + d2) as u8);
                i += 2;
                prev_was_hex_encoded = true;
@ -946,18 +949,6 @@ pub fn read_unquoted_escape(
    Some(in_pos)
 }

-/// This is a specialization of `char::to_digit()` that only handles base 16 and only uppercase.
-fn convert_hex_digit(d: char) -> Option<u32> {
-    let val = if ('0'..='9').contains(&d) {
-        u32::from(d) - u32::from('0')
-    } else if ('A'..='Z').contains(&d) {
-        10 + u32::from(d) - u32::from('A')
-    } else {
-        return None;
-    };
-    Some(val)
-}
-
 pub const fn char_offset(base: char, offset: u32) -> char {
    match char::from_u32(base as u32 + offset) {
        Some(c) => c,