diff --git a/fish-rust/Cargo.lock b/fish-rust/Cargo.lock
index 059017816..cbde7ffb6 100644
--- a/fish-rust/Cargo.lock
+++ b/fish-rust/Cargo.lock
@@ -368,6 +368,7 @@ dependencies = [
  "autocxx",
  "autocxx-build",
  "bitflags",
+ "cc",
  "cxx",
  "cxx-build",
  "cxx-gen",
diff --git a/fish-rust/Cargo.toml b/fish-rust/Cargo.toml
index 24f803e47..1511d1637 100644
--- a/fish-rust/Cargo.toml
+++ b/fish-rust/Cargo.toml
@@ -26,6 +26,7 @@ widestring = "1.0.2"
 
 [build-dependencies]
 autocxx-build = "0.23.1"
+cc = { git = "https://github.com/mqudsi/cc-rs", branch = "fish" }
 cxx-build = { git = "https://github.com/fish-shell/cxx", branch = "fish" }
 cxx-gen = { git = "https://github.com/fish-shell/cxx", branch = "fish" }
 miette = { version = "5", features = ["fancy"] }
diff --git a/fish-rust/build.rs b/fish-rust/build.rs
index 5ffbbabd1..4d2edfee5 100644
--- a/fish-rust/build.rs
+++ b/fish-rust/build.rs
@@ -1,6 +1,8 @@
 use miette::miette;
 
 fn main() -> miette::Result<()> {
+    cc::Build::new().file("src/compat.c").compile("libcompat.a");
+
     let rust_dir = std::env::var("CARGO_MANIFEST_DIR").expect("Env var CARGO_MANIFEST_DIR missing");
     let target_dir =
         std::env::var("FISH_RUST_TARGET_DIR").unwrap_or(format!("{}/{}", rust_dir, "target/"));
@@ -25,6 +27,7 @@ fn main() -> miette::Result<()> {
     let source_files = vec![
         "src/abbrs.rs",
         "src/event.rs",
+        "src/common.rs",
         "src/fd_monitor.rs",
         "src/fd_readable_set.rs",
         "src/fds.rs",
diff --git a/fish-rust/src/common.rs b/fish-rust/src/common.rs
index 48a7cf622..75780987d 100644
--- a/fish-rust/src/common.rs
+++ b/fish-rust/src/common.rs
@@ -1,13 +1,81 @@
-use crate::ffi;
-use crate::wchar::{wstr, WString};
+//! Prototypes for various functions, mostly string utilities, that are used by most parts of fish.
+
+use crate::expand::{
+    BRACE_BEGIN, BRACE_END, BRACE_SEP, BRACE_SPACE, HOME_DIRECTORY, INTERNAL_SEPARATOR,
+    PROCESS_EXPAND_SELF, PROCESS_EXPAND_SELF_STR, VARIABLE_EXPAND, VARIABLE_EXPAND_SINGLE,
+};
+use crate::ffi::{self, fish_wcwidth};
+use crate::future_feature_flags::{feature_test, FeatureFlag};
+use crate::global_safety::RelaxedAtomicBool;
+use crate::termsize::Termsize;
+use crate::wchar::{encode_byte_to_char, wstr, WString, L};
 use crate::wchar_ext::WExt;
-use crate::wchar_ffi::c_str;
-use crate::wchar_ffi::WCharFromFFI;
+use crate::wchar_ffi::{c_str, WCharFromFFI, WCharToFFI};
+use crate::wcstringutil::wcs2string_callback;
+use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE};
+use crate::wutil::encoding::{mbrtowc, wcrtomb, zero_mbstate, AT_LEAST_MB_LEN_MAX};
+use crate::wutil::{fish_iswalnum, sprintf, wgettext};
 use bitflags::bitflags;
-use std::mem;
-use std::mem::ManuallyDrop;
+use core::slice;
+use cxx::{CxxWString, UniquePtr};
+use libc::{EINTR, EIO, O_WRONLY, SIGTTOU, SIG_IGN, STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO};
+use once_cell::sync::Lazy;
+use std::cell::RefCell;
+use std::env;
+use std::ffi::CString;
+use std::mem::{self, ManuallyDrop};
 use std::ops::{Deref, DerefMut};
 use std::os::fd::AsRawFd;
+use std::path::PathBuf;
+use std::rc::Rc;
+use std::str::FromStr;
+use std::sync::atomic::{AtomicI32, AtomicU32, AtomicU64, Ordering};
+use std::sync::Mutex;
+use std::time;
+use widestring_suffix::widestrs;
+
+/// Highest legal ASCII value.
+pub const ASCII_MAX: char = 127 as char;
+
+/// Highest legal 16-bit Unicode value.
+pub const UCS2_MAX: char = '\u{FFFF}';
+
+/// Highest legal byte value.
+pub const BYTE_MAX: char = 0xFF as char;
+
+/// Unicode BOM value.
+pub const UTF8_BOM_WCHAR: char = '\u{FEFF}';
+
+// Use Unicode "non-characters" for internal characters as much as we can. This
+// gives us 32 "characters" for internal use that we can guarantee should not
+// appear in our input stream. See http://www.unicode.org/faq/private_use.html.
+pub const RESERVED_CHAR_BASE: char = '\u{FDD0}';
+pub const RESERVED_CHAR_END: char = '\u{FDF0}';
+// Split the available non-character values into two ranges to ensure there are
+// no conflicts among the places we use these special characters.
+pub const EXPAND_RESERVED_BASE: char = RESERVED_CHAR_BASE;
+pub const EXPAND_RESERVED_END: char = char_offset(EXPAND_RESERVED_BASE, 16);
+pub const WILDCARD_RESERVED_BASE: char = EXPAND_RESERVED_END;
+pub const WILDCARD_RESERVED_END: char = char_offset(WILDCARD_RESERVED_BASE, 16);
+// Make sure the ranges defined above don't exceed the range for non-characters.
+// This is to make sure we didn't do something stupid in subdividing the
+// Unicode range for our needs.
+const _: () = assert!(WILDCARD_RESERVED_END <= RESERVED_CHAR_END);
+
+// These are in the Unicode private-use range. We really shouldn't use this
+// range but have little choice in the matter given how our lexer/parser works.
+// We can't use non-characters for these two ranges because there are only 66 of
+// them and we need at least 256 + 64.
+//
+// If sizeof(wchar_t) == 4 we could avoid using private-use chars; however, that
+// would result in fish having different behavior on machines with 16 versus 32
+// bit wchar_t. It's better that fish behave the same on both types of systems.
+//
+// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
+// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
+// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
+pub const ENCODE_DIRECT_BASE: char = '\u{F600}';
+pub const ENCODE_DIRECT_END: char = char_offset(ENCODE_DIRECT_BASE, 256);
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum EscapeStringStyle {
@@ -41,6 +109,34 @@ bitflags! {
     }
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum UnescapeStringStyle {
+    Script(UnescapeFlags),
+    Url,
+    Var,
+}
+
+impl Default for UnescapeStringStyle {
+    fn default() -> Self {
+        Self::Script(UnescapeFlags::default())
+    }
+}
+
+bitflags! {
+    /// Flags for the unescape_string functions (carried by `UnescapeStringStyle::Script`).
+    #[derive(Default)]
+    pub struct UnescapeFlags: u32 {
+        /// default behavior: no flags set
+        const DEFAULT = 0;
+        /// replace special fish syntax characters (`~`, `*`, `$`, braces, ...) with internal markers
+        const SPECIAL = 1 << 0;
+        /// allow incomplete escape sequences (e.g. a trailing backslash) instead of failing
+        const INCOMPLETE = 1 << 1;
+        /// leave backslashes in unquoted text alone instead of processing them as escapes
+        const NO_BACKSLASHES = 1 << 2;
+    }
+}
+
 /// Replace special characters with backslash escape sequences. Newline is replaced with `\n`, etc.
 pub fn escape_string(s: &wstr, style: EscapeStringStyle) -> WString {
     let (style, flags) = match style {
@@ -64,6 +160,1042 @@ pub fn escape_string(s: &wstr, style: EscapeStringStyle) -> WString {
     ffi::escape_string(c_str!(s), flags.bits().into(), style).from_ffi()
 }
 
+/// Escape a string so that it may be inserted into a double-quoted string.
+/// Returns a newly allocated string; `input` itself is left untouched.
+pub fn escape_string_for_double_quotes(input: &wstr) -> WString {
+    // Only backslashes, double quotes, and dollars need escaping; build front to back in O(n).
+    let mut result = WString::new();
+    result.reserve(input.len());
+    for c in input.chars() {
+        if matches!(c, '\\' | '$' | '"') {
+            result.push('\\');
+        }
+        result.push(c);
+    }
+    result
+}
+
+pub fn unescape_string(input: &wstr, style: UnescapeStringStyle) -> Option<WString> {
+    match style {
+        UnescapeStringStyle::Script(flags) => unescape_string_internal(input, flags),
+        UnescapeStringStyle::Url => unescape_string_url(input),
+        UnescapeStringStyle::Var => unescape_string_var(input),
+    }
+}
+
+// TODO Delete this.
+/// Unescape `s` in place; returns false (leaving `s` unchanged) if unescaping fails.
+pub fn unescape_string_in_place(s: &mut WString, style: UnescapeStringStyle) -> bool {
+    let unescaped = unescape_string(s, style);
+    unescaped.map(|new_value| *s = new_value).is_some()
+}
+
+/// Unescapes `input` as fish script text according to `flags`; returns `None` on error.
+fn unescape_string_internal(input: &wstr, flags: UnescapeFlags) -> Option<WString> {
+    let mut result = WString::new();
+    result.reserve(input.len());
+
+    let unescape_special = flags.contains(UnescapeFlags::SPECIAL);
+    let allow_incomplete = flags.contains(UnescapeFlags::INCOMPLETE);
+    let ignore_backslashes = flags.contains(UnescapeFlags::NO_BACKSLASHES);
+
+    // The positions of open braces.
+    let mut braces = vec![];
+    // The positions of variable expansions or brace ","s.
+    // We only read braces as expanders if there's a variable expansion or "," in them.
+    let mut vars_or_seps = vec![];
+    let mut brace_count = 0;
+
+    let mut errored = false;
+    #[derive(PartialEq, Eq)]
+    enum Mode {
+        Unquoted,
+        SingleQuotes,
+        DoubleQuotes,
+    }
+    let mut mode = Mode::Unquoted;
+
+    let mut input_position = 0;
+    while input_position < input.len() && !errored {
+        let c = input.char_at(input_position);
+        // Here's the character we'll append to result, or None to suppress it.
+        let mut to_append_or_none = Some(c);
+        if mode == Mode::Unquoted {
+            match c {
+                '\\' => {
+                    if !ignore_backslashes {
+                        // Backslashes (escapes) are complicated and may result in errors, or
+                        // appending INTERNAL_SEPARATORs, so we have to handle them specially.
+                        if let Some(escape_chars) = read_unquoted_escape(
+                            &input[input_position..],
+                            &mut result,
+                            allow_incomplete,
+                            unescape_special,
+                        ) {
+                            // Skip over the characters we read, minus one because the outer loop
+                            // will increment it.
+                            assert!(escape_chars > 0);
+                            input_position += escape_chars - 1;
+                        } else {
+                            // A None return indicates an error.
+                            errored = true;
+                        }
+                        // We've already appended, don't append anything else.
+                        to_append_or_none = None;
+                    }
+                }
+                '~' => {
+                    if unescape_special && input_position == 0 {
+                        to_append_or_none = Some(HOME_DIRECTORY);
+                    }
+                }
+                '%' => {
+                    // Note that this only recognizes %self if the string is literally %self.
+                    // %self/foo will NOT match this.
+                    if unescape_special && input_position == 0 && input == PROCESS_EXPAND_SELF_STR {
+                        to_append_or_none = Some(PROCESS_EXPAND_SELF);
+                        input_position += PROCESS_EXPAND_SELF_STR.len() - 1; // skip over "self"
+                    }
+                }
+                '*' => {
+                    if unescape_special {
+                        // In general, this is ANY_STRING. But as a hack, if the last appended char
+                        // is ANY_STRING, delete the last char and store ANY_STRING_RECURSIVE to
+                        // reflect the fact that ** is the recursive wildcard.
+                        if result.chars().last() == Some(ANY_STRING) {
+                            assert!(!result.is_empty());
+                            result.truncate(result.len() - 1);
+                            to_append_or_none = Some(ANY_STRING_RECURSIVE);
+                        } else {
+                            to_append_or_none = Some(ANY_STRING);
+                        }
+                    }
+                }
+                '?' => {
+                    if unescape_special && !feature_test(FeatureFlag::qmark_noglob) {
+                        to_append_or_none = Some(ANY_CHAR);
+                    }
+                }
+                '$' => {
+                    if unescape_special {
+                        let is_cmdsub = input_position + 1 < input.len()
+                            && input.char_at(input_position + 1) == '(';
+                        if !is_cmdsub {
+                            to_append_or_none = Some(VARIABLE_EXPAND);
+                            vars_or_seps.push(input_position);
+                        }
+                    }
+                }
+                '{' => {
+                    if unescape_special {
+                        brace_count += 1;
+                        to_append_or_none = Some(BRACE_BEGIN);
+                        // We need to store where the brace *ends up* in the output.
+                        braces.push(result.len());
+                    }
+                }
+                '}' => {
+                    if unescape_special {
+                        // HACK: The completion machinery sometimes hands us partial tokens.
+                        // We can't parse them properly, but it shouldn't hurt,
+                        // so we don't assert here.
+                        // See #4954.
+                        // assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we
+                        // shouldn't be able to get here");
+                        brace_count -= 1;
+                        to_append_or_none = Some(BRACE_END);
+                        if let Some(brace) = braces.pop() {
+                            // HACK: To reduce accidental use of brace expansion, treat a brace
+                            // with zero or one items as literal input. See #4632. (The hack is
+                            // doing it here and like this.)
+                            if vars_or_seps.last().map(|i| *i < brace).unwrap_or(true) {
+                                result.as_char_slice_mut()[brace] = '{';
+                                // We also need to turn all spaces back.
+                                for i in brace + 1..result.len() {
+                                    if result.char_at(i) == BRACE_SPACE {
+                                        result.as_char_slice_mut()[i] = ' ';
+                                    }
+                                }
+                                to_append_or_none = Some('}');
+                            }
+                            // Remove all seps inside the current brace pair, so if we have a
+                            // surrounding pair we only get seps inside *that*.
+                            if !vars_or_seps.is_empty() {
+                                while vars_or_seps.last().map(|i| *i > brace).unwrap_or_default() {
+                                    vars_or_seps.pop();
+                                }
+                            }
+                        }
+                    }
+                }
+                ',' => {
+                    if unescape_special && brace_count > 0 {
+                        to_append_or_none = Some(BRACE_SEP);
+                        vars_or_seps.push(input_position);
+                    }
+                }
+                ' ' => {
+                    if unescape_special && brace_count > 0 {
+                        to_append_or_none = Some(BRACE_SPACE);
+                    }
+                }
+                '\'' => {
+                    mode = Mode::SingleQuotes;
+                    to_append_or_none = if unescape_special {
+                        Some(INTERNAL_SEPARATOR)
+                    } else {
+                        None
+                    };
+                }
+                '"' => {
+                    mode = Mode::DoubleQuotes;
+                    to_append_or_none = if unescape_special {
+                        Some(INTERNAL_SEPARATOR)
+                    } else {
+                        None
+                    };
+                }
+                _ => (),
+            }
+        } else if mode == Mode::SingleQuotes {
+            if c == '\\' {
+                // A backslash may or may not escape something in single quotes.
+                match input.char_at(input_position + 1) {
+                    '\\' | '\'' => {
+                        to_append_or_none = Some(input.char_at(input_position + 1));
+                        input_position += 1; // skip over the backslash
+                    }
+                    '\0' => {
+                        if !allow_incomplete {
+                            errored = true;
+                        } else {
+                            // PCA this line had the following cryptic comment: 'We may ever escape
+                            // a NULL character, but still appending a \ in case I am wrong.' Not
+                            // sure what it means or the importance of this.
+                            input_position += 1; /* Skip over the backslash */
+                            to_append_or_none = Some('\\');
+                        }
+                    }
+                    _ => {
+                        // Literal backslash that doesn't escape anything! Leave things alone; we'll
+                        // append the backslash itself.
+                    }
+                }
+            } else if c == '\'' {
+                to_append_or_none = if unescape_special {
+                    Some(INTERNAL_SEPARATOR)
+                } else {
+                    None
+                };
+                mode = Mode::Unquoted;
+            }
+        } else if mode == Mode::DoubleQuotes {
+            match c {
+                '"' => {
+                    mode = Mode::Unquoted;
+                    to_append_or_none = if unescape_special {
+                        Some(INTERNAL_SEPARATOR)
+                    } else {
+                        None
+                    };
+                }
+                '\\' => {
+                    match input.char_at(input_position + 1) {
+                        '\0' => {
+                            if !allow_incomplete {
+                                errored = true;
+                            } else {
+                                to_append_or_none = Some('\0');
+                            }
+                        }
+                        '\\' | '$' | '"' => {
+                            to_append_or_none = Some(input.char_at(input_position + 1));
+                            input_position += 1; /* Skip over the backslash */
+                        }
+                        '\n' => {
+                            /* Swallow newline */
+                            to_append_or_none = None;
+                            input_position += 1; /* Skip over the backslash */
+                        }
+                        _ => {
+                            /* Literal backslash that doesn't escape anything! Leave things alone;
+                             * we'll append the backslash itself */
+                        }
+                    }
+                }
+                '$' => {
+                    if unescape_special {
+                        to_append_or_none = Some(VARIABLE_EXPAND_SINGLE);
+                        vars_or_seps.push(input_position);
+                    }
+                }
+                _ => (),
+            }
+        }
+
+        // Now maybe append the char.
+        if let Some(c) = to_append_or_none {
+            result.push(c);
+        }
+        input_position += 1;
+    }
+
+    // Hand back the unescaped string, unless we hit an error along the way.
+    if errored {
+        return None;
+    }
+    Some(result)
+}
+
+/// Reverse the effects of `escape_string_url()`. By definition the string must consist of just
+/// ASCII chars; returns `None` for non-ASCII input or a malformed `%` escape.
+fn unescape_string_url(input: &wstr) -> Option<WString> {
+    let mut result: Vec<u8> = vec![];
+    let mut i = 0;
+    while i < input.len() {
+        let c = input.char_at(i);
+        if c > '\u{7F}' {
+            return None; // invalid character means we can't decode the string
+        }
+        if c == '%' {
+            let c1 = input.char_at(i + 1);
+            if c1 == '\0' {
+                return None;
+            } else if c1 == '%' {
+                result.push(b'%');
+                i += 1;
+            } else {
+                let c2 = input.char_at(i + 2);
+                if c2 == '\0' {
+                    return None; // string ended prematurely
+                }
+                let d1 = c1.to_digit(16)?;
+                let d2 = c2.to_digit(16)?;
+                result.push((16 * d1 + d2) as u8);
+                i += 2;
+            }
+        } else {
+            result.push(c as u8);
+        }
+        i += 1
+    }
+
+    Some(str2wcstring(&result))
+}
+
+/// Reverse the effects of `escape_string_var()`. By definition the string must consist of just
+/// ASCII chars; returns `None` for non-ASCII input or a malformed `_` escape.
+fn unescape_string_var(input: &wstr) -> Option<WString> {
+    let mut result: Vec<u8> = vec![];
+    let mut prev_was_hex_encoded = false;
+    let mut i = 0;
+    while i < input.len() {
+        let c = input.char_at(i);
+        if c > '\u{7F}' {
+            return None; // invalid character means we can't decode the string
+        }
+        if c == '_' {
+            let c1 = input.char_at(i + 1);
+            if c1 == '\0' {
+                if prev_was_hex_encoded {
+                    break;
+                }
+                return None; // found unexpected escape char at end of string
+            }
+            if c1 == '_' {
+                result.push(b'_');
+                i += 1;
+            } else if ('0'..='9').contains(&c1) || ('A'..='F').contains(&c1) {
+                let c2 = input.char_at(i + 2);
+                if c2 == '\0' {
+                    return None; // string ended prematurely
+                }
+                let d1 = convert_hex_digit(c1)?;
+                let d2 = convert_hex_digit(c2)?;
+                result.push((16 * d1 + d2) as u8);
+                i += 2;
+                prev_was_hex_encoded = true;
+            }
+            // No "else" clause because if the first char after an underscore is not another
+            // underscore or a valid hex character then the underscore is there to improve
+            // readability after we've encoded a character not valid in a var name.
+        } else {
+            result.push(c as u8);
+        }
+        i += 1;
+    }
+
+    Some(str2wcstring(&result))
+}
+
+/// Given a string starting with a backslash, read the escape as if it is unquoted, appending
+/// to result. Return the number of characters consumed, or `None` on error.
+pub fn read_unquoted_escape(
+    input: &wstr,
+    result: &mut WString,
+    allow_incomplete: bool,
+    unescape_special: bool,
+) -> Option<usize> {
+    assert!(input.char_at(0) == '\\', "not an escape");
+
+    // Here's the character we'll ultimately append, or None. Note that '\0' is a
+    // valid thing to append.
+    let mut result_char_or_none: Option<char> = None;
+
+    let mut errored = false;
+    let mut in_pos = 1; // in_pos always tracks the next character to read (and therefore the number
+                        // of characters read so far)
+
+    // For multibyte \X sequences.
+    let mut byte_buff: Vec<u8> = vec![];
+
+    loop {
+        let c = input.char_at(in_pos);
+        in_pos += 1;
+        match c {
+            // A null character after a backslash is an error.
+            '\0' => {
+                // Adjust in_pos to only include the backslash.
+                assert!(in_pos > 0);
+                in_pos -= 1;
+
+                // It's an error, unless we're allowing incomplete escapes.
+                if !allow_incomplete {
+                    errored = true;
+                }
+            }
+            // Numeric escape sequences. No prefix means octal escape, otherwise hexadecimal.
+            '0'..='7' | 'u' | 'U' | 'x' | 'X' => {
+                let mut res: u64 = 0;
+                let mut chars = 2;
+                let mut base = 16;
+                let mut byte_literal = false;
+                let mut max_val = ASCII_MAX;
+
+                match c {
+                    'u' => {
+                        chars = 4;
+                        max_val = UCS2_MAX;
+                    }
+                    'U' => {
+                        chars = 8;
+                        // Don't exceed the largest Unicode code point - see #1107.
+                        max_val = char::MAX;
+                    }
+                    'x' | 'X' => {
+                        byte_literal = true;
+                        max_val = BYTE_MAX;
+                    }
+                    _ => {
+                        base = 8;
+                        chars = 3;
+                        // Note that in_pos currently is just after the first post-backslash
+                        // character; we want to start our escape from there.
+                        assert!(in_pos > 0);
+                        in_pos -= 1;
+                    }
+                }
+
+                for i in 0..chars {
+                    let Some(d) = input.char_at(in_pos).to_digit(base) else {
+                        // If we have no digit, this is a tokenizer error.
+                        if i == 0 {
+                            errored = true;
+                        }
+                        break;
+                    };
+
+                    res = (res * u64::from(base)) + u64::from(d);
+                    in_pos += 1;
+                }
+
+                if !errored && res <= u64::from(max_val) {
+                    if byte_literal {
+                        // Multibyte encodings necessitate that we keep adjacent byte escapes.
+                        // - `\Xc3\Xb6` is "ö", but only together.
+                        // (this assumes a valid codepoint can't consist of multiple bytes
+                        // that are valid on their own, which is true for UTF-8)
+                        byte_buff.push(res.try_into().unwrap());
+                        result_char_or_none = None;
+                        if input[in_pos..].starts_with("\\X") || input[in_pos..].starts_with("\\x")
+                        {
+                            in_pos += 1;
+                            continue;
+                        }
+                    } else {
+                        result_char_or_none =
+                            Some(char::from_u32(res.try_into().unwrap()).unwrap_or('\u{FFFD}'));
+                    }
+                } else {
+                    errored = true;
+                }
+            }
+            // \a means bell (alert).
+            'a' => {
+                result_char_or_none = Some('\x07');
+            }
+            // \b means backspace.
+            'b' => {
+                result_char_or_none = Some('\x08');
+            }
+            // \cX means control sequence X.
+            'c' => {
+                let sequence_char = u32::from(input.char_at(in_pos));
+                in_pos += 1;
+                if sequence_char >= u32::from('a') && sequence_char <= u32::from('a') + 32 {
+                    result_char_or_none =
+                        Some(char::from_u32(sequence_char - u32::from('a') + 1).unwrap());
+                } else if sequence_char >= u32::from('A') && sequence_char <= u32::from('A') + 32 {
+                    result_char_or_none =
+                        Some(char::from_u32(sequence_char - u32::from('A') + 1).unwrap());
+                } else {
+                    errored = true;
+                }
+            }
+            // \e means escape.
+            'e' => {
+                result_char_or_none = Some('\x1B');
+            }
+            // \f means form feed.
+            'f' => {
+                result_char_or_none = Some('\x0C');
+            }
+            // \n means newline.
+            'n' => {
+                result_char_or_none = Some('\n');
+            }
+            // \r means carriage return.
+            'r' => {
+                result_char_or_none = Some('\x0D');
+            }
+            // \t means tab.
+            't' => {
+                result_char_or_none = Some('\t');
+            }
+            // \v means vertical tab.
+            'v' => {
+                result_char_or_none = Some('\x0b');
+            }
+            // If a backslash is followed by an actual newline, swallow them both.
+            '\n' => {
+                result_char_or_none = None;
+            }
+            _ => {
+                if unescape_special {
+                    result.push(INTERNAL_SEPARATOR);
+                }
+                result_char_or_none = Some(c);
+            }
+        }
+
+        if errored {
+            return None;
+        }
+
+        if !byte_buff.is_empty() {
+            result.push_utfstr(&str2wcstring(&byte_buff));
+        }
+
+        break;
+    }
+
+    if let Some(c) = result_char_or_none {
+        result.push(c);
+    }
+
+    Some(in_pos)
+}
+
+/// This is a specialization of `char::to_digit()` that only handles base 16 and only uppercase.
+/// Returns the value of `d` for `0-9` and `A-F`, and `None` for every other character
+/// (including lowercase `a-f` and the letters `G-Z`, which are not hexadecimal digits).
+fn convert_hex_digit(d: char) -> Option<u32> {
+    match d {
+        '0'..='9' => Some(u32::from(d) - u32::from('0')),
+        // Stop at 'F': accepting 'G'..='Z' would hand callers digit values above 15.
+        'A'..='F' => Some(10 + u32::from(d) - u32::from('A')),
+        _ => None,
+    }
+}
+
+pub const fn char_offset(base: char, offset: u32) -> char {
+    if let Some(shifted) = char::from_u32(base as u32 + offset) {
+        return shifted;
+    }
+    panic!("not a valid char")
+}
+
+/// A user-visible job ID.
+pub type JobId = i32;
+
+/// The non user-visible, never-recycled job ID.
+/// Every job has a unique positive value for this.
+pub type InternalJobId = u64;
+
+/// Exits immediately via `_exit`, without invoking destructors.
+/// Useful for code running after fork.
+fn exit_without_destructors(code: i32) -> ! {
+    // SAFETY: `_exit` only needs an integer status and never returns.
+    unsafe { libc::_exit(code) }
+}
+
+/// Save the shell mode on startup so we can restore them on exit.
+static SHELL_MODES: Lazy<Mutex<libc::termios>> = Lazy::new(|| Mutex::new(unsafe { mem::zeroed() }));
+
+/// Character shown where text was truncated: an ellipsis on Unicode systems, `$` otherwise.
+pub fn get_ellipsis_char() -> char {
+    let code_point = ELLIPSIS_CHAR.load(Ordering::Relaxed);
+    char::from_u32(code_point).unwrap()
+}
+
+static ELLIPSIS_CHAR: AtomicU32 = AtomicU32::new(0);
+
+/// The character or string to use where text has been truncated (ellipsis if possible, otherwise
+/// ...)
+pub static mut ELLIPSIS_STRING: Lazy<&'static wstr> = Lazy::new(|| L!(""));
+
+/// Character representing an omitted newline at the end of text.
+pub fn get_omitted_newline_str() -> &'static wstr {
+    unsafe { &OMITTED_NEWLINE_STR }
+}
+
+static mut OMITTED_NEWLINE_STR: Lazy<&'static wstr> = Lazy::new(|| L!(""));
+
+pub fn get_omitted_newline_width() -> usize {
+    unsafe { OMITTED_NEWLINE_STR.len() }
+}
+
+static OBFUSCATION_READ_CHAR: AtomicU32 = AtomicU32::new(0);
+
+pub fn get_obfuscation_read_char() -> char {
+    char::from_u32(OBFUSCATION_READ_CHAR.load(Ordering::Relaxed)).unwrap()
+}
+
+/// Profiling flag. True if commands should be profiled.
+pub static G_PROFILING_ACTIVE: RelaxedAtomicBool = RelaxedAtomicBool::new(false);
+
+/// Name of the current program. Should be set at startup. Used by the debug function.
+pub static mut PROGRAM_NAME: Lazy<&'static wstr> = Lazy::new(|| L!(""));
+
+#[cfg(windows)]
+/// Set to false if it's been determined we can't trust the last modified timestamp on the tty.
+pub const HAS_WORKING_TTY_TIMESTAMPS: bool = false;
+#[cfg(not(windows))]
+/// Set to false if it's been determined we can't trust the last modified timestamp on the tty.
+pub const HAS_WORKING_TTY_TIMESTAMPS: bool = true;
+
+/// A global, empty string. This is useful for functions which wish to return a reference to an
+/// empty string.
+pub static G_EMPTY_STRING: WString = WString::new();
+
+/// A global, empty list of strings. This is useful for functions which wish to return a
+/// reference to an empty list.
+pub static G_EMPTY_STRING_LIST: Vec<WString> = vec![];
+
+/// A function type to check for cancellation.
+///
+/// The callable returns `true` if execution should cancel.
+pub type CancelChecker = dyn Fn() -> bool;
+
+/// Converts the narrow character string `inp` into its wide equivalent, and returns it.
+///
+/// The string may contain embedded nulls.
+///
+/// This function encodes illegal character sequences in a reversible way using the private use
+/// area.
+///
+/// Decoding alternates between two steps until the input is exhausted: a run of ASCII bytes is
+/// copied over unchanged, then one non-ASCII character is decoded with `mbrtowc()`. A byte that
+/// cannot (or must not) be decoded normally is mapped through `encode_byte_to_char()` instead.
+pub fn str2wcstring(inp: &[u8]) -> WString {
+    if inp.is_empty() {
+        return WString::new();
+    }
+
+    let mut result = WString::new();
+    result.reserve(inp.len());
+    let mut pos = 0;
+    let mut state = zero_mbstate();
+    while pos < inp.len() {
+        // Append any initial sequence of ascii characters.
+        // Note we do not support character sets which are not supersets of ASCII.
+        let ascii_prefix_length = count_ascii_prefix(&inp[pos..]);
+        result.push_str(std::str::from_utf8(&inp[pos..pos + ascii_prefix_length]).unwrap());
+        pos += ascii_prefix_length;
+        assert!(pos <= inp.len(), "Position overflowed length");
+        if pos == inp.len() {
+            break;
+        }
+
+        // We have found a non-ASCII character.
+        // `ret` receives mbrtowc()'s return value and `c` the decoded character, if any.
+        let mut ret = 0;
+        let mut c = '\0';
+
+        let use_encode_direct = if inp[pos] & 0xF8 == 0xF8 {
+            // Protect against broken mbrtowc() implementations which attempt to encode UTF-8
+            // sequences longer than four bytes (e.g., OS X Snow Leopard).
+            // TODO This check used to be conditionally compiled only on affected platforms.
+            true
+        } else {
+            const _: () = assert!(mem::size_of::<libc::wchar_t>() == mem::size_of::<char>());
+            // Decode into a plain u32 first: the value written by mbrtowc() is only turned into
+            // a `char` after char::from_u32() has validated it.
+            let mut codepoint = u32::from(c);
+            ret = unsafe {
+                mbrtowc(
+                    std::ptr::addr_of_mut!(codepoint).cast(),
+                    std::ptr::addr_of!(inp[pos]).cast(),
+                    inp.len() - pos,
+                    std::ptr::addr_of_mut!(state),
+                )
+            };
+            match char::from_u32(codepoint) {
+                Some(codepoint) => {
+                    c = codepoint;
+                    // Determine whether to encode this character with our crazy scheme.
+                    (c >= ENCODE_DIRECT_BASE && c < ENCODE_DIRECT_END)
+                    ||
+                    c == INTERNAL_SEPARATOR
+                    ||
+                    // Incomplete sequence.
+                    ret == 0_usize.wrapping_sub(2)
+                    ||
+                    // Invalid data.
+                    ret == 0_usize.wrapping_sub(1)
+                    ||
+                    // Other error codes? Terrifying, should never happen.
+                    ret > inp.len() - pos
+                }
+                // Not a valid Unicode scalar value: fall back to the direct byte encoding.
+                None => true,
+            }
+        };
+
+        if use_encode_direct {
+            // Exactly one input byte becomes one output character; restart decoding from a
+            // clean conversion state.
+            c = encode_byte_to_char(inp[pos]);
+            result.push(c);
+            pos += 1;
+            state = zero_mbstate();
+        } else if ret == 0 {
+            // embedded null byte!
+            result.push('\0');
+            pos += 1;
+            state = zero_mbstate();
+        } else {
+            // normal case
+            result.push(c);
+            pos += ret;
+        }
+    }
+    result
+}
+
+/// Converts a wide character string into a freshly allocated multibyte byte string.
+///
+/// Characters from the private use area that stand for undecodable bytes are turned back into
+/// those bytes, so the conversion reverses `str2wcstring()`.
+pub fn wcs2string(input: &wstr) -> Vec<u8> {
+    let mut narrow = Vec::new();
+    if !input.is_empty() {
+        wcs2string_appending(&mut narrow, input);
+    }
+    narrow
+}
+
+/// Converts a wide character string into a `CString`.
+///
+/// The converted bytes are cut off at the first NUL byte, if any, so the result always fits
+/// the `CString` invariant.
+pub fn wcs2zstring(input: &wstr) -> CString {
+    if input.is_empty() {
+        return CString::default();
+    }
+
+    let mut bytes: Vec<u8> = Vec::new();
+    wcs2string_callback(input, |chunk| {
+        bytes.extend_from_slice(chunk);
+        true
+    });
+    // Keep only what precedes the first NUL; after that CString::new() cannot fail.
+    if let Some(nul) = bytes.iter().position(|&byte| byte == b'\0') {
+        bytes.truncate(nul);
+    }
+    CString::new(bytes).unwrap()
+}
+
+/// Like wcs2string, but appends to `output` instead of returning a new string.
+///
+/// Reserves `input.len()` additional bytes up front, then appends every chunk that
+/// `wcs2string_callback()` produces.
+pub fn wcs2string_appending(output: &mut Vec<u8>, input: &wstr) {
+    output.reserve(input.len());
+    wcs2string_callback(input, |buff| {
+        output.extend_from_slice(buff);
+        true
+    });
+}
+
+/// Returns how many bytes at the start of `inp` are ASCII.
+fn count_ascii_prefix(inp: &[u8]) -> usize {
+    // The C++ version had manual vectorization.
+    // The index of the first non-ASCII byte is the prefix length; with none, it is everything.
+    inp.iter()
+        .position(|byte| !byte.is_ascii())
+        .unwrap_or(inp.len())
+}
+
+/// The program name that marks test mode, where we should suppress error output. It is compared
+/// against `PROGRAM_NAME` by `should_suppress_stderr_for_tests()`.
+#[widestrs]
+pub const TESTS_PROGRAM_NAME: &wstr = "(ignore)"L;
+
+/// Hack to not print error messages in the tests. Do not call this from functions in this module
+/// like `debug()`. It is only intended to suppress diagnostic noise from testing things like the
+/// fish parser where we expect a lot of diagnostic messages due to testing error conditions.
+///
+/// Returns true only when `PROGRAM_NAME` has been set to `TESTS_PROGRAM_NAME`.
+pub fn should_suppress_stderr_for_tests() -> bool {
+    // An unset (empty) program name never matches, since TESTS_PROGRAM_NAME is not empty.
+    unsafe { *PROGRAM_NAME == TESTS_PROGRAM_NAME }
+}
+
+/// Panics unless called on the main thread. The check is waived once
+/// `THREAD_ASSERTS_CFG_FOR_TESTING` has been set.
+fn assert_is_main_thread() {
+    assert!(is_main_thread() || THREAD_ASSERTS_CFG_FOR_TESTING.load());
+}
+
+/// Panics if called on the main thread. The check is waived once
+/// `THREAD_ASSERTS_CFG_FOR_TESTING` has been set.
+fn assert_is_background_thread() {
+    assert!(!is_main_thread() || THREAD_ASSERTS_CFG_FOR_TESTING.load());
+}
+
+/// When true, both thread assertions above always pass. Set by
+/// `configure_thread_assertions_for_testing()`.
+static THREAD_ASSERTS_CFG_FOR_TESTING: RelaxedAtomicBool = RelaxedAtomicBool::new(false);
+
+thread_local! {
+    /// This thread's id, or 0 while none has been assigned yet.
+    static TL_TID: RefCell<u64> = RefCell::new(0);
+}
+
+/// The most recently assigned thread id.
+static S_LAST_THREAD_ID: AtomicU64 = AtomicU64::new(0);
+
+/// Hands out the next unused thread id. Ids start at 1 because 0 means "not assigned".
+fn next_thread_id() -> u64 {
+    // fetch_add returns the value *before* the increment, hence the +1.
+    let previous = S_LAST_THREAD_ID.fetch_add(1, Ordering::Relaxed);
+    previous + 1
+}
+
+/// Returns the calling thread's id, assigning one on first use.
+fn thread_id() -> u64 {
+    TL_TID.with(|cell| {
+        let mut tid = cell.borrow_mut();
+        if *tid == 0 {
+            *tid = next_thread_id();
+        }
+        *tid
+    })
+}
+
+/// Formats a size given in bytes as a short human readable string.
+///
+/// Negative sizes yield "unknown", zero yields the translated "empty", sizes below 1024 are
+/// printed in bytes, and anything larger is scaled to the first unit in which it fits (the last
+/// unit is used for everything beyond). Scaled values up to 9 get one fractional digit.
+#[widestrs]
+fn format_size(mut sz: i64) -> WString {
+    const UNIT_NAMES: [&wstr; 8] = ["kB"L, "MB"L, "GB"L, "TB"L, "PB"L, "EB"L, "ZB"L, "YB"L];
+
+    let mut result = WString::new();
+    if sz < 0 {
+        result += "unknown"L;
+        return result;
+    }
+    if sz == 0 {
+        result += wgettext!("empty");
+        return result;
+    }
+    if sz < 1024 {
+        result += &sprintf!("%lldB"L, sz)[..];
+        return result;
+    }
+
+    let last_unit = UNIT_NAMES.len() - 1;
+    for (i, unit) in UNIT_NAMES.iter().enumerate() {
+        if i == last_unit || sz < (1024 * 1024) {
+            let whole = sz / 1024;
+            result += &if whole > 9 {
+                sprintf!("%ld%ls"L, whole, *unit)
+            } else {
+                sprintf!("%.1f%ls"L, sz as f64 / 1024.0, *unit)
+            }[..];
+            break;
+        }
+        sz /= 1024;
+    }
+
+    result
+}
+
+/// Version of format_size that does not allocate memory.
+///
+/// Writes the formatted size into `buff`. The buffer is zero-filled first and at most
+/// `buff.len() - 1` bytes are written, so the result is always NUL-terminated.
+fn format_size_safe(buff: &mut [u8; 128], mut sz: u64) {
+    let max_len = buff.len() - 1; // need to leave room for a null terminator
+    buff.fill(0);
+    let mut idx = 0;
+    const SZ_NAMES: [&str; 8] = ["kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"];
+    if sz == 0 {
+        let empty = "empty".as_bytes();
+        buff[..empty.len()].copy_from_slice(empty);
+    } else if sz < 1024 {
+        append_ull(buff, &mut sz, &mut idx, max_len);
+        append_str(buff, "B", &mut idx, max_len);
+    } else {
+        for (i, sz_name) in SZ_NAMES.iter().enumerate() {
+            if sz < (1024 * 1024) || i == SZ_NAMES.len() - 1 {
+                let isz = sz / 1024;
+                // append_ull() consumes the value it prints through its `&mut`, so give it a
+                // scratch copy and keep `isz` intact for the comparison below.
+                let mut digits = isz;
+                append_ull(buff, &mut digits, &mut idx, max_len);
+                if isz <= 9 {
+                    // Maybe append a single fraction digit.
+                    let mut remainder = sz % 1024;
+                    if remainder > 0 {
+                        let tmp = [b'.', extract_most_significant_digit(&mut remainder)];
+                        let tmp = std::str::from_utf8(&tmp).unwrap();
+                        append_str(buff, tmp, &mut idx, max_len);
+                    }
+                }
+                append_str(buff, sz_name, &mut idx, max_len);
+                break;
+            }
+            sz /= 1024;
+        }
+    }
+}
+
+/// Writes out a long safely.
+///
+/// Stores the decimal representation of `val` in `buff`, followed by a NUL terminator. No memory
+/// is allocated.
+pub fn format_llong_safe<CharT: From<u8>>(buff: &mut [CharT; 64], val: i64) {
+    // unsigned_abs() also copes with i64::MIN, whose magnitude does not fit into an i64.
+    let uval = val.unsigned_abs();
+    if val >= 0 {
+        format_safe_impl(buff, 64, uval);
+    } else {
+        buff[0] = CharT::from(b'-');
+        format_safe_impl(&mut buff[1..], 63, uval);
+    }
+}
+
+/// Writes out an unsigned long safely: the decimal digits of `val` followed by a NUL terminator.
+pub fn format_ullong_safe<CharT: From<u8>>(buff: &mut [CharT; 64], val: u64) {
+    format_safe_impl(buff, 64, val);
+}
+
+/// Writes the decimal digits of `val` and a NUL terminator to the start of `buff`.
+///
+/// `size` is the number of usable elements in `buff`. Panics if the output does not fit.
+fn format_safe_impl<CharT: From<u8>>(buff: &mut [CharT], size: usize, mut val: u64) {
+    let mut idx = 0;
+    if val == 0 {
+        buff[idx] = CharT::from(b'0');
+        idx += 1;
+    } else {
+        // Generate the string backwards, then reverse it.
+        while val != 0 {
+            buff[idx] = CharT::from((val % 10) as u8 + b'0');
+            idx += 1;
+            val /= 10;
+        }
+        buff[..idx].reverse();
+    }
+    buff[idx] = CharT::from(b'\0');
+    idx += 1;
+    assert!(idx <= size, "Buffer overflowed");
+}
+
+/// Appends the decimal digits of `*val` to `buff`, starting at `*inout_idx` and never writing at
+/// or beyond `max_len`. `*inout_idx` is advanced past what was written.
+///
+/// Nothing is written when `*val` is 0. On return `*val` holds the part of the number that has
+/// not been written: 0 unless the output was truncated.
+fn append_ull(buff: &mut [u8], val: &mut u64, inout_idx: &mut usize, max_len: usize) {
+    if *val == 0 {
+        return;
+    }
+    // Find the place value of the leading digit. This cannot overflow because place_value
+    // never exceeds *val.
+    let mut place_value: u64 = 1;
+    while *val / place_value >= 10 {
+        place_value *= 10;
+    }
+    // Emit one digit per place value, so that zeros inside or at the end of the number are
+    // written too.
+    let mut idx = *inout_idx;
+    while place_value > 0 && idx < max_len {
+        buff[idx] = (*val / place_value) as u8 + b'0';
+        *val %= place_value;
+        place_value /= 10;
+        idx += 1;
+    }
+    *inout_idx = idx;
+}
+
+/// Appends the bytes of `s` to `buff`, starting at `*inout_idx` and never writing at or beyond
+/// `max_len`. `*inout_idx` is advanced past what was written; excess input is silently dropped.
+fn append_str(buff: &mut [u8], s: &str, inout_idx: &mut usize, max_len: usize) {
+    let mut idx = *inout_idx;
+    // The source is always read from its beginning; only the destination position carries over
+    // from previous appends.
+    for &byte in s.as_bytes() {
+        if idx >= max_len {
+            break;
+        }
+        buff[idx] = byte;
+        idx += 1;
+    }
+    *inout_idx = idx;
+}
+
+/// Strips the leading decimal digit off `*xp` and returns it as an ASCII character.
+///
+/// For example 345 becomes 45 and `b'3'` is returned.
+fn extract_most_significant_digit(xp: &mut u64) -> u8 {
+    let mut leading = *xp;
+    let mut scale: u64 = 1;
+    // Shrink the value down to its first digit while tracking that digit's place value.
+    while leading >= 10 {
+        scale *= 10;
+        leading /= 10;
+    }
+    *xp -= leading * scale;
+    b'0' + leading as u8
+}
+
+/// "Narrows" a wide character string without allocating: the ASCII characters of `s` are copied
+/// to `buff`, everything else is dropped, and the output is cut off after 63 bytes so that a
+/// NUL terminator always fits.
+pub fn narrow_string_safe(buff: &mut [u8; 64], s: &wstr) {
+    let mut written = 0;
+    for c in s.chars().filter(char::is_ascii).take(63) {
+        buff[written] = c as u8;
+        written += 1;
+    }
+    buff[written] = b'\0';
+}
+
+/// Stored in blocks to reference the file which created the block.
+pub type FilenameRef = Rc<WString>;
+
+/// This function should be called after calling `setlocale()` to perform fish specific locale
+/// initialization.
+///
+/// It picks the ellipsis character/string, the omitted-newline marker and the character used to
+/// obfuscate `read` input, depending on what the locale can encode and on the kind of terminal.
+#[widestrs]
+fn fish_setlocale() {
+    // Use various Unicode symbols if they can be encoded using the current locale, else a simple
+    // ASCII char alternative. All of the can_be_encoded() invocations should return the same
+    // true/false value since the code points are in the BMP but we're going to be paranoid. This
+    // is also technically wrong if we're not in a Unicode locale but we expect (or hope)
+    // can_be_encoded() will return false in that case.
+    if can_be_encoded('\u{2026}') {
+        ELLIPSIS_CHAR.store(u32::from('\u{2026}'), Ordering::Relaxed); // "horizontal ellipsis"
+        unsafe {
+            ELLIPSIS_STRING = Lazy::new(|| "\u{2026}"L);
+        }
+    } else {
+        ELLIPSIS_CHAR.store(u32::from('$'), Ordering::Relaxed);
+        unsafe {
+            ELLIPSIS_STRING = Lazy::new(|| "..."L);
+        }
+    }
+
+    if is_windows_subsystem_for_linux() {
+        // neither of \u23CE and \u25CF can be displayed in the default fonts on Windows, though
+        // they can be *encoded* just fine. Use alternative glyphs.
+        unsafe {
+            OMITTED_NEWLINE_STR = Lazy::new(|| "\u{00b6}"L); // "pilcrow"
+        }
+        OBFUSCATION_READ_CHAR.store(u32::from('\u{2022}'), Ordering::Relaxed); // "bullet"
+    } else if is_console_session() {
+        unsafe {
+            OMITTED_NEWLINE_STR = Lazy::new(|| "^J"L);
+        }
+        OBFUSCATION_READ_CHAR.store(u32::from('*'), Ordering::Relaxed);
+    } else {
+        if can_be_encoded('\u{23CE}') {
+            unsafe {
+                OMITTED_NEWLINE_STR = Lazy::new(|| "\u{23CE}"L); // "return symbol" (⏎)
+            }
+        } else {
+            unsafe {
+                OMITTED_NEWLINE_STR = Lazy::new(|| "^J"L);
+            }
+        }
+        OBFUSCATION_READ_CHAR.store(
+            u32::from(if can_be_encoded('\u{25CF}') {
+                '\u{25CF}' // "black circle"
+            } else {
+                '#'
+            }),
+            Ordering::Relaxed,
+        );
+    }
+    // Locale setup deliberately leaves G_PROFILING_ACTIVE alone: profiling must only be
+    // switched on when it has been requested.
+}
+
+/// Test if the character can be encoded using the current locale.
+///
+/// Returns false when `wcrtomb()` reports an encoding error, i.e. returns `(size_t)-1`.
+fn can_be_encoded(wc: char) -> bool {
+    let mut converted = [0_u8; AT_LEAST_MB_LEN_MAX];
+    let mut state = zero_mbstate();
+    unsafe {
+        wcrtomb(
+            // cast() yields whatever pointer type wcrtomb() expects, so this builds no matter
+            // whether the platform's C `char` is signed or unsigned.
+            converted.as_mut_ptr().cast(),
+            wc as libc::wchar_t,
+            std::ptr::addr_of_mut!(state),
+        ) != 0_usize.wrapping_sub(1)
+    }
+}
+
+/// Call read, blocking and repeating on EINTR. Exits on EAGAIN.
+///
+/// Returns the number of bytes read, or 0 on EOF. Any error other than EINTR, including EAGAIN
+/// when nothing was read, is reported as -1 with the cause left in errno.
+pub fn read_blocked(fd: i32, buf: &mut [u8]) -> isize {
+    loop {
+        // Pass the address of the buffer's contents. Taking the address of `buf` itself would
+        // make read() overwrite the local slice reference on the stack.
+        let res = unsafe { libc::read(fd, buf.as_mut_ptr().cast(), buf.len()) };
+        if res < 0 && errno::errno().0 == EINTR {
+            continue;
+        }
+        return res;
+    }
+}
+
 /// Test if the string is a valid function name.
 pub fn valid_func_name(name: &wstr) -> bool {
     if name.is_empty() {
@@ -123,6 +1255,235 @@ pub fn read_loop<Fd: AsRawFd>(fd: &Fd, buf: &mut [u8]) -> std::io::Result<usize>
     }
 }
 
+/// Write the given paragraph of output, redoing linebreaks to fit `termsize`.
+///
+/// The message is split into tokens at spaces, newlines, carriage returns and tabs. Tokens are
+/// joined with single spaces and a line break is inserted whenever the next token would not fit
+/// into the terminal width. A token that is too wide for a line of its own is broken up, each
+/// piece being followed by `-` and a line break. With a terminal width of 0 the message is
+/// passed through unchanged. The result always ends with a newline.
+#[widestrs]
+fn reformat_for_screen(msg: &wstr, termsize: &Termsize) -> WString {
+    let mut buff = WString::new();
+
+    let screen_width = termsize.width;
+    if screen_width != 0 {
+        let mut start = 0;
+        let mut pos = start;
+        let mut line_width = 0;
+        while pos < msg.len() {
+            let mut overflow = false;
+            let mut tok_width = 0;
+
+            // Tokenize on whitespace, and also calculate the width of the token: advance over
+            // everything that is *not* a separator.
+            while pos < msg.len() && ![' ', '\n', '\r', '\t'].contains(&msg.char_at(pos)) {
+                // Check is token is wider than one line. If so we mark it as an overflow and break
+                // the token.
+                let width = fish_wcwidth(msg.char_at(pos).into()).0 as isize;
+                if (tok_width + width) > (screen_width - 1) {
+                    overflow = true;
+                    break;
+                }
+                tok_width += width;
+                pos += 1;
+            }
+
+            // If token is zero character long, we don't do anything but step over the character
+            // we are looking at. This must compare against the token start: comparing against 0
+            // would stall forever on any later separator.
+            if pos == start {
+                pos += 1;
+            } else if overflow {
+                // In case of overflow, we print a newline, except if we already are at position 0.
+                let token = &msg[start..pos];
+                if line_width != 0 {
+                    buff.push('\n');
+                }
+                buff += &sprintf!("%ls-\n"L, token)[..];
+                line_width = 0;
+            } else {
+                // Print the token.
+                let token = &msg[start..pos];
+                // A separating space is only needed when the line already has content.
+                let line_width_unit = if line_width != 0 { 1 } else { 0 };
+                if (line_width + line_width_unit + tok_width) > screen_width {
+                    buff.push('\n');
+                    line_width = 0;
+                }
+                if line_width != 0 {
+                    buff += " "L;
+                }
+                buff += token;
+                line_width += line_width_unit + tok_width;
+            }
+
+            start = pos;
+        }
+    } else {
+        buff += msg;
+    }
+    buff.push('\n');
+    buff
+}
+
+/// A point in time, expressed as (fractional) seconds relative to the UNIX epoch.
+pub type Timepoint = f64;
+
+/// Return the number of seconds from the UNIX epoch, with subsecond precision.
+///
+/// The value is negative if the system clock is set to a time before the epoch.
+fn timef() -> Timepoint {
+    // as_secs_f64() keeps the fractional part, which as_secs() would truncate.
+    match time::SystemTime::now().duration_since(time::UNIX_EPOCH) {
+        Ok(difference) => difference.as_secs_f64(),
+        Err(until_epoch) => -until_epoch.duration().as_secs_f64(),
+    }
+}
+
+/// Call the following function early in main to set the main thread. This is our replacement for
+/// pthread_main_np().
+///
+/// Panics if the calling thread does not end up with thread id 1, i.e. if another thread has
+/// already obtained an id.
+pub fn set_main_thread() {
+    // Just call thread_id() once to force increment of thread_id.
+    let tid = thread_id();
+    assert!(tid == 1, "main thread should have thread ID 1");
+}
+
+/// Returns true if the calling thread has thread id 1, the id `set_main_thread()` insists on.
+pub fn is_main_thread() -> bool {
+    thread_id() == 1
+}
+
+/// Makes `assert_is_main_thread()` and `assert_is_background_thread()` always pass.
+pub fn configure_thread_assertions_for_testing() {
+    THREAD_ASSERTS_CFG_FOR_TESTING.store(true)
+}
+
+/// This allows us to notice when we've forked.
+static IS_FORKED_PROC: RelaxedAtomicBool = RelaxedAtomicBool::new(false);
+
+/// Arranges for `is_forked_child()` to report true in every process forked from now on.
+///
+/// The flag is cleared for the calling process. The `pthread_atfork()` handler is installed only
+/// once, no matter how often this function is called.
+pub fn setup_fork_guards() {
+    /// Runs in the child process right after fork().
+    unsafe extern "C" fn mark_forked_child() {
+        IS_FORKED_PROC.store(true);
+    }
+
+    static INSTALL_FORK_HANDLER: std::sync::Once = std::sync::Once::new();
+
+    IS_FORKED_PROC.store(false);
+    INSTALL_FORK_HANDLER.call_once(|| {
+        // Only the child-side handler is needed; there is nothing to do before the fork or in
+        // the parent.
+        unsafe {
+            libc::pthread_atfork(None, None, Some(mark_forked_child));
+        }
+    });
+}
+
+pub fn is_forked_child() -> bool {
+    IS_FORKED_PROC.load()
+}
+
+/// Be able to restore the term's foreground process group.
+/// This is set during startup and not modified after.
+/// Holds -1 until `save_term_foreground_process_group()` has stored a value.
+static INITIAL_FG_PROCESS_GROUP: AtomicI32 = AtomicI32::new(-1); // HACK, should be pid_t
+// Compile-time check that a pid_t fits into the i32 used above.
+const _: () = assert!(mem::size_of::<i32>() >= mem::size_of::<libc::pid_t>());
+
+/// Save the value of tcgetpgrp so we can restore it on exit.
+///
+/// The result of `tcgetpgrp(STDIN_FILENO)` is stored unchecked, including a -1 on failure.
+pub fn save_term_foreground_process_group() {
+    INITIAL_FG_PROCESS_GROUP.store(unsafe { libc::tcgetpgrp(STDIN_FILENO) }, Ordering::Relaxed);
+}
+
+/// Hands the terminal on stdin back to the process group saved by
+/// `save_term_foreground_process_group()`, unless nothing valid was saved or that group is our
+/// own. The results of `signal()` and `tcsetpgrp()` are ignored.
+pub fn restore_term_foreground_process_group_for_exit() {
+    // We wish to restore the tty to the initial owner. There's two ways this can go wrong:
+    //  1. We may steal the tty from someone else (#7060).
+    //  2. The call to tcsetpgrp may deliver SIGSTOP to us, and we will not exit.
+    // Hanging on exit seems worse, so ensure that SIGTTOU is ignored so we do not get SIGSTOP.
+    // Note initial_fg_process_group == 0 is possible with Linux pid namespaces.
+    // This is called during shutdown and from a signal handler. We don't bother to complain on
+    // failure because doing so is unlikely to be noticed.
+    let initial_fg_process_group = INITIAL_FG_PROCESS_GROUP.load(Ordering::Relaxed);
+    if initial_fg_process_group > 0 && initial_fg_process_group != unsafe { libc::getpgrp() } {
+        unsafe {
+            // Ignore SIGTTOU first, so that the tcsetpgrp() call below cannot stop us.
+            libc::signal(SIGTTOU, SIG_IGN);
+            libc::tcsetpgrp(STDIN_FILENO, initial_fg_process_group);
+        }
+    }
+}
+
+/// Determines if we are running under Microsoft's Windows Subsystem for Linux to work around
+/// some known limitations and/or bugs.
+/// See https://github.com/Microsoft/WSL/issues/423 and Microsoft/WSL#2997
+pub fn is_windows_subsystem_for_linux() -> bool {
+    // The answer is computed on first use by the lazily initialized static and then reused for
+    // every later call.
+    *IS_WINDOWS_SUBSYSTEM_FOR_LINUX
+}
+
+/// Returns true if `b` occurs as a contiguous subsequence of `a`.
+///
+/// An empty `b` is contained in every slice, including an empty one.
+fn slice_contains_slice<T: Eq>(a: &[T], b: &[T]) -> bool {
+    // windows() panics for a window size of 0, so the empty needle is answered up front.
+    b.is_empty() || a.windows(b.len()).any(|aw| aw == b)
+}
+
+/// Cached answer for `is_windows_subsystem_for_linux()`.
+///
+/// WSL presents itself as Linux, so the detection is only compiled for Linux targets; everywhere
+/// else the answer is a constant false.
+#[cfg(not(target_os = "linux"))]
+static IS_WINDOWS_SUBSYSTEM_FOR_LINUX: Lazy<bool> = Lazy::new(|| false);
+#[cfg(target_os = "linux")]
+static IS_WINDOWS_SUBSYSTEM_FOR_LINUX: Lazy<bool> = Lazy::new(|| {
+    let mut info: libc::utsname = unsafe { mem::zeroed() };
+    if unsafe { libc::uname(std::ptr::addr_of_mut!(info)) } != 0 {
+        return false;
+    }
+    // The kernel release as bytes, without the terminating NUL and what follows it.
+    let release: Vec<u8> = info
+        .release
+        .iter()
+        .take_while(|&&c| c != 0)
+        .map(|&c| c as u8)
+        .collect();
+
+    // Sample utsname.release under WSL, testing for something like `4.4.0-17763-Microsoft`
+    if !slice_contains_slice(&release, &b"Microsoft"[..]) {
+        return false;
+    }
+
+    // The Windows build number is the run of digits right after the first dash.
+    let build = release.iter().position(|&c| c == b'-').map(|dash| {
+        release[dash + 1..]
+            .iter()
+            .take_while(|c| c.is_ascii_digit())
+            .fold(0_u32, |acc, &digit| {
+                acc.saturating_mul(10)
+                    .saturating_add(u32::from(digit - b'0'))
+            })
+    });
+
+    // #5298, #5661: There are acknowledged, published, and (later) fixed issues with
+    // job control under early WSL releases that prevent fish from running correctly,
+    // with unexpected failures when piping. Fish 3.0 nightly builds worked around this
+    // issue with some needlessly complicated code that was later stripped from the
+    // fish 3.0 release, so we just bail. Note that fish 2.0 was also broken, but we
+    // just didn't warn about it.
+    if build.map_or(true, |build| build < 17763) {
+        // #6038 & 5101bde: It's been requested that there be some sort of way to disable
+        // this check: if the environment variable FISH_NO_WSL_CHECK is present, this test
+        // is bypassed. We intentionally do not include this in the error message because
+        // it'll only allow fish to run but not to actually work. Here be dragons!
+        if env::var_os("FISH_NO_WSL_CHECK").is_none() {
+            FLOG!(
+                error,
+                "This version of WSL has known bugs that prevent fish from working. \
+                    Please upgrade to Windows 10 1809 (17763) or higher to use fish!"
+            );
+        }
+    }
+
+    // Any release carrying the Microsoft tag is WSL, whether or not the build is a fixed one.
+    true
+});
+
+/// Return true if the character is in a range reserved for fish's private use.
+///
+/// NOTE: This is used when tokenizing the input. It is also used when reading input, before
+/// tokenization, to replace such chars with REPLACEMENT_WCHAR if they're not part of a quoted
+/// string. We don't want external input to be able to feed reserved characters into our
+/// lexer/parser or code evaluator.
+//
+// TODO: Actually implement the replacement as documented above.
+pub fn fish_reserved_codepoint(c: char) -> bool {
+    // Both ranges are half-open: the *_END code points themselves are not reserved.
+    let in_reserved_block = (RESERVED_CHAR_BASE..RESERVED_CHAR_END).contains(&c);
+    let in_direct_encoding_block = (ENCODE_DIRECT_BASE..ENCODE_DIRECT_END).contains(&c);
+    in_reserved_block || in_direct_encoding_block
+}
+
+/// Points each of stdin, stdout and stderr at /dev/null if `tcgetattr()` on it fails with EIO.
+///
+/// Panics if /dev/null cannot be opened.
+// NOTE(review): the descriptor opened here is never closed - presumably intentional, confirm.
+pub fn redirect_tty_output() {
+    unsafe {
+        // Only the outcome of tcgetattr() matters; the attributes written to `t` are not used.
+        let mut t: libc::termios = mem::zeroed();
+        let s = CString::new("/dev/null").unwrap();
+        let fd = libc::open(s.as_ptr(), O_WRONLY);
+        assert!(fd != -1, "Could not open /dev/null!");
+        for stdfd in [STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO] {
+            if libc::tcgetattr(stdfd, std::ptr::addr_of_mut!(t)) == -1 && errno::errno().0 == EIO {
+                libc::dup2(fd, stdfd);
+            }
+        }
+    }
+}
+
+/// Test if the given char is valid in a variable name: anything `fish_iswalnum()` accepts, or an
+/// underscore.
+pub fn valid_var_name_char(chr: char) -> bool {
+    fish_iswalnum(chr) || chr == '_'
+}
+
+/// Test if the given string is a valid variable name: non-empty and made up of characters
+/// accepted by `valid_var_name_char()` only.
+fn valid_var_name(s: &wstr) -> bool {
+    // Note do not use c_str(), we want to fail on embedded nul bytes.
+    !s.is_empty() && s.chars().all(valid_var_name_char)
+}
+
+/// Get the path to the fish executable itself.
+///
+/// This is what `std::env::current_exe()` reports; if that fails, `argv0` is used as is.
+fn get_executable_path(argv0: &str) -> PathBuf {
+    match std::env::current_exe() {
+        Ok(exe) => exe,
+        Err(_) => PathBuf::from(argv0),
+    }
+}
+
 /// Like [`std::mem::replace()`] but provides a reference to the old value in a callback to obtain
 /// the replacement value. Useful to avoid errors about multiple references (`&mut T` for `old` then
 /// `&T` again in the `new` expression).
@@ -131,6 +1492,8 @@ pub fn replace_with<T, F: FnOnce(&T) -> T>(old: &mut T, with: F) -> T {
     std::mem::replace(old, new)
 }
 
+pub type Cleanup<T, F> = ScopeGuard<T, F>;
+
 /// A RAII cleanup object. Unlike in C++ where there is no borrow checker, we can't just provide a
 /// callback that modifies live objects willy-nilly because then there would be two &mut references
 /// to the same object - the original variables we keep around to use and their captured references
@@ -260,6 +1623,46 @@ pub const fn assert_send<T: Send>() {}
 
 pub const fn assert_sync<T: Sync>() {}
 
+/// This function attempts to distinguish between a console session (at the actual login vty) and a
+/// session within a terminal emulator inside a desktop environment or over SSH. Unfortunately
+/// there are few values of $TERM that we can interpret as being exclusively console sessions, and
+/// most common operating systems do not use them. The value is cached for the duration of the fish
+/// session. We err on the side of assuming it's not a console session. This approach isn't
+/// bullet-proof and that's OK.
+fn is_console_session() -> bool {
+    *CONSOLE_SESSION
+}
+
+/// Cached answer for `is_console_session()`, computed on first use.
+static CONSOLE_SESSION: Lazy<bool> = Lazy::new(|| {
+    const PATH_MAX: usize = libc::PATH_MAX as _;
+    // Zero-initialized, so the name written by ttyname_r() is always followed by a NUL.
+    let mut tty_name = [0_u8; PATH_MAX];
+    if unsafe { libc::ttyname_r(STDIN_FILENO, tty_name.as_mut_ptr().cast(), PATH_MAX) } != 0 {
+        return false;
+    }
+
+    // Test that the tty matches /dev/(console|dcons|tty[uv\d])
+    let len = "/dev/tty".len();
+    let is_console_tty = (tty_name.starts_with(b"/dev/tty")
+        && ([b'u', b'v'].contains(&tty_name[len]) || tty_name[len].is_ascii_digit()))
+        // The trailing NUL turns these prefix tests into tests for the complete name.
+        || tty_name.starts_with(b"/dev/dcons\0")
+        || tty_name.starts_with(b"/dev/console\0");
+
+    // and that $TERM is simple, e.g. `xterm` or `vt100`, not `xterm-something`
+    is_console_tty
+        && match env::var("TERM") {
+            Ok(term) => !term.contains('-') || term == "sun-color",
+            Err(env::VarError::NotPresent) => true,
+            Err(_) => false,
+        }
+});
+
 /// Asserts that a slice is alphabetically sorted by a [`&wstr`] `name` field.
 ///
 /// Mainly useful for static asserts/const eval.
@@ -320,11 +1723,15 @@ macro_rules! assert_sorted_by_name {
         assert_sorted_by_name!($slice, name);
     };
 }
+
 mod tests {
-    use crate::{
-        common::{escape_string, EscapeStringStyle},
-        wchar::widestrs,
+    use crate::common::{
+        escape_string, str2wcstring, wcs2string, EscapeStringStyle, ENCODE_DIRECT_BASE,
+        ENCODE_DIRECT_END,
     };
+    use crate::wchar::widestrs;
+    use crate::wutil::encoding::{wcrtomb, zero_mbstate, AT_LEAST_MB_LEN_MAX};
+    use rand::random;
 
     #[widestrs]
     pub fn test_escape_string() {
@@ -333,8 +1740,8 @@ mod tests {
         // plain text should not be needlessly escaped
         assert_eq!(regex("hello world!"L), "hello world!"L);
 
-        // all the following are intended to be ultimately matched literally - even if they don't look
-        // like that's the intent - so we escape them.
+        // all the following are intended to be ultimately matched literally - even if they
+        // don't look like that's the intent - so we escape them.
         assert_eq!(regex(".ext"L), "\\.ext"L);
         assert_eq!(regex("{word}"L), "\\{word\\}"L);
         assert_eq!(regex("hola-mundo"L), "hola\\-mundo"L);
@@ -347,6 +1754,150 @@ mod tests {
             "not really escaped\\\\\\?"L
         );
     }
+
+    /// The number of tests to run.
+    const ESCAPE_TEST_COUNT: usize = 100000;
+    /// The average length of strings to unescape.
+    const ESCAPE_TEST_LENGTH: usize = 100;
+    /// The highest character number of character to try and escape.
+    const ESCAPE_TEST_CHAR: usize = 4000;
+
+    /// Helper to convert a narrow string to a sequence of hex digits.
+    ///
+    /// Every byte is rendered as `0x` plus two upper case hex digits and a trailing space.
+    fn str2hex(input: &[u8]) -> String {
+        let mut output = String::with_capacity(input.len() * 5);
+        for byte in input {
+            // Zero padded, so that 0x0A does not come out as "0x A".
+            output += &format!("0x{:02X} ", *byte);
+        }
+        output
+    }
+
+    /// Test wide/narrow conversion by creating random strings and verifying that the original
+    /// string comes back through double conversion.
+    pub fn test_convert() {
+        for _ in 0..ESCAPE_TEST_COUNT {
+            let mut origin: Vec<u8> = vec![];
+            // Keep appending random bytes until a 1-in-ESCAPE_TEST_LENGTH draw ends the string.
+            while (random::<usize>() % ESCAPE_TEST_LENGTH) != 0 {
+                let byte = random();
+                origin.push(byte);
+            }
+
+            // Round trip: narrow -> wide -> narrow must reproduce the input bytes exactly.
+            let w = str2wcstring(&origin[..]);
+            let n = wcs2string(&w);
+            assert_eq!(
+                origin,
+                n,
+                "Conversion cycle of string:\n{:4} chars: {}\n\
+                 produced different string:\n\
+                 {:4} chars: {}",
+                origin.len(),
+                &str2hex(&origin),
+                n.len(),
+                &str2hex(&n)
+            );
+        }
+    }
+
+    /// Verify that ASCII narrow->wide conversions are correct.
+    pub fn test_convert_ascii() {
+        // Fill the buffer with the repeating ASCII digits '0' through '9'.
+        let mut s = vec![b'\0'; 4096];
+        for (i, c) in s.iter_mut().enumerate() {
+            *c = u8::try_from(i % 10).unwrap() + b'0';
+        }
+
+        // Test a variety of alignments.
+        for left in 0..16 {
+            for right in 0..16 {
+                let len = s.len() - left - right;
+                let input = &s[left..left + len];
+                let wide = str2wcstring(input);
+                let narrow = wcs2string(&wide);
+                assert_eq!(narrow, input);
+            }
+        }
+
+        // Put some non-ASCII bytes in and ensure it all still works.
+        for i in 0..s.len() {
+            // Swap a single non-ASCII byte into position i, check the round trip, then restore
+            // the original byte.
+            let saved = s[i];
+            s[i] = 0xF7;
+            assert_eq!(wcs2string(&str2wcstring(&s)), s);
+            s[i] = saved;
+        }
+    }
+    /// fish uses the private-use range to encode bytes that could not be decoded using the
+    /// user's locale. If the input could be decoded, but decoded to private-use codepoints,
+    /// then fish should also use the direct encoding for those bytes. Verify that characters
+    /// in the private use area are correctly round-tripped. See #7723.
+    pub fn test_convert_private_use() {
+        for c in ENCODE_DIRECT_BASE..ENCODE_DIRECT_END {
+            // Encode the char via the locale. Do not use fish functions which interpret these
+            // specially.
+            let mut converted = [0_u8; AT_LEAST_MB_LEN_MAX];
+            let mut state = zero_mbstate();
+            let len = unsafe {
+                wcrtomb(
+                    std::ptr::addr_of_mut!(converted[0]).cast(),
+                    c as libc::wchar_t,
+                    std::ptr::addr_of_mut!(state),
+                )
+            };
+            // wcrtomb() reports failure as (size_t)-1.
+            if len == 0_usize.wrapping_sub(1) {
+                // Could not be encoded in this locale.
+                continue;
+            }
+            let s = &converted[..len];
+
+            // Ask fish to decode this via str2wcstring.
+            // str2wcstring should notice that the decoded form collides with its private use
+            // and encode it directly.
+            let ws = str2wcstring(s);
+
+            // Each byte should be encoded directly, and round tripping should work.
+            assert_eq!(ws.len(), s.len());
+            assert_eq!(wcs2string(&ws), s);
+        }
+    }
 }
 
 crate::ffi_tests::add_test!("escape_string", tests::test_escape_string);
+// Register each test under a label of its own, so that a report names the test that ran.
+crate::ffi_tests::add_test!("convert", tests::test_convert);
+crate::ffi_tests::add_test!("convert_ascii", tests::test_convert_ascii);
+crate::ffi_tests::add_test!("convert_private_use", tests::test_convert_private_use);
+
+/// cxx bridge that exposes `rust_unescape_string()` to the C++ side.
+#[cxx::bridge]
+mod common_ffi {
+    // Types and headers provided by the C++ side.
+    extern "C++" {
+        include!("wutil.h");
+        include!("common.h");
+        type escape_string_style_t = crate::ffi::escape_string_style_t;
+    }
+    // Functions implemented in Rust and callable from C++.
+    extern "Rust" {
+        fn rust_unescape_string(
+            input: *const wchar_t,
+            len: usize,
+            escape_special: u32,
+            style: escape_string_style_t,
+        ) -> UniquePtr<CxxWString>;
+    }
+}
+
+/// FFI entry point for `unescape_string()`.
+///
+/// `input` must point to `len` readable wide characters. `escape_special` carries the
+/// `UnescapeFlags` bits and is only consulted for the script style.
+///
+/// Returns the unescaped string, or a null pointer if `unescape_string()` returns `None`.
+///
+/// Panics if `escape_special` is rejected by `UnescapeFlags::from_bits()`, if `style` is none of
+/// the script, URL and variable styles, or if `wstr::from_slice()` rejects the input.
+fn rust_unescape_string(
+    input: *const ffi::wchar_t,
+    len: usize,
+    escape_special: u32,
+    style: ffi::escape_string_style_t,
+) -> UniquePtr<CxxWString> {
+    // Translate the C++ style enum into the Rust one.
+    let style = match style {
+        ffi::escape_string_style_t::STRING_STYLE_SCRIPT => {
+            UnescapeStringStyle::Script(UnescapeFlags::from_bits(escape_special).unwrap())
+        }
+        ffi::escape_string_style_t::STRING_STYLE_URL => UnescapeStringStyle::Url,
+        ffi::escape_string_style_t::STRING_STYLE_VAR => UnescapeStringStyle::Var,
+        _ => panic!(),
+    };
+    // View the caller's buffer as a wide string without copying it.
+    let input = unsafe { slice::from_raw_parts(input, len) };
+    let input = wstr::from_slice(input).unwrap();
+    match unescape_string(input, style) {
+        Some(result) => result.to_ffi(),
+        None => UniquePtr::null(),
+    }
+}
diff --git a/fish-rust/src/compat.c b/fish-rust/src/compat.c
new file mode 100644
index 000000000..a32885dde
--- /dev/null
+++ b/fish-rust/src/compat.c
@@ -0,0 +1,3 @@
+#include <stdlib.h>
+
+size_t C_MB_CUR_MAX(void) { return MB_CUR_MAX; }  // function wrapper so Rust can read the macro
diff --git a/fish-rust/src/compat.rs b/fish-rust/src/compat.rs
new file mode 100644
index 000000000..32cec77ba
--- /dev/null
+++ b/fish-rust/src/compat.rs
@@ -0,0 +1,8 @@
+#[allow(non_snake_case)] // keeps the spelling of the C macro it wraps
+pub fn MB_CUR_MAX() -> usize {
+    unsafe { C_MB_CUR_MAX() } // SAFETY: the C shim takes no arguments and only reads MB_CUR_MAX
+}
+
+extern "C" {
+    fn C_MB_CUR_MAX() -> usize; // implemented in compat.c, which build.rs compiles via `cc`
+}
diff --git a/fish-rust/src/env.rs b/fish-rust/src/env.rs
index 38a3b18bf..2b76043b9 100644
--- a/fish-rust/src/env.rs
+++ b/fish-rust/src/env.rs
@@ -38,6 +38,11 @@ pub mod flags {
             c_int(i32::from(val.bits()))
         }
     }
+    impl From<EnvMode> for u16 {
+        fn from(val: EnvMode) -> Self {
+            val.bits() // the flag set's raw bit representation
+        }
+    }
 }
 
 /// Return values for `env_stack_t::set()`.
diff --git a/fish-rust/src/expand.rs b/fish-rust/src/expand.rs
index 1d8e136bf..2546e3468 100644
--- a/fish-rust/src/expand.rs
+++ b/fish-rust/src/expand.rs
@@ -1,39 +1,34 @@
-use crate::wchar::{EXPAND_RESERVED_BASE, EXPAND_RESERVED_END};
+use crate::common::{char_offset, EXPAND_RESERVED_BASE, EXPAND_RESERVED_END};
+use crate::wchar::wstr;
+use widestring_suffix::widestrs;
 
-/// Private use area characters used in expansions
-#[repr(u32)]
-pub enum ExpandChars {
-    /// Character representing a home directory.
-    HomeDirectory = EXPAND_RESERVED_BASE as u32,
-    /// Character representing process expansion for %self.
-    ProcessExpandSelf,
-    /// Character representing variable expansion.
-    VariableExpand,
-    /// Character representing variable expansion into a single element.
-    VariableExpandSingle,
-    /// Character representing the start of a bracket expansion.
-    BraceBegin,
-    /// Character representing the end of a bracket expansion.
-    BraceEnd,
-    /// Character representing separation between two bracket elements.
-    BraceSep,
-    /// Character that takes the place of any whitespace within non-quoted text in braces
-    BraceSpace,
-    /// Separate subtokens in a token with this character.
-    InternalSeparator,
-    /// Character representing an empty variable expansion. Only used transitively while expanding
-    /// variables.
-    VariableExpandEmpty,
-}
+/// Character representing a home directory.
+pub const HOME_DIRECTORY: char = char_offset(EXPAND_RESERVED_BASE, 0);
+/// Character representing process expansion for %self.
+pub const PROCESS_EXPAND_SELF: char = char_offset(EXPAND_RESERVED_BASE, 1);
+/// Character representing variable expansion.
+pub const VARIABLE_EXPAND: char = char_offset(EXPAND_RESERVED_BASE, 2);
+/// Character representing variable expansion into a single element.
+pub const VARIABLE_EXPAND_SINGLE: char = char_offset(EXPAND_RESERVED_BASE, 3);
+/// Character representing the start of a bracket expansion.
+pub const BRACE_BEGIN: char = char_offset(EXPAND_RESERVED_BASE, 4);
+/// Character representing the end of a bracket expansion.
+pub const BRACE_END: char = char_offset(EXPAND_RESERVED_BASE, 5);
+/// Character representing separation between two bracket elements.
+pub const BRACE_SEP: char = char_offset(EXPAND_RESERVED_BASE, 6);
+/// Character that takes the place of any whitespace within non-quoted text in braces
+pub const BRACE_SPACE: char = char_offset(EXPAND_RESERVED_BASE, 7);
+/// Separate subtokens in a token with this character.
+pub const INTERNAL_SEPARATOR: char = char_offset(EXPAND_RESERVED_BASE, 8);
+/// Character representing an empty variable expansion. Only used transitively while expanding
+/// variables.
+pub const VARIABLE_EXPAND_EMPTY: char = char_offset(EXPAND_RESERVED_BASE, 9);
 
 const _: () = assert!(
-    EXPAND_RESERVED_END as u32 > ExpandChars::VariableExpandEmpty as u32,
+    EXPAND_RESERVED_END as u32 > VARIABLE_EXPAND_EMPTY as u32,
     "Characters used in expansions must stay within private use area"
 );
 
-impl From<ExpandChars> for char {
-    fn from(val: ExpandChars) -> Self {
-        // We know this is safe because we limit the the range of this enum
-        unsafe { char::from_u32_unchecked(val as _) }
-    }
-}
+/// The string represented by PROCESS_EXPAND_SELF
+#[widestrs]
+pub const PROCESS_EXPAND_SELF_STR: &wstr = "%self"L;
diff --git a/fish-rust/src/ffi.rs b/fish-rust/src/ffi.rs
index 0c648de05..acdc56169 100644
--- a/fish-rust/src/ffi.rs
+++ b/fish-rust/src/ffi.rs
@@ -53,8 +53,6 @@ include_cpp! {
     generate!("env_var_t")
     generate!("make_pipes_ffi")
 
-    generate!("valid_var_name_char")
-
     generate!("get_flog_file_fd")
     generate!("log_extra_to_flog_file")
 
@@ -100,9 +98,6 @@ include_cpp! {
     generate!("re::regex_t")
     generate!("re::regex_result_ffi")
     generate!("re::try_compile_ffi")
-    generate!("wcs2string")
-    generate!("wcs2zstring")
-    generate!("str2wcstring")
 
     generate!("signal_handle")
     generate!("signal_check_cancel")
diff --git a/fish-rust/src/flog.rs b/fish-rust/src/flog.rs
index cc1d002ed..4c65458fb 100644
--- a/fish-rust/src/flog.rs
+++ b/fish-rust/src/flog.rs
@@ -188,7 +188,15 @@ macro_rules! FLOG {
         }
     };
 }
-pub(crate) use FLOG;
+
+// TODO: implement real formatting; for now FLOGF forwards all of its arguments to FLOG.
+macro_rules! FLOGF {
+    ($category:ident, $($elem:expr),+) => {
+        crate::flog::FLOG!($category, $($elem),*);
+    }
+}
+
+pub(crate) use {FLOG, FLOGF};
 
 /// For each category, if its name matches the wildcard, set its enabled to the given sense.
 fn apply_one_wildcard(wc_esc: &wstr, sense: bool) {
diff --git a/fish-rust/src/lib.rs b/fish-rust/src/lib.rs
index 09c26a2ec..74fd34615 100644
--- a/fish-rust/src/lib.rs
+++ b/fish-rust/src/lib.rs
@@ -12,6 +12,7 @@ mod common;
 mod abbrs;
 mod builtins;
 mod color;
+mod compat;
 mod env;
 mod event;
 mod expand;
@@ -51,6 +52,7 @@ mod wchar_ext;
 mod wchar_ffi;
 mod wcstringutil;
 mod wgetopt;
+mod wildcard;
 mod wutil;
 
 // Don't use `#[cfg(test)]` here to make sure ffi tests are built and tested
diff --git a/fish-rust/src/path.rs b/fish-rust/src/path.rs
index 934df4007..383ba250b 100644
--- a/fish-rust/src/path.rs
+++ b/fish-rust/src/path.rs
@@ -1,5 +1,5 @@
 use crate::{
-    expand::ExpandChars::HomeDirectory,
+    expand::HOME_DIRECTORY,
     wchar::{wstr, WExt, WString, L},
 };
 
@@ -12,7 +12,7 @@ pub fn path_apply_working_directory(path: &wstr, working_directory: &wstr) -> WS
 
     // We're going to make sure that if we want to prepend the wd, that the string has no leading
     // "/".
-    let prepend_wd = path.char_at(0) != '/' && path.char_at(0) != HomeDirectory.into();
+    let prepend_wd = path.char_at(0) != '/' && path.char_at(0) != HOME_DIRECTORY;
 
     if !prepend_wd {
         // No need to prepend the wd, so just return the path we were given.
diff --git a/fish-rust/src/tokenizer.rs b/fish-rust/src/tokenizer.rs
index 56f5ac72d..10d7fb16e 100644
--- a/fish-rust/src/tokenizer.rs
+++ b/fish-rust/src/tokenizer.rs
@@ -1,7 +1,8 @@
 //! A specialized tokenizer for tokenizing the fish language. In the future, the tokenizer should be
 //! extended to support marks, tokenizing multiple strings and disposing of unused string segments.
 
-use crate::ffi::{valid_var_name_char, wcharz_t};
+use crate::common::valid_var_name_char;
+use crate::ffi::wcharz_t;
 use crate::future_feature_flags::{feature_test, FeatureFlag};
 use crate::parse_constants::SOURCE_OFFSET_INVALID;
 use crate::redirection::RedirectionMode;
@@ -1357,7 +1358,7 @@ pub fn variable_assignment_equals_pos(txt: &wstr) -> Option<usize> {
     // TODO bracket indexing
     for (i, c) in txt.chars().enumerate() {
         if !found_potential_variable {
-            if !valid_var_name_char(c as wchar_t) {
+            if !valid_var_name_char(c) {
                 return None;
             }
             found_potential_variable = true;
@@ -1365,7 +1366,7 @@ pub fn variable_assignment_equals_pos(txt: &wstr) -> Option<usize> {
             if c == '=' {
                 return Some(i);
             }
-            if !valid_var_name_char(c as wchar_t) {
+            if !valid_var_name_char(c) {
                 return None;
             }
         }
diff --git a/fish-rust/src/wchar.rs b/fish-rust/src/wchar.rs
index 7f723e4f0..c3d366a8d 100644
--- a/fish-rust/src/wchar.rs
+++ b/fish-rust/src/wchar.rs
@@ -4,6 +4,7 @@
 //!   - wstr: a string slice without a nul terminator. Like `&str` but wide chars.
 //!   - WString: an owning string without a nul terminator. Like `String` but wide chars.
 
+use crate::common::{ENCODE_DIRECT_BASE, ENCODE_DIRECT_END};
 pub use widestring::{Utf32Str as wstr, Utf32String as WString};
 
 /// Pull in our extensions.
@@ -30,43 +31,6 @@ pub(crate) use L;
 /// Note: the resulting string is NOT nul-terminated.
 pub use widestring_suffix::widestrs;
 
-// Use Unicode "non-characters" for internal characters as much as we can. This
-// gives us 32 "characters" for internal use that we can guarantee should not
-// appear in our input stream. See http://www.unicode.org/faq/private_use.html.
-pub const RESERVED_CHAR_BASE: char = '\u{FDD0}';
-pub const RESERVED_CHAR_END: char = '\u{FDF0}';
-// Split the available non-character values into two ranges to ensure there are
-// no conflicts among the places we use these special characters.
-pub const EXPAND_RESERVED_BASE: char = RESERVED_CHAR_BASE;
-pub const EXPAND_RESERVED_END: char = match char::from_u32(EXPAND_RESERVED_BASE as u32 + 16u32) {
-    Some(c) => c,
-    None => panic!("private use codepoint in expansion region should be valid char"),
-};
-pub const WILDCARD_RESERVED_BASE: char = EXPAND_RESERVED_END;
-pub const WILDCARD_RESERVED_END: char = match char::from_u32(WILDCARD_RESERVED_BASE as u32 + 16u32)
-{
-    Some(c) => c,
-    None => panic!("private use codepoint in wildcard region should be valid char"),
-};
-
-// These are in the Unicode private-use range. We really shouldn't use this
-// range but have little choice in the matter given how our lexer/parser works.
-// We can't use non-characters for these two ranges because there are only 66 of
-// them and we need at least 256 + 64.
-//
-// If sizeof(wchar_t)==4 we could avoid using private-use chars; however, that
-// would result in fish having different behavior on machines with 16 versus 32
-// bit wchar_t. It's better that fish behave the same on both types of systems.
-//
-// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
-// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
-// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
-pub const ENCODE_DIRECT_BASE: char = '\u{F600}';
-pub const ENCODE_DIRECT_END: char = match char::from_u32(ENCODE_DIRECT_BASE as u32 + 256) {
-    Some(c) => c,
-    None => panic!("private use codepoint in encode direct region should be valid char"),
-};
-
 /// Encode a literal byte in a UTF-32 character. This is required for e.g. the echo builtin, whose
 /// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g.
 /// UTF-8 by the terminal. If we were to interpret each of those bytes as a codepoint and encode it
@@ -78,3 +42,16 @@ pub fn encode_byte_to_char(byte: u8) -> char {
     char::from_u32(u32::from(ENCODE_DIRECT_BASE) + u32::from(byte))
         .expect("private-use codepoint should be valid char")
 }
+
+/// Inverse of `encode_byte_to_char`: recovers the byte stored in a direct-encoded character.
+/// Returns `None` when `c` lies outside `ENCODE_DIRECT_BASE..ENCODE_DIRECT_END`.
+pub fn decode_byte_from_char(c: char) -> Option<u8> {
+    // Anything outside the direct-encoding window carries no byte.
+    if !(ENCODE_DIRECT_BASE..ENCODE_DIRECT_END).contains(&c) {
+        return None;
+    }
+    let offset = u32::from(c) - u32::from(ENCODE_DIRECT_BASE);
+    // The unwrap panics if the offset does not fit in a `u8`.
+    let byte: u8 = offset.try_into().unwrap();
+    Some(byte)
+}
diff --git a/fish-rust/src/wcstringutil.rs b/fish-rust/src/wcstringutil.rs
index 0fa5f820e..384cc7d40 100644
--- a/fish-rust/src/wcstringutil.rs
+++ b/fish-rust/src/wcstringutil.rs
@@ -1,6 +1,66 @@
 //! Helper functions for working with wcstring.
 
-use crate::wchar::{wstr, WString};
+use crate::compat::MB_CUR_MAX;
+use crate::expand::INTERNAL_SEPARATOR;
+use crate::flog::FLOGF;
+use crate::wchar::{decode_byte_from_char, wstr, WString, L};
+use crate::wutil::encoding::{wcrtomb, zero_mbstate, AT_LEAST_MB_LEN_MAX};
+
+/// Implementation of wcs2string that accepts a callback instead of building a string.
+/// `func` gets each converted chunk as a byte slice; `INTERNAL_SEPARATOR` is skipped.
+/// If `func` returns false, conversion stops immediately; otherwise it continues.
+/// Returns false if the callback returned false, otherwise true.
+pub fn wcs2string_callback(input: &wstr, mut func: impl FnMut(&[u8]) -> bool) -> bool {
+    let mut state = zero_mbstate();
+    let mut converted = [0_u8; AT_LEAST_MB_LEN_MAX];
+
+    for mut c in input.chars() {
+        // TODO: this doesn't seem sound.
+        if c == INTERNAL_SEPARATOR {
+            // do nothing
+        } else if let Some(byte) = decode_byte_from_char(c) {
+            converted[0] = byte;
+            if !func(&converted[..1]) {
+                return false;
+            }
+        } else if MB_CUR_MAX() == 1 {
+            // single-byte locale (C/POSIX/ISO-8859)
+            // If `c` contains a wide character we emit a question-mark.
+            if u32::from(c) & !0xFF != 0 {
+                c = '?';
+            }
+
+            converted[0] = c as u8;
+            if !func(&converted[..1]) {
+                return false;
+            }
+        } else {
+            converted = [0; AT_LEAST_MB_LEN_MAX];
+            let len = unsafe {
+                wcrtomb(
+                    std::ptr::addr_of_mut!(converted[0]).cast(),
+                    c as libc::wchar_t,
+                    std::ptr::addr_of_mut!(state),
+                )
+            };
+            if len == 0_usize.wrapping_sub(1) {
+                wcs2string_bad_char(c); // wcrtomb returned (size_t)-1: conversion error
+                state = zero_mbstate(); // continue with a fresh conversion state
+            } else if !func(&converted[..len]) {
+                return false;
+            }
+        }
+    }
+    true
+}
+
+fn wcs2string_bad_char(c: char) {
+    FLOGF!(
+        char_encoding,
+        L!("Wide character U+%4X has no narrow representation"),
+        c
+    );
+}
 
 /// Joins strings with a separator.
 pub fn join_strings(strs: &[&wstr], sep: char) -> WString {
diff --git a/fish-rust/src/wildcard.rs b/fish-rust/src/wildcard.rs
new file mode 100644
index 000000000..00b773743
--- /dev/null
+++ b/fish-rust/src/wildcard.rs
@@ -0,0 +1,13 @@
+// Enumeration of all wildcard types.
+
+use crate::common::{char_offset, WILDCARD_RESERVED_BASE};
+
+/// Character representing any character except '/' (slash).
+pub const ANY_CHAR: char = char_offset(WILDCARD_RESERVED_BASE, 0);
+/// Character representing any character string not containing '/' (slash).
+pub const ANY_STRING: char = char_offset(WILDCARD_RESERVED_BASE, 1);
+/// Character representing any character string.
+pub const ANY_STRING_RECURSIVE: char = char_offset(WILDCARD_RESERVED_BASE, 2);
+/// This is a special pseudo-char that is not used other than to mark the
+/// end of the special characters so we can sanity check the enum range.
+pub const ANY_SENTINEL: char = char_offset(WILDCARD_RESERVED_BASE, 3);
diff --git a/fish-rust/src/wutil/encoding.rs b/fish-rust/src/wutil/encoding.rs
new file mode 100644
index 000000000..a3661661e
--- /dev/null
+++ b/fish-rust/src/wutil/encoding.rs
@@ -0,0 +1,19 @@
+extern "C" {
+    pub fn wcrtomb(s: *mut libc::c_char, wc: libc::wchar_t, ps: *mut mbstate_t) -> usize;
+    pub fn mbrtowc(
+        pwc: *mut libc::wchar_t,
+        s: *const libc::c_char,
+        n: usize,
+        p: *mut mbstate_t,
+    ) -> usize;
+}
+
+// HACK This should be mbstate_t from libc but that's not exposed.  Since it's only written by
+// libc, we define it as opaque type that should be large enough for all implementations.
+pub type mbstate_t = [u64; 16];
+pub fn zero_mbstate() -> mbstate_t {
+    [0; 16]
+}
+
+// HACK This should be the MB_LEN_MAX macro from libc but that's not easy to get.
+pub const AT_LEAST_MB_LEN_MAX: usize = 32;
diff --git a/fish-rust/src/wutil/mod.rs b/fish-rust/src/wutil/mod.rs
index 2da5179ea..358c9add7 100644
--- a/fish-rust/src/wutil/mod.rs
+++ b/fish-rust/src/wutil/mod.rs
@@ -1,3 +1,4 @@
+pub mod encoding;
 pub mod errors;
 pub mod gettext;
 mod normalize_path;
@@ -6,6 +7,7 @@ pub mod wcstod;
 pub mod wcstoi;
 mod wrealpath;
 
+use crate::common::fish_reserved_codepoint;
 pub(crate) use gettext::{wgettext, wgettext_fmt};
 pub use normalize_path::*;
 pub(crate) use printf::sprintf;
@@ -28,3 +30,21 @@ pub fn perror(s: &str) {
     let _ = stderr.write_all(slice);
     let _ = stderr.write_all(b"\n");
 }
+
+const PUA1_START: char = '\u{E000}';
+const PUA1_END: char = '\u{F900}';
+const PUA2_START: char = '\u{F0000}';
+const PUA2_END: char = '\u{FFFFE}';
+const PUA3_START: char = '\u{100000}';
+const PUA3_END: char = '\u{10FFFE}';
+
+/// Return true if the code point is in any of the Unicode private use areas.
+fn fish_is_pua(c: char) -> bool {
+    let within = |start: char, end: char| (start..end).contains(&c);
+    within(PUA1_START, PUA1_END) || within(PUA2_START, PUA2_END) || within(PUA3_START, PUA3_END)
+}
+
+/// Needed because many implementations misclassify some code points. See issue #3050.
+pub fn fish_iswalnum(c: char) -> bool {
+    !fish_reserved_codepoint(c) && !fish_is_pua(c) && c.is_alphanumeric()
+}
diff --git a/fish-rust/src/wutil/wrealpath.rs b/fish-rust/src/wutil/wrealpath.rs
index f4d155d6e..04f86404f 100644
--- a/fish-rust/src/wutil/wrealpath.rs
+++ b/fish-rust/src/wutil/wrealpath.rs
@@ -4,13 +4,8 @@ use std::{
     os::unix::prelude::{OsStrExt, OsStringExt},
 };
 
-use cxx::let_cxx_string;
-
-use crate::{
-    ffi::{str2wcstring, wcs2zstring},
-    wchar::{wstr, WString},
-    wchar_ffi::{WCharFromFFI, WCharToFFI},
-};
+use crate::common::{str2wcstring, wcs2zstring};
+use crate::wchar::{wstr, WString};
 
 /// Wide character realpath. The last path component does not need to be valid. If an error occurs,
 /// `wrealpath()` returns `None`
@@ -19,7 +14,7 @@ pub fn wrealpath(pathname: &wstr) -> Option<WString> {
         return None;
     }
 
-    let mut narrow_path: Vec<u8> = wcs2zstring(&pathname.to_ffi()).from_ffi();
+    let mut narrow_path: Vec<u8> = wcs2zstring(pathname).into();
 
     // Strip trailing slashes. This is treats "/a//" as equivalent to "/a" if /a is a non-directory.
     while narrow_path.len() > 1 && narrow_path[narrow_path.len() - 1] == b'/' {
@@ -68,7 +63,5 @@ pub fn wrealpath(pathname: &wstr) -> Option<WString> {
         }
     };
 
-    let_cxx_string!(s = real_path);
-
-    Some(str2wcstring(&s).from_ffi())
+    Some(str2wcstring(&real_path))
 }
diff --git a/src/ast.cpp b/src/ast.cpp
index bd5d0b23b..0ee6bd1ee 100644
--- a/src/ast.cpp
+++ b/src/ast.cpp
@@ -67,9 +67,8 @@ static parse_keyword_t keyword_for_token(token_type_t tok, const wcstring &token
         if (!needs_expand) {
             result = keyword_with_name(token);
         } else {
-            wcstring storage;
-            if (unescape_string(token, &storage, 0)) {
-                result = keyword_with_name(storage);
+            if (auto unescaped = unescape_string(token, 0)) {
+                result = keyword_with_name(*unescaped);
             }
         }
     }
diff --git a/src/builtins/complete.cpp b/src/builtins/complete.cpp
index 8b781a16d..5d7edd3fd 100644
--- a/src/builtins/complete.cpp
+++ b/src/builtins/complete.cpp
@@ -204,12 +204,11 @@ maybe_t<int> builtin_complete(parser_t &parser, io_streams_t &streams, const wch
             }
             case 'p':
             case 'c': {
-                wcstring tmp;
-                if (unescape_string(w.woptarg, &tmp, UNESCAPE_SPECIAL)) {
+                if (auto tmp = unescape_string(w.woptarg, UNESCAPE_SPECIAL)) {
                     if (opt == 'p')
-                        path.push_back(tmp);
+                        path.push_back(*tmp);
                     else
-                        cmd_to_complete.push_back(tmp);
+                        cmd_to_complete.push_back(*tmp);
                 } else {
                     streams.err.append_format(_(L"%ls: Invalid token '%ls'\n"), cmd, w.woptarg);
                     return STATUS_INVALID_ARGS;
diff --git a/src/builtins/read.cpp b/src/builtins/read.cpp
index ba16d0aa2..11ddcec1c 100644
--- a/src/builtins/read.cpp
+++ b/src/builtins/read.cpp
@@ -531,14 +531,13 @@ maybe_t<int> builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t
 
         if (opts.tokenize) {
             auto tok = new_tokenizer(buff.c_str(), TOK_ACCEPT_UNFINISHED);
-            wcstring out;
             if (opts.array) {
                 // Array mode: assign each token as a separate element of the sole var.
                 wcstring_list_t tokens;
                 while (auto t = tok->next()) {
                     auto text = *tok->text_of(*t);
-                    if (unescape_string(text, &out, UNESCAPE_DEFAULT)) {
-                        tokens.push_back(out);
+                    if (auto out = unescape_string(text, UNESCAPE_DEFAULT)) {
+                        tokens.push_back(*out);
                     } else {
                         tokens.push_back(text);
                     }
@@ -549,8 +548,8 @@ maybe_t<int> builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t
                 std::unique_ptr<tok_t> t;
                 while ((vars_left() - 1 > 0) && (t = tok->next())) {
                     auto text = *tok->text_of(*t);
-                    if (unescape_string(text, &out, UNESCAPE_DEFAULT)) {
-                        parser.set_var_and_fire(*var_ptr++, opts.place, out);
+                    if (auto out = unescape_string(text, UNESCAPE_DEFAULT)) {
+                        parser.set_var_and_fire(*var_ptr++, opts.place, *out);
                     } else {
                         parser.set_var_and_fire(*var_ptr++, opts.place, text);
                     }
diff --git a/src/builtins/string.cpp b/src/builtins/string.cpp
index 424dd2afe..bb993ade3 100644
--- a/src/builtins/string.cpp
+++ b/src/builtins/string.cpp
@@ -737,10 +737,9 @@ static int string_unescape(parser_t &parser, io_streams_t &streams, int argc,
 
     arg_iterator_t aiter(argv, optind, streams);
     while (const wcstring *arg = aiter.nextstr()) {
-        wcstring result;
         wcstring sep = aiter.want_newline() ? L"\n" : L"";
-        if (unescape_string(*arg, &result, flags, opts.escape_style)) {
-            streams.out.append(result + sep);
+        if (auto result = unescape_string(*arg, flags, opts.escape_style)) {
+            streams.out.append(*result + sep);
             nesc++;
         }
     }
diff --git a/src/common.cpp b/src/common.cpp
index a67bd6fa9..1e348f63c 100644
--- a/src/common.cpp
+++ b/src/common.cpp
@@ -33,6 +33,7 @@
 #include <memory>
 
 #include "common.h"
+#include "common.rs.h"
 #include "expand.h"
 #include "fallback.h"  // IWYU pragma: keep
 #include "flog.h"
@@ -119,17 +120,6 @@ long convert_digit(wchar_t d, int base) {
 /// Test whether the char is a valid hex digit as used by the `escape_string_*()` functions.
 static bool is_hex_digit(int c) { return std::strchr("0123456789ABCDEF", c) != nullptr; }
 
-/// This is a specialization of `convert_digit()` that only handles base 16 and only uppercase.
-static long convert_hex_digit(wchar_t d) {
-    if ((d <= L'9') && (d >= L'0')) {
-        return d - L'0';
-    } else if ((d <= L'Z') && (d >= L'A')) {
-        return 10 + d - L'A';
-    }
-
-    return -1;
-}
-
 bool is_windows_subsystem_for_linux() {
 #if defined(WSL)
     return true;
@@ -749,38 +739,6 @@ static void escape_string_url(const wcstring &in, wcstring &out) {
     }
 }
 
-/// Reverse the effects of `escape_string_url()`. By definition the string has consist of just ASCII
-/// chars.
-static bool unescape_string_url(const wchar_t *in, wcstring *out) {
-    std::string result;
-    result.reserve(out->size());
-    for (wchar_t c = *in; c; c = *++in) {
-        if (c > 0x7F) return false;  // invalid character means we can't decode the string
-        if (c == '%') {
-            int c1 = in[1];
-            if (c1 == 0) return false;  // found unexpected end of string
-            if (c1 == '%') {
-                result.push_back('%');
-                in++;
-            } else {
-                int c2 = in[2];
-                if (c2 == 0) return false;  // string ended prematurely
-                long d1 = convert_digit(c1, 16);
-                if (d1 < 0) return false;
-                long d2 = convert_digit(c2, 16);
-                if (d2 < 0) return false;
-                result.push_back(16 * d1 + d2);
-                in += 2;
-            }
-        } else {
-            result.push_back(c);
-        }
-    }
-
-    *out = str2wcstring(result);
-    return true;
-}
-
 /// Escape a string in a fashion suitable for using as a fish var name. Store the result in out_str.
 static void escape_string_var(const wcstring &in, wcstring &out) {
     bool prev_was_hex_encoded = false;
@@ -812,46 +770,6 @@ static void escape_string_var(const wcstring &in, wcstring &out) {
     }
 }
 
-/// Reverse the effects of `escape_string_var()`. By definition the string has consist of just ASCII
-/// chars.
-static bool unescape_string_var(const wchar_t *in, wcstring *out) {
-    std::string result;
-    result.reserve(out->size());
-    bool prev_was_hex_encoded = false;
-    for (wchar_t c = *in; c; c = *++in) {
-        if (c > 0x7F) return false;  // invalid character means we can't decode the string
-        if (c == '_') {
-            int c1 = in[1];
-            if (c1 == 0) {
-                if (prev_was_hex_encoded) break;
-                return false;  // found unexpected escape char at end of string
-            }
-            if (c1 == '_') {
-                result.push_back('_');
-                in++;
-            } else if (is_hex_digit(c1)) {
-                int c2 = in[2];
-                if (c2 == 0) return false;  // string ended prematurely
-                long d1 = convert_hex_digit(c1);
-                if (d1 < 0) return false;
-                long d2 = convert_hex_digit(c2);
-                if (d2 < 0) return false;
-                result.push_back(16 * d1 + d2);
-                in += 2;
-                prev_was_hex_encoded = true;
-            }
-            // No "else" clause because if the first char after an underscore is not another
-            // underscore or a valid hex character then the underscore is there to improve
-            // readability after we've encoded a character not valid in a var name.
-        } else {
-            result.push_back(c);
-        }
-    }
-
-    *out = str2wcstring(result);
-    return true;
-}
-
 wcstring escape_string_for_double_quotes(wcstring in) {
     // We need to escape backslashes, double quotes, and dollars only.
     wcstring result = std::move(in);
@@ -1130,12 +1048,6 @@ wcstring escape_string(const wcstring &in, escape_flags_t flags, escape_string_s
     return result;
 }
 
-/// Helper to return the last character in a string, or none.
-static maybe_t<wchar_t> string_last_char(const wcstring &str) {
-    if (str.empty()) return none();
-    return str.back();
-}
-
 /// Given a null terminated string starting with a backslash, read the escape as if it is unquoted,
 /// appending to result. Return the number of characters consumed, or none on error.
 maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete,
@@ -1329,320 +1241,30 @@ maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, boo
     return in_pos;
 }
 
-/// Returns the unescaped version of input_str into output_str (by reference). Returns true if
-/// successful. If false, the contents of output_str are unchanged.
-static bool unescape_string_internal(const wchar_t *const input, const size_t input_len,
-                                     wcstring *output_str, unescape_flags_t flags) {
-    // Set up result string, which we'll swap with the output on success.
-    wcstring result;
-    result.reserve(input_len);
-
-    const bool unescape_special = static_cast<bool>(flags & UNESCAPE_SPECIAL);
-    const bool allow_incomplete = static_cast<bool>(flags & UNESCAPE_INCOMPLETE);
-    const bool ignore_backslashes = static_cast<bool>(flags & UNESCAPE_NO_BACKSLASHES);
-
-    // The positions of open braces.
-    std::vector<size_t> braces;
-    // The positions of variable expansions or brace ","s.
-    // We only read braces as expanders if there's a variable expansion or "," in them.
-    std::vector<size_t> vars_or_seps;
-    int brace_count = 0;
-
-    bool errored = false;
-    enum {
-        mode_unquoted,
-        mode_single_quotes,
-        mode_double_quotes,
-    } mode = mode_unquoted;
-
-    for (size_t input_position = 0; input_position < input_len && !errored; input_position++) {
-        const wchar_t c = input[input_position];
-        // Here's the character we'll append to result, or none() to suppress it.
-        maybe_t<wchar_t> to_append_or_none = c;
-        if (mode == mode_unquoted) {
-            switch (c) {
-                case L'\\': {
-                    if (!ignore_backslashes) {
-                        // Backslashes (escapes) are complicated and may result in errors, or
-                        // appending INTERNAL_SEPARATORs, so we have to handle them specially.
-                        auto escape_chars = read_unquoted_escape(
-                            input + input_position, &result, allow_incomplete, unescape_special);
-                        if (!escape_chars.has_value()) {
-                            // A none() return indicates an error.
-                            errored = true;
-                        } else {
-                            // Skip over the characters we read, minus one because the outer loop
-                            // will increment it.
-                            assert(*escape_chars > 0);
-                            input_position += *escape_chars - 1;
-                        }
-                        // We've already appended, don't append anything else.
-                        to_append_or_none = none();
-                    }
-                    break;
-                }
-                case L'~': {
-                    if (unescape_special && (input_position == 0)) {
-                        to_append_or_none = HOME_DIRECTORY;
-                    }
-                    break;
-                }
-                case L'%': {
-                    // Note that this only recognizes %self if the string is literally %self.
-                    // %self/foo will NOT match this.
-                    if (unescape_special && input_position == 0 &&
-                        !std::wcscmp(input, PROCESS_EXPAND_SELF_STR)) {
-                        to_append_or_none = PROCESS_EXPAND_SELF;
-                        input_position += PROCESS_EXPAND_SELF_STR_LEN - 1;  // skip over 'self's
-                    }
-                    break;
-                }
-                case L'*': {
-                    if (unescape_special) {
-                        // In general, this is ANY_STRING. But as a hack, if the last appended char
-                        // is ANY_STRING, delete the last char and store ANY_STRING_RECURSIVE to
-                        // reflect the fact that ** is the recursive wildcard.
-                        if (string_last_char(result) == ANY_STRING) {
-                            assert(!result.empty());
-                            result.resize(result.size() - 1);
-                            to_append_or_none = ANY_STRING_RECURSIVE;
-                        } else {
-                            to_append_or_none = ANY_STRING;
-                        }
-                    }
-                    break;
-                }
-                case L'?': {
-                    if (unescape_special && !feature_test(feature_flag_t::qmark_noglob)) {
-                        to_append_or_none = ANY_CHAR;
-                    }
-                    break;
-                }
-                case L'$': {
-                    if (unescape_special) {
-                        bool is_cmdsub =
-                            input_position + 1 < input_len && input[input_position + 1] == L'(';
-                        if (!is_cmdsub) {
-                            to_append_or_none = VARIABLE_EXPAND;
-                            vars_or_seps.push_back(input_position);
-                        }
-                    }
-                    break;
-                }
-                case L'{': {
-                    if (unescape_special) {
-                        brace_count++;
-                        to_append_or_none = BRACE_BEGIN;
-                        // We need to store where the brace *ends up* in the output.
-                        braces.push_back(result.size());
-                    }
-                    break;
-                }
-                case L'}': {
-                    if (unescape_special) {
-                        // HACK: The completion machinery sometimes hands us partial tokens.
-                        // We can't parse them properly, but it shouldn't hurt,
-                        // so we don't assert here.
-                        // See #4954.
-                        // assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we
-                        // shouldn't be able to get here");
-                        brace_count--;
-                        to_append_or_none = BRACE_END;
-                        if (!braces.empty()) {
-                            // HACK: To reduce accidental use of brace expansion, treat a brace
-                            // with zero or one items as literal input. See #4632. (The hack is
-                            // doing it here and like this.)
-                            if (vars_or_seps.empty() || vars_or_seps.back() < braces.back()) {
-                                result[braces.back()] = L'{';
-                                // We also need to turn all spaces back.
-                                for (size_t i = braces.back() + 1; i < result.size(); i++) {
-                                    if (result[i] == BRACE_SPACE) result[i] = L' ';
-                                }
-                                to_append_or_none = L'}';
-                            }
-
-                            // Remove all seps inside the current brace pair, so if we have a
-                            // surrounding pair we only get seps inside *that*.
-                            if (!vars_or_seps.empty()) {
-                                while (!vars_or_seps.empty() && vars_or_seps.back() > braces.back())
-                                    vars_or_seps.pop_back();
-                            }
-                            braces.pop_back();
-                        }
-                    }
-                    break;
-                }
-                case L',': {
-                    if (unescape_special && brace_count > 0) {
-                        to_append_or_none = BRACE_SEP;
-                        vars_or_seps.push_back(input_position);
-                    }
-                    break;
-                }
-                case L' ': {
-                    if (unescape_special && brace_count > 0) {
-                        to_append_or_none = BRACE_SPACE;
-                    }
-                    break;
-                }
-                case L'\'': {
-                    mode = mode_single_quotes;
-                    to_append_or_none =
-                        unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
-                    break;
-                }
-                case L'\"': {
-                    mode = mode_double_quotes;
-                    to_append_or_none =
-                        unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
-                    break;
-                }
-                default: {
-                    break;
-                }
-            }
-        } else if (mode == mode_single_quotes) {
-            if (c == L'\\') {
-                // A backslash may or may not escape something in single quotes.
-                switch (input[input_position + 1]) {
-                    case '\\':
-                    case L'\'': {
-                        to_append_or_none = input[input_position + 1];
-                        input_position += 1;  // skip over the backslash
-                        break;
-                    }
-                    case L'\0': {
-                        if (!allow_incomplete) {
-                            errored = true;
-                        } else {
-                            // PCA this line had the following cryptic comment: 'We may ever escape
-                            // a NULL character, but still appending a \ in case I am wrong.' Not
-                            // sure what it means or the importance of this.
-                            input_position += 1; /* Skip over the backslash */
-                            to_append_or_none = L'\\';
-                        }
-                        break;
-                    }
-                    default: {
-                        // Literal backslash that doesn't escape anything! Leave things alone; we'll
-                        // append the backslash itself.
-                        break;
-                    }
-                }
-            } else if (c == L'\'') {
-                to_append_or_none =
-                    unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
-                mode = mode_unquoted;
-            }
-        } else if (mode == mode_double_quotes) {
-            switch (c) {
-                case L'"': {
-                    mode = mode_unquoted;
-                    to_append_or_none =
-                        unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
-                    break;
-                }
-                case '\\': {
-                    switch (input[input_position + 1]) {
-                        case L'\0': {
-                            if (!allow_incomplete) {
-                                errored = true;
-                            } else {
-                                to_append_or_none = L'\0';
-                            }
-                            break;
-                        }
-                        case '\\':
-                        case L'$':
-                        case '"': {
-                            to_append_or_none = input[input_position + 1];
-                            input_position += 1; /* Skip over the backslash */
-                            break;
-                        }
-                        case '\n': {
-                            /* Swallow newline */
-                            to_append_or_none = none();
-                            input_position += 1; /* Skip over the backslash */
-                            break;
-                        }
-                        default: {
-                            /* Literal backslash that doesn't escape anything! Leave things alone;
-                             * we'll append the backslash itself */
-                            break;
-                        }
-                    }
-                    break;
-                }
-                case '$': {
-                    if (unescape_special) {
-                        to_append_or_none = VARIABLE_EXPAND_SINGLE;
-                        vars_or_seps.push_back(input_position);
-                    }
-                    break;
-                }
-                default: {
-                    break;
-                }
-            }
-        }
-
-        // Now maybe append the char.
-        if (to_append_or_none.has_value()) {
-            result.push_back(*to_append_or_none);
-        }
-    }
-
-    // Return the string by reference, and then success.
-    if (!errored) {
-        *output_str = std::move(result);
-    }
-    return !errored;
-}
-
 bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special) {
     assert(str != nullptr);
     wcstring output;
-    bool success = unescape_string_internal(str->c_str(), str->size(), &output, escape_special);
-    if (success) {
-        *str = std::move(output);
+    if (auto unescaped = unescape_string(str->c_str(), str->size(), escape_special)) {
+        *str = std::move(*unescaped);
+        return true;
     }
-    return success;
+    return false;
 }
 
-bool unescape_string(const wchar_t *input, size_t len, wcstring *output,
-                     unescape_flags_t escape_special, escape_string_style_t style) {
-    bool success = false;
-    switch (style) {
-        case STRING_STYLE_SCRIPT: {
-            success = unescape_string_internal(input, len, output, escape_special);
-            break;
-        }
-        case STRING_STYLE_URL: {
-            success = unescape_string_url(input, output);
-            break;
-        }
-        case STRING_STYLE_VAR: {
-            success = unescape_string_var(input, output);
-            break;
-        }
-        case STRING_STYLE_REGEX: {
-            // unescaping PCRE2 is not needed/supported, the PCRE2 engine is responsible for that
-            success = false;
-            break;
-        }
-    }
-    if (!success) output->clear();
-    return success;
+std::unique_ptr<wcstring> unescape_string(const wchar_t *input, unescape_flags_t escape_special,
+                                          escape_string_style_t style) {
+    return unescape_string(input, std::wcslen(input), escape_special, style);
 }
 
-bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special,
-                     escape_string_style_t style) {
-    return unescape_string(input, std::wcslen(input), output, escape_special, style);
+std::unique_ptr<wcstring> unescape_string(const wchar_t *input, size_t len,
+                                          unescape_flags_t escape_special,
+                                          escape_string_style_t style) {
+    return rust_unescape_string(input, len, escape_special, style);
 }
 
-bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special,
-                     escape_string_style_t style) {
-    return unescape_string(input.c_str(), input.size(), output, escape_special, style);
+std::unique_ptr<wcstring> unescape_string(const wcstring &input, unescape_flags_t escape_special,
+                                          escape_string_style_t style) {
+    return unescape_string(input.c_str(), input.size(), escape_special, style);
 }
 
 wcstring format_size(long long sz) {
diff --git a/src/common.h b/src/common.h
index e329370b7..7ca0394ef 100644
--- a/src/common.h
+++ b/src/common.h
@@ -521,15 +521,15 @@ bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special);
 
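-/// Reverse the effects of calling `escape_string`. Returns the unescaped value by reference. On
-/// failure, the output is set to an empty string.
+/// Reverse the effects of calling `escape_string`. Returns the unescaped string, or a null pointer
+/// on failure.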
-bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special,
-                     escape_string_style_t style = STRING_STYLE_SCRIPT);
+std::unique_ptr<wcstring> unescape_string(const wchar_t *input, unescape_flags_t escape_special,
+                                          escape_string_style_t style = STRING_STYLE_SCRIPT);
 
-bool unescape_string(const wchar_t *input, size_t len, wcstring *output,
-                     unescape_flags_t escape_special,
-                     escape_string_style_t style = STRING_STYLE_SCRIPT);
+std::unique_ptr<wcstring> unescape_string(const wchar_t *input, size_t len,
+                                          unescape_flags_t escape_special,
+                                          escape_string_style_t style = STRING_STYLE_SCRIPT);
 
-bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special,
-                     escape_string_style_t style = STRING_STYLE_SCRIPT);
+std::unique_ptr<wcstring> unescape_string(const wcstring &input, unescape_flags_t escape_special,
+                                          escape_string_style_t style = STRING_STYLE_SCRIPT);
 
 /// Write the given paragraph of output, redoing linebreaks to fit \p termsize.
 wcstring reformat_for_screen(const wcstring &msg, const termsize_t &termsize);
diff --git a/src/complete.cpp b/src/complete.cpp
index 522879a21..7dd34b4fe 100644
--- a/src/complete.cpp
+++ b/src/complete.cpp
@@ -1469,8 +1469,8 @@ void completer_t::escape_opening_brackets(const wcstring &argument) {
     if (!have_unquoted_unescaped_bracket) return;
     // Since completion_apply_to_command_line will escape the completion, we need to provide an
     // unescaped version.
-    wcstring unescaped_argument;
-    if (!unescape_string(argument, &unescaped_argument, UNESCAPE_INCOMPLETE)) return;
+    auto unescaped_argument = unescape_string(argument, UNESCAPE_INCOMPLETE);
+    if (!unescaped_argument) return;
     for (completion_t &comp : completions.get_list()) {
         if (comp.flags & COMPLETE_REPLACES_TOKEN) continue;
         comp.flags |= COMPLETE_REPLACES_TOKEN;
@@ -1482,7 +1482,7 @@ void completer_t::escape_opening_brackets(const wcstring &argument) {
         if (comp.flags & COMPLETE_DONT_ESCAPE) {
             FLOG(warning, L"unexpected completion flag");
         }
-        comp.completion = unescaped_argument + comp.completion;
+        comp.completion = *unescaped_argument + comp.completion;
     }
 }
 
@@ -1494,9 +1494,8 @@ void completer_t::mark_completions_duplicating_arguments(const wcstring &cmd,
     wcstring_list_t arg_strs;
     for (const auto &arg : args) {
         wcstring argstr = *arg.get_source(cmd);
-        wcstring argstr_unesc;
-        if (unescape_string(argstr, &argstr_unesc, UNESCAPE_DEFAULT)) {
-            arg_strs.push_back(std::move(argstr_unesc));
+        if (auto argstr_unesc = unescape_string(argstr, UNESCAPE_DEFAULT)) {
+            arg_strs.push_back(std::move(*argstr_unesc));
         }
     }
     std::sort(arg_strs.begin(), arg_strs.end());
@@ -1668,11 +1667,14 @@ void completer_t::perform_for_commandline(wcstring cmdline) {
         source_range_t command_range = {cmd_tok.offset - bias, cmd_tok.length};
 
         wcstring exp_command = *cmd_tok.get_source(cmdline);
-        bool unescaped =
-            expand_command_token(ctx, exp_command) &&
-            unescape_string(previous_argument, &arg_data.previous_argument, UNESCAPE_DEFAULT) &&
-            unescape_string(current_argument, &arg_data.current_argument, UNESCAPE_INCOMPLETE);
+        std::unique_ptr<wcstring> prev;
+        std::unique_ptr<wcstring> cur;
+        bool unescaped = expand_command_token(ctx, exp_command) &&
+                         (prev = unescape_string(previous_argument, UNESCAPE_DEFAULT)) &&
+                         (cur = unescape_string(current_argument, UNESCAPE_INCOMPLETE));
         if (unescaped) {
+            arg_data.previous_argument = std::move(*prev);
+            arg_data.current_argument = std::move(*cur);
             // Have to walk over the command and its entire wrap chain. If any command
             // disables do_file, then they all do.
             walk_wrap_chain(exp_command, *effective_cmdline, command_range, &arg_data);
diff --git a/src/env.cpp b/src/env.cpp
index 8bacb4e01..b5e889856 100644
--- a/src/env.cpp
+++ b/src/env.cpp
@@ -472,11 +472,11 @@ void env_init(const struct config_paths_t *paths, bool do_uvars, bool default_pa
         for (const auto &kv : table) {
             if (string_prefixes_string(prefix, kv.first)) {
                 wcstring escaped_name = kv.first.substr(prefix_len);
-                wcstring name;
-                if (unescape_string(escaped_name, &name, unescape_flags_t{}, STRING_STYLE_VAR)) {
-                    wcstring key = name;
+                if (auto name =
+                        unescape_string(escaped_name, unescape_flags_t{}, STRING_STYLE_VAR)) {
+                    wcstring key = *name;
                     wcstring replacement = join_strings(kv.second.as_list(), L' ');
-                    abbrs->add(std::move(name), std::move(key), std::move(replacement),
+                    abbrs->add(std::move(*name), std::move(key), std::move(replacement),
                                abbrs_position_t::command, from_universal);
                 }
             }
diff --git a/src/env_universal_common.cpp b/src/env_universal_common.cpp
index 0159f9efd..fc5cd1e0d 100644
--- a/src/env_universal_common.cpp
+++ b/src/env_universal_common.cpp
@@ -800,9 +800,11 @@ bool env_universal_t::populate_1_variable(const wchar_t *input, env_var_t::env_v
 
     // Parse out the value into storage, and decode it into a variable.
     storage->clear();
-    if (!unescape_string(colon + 1, storage, 0)) {
+    auto unescaped = unescape_string(colon + 1, 0);
+    if (!unescaped) {
         return false;
     }
+    *storage = std::move(*unescaped);
     env_var_t var{decode_serialized(*storage), flags};
 
     // Parse out the key and write into the map.
diff --git a/src/expand.cpp b/src/expand.cpp
index 7ffa34acd..74e0bb650 100644
--- a/src/expand.cpp
+++ b/src/expand.cpp
@@ -971,7 +971,8 @@ expand_result_t expander_t::stage_variables(wcstring input, completion_receiver_
     // We accept incomplete strings here, since complete uses expand_string to expand incomplete
     // strings from the commandline.
     wcstring next;
-    unescape_string(input, &next, UNESCAPE_SPECIAL | UNESCAPE_INCOMPLETE);
+    if (auto unescaped = unescape_string(input, UNESCAPE_SPECIAL | UNESCAPE_INCOMPLETE))
+        next = std::move(*unescaped);
 
     if (flags & expand_flag::skip_variables) {
         for (auto &i : next) {
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index 864ecce68..c03af9a59 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -376,27 +376,26 @@ static void test_unescape_sane() {
         {L"\"abcd\\n\"", L"abcd\\n"}, {L"\\143", L"c"},
         {L"'\\143'", L"\\143"},       {L"\\n", L"\n"}  // \n normally becomes newline
     };
-    wcstring output;
     for (const auto &test : tests) {
-        bool ret = unescape_string(test.input, &output, UNESCAPE_DEFAULT);
-        if (!ret) {
+        auto output = unescape_string(test.input, UNESCAPE_DEFAULT);
+        if (!output) {
             err(L"Failed to unescape '%ls'\n", test.input);
-        } else if (output != test.expected) {
+        } else if (*output != test.expected) {
             err(L"In unescaping '%ls', expected '%ls' but got '%ls'\n", test.input, test.expected,
-                output.c_str());
+                output->c_str());
         }
     }
 
     // Test for overflow.
-    if (unescape_string(L"echo \\UFFFFFF", &output, UNESCAPE_DEFAULT)) {
+    if (unescape_string(L"echo \\UFFFFFF", UNESCAPE_DEFAULT)) {
         err(L"Should not have been able to unescape \\UFFFFFF\n");
     }
-    if (unescape_string(L"echo \\U110000", &output, UNESCAPE_DEFAULT)) {
+    if (unescape_string(L"echo \\U110000", UNESCAPE_DEFAULT)) {
         err(L"Should not have been able to unescape \\U110000\n");
     }
 #if WCHAR_MAX != 0xffff
     // TODO: Make this work on MS Windows.
-    if (!unescape_string(L"echo \\U10FFFF", &output, UNESCAPE_DEFAULT)) {
+    if (!unescape_string(L"echo \\U10FFFF", UNESCAPE_DEFAULT)) {
         err(L"Should have been able to unescape \\U10FFFF\n");
     }
 #endif
@@ -408,8 +407,6 @@ static void test_escape_crazy() {
     say(L"Testing escaping and unescaping");
     wcstring random_string;
     wcstring escaped_string;
-    wcstring unescaped_string;
-    bool unescaped_success;
     for (size_t i = 0; i < ESCAPE_TEST_COUNT; i++) {
         random_string.clear();
         while (random() % ESCAPE_TEST_LENGTH) {
@@ -417,14 +414,14 @@ static void test_escape_crazy() {
         }
 
         escaped_string = escape_string(random_string);
-        unescaped_success = unescape_string(escaped_string, &unescaped_string, UNESCAPE_DEFAULT);
+        auto unescaped_string = unescape_string(escaped_string, UNESCAPE_DEFAULT);
 
-        if (!unescaped_success) {
+        if (!unescaped_string) {
             err(L"Failed to unescape string <%ls>", escaped_string.c_str());
             break;
-        } else if (unescaped_string != random_string) {
+        } else if (*unescaped_string != random_string) {
             err(L"Escaped and then unescaped string '%ls', but got back a different string '%ls'",
-                random_string.c_str(), unescaped_string.c_str());
+                random_string.c_str(), unescaped_string->c_str());
             break;
         }
     }
@@ -432,12 +429,12 @@ static void test_escape_crazy() {
     // Verify that ESCAPE_NO_PRINTABLES also escapes backslashes so we don't regress on issue #3892.
     random_string = L"line 1\\n\nline 2";
     escaped_string = escape_string(random_string, ESCAPE_NO_PRINTABLES | ESCAPE_NO_QUOTED);
-    unescaped_success = unescape_string(escaped_string, &unescaped_string, UNESCAPE_DEFAULT);
-    if (!unescaped_success) {
+    auto unescaped_string = unescape_string(escaped_string, UNESCAPE_DEFAULT);
+    if (!unescaped_string) {
         err(L"Failed to unescape string <%ls>", escaped_string.c_str());
-    } else if (unescaped_string != random_string) {
+    } else if (*unescaped_string != random_string) {
         err(L"Escaped and then unescaped string '%ls', but got back a different string '%ls'",
-            random_string.c_str(), unescaped_string.c_str());
+            random_string.c_str(), unescaped_string->c_str());
     }
 }
 
diff --git a/src/parse_util.cpp b/src/parse_util.cpp
index 404819742..c8bde9860 100644
--- a/src/parse_util.cpp
+++ b/src/parse_util.cpp
@@ -960,8 +960,8 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen
     parser_test_error_bits_t err = 0;
 
     auto check_subtoken = [&arg_src, &out_errors, source_start](size_t begin, size_t end) -> int {
-        wcstring unesc;
-        if (!unescape_string(arg_src.c_str() + begin, end - begin, &unesc, UNESCAPE_SPECIAL)) {
+        auto maybe_unesc = unescape_string(arg_src.c_str() + begin, end - begin, UNESCAPE_SPECIAL);
+        if (!maybe_unesc) {
             if (out_errors) {
                 const wchar_t *fmt = L"Invalid token '%ls'";
                 if (arg_src.length() == 2 && arg_src[0] == L'\\' &&
@@ -975,6 +975,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen
             }
             return 1;
         }
+        const wcstring &unesc = *maybe_unesc;
 
         parser_test_error_bits_t err = 0;
         // Check for invalid variable expansions.
diff --git a/src/wildcard.cpp b/src/wildcard.cpp
index 9dc9c55c5..6f6258379 100644
--- a/src/wildcard.cpp
+++ b/src/wildcard.cpp
@@ -60,7 +60,9 @@ bool wildcard_has(const wchar_t *str, size_t len) {
         return false;
     }
     wcstring unescaped;
-    unescape_string(str, len, &unescaped, UNESCAPE_SPECIAL);
+    if (auto tmp = unescape_string(str, len, UNESCAPE_SPECIAL)) {
+        unescaped = std::move(*tmp);
+    }
     return wildcard_has_internal(unescaped);
 }
 
diff --git a/tests/checks/basic.fish b/tests/checks/basic.fish
index 60a4e18a2..3d94ad038 100644
--- a/tests/checks/basic.fish
+++ b/tests/checks/basic.fish
@@ -158,6 +158,9 @@ echo -e 'abc\x211def'
 #CHECK: abc!def
 #CHECK: abc!1def
 
+echo \UDE01
+#CHECK: �
+
 # Comments allowed in between lines (#1987)
 echo before comment \
   # comment