Added constants for expansions

2025-01-15 06:24:01 +00:00 · 2023-03-10 21:47:41 -05:00 · 2023-03-10 21:47:41 -05:00 · 88e0c2137a
commit 88e0c2137a
parent 80c8bc75e6
4 changed files with 71 additions and 5 deletions
--- a/fish-rust/src/expand.rs
+++ b/fish-rust/src/expand.rs
@ -0,0 +1,39 @@
 use crate::wchar::{EXPAND_RESERVED_BASE, EXPAND_RESERVED_END};
 /// Reserved characters used internally to mark up strings during expansion.
 ///
 /// The discriminants are allocated consecutively, starting at
 /// `EXPAND_RESERVED_BASE`, so every variant maps to exactly one code point in
 /// the range reserved for expansions.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 #[repr(u32)]
 pub enum ExpandChars {
    /// Character representing a home directory.
    HomeDirectory = EXPAND_RESERVED_BASE as u32,
    /// Character representing process expansion for %self.
    ProcessExpandSelf,
    /// Character representing variable expansion.
    VariableExpand,
    /// Character representing variable expansion into a single element.
    VariableExpandSingle,
    /// Character representing the start of a brace expansion.
    BraceBegin,
    /// Character representing the end of a brace expansion.
    BraceEnd,
    /// Character representing separation between two brace elements.
    BraceSep,
    /// Character that takes the place of any whitespace within non-quoted text in braces.
    BraceSpace,
    /// Separate subtokens in a token with this character.
    InternalSeparator,
    /// Character representing an empty variable expansion. Only used transitively while expanding
    /// variables.
    VariableExpandEmpty,
 }
 // Compile-time guard: the last variant of `ExpandChars` must still lie below
 // the (exclusive) end of the range reserved for expansion characters.
 const _: () = {
    let last_used = ExpandChars::VariableExpandEmpty as u32;
    let range_end = EXPAND_RESERVED_END as u32;
    assert!(
        last_used < range_end,
        "Characters used in expansions must stay within private use area"
    );
 };
 impl From<ExpandChars> for char {
    /// Converts an expansion marker into the reserved `char` whose code point
    /// equals the variant's discriminant.
    fn from(val: ExpandChars) -> Self {
        let code_point = val as u32;
        // SAFETY: the discriminants start at `EXPAND_RESERVED_BASE` (itself a
        // `char`) and a compile-time assertion keeps the last variant below
        // `EXPAND_RESERVED_END`, so the enum's range is limited to that span.
        unsafe { char::from_u32_unchecked(code_point) }
    }
 }
--- a/fish-rust/src/lib.rs
+++ b/fish-rust/src/lib.rs
@ -46,6 +46,7 @@ mod builtins;
 mod env;
 mod re;
 mod expand;
 mod path;
 // Don't use `#[cfg(test)]` here to make sure ffi tests are built and tested
--- a/fish-rust/src/path.rs
+++ b/fish-rust/src/path.rs
@ -1,4 +1,7 @@
-use crate::wchar::{wstr, WExt, WString, L};
+use crate::{
    expand::ExpandChars::HomeDirectory,
    wchar::{wstr, WExt, WString, L},
 };
 /// If the given path looks like it's relative to the working directory, then prepend that working
 /// directory. This operates on unescaped paths only (so a ~ means a literal ~).
@ -9,7 +12,8 @@ pub fn path_apply_working_directory(path: &wstr, working_directory: &wstr) -> WS
    // We're going to make sure that if we want to prepend the wd, that the string has no leading
    // "/".
-    let prepend_wd = path.as_char_slice()[0] != '/' && path.as_char_slice()[0] != '\u{FDD0}';
+    let prepend_wd =
        path.as_char_slice()[0] != '/' && path.as_char_slice()[0] != HomeDirectory.into();
    if !prepend_wd {
        // No need to prepend the wd, so just return the path we were given.
--- a/fish-rust/src/wchar.rs
+++ b/fish-rust/src/wchar.rs
@ -30,6 +30,25 @@ pub use widestring_suffix::widestrs;
 /// Pull in our extensions.
 pub use crate::wchar_ext::{CharPrefixSuffix, WExt};
 // Internal marker characters are taken from the Unicode "non-characters"
 // wherever possible. That gives us 32 "characters" for internal use which we
 // can guarantee should not appear in our input stream.
 // See http://www.unicode.org/faq/private_use.html.
 pub const RESERVED_CHAR_BASE: char = '\u{FDD0}';
 pub const RESERVED_CHAR_END: char = '\u{FDF0}';
 // The available non-character values are divided into two ranges of 16 code
 // points each (expansion first, then wildcard) so the places that use these
 // special characters can never conflict with one another.
 pub const EXPAND_RESERVED_BASE: char = RESERVED_CHAR_BASE;
 pub const EXPAND_RESERVED_END: char =
    if let Some(c) = char::from_u32(EXPAND_RESERVED_BASE as u32 + 16u32) {
        c
    } else {
        panic!("private use codepoint in expansion region should be valid char")
    };
 pub const WILDCARD_RESERVED_BASE: char = EXPAND_RESERVED_END;
 pub const WILDCARD_RESERVED_END: char =
    if let Some(c) = char::from_u32(WILDCARD_RESERVED_BASE as u32 + 16u32) {
        c
    } else {
        panic!("private use codepoint in wildcard region should be valid char")
    };
 // These are in the Unicode private-use range. We really shouldn't use this
 // range but have little choice in the matter given how our lexer/parser works.
 // We can't use non-characters for these two ranges because there are only 66 of
@ -42,8 +61,11 @@ pub use crate::wchar_ext::{CharPrefixSuffix, WExt};
 // Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
 // of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
 // on Mac OS X. See http://www.unicode.org/faq/private_use.html.
-const ENCODE_DIRECT_BASE: u32 = 0xF600;
+const ENCODE_DIRECT_BASE: char = '\u{F600}';
-const ENCODE_DIRECT_END: u32 = ENCODE_DIRECT_BASE + 256;
+const ENCODE_DIRECT_END: char = match char::from_u32(ENCODE_DIRECT_BASE as u32 + 256) {
    Some(c) => c,
    None => panic!("private use codepoint in encode direct region should be valid char"),
 };
 /// Encode a literal byte in a UTF-32 character. This is required for e.g. the echo builtin, whose
 /// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g.
@ -53,6 +75,6 @@ const ENCODE_DIRECT_END: u32 = ENCODE_DIRECT_BASE + 256;
 ///
 /// See https://github.com/fish-shell/fish-shell/issues/1894.
 pub fn wchar_literal_byte(byte: u8) -> char {
-    char::from_u32(ENCODE_DIRECT_BASE + u32::from(byte))
+    char::from_u32(u32::from(ENCODE_DIRECT_BASE) + u32::from(byte))
        .expect("private-use codepoint should be valid char")
 }