Added constants for expansions

This commit is contained in:
Victor Song 2023-03-10 21:47:41 -05:00 committed by ridiculousfish
parent 80c8bc75e6
commit 88e0c2137a
4 changed files with 71 additions and 5 deletions

39
fish-rust/src/expand.rs Normal file
View file

@ -0,0 +1,39 @@
use crate::wchar::{EXPAND_RESERVED_BASE, EXPAND_RESERVED_END};
/// Reserved marker characters used internally while performing expansions.
///
/// The first variant is pinned to `EXPAND_RESERVED_BASE`; every later variant implicitly takes the
/// previous discriminant plus one, so the declaration order below fixes each marker's character
/// value. New variants must be added at the end and must stay below `EXPAND_RESERVED_END`.
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ExpandChars {
    /// Character representing a home directory.
    HomeDirectory = EXPAND_RESERVED_BASE as u32,
    /// Character representing process expansion for %self.
    ProcessExpandSelf,
    /// Character representing variable expansion.
    VariableExpand,
    /// Character representing variable expansion into a single element.
    VariableExpandSingle,
    /// Character representing the start of a brace expansion.
    BraceBegin,
    /// Character representing the end of a brace expansion.
    BraceEnd,
    /// Character representing separation between two brace elements.
    BraceSep,
    /// Character that takes the place of any whitespace within non-quoted text in braces.
    BraceSpace,
    /// Separate subtokens in a token with this character.
    InternalSeparator,
    /// Character representing an empty variable expansion. Only used transitively while expanding
    /// variables.
    VariableExpandEmpty,
}
// Compile-time guard: `VariableExpandEmpty` is the last declared variant and therefore carries the
// largest discriminant, so checking it alone bounds the whole enum below `EXPAND_RESERVED_END`.
const _: () = {
    let last_used = ExpandChars::VariableExpandEmpty as u32;
    let range_end = EXPAND_RESERVED_END as u32;
    assert!(
        last_used < range_end,
        "Characters used in expansions must stay within private use area"
    );
};
impl From<ExpandChars> for char {
    /// Converts an expansion marker into the reserved `char` whose code point is the variant's
    /// discriminant.
    fn from(val: ExpandChars) -> Self {
        // Spell out the cast target so the value handed to the unchecked conversion is explicit.
        let code_point = val as u32;
        // SAFETY: the discriminants are a short contiguous run that starts at
        // `EXPAND_RESERVED_BASE` and, per the compile-time assertion in this file, ends below
        // `EXPAND_RESERVED_END`. Both bounds are `char` constants, so every value in between is
        // expected to be a valid Unicode scalar value (this relies on the reserved range not
        // straddling the surrogate block, which holds for the range defined in `wchar`).
        unsafe { char::from_u32_unchecked(code_point) }
    }
}

View file

@ -46,6 +46,7 @@ mod builtins;
mod env; mod env;
mod re; mod re;
mod expand;
mod path; mod path;
// Don't use `#[cfg(test)]` here to make sure ffi tests are built and tested // Don't use `#[cfg(test)]` here to make sure ffi tests are built and tested

View file

@ -1,4 +1,7 @@
use crate::wchar::{wstr, WExt, WString, L}; use crate::{
expand::ExpandChars::HomeDirectory,
wchar::{wstr, WExt, WString, L},
};
/// If the given path looks like it's relative to the working directory, then prepend that working /// If the given path looks like it's relative to the working directory, then prepend that working
/// directory. This operates on unescaped paths only (so a ~ means a literal ~). /// directory. This operates on unescaped paths only (so a ~ means a literal ~).
@ -9,7 +12,8 @@ pub fn path_apply_working_directory(path: &wstr, working_directory: &wstr) -> WS
// We're going to make sure that if we want to prepend the wd, that the string has no leading // We're going to make sure that if we want to prepend the wd, that the string has no leading
// "/". // "/".
let prepend_wd = path.as_char_slice()[0] != '/' && path.as_char_slice()[0] != '\u{FDD0}'; let prepend_wd =
path.as_char_slice()[0] != '/' && path.as_char_slice()[0] != HomeDirectory.into();
if !prepend_wd { if !prepend_wd {
// No need to prepend the wd, so just return the path we were given. // No need to prepend the wd, so just return the path we were given.

View file

@ -30,6 +30,25 @@ pub use widestring_suffix::widestrs;
/// Pull in our extensions. /// Pull in our extensions.
pub use crate::wchar_ext::{CharPrefixSuffix, WExt}; pub use crate::wchar_ext::{CharPrefixSuffix, WExt};
/// First code point of the block of Unicode "non-characters" reserved for internal use.
///
/// Non-characters are preferred for internal markers wherever possible: the block holds 32 code
/// points that should never show up in the input stream.
/// See http://www.unicode.org/faq/private_use.html.
pub const RESERVED_CHAR_BASE: char = '\u{FDD0}';
/// One past the last reserved non-character (exclusive upper bound of the block).
pub const RESERVED_CHAR_END: char = '\u{FDF0}';

// The reserved block is cut into two 16-code-point halves so that the expansion markers and the
// wildcard markers can never collide with each other.

/// Start of the half reserved for expansion markers.
pub const EXPAND_RESERVED_BASE: char = RESERVED_CHAR_BASE;
/// Exclusive end of the expansion half: 16 code points past `EXPAND_RESERVED_BASE`.
pub const EXPAND_RESERVED_END: char = {
    let code_point = EXPAND_RESERVED_BASE as u32 + 16u32;
    if let Some(c) = char::from_u32(code_point) {
        c
    } else {
        panic!("private use codepoint in expansion region should be valid char")
    }
};

/// Start of the half reserved for wildcard markers; begins where the expansion half ends.
pub const WILDCARD_RESERVED_BASE: char = EXPAND_RESERVED_END;
/// Exclusive end of the wildcard half: 16 code points past `WILDCARD_RESERVED_BASE`.
pub const WILDCARD_RESERVED_END: char = {
    let code_point = WILDCARD_RESERVED_BASE as u32 + 16u32;
    if let Some(c) = char::from_u32(code_point) {
        c
    } else {
        panic!("private use codepoint in wildcard region should be valid char")
    }
};
// These are in the Unicode private-use range. We really shouldn't use this // These are in the Unicode private-use range. We really shouldn't use this
// range but have little choice in the matter given how our lexer/parser works. // range but have little choice in the matter given how our lexer/parser works.
// We can't use non-characters for these two ranges because there are only 66 of // We can't use non-characters for these two ranges because there are only 66 of
@ -42,8 +61,11 @@ pub use crate::wchar_ext::{CharPrefixSuffix, WExt};
// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know // Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF) // of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
// on Mac OS X. See http://www.unicode.org/faq/private_use.html. // on Mac OS X. See http://www.unicode.org/faq/private_use.html.
const ENCODE_DIRECT_BASE: u32 = 0xF600; const ENCODE_DIRECT_BASE: char = '\u{F600}';
const ENCODE_DIRECT_END: u32 = ENCODE_DIRECT_BASE + 256; const ENCODE_DIRECT_END: char = match char::from_u32(ENCODE_DIRECT_BASE as u32 + 256) {
Some(c) => c,
None => panic!("private use codepoint in encode direct region should be valid char"),
};
/// Encode a literal byte in a UTF-32 character. This is required for e.g. the echo builtin, whose /// Encode a literal byte in a UTF-32 character. This is required for e.g. the echo builtin, whose
/// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g. /// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g.
@ -53,6 +75,6 @@ const ENCODE_DIRECT_END: u32 = ENCODE_DIRECT_BASE + 256;
/// ///
/// See https://github.com/fish-shell/fish-shell/issues/1894. /// See https://github.com/fish-shell/fish-shell/issues/1894.
pub fn wchar_literal_byte(byte: u8) -> char { pub fn wchar_literal_byte(byte: u8) -> char {
char::from_u32(ENCODE_DIRECT_BASE + u32::from(byte)) char::from_u32(u32::from(ENCODE_DIRECT_BASE) + u32::from(byte))
.expect("private-use codepoint should be valid char") .expect("private-use codepoint should be valid char")
} }