mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-13 13:39:02 +00:00
Port common.{h,cpp} to Rust
Most of it is duplicated, hence untested. Functions like mbrtowc are not exposed by the libc crate, so declare them ourselves. Since we don't know the definition of C macros, add two big hacks to make this work: 1. Replace MB_LEN_MAX and mbstate_t with values (resp types) that should be large enough for any implementation. 2. Detect the definition of MB_CUR_MAX in the build script. This requires more changes for each new libc. We could also use this approach for 1. Additionally, this commit brings a small behavior change to read_unquoted_escape(): we cannot decode surrogate code points like \UDE01 into a Rust char, so use � (\UFFFD, replacement character) instead. Previously, we added such code points to a wcstring; looks like they were ignored when printed.
This commit is contained in:
parent
998cb7f1cd
commit
05bad5eda1
33 changed files with 1837 additions and 556 deletions
1
fish-rust/Cargo.lock
generated
1
fish-rust/Cargo.lock
generated
|
@ -368,6 +368,7 @@ dependencies = [
|
|||
"autocxx",
|
||||
"autocxx-build",
|
||||
"bitflags",
|
||||
"cc",
|
||||
"cxx",
|
||||
"cxx-build",
|
||||
"cxx-gen",
|
||||
|
|
|
@ -26,6 +26,7 @@ widestring = "1.0.2"
|
|||
|
||||
[build-dependencies]
|
||||
autocxx-build = "0.23.1"
|
||||
cc = { git = "https://github.com/mqudsi/cc-rs", branch = "fish" }
|
||||
cxx-build = { git = "https://github.com/fish-shell/cxx", branch = "fish" }
|
||||
cxx-gen = { git = "https://github.com/fish-shell/cxx", branch = "fish" }
|
||||
miette = { version = "5", features = ["fancy"] }
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
use miette::miette;
|
||||
|
||||
fn main() -> miette::Result<()> {
|
||||
cc::Build::new().file("src/compat.c").compile("libcompat.a");
|
||||
|
||||
let rust_dir = std::env::var("CARGO_MANIFEST_DIR").expect("Env var CARGO_MANIFEST_DIR missing");
|
||||
let target_dir =
|
||||
std::env::var("FISH_RUST_TARGET_DIR").unwrap_or(format!("{}/{}", rust_dir, "target/"));
|
||||
|
@ -25,6 +27,7 @@ fn main() -> miette::Result<()> {
|
|||
let source_files = vec![
|
||||
"src/abbrs.rs",
|
||||
"src/event.rs",
|
||||
"src/common.rs",
|
||||
"src/fd_monitor.rs",
|
||||
"src/fd_readable_set.rs",
|
||||
"src/fds.rs",
|
||||
|
|
File diff suppressed because it is too large
Load diff
3
fish-rust/src/compat.c
Normal file
3
fish-rust/src/compat.c
Normal file
|
@ -0,0 +1,3 @@
|
|||
#include <stdlib.h>

/* Expose the MB_CUR_MAX macro as a real function so that it can be called over FFI.
 * The macro's value depends on the current locale, so it is evaluated on every call
 * instead of being cached.
 * Declared with (void) so the function has a proper prototype. */
size_t C_MB_CUR_MAX(void) { return MB_CUR_MAX; }
|
8
fish-rust/src/compat.rs
Normal file
8
fish-rust/src/compat.rs
Normal file
|
@ -0,0 +1,8 @@
|
|||
extern "C" {
    // Defined in compat.c, where it simply returns the C `MB_CUR_MAX` macro.
    fn C_MB_CUR_MAX() -> usize;
}

/// Returns the current value of the C `MB_CUR_MAX` macro by calling into the C shim.
// The function is named after the C macro it mirrors, hence the lint exception.
#[allow(non_snake_case)]
pub fn MB_CUR_MAX() -> usize {
    // SAFETY: the C function takes no arguments and only evaluates `MB_CUR_MAX`.
    let max_len = unsafe { C_MB_CUR_MAX() };
    max_len
}
|
|
@ -38,6 +38,11 @@ pub mod flags {
|
|||
c_int(i32::from(val.bits()))
|
||||
}
|
||||
}
|
||||
impl From<EnvMode> for u16 {
    /// Converts the flag set into its raw bit representation.
    fn from(mode: EnvMode) -> u16 {
        mode.bits()
    }
}
|
||||
}
|
||||
|
||||
/// Return values for `env_stack_t::set()`.
|
||||
|
|
|
@ -1,39 +1,34 @@
|
|||
use crate::wchar::{EXPAND_RESERVED_BASE, EXPAND_RESERVED_END};
|
||||
use crate::common::{char_offset, EXPAND_RESERVED_BASE, EXPAND_RESERVED_END};
|
||||
use crate::wchar::wstr;
|
||||
use widestring_suffix::widestrs;
|
||||
|
||||
/// Private use area characters used in expansions
|
||||
#[repr(u32)]
|
||||
pub enum ExpandChars {
|
||||
/// Character representing a home directory.
|
||||
HomeDirectory = EXPAND_RESERVED_BASE as u32,
|
||||
/// Character representing process expansion for %self.
|
||||
ProcessExpandSelf,
|
||||
/// Character representing variable expansion.
|
||||
VariableExpand,
|
||||
/// Character representing variable expansion into a single element.
|
||||
VariableExpandSingle,
|
||||
/// Character representing the start of a bracket expansion.
|
||||
BraceBegin,
|
||||
/// Character representing the end of a bracket expansion.
|
||||
BraceEnd,
|
||||
/// Character representing separation between two bracket elements.
|
||||
BraceSep,
|
||||
/// Character that takes the place of any whitespace within non-quoted text in braces
|
||||
BraceSpace,
|
||||
/// Separate subtokens in a token with this character.
|
||||
InternalSeparator,
|
||||
/// Character representing an empty variable expansion. Only used transitively while expanding
|
||||
/// variables.
|
||||
VariableExpandEmpty,
|
||||
}
|
||||
/// Character representing a home directory.
|
||||
pub const HOME_DIRECTORY: char = char_offset(EXPAND_RESERVED_BASE, 0);
|
||||
/// Character representing process expansion for %self.
|
||||
pub const PROCESS_EXPAND_SELF: char = char_offset(EXPAND_RESERVED_BASE, 1);
|
||||
/// Character representing variable expansion.
|
||||
pub const VARIABLE_EXPAND: char = char_offset(EXPAND_RESERVED_BASE, 2);
|
||||
/// Character representing variable expansion into a single element.
|
||||
pub const VARIABLE_EXPAND_SINGLE: char = char_offset(EXPAND_RESERVED_BASE, 3);
|
||||
/// Character representing the start of a bracket expansion.
|
||||
pub const BRACE_BEGIN: char = char_offset(EXPAND_RESERVED_BASE, 4);
|
||||
/// Character representing the end of a bracket expansion.
|
||||
pub const BRACE_END: char = char_offset(EXPAND_RESERVED_BASE, 5);
|
||||
/// Character representing separation between two bracket elements.
|
||||
pub const BRACE_SEP: char = char_offset(EXPAND_RESERVED_BASE, 6);
|
||||
/// Character that takes the place of any whitespace within non-quoted text in braces
|
||||
pub const BRACE_SPACE: char = char_offset(EXPAND_RESERVED_BASE, 7);
|
||||
/// Separate subtokens in a token with this character.
|
||||
pub const INTERNAL_SEPARATOR: char = char_offset(EXPAND_RESERVED_BASE, 8);
|
||||
/// Character representing an empty variable expansion. Only used transitively while expanding
|
||||
/// variables.
|
||||
pub const VARIABLE_EXPAND_EMPTY: char = char_offset(EXPAND_RESERVED_BASE, 9);
|
||||
|
||||
const _: () = assert!(
|
||||
EXPAND_RESERVED_END as u32 > ExpandChars::VariableExpandEmpty as u32,
|
||||
EXPAND_RESERVED_END as u32 > VARIABLE_EXPAND_EMPTY as u32,
|
||||
"Characters used in expansions must stay within private use area"
|
||||
);
|
||||
|
||||
impl From<ExpandChars> for char {
|
||||
fn from(val: ExpandChars) -> Self {
|
||||
// We know this is safe because we limit the range of this enum
|
||||
unsafe { char::from_u32_unchecked(val as _) }
|
||||
}
|
||||
}
|
||||
/// The string represented by PROCESS_EXPAND_SELF
|
||||
#[widestrs]
|
||||
pub const PROCESS_EXPAND_SELF_STR: &wstr = "%self"L;
|
||||
|
|
|
@ -53,8 +53,6 @@ include_cpp! {
|
|||
generate!("env_var_t")
|
||||
generate!("make_pipes_ffi")
|
||||
|
||||
generate!("valid_var_name_char")
|
||||
|
||||
generate!("get_flog_file_fd")
|
||||
generate!("log_extra_to_flog_file")
|
||||
|
||||
|
@ -100,9 +98,6 @@ include_cpp! {
|
|||
generate!("re::regex_t")
|
||||
generate!("re::regex_result_ffi")
|
||||
generate!("re::try_compile_ffi")
|
||||
generate!("wcs2string")
|
||||
generate!("wcs2zstring")
|
||||
generate!("str2wcstring")
|
||||
|
||||
generate!("signal_handle")
|
||||
generate!("signal_check_cancel")
|
||||
|
|
|
@ -188,7 +188,15 @@ macro_rules! FLOG {
|
|||
}
|
||||
};
|
||||
}
|
||||
pub(crate) use FLOG;
|
||||
|
||||
// TODO implement.
// Placeholder for formatted logging: accepts a category identifier followed by one or more
// comma-separated expressions and, for now, forwards all of them unchanged to `FLOG!`.
// No format-string substitution happens in this macro itself.
|
||||
macro_rules! FLOGF {
|
||||
($category:ident, $($elem:expr),+) => {
|
||||
crate::flog::FLOG!($category, $($elem),*);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) use {FLOG, FLOGF};
|
||||
|
||||
/// For each category, if its name matches the wildcard, set its enabled to the given sense.
|
||||
fn apply_one_wildcard(wc_esc: &wstr, sense: bool) {
|
||||
|
|
|
@ -12,6 +12,7 @@ mod common;
|
|||
mod abbrs;
|
||||
mod builtins;
|
||||
mod color;
|
||||
mod compat;
|
||||
mod env;
|
||||
mod event;
|
||||
mod expand;
|
||||
|
@ -51,6 +52,7 @@ mod wchar_ext;
|
|||
mod wchar_ffi;
|
||||
mod wcstringutil;
|
||||
mod wgetopt;
|
||||
mod wildcard;
|
||||
mod wutil;
|
||||
|
||||
// Don't use `#[cfg(test)]` here to make sure ffi tests are built and tested
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::{
|
||||
expand::ExpandChars::HomeDirectory,
|
||||
expand::HOME_DIRECTORY,
|
||||
wchar::{wstr, WExt, WString, L},
|
||||
};
|
||||
|
||||
|
@ -12,7 +12,7 @@ pub fn path_apply_working_directory(path: &wstr, working_directory: &wstr) -> WS
|
|||
|
||||
// We're going to make sure that if we want to prepend the wd, that the string has no leading
|
||||
// "/".
|
||||
let prepend_wd = path.char_at(0) != '/' && path.char_at(0) != HomeDirectory.into();
|
||||
let prepend_wd = path.char_at(0) != '/' && path.char_at(0) != HOME_DIRECTORY;
|
||||
|
||||
if !prepend_wd {
|
||||
// No need to prepend the wd, so just return the path we were given.
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
//! A specialized tokenizer for tokenizing the fish language. In the future, the tokenizer should be
|
||||
//! extended to support marks, tokenizing multiple strings and disposing of unused string segments.
|
||||
|
||||
use crate::ffi::{valid_var_name_char, wcharz_t};
|
||||
use crate::common::valid_var_name_char;
|
||||
use crate::ffi::wcharz_t;
|
||||
use crate::future_feature_flags::{feature_test, FeatureFlag};
|
||||
use crate::parse_constants::SOURCE_OFFSET_INVALID;
|
||||
use crate::redirection::RedirectionMode;
|
||||
|
@ -1357,7 +1358,7 @@ pub fn variable_assignment_equals_pos(txt: &wstr) -> Option<usize> {
|
|||
// TODO bracket indexing
|
||||
for (i, c) in txt.chars().enumerate() {
|
||||
if !found_potential_variable {
|
||||
if !valid_var_name_char(c as wchar_t) {
|
||||
if !valid_var_name_char(c) {
|
||||
return None;
|
||||
}
|
||||
found_potential_variable = true;
|
||||
|
@ -1365,7 +1366,7 @@ pub fn variable_assignment_equals_pos(txt: &wstr) -> Option<usize> {
|
|||
if c == '=' {
|
||||
return Some(i);
|
||||
}
|
||||
if !valid_var_name_char(c as wchar_t) {
|
||||
if !valid_var_name_char(c) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
//! - wstr: a string slice without a nul terminator. Like `&str` but wide chars.
|
||||
//! - WString: an owning string without a nul terminator. Like `String` but wide chars.
|
||||
|
||||
use crate::common::{ENCODE_DIRECT_BASE, ENCODE_DIRECT_END};
|
||||
pub use widestring::{Utf32Str as wstr, Utf32String as WString};
|
||||
|
||||
/// Pull in our extensions.
|
||||
|
@ -30,43 +31,6 @@ pub(crate) use L;
|
|||
/// Note: the resulting string is NOT nul-terminated.
|
||||
pub use widestring_suffix::widestrs;
|
||||
|
||||
// Use Unicode "non-characters" for internal characters as much as we can. This
|
||||
// gives us 32 "characters" for internal use that we can guarantee should not
|
||||
// appear in our input stream. See http://www.unicode.org/faq/private_use.html.
|
||||
pub const RESERVED_CHAR_BASE: char = '\u{FDD0}';
|
||||
pub const RESERVED_CHAR_END: char = '\u{FDF0}';
|
||||
// Split the available non-character values into two ranges to ensure there are
|
||||
// no conflicts among the places we use these special characters.
|
||||
pub const EXPAND_RESERVED_BASE: char = RESERVED_CHAR_BASE;
|
||||
pub const EXPAND_RESERVED_END: char = match char::from_u32(EXPAND_RESERVED_BASE as u32 + 16u32) {
|
||||
Some(c) => c,
|
||||
None => panic!("private use codepoint in expansion region should be valid char"),
|
||||
};
|
||||
pub const WILDCARD_RESERVED_BASE: char = EXPAND_RESERVED_END;
|
||||
pub const WILDCARD_RESERVED_END: char = match char::from_u32(WILDCARD_RESERVED_BASE as u32 + 16u32)
|
||||
{
|
||||
Some(c) => c,
|
||||
None => panic!("private use codepoint in wildcard region should be valid char"),
|
||||
};
|
||||
|
||||
// These are in the Unicode private-use range. We really shouldn't use this
|
||||
// range but have little choice in the matter given how our lexer/parser works.
|
||||
// We can't use non-characters for these two ranges because there are only 66 of
|
||||
// them and we need at least 256 + 64.
|
||||
//
|
||||
// If sizeof(wchar_t)==4 we could avoid using private-use chars; however, that
|
||||
// would result in fish having different behavior on machines with 16 versus 32
|
||||
// bit wchar_t. It's better that fish behave the same on both types of systems.
|
||||
//
|
||||
// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
|
||||
// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
|
||||
// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
|
||||
pub const ENCODE_DIRECT_BASE: char = '\u{F600}';
|
||||
pub const ENCODE_DIRECT_END: char = match char::from_u32(ENCODE_DIRECT_BASE as u32 + 256) {
|
||||
Some(c) => c,
|
||||
None => panic!("private use codepoint in encode direct region should be valid char"),
|
||||
};
|
||||
|
||||
/// Encode a literal byte in a UTF-32 character. This is required for e.g. the echo builtin, whose
|
||||
/// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g.
|
||||
/// UTF-8 by the terminal. If we were to interpret each of those bytes as a codepoint and encode it
|
||||
|
@ -78,3 +42,16 @@ pub fn encode_byte_to_char(byte: u8) -> char {
|
|||
char::from_u32(u32::from(ENCODE_DIRECT_BASE) + u32::from(byte))
|
||||
.expect("private-use codepoint should be valid char")
|
||||
}
|
||||
|
||||
/// Decode a literal byte from a UTF-32 character.
|
||||
pub fn decode_byte_from_char(c: char) -> Option<u8> {
|
||||
if c >= ENCODE_DIRECT_BASE && c < ENCODE_DIRECT_END {
|
||||
Some(
|
||||
(u32::from(c) - u32::from(ENCODE_DIRECT_BASE))
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,66 @@
|
|||
//! Helper functions for working with wcstring.
|
||||
|
||||
use crate::wchar::{wstr, WString};
|
||||
use crate::compat::MB_CUR_MAX;
|
||||
use crate::expand::INTERNAL_SEPARATOR;
|
||||
use crate::flog::FLOGF;
|
||||
use crate::wchar::{decode_byte_from_char, wstr, WString, L};
|
||||
use crate::wutil::encoding::{wcrtomb, zero_mbstate, AT_LEAST_MB_LEN_MAX};
|
||||
|
||||
/// Implementation of wcs2string that accepts a callback.
|
||||
/// This invokes \p func with (const char*, size_t) pairs.
|
||||
/// If \p func returns false, it stops; otherwise it continues.
|
||||
/// \return false if the callback returned false, otherwise true.
|
||||
pub fn wcs2string_callback(input: &wstr, mut func: impl FnMut(&[u8]) -> bool) -> bool {
|
||||
let mut state = zero_mbstate();
|
||||
let mut converted = [0_u8; AT_LEAST_MB_LEN_MAX];
|
||||
|
||||
for mut c in input.chars() {
|
||||
// TODO: this doesn't seem sound.
|
||||
if c == INTERNAL_SEPARATOR {
|
||||
// do nothing
|
||||
} else if let Some(byte) = decode_byte_from_char(c) {
|
||||
converted[0] = byte;
|
||||
if !func(&converted[..1]) {
|
||||
return false;
|
||||
}
|
||||
} else if MB_CUR_MAX() == 1 {
|
||||
// single-byte locale (C/POSIX/ISO-8859)
|
||||
// If `c` contains a wide character we emit a question-mark.
|
||||
if u32::from(c) & !0xFF != 0 {
|
||||
c = '?';
|
||||
}
|
||||
|
||||
converted[0] = c as u8;
|
||||
if !func(&converted[..1]) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
converted = [0; AT_LEAST_MB_LEN_MAX];
|
||||
let len = unsafe {
|
||||
wcrtomb(
|
||||
std::ptr::addr_of_mut!(converted[0]).cast(),
|
||||
c as libc::wchar_t,
|
||||
std::ptr::addr_of_mut!(state),
|
||||
)
|
||||
};
|
||||
if len == 0_usize.wrapping_sub(1) {
|
||||
wcs2string_bad_char(c);
|
||||
state = zero_mbstate();
|
||||
} else if !func(&converted[..len]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Logs, under the `char_encoding` category, that the wide character `c` could not be converted
/// to a narrow representation.
fn wcs2string_bad_char(c: char) {
|
||||
FLOGF!(
|
||||
char_encoding,
|
||||
L!("Wide character U+%4X has no narrow representation"),
|
||||
c
|
||||
);
|
||||
}
|
||||
|
||||
/// Joins strings with a separator.
|
||||
pub fn join_strings(strs: &[&wstr], sep: char) -> WString {
|
||||
|
|
13
fish-rust/src/wildcard.rs
Normal file
13
fish-rust/src/wildcard.rs
Normal file
|
@ -0,0 +1,13 @@
|
|||
// Enumeration of all wildcard types.
|
||||
|
||||
use crate::common::{char_offset, WILDCARD_RESERVED_BASE};
|
||||
|
||||
/// Character representing any character except '/' (slash).
|
||||
pub const ANY_CHAR: char = char_offset(WILDCARD_RESERVED_BASE, 0);
|
||||
/// Character representing any character string not containing '/' (slash).
|
||||
pub const ANY_STRING: char = char_offset(WILDCARD_RESERVED_BASE, 1);
|
||||
/// Character representing any character string.
|
||||
pub const ANY_STRING_RECURSIVE: char = char_offset(WILDCARD_RESERVED_BASE, 2);
|
||||
/// This is a special pseudo-char that is not used other than to mark the
|
||||
/// end of the special characters so we can sanity check the enum range.
|
||||
pub const ANY_SENTINEL: char = char_offset(WILDCARD_RESERVED_BASE, 3);
|
19
fish-rust/src/wutil/encoding.rs
Normal file
19
fish-rust/src/wutil/encoding.rs
Normal file
|
@ -0,0 +1,19 @@
|
|||
// C multibyte/wide character conversion routines, declared by hand in this module.
// Both take a pointer to the opaque `mbstate_t` defined below.
extern "C" {
|
||||
/// Wide character to multibyte conversion; `s` is the output buffer and `ps` the conversion
/// state.
pub fn wcrtomb(s: *mut libc::c_char, wc: libc::wchar_t, ps: *mut mbstate_t) -> usize;
|
||||
/// Multibyte to wide character conversion; reads from `s` (with `n` as the length argument)
/// and writes through `pwc`, using `p` as the conversion state.
pub fn mbrtowc(
|
||||
pwc: *mut libc::wchar_t,
|
||||
s: *const libc::c_char,
|
||||
n: usize,
|
||||
p: *mut mbstate_t,
|
||||
) -> usize;
|
||||
}
|
||||
|
||||
// HACK This should be mbstate_t from libc but that's not exposed. Since it's only written by
|
||||
// libc, we define it as opaque type that should be large enough for all implementations.
|
||||
pub type mbstate_t = [u64; 16];
|
||||
/// Returns an all-zero `mbstate_t`.
pub fn zero_mbstate() -> mbstate_t {
|
||||
[0; 16]
|
||||
}
|
||||
|
||||
// HACK This should be the MB_LEN_MAX macro from libc but that's not easy to get.
// The value is a guess meant to be at least as large as any real MB_LEN_MAX.
|
||||
pub const AT_LEAST_MB_LEN_MAX: usize = 32;
|
|
@ -1,3 +1,4 @@
|
|||
pub mod encoding;
|
||||
pub mod errors;
|
||||
pub mod gettext;
|
||||
mod normalize_path;
|
||||
|
@ -6,6 +7,7 @@ pub mod wcstod;
|
|||
pub mod wcstoi;
|
||||
mod wrealpath;
|
||||
|
||||
use crate::common::fish_reserved_codepoint;
|
||||
pub(crate) use gettext::{wgettext, wgettext_fmt};
|
||||
pub use normalize_path::*;
|
||||
pub(crate) use printf::sprintf;
|
||||
|
@ -28,3 +30,21 @@ pub fn perror(s: &str) {
|
|||
let _ = stderr.write_all(slice);
|
||||
let _ = stderr.write_all(b"\n");
|
||||
}
|
||||
|
||||
const PUA1_START: char = '\u{E000}';
const PUA1_END: char = '\u{F900}';
const PUA2_START: char = '\u{F0000}';
const PUA2_END: char = '\u{FFFFE}';
const PUA3_START: char = '\u{100000}';
const PUA3_END: char = '\u{10FFFE}';

/// Return true if the code point is in a Unicode private use area.
///
/// All three areas are checked: the one in the Basic Multilingual Plane (PUA1) and the two
/// supplementary ones (PUA2, PUA3). Each range is half-open: `*_START` is included and
/// `*_END` is excluded.
fn fish_is_pua(c: char) -> bool {
    (PUA1_START..PUA1_END).contains(&c)
        || (PUA2_START..PUA2_END).contains(&c)
        || (PUA3_START..PUA3_END).contains(&c)
}
|
||||
|
||||
/// We need this because there are too many implementations that don't return the proper answer for
|
||||
/// some code points. See issue #3050.
|
||||
pub fn fish_iswalnum(c: char) -> bool {
|
||||
!fish_reserved_codepoint(c) && !fish_is_pua(c) && c.is_alphanumeric()
|
||||
}
|
||||
|
|
|
@ -4,13 +4,8 @@ use std::{
|
|||
os::unix::prelude::{OsStrExt, OsStringExt},
|
||||
};
|
||||
|
||||
use cxx::let_cxx_string;
|
||||
|
||||
use crate::{
|
||||
ffi::{str2wcstring, wcs2zstring},
|
||||
wchar::{wstr, WString},
|
||||
wchar_ffi::{WCharFromFFI, WCharToFFI},
|
||||
};
|
||||
use crate::common::{str2wcstring, wcs2zstring};
|
||||
use crate::wchar::{wstr, WString};
|
||||
|
||||
/// Wide character realpath. The last path component does not need to be valid. If an error occurs,
|
||||
/// `wrealpath()` returns `None`
|
||||
|
@ -19,7 +14,7 @@ pub fn wrealpath(pathname: &wstr) -> Option<WString> {
|
|||
return None;
|
||||
}
|
||||
|
||||
let mut narrow_path: Vec<u8> = wcs2zstring(&pathname.to_ffi()).from_ffi();
|
||||
let mut narrow_path: Vec<u8> = wcs2zstring(pathname).into();
|
||||
|
||||
// Strip trailing slashes. This treats "/a//" as equivalent to "/a" if /a is a non-directory.
|
||||
while narrow_path.len() > 1 && narrow_path[narrow_path.len() - 1] == b'/' {
|
||||
|
@ -68,7 +63,5 @@ pub fn wrealpath(pathname: &wstr) -> Option<WString> {
|
|||
}
|
||||
};
|
||||
|
||||
let_cxx_string!(s = real_path);
|
||||
|
||||
Some(str2wcstring(&s).from_ffi())
|
||||
Some(str2wcstring(&real_path))
|
||||
}
|
||||
|
|
|
@ -67,9 +67,8 @@ static parse_keyword_t keyword_for_token(token_type_t tok, const wcstring &token
|
|||
if (!needs_expand) {
|
||||
result = keyword_with_name(token);
|
||||
} else {
|
||||
wcstring storage;
|
||||
if (unescape_string(token, &storage, 0)) {
|
||||
result = keyword_with_name(storage);
|
||||
if (auto unescaped = unescape_string(token, 0)) {
|
||||
result = keyword_with_name(*unescaped);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -204,12 +204,11 @@ maybe_t<int> builtin_complete(parser_t &parser, io_streams_t &streams, const wch
|
|||
}
|
||||
case 'p':
|
||||
case 'c': {
|
||||
wcstring tmp;
|
||||
if (unescape_string(w.woptarg, &tmp, UNESCAPE_SPECIAL)) {
|
||||
if (auto tmp = unescape_string(w.woptarg, UNESCAPE_SPECIAL)) {
|
||||
if (opt == 'p')
|
||||
path.push_back(tmp);
|
||||
path.push_back(*tmp);
|
||||
else
|
||||
cmd_to_complete.push_back(tmp);
|
||||
cmd_to_complete.push_back(*tmp);
|
||||
} else {
|
||||
streams.err.append_format(_(L"%ls: Invalid token '%ls'\n"), cmd, w.woptarg);
|
||||
return STATUS_INVALID_ARGS;
|
||||
|
|
|
@ -531,14 +531,13 @@ maybe_t<int> builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t
|
|||
|
||||
if (opts.tokenize) {
|
||||
auto tok = new_tokenizer(buff.c_str(), TOK_ACCEPT_UNFINISHED);
|
||||
wcstring out;
|
||||
if (opts.array) {
|
||||
// Array mode: assign each token as a separate element of the sole var.
|
||||
wcstring_list_t tokens;
|
||||
while (auto t = tok->next()) {
|
||||
auto text = *tok->text_of(*t);
|
||||
if (unescape_string(text, &out, UNESCAPE_DEFAULT)) {
|
||||
tokens.push_back(out);
|
||||
if (auto out = unescape_string(text, UNESCAPE_DEFAULT)) {
|
||||
tokens.push_back(*out);
|
||||
} else {
|
||||
tokens.push_back(text);
|
||||
}
|
||||
|
@ -549,8 +548,8 @@ maybe_t<int> builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t
|
|||
std::unique_ptr<tok_t> t;
|
||||
while ((vars_left() - 1 > 0) && (t = tok->next())) {
|
||||
auto text = *tok->text_of(*t);
|
||||
if (unescape_string(text, &out, UNESCAPE_DEFAULT)) {
|
||||
parser.set_var_and_fire(*var_ptr++, opts.place, out);
|
||||
if (auto out = unescape_string(text, UNESCAPE_DEFAULT)) {
|
||||
parser.set_var_and_fire(*var_ptr++, opts.place, *out);
|
||||
} else {
|
||||
parser.set_var_and_fire(*var_ptr++, opts.place, text);
|
||||
}
|
||||
|
|
|
@ -737,10 +737,9 @@ static int string_unescape(parser_t &parser, io_streams_t &streams, int argc,
|
|||
|
||||
arg_iterator_t aiter(argv, optind, streams);
|
||||
while (const wcstring *arg = aiter.nextstr()) {
|
||||
wcstring result;
|
||||
wcstring sep = aiter.want_newline() ? L"\n" : L"";
|
||||
if (unescape_string(*arg, &result, flags, opts.escape_style)) {
|
||||
streams.out.append(result + sep);
|
||||
if (auto result = unescape_string(*arg, flags, opts.escape_style)) {
|
||||
streams.out.append(*result + sep);
|
||||
nesc++;
|
||||
}
|
||||
}
|
||||
|
|
408
src/common.cpp
408
src/common.cpp
|
@ -33,6 +33,7 @@
|
|||
#include <memory>
|
||||
|
||||
#include "common.h"
|
||||
#include "common.rs.h"
|
||||
#include "expand.h"
|
||||
#include "fallback.h" // IWYU pragma: keep
|
||||
#include "flog.h"
|
||||
|
@ -119,17 +120,6 @@ long convert_digit(wchar_t d, int base) {
|
|||
/// Test whether the char is a valid hex digit as used by the `escape_string_*()` functions.
|
||||
static bool is_hex_digit(int c) { return std::strchr("0123456789ABCDEF", c) != nullptr; }
|
||||
|
||||
/// This is a specialization of `convert_digit()` that only handles base 16 and only uppercase.
/// \return the digit's value (0-15), or -1 if \p d is not one of 0-9 or A-F.
static long convert_hex_digit(wchar_t d) {
    if (d >= L'0' && d <= L'9') {
        return d - L'0';
    }
    // Stop at 'F': the letters G-Z are not hex digits and must be rejected, otherwise they
    // would yield values of 16 and above.
    if (d >= L'A' && d <= L'F') {
        return 10 + d - L'A';
    }
    return -1;
}
|
||||
|
||||
bool is_windows_subsystem_for_linux() {
|
||||
#if defined(WSL)
|
||||
return true;
|
||||
|
@ -749,38 +739,6 @@ static void escape_string_url(const wcstring &in, wcstring &out) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Reverse the effects of `escape_string_url()`. By definition the string has to consist of
|
||||
/// just ASCII chars.
/// \param in the nul-terminated escaped string.
/// \param out receives the decoded string; it is only assigned once decoding has succeeded.
/// \return true on success, false if the input is malformed.
|
||||
static bool unescape_string_url(const wchar_t *in, wcstring *out) {
|
||||
// Decoded bytes are collected in a narrow string and widened once at the end.
std::string result;
|
||||
// NOTE(review): this reserves based on the current size of *out, not on the input length -
// confirm that this is intended.
result.reserve(out->size());
|
||||
for (wchar_t c = *in; c; c = *++in) {
|
||||
if (c > 0x7F) return false; // invalid character means we can't decode the string
|
||||
if (c == '%') {
|
||||
int c1 = in[1];
|
||||
if (c1 == 0) return false; // found unexpected end of string
|
||||
if (c1 == '%') {
|
||||
// "%%" is an escaped literal percent sign.
result.push_back('%');
|
||||
// Move onto the second '%'; the loop's ++in then steps past it.
in++;
|
||||
} else {
|
||||
// Otherwise expect two hex digits giving the byte value.
int c2 = in[2];
|
||||
if (c2 == 0) return false; // string ended prematurely
|
||||
long d1 = convert_digit(c1, 16);
|
||||
if (d1 < 0) return false;
|
||||
long d2 = convert_digit(c2, 16);
|
||||
if (d2 < 0) return false;
|
||||
result.push_back(16 * d1 + d2);
|
||||
// Move onto the second hex digit; the loop's ++in then steps past it.
in += 2;
|
||||
}
|
||||
} else {
|
||||
result.push_back(c);
|
||||
}
|
||||
}
|
||||
|
||||
*out = str2wcstring(result);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Escape a string in a fashion suitable for using as a fish var name. Store the result in out_str.
|
||||
static void escape_string_var(const wcstring &in, wcstring &out) {
|
||||
bool prev_was_hex_encoded = false;
|
||||
|
@ -812,46 +770,6 @@ static void escape_string_var(const wcstring &in, wcstring &out) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Reverse the effects of `escape_string_var()`. By definition the string has to consist of
|
||||
/// just ASCII chars.
/// \param in the nul-terminated escaped string.
/// \param out receives the decoded string; it is only assigned once decoding has succeeded.
/// \return true on success, false if the input is malformed.
|
||||
static bool unescape_string_var(const wchar_t *in, wcstring *out) {
|
||||
// Decoded bytes are collected in a narrow string and widened once at the end.
std::string result;
|
||||
// NOTE(review): this reserves based on the current size of *out, not on the input length -
// confirm that this is intended.
result.reserve(out->size());
|
||||
// NOTE(review): this flag is set after a hex escape and never cleared again in this function.
bool prev_was_hex_encoded = false;
|
||||
for (wchar_t c = *in; c; c = *++in) {
|
||||
if (c > 0x7F) return false; // invalid character means we can't decode the string
|
||||
if (c == '_') {
|
||||
int c1 = in[1];
|
||||
if (c1 == 0) {
|
||||
// A trailing underscore is accepted only if a hex escape has been seen before.
if (prev_was_hex_encoded) break;
|
||||
return false; // found unexpected escape char at end of string
|
||||
}
|
||||
if (c1 == '_') {
|
||||
// "__" is an escaped literal underscore.
result.push_back('_');
|
||||
// Move onto the second '_'; the loop's ++in then steps past it.
in++;
|
||||
} else if (is_hex_digit(c1)) {
|
||||
int c2 = in[2];
|
||||
if (c2 == 0) return false; // string ended prematurely
|
||||
long d1 = convert_hex_digit(c1);
|
||||
if (d1 < 0) return false;
|
||||
long d2 = convert_hex_digit(c2);
|
||||
if (d2 < 0) return false;
|
||||
result.push_back(16 * d1 + d2);
|
||||
// Move onto the second hex digit; the loop's ++in then steps past it.
in += 2;
|
||||
prev_was_hex_encoded = true;
|
||||
}
|
||||
// No "else" clause because if the first char after an underscore is not another
|
||||
// underscore or a valid hex character then the underscore is there to improve
|
||||
// readability after we've encoded a character not valid in a var name.
|
||||
} else {
|
||||
result.push_back(c);
|
||||
}
|
||||
}
|
||||
|
||||
*out = str2wcstring(result);
|
||||
return true;
|
||||
}
|
||||
|
||||
wcstring escape_string_for_double_quotes(wcstring in) {
|
||||
// We need to escape backslashes, double quotes, and dollars only.
|
||||
wcstring result = std::move(in);
|
||||
|
@ -1130,12 +1048,6 @@ wcstring escape_string(const wcstring &in, escape_flags_t flags, escape_string_s
|
|||
return result;
|
||||
}
|
||||
|
||||
/// Helper to return the last character in a string, or none.
|
||||
static maybe_t<wchar_t> string_last_char(const wcstring &str) {
|
||||
if (str.empty()) return none();
|
||||
return str.back();
|
||||
}
|
||||
|
||||
/// Given a null terminated string starting with a backslash, read the escape as if it is unquoted,
|
||||
/// appending to result. Return the number of characters consumed, or none on error.
|
||||
maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete,
|
||||
|
@ -1329,320 +1241,30 @@ maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, boo
|
|||
return in_pos;
|
||||
}
|
||||
|
||||
/// Returns the unescaped version of input_str into output_str (by reference). Returns true if
|
||||
/// successful. If false, the contents of output_str are unchanged.
|
||||
static bool unescape_string_internal(const wchar_t *const input, const size_t input_len,
|
||||
wcstring *output_str, unescape_flags_t flags) {
|
||||
// Set up result string, which we'll swap with the output on success.
|
||||
wcstring result;
|
||||
result.reserve(input_len);
|
||||
|
||||
const bool unescape_special = static_cast<bool>(flags & UNESCAPE_SPECIAL);
|
||||
const bool allow_incomplete = static_cast<bool>(flags & UNESCAPE_INCOMPLETE);
|
||||
const bool ignore_backslashes = static_cast<bool>(flags & UNESCAPE_NO_BACKSLASHES);
|
||||
|
||||
// The positions of open braces.
|
||||
std::vector<size_t> braces;
|
||||
// The positions of variable expansions or brace ","s.
|
||||
// We only read braces as expanders if there's a variable expansion or "," in them.
|
||||
std::vector<size_t> vars_or_seps;
|
||||
int brace_count = 0;
|
||||
|
||||
bool errored = false;
|
||||
enum {
|
||||
mode_unquoted,
|
||||
mode_single_quotes,
|
||||
mode_double_quotes,
|
||||
} mode = mode_unquoted;
|
||||
|
||||
for (size_t input_position = 0; input_position < input_len && !errored; input_position++) {
|
||||
const wchar_t c = input[input_position];
|
||||
// Here's the character we'll append to result, or none() to suppress it.
|
||||
maybe_t<wchar_t> to_append_or_none = c;
|
||||
if (mode == mode_unquoted) {
|
||||
switch (c) {
|
||||
case L'\\': {
|
||||
if (!ignore_backslashes) {
|
||||
// Backslashes (escapes) are complicated and may result in errors, or
|
||||
// appending INTERNAL_SEPARATORs, so we have to handle them specially.
|
||||
auto escape_chars = read_unquoted_escape(
|
||||
input + input_position, &result, allow_incomplete, unescape_special);
|
||||
if (!escape_chars.has_value()) {
|
||||
// A none() return indicates an error.
|
||||
errored = true;
|
||||
} else {
|
||||
// Skip over the characters we read, minus one because the outer loop
|
||||
// will increment it.
|
||||
assert(*escape_chars > 0);
|
||||
input_position += *escape_chars - 1;
|
||||
}
|
||||
// We've already appended, don't append anything else.
|
||||
to_append_or_none = none();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'~': {
|
||||
if (unescape_special && (input_position == 0)) {
|
||||
to_append_or_none = HOME_DIRECTORY;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'%': {
|
||||
// Note that this only recognizes %self if the string is literally %self.
|
||||
// %self/foo will NOT match this.
|
||||
if (unescape_special && input_position == 0 &&
|
||||
!std::wcscmp(input, PROCESS_EXPAND_SELF_STR)) {
|
||||
to_append_or_none = PROCESS_EXPAND_SELF;
|
||||
input_position += PROCESS_EXPAND_SELF_STR_LEN - 1; // skip over 'self's
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'*': {
|
||||
if (unescape_special) {
|
||||
// In general, this is ANY_STRING. But as a hack, if the last appended char
|
||||
// is ANY_STRING, delete the last char and store ANY_STRING_RECURSIVE to
|
||||
// reflect the fact that ** is the recursive wildcard.
|
||||
if (string_last_char(result) == ANY_STRING) {
|
||||
assert(!result.empty());
|
||||
result.resize(result.size() - 1);
|
||||
to_append_or_none = ANY_STRING_RECURSIVE;
|
||||
} else {
|
||||
to_append_or_none = ANY_STRING;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'?': {
|
||||
if (unescape_special && !feature_test(feature_flag_t::qmark_noglob)) {
|
||||
to_append_or_none = ANY_CHAR;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'$': {
|
||||
if (unescape_special) {
|
||||
bool is_cmdsub =
|
||||
input_position + 1 < input_len && input[input_position + 1] == L'(';
|
||||
if (!is_cmdsub) {
|
||||
to_append_or_none = VARIABLE_EXPAND;
|
||||
vars_or_seps.push_back(input_position);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'{': {
|
||||
if (unescape_special) {
|
||||
brace_count++;
|
||||
to_append_or_none = BRACE_BEGIN;
|
||||
// We need to store where the brace *ends up* in the output.
|
||||
braces.push_back(result.size());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'}': {
|
||||
if (unescape_special) {
|
||||
// HACK: The completion machinery sometimes hands us partial tokens.
|
||||
// We can't parse them properly, but it shouldn't hurt,
|
||||
// so we don't assert here.
|
||||
// See #4954.
|
||||
// assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we
|
||||
// shouldn't be able to get here");
|
||||
brace_count--;
|
||||
to_append_or_none = BRACE_END;
|
||||
if (!braces.empty()) {
|
||||
// HACK: To reduce accidental use of brace expansion, treat a brace
|
||||
// with zero or one items as literal input. See #4632. (The hack is
|
||||
// doing it here and like this.)
|
||||
if (vars_or_seps.empty() || vars_or_seps.back() < braces.back()) {
|
||||
result[braces.back()] = L'{';
|
||||
// We also need to turn all spaces back.
|
||||
for (size_t i = braces.back() + 1; i < result.size(); i++) {
|
||||
if (result[i] == BRACE_SPACE) result[i] = L' ';
|
||||
}
|
||||
to_append_or_none = L'}';
|
||||
}
|
||||
|
||||
// Remove all seps inside the current brace pair, so if we have a
|
||||
// surrounding pair we only get seps inside *that*.
|
||||
if (!vars_or_seps.empty()) {
|
||||
while (!vars_or_seps.empty() && vars_or_seps.back() > braces.back())
|
||||
vars_or_seps.pop_back();
|
||||
}
|
||||
braces.pop_back();
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L',': {
|
||||
if (unescape_special && brace_count > 0) {
|
||||
to_append_or_none = BRACE_SEP;
|
||||
vars_or_seps.push_back(input_position);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L' ': {
|
||||
if (unescape_special && brace_count > 0) {
|
||||
to_append_or_none = BRACE_SPACE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'\'': {
|
||||
mode = mode_single_quotes;
|
||||
to_append_or_none =
|
||||
unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
|
||||
break;
|
||||
}
|
||||
case L'\"': {
|
||||
mode = mode_double_quotes;
|
||||
to_append_or_none =
|
||||
unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (mode == mode_single_quotes) {
|
||||
if (c == L'\\') {
|
||||
// A backslash may or may not escape something in single quotes.
|
||||
switch (input[input_position + 1]) {
|
||||
case '\\':
|
||||
case L'\'': {
|
||||
to_append_or_none = input[input_position + 1];
|
||||
input_position += 1; // skip over the backslash
|
||||
break;
|
||||
}
|
||||
case L'\0': {
|
||||
if (!allow_incomplete) {
|
||||
errored = true;
|
||||
} else {
|
||||
// PCA this line had the following cryptic comment: 'We may ever escape
|
||||
// a NULL character, but still appending a \ in case I am wrong.' Not
|
||||
// sure what it means or the importance of this.
|
||||
input_position += 1; /* Skip over the backslash */
|
||||
to_append_or_none = L'\\';
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
// Literal backslash that doesn't escape anything! Leave things alone; we'll
|
||||
// append the backslash itself.
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (c == L'\'') {
|
||||
to_append_or_none =
|
||||
unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
|
||||
mode = mode_unquoted;
|
||||
}
|
||||
} else if (mode == mode_double_quotes) {
|
||||
switch (c) {
|
||||
case L'"': {
|
||||
mode = mode_unquoted;
|
||||
to_append_or_none =
|
||||
unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
|
||||
break;
|
||||
}
|
||||
case '\\': {
|
||||
switch (input[input_position + 1]) {
|
||||
case L'\0': {
|
||||
if (!allow_incomplete) {
|
||||
errored = true;
|
||||
} else {
|
||||
to_append_or_none = L'\0';
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '\\':
|
||||
case L'$':
|
||||
case '"': {
|
||||
to_append_or_none = input[input_position + 1];
|
||||
input_position += 1; /* Skip over the backslash */
|
||||
break;
|
||||
}
|
||||
case '\n': {
|
||||
/* Swallow newline */
|
||||
to_append_or_none = none();
|
||||
input_position += 1; /* Skip over the backslash */
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
/* Literal backslash that doesn't escape anything! Leave things alone;
|
||||
* we'll append the backslash itself */
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '$': {
|
||||
if (unescape_special) {
|
||||
to_append_or_none = VARIABLE_EXPAND_SINGLE;
|
||||
vars_or_seps.push_back(input_position);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now maybe append the char.
|
||||
if (to_append_or_none.has_value()) {
|
||||
result.push_back(*to_append_or_none);
|
||||
}
|
||||
}
|
||||
|
||||
// Return the string by reference, and then success.
|
||||
if (!errored) {
|
||||
*output_str = std::move(result);
|
||||
}
|
||||
return !errored;
|
||||
}
|
||||
|
||||
bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special) {
|
||||
assert(str != nullptr);
|
||||
wcstring output;
|
||||
bool success = unescape_string_internal(str->c_str(), str->size(), &output, escape_special);
|
||||
if (success) {
|
||||
*str = std::move(output);
|
||||
if (auto unescaped = unescape_string(str->c_str(), str->size(), escape_special)) {
|
||||
*str = *unescaped;
|
||||
return true;
|
||||
}
|
||||
return success;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool unescape_string(const wchar_t *input, size_t len, wcstring *output,
|
||||
unescape_flags_t escape_special, escape_string_style_t style) {
|
||||
bool success = false;
|
||||
switch (style) {
|
||||
case STRING_STYLE_SCRIPT: {
|
||||
success = unescape_string_internal(input, len, output, escape_special);
|
||||
break;
|
||||
}
|
||||
case STRING_STYLE_URL: {
|
||||
success = unescape_string_url(input, output);
|
||||
break;
|
||||
}
|
||||
case STRING_STYLE_VAR: {
|
||||
success = unescape_string_var(input, output);
|
||||
break;
|
||||
}
|
||||
case STRING_STYLE_REGEX: {
|
||||
// unescaping PCRE2 is not needed/supported, the PCRE2 engine is responsible for that
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!success) output->clear();
|
||||
return success;
|
||||
std::unique_ptr<wcstring> unescape_string(const wchar_t *input, unescape_flags_t escape_special,
|
||||
escape_string_style_t style) {
|
||||
return unescape_string(input, std::wcslen(input), escape_special, style);
|
||||
}
|
||||
|
||||
bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special,
|
||||
escape_string_style_t style) {
|
||||
return unescape_string(input, std::wcslen(input), output, escape_special, style);
|
||||
std::unique_ptr<wcstring> unescape_string(const wchar_t *input, size_t len,
|
||||
unescape_flags_t escape_special,
|
||||
escape_string_style_t style) {
|
||||
return rust_unescape_string(input, len, escape_special, style);
|
||||
}
|
||||
|
||||
bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special,
|
||||
escape_string_style_t style) {
|
||||
return unescape_string(input.c_str(), input.size(), output, escape_special, style);
|
||||
std::unique_ptr<wcstring> unescape_string(const wcstring &input, unescape_flags_t escape_special,
|
||||
escape_string_style_t style) {
|
||||
return unescape_string(input.c_str(), input.size(), escape_special, style);
|
||||
}
|
||||
|
||||
wcstring format_size(long long sz) {
|
||||
|
|
14
src/common.h
14
src/common.h
|
@ -521,15 +521,15 @@ bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special);
|
|||
|
||||
/// Reverse the effects of calling `escape_string`. Returns the unescaped value by reference. On
|
||||
/// failure, the output is set to an empty string.
|
||||
bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special,
|
||||
escape_string_style_t style = STRING_STYLE_SCRIPT);
|
||||
std::unique_ptr<wcstring> unescape_string(const wchar_t *input, unescape_flags_t escape_special,
|
||||
escape_string_style_t style = STRING_STYLE_SCRIPT);
|
||||
|
||||
bool unescape_string(const wchar_t *input, size_t len, wcstring *output,
|
||||
unescape_flags_t escape_special,
|
||||
escape_string_style_t style = STRING_STYLE_SCRIPT);
|
||||
std::unique_ptr<wcstring> unescape_string(const wchar_t *input, size_t len,
|
||||
unescape_flags_t escape_special,
|
||||
escape_string_style_t style = STRING_STYLE_SCRIPT);
|
||||
|
||||
bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special,
|
||||
escape_string_style_t style = STRING_STYLE_SCRIPT);
|
||||
std::unique_ptr<wcstring> unescape_string(const wcstring &input, unescape_flags_t escape_special,
|
||||
escape_string_style_t style = STRING_STYLE_SCRIPT);
|
||||
|
||||
/// Write the given paragraph of output, redoing linebreaks to fit \p termsize.
|
||||
wcstring reformat_for_screen(const wcstring &msg, const termsize_t &termsize);
|
||||
|
|
|
@ -1469,8 +1469,8 @@ void completer_t::escape_opening_brackets(const wcstring &argument) {
|
|||
if (!have_unquoted_unescaped_bracket) return;
|
||||
// Since completion_apply_to_command_line will escape the completion, we need to provide an
|
||||
// unescaped version.
|
||||
wcstring unescaped_argument;
|
||||
if (!unescape_string(argument, &unescaped_argument, UNESCAPE_INCOMPLETE)) return;
|
||||
auto unescaped_argument = unescape_string(argument, UNESCAPE_INCOMPLETE);
|
||||
if (!unescaped_argument) return;
|
||||
for (completion_t &comp : completions.get_list()) {
|
||||
if (comp.flags & COMPLETE_REPLACES_TOKEN) continue;
|
||||
comp.flags |= COMPLETE_REPLACES_TOKEN;
|
||||
|
@ -1482,7 +1482,7 @@ void completer_t::escape_opening_brackets(const wcstring &argument) {
|
|||
if (comp.flags & COMPLETE_DONT_ESCAPE) {
|
||||
FLOG(warning, L"unexpected completion flag");
|
||||
}
|
||||
comp.completion = unescaped_argument + comp.completion;
|
||||
comp.completion = *unescaped_argument + comp.completion;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1494,9 +1494,8 @@ void completer_t::mark_completions_duplicating_arguments(const wcstring &cmd,
|
|||
wcstring_list_t arg_strs;
|
||||
for (const auto &arg : args) {
|
||||
wcstring argstr = *arg.get_source(cmd);
|
||||
wcstring argstr_unesc;
|
||||
if (unescape_string(argstr, &argstr_unesc, UNESCAPE_DEFAULT)) {
|
||||
arg_strs.push_back(std::move(argstr_unesc));
|
||||
if (auto argstr_unesc = unescape_string(argstr, UNESCAPE_DEFAULT)) {
|
||||
arg_strs.push_back(std::move(*argstr_unesc));
|
||||
}
|
||||
}
|
||||
std::sort(arg_strs.begin(), arg_strs.end());
|
||||
|
@ -1668,11 +1667,14 @@ void completer_t::perform_for_commandline(wcstring cmdline) {
|
|||
source_range_t command_range = {cmd_tok.offset - bias, cmd_tok.length};
|
||||
|
||||
wcstring exp_command = *cmd_tok.get_source(cmdline);
|
||||
bool unescaped =
|
||||
expand_command_token(ctx, exp_command) &&
|
||||
unescape_string(previous_argument, &arg_data.previous_argument, UNESCAPE_DEFAULT) &&
|
||||
unescape_string(current_argument, &arg_data.current_argument, UNESCAPE_INCOMPLETE);
|
||||
std::unique_ptr<wcstring> prev;
|
||||
std::unique_ptr<wcstring> cur;
|
||||
bool unescaped = expand_command_token(ctx, exp_command) &&
|
||||
(prev = unescape_string(previous_argument, UNESCAPE_DEFAULT)) &&
|
||||
(cur = unescape_string(current_argument, UNESCAPE_INCOMPLETE));
|
||||
if (unescaped) {
|
||||
arg_data.previous_argument = *prev;
|
||||
arg_data.current_argument = *cur;
|
||||
// Have to walk over the command and its entire wrap chain. If any command
|
||||
// disables do_file, then they all do.
|
||||
walk_wrap_chain(exp_command, *effective_cmdline, command_range, &arg_data);
|
||||
|
|
|
@ -472,11 +472,11 @@ void env_init(const struct config_paths_t *paths, bool do_uvars, bool default_pa
|
|||
for (const auto &kv : table) {
|
||||
if (string_prefixes_string(prefix, kv.first)) {
|
||||
wcstring escaped_name = kv.first.substr(prefix_len);
|
||||
wcstring name;
|
||||
if (unescape_string(escaped_name, &name, unescape_flags_t{}, STRING_STYLE_VAR)) {
|
||||
wcstring key = name;
|
||||
if (auto name =
|
||||
unescape_string(escaped_name, unescape_flags_t{}, STRING_STYLE_VAR)) {
|
||||
wcstring key = *name;
|
||||
wcstring replacement = join_strings(kv.second.as_list(), L' ');
|
||||
abbrs->add(std::move(name), std::move(key), std::move(replacement),
|
||||
abbrs->add(std::move(*name), std::move(key), std::move(replacement),
|
||||
abbrs_position_t::command, from_universal);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -800,9 +800,11 @@ bool env_universal_t::populate_1_variable(const wchar_t *input, env_var_t::env_v
|
|||
|
||||
// Parse out the value into storage, and decode it into a variable.
|
||||
storage->clear();
|
||||
if (!unescape_string(colon + 1, storage, 0)) {
|
||||
auto unescaped = unescape_string(colon + 1, 0);
|
||||
if (!unescaped) {
|
||||
return false;
|
||||
}
|
||||
*storage = *unescaped;
|
||||
env_var_t var{decode_serialized(*storage), flags};
|
||||
|
||||
// Parse out the key and write into the map.
|
||||
|
|
|
@ -971,7 +971,8 @@ expand_result_t expander_t::stage_variables(wcstring input, completion_receiver_
|
|||
// We accept incomplete strings here, since complete uses expand_string to expand incomplete
|
||||
// strings from the commandline.
|
||||
wcstring next;
|
||||
unescape_string(input, &next, UNESCAPE_SPECIAL | UNESCAPE_INCOMPLETE);
|
||||
if (auto unescaped = unescape_string(input, UNESCAPE_SPECIAL | UNESCAPE_INCOMPLETE))
|
||||
next = *unescaped;
|
||||
|
||||
if (flags & expand_flag::skip_variables) {
|
||||
for (auto &i : next) {
|
||||
|
|
|
@ -376,27 +376,26 @@ static void test_unescape_sane() {
|
|||
{L"\"abcd\\n\"", L"abcd\\n"}, {L"\\143", L"c"},
|
||||
{L"'\\143'", L"\\143"}, {L"\\n", L"\n"} // \n normally becomes newline
|
||||
};
|
||||
wcstring output;
|
||||
for (const auto &test : tests) {
|
||||
bool ret = unescape_string(test.input, &output, UNESCAPE_DEFAULT);
|
||||
if (!ret) {
|
||||
auto output = unescape_string(test.input, UNESCAPE_DEFAULT);
|
||||
if (!output) {
|
||||
err(L"Failed to unescape '%ls'\n", test.input);
|
||||
} else if (output != test.expected) {
|
||||
} else if (*output != test.expected) {
|
||||
err(L"In unescaping '%ls', expected '%ls' but got '%ls'\n", test.input, test.expected,
|
||||
output.c_str());
|
||||
output->c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Test for overflow.
|
||||
if (unescape_string(L"echo \\UFFFFFF", &output, UNESCAPE_DEFAULT)) {
|
||||
if (unescape_string(L"echo \\UFFFFFF", UNESCAPE_DEFAULT)) {
|
||||
err(L"Should not have been able to unescape \\UFFFFFF\n");
|
||||
}
|
||||
if (unescape_string(L"echo \\U110000", &output, UNESCAPE_DEFAULT)) {
|
||||
if (unescape_string(L"echo \\U110000", UNESCAPE_DEFAULT)) {
|
||||
err(L"Should not have been able to unescape \\U110000\n");
|
||||
}
|
||||
#if WCHAR_MAX != 0xffff
|
||||
// TODO: Make this work on MS Windows.
|
||||
if (!unescape_string(L"echo \\U10FFFF", &output, UNESCAPE_DEFAULT)) {
|
||||
if (!unescape_string(L"echo \\U10FFFF", UNESCAPE_DEFAULT)) {
|
||||
err(L"Should have been able to unescape \\U10FFFF\n");
|
||||
}
|
||||
#endif
|
||||
|
@ -408,8 +407,6 @@ static void test_escape_crazy() {
|
|||
say(L"Testing escaping and unescaping");
|
||||
wcstring random_string;
|
||||
wcstring escaped_string;
|
||||
wcstring unescaped_string;
|
||||
bool unescaped_success;
|
||||
for (size_t i = 0; i < ESCAPE_TEST_COUNT; i++) {
|
||||
random_string.clear();
|
||||
while (random() % ESCAPE_TEST_LENGTH) {
|
||||
|
@ -417,14 +414,14 @@ static void test_escape_crazy() {
|
|||
}
|
||||
|
||||
escaped_string = escape_string(random_string);
|
||||
unescaped_success = unescape_string(escaped_string, &unescaped_string, UNESCAPE_DEFAULT);
|
||||
auto unescaped_string = unescape_string(escaped_string, UNESCAPE_DEFAULT);
|
||||
|
||||
if (!unescaped_success) {
|
||||
if (!unescaped_string) {
|
||||
err(L"Failed to unescape string <%ls>", escaped_string.c_str());
|
||||
break;
|
||||
} else if (unescaped_string != random_string) {
|
||||
} else if (*unescaped_string != random_string) {
|
||||
err(L"Escaped and then unescaped string '%ls', but got back a different string '%ls'",
|
||||
random_string.c_str(), unescaped_string.c_str());
|
||||
random_string.c_str(), unescaped_string->c_str());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -432,12 +429,12 @@ static void test_escape_crazy() {
|
|||
// Verify that ESCAPE_NO_PRINTABLES also escapes backslashes so we don't regress on issue #3892.
|
||||
random_string = L"line 1\\n\nline 2";
|
||||
escaped_string = escape_string(random_string, ESCAPE_NO_PRINTABLES | ESCAPE_NO_QUOTED);
|
||||
unescaped_success = unescape_string(escaped_string, &unescaped_string, UNESCAPE_DEFAULT);
|
||||
if (!unescaped_success) {
|
||||
auto unescaped_string = unescape_string(escaped_string, UNESCAPE_DEFAULT);
|
||||
if (!unescaped_string) {
|
||||
err(L"Failed to unescape string <%ls>", escaped_string.c_str());
|
||||
} else if (unescaped_string != random_string) {
|
||||
} else if (*unescaped_string != random_string) {
|
||||
err(L"Escaped and then unescaped string '%ls', but got back a different string '%ls'",
|
||||
random_string.c_str(), unescaped_string.c_str());
|
||||
random_string.c_str(), unescaped_string->c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -960,8 +960,8 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen
|
|||
parser_test_error_bits_t err = 0;
|
||||
|
||||
auto check_subtoken = [&arg_src, &out_errors, source_start](size_t begin, size_t end) -> int {
|
||||
wcstring unesc;
|
||||
if (!unescape_string(arg_src.c_str() + begin, end - begin, &unesc, UNESCAPE_SPECIAL)) {
|
||||
auto maybe_unesc = unescape_string(arg_src.c_str() + begin, end - begin, UNESCAPE_SPECIAL);
|
||||
if (!maybe_unesc) {
|
||||
if (out_errors) {
|
||||
const wchar_t *fmt = L"Invalid token '%ls'";
|
||||
if (arg_src.length() == 2 && arg_src[0] == L'\\' &&
|
||||
|
@ -975,6 +975,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen
|
|||
}
|
||||
return 1;
|
||||
}
|
||||
const wcstring &unesc = *maybe_unesc;
|
||||
|
||||
parser_test_error_bits_t err = 0;
|
||||
// Check for invalid variable expansions.
|
||||
|
|
|
@ -60,7 +60,9 @@ bool wildcard_has(const wchar_t *str, size_t len) {
|
|||
return false;
|
||||
}
|
||||
wcstring unescaped;
|
||||
unescape_string(str, len, &unescaped, UNESCAPE_SPECIAL);
|
||||
if (auto tmp = unescape_string(wcstring{str, len}, UNESCAPE_SPECIAL)) {
|
||||
unescaped = *tmp;
|
||||
}
|
||||
return wildcard_has_internal(unescaped);
|
||||
}
|
||||
|
||||
|
|
|
@ -158,6 +158,9 @@ echo -e 'abc\x211def'
|
|||
#CHECK: abc!def
|
||||
#CHECK: abc!1def
|
||||
|
||||
echo \UDE01
|
||||
#CHECK: <20>
|
||||
|
||||
# Comments allowed in between lines (#1987)
|
||||
echo before comment \
|
||||
# comment
|
||||
|
|
Loading…
Reference in a new issue