diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e2112f38..bc91c5003 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,7 +107,7 @@ set(FISH_BUILTIN_SRCS src/builtins/jobs.cpp src/builtins/path.cpp src/builtins/read.cpp src/builtins/set.cpp src/builtins/source.cpp - src/builtins/string.cpp src/builtins/ulimit.cpp + src/builtins/ulimit.cpp ) # List of other sources. @@ -121,7 +121,7 @@ set(FISH_SRCS src/null_terminated_array.cpp src/operation_context.cpp src/output.cpp src/pager.cpp src/parse_execution.cpp src/parse_util.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp src/postfork.cpp - src/proc.cpp src/re.cpp src/reader.cpp src/screen.cpp + src/proc.cpp src/reader.cpp src/screen.cpp src/signals.cpp src/utf8.cpp src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp src/wutil.cpp src/fds.cpp src/rustffi.cpp diff --git a/fish-rust/src/abbrs.rs b/fish-rust/src/abbrs.rs index 4eb967aca..9ce84161d 100644 --- a/fish-rust/src/abbrs.rs +++ b/fish-rust/src/abbrs.rs @@ -18,7 +18,6 @@ use self::abbrs_ffi::{abbreviation_t, abbrs_position_t, abbrs_replacement_t}; #[cxx::bridge] mod abbrs_ffi { extern "C++" { - include!("re.h"); include!("parse_constants.h"); type SourceRange = crate::parse_constants::SourceRange; diff --git a/fish-rust/src/builtins/mod.rs b/fish-rust/src/builtins/mod.rs index 4ae2432be..37a29e33e 100644 --- a/fish-rust/src/builtins/mod.rs +++ b/fish-rust/src/builtins/mod.rs @@ -20,6 +20,7 @@ pub mod realpath; pub mod r#return; pub mod set_color; pub mod status; +pub mod string; pub mod test; pub mod r#type; pub mod wait; diff --git a/fish-rust/src/builtins/shared.rs b/fish-rust/src/builtins/shared.rs index 11559e3da..5fb7df123 100644 --- a/fish-rust/src/builtins/shared.rs +++ b/fish-rust/src/builtins/shared.rs @@ -1,4 +1,5 @@ use crate::builtins::{printf, wait}; +use crate::ffi::separation_type_t; use crate::ffi::{self, parser_t, wcstring_list_ffi_t, Repin, RustBuiltin}; use crate::wchar::{wstr, WString, L}; use crate::wchar_ffi::{c_str, 
empty_wstring, ToCppWString, WCharFromFFI}; @@ -108,6 +109,20 @@ impl output_stream_t { pub fn append1(&mut self, c: char) -> bool { self.append(wstr::from_char_slice(&[c])) } + + pub fn append_with_separation( + &mut self, + s: impl AsRef, + sep: separation_type_t, + want_newline: bool, + ) -> bool { + self.ffi() + .append_with_separation(&s.as_ref().into_cpp(), sep, want_newline) + } + + pub fn flush_and_check_error(&mut self) -> c_int { + self.ffi().flush_and_check_error().into() + } } // Convenience wrappers around C++ io_streams_t. @@ -216,6 +231,7 @@ pub fn run_builtin( RustBuiltin::Return => super::r#return::r#return(parser, streams, args), RustBuiltin::SetColor => super::set_color::set_color(parser, streams, args), RustBuiltin::Status => super::status::status(parser, streams, args), + RustBuiltin::String => super::string::string(parser, streams, args), RustBuiltin::Test => super::test::test(parser, streams, args), RustBuiltin::Type => super::r#type::r#type(parser, streams, args), RustBuiltin::Wait => wait::wait(parser, streams, args), diff --git a/fish-rust/src/builtins/string.rs b/fish-rust/src/builtins/string.rs new file mode 100644 index 000000000..67491049f --- /dev/null +++ b/fish-rust/src/builtins/string.rs @@ -0,0 +1,493 @@ +use std::borrow::Cow; +use std::fs::File; +use std::io::{BufRead, BufReader, Read}; +use std::os::fd::FromRawFd; + +use crate::common::str2wcstring; +use crate::wcstringutil::fish_wcwidth_visible; +// Forward some imports to make subcmd implementations easier +pub(self) use crate::{ + builtins::shared::{ + builtin_missing_argument, builtin_print_error_trailer, builtin_print_help, io_streams_t, + BUILTIN_ERR_ARG_COUNT0, BUILTIN_ERR_ARG_COUNT1, BUILTIN_ERR_COMBO2, + BUILTIN_ERR_INVALID_SUBCMD, BUILTIN_ERR_MISSING_SUBCMD, BUILTIN_ERR_NOT_NUMBER, + BUILTIN_ERR_TOO_MANY_ARGUMENTS, BUILTIN_ERR_UNKNOWN, STATUS_CMD_ERROR, STATUS_CMD_OK, + STATUS_INVALID_ARGS, + }, + ffi::{parser_t, separation_type_t}, + wchar::{wstr, WString, L}, + 
wchar_ext::{ToWString, WExt}, + wgetopt::{wgetopter_t, wopt, woption, woption_argument_t::*, NONOPTION_CHAR_CODE}, + wutil::{wgettext, wgettext_fmt}, +}; +pub(self) use libc::c_int; + +mod collect; +mod escape; +mod join; +mod length; +mod r#match; +mod pad; +mod repeat; +mod replace; +mod shorten; +mod split; +mod sub; +mod transform; +mod trim; +mod unescape; + +macro_rules! string_error { + ( + $streams:expr, + $string:expr + $(, $args:expr)+ + $(,)? + ) => { + $streams.err.append(L!("string ")); + $streams.err.append(wgettext_fmt!($string, $($args),*)); + }; +} +pub(self) use string_error; + +fn string_unknown_option( + parser: &mut parser_t, + streams: &mut io_streams_t, + subcmd: &wstr, + opt: &wstr, +) { + string_error!(streams, BUILTIN_ERR_UNKNOWN, subcmd, opt); + builtin_print_error_trailer(parser, streams, L!("string")); +} + +trait StringSubCommand<'args> { + const SHORT_OPTIONS: &'static wstr; + const LONG_OPTIONS: &'static [woption<'static>]; + + /// Parse and store option specified by the associated short or long option. + fn parse_opt( + &mut self, + name: &wstr, + c: char, + arg: Option<&'args wstr>, + ) -> Result<(), StringError>; + + fn parse_opts( + &mut self, + args: &mut [&'args wstr], + parser: &mut parser_t, + streams: &mut io_streams_t, + ) -> Result> { + let cmd = args[0]; + let mut args_read = Vec::with_capacity(args.len()); + args_read.extend_from_slice(args); + + let mut w = wgetopter_t::new(Self::SHORT_OPTIONS, Self::LONG_OPTIONS, args); + while let Some(c) = w.wgetopt_long() { + match c { + ':' => { + streams.err.append(L!("string ")); // clone of string_error + builtin_missing_argument(parser, streams, cmd, args_read[w.woptind - 1], false); + return Err(STATUS_INVALID_ARGS); + } + '?' 
=> { + string_unknown_option(parser, streams, cmd, args_read[w.woptind - 1]); + return Err(STATUS_INVALID_ARGS); + } + c => { + let retval = self.parse_opt(cmd, c, w.woptarg); + if let Err(e) = retval { + e.print_error(&args_read, parser, streams, w.woptarg, w.woptind); + return Err(e.retval()); + } + } + } + } + + return Ok(w.woptind); + } + + /// Take any positional arguments after options have been parsed. + #[allow(unused_variables)] + fn take_args( + &mut self, + optind: &mut usize, + args: &[&'args wstr], + streams: &mut io_streams_t, + ) -> Option { + STATUS_CMD_OK + } + + /// Perform the business logic of the command. + fn handle( + &mut self, + parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&'args wstr], + ) -> Option; + + fn run( + &mut self, + parser: &mut parser_t, + streams: &mut io_streams_t, + args: &mut [&'args wstr], + ) -> Option { + if args.len() >= 3 && (args[2] == "-h" || args[2] == "--help") { + let string_dash_subcmd = WString::from(args[0]) + L!("-") + args[1]; + builtin_print_help(parser, streams, &string_dash_subcmd); + return STATUS_CMD_OK; + } + + let args = &mut args[1..]; + + let mut optind = match self.parse_opts(args, parser, streams) { + Ok(optind) => optind, + Err(retval) => return retval, + }; + + let retval = self.take_args(&mut optind, args, streams); + if retval != STATUS_CMD_OK { + return retval; + } + + if streams.stdin_is_directly_redirected() && args.len() > optind { + string_error!(streams, BUILTIN_ERR_TOO_MANY_ARGUMENTS, args[0]); + return STATUS_INVALID_ARGS; + } + + return self.handle(parser, streams, &mut optind, args); + } +} + +/// This covers failing argument/option parsing +enum StringError { + InvalidArgs(WString), + NotANumber, + UnknownOption, +} + +enum RegexError { + Compile(WString, pcre2::Error), + InvalidCaptureGroupName(WString), + InvalidEscape(WString), +} + +impl RegexError { + fn print_error(&self, args: &[&wstr], streams: &mut io_streams_t) { + let cmd = args[0]; 
+ use RegexError::*; + match self { + Compile(pattern, e) => { + string_error!( + streams, + "%ls: Regular expression compile error: %ls\n", + cmd, + &WString::from(e.error_message()) + ); + string_error!(streams, "%ls: %ls\n", cmd, pattern); + string_error!(streams, "%ls: %*ls\n", cmd, e.offset().unwrap(), "^"); + } + InvalidCaptureGroupName(name) => { + streams.err.append(wgettext_fmt!( + "Modification of read-only variable \"%ls\" is not allowed\n", + name + )); + } + InvalidEscape(pattern) => { + string_error!( + streams, + "%ls: Invalid escape sequence in pattern \"%ls\"\n", + cmd, + pattern + ); + } + } + } +} + +impl From for StringError { + fn from(_: crate::wutil::wcstoi::Error) -> Self { + StringError::NotANumber + } +} + +macro_rules! invalid_args { + ($msg:expr, $name:expr, $arg:expr) => { + StringError::InvalidArgs(crate::wutil::wgettext_fmt!($msg, $name, $arg.unwrap())) + }; +} +pub(self) use invalid_args; + +impl StringError { + fn print_error( + &self, + args: &[&wstr], + parser: &mut parser_t, + streams: &mut io_streams_t, + optarg: Option<&wstr>, + optind: usize, + ) { + let cmd = args[0]; + use StringError::*; + match self { + InvalidArgs(msg) => { + streams.err.append(L!("string ")); + // TODO: Once we can extract/edit translations in Rust files, replace this with + // something like wgettext_fmt("%ls: %ls", cmd, msg) that can be translated + // and remove the forwarding of the cmd name to `parse_opt` + streams.err.append(msg); + } + NotANumber => { + string_error!(streams, BUILTIN_ERR_NOT_NUMBER, cmd, optarg.unwrap()); + } + UnknownOption => { + string_unknown_option(parser, streams, cmd, args[optind - 1]); + } + } + } + + fn retval(&self) -> Option { + STATUS_INVALID_ARGS + } +} + +#[derive(Default, PartialEq, Clone, Copy)] +enum Direction { + #[default] + Left, + Right, +} + +pub(self) fn width_without_escapes(ins: &wstr, start_pos: usize) -> i32 { + let mut width: i32 = 0; + for c in ins[start_pos..].chars() { + let w = 
fish_wcwidth_visible(c); + // We assume that this string is on its own line, + // in which case a backslash can't bring us below 0. + if w > 0 || width > 0 { + width += w; + } + } + // ANSI escape sequences like \e\[31m contain printable characters. Subtract their width + // because they are not rendered. + let mut pos = start_pos; + while let Some(ec_pos) = ins.slice_from(pos).find_char('\x1B') { + pos += ec_pos; + if let Some(len) = escape_code_length(ins.slice_from(pos)) { + let sub = &ins[pos..pos + len]; + for c in sub.chars() { + width -= fish_wcwidth_visible(c); + } + // Move us forward behind the escape code, + // it might include a second escape! + // E.g. SGR0 ("reset") is \e\(B\e\[m in xterm. + pos += len - 1; + } else { + pos += 1; + } + } + + return width; +} + +pub(self) fn escape_code_length(code: &wstr) -> Option { + use crate::ffi::escape_code_length_ffi; + use crate::wchar_ffi::wstr_to_u32string; + + match escape_code_length_ffi(wstr_to_u32string(code).as_ptr()).into() { + -1 => None, + n => Some(n as usize), + } +} + +/// A helper type for extracting arguments from either argv or stdin. +pub(self) struct Arguments<'args, 'iter> { + /// The list of arguments passed to the string builtin. + args: &'iter [&'args wstr], + /// If using argv, index of the next argument to return. + argidx: &'iter mut usize, + /// If set, when reading from a stream, split on newlines. 
+ split_on_newline: bool, + /// Buffer to store what we read with the BufReader + /// Is only here to avoid allocating every time + buffer: Vec, + /// If not using argv, we read with a buffer + reader: Option>, +} + +impl Drop for Arguments<'_, '_> { + fn drop(&mut self) { + if let Some(r) = self.reader.take() { + // we should not close stdin + std::mem::forget(r.into_inner()); + } + } +} + +impl<'args, 'iter> Arguments<'args, 'iter> { + const STRING_CHUNK_SIZE: usize = 1024; + + fn new( + args: &'iter [&'args wstr], + argidx: &'iter mut usize, + streams: &mut io_streams_t, + ) -> Self { + let reader = streams.stdin_is_directly_redirected().then(|| { + let stdin_fd = streams + .stdin_fd() + .filter(|&fd| fd >= 0) + .expect("should have a valid fd"); + // safety: this should be a valid fd, and already open + let fd = unsafe { File::from_raw_fd(stdin_fd) }; + BufReader::with_capacity(Self::STRING_CHUNK_SIZE, fd) + }); + + Arguments { + args, + argidx, + split_on_newline: true, + buffer: Vec::new(), + reader, + } + } + + fn without_splitting_on_newline( + args: &'iter [&'args wstr], + argidx: &'iter mut usize, + streams: &mut io_streams_t, + ) -> Self { + let mut args = Self::new(args, argidx, streams); + args.split_on_newline = false; + args + } + + fn get_arg_stdin(&mut self) -> Option<(Cow<'args, wstr>, bool)> { + let reader = self.reader.as_mut().unwrap(); + + // NOTE: C++ wrongly commented that read_blocked retries for EAGAIN + let num_bytes = match self.split_on_newline { + true => reader.read_until(b'\n', &mut self.buffer), + false => reader.read_to_end(&mut self.buffer), + } + .ok()?; + + // to match behaviour of earlier versions + if num_bytes == 0 { + return None; + } + + let mut parsed = str2wcstring(&self.buffer); + + // If not set, we have consumed all of stdin and its last line is missing a newline character. + // This is an edge case -- we expect text input, which is conventionally terminated by a + // newline character. 
But if it isn't, we use this to avoid creating one out of thin air, + // to not corrupt input data. + let want_newline; + if self.split_on_newline { + if parsed.char_at(parsed.len() - 1) == '\n' { + // consumers do not expect to deal with the newline + parsed.pop(); + want_newline = true; + } else { + // we are missing a trailing newline + want_newline = false; + } + } else { + want_newline = false; + } + + let retval = Some((Cow::Owned(parsed), want_newline)); + self.buffer.clear(); + retval + } +} + +impl<'args> Iterator for Arguments<'args, '_> { + // second is want_newline + type Item = (Cow<'args, wstr>, bool); + + fn next(&mut self) -> Option { + if self.reader.is_some() { + return self.get_arg_stdin(); + } + + if *self.argidx >= self.args.len() { + return None; + } + *self.argidx += 1; + return Some((Cow::Borrowed(self.args[*self.argidx - 1]), true)); + } +} + +/// The string builtin, for manipulating strings. +pub fn string( + parser: &mut parser_t, + streams: &mut io_streams_t, + args: &mut [&wstr], +) -> Option { + let cmd = args[0]; + let argc = args.len(); + + if argc <= 1 { + streams + .err + .append(wgettext_fmt!(BUILTIN_ERR_MISSING_SUBCMD, cmd)); + builtin_print_error_trailer(parser, streams, cmd); + return STATUS_INVALID_ARGS; + } + + if args[1] == "-h" || args[1] == "--help" { + builtin_print_help(parser, streams, cmd); + return STATUS_CMD_OK; + } + + let subcmd_name = args[1]; + + match subcmd_name.to_string().as_str() { + "collect" => collect::Collect::default().run(parser, streams, args), + "escape" => escape::Escape::default().run(parser, streams, args), + "join" => join::Join::default().run(parser, streams, args), + "join0" => { + let mut cmd = join::Join::default(); + cmd.is_join0 = true; + cmd.run(parser, streams, args) + } + "length" => length::Length::default().run(parser, streams, args), + "lower" => { + let mut cmd = transform::Transform { + quiet: false, + func: wstr::to_lowercase, + }; + cmd.run(parser, streams, args) + } + "match" => 
r#match::Match::default().run(parser, streams, args), + "pad" => pad::Pad::default().run(parser, streams, args), + "repeat" => repeat::Repeat::default().run(parser, streams, args), + "replace" => replace::Replace::default().run(parser, streams, args), + "shorten" => shorten::Shorten::default().run(parser, streams, args), + "split" => split::Split::default().run(parser, streams, args), + "split0" => { + let mut cmd = split::Split::default(); + cmd.is_split0 = true; + cmd.run(parser, streams, args) + } + "sub" => sub::Sub::default().run(parser, streams, args), + "trim" => trim::Trim::default().run(parser, streams, args), + "unescape" => unescape::Unescape::default().run(parser, streams, args), + "upper" => { + let mut cmd = transform::Transform { + quiet: false, + func: wstr::to_uppercase, + }; + cmd.run(parser, streams, args) + } + _ => { + streams + .err + .append(wgettext_fmt!(BUILTIN_ERR_INVALID_SUBCMD, cmd, subcmd_name)); + builtin_print_error_trailer(parser, streams, cmd); + STATUS_INVALID_ARGS + } + } +} diff --git a/fish-rust/src/builtins/string/collect.rs b/fish-rust/src/builtins/string/collect.rs new file mode 100644 index 000000000..be4206299 --- /dev/null +++ b/fish-rust/src/builtins/string/collect.rs @@ -0,0 +1,66 @@ +use super::*; + +#[derive(Default)] +pub struct Collect { + allow_empty: bool, + no_trim_newlines: bool, +} + +impl StringSubCommand<'_> for Collect { + const LONG_OPTIONS: &'static [woption<'static>] = &[ + wopt(L!("allow-empty"), no_argument, 'a'), + wopt(L!("no-trim-newlines"), no_argument, 'N'), + ]; + const SHORT_OPTIONS: &'static wstr = L!(":Na"); + + fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> { + match c { + 'a' => self.allow_empty = true, + 'N' => self.no_trim_newlines = true, + _ => return Err(StringError::UnknownOption), + } + Ok(()) + } + + fn handle( + &mut self, + _parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&wstr], + ) -> Option { + let 
mut appended = 0usize; + + for (arg, want_newline) in Arguments::without_splitting_on_newline(args, optind, streams) { + let arg = if !self.no_trim_newlines { + let trim_len = arg.len() - arg.chars().rev().take_while(|&c| c == '\n').count(); + &arg[..trim_len] + } else { + &arg + }; + + streams + .out + .append_with_separation(arg, separation_type_t::explicitly, want_newline); + appended += arg.len(); + } + + // If we haven't printed anything and "no_empty" is set, + // print something empty. Helps with empty ellision: + // echo (true | string collect --allow-empty)"bar" + // prints "bar". + if self.allow_empty && appended == 0 { + streams.out.append_with_separation( + L!(""), + separation_type_t::explicitly, + true, /* historical behavior is to always print a newline */ + ); + } + + if appended > 0 { + STATUS_CMD_OK + } else { + STATUS_CMD_ERROR + } + } +} diff --git a/fish-rust/src/builtins/string/escape.rs b/fish-rust/src/builtins/string/escape.rs new file mode 100644 index 000000000..405bfcfce --- /dev/null +++ b/fish-rust/src/builtins/string/escape.rs @@ -0,0 +1,65 @@ +use super::*; +use crate::common::{escape_string, EscapeFlags, EscapeStringStyle}; + +#[derive(Default)] +pub struct Escape { + no_quoted: bool, + style: EscapeStringStyle, +} + +impl StringSubCommand<'_> for Escape { + const LONG_OPTIONS: &'static [woption<'static>] = &[ + wopt(L!("no-quoted"), no_argument, 'n'), + wopt(L!("style"), required_argument, NONOPTION_CHAR_CODE), + ]; + const SHORT_OPTIONS: &'static wstr = L!(":n"); + + fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> { + match c { + 'n' => self.no_quoted = true, + NONOPTION_CHAR_CODE => { + self.style = arg + .unwrap() + .try_into() + .map_err(|_| invalid_args!("%ls: Invalid escape style '%ls'\n", name, arg))? 
+ } + _ => return Err(StringError::UnknownOption), + } + return Ok(()); + } + + fn handle( + &mut self, + _parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&wstr], + ) -> Option { + // Currently, only the script style supports options. + // Ignore them for other styles for now. + let style = match self.style { + EscapeStringStyle::Script(..) if self.no_quoted => { + EscapeStringStyle::Script(EscapeFlags::NO_QUOTED) + } + x => x, + }; + + let mut escaped_any = false; + for (arg, want_newline) in Arguments::new(args, optind, streams) { + let mut escaped = escape_string(&arg, style); + + if want_newline { + escaped.push('\n'); + } + + streams.out.append(escaped); + escaped_any = true; + } + + if escaped_any { + STATUS_CMD_OK + } else { + STATUS_CMD_ERROR + } + } +} diff --git a/fish-rust/src/builtins/string/join.rs b/fish-rust/src/builtins/string/join.rs new file mode 100644 index 000000000..4d3b5d435 --- /dev/null +++ b/fish-rust/src/builtins/string/join.rs @@ -0,0 +1,99 @@ +use super::*; + +pub struct Join<'args> { + quiet: bool, + no_empty: bool, + pub is_join0: bool, + sep: &'args wstr, +} + +impl Default for Join<'_> { + fn default() -> Self { + Self { + quiet: false, + no_empty: false, + is_join0: false, + sep: L!("\0"), + } + } +} + +impl<'args> StringSubCommand<'args> for Join<'args> { + const LONG_OPTIONS: &'static [woption<'static>] = &[ + wopt(L!("quiet"), no_argument, 'q'), + wopt(L!("no-empty"), no_argument, 'n'), + ]; + const SHORT_OPTIONS: &'static wstr = L!(":qn"); + + fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> { + match c { + 'q' => self.quiet = true, + 'n' => self.no_empty = true, + _ => return Err(StringError::UnknownOption), + } + return Ok(()); + } + + fn take_args( + &mut self, + optind: &mut usize, + args: &[&'args wstr], + streams: &mut io_streams_t, + ) -> Option { + if self.is_join0 { + return STATUS_CMD_OK; + } + + let Some(arg) = args.get(*optind).copied() 
else { + string_error!(streams, BUILTIN_ERR_ARG_COUNT0, args[0]); + return STATUS_INVALID_ARGS; + }; + *optind += 1; + self.sep = arg; + + STATUS_CMD_OK + } + + fn handle( + &mut self, + _parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&wstr], + ) -> Option { + let sep = &self.sep; + let mut nargs = 0usize; + let mut print_trailing_newline = true; + for (arg, want_newline) in Arguments::new(args, optind, streams) { + if !self.quiet { + if self.no_empty && arg.is_empty() { + continue; + } + + if nargs > 0 { + streams.out.append(sep); + } + + streams.out.append(arg); + } else if nargs > 1 { + return STATUS_CMD_OK; + } + nargs += 1; + print_trailing_newline = want_newline; + } + + if nargs > 0 && !self.quiet { + if self.is_join0 { + streams.out.append1('\0'); + } else if print_trailing_newline { + streams.out.append1('\n'); + } + } + + if nargs > 1 { + STATUS_CMD_OK + } else { + STATUS_CMD_ERROR + } + } +} diff --git a/fish-rust/src/builtins/string/length.rs b/fish-rust/src/builtins/string/length.rs new file mode 100644 index 000000000..d400658c4 --- /dev/null +++ b/fish-rust/src/builtins/string/length.rs @@ -0,0 +1,74 @@ +use super::*; + +use crate::wcstringutil::split_string; + +#[derive(Default)] +pub struct Length { + quiet: bool, + visible: bool, +} + +impl StringSubCommand<'_> for Length { + const LONG_OPTIONS: &'static [woption<'static>] = &[ + wopt(L!("quiet"), no_argument, 'q'), + wopt(L!("visible"), no_argument, 'V'), + ]; + const SHORT_OPTIONS: &'static wstr = L!(":qV"); + + fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> { + match c { + 'q' => self.quiet = true, + 'V' => self.visible = true, + _ => return Err(StringError::UnknownOption), + } + return Ok(()); + } + + fn handle( + &mut self, + _parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&wstr], + ) -> Option { + let mut nnonempty = 0usize; + + for (arg, _) in Arguments::new(args, optind, 
streams) { + if self.visible { + // Visible length only makes sense line-wise. + for line in split_string(&arg, '\n') { + let mut max = 0; + // Carriage-return returns us to the beginning. The longest substring without + // carriage-return determines the overall width. + for reset in split_string(&line, '\r') { + let n = width_without_escapes(&reset, 0) as usize; + max = max.max(n); + } + if max > 0 { + nnonempty += 1; + } + if !self.quiet { + streams.out.append(max.to_wstring() + L!("\n")); + } else if nnonempty > 0 { + return STATUS_CMD_OK; + } + } + } else { + let n = arg.len(); + if n > 0 { + nnonempty += 1; + } + if !self.quiet { + streams.out.append(n.to_wstring() + L!("\n")); + } else if nnonempty > 0 { + return STATUS_CMD_OK; + } + } + } + if nnonempty > 0 { + STATUS_CMD_OK + } else { + STATUS_CMD_ERROR + } + } +} diff --git a/fish-rust/src/builtins/string/match.rs b/fish-rust/src/builtins/string/match.rs new file mode 100644 index 000000000..c6753c2d2 --- /dev/null +++ b/fish-rust/src/builtins/string/match.rs @@ -0,0 +1,406 @@ +use pcre2::utf32::{Captures, Regex, RegexBuilder}; +use printf_compat::sprintf; +use std::collections::HashMap; + +use super::*; +use crate::env::{EnvMode, EnvVar, EnvVarFlags}; +use crate::flog::FLOG; +use crate::parse_util::parse_util_unescape_wildcards; +use crate::wchar_ffi::WCharToFFI; +use crate::wildcard::ANY_STRING; + +#[derive(Default)] +pub struct Match<'args> { + all: bool, + entire: bool, + groups_only: bool, + ignore_case: bool, + invert_match: bool, + quiet: bool, + regex: bool, + index: bool, + pattern: &'args wstr, +} + +impl<'args> StringSubCommand<'args> for Match<'args> { + const LONG_OPTIONS: &'static [woption<'static>] = &[ + wopt(L!("all"), no_argument, 'a'), + wopt(L!("entire"), no_argument, 'e'), + wopt(L!("groups-only"), no_argument, 'g'), + wopt(L!("ignore-case"), no_argument, 'i'), + wopt(L!("invert"), no_argument, 'v'), + wopt(L!("quiet"), no_argument, 'q'), + wopt(L!("regex"), no_argument, 'r'), + 
wopt(L!("index"), no_argument, 'n'), + ]; + const SHORT_OPTIONS: &'static wstr = L!(":aegivqrn"); + + fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> { + match c { + 'a' => self.all = true, + 'e' => self.entire = true, + 'g' => self.groups_only = true, + 'i' => self.ignore_case = true, + 'v' => self.invert_match = true, + 'q' => self.quiet = true, + 'r' => self.regex = true, + 'n' => self.index = true, + _ => return Err(StringError::UnknownOption), + } + return Ok(()); + } + + fn take_args( + &mut self, + optind: &mut usize, + args: &[&'args wstr], + streams: &mut io_streams_t, + ) -> Option { + let cmd = args[0]; + let Some(arg) = args.get(*optind).copied() else { + string_error!(streams, BUILTIN_ERR_ARG_COUNT0, cmd); + return STATUS_INVALID_ARGS; + }; + *optind += 1; + self.pattern = arg; + STATUS_CMD_OK + } + + fn handle( + &mut self, + parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&wstr], + ) -> Option { + let cmd = args[0]; + + if self.entire && self.index { + streams.err.append(wgettext_fmt!( + BUILTIN_ERR_COMBO2, + cmd, + wgettext!("--entire and --index are mutually exclusive") + )); + return STATUS_INVALID_ARGS; + } + + if self.invert_match && self.groups_only { + streams.err.append(wgettext_fmt!( + BUILTIN_ERR_COMBO2, + cmd, + wgettext!("--invert and --groups-only are mutually exclusive") + )); + return STATUS_INVALID_ARGS; + } + + if self.entire && self.groups_only { + streams.err.append(wgettext_fmt!( + BUILTIN_ERR_COMBO2, + cmd, + wgettext!("--entire and --groups-only are mutually exclusive") + )); + return STATUS_INVALID_ARGS; + } + + let mut matcher = match StringMatcher::new(self.pattern, self) { + Ok(m) => m, + Err(e) => { + e.print_error(args, streams); + return STATUS_INVALID_ARGS; + } + }; + + for (arg, _) in Arguments::new(args, optind, streams) { + if let Err(e) = matcher.report_matches(arg.as_ref(), streams) { + FLOG!(error, "pcre2_match unexpected error:", 
e.error_message()) + } + if self.quiet && matcher.match_count() > 0 { + break; + } + } + + let match_count = matcher.match_count(); + + if let StringMatcher::Regex(RegexMatcher { + first_match_captures, + .. + }) = matcher + { + let vars = parser.get_vars(); + for (name, vals) in first_match_captures.into_iter() { + vars.set(&WString::from(name), EnvMode::DEFAULT, vals); + } + } + + if match_count > 0 { + STATUS_CMD_OK + } else { + STATUS_CMD_ERROR + } + } +} + +struct RegexMatcher<'opts, 'args> { + regex: Regex, + total_matched: usize, + first_match_captures: HashMap>, + opts: &'opts Match<'args>, +} + +struct WildCardMatcher<'opts, 'args> { + pattern: WString, + total_matched: usize, + opts: &'opts Match<'args>, +} + +#[allow(clippy::large_enum_variant)] +enum StringMatcher<'opts, 'args> { + Regex(RegexMatcher<'opts, 'args>), + WildCard(WildCardMatcher<'opts, 'args>), +} + +impl<'opts, 'args> StringMatcher<'opts, 'args> { + fn new( + pattern: &'args wstr, + opts: &'opts Match<'args>, + ) -> Result, RegexError> { + if opts.regex { + let m = RegexMatcher::new(pattern, opts)?; + Ok(Self::Regex(m)) + } else { + let m = WildCardMatcher::new(pattern, opts); + return Ok(Self::WildCard(m)); + } + } + + fn report_matches( + &mut self, + arg: &wstr, + streams: &mut io_streams_t, + ) -> Result<(), pcre2::Error> { + match self { + Self::Regex(m) => m.report_matches(arg, streams)?, + Self::WildCard(m) => m.report_matches(arg, streams), + } + Ok(()) + } + + fn match_count(&self) -> usize { + match self { + Self::Regex(m) => m.total_matched, + Self::WildCard(m) => m.total_matched, + } + } +} + +enum MatchResult<'a> { + NoMatch, + Match(Option>), +} + +impl<'opts, 'args> RegexMatcher<'opts, 'args> { + fn new( + pattern: &'args wstr, + opts: &'opts Match<'args>, + ) -> Result, RegexError> { + let regex = RegexBuilder::new() + .caseless(opts.ignore_case) + // UTF-mode can be enabled with `(*UTF)` https://www.pcre.org/current/doc/html/pcre2unicode.html + // we use the capture group 
names to set local variables, and those are limited + // to ascii-alphanumerics and underscores in non-UTF-mode + // https://www.pcre.org/current/doc/html/pcre2syntax.html#SEC13 + // we can probably relax this limitation as long as we ensure + // the capture group names are valid variable names + .never_utf(true) + .build(pattern.as_char_slice()) + .map_err(|e| RegexError::Compile(pattern.to_owned(), e))?; + + Self::validate_capture_group_names(regex.capture_names())?; + + let first_match_captures = regex + .capture_names() + .iter() + .filter_map(|name| name.as_ref().map(|n| (n.to_owned(), Vec::new()))) + .collect(); + let m = Self { + regex, + total_matched: 0, + first_match_captures, + opts, + }; + return Ok(m); + } + + fn report_matches( + &mut self, + arg: &wstr, + streams: &mut io_streams_t, + ) -> Result<(), pcre2::Error> { + let mut iter = self.regex.captures_iter(arg.as_char_slice()); + let cg = iter.next().transpose()?; + let rc = self.report_match(arg, cg, streams); + + let mut populate_captures = false; + if let MatchResult::Match(actual) = &rc { + populate_captures = self.total_matched == 0; + self.total_matched += 1; + + if populate_captures { + Self::populate_captures_from_match( + &mut self.first_match_captures, + self.opts, + actual, + ); + } + } + + if !self.opts.invert_match && self.opts.all { + // we are guaranteed to match as long as ops.invert_match is false + while let MatchResult::Match(cg) = + self.report_match(arg, iter.next().transpose()?, streams) + { + if populate_captures { + Self::populate_captures_from_match( + &mut self.first_match_captures, + self.opts, + &cg, + ); + } + } + } + Ok(()) + } + + fn populate_captures_from_match<'a>( + first_match_captures: &mut HashMap>, + opts: &Match<'args>, + cg: &Option>, + ) { + for (name, captures) in first_match_captures.iter_mut() { + // If there are multiple named groups and --all was used, we need to ensure that + // the indexes are always in sync between the variables. 
If an optional named + // group didn't match but its brethren did, we need to make sure to put + // *something* in the resulting array, and unfortunately fish doesn't support + // empty/null members so we're going to have to use an empty string as the + // sentinel value. + + if let Some(m) = cg.as_ref().and_then(|cg| cg.name(&name.to_string())) { + captures.push(WString::from(m.as_bytes())); + } else if opts.all { + captures.push(WString::new()); + } + } + } + + fn validate_capture_group_names( + capture_group_names: &[Option], + ) -> Result<(), RegexError> { + for name in capture_group_names.iter().filter_map(|n| n.as_ref()) { + let wname = WString::from_str(name); + if EnvVar::flags_for(&wname).contains(EnvVarFlags::READ_ONLY) { + return Err(RegexError::InvalidCaptureGroupName(wname)); + } + } + return Ok(()); + } + + fn report_match<'a>( + &self, + arg: &'a wstr, + cg: Option>, + streams: &mut io_streams_t, + ) -> MatchResult<'a> { + let Some(cg) = cg else { + if self.opts.invert_match && !self.opts.quiet { + if self.opts.index { + streams.out.append(sprintf!("1 %lu\n", arg.len())); + } else { + streams.out.append(arg); + streams.out.append1('\n'); + } + } + return match self.opts.invert_match { + true => MatchResult::Match(None), + false => MatchResult::NoMatch, + }; + }; + + if self.opts.invert_match { + return MatchResult::NoMatch; + } + + if self.opts.quiet { + return MatchResult::Match(Some(cg)); + } + + if self.opts.entire { + streams.out.append(arg); + streams.out.append1('\n'); + } + + let start = (self.opts.entire || self.opts.groups_only) as usize; + + for m in (start..cg.len()).filter_map(|i| cg.get(i)) { + if self.opts.index { + streams + .out + .append(sprintf!("%lu %lu\n", m.start() + 1, m.end() - m.start())); + } else { + streams.out.append(&arg[m.start()..m.end()]); + streams.out.append1('\n'); + } + } + + return MatchResult::Match(Some(cg)); + } +} + +impl<'opts, 'args> WildCardMatcher<'opts, 'args> { + fn new(pattern: &'args wstr, opts: &'opts 
Match<'args>) -> Self { + let mut wcpattern = parse_util_unescape_wildcards(pattern); + if opts.ignore_case { + wcpattern = wcpattern.to_lowercase(); + } + if opts.entire { + if !wcpattern.is_empty() { + if wcpattern.char_at(0) != ANY_STRING { + wcpattern.insert(0, ANY_STRING); + } + if wcpattern.char_at(wcpattern.len() - 1) != ANY_STRING { + wcpattern.push(ANY_STRING); + } + } else { + wcpattern.push(ANY_STRING); + } + } + WildCardMatcher { + pattern: wcpattern, + total_matched: 0, + opts, + } + } + + fn report_matches(&mut self, arg: &wstr, streams: &mut io_streams_t) { + // Note: --all is a no-op for glob matching since the pattern is always matched + // against the entire argument. + use crate::ffi::wildcard_match; + + let subject = match self.opts.ignore_case { + true => arg.to_lowercase(), + false => arg.to_owned(), + }; + let m = wildcard_match(&subject.to_ffi(), &self.pattern.to_ffi(), false); + + if m ^ self.opts.invert_match { + self.total_matched += 1; + if !self.opts.quiet { + if self.opts.index { + streams.out.append(sprintf!("1 %lu\n", arg.len())); + } else { + streams.out.append(arg); + streams.out.append1('\n'); + } + } + } + } +} diff --git a/fish-rust/src/builtins/string/pad.rs b/fish-rust/src/builtins/string/pad.rs new file mode 100644 index 000000000..7b2ad761d --- /dev/null +++ b/fish-rust/src/builtins/string/pad.rs @@ -0,0 +1,114 @@ +use std::borrow::Cow; + +use super::*; +use crate::wutil::{fish_wcstol, fish_wcswidth}; + +pub struct Pad { + char_to_pad: char, + pad_char_width: i32, + pad_from: Direction, + width: usize, +} + +impl Default for Pad { + fn default() -> Self { + Self { + char_to_pad: ' ', + pad_char_width: 1, + pad_from: Direction::Left, + width: 0, + } + } +} + +impl StringSubCommand<'_> for Pad { + const LONG_OPTIONS: &'static [woption<'static>] = &[ + // FIXME docs say `--char`, there was no long_opt with `--char` in C++ + wopt(L!("chars"), required_argument, 'c'), + wopt(L!("right"), no_argument, 'r'), + wopt(L!("width"), 
required_argument, 'w'), + ]; + const SHORT_OPTIONS: &'static wstr = L!(":c:rw:"); + + fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> { + match c { + 'c' => { + let arg = arg.expect("option -c requires an argument"); + if arg.len() != 1 { + return Err(invalid_args!( + "%ls: Padding should be a character '%ls'\n", + name, + Some(arg) + )); + } + let pad_char_width = fish_wcswidth(arg.slice_to(1)); + // can we ever have negative width? + if pad_char_width == 0 { + return Err(invalid_args!( + "%ls: Invalid padding character of width zero '%ls'\n", + name, + Some(arg) + )); + } + self.pad_char_width = pad_char_width; + self.char_to_pad = arg.char_at(0); + } + 'r' => self.pad_from = Direction::Right, + 'w' => { + self.width = fish_wcstol(arg.unwrap())? + .try_into() + .map_err(|_| invalid_args!("%ls: Invalid width value '%ls'\n", name, arg))? + } + _ => return Err(StringError::UnknownOption), + } + return Ok(()); + } + + fn handle<'args>( + &mut self, + _parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&'args wstr], + ) -> Option { + let mut max_width = 0i32; + let mut inputs: Vec<(Cow<'args, wstr>, i32)> = Vec::new(); + let mut print_newline = true; + + for (arg, want_newline) in Arguments::new(args, optind, streams) { + let width = width_without_escapes(&arg, 0); + max_width = max_width.max(width); + inputs.push((arg, width)); + print_newline = want_newline; + } + + let pad_width = max_width.max(self.width as i32); + + for (input, width) in inputs { + use std::iter::repeat; + + let pad = (pad_width - width) / self.pad_char_width; + let remaining_width = (pad_width - width) % self.pad_char_width; + let mut padded: WString = match self.pad_from { + Direction::Left => repeat(self.char_to_pad) + .take(pad as usize) + .chain(repeat(' ').take(remaining_width as usize)) + .chain(input.chars()) + .collect(), + Direction::Right => input + .chars() + .chain(repeat(' ').take(remaining_width as 
usize)) + .chain(repeat(self.char_to_pad).take(pad as usize)) + .collect(), + }; + + if print_newline { + padded.push('\n'); + } + + streams.out.append(padded); + } + + STATUS_CMD_OK + } +} diff --git a/fish-rust/src/builtins/string/repeat.rs b/fish-rust/src/builtins/string/repeat.rs new file mode 100644 index 000000000..84229171f --- /dev/null +++ b/fish-rust/src/builtins/string/repeat.rs @@ -0,0 +1,145 @@ +use super::*; +use crate::wutil::fish_wcstol; + +#[derive(Default)] +pub struct Repeat { + count: usize, + max: usize, + quiet: bool, + no_newline: bool, +} + +impl StringSubCommand<'_> for Repeat { + const LONG_OPTIONS: &'static [woption<'static>] = &[ + wopt(L!("count"), required_argument, 'n'), + wopt(L!("max"), required_argument, 'm'), + wopt(L!("quiet"), no_argument, 'q'), + wopt(L!("no-newline"), no_argument, 'N'), + ]; + const SHORT_OPTIONS: &'static wstr = L!(":n:m:qN"); + + fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> { + match c { + 'n' => { + self.count = fish_wcstol(arg.unwrap())? + .try_into() + .map_err(|_| invalid_args!("%ls: Invalid count value '%ls'\n", name, arg))? + } + 'm' => { + self.max = fish_wcstol(arg.unwrap())? + .try_into() + .map_err(|_| invalid_args!("%ls: Invalid max value '%ls'\n", name, arg))? + } + 'q' => self.quiet = true, + 'N' => self.no_newline = true, + _ => return Err(StringError::UnknownOption), + } + return Ok(()); + } + + fn handle( + &mut self, + _parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&wstr], + ) -> Option { + if self.max == 0 && self.count == 0 { + // XXX: This used to be allowed, but returned 1. + // Keep it that way for now instead of adding an error. 
+ // streams.err.append(L"Count or max must be greater than zero"); + return STATUS_CMD_ERROR; + } + + let mut all_empty = true; + let mut first = true; + let mut print_newline = true; + + for (w, want_newline) in Arguments::new(args, optind, streams) { + print_newline = want_newline; + if w.is_empty() { + continue; + } + + all_empty = false; + + if self.quiet { + // Early out if we can - see #7495. + return STATUS_CMD_OK; + } + + if !first { + streams.out.append1('\n'); + } + first = false; + + // The maximum size of the string is either the "max" characters, + // or it's the "count" repetitions, whichever ends up lower. + let max = if self.max == 0 + || (self.count > 0 && w.len().wrapping_mul(self.count) < self.max) + { + // TODO: we should disallow overflowing unless max <= w.len().checked_mul(self.count).unwrap_or(usize::MAX) + w.len().wrapping_mul(self.count) + } else { + self.max + }; + + // Reserve a string to avoid writing constantly. + // The 1500 here is a total gluteal extraction, but 500 seems to perform slightly worse. + let chunk_size = 1500; + // The + word length is so we don't have to hit the chunk size exactly, + // which would require us to restart in the middle of the string. + // E.g. imagine repeating "12345678". The first chunk is hit after a last "1234", + // so we would then have to restart by appending "5678", which requires a substring. + // So let's not bother. + // + // Unless of course we don't even print the entire word, in which case we just need max. + let mut chunk = WString::with_capacity(max.min(chunk_size + w.len())); + + let mut i = max; + while i > 0 { + if i >= w.len() { + chunk.push_utfstr(&w); + } else { + chunk.push_utfstr(w.slice_to(i)); + break; + } + + i -= w.len(); + + if chunk.len() >= chunk_size { + // We hit the chunk size, write it repeatedly until we can't anymore. 
+ streams.out.append(&chunk); + while i >= chunk.len() { + streams.out.append(&chunk); + // We can easily be asked to write *a lot* of data, + // so we need to check every so often if the pipe has been closed. + // If we didn't, running `string repeat -n LARGENUMBER foo | pv` + // and pressing ctrl-c seems to hang. + if streams.out.flush_and_check_error() != STATUS_CMD_OK.unwrap() { + return STATUS_CMD_ERROR; + } + i -= chunk.len(); + } + chunk.clear(); + } + } + + // Flush the remainder. + if !chunk.is_empty() { + streams.out.append(&chunk); + } + } + + // Historical behavior is to never append a newline if all strings were empty. + if !self.quiet && !self.no_newline && !all_empty && print_newline { + streams.out.append1('\n'); + } + + if all_empty { + STATUS_CMD_ERROR + } else { + STATUS_CMD_OK + } + } +} diff --git a/fish-rust/src/builtins/string/replace.rs b/fish-rust/src/builtins/string/replace.rs new file mode 100644 index 000000000..dc936aaf0 --- /dev/null +++ b/fish-rust/src/builtins/string/replace.rs @@ -0,0 +1,251 @@ +use pcre2::utf32::{Regex, RegexBuilder}; +use std::borrow::Cow; + +use super::*; +use crate::future_feature_flags::{feature_test, FeatureFlag}; + +#[derive(Default)] +pub struct Replace<'args> { + all: bool, + filter: bool, + ignore_case: bool, + quiet: bool, + regex: bool, + pattern: &'args wstr, + replacement: &'args wstr, +} + +impl<'args> StringSubCommand<'args> for Replace<'args> { + const LONG_OPTIONS: &'static [woption<'static>] = &[ + wopt(L!("all"), no_argument, 'a'), + wopt(L!("filter"), no_argument, 'f'), + wopt(L!("ignore-case"), no_argument, 'i'), + wopt(L!("quiet"), no_argument, 'q'), + wopt(L!("regex"), no_argument, 'r'), + ]; + const SHORT_OPTIONS: &'static wstr = L!(":afiqr"); + + fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> { + match c { + 'a' => self.all = true, + 'f' => self.filter = true, + 'i' => self.ignore_case = true, + 'q' => self.quiet = true, + 'r' => self.regex = 
true, + _ => return Err(StringError::UnknownOption), + } + return Ok(()); + } + + fn take_args( + &mut self, + optind: &mut usize, + args: &[&'args wstr], + streams: &mut io_streams_t, + ) -> Option { + let cmd = args[0]; + let Some(pattern) = args.get(*optind).copied() else { + string_error!(streams, BUILTIN_ERR_ARG_COUNT0, cmd); + return STATUS_INVALID_ARGS; + }; + *optind += 1; + let Some(replacement) = args.get(*optind).copied() else { + string_error!(streams, BUILTIN_ERR_ARG_COUNT1, cmd, 1, 2); + return STATUS_INVALID_ARGS; + }; + *optind += 1; + + self.pattern = pattern; + self.replacement = replacement; + return STATUS_CMD_OK; + } + + fn handle( + &mut self, + _parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&wstr], + ) -> Option { + let cmd = args[0]; + + let replacer = match StringReplacer::new(self.pattern, self.replacement, self) { + Ok(x) => x, + Err(e) => { + e.print_error(args, streams); + return STATUS_INVALID_ARGS; + } + }; + + let mut replace_count = 0; + + for (arg, want_newline) in Arguments::new(args, optind, streams) { + let (replaced, result) = match replacer.replace(arg) { + Ok(x) => x, + Err(e) => { + string_error!( + streams, + "%ls: Regular expression substitute error: %ls\n", + cmd, + e.error_message() + ); + return STATUS_INVALID_ARGS; + } + }; + replace_count += replaced as usize; + + if !self.quiet && (!self.filter || replaced) { + streams.out.append(result); + if want_newline { + streams.out.append1('\n'); + } + } + + if self.quiet && replace_count > 0 { + return STATUS_CMD_OK; + } + } + + if replace_count > 0 { + STATUS_CMD_OK + } else { + STATUS_CMD_ERROR + } + } +} + +#[allow(clippy::large_enum_variant)] +enum StringReplacer<'args, 'opts> { + Regex { + replacement: WString, + regex: Regex, + opts: &'opts Replace<'args>, + }, + Literal { + pattern: Cow<'args, wstr>, + replacement: Cow<'args, wstr>, + opts: &'opts Replace<'args>, + }, +} + +impl<'args, 'opts> StringReplacer<'args, 'opts> { + fn 
interpret_escape(arg: &'args wstr) -> Option { + use crate::common::read_unquoted_escape; + + let mut result: WString = WString::with_capacity(arg.len()); + let mut cursor = arg; + while !cursor.is_empty() { + if cursor.char_at(0) == '\\' { + if let Some(escape_len) = read_unquoted_escape(cursor, &mut result, true, false) { + cursor = cursor.slice_from(escape_len); + } else { + // invalid escape + return None; + } + } else { + result.push(cursor.char_at(0)); + cursor = cursor.slice_from(1); + } + } + return Some(result); + } + + fn new( + pattern: &'args wstr, + replacement: &'args wstr, + opts: &'opts Replace<'args>, + ) -> Result { + let r = match (opts.regex, opts.ignore_case) { + (true, _) => { + let regex = RegexBuilder::new() + .caseless(opts.ignore_case) + // set to behave similarly to match, could probably be either enabled by default or + // allowed to be user-controlled here + .never_utf(true) + .build(pattern.as_char_slice()) + .map_err(|e| RegexError::Compile(pattern.to_owned(), e))?; + + let replacement = if feature_test(FeatureFlag::string_replace_backslash) { + replacement.to_owned() + } else { + Self::interpret_escape(replacement) + .ok_or_else(|| RegexError::InvalidEscape(pattern.to_owned()))? 
+ }; + Self::Regex { + replacement, + regex, + opts, + } + } + (false, true) => Self::Literal { + // previously we used wcsncasecmp but there is no equivalent function in Rust widestring + // this should likely be handled by a using the `literal` option on our regex + pattern: Cow::Owned(pattern.to_lowercase()), + replacement: Cow::Owned(replacement.to_owned()), + opts, + }, + (false, false) => Self::Literal { + pattern: Cow::Borrowed(pattern), + replacement: Cow::Borrowed(replacement), + opts, + }, + }; + Ok(r) + } + + fn replace<'a>(&self, arg: Cow<'a, wstr>) -> Result<(bool, Cow<'a, wstr>), pcre2::Error> { + match self { + StringReplacer::Regex { + replacement, + regex, + opts, + } => { + let res = if opts.all { + regex.replace_all(arg.as_char_slice(), replacement.as_char_slice(), true) + } else { + regex.replace(arg.as_char_slice(), replacement.as_char_slice(), true) + }?; + + let res = match res { + Cow::Borrowed(_slice_of_arg) => (false, arg), + Cow::Owned(s) => (true, Cow::Owned(WString::from_chars(s))), + }; + return Ok(res); + } + StringReplacer::Literal { + pattern, + replacement, + opts, + } => { + if pattern.is_empty() { + return Ok((false, arg)); + } + + // a premature optimization would be to alloc larger if we have replacement.len() > pattern.len() + let mut result = WString::with_capacity(arg.len()); + + let subject = if opts.ignore_case { + arg.to_lowercase() + } else { + arg.as_ref().to_owned() + }; + + let mut offset = 0; + while let Some(idx) = subject[offset..].find(pattern.as_char_slice()) { + result.push_utfstr(&subject[offset..offset + idx]); + result.push_utfstr(&replacement); + offset += idx + pattern.len(); + if !opts.all { + break; + } + } + if offset == 0 { + return Ok((false, arg)); + } + result.push_utfstr(&arg[offset..]); + + Ok((true, Cow::Owned(result))) + } + } + } +} diff --git a/fish-rust/src/builtins/string/shorten.rs b/fish-rust/src/builtins/string/shorten.rs new file mode 100644 index 000000000..0c46ddc97 --- /dev/null +++ 
b/fish-rust/src/builtins/string/shorten.rs @@ -0,0 +1,249 @@ +use super::*; +use crate::common::get_ellipsis_str; +use crate::fallback::fish_wcwidth; +use crate::wcstringutil::split_string; +use crate::wutil::{fish_wcstol, fish_wcswidth}; + +pub struct Shorten<'args> { + chars_to_shorten: &'args wstr, + max: Option, + no_newline: bool, + quiet: bool, + direction: Direction, +} + +impl Default for Shorten<'_> { + fn default() -> Self { + Self { + chars_to_shorten: get_ellipsis_str(), + max: None, + no_newline: false, + quiet: false, + direction: Direction::Right, + } + } +} + +impl<'args> StringSubCommand<'args> for Shorten<'args> { + const LONG_OPTIONS: &'static [woption<'static>] = &[ + // FIXME: documentation says it's --char + wopt(L!("chars"), required_argument, 'c'), + wopt(L!("max"), required_argument, 'm'), + wopt(L!("no-newline"), no_argument, 'N'), + wopt(L!("left"), no_argument, 'l'), + wopt(L!("quiet"), no_argument, 'q'), + ]; + const SHORT_OPTIONS: &'static wstr = L!(":c:m:Nlq"); + + fn parse_opt( + &mut self, + name: &wstr, + c: char, + arg: Option<&'args wstr>, + ) -> Result<(), StringError> { + match c { + 'c' => self.chars_to_shorten = arg.expect("option --char requires an argument"), + 'm' => { + self.max = Some( + fish_wcstol(arg.unwrap())? + .try_into() + .map_err(|_| invalid_args!("%ls: Invalid max value '%ls'\n", name, arg))?, + ) + } + 'N' => self.no_newline = true, + 'l' => self.direction = Direction::Left, + 'q' => self.quiet = true, + _ => return Err(StringError::UnknownOption), + } + return Ok(()); + } + + fn handle( + &mut self, + _parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&wstr], + ) -> Option { + let mut min_width = usize::MAX; + let mut inputs = Vec::new(); + let mut ell = self.chars_to_shorten; + + let iter = Arguments::new(args, optind, streams); + + if self.max == Some(0) { + // Special case: Max of 0 means no shortening. 
+ // This makes this more reusable, so you don't need special-cases like + // + // if test $shorten -gt 0 + // string shorten -m $shorten whatever + // else + // echo whatever + // end + for (arg, _) in iter { + streams.out.append(arg); + streams.out.append1('\n'); + } + return STATUS_CMD_OK; + } + + for (arg, _) in iter { + // Visible width only makes sense line-wise. + // So either we have no-newlines (which means we shorten on the first newline), + // or we handle the lines separately. + let mut splits = split_string(&arg, '\n').into_iter(); + if self.no_newline && splits.len() > 1 { + let mut s = match self.direction { + Direction::Right => splits.next(), + Direction::Left => splits.last(), + } + .unwrap(); + s.push_utfstr(ell); + let width = width_without_escapes(&s, 0); + + if width > 0 && (width as usize) < min_width { + min_width = width as usize; + } + inputs.push(s); + } else { + for s in splits { + let width = width_without_escapes(&s, 0); + if width > 0 && (width as usize) < min_width { + min_width = width as usize; + } + inputs.push(s); + } + } + } + + let ourmax: usize = self.max.unwrap_or(min_width); + + // TODO: Can we have negative width + + let ell_width: i32 = { + let w = fish_wcswidth(ell); + if w > ourmax as i32 { + // If we can't even print our ellipsis, we substitute nothing, + // truncating instead. + ell = L!(""); + 0 + } else { + w + } + }; + + let mut nsub = 0usize; + // We could also error out here if the width of our ellipsis is larger + // than the target width. + // That seems excessive - specifically because the ellipsis on LANG=C + // is "..." (width 3!). 
+ + let skip_escapes = |l: &wstr, pos: usize| -> usize { + let mut totallen = 0usize; + while l.char_at(pos + totallen) == '\x1B' { + let Some(len) = escape_code_length(l.slice_from(pos + totallen)) else { + break; + }; + totallen += len; + } + totallen + }; + + for line in inputs { + let mut pos = 0usize; + let mut max = 0usize; + // Collect how much of the string we can use without going over the maximum. + if self.direction == Direction::Left { + // Our strategy for keeping from the end. + // This is rather unoptimized - actually going *backwards* from the end + // is extremely tricky because we would have to subtract escapes again. + // Also we need to avoid hacking combiners into bits. + // This should work for most cases considering the combiners typically have width 0. + let mut out = L!(""); + while pos < line.len() { + let w = width_without_escapes(&line, pos); + // If we're at the beginning and it fits, we sits. + // + // Otherwise we require it to fit the ellipsis + if (w <= ourmax as i32 && pos == 0) || (w + ell_width <= ourmax as i32) { + out = line.slice_from(pos); + break; + } + + pos += skip_escapes(&line, pos).max(1); + } + if self.quiet && pos != 0 { + return STATUS_CMD_OK; + } + + let output = match pos { + 0 => line, + _ => { + // We have an ellipsis, construct our string and print it. + nsub += 1; + let mut res = WString::with_capacity(ell.len() + out.len()); + res.push_utfstr(ell); + res.push_utfstr(out); + res + } + }; + streams.out.append(output); + streams.out.append1('\n'); + continue; + } else { + /* Direction::Right */ + // Going from the left. + // This is somewhat easier. + while max <= ourmax && pos < line.len() { + pos += skip_escapes(&line, pos); + let w = fish_wcwidth(line.char_at(pos)); + if w <= 0 || max + w as usize + ell_width as usize <= ourmax { + // If it still fits, even if it is the last, we add it. + max += w as usize; + pos += 1; + } else { + // We're at the limit, so see if the entire string fits. 
+ let mut max2: usize = max + w as usize; + let mut pos2 = pos + 1; + while pos2 < line.len() { + pos2 += skip_escapes(&line, pos2); + max2 += fish_wcwidth(line.char_at(pos2)) as usize; + pos2 += 1; + } + + if max2 <= ourmax { + // We're at the end and everything fits, + // no ellipsis. + pos = pos2; + } + break; + } + } + } + + if self.quiet && pos != line.len() { + return STATUS_CMD_OK; + } + + if pos == line.len() { + streams.out.append(line); + streams.out.append1('\n'); + continue; + } + + nsub += 1; + let mut newl = line; + newl.truncate(pos); + newl.push_utfstr(ell); + newl.push('\n'); + streams.out.append(newl); + } + + // Return true if we have shortened something and false otherwise. + if nsub > 0 { + STATUS_CMD_OK + } else { + STATUS_CMD_ERROR + } + } +} diff --git a/fish-rust/src/builtins/string/split.rs b/fish-rust/src/builtins/string/split.rs new file mode 100644 index 000000000..0dbb887f9 --- /dev/null +++ b/fish-rust/src/builtins/string/split.rs @@ -0,0 +1,285 @@ +use std::borrow::Cow; +use std::ops::Deref; + +use super::*; +use crate::wcstringutil::split_about; +use crate::wutil::{fish_wcstoi, fish_wcstol}; + +pub struct Split<'args> { + quiet: bool, + split_from: Direction, + max: usize, + no_empty: bool, + fields: Fields, + allow_empty: bool, + pub is_split0: bool, + sep: &'args wstr, +} + +impl Default for Split<'_> { + fn default() -> Self { + Self { + quiet: false, + split_from: Direction::Left, + max: usize::MAX, + no_empty: false, + fields: Fields(Vec::new()), + allow_empty: false, + is_split0: false, + sep: L!("\0"), + } + } +} + +#[repr(transparent)] +struct Fields(Vec); + +// we have a newtype just for the sake of implementing TryFrom +impl Deref for Fields { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +enum FieldParseError { + /// Unable to parse as integer + Number, + /// One of the ends in a range is either too big or small + Range, + /// The field is a valid number but outside of the allowed range + 
Field, +} + +impl From for FieldParseError { + fn from(_: crate::wutil::wcstoi::Error) -> Self { + FieldParseError::Number + } +} + +impl<'args> TryFrom<&'args wstr> for Fields { + type Error = FieldParseError; + + /// FIELDS is a comma-separated string of field numbers and/or spans. + /// Each field is one-indexed. + fn try_from(value: &wstr) -> Result { + fn parse_field(f: &wstr) -> Result, FieldParseError> { + use FieldParseError::*; + let range: Vec<&wstr> = f.split('-').collect(); + let range: Vec = match range[..] { + [s, e] => { + let start = fish_wcstoi(s)? as usize; + let end = fish_wcstoi(e)? as usize; + + if start == 0 || end == 0 { + return Err(Range); + } + + if start <= end { + // we store as 0-indexed, but the range is 1-indexed + (start - 1..end).collect() + } else { + // this is allowed + (end - 1..start).rev().collect() + } + } + _ => match fish_wcstoi(f)? as usize { + n @ 1.. => vec![n - 1], + _ => return Err(Field), + }, + }; + Ok(range) + } + + let fields = value.split(',').map(parse_field); + + let mut indices = Vec::new(); + for field in fields { + indices.extend(field?); + } + + Ok(Self(indices)) + } +} + +impl<'args> StringSubCommand<'args> for Split<'args> { + const LONG_OPTIONS: &'static [woption<'static>] = &[ + wopt(L!("quiet"), no_argument, 'q'), + wopt(L!("right"), no_argument, 'r'), + wopt(L!("max"), required_argument, 'm'), + wopt(L!("no-empty"), no_argument, 'n'), + wopt(L!("fields"), required_argument, 'f'), + // FIXME: allow-empty is not documented + wopt(L!("allow-empty"), no_argument, 'a'), + ]; + const SHORT_OPTIONS: &'static wstr = L!(":qrm:nf:a"); + + fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> { + match c { + 'q' => self.quiet = true, + 'r' => self.split_from = Direction::Right, + 'm' => { + self.max = fish_wcstol(arg.unwrap())? + .try_into() + .map_err(|_| invalid_args!("%ls: Invalid max value '%ls'\n", name, arg))? 
+ } + 'n' => self.no_empty = true, + 'f' => { + self.fields = arg.unwrap().try_into().map_err(|e| match e { + FieldParseError::Number => StringError::NotANumber, + FieldParseError::Range => { + invalid_args!("%ls: Invalid range value for field '%ls'\n", name, arg) + } + FieldParseError::Field => { + invalid_args!("%ls: Invalid fields value '%ls'\n", name, arg) + } + })?; + } + 'a' => self.allow_empty = true, + _ => return Err(StringError::UnknownOption), + } + return Ok(()); + } + + fn take_args( + &mut self, + optind: &mut usize, + args: &[&'args wstr], + streams: &mut io_streams_t, + ) -> Option { + if self.is_split0 { + return STATUS_CMD_OK; + } + let Some(arg) = args.get(*optind).copied() else { + string_error!(streams, BUILTIN_ERR_ARG_COUNT0, args[0]); + return STATUS_INVALID_ARGS; + }; + *optind += 1; + self.sep = arg; + return STATUS_CMD_OK; + } + + fn handle( + &mut self, + _parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&'args wstr], + ) -> Option { + if self.fields.is_empty() && self.allow_empty { + streams.err.append(wgettext_fmt!( + BUILTIN_ERR_COMBO2, + args[0], + wgettext!("--allow-empty is only valid with --fields") + )); + return STATUS_INVALID_ARGS; + } + + let sep = self.sep; + let mut all_splits: Vec>> = Vec::new(); + let mut split_count = 0usize; + let mut arg_count = 0usize; + + let argiter = match self.is_split0 { + false => Arguments::new(args, optind, streams), + true => Arguments::without_splitting_on_newline(args, optind, streams), + }; + for (arg, _) in argiter { + let splits: Vec> = match (self.split_from, arg) { + (Direction::Right, arg) => { + let mut rev = arg.into_owned(); + rev.as_char_slice_mut().reverse(); + let sep: WString = sep.chars().rev().collect(); + split_about(&rev, &sep, self.max, self.no_empty) + .into_iter() + // If we are from the right, split_about gave us reversed strings, in reversed order! 
+ .map(|s| Cow::Owned(s.chars().rev().collect::())) + .rev() + .collect() + } + // we need to special-case the Cow::Borrowed case, since + // let arg: &'args wstr = &arg; + // does not compile since `arg` can be dropped at the end of this scope + // making the reference invalid if it is owned. + (Direction::Left, Cow::Borrowed(arg)) => { + split_about(arg, sep, self.max, self.no_empty) + .into_iter() + .map(Cow::Borrowed) + .collect() + } + (Direction::Left, Cow::Owned(arg)) => { + split_about(&arg, sep, self.max, self.no_empty) + .into_iter() + .map(|s| Cow::Owned(s.to_owned())) + .collect() + } + }; + + // If we're quiet, we return early if we've found something to split. + if self.quiet && splits.len() > 1 { + return STATUS_CMD_OK; + } + split_count += splits.len(); + arg_count += 1; + all_splits.push(splits); + } + + if self.quiet { + return if split_count > arg_count { + STATUS_CMD_OK + } else { + STATUS_CMD_ERROR + }; + } + + for mut splits in all_splits { + if self.is_split0 && !splits.is_empty() { + // split0 ignores a trailing \0, so a\0b\0 is two elements. + // In contrast to split, where a\nb\n is three - "a", "b" and "". + // + // Remove the last element if it is empty. + if splits.last().unwrap().is_empty() { + splits.pop(); + } + } + + let splits = splits; + + if !self.fields.is_empty() { + // Print nothing and return error if any of the supplied + // fields do not exist, unless `--allow-empty` is used. + if !self.allow_empty { + for field in self.fields.iter() { + // we already have checked the start + if *field >= splits.len() { + return STATUS_CMD_ERROR; + } + } + } + for field in self.fields.iter() { + if let Some(val) = splits.get(*field) { + streams.out.append_with_separation( + val, + separation_type_t::explicitly, + true, + ); + } + } + } else { + for split in &splits { + streams + .out + .append_with_separation(split, separation_type_t::explicitly, true); + } + } + } + + // We split something if we have more split values than args. 
+ return if split_count > arg_count { + STATUS_CMD_OK + } else { + STATUS_CMD_ERROR + }; + } +} diff --git a/fish-rust/src/builtins/string/sub.rs b/fish-rust/src/builtins/string/sub.rs new file mode 100644 index 000000000..bb9d92290 --- /dev/null +++ b/fish-rust/src/builtins/string/sub.rs @@ -0,0 +1,115 @@ +use std::num::NonZeroI64; + +use super::*; +use crate::wutil::fish_wcstol; + +#[derive(Default)] +pub struct Sub { + length: Option, + quiet: bool, + start: Option, + end: Option, +} + +impl StringSubCommand<'_> for Sub { + const LONG_OPTIONS: &'static [woption<'static>] = &[ + wopt(L!("length"), required_argument, 'l'), + wopt(L!("start"), required_argument, 's'), + wopt(L!("end"), required_argument, 'e'), + wopt(L!("quiet"), no_argument, 'q'), + ]; + const SHORT_OPTIONS: &'static wstr = L!(":l:qs:e:"); + + fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> { + match c { + 'l' => { + self.length = + Some(fish_wcstol(arg.unwrap())?.try_into().map_err(|_| { + invalid_args!("%ls: Invalid length value '%ls'\n", name, arg) + })?) + } + 's' => { + self.start = + Some(fish_wcstol(arg.unwrap())?.try_into().map_err(|_| { + invalid_args!("%ls: Invalid start value '%ls'\n", name, arg) + })?) + } + 'e' => { + self.end = Some( + fish_wcstol(arg.unwrap())? 
+ .try_into() + .map_err(|_| invalid_args!("%ls: Invalid end value '%ls'\n", name, arg))?, + ) + } + 'q' => self.quiet = true, + _ => return Err(StringError::UnknownOption), + } + return Ok(()); + } + + fn handle( + &mut self, + _parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&wstr], + ) -> Option { + let cmd = args[0]; + if self.length.is_some() && self.end.is_some() { + streams.err.append(wgettext_fmt!( + BUILTIN_ERR_COMBO2, + cmd, + wgettext!("--end and --length are mutually exclusive") + )); + return STATUS_INVALID_ARGS; + } + + let mut nsub = 0; + for (s, want_newline) in Arguments::new(args, optind, streams) { + let start: usize = match self.start.map(i64::from).unwrap_or_default() { + n @ 1.. => n as usize - 1, + 0 => 0, + n => { + let n = u64::min(n.unsigned_abs(), usize::MAX as u64) as usize; + s.len().saturating_sub(n) + } + } + .clamp(0, s.len()); + + let count = { + let n = self + .end + .map(|e| match i64::from(e) { + // end can never be 0 + n @ 1.. 
=> n as usize, + n => { + let n = u64::min(n.unsigned_abs(), usize::MAX as u64) as usize; + s.len().saturating_sub(n) + } + }) + .map(|n| n.saturating_sub(start)); + + self.length.or(n).unwrap_or(s.len()) + }; + + if !self.quiet { + streams + .out + .append(&s[start..usize::min(start + count, s.len())]); + if want_newline { + streams.out.append1('\n'); + } + } + nsub += 1; + if self.quiet { + return STATUS_CMD_OK; + } + } + + if nsub > 0 { + STATUS_CMD_OK + } else { + STATUS_CMD_ERROR + } + } +} diff --git a/fish-rust/src/builtins/string/transform.rs b/fish-rust/src/builtins/string/transform.rs new file mode 100644 index 000000000..eee7576eb --- /dev/null +++ b/fish-rust/src/builtins/string/transform.rs @@ -0,0 +1,49 @@ +use super::*; + +pub struct Transform { + pub quiet: bool, + pub func: fn(&wstr) -> WString, +} + +impl StringSubCommand<'_> for Transform { + const LONG_OPTIONS: &'static [woption<'static>] = &[wopt(L!("quiet"), no_argument, 'q')]; + const SHORT_OPTIONS: &'static wstr = L!(":q"); + fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> { + match c { + 'q' => self.quiet = true, + _ => return Err(StringError::UnknownOption), + } + return Ok(()); + } + + fn handle( + &mut self, + _parser: &mut parser_t, + streams: &mut io_streams_t, + optind: &mut usize, + args: &[&wstr], + ) -> Option { + let mut n_transformed = 0usize; + + for (arg, want_newline) in Arguments::new(args, optind, streams) { + let transformed = (self.func)(&arg); + if transformed != arg { + n_transformed += 1; + } + if !self.quiet { + streams.out.append(&transformed); + if want_newline { + streams.out.append1('\n'); + } + } else if n_transformed > 0 { + return STATUS_CMD_OK; + } + } + + if n_transformed > 0 { + STATUS_CMD_OK + } else { + STATUS_CMD_ERROR + } + } +} diff --git a/fish-rust/src/builtins/string/trim.rs b/fish-rust/src/builtins/string/trim.rs new file mode 100644 index 000000000..2d05cbd06 --- /dev/null +++ 
b/fish-rust/src/builtins/string/trim.rs
@@ -0,0 +1,99 @@
+use super::*;
+
+/// Options for `string trim`: which characters to strip, from which end(s), and whether to
+/// print the result.
+pub struct Trim<'args> {
+    /// Characters removed from the ends of each argument.
+    chars_to_trim: &'args wstr,
+    /// Strip from the start of the string.
+    left: bool,
+    /// Strip from the end of the string.
+    right: bool,
+    /// Suppress output; only the status reports whether anything was trimmed.
+    quiet: bool,
+}
+
+impl Default for Trim<'_> {
+    /// No side selected, output enabled, and the default trim set.
+    fn default() -> Self {
+        Self {
+            left: false,
+            right: false,
+            quiet: false,
+            // from " \f\n\r\t\v"
+            chars_to_trim: L!(" \x0C\n\r\x09\x0B"),
+        }
+    }
+}
+
+impl<'args> StringSubCommand<'args> for Trim<'args> {
+    const LONG_OPTIONS: &'static [woption<'static>] = &[
+        wopt(L!("chars"), required_argument, 'c'),
+        wopt(L!("left"), no_argument, 'l'),
+        wopt(L!("right"), no_argument, 'r'),
+        wopt(L!("quiet"), no_argument, 'q'),
+    ];
+    const SHORT_OPTIONS: &'static wstr = L!(":c:lrq");
+
+    /// Record a single parsed option; anything unrecognised is `StringError::UnknownOption`.
+    fn parse_opt(
+        &mut self,
+        _n: &wstr,
+        c: char,
+        arg: Option<&'args wstr>,
+    ) -> Result<(), StringError> {
+        match c {
+            'c' => self.chars_to_trim = arg.unwrap(),
+            'l' => self.left = true,
+            'r' => self.right = true,
+            'q' => self.quiet = true,
+            _ => return Err(StringError::UnknownOption),
+        }
+        Ok(())
+    }
+
+    /// Trim every argument and print the result unless `quiet` is set.
+    ///
+    /// Returns `STATUS_CMD_OK` if at least one character was removed from any argument and
+    /// `STATUS_CMD_ERROR` otherwise. In quiet mode the first argument that brings the running
+    /// total above zero ends the loop early.
+    fn handle(
+        &mut self,
+        _parser: &mut parser_t,
+        streams: &mut io_streams_t,
+        optind: &mut usize,
+        args: &[&wstr],
+    ) -> Option {
+        // Selecting no side at all means "trim both".
+        if !(self.left || self.right) {
+            self.left = true;
+            self.right = true;
+        }
+
+        let trim_set = self.chars_to_trim;
+        let mut total_trimmed = 0;
+
+        for (arg, want_newline) in Arguments::new(args, optind, streams) {
+            let text: &wstr = &arg;
+
+            let leading = if self.left {
+                text.chars().take_while(|&c| trim_set.contains(c)).count()
+            } else {
+                0
+            };
+            // Skip the right-hand scan when the left-hand one already consumed the whole
+            // string; otherwise both scans would count the same characters and the slice
+            // bounds below would cross.
+            let trailing = if self.right && leading != arg.len() {
+                text.chars()
+                    .rev()
+                    .take_while(|&c| trim_set.contains(c))
+                    .count()
+            } else {
+                0
+            };
+            total_trimmed += leading + trailing;
+
+            if self.quiet {
+                if total_trimmed > 0 {
+                    return STATUS_CMD_OK;
+                }
+                continue;
+            }
+
+            streams.out.append(&arg[leading..arg.len() - trailing]);
+            if want_newline {
+                streams.out.append1('\n');
+            }
+        }
+
+        if total_trimmed > 0 {
+            STATUS_CMD_OK
+        } else {
+            STATUS_CMD_ERROR
+        }
+    }
+}
diff --git a/fish-rust/src/builtins/string/unescape.rs b/fish-rust/src/builtins/string/unescape.rs
new file mode 100644
index 000000000..fb441a4c6
--- /dev/null
+++ b/fish-rust/src/builtins/string/unescape.rs
@@ -0,0 +1,57 @@
+use super::*;
+use crate::common::{unescape_string, UnescapeStringStyle};
+
+#[derive(Default)]
+pub struct Unescape {
+    no_quoted: bool,
+    style: UnescapeStringStyle,
+}
+
+impl StringSubCommand<'_> for Unescape {
+    const LONG_OPTIONS: &'static [woption<'static>] = &[
+        // FIXME: this flag means nothing, but was present in the C++ code
+        // should be removed
+        wopt(L!("no-quoted"), no_argument, 'n'),
+        wopt(L!("style"), required_argument, NONOPTION_CHAR_CODE),
+    ];
+    const SHORT_OPTIONS: &'static wstr = L!(":n");
+
+    fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> {
+        match c {
+            'n' => self.no_quoted = true,
+            NONOPTION_CHAR_CODE => {
+                self.style = arg
+                    .unwrap()
+                    .try_into()
+                    .map_err(|_| invalid_args!("%ls: Invalid style 
value '%ls'\n", name, arg))?
+            }
+            _ => return Err(StringError::UnknownOption),
+        }
+        return Ok(());
+    }
+
+    /// Unescape every argument with `self.style` and print each result.
+    ///
+    /// Arguments for which `unescape_string` yields `None` produce no output and are not
+    /// counted. Returns `STATUS_CMD_OK` if at least one argument was unescaped and
+    /// `STATUS_CMD_ERROR` otherwise.
+    fn handle(
+        &mut self,
+        _parser: &mut parser_t,
+        streams: &mut io_streams_t,
+        optind: &mut usize,
+        args: &[&wstr],
+    ) -> Option {
+        let mut nesc = 0;
+        for (arg, want_newline) in Arguments::new(args, optind, streams) {
+            let unescaped = match unescape_string(&arg, self.style) {
+                Some(res) => res,
+                // Nothing to print for this argument; move on to the next one.
+                None => continue,
+            };
+            streams.out.append(unescaped);
+            if want_newline {
+                streams.out.append1('\n');
+            }
+            nesc += 1;
+        }
+
+        if nesc > 0 {
+            STATUS_CMD_OK
+        } else {
+            STATUS_CMD_ERROR
+        }
+    }
+}
diff --git a/fish-rust/src/builtins/tests/mod.rs b/fish-rust/src/builtins/tests/mod.rs
index d718bc4f7..b25db0384 100644
--- a/fish-rust/src/builtins/tests/mod.rs
+++ b/fish-rust/src/builtins/tests/mod.rs
@@ -1 +1,2 @@
+mod string_tests;
 mod test_tests;
diff --git a/fish-rust/src/builtins/tests/string_tests.rs b/fish-rust/src/builtins/tests/string_tests.rs
new file mode 100644
index 000000000..920613513
--- /dev/null
+++ b/fish-rust/src/builtins/tests/string_tests.rs
@@ -0,0 +1,303 @@
+use crate::ffi_tests::add_test;
+
+add_test! {"test_string", || {
+    use crate::ffi::parser_t;
+    use crate::ffi;
+    use crate::builtins::string::string;
+    use crate::wchar_ffi::WCharFromFFI;
+    use crate::common::{EscapeStringStyle, escape_string};
+    use crate::wchar::wstr;
+    use crate::wchar::L;
+    use crate::builtins::shared::{STATUS_CMD_ERROR,STATUS_CMD_OK, STATUS_INVALID_ARGS};
+
+    use crate::future_feature_flags::{scoped_test, FeatureFlag};
+
+    // avoid 1.3k L!()'s
+    macro_rules! test_cases {
+        ([$($x:expr),*], $rc:expr, $out:expr) => { (vec![$(L!($x)),*], $rc, L!($out)) };
+        [$($x:tt),* $(,)?] 
=> { [$(test_cases!$x),*] }; + } + + // TODO: these should be individual tests, not all in one, port when we can run these with `cargo test` + fn string_test(mut args: Vec<&wstr>, expected_rc: Option, expected_out: &wstr) { + let parser: &mut parser_t = unsafe { &mut *parser_t::principal_parser_ffi() }; + let mut streams = ffi::make_test_io_streams_ffi(); + let mut io = crate::builtins::shared::io_streams_t::new(streams.pin_mut()); + + let rc = string(parser, &mut io, args.as_mut_slice()).expect("string failed"); + + assert_eq!(expected_rc.unwrap(), rc, "string builtin returned unexpected return code"); + + let string_stream_contents = &ffi::get_test_output_ffi(&streams); + let actual = escape_string(&string_stream_contents.from_ffi(), EscapeStringStyle::default()); + let expected = escape_string(expected_out, EscapeStringStyle::default()); + assert_eq!(expected, actual, "string builtin returned unexpected output"); + } + + let tests = test_cases![ + (["string", "escape"], STATUS_CMD_ERROR, ""), + (["string", "escape", ""], STATUS_CMD_OK, "''\n"), + (["string", "escape", "-n", ""], STATUS_CMD_OK, "\n"), + (["string", "escape", "a"], STATUS_CMD_OK, "a\n"), + (["string", "escape", "\x07"], STATUS_CMD_OK, "\\cg\n"), + (["string", "escape", "\"x\""], STATUS_CMD_OK, "'\"x\"'\n"), + (["string", "escape", "hello world"], STATUS_CMD_OK, "'hello world'\n"), + (["string", "escape", "-n", "hello world"], STATUS_CMD_OK, "hello\\ world\n"), + (["string", "escape", "hello", "world"], STATUS_CMD_OK, "hello\nworld\n"), + (["string", "escape", "-n", "~"], STATUS_CMD_OK, "\\~\n"), + + (["string", "join"], STATUS_INVALID_ARGS, ""), + (["string", "join", ""], STATUS_CMD_ERROR, ""), + (["string", "join", "", "", "", ""], STATUS_CMD_OK, "\n"), + (["string", "join", "", "a", "b", "c"], STATUS_CMD_OK, "abc\n"), + (["string", "join", ".", "fishshell", "com"], STATUS_CMD_OK, "fishshell.com\n"), + (["string", "join", "/", "usr"], STATUS_CMD_ERROR, "usr\n"), + (["string", "join", "/", "usr", 
"local", "bin"], STATUS_CMD_OK, "usr/local/bin\n"), + (["string", "join", "...", "3", "2", "1"], STATUS_CMD_OK, "3...2...1\n"), + (["string", "join", "-q"], STATUS_INVALID_ARGS, ""), + (["string", "join", "-q", "."], STATUS_CMD_ERROR, ""), + (["string", "join", "-q", ".", "."], STATUS_CMD_ERROR, ""), + + (["string", "length"], STATUS_CMD_ERROR, ""), + (["string", "length", ""], STATUS_CMD_ERROR, "0\n"), + (["string", "length", "", "", ""], STATUS_CMD_ERROR, "0\n0\n0\n"), + (["string", "length", "a"], STATUS_CMD_OK, "1\n"), + + (["string", "length", "\u{2008A}"], STATUS_CMD_OK, "1\n"), + (["string", "length", "um", "dois", "três"], STATUS_CMD_OK, "2\n4\n4\n"), + (["string", "length", "um", "dois", "três"], STATUS_CMD_OK, "2\n4\n4\n"), + (["string", "length", "-q"], STATUS_CMD_ERROR, ""), + (["string", "length", "-q", ""], STATUS_CMD_ERROR, ""), + (["string", "length", "-q", "a"], STATUS_CMD_OK, ""), + + (["string", "match"], STATUS_INVALID_ARGS, ""), + (["string", "match", ""], STATUS_CMD_ERROR, ""), + (["string", "match", "", ""], STATUS_CMD_OK, "\n"), + (["string", "match", "?", "a"], STATUS_CMD_OK, "a\n"), + (["string", "match", "*", ""], STATUS_CMD_OK, "\n"), + (["string", "match", "**", ""], STATUS_CMD_OK, "\n"), + (["string", "match", "*", "xyzzy"], STATUS_CMD_OK, "xyzzy\n"), + (["string", "match", "**", "plugh"], STATUS_CMD_OK, "plugh\n"), + (["string", "match", "a*b", "axxb"], STATUS_CMD_OK, "axxb\n"), + (["string", "match", "a??b", "axxb"], STATUS_CMD_OK, "axxb\n"), + (["string", "match", "-i", "a??B", "axxb"], STATUS_CMD_OK, "axxb\n"), + (["string", "match", "-i", "a??b", "Axxb"], STATUS_CMD_OK, "Axxb\n"), + (["string", "match", "a*", "axxb"], STATUS_CMD_OK, "axxb\n"), + (["string", "match", "*a", "xxa"], STATUS_CMD_OK, "xxa\n"), + (["string", "match", "*a*", "axa"], STATUS_CMD_OK, "axa\n"), + (["string", "match", "*a*", "xax"], STATUS_CMD_OK, "xax\n"), + (["string", "match", "*a*", "bxa"], STATUS_CMD_OK, "bxa\n"), + (["string", "match", "*a", "a"], 
STATUS_CMD_OK, "a\n"), + (["string", "match", "a*", "a"], STATUS_CMD_OK, "a\n"), + (["string", "match", "a*b*c", "axxbyyc"], STATUS_CMD_OK, "axxbyyc\n"), + (["string", "match", "\\*", "*"], STATUS_CMD_OK, "*\n"), + (["string", "match", "a*\\", "abc\\"], STATUS_CMD_OK, "abc\\\n"), + (["string", "match", "a*\\?", "abc?"], STATUS_CMD_OK, "abc?\n"), + + (["string", "match", "?", ""], STATUS_CMD_ERROR, ""), + (["string", "match", "?", "ab"], STATUS_CMD_ERROR, ""), + (["string", "match", "??", "a"], STATUS_CMD_ERROR, ""), + (["string", "match", "?a", "a"], STATUS_CMD_ERROR, ""), + (["string", "match", "a?", "a"], STATUS_CMD_ERROR, ""), + (["string", "match", "a??B", "axxb"], STATUS_CMD_ERROR, ""), + (["string", "match", "a*b", "axxbc"], STATUS_CMD_ERROR, ""), + (["string", "match", "*b", "bbba"], STATUS_CMD_ERROR, ""), + (["string", "match", "0x[0-9a-fA-F][0-9a-fA-F]", "0xbad"], STATUS_CMD_ERROR, ""), + + (["string", "match", "-a", "*", "ab", "cde"], STATUS_CMD_OK, "ab\ncde\n"), + (["string", "match", "*", "ab", "cde"], STATUS_CMD_OK, "ab\ncde\n"), + (["string", "match", "-n", "*d*", "cde"], STATUS_CMD_OK, "1 3\n"), + (["string", "match", "-n", "*x*", "cde"], STATUS_CMD_ERROR, ""), + (["string", "match", "-q", "a*", "b", "c"], STATUS_CMD_ERROR, ""), + (["string", "match", "-q", "a*", "b", "a"], STATUS_CMD_OK, ""), + + (["string", "match", "-r"], STATUS_INVALID_ARGS, ""), + (["string", "match", "-r", ""], STATUS_CMD_ERROR, ""), + (["string", "match", "-r", "", ""], STATUS_CMD_OK, "\n"), + (["string", "match", "-r", ".", "a"], STATUS_CMD_OK, "a\n"), + (["string", "match", "-r", ".*", ""], STATUS_CMD_OK, "\n"), + (["string", "match", "-r", "a*b", "b"], STATUS_CMD_OK, "b\n"), + (["string", "match", "-r", "a*b", "aab"], STATUS_CMD_OK, "aab\n"), + (["string", "match", "-r", "-i", "a*b", "Aab"], STATUS_CMD_OK, "Aab\n"), + (["string", "match", "-r", "-a", "a[bc]", "abadac"], STATUS_CMD_OK, "ab\nac\n"), + (["string", "match", "-r", "a", "xaxa", "axax"], STATUS_CMD_OK, "a\na\n"), 
+ (["string", "match", "-r", "-a", "a", "xaxa", "axax"], STATUS_CMD_OK, "a\na\na\na\n"), + (["string", "match", "-r", "a[bc]", "abadac"], STATUS_CMD_OK, "ab\n"), + (["string", "match", "-r", "-q", "a[bc]", "abadac"], STATUS_CMD_OK, ""), + (["string", "match", "-r", "-q", "a[bc]", "ad"], STATUS_CMD_ERROR, ""), + (["string", "match", "-r", "(a+)b(c)", "aabc"], STATUS_CMD_OK, "aabc\naa\nc\n"), + (["string", "match", "-r", "-a", "(a)b(c)", "abcabc"], STATUS_CMD_OK, "abc\na\nc\nabc\na\nc\n"), + (["string", "match", "-r", "(a)b(c)", "abcabc"], STATUS_CMD_OK, "abc\na\nc\n"), + (["string", "match", "-r", "(a|(z))(bc)", "abc"], STATUS_CMD_OK, "abc\na\nbc\n"), + (["string", "match", "-r", "-n", "a", "ada", "dad"], STATUS_CMD_OK, "1 1\n2 1\n"), + (["string", "match", "-r", "-n", "-a", "a", "bacadae"], STATUS_CMD_OK, "2 1\n4 1\n6 1\n"), + (["string", "match", "-r", "-n", "(a).*(b)", "a---b"], STATUS_CMD_OK, "1 5\n1 1\n5 1\n"), + (["string", "match", "-r", "-n", "(a)(b)", "ab"], STATUS_CMD_OK, "1 2\n1 1\n2 1\n"), + (["string", "match", "-r", "-n", "(a)(b)", "abab"], STATUS_CMD_OK, "1 2\n1 1\n2 1\n"), + (["string", "match", "-r", "-n", "-a", "(a)(b)", "abab"], STATUS_CMD_OK, "1 2\n1 1\n2 1\n3 2\n3 1\n4 1\n"), + (["string", "match", "-r", "*", ""], STATUS_INVALID_ARGS, ""), + (["string", "match", "-r", "-a", "a*", "b"], STATUS_CMD_OK, "\n\n"), + (["string", "match", "-r", "foo\\Kbar", "foobar"], STATUS_CMD_OK, "bar\n"), + (["string", "match", "-r", "(foo)\\Kbar", "foobar"], STATUS_CMD_OK, "bar\nfoo\n"), + (["string", "replace"], STATUS_INVALID_ARGS, ""), + (["string", "replace", ""], STATUS_INVALID_ARGS, ""), + (["string", "replace", "", ""], STATUS_CMD_ERROR, ""), + (["string", "replace", "", "", ""], STATUS_CMD_ERROR, "\n"), + (["string", "replace", "", "", " "], STATUS_CMD_ERROR, " \n"), + (["string", "replace", "a", "b", ""], STATUS_CMD_ERROR, "\n"), + (["string", "replace", "a", "b", "a"], STATUS_CMD_OK, "b\n"), + (["string", "replace", "a", "b", "xax"], STATUS_CMD_OK, 
"xbx\n"), + (["string", "replace", "a", "b", "xax", "axa"], STATUS_CMD_OK, "xbx\nbxa\n"), + (["string", "replace", "bar", "x", "red barn"], STATUS_CMD_OK, "red xn\n"), + (["string", "replace", "x", "bar", "red xn"], STATUS_CMD_OK, "red barn\n"), + (["string", "replace", "--", "x", "-", "xyz"], STATUS_CMD_OK, "-yz\n"), + (["string", "replace", "--", "y", "-", "xyz"], STATUS_CMD_OK, "x-z\n"), + (["string", "replace", "--", "z", "-", "xyz"], STATUS_CMD_OK, "xy-\n"), + (["string", "replace", "-i", "z", "X", "_Z_"], STATUS_CMD_OK, "_X_\n"), + (["string", "replace", "-a", "a", "A", "aaa"], STATUS_CMD_OK, "AAA\n"), + (["string", "replace", "-i", "a", "z", "AAA"], STATUS_CMD_OK, "zAA\n"), + (["string", "replace", "-q", "x", ">x<", "x"], STATUS_CMD_OK, ""), + (["string", "replace", "-a", "x", "", "xxx"], STATUS_CMD_OK, "\n"), + (["string", "replace", "-a", "***", "_", "*****"], STATUS_CMD_OK, "_**\n"), + (["string", "replace", "-a", "***", "***", "******"], STATUS_CMD_OK, "******\n"), + (["string", "replace", "-a", "a", "b", "xax", "axa"], STATUS_CMD_OK, "xbx\nbxb\n"), + + (["string", "replace", "-r"], STATUS_INVALID_ARGS, ""), + (["string", "replace", "-r", ""], STATUS_INVALID_ARGS, ""), + (["string", "replace", "-r", "", ""], STATUS_CMD_ERROR, ""), + (["string", "replace", "-r", "", "", ""], STATUS_CMD_OK, "\n"), // pcre2 behavior + (["string", "replace", "-r", "", "", " "], STATUS_CMD_OK, " \n"), // pcre2 behavior + (["string", "replace", "-r", "a", "b", ""], STATUS_CMD_ERROR, "\n"), + (["string", "replace", "-r", "a", "b", "a"], STATUS_CMD_OK, "b\n"), + (["string", "replace", "-r", ".", "x", "abc"], STATUS_CMD_OK, "xbc\n"), + (["string", "replace", "-r", ".", "", "abc"], STATUS_CMD_OK, "bc\n"), + (["string", "replace", "-r", "(\\w)(\\w)", "$2$1", "ab"], STATUS_CMD_OK, "ba\n"), + (["string", "replace", "-r", "(\\w)", "$1$1", "ab"], STATUS_CMD_OK, "aab\n"), + (["string", "replace", "-r", "-a", ".", "x", "abc"], STATUS_CMD_OK, "xxx\n"), + (["string", "replace", "-r", "-a", 
"(\\w)", "$1$1", "ab"], STATUS_CMD_OK, "aabb\n"), + (["string", "replace", "-r", "-a", ".", "", "abc"], STATUS_CMD_OK, "\n"), + (["string", "replace", "-r", "a", "x", "bc", "cd", "de"], STATUS_CMD_ERROR, "bc\ncd\nde\n"), + (["string", "replace", "-r", "a", "x", "aba", "caa"], STATUS_CMD_OK, "xba\ncxa\n"), + (["string", "replace", "-r", "-a", "a", "x", "aba", "caa"], STATUS_CMD_OK, "xbx\ncxx\n"), + (["string", "replace", "-r", "-i", "A", "b", "xax"], STATUS_CMD_OK, "xbx\n"), + (["string", "replace", "-r", "-i", "[a-z]", ".", "1A2B"], STATUS_CMD_OK, "1.2B\n"), + (["string", "replace", "-r", "A", "b", "xax"], STATUS_CMD_ERROR, "xax\n"), + (["string", "replace", "-r", "a", "$1", "a"], STATUS_INVALID_ARGS, ""), + (["string", "replace", "-r", "(a)", "$2", "a"], STATUS_INVALID_ARGS, ""), + (["string", "replace", "-r", "*", ".", "a"], STATUS_INVALID_ARGS, ""), + (["string", "replace", "-ra", "x", "\\c"], STATUS_CMD_ERROR, ""), + (["string", "replace", "-r", "^(.)", "\t$1", "abc", "x"], STATUS_CMD_OK, "\tabc\n\tx\n"), + + (["string", "split"], STATUS_INVALID_ARGS, ""), + (["string", "split", ":"], STATUS_CMD_ERROR, ""), + (["string", "split", ".", "www.ch.ic.ac.uk"], STATUS_CMD_OK, "www\nch\nic\nac\nuk\n"), + (["string", "split", "..", "...."], STATUS_CMD_OK, "\n\n\n"), + (["string", "split", "-m", "x", "..", "...."], STATUS_INVALID_ARGS, ""), + (["string", "split", "-m1", "..", "...."], STATUS_CMD_OK, "\n..\n"), + (["string", "split", "-m0", "/", "/usr/local/bin/fish"], STATUS_CMD_ERROR, "/usr/local/bin/fish\n"), + (["string", "split", "-m2", ":", "a:b:c:d", "e:f:g:h"], STATUS_CMD_OK, "a\nb\nc:d\ne\nf\ng:h\n"), + (["string", "split", "-m1", "-r", "/", "/usr/local/bin/fish"], STATUS_CMD_OK, "/usr/local/bin\nfish\n"), + (["string", "split", "-r", ".", "www.ch.ic.ac.uk"], STATUS_CMD_OK, "www\nch\nic\nac\nuk\n"), + (["string", "split", "--", "--", "a--b---c----d"], STATUS_CMD_OK, "a\nb\n-c\n\nd\n"), + (["string", "split", "-r", "..", "...."], STATUS_CMD_OK, "\n\n\n"), + 
(["string", "split", "-r", "--", "--", "a--b---c----d"], STATUS_CMD_OK, "a\nb-\nc\n\nd\n"), + (["string", "split", "", ""], STATUS_CMD_ERROR, "\n"), + (["string", "split", "", "a"], STATUS_CMD_ERROR, "a\n"), + (["string", "split", "", "ab"], STATUS_CMD_OK, "a\nb\n"), + (["string", "split", "", "abc"], STATUS_CMD_OK, "a\nb\nc\n"), + (["string", "split", "-m1", "", "abc"], STATUS_CMD_OK, "a\nbc\n"), + (["string", "split", "-r", "", ""], STATUS_CMD_ERROR, "\n"), + (["string", "split", "-r", "", "a"], STATUS_CMD_ERROR, "a\n"), + (["string", "split", "-r", "", "ab"], STATUS_CMD_OK, "a\nb\n"), + (["string", "split", "-r", "", "abc"], STATUS_CMD_OK, "a\nb\nc\n"), + (["string", "split", "-r", "-m1", "", "abc"], STATUS_CMD_OK, "ab\nc\n"), + (["string", "split", "-q"], STATUS_INVALID_ARGS, ""), + (["string", "split", "-q", ":"], STATUS_CMD_ERROR, ""), + (["string", "split", "-q", "x", "axbxc"], STATUS_CMD_OK, ""), + + (["string", "sub"], STATUS_CMD_ERROR, ""), + (["string", "sub", "abcde"], STATUS_CMD_OK, "abcde\n"), + (["string", "sub", "-l", "x", "abcde"], STATUS_INVALID_ARGS, ""), + (["string", "sub", "-s", "x", "abcde"], STATUS_INVALID_ARGS, ""), + (["string", "sub", "-l0", "abcde"], STATUS_CMD_OK, "\n"), + (["string", "sub", "-l2", "abcde"], STATUS_CMD_OK, "ab\n"), + (["string", "sub", "-l5", "abcde"], STATUS_CMD_OK, "abcde\n"), + (["string", "sub", "-l6", "abcde"], STATUS_CMD_OK, "abcde\n"), + (["string", "sub", "-l-1", "abcde"], STATUS_INVALID_ARGS, ""), + (["string", "sub", "-s0", "abcde"], STATUS_INVALID_ARGS, ""), + (["string", "sub", "-s1", "abcde"], STATUS_CMD_OK, "abcde\n"), + (["string", "sub", "-s5", "abcde"], STATUS_CMD_OK, "e\n"), + (["string", "sub", "-s6", "abcde"], STATUS_CMD_OK, "\n"), + (["string", "sub", "-s-1", "abcde"], STATUS_CMD_OK, "e\n"), + (["string", "sub", "-s-5", "abcde"], STATUS_CMD_OK, "abcde\n"), + (["string", "sub", "-s-6", "abcde"], STATUS_CMD_OK, "abcde\n"), + (["string", "sub", "-s1", "-l0", "abcde"], STATUS_CMD_OK, "\n"), + 
(["string", "sub", "-s1", "-l1", "abcde"], STATUS_CMD_OK, "a\n"), + (["string", "sub", "-s2", "-l2", "abcde"], STATUS_CMD_OK, "bc\n"), + (["string", "sub", "-s-1", "-l1", "abcde"], STATUS_CMD_OK, "e\n"), + (["string", "sub", "-s-1", "-l2", "abcde"], STATUS_CMD_OK, "e\n"), + (["string", "sub", "-s-3", "-l2", "abcde"], STATUS_CMD_OK, "cd\n"), + (["string", "sub", "-s-3", "-l4", "abcde"], STATUS_CMD_OK, "cde\n"), + (["string", "sub", "-q"], STATUS_CMD_ERROR, ""), + (["string", "sub", "-q", "abcde"], STATUS_CMD_OK, ""), + + (["string", "trim"], STATUS_CMD_ERROR, ""), + (["string", "trim", ""], STATUS_CMD_ERROR, "\n"), + (["string", "trim", " "], STATUS_CMD_OK, "\n"), + (["string", "trim", " \x0C\n\r\t"], STATUS_CMD_OK, "\n"), + (["string", "trim", " a"], STATUS_CMD_OK, "a\n"), + (["string", "trim", "a "], STATUS_CMD_OK, "a\n"), + (["string", "trim", " a "], STATUS_CMD_OK, "a\n"), + (["string", "trim", "-l", " a"], STATUS_CMD_OK, "a\n"), + (["string", "trim", "-l", "a "], STATUS_CMD_ERROR, "a \n"), + (["string", "trim", "-l", " a "], STATUS_CMD_OK, "a \n"), + (["string", "trim", "-r", " a"], STATUS_CMD_ERROR, " a\n"), + (["string", "trim", "-r", "a "], STATUS_CMD_OK, "a\n"), + (["string", "trim", "-r", " a "], STATUS_CMD_OK, " a\n"), + (["string", "trim", "-c", ".", " a"], STATUS_CMD_ERROR, " a\n"), + (["string", "trim", "-c", ".", "a "], STATUS_CMD_ERROR, "a \n"), + (["string", "trim", "-c", ".", " a "], STATUS_CMD_ERROR, " a \n"), + (["string", "trim", "-c", ".", ".a"], STATUS_CMD_OK, "a\n"), + (["string", "trim", "-c", ".", "a."], STATUS_CMD_OK, "a\n"), + (["string", "trim", "-c", ".", ".a."], STATUS_CMD_OK, "a\n"), + (["string", "trim", "-c", "\\/", "/a\\"], STATUS_CMD_OK, "a\n"), + (["string", "trim", "-c", "\\/", "a/"], STATUS_CMD_OK, "a\n"), + (["string", "trim", "-c", "\\/", "\\a/"], STATUS_CMD_OK, "a\n"), + (["string", "trim", "-c", "", ".a."], STATUS_CMD_ERROR, ".a.\n"), + ]; + + for (cmd, expected_status, expected_stdout) in tests { + string_test(cmd, 
expected_status, expected_stdout); + } + + let qmark_noglob_tests = test_cases![ + (["string", "match", "a*b?c", "axxb?c"], STATUS_CMD_OK, "axxb?c\n"), + (["string", "match", "*?", "a"], STATUS_CMD_ERROR, ""), + (["string", "match", "*?", "ab"], STATUS_CMD_ERROR, ""), + (["string", "match", "?*", "a"], STATUS_CMD_ERROR, ""), + (["string", "match", "?*", "ab"], STATUS_CMD_ERROR, ""), + (["string", "match", "a*\\?", "abc?"], STATUS_CMD_ERROR, ""), + ]; + + scoped_test(FeatureFlag::qmark_noglob, true, || { + for (cmd, expected_status, expected_stdout) in qmark_noglob_tests { + string_test(cmd, expected_status, expected_stdout); + } + }); + + let qmark_glob_tests = test_cases![ + (["string", "match", "a*b?c", "axxbyc"], STATUS_CMD_OK, "axxbyc\n"), + (["string", "match", "*?", "a"], STATUS_CMD_OK, "a\n"), + (["string", "match", "*?", "ab"], STATUS_CMD_OK, "ab\n"), + (["string", "match", "?*", "a"], STATUS_CMD_OK, "a\n"), + (["string", "match", "?*", "ab"], STATUS_CMD_OK, "ab\n"), + (["string", "match", "a*\\?", "abc?"], STATUS_CMD_OK, "abc?\n"), + ]; + + scoped_test(FeatureFlag::qmark_noglob, false, || { + for (cmd, expected_status, expected_stdout) in qmark_glob_tests { + string_test(cmd, expected_status, expected_stdout); + } + }); + +}} diff --git a/fish-rust/src/common.rs b/fish-rust/src/common.rs index 2d80ca512..b5e740277 100644 --- a/fish-rust/src/common.rs +++ b/fish-rust/src/common.rs @@ -97,6 +97,20 @@ impl Default for EscapeStringStyle { } } +impl TryFrom<&wstr> for EscapeStringStyle { + type Error = &'static wstr; + fn try_from(s: &wstr) -> Result { + use EscapeStringStyle::*; + match s { + s if s == "script" => Ok(Self::default()), + s if s == "var" => Ok(Var), + s if s == "url" => Ok(Url), + s if s == "regex" => Ok(Regex), + _ => Err(L!("Invalid escape style")), + } + } +} + bitflags! { /// Flags for the [`escape_string()`] function. These are only applicable when the escape style is /// [`EscapeStringStyle::Script`]. 
@@ -128,6 +142,19 @@ impl Default for UnescapeStringStyle { } } +impl TryFrom<&wstr> for UnescapeStringStyle { + type Error = &'static wstr; + fn try_from(s: &wstr) -> Result { + use UnescapeStringStyle::*; + match s { + s if s == "script" => Ok(Self::default()), + s if s == "var" => Ok(Var), + s if s == "url" => Ok(Url), + _ => Err(L!("Invalid escape style")), + } + } +} + bitflags! { /// Flags for unescape_string functions. #[derive(Default)] diff --git a/fish-rust/src/ffi.rs b/fish-rust/src/ffi.rs index f594cd5d0..266b759fc 100644 --- a/fish-rust/src/ffi.rs +++ b/fish-rust/src/ffi.rs @@ -99,6 +99,8 @@ include_cpp! { generate!("output_stream_t") generate!("io_streams_t") generate!("make_null_io_streams_ffi") + generate!("make_test_io_streams_ffi") + generate!("get_test_output_ffi") generate_pod!("RustFFIJobList") generate_pod!("RustFFIProcList") @@ -137,6 +139,7 @@ include_cpp! { generate!("set_interactive_session") generate!("screen_set_midnight_commander_hack") generate!("screen_clear_layout_cache_ffi") + generate!("escape_code_length_ffi") generate!("reader_schedule_prompt_repaint") generate!("reader_change_history") generate!("history_session_id") diff --git a/fish-rust/src/parse_util.rs b/fish-rust/src/parse_util.rs index d938aa020..f7743319f 100644 --- a/fish-rust/src/parse_util.rs +++ b/fish-rust/src/parse_util.rs @@ -25,6 +25,7 @@ use crate::tokenizer::{ TOK_SHOW_COMMENTS, }; use crate::wchar::{wstr, WString, L}; +use crate::wchar_ext::WExt; use crate::wchar_ffi::{WCharFromFFI, WCharToFFI}; use crate::wcstringutil::truncate; use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE}; @@ -542,22 +543,22 @@ pub fn parse_util_get_offset(s: &wstr, line: i32, mut line_offset: usize) -> Opt /// Return the given string, unescaping wildcard characters but not performing any other character /// transformation. 
pub fn parse_util_unescape_wildcards(s: &wstr) -> WString { - let mut result = WString::new(); - result.reserve(s.len()); + let mut result = WString::with_capacity(s.len()); let unesc_qmark = !feature_test(FeatureFlag::qmark_noglob); - let cs = s.as_char_slice(); + let mut i = 0; - for c in cs.iter().copied() { + while i < s.len() { + let c = s.char_at(i); if c == '*' { result.push(ANY_STRING); } else if c == '?' && unesc_qmark { result.push(ANY_CHAR); - } else if c == '\\' && cs.get(i + 1) == Some(&'*') - || (unesc_qmark && c == '\\' && cs.get(i + 1) == Some(&'?')) + } else if (c == '\\' && s.char_at(i + 1) == '*') + || (unesc_qmark && c == '\\' && s.char_at(i + 1) == '?') { - result.push(cs[i + 1]); + result.push(s.char_at(i + 1)); i += 1; - } else if c == '\\' && cs.get(i + 1) == Some(&'\\') { + } else if c == '\\' && s.char_at(i + 1) == '\\' { // Not a wildcard, but ensure the next iteration doesn't see this escaped backslash. result.push_utfstr(L!("\\\\")); i += 1; diff --git a/fish-rust/src/wcstringutil.rs b/fish-rust/src/wcstringutil.rs index a068c0c8b..c45ea52e9 100644 --- a/fish-rust/src/wcstringutil.rs +++ b/fish-rust/src/wcstringutil.rs @@ -379,11 +379,11 @@ pub fn bool_from_string(x: &wstr) -> bool { pub fn split_about<'haystack>( haystack: &'haystack wstr, needle: &wstr, - max: Option, + max: usize, no_empty: bool, ) -> Vec<&'haystack wstr> { let mut output = vec![]; - let mut remaining = max.unwrap_or(i64::MAX); + let mut remaining = max; let mut haystack = haystack.as_char_slice(); while remaining > 0 && !haystack.is_empty() { let split_point = if needle.is_empty() { @@ -398,6 +398,11 @@ pub fn split_about<'haystack>( None => break, // not found } }; + + if haystack.len() == split_point { + break; + } + if !no_empty || split_point != 0 { output.push(wstr::from_char_slice(&haystack[..split_point])); } diff --git a/src/abbrs.h b/src/abbrs.h index fab82975c..b28ba1a0c 100644 --- a/src/abbrs.h +++ b/src/abbrs.h @@ -9,7 +9,6 @@ #include "common.h" 
#include "maybe.h" #include "parse_constants.h" -#include "re.h" #if INCLUDE_RUST_HEADERS diff --git a/src/builtin.cpp b/src/builtin.cpp index 2079a49c9..6c3a4fdcd 100644 --- a/src/builtin.cpp +++ b/src/builtin.cpp @@ -43,7 +43,6 @@ #include "builtins/set.h" #include "builtins/shared.rs.h" #include "builtins/source.h" -#include "builtins/string.h" #include "builtins/ulimit.h" #include "complete.h" #include "cxx.h" @@ -393,7 +392,7 @@ static constexpr builtin_data_t builtin_datas[] = { {L"set_color", &implemented_in_rust, N_(L"Set the terminal color")}, {L"source", &builtin_source, N_(L"Evaluate contents of file")}, {L"status", &implemented_in_rust, N_(L"Return status information about fish")}, - {L"string", &builtin_string, N_(L"Manipulate strings")}, + {L"string", &implemented_in_rust, N_(L"Manipulate strings")}, {L"switch", &builtin_generic, N_(L"Conditionally run blocks of code")}, {L"test", &implemented_in_rust, N_(L"Test a condition")}, {L"time", &builtin_generic, N_(L"Measure how long a command or block takes")}, @@ -569,6 +568,9 @@ static maybe_t try_get_rust_builtin(const wcstring &cmd) { if (cmd == L"status") { return RustBuiltin::Status; } + if (cmd == L"string") { + return RustBuiltin::String; + } if (cmd == L"test" || cmd == L"[") { return RustBuiltin::Test; } diff --git a/src/builtin.h b/src/builtin.h index 830933b36..22a8bba4f 100644 --- a/src/builtin.h +++ b/src/builtin.h @@ -131,6 +131,7 @@ enum class RustBuiltin : int32_t { Return, SetColor, Status, + String, Test, Type, Wait, diff --git a/src/builtins/string.cpp b/src/builtins/string.cpp deleted file mode 100644 index 6b7896938..000000000 --- a/src/builtins/string.cpp +++ /dev/null @@ -1,1949 +0,0 @@ -// Implementation of the string builtin. 
-#include "config.h" // IWYU pragma: keep - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../builtin.h" -#include "../common.h" -#include "../env.h" -#include "../fallback.h" // IWYU pragma: keep -#include "../io.h" -#include "../maybe.h" -#include "../parse_util.h" -#include "../parser.h" -#include "../re.h" -#include "../screen.h" -#include "../wcstringutil.h" -#include "../wgetopt.h" -#include "../wildcard.h" -#include "../wutil.h" // IWYU pragma: keep -#include "future_feature_flags.h" - -// Empirically determined. -// This is probably down to some pipe buffer or some such, -// but too small means we need to call `read(2)` and str2wcstring a lot. -#define STRING_CHUNK_SIZE 1024 - -namespace { - -static void string_error(io_streams_t &streams, const wchar_t *fmt, ...) { - streams.err.append(L"string "); - va_list va; - va_start(va, fmt); - streams.err.append_formatv(fmt, va); - va_end(va); -} - -static void string_unknown_option(parser_t &parser, io_streams_t &streams, const wchar_t *subcmd, - const wchar_t *opt) { - string_error(streams, BUILTIN_ERR_UNKNOWN, subcmd, opt); - builtin_print_error_trailer(parser, streams.err, L"string"); -} - -// We read from stdin if we are the second or later process in a pipeline. -static bool string_args_from_stdin(const io_streams_t &streams) { - return streams.stdin_is_directly_redirected; -} - -static const wchar_t *string_get_arg_argv(int *argidx, const wchar_t *const *argv) { - return argv && argv[*argidx] ? argv[(*argidx)++] : nullptr; -} - -// A helper type for extracting arguments from either argv or stdin. -class arg_iterator_t { - // The list of arguments passed to the string builtin. - const wchar_t *const *argv_; - // If using argv, index of the next argument to return. - int argidx_; - // If not using argv, a string to store bytes that have been read but not yet returned. 
- std::string buffer_; - // If set, when reading from a stream, split on newlines. - const bool split_; - // Backing storage for the next() string. - wcstring storage_; - const io_streams_t &streams_; - // If set, we have consumed all of stdin and its last line is missing a newline character. - // This is an edge case -- we expect text input, which is conventionally terminated by a - // newline character. But if it isn't, we use this to avoid creating one out of thin air, - // to not corrupt input data. - bool missing_trailing_newline = false; - - /// Reads the next argument from stdin, returning true if an argument was produced and false if - /// not. On true, the string is stored in storage_. - bool get_arg_stdin() { - assert(string_args_from_stdin(streams_) && "should not be reading from stdin"); - assert(streams_.stdin_fd >= 0 && "should have a valid fd"); - // Read in chunks from fd until buffer has a line (or the end if split_ is unset). - size_t pos; - while (!split_ || (pos = buffer_.find('\n')) == std::string::npos) { - char buf[STRING_CHUNK_SIZE]; - long n = read_blocked(streams_.stdin_fd, buf, STRING_CHUNK_SIZE); - if (n == 0) { - // If we still have buffer contents, flush them, - // in case there was no trailing sep. - if (buffer_.empty()) return false; - missing_trailing_newline = true; - storage_ = str2wcstring(buffer_); - buffer_.clear(); - return true; - } - if (n == -1) { - // Some error happened. We can't do anything about it, - // so ignore it. - // (read_blocked already retries for EAGAIN and EINTR) - storage_ = str2wcstring(buffer_); - buffer_.clear(); - return false; - } - buffer_.append(buf, n); - } - - // Split the buffer on the sep and return the first part. 
- storage_ = str2wcstring(buffer_, pos); - buffer_.erase(0, pos + 1); - return true; - } - - public: - arg_iterator_t(const wchar_t *const *argv, int argidx, const io_streams_t &streams, - bool split = true) - : argv_(argv), argidx_(argidx), split_(split), streams_(streams) {} - - const wcstring *nextstr() { - if (string_args_from_stdin(streams_)) { - return get_arg_stdin() ? &storage_ : nullptr; - } - if (auto arg = string_get_arg_argv(&argidx_, argv_)) { - storage_ = arg; - return &storage_; - } else { - return nullptr; - } - } - - /// Returns true if we should add a newline after printing output for the current item. - /// This is only ever false in an edge case, namely after we have consumed stdin and the - /// last line is missing a trailing newline. - bool want_newline() const { return !missing_trailing_newline; } -}; - -// This is used by the string subcommands to communicate with the option parser which flags are -// valid and get the result of parsing the command for flags. -struct options_t { //!OCLINT(too many fields) - bool all_valid = false; - bool char_to_pad_valid = false; - bool chars_to_trim_valid = false; - bool chars_to_shorten_valid = false; - bool count_valid = false; - bool entire_valid = false; - bool filter_valid = false; - bool groups_only_valid = false; - bool ignore_case_valid = false; - bool index_valid = false; - bool invert_valid = false; - bool left_valid = false; - bool length_valid = false; - bool max_valid = false; - bool no_newline_valid = false; - bool no_quoted_valid = false; - bool quiet_valid = false; - bool regex_valid = false; - bool right_valid = false; - bool start_valid = false; - bool end_valid = false; - bool style_valid = false; - bool no_empty_valid = false; - bool no_trim_newlines_valid = false; - bool fields_valid = false; - bool allow_empty_valid = false; - bool visible_valid = false; - bool width_valid = false; - - bool all = false; - bool entire = false; - bool filter = false; - bool groups_only = false; - bool 
ignore_case = false; - bool index = false; - bool invert_match = false; - bool left = false; - bool no_newline = false; - bool no_quoted = false; - bool quiet = false; - bool regex = false; - bool right = false; - bool no_empty = false; - bool no_trim_newlines = false; - bool allow_empty = false; - bool visible = false; - - long count = 0; - long length = 0; - long max = 0; - long start = 0; - long end = 0; - ssize_t width = 0; - - wchar_t char_to_pad = L' '; - - std::vector fields; - - const wchar_t *chars_to_trim = L" \f\n\r\t\v"; - const wchar_t *arg1 = nullptr; - const wchar_t *arg2 = nullptr; - - escape_string_style_t escape_style = STRING_STYLE_SCRIPT; -}; - -static size_t width_without_escapes(const wcstring &ins, size_t start_pos = 0) { - ssize_t width = 0; - for (size_t i = start_pos; i < ins.size(); i++) { - wchar_t c = ins[i]; - auto w = fish_wcwidth_visible(c); - // We assume that this string is on its own line, - // in which case a backslash can't bring us below 0. - if (w > 0 || width > 0) { - width += w; - } - } - - // ANSI escape sequences like \e\[31m contain printable characters. Subtract their width - // because they are not rendered. - size_t pos = start_pos; - while ((pos = ins.find('\x1B', pos)) != std::string::npos) { - auto len = escape_code_length(ins.c_str() + pos); - if (len.has_value()) { - auto sub = ins.substr(pos, *len); - for (auto c : sub) { - auto w = fish_wcwidth_visible(c); - width -= w; - } - // Move us forward behind the escape code, - // it might include a second escape! - // E.g. SGR0 ("reset") is \e\(B\e\[m in xterm. - pos += *len - 1; - } else { - pos++; - } - } - return width; -} - -/// This handles the `--style=xxx` flag. 
-static int handle_flag_1(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - const wchar_t *cmd = argv[0]; - - if (opts->style_valid) { - if (std::wcscmp(w.woptarg, L"script") == 0) { - opts->escape_style = STRING_STYLE_SCRIPT; - } else if (std::wcscmp(w.woptarg, L"url") == 0) { - opts->escape_style = STRING_STYLE_URL; - } else if (std::wcscmp(w.woptarg, L"var") == 0) { - opts->escape_style = STRING_STYLE_VAR; - } else if (std::wcscmp(w.woptarg, L"regex") == 0) { - opts->escape_style = STRING_STYLE_REGEX; - } else { - string_error(streams, _(L"%ls: Invalid escape style '%ls'\n"), cmd, w.woptarg); - return STATUS_INVALID_ARGS; - } - return STATUS_CMD_OK; - } - - string_unknown_option(parser, streams, cmd, argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -using flag_handler_t = int (*)(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts); - -static int handle_flag_N(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->no_newline_valid) { - opts->no_newline = true; - return STATUS_CMD_OK; - } else if (opts->no_trim_newlines_valid) { - opts->no_trim_newlines = true; - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_a(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->all_valid) { - opts->all = true; - return STATUS_CMD_OK; - } else if (opts->allow_empty_valid) { - opts->allow_empty = true; - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_c(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->chars_to_trim_valid || opts->chars_to_shorten_valid) { - 
opts->chars_to_trim = w.woptarg; - return STATUS_CMD_OK; - } else if (opts->char_to_pad_valid) { - if (wcslen(w.woptarg) != 1) { - string_error(streams, _(L"%ls: Padding should be a character '%ls'\n"), argv[0], - w.woptarg); - return STATUS_INVALID_ARGS; - } - opts->char_to_pad = w.woptarg[0]; - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_e(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->end_valid) { - opts->end = fish_wcstol(w.woptarg); - if (opts->end == 0 || opts->end == LONG_MIN || errno == ERANGE) { - string_error(streams, _(L"%ls: Invalid end value '%ls'\n"), argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } else if (errno) { - string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } - return STATUS_CMD_OK; - } else if (opts->entire_valid) { - opts->entire = true; - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_f(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->filter_valid) { - opts->filter = true; - return STATUS_CMD_OK; - } else if (opts->fields_valid) { - for (const wcstring &s : split_string(w.woptarg, L',')) { - std::vector range = split_string(s, L'-'); - if (range.size() == 2) { - int begin = fish_wcstoi(range.at(0).c_str()); - if (begin <= 0 || errno == ERANGE) { - string_error(streams, _(L"%ls: Invalid range value for field '%ls'\n"), argv[0], - w.woptarg); - return STATUS_INVALID_ARGS; - } else if (errno) { - string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } - int end = fish_wcstoi(range.at(1).c_str()); - if (end <= 0 || errno == ERANGE) { - string_error(streams, _(L"%ls: Invalid range value 
for field '%ls'\n"), argv[0], - w.woptarg); - return STATUS_INVALID_ARGS; - } else if (errno) { - string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } - if (begin <= end) { - for (int i = begin; i <= end; i++) { - opts->fields.push_back(i); - } - } else { - for (int i = begin; i >= end; i--) { - opts->fields.push_back(i); - } - } - } else { - int field = fish_wcstoi(s.c_str()); - if (field <= 0 || errno == ERANGE) { - string_error(streams, _(L"%ls: Invalid fields value '%ls'\n"), argv[0], - w.woptarg); - return STATUS_INVALID_ARGS; - } else if (errno) { - string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } - opts->fields.push_back(field); - } - } - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_g(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->groups_only_valid) { - opts->groups_only = true; - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_i(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->ignore_case_valid) { - opts->ignore_case = true; - return STATUS_CMD_OK; - } else if (opts->index_valid) { - opts->index = true; - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_l(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->length_valid) { - opts->length = fish_wcstol(w.woptarg); - if (opts->length < 0 || opts->length == LONG_MIN || errno == ERANGE) { - string_error(streams, _(L"%ls: Invalid length value '%ls'\n"), argv[0], w.woptarg); - return 
STATUS_INVALID_ARGS; - } else if (errno) { - string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } - return STATUS_CMD_OK; - } else if (opts->left_valid) { - opts->left = true; - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_m(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->max_valid) { - opts->max = fish_wcstol(w.woptarg); - if (opts->max < 0 || errno == ERANGE) { - string_error(streams, _(L"%ls: Invalid max value '%ls'\n"), argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } else if (errno) { - string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_n(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->count_valid) { - opts->count = fish_wcstol(w.woptarg); - if (opts->count < 0 || errno == ERANGE) { - string_error(streams, _(L"%ls: Invalid count value '%ls'\n"), argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } else if (errno) { - string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } - return STATUS_CMD_OK; - } else if (opts->index_valid) { - opts->index = true; - return STATUS_CMD_OK; - } else if (opts->no_quoted_valid) { - opts->no_quoted = true; - return STATUS_CMD_OK; - } else if (opts->no_empty_valid) { - opts->no_empty = true; - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_q(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->quiet_valid) { 
- opts->quiet = true; - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_r(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->regex_valid) { - opts->regex = true; - return STATUS_CMD_OK; - } else if (opts->right_valid) { - opts->right = true; - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_s(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->start_valid) { - opts->start = fish_wcstol(w.woptarg); - if (opts->start == 0 || opts->start == LONG_MIN || errno == ERANGE) { - string_error(streams, _(L"%ls: Invalid start value '%ls'\n"), argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } else if (errno) { - string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_v(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->invert_valid) { - opts->invert_match = true; - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_V(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->visible_valid) { - opts->visible = true; - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -static int handle_flag_w(const wchar_t **argv, parser_t &parser, io_streams_t &streams, - const wgetopter_t &w, options_t *opts) { - if (opts->width_valid) 
{ - long width = fish_wcstol(w.woptarg); - if (width < 0) { - string_error(streams, _(L"%ls: Invalid width value '%ls'\n"), argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } else if (errno) { - string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg); - return STATUS_INVALID_ARGS; - } - opts->width = static_cast(width); - return STATUS_CMD_OK; - } - string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; -} - -/// This constructs the wgetopt() short options string based on which arguments are valid for the -/// subcommand. We have to do this because many short flags have multiple meanings and may or may -/// not require an argument depending on the meaning. -static wcstring construct_short_opts(options_t *opts) { //!OCLINT(high npath complexity) - wcstring short_opts(L":"); - if (opts->all_valid) short_opts.append(L"a"); - if (opts->char_to_pad_valid) short_opts.append(L"c:"); - if (opts->chars_to_trim_valid) short_opts.append(L"c:"); - if (opts->chars_to_shorten_valid) short_opts.append(L"c:"); - if (opts->count_valid) short_opts.append(L"n:"); - if (opts->entire_valid) short_opts.append(L"e"); - if (opts->filter_valid) short_opts.append(L"f"); - if (opts->groups_only_valid) short_opts.append(L"g"); - if (opts->ignore_case_valid) short_opts.append(L"i"); - if (opts->index_valid) short_opts.append(L"n"); - if (opts->invert_valid) short_opts.append(L"v"); - if (opts->visible_valid) short_opts.append(L"V"); - if (opts->left_valid) short_opts.append(L"l"); - if (opts->length_valid) short_opts.append(L"l:"); - if (opts->max_valid) short_opts.append(L"m:"); - if (opts->no_newline_valid) short_opts.append(L"N"); - if (opts->no_quoted_valid) short_opts.append(L"n"); - if (opts->quiet_valid) short_opts.append(L"q"); - if (opts->regex_valid) short_opts.append(L"r"); - if (opts->right_valid) short_opts.append(L"r"); - if (opts->start_valid) short_opts.append(L"s:"); - if (opts->end_valid) short_opts.append(L"e:"); - 
if (opts->no_empty_valid) short_opts.append(L"n"); - if (opts->no_trim_newlines_valid) short_opts.append(L"N"); - if (opts->fields_valid) short_opts.append(L"f:"); - if (opts->allow_empty_valid) short_opts.append(L"a"); - if (opts->width_valid) short_opts.append(L"w:"); - return short_opts; -} - -// Note that several long flags share the same short flag. That is okay. The caller is expected -// to indicate that a max of one of the long flags sharing a short flag is valid. -// Remember: adjust share/completions/string.fish when `string` options change -static const struct woption long_options[] = {{L"all", no_argument, 'a'}, - {L"chars", required_argument, 'c'}, - {L"count", required_argument, 'n'}, - {L"entire", no_argument, 'e'}, - {L"end", required_argument, 'e'}, - {L"filter", no_argument, 'f'}, - {L"groups-only", no_argument, 'g'}, - {L"ignore-case", no_argument, 'i'}, - {L"index", no_argument, 'n'}, - {L"invert", no_argument, 'v'}, - {L"visible", no_argument, 'V'}, - {L"left", no_argument, 'l'}, - {L"length", required_argument, 'l'}, - {L"max", required_argument, 'm'}, - {L"no-empty", no_argument, 'n'}, - {L"no-newline", no_argument, 'N'}, - {L"no-quoted", no_argument, 'n'}, - {L"quiet", no_argument, 'q'}, - {L"regex", no_argument, 'r'}, - {L"right", no_argument, 'r'}, - {L"start", required_argument, 's'}, - {L"style", required_argument, 1}, - {L"no-trim-newlines", no_argument, 'N'}, - {L"fields", required_argument, 'f'}, - {L"allow-empty", no_argument, 'a'}, - {L"width", required_argument, 'w'}, - {}}; - -static flag_handler_t get_handler_for_flag(char c) { - // clang-format off - switch (c) { - case 'N': return handle_flag_N; - case 'a': return handle_flag_a; - case 'c': return handle_flag_c; - case 'e': return handle_flag_e; - case 'f': return handle_flag_f; - case 'g': return handle_flag_g; - case 'i': return handle_flag_i; - case 'l': return handle_flag_l; - case 'm': return handle_flag_m; - case 'n': return handle_flag_n; - case 'q': return 
handle_flag_q; - case 'r': return handle_flag_r; - case 's': return handle_flag_s; - case 'V': return handle_flag_V; - case 'v': return handle_flag_v; - case 'w': return handle_flag_w; - case 1 : return handle_flag_1; - default: return nullptr; - } - // clang-format on -} - -/// Parse the arguments for flags recognized by a specific string subcommand. -static int parse_opts(options_t *opts, int *optind, int n_req_args, int argc, const wchar_t **argv, - parser_t &parser, io_streams_t &streams) { - const wchar_t *cmd = argv[0]; - wcstring short_opts = construct_short_opts(opts); - const wchar_t *short_options = short_opts.c_str(); - int opt; - wgetopter_t w; - while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, nullptr)) != -1) { - if (auto fn = get_handler_for_flag(opt)) { - int retval = fn(argv, parser, streams, w, opts); - if (retval != STATUS_CMD_OK) return retval; - } else if (opt == ':') { - streams.err.append(L"string "); // clone of string_error - builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1], - false /* print_hints */); - return STATUS_INVALID_ARGS; - } else if (opt == '?') { - string_unknown_option(parser, streams, cmd, argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; - } else { - DIE("unexpected retval from wgetopt_long"); - } - } - - *optind = w.woptind; - - // If the caller requires one or two mandatory args deal with that here. - if (n_req_args) { - opts->arg1 = string_get_arg_argv(optind, argv); - if (!opts->arg1 && n_req_args == 1) { - string_error(streams, BUILTIN_ERR_ARG_COUNT0, cmd); - return STATUS_INVALID_ARGS; - } - } - if (n_req_args > 1) { - opts->arg2 = string_get_arg_argv(optind, argv); - if (!opts->arg2) { - string_error(streams, BUILTIN_ERR_MIN_ARG_COUNT1, cmd, n_req_args, - !!opts->arg2 + !!opts->arg1); - return STATUS_INVALID_ARGS; - } - } - - // At this point we should not have optional args and be reading args from stdin. 
- if (string_args_from_stdin(streams) && argc > *optind) { - string_error(streams, BUILTIN_ERR_TOO_MANY_ARGUMENTS, cmd); - return STATUS_INVALID_ARGS; - } - - return STATUS_CMD_OK; -} - -static int string_escape(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - options_t opts; - opts.no_quoted_valid = true; - opts.style_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - // Currently, only the script style supports options. - // Ignore them for other styles for now. - escape_flags_t flags = 0; - if (opts.escape_style == STRING_STYLE_SCRIPT && opts.no_quoted) { - flags |= ESCAPE_NO_QUOTED; - } - - int nesc = 0; - arg_iterator_t aiter(argv, optind, streams); - while (const wcstring *arg = aiter.nextstr()) { - wcstring sep = aiter.want_newline() ? L"\n" : L""; - streams.out.append(escape_string(*arg, flags, opts.escape_style) + sep); - nesc++; - } - - return nesc > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR; - DIE("should never reach this statement"); -} - -static int string_unescape(parser_t &parser, io_streams_t &streams, int argc, - const wchar_t **argv) { - options_t opts; - opts.no_quoted_valid = true; - opts.style_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); - int nesc = 0; - unescape_flags_t flags = 0; - - if (retval != STATUS_CMD_OK) return retval; - - arg_iterator_t aiter(argv, optind, streams); - while (const wcstring *arg = aiter.nextstr()) { - wcstring sep = aiter.want_newline() ? L"\n" : L""; - if (auto result = unescape_string(*arg, flags, opts.escape_style)) { - streams.out.append(*result + sep); - nesc++; - } - } - - return nesc > 0 ? 
STATUS_CMD_OK : STATUS_CMD_ERROR; - DIE("should never reach this statement"); -} - -static int string_join_maybe0(parser_t &parser, io_streams_t &streams, int argc, - const wchar_t **argv, bool is_join0) { - options_t opts; - opts.quiet_valid = true; - opts.no_empty_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, is_join0 ? 0 : 1, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - const wcstring sep = is_join0 ? wcstring(1, L'\0') : wcstring(opts.arg1); - int nargs = 0; - arg_iterator_t aiter(argv, optind, streams); - while (const wcstring *arg = aiter.nextstr()) { - if (!opts.quiet) { - if (opts.no_empty && arg->empty()) continue; - - if (nargs > 0) { - streams.out.append(sep); - } - streams.out.append(*arg); - } else if (nargs > 1) { - return STATUS_CMD_OK; - } - nargs++; - } - if (nargs > 0 && !opts.quiet) { - if (is_join0) { - streams.out.push(L'\0'); - } else if (aiter.want_newline()) { - streams.out.push(L'\n'); - } - } - - return nargs > 1 ? STATUS_CMD_OK : STATUS_CMD_ERROR; -} - -static int string_join(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - return string_join_maybe0(parser, streams, argc, argv, false /* is_join0 */); -} - -static int string_join0(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - return string_join_maybe0(parser, streams, argc, argv, true /* is_join0 */); -} - -static int string_length(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - options_t opts; - opts.quiet_valid = true; - opts.visible_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - int nnonempty = 0; - arg_iterator_t aiter(argv, optind, streams); - while (const wcstring *arg = aiter.nextstr()) { - if (opts.visible) { - // Visible length only makes sense line-wise. 
- for (auto &line : split_string(*arg, L'\n')) { - size_t max = 0; - // Carriage-return returns us to the beginning. The longest substring without - // carriage-return determines the overall width. - for (auto &reset : split_string(line, L'\r')) { - size_t n = width_without_escapes(reset); - if (n > max) max = n; - } - if (max > 0) { - nnonempty++; - } - if (!opts.quiet) { - streams.out.append(to_string(max) + L"\n"); - } else if (nnonempty > 0) { - return STATUS_CMD_OK; - } - } - } else { - size_t n = arg->length(); - if (n > 0) { - nnonempty++; - } - if (!opts.quiet) { - streams.out.append(to_string(n) + L"\n"); - } else if (nnonempty > 0) { - return STATUS_CMD_OK; - } - } - } - - return nnonempty > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR; -} - -namespace { -class string_matcher_t { - protected: - const options_t opts; - int total_matched{0}; - - public: - explicit string_matcher_t(const options_t &opts_) : opts(opts_) {} - - virtual ~string_matcher_t() = default; - virtual void report_matches(const wcstring &arg, io_streams_t &streams) = 0; - int match_count() const { return total_matched; } - - virtual void import_captures(env_stack_t &) {} -}; - -class wildcard_matcher_t final : public string_matcher_t { - private: - wcstring wcpattern; - - public: - wildcard_matcher_t(const wcstring &pattern, const options_t &opts) - : string_matcher_t(opts), wcpattern(parse_util_unescape_wildcards(pattern)) { - if (opts.ignore_case) { - wcpattern = wcstolower(std::move(wcpattern)); - } - if (opts.entire) { - if (!wcpattern.empty()) { - if (wcpattern.front() != ANY_STRING) wcpattern.insert(0, 1, ANY_STRING); - if (wcpattern.back() != ANY_STRING) wcpattern.push_back(ANY_STRING); - } else { - // If the pattern is empty, this becomes one ANY_STRING that matches everything. 
- wcpattern.push_back(ANY_STRING); - } - } - } - - ~wildcard_matcher_t() override = default; - - void report_matches(const wcstring &arg, io_streams_t &streams) override { - // Note: --all is a no-op for glob matching since the pattern is always matched - // against the entire argument. - bool match; - - if (opts.ignore_case) { - match = wildcard_match(wcstolower(arg), wcpattern, false); - } else { - match = wildcard_match(arg, wcpattern, false); - } - if (match ^ opts.invert_match) { - total_matched++; - - if (!opts.quiet) { - if (opts.index) { - streams.out.append_format(L"1 %lu\n", arg.length()); - } else { - streams.out.append(arg + L"\n"); - } - } - } - } -}; - -// Compile a regex, printing an error on failure. -static maybe_t try_compile_regex(const wcstring &pattern, const options_t &opts, - const wchar_t *cmd, io_streams_t &streams) { - re::re_error_t error{}; - re::flags_t flags{}; - flags.icase = opts.ignore_case; - auto re = re::regex_t::try_compile(pattern, flags, &error); - if (!re) { - string_error(streams, _(L"%ls: Regular expression compile error: %ls\n"), cmd, - error.message().c_str()); - string_error(streams, L"%ls: %ls\n", cmd, pattern.c_str()); - string_error(streams, L"%ls: %*ls\n", cmd, static_cast(error.offset), L"^"); - } - return re; -} - -/// Check if a list of capture group names is valid for variables. If any are invalid then report an -/// error to \p streams. \return true if all names are valid. 
-static bool validate_capture_group_names(const std::vector &capture_group_names, - io_streams_t &streams) { - for (const wcstring &name : capture_group_names) { - if (env_var_t::flags_for(name.c_str()) & env_var_t::flag_read_only) { - streams.err.append_format( - L"Modification of read-only variable \"%ls\" is not allowed\n", name.c_str()); - return false; - } - } - return true; -} - -class regex_matcher_t final : public string_matcher_t { - using regex_t = re::regex_t; - using match_data_t = re::match_data_t; - using match_range_t = re::match_range_t; - - // The regex to match against. - const regex_t regex_; - - // Match data associated with the regex. - match_data_t match_data_; - - // map from group name to matched substrings, for the first argument. - std::map> first_match_captures_; - - void populate_captures_from_match(const wcstring &subject) { - for (auto &kv : first_match_captures_) { - const auto &name = kv.first; - std::vector &vals = kv.second; - - // If there are multiple named groups and --all was used, we need to ensure that - // the indexes are always in sync between the variables. If an optional named - // group didn't match but its brethren did, we need to make sure to put - // *something* in the resulting array, and unfortunately fish doesn't support - // empty/null members so we're going to have to use an empty string as the - // sentinel value. - if (maybe_t capture = - regex_.substring_for_group(match_data_, name, subject)) { - vals.push_back(capture.acquire()); - } else if (this->opts.all) { - vals.emplace_back(); - } - } - } - - enum class match_result_t { - no_match = 0, - match = 1, - }; - - match_result_t report_match(const wcstring &arg, maybe_t mrange, - io_streams_t &streams) const { - if (!mrange.has_value()) { - if (opts.invert_match && !opts.quiet) { - if (opts.index) { - streams.out.append_format(L"1 %lu\n", arg.length()); - } else { - streams.out.append(arg + L"\n"); - } - } - - return opts.invert_match ? 
match_result_t::match : match_result_t::no_match; - } else if (opts.invert_match) { - return match_result_t::no_match; - } - - if (opts.entire && !opts.quiet) { - streams.out.append(arg + L"\n"); - } - - // If we have groups-only, we skip the first match, which is the full one. - size_t group_count = match_data_.matched_capture_group_count(); - for (size_t j = (opts.entire || opts.groups_only ? 1 : 0); j < group_count; j++) { - maybe_t cg = this->regex_.group(match_data_, j); - if (cg.has_value() && !opts.quiet) { - if (opts.index) { - streams.out.append_format(L"%lu %lu\n", cg->begin + 1, cg->end - cg->begin); - } else { - streams.out.append(arg.substr(cg->begin, cg->end - cg->begin) + L"\n"); - } - } - } - - return opts.invert_match ? match_result_t::no_match : match_result_t::match; - } - - public: - regex_matcher_t(regex_t regex, const options_t &opts) - : string_matcher_t(opts), regex_(std::move(regex)), match_data_(regex_.prepare()) { - // Populate first_match_captures_ with the capture group names and empty lists. - for (const wcstring &name : regex_.capture_group_names()) { - first_match_captures_.emplace(name, std::vector{}); - } - } - - ~regex_matcher_t() override = default; - - void report_matches(const wcstring &arg, io_streams_t &streams) override { - using namespace re; - - match_data_.reset(); - auto rc = report_match(arg, this->regex_.match(match_data_, arg), streams); - - bool populate_captures = false; - if (rc == match_result_t::match) { - // We only populate captures for the *first matching argument*. - populate_captures = (total_matched == 0); - total_matched++; - } - - if (populate_captures) { - this->populate_captures_from_match(arg); - } - - // Report any additional matches. 
- if (!opts.invert_match && opts.all) { - while (auto mr = this->regex_.match(match_data_, arg)) { - auto rc = this->report_match(arg, mr, streams); - if (rc == match_result_t::match && populate_captures) { - this->populate_captures_from_match(arg); - } - } - } - } - - void import_captures(env_stack_t &vars) override { - for (auto &kv : first_match_captures_) { - const wcstring &name = kv.first; - vars.set(name, ENV_DEFAULT, std::move(kv.second)); - } - } -}; -} // namespace - -static int string_match(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - const wchar_t *cmd = argv[0]; - - options_t opts; - opts.all_valid = true; - opts.entire_valid = true; - opts.groups_only_valid = true; - opts.ignore_case_valid = true; - opts.invert_valid = true; - opts.quiet_valid = true; - opts.regex_valid = true; - opts.index_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, 1, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - const wchar_t *pattern = opts.arg1; - - if (opts.entire && opts.index) { - streams.err.append_format(BUILTIN_ERR_COMBO2, cmd, - _(L"--entire and --index are mutually exclusive")); - return STATUS_INVALID_ARGS; - } - - if (opts.invert_match && opts.groups_only) { - streams.err.append_format(BUILTIN_ERR_COMBO2, cmd, - _(L"--invert and --groups-only are mutually exclusive")); - return STATUS_INVALID_ARGS; - } - - if (opts.entire && opts.groups_only) { - streams.err.append_format(BUILTIN_ERR_COMBO2, cmd, - _(L"--entire and --groups-only are mutually exclusive")); - return STATUS_INVALID_ARGS; - } - - std::unique_ptr matcher; - if (!opts.regex) { - // Globs cannot fail. - matcher = make_unique(pattern, opts); - } else { - // Compile the pattern as regex and validate capture group names as variables; both may - // fail. Note both try_compile_regex and validate_capture_group_names print an error on - // failure. 
- auto re = try_compile_regex(pattern, opts, cmd, streams); - if (!re || !validate_capture_group_names(re->capture_group_names(), streams)) { - return STATUS_INVALID_ARGS; - } - matcher = make_unique(re.acquire(), opts); - } - - assert(matcher && "Should have a matcher"); - arg_iterator_t aiter(argv, optind, streams); - while (const wcstring *arg = aiter.nextstr()) { - matcher->report_matches(*arg, streams); - if (opts.quiet && matcher->match_count() > 0) { - break; - } - } - matcher->import_captures(parser.vars()); - - return matcher->match_count() > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR; -} - -static int string_pad(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - options_t opts; - opts.char_to_pad_valid = true; - opts.right_valid = true; - opts.width_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - size_t pad_char_width = fish_wcwidth(opts.char_to_pad); - if (pad_char_width == 0) { - string_error(streams, _(L"%ls: Invalid padding character of width zero\n"), argv[0]); - return STATUS_INVALID_ARGS; - } - - // Pad left by default - if (!opts.right) { - opts.left = true; - } - - // Find max width of strings and keep the inputs - ssize_t max_width = 0; - std::vector inputs; - - arg_iterator_t aiter_width(argv, optind, streams); - while (const wcstring *arg = aiter_width.nextstr()) { - wcstring input_string = *arg; - ssize_t width = width_without_escapes(input_string); - if (width > max_width) max_width = width; - inputs.push_back(std::move(input_string)); - } - - ssize_t pad_width = max_width > opts.width ? 
max_width : opts.width; - for (auto &input : inputs) { - wcstring padded; - ssize_t padded_width = width_without_escapes(input); - if (pad_width >= padded_width) { - ssize_t pad = (pad_width - padded_width) / pad_char_width; - ssize_t remaining_width = (pad_width - padded_width) % pad_char_width; - if (opts.left) { - padded.append(pad, opts.char_to_pad); - padded.append(remaining_width, L' '); - padded.append(input); - } - if (opts.right) { - padded.append(input); - padded.append(remaining_width, L' '); - padded.append(pad, opts.char_to_pad); - } - } - if (aiter_width.want_newline()) { - padded.push_back(L'\n'); - } - streams.out.append(padded); - } - - return STATUS_CMD_OK; -} - -class string_replacer_t { - protected: - const wchar_t *argv0; - options_t opts; - int total_replaced; - io_streams_t &streams; - - public: - string_replacer_t(const wchar_t *argv0_, options_t opts_, io_streams_t &streams_) - : argv0(argv0_), opts(std::move(opts_)), total_replaced(0), streams(streams_) {} - - virtual ~string_replacer_t() = default; - int replace_count() const { return total_replaced; } - virtual bool replace_matches(const wcstring &arg, bool want_newline) = 0; -}; - -class literal_replacer_t final : public string_replacer_t { - const wcstring pattern; - const wcstring replacement; - size_t patlen; - - public: - literal_replacer_t(const wchar_t *argv0, wcstring pattern_, const wchar_t *replacement_, - const options_t &opts, io_streams_t &streams) - : string_replacer_t(argv0, opts, streams), - pattern(std::move(pattern_)), - replacement(replacement_), - patlen(pattern.length()) {} - - ~literal_replacer_t() override = default; - bool replace_matches(const wcstring &arg, bool want_newline) override; -}; - -static maybe_t interpret_escapes(const wcstring &arg) { - wcstring result; - result.reserve(arg.size()); - const wchar_t *cursor = arg.c_str(); - const wchar_t *end = cursor + arg.size(); - while (cursor < end) { - if (*cursor == L'\\') { - auto escape_len = 
read_unquoted_escape(cursor, &result, true, false); - if (escape_len.has_value()) { - cursor += *escape_len; - } else { - // Invalid escape. - return none(); - } - } else { - result.push_back(*cursor); - cursor++; - } - } - return result; -} - -class regex_replacer_t final : public string_replacer_t { - re::regex_t regex; - maybe_t replacement; - - public: - regex_replacer_t(const wchar_t *argv0, re::regex_t regex, const wcstring &replacement_, - const options_t &opts, io_streams_t &streams) - : string_replacer_t(argv0, opts, streams), regex(std::move(regex)) { - if (feature_test(feature_flag_t::string_replace_backslash)) { - replacement = replacement_; - } else { - replacement = interpret_escapes(replacement_); - } - } - - bool replace_matches(const wcstring &arg, bool want_newline) override; -}; - -/// A return value of true means all is well (even if no replacements were performed), false -/// indicates an unrecoverable error. -bool literal_replacer_t::replace_matches(const wcstring &arg, bool want_newline) { - wcstring result; - bool replacement_occurred = false; - - if (patlen == 0) { - replacement_occurred = true; - result = arg; - } else { - auto &cmp_func = opts.ignore_case ? wcsncasecmp : std::wcsncmp; - const wchar_t *cur = arg.c_str(); - const wchar_t *end = cur + arg.size(); - while (cur < end) { - if ((opts.all || !replacement_occurred) && - cmp_func(cur, pattern.c_str(), patlen) == 0) { - result += replacement; - cur += patlen; - replacement_occurred = true; - total_replaced++; - } else { - result.push_back(*cur); - cur++; - } - } - } - - if (!opts.quiet && (!opts.filter || replacement_occurred)) { - wcstring sep = want_newline ? L"\n" : L""; - streams.out.append(result + sep); - } - - return true; -} - -/// A return value of true means all is well (even if no replacements were performed), false -/// indicates an unrecoverable error. 
-bool regex_replacer_t::replace_matches(const wcstring &arg, bool want_newline) { - using namespace re; - if (!replacement) return false; // replacement was an invalid string - - sub_flags_t sflags{}; - sflags.global = opts.all; - sflags.extended = true; - - re_error_t error{}; - int repl_count{}; - maybe_t result = - this->regex.substitute(arg, *replacement, sflags, 0, &error, &repl_count); - - if (!result) { - string_error(streams, _(L"%ls: Regular expression substitute error: %ls\n"), argv0, - error.message().c_str()); - } else { - bool replacement_occurred = repl_count > 0; - if (!opts.quiet && (!opts.filter || replacement_occurred)) { - wcstring sep = want_newline ? L"\n" : L""; - streams.out.append(*result + sep); - } - total_replaced += repl_count; - } - return result.has_value(); -} - -static int string_replace(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - options_t opts; - opts.all_valid = true; - opts.filter_valid = true; - opts.ignore_case_valid = true; - opts.quiet_valid = true; - opts.regex_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, 2, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - const wchar_t *pattern = opts.arg1; - const wchar_t *replacement = opts.arg2; - - std::unique_ptr replacer; - if (opts.regex) { - if (auto re = try_compile_regex(pattern, opts, argv[0], streams)) { - replacer = - make_unique(argv[0], re.acquire(), replacement, opts, streams); - } else { - // try_compile_regex prints an error. - return STATUS_INVALID_ARGS; - } - } else { - replacer = make_unique(argv[0], pattern, replacement, opts, streams); - } - - arg_iterator_t aiter(argv, optind, streams); - while (const wcstring *arg = aiter.nextstr()) { - if (!replacer->replace_matches(*arg, aiter.want_newline())) return STATUS_INVALID_ARGS; - if (opts.quiet && replacer->replace_count() > 0) return STATUS_CMD_OK; - } - - return replacer->replace_count() > 0 ? 
STATUS_CMD_OK : STATUS_CMD_ERROR; -} - -static int string_split_maybe0(parser_t &parser, io_streams_t &streams, int argc, - const wchar_t **argv, bool is_split0) { - const wchar_t *cmd = argv[0]; - options_t opts; - opts.quiet_valid = true; - opts.right_valid = true; - opts.max_valid = true; - opts.max = LONG_MAX; - opts.no_empty_valid = true; - opts.fields_valid = true; - opts.allow_empty_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, is_split0 ? 0 : 1, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - if (opts.fields.empty() && opts.allow_empty) { - streams.err.append_format(BUILTIN_ERR_COMBO2, cmd, - _(L"--allow-empty is only valid with --fields")); - return STATUS_INVALID_ARGS; - } - - const wcstring sep = is_split0 ? wcstring(1, L'\0') : wcstring(opts.arg1); - - std::vector> all_splits; - size_t split_count = 0; - size_t arg_count = 0; - arg_iterator_t aiter(argv, optind, streams, !is_split0); - while (const wcstring *arg = aiter.nextstr()) { - std::vector splits; - if (opts.right) { - split_about(arg->rbegin(), arg->rend(), sep.rbegin(), sep.rend(), &splits, opts.max, - opts.no_empty); - } else { - split_about(arg->begin(), arg->end(), sep.begin(), sep.end(), &splits, opts.max, - opts.no_empty); - } - all_splits.push_back(splits); - // If we're quiet, we return early if we've found something to split. - if (opts.quiet && splits.size() > 1) return STATUS_CMD_OK; - split_count += splits.size(); - arg_count++; - } - - for (auto &splits : all_splits) { - // If we are from the right, split_about gave us reversed strings, in reversed order! - if (opts.right) { - for (auto &split : splits) { - std::reverse(split.begin(), split.end()); - } - std::reverse(splits.begin(), splits.end()); - } - - if (!opts.quiet) { - if (is_split0 && !splits.empty()) { - // split0 ignores a trailing \0, so a\0b\0 is two elements. - // In contrast to split, where a\nb\n is three - "a", "b" and "". 
- // - // Remove the last element if it is empty. - if (splits.back().empty()) splits.pop_back(); - } - if (!opts.fields.empty()) { - // Print nothing and return error if any of the supplied - // fields do not exist, unless `--allow-empty` is used. - if (!opts.allow_empty) { - for (const auto &field : opts.fields) { - // field indexing starts from 1 - if (field - 1 >= (long)splits.size()) { - return STATUS_CMD_ERROR; - } - } - } - for (const auto &field : opts.fields) { - if (field - 1 < (long)splits.size()) { - streams.out.append_with_separation(splits.at(field - 1), - separation_type_t::explicitly, true); - } - } - } else { - for (const wcstring &split : splits) { - streams.out.append_with_separation(split, separation_type_t::explicitly, true); - } - } - } - } - // We split something if we have more split values than args. - return split_count > arg_count ? STATUS_CMD_OK : STATUS_CMD_ERROR; -} - -static int string_split(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - return string_split_maybe0(parser, streams, argc, argv, false /* is_split0 */); -} - -static int string_split0(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - return string_split_maybe0(parser, streams, argc, argv, true /* is_split0 */); -} - -static int string_collect(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - options_t opts; - opts.allow_empty_valid = true; - opts.no_trim_newlines_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - arg_iterator_t aiter(argv, optind, streams, /* don't split */ false); - size_t appended = 0; - while (const wcstring *arg = aiter.nextstr()) { - const wchar_t *s = arg->c_str(); - size_t len = arg->size(); - if (!opts.no_trim_newlines) { - while (len > 0 && s[len - 1] == L'\n') { - len -= 1; - } - } - streams.out.append_with_separation(s, len, separation_type_t::explicitly, - 
aiter.want_newline()); - appended += len; - } - - // If we haven't printed anything and "no_empty" is set, - // print something empty. Helps with empty ellision: - // echo (true | string collect --allow-empty)"bar" - // prints "bar". - if (opts.allow_empty && appended == 0) { - streams.out.append_with_separation( - L"", 0, separation_type_t::explicitly, - true /* historical behavior is to always print a newline */); - } - - return appended > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR; -} - -static int string_repeat(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - options_t opts; - opts.count_valid = true; - opts.max_valid = true; - opts.quiet_valid = true; - opts.no_newline_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - if (opts.max == 0 && opts.count == 0) { - // XXX: This used to be allowed, but returned 1. - // Keep it that way for now instead of adding an error. - // streams.err.append(L"Count or max must be greater than zero"); - return STATUS_CMD_ERROR; - } - - bool all_empty = true; - bool first = true; - - arg_iterator_t aiter(argv, optind, streams); - while (const wcstring *word = aiter.nextstr()) { - // If the string is empty, there is nothing to repeat. - if (word->empty()) { - continue; - } - - all_empty = false; - if (opts.quiet) { - // Early out if we can - see #7495. - return STATUS_CMD_OK; - } - - if (!first && !opts.quiet) { - streams.out.push(L'\n'); - } - first = false; - - auto &w = *word; - - // The maximum size of the string is either the "max" characters, - // or it's the "count" repetitions, whichever ends up lower. - size_t max = opts.max; - if (max == 0 || (opts.count > 0 && w.length() * opts.count < max)) { - max = w.length() * opts.count; - } - - // Reserve a string to avoid writing constantly. - // The 1500 here is a total gluteal extraction, but 500 seems to perform slightly worse. 
- const size_t chunk_size = 1500; - // The + word length is so we don't have to hit the chunk size exactly, - // which would require us to restart in the middle of the string. - // E.g. imagine repeating "12345678". The first chunk is hit after a last "1234", - // so we would then have to restart by appending "5678", which requires a substring. - // So let's not bother. - // - // Unless of course we don't even print the entire word, in which case we just need max. - wcstring chunk; - chunk.reserve(std::min(chunk_size + w.length(), max)); - - for (size_t i = max; i > 0;) { - // Build up the chunk. - if (i >= w.length()) { - chunk.append(w); - } else { - chunk.append(w.substr(0, i)); - break; - } - - i -= w.length(); - - if (chunk.length() >= chunk_size) { - // We hit the chunk size, write it repeatedly until we can't anymore. - streams.out.append(chunk); - while (i >= chunk.length()) { - streams.out.append(chunk); - // We can easily be asked to write *a lot* of data, - // so we need to check every so often if the pipe has been closed. - // If we didn't, running `string repeat -n LARGENUMBER foo | pv` - // and pressing ctrl-c seems to hang. - if (streams.out.flush_and_check_error() != STATUS_CMD_OK) { - return STATUS_CMD_ERROR; - } - i -= chunk.length(); - } - chunk.clear(); - } - } - // Flush the remainder. - if (!chunk.empty()) { - streams.out.append(chunk); - } - } - - // Historical behavior is to never append a newline if all strings were empty. - if (!opts.quiet && !opts.no_newline && !all_empty && aiter.want_newline()) { - streams.out.push(L'\n'); - } - - return all_empty ? 
STATUS_CMD_ERROR : STATUS_CMD_OK; -} - -static int string_sub(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - const wchar_t *cmd = argv[0]; - - options_t opts; - opts.length_valid = true; - opts.quiet_valid = true; - opts.start_valid = true; - opts.end_valid = true; - opts.length = -1; - int optind; - int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - if (opts.length != -1 && opts.end != 0) { - streams.err.append_format(BUILTIN_ERR_COMBO2, cmd, - _(L"--end and --length are mutually exclusive")); - return STATUS_INVALID_ARGS; - } - - int nsub = 0; - arg_iterator_t aiter(argv, optind, streams); - while (const wcstring *s = aiter.nextstr()) { - using size_type = wcstring::size_type; - size_type pos = 0; - size_type count = wcstring::npos; - wcstring sep = aiter.want_newline() ? L"\n" : L""; - - if (opts.start > 0) { - pos = static_cast(opts.start - 1); - } else if (opts.start < 0) { - assert(opts.start != LONG_MIN); // checked above - auto n = static_cast(-opts.start); - pos = n > s->length() ? 0 : s->length() - n; - } - - if (pos > s->length()) { - pos = s->length(); - } - - if (opts.length >= 0) { - count = static_cast(opts.length); - } else if (opts.end != 0) { - size_type n; - if (opts.end > 0) { - n = static_cast(opts.end); - } else { - assert(opts.end != LONG_MIN); // checked above - n = static_cast(-opts.end); - n = n > s->length() ? 0 : s->length() - n; - } - count = n < pos ? 0 : n - pos; - } - - // Note that std::string permits count to extend past end of string. - if (!opts.quiet) { - streams.out.append(s->substr(pos, count) + sep); - } - nsub++; - if (opts.quiet) return STATUS_CMD_OK; - } - - return nsub > 0 ? 
STATUS_CMD_OK : STATUS_CMD_ERROR; -} - -static int string_shorten(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - options_t opts; - opts.chars_to_shorten_valid = true; - opts.chars_to_trim = get_ellipsis_str(); - opts.max_valid = true; - opts.no_newline_valid = true; - opts.quiet_valid = true; - opts.max = -1; - opts.left_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - // Find max width of strings and keep the inputs - size_t min_width = SIZE_MAX; - std::vector inputs; - wcstring ell = opts.chars_to_trim; - - auto ell_width = fish_wcswidth(ell); - - arg_iterator_t aiter_width(argv, optind, streams); - - if (opts.max == 0) { - // Special case: Max of 0 means no shortening. - // This makes this more reusable, so you don't need special-cases like - // - // if test $shorten -gt 0 - // string shorten -m $shorten whatever - // else - // echo whatever - // end - while (const wcstring *arg = aiter_width.nextstr()) { - streams.out.append(*arg + L"\n"); - } - return STATUS_CMD_ERROR; - } - - while (const wcstring *arg = aiter_width.nextstr()) { - // Visible width only makes sense line-wise. - // So either we have no-newlines (which means we shorten on the first newline), - // or we handle the lines separately. - auto splits = split_string(*arg, L'\n'); - if (opts.no_newline && splits.size() > 1) { - wcstring str = !opts.left ? 
splits[0] : splits[splits.size() - 1]; - str.append(ell); - ssize_t width = width_without_escapes(str); - if (width > 0 && (size_t)width < min_width) min_width = width; - inputs.push_back(str); - } else { - for (auto &input_string : splits) { - ssize_t width = width_without_escapes(input_string); - if (width > 0 && (size_t)width < min_width) min_width = width; - inputs.push_back(std::move(input_string)); - } - } - } - - // opts.max is signed for other subcommands, - // but we compare against .size() a bunch, - // this shuts the compiler up. - size_t ourmax = min_width; - if (opts.max > 0) { - ourmax = opts.max; - } - - if (ell_width > (ssize_t)ourmax) { - // If we can't even print our ellipsis, we substitute nothing, - // truncating instead. - ell = L""; - ell_width = 0; - } - - int nsub = 0; - // We could also error out here if the width of our ellipsis is larger - // than the target width. - // That seems excessive - specifically because the ellipsis on LANG=C - // is "..." (width 3!). - - auto skip_escapes = [&](const wcstring &l, size_t pos) { - size_t totallen = 0; - while (l[pos + totallen] == L'\x1B') { - auto len = escape_code_length(l.c_str() + pos + totallen); - if (!len.has_value()) break; - totallen += *len; - } - return totallen; - }; - - for (auto &line : inputs) { - size_t pos = 0; - size_t max = 0; - // Collect how much of the string we can use without going over the maximum. - if (opts.left) { - // Our strategy for keeping from the end. - // This is rather unoptimized - actually going *backwards* - // is extremely tricky because we would have to subtract escapes again. - // Also we need to avoid hacking combiners into bits. - // This should work for most cases considering the combiners typically have width 0. - wcstring out; - while (pos < line.size()) { - auto w = width_without_escapes(line, pos); - // If we're at the beginning and it fits, we sits. 
- // - // Otherwise we require it to fit the ellipsis - if ((w <= ourmax && pos == 0) || w + ell_width <= ourmax) { - out = line.substr(pos); - break; - } - - auto skip = skip_escapes(line, pos); - pos += skip > 0 ? skip : 1; - } - if (opts.quiet && pos != 0) { - return STATUS_CMD_OK; - } - - if (pos == 0) { - streams.out.append(line + L"\n"); - } else { - // We have an ellipsis, construct our string and print it. - nsub++; - out = ell + out + L'\n'; - streams.out.append(out); - } - continue; - } else { - // Going from the left. - // This is somewhat easier. - while (max <= ourmax && pos < line.size()) { - pos += skip_escapes(line, pos); - auto w = fish_wcwidth(line[pos]); - if (w <= 0 || max + w + ell_width <= ourmax) { - // If it still fits, even if it is the last, we add it. - max += w; - pos++; - } else { - // We're at the limit, so see if the entire string fits. - auto max2 = max + w; - auto pos2 = pos + 1; - while (pos2 < line.size()) { - pos2 += skip_escapes(line, pos2); - max2 += fish_wcwidth(line[pos2]); - pos2++; - } - - if (max2 <= ourmax) { - // We're at the end and everything fits, - // no ellipsis. - pos = pos2; - } - break; - } - } - } - - if (opts.quiet && pos != line.size()) { - return STATUS_CMD_OK; - } - - if (pos == line.size()) { - streams.out.append(line + L"\n"); - } else { - nsub++; - wcstring newl = line.substr(0, pos); - newl.append(ell); - newl.push_back(L'\n'); - streams.out.append(newl); - } - } - - // Return true if we have shortened something and false otherwise. - return nsub > 0 ? 
STATUS_CMD_OK : STATUS_CMD_ERROR; -} - -static int string_trim(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - options_t opts; - opts.chars_to_trim_valid = true; - opts.left_valid = true; - opts.right_valid = true; - opts.quiet_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - // If neither left or right is specified, we do both. - if (!opts.left && !opts.right) { - opts.left = opts.right = true; - } - - size_t ntrim = 0; - - arg_iterator_t aiter(argv, optind, streams); - while (const wcstring *arg = aiter.nextstr()) { - wcstring sep = aiter.want_newline() ? L"\n" : L""; - // Begin and end are respectively the first character to keep on the left, and first - // character to trim on the right. The length is thus end - start. - size_t begin = 0, end = arg->size(); - if (opts.right) { - size_t last_to_keep = arg->find_last_not_of(opts.chars_to_trim); - end = (last_to_keep == wcstring::npos) ? 0 : last_to_keep + 1; - } - if (opts.left) { - size_t first_to_keep = arg->find_first_not_of(opts.chars_to_trim); - begin = (first_to_keep == wcstring::npos ? end : first_to_keep); - } - assert(begin <= end && end <= arg->size()); - ntrim += arg->size() - (end - begin); - if (!opts.quiet) { - streams.out.append(wcstring(*arg, begin, end - begin) + sep); - } else if (ntrim > 0) { - return STATUS_CMD_OK; - } - } - - return ntrim > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR; -} - -// A helper function for lower and upper. 
-static int string_transform(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv, - std::wint_t (*func)(std::wint_t)) { - options_t opts; - opts.quiet_valid = true; - int optind; - int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - int n_transformed = 0; - arg_iterator_t aiter(argv, optind, streams); - while (const wcstring *arg = aiter.nextstr()) { - wcstring transformed(*arg); - std::transform(transformed.begin(), transformed.end(), transformed.begin(), func); - if (transformed != *arg) n_transformed++; - if (!opts.quiet) { - wcstring sep = aiter.want_newline() ? L"\n" : L""; - streams.out.append(transformed + sep); - } else if (n_transformed > 0) { - return STATUS_CMD_OK; - } - } - - return n_transformed > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR; -} - -/// Implementation of `string lower`. -static int string_lower(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - return string_transform(parser, streams, argc, argv, std::towlower); -} - -/// Implementation of `string upper`. 
-static int string_upper(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { - return string_transform(parser, streams, argc, argv, std::towupper); -} - -// Keep sorted alphabetically -static constexpr const struct string_subcommand { - const wchar_t *name; - int (*handler)(parser_t &, io_streams_t &, int argc, //!OCLINT(unused param) - const wchar_t **argv); //!OCLINT(unused param) -} string_subcommands[] = { - {L"collect", &string_collect}, {L"escape", &string_escape}, {L"join", &string_join}, - {L"join0", &string_join0}, {L"length", &string_length}, {L"lower", &string_lower}, - {L"match", &string_match}, {L"pad", &string_pad}, {L"repeat", &string_repeat}, - {L"replace", &string_replace}, {L"shorten", &string_shorten}, {L"split", &string_split}, - {L"split0", &string_split0}, {L"sub", &string_sub}, {L"trim", &string_trim}, - {L"unescape", &string_unescape}, {L"upper", &string_upper}, -}; -ASSERT_SORTED_BY_NAME(string_subcommands); -} // namespace - -/// The string builtin, for manipulating strings. 
-maybe_t builtin_string(parser_t &parser, io_streams_t &streams, const wchar_t **argv) { - const wchar_t *cmd = argv[0]; - int argc = builtin_count_args(argv); - if (argc <= 1) { - streams.err.append_format(BUILTIN_ERR_MISSING_SUBCMD, cmd); - builtin_print_error_trailer(parser, streams.err, L"string"); - return STATUS_INVALID_ARGS; - } - - if (std::wcscmp(argv[1], L"-h") == 0 || std::wcscmp(argv[1], L"--help") == 0) { - builtin_print_help(parser, streams, L"string"); - return STATUS_CMD_OK; - } - - const wchar_t *subcmd_name = argv[1]; - const auto *subcmd = get_by_sorted_name(subcmd_name, string_subcommands); - if (!subcmd) { - streams.err.append_format(BUILTIN_ERR_INVALID_SUBCMD, cmd, subcmd_name); - builtin_print_error_trailer(parser, streams.err, L"string"); - return STATUS_INVALID_ARGS; - } - - if (argc >= 3 && (std::wcscmp(argv[2], L"-h") == 0 || std::wcscmp(argv[2], L"--help") == 0)) { - wcstring string_dash_subcommand = wcstring(argv[0]) + L"-" + subcmd_name; - builtin_print_help(parser, streams, string_dash_subcommand.c_str()); - return STATUS_CMD_OK; - } - argc--; - argv++; - return subcmd->handler(parser, streams, argc, argv); -} diff --git a/src/builtins/string.h b/src/builtins/string.h deleted file mode 100644 index cdb933e65..000000000 --- a/src/builtins/string.h +++ /dev/null @@ -1,14 +0,0 @@ -// Prototypes for functions for executing builtin_string functions. 
-#ifndef FISH_BUILTIN_STRING_H -#define FISH_BUILTIN_STRING_H - -#include -#include - -#include "../io.h" -#include "../maybe.h" - -class parser_t; - -maybe_t builtin_string(parser_t &parser, io_streams_t &streams, const wchar_t **argv); -#endif diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index def1bf7b6..724f4ea98 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -89,7 +89,6 @@ #include "parser.h" #include "path.h" #include "proc.h" -#include "re.h" #include "reader.h" #include "redirection.h" #include "screen.h" @@ -4981,384 +4980,6 @@ static void test_wwrite_to_fd() { (void)remove(t); } -maybe_t builtin_string(parser_t &parser, io_streams_t &streams, const wchar_t **argv); -static void run_one_string_test(const wchar_t *const *argv_raw, int expected_rc, - const wchar_t *expected_out) { - // Copy to a null terminated array, as builtin_string may wish to rearrange our pointers. - std::vector argv_list(argv_raw, argv_raw + null_terminated_array_length(argv_raw)); - null_terminated_array_t argv(argv_list); - - parser_t &parser = parser_t::principal_parser(); - string_output_stream_t outs{}; - null_output_stream_t errs{}; - io_streams_t streams(outs, errs); - streams.stdin_is_directly_redirected = false; // read from argv instead of stdin - maybe_t rc = builtin_string(parser, streams, argv.get()); - - wcstring args; - for (const wcstring &arg : argv_list) { - args += escape_string(arg) + L' '; - } - args.resize(args.size() - 1); - - if (rc != expected_rc) { - // The comparison above would have panicked if rc didn't have a value, so it's safe to - // assume it has one here: - std::wstring got = std::to_wstring(rc.value()); - err(L"Test failed on line %lu: [%ls]: expected return code %d but got %s", __LINE__, - args.c_str(), expected_rc, got.c_str()); - } else if (outs.contents() != expected_out) { - err(L"Test failed on line %lu: [%ls]: expected [%ls] but got [%ls]", __LINE__, args.c_str(), - escape_string(expected_out).c_str(), 
escape_string(outs.contents()).c_str()); - } -} - -static void test_string() { - say(L"Testing builtin_string"); - const struct string_test { - const wchar_t *argv[15]; - int expected_rc; - const wchar_t *expected_out; - } string_tests[] = { // - {{L"string", L"escape", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"escape", L"", nullptr}, STATUS_CMD_OK, L"''\n"}, - {{L"string", L"escape", L"-n", L"", nullptr}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"escape", L"a", nullptr}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"escape", L"\x07", nullptr}, STATUS_CMD_OK, L"\\cg\n"}, - {{L"string", L"escape", L"\"x\"", nullptr}, STATUS_CMD_OK, L"'\"x\"'\n"}, - {{L"string", L"escape", L"hello world", nullptr}, STATUS_CMD_OK, L"'hello world'\n"}, - {{L"string", L"escape", L"-n", L"hello world", nullptr}, STATUS_CMD_OK, L"hello\\ world\n"}, - {{L"string", L"escape", L"hello", L"world", nullptr}, STATUS_CMD_OK, L"hello\nworld\n"}, - {{L"string", L"escape", L"-n", L"~", nullptr}, STATUS_CMD_OK, L"\\~\n"}, - - {{L"string", L"join", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"join", L"", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"join", L"", L"", L"", L"", nullptr}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"join", L"", L"a", L"b", L"c", nullptr}, STATUS_CMD_OK, L"abc\n"}, - {{L"string", L"join", L".", L"fishshell", L"com", nullptr}, - STATUS_CMD_OK, - L"fishshell.com\n"}, - {{L"string", L"join", L"/", L"usr", nullptr}, STATUS_CMD_ERROR, L"usr\n"}, - {{L"string", L"join", L"/", L"usr", L"local", L"bin", nullptr}, - STATUS_CMD_OK, - L"usr/local/bin\n"}, - {{L"string", L"join", L"...", L"3", L"2", L"1", nullptr}, STATUS_CMD_OK, L"3...2...1\n"}, - {{L"string", L"join", L"-q", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"join", L"-q", L".", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"join", L"-q", L".", L".", nullptr}, STATUS_CMD_ERROR, L""}, - - {{L"string", L"length", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"length", L"", nullptr}, 
STATUS_CMD_ERROR, L"0\n"}, - {{L"string", L"length", L"", L"", L"", nullptr}, STATUS_CMD_ERROR, L"0\n0\n0\n"}, - {{L"string", L"length", L"a", nullptr}, STATUS_CMD_OK, L"1\n"}, -#if WCHAR_T_BITS > 16 - {{L"string", L"length", L"\U0002008A", nullptr}, STATUS_CMD_OK, L"1\n"}, -#endif - {{L"string", L"length", L"um", L"dois", L"três", nullptr}, STATUS_CMD_OK, L"2\n4\n4\n"}, - {{L"string", L"length", L"um", L"dois", L"três", nullptr}, STATUS_CMD_OK, L"2\n4\n4\n"}, - {{L"string", L"length", L"-q", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"length", L"-q", L"", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"length", L"-q", L"a", nullptr}, STATUS_CMD_OK, L""}, - - {{L"string", L"match", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"match", L"", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"", L"", nullptr}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"match", L"?", L"a", nullptr}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"match", L"*", L"", nullptr}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"match", L"**", L"", nullptr}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"match", L"*", L"xyzzy", nullptr}, STATUS_CMD_OK, L"xyzzy\n"}, - {{L"string", L"match", L"**", L"plugh", nullptr}, STATUS_CMD_OK, L"plugh\n"}, - {{L"string", L"match", L"a*b", L"axxb", nullptr}, STATUS_CMD_OK, L"axxb\n"}, - {{L"string", L"match", L"a??b", L"axxb", nullptr}, STATUS_CMD_OK, L"axxb\n"}, - {{L"string", L"match", L"-i", L"a??B", L"axxb", nullptr}, STATUS_CMD_OK, L"axxb\n"}, - {{L"string", L"match", L"-i", L"a??b", L"Axxb", nullptr}, STATUS_CMD_OK, L"Axxb\n"}, - {{L"string", L"match", L"a*", L"axxb", nullptr}, STATUS_CMD_OK, L"axxb\n"}, - {{L"string", L"match", L"*a", L"xxa", nullptr}, STATUS_CMD_OK, L"xxa\n"}, - {{L"string", L"match", L"*a*", L"axa", nullptr}, STATUS_CMD_OK, L"axa\n"}, - {{L"string", L"match", L"*a*", L"xax", nullptr}, STATUS_CMD_OK, L"xax\n"}, - {{L"string", L"match", L"*a*", L"bxa", nullptr}, STATUS_CMD_OK, L"bxa\n"}, - {{L"string", L"match", L"*a", L"a", 
nullptr}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"match", L"a*", L"a", nullptr}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"match", L"a*b*c", L"axxbyyc", nullptr}, STATUS_CMD_OK, L"axxbyyc\n"}, - {{L"string", L"match", L"\\*", L"*", nullptr}, STATUS_CMD_OK, L"*\n"}, - {{L"string", L"match", L"a*\\", L"abc\\", nullptr}, STATUS_CMD_OK, L"abc\\\n"}, - {{L"string", L"match", L"a*\\?", L"abc?", nullptr}, STATUS_CMD_OK, L"abc?\n"}, - - {{L"string", L"match", L"?", L"", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"?", L"ab", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"??", L"a", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"?a", L"a", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"a?", L"a", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"a??B", L"axxb", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"a*b", L"axxbc", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"*b", L"bbba", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"0x[0-9a-fA-F][0-9a-fA-F]", L"0xbad", nullptr}, - STATUS_CMD_ERROR, - L""}, - - {{L"string", L"match", L"-a", L"*", L"ab", L"cde", nullptr}, STATUS_CMD_OK, L"ab\ncde\n"}, - {{L"string", L"match", L"*", L"ab", L"cde", nullptr}, STATUS_CMD_OK, L"ab\ncde\n"}, - {{L"string", L"match", L"-n", L"*d*", L"cde", nullptr}, STATUS_CMD_OK, L"1 3\n"}, - {{L"string", L"match", L"-n", L"*x*", L"cde", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"-q", L"a*", L"b", L"c", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"-q", L"a*", L"b", L"a", nullptr}, STATUS_CMD_OK, L""}, - - {{L"string", L"match", L"-r", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"match", L"-r", L"", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"-r", L"", L"", nullptr}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"match", L"-r", L".", L"a", nullptr}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"match", L"-r", L".*", L"", nullptr}, STATUS_CMD_OK, 
L"\n"}, - {{L"string", L"match", L"-r", L"a*b", L"b", nullptr}, STATUS_CMD_OK, L"b\n"}, - {{L"string", L"match", L"-r", L"a*b", L"aab", nullptr}, STATUS_CMD_OK, L"aab\n"}, - {{L"string", L"match", L"-r", L"-i", L"a*b", L"Aab", nullptr}, STATUS_CMD_OK, L"Aab\n"}, - {{L"string", L"match", L"-r", L"-a", L"a[bc]", L"abadac", nullptr}, - STATUS_CMD_OK, - L"ab\nac\n"}, - {{L"string", L"match", L"-r", L"a", L"xaxa", L"axax", nullptr}, STATUS_CMD_OK, L"a\na\n"}, - {{L"string", L"match", L"-r", L"-a", L"a", L"xaxa", L"axax", nullptr}, - STATUS_CMD_OK, - L"a\na\na\na\n"}, - {{L"string", L"match", L"-r", L"a[bc]", L"abadac", nullptr}, STATUS_CMD_OK, L"ab\n"}, - {{L"string", L"match", L"-r", L"-q", L"a[bc]", L"abadac", nullptr}, STATUS_CMD_OK, L""}, - {{L"string", L"match", L"-r", L"-q", L"a[bc]", L"ad", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"-r", L"(a+)b(c)", L"aabc", nullptr}, - STATUS_CMD_OK, - L"aabc\naa\nc\n"}, - {{L"string", L"match", L"-r", L"-a", L"(a)b(c)", L"abcabc", nullptr}, - STATUS_CMD_OK, - L"abc\na\nc\nabc\na\nc\n"}, - {{L"string", L"match", L"-r", L"(a)b(c)", L"abcabc", nullptr}, - STATUS_CMD_OK, - L"abc\na\nc\n"}, - {{L"string", L"match", L"-r", L"(a|(z))(bc)", L"abc", nullptr}, - STATUS_CMD_OK, - L"abc\na\nbc\n"}, - {{L"string", L"match", L"-r", L"-n", L"a", L"ada", L"dad", nullptr}, - STATUS_CMD_OK, - L"1 1\n2 1\n"}, - {{L"string", L"match", L"-r", L"-n", L"-a", L"a", L"bacadae", nullptr}, - STATUS_CMD_OK, - L"2 1\n4 1\n6 1\n"}, - {{L"string", L"match", L"-r", L"-n", L"(a).*(b)", L"a---b", nullptr}, - STATUS_CMD_OK, - L"1 5\n1 1\n5 1\n"}, - {{L"string", L"match", L"-r", L"-n", L"(a)(b)", L"ab", nullptr}, - STATUS_CMD_OK, - L"1 2\n1 1\n2 1\n"}, - {{L"string", L"match", L"-r", L"-n", L"(a)(b)", L"abab", nullptr}, - STATUS_CMD_OK, - L"1 2\n1 1\n2 1\n"}, - {{L"string", L"match", L"-r", L"-n", L"-a", L"(a)(b)", L"abab", nullptr}, - STATUS_CMD_OK, - L"1 2\n1 1\n2 1\n3 2\n3 1\n4 1\n"}, - {{L"string", L"match", L"-r", L"*", L"", nullptr}, 
STATUS_INVALID_ARGS, L""}, - {{L"string", L"match", L"-r", L"-a", L"a*", L"b", nullptr}, STATUS_CMD_OK, L"\n\n"}, - {{L"string", L"match", L"-r", L"foo\\Kbar", L"foobar", nullptr}, STATUS_CMD_OK, L"bar\n"}, - {{L"string", L"match", L"-r", L"(foo)\\Kbar", L"foobar", nullptr}, - STATUS_CMD_OK, - L"bar\nfoo\n"}, - {{L"string", L"replace", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"replace", L"", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"replace", L"", L"", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"replace", L"", L"", L"", nullptr}, STATUS_CMD_ERROR, L"\n"}, - {{L"string", L"replace", L"", L"", L" ", nullptr}, STATUS_CMD_ERROR, L" \n"}, - {{L"string", L"replace", L"a", L"b", L"", nullptr}, STATUS_CMD_ERROR, L"\n"}, - {{L"string", L"replace", L"a", L"b", L"a", nullptr}, STATUS_CMD_OK, L"b\n"}, - {{L"string", L"replace", L"a", L"b", L"xax", nullptr}, STATUS_CMD_OK, L"xbx\n"}, - {{L"string", L"replace", L"a", L"b", L"xax", L"axa", nullptr}, - STATUS_CMD_OK, - L"xbx\nbxa\n"}, - {{L"string", L"replace", L"bar", L"x", L"red barn", nullptr}, STATUS_CMD_OK, L"red xn\n"}, - {{L"string", L"replace", L"x", L"bar", L"red xn", nullptr}, STATUS_CMD_OK, L"red barn\n"}, - {{L"string", L"replace", L"--", L"x", L"-", L"xyz", nullptr}, STATUS_CMD_OK, L"-yz\n"}, - {{L"string", L"replace", L"--", L"y", L"-", L"xyz", nullptr}, STATUS_CMD_OK, L"x-z\n"}, - {{L"string", L"replace", L"--", L"z", L"-", L"xyz", nullptr}, STATUS_CMD_OK, L"xy-\n"}, - {{L"string", L"replace", L"-i", L"z", L"X", L"_Z_", nullptr}, STATUS_CMD_OK, L"_X_\n"}, - {{L"string", L"replace", L"-a", L"a", L"A", L"aaa", nullptr}, STATUS_CMD_OK, L"AAA\n"}, - {{L"string", L"replace", L"-i", L"a", L"z", L"AAA", nullptr}, STATUS_CMD_OK, L"zAA\n"}, - {{L"string", L"replace", L"-q", L"x", L">x<", L"x", nullptr}, STATUS_CMD_OK, L""}, - {{L"string", L"replace", L"-a", L"x", L"", L"xxx", nullptr}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"replace", L"-a", L"***", L"_", L"*****", nullptr}, STATUS_CMD_OK, 
L"_**\n"}, - {{L"string", L"replace", L"-a", L"***", L"***", L"******", nullptr}, - STATUS_CMD_OK, - L"******\n"}, - {{L"string", L"replace", L"-a", L"a", L"b", L"xax", L"axa", nullptr}, - STATUS_CMD_OK, - L"xbx\nbxb\n"}, - - {{L"string", L"replace", L"-r", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"replace", L"-r", L"", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"replace", L"-r", L"", L"", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"replace", L"-r", L"", L"", L"", nullptr}, - STATUS_CMD_OK, - L"\n"}, // pcre2 behavior - {{L"string", L"replace", L"-r", L"", L"", L" ", nullptr}, - STATUS_CMD_OK, - L" \n"}, // pcre2 behavior - {{L"string", L"replace", L"-r", L"a", L"b", L"", nullptr}, STATUS_CMD_ERROR, L"\n"}, - {{L"string", L"replace", L"-r", L"a", L"b", L"a", nullptr}, STATUS_CMD_OK, L"b\n"}, - {{L"string", L"replace", L"-r", L".", L"x", L"abc", nullptr}, STATUS_CMD_OK, L"xbc\n"}, - {{L"string", L"replace", L"-r", L".", L"", L"abc", nullptr}, STATUS_CMD_OK, L"bc\n"}, - {{L"string", L"replace", L"-r", L"(\\w)(\\w)", L"$2$1", L"ab", nullptr}, - STATUS_CMD_OK, - L"ba\n"}, - {{L"string", L"replace", L"-r", L"(\\w)", L"$1$1", L"ab", nullptr}, - STATUS_CMD_OK, - L"aab\n"}, - {{L"string", L"replace", L"-r", L"-a", L".", L"x", L"abc", nullptr}, - STATUS_CMD_OK, - L"xxx\n"}, - {{L"string", L"replace", L"-r", L"-a", L"(\\w)", L"$1$1", L"ab", nullptr}, - STATUS_CMD_OK, - L"aabb\n"}, - {{L"string", L"replace", L"-r", L"-a", L".", L"", L"abc", nullptr}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"replace", L"-r", L"a", L"x", L"bc", L"cd", L"de", nullptr}, - STATUS_CMD_ERROR, - L"bc\ncd\nde\n"}, - {{L"string", L"replace", L"-r", L"a", L"x", L"aba", L"caa", nullptr}, - STATUS_CMD_OK, - L"xba\ncxa\n"}, - {{L"string", L"replace", L"-r", L"-a", L"a", L"x", L"aba", L"caa", nullptr}, - STATUS_CMD_OK, - L"xbx\ncxx\n"}, - {{L"string", L"replace", L"-r", L"-i", L"A", L"b", L"xax", nullptr}, - STATUS_CMD_OK, - L"xbx\n"}, - {{L"string", L"replace", L"-r", L"-i", 
L"[a-z]", L".", L"1A2B", nullptr}, - STATUS_CMD_OK, - L"1.2B\n"}, - {{L"string", L"replace", L"-r", L"A", L"b", L"xax", nullptr}, STATUS_CMD_ERROR, L"xax\n"}, - {{L"string", L"replace", L"-r", L"a", L"$1", L"a", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"replace", L"-r", L"(a)", L"$2", L"a", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"replace", L"-r", L"*", L".", L"a", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"replace", L"-ra", L"x", L"\\c", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"replace", L"-r", L"^(.)", L"\t$1", L"abc", L"x", nullptr}, - STATUS_CMD_OK, - L"\tabc\n\tx\n"}, - - {{L"string", L"split", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"split", L":", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"split", L".", L"www.ch.ic.ac.uk", nullptr}, - STATUS_CMD_OK, - L"www\nch\nic\nac\nuk\n"}, - {{L"string", L"split", L"..", L"....", nullptr}, STATUS_CMD_OK, L"\n\n\n"}, - {{L"string", L"split", L"-m", L"x", L"..", L"....", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"split", L"-m1", L"..", L"....", nullptr}, STATUS_CMD_OK, L"\n..\n"}, - {{L"string", L"split", L"-m0", L"/", L"/usr/local/bin/fish", nullptr}, - STATUS_CMD_ERROR, - L"/usr/local/bin/fish\n"}, - {{L"string", L"split", L"-m2", L":", L"a:b:c:d", L"e:f:g:h", nullptr}, - STATUS_CMD_OK, - L"a\nb\nc:d\ne\nf\ng:h\n"}, - {{L"string", L"split", L"-m1", L"-r", L"/", L"/usr/local/bin/fish", nullptr}, - STATUS_CMD_OK, - L"/usr/local/bin\nfish\n"}, - {{L"string", L"split", L"-r", L".", L"www.ch.ic.ac.uk", nullptr}, - STATUS_CMD_OK, - L"www\nch\nic\nac\nuk\n"}, - {{L"string", L"split", L"--", L"--", L"a--b---c----d", nullptr}, - STATUS_CMD_OK, - L"a\nb\n-c\n\nd\n"}, - {{L"string", L"split", L"-r", L"..", L"....", nullptr}, STATUS_CMD_OK, L"\n\n\n"}, - {{L"string", L"split", L"-r", L"--", L"--", L"a--b---c----d", nullptr}, - STATUS_CMD_OK, - L"a\nb-\nc\n\nd\n"}, - {{L"string", L"split", L"", L"", nullptr}, STATUS_CMD_ERROR, L"\n"}, - {{L"string", 
L"split", L"", L"a", nullptr}, STATUS_CMD_ERROR, L"a\n"}, - {{L"string", L"split", L"", L"ab", nullptr}, STATUS_CMD_OK, L"a\nb\n"}, - {{L"string", L"split", L"", L"abc", nullptr}, STATUS_CMD_OK, L"a\nb\nc\n"}, - {{L"string", L"split", L"-m1", L"", L"abc", nullptr}, STATUS_CMD_OK, L"a\nbc\n"}, - {{L"string", L"split", L"-r", L"", L"", nullptr}, STATUS_CMD_ERROR, L"\n"}, - {{L"string", L"split", L"-r", L"", L"a", nullptr}, STATUS_CMD_ERROR, L"a\n"}, - {{L"string", L"split", L"-r", L"", L"ab", nullptr}, STATUS_CMD_OK, L"a\nb\n"}, - {{L"string", L"split", L"-r", L"", L"abc", nullptr}, STATUS_CMD_OK, L"a\nb\nc\n"}, - {{L"string", L"split", L"-r", L"-m1", L"", L"abc", nullptr}, STATUS_CMD_OK, L"ab\nc\n"}, - {{L"string", L"split", L"-q", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"split", L"-q", L":", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"split", L"-q", L"x", L"axbxc", nullptr}, STATUS_CMD_OK, L""}, - - {{L"string", L"sub", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"sub", L"abcde", nullptr}, STATUS_CMD_OK, L"abcde\n"}, - {{L"string", L"sub", L"-l", L"x", L"abcde", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"sub", L"-s", L"x", L"abcde", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"sub", L"-l0", L"abcde", nullptr}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"sub", L"-l2", L"abcde", nullptr}, STATUS_CMD_OK, L"ab\n"}, - {{L"string", L"sub", L"-l5", L"abcde", nullptr}, STATUS_CMD_OK, L"abcde\n"}, - {{L"string", L"sub", L"-l6", L"abcde", nullptr}, STATUS_CMD_OK, L"abcde\n"}, - {{L"string", L"sub", L"-l-1", L"abcde", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"sub", L"-s0", L"abcde", nullptr}, STATUS_INVALID_ARGS, L""}, - {{L"string", L"sub", L"-s1", L"abcde", nullptr}, STATUS_CMD_OK, L"abcde\n"}, - {{L"string", L"sub", L"-s5", L"abcde", nullptr}, STATUS_CMD_OK, L"e\n"}, - {{L"string", L"sub", L"-s6", L"abcde", nullptr}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"sub", L"-s-1", L"abcde", nullptr}, STATUS_CMD_OK, L"e\n"}, - 
{{L"string", L"sub", L"-s-5", L"abcde", nullptr}, STATUS_CMD_OK, L"abcde\n"}, - {{L"string", L"sub", L"-s-6", L"abcde", nullptr}, STATUS_CMD_OK, L"abcde\n"}, - {{L"string", L"sub", L"-s1", L"-l0", L"abcde", nullptr}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"sub", L"-s1", L"-l1", L"abcde", nullptr}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"sub", L"-s2", L"-l2", L"abcde", nullptr}, STATUS_CMD_OK, L"bc\n"}, - {{L"string", L"sub", L"-s-1", L"-l1", L"abcde", nullptr}, STATUS_CMD_OK, L"e\n"}, - {{L"string", L"sub", L"-s-1", L"-l2", L"abcde", nullptr}, STATUS_CMD_OK, L"e\n"}, - {{L"string", L"sub", L"-s-3", L"-l2", L"abcde", nullptr}, STATUS_CMD_OK, L"cd\n"}, - {{L"string", L"sub", L"-s-3", L"-l4", L"abcde", nullptr}, STATUS_CMD_OK, L"cde\n"}, - {{L"string", L"sub", L"-q", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"sub", L"-q", L"abcde", nullptr}, STATUS_CMD_OK, L""}, - - {{L"string", L"trim", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"trim", L""}, STATUS_CMD_ERROR, L"\n"}, - {{L"string", L"trim", L" "}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"trim", L" \f\n\r\t"}, STATUS_CMD_OK, L"\n"}, - {{L"string", L"trim", L" a"}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"trim", L"a "}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"trim", L" a "}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"trim", L"-l", L" a"}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"trim", L"-l", L"a "}, STATUS_CMD_ERROR, L"a \n"}, - {{L"string", L"trim", L"-l", L" a "}, STATUS_CMD_OK, L"a \n"}, - {{L"string", L"trim", L"-r", L" a"}, STATUS_CMD_ERROR, L" a\n"}, - {{L"string", L"trim", L"-r", L"a "}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"trim", L"-r", L" a "}, STATUS_CMD_OK, L" a\n"}, - {{L"string", L"trim", L"-c", L".", L" a"}, STATUS_CMD_ERROR, L" a\n"}, - {{L"string", L"trim", L"-c", L".", L"a "}, STATUS_CMD_ERROR, L"a \n"}, - {{L"string", L"trim", L"-c", L".", L" a "}, STATUS_CMD_ERROR, L" a \n"}, - {{L"string", L"trim", L"-c", L".", L".a"}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"trim", L"-c", L".", 
L"a."}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"trim", L"-c", L".", L".a."}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"trim", L"-c", L"\\/", L"/a\\"}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"trim", L"-c", L"\\/", L"a/"}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"trim", L"-c", L"\\/", L"\\a/"}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"trim", L"-c", L"", L".a."}, STATUS_CMD_ERROR, L".a.\n"} - }; - - for (const auto &t : string_tests) { - run_one_string_test(t.argv, t.expected_rc, t.expected_out); - } - - bool saved_flag = feature_test(feature_flag_t::qmark_noglob); - const struct string_test qmark_noglob_tests[] = { - {{L"string", L"match", L"a*b?c", L"axxb?c", nullptr}, STATUS_CMD_OK, L"axxb?c\n"}, - {{L"string", L"match", L"*?", L"a", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"*?", L"ab", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"?*", L"a", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"?*", L"ab", nullptr}, STATUS_CMD_ERROR, L""}, - {{L"string", L"match", L"a*\\?", L"abc?", nullptr}, STATUS_CMD_ERROR, L""}}; - feature_set(feature_flag_t::qmark_noglob, true); - for (const auto &t : qmark_noglob_tests) { - run_one_string_test(t.argv, t.expected_rc, t.expected_out); - } - - const struct string_test qmark_glob_tests[] = { - {{L"string", L"match", L"a*b?c", L"axxbyc", nullptr}, STATUS_CMD_OK, L"axxbyc\n"}, - {{L"string", L"match", L"*?", L"a", nullptr}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"match", L"*?", L"ab", nullptr}, STATUS_CMD_OK, L"ab\n"}, - {{L"string", L"match", L"?*", L"a", nullptr}, STATUS_CMD_OK, L"a\n"}, - {{L"string", L"match", L"?*", L"ab", nullptr}, STATUS_CMD_OK, L"ab\n"}, - {{L"string", L"match", L"a*\\?", L"abc?", nullptr}, STATUS_CMD_OK, L"abc?\n"}}; - feature_set(feature_flag_t::qmark_noglob, false); - for (const auto &t : qmark_glob_tests) { - run_one_string_test(t.argv, t.expected_rc, t.expected_out); - } - feature_set(feature_flag_t::qmark_noglob, saved_flag); -} - /// Helper for 
test_timezone_env_vars(). long return_timezone_hour(time_t tstamp, const wchar_t *timezone) { auto &vars = parser_t::principal_parser().vars(); @@ -5881,164 +5502,6 @@ static void test_killring() { do_test((kill_entries() == std::vector{L"a", L"c", L"b", L"d"})); } -namespace { -using namespace re; - -// Basic tests for re, which wraps PCRE2. -static void test_re_errs() { - say(L"Testing re"); - flags_t flags{}; - re_error_t error{}; - maybe_t re; - do_test(!regex_t::try_compile(L"abc[", flags, &error)); - do_test(error.code != 0); - do_test(!error.message().empty()); - - error = re_error_t{}; - do_test(!regex_t::try_compile(L"abc(", flags, &error).has_value()); - do_test(error.code != 0); - do_test(!error.message().empty()); -} - -static void test_re_basic() { - // Match a character twice. - using namespace re; - wcstring subject = L"AAbCCd11e"; - auto substr_from_range = [&](maybe_t r) { - do_test(r.has_value()); - do_test(r->begin <= r->end); - do_test(r->end <= subject.size()); - return subject.substr(r->begin, r->end - r->begin); - }; - auto re = regex_t::try_compile(L"(.)\\1"); - do_test(re.has_value()); - auto md = re->prepare(); - std::vector matches; - std::vector captures; - while (auto r = re->match(md, subject)) { - matches.push_back(substr_from_range(r)); - captures.push_back(substr_from_range(re->group(md, 1))); - do_test(!re->group(md, 2)); - } - do_test(join_strings(matches, L',') == L"AA,CC,11"); - do_test(join_strings(captures, L',') == L"A,C,1"); -} - -static void test_re_reset() { - using namespace re; - auto re = regex_t::try_compile(L"([0-9])"); - wcstring s = L"012345"; - auto md = re->prepare(); - for (size_t idx = 0; idx < s.size(); idx++) { - md.reset(); - for (size_t j = 0; j <= idx; j++) { - auto m = re->match(md, s); - match_range_t expected{j, j + 1}; - do_test(m == expected); - do_test(re->group(md, 1) == expected); - } - } -} - -static void test_re_named() { - // Named capture groups. 
- using namespace re; - auto re = regex_t::try_compile(L"A(?x+)?"); - do_test(re->capture_group_count() == 1); - - wcstring subject = L"AxxAAx"; - auto md = re->prepare(); - - auto r = re->match(md, subject); - do_test((r == match_range_t{0, 3})); - do_test(re->substring_for_group(md, L"QQQ", subject) == none()); - do_test(re->substring_for_group(md, L"FOO", subject) == L"xx"); - - r = re->match(md, subject); - do_test((r == match_range_t{3, 4})); - do_test(re->substring_for_group(md, L"QQQ", subject) == none()); - do_test(re->substring_for_group(md, L"FOO", subject) == none()); - - r = re->match(md, subject); - do_test((r == match_range_t{4, 6})); - do_test(re->substring_for_group(md, L"QQQ", subject) == none()); - do_test(re->substring_for_group(md, L"FOO", subject) == wcstring(L"x")); -} - -static void test_re_name_extraction() { - // Names of capture groups can be extracted. - using namespace re; - auto re = regex_t::try_compile(L"(?dd)ff(?cc)aaa(?)ff(?)"); - do_test(re.has_value()); - do_test(re->capture_group_count() == 4); - // PCRE2 returns these sorted. - do_test(join_strings(re->capture_group_names(), L',') == L"BAR,BETA,FOO,alpha"); - - // Mixed named and positional captures. 
- re = regex_t::try_compile(L"(abc)(?def)(ghi)(?jkl)"); - do_test(re.has_value()); - do_test(re->capture_group_count() == 4); - do_test(join_strings(re->capture_group_names(), L',') == L"BAR,FOO"); - auto md = re->prepare(); - const wcstring subject = L"abcdefghijkl"; - auto m = re->match(md, subject); - do_test((m == match_range_t{0, 12})); - do_test((re->group(md, 1) == match_range_t{0, 3})); - do_test((re->group(md, 2) == match_range_t{3, 6})); - do_test((re->group(md, 3) == match_range_t{6, 9})); - do_test((re->group(md, 4) == match_range_t{9, 12})); - do_test(re->substring_for_group(md, L"FOO", subject) == wcstring(L"def")); - do_test(re->substring_for_group(md, L"BAR", subject) == wcstring(L"jkl")); -} - -static void test_re_substitute() { - // Names of capture groups can be extracted. - using namespace re; - auto re = regex_t::try_compile(L"[a-z]+(\\d+)"); - do_test(re.has_value()); - do_test(re->capture_group_count() == 1); - maybe_t res{}; - int repl_count{}; - sub_flags_t sflags{}; - const wcstring subj = L"AAabc123ZZ AAabc123ZZ"; - const wcstring repl = L"$1qqq"; - res = re->substitute(subj, repl, sflags, 0, nullptr, &repl_count); - do_test(res && *res == L"AA123qqqZZ AAabc123ZZ"); - do_test(repl_count == 1); - - res = re->substitute(subj, repl, sflags, 5, nullptr, &repl_count); - do_test(res && *res == L"AAabc123ZZ AA123qqqZZ"); - do_test(repl_count == 1); - - sflags.global = true; - res = re->substitute(subj, repl, sflags, 0, nullptr, &repl_count); - do_test(res && *res == L"AA123qqqZZ AA123qqqZZ"); - do_test(repl_count == 2); - - sflags.extended = true; - res = re->substitute(subj, L"\\x21", sflags, 0, nullptr, &repl_count); // \x21 = ! - do_test(res && *res == L"AA!ZZ AA!ZZ"); - do_test(repl_count == 2); - - // Test with a bad escape; \b is unsupported. 
- re_error_t error{}; - res = re->substitute(subj, L"AAA\\bZZZ", sflags, 0, &error); - do_test(!res.has_value()); - do_test(error.code == -57 /* PCRE2_ERROR_BADREPESCAPE */); - do_test(error.message() == L"bad escape sequence in replacement string"); - do_test(error.offset == 5 /* the b */); - - // Test a very long replacement as we used a fixed-size buffer. - sflags = sub_flags_t{}; - sflags.global = true; - re = regex_t::try_compile(L"A"); - res = - re->substitute(wcstring(4096, L'A'), wcstring(4096, L'X'), sflags, 0, nullptr, &repl_count); - do_test(res && *res == wcstring(4096 * 4096, L'X')); - do_test(repl_count == 4096); -} -} // namespace - void test_wgetopt() { // Regression test for a crash. const wchar_t *const short_options = L"-a"; @@ -6173,7 +5636,6 @@ static const test_t s_tests[]{ {TEST_GROUP("history_paths"), history_tests_t::test_history_path_detection}, {TEST_GROUP("history_races"), history_tests_t::test_history_races}, {TEST_GROUP("history_formats"), history_tests_t::test_history_formats}, - {TEST_GROUP("string"), test_string}, {TEST_GROUP("illegal_command_exit_code"), test_illegal_command_exit_code}, {TEST_GROUP("maybe"), test_maybe}, {TEST_GROUP("layout_cache"), test_layout_cache}, @@ -6185,12 +5647,6 @@ static const test_t s_tests[]{ {TEST_GROUP("pipes"), test_pipes}, {TEST_GROUP("fd_event"), test_fd_event_signaller}, {TEST_GROUP("killring"), test_killring}, - {TEST_GROUP("re"), test_re_errs}, - {TEST_GROUP("re"), test_re_basic}, - {TEST_GROUP("re"), test_re_reset}, - {TEST_GROUP("re"), test_re_named}, - {TEST_GROUP("re"), test_re_name_extraction}, - {TEST_GROUP("re"), test_re_substitute}, {TEST_GROUP("wgetopt"), test_wgetopt}, {TEST_GROUP("rust_smoke"), test_rust_smoke}, {TEST_GROUP("rust_ffi"), test_rust_ffi}, diff --git a/src/io.cpp b/src/io.cpp index 26ee46f56..56fd07b89 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -410,6 +410,21 @@ std::unique_ptr make_null_io_streams_ffi() { return std::make_unique(*null, *null); } +std::unique_ptr 
make_test_io_streams_ffi() { + // Temporary test helper. + auto streams = std::make_unique(); + streams->stdin_is_directly_redirected = false; // read from argv instead of stdin + return streams; +} + +wcstring get_test_output_ffi(const io_streams_t &streams) { + string_output_stream_t *out = static_cast(&streams.out); + if (out == nullptr) { + return wcstring(); + } + return out->contents(); +} + bool string_output_stream_t::append(const wchar_t *s, size_t amt) { contents_.append(s, amt); return true; diff --git a/src/io.h b/src/io.h index cb0bbf487..89b9dad3b 100644 --- a/src/io.h +++ b/src/io.h @@ -506,6 +506,7 @@ struct io_streams_t : noncopyable_t { std::shared_ptr job_group{}; io_streams_t(output_stream_t &out, output_stream_t &err) : out(out), err(err) {} + virtual ~io_streams_t() = default; /// autocxx junk. output_stream_t &get_out() { return out; }; @@ -518,6 +519,14 @@ struct io_streams_t : noncopyable_t { }; /// FFI helper. +struct owning_io_streams_t : io_streams_t { + string_output_stream_t out_storage; + null_output_stream_t err_storage; + owning_io_streams_t() : io_streams_t(out_storage, err_storage) {} +}; + std::unique_ptr make_null_io_streams_ffi(); +std::unique_ptr make_test_io_streams_ffi(); +wcstring get_test_output_ffi(const io_streams_t &streams); #endif diff --git a/src/re.cpp b/src/re.cpp deleted file mode 100644 index 279f94c58..000000000 --- a/src/re.cpp +++ /dev/null @@ -1,316 +0,0 @@ -#include "config.h" // IWYU pragma: keep - -#include "re.h" - -#include -#include - -#include "flog.h" - -#define PCRE2_CODE_UNIT_WIDTH WCHAR_T_BITS -#ifdef _WIN32 -#define PCRE2_STATIC -#endif - -#include "pcre2.h" - -using namespace re; -using namespace re::adapters; - -void bytecode_deleter_t::operator()(const void *ptr) { - if (ptr) { - pcre2_code_free(static_cast(const_cast(ptr))); - } -} - -void match_data_deleter_t::operator()(void *ptr) { - if (ptr) { - pcre2_match_data_free(static_cast(ptr)); - } -} - -// Get underlying pcre2_code from a 
bytecode_ptr_t. -const pcre2_code *get_code(const bytecode_ptr_t &ptr) { - assert(ptr && "Null pointer"); - return static_cast(ptr.get()); -} - -// Get underlying match_data_t. -pcre2_match_data *get_md(const match_data_ptr_t &ptr) { - assert(ptr && "Null pointer"); - return static_cast(ptr.get()); -} - -// Convert a wcstring to a PCRE2_SPTR. -PCRE2_SPTR to_sptr(const wcstring &str) { return reinterpret_cast(str.c_str()); } - -/// \return a message for an error code. -static wcstring message_for_code(error_code_t code) { - wchar_t buf[128] = {}; - pcre2_get_error_message(code, reinterpret_cast(buf), - sizeof(buf) / sizeof(wchar_t)); - return buf; -} - -maybe_t regex_t::try_compile(const wcstring &pattern, const flags_t &flags, - re_error_t *error) { - // Disable some sequences that can lead to security problems. - uint32_t options = PCRE2_NEVER_UTF; -#if PCRE2_CODE_UNIT_WIDTH < 32 - options |= PCRE2_NEVER_BACKSLASH_C; -#endif - if (flags.icase) options |= PCRE2_CASELESS; - - error_code_t err_code = 0; - PCRE2_SIZE err_offset = 0; - pcre2_code *code = - pcre2_compile(to_sptr(pattern), pattern.size(), options, &err_code, &err_offset, nullptr); - if (!code) { - if (error) { - error->code = err_code; - error->offset = err_offset; - } - return none(); - } - return regex_t{bytecode_ptr_t(code)}; -} - -match_data_t regex_t::prepare() const { - pcre2_match_data *md = pcre2_match_data_create_from_pattern(get_code(code_), nullptr); - // Bogus assertion for memory exhaustion. - if (unlikely(!md)) { - DIE("Out of memory"); - } - return match_data_t{match_data_ptr_t(static_cast(md))}; -} - -void match_data_t::reset() { - start_offset = 0; - max_capture = 0; - last_empty = false; -} - -maybe_t regex_t::match(match_data_t &md, const wcstring &subject) const { - pcre2_match_data *const match_data = get_md(md.data); - assert(match_data && "Invalid match data"); - - // Handle exhausted matches. 
- if (md.start_offset > subject.size() || (md.last_empty && md.start_offset == subject.size())) { - md.max_capture = 0; - return none(); - } - PCRE2_SIZE start_offset = md.start_offset; - - // See pcre2demo.c for an explanation of this logic. - uint32_t options = md.last_empty ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0; - error_code_t code = pcre2_match(get_code(code_), to_sptr(subject), subject.size(), start_offset, - options, match_data, nullptr); - if (code == PCRE2_ERROR_NOMATCH && !md.last_empty) { - // Failed to match. - md.start_offset = subject.size(); - md.max_capture = 0; - return none(); - } else if (code == PCRE2_ERROR_NOMATCH && md.last_empty) { - // Failed to find a non-empty-string match at a point where there was a previous - // empty-string match. Advance by one character and try again. - md.start_offset += 1; - md.last_empty = false; - return this->match(md, subject); - } else if (code < 0) { - FLOG(error, "pcre2_match unexpected error:", message_for_code(code)); - return none(); - } - - // Match succeeded. - // Start at end of previous match, marking if it was empty. 
- const auto *ovector = pcre2_get_ovector_pointer(match_data); - md.start_offset = ovector[1]; - md.max_capture = static_cast(code); - md.last_empty = ovector[0] == ovector[1]; - return match_range_t{ovector[0], ovector[1]}; -} - -maybe_t regex_t::match(const wcstring &subject) const { - match_data_t md = this->prepare(); - return this->match(md, subject); -} - -bool regex_t::matches_ffi(const wcstring &subject) const { - return this->match(subject).has_value(); -} - -maybe_t regex_t::group(const match_data_t &md, size_t group_idx) const { - if (group_idx >= md.max_capture || group_idx >= pcre2_get_ovector_count(get_md(md.data))) { - return none(); - } - - const PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(get_md(md.data)); - PCRE2_SIZE start = ovector[2 * group_idx]; - PCRE2_SIZE end = ovector[2 * group_idx + 1]; - if (start == PCRE2_UNSET || end == PCRE2_UNSET) { - return none(); - } - // From PCRE2 docs: "Note that when a pattern such as (?=ab\K) matches, the reported start of - // the match can be greater than the end of the match." - // Saturate the end. - end = std::max(start, end); - return match_range_t{start, end}; -} - -maybe_t regex_t::group(const match_data_t &match_data, const wcstring &name) const { - const auto *pcname = to_sptr(name); - // Beware, pcre2_substring_copy_byname and pcre2_substring_copy_bynumber both have a bug - // on at least one Ubuntu (running PCRE2) where it outputs garbage for the first character. - // Read out from the ovector directly. 
- int num = pcre2_substring_number_from_name(get_code(code_), pcname); - if (num <= 0) { - return none(); - } - return this->group(match_data, static_cast(num)); -} - -static maybe_t range_to_substr(const wcstring &subject, maybe_t range) { - if (!range) { - return none(); - } - assert(range->begin <= range->end && range->end <= subject.size() && "Invalid range"); - return subject.substr(range->begin, range->end - range->begin); -} - -maybe_t regex_t::substring_for_group(const match_data_t &md, size_t group_idx, - const wcstring &subject) const { - return range_to_substr(subject, this->group(md, group_idx)); -} - -maybe_t regex_t::substring_for_group(const match_data_t &md, const wcstring &name, - const wcstring &subject) const { - return range_to_substr(subject, this->group(md, name)); -} - -size_t regex_t::capture_group_count() const { - uint32_t count{}; - pcre2_pattern_info(get_code(code_), PCRE2_INFO_CAPTURECOUNT, &count); - return count; -} - -std::vector regex_t::capture_group_names() const { - PCRE2_SPTR name_table{}; - uint32_t name_entry_size{}; - uint32_t name_count{}; - - const auto *code = get_code(code_); - pcre2_pattern_info(code, PCRE2_INFO_NAMETABLE, &name_table); - pcre2_pattern_info(code, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size); - pcre2_pattern_info(code, PCRE2_INFO_NAMECOUNT, &name_count); - - struct name_table_entry_t { -#if PCRE2_CODE_UNIT_WIDTH == 8 - uint8_t match_index_msb; - uint8_t match_index_lsb; -#if CHAR_BIT == PCRE2_CODE_UNIT_WIDTH - char name[]; -#else - char8_t name[]; -#endif -#elif PCRE2_CODE_UNIT_WIDTH == 16 - uint16_t match_index; -#if WCHAR_T_BITS == PCRE2_CODE_UNIT_WIDTH - wchar_t name[]; -#else - char16_t name[]; -#endif -#else - uint32_t match_index; -#if WCHAR_T_BITS == PCRE2_CODE_UNIT_WIDTH - wchar_t name[]; -#else - char32_t name[]; -#endif // WCHAR_T_BITS -#endif // PCRE2_CODE_UNIT_WIDTH - }; - - const auto *names = reinterpret_cast(name_table); - std::vector result; - result.reserve(name_count); - for (uint32_t i = 
0; i < name_count; ++i) { - const auto &name_entry = names[i * name_entry_size]; - result.emplace_back(name_entry.name); - } - return result; -} - -maybe_t regex_t::substitute(const wcstring &subject, const wcstring &replacement, - sub_flags_t flags, size_t start_idx, re_error_t *out_error, - int *out_repl_count) const { - constexpr size_t stack_bufflen = 256; - wchar_t buffer[stack_bufflen]; - - // SUBSTITUTE_GLOBAL means more than one substitution happens. - uint32_t options = PCRE2_SUBSTITUTE_UNSET_EMPTY // don't error on unmatched - | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH // return required length on overflow - | (flags.global ? PCRE2_SUBSTITUTE_GLOBAL : 0) // replace multiple - | (flags.extended ? PCRE2_SUBSTITUTE_EXTENDED : 0) // backslash escapes - ; - size_t bufflen = stack_bufflen; - error_code_t rc = - pcre2_substitute(get_code(code_), to_sptr(subject), subject.size(), start_idx, options, - nullptr /* match_data */, nullptr /* context */, to_sptr(replacement), - // (not using UCHAR32 here for cygwin's benefit) - replacement.size(), reinterpret_cast(buffer), &bufflen); - - if (out_repl_count) { - *out_repl_count = std::max(rc, 0); - } - if (rc == 0) { - // No replacements. - return subject; - } else if (rc > 0) { - // Some replacement which fit in our buffer. - // Note we may have had embedded nuls. - assert(bufflen <= stack_bufflen && "bufflen should not exceed buffer size"); - return wcstring(buffer, bufflen); - } else if (rc == PCRE2_ERROR_NOMEMORY) { - // bufflen has been updated to required buffer size. - // Try again with a real string. - wcstring res(bufflen, L'\0'); - rc = pcre2_substitute(get_code(code_), to_sptr(subject), subject.size(), start_idx, options, - nullptr /* match_data */, nullptr /* context */, to_sptr(replacement), - replacement.size(), reinterpret_cast(&res[0]), - &bufflen); - if (out_repl_count) { - *out_repl_count = std::max(rc, 0); - } - if (rc >= 0) { - res.resize(bufflen); - return res; - } - } - // Some error. 
The offset may be returned in the bufflen. - if (out_error) { - out_error->code = rc; - out_error->offset = (bufflen == PCRE2_UNSET ? 0 : bufflen); - } - return none(); -} - -regex_t::regex_t(adapters::bytecode_ptr_t &&code) : code_(std::move(code)) { - assert(code_ && "Null impl"); -} - -wcstring re_error_t::message() const { return message_for_code(this->code); } - -re::regex_result_ffi re::try_compile_ffi(const wcstring &pattern, const flags_t &flags) { - re_error_t error{}; - auto regex = regex_t::try_compile(pattern, flags, &error); - - if (regex) { - return regex_result_ffi{std::make_unique(regex.acquire()), error}; - } - - return re::regex_result_ffi{nullptr, error}; -} - -bool re::regex_result_ffi::has_error() const { return error.code != 0; } -re::re_error_t re::regex_result_ffi::get_error() const { return error; }; - -std::unique_ptr re::regex_result_ffi::get_regex() { return std::move(regex); } diff --git a/src/re.h b/src/re.h deleted file mode 100644 index 7d2a8a09b..000000000 --- a/src/re.h +++ /dev/null @@ -1,166 +0,0 @@ -// Wraps PCRE2. -#ifndef FISH_RE_H -#define FISH_RE_H - -#include -#include -#include - -#include "common.h" -#include "maybe.h" - -namespace re { - -namespace adapters { -// Adapter to store pcre2_code in unique_ptr. -struct bytecode_deleter_t { - void operator()(const void *); -}; -using bytecode_ptr_t = std::unique_ptr; - -// Adapter to store pcre2_match_data in unique_ptr. -struct match_data_deleter_t { - void operator()(void *); -}; -using match_data_ptr_t = std::unique_ptr; -} // namespace adapters - -/// Error code type alias. -using error_code_t = int; - -/// Flags for compiling a regex. -struct flags_t { - bool icase{}; // ignore case? -}; - -/// Flags for substituting a regex. -struct sub_flags_t { - bool global{}; // perform multiple substitutions? - bool extended{}; // apply PCRE2 extended backslash escapes? -}; - -/// A type wrapping up error information. 
-/// Beware, GNU defines error_t; hence we use an re_ prefix again. -struct re_error_t { - error_code_t code{}; // error code - size_t offset{}; // offset of the error in the pattern - - /// \return our error message. - wcstring message() const; -}; - -/// A half-open range of a subject which matched. -struct match_range_t { - size_t begin; - size_t end; - - bool operator==(match_range_t rhs) const { return begin == rhs.begin && end == rhs.end; } - bool operator!=(match_range_t rhs) const { return !(*this == rhs); } -}; - -/// A match data is the "stateful" object, storing string indices for where to start the next match, -/// capture results, etc. Create one via regex_t::prepare(). These are tied to the regex which -/// created them. -class match_data_t : noncopyable_t { - public: - match_data_t(match_data_t &&) = default; - match_data_t &operator=(match_data_t &&) = default; - ~match_data_t() = default; - - /// \return a "count" of the number of capture groups which matched. - /// This is really one more than the highest matching group. - /// 0 is considered a "group" for the entire match, so this will always return at least 1 for a - /// successful match. - size_t matched_capture_group_count() const { return max_capture; } - - /// Reset this data, as if this were freshly issued by a call to prepare(). - void reset(); - - private: - explicit match_data_t(adapters::match_data_ptr_t &&data) : data(std::move(data)) {} - - // Next start position. This may exceed the needle length, which indicates exhaustion. - size_t start_offset{0}; - - // One more than the highest numbered capturing pair that was set (e.g. 1 if no captures). - size_t max_capture{0}; - - // If set, the last match was empty. - bool last_empty{false}; - - // Underlying pcre2_match_data. - adapters::match_data_ptr_t data{}; - - friend class regex_t; -}; - -/// The compiled form of a PCRE2 regex. -/// This is thread safe. -class regex_t : noncopyable_t { - public: - /// Compile a pattern into a regex. 
\return the resulting regex, or none on error. - /// If \p error is not null, populate it with the error information. - static maybe_t try_compile(const wcstring &pattern, const flags_t &flags = flags_t{}, - re_error_t *out_error = nullptr); - - /// Create a match data for this regex. - /// The result is tied to this regex; it should not be used for others. - match_data_t prepare() const; - - /// Match against a string \p subject, populating \p md. - /// \return a range on a successful match, none on no match. - maybe_t match(match_data_t &md, const wcstring &subject) const; - - /// A convenience function which calls prepare() for you. - maybe_t match(const wcstring &subject) const; - - /// A convenience function which calls prepare() for you. - bool matches_ffi(const wcstring &subject) const; - - /// \return the matched range for an indexed or named capture group. 0 means the entire match. - maybe_t group(const match_data_t &md, size_t group_idx) const; - maybe_t group(const match_data_t &md, const wcstring &name) const; - - /// \return the matched substring for a capture group. - maybe_t substring_for_group(const match_data_t &md, size_t group_idx, - const wcstring &subject) const; - maybe_t substring_for_group(const match_data_t &md, const wcstring &name, - const wcstring &subject) const; - - /// \return the number of indexed capture groups. - size_t capture_group_count() const; - - /// \return the list of capture group names. - /// Note PCRE provides these in sorted order, not specification order. - std::vector capture_group_names() const; - - /// Search \p subject for matches for this regex, starting at \p start_idx, and replacing them - /// with \p replacement. If \p repl_count is not null, populate it with the number of - /// replacements which occurred. This may fail for e.g. bad escapes in the replacement string. 
- maybe_t substitute(const wcstring &subject, const wcstring &replacement, - sub_flags_t flags, size_t start_idx = 0, - re_error_t *out_error = nullptr, - int *out_repl_count = nullptr) const; - - regex_t(regex_t &&) = default; - regex_t &operator=(regex_t &&) = default; - ~regex_t() = default; - - private: - regex_t(adapters::bytecode_ptr_t &&); - adapters::bytecode_ptr_t code_; -}; - -struct regex_result_ffi { - std::unique_ptr regex; - re::re_error_t error; - - bool has_error() const; - std::unique_ptr get_regex(); - re::re_error_t get_error() const; -}; - -regex_result_ffi try_compile_ffi(const wcstring &pattern, const flags_t &flags); - -} // namespace re -#endif diff --git a/src/screen.cpp b/src/screen.cpp index 1d907e18b..7e6dcc128 100644 --- a/src/screen.cpp +++ b/src/screen.cpp @@ -266,6 +266,11 @@ maybe_t escape_code_length(const wchar_t *code) { return found ? maybe_t{esc_seq_len} : none(); } +long escape_code_length_ffi(const wchar_t *code) { + auto found = escape_code_length(code); + return found.has_value() ? (long)*found : -1; +} + size_t layout_cache_t::escape_code_length(const wchar_t *code) { assert(code != nullptr); if (*code != L'\x1B') return 0; diff --git a/src/screen.h b/src/screen.h index 65482e54f..c9ff628fe 100644 --- a/src/screen.h +++ b/src/screen.h @@ -332,6 +332,8 @@ class layout_cache_t : noncopyable_t { }; maybe_t escape_code_length(const wchar_t *code); +// Always return a value, by moving checking of sequence start to the caller. 
+long escape_code_length_ffi(const wchar_t *code); void screen_set_midnight_commander_hack(); #endif diff --git a/tests/checks/abbr.fish b/tests/checks/abbr.fish index 219afd6c8..3275022b4 100644 --- a/tests/checks/abbr.fish +++ b/tests/checks/abbr.fish @@ -199,3 +199,8 @@ abbr --add --regex foo --function foo # CHECKERR: abbr --add: Name cannot be empty echo foo # CHECK: foo + +abbr --add regex_name --regex '(*UTF).*' bar +# CHECKERR: abbr: Regular expression compile error: using UTF is disabled by the application +# CHECKERR: abbr: (*UTF).* +# CHECKERR: abbr: ^ diff --git a/tests/checks/string.fish b/tests/checks/string.fish index 8ffeef150..ee9f1bc0e 100644 --- a/tests/checks/string.fish +++ b/tests/checks/string.fish @@ -45,9 +45,13 @@ string length -q ""; and echo not zero length; or echo zero length string pad foo # CHECK: foo -string pad -r -w 7 -c - foo +string pad -r -w 7 --chars - foo # CHECK: foo---- +# might overflow when converting sign +string sub --start -9223372036854775808 abc +# CHECK: abc + string pad --width 7 -c '=' foo # CHECK: ====foo @@ -175,6 +179,10 @@ string split "" abc # CHECK: b # CHECK: c +string split --max 1 --right 12 "AB12CD" +# CHECK: AB +# CHECK: CD + string split --fields=2 "" abc # CHECK: b @@ -185,6 +193,39 @@ string split --fields=3,2 "" abc string split --fields=2,9 "" abc; or echo "exit 1" # CHECK: exit 1 +string split --fields=2-3-,9 "" a +# CHECKERR: string split: 2-3-,9: invalid integer + +string split --fields=1-99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 "" abc +# CHECKERR: string split: 1-99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999: invalid integer + +string split --fields=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-1 "" abc +# CHECKERR: string split: 
99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-1: invalid integer + +string split --fields=1--2 "" b +# CHECKERR: string split: 1--2: invalid integer + +string split --fields=0 "" c +# CHECKERR: string split: Invalid fields value '0' + +string split --fields=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 "" abc +# CHECKERR: string split: 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999: invalid integer + +string split --fields=1-0 "" d +# CHECKERR: string split: Invalid range value for field '1-0' + +string split --fields=0-1 "" e +# CHECKERR: string split: Invalid range value for field '0-1' + +string split --fields=-1 "" f +# CHECKERR: string split: -1: invalid integer + +string split --fields=1a "" g +# CHECKERR: string split: 1a: invalid integer + +string split --fields=a "" h +# CHECKERR: string split: a: invalid integer + string split --fields=1-3,5,9-7 "" 123456789 # CHECK: 1 # CHECK: 2 @@ -359,6 +400,14 @@ string replace -r "\s*newline\s*" "\n" "put a newline here" string replace -r -a "(\w)" "\$1\$1" ab # CHECK: aabb +echo a | string replace b c -q +or echo No replace fails +# CHECK: No replace fails + +echo a | string replace -r b c -q +or echo No replace regex fails +# CHECK: No replace regex fails + string replace --filter x X abc axc x def jkx or echo Unexpected exit status at line (status --current-line-number) # CHECK: aXc @@ -468,6 +517,22 @@ string repeat -n 5 --max 4 123 '' 789 # CHECK: # CHECK: 7897 +# FIXME: handle overflowing nicely +# overflow behaviour depends on 32 vs 64 bit + +# count here is isize::MAX +# we store what to print as usize, so this will overflow +# but we limit it to less than whatever the overflow is +# so this should be fine +# string repeat -m1 -n 9223372036854775807 aa +# DONTCHECK: 
a + +# count is here (i64::MAX + 1) / 2 +# we end up overflowing, and the result is 0 +# but this should work fine, as we limit it way before the overflow +# string repeat -m1 -n 4611686018427387904 aaaa +# DONTCHECK: a + # Historical string repeat behavior is no newline if no output. echo -n before string repeat -n 5 '' @@ -766,6 +831,18 @@ string match -qer asd asd echo $status # CHECK: 0 +# should not be able to enable UTF mode +string match -r "(*UTF).*" "aaa" +# CHECKERR: string match: Regular expression compile error: using UTF is disabled by the application +# CHECKERR: string match: (*UTF).* +# CHECKERR: string match: ^ + +string replace -r "(*UTF).*" "aaa" +# CHECKERR: string replace: Regular expression compile error: using UTF is disabled by the application +# CHECKERR: string replace: (*UTF).* +# CHECKERR: string replace: ^ + + string match -eq asd asd echo $status # CHECK: 0 @@ -832,6 +909,12 @@ echo "foo1x foo2x foo3x" | string match -arg 'foo(\d)x' echo -n abc | string upper echo '' # CHECK: ABC + +# newline should not appear from nowhere when command does not split on newline +echo -n abc | string collect +echo '' +# CHECK: abc + printf \< printf my-password | string replace -ra . \* printf \>\n