mirror of
https://github.com/fish-shell/fish-shell
synced 2024-12-27 05:13:10 +00:00
Port builtins/string to Rust
- Add test to verify piped string replace exit code. Ensure fields parsing error messages are the same. Note: C++ relied upon the parsed value even when `errno` was set; that is defined behaviour we should not rely on, and it cannot easily be replicated from Rust. Therefore the Rust version will change the following error behaviour from: ```shell > string split --fields=a "" abc string split: Invalid fields value 'a' > string split --fields=1a "" abc string split: 1a: invalid integer ``` To: ```shell > string split --fields=a "" abc string split: a: invalid integer > string split --fields=1a "" abc string split: 1a: invalid integer ```
This commit is contained in:
parent
2110b36426
commit
20be990fd9
39 changed files with 3061 additions and 3006 deletions
|
@ -107,7 +107,7 @@ set(FISH_BUILTIN_SRCS
|
||||||
src/builtins/jobs.cpp src/builtins/path.cpp
|
src/builtins/jobs.cpp src/builtins/path.cpp
|
||||||
src/builtins/read.cpp src/builtins/set.cpp
|
src/builtins/read.cpp src/builtins/set.cpp
|
||||||
src/builtins/source.cpp
|
src/builtins/source.cpp
|
||||||
src/builtins/string.cpp src/builtins/ulimit.cpp
|
src/builtins/ulimit.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
# List of other sources.
|
# List of other sources.
|
||||||
|
@ -121,7 +121,7 @@ set(FISH_SRCS
|
||||||
src/null_terminated_array.cpp src/operation_context.cpp src/output.cpp
|
src/null_terminated_array.cpp src/operation_context.cpp src/output.cpp
|
||||||
src/pager.cpp src/parse_execution.cpp src/parse_util.cpp
|
src/pager.cpp src/parse_execution.cpp src/parse_util.cpp
|
||||||
src/parser.cpp src/parser_keywords.cpp src/path.cpp src/postfork.cpp
|
src/parser.cpp src/parser_keywords.cpp src/path.cpp src/postfork.cpp
|
||||||
src/proc.cpp src/re.cpp src/reader.cpp src/screen.cpp
|
src/proc.cpp src/reader.cpp src/screen.cpp
|
||||||
src/signals.cpp src/utf8.cpp
|
src/signals.cpp src/utf8.cpp
|
||||||
src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp
|
src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp
|
||||||
src/wutil.cpp src/fds.cpp src/rustffi.cpp
|
src/wutil.cpp src/fds.cpp src/rustffi.cpp
|
||||||
|
|
|
@ -18,7 +18,6 @@ use self::abbrs_ffi::{abbreviation_t, abbrs_position_t, abbrs_replacement_t};
|
||||||
#[cxx::bridge]
|
#[cxx::bridge]
|
||||||
mod abbrs_ffi {
|
mod abbrs_ffi {
|
||||||
extern "C++" {
|
extern "C++" {
|
||||||
include!("re.h");
|
|
||||||
include!("parse_constants.h");
|
include!("parse_constants.h");
|
||||||
|
|
||||||
type SourceRange = crate::parse_constants::SourceRange;
|
type SourceRange = crate::parse_constants::SourceRange;
|
||||||
|
|
|
@ -20,6 +20,7 @@ pub mod realpath;
|
||||||
pub mod r#return;
|
pub mod r#return;
|
||||||
pub mod set_color;
|
pub mod set_color;
|
||||||
pub mod status;
|
pub mod status;
|
||||||
|
pub mod string;
|
||||||
pub mod test;
|
pub mod test;
|
||||||
pub mod r#type;
|
pub mod r#type;
|
||||||
pub mod wait;
|
pub mod wait;
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
use crate::builtins::{printf, wait};
|
use crate::builtins::{printf, wait};
|
||||||
|
use crate::ffi::separation_type_t;
|
||||||
use crate::ffi::{self, parser_t, wcstring_list_ffi_t, Repin, RustBuiltin};
|
use crate::ffi::{self, parser_t, wcstring_list_ffi_t, Repin, RustBuiltin};
|
||||||
use crate::wchar::{wstr, WString, L};
|
use crate::wchar::{wstr, WString, L};
|
||||||
use crate::wchar_ffi::{c_str, empty_wstring, ToCppWString, WCharFromFFI};
|
use crate::wchar_ffi::{c_str, empty_wstring, ToCppWString, WCharFromFFI};
|
||||||
|
@ -108,6 +109,20 @@ impl output_stream_t {
|
||||||
pub fn append1(&mut self, c: char) -> bool {
|
pub fn append1(&mut self, c: char) -> bool {
|
||||||
self.append(wstr::from_char_slice(&[c]))
|
self.append(wstr::from_char_slice(&[c]))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn append_with_separation(
|
||||||
|
&mut self,
|
||||||
|
s: impl AsRef<wstr>,
|
||||||
|
sep: separation_type_t,
|
||||||
|
want_newline: bool,
|
||||||
|
) -> bool {
|
||||||
|
self.ffi()
|
||||||
|
.append_with_separation(&s.as_ref().into_cpp(), sep, want_newline)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forward to the C++ `flush_and_check_error` and convert its result into a `c_int`.
pub fn flush_and_check_error(&mut self) -> c_int {
    let status = self.ffi().flush_and_check_error();
    status.into()
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convenience wrappers around C++ io_streams_t.
|
// Convenience wrappers around C++ io_streams_t.
|
||||||
|
@ -216,6 +231,7 @@ pub fn run_builtin(
|
||||||
RustBuiltin::Return => super::r#return::r#return(parser, streams, args),
|
RustBuiltin::Return => super::r#return::r#return(parser, streams, args),
|
||||||
RustBuiltin::SetColor => super::set_color::set_color(parser, streams, args),
|
RustBuiltin::SetColor => super::set_color::set_color(parser, streams, args),
|
||||||
RustBuiltin::Status => super::status::status(parser, streams, args),
|
RustBuiltin::Status => super::status::status(parser, streams, args),
|
||||||
|
RustBuiltin::String => super::string::string(parser, streams, args),
|
||||||
RustBuiltin::Test => super::test::test(parser, streams, args),
|
RustBuiltin::Test => super::test::test(parser, streams, args),
|
||||||
RustBuiltin::Type => super::r#type::r#type(parser, streams, args),
|
RustBuiltin::Type => super::r#type::r#type(parser, streams, args),
|
||||||
RustBuiltin::Wait => wait::wait(parser, streams, args),
|
RustBuiltin::Wait => wait::wait(parser, streams, args),
|
||||||
|
|
493
fish-rust/src/builtins/string.rs
Normal file
493
fish-rust/src/builtins/string.rs
Normal file
|
@ -0,0 +1,493 @@
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{BufRead, BufReader, Read};
|
||||||
|
use std::os::fd::FromRawFd;
|
||||||
|
|
||||||
|
use crate::common::str2wcstring;
|
||||||
|
use crate::wcstringutil::fish_wcwidth_visible;
|
||||||
|
// Forward some imports to make subcmd implementations easier
|
||||||
|
pub(self) use crate::{
|
||||||
|
builtins::shared::{
|
||||||
|
builtin_missing_argument, builtin_print_error_trailer, builtin_print_help, io_streams_t,
|
||||||
|
BUILTIN_ERR_ARG_COUNT0, BUILTIN_ERR_ARG_COUNT1, BUILTIN_ERR_COMBO2,
|
||||||
|
BUILTIN_ERR_INVALID_SUBCMD, BUILTIN_ERR_MISSING_SUBCMD, BUILTIN_ERR_NOT_NUMBER,
|
||||||
|
BUILTIN_ERR_TOO_MANY_ARGUMENTS, BUILTIN_ERR_UNKNOWN, STATUS_CMD_ERROR, STATUS_CMD_OK,
|
||||||
|
STATUS_INVALID_ARGS,
|
||||||
|
},
|
||||||
|
ffi::{parser_t, separation_type_t},
|
||||||
|
wchar::{wstr, WString, L},
|
||||||
|
wchar_ext::{ToWString, WExt},
|
||||||
|
wgetopt::{wgetopter_t, wopt, woption, woption_argument_t::*, NONOPTION_CHAR_CODE},
|
||||||
|
wutil::{wgettext, wgettext_fmt},
|
||||||
|
};
|
||||||
|
pub(self) use libc::c_int;
|
||||||
|
|
||||||
|
mod collect;
|
||||||
|
mod escape;
|
||||||
|
mod join;
|
||||||
|
mod length;
|
||||||
|
mod r#match;
|
||||||
|
mod pad;
|
||||||
|
mod repeat;
|
||||||
|
mod replace;
|
||||||
|
mod shorten;
|
||||||
|
mod split;
|
||||||
|
mod sub;
|
||||||
|
mod transform;
|
||||||
|
mod trim;
|
||||||
|
mod unescape;
|
||||||
|
|
||||||
|
/// Write an error message for the `string` builtin to `$streams.err`.
///
/// The literal prefix `"string "` is appended first, followed by the message produced by
/// `wgettext_fmt!($string, $args...)`. At least one format argument is required; a trailing
/// comma is accepted.
///
/// The expansion is a single block expression evaluating to `()`, so the macro can be used both
/// as a statement and in expression position (for example as a brace-less `match` arm).
macro_rules! string_error {
    (
        $streams:expr,
        $string:expr
        $(, $args:expr)+
        $(,)?
    ) => {{
        $streams.err.append(L!("string "));
        $streams.err.append(wgettext_fmt!($string, $($args),*));
    }};
}
|
||||||
|
pub(self) use string_error;
|
||||||
|
|
||||||
|
fn string_unknown_option(
|
||||||
|
parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
subcmd: &wstr,
|
||||||
|
opt: &wstr,
|
||||||
|
) {
|
||||||
|
string_error!(streams, BUILTIN_ERR_UNKNOWN, subcmd, opt);
|
||||||
|
builtin_print_error_trailer(parser, streams, L!("string"));
|
||||||
|
}
|
||||||
|
|
||||||
|
trait StringSubCommand<'args> {
|
||||||
|
const SHORT_OPTIONS: &'static wstr;
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>];
|
||||||
|
|
||||||
|
/// Parse and store option specified by the associated short or long option.
|
||||||
|
fn parse_opt(
|
||||||
|
&mut self,
|
||||||
|
name: &wstr,
|
||||||
|
c: char,
|
||||||
|
arg: Option<&'args wstr>,
|
||||||
|
) -> Result<(), StringError>;
|
||||||
|
|
||||||
|
fn parse_opts(
|
||||||
|
&mut self,
|
||||||
|
args: &mut [&'args wstr],
|
||||||
|
parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
) -> Result<usize, Option<c_int>> {
|
||||||
|
let cmd = args[0];
|
||||||
|
let mut args_read = Vec::with_capacity(args.len());
|
||||||
|
args_read.extend_from_slice(args);
|
||||||
|
|
||||||
|
let mut w = wgetopter_t::new(Self::SHORT_OPTIONS, Self::LONG_OPTIONS, args);
|
||||||
|
while let Some(c) = w.wgetopt_long() {
|
||||||
|
match c {
|
||||||
|
':' => {
|
||||||
|
streams.err.append(L!("string ")); // clone of string_error
|
||||||
|
builtin_missing_argument(parser, streams, cmd, args_read[w.woptind - 1], false);
|
||||||
|
return Err(STATUS_INVALID_ARGS);
|
||||||
|
}
|
||||||
|
'?' => {
|
||||||
|
string_unknown_option(parser, streams, cmd, args_read[w.woptind - 1]);
|
||||||
|
return Err(STATUS_INVALID_ARGS);
|
||||||
|
}
|
||||||
|
c => {
|
||||||
|
let retval = self.parse_opt(cmd, c, w.woptarg);
|
||||||
|
if let Err(e) = retval {
|
||||||
|
e.print_error(&args_read, parser, streams, w.woptarg, w.woptind);
|
||||||
|
return Err(e.retval());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(w.woptind);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Take any positional arguments after options have been parsed.
|
||||||
|
#[allow(unused_variables)]
|
||||||
|
fn take_args(
|
||||||
|
&mut self,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&'args wstr],
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
) -> Option<c_int> {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Perform the business logic of the command.
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&'args wstr],
|
||||||
|
) -> Option<c_int>;
|
||||||
|
|
||||||
|
fn run(
|
||||||
|
&mut self,
|
||||||
|
parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
args: &mut [&'args wstr],
|
||||||
|
) -> Option<c_int> {
|
||||||
|
if args.len() >= 3 && (args[2] == "-h" || args[2] == "--help") {
|
||||||
|
let string_dash_subcmd = WString::from(args[0]) + L!("-") + args[1];
|
||||||
|
builtin_print_help(parser, streams, &string_dash_subcmd);
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
let args = &mut args[1..];
|
||||||
|
|
||||||
|
let mut optind = match self.parse_opts(args, parser, streams) {
|
||||||
|
Ok(optind) => optind,
|
||||||
|
Err(retval) => return retval,
|
||||||
|
};
|
||||||
|
|
||||||
|
let retval = self.take_args(&mut optind, args, streams);
|
||||||
|
if retval != STATUS_CMD_OK {
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
if streams.stdin_is_directly_redirected() && args.len() > optind {
|
||||||
|
string_error!(streams, BUILTIN_ERR_TOO_MANY_ARGUMENTS, args[0]);
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
}
|
||||||
|
|
||||||
|
return self.handle(parser, streams, &mut optind, args);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This covers failing argument/option parsing
/// Values are returned from `StringSubCommand::parse_opt` and reported through
/// `StringError::print_error`.
|
||||||
|
enum StringError {
|
||||||
|
/// Carries an already formatted message; `print_error` writes it verbatim after the
/// "string " prefix.
InvalidArgs(WString),
|
||||||
|
/// An option argument could not be parsed as a number; this is what a `wcstoi` error converts
/// into. `print_error` reports it with `BUILTIN_ERR_NOT_NUMBER`.
NotANumber,
|
||||||
|
/// The option character is not handled by the subcommand; `print_error` delegates to
/// `string_unknown_option`.
UnknownOption,
|
||||||
|
}
|
||||||
|
|
||||||
|
enum RegexError {
|
||||||
|
Compile(WString, pcre2::Error),
|
||||||
|
InvalidCaptureGroupName(WString),
|
||||||
|
InvalidEscape(WString),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RegexError {
|
||||||
|
fn print_error(&self, args: &[&wstr], streams: &mut io_streams_t) {
|
||||||
|
let cmd = args[0];
|
||||||
|
use RegexError::*;
|
||||||
|
match self {
|
||||||
|
Compile(pattern, e) => {
|
||||||
|
string_error!(
|
||||||
|
streams,
|
||||||
|
"%ls: Regular expression compile error: %ls\n",
|
||||||
|
cmd,
|
||||||
|
&WString::from(e.error_message())
|
||||||
|
);
|
||||||
|
string_error!(streams, "%ls: %ls\n", cmd, pattern);
|
||||||
|
string_error!(streams, "%ls: %*ls\n", cmd, e.offset().unwrap(), "^");
|
||||||
|
}
|
||||||
|
InvalidCaptureGroupName(name) => {
|
||||||
|
streams.err.append(wgettext_fmt!(
|
||||||
|
"Modification of read-only variable \"%ls\" is not allowed\n",
|
||||||
|
name
|
||||||
|
));
|
||||||
|
}
|
||||||
|
InvalidEscape(pattern) => {
|
||||||
|
string_error!(
|
||||||
|
streams,
|
||||||
|
"%ls: Invalid escape sequence in pattern \"%ls\"\n",
|
||||||
|
cmd,
|
||||||
|
pattern
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<crate::wutil::wcstoi::Error> for StringError {
|
||||||
|
fn from(_: crate::wutil::wcstoi::Error) -> Self {
|
||||||
|
StringError::NotANumber
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a `StringError::InvalidArgs` whose message is `$fmt` formatted with the command name
/// and the unwrapped option argument.
///
/// `$optarg` must be an `Option` that is known to be `Some`; it is unwrapped unconditionally.
macro_rules! invalid_args {
    ($fmt:expr, $cmd_name:expr, $optarg:expr) => {
        StringError::InvalidArgs(crate::wutil::wgettext_fmt!(
            $fmt,
            $cmd_name,
            $optarg.unwrap()
        ))
    };
}
|
||||||
|
pub(self) use invalid_args;
|
||||||
|
|
||||||
|
impl StringError {
|
||||||
|
fn print_error(
|
||||||
|
&self,
|
||||||
|
args: &[&wstr],
|
||||||
|
parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optarg: Option<&wstr>,
|
||||||
|
optind: usize,
|
||||||
|
) {
|
||||||
|
let cmd = args[0];
|
||||||
|
use StringError::*;
|
||||||
|
match self {
|
||||||
|
InvalidArgs(msg) => {
|
||||||
|
streams.err.append(L!("string "));
|
||||||
|
// TODO: Once we can extract/edit translations in Rust files, replace this with
|
||||||
|
// something like wgettext_fmt("%ls: %ls", cmd, msg) that can be translated
|
||||||
|
// and remove the forwarding of the cmd name to `parse_opt`
|
||||||
|
streams.err.append(msg);
|
||||||
|
}
|
||||||
|
NotANumber => {
|
||||||
|
string_error!(streams, BUILTIN_ERR_NOT_NUMBER, cmd, optarg.unwrap());
|
||||||
|
}
|
||||||
|
UnknownOption => {
|
||||||
|
string_unknown_option(parser, streams, cmd, args[optind - 1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn retval(&self) -> Option<c_int> {
|
||||||
|
STATUS_INVALID_ARGS
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A left/right selector; `Left` is the default.
// NOTE(review): presumably consumed by subcommands such as pad/shorten -- the users are not
// visible in this file, confirm before relying on it.
#[derive(Default, PartialEq, Clone, Copy)]
|
||||||
|
enum Direction {
|
||||||
|
#[default]
|
||||||
|
Left,
|
||||||
|
Right,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(self) fn width_without_escapes(ins: &wstr, start_pos: usize) -> i32 {
|
||||||
|
let mut width: i32 = 0;
|
||||||
|
for c in ins[start_pos..].chars() {
|
||||||
|
let w = fish_wcwidth_visible(c);
|
||||||
|
// We assume that this string is on its own line,
|
||||||
|
// in which case a backslash can't bring us below 0.
|
||||||
|
if w > 0 || width > 0 {
|
||||||
|
width += w;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ANSI escape sequences like \e\[31m contain printable characters. Subtract their width
|
||||||
|
// because they are not rendered.
|
||||||
|
let mut pos = start_pos;
|
||||||
|
while let Some(ec_pos) = ins.slice_from(pos).find_char('\x1B') {
|
||||||
|
pos += ec_pos;
|
||||||
|
if let Some(len) = escape_code_length(ins.slice_from(pos)) {
|
||||||
|
let sub = &ins[pos..pos + len];
|
||||||
|
for c in sub.chars() {
|
||||||
|
width -= fish_wcwidth_visible(c);
|
||||||
|
}
|
||||||
|
// Move us forward behind the escape code,
|
||||||
|
// it might include a second escape!
|
||||||
|
// E.g. SGR0 ("reset") is \e\(B\e\[m in xterm.
|
||||||
|
pos += len - 1;
|
||||||
|
} else {
|
||||||
|
pos += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return width;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(self) fn escape_code_length(code: &wstr) -> Option<usize> {
|
||||||
|
use crate::ffi::escape_code_length_ffi;
|
||||||
|
use crate::wchar_ffi::wstr_to_u32string;
|
||||||
|
|
||||||
|
match escape_code_length_ffi(wstr_to_u32string(code).as_ptr()).into() {
|
||||||
|
-1 => None,
|
||||||
|
n => Some(n as usize),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A helper type for extracting arguments from either argv or stdin.
/// It is consumed as an iterator yielding `(argument, want_newline)` pairs; stdin is used
/// whenever `reader` is set, argv otherwise.
|
||||||
|
pub(self) struct Arguments<'args, 'iter> {
|
||||||
|
/// The list of arguments passed to the string builtin.
|
||||||
|
args: &'iter [&'args wstr],
|
||||||
|
/// If using argv, index of the next argument to return.
/// This is a mutable borrow of the caller's index, so the caller observes how far iteration got.
|
||||||
|
argidx: &'iter mut usize,
|
||||||
|
/// If set, when reading from a stream, split on newlines.
/// When unset, everything up to end-of-file is returned as a single argument.
|
||||||
|
split_on_newline: bool,
|
||||||
|
/// Buffer to store what we read with the BufReader
|
||||||
|
/// Is only here to avoid allocating every time
|
||||||
|
buffer: Vec<u8>,
|
||||||
|
/// If not using argv, we read with a buffer
/// Only set when stdin is directly redirected. The wrapped `File` is built from the stdin fd
/// and is deliberately leaked on drop so that the fd is not closed.
|
||||||
|
reader: Option<BufReader<File>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for Arguments<'_, '_> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
if let Some(r) = self.reader.take() {
|
||||||
|
// we should not close stdin
|
||||||
|
std::mem::forget(r.into_inner());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'args, 'iter> Arguments<'args, 'iter> {
|
||||||
|
const STRING_CHUNK_SIZE: usize = 1024;
|
||||||
|
|
||||||
|
fn new(
|
||||||
|
args: &'iter [&'args wstr],
|
||||||
|
argidx: &'iter mut usize,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
) -> Self {
|
||||||
|
let reader = streams.stdin_is_directly_redirected().then(|| {
|
||||||
|
let stdin_fd = streams
|
||||||
|
.stdin_fd()
|
||||||
|
.filter(|&fd| fd >= 0)
|
||||||
|
.expect("should have a valid fd");
|
||||||
|
// safety: this should be a valid fd, and already open
|
||||||
|
let fd = unsafe { File::from_raw_fd(stdin_fd) };
|
||||||
|
BufReader::with_capacity(Self::STRING_CHUNK_SIZE, fd)
|
||||||
|
});
|
||||||
|
|
||||||
|
Arguments {
|
||||||
|
args,
|
||||||
|
argidx,
|
||||||
|
split_on_newline: true,
|
||||||
|
buffer: Vec::new(),
|
||||||
|
reader,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn without_splitting_on_newline(
|
||||||
|
args: &'iter [&'args wstr],
|
||||||
|
argidx: &'iter mut usize,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
) -> Self {
|
||||||
|
let mut args = Self::new(args, argidx, streams);
|
||||||
|
args.split_on_newline = false;
|
||||||
|
args
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_arg_stdin(&mut self) -> Option<(Cow<'args, wstr>, bool)> {
|
||||||
|
let reader = self.reader.as_mut().unwrap();
|
||||||
|
|
||||||
|
// NOTE: C++ wrongly commented that read_blocked retries for EAGAIN
|
||||||
|
let num_bytes = match self.split_on_newline {
|
||||||
|
true => reader.read_until(b'\n', &mut self.buffer),
|
||||||
|
false => reader.read_to_end(&mut self.buffer),
|
||||||
|
}
|
||||||
|
.ok()?;
|
||||||
|
|
||||||
|
// to match behaviour of earlier versions
|
||||||
|
if num_bytes == 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut parsed = str2wcstring(&self.buffer);
|
||||||
|
|
||||||
|
// If not set, we have consumed all of stdin and its last line is missing a newline character.
|
||||||
|
// This is an edge case -- we expect text input, which is conventionally terminated by a
|
||||||
|
// newline character. But if it isn't, we use this to avoid creating one out of thin air,
|
||||||
|
// to not corrupt input data.
|
||||||
|
let want_newline;
|
||||||
|
if self.split_on_newline {
|
||||||
|
if parsed.char_at(parsed.len() - 1) == '\n' {
|
||||||
|
// consumers do not expect to deal with the newline
|
||||||
|
parsed.pop();
|
||||||
|
want_newline = true;
|
||||||
|
} else {
|
||||||
|
// we are missing a trailing newline
|
||||||
|
want_newline = false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
want_newline = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let retval = Some((Cow::Owned(parsed), want_newline));
|
||||||
|
self.buffer.clear();
|
||||||
|
retval
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'args> Iterator for Arguments<'args, '_> {
|
||||||
|
// second is want_newline
|
||||||
|
type Item = (Cow<'args, wstr>, bool);
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
if self.reader.is_some() {
|
||||||
|
return self.get_arg_stdin();
|
||||||
|
}
|
||||||
|
|
||||||
|
if *self.argidx >= self.args.len() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
*self.argidx += 1;
|
||||||
|
return Some((Cow::Borrowed(self.args[*self.argidx - 1]), true));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The string builtin, for manipulating strings.
|
||||||
|
pub fn string(
|
||||||
|
parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
args: &mut [&wstr],
|
||||||
|
) -> Option<c_int> {
|
||||||
|
let cmd = args[0];
|
||||||
|
let argc = args.len();
|
||||||
|
|
||||||
|
if argc <= 1 {
|
||||||
|
streams
|
||||||
|
.err
|
||||||
|
.append(wgettext_fmt!(BUILTIN_ERR_MISSING_SUBCMD, cmd));
|
||||||
|
builtin_print_error_trailer(parser, streams, cmd);
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
}
|
||||||
|
|
||||||
|
if args[1] == "-h" || args[1] == "--help" {
|
||||||
|
builtin_print_help(parser, streams, cmd);
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
let subcmd_name = args[1];
|
||||||
|
|
||||||
|
match subcmd_name.to_string().as_str() {
|
||||||
|
"collect" => collect::Collect::default().run(parser, streams, args),
|
||||||
|
"escape" => escape::Escape::default().run(parser, streams, args),
|
||||||
|
"join" => join::Join::default().run(parser, streams, args),
|
||||||
|
"join0" => {
|
||||||
|
let mut cmd = join::Join::default();
|
||||||
|
cmd.is_join0 = true;
|
||||||
|
cmd.run(parser, streams, args)
|
||||||
|
}
|
||||||
|
"length" => length::Length::default().run(parser, streams, args),
|
||||||
|
"lower" => {
|
||||||
|
let mut cmd = transform::Transform {
|
||||||
|
quiet: false,
|
||||||
|
func: wstr::to_lowercase,
|
||||||
|
};
|
||||||
|
cmd.run(parser, streams, args)
|
||||||
|
}
|
||||||
|
"match" => r#match::Match::default().run(parser, streams, args),
|
||||||
|
"pad" => pad::Pad::default().run(parser, streams, args),
|
||||||
|
"repeat" => repeat::Repeat::default().run(parser, streams, args),
|
||||||
|
"replace" => replace::Replace::default().run(parser, streams, args),
|
||||||
|
"shorten" => shorten::Shorten::default().run(parser, streams, args),
|
||||||
|
"split" => split::Split::default().run(parser, streams, args),
|
||||||
|
"split0" => {
|
||||||
|
let mut cmd = split::Split::default();
|
||||||
|
cmd.is_split0 = true;
|
||||||
|
cmd.run(parser, streams, args)
|
||||||
|
}
|
||||||
|
"sub" => sub::Sub::default().run(parser, streams, args),
|
||||||
|
"trim" => trim::Trim::default().run(parser, streams, args),
|
||||||
|
"unescape" => unescape::Unescape::default().run(parser, streams, args),
|
||||||
|
"upper" => {
|
||||||
|
let mut cmd = transform::Transform {
|
||||||
|
quiet: false,
|
||||||
|
func: wstr::to_uppercase,
|
||||||
|
};
|
||||||
|
cmd.run(parser, streams, args)
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
streams
|
||||||
|
.err
|
||||||
|
.append(wgettext_fmt!(BUILTIN_ERR_INVALID_SUBCMD, cmd, subcmd_name));
|
||||||
|
builtin_print_error_trailer(parser, streams, cmd);
|
||||||
|
STATUS_INVALID_ARGS
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
66
fish-rust/src/builtins/string/collect.rs
Normal file
66
fish-rust/src/builtins/string/collect.rs
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct Collect {
|
||||||
|
allow_empty: bool,
|
||||||
|
no_trim_newlines: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StringSubCommand<'_> for Collect {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[
|
||||||
|
wopt(L!("allow-empty"), no_argument, 'a'),
|
||||||
|
wopt(L!("no-trim-newlines"), no_argument, 'N'),
|
||||||
|
];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":Na");
|
||||||
|
|
||||||
|
fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'a' => self.allow_empty = true,
|
||||||
|
'N' => self.no_trim_newlines = true,
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
_parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
let mut appended = 0usize;
|
||||||
|
|
||||||
|
for (arg, want_newline) in Arguments::without_splitting_on_newline(args, optind, streams) {
|
||||||
|
let arg = if !self.no_trim_newlines {
|
||||||
|
let trim_len = arg.len() - arg.chars().rev().take_while(|&c| c == '\n').count();
|
||||||
|
&arg[..trim_len]
|
||||||
|
} else {
|
||||||
|
&arg
|
||||||
|
};
|
||||||
|
|
||||||
|
streams
|
||||||
|
.out
|
||||||
|
.append_with_separation(arg, separation_type_t::explicitly, want_newline);
|
||||||
|
appended += arg.len();
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we haven't printed anything and "no_empty" is set,
|
||||||
|
// print something empty. Helps with empty ellision:
|
||||||
|
// echo (true | string collect --allow-empty)"bar"
|
||||||
|
// prints "bar".
|
||||||
|
if self.allow_empty && appended == 0 {
|
||||||
|
streams.out.append_with_separation(
|
||||||
|
L!(""),
|
||||||
|
separation_type_t::explicitly,
|
||||||
|
true, /* historical behavior is to always print a newline */
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if appended > 0 {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
65
fish-rust/src/builtins/string/escape.rs
Normal file
65
fish-rust/src/builtins/string/escape.rs
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
use super::*;
|
||||||
|
use crate::common::{escape_string, EscapeFlags, EscapeStringStyle};
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct Escape {
|
||||||
|
no_quoted: bool,
|
||||||
|
style: EscapeStringStyle,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StringSubCommand<'_> for Escape {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[
|
||||||
|
wopt(L!("no-quoted"), no_argument, 'n'),
|
||||||
|
wopt(L!("style"), required_argument, NONOPTION_CHAR_CODE),
|
||||||
|
];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":n");
|
||||||
|
|
||||||
|
fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'n' => self.no_quoted = true,
|
||||||
|
NONOPTION_CHAR_CODE => {
|
||||||
|
self.style = arg
|
||||||
|
.unwrap()
|
||||||
|
.try_into()
|
||||||
|
.map_err(|_| invalid_args!("%ls: Invalid escape style '%ls'\n", name, arg))?
|
||||||
|
}
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
_parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
// Currently, only the script style supports options.
|
||||||
|
// Ignore them for other styles for now.
|
||||||
|
let style = match self.style {
|
||||||
|
EscapeStringStyle::Script(..) if self.no_quoted => {
|
||||||
|
EscapeStringStyle::Script(EscapeFlags::NO_QUOTED)
|
||||||
|
}
|
||||||
|
x => x,
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut escaped_any = false;
|
||||||
|
for (arg, want_newline) in Arguments::new(args, optind, streams) {
|
||||||
|
let mut escaped = escape_string(&arg, style);
|
||||||
|
|
||||||
|
if want_newline {
|
||||||
|
escaped.push('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
streams.out.append(escaped);
|
||||||
|
escaped_any = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if escaped_any {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
99
fish-rust/src/builtins/string/join.rs
Normal file
99
fish-rust/src/builtins/string/join.rs
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
pub struct Join<'args> {
|
||||||
|
quiet: bool,
|
||||||
|
no_empty: bool,
|
||||||
|
pub is_join0: bool,
|
||||||
|
sep: &'args wstr,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Join<'_> {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
quiet: false,
|
||||||
|
no_empty: false,
|
||||||
|
is_join0: false,
|
||||||
|
sep: L!("\0"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'args> StringSubCommand<'args> for Join<'args> {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[
|
||||||
|
wopt(L!("quiet"), no_argument, 'q'),
|
||||||
|
wopt(L!("no-empty"), no_argument, 'n'),
|
||||||
|
];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":qn");
|
||||||
|
|
||||||
|
fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'q' => self.quiet = true,
|
||||||
|
'n' => self.no_empty = true,
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn take_args(
|
||||||
|
&mut self,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&'args wstr],
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
if self.is_join0 {
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(arg) = args.get(*optind).copied() else {
|
||||||
|
string_error!(streams, BUILTIN_ERR_ARG_COUNT0, args[0]);
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
};
|
||||||
|
*optind += 1;
|
||||||
|
self.sep = arg;
|
||||||
|
|
||||||
|
STATUS_CMD_OK
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
_parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
let sep = &self.sep;
|
||||||
|
let mut nargs = 0usize;
|
||||||
|
let mut print_trailing_newline = true;
|
||||||
|
for (arg, want_newline) in Arguments::new(args, optind, streams) {
|
||||||
|
if !self.quiet {
|
||||||
|
if self.no_empty && arg.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if nargs > 0 {
|
||||||
|
streams.out.append(sep);
|
||||||
|
}
|
||||||
|
|
||||||
|
streams.out.append(arg);
|
||||||
|
} else if nargs > 1 {
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
nargs += 1;
|
||||||
|
print_trailing_newline = want_newline;
|
||||||
|
}
|
||||||
|
|
||||||
|
if nargs > 0 && !self.quiet {
|
||||||
|
if self.is_join0 {
|
||||||
|
streams.out.append1('\0');
|
||||||
|
} else if print_trailing_newline {
|
||||||
|
streams.out.append1('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if nargs > 1 {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
74
fish-rust/src/builtins/string/length.rs
Normal file
74
fish-rust/src/builtins/string/length.rs
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
use crate::wcstringutil::split_string;
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct Length {
|
||||||
|
quiet: bool,
|
||||||
|
visible: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StringSubCommand<'_> for Length {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[
|
||||||
|
wopt(L!("quiet"), no_argument, 'q'),
|
||||||
|
wopt(L!("visible"), no_argument, 'V'),
|
||||||
|
];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":qV");
|
||||||
|
|
||||||
|
fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'q' => self.quiet = true,
|
||||||
|
'V' => self.visible = true,
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
_parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
let mut nnonempty = 0usize;
|
||||||
|
|
||||||
|
for (arg, _) in Arguments::new(args, optind, streams) {
|
||||||
|
if self.visible {
|
||||||
|
// Visible length only makes sense line-wise.
|
||||||
|
for line in split_string(&arg, '\n') {
|
||||||
|
let mut max = 0;
|
||||||
|
// Carriage-return returns us to the beginning. The longest substring without
|
||||||
|
// carriage-return determines the overall width.
|
||||||
|
for reset in split_string(&line, '\r') {
|
||||||
|
let n = width_without_escapes(&reset, 0) as usize;
|
||||||
|
max = max.max(n);
|
||||||
|
}
|
||||||
|
if max > 0 {
|
||||||
|
nnonempty += 1;
|
||||||
|
}
|
||||||
|
if !self.quiet {
|
||||||
|
streams.out.append(max.to_wstring() + L!("\n"));
|
||||||
|
} else if nnonempty > 0 {
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let n = arg.len();
|
||||||
|
if n > 0 {
|
||||||
|
nnonempty += 1;
|
||||||
|
}
|
||||||
|
if !self.quiet {
|
||||||
|
streams.out.append(n.to_wstring() + L!("\n"));
|
||||||
|
} else if nnonempty > 0 {
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if nnonempty > 0 {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
406
fish-rust/src/builtins/string/match.rs
Normal file
406
fish-rust/src/builtins/string/match.rs
Normal file
|
@ -0,0 +1,406 @@
|
||||||
|
use pcre2::utf32::{Captures, Regex, RegexBuilder};
|
||||||
|
use printf_compat::sprintf;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use crate::env::{EnvMode, EnvVar, EnvVarFlags};
|
||||||
|
use crate::flog::FLOG;
|
||||||
|
use crate::parse_util::parse_util_unescape_wildcards;
|
||||||
|
use crate::wchar_ffi::WCharToFFI;
|
||||||
|
use crate::wildcard::ANY_STRING;
|
||||||
|
|
||||||
|
/// Options and pattern for the `string match` subcommand.
#[derive(Default)]
pub struct Match<'args> {
    /// Set by `-a`/`--all`.
    all: bool,
    /// Set by `-e`/`--entire`.
    entire: bool,
    /// Set by `-g`/`--groups-only`.
    groups_only: bool,
    /// Set by `-i`/`--ignore-case`.
    ignore_case: bool,
    /// Set by `-v`/`--invert`.
    invert_match: bool,
    /// Set by `-q`/`--quiet`.
    quiet: bool,
    /// Set by `-r`/`--regex`: treat the pattern as a regular expression instead of a glob.
    regex: bool,
    /// Set by `-n`/`--index`: print start/length pairs instead of the matched text.
    index: bool,
    /// The pattern; filled in by `take_args` from the first positional argument.
    pattern: &'args wstr,
}
|
||||||
|
|
||||||
|
impl<'args> StringSubCommand<'args> for Match<'args> {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[
|
||||||
|
wopt(L!("all"), no_argument, 'a'),
|
||||||
|
wopt(L!("entire"), no_argument, 'e'),
|
||||||
|
wopt(L!("groups-only"), no_argument, 'g'),
|
||||||
|
wopt(L!("ignore-case"), no_argument, 'i'),
|
||||||
|
wopt(L!("invert"), no_argument, 'v'),
|
||||||
|
wopt(L!("quiet"), no_argument, 'q'),
|
||||||
|
wopt(L!("regex"), no_argument, 'r'),
|
||||||
|
wopt(L!("index"), no_argument, 'n'),
|
||||||
|
];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":aegivqrn");
|
||||||
|
|
||||||
|
fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'a' => self.all = true,
|
||||||
|
'e' => self.entire = true,
|
||||||
|
'g' => self.groups_only = true,
|
||||||
|
'i' => self.ignore_case = true,
|
||||||
|
'v' => self.invert_match = true,
|
||||||
|
'q' => self.quiet = true,
|
||||||
|
'r' => self.regex = true,
|
||||||
|
'n' => self.index = true,
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn take_args(
|
||||||
|
&mut self,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&'args wstr],
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
let cmd = args[0];
|
||||||
|
let Some(arg) = args.get(*optind).copied() else {
|
||||||
|
string_error!(streams, BUILTIN_ERR_ARG_COUNT0, cmd);
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
};
|
||||||
|
*optind += 1;
|
||||||
|
self.pattern = arg;
|
||||||
|
STATUS_CMD_OK
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
let cmd = args[0];
|
||||||
|
|
||||||
|
if self.entire && self.index {
|
||||||
|
streams.err.append(wgettext_fmt!(
|
||||||
|
BUILTIN_ERR_COMBO2,
|
||||||
|
cmd,
|
||||||
|
wgettext!("--entire and --index are mutually exclusive")
|
||||||
|
));
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.invert_match && self.groups_only {
|
||||||
|
streams.err.append(wgettext_fmt!(
|
||||||
|
BUILTIN_ERR_COMBO2,
|
||||||
|
cmd,
|
||||||
|
wgettext!("--invert and --groups-only are mutually exclusive")
|
||||||
|
));
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.entire && self.groups_only {
|
||||||
|
streams.err.append(wgettext_fmt!(
|
||||||
|
BUILTIN_ERR_COMBO2,
|
||||||
|
cmd,
|
||||||
|
wgettext!("--entire and --groups-only are mutually exclusive")
|
||||||
|
));
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut matcher = match StringMatcher::new(self.pattern, self) {
|
||||||
|
Ok(m) => m,
|
||||||
|
Err(e) => {
|
||||||
|
e.print_error(args, streams);
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
for (arg, _) in Arguments::new(args, optind, streams) {
|
||||||
|
if let Err(e) = matcher.report_matches(arg.as_ref(), streams) {
|
||||||
|
FLOG!(error, "pcre2_match unexpected error:", e.error_message())
|
||||||
|
}
|
||||||
|
if self.quiet && matcher.match_count() > 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let match_count = matcher.match_count();
|
||||||
|
|
||||||
|
if let StringMatcher::Regex(RegexMatcher {
|
||||||
|
first_match_captures,
|
||||||
|
..
|
||||||
|
}) = matcher
|
||||||
|
{
|
||||||
|
let vars = parser.get_vars();
|
||||||
|
for (name, vals) in first_match_captures.into_iter() {
|
||||||
|
vars.set(&WString::from(name), EnvMode::DEFAULT, vals);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if match_count > 0 {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Matcher used by `string match --regex`.
struct RegexMatcher<'opts, 'args> {
    /// The compiled pattern.
    regex: Regex,
    /// Number of arguments counted as matching so far (incremented at most once per
    /// call to `report_matches`).
    total_matched: usize,
    /// Values collected for each named capture group, keyed by group name. Only filled
    /// while processing the first matching argument.
    first_match_captures: HashMap<String, Vec<WString>>,
    /// The options of the `string match` invocation this matcher belongs to.
    opts: &'opts Match<'args>,
}
|
||||||
|
|
||||||
|
/// Matcher used by `string match` when `--regex` is not given (glob matching).
struct WildCardMatcher<'opts, 'args> {
    /// The unescaped wildcard pattern; lower-cased when `--ignore-case` is set and
    /// wrapped in `ANY_STRING` when `--entire` is set (see `WildCardMatcher::new`).
    pattern: WString,
    /// Number of arguments counted as matching so far.
    total_matched: usize,
    /// The options of the `string match` invocation this matcher belongs to.
    opts: &'opts Match<'args>,
}
|
||||||
|
|
||||||
|
/// The two matching strategies of `string match`; `StringMatcher::new` selects one
/// based on the `--regex` option.
#[allow(clippy::large_enum_variant)]
enum StringMatcher<'opts, 'args> {
    /// Regular-expression matching.
    Regex(RegexMatcher<'opts, 'args>),
    /// Wildcard (glob) matching.
    WildCard(WildCardMatcher<'opts, 'args>),
}
|
||||||
|
|
||||||
|
impl<'opts, 'args> StringMatcher<'opts, 'args> {
|
||||||
|
fn new(
|
||||||
|
pattern: &'args wstr,
|
||||||
|
opts: &'opts Match<'args>,
|
||||||
|
) -> Result<StringMatcher<'opts, 'args>, RegexError> {
|
||||||
|
if opts.regex {
|
||||||
|
let m = RegexMatcher::new(pattern, opts)?;
|
||||||
|
Ok(Self::Regex(m))
|
||||||
|
} else {
|
||||||
|
let m = WildCardMatcher::new(pattern, opts);
|
||||||
|
return Ok(Self::WildCard(m));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn report_matches(
|
||||||
|
&mut self,
|
||||||
|
arg: &wstr,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
) -> Result<(), pcre2::Error> {
|
||||||
|
match self {
|
||||||
|
Self::Regex(m) => m.report_matches(arg, streams)?,
|
||||||
|
Self::WildCard(m) => m.report_matches(arg, streams),
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn match_count(&self) -> usize {
|
||||||
|
match self {
|
||||||
|
Self::Regex(m) => m.total_matched,
|
||||||
|
Self::WildCard(m) => m.total_matched,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Outcome of `RegexMatcher::report_match` for a single match attempt.
enum MatchResult<'a> {
    /// The attempt does not count as a match.
    NoMatch,
    /// The attempt counts as a match. The captures are `None` when the regex did not
    /// match but `--invert` turned that into a reported match.
    Match(Option<Captures<'a>>),
}
|
||||||
|
|
||||||
|
impl<'opts, 'args> RegexMatcher<'opts, 'args> {
    /// Compiles `pattern` (case-insensitively when `opts.ignore_case` is set), rejects
    /// patterns whose named capture groups would clash with read-only variables, and
    /// prepares one empty value list per named capture group.
    fn new(
        pattern: &'args wstr,
        opts: &'opts Match<'args>,
    ) -> Result<RegexMatcher<'opts, 'args>, RegexError> {
        let regex = RegexBuilder::new()
            .caseless(opts.ignore_case)
            // UTF-mode can be enabled with `(*UTF)` https://www.pcre.org/current/doc/html/pcre2unicode.html
            // we use the capture group names to set local variables, and those are limited
            // to ascii-alphanumerics and underscores in non-UTF-mode
            // https://www.pcre.org/current/doc/html/pcre2syntax.html#SEC13
            // we can probably relax this limitation as long as we ensure
            // the capture group names are valid variable names
            .never_utf(true)
            .build(pattern.as_char_slice())
            .map_err(|e| RegexError::Compile(pattern.to_owned(), e))?;

        Self::validate_capture_group_names(regex.capture_names())?;

        // Unnamed groups appear as `None` in `capture_names()` and are skipped here.
        let first_match_captures = regex
            .capture_names()
            .iter()
            .filter_map(|name| name.as_ref().map(|n| (n.to_owned(), Vec::new())))
            .collect();
        let m = Self {
            regex,
            total_matched: 0,
            first_match_captures,
            opts,
        };
        return Ok(m);
    }

    /// Matches `arg` against the regex, prints the result via `report_match` and updates
    /// the match counter. With `--all` (and without `--invert`) every further match in
    /// `arg` is reported as well. Named captures are only recorded while handling the
    /// first argument that matched.
    fn report_matches(
        &mut self,
        arg: &wstr,
        streams: &mut io_streams_t,
    ) -> Result<(), pcre2::Error> {
        let mut iter = self.regex.captures_iter(arg.as_char_slice());
        let cg = iter.next().transpose()?;
        let rc = self.report_match(arg, cg, streams);

        let mut populate_captures = false;
        if let MatchResult::Match(actual) = &rc {
            // Only the very first matching argument contributes capture variables.
            populate_captures = self.total_matched == 0;
            self.total_matched += 1;

            if populate_captures {
                Self::populate_captures_from_match(
                    &mut self.first_match_captures,
                    self.opts,
                    actual,
                );
            }
        }

        if !self.opts.invert_match && self.opts.all {
            // we are guaranteed to match as long as opts.invert_match is false
            while let MatchResult::Match(cg) =
                self.report_match(arg, iter.next().transpose()?, streams)
            {
                if populate_captures {
                    Self::populate_captures_from_match(
                        &mut self.first_match_captures,
                        self.opts,
                        &cg,
                    );
                }
            }
        }
        Ok(())
    }

    /// Appends the value of every named capture group in `cg` to the corresponding list
    /// in `first_match_captures`.
    fn populate_captures_from_match<'a>(
        first_match_captures: &mut HashMap<String, Vec<WString>>,
        opts: &Match<'args>,
        cg: &Option<Captures<'a>>,
    ) {
        for (name, captures) in first_match_captures.iter_mut() {
            // If there are multiple named groups and --all was used, we need to ensure that
            // the indexes are always in sync between the variables. If an optional named
            // group didn't match but its brethren did, we need to make sure to put
            // *something* in the resulting array, and unfortunately fish doesn't support
            // empty/null members so we're going to have to use an empty string as the
            // sentinel value.

            if let Some(m) = cg.as_ref().and_then(|cg| cg.name(&name.to_string())) {
                captures.push(WString::from(m.as_bytes()));
            } else if opts.all {
                captures.push(WString::new());
            }
        }
    }

    /// Fails with `RegexError::InvalidCaptureGroupName` if any named capture group has
    /// the name of a variable flagged as read-only.
    fn validate_capture_group_names(
        capture_group_names: &[Option<String>],
    ) -> Result<(), RegexError> {
        for name in capture_group_names.iter().filter_map(|n| n.as_ref()) {
            let wname = WString::from_str(name);
            if EnvVar::flags_for(&wname).contains(EnvVarFlags::READ_ONLY) {
                return Err(RegexError::InvalidCaptureGroupName(wname));
            }
        }
        return Ok(());
    }

    /// Prints the output for one match attempt (`cg` is `None` when the regex did not
    /// match) according to the options, and reports whether the attempt counts as a
    /// match once `--invert` is taken into account.
    fn report_match<'a>(
        &self,
        arg: &'a wstr,
        cg: Option<Captures<'a>>,
        streams: &mut io_streams_t,
    ) -> MatchResult<'a> {
        let Some(cg) = cg else {
            // The regex did not match: with --invert this is the case that gets reported.
            if self.opts.invert_match && !self.opts.quiet {
                if self.opts.index {
                    streams.out.append(sprintf!("1 %lu\n", arg.len()));
                } else {
                    streams.out.append(arg);
                    streams.out.append1('\n');
                }
            }
            return match self.opts.invert_match {
                true => MatchResult::Match(None),
                false => MatchResult::NoMatch,
            };
        };

        // The regex matched, which --invert turns into a non-match.
        if self.opts.invert_match {
            return MatchResult::NoMatch;
        }

        if self.opts.quiet {
            return MatchResult::Match(Some(cg));
        }

        if self.opts.entire {
            streams.out.append(arg);
            streams.out.append1('\n');
        }

        // Group 0 (the whole match) is skipped when the entire argument was already
        // printed or when only the capture groups were requested.
        let start = (self.opts.entire || self.opts.groups_only) as usize;

        for m in (start..cg.len()).filter_map(|i| cg.get(i)) {
            if self.opts.index {
                // Printed as 1-based start position followed by the length.
                streams
                    .out
                    .append(sprintf!("%lu %lu\n", m.start() + 1, m.end() - m.start()));
            } else {
                streams.out.append(&arg[m.start()..m.end()]);
                streams.out.append1('\n');
            }
        }

        return MatchResult::Match(Some(cg));
    }
}
|
||||||
|
|
||||||
|
impl<'opts, 'args> WildCardMatcher<'opts, 'args> {
|
||||||
|
fn new(pattern: &'args wstr, opts: &'opts Match<'args>) -> Self {
|
||||||
|
let mut wcpattern = parse_util_unescape_wildcards(pattern);
|
||||||
|
if opts.ignore_case {
|
||||||
|
wcpattern = wcpattern.to_lowercase();
|
||||||
|
}
|
||||||
|
if opts.entire {
|
||||||
|
if !wcpattern.is_empty() {
|
||||||
|
if wcpattern.char_at(0) != ANY_STRING {
|
||||||
|
wcpattern.insert(0, ANY_STRING);
|
||||||
|
}
|
||||||
|
if wcpattern.char_at(wcpattern.len() - 1) != ANY_STRING {
|
||||||
|
wcpattern.push(ANY_STRING);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
wcpattern.push(ANY_STRING);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
WildCardMatcher {
|
||||||
|
pattern: wcpattern,
|
||||||
|
total_matched: 0,
|
||||||
|
opts,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn report_matches(&mut self, arg: &wstr, streams: &mut io_streams_t) {
|
||||||
|
// Note: --all is a no-op for glob matching since the pattern is always matched
|
||||||
|
// against the entire argument.
|
||||||
|
use crate::ffi::wildcard_match;
|
||||||
|
|
||||||
|
let subject = match self.opts.ignore_case {
|
||||||
|
true => arg.to_lowercase(),
|
||||||
|
false => arg.to_owned(),
|
||||||
|
};
|
||||||
|
let m = wildcard_match(&subject.to_ffi(), &self.pattern.to_ffi(), false);
|
||||||
|
|
||||||
|
if m ^ self.opts.invert_match {
|
||||||
|
self.total_matched += 1;
|
||||||
|
if !self.opts.quiet {
|
||||||
|
if self.opts.index {
|
||||||
|
streams.out.append(sprintf!("1 %lu\n", arg.len()));
|
||||||
|
} else {
|
||||||
|
streams.out.append(arg);
|
||||||
|
streams.out.append1('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
114
fish-rust/src/builtins/string/pad.rs
Normal file
114
fish-rust/src/builtins/string/pad.rs
Normal file
|
@ -0,0 +1,114 @@
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use crate::wutil::{fish_wcstol, fish_wcswidth};
|
||||||
|
|
||||||
|
/// Options for the `string pad` subcommand.
pub struct Pad {
    /// The padding character, set by `-c`/`--chars`; a space by default.
    char_to_pad: char,
    /// The width of `char_to_pad` as reported by `fish_wcswidth`; 1 by default.
    pad_char_width: i32,
    /// The side on which padding is added; `Direction::Left` unless `-r`/`--right` is given.
    pad_from: Direction,
    /// The minimum width requested with `-w`/`--width`; 0 by default.
    width: usize,
}
|
||||||
|
|
||||||
|
impl Default for Pad {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
char_to_pad: ' ',
|
||||||
|
pad_char_width: 1,
|
||||||
|
pad_from: Direction::Left,
|
||||||
|
width: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StringSubCommand<'_> for Pad {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[
|
||||||
|
// FIXME docs say `--char`, there was no long_opt with `--char` in C++
|
||||||
|
wopt(L!("chars"), required_argument, 'c'),
|
||||||
|
wopt(L!("right"), no_argument, 'r'),
|
||||||
|
wopt(L!("width"), required_argument, 'w'),
|
||||||
|
];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":c:rw:");
|
||||||
|
|
||||||
|
fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'c' => {
|
||||||
|
let arg = arg.expect("option -c requires an argument");
|
||||||
|
if arg.len() != 1 {
|
||||||
|
return Err(invalid_args!(
|
||||||
|
"%ls: Padding should be a character '%ls'\n",
|
||||||
|
name,
|
||||||
|
Some(arg)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let pad_char_width = fish_wcswidth(arg.slice_to(1));
|
||||||
|
// can we ever have negative width?
|
||||||
|
if pad_char_width == 0 {
|
||||||
|
return Err(invalid_args!(
|
||||||
|
"%ls: Invalid padding character of width zero '%ls'\n",
|
||||||
|
name,
|
||||||
|
Some(arg)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
self.pad_char_width = pad_char_width;
|
||||||
|
self.char_to_pad = arg.char_at(0);
|
||||||
|
}
|
||||||
|
'r' => self.pad_from = Direction::Right,
|
||||||
|
'w' => {
|
||||||
|
self.width = fish_wcstol(arg.unwrap())?
|
||||||
|
.try_into()
|
||||||
|
.map_err(|_| invalid_args!("%ls: Invalid width value '%ls'\n", name, arg))?
|
||||||
|
}
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle<'args>(
|
||||||
|
&mut self,
|
||||||
|
_parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&'args wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
let mut max_width = 0i32;
|
||||||
|
let mut inputs: Vec<(Cow<'args, wstr>, i32)> = Vec::new();
|
||||||
|
let mut print_newline = true;
|
||||||
|
|
||||||
|
for (arg, want_newline) in Arguments::new(args, optind, streams) {
|
||||||
|
let width = width_without_escapes(&arg, 0);
|
||||||
|
max_width = max_width.max(width);
|
||||||
|
inputs.push((arg, width));
|
||||||
|
print_newline = want_newline;
|
||||||
|
}
|
||||||
|
|
||||||
|
let pad_width = max_width.max(self.width as i32);
|
||||||
|
|
||||||
|
for (input, width) in inputs {
|
||||||
|
use std::iter::repeat;
|
||||||
|
|
||||||
|
let pad = (pad_width - width) / self.pad_char_width;
|
||||||
|
let remaining_width = (pad_width - width) % self.pad_char_width;
|
||||||
|
let mut padded: WString = match self.pad_from {
|
||||||
|
Direction::Left => repeat(self.char_to_pad)
|
||||||
|
.take(pad as usize)
|
||||||
|
.chain(repeat(' ').take(remaining_width as usize))
|
||||||
|
.chain(input.chars())
|
||||||
|
.collect(),
|
||||||
|
Direction::Right => input
|
||||||
|
.chars()
|
||||||
|
.chain(repeat(' ').take(remaining_width as usize))
|
||||||
|
.chain(repeat(self.char_to_pad).take(pad as usize))
|
||||||
|
.collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if print_newline {
|
||||||
|
padded.push('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
streams.out.append(padded);
|
||||||
|
}
|
||||||
|
|
||||||
|
STATUS_CMD_OK
|
||||||
|
}
|
||||||
|
}
|
145
fish-rust/src/builtins/string/repeat.rs
Normal file
145
fish-rust/src/builtins/string/repeat.rs
Normal file
|
@ -0,0 +1,145 @@
|
||||||
|
use super::*;
|
||||||
|
use crate::wutil::fish_wcstol;
|
||||||
|
|
||||||
|
/// Options for the `string repeat` subcommand.
#[derive(Default)]
pub struct Repeat {
    /// Number of repetitions, set by `-n`/`--count`; 0 means "not given".
    count: usize,
    /// Maximum number of characters to print per argument, set by `-m`/`--max`;
    /// 0 means "not given".
    max: usize,
    /// Set by `-q`/`--quiet`.
    quiet: bool,
    /// Set by `-N`/`--no-newline`: suppress the trailing newline.
    no_newline: bool,
}
|
||||||
|
|
||||||
|
impl StringSubCommand<'_> for Repeat {
    const LONG_OPTIONS: &'static [woption<'static>] = &[
        wopt(L!("count"), required_argument, 'n'),
        wopt(L!("max"), required_argument, 'm'),
        wopt(L!("quiet"), no_argument, 'q'),
        wopt(L!("no-newline"), no_argument, 'N'),
    ];
    const SHORT_OPTIONS: &'static wstr = L!(":n:m:qN");

    /// Applies one parsed option. `-n` and `-m` must parse as non-negative integers;
    /// any other option character yields `StringError::UnknownOption`.
    fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> {
        match c {
            'n' => {
                // Negative values fail the conversion to `usize`.
                self.count = fish_wcstol(arg.unwrap())?
                    .try_into()
                    .map_err(|_| invalid_args!("%ls: Invalid count value '%ls'\n", name, arg))?
            }
            'm' => {
                self.max = fish_wcstol(arg.unwrap())?
                    .try_into()
                    .map_err(|_| invalid_args!("%ls: Invalid max value '%ls'\n", name, arg))?
            }
            'q' => self.quiet = true,
            'N' => self.no_newline = true,
            _ => return Err(StringError::UnknownOption),
        }
        return Ok(());
    }

    /// Prints every non-empty argument repeated up to `count` times and/or `max`
    /// characters, separated by newlines. Returns `STATUS_CMD_ERROR` when neither
    /// `count` nor `max` is set, when all arguments were empty, or when flushing the
    /// output reports an error; `STATUS_CMD_OK` otherwise.
    fn handle(
        &mut self,
        _parser: &mut parser_t,
        streams: &mut io_streams_t,
        optind: &mut usize,
        args: &[&wstr],
    ) -> Option<libc::c_int> {
        if self.max == 0 && self.count == 0 {
            // XXX: This used to be allowed, but returned 1.
            // Keep it that way for now instead of adding an error.
            // streams.err.append(L"Count or max must be greater than zero");
            return STATUS_CMD_ERROR;
        }

        let mut all_empty = true;
        let mut first = true;
        let mut print_newline = true;

        for (w, want_newline) in Arguments::new(args, optind, streams) {
            // The newline decision of the last argument wins, even if that argument is empty.
            print_newline = want_newline;
            if w.is_empty() {
                continue;
            }

            all_empty = false;

            if self.quiet {
                // Early out if we can - see #7495.
                return STATUS_CMD_OK;
            }

            // Separate the output of consecutive non-empty arguments with a newline.
            if !first {
                streams.out.append1('\n');
            }
            first = false;

            // The maximum size of the string is either the "max" characters,
            // or it's the "count" repetitions, whichever ends up lower.
            let max = if self.max == 0
                || (self.count > 0 && w.len().wrapping_mul(self.count) < self.max)
            {
                // TODO: we should disallow overflowing unless max <= w.len().checked_mul(self.count).unwrap_or(usize::MAX)
                w.len().wrapping_mul(self.count)
            } else {
                self.max
            };

            // Reserve a string to avoid writing constantly.
            // The 1500 here is a total gluteal extraction, but 500 seems to perform slightly worse.
            let chunk_size = 1500;
            // The + word length is so we don't have to hit the chunk size exactly,
            // which would require us to restart in the middle of the string.
            // E.g. imagine repeating "12345678". The first chunk is hit after a last "1234",
            // so we would then have to restart by appending "5678", which requires a substring.
            // So let's not bother.
            //
            // Unless of course we don't even print the entire word, in which case we just need max.
            let mut chunk = WString::with_capacity(max.min(chunk_size + w.len()));

            // `i` is the number of characters that still have to be added to a chunk or
            // written out.
            let mut i = max;
            while i > 0 {
                if i >= w.len() {
                    chunk.push_utfstr(&w);
                } else {
                    // Fewer characters remain than one repetition: add a prefix and stop.
                    chunk.push_utfstr(w.slice_to(i));
                    break;
                }

                i -= w.len();

                if chunk.len() >= chunk_size {
                    // We hit the chunk size, write it repeatedly until we can't anymore.
                    streams.out.append(&chunk);
                    while i >= chunk.len() {
                        streams.out.append(&chunk);
                        // We can easily be asked to write *a lot* of data,
                        // so we need to check every so often if the pipe has been closed.
                        // If we didn't, running `string repeat -n LARGENUMBER foo | pv`
                        // and pressing ctrl-c seems to hang.
                        if streams.out.flush_and_check_error() != STATUS_CMD_OK.unwrap() {
                            return STATUS_CMD_ERROR;
                        }
                        i -= chunk.len();
                    }
                    // Whatever is left is shorter than a chunk; build it up from scratch.
                    chunk.clear();
                }
            }

            // Flush the remainder.
            if !chunk.is_empty() {
                streams.out.append(&chunk);
            }
        }

        // Historical behavior is to never append a newline if all strings were empty.
        if !self.quiet && !self.no_newline && !all_empty && print_newline {
            streams.out.append1('\n');
        }

        if all_empty {
            STATUS_CMD_ERROR
        } else {
            STATUS_CMD_OK
        }
    }
}
|
251
fish-rust/src/builtins/string/replace.rs
Normal file
251
fish-rust/src/builtins/string/replace.rs
Normal file
|
@ -0,0 +1,251 @@
|
||||||
|
use pcre2::utf32::{Regex, RegexBuilder};
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use crate::future_feature_flags::{feature_test, FeatureFlag};
|
||||||
|
|
||||||
|
/// Options and positional arguments for the `string replace` subcommand.
#[derive(Default)]
pub struct Replace<'args> {
    /// Set by `-a`/`--all`: replace every occurrence instead of only the first.
    all: bool,
    /// Set by `-f`/`--filter`: only print arguments in which a replacement was made.
    filter: bool,
    /// Set by `-i`/`--ignore-case`.
    ignore_case: bool,
    /// Set by `-q`/`--quiet`.
    quiet: bool,
    /// Set by `-r`/`--regex`: treat the pattern as a regular expression.
    regex: bool,
    /// The pattern; filled in by `take_args` from the first positional argument.
    pattern: &'args wstr,
    /// The replacement; filled in by `take_args` from the second positional argument.
    replacement: &'args wstr,
}
|
||||||
|
|
||||||
|
impl<'args> StringSubCommand<'args> for Replace<'args> {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[
|
||||||
|
wopt(L!("all"), no_argument, 'a'),
|
||||||
|
wopt(L!("filter"), no_argument, 'f'),
|
||||||
|
wopt(L!("ignore-case"), no_argument, 'i'),
|
||||||
|
wopt(L!("quiet"), no_argument, 'q'),
|
||||||
|
wopt(L!("regex"), no_argument, 'r'),
|
||||||
|
];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":afiqr");
|
||||||
|
|
||||||
|
fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'a' => self.all = true,
|
||||||
|
'f' => self.filter = true,
|
||||||
|
'i' => self.ignore_case = true,
|
||||||
|
'q' => self.quiet = true,
|
||||||
|
'r' => self.regex = true,
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn take_args(
|
||||||
|
&mut self,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&'args wstr],
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
let cmd = args[0];
|
||||||
|
let Some(pattern) = args.get(*optind).copied() else {
|
||||||
|
string_error!(streams, BUILTIN_ERR_ARG_COUNT0, cmd);
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
};
|
||||||
|
*optind += 1;
|
||||||
|
let Some(replacement) = args.get(*optind).copied() else {
|
||||||
|
string_error!(streams, BUILTIN_ERR_ARG_COUNT1, cmd, 1, 2);
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
};
|
||||||
|
*optind += 1;
|
||||||
|
|
||||||
|
self.pattern = pattern;
|
||||||
|
self.replacement = replacement;
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
_parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
let cmd = args[0];
|
||||||
|
|
||||||
|
let replacer = match StringReplacer::new(self.pattern, self.replacement, self) {
|
||||||
|
Ok(x) => x,
|
||||||
|
Err(e) => {
|
||||||
|
e.print_error(args, streams);
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut replace_count = 0;
|
||||||
|
|
||||||
|
for (arg, want_newline) in Arguments::new(args, optind, streams) {
|
||||||
|
let (replaced, result) = match replacer.replace(arg) {
|
||||||
|
Ok(x) => x,
|
||||||
|
Err(e) => {
|
||||||
|
string_error!(
|
||||||
|
streams,
|
||||||
|
"%ls: Regular expression substitute error: %ls\n",
|
||||||
|
cmd,
|
||||||
|
e.error_message()
|
||||||
|
);
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
replace_count += replaced as usize;
|
||||||
|
|
||||||
|
if !self.quiet && (!self.filter || replaced) {
|
||||||
|
streams.out.append(result);
|
||||||
|
if want_newline {
|
||||||
|
streams.out.append1('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.quiet && replace_count > 0 {
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if replace_count > 0 {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The two replacement strategies of `string replace`; `StringReplacer::new` selects
/// one based on the `--regex` option.
#[allow(clippy::large_enum_variant)]
enum StringReplacer<'args, 'opts> {
    /// Regular-expression substitution.
    Regex {
        /// The replacement text (with escapes already interpreted unless the
        /// `string_replace_backslash` feature is enabled).
        replacement: WString,
        /// The compiled pattern.
        regex: Regex,
        /// The options of the `string replace` invocation.
        opts: &'opts Replace<'args>,
    },
    /// Literal substring replacement.
    Literal {
        /// The text to search for; lower-cased (and therefore owned) with `--ignore-case`.
        pattern: Cow<'args, wstr>,
        /// The text to insert in place of each occurrence.
        replacement: Cow<'args, wstr>,
        /// The options of the `string replace` invocation.
        opts: &'opts Replace<'args>,
    },
}
|
||||||
|
|
||||||
|
impl<'args, 'opts> StringReplacer<'args, 'opts> {
|
||||||
|
fn interpret_escape(arg: &'args wstr) -> Option<WString> {
|
||||||
|
use crate::common::read_unquoted_escape;
|
||||||
|
|
||||||
|
let mut result: WString = WString::with_capacity(arg.len());
|
||||||
|
let mut cursor = arg;
|
||||||
|
while !cursor.is_empty() {
|
||||||
|
if cursor.char_at(0) == '\\' {
|
||||||
|
if let Some(escape_len) = read_unquoted_escape(cursor, &mut result, true, false) {
|
||||||
|
cursor = cursor.slice_from(escape_len);
|
||||||
|
} else {
|
||||||
|
// invalid escape
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
result.push(cursor.char_at(0));
|
||||||
|
cursor = cursor.slice_from(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Some(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new(
|
||||||
|
pattern: &'args wstr,
|
||||||
|
replacement: &'args wstr,
|
||||||
|
opts: &'opts Replace<'args>,
|
||||||
|
) -> Result<Self, RegexError> {
|
||||||
|
let r = match (opts.regex, opts.ignore_case) {
|
||||||
|
(true, _) => {
|
||||||
|
let regex = RegexBuilder::new()
|
||||||
|
.caseless(opts.ignore_case)
|
||||||
|
// set to behave similarly to match, could probably be either enabled by default or
|
||||||
|
// allowed to be user-controlled here
|
||||||
|
.never_utf(true)
|
||||||
|
.build(pattern.as_char_slice())
|
||||||
|
.map_err(|e| RegexError::Compile(pattern.to_owned(), e))?;
|
||||||
|
|
||||||
|
let replacement = if feature_test(FeatureFlag::string_replace_backslash) {
|
||||||
|
replacement.to_owned()
|
||||||
|
} else {
|
||||||
|
Self::interpret_escape(replacement)
|
||||||
|
.ok_or_else(|| RegexError::InvalidEscape(pattern.to_owned()))?
|
||||||
|
};
|
||||||
|
Self::Regex {
|
||||||
|
replacement,
|
||||||
|
regex,
|
||||||
|
opts,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(false, true) => Self::Literal {
|
||||||
|
// previously we used wcsncasecmp but there is no equivalent function in Rust widestring
|
||||||
|
// this should likely be handled by a using the `literal` option on our regex
|
||||||
|
pattern: Cow::Owned(pattern.to_lowercase()),
|
||||||
|
replacement: Cow::Owned(replacement.to_owned()),
|
||||||
|
opts,
|
||||||
|
},
|
||||||
|
(false, false) => Self::Literal {
|
||||||
|
pattern: Cow::Borrowed(pattern),
|
||||||
|
replacement: Cow::Borrowed(replacement),
|
||||||
|
opts,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
Ok(r)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn replace<'a>(&self, arg: Cow<'a, wstr>) -> Result<(bool, Cow<'a, wstr>), pcre2::Error> {
|
||||||
|
match self {
|
||||||
|
StringReplacer::Regex {
|
||||||
|
replacement,
|
||||||
|
regex,
|
||||||
|
opts,
|
||||||
|
} => {
|
||||||
|
let res = if opts.all {
|
||||||
|
regex.replace_all(arg.as_char_slice(), replacement.as_char_slice(), true)
|
||||||
|
} else {
|
||||||
|
regex.replace(arg.as_char_slice(), replacement.as_char_slice(), true)
|
||||||
|
}?;
|
||||||
|
|
||||||
|
let res = match res {
|
||||||
|
Cow::Borrowed(_slice_of_arg) => (false, arg),
|
||||||
|
Cow::Owned(s) => (true, Cow::Owned(WString::from_chars(s))),
|
||||||
|
};
|
||||||
|
return Ok(res);
|
||||||
|
}
|
||||||
|
StringReplacer::Literal {
|
||||||
|
pattern,
|
||||||
|
replacement,
|
||||||
|
opts,
|
||||||
|
} => {
|
||||||
|
if pattern.is_empty() {
|
||||||
|
return Ok((false, arg));
|
||||||
|
}
|
||||||
|
|
||||||
|
// a premature optimization would be to alloc larger if we have replacement.len() > pattern.len()
|
||||||
|
let mut result = WString::with_capacity(arg.len());
|
||||||
|
|
||||||
|
let subject = if opts.ignore_case {
|
||||||
|
arg.to_lowercase()
|
||||||
|
} else {
|
||||||
|
arg.as_ref().to_owned()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut offset = 0;
|
||||||
|
while let Some(idx) = subject[offset..].find(pattern.as_char_slice()) {
|
||||||
|
result.push_utfstr(&subject[offset..offset + idx]);
|
||||||
|
result.push_utfstr(&replacement);
|
||||||
|
offset += idx + pattern.len();
|
||||||
|
if !opts.all {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if offset == 0 {
|
||||||
|
return Ok((false, arg));
|
||||||
|
}
|
||||||
|
result.push_utfstr(&arg[offset..]);
|
||||||
|
|
||||||
|
Ok((true, Cow::Owned(result)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
249
fish-rust/src/builtins/string/shorten.rs
Normal file
249
fish-rust/src/builtins/string/shorten.rs
Normal file
|
@ -0,0 +1,249 @@
|
||||||
|
use super::*;
|
||||||
|
use crate::common::get_ellipsis_str;
|
||||||
|
use crate::fallback::fish_wcwidth;
|
||||||
|
use crate::wcstringutil::split_string;
|
||||||
|
use crate::wutil::{fish_wcstol, fish_wcswidth};
|
||||||
|
|
||||||
|
pub struct Shorten<'args> {
|
||||||
|
chars_to_shorten: &'args wstr,
|
||||||
|
max: Option<usize>,
|
||||||
|
no_newline: bool,
|
||||||
|
quiet: bool,
|
||||||
|
direction: Direction,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Shorten<'_> {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
chars_to_shorten: get_ellipsis_str(),
|
||||||
|
max: None,
|
||||||
|
no_newline: false,
|
||||||
|
quiet: false,
|
||||||
|
direction: Direction::Right,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'args> StringSubCommand<'args> for Shorten<'args> {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[
|
||||||
|
// FIXME: documentation says it's --char
|
||||||
|
wopt(L!("chars"), required_argument, 'c'),
|
||||||
|
wopt(L!("max"), required_argument, 'm'),
|
||||||
|
wopt(L!("no-newline"), no_argument, 'N'),
|
||||||
|
wopt(L!("left"), no_argument, 'l'),
|
||||||
|
wopt(L!("quiet"), no_argument, 'q'),
|
||||||
|
];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":c:m:Nlq");
|
||||||
|
|
||||||
|
fn parse_opt(
|
||||||
|
&mut self,
|
||||||
|
name: &wstr,
|
||||||
|
c: char,
|
||||||
|
arg: Option<&'args wstr>,
|
||||||
|
) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'c' => self.chars_to_shorten = arg.expect("option --char requires an argument"),
|
||||||
|
'm' => {
|
||||||
|
self.max = Some(
|
||||||
|
fish_wcstol(arg.unwrap())?
|
||||||
|
.try_into()
|
||||||
|
.map_err(|_| invalid_args!("%ls: Invalid max value '%ls'\n", name, arg))?,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
'N' => self.no_newline = true,
|
||||||
|
'l' => self.direction = Direction::Left,
|
||||||
|
'q' => self.quiet = true,
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
_parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
let mut min_width = usize::MAX;
|
||||||
|
let mut inputs = Vec::new();
|
||||||
|
let mut ell = self.chars_to_shorten;
|
||||||
|
|
||||||
|
let iter = Arguments::new(args, optind, streams);
|
||||||
|
|
||||||
|
if self.max == Some(0) {
|
||||||
|
// Special case: Max of 0 means no shortening.
|
||||||
|
// This makes this more reusable, so you don't need special-cases like
|
||||||
|
//
|
||||||
|
// if test $shorten -gt 0
|
||||||
|
// string shorten -m $shorten whatever
|
||||||
|
// else
|
||||||
|
// echo whatever
|
||||||
|
// end
|
||||||
|
for (arg, _) in iter {
|
||||||
|
streams.out.append(arg);
|
||||||
|
streams.out.append1('\n');
|
||||||
|
}
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (arg, _) in iter {
|
||||||
|
// Visible width only makes sense line-wise.
|
||||||
|
// So either we have no-newlines (which means we shorten on the first newline),
|
||||||
|
// or we handle the lines separately.
|
||||||
|
let mut splits = split_string(&arg, '\n').into_iter();
|
||||||
|
if self.no_newline && splits.len() > 1 {
|
||||||
|
let mut s = match self.direction {
|
||||||
|
Direction::Right => splits.next(),
|
||||||
|
Direction::Left => splits.last(),
|
||||||
|
}
|
||||||
|
.unwrap();
|
||||||
|
s.push_utfstr(ell);
|
||||||
|
let width = width_without_escapes(&s, 0);
|
||||||
|
|
||||||
|
if width > 0 && (width as usize) < min_width {
|
||||||
|
min_width = width as usize;
|
||||||
|
}
|
||||||
|
inputs.push(s);
|
||||||
|
} else {
|
||||||
|
for s in splits {
|
||||||
|
let width = width_without_escapes(&s, 0);
|
||||||
|
if width > 0 && (width as usize) < min_width {
|
||||||
|
min_width = width as usize;
|
||||||
|
}
|
||||||
|
inputs.push(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let ourmax: usize = self.max.unwrap_or(min_width);
|
||||||
|
|
||||||
|
// TODO: Can we have negative width
|
||||||
|
|
||||||
|
let ell_width: i32 = {
|
||||||
|
let w = fish_wcswidth(ell);
|
||||||
|
if w > ourmax as i32 {
|
||||||
|
// If we can't even print our ellipsis, we substitute nothing,
|
||||||
|
// truncating instead.
|
||||||
|
ell = L!("");
|
||||||
|
0
|
||||||
|
} else {
|
||||||
|
w
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut nsub = 0usize;
|
||||||
|
// We could also error out here if the width of our ellipsis is larger
|
||||||
|
// than the target width.
|
||||||
|
// That seems excessive - specifically because the ellipsis on LANG=C
|
||||||
|
// is "..." (width 3!).
|
||||||
|
|
||||||
|
let skip_escapes = |l: &wstr, pos: usize| -> usize {
|
||||||
|
let mut totallen = 0usize;
|
||||||
|
while l.char_at(pos + totallen) == '\x1B' {
|
||||||
|
let Some(len) = escape_code_length(l.slice_from(pos + totallen)) else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
totallen += len;
|
||||||
|
}
|
||||||
|
totallen
|
||||||
|
};
|
||||||
|
|
||||||
|
for line in inputs {
|
||||||
|
let mut pos = 0usize;
|
||||||
|
let mut max = 0usize;
|
||||||
|
// Collect how much of the string we can use without going over the maximum.
|
||||||
|
if self.direction == Direction::Left {
|
||||||
|
// Our strategy for keeping from the end.
|
||||||
|
// This is rather unoptimized - actually going *backwards* from the end
|
||||||
|
// is extremely tricky because we would have to subtract escapes again.
|
||||||
|
// Also we need to avoid hacking combiners into bits.
|
||||||
|
// This should work for most cases considering the combiners typically have width 0.
|
||||||
|
let mut out = L!("");
|
||||||
|
while pos < line.len() {
|
||||||
|
let w = width_without_escapes(&line, pos);
|
||||||
|
// If we're at the beginning and it fits, we sits.
|
||||||
|
//
|
||||||
|
// Otherwise we require it to fit the ellipsis
|
||||||
|
if (w <= ourmax as i32 && pos == 0) || (w + ell_width <= ourmax as i32) {
|
||||||
|
out = line.slice_from(pos);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
pos += skip_escapes(&line, pos).max(1);
|
||||||
|
}
|
||||||
|
if self.quiet && pos != 0 {
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
let output = match pos {
|
||||||
|
0 => line,
|
||||||
|
_ => {
|
||||||
|
// We have an ellipsis, construct our string and print it.
|
||||||
|
nsub += 1;
|
||||||
|
let mut res = WString::with_capacity(ell.len() + out.len());
|
||||||
|
res.push_utfstr(ell);
|
||||||
|
res.push_utfstr(out);
|
||||||
|
res
|
||||||
|
}
|
||||||
|
};
|
||||||
|
streams.out.append(output);
|
||||||
|
streams.out.append1('\n');
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
/* Direction::Right */
|
||||||
|
// Going from the left.
|
||||||
|
// This is somewhat easier.
|
||||||
|
while max <= ourmax && pos < line.len() {
|
||||||
|
pos += skip_escapes(&line, pos);
|
||||||
|
let w = fish_wcwidth(line.char_at(pos));
|
||||||
|
if w <= 0 || max + w as usize + ell_width as usize <= ourmax {
|
||||||
|
// If it still fits, even if it is the last, we add it.
|
||||||
|
max += w as usize;
|
||||||
|
pos += 1;
|
||||||
|
} else {
|
||||||
|
// We're at the limit, so see if the entire string fits.
|
||||||
|
let mut max2: usize = max + w as usize;
|
||||||
|
let mut pos2 = pos + 1;
|
||||||
|
while pos2 < line.len() {
|
||||||
|
pos2 += skip_escapes(&line, pos2);
|
||||||
|
max2 += fish_wcwidth(line.char_at(pos2)) as usize;
|
||||||
|
pos2 += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if max2 <= ourmax {
|
||||||
|
// We're at the end and everything fits,
|
||||||
|
// no ellipsis.
|
||||||
|
pos = pos2;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.quiet && pos != line.len() {
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
if pos == line.len() {
|
||||||
|
streams.out.append(line);
|
||||||
|
streams.out.append1('\n');
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
nsub += 1;
|
||||||
|
let mut newl = line;
|
||||||
|
newl.truncate(pos);
|
||||||
|
newl.push_utfstr(ell);
|
||||||
|
newl.push('\n');
|
||||||
|
streams.out.append(newl);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return true if we have shortened something and false otherwise.
|
||||||
|
if nsub > 0 {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
285
fish-rust/src/builtins/string/split.rs
Normal file
285
fish-rust/src/builtins/string/split.rs
Normal file
|
@ -0,0 +1,285 @@
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::ops::Deref;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use crate::wcstringutil::split_about;
|
||||||
|
use crate::wutil::{fish_wcstoi, fish_wcstol};
|
||||||
|
|
||||||
|
pub struct Split<'args> {
|
||||||
|
quiet: bool,
|
||||||
|
split_from: Direction,
|
||||||
|
max: usize,
|
||||||
|
no_empty: bool,
|
||||||
|
fields: Fields,
|
||||||
|
allow_empty: bool,
|
||||||
|
pub is_split0: bool,
|
||||||
|
sep: &'args wstr,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Split<'_> {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
quiet: false,
|
||||||
|
split_from: Direction::Left,
|
||||||
|
max: usize::MAX,
|
||||||
|
no_empty: false,
|
||||||
|
fields: Fields(Vec::new()),
|
||||||
|
allow_empty: false,
|
||||||
|
is_split0: false,
|
||||||
|
sep: L!("\0"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// List of zero-based field indices.
///
/// This is a newtype around `Vec<usize>` just for the sake of implementing `TryFrom`.
#[repr(transparent)]
struct Fields(Vec<usize>);
|
||||||
|
impl Deref for Fields {
|
||||||
|
type Target = Vec<usize>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reasons a `--fields` value can be rejected.
enum FieldParseError {
    /// A component could not be parsed as an integer.
    Number,
    /// One end of a `start-end` range is too big or too small.
    Range,
    /// A single field parsed as a number but lies outside the allowed range.
    Field,
}
|
||||||
|
|
||||||
|
impl From<crate::wutil::wcstoi::Error> for FieldParseError {
|
||||||
|
fn from(_: crate::wutil::wcstoi::Error) -> Self {
|
||||||
|
FieldParseError::Number
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'args> TryFrom<&'args wstr> for Fields {
|
||||||
|
type Error = FieldParseError;
|
||||||
|
|
||||||
|
/// FIELDS is a comma-separated string of field numbers and/or spans.
|
||||||
|
/// Each field is one-indexed.
|
||||||
|
fn try_from(value: &wstr) -> Result<Self, Self::Error> {
|
||||||
|
fn parse_field(f: &wstr) -> Result<Vec<usize>, FieldParseError> {
|
||||||
|
use FieldParseError::*;
|
||||||
|
let range: Vec<&wstr> = f.split('-').collect();
|
||||||
|
let range: Vec<usize> = match range[..] {
|
||||||
|
[s, e] => {
|
||||||
|
let start = fish_wcstoi(s)? as usize;
|
||||||
|
let end = fish_wcstoi(e)? as usize;
|
||||||
|
|
||||||
|
if start == 0 || end == 0 {
|
||||||
|
return Err(Range);
|
||||||
|
}
|
||||||
|
|
||||||
|
if start <= end {
|
||||||
|
// we store as 0-indexed, but the range is 1-indexed
|
||||||
|
(start - 1..end).collect()
|
||||||
|
} else {
|
||||||
|
// this is allowed
|
||||||
|
(end - 1..start).rev().collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => match fish_wcstoi(f)? as usize {
|
||||||
|
n @ 1.. => vec![n - 1],
|
||||||
|
_ => return Err(Field),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
Ok(range)
|
||||||
|
}
|
||||||
|
|
||||||
|
let fields = value.split(',').map(parse_field);
|
||||||
|
|
||||||
|
let mut indices = Vec::new();
|
||||||
|
for field in fields {
|
||||||
|
indices.extend(field?);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self(indices))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'args> StringSubCommand<'args> for Split<'args> {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[
|
||||||
|
wopt(L!("quiet"), no_argument, 'q'),
|
||||||
|
wopt(L!("right"), no_argument, 'r'),
|
||||||
|
wopt(L!("max"), required_argument, 'm'),
|
||||||
|
wopt(L!("no-empty"), no_argument, 'n'),
|
||||||
|
wopt(L!("fields"), required_argument, 'f'),
|
||||||
|
// FIXME: allow-empty is not documented
|
||||||
|
wopt(L!("allow-empty"), no_argument, 'a'),
|
||||||
|
];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":qrm:nf:a");
|
||||||
|
|
||||||
|
fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'q' => self.quiet = true,
|
||||||
|
'r' => self.split_from = Direction::Right,
|
||||||
|
'm' => {
|
||||||
|
self.max = fish_wcstol(arg.unwrap())?
|
||||||
|
.try_into()
|
||||||
|
.map_err(|_| invalid_args!("%ls: Invalid max value '%ls'\n", name, arg))?
|
||||||
|
}
|
||||||
|
'n' => self.no_empty = true,
|
||||||
|
'f' => {
|
||||||
|
self.fields = arg.unwrap().try_into().map_err(|e| match e {
|
||||||
|
FieldParseError::Number => StringError::NotANumber,
|
||||||
|
FieldParseError::Range => {
|
||||||
|
invalid_args!("%ls: Invalid range value for field '%ls'\n", name, arg)
|
||||||
|
}
|
||||||
|
FieldParseError::Field => {
|
||||||
|
invalid_args!("%ls: Invalid fields value '%ls'\n", name, arg)
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
'a' => self.allow_empty = true,
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn take_args(
|
||||||
|
&mut self,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&'args wstr],
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
if self.is_split0 {
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
let Some(arg) = args.get(*optind).copied() else {
|
||||||
|
string_error!(streams, BUILTIN_ERR_ARG_COUNT0, args[0]);
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
};
|
||||||
|
*optind += 1;
|
||||||
|
self.sep = arg;
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
_parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&'args wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
if self.fields.is_empty() && self.allow_empty {
|
||||||
|
streams.err.append(wgettext_fmt!(
|
||||||
|
BUILTIN_ERR_COMBO2,
|
||||||
|
args[0],
|
||||||
|
wgettext!("--allow-empty is only valid with --fields")
|
||||||
|
));
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
}
|
||||||
|
|
||||||
|
let sep = self.sep;
|
||||||
|
let mut all_splits: Vec<Vec<Cow<'args, wstr>>> = Vec::new();
|
||||||
|
let mut split_count = 0usize;
|
||||||
|
let mut arg_count = 0usize;
|
||||||
|
|
||||||
|
let argiter = match self.is_split0 {
|
||||||
|
false => Arguments::new(args, optind, streams),
|
||||||
|
true => Arguments::without_splitting_on_newline(args, optind, streams),
|
||||||
|
};
|
||||||
|
for (arg, _) in argiter {
|
||||||
|
let splits: Vec<Cow<'args, wstr>> = match (self.split_from, arg) {
|
||||||
|
(Direction::Right, arg) => {
|
||||||
|
let mut rev = arg.into_owned();
|
||||||
|
rev.as_char_slice_mut().reverse();
|
||||||
|
let sep: WString = sep.chars().rev().collect();
|
||||||
|
split_about(&rev, &sep, self.max, self.no_empty)
|
||||||
|
.into_iter()
|
||||||
|
// If we are from the right, split_about gave us reversed strings, in reversed order!
|
||||||
|
.map(|s| Cow::Owned(s.chars().rev().collect::<WString>()))
|
||||||
|
.rev()
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
// we need to special-case the Cow::Borrowed case, since
|
||||||
|
// let arg: &'args wstr = &arg;
|
||||||
|
// does not compile since `arg` can be dropped at the end of this scope
|
||||||
|
// making the reference invalid if it is owned.
|
||||||
|
(Direction::Left, Cow::Borrowed(arg)) => {
|
||||||
|
split_about(arg, sep, self.max, self.no_empty)
|
||||||
|
.into_iter()
|
||||||
|
.map(Cow::Borrowed)
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
(Direction::Left, Cow::Owned(arg)) => {
|
||||||
|
split_about(&arg, sep, self.max, self.no_empty)
|
||||||
|
.into_iter()
|
||||||
|
.map(|s| Cow::Owned(s.to_owned()))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// If we're quiet, we return early if we've found something to split.
|
||||||
|
if self.quiet && splits.len() > 1 {
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
split_count += splits.len();
|
||||||
|
arg_count += 1;
|
||||||
|
all_splits.push(splits);
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.quiet {
|
||||||
|
return if split_count > arg_count {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
for mut splits in all_splits {
|
||||||
|
if self.is_split0 && !splits.is_empty() {
|
||||||
|
// split0 ignores a trailing \0, so a\0b\0 is two elements.
|
||||||
|
// In contrast to split, where a\nb\n is three - "a", "b" and "".
|
||||||
|
//
|
||||||
|
// Remove the last element if it is empty.
|
||||||
|
if splits.last().unwrap().is_empty() {
|
||||||
|
splits.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let splits = splits;
|
||||||
|
|
||||||
|
if !self.fields.is_empty() {
|
||||||
|
// Print nothing and return error if any of the supplied
|
||||||
|
// fields do not exist, unless `--allow-empty` is used.
|
||||||
|
if !self.allow_empty {
|
||||||
|
for field in self.fields.iter() {
|
||||||
|
// we already have checked the start
|
||||||
|
if *field >= splits.len() {
|
||||||
|
return STATUS_CMD_ERROR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for field in self.fields.iter() {
|
||||||
|
if let Some(val) = splits.get(*field) {
|
||||||
|
streams.out.append_with_separation(
|
||||||
|
val,
|
||||||
|
separation_type_t::explicitly,
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for split in &splits {
|
||||||
|
streams
|
||||||
|
.out
|
||||||
|
.append_with_separation(split, separation_type_t::explicitly, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We split something if we have more split values than args.
|
||||||
|
return if split_count > arg_count {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
115
fish-rust/src/builtins/string/sub.rs
Normal file
115
fish-rust/src/builtins/string/sub.rs
Normal file
|
@ -0,0 +1,115 @@
|
||||||
|
use std::num::NonZeroI64;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use crate::wutil::fish_wcstol;
|
||||||
|
|
||||||
|
/// Options for the `string sub` subcommand.
#[derive(Default)]
pub struct Sub {
    /// Number of characters to take (`-l`/`--length`); mutually exclusive with `end`.
    length: Option<usize>,
    /// Set by `-q`/`--quiet`: print nothing.
    quiet: bool,
    /// One-based start position (`-s`/`--start`); negative values count from the end.
    start: Option<NonZeroI64>,
    /// End position (`-e`/`--end`); negative values count from the end.
    end: Option<NonZeroI64>,
}
|
||||||
|
|
||||||
|
impl StringSubCommand<'_> for Sub {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[
|
||||||
|
wopt(L!("length"), required_argument, 'l'),
|
||||||
|
wopt(L!("start"), required_argument, 's'),
|
||||||
|
wopt(L!("end"), required_argument, 'e'),
|
||||||
|
wopt(L!("quiet"), no_argument, 'q'),
|
||||||
|
];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":l:qs:e:");
|
||||||
|
|
||||||
|
fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'l' => {
|
||||||
|
self.length =
|
||||||
|
Some(fish_wcstol(arg.unwrap())?.try_into().map_err(|_| {
|
||||||
|
invalid_args!("%ls: Invalid length value '%ls'\n", name, arg)
|
||||||
|
})?)
|
||||||
|
}
|
||||||
|
's' => {
|
||||||
|
self.start =
|
||||||
|
Some(fish_wcstol(arg.unwrap())?.try_into().map_err(|_| {
|
||||||
|
invalid_args!("%ls: Invalid start value '%ls'\n", name, arg)
|
||||||
|
})?)
|
||||||
|
}
|
||||||
|
'e' => {
|
||||||
|
self.end = Some(
|
||||||
|
fish_wcstol(arg.unwrap())?
|
||||||
|
.try_into()
|
||||||
|
.map_err(|_| invalid_args!("%ls: Invalid end value '%ls'\n", name, arg))?,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
'q' => self.quiet = true,
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
_parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
let cmd = args[0];
|
||||||
|
if self.length.is_some() && self.end.is_some() {
|
||||||
|
streams.err.append(wgettext_fmt!(
|
||||||
|
BUILTIN_ERR_COMBO2,
|
||||||
|
cmd,
|
||||||
|
wgettext!("--end and --length are mutually exclusive")
|
||||||
|
));
|
||||||
|
return STATUS_INVALID_ARGS;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut nsub = 0;
|
||||||
|
for (s, want_newline) in Arguments::new(args, optind, streams) {
|
||||||
|
let start: usize = match self.start.map(i64::from).unwrap_or_default() {
|
||||||
|
n @ 1.. => n as usize - 1,
|
||||||
|
0 => 0,
|
||||||
|
n => {
|
||||||
|
let n = u64::min(n.unsigned_abs(), usize::MAX as u64) as usize;
|
||||||
|
s.len().saturating_sub(n)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.clamp(0, s.len());
|
||||||
|
|
||||||
|
let count = {
|
||||||
|
let n = self
|
||||||
|
.end
|
||||||
|
.map(|e| match i64::from(e) {
|
||||||
|
// end can never be 0
|
||||||
|
n @ 1.. => n as usize,
|
||||||
|
n => {
|
||||||
|
let n = u64::min(n.unsigned_abs(), usize::MAX as u64) as usize;
|
||||||
|
s.len().saturating_sub(n)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.map(|n| n.saturating_sub(start));
|
||||||
|
|
||||||
|
self.length.or(n).unwrap_or(s.len())
|
||||||
|
};
|
||||||
|
|
||||||
|
if !self.quiet {
|
||||||
|
streams
|
||||||
|
.out
|
||||||
|
.append(&s[start..usize::min(start + count, s.len())]);
|
||||||
|
if want_newline {
|
||||||
|
streams.out.append1('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nsub += 1;
|
||||||
|
if self.quiet {
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if nsub > 0 {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
49
fish-rust/src/builtins/string/transform.rs
Normal file
49
fish-rust/src/builtins/string/transform.rs
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
pub struct Transform {
|
||||||
|
pub quiet: bool,
|
||||||
|
pub func: fn(&wstr) -> WString,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StringSubCommand<'_> for Transform {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[wopt(L!("quiet"), no_argument, 'q')];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":q");
|
||||||
|
fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'q' => self.quiet = true,
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
_parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
let mut n_transformed = 0usize;
|
||||||
|
|
||||||
|
for (arg, want_newline) in Arguments::new(args, optind, streams) {
|
||||||
|
let transformed = (self.func)(&arg);
|
||||||
|
if transformed != arg {
|
||||||
|
n_transformed += 1;
|
||||||
|
}
|
||||||
|
if !self.quiet {
|
||||||
|
streams.out.append(&transformed);
|
||||||
|
if want_newline {
|
||||||
|
streams.out.append1('\n');
|
||||||
|
}
|
||||||
|
} else if n_transformed > 0 {
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if n_transformed > 0 {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
99
fish-rust/src/builtins/string/trim.rs
Normal file
99
fish-rust/src/builtins/string/trim.rs
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
pub struct Trim<'args> {
|
||||||
|
chars_to_trim: &'args wstr,
|
||||||
|
left: bool,
|
||||||
|
right: bool,
|
||||||
|
quiet: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Trim<'_> {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
// from " \f\n\r\t\v"
|
||||||
|
chars_to_trim: L!(" \x0C\n\r\x09\x0B"),
|
||||||
|
left: false,
|
||||||
|
right: false,
|
||||||
|
quiet: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'args> StringSubCommand<'args> for Trim<'args> {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[
|
||||||
|
wopt(L!("chars"), required_argument, 'c'),
|
||||||
|
wopt(L!("left"), no_argument, 'l'),
|
||||||
|
wopt(L!("right"), no_argument, 'r'),
|
||||||
|
wopt(L!("quiet"), no_argument, 'q'),
|
||||||
|
];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":c:lrq");
|
||||||
|
|
||||||
|
fn parse_opt(
|
||||||
|
&mut self,
|
||||||
|
_n: &wstr,
|
||||||
|
c: char,
|
||||||
|
arg: Option<&'args wstr>,
|
||||||
|
) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'c' => self.chars_to_trim = arg.unwrap(),
|
||||||
|
'l' => self.left = true,
|
||||||
|
'r' => self.right = true,
|
||||||
|
'q' => self.quiet = true,
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
_parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
// If neither left or right is specified, we do both.
|
||||||
|
if !self.left && !self.right {
|
||||||
|
self.left = true;
|
||||||
|
self.right = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut ntrim = 0;
|
||||||
|
|
||||||
|
let to_trim_end = |str: &wstr| -> usize {
|
||||||
|
str.chars()
|
||||||
|
.rev()
|
||||||
|
.take_while(|&c| self.chars_to_trim.contains(c))
|
||||||
|
.count()
|
||||||
|
};
|
||||||
|
|
||||||
|
let to_trim_start = |str: &wstr| -> usize {
|
||||||
|
str.chars()
|
||||||
|
.take_while(|&c| self.chars_to_trim.contains(c))
|
||||||
|
.count()
|
||||||
|
};
|
||||||
|
|
||||||
|
for (arg, want_newline) in Arguments::new(args, optind, streams) {
|
||||||
|
let trim_start = self.left.then(|| to_trim_start(&arg)).unwrap_or(0);
|
||||||
|
// collision is only an issue if the whole string is getting trimmed
|
||||||
|
let trim_end = (self.right && trim_start != arg.len())
|
||||||
|
.then(|| to_trim_end(&arg))
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
|
ntrim += trim_start + trim_end;
|
||||||
|
if !self.quiet {
|
||||||
|
streams.out.append(&arg[trim_start..arg.len() - trim_end]);
|
||||||
|
if want_newline {
|
||||||
|
streams.out.append1('\n');
|
||||||
|
}
|
||||||
|
} else if ntrim > 0 {
|
||||||
|
return STATUS_CMD_OK;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ntrim > 0 {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
57
fish-rust/src/builtins/string/unescape.rs
Normal file
57
fish-rust/src/builtins/string/unescape.rs
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
use super::*;
|
||||||
|
use crate::common::{unescape_string, UnescapeStringStyle};
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct Unescape {
|
||||||
|
no_quoted: bool,
|
||||||
|
style: UnescapeStringStyle,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StringSubCommand<'_> for Unescape {
|
||||||
|
const LONG_OPTIONS: &'static [woption<'static>] = &[
|
||||||
|
// FIXME: this flag means nothing, but was present in the C++ code
|
||||||
|
// should be removed
|
||||||
|
wopt(L!("no-quoted"), no_argument, 'n'),
|
||||||
|
wopt(L!("style"), required_argument, NONOPTION_CHAR_CODE),
|
||||||
|
];
|
||||||
|
const SHORT_OPTIONS: &'static wstr = L!(":n");
|
||||||
|
|
||||||
|
fn parse_opt(&mut self, name: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> {
|
||||||
|
match c {
|
||||||
|
'n' => self.no_quoted = true,
|
||||||
|
NONOPTION_CHAR_CODE => {
|
||||||
|
self.style = arg
|
||||||
|
.unwrap()
|
||||||
|
.try_into()
|
||||||
|
.map_err(|_| invalid_args!("%ls: Invalid style value '%ls'\n", name, arg))?
|
||||||
|
}
|
||||||
|
_ => return Err(StringError::UnknownOption),
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle(
|
||||||
|
&mut self,
|
||||||
|
_parser: &mut parser_t,
|
||||||
|
streams: &mut io_streams_t,
|
||||||
|
optind: &mut usize,
|
||||||
|
args: &[&wstr],
|
||||||
|
) -> Option<libc::c_int> {
|
||||||
|
let mut nesc = 0;
|
||||||
|
for (arg, want_newline) in Arguments::new(args, optind, streams) {
|
||||||
|
if let Some(res) = unescape_string(&arg, self.style) {
|
||||||
|
streams.out.append(res);
|
||||||
|
if want_newline {
|
||||||
|
streams.out.append1('\n');
|
||||||
|
}
|
||||||
|
nesc += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if nesc > 0 {
|
||||||
|
STATUS_CMD_OK
|
||||||
|
} else {
|
||||||
|
STATUS_CMD_ERROR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1 +1,2 @@
|
||||||
|
mod string_tests;
|
||||||
mod test_tests;
|
mod test_tests;
|
||||||
|
|
303
fish-rust/src/builtins/tests/string_tests.rs
Normal file
303
fish-rust/src/builtins/tests/string_tests.rs
Normal file
|
@ -0,0 +1,303 @@
|
||||||
|
use crate::ffi_tests::add_test;
|
||||||
|
|
||||||
|
add_test! {"test_string", || {
|
||||||
|
use crate::ffi::parser_t;
|
||||||
|
use crate::ffi;
|
||||||
|
use crate::builtins::string::string;
|
||||||
|
use crate::wchar_ffi::WCharFromFFI;
|
||||||
|
use crate::common::{EscapeStringStyle, escape_string};
|
||||||
|
use crate::wchar::wstr;
|
||||||
|
use crate::wchar::L;
|
||||||
|
use crate::builtins::shared::{STATUS_CMD_ERROR,STATUS_CMD_OK, STATUS_INVALID_ARGS};
|
||||||
|
|
||||||
|
use crate::future_feature_flags::{scoped_test, FeatureFlag};
|
||||||
|
|
||||||
|
// avoid 1.3k L!()'s
|
||||||
|
macro_rules! test_cases {
|
||||||
|
([$($x:expr),*], $rc:expr, $out:expr) => { (vec![$(L!($x)),*], $rc, L!($out)) };
|
||||||
|
[$($x:tt),* $(,)?] => { [$(test_cases!$x),*] };
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: these should be individual tests, not all in one, port when we can run these with `cargo test`
|
||||||
|
fn string_test(mut args: Vec<&wstr>, expected_rc: Option<i32>, expected_out: &wstr) {
|
||||||
|
let parser: &mut parser_t = unsafe { &mut *parser_t::principal_parser_ffi() };
|
||||||
|
let mut streams = ffi::make_test_io_streams_ffi();
|
||||||
|
let mut io = crate::builtins::shared::io_streams_t::new(streams.pin_mut());
|
||||||
|
|
||||||
|
let rc = string(parser, &mut io, args.as_mut_slice()).expect("string failed");
|
||||||
|
|
||||||
|
assert_eq!(expected_rc.unwrap(), rc, "string builtin returned unexpected return code");
|
||||||
|
|
||||||
|
let string_stream_contents = &ffi::get_test_output_ffi(&streams);
|
||||||
|
let actual = escape_string(&string_stream_contents.from_ffi(), EscapeStringStyle::default());
|
||||||
|
let expected = escape_string(expected_out, EscapeStringStyle::default());
|
||||||
|
assert_eq!(expected, actual, "string builtin returned unexpected output");
|
||||||
|
}
|
||||||
|
|
||||||
|
let tests = test_cases![
|
||||||
|
(["string", "escape"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "escape", ""], STATUS_CMD_OK, "''\n"),
|
||||||
|
(["string", "escape", "-n", ""], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "escape", "a"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "escape", "\x07"], STATUS_CMD_OK, "\\cg\n"),
|
||||||
|
(["string", "escape", "\"x\""], STATUS_CMD_OK, "'\"x\"'\n"),
|
||||||
|
(["string", "escape", "hello world"], STATUS_CMD_OK, "'hello world'\n"),
|
||||||
|
(["string", "escape", "-n", "hello world"], STATUS_CMD_OK, "hello\\ world\n"),
|
||||||
|
(["string", "escape", "hello", "world"], STATUS_CMD_OK, "hello\nworld\n"),
|
||||||
|
(["string", "escape", "-n", "~"], STATUS_CMD_OK, "\\~\n"),
|
||||||
|
|
||||||
|
(["string", "join"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "join", ""], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "join", "", "", "", ""], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "join", "", "a", "b", "c"], STATUS_CMD_OK, "abc\n"),
|
||||||
|
(["string", "join", ".", "fishshell", "com"], STATUS_CMD_OK, "fishshell.com\n"),
|
||||||
|
(["string", "join", "/", "usr"], STATUS_CMD_ERROR, "usr\n"),
|
||||||
|
(["string", "join", "/", "usr", "local", "bin"], STATUS_CMD_OK, "usr/local/bin\n"),
|
||||||
|
(["string", "join", "...", "3", "2", "1"], STATUS_CMD_OK, "3...2...1\n"),
|
||||||
|
(["string", "join", "-q"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "join", "-q", "."], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "join", "-q", ".", "."], STATUS_CMD_ERROR, ""),
|
||||||
|
|
||||||
|
(["string", "length"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "length", ""], STATUS_CMD_ERROR, "0\n"),
|
||||||
|
(["string", "length", "", "", ""], STATUS_CMD_ERROR, "0\n0\n0\n"),
|
||||||
|
(["string", "length", "a"], STATUS_CMD_OK, "1\n"),
|
||||||
|
|
||||||
|
(["string", "length", "\u{2008A}"], STATUS_CMD_OK, "1\n"),
|
||||||
|
(["string", "length", "um", "dois", "três"], STATUS_CMD_OK, "2\n4\n4\n"),
|
||||||
|
(["string", "length", "um", "dois", "três"], STATUS_CMD_OK, "2\n4\n4\n"),
|
||||||
|
(["string", "length", "-q"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "length", "-q", ""], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "length", "-q", "a"], STATUS_CMD_OK, ""),
|
||||||
|
|
||||||
|
(["string", "match"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "match", ""], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "", ""], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "match", "?", "a"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "match", "*", ""], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "match", "**", ""], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "match", "*", "xyzzy"], STATUS_CMD_OK, "xyzzy\n"),
|
||||||
|
(["string", "match", "**", "plugh"], STATUS_CMD_OK, "plugh\n"),
|
||||||
|
(["string", "match", "a*b", "axxb"], STATUS_CMD_OK, "axxb\n"),
|
||||||
|
(["string", "match", "a??b", "axxb"], STATUS_CMD_OK, "axxb\n"),
|
||||||
|
(["string", "match", "-i", "a??B", "axxb"], STATUS_CMD_OK, "axxb\n"),
|
||||||
|
(["string", "match", "-i", "a??b", "Axxb"], STATUS_CMD_OK, "Axxb\n"),
|
||||||
|
(["string", "match", "a*", "axxb"], STATUS_CMD_OK, "axxb\n"),
|
||||||
|
(["string", "match", "*a", "xxa"], STATUS_CMD_OK, "xxa\n"),
|
||||||
|
(["string", "match", "*a*", "axa"], STATUS_CMD_OK, "axa\n"),
|
||||||
|
(["string", "match", "*a*", "xax"], STATUS_CMD_OK, "xax\n"),
|
||||||
|
(["string", "match", "*a*", "bxa"], STATUS_CMD_OK, "bxa\n"),
|
||||||
|
(["string", "match", "*a", "a"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "match", "a*", "a"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "match", "a*b*c", "axxbyyc"], STATUS_CMD_OK, "axxbyyc\n"),
|
||||||
|
(["string", "match", "\\*", "*"], STATUS_CMD_OK, "*\n"),
|
||||||
|
(["string", "match", "a*\\", "abc\\"], STATUS_CMD_OK, "abc\\\n"),
|
||||||
|
(["string", "match", "a*\\?", "abc?"], STATUS_CMD_OK, "abc?\n"),
|
||||||
|
|
||||||
|
(["string", "match", "?", ""], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "?", "ab"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "??", "a"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "?a", "a"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "a?", "a"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "a??B", "axxb"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "a*b", "axxbc"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "*b", "bbba"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "0x[0-9a-fA-F][0-9a-fA-F]", "0xbad"], STATUS_CMD_ERROR, ""),
|
||||||
|
|
||||||
|
(["string", "match", "-a", "*", "ab", "cde"], STATUS_CMD_OK, "ab\ncde\n"),
|
||||||
|
(["string", "match", "*", "ab", "cde"], STATUS_CMD_OK, "ab\ncde\n"),
|
||||||
|
(["string", "match", "-n", "*d*", "cde"], STATUS_CMD_OK, "1 3\n"),
|
||||||
|
(["string", "match", "-n", "*x*", "cde"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "-q", "a*", "b", "c"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "-q", "a*", "b", "a"], STATUS_CMD_OK, ""),
|
||||||
|
|
||||||
|
(["string", "match", "-r"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "match", "-r", ""], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "-r", "", ""], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "match", "-r", ".", "a"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "match", "-r", ".*", ""], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "match", "-r", "a*b", "b"], STATUS_CMD_OK, "b\n"),
|
||||||
|
(["string", "match", "-r", "a*b", "aab"], STATUS_CMD_OK, "aab\n"),
|
||||||
|
(["string", "match", "-r", "-i", "a*b", "Aab"], STATUS_CMD_OK, "Aab\n"),
|
||||||
|
(["string", "match", "-r", "-a", "a[bc]", "abadac"], STATUS_CMD_OK, "ab\nac\n"),
|
||||||
|
(["string", "match", "-r", "a", "xaxa", "axax"], STATUS_CMD_OK, "a\na\n"),
|
||||||
|
(["string", "match", "-r", "-a", "a", "xaxa", "axax"], STATUS_CMD_OK, "a\na\na\na\n"),
|
||||||
|
(["string", "match", "-r", "a[bc]", "abadac"], STATUS_CMD_OK, "ab\n"),
|
||||||
|
(["string", "match", "-r", "-q", "a[bc]", "abadac"], STATUS_CMD_OK, ""),
|
||||||
|
(["string", "match", "-r", "-q", "a[bc]", "ad"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "-r", "(a+)b(c)", "aabc"], STATUS_CMD_OK, "aabc\naa\nc\n"),
|
||||||
|
(["string", "match", "-r", "-a", "(a)b(c)", "abcabc"], STATUS_CMD_OK, "abc\na\nc\nabc\na\nc\n"),
|
||||||
|
(["string", "match", "-r", "(a)b(c)", "abcabc"], STATUS_CMD_OK, "abc\na\nc\n"),
|
||||||
|
(["string", "match", "-r", "(a|(z))(bc)", "abc"], STATUS_CMD_OK, "abc\na\nbc\n"),
|
||||||
|
(["string", "match", "-r", "-n", "a", "ada", "dad"], STATUS_CMD_OK, "1 1\n2 1\n"),
|
||||||
|
(["string", "match", "-r", "-n", "-a", "a", "bacadae"], STATUS_CMD_OK, "2 1\n4 1\n6 1\n"),
|
||||||
|
(["string", "match", "-r", "-n", "(a).*(b)", "a---b"], STATUS_CMD_OK, "1 5\n1 1\n5 1\n"),
|
||||||
|
(["string", "match", "-r", "-n", "(a)(b)", "ab"], STATUS_CMD_OK, "1 2\n1 1\n2 1\n"),
|
||||||
|
(["string", "match", "-r", "-n", "(a)(b)", "abab"], STATUS_CMD_OK, "1 2\n1 1\n2 1\n"),
|
||||||
|
(["string", "match", "-r", "-n", "-a", "(a)(b)", "abab"], STATUS_CMD_OK, "1 2\n1 1\n2 1\n3 2\n3 1\n4 1\n"),
|
||||||
|
(["string", "match", "-r", "*", ""], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "match", "-r", "-a", "a*", "b"], STATUS_CMD_OK, "\n\n"),
|
||||||
|
(["string", "match", "-r", "foo\\Kbar", "foobar"], STATUS_CMD_OK, "bar\n"),
|
||||||
|
(["string", "match", "-r", "(foo)\\Kbar", "foobar"], STATUS_CMD_OK, "bar\nfoo\n"),
|
||||||
|
(["string", "replace"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "replace", ""], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "replace", "", ""], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "replace", "", "", ""], STATUS_CMD_ERROR, "\n"),
|
||||||
|
(["string", "replace", "", "", " "], STATUS_CMD_ERROR, " \n"),
|
||||||
|
(["string", "replace", "a", "b", ""], STATUS_CMD_ERROR, "\n"),
|
||||||
|
(["string", "replace", "a", "b", "a"], STATUS_CMD_OK, "b\n"),
|
||||||
|
(["string", "replace", "a", "b", "xax"], STATUS_CMD_OK, "xbx\n"),
|
||||||
|
(["string", "replace", "a", "b", "xax", "axa"], STATUS_CMD_OK, "xbx\nbxa\n"),
|
||||||
|
(["string", "replace", "bar", "x", "red barn"], STATUS_CMD_OK, "red xn\n"),
|
||||||
|
(["string", "replace", "x", "bar", "red xn"], STATUS_CMD_OK, "red barn\n"),
|
||||||
|
(["string", "replace", "--", "x", "-", "xyz"], STATUS_CMD_OK, "-yz\n"),
|
||||||
|
(["string", "replace", "--", "y", "-", "xyz"], STATUS_CMD_OK, "x-z\n"),
|
||||||
|
(["string", "replace", "--", "z", "-", "xyz"], STATUS_CMD_OK, "xy-\n"),
|
||||||
|
(["string", "replace", "-i", "z", "X", "_Z_"], STATUS_CMD_OK, "_X_\n"),
|
||||||
|
(["string", "replace", "-a", "a", "A", "aaa"], STATUS_CMD_OK, "AAA\n"),
|
||||||
|
(["string", "replace", "-i", "a", "z", "AAA"], STATUS_CMD_OK, "zAA\n"),
|
||||||
|
(["string", "replace", "-q", "x", ">x<", "x"], STATUS_CMD_OK, ""),
|
||||||
|
(["string", "replace", "-a", "x", "", "xxx"], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "replace", "-a", "***", "_", "*****"], STATUS_CMD_OK, "_**\n"),
|
||||||
|
(["string", "replace", "-a", "***", "***", "******"], STATUS_CMD_OK, "******\n"),
|
||||||
|
(["string", "replace", "-a", "a", "b", "xax", "axa"], STATUS_CMD_OK, "xbx\nbxb\n"),
|
||||||
|
|
||||||
|
(["string", "replace", "-r"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "replace", "-r", ""], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "replace", "-r", "", ""], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "replace", "-r", "", "", ""], STATUS_CMD_OK, "\n"), // pcre2 behavior
|
||||||
|
(["string", "replace", "-r", "", "", " "], STATUS_CMD_OK, " \n"), // pcre2 behavior
|
||||||
|
(["string", "replace", "-r", "a", "b", ""], STATUS_CMD_ERROR, "\n"),
|
||||||
|
(["string", "replace", "-r", "a", "b", "a"], STATUS_CMD_OK, "b\n"),
|
||||||
|
(["string", "replace", "-r", ".", "x", "abc"], STATUS_CMD_OK, "xbc\n"),
|
||||||
|
(["string", "replace", "-r", ".", "", "abc"], STATUS_CMD_OK, "bc\n"),
|
||||||
|
(["string", "replace", "-r", "(\\w)(\\w)", "$2$1", "ab"], STATUS_CMD_OK, "ba\n"),
|
||||||
|
(["string", "replace", "-r", "(\\w)", "$1$1", "ab"], STATUS_CMD_OK, "aab\n"),
|
||||||
|
(["string", "replace", "-r", "-a", ".", "x", "abc"], STATUS_CMD_OK, "xxx\n"),
|
||||||
|
(["string", "replace", "-r", "-a", "(\\w)", "$1$1", "ab"], STATUS_CMD_OK, "aabb\n"),
|
||||||
|
(["string", "replace", "-r", "-a", ".", "", "abc"], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "replace", "-r", "a", "x", "bc", "cd", "de"], STATUS_CMD_ERROR, "bc\ncd\nde\n"),
|
||||||
|
(["string", "replace", "-r", "a", "x", "aba", "caa"], STATUS_CMD_OK, "xba\ncxa\n"),
|
||||||
|
(["string", "replace", "-r", "-a", "a", "x", "aba", "caa"], STATUS_CMD_OK, "xbx\ncxx\n"),
|
||||||
|
(["string", "replace", "-r", "-i", "A", "b", "xax"], STATUS_CMD_OK, "xbx\n"),
|
||||||
|
(["string", "replace", "-r", "-i", "[a-z]", ".", "1A2B"], STATUS_CMD_OK, "1.2B\n"),
|
||||||
|
(["string", "replace", "-r", "A", "b", "xax"], STATUS_CMD_ERROR, "xax\n"),
|
||||||
|
(["string", "replace", "-r", "a", "$1", "a"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "replace", "-r", "(a)", "$2", "a"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "replace", "-r", "*", ".", "a"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "replace", "-ra", "x", "\\c"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "replace", "-r", "^(.)", "\t$1", "abc", "x"], STATUS_CMD_OK, "\tabc\n\tx\n"),
|
||||||
|
|
||||||
|
(["string", "split"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "split", ":"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "split", ".", "www.ch.ic.ac.uk"], STATUS_CMD_OK, "www\nch\nic\nac\nuk\n"),
|
||||||
|
(["string", "split", "..", "...."], STATUS_CMD_OK, "\n\n\n"),
|
||||||
|
(["string", "split", "-m", "x", "..", "...."], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "split", "-m1", "..", "...."], STATUS_CMD_OK, "\n..\n"),
|
||||||
|
(["string", "split", "-m0", "/", "/usr/local/bin/fish"], STATUS_CMD_ERROR, "/usr/local/bin/fish\n"),
|
||||||
|
(["string", "split", "-m2", ":", "a:b:c:d", "e:f:g:h"], STATUS_CMD_OK, "a\nb\nc:d\ne\nf\ng:h\n"),
|
||||||
|
(["string", "split", "-m1", "-r", "/", "/usr/local/bin/fish"], STATUS_CMD_OK, "/usr/local/bin\nfish\n"),
|
||||||
|
(["string", "split", "-r", ".", "www.ch.ic.ac.uk"], STATUS_CMD_OK, "www\nch\nic\nac\nuk\n"),
|
||||||
|
(["string", "split", "--", "--", "a--b---c----d"], STATUS_CMD_OK, "a\nb\n-c\n\nd\n"),
|
||||||
|
(["string", "split", "-r", "..", "...."], STATUS_CMD_OK, "\n\n\n"),
|
||||||
|
(["string", "split", "-r", "--", "--", "a--b---c----d"], STATUS_CMD_OK, "a\nb-\nc\n\nd\n"),
|
||||||
|
(["string", "split", "", ""], STATUS_CMD_ERROR, "\n"),
|
||||||
|
(["string", "split", "", "a"], STATUS_CMD_ERROR, "a\n"),
|
||||||
|
(["string", "split", "", "ab"], STATUS_CMD_OK, "a\nb\n"),
|
||||||
|
(["string", "split", "", "abc"], STATUS_CMD_OK, "a\nb\nc\n"),
|
||||||
|
(["string", "split", "-m1", "", "abc"], STATUS_CMD_OK, "a\nbc\n"),
|
||||||
|
(["string", "split", "-r", "", ""], STATUS_CMD_ERROR, "\n"),
|
||||||
|
(["string", "split", "-r", "", "a"], STATUS_CMD_ERROR, "a\n"),
|
||||||
|
(["string", "split", "-r", "", "ab"], STATUS_CMD_OK, "a\nb\n"),
|
||||||
|
(["string", "split", "-r", "", "abc"], STATUS_CMD_OK, "a\nb\nc\n"),
|
||||||
|
(["string", "split", "-r", "-m1", "", "abc"], STATUS_CMD_OK, "ab\nc\n"),
|
||||||
|
(["string", "split", "-q"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "split", "-q", ":"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "split", "-q", "x", "axbxc"], STATUS_CMD_OK, ""),
|
||||||
|
|
||||||
|
(["string", "sub"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "sub", "abcde"], STATUS_CMD_OK, "abcde\n"),
|
||||||
|
(["string", "sub", "-l", "x", "abcde"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "sub", "-s", "x", "abcde"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "sub", "-l0", "abcde"], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "sub", "-l2", "abcde"], STATUS_CMD_OK, "ab\n"),
|
||||||
|
(["string", "sub", "-l5", "abcde"], STATUS_CMD_OK, "abcde\n"),
|
||||||
|
(["string", "sub", "-l6", "abcde"], STATUS_CMD_OK, "abcde\n"),
|
||||||
|
(["string", "sub", "-l-1", "abcde"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "sub", "-s0", "abcde"], STATUS_INVALID_ARGS, ""),
|
||||||
|
(["string", "sub", "-s1", "abcde"], STATUS_CMD_OK, "abcde\n"),
|
||||||
|
(["string", "sub", "-s5", "abcde"], STATUS_CMD_OK, "e\n"),
|
||||||
|
(["string", "sub", "-s6", "abcde"], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "sub", "-s-1", "abcde"], STATUS_CMD_OK, "e\n"),
|
||||||
|
(["string", "sub", "-s-5", "abcde"], STATUS_CMD_OK, "abcde\n"),
|
||||||
|
(["string", "sub", "-s-6", "abcde"], STATUS_CMD_OK, "abcde\n"),
|
||||||
|
(["string", "sub", "-s1", "-l0", "abcde"], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "sub", "-s1", "-l1", "abcde"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "sub", "-s2", "-l2", "abcde"], STATUS_CMD_OK, "bc\n"),
|
||||||
|
(["string", "sub", "-s-1", "-l1", "abcde"], STATUS_CMD_OK, "e\n"),
|
||||||
|
(["string", "sub", "-s-1", "-l2", "abcde"], STATUS_CMD_OK, "e\n"),
|
||||||
|
(["string", "sub", "-s-3", "-l2", "abcde"], STATUS_CMD_OK, "cd\n"),
|
||||||
|
(["string", "sub", "-s-3", "-l4", "abcde"], STATUS_CMD_OK, "cde\n"),
|
||||||
|
(["string", "sub", "-q"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "sub", "-q", "abcde"], STATUS_CMD_OK, ""),
|
||||||
|
|
||||||
|
(["string", "trim"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "trim", ""], STATUS_CMD_ERROR, "\n"),
|
||||||
|
(["string", "trim", " "], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "trim", " \x0C\n\r\t"], STATUS_CMD_OK, "\n"),
|
||||||
|
(["string", "trim", " a"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "trim", "a "], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "trim", " a "], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "trim", "-l", " a"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "trim", "-l", "a "], STATUS_CMD_ERROR, "a \n"),
|
||||||
|
(["string", "trim", "-l", " a "], STATUS_CMD_OK, "a \n"),
|
||||||
|
(["string", "trim", "-r", " a"], STATUS_CMD_ERROR, " a\n"),
|
||||||
|
(["string", "trim", "-r", "a "], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "trim", "-r", " a "], STATUS_CMD_OK, " a\n"),
|
||||||
|
(["string", "trim", "-c", ".", " a"], STATUS_CMD_ERROR, " a\n"),
|
||||||
|
(["string", "trim", "-c", ".", "a "], STATUS_CMD_ERROR, "a \n"),
|
||||||
|
(["string", "trim", "-c", ".", " a "], STATUS_CMD_ERROR, " a \n"),
|
||||||
|
(["string", "trim", "-c", ".", ".a"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "trim", "-c", ".", "a."], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "trim", "-c", ".", ".a."], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "trim", "-c", "\\/", "/a\\"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "trim", "-c", "\\/", "a/"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "trim", "-c", "\\/", "\\a/"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "trim", "-c", "", ".a."], STATUS_CMD_ERROR, ".a.\n"),
|
||||||
|
];
|
||||||
|
|
||||||
|
for (cmd, expected_status, expected_stdout) in tests {
|
||||||
|
string_test(cmd, expected_status, expected_stdout);
|
||||||
|
}
|
||||||
|
|
||||||
|
let qmark_noglob_tests = test_cases![
|
||||||
|
(["string", "match", "a*b?c", "axxb?c"], STATUS_CMD_OK, "axxb?c\n"),
|
||||||
|
(["string", "match", "*?", "a"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "*?", "ab"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "?*", "a"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "?*", "ab"], STATUS_CMD_ERROR, ""),
|
||||||
|
(["string", "match", "a*\\?", "abc?"], STATUS_CMD_ERROR, ""),
|
||||||
|
];
|
||||||
|
|
||||||
|
scoped_test(FeatureFlag::qmark_noglob, true, || {
|
||||||
|
for (cmd, expected_status, expected_stdout) in qmark_noglob_tests {
|
||||||
|
string_test(cmd, expected_status, expected_stdout);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let qmark_glob_tests = test_cases![
|
||||||
|
(["string", "match", "a*b?c", "axxbyc"], STATUS_CMD_OK, "axxbyc\n"),
|
||||||
|
(["string", "match", "*?", "a"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "match", "*?", "ab"], STATUS_CMD_OK, "ab\n"),
|
||||||
|
(["string", "match", "?*", "a"], STATUS_CMD_OK, "a\n"),
|
||||||
|
(["string", "match", "?*", "ab"], STATUS_CMD_OK, "ab\n"),
|
||||||
|
(["string", "match", "a*\\?", "abc?"], STATUS_CMD_OK, "abc?\n"),
|
||||||
|
];
|
||||||
|
|
||||||
|
scoped_test(FeatureFlag::qmark_noglob, false, || {
|
||||||
|
for (cmd, expected_status, expected_stdout) in qmark_glob_tests {
|
||||||
|
string_test(cmd, expected_status, expected_stdout);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
}}
|
|
@ -97,6 +97,20 @@ impl Default for EscapeStringStyle {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl TryFrom<&wstr> for EscapeStringStyle {
|
||||||
|
type Error = &'static wstr;
|
||||||
|
fn try_from(s: &wstr) -> Result<Self, Self::Error> {
|
||||||
|
use EscapeStringStyle::*;
|
||||||
|
match s {
|
||||||
|
s if s == "script" => Ok(Self::default()),
|
||||||
|
s if s == "var" => Ok(Var),
|
||||||
|
s if s == "url" => Ok(Url),
|
||||||
|
s if s == "regex" => Ok(Regex),
|
||||||
|
_ => Err(L!("Invalid escape style")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bitflags! {
|
bitflags! {
|
||||||
/// Flags for the [`escape_string()`] function. These are only applicable when the escape style is
|
/// Flags for the [`escape_string()`] function. These are only applicable when the escape style is
|
||||||
/// [`EscapeStringStyle::Script`].
|
/// [`EscapeStringStyle::Script`].
|
||||||
|
@ -128,6 +142,19 @@ impl Default for UnescapeStringStyle {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl TryFrom<&wstr> for UnescapeStringStyle {
|
||||||
|
type Error = &'static wstr;
|
||||||
|
fn try_from(s: &wstr) -> Result<Self, Self::Error> {
|
||||||
|
use UnescapeStringStyle::*;
|
||||||
|
match s {
|
||||||
|
s if s == "script" => Ok(Self::default()),
|
||||||
|
s if s == "var" => Ok(Var),
|
||||||
|
s if s == "url" => Ok(Url),
|
||||||
|
_ => Err(L!("Invalid escape style")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bitflags! {
|
bitflags! {
|
||||||
/// Flags for unescape_string functions.
|
/// Flags for unescape_string functions.
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
|
|
@ -99,6 +99,8 @@ include_cpp! {
|
||||||
generate!("output_stream_t")
|
generate!("output_stream_t")
|
||||||
generate!("io_streams_t")
|
generate!("io_streams_t")
|
||||||
generate!("make_null_io_streams_ffi")
|
generate!("make_null_io_streams_ffi")
|
||||||
|
generate!("make_test_io_streams_ffi")
|
||||||
|
generate!("get_test_output_ffi")
|
||||||
|
|
||||||
generate_pod!("RustFFIJobList")
|
generate_pod!("RustFFIJobList")
|
||||||
generate_pod!("RustFFIProcList")
|
generate_pod!("RustFFIProcList")
|
||||||
|
@ -137,6 +139,7 @@ include_cpp! {
|
||||||
generate!("set_interactive_session")
|
generate!("set_interactive_session")
|
||||||
generate!("screen_set_midnight_commander_hack")
|
generate!("screen_set_midnight_commander_hack")
|
||||||
generate!("screen_clear_layout_cache_ffi")
|
generate!("screen_clear_layout_cache_ffi")
|
||||||
|
generate!("escape_code_length_ffi")
|
||||||
generate!("reader_schedule_prompt_repaint")
|
generate!("reader_schedule_prompt_repaint")
|
||||||
generate!("reader_change_history")
|
generate!("reader_change_history")
|
||||||
generate!("history_session_id")
|
generate!("history_session_id")
|
||||||
|
|
|
@ -25,6 +25,7 @@ use crate::tokenizer::{
|
||||||
TOK_SHOW_COMMENTS,
|
TOK_SHOW_COMMENTS,
|
||||||
};
|
};
|
||||||
use crate::wchar::{wstr, WString, L};
|
use crate::wchar::{wstr, WString, L};
|
||||||
|
use crate::wchar_ext::WExt;
|
||||||
use crate::wchar_ffi::{WCharFromFFI, WCharToFFI};
|
use crate::wchar_ffi::{WCharFromFFI, WCharToFFI};
|
||||||
use crate::wcstringutil::truncate;
|
use crate::wcstringutil::truncate;
|
||||||
use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE};
|
use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE};
|
||||||
|
@ -542,22 +543,22 @@ pub fn parse_util_get_offset(s: &wstr, line: i32, mut line_offset: usize) -> Opt
|
||||||
/// Return the given string, unescaping wildcard characters but not performing any other character
|
/// Return the given string, unescaping wildcard characters but not performing any other character
|
||||||
/// transformation.
|
/// transformation.
|
||||||
pub fn parse_util_unescape_wildcards(s: &wstr) -> WString {
|
pub fn parse_util_unescape_wildcards(s: &wstr) -> WString {
|
||||||
let mut result = WString::new();
|
let mut result = WString::with_capacity(s.len());
|
||||||
result.reserve(s.len());
|
|
||||||
let unesc_qmark = !feature_test(FeatureFlag::qmark_noglob);
|
let unesc_qmark = !feature_test(FeatureFlag::qmark_noglob);
|
||||||
let cs = s.as_char_slice();
|
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
for c in cs.iter().copied() {
|
while i < s.len() {
|
||||||
|
let c = s.char_at(i);
|
||||||
if c == '*' {
|
if c == '*' {
|
||||||
result.push(ANY_STRING);
|
result.push(ANY_STRING);
|
||||||
} else if c == '?' && unesc_qmark {
|
} else if c == '?' && unesc_qmark {
|
||||||
result.push(ANY_CHAR);
|
result.push(ANY_CHAR);
|
||||||
} else if c == '\\' && cs.get(i + 1) == Some(&'*')
|
} else if (c == '\\' && s.char_at(i + 1) == '*')
|
||||||
|| (unesc_qmark && c == '\\' && cs.get(i + 1) == Some(&'?'))
|
|| (unesc_qmark && c == '\\' && s.char_at(i + 1) == '?')
|
||||||
{
|
{
|
||||||
result.push(cs[i + 1]);
|
result.push(s.char_at(i + 1));
|
||||||
i += 1;
|
i += 1;
|
||||||
} else if c == '\\' && cs.get(i + 1) == Some(&'\\') {
|
} else if c == '\\' && s.char_at(i + 1) == '\\' {
|
||||||
// Not a wildcard, but ensure the next iteration doesn't see this escaped backslash.
|
// Not a wildcard, but ensure the next iteration doesn't see this escaped backslash.
|
||||||
result.push_utfstr(L!("\\\\"));
|
result.push_utfstr(L!("\\\\"));
|
||||||
i += 1;
|
i += 1;
|
||||||
|
|
|
@ -379,11 +379,11 @@ pub fn bool_from_string(x: &wstr) -> bool {
|
||||||
pub fn split_about<'haystack>(
|
pub fn split_about<'haystack>(
|
||||||
haystack: &'haystack wstr,
|
haystack: &'haystack wstr,
|
||||||
needle: &wstr,
|
needle: &wstr,
|
||||||
max: Option<i64>,
|
max: usize,
|
||||||
no_empty: bool,
|
no_empty: bool,
|
||||||
) -> Vec<&'haystack wstr> {
|
) -> Vec<&'haystack wstr> {
|
||||||
let mut output = vec![];
|
let mut output = vec![];
|
||||||
let mut remaining = max.unwrap_or(i64::MAX);
|
let mut remaining = max;
|
||||||
let mut haystack = haystack.as_char_slice();
|
let mut haystack = haystack.as_char_slice();
|
||||||
while remaining > 0 && !haystack.is_empty() {
|
while remaining > 0 && !haystack.is_empty() {
|
||||||
let split_point = if needle.is_empty() {
|
let split_point = if needle.is_empty() {
|
||||||
|
@ -398,6 +398,11 @@ pub fn split_about<'haystack>(
|
||||||
None => break, // not found
|
None => break, // not found
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if haystack.len() == split_point {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if !no_empty || split_point != 0 {
|
if !no_empty || split_point != 0 {
|
||||||
output.push(wstr::from_char_slice(&haystack[..split_point]));
|
output.push(wstr::from_char_slice(&haystack[..split_point]));
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,7 +9,6 @@
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "maybe.h"
|
#include "maybe.h"
|
||||||
#include "parse_constants.h"
|
#include "parse_constants.h"
|
||||||
#include "re.h"
|
|
||||||
|
|
||||||
#if INCLUDE_RUST_HEADERS
|
#if INCLUDE_RUST_HEADERS
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,6 @@
|
||||||
#include "builtins/set.h"
|
#include "builtins/set.h"
|
||||||
#include "builtins/shared.rs.h"
|
#include "builtins/shared.rs.h"
|
||||||
#include "builtins/source.h"
|
#include "builtins/source.h"
|
||||||
#include "builtins/string.h"
|
|
||||||
#include "builtins/ulimit.h"
|
#include "builtins/ulimit.h"
|
||||||
#include "complete.h"
|
#include "complete.h"
|
||||||
#include "cxx.h"
|
#include "cxx.h"
|
||||||
|
@ -393,7 +392,7 @@ static constexpr builtin_data_t builtin_datas[] = {
|
||||||
{L"set_color", &implemented_in_rust, N_(L"Set the terminal color")},
|
{L"set_color", &implemented_in_rust, N_(L"Set the terminal color")},
|
||||||
{L"source", &builtin_source, N_(L"Evaluate contents of file")},
|
{L"source", &builtin_source, N_(L"Evaluate contents of file")},
|
||||||
{L"status", &implemented_in_rust, N_(L"Return status information about fish")},
|
{L"status", &implemented_in_rust, N_(L"Return status information about fish")},
|
||||||
{L"string", &builtin_string, N_(L"Manipulate strings")},
|
{L"string", &implemented_in_rust, N_(L"Manipulate strings")},
|
||||||
{L"switch", &builtin_generic, N_(L"Conditionally run blocks of code")},
|
{L"switch", &builtin_generic, N_(L"Conditionally run blocks of code")},
|
||||||
{L"test", &implemented_in_rust, N_(L"Test a condition")},
|
{L"test", &implemented_in_rust, N_(L"Test a condition")},
|
||||||
{L"time", &builtin_generic, N_(L"Measure how long a command or block takes")},
|
{L"time", &builtin_generic, N_(L"Measure how long a command or block takes")},
|
||||||
|
@ -569,6 +568,9 @@ static maybe_t<RustBuiltin> try_get_rust_builtin(const wcstring &cmd) {
|
||||||
if (cmd == L"status") {
|
if (cmd == L"status") {
|
||||||
return RustBuiltin::Status;
|
return RustBuiltin::Status;
|
||||||
}
|
}
|
||||||
|
if (cmd == L"string") {
|
||||||
|
return RustBuiltin::String;
|
||||||
|
}
|
||||||
if (cmd == L"test" || cmd == L"[") {
|
if (cmd == L"test" || cmd == L"[") {
|
||||||
return RustBuiltin::Test;
|
return RustBuiltin::Test;
|
||||||
}
|
}
|
||||||
|
|
|
@ -131,6 +131,7 @@ enum class RustBuiltin : int32_t {
|
||||||
Return,
|
Return,
|
||||||
SetColor,
|
SetColor,
|
||||||
Status,
|
Status,
|
||||||
|
String,
|
||||||
Test,
|
Test,
|
||||||
Type,
|
Type,
|
||||||
Wait,
|
Wait,
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,14 +0,0 @@
|
||||||
// Prototypes for functions for executing builtin_string functions.
|
|
||||||
#ifndef FISH_BUILTIN_STRING_H
|
|
||||||
#define FISH_BUILTIN_STRING_H
|
|
||||||
|
|
||||||
#include <cstring>
|
|
||||||
#include <cwchar>
|
|
||||||
|
|
||||||
#include "../io.h"
|
|
||||||
#include "../maybe.h"
|
|
||||||
|
|
||||||
class parser_t;
|
|
||||||
|
|
||||||
maybe_t<int> builtin_string(parser_t &parser, io_streams_t &streams, const wchar_t **argv);
|
|
||||||
#endif
|
|
|
@ -89,7 +89,6 @@
|
||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
#include "path.h"
|
#include "path.h"
|
||||||
#include "proc.h"
|
#include "proc.h"
|
||||||
#include "re.h"
|
|
||||||
#include "reader.h"
|
#include "reader.h"
|
||||||
#include "redirection.h"
|
#include "redirection.h"
|
||||||
#include "screen.h"
|
#include "screen.h"
|
||||||
|
@ -4981,384 +4980,6 @@ static void test_wwrite_to_fd() {
|
||||||
(void)remove(t);
|
(void)remove(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
maybe_t<int> builtin_string(parser_t &parser, io_streams_t &streams, const wchar_t **argv);
|
|
||||||
static void run_one_string_test(const wchar_t *const *argv_raw, int expected_rc,
|
|
||||||
const wchar_t *expected_out) {
|
|
||||||
// Copy to a null terminated array, as builtin_string may wish to rearrange our pointers.
|
|
||||||
std::vector<wcstring> argv_list(argv_raw, argv_raw + null_terminated_array_length(argv_raw));
|
|
||||||
null_terminated_array_t<wchar_t> argv(argv_list);
|
|
||||||
|
|
||||||
parser_t &parser = parser_t::principal_parser();
|
|
||||||
string_output_stream_t outs{};
|
|
||||||
null_output_stream_t errs{};
|
|
||||||
io_streams_t streams(outs, errs);
|
|
||||||
streams.stdin_is_directly_redirected = false; // read from argv instead of stdin
|
|
||||||
maybe_t<int> rc = builtin_string(parser, streams, argv.get());
|
|
||||||
|
|
||||||
wcstring args;
|
|
||||||
for (const wcstring &arg : argv_list) {
|
|
||||||
args += escape_string(arg) + L' ';
|
|
||||||
}
|
|
||||||
args.resize(args.size() - 1);
|
|
||||||
|
|
||||||
if (rc != expected_rc) {
|
|
||||||
// The comparison above would have panicked if rc didn't have a value, so it's safe to
|
|
||||||
// assume it has one here:
|
|
||||||
std::wstring got = std::to_wstring(rc.value());
|
|
||||||
err(L"Test failed on line %lu: [%ls]: expected return code %d but got %s", __LINE__,
|
|
||||||
args.c_str(), expected_rc, got.c_str());
|
|
||||||
} else if (outs.contents() != expected_out) {
|
|
||||||
err(L"Test failed on line %lu: [%ls]: expected [%ls] but got [%ls]", __LINE__, args.c_str(),
|
|
||||||
escape_string(expected_out).c_str(), escape_string(outs.contents()).c_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_string() {
|
|
||||||
say(L"Testing builtin_string");
|
|
||||||
const struct string_test {
|
|
||||||
const wchar_t *argv[15];
|
|
||||||
int expected_rc;
|
|
||||||
const wchar_t *expected_out;
|
|
||||||
} string_tests[] = { //
|
|
||||||
{{L"string", L"escape", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"escape", L"", nullptr}, STATUS_CMD_OK, L"''\n"},
|
|
||||||
{{L"string", L"escape", L"-n", L"", nullptr}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"escape", L"a", nullptr}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"escape", L"\x07", nullptr}, STATUS_CMD_OK, L"\\cg\n"},
|
|
||||||
{{L"string", L"escape", L"\"x\"", nullptr}, STATUS_CMD_OK, L"'\"x\"'\n"},
|
|
||||||
{{L"string", L"escape", L"hello world", nullptr}, STATUS_CMD_OK, L"'hello world'\n"},
|
|
||||||
{{L"string", L"escape", L"-n", L"hello world", nullptr}, STATUS_CMD_OK, L"hello\\ world\n"},
|
|
||||||
{{L"string", L"escape", L"hello", L"world", nullptr}, STATUS_CMD_OK, L"hello\nworld\n"},
|
|
||||||
{{L"string", L"escape", L"-n", L"~", nullptr}, STATUS_CMD_OK, L"\\~\n"},
|
|
||||||
|
|
||||||
{{L"string", L"join", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"join", L"", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"join", L"", L"", L"", L"", nullptr}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"join", L"", L"a", L"b", L"c", nullptr}, STATUS_CMD_OK, L"abc\n"},
|
|
||||||
{{L"string", L"join", L".", L"fishshell", L"com", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"fishshell.com\n"},
|
|
||||||
{{L"string", L"join", L"/", L"usr", nullptr}, STATUS_CMD_ERROR, L"usr\n"},
|
|
||||||
{{L"string", L"join", L"/", L"usr", L"local", L"bin", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"usr/local/bin\n"},
|
|
||||||
{{L"string", L"join", L"...", L"3", L"2", L"1", nullptr}, STATUS_CMD_OK, L"3...2...1\n"},
|
|
||||||
{{L"string", L"join", L"-q", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"join", L"-q", L".", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"join", L"-q", L".", L".", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
|
|
||||||
{{L"string", L"length", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"length", L"", nullptr}, STATUS_CMD_ERROR, L"0\n"},
|
|
||||||
{{L"string", L"length", L"", L"", L"", nullptr}, STATUS_CMD_ERROR, L"0\n0\n0\n"},
|
|
||||||
{{L"string", L"length", L"a", nullptr}, STATUS_CMD_OK, L"1\n"},
|
|
||||||
#if WCHAR_T_BITS > 16
|
|
||||||
{{L"string", L"length", L"\U0002008A", nullptr}, STATUS_CMD_OK, L"1\n"},
|
|
||||||
#endif
|
|
||||||
{{L"string", L"length", L"um", L"dois", L"três", nullptr}, STATUS_CMD_OK, L"2\n4\n4\n"},
|
|
||||||
{{L"string", L"length", L"um", L"dois", L"três", nullptr}, STATUS_CMD_OK, L"2\n4\n4\n"},
|
|
||||||
{{L"string", L"length", L"-q", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"length", L"-q", L"", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"length", L"-q", L"a", nullptr}, STATUS_CMD_OK, L""},
|
|
||||||
|
|
||||||
{{L"string", L"match", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"match", L"", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"", L"", nullptr}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"match", L"?", L"a", nullptr}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"match", L"*", L"", nullptr}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"match", L"**", L"", nullptr}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"match", L"*", L"xyzzy", nullptr}, STATUS_CMD_OK, L"xyzzy\n"},
|
|
||||||
{{L"string", L"match", L"**", L"plugh", nullptr}, STATUS_CMD_OK, L"plugh\n"},
|
|
||||||
{{L"string", L"match", L"a*b", L"axxb", nullptr}, STATUS_CMD_OK, L"axxb\n"},
|
|
||||||
{{L"string", L"match", L"a??b", L"axxb", nullptr}, STATUS_CMD_OK, L"axxb\n"},
|
|
||||||
{{L"string", L"match", L"-i", L"a??B", L"axxb", nullptr}, STATUS_CMD_OK, L"axxb\n"},
|
|
||||||
{{L"string", L"match", L"-i", L"a??b", L"Axxb", nullptr}, STATUS_CMD_OK, L"Axxb\n"},
|
|
||||||
{{L"string", L"match", L"a*", L"axxb", nullptr}, STATUS_CMD_OK, L"axxb\n"},
|
|
||||||
{{L"string", L"match", L"*a", L"xxa", nullptr}, STATUS_CMD_OK, L"xxa\n"},
|
|
||||||
{{L"string", L"match", L"*a*", L"axa", nullptr}, STATUS_CMD_OK, L"axa\n"},
|
|
||||||
{{L"string", L"match", L"*a*", L"xax", nullptr}, STATUS_CMD_OK, L"xax\n"},
|
|
||||||
{{L"string", L"match", L"*a*", L"bxa", nullptr}, STATUS_CMD_OK, L"bxa\n"},
|
|
||||||
{{L"string", L"match", L"*a", L"a", nullptr}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"match", L"a*", L"a", nullptr}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"match", L"a*b*c", L"axxbyyc", nullptr}, STATUS_CMD_OK, L"axxbyyc\n"},
|
|
||||||
{{L"string", L"match", L"\\*", L"*", nullptr}, STATUS_CMD_OK, L"*\n"},
|
|
||||||
{{L"string", L"match", L"a*\\", L"abc\\", nullptr}, STATUS_CMD_OK, L"abc\\\n"},
|
|
||||||
{{L"string", L"match", L"a*\\?", L"abc?", nullptr}, STATUS_CMD_OK, L"abc?\n"},
|
|
||||||
|
|
||||||
{{L"string", L"match", L"?", L"", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"?", L"ab", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"??", L"a", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"?a", L"a", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"a?", L"a", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"a??B", L"axxb", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"a*b", L"axxbc", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"*b", L"bbba", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"0x[0-9a-fA-F][0-9a-fA-F]", L"0xbad", nullptr},
|
|
||||||
STATUS_CMD_ERROR,
|
|
||||||
L""},
|
|
||||||
|
|
||||||
{{L"string", L"match", L"-a", L"*", L"ab", L"cde", nullptr}, STATUS_CMD_OK, L"ab\ncde\n"},
|
|
||||||
{{L"string", L"match", L"*", L"ab", L"cde", nullptr}, STATUS_CMD_OK, L"ab\ncde\n"},
|
|
||||||
{{L"string", L"match", L"-n", L"*d*", L"cde", nullptr}, STATUS_CMD_OK, L"1 3\n"},
|
|
||||||
{{L"string", L"match", L"-n", L"*x*", L"cde", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"-q", L"a*", L"b", L"c", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"-q", L"a*", L"b", L"a", nullptr}, STATUS_CMD_OK, L""},
|
|
||||||
|
|
||||||
{{L"string", L"match", L"-r", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"match", L"-r", L"", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"-r", L"", L"", nullptr}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"match", L"-r", L".", L"a", nullptr}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"match", L"-r", L".*", L"", nullptr}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"a*b", L"b", nullptr}, STATUS_CMD_OK, L"b\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"a*b", L"aab", nullptr}, STATUS_CMD_OK, L"aab\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"-i", L"a*b", L"Aab", nullptr}, STATUS_CMD_OK, L"Aab\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"-a", L"a[bc]", L"abadac", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"ab\nac\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"a", L"xaxa", L"axax", nullptr}, STATUS_CMD_OK, L"a\na\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"-a", L"a", L"xaxa", L"axax", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"a\na\na\na\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"a[bc]", L"abadac", nullptr}, STATUS_CMD_OK, L"ab\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"-q", L"a[bc]", L"abadac", nullptr}, STATUS_CMD_OK, L""},
|
|
||||||
{{L"string", L"match", L"-r", L"-q", L"a[bc]", L"ad", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"-r", L"(a+)b(c)", L"aabc", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"aabc\naa\nc\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"-a", L"(a)b(c)", L"abcabc", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"abc\na\nc\nabc\na\nc\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"(a)b(c)", L"abcabc", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"abc\na\nc\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"(a|(z))(bc)", L"abc", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"abc\na\nbc\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"-n", L"a", L"ada", L"dad", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"1 1\n2 1\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"-n", L"-a", L"a", L"bacadae", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"2 1\n4 1\n6 1\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"-n", L"(a).*(b)", L"a---b", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"1 5\n1 1\n5 1\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"-n", L"(a)(b)", L"ab", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"1 2\n1 1\n2 1\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"-n", L"(a)(b)", L"abab", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"1 2\n1 1\n2 1\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"-n", L"-a", L"(a)(b)", L"abab", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"1 2\n1 1\n2 1\n3 2\n3 1\n4 1\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"*", L"", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"match", L"-r", L"-a", L"a*", L"b", nullptr}, STATUS_CMD_OK, L"\n\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"foo\\Kbar", L"foobar", nullptr}, STATUS_CMD_OK, L"bar\n"},
|
|
||||||
{{L"string", L"match", L"-r", L"(foo)\\Kbar", L"foobar", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"bar\nfoo\n"},
|
|
||||||
{{L"string", L"replace", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"replace", L"", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"replace", L"", L"", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"replace", L"", L"", L"", nullptr}, STATUS_CMD_ERROR, L"\n"},
|
|
||||||
{{L"string", L"replace", L"", L"", L" ", nullptr}, STATUS_CMD_ERROR, L" \n"},
|
|
||||||
{{L"string", L"replace", L"a", L"b", L"", nullptr}, STATUS_CMD_ERROR, L"\n"},
|
|
||||||
{{L"string", L"replace", L"a", L"b", L"a", nullptr}, STATUS_CMD_OK, L"b\n"},
|
|
||||||
{{L"string", L"replace", L"a", L"b", L"xax", nullptr}, STATUS_CMD_OK, L"xbx\n"},
|
|
||||||
{{L"string", L"replace", L"a", L"b", L"xax", L"axa", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"xbx\nbxa\n"},
|
|
||||||
{{L"string", L"replace", L"bar", L"x", L"red barn", nullptr}, STATUS_CMD_OK, L"red xn\n"},
|
|
||||||
{{L"string", L"replace", L"x", L"bar", L"red xn", nullptr}, STATUS_CMD_OK, L"red barn\n"},
|
|
||||||
{{L"string", L"replace", L"--", L"x", L"-", L"xyz", nullptr}, STATUS_CMD_OK, L"-yz\n"},
|
|
||||||
{{L"string", L"replace", L"--", L"y", L"-", L"xyz", nullptr}, STATUS_CMD_OK, L"x-z\n"},
|
|
||||||
{{L"string", L"replace", L"--", L"z", L"-", L"xyz", nullptr}, STATUS_CMD_OK, L"xy-\n"},
|
|
||||||
{{L"string", L"replace", L"-i", L"z", L"X", L"_Z_", nullptr}, STATUS_CMD_OK, L"_X_\n"},
|
|
||||||
{{L"string", L"replace", L"-a", L"a", L"A", L"aaa", nullptr}, STATUS_CMD_OK, L"AAA\n"},
|
|
||||||
{{L"string", L"replace", L"-i", L"a", L"z", L"AAA", nullptr}, STATUS_CMD_OK, L"zAA\n"},
|
|
||||||
{{L"string", L"replace", L"-q", L"x", L">x<", L"x", nullptr}, STATUS_CMD_OK, L""},
|
|
||||||
{{L"string", L"replace", L"-a", L"x", L"", L"xxx", nullptr}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"replace", L"-a", L"***", L"_", L"*****", nullptr}, STATUS_CMD_OK, L"_**\n"},
|
|
||||||
{{L"string", L"replace", L"-a", L"***", L"***", L"******", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"******\n"},
|
|
||||||
{{L"string", L"replace", L"-a", L"a", L"b", L"xax", L"axa", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"xbx\nbxb\n"},
|
|
||||||
|
|
||||||
{{L"string", L"replace", L"-r", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"replace", L"-r", L"", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"replace", L"-r", L"", L"", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"replace", L"-r", L"", L"", L"", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"\n"}, // pcre2 behavior
|
|
||||||
{{L"string", L"replace", L"-r", L"", L"", L" ", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L" \n"}, // pcre2 behavior
|
|
||||||
{{L"string", L"replace", L"-r", L"a", L"b", L"", nullptr}, STATUS_CMD_ERROR, L"\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"a", L"b", L"a", nullptr}, STATUS_CMD_OK, L"b\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L".", L"x", L"abc", nullptr}, STATUS_CMD_OK, L"xbc\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L".", L"", L"abc", nullptr}, STATUS_CMD_OK, L"bc\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"(\\w)(\\w)", L"$2$1", L"ab", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"ba\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"(\\w)", L"$1$1", L"ab", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"aab\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"-a", L".", L"x", L"abc", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"xxx\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"-a", L"(\\w)", L"$1$1", L"ab", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"aabb\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"-a", L".", L"", L"abc", nullptr}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"a", L"x", L"bc", L"cd", L"de", nullptr},
|
|
||||||
STATUS_CMD_ERROR,
|
|
||||||
L"bc\ncd\nde\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"a", L"x", L"aba", L"caa", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"xba\ncxa\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"-a", L"a", L"x", L"aba", L"caa", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"xbx\ncxx\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"-i", L"A", L"b", L"xax", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"xbx\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"-i", L"[a-z]", L".", L"1A2B", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"1.2B\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"A", L"b", L"xax", nullptr}, STATUS_CMD_ERROR, L"xax\n"},
|
|
||||||
{{L"string", L"replace", L"-r", L"a", L"$1", L"a", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"replace", L"-r", L"(a)", L"$2", L"a", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"replace", L"-r", L"*", L".", L"a", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"replace", L"-ra", L"x", L"\\c", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"replace", L"-r", L"^(.)", L"\t$1", L"abc", L"x", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"\tabc\n\tx\n"},
|
|
||||||
|
|
||||||
{{L"string", L"split", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"split", L":", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"split", L".", L"www.ch.ic.ac.uk", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"www\nch\nic\nac\nuk\n"},
|
|
||||||
{{L"string", L"split", L"..", L"....", nullptr}, STATUS_CMD_OK, L"\n\n\n"},
|
|
||||||
{{L"string", L"split", L"-m", L"x", L"..", L"....", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"split", L"-m1", L"..", L"....", nullptr}, STATUS_CMD_OK, L"\n..\n"},
|
|
||||||
{{L"string", L"split", L"-m0", L"/", L"/usr/local/bin/fish", nullptr},
|
|
||||||
STATUS_CMD_ERROR,
|
|
||||||
L"/usr/local/bin/fish\n"},
|
|
||||||
{{L"string", L"split", L"-m2", L":", L"a:b:c:d", L"e:f:g:h", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"a\nb\nc:d\ne\nf\ng:h\n"},
|
|
||||||
{{L"string", L"split", L"-m1", L"-r", L"/", L"/usr/local/bin/fish", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"/usr/local/bin\nfish\n"},
|
|
||||||
{{L"string", L"split", L"-r", L".", L"www.ch.ic.ac.uk", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"www\nch\nic\nac\nuk\n"},
|
|
||||||
{{L"string", L"split", L"--", L"--", L"a--b---c----d", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"a\nb\n-c\n\nd\n"},
|
|
||||||
{{L"string", L"split", L"-r", L"..", L"....", nullptr}, STATUS_CMD_OK, L"\n\n\n"},
|
|
||||||
{{L"string", L"split", L"-r", L"--", L"--", L"a--b---c----d", nullptr},
|
|
||||||
STATUS_CMD_OK,
|
|
||||||
L"a\nb-\nc\n\nd\n"},
|
|
||||||
{{L"string", L"split", L"", L"", nullptr}, STATUS_CMD_ERROR, L"\n"},
|
|
||||||
{{L"string", L"split", L"", L"a", nullptr}, STATUS_CMD_ERROR, L"a\n"},
|
|
||||||
{{L"string", L"split", L"", L"ab", nullptr}, STATUS_CMD_OK, L"a\nb\n"},
|
|
||||||
{{L"string", L"split", L"", L"abc", nullptr}, STATUS_CMD_OK, L"a\nb\nc\n"},
|
|
||||||
{{L"string", L"split", L"-m1", L"", L"abc", nullptr}, STATUS_CMD_OK, L"a\nbc\n"},
|
|
||||||
{{L"string", L"split", L"-r", L"", L"", nullptr}, STATUS_CMD_ERROR, L"\n"},
|
|
||||||
{{L"string", L"split", L"-r", L"", L"a", nullptr}, STATUS_CMD_ERROR, L"a\n"},
|
|
||||||
{{L"string", L"split", L"-r", L"", L"ab", nullptr}, STATUS_CMD_OK, L"a\nb\n"},
|
|
||||||
{{L"string", L"split", L"-r", L"", L"abc", nullptr}, STATUS_CMD_OK, L"a\nb\nc\n"},
|
|
||||||
{{L"string", L"split", L"-r", L"-m1", L"", L"abc", nullptr}, STATUS_CMD_OK, L"ab\nc\n"},
|
|
||||||
{{L"string", L"split", L"-q", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"split", L"-q", L":", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"split", L"-q", L"x", L"axbxc", nullptr}, STATUS_CMD_OK, L""},
|
|
||||||
|
|
||||||
{{L"string", L"sub", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"sub", L"abcde", nullptr}, STATUS_CMD_OK, L"abcde\n"},
|
|
||||||
{{L"string", L"sub", L"-l", L"x", L"abcde", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"sub", L"-s", L"x", L"abcde", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"sub", L"-l0", L"abcde", nullptr}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"sub", L"-l2", L"abcde", nullptr}, STATUS_CMD_OK, L"ab\n"},
|
|
||||||
{{L"string", L"sub", L"-l5", L"abcde", nullptr}, STATUS_CMD_OK, L"abcde\n"},
|
|
||||||
{{L"string", L"sub", L"-l6", L"abcde", nullptr}, STATUS_CMD_OK, L"abcde\n"},
|
|
||||||
{{L"string", L"sub", L"-l-1", L"abcde", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"sub", L"-s0", L"abcde", nullptr}, STATUS_INVALID_ARGS, L""},
|
|
||||||
{{L"string", L"sub", L"-s1", L"abcde", nullptr}, STATUS_CMD_OK, L"abcde\n"},
|
|
||||||
{{L"string", L"sub", L"-s5", L"abcde", nullptr}, STATUS_CMD_OK, L"e\n"},
|
|
||||||
{{L"string", L"sub", L"-s6", L"abcde", nullptr}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"sub", L"-s-1", L"abcde", nullptr}, STATUS_CMD_OK, L"e\n"},
|
|
||||||
{{L"string", L"sub", L"-s-5", L"abcde", nullptr}, STATUS_CMD_OK, L"abcde\n"},
|
|
||||||
{{L"string", L"sub", L"-s-6", L"abcde", nullptr}, STATUS_CMD_OK, L"abcde\n"},
|
|
||||||
{{L"string", L"sub", L"-s1", L"-l0", L"abcde", nullptr}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"sub", L"-s1", L"-l1", L"abcde", nullptr}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"sub", L"-s2", L"-l2", L"abcde", nullptr}, STATUS_CMD_OK, L"bc\n"},
|
|
||||||
{{L"string", L"sub", L"-s-1", L"-l1", L"abcde", nullptr}, STATUS_CMD_OK, L"e\n"},
|
|
||||||
{{L"string", L"sub", L"-s-1", L"-l2", L"abcde", nullptr}, STATUS_CMD_OK, L"e\n"},
|
|
||||||
{{L"string", L"sub", L"-s-3", L"-l2", L"abcde", nullptr}, STATUS_CMD_OK, L"cd\n"},
|
|
||||||
{{L"string", L"sub", L"-s-3", L"-l4", L"abcde", nullptr}, STATUS_CMD_OK, L"cde\n"},
|
|
||||||
{{L"string", L"sub", L"-q", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"sub", L"-q", L"abcde", nullptr}, STATUS_CMD_OK, L""},
|
|
||||||
|
|
||||||
{{L"string", L"trim", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"trim", L""}, STATUS_CMD_ERROR, L"\n"},
|
|
||||||
{{L"string", L"trim", L" "}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"trim", L" \f\n\r\t"}, STATUS_CMD_OK, L"\n"},
|
|
||||||
{{L"string", L"trim", L" a"}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"trim", L"a "}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"trim", L" a "}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"trim", L"-l", L" a"}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"trim", L"-l", L"a "}, STATUS_CMD_ERROR, L"a \n"},
|
|
||||||
{{L"string", L"trim", L"-l", L" a "}, STATUS_CMD_OK, L"a \n"},
|
|
||||||
{{L"string", L"trim", L"-r", L" a"}, STATUS_CMD_ERROR, L" a\n"},
|
|
||||||
{{L"string", L"trim", L"-r", L"a "}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"trim", L"-r", L" a "}, STATUS_CMD_OK, L" a\n"},
|
|
||||||
{{L"string", L"trim", L"-c", L".", L" a"}, STATUS_CMD_ERROR, L" a\n"},
|
|
||||||
{{L"string", L"trim", L"-c", L".", L"a "}, STATUS_CMD_ERROR, L"a \n"},
|
|
||||||
{{L"string", L"trim", L"-c", L".", L" a "}, STATUS_CMD_ERROR, L" a \n"},
|
|
||||||
{{L"string", L"trim", L"-c", L".", L".a"}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"trim", L"-c", L".", L"a."}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"trim", L"-c", L".", L".a."}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"trim", L"-c", L"\\/", L"/a\\"}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"trim", L"-c", L"\\/", L"a/"}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"trim", L"-c", L"\\/", L"\\a/"}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"trim", L"-c", L"", L".a."}, STATUS_CMD_ERROR, L".a.\n"}
|
|
||||||
};
|
|
||||||
|
|
||||||
for (const auto &t : string_tests) {
|
|
||||||
run_one_string_test(t.argv, t.expected_rc, t.expected_out);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool saved_flag = feature_test(feature_flag_t::qmark_noglob);
|
|
||||||
const struct string_test qmark_noglob_tests[] = {
|
|
||||||
{{L"string", L"match", L"a*b?c", L"axxb?c", nullptr}, STATUS_CMD_OK, L"axxb?c\n"},
|
|
||||||
{{L"string", L"match", L"*?", L"a", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"*?", L"ab", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"?*", L"a", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"?*", L"ab", nullptr}, STATUS_CMD_ERROR, L""},
|
|
||||||
{{L"string", L"match", L"a*\\?", L"abc?", nullptr}, STATUS_CMD_ERROR, L""}};
|
|
||||||
feature_set(feature_flag_t::qmark_noglob, true);
|
|
||||||
for (const auto &t : qmark_noglob_tests) {
|
|
||||||
run_one_string_test(t.argv, t.expected_rc, t.expected_out);
|
|
||||||
}
|
|
||||||
|
|
||||||
const struct string_test qmark_glob_tests[] = {
|
|
||||||
{{L"string", L"match", L"a*b?c", L"axxbyc", nullptr}, STATUS_CMD_OK, L"axxbyc\n"},
|
|
||||||
{{L"string", L"match", L"*?", L"a", nullptr}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"match", L"*?", L"ab", nullptr}, STATUS_CMD_OK, L"ab\n"},
|
|
||||||
{{L"string", L"match", L"?*", L"a", nullptr}, STATUS_CMD_OK, L"a\n"},
|
|
||||||
{{L"string", L"match", L"?*", L"ab", nullptr}, STATUS_CMD_OK, L"ab\n"},
|
|
||||||
{{L"string", L"match", L"a*\\?", L"abc?", nullptr}, STATUS_CMD_OK, L"abc?\n"}};
|
|
||||||
feature_set(feature_flag_t::qmark_noglob, false);
|
|
||||||
for (const auto &t : qmark_glob_tests) {
|
|
||||||
run_one_string_test(t.argv, t.expected_rc, t.expected_out);
|
|
||||||
}
|
|
||||||
feature_set(feature_flag_t::qmark_noglob, saved_flag);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Helper for test_timezone_env_vars().
|
/// Helper for test_timezone_env_vars().
|
||||||
long return_timezone_hour(time_t tstamp, const wchar_t *timezone) {
|
long return_timezone_hour(time_t tstamp, const wchar_t *timezone) {
|
||||||
auto &vars = parser_t::principal_parser().vars();
|
auto &vars = parser_t::principal_parser().vars();
|
||||||
|
@ -5881,164 +5502,6 @@ static void test_killring() {
|
||||||
do_test((kill_entries() == std::vector<wcstring>{L"a", L"c", L"b", L"d"}));
|
do_test((kill_entries() == std::vector<wcstring>{L"a", L"c", L"b", L"d"}));
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
|
||||||
using namespace re;
|
|
||||||
|
|
||||||
// Basic tests for re, which wraps PCRE2.
|
|
||||||
static void test_re_errs() {
|
|
||||||
say(L"Testing re");
|
|
||||||
flags_t flags{};
|
|
||||||
re_error_t error{};
|
|
||||||
maybe_t<regex_t> re;
|
|
||||||
do_test(!regex_t::try_compile(L"abc[", flags, &error));
|
|
||||||
do_test(error.code != 0);
|
|
||||||
do_test(!error.message().empty());
|
|
||||||
|
|
||||||
error = re_error_t{};
|
|
||||||
do_test(!regex_t::try_compile(L"abc(", flags, &error).has_value());
|
|
||||||
do_test(error.code != 0);
|
|
||||||
do_test(!error.message().empty());
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_re_basic() {
|
|
||||||
// Match a character twice.
|
|
||||||
using namespace re;
|
|
||||||
wcstring subject = L"AAbCCd11e";
|
|
||||||
auto substr_from_range = [&](maybe_t<match_range_t> r) {
|
|
||||||
do_test(r.has_value());
|
|
||||||
do_test(r->begin <= r->end);
|
|
||||||
do_test(r->end <= subject.size());
|
|
||||||
return subject.substr(r->begin, r->end - r->begin);
|
|
||||||
};
|
|
||||||
auto re = regex_t::try_compile(L"(.)\\1");
|
|
||||||
do_test(re.has_value());
|
|
||||||
auto md = re->prepare();
|
|
||||||
std::vector<wcstring> matches;
|
|
||||||
std::vector<wcstring> captures;
|
|
||||||
while (auto r = re->match(md, subject)) {
|
|
||||||
matches.push_back(substr_from_range(r));
|
|
||||||
captures.push_back(substr_from_range(re->group(md, 1)));
|
|
||||||
do_test(!re->group(md, 2));
|
|
||||||
}
|
|
||||||
do_test(join_strings(matches, L',') == L"AA,CC,11");
|
|
||||||
do_test(join_strings(captures, L',') == L"A,C,1");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_re_reset() {
|
|
||||||
using namespace re;
|
|
||||||
auto re = regex_t::try_compile(L"([0-9])");
|
|
||||||
wcstring s = L"012345";
|
|
||||||
auto md = re->prepare();
|
|
||||||
for (size_t idx = 0; idx < s.size(); idx++) {
|
|
||||||
md.reset();
|
|
||||||
for (size_t j = 0; j <= idx; j++) {
|
|
||||||
auto m = re->match(md, s);
|
|
||||||
match_range_t expected{j, j + 1};
|
|
||||||
do_test(m == expected);
|
|
||||||
do_test(re->group(md, 1) == expected);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_re_named() {
|
|
||||||
// Named capture groups.
|
|
||||||
using namespace re;
|
|
||||||
auto re = regex_t::try_compile(L"A(?<FOO>x+)?");
|
|
||||||
do_test(re->capture_group_count() == 1);
|
|
||||||
|
|
||||||
wcstring subject = L"AxxAAx";
|
|
||||||
auto md = re->prepare();
|
|
||||||
|
|
||||||
auto r = re->match(md, subject);
|
|
||||||
do_test((r == match_range_t{0, 3}));
|
|
||||||
do_test(re->substring_for_group(md, L"QQQ", subject) == none());
|
|
||||||
do_test(re->substring_for_group(md, L"FOO", subject) == L"xx");
|
|
||||||
|
|
||||||
r = re->match(md, subject);
|
|
||||||
do_test((r == match_range_t{3, 4}));
|
|
||||||
do_test(re->substring_for_group(md, L"QQQ", subject) == none());
|
|
||||||
do_test(re->substring_for_group(md, L"FOO", subject) == none());
|
|
||||||
|
|
||||||
r = re->match(md, subject);
|
|
||||||
do_test((r == match_range_t{4, 6}));
|
|
||||||
do_test(re->substring_for_group(md, L"QQQ", subject) == none());
|
|
||||||
do_test(re->substring_for_group(md, L"FOO", subject) == wcstring(L"x"));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_re_name_extraction() {
|
|
||||||
// Names of capture groups can be extracted.
|
|
||||||
using namespace re;
|
|
||||||
auto re = regex_t::try_compile(L"(?<FOO>dd)ff(?<BAR>cc)aaa(?<alpha>)ff(?<BETA>)");
|
|
||||||
do_test(re.has_value());
|
|
||||||
do_test(re->capture_group_count() == 4);
|
|
||||||
// PCRE2 returns these sorted.
|
|
||||||
do_test(join_strings(re->capture_group_names(), L',') == L"BAR,BETA,FOO,alpha");
|
|
||||||
|
|
||||||
// Mixed named and positional captures.
|
|
||||||
re = regex_t::try_compile(L"(abc)(?<FOO>def)(ghi)(?<BAR>jkl)");
|
|
||||||
do_test(re.has_value());
|
|
||||||
do_test(re->capture_group_count() == 4);
|
|
||||||
do_test(join_strings(re->capture_group_names(), L',') == L"BAR,FOO");
|
|
||||||
auto md = re->prepare();
|
|
||||||
const wcstring subject = L"abcdefghijkl";
|
|
||||||
auto m = re->match(md, subject);
|
|
||||||
do_test((m == match_range_t{0, 12}));
|
|
||||||
do_test((re->group(md, 1) == match_range_t{0, 3}));
|
|
||||||
do_test((re->group(md, 2) == match_range_t{3, 6}));
|
|
||||||
do_test((re->group(md, 3) == match_range_t{6, 9}));
|
|
||||||
do_test((re->group(md, 4) == match_range_t{9, 12}));
|
|
||||||
do_test(re->substring_for_group(md, L"FOO", subject) == wcstring(L"def"));
|
|
||||||
do_test(re->substring_for_group(md, L"BAR", subject) == wcstring(L"jkl"));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void test_re_substitute() {
|
|
||||||
// Names of capture groups can be extracted.
|
|
||||||
using namespace re;
|
|
||||||
auto re = regex_t::try_compile(L"[a-z]+(\\d+)");
|
|
||||||
do_test(re.has_value());
|
|
||||||
do_test(re->capture_group_count() == 1);
|
|
||||||
maybe_t<wcstring> res{};
|
|
||||||
int repl_count{};
|
|
||||||
sub_flags_t sflags{};
|
|
||||||
const wcstring subj = L"AAabc123ZZ AAabc123ZZ";
|
|
||||||
const wcstring repl = L"$1qqq";
|
|
||||||
res = re->substitute(subj, repl, sflags, 0, nullptr, &repl_count);
|
|
||||||
do_test(res && *res == L"AA123qqqZZ AAabc123ZZ");
|
|
||||||
do_test(repl_count == 1);
|
|
||||||
|
|
||||||
res = re->substitute(subj, repl, sflags, 5, nullptr, &repl_count);
|
|
||||||
do_test(res && *res == L"AAabc123ZZ AA123qqqZZ");
|
|
||||||
do_test(repl_count == 1);
|
|
||||||
|
|
||||||
sflags.global = true;
|
|
||||||
res = re->substitute(subj, repl, sflags, 0, nullptr, &repl_count);
|
|
||||||
do_test(res && *res == L"AA123qqqZZ AA123qqqZZ");
|
|
||||||
do_test(repl_count == 2);
|
|
||||||
|
|
||||||
sflags.extended = true;
|
|
||||||
res = re->substitute(subj, L"\\x21", sflags, 0, nullptr, &repl_count); // \x21 = !
|
|
||||||
do_test(res && *res == L"AA!ZZ AA!ZZ");
|
|
||||||
do_test(repl_count == 2);
|
|
||||||
|
|
||||||
// Test with a bad escape; \b is unsupported.
|
|
||||||
re_error_t error{};
|
|
||||||
res = re->substitute(subj, L"AAA\\bZZZ", sflags, 0, &error);
|
|
||||||
do_test(!res.has_value());
|
|
||||||
do_test(error.code == -57 /* PCRE2_ERROR_BADREPESCAPE */);
|
|
||||||
do_test(error.message() == L"bad escape sequence in replacement string");
|
|
||||||
do_test(error.offset == 5 /* the b */);
|
|
||||||
|
|
||||||
// Test a very long replacement as we used a fixed-size buffer.
|
|
||||||
sflags = sub_flags_t{};
|
|
||||||
sflags.global = true;
|
|
||||||
re = regex_t::try_compile(L"A");
|
|
||||||
res =
|
|
||||||
re->substitute(wcstring(4096, L'A'), wcstring(4096, L'X'), sflags, 0, nullptr, &repl_count);
|
|
||||||
do_test(res && *res == wcstring(4096 * 4096, L'X'));
|
|
||||||
do_test(repl_count == 4096);
|
|
||||||
}
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
void test_wgetopt() {
|
void test_wgetopt() {
|
||||||
// Regression test for a crash.
|
// Regression test for a crash.
|
||||||
const wchar_t *const short_options = L"-a";
|
const wchar_t *const short_options = L"-a";
|
||||||
|
@ -6173,7 +5636,6 @@ static const test_t s_tests[]{
|
||||||
{TEST_GROUP("history_paths"), history_tests_t::test_history_path_detection},
|
{TEST_GROUP("history_paths"), history_tests_t::test_history_path_detection},
|
||||||
{TEST_GROUP("history_races"), history_tests_t::test_history_races},
|
{TEST_GROUP("history_races"), history_tests_t::test_history_races},
|
||||||
{TEST_GROUP("history_formats"), history_tests_t::test_history_formats},
|
{TEST_GROUP("history_formats"), history_tests_t::test_history_formats},
|
||||||
{TEST_GROUP("string"), test_string},
|
|
||||||
{TEST_GROUP("illegal_command_exit_code"), test_illegal_command_exit_code},
|
{TEST_GROUP("illegal_command_exit_code"), test_illegal_command_exit_code},
|
||||||
{TEST_GROUP("maybe"), test_maybe},
|
{TEST_GROUP("maybe"), test_maybe},
|
||||||
{TEST_GROUP("layout_cache"), test_layout_cache},
|
{TEST_GROUP("layout_cache"), test_layout_cache},
|
||||||
|
@ -6185,12 +5647,6 @@ static const test_t s_tests[]{
|
||||||
{TEST_GROUP("pipes"), test_pipes},
|
{TEST_GROUP("pipes"), test_pipes},
|
||||||
{TEST_GROUP("fd_event"), test_fd_event_signaller},
|
{TEST_GROUP("fd_event"), test_fd_event_signaller},
|
||||||
{TEST_GROUP("killring"), test_killring},
|
{TEST_GROUP("killring"), test_killring},
|
||||||
{TEST_GROUP("re"), test_re_errs},
|
|
||||||
{TEST_GROUP("re"), test_re_basic},
|
|
||||||
{TEST_GROUP("re"), test_re_reset},
|
|
||||||
{TEST_GROUP("re"), test_re_named},
|
|
||||||
{TEST_GROUP("re"), test_re_name_extraction},
|
|
||||||
{TEST_GROUP("re"), test_re_substitute},
|
|
||||||
{TEST_GROUP("wgetopt"), test_wgetopt},
|
{TEST_GROUP("wgetopt"), test_wgetopt},
|
||||||
{TEST_GROUP("rust_smoke"), test_rust_smoke},
|
{TEST_GROUP("rust_smoke"), test_rust_smoke},
|
||||||
{TEST_GROUP("rust_ffi"), test_rust_ffi},
|
{TEST_GROUP("rust_ffi"), test_rust_ffi},
|
||||||
|
|
15
src/io.cpp
15
src/io.cpp
|
@ -410,6 +410,21 @@ std::unique_ptr<io_streams_t> make_null_io_streams_ffi() {
|
||||||
return std::make_unique<io_streams_t>(*null, *null);
|
return std::make_unique<io_streams_t>(*null, *null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<io_streams_t> make_test_io_streams_ffi() {
|
||||||
|
// Temporary test helper.
|
||||||
|
auto streams = std::make_unique<owning_io_streams_t>();
|
||||||
|
streams->stdin_is_directly_redirected = false; // read from argv instead of stdin
|
||||||
|
return streams;
|
||||||
|
}
|
||||||
|
|
||||||
|
wcstring get_test_output_ffi(const io_streams_t &streams) {
|
||||||
|
string_output_stream_t *out = static_cast<string_output_stream_t *>(&streams.out);
|
||||||
|
if (out == nullptr) {
|
||||||
|
return wcstring();
|
||||||
|
}
|
||||||
|
return out->contents();
|
||||||
|
}
|
||||||
|
|
||||||
bool string_output_stream_t::append(const wchar_t *s, size_t amt) {
|
bool string_output_stream_t::append(const wchar_t *s, size_t amt) {
|
||||||
contents_.append(s, amt);
|
contents_.append(s, amt);
|
||||||
return true;
|
return true;
|
||||||
|
|
9
src/io.h
9
src/io.h
|
@ -506,6 +506,7 @@ struct io_streams_t : noncopyable_t {
|
||||||
std::shared_ptr<job_group_t> job_group{};
|
std::shared_ptr<job_group_t> job_group{};
|
||||||
|
|
||||||
io_streams_t(output_stream_t &out, output_stream_t &err) : out(out), err(err) {}
|
io_streams_t(output_stream_t &out, output_stream_t &err) : out(out), err(err) {}
|
||||||
|
virtual ~io_streams_t() = default;
|
||||||
|
|
||||||
/// autocxx junk.
|
/// autocxx junk.
|
||||||
output_stream_t &get_out() { return out; };
|
output_stream_t &get_out() { return out; };
|
||||||
|
@ -518,6 +519,14 @@ struct io_streams_t : noncopyable_t {
|
||||||
};
|
};
|
||||||
|
|
||||||
/// FFI helper.
|
/// FFI helper.
|
||||||
|
struct owning_io_streams_t : io_streams_t {
|
||||||
|
string_output_stream_t out_storage;
|
||||||
|
null_output_stream_t err_storage;
|
||||||
|
owning_io_streams_t() : io_streams_t(out_storage, err_storage) {}
|
||||||
|
};
|
||||||
|
|
||||||
std::unique_ptr<io_streams_t> make_null_io_streams_ffi();
|
std::unique_ptr<io_streams_t> make_null_io_streams_ffi();
|
||||||
|
std::unique_ptr<io_streams_t> make_test_io_streams_ffi();
|
||||||
|
wcstring get_test_output_ffi(const io_streams_t &streams);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
316
src/re.cpp
316
src/re.cpp
|
@ -1,316 +0,0 @@
|
||||||
#include "config.h" // IWYU pragma: keep
|
|
||||||
|
|
||||||
#include "re.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cstdint>
|
|
||||||
|
|
||||||
#include "flog.h"
|
|
||||||
|
|
||||||
#define PCRE2_CODE_UNIT_WIDTH WCHAR_T_BITS
|
|
||||||
#ifdef _WIN32
|
|
||||||
#define PCRE2_STATIC
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "pcre2.h"
|
|
||||||
|
|
||||||
using namespace re;
|
|
||||||
using namespace re::adapters;
|
|
||||||
|
|
||||||
void bytecode_deleter_t::operator()(const void *ptr) {
|
|
||||||
if (ptr) {
|
|
||||||
pcre2_code_free(static_cast<pcre2_code *>(const_cast<void *>(ptr)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void match_data_deleter_t::operator()(void *ptr) {
|
|
||||||
if (ptr) {
|
|
||||||
pcre2_match_data_free(static_cast<pcre2_match_data *>(ptr));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get underlying pcre2_code from a bytecode_ptr_t.
|
|
||||||
const pcre2_code *get_code(const bytecode_ptr_t &ptr) {
|
|
||||||
assert(ptr && "Null pointer");
|
|
||||||
return static_cast<const pcre2_code *>(ptr.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get underlying match_data_t.
|
|
||||||
pcre2_match_data *get_md(const match_data_ptr_t &ptr) {
|
|
||||||
assert(ptr && "Null pointer");
|
|
||||||
return static_cast<pcre2_match_data *>(ptr.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert a wcstring to a PCRE2_SPTR.
|
|
||||||
PCRE2_SPTR to_sptr(const wcstring &str) { return reinterpret_cast<PCRE2_SPTR>(str.c_str()); }
|
|
||||||
|
|
||||||
/// \return a message for an error code.
|
|
||||||
static wcstring message_for_code(error_code_t code) {
|
|
||||||
wchar_t buf[128] = {};
|
|
||||||
pcre2_get_error_message(code, reinterpret_cast<PCRE2_UCHAR *>(buf),
|
|
||||||
sizeof(buf) / sizeof(wchar_t));
|
|
||||||
return buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
maybe_t<regex_t> regex_t::try_compile(const wcstring &pattern, const flags_t &flags,
|
|
||||||
re_error_t *error) {
|
|
||||||
// Disable some sequences that can lead to security problems.
|
|
||||||
uint32_t options = PCRE2_NEVER_UTF;
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH < 32
|
|
||||||
options |= PCRE2_NEVER_BACKSLASH_C;
|
|
||||||
#endif
|
|
||||||
if (flags.icase) options |= PCRE2_CASELESS;
|
|
||||||
|
|
||||||
error_code_t err_code = 0;
|
|
||||||
PCRE2_SIZE err_offset = 0;
|
|
||||||
pcre2_code *code =
|
|
||||||
pcre2_compile(to_sptr(pattern), pattern.size(), options, &err_code, &err_offset, nullptr);
|
|
||||||
if (!code) {
|
|
||||||
if (error) {
|
|
||||||
error->code = err_code;
|
|
||||||
error->offset = err_offset;
|
|
||||||
}
|
|
||||||
return none();
|
|
||||||
}
|
|
||||||
return regex_t{bytecode_ptr_t(code)};
|
|
||||||
}
|
|
||||||
|
|
||||||
match_data_t regex_t::prepare() const {
|
|
||||||
pcre2_match_data *md = pcre2_match_data_create_from_pattern(get_code(code_), nullptr);
|
|
||||||
// Bogus assertion for memory exhaustion.
|
|
||||||
if (unlikely(!md)) {
|
|
||||||
DIE("Out of memory");
|
|
||||||
}
|
|
||||||
return match_data_t{match_data_ptr_t(static_cast<void *>(md))};
|
|
||||||
}
|
|
||||||
|
|
||||||
void match_data_t::reset() {
|
|
||||||
start_offset = 0;
|
|
||||||
max_capture = 0;
|
|
||||||
last_empty = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
maybe_t<match_range_t> regex_t::match(match_data_t &md, const wcstring &subject) const {
|
|
||||||
pcre2_match_data *const match_data = get_md(md.data);
|
|
||||||
assert(match_data && "Invalid match data");
|
|
||||||
|
|
||||||
// Handle exhausted matches.
|
|
||||||
if (md.start_offset > subject.size() || (md.last_empty && md.start_offset == subject.size())) {
|
|
||||||
md.max_capture = 0;
|
|
||||||
return none();
|
|
||||||
}
|
|
||||||
PCRE2_SIZE start_offset = md.start_offset;
|
|
||||||
|
|
||||||
// See pcre2demo.c for an explanation of this logic.
|
|
||||||
uint32_t options = md.last_empty ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0;
|
|
||||||
error_code_t code = pcre2_match(get_code(code_), to_sptr(subject), subject.size(), start_offset,
|
|
||||||
options, match_data, nullptr);
|
|
||||||
if (code == PCRE2_ERROR_NOMATCH && !md.last_empty) {
|
|
||||||
// Failed to match.
|
|
||||||
md.start_offset = subject.size();
|
|
||||||
md.max_capture = 0;
|
|
||||||
return none();
|
|
||||||
} else if (code == PCRE2_ERROR_NOMATCH && md.last_empty) {
|
|
||||||
// Failed to find a non-empty-string match at a point where there was a previous
|
|
||||||
// empty-string match. Advance by one character and try again.
|
|
||||||
md.start_offset += 1;
|
|
||||||
md.last_empty = false;
|
|
||||||
return this->match(md, subject);
|
|
||||||
} else if (code < 0) {
|
|
||||||
FLOG(error, "pcre2_match unexpected error:", message_for_code(code));
|
|
||||||
return none();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Match succeeded.
|
|
||||||
// Start at end of previous match, marking if it was empty.
|
|
||||||
const auto *ovector = pcre2_get_ovector_pointer(match_data);
|
|
||||||
md.start_offset = ovector[1];
|
|
||||||
md.max_capture = static_cast<size_t>(code);
|
|
||||||
md.last_empty = ovector[0] == ovector[1];
|
|
||||||
return match_range_t{ovector[0], ovector[1]};
|
|
||||||
}
|
|
||||||
|
|
||||||
maybe_t<match_range_t> regex_t::match(const wcstring &subject) const {
|
|
||||||
match_data_t md = this->prepare();
|
|
||||||
return this->match(md, subject);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool regex_t::matches_ffi(const wcstring &subject) const {
|
|
||||||
return this->match(subject).has_value();
|
|
||||||
}
|
|
||||||
|
|
||||||
maybe_t<match_range_t> regex_t::group(const match_data_t &md, size_t group_idx) const {
|
|
||||||
if (group_idx >= md.max_capture || group_idx >= pcre2_get_ovector_count(get_md(md.data))) {
|
|
||||||
return none();
|
|
||||||
}
|
|
||||||
|
|
||||||
const PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(get_md(md.data));
|
|
||||||
PCRE2_SIZE start = ovector[2 * group_idx];
|
|
||||||
PCRE2_SIZE end = ovector[2 * group_idx + 1];
|
|
||||||
if (start == PCRE2_UNSET || end == PCRE2_UNSET) {
|
|
||||||
return none();
|
|
||||||
}
|
|
||||||
// From PCRE2 docs: "Note that when a pattern such as (?=ab\K) matches, the reported start of
|
|
||||||
// the match can be greater than the end of the match."
|
|
||||||
// Saturate the end.
|
|
||||||
end = std::max(start, end);
|
|
||||||
return match_range_t{start, end};
|
|
||||||
}
|
|
||||||
|
|
||||||
maybe_t<match_range_t> regex_t::group(const match_data_t &match_data, const wcstring &name) const {
|
|
||||||
const auto *pcname = to_sptr(name);
|
|
||||||
// Beware, pcre2_substring_copy_byname and pcre2_substring_copy_bynumber both have a bug
|
|
||||||
// on at least one Ubuntu (running PCRE2) where it outputs garbage for the first character.
|
|
||||||
// Read out from the ovector directly.
|
|
||||||
int num = pcre2_substring_number_from_name(get_code(code_), pcname);
|
|
||||||
if (num <= 0) {
|
|
||||||
return none();
|
|
||||||
}
|
|
||||||
return this->group(match_data, static_cast<size_t>(num));
|
|
||||||
}
|
|
||||||
|
|
||||||
static maybe_t<wcstring> range_to_substr(const wcstring &subject, maybe_t<match_range_t> range) {
|
|
||||||
if (!range) {
|
|
||||||
return none();
|
|
||||||
}
|
|
||||||
assert(range->begin <= range->end && range->end <= subject.size() && "Invalid range");
|
|
||||||
return subject.substr(range->begin, range->end - range->begin);
|
|
||||||
}
|
|
||||||
|
|
||||||
maybe_t<wcstring> regex_t::substring_for_group(const match_data_t &md, size_t group_idx,
|
|
||||||
const wcstring &subject) const {
|
|
||||||
return range_to_substr(subject, this->group(md, group_idx));
|
|
||||||
}
|
|
||||||
|
|
||||||
maybe_t<wcstring> regex_t::substring_for_group(const match_data_t &md, const wcstring &name,
|
|
||||||
const wcstring &subject) const {
|
|
||||||
return range_to_substr(subject, this->group(md, name));
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t regex_t::capture_group_count() const {
|
|
||||||
uint32_t count{};
|
|
||||||
pcre2_pattern_info(get_code(code_), PCRE2_INFO_CAPTURECOUNT, &count);
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<wcstring> regex_t::capture_group_names() const {
|
|
||||||
PCRE2_SPTR name_table{};
|
|
||||||
uint32_t name_entry_size{};
|
|
||||||
uint32_t name_count{};
|
|
||||||
|
|
||||||
const auto *code = get_code(code_);
|
|
||||||
pcre2_pattern_info(code, PCRE2_INFO_NAMETABLE, &name_table);
|
|
||||||
pcre2_pattern_info(code, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
|
|
||||||
pcre2_pattern_info(code, PCRE2_INFO_NAMECOUNT, &name_count);
|
|
||||||
|
|
||||||
struct name_table_entry_t {
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
|
||||||
uint8_t match_index_msb;
|
|
||||||
uint8_t match_index_lsb;
|
|
||||||
#if CHAR_BIT == PCRE2_CODE_UNIT_WIDTH
|
|
||||||
char name[];
|
|
||||||
#else
|
|
||||||
char8_t name[];
|
|
||||||
#endif
|
|
||||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
|
||||||
uint16_t match_index;
|
|
||||||
#if WCHAR_T_BITS == PCRE2_CODE_UNIT_WIDTH
|
|
||||||
wchar_t name[];
|
|
||||||
#else
|
|
||||||
char16_t name[];
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
uint32_t match_index;
|
|
||||||
#if WCHAR_T_BITS == PCRE2_CODE_UNIT_WIDTH
|
|
||||||
wchar_t name[];
|
|
||||||
#else
|
|
||||||
char32_t name[];
|
|
||||||
#endif // WCHAR_T_BITS
|
|
||||||
#endif // PCRE2_CODE_UNIT_WIDTH
|
|
||||||
};
|
|
||||||
|
|
||||||
const auto *names = reinterpret_cast<const name_table_entry_t *>(name_table);
|
|
||||||
std::vector<wcstring> result;
|
|
||||||
result.reserve(name_count);
|
|
||||||
for (uint32_t i = 0; i < name_count; ++i) {
|
|
||||||
const auto &name_entry = names[i * name_entry_size];
|
|
||||||
result.emplace_back(name_entry.name);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
maybe_t<wcstring> regex_t::substitute(const wcstring &subject, const wcstring &replacement,
|
|
||||||
sub_flags_t flags, size_t start_idx, re_error_t *out_error,
|
|
||||||
int *out_repl_count) const {
|
|
||||||
constexpr size_t stack_bufflen = 256;
|
|
||||||
wchar_t buffer[stack_bufflen];
|
|
||||||
|
|
||||||
// SUBSTITUTE_GLOBAL means more than one substitution happens.
|
|
||||||
uint32_t options = PCRE2_SUBSTITUTE_UNSET_EMPTY // don't error on unmatched
|
|
||||||
| PCRE2_SUBSTITUTE_OVERFLOW_LENGTH // return required length on overflow
|
|
||||||
| (flags.global ? PCRE2_SUBSTITUTE_GLOBAL : 0) // replace multiple
|
|
||||||
| (flags.extended ? PCRE2_SUBSTITUTE_EXTENDED : 0) // backslash escapes
|
|
||||||
;
|
|
||||||
size_t bufflen = stack_bufflen;
|
|
||||||
error_code_t rc =
|
|
||||||
pcre2_substitute(get_code(code_), to_sptr(subject), subject.size(), start_idx, options,
|
|
||||||
nullptr /* match_data */, nullptr /* context */, to_sptr(replacement),
|
|
||||||
// (not using UCHAR32 here for cygwin's benefit)
|
|
||||||
replacement.size(), reinterpret_cast<PCRE2_UCHAR *>(buffer), &bufflen);
|
|
||||||
|
|
||||||
if (out_repl_count) {
|
|
||||||
*out_repl_count = std::max(rc, 0);
|
|
||||||
}
|
|
||||||
if (rc == 0) {
|
|
||||||
// No replacements.
|
|
||||||
return subject;
|
|
||||||
} else if (rc > 0) {
|
|
||||||
// Some replacement which fit in our buffer.
|
|
||||||
// Note we may have had embedded nuls.
|
|
||||||
assert(bufflen <= stack_bufflen && "bufflen should not exceed buffer size");
|
|
||||||
return wcstring(buffer, bufflen);
|
|
||||||
} else if (rc == PCRE2_ERROR_NOMEMORY) {
|
|
||||||
// bufflen has been updated to required buffer size.
|
|
||||||
// Try again with a real string.
|
|
||||||
wcstring res(bufflen, L'\0');
|
|
||||||
rc = pcre2_substitute(get_code(code_), to_sptr(subject), subject.size(), start_idx, options,
|
|
||||||
nullptr /* match_data */, nullptr /* context */, to_sptr(replacement),
|
|
||||||
replacement.size(), reinterpret_cast<PCRE2_UCHAR *>(&res[0]),
|
|
||||||
&bufflen);
|
|
||||||
if (out_repl_count) {
|
|
||||||
*out_repl_count = std::max(rc, 0);
|
|
||||||
}
|
|
||||||
if (rc >= 0) {
|
|
||||||
res.resize(bufflen);
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Some error. The offset may be returned in the bufflen.
|
|
||||||
if (out_error) {
|
|
||||||
out_error->code = rc;
|
|
||||||
out_error->offset = (bufflen == PCRE2_UNSET ? 0 : bufflen);
|
|
||||||
}
|
|
||||||
return none();
|
|
||||||
}
|
|
||||||
|
|
||||||
regex_t::regex_t(adapters::bytecode_ptr_t &&code) : code_(std::move(code)) {
|
|
||||||
assert(code_ && "Null impl");
|
|
||||||
}
|
|
||||||
|
|
||||||
wcstring re_error_t::message() const { return message_for_code(this->code); }
|
|
||||||
|
|
||||||
re::regex_result_ffi re::try_compile_ffi(const wcstring &pattern, const flags_t &flags) {
|
|
||||||
re_error_t error{};
|
|
||||||
auto regex = regex_t::try_compile(pattern, flags, &error);
|
|
||||||
|
|
||||||
if (regex) {
|
|
||||||
return regex_result_ffi{std::make_unique<re::regex_t>(regex.acquire()), error};
|
|
||||||
}
|
|
||||||
|
|
||||||
return re::regex_result_ffi{nullptr, error};
|
|
||||||
}
|
|
||||||
|
|
||||||
bool re::regex_result_ffi::has_error() const { return error.code != 0; }
|
|
||||||
re::re_error_t re::regex_result_ffi::get_error() const { return error; };
|
|
||||||
|
|
||||||
std::unique_ptr<re::regex_t> re::regex_result_ffi::get_regex() { return std::move(regex); }
|
|
166
src/re.h
166
src/re.h
|
@ -1,166 +0,0 @@
|
||||||
// Wraps PCRE2.
|
|
||||||
#ifndef FISH_RE_H
|
|
||||||
#define FISH_RE_H
|
|
||||||
|
|
||||||
#include <cstddef>
|
|
||||||
#include <memory>
|
|
||||||
#include <utility>
|
|
||||||
|
|
||||||
#include "common.h"
|
|
||||||
#include "maybe.h"
|
|
||||||
|
|
||||||
namespace re {
|
|
||||||
|
|
||||||
namespace adapters {
|
|
||||||
// Adapter to store pcre2_code in unique_ptr.
|
|
||||||
struct bytecode_deleter_t {
|
|
||||||
void operator()(const void *);
|
|
||||||
};
|
|
||||||
using bytecode_ptr_t = std::unique_ptr<const void, bytecode_deleter_t>;
|
|
||||||
|
|
||||||
// Adapter to store pcre2_match_data in unique_ptr.
|
|
||||||
struct match_data_deleter_t {
|
|
||||||
void operator()(void *);
|
|
||||||
};
|
|
||||||
using match_data_ptr_t = std::unique_ptr<void, match_data_deleter_t>;
|
|
||||||
} // namespace adapters
|
|
||||||
|
|
||||||
/// Error code type alias.
|
|
||||||
using error_code_t = int;
|
|
||||||
|
|
||||||
/// Flags for compiling a regex.
|
|
||||||
struct flags_t {
|
|
||||||
bool icase{}; // ignore case?
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Flags for substituting a regex.
|
|
||||||
struct sub_flags_t {
|
|
||||||
bool global{}; // perform multiple substitutions?
|
|
||||||
bool extended{}; // apply PCRE2 extended backslash escapes?
|
|
||||||
};
|
|
||||||
|
|
||||||
/// A type wrapping up error information.
|
|
||||||
/// Beware, GNU defines error_t; hence we use an re_ prefix again.
|
|
||||||
struct re_error_t {
|
|
||||||
error_code_t code{}; // error code
|
|
||||||
size_t offset{}; // offset of the error in the pattern
|
|
||||||
|
|
||||||
/// \return our error message.
|
|
||||||
wcstring message() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// A half-open range of a subject which matched.
|
|
||||||
struct match_range_t {
|
|
||||||
size_t begin;
|
|
||||||
size_t end;
|
|
||||||
|
|
||||||
bool operator==(match_range_t rhs) const { return begin == rhs.begin && end == rhs.end; }
|
|
||||||
bool operator!=(match_range_t rhs) const { return !(*this == rhs); }
|
|
||||||
};
|
|
||||||
|
|
||||||
/// A match data is the "stateful" object, storing string indices for where to start the next match,
|
|
||||||
/// capture results, etc. Create one via regex_t::prepare(). These are tied to the regex which
|
|
||||||
/// created them.
|
|
||||||
class match_data_t : noncopyable_t {
|
|
||||||
public:
|
|
||||||
match_data_t(match_data_t &&) = default;
|
|
||||||
match_data_t &operator=(match_data_t &&) = default;
|
|
||||||
~match_data_t() = default;
|
|
||||||
|
|
||||||
/// \return a "count" of the number of capture groups which matched.
|
|
||||||
/// This is really one more than the highest matching group.
|
|
||||||
/// 0 is considered a "group" for the entire match, so this will always return at least 1 for a
|
|
||||||
/// successful match.
|
|
||||||
size_t matched_capture_group_count() const { return max_capture; }
|
|
||||||
|
|
||||||
/// Reset this data, as if this were freshly issued by a call to prepare().
|
|
||||||
void reset();
|
|
||||||
|
|
||||||
private:
|
|
||||||
explicit match_data_t(adapters::match_data_ptr_t &&data) : data(std::move(data)) {}
|
|
||||||
|
|
||||||
// Next start position. This may exceed the needle length, which indicates exhaustion.
|
|
||||||
size_t start_offset{0};
|
|
||||||
|
|
||||||
// One more than the highest numbered capturing pair that was set (e.g. 1 if no captures).
|
|
||||||
size_t max_capture{0};
|
|
||||||
|
|
||||||
// If set, the last match was empty.
|
|
||||||
bool last_empty{false};
|
|
||||||
|
|
||||||
// Underlying pcre2_match_data.
|
|
||||||
adapters::match_data_ptr_t data{};
|
|
||||||
|
|
||||||
friend class regex_t;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// The compiled form of a PCRE2 regex.
|
|
||||||
/// This is thread safe.
|
|
||||||
class regex_t : noncopyable_t {
|
|
||||||
public:
|
|
||||||
/// Compile a pattern into a regex. \return the resulting regex, or none on error.
|
|
||||||
/// If \p error is not null, populate it with the error information.
|
|
||||||
static maybe_t<regex_t> try_compile(const wcstring &pattern, const flags_t &flags = flags_t{},
|
|
||||||
re_error_t *out_error = nullptr);
|
|
||||||
|
|
||||||
/// Create a match data for this regex.
|
|
||||||
/// The result is tied to this regex; it should not be used for others.
|
|
||||||
match_data_t prepare() const;
|
|
||||||
|
|
||||||
/// Match against a string \p subject, populating \p md.
|
|
||||||
/// \return a range on a successful match, none on no match.
|
|
||||||
maybe_t<match_range_t> match(match_data_t &md, const wcstring &subject) const;
|
|
||||||
|
|
||||||
/// A convenience function which calls prepare() for you.
|
|
||||||
maybe_t<match_range_t> match(const wcstring &subject) const;
|
|
||||||
|
|
||||||
/// A convenience function which calls prepare() for you.
|
|
||||||
bool matches_ffi(const wcstring &subject) const;
|
|
||||||
|
|
||||||
/// \return the matched range for an indexed or named capture group. 0 means the entire match.
|
|
||||||
maybe_t<match_range_t> group(const match_data_t &md, size_t group_idx) const;
|
|
||||||
maybe_t<match_range_t> group(const match_data_t &md, const wcstring &name) const;
|
|
||||||
|
|
||||||
/// \return the matched substring for a capture group.
|
|
||||||
maybe_t<wcstring> substring_for_group(const match_data_t &md, size_t group_idx,
|
|
||||||
const wcstring &subject) const;
|
|
||||||
maybe_t<wcstring> substring_for_group(const match_data_t &md, const wcstring &name,
|
|
||||||
const wcstring &subject) const;
|
|
||||||
|
|
||||||
/// \return the number of indexed capture groups.
|
|
||||||
size_t capture_group_count() const;
|
|
||||||
|
|
||||||
/// \return the list of capture group names.
|
|
||||||
/// Note PCRE provides these in sorted order, not specification order.
|
|
||||||
std::vector<wcstring> capture_group_names() const;
|
|
||||||
|
|
||||||
/// Search \p subject for matches for this regex, starting at \p start_idx, and replacing them
|
|
||||||
/// with \p replacement. If \p repl_count is not null, populate it with the number of
|
|
||||||
/// replacements which occurred. This may fail for e.g. bad escapes in the replacement string.
|
|
||||||
maybe_t<wcstring> substitute(const wcstring &subject, const wcstring &replacement,
|
|
||||||
sub_flags_t flags, size_t start_idx = 0,
|
|
||||||
re_error_t *out_error = nullptr,
|
|
||||||
int *out_repl_count = nullptr) const;
|
|
||||||
|
|
||||||
regex_t(regex_t &&) = default;
|
|
||||||
regex_t &operator=(regex_t &&) = default;
|
|
||||||
~regex_t() = default;
|
|
||||||
|
|
||||||
private:
|
|
||||||
regex_t(adapters::bytecode_ptr_t &&);
|
|
||||||
adapters::bytecode_ptr_t code_;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct regex_result_ffi {
|
|
||||||
std::unique_ptr<re::regex_t> regex;
|
|
||||||
re::re_error_t error;
|
|
||||||
|
|
||||||
bool has_error() const;
|
|
||||||
std::unique_ptr<re::regex_t> get_regex();
|
|
||||||
re::re_error_t get_error() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
regex_result_ffi try_compile_ffi(const wcstring &pattern, const flags_t &flags);
|
|
||||||
|
|
||||||
} // namespace re
|
|
||||||
#endif
|
|
|
@ -266,6 +266,11 @@ maybe_t<size_t> escape_code_length(const wchar_t *code) {
|
||||||
return found ? maybe_t<size_t>{esc_seq_len} : none();
|
return found ? maybe_t<size_t>{esc_seq_len} : none();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
long escape_code_length_ffi(const wchar_t *code) {
|
||||||
|
auto found = escape_code_length(code);
|
||||||
|
return found.has_value() ? (long)*found : -1;
|
||||||
|
}
|
||||||
|
|
||||||
size_t layout_cache_t::escape_code_length(const wchar_t *code) {
|
size_t layout_cache_t::escape_code_length(const wchar_t *code) {
|
||||||
assert(code != nullptr);
|
assert(code != nullptr);
|
||||||
if (*code != L'\x1B') return 0;
|
if (*code != L'\x1B') return 0;
|
||||||
|
|
|
@ -332,6 +332,8 @@ class layout_cache_t : noncopyable_t {
|
||||||
};
|
};
|
||||||
|
|
||||||
maybe_t<size_t> escape_code_length(const wchar_t *code);
|
maybe_t<size_t> escape_code_length(const wchar_t *code);
|
||||||
|
// Always return a value, by moving checking of sequence start to the caller.
|
||||||
|
long escape_code_length_ffi(const wchar_t *code);
|
||||||
|
|
||||||
void screen_set_midnight_commander_hack();
|
void screen_set_midnight_commander_hack();
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -199,3 +199,8 @@ abbr --add --regex foo --function foo
|
||||||
# CHECKERR: abbr --add: Name cannot be empty
|
# CHECKERR: abbr --add: Name cannot be empty
|
||||||
echo foo
|
echo foo
|
||||||
# CHECK: foo
|
# CHECK: foo
|
||||||
|
|
||||||
|
abbr --add regex_name --regex '(*UTF).*' bar
|
||||||
|
# CHECKERR: abbr: Regular expression compile error: using UTF is disabled by the application
|
||||||
|
# CHECKERR: abbr: (*UTF).*
|
||||||
|
# CHECKERR: abbr: ^
|
||||||
|
|
|
@ -45,9 +45,13 @@ string length -q ""; and echo not zero length; or echo zero length
|
||||||
string pad foo
|
string pad foo
|
||||||
# CHECK: foo
|
# CHECK: foo
|
||||||
|
|
||||||
string pad -r -w 7 -c - foo
|
string pad -r -w 7 --chars - foo
|
||||||
# CHECK: foo----
|
# CHECK: foo----
|
||||||
|
|
||||||
|
# might overflow when converting sign
|
||||||
|
string sub --start -9223372036854775808 abc
|
||||||
|
# CHECK: abc
|
||||||
|
|
||||||
string pad --width 7 -c '=' foo
|
string pad --width 7 -c '=' foo
|
||||||
# CHECK: ====foo
|
# CHECK: ====foo
|
||||||
|
|
||||||
|
@ -175,6 +179,10 @@ string split "" abc
|
||||||
# CHECK: b
|
# CHECK: b
|
||||||
# CHECK: c
|
# CHECK: c
|
||||||
|
|
||||||
|
string split --max 1 --right 12 "AB12CD"
|
||||||
|
# CHECK: AB
|
||||||
|
# CHECK: CD
|
||||||
|
|
||||||
string split --fields=2 "" abc
|
string split --fields=2 "" abc
|
||||||
# CHECK: b
|
# CHECK: b
|
||||||
|
|
||||||
|
@ -185,6 +193,39 @@ string split --fields=3,2 "" abc
|
||||||
string split --fields=2,9 "" abc; or echo "exit 1"
|
string split --fields=2,9 "" abc; or echo "exit 1"
|
||||||
# CHECK: exit 1
|
# CHECK: exit 1
|
||||||
|
|
||||||
|
string split --fields=2-3-,9 "" a
|
||||||
|
# CHECKERR: string split: 2-3-,9: invalid integer
|
||||||
|
|
||||||
|
string split --fields=1-99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 "" abc
|
||||||
|
# CHECKERR: string split: 1-99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999: invalid integer
|
||||||
|
|
||||||
|
string split --fields=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-1 "" abc
|
||||||
|
# CHECKERR: string split: 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-1: invalid integer
|
||||||
|
|
||||||
|
string split --fields=1--2 "" b
|
||||||
|
# CHECKERR: string split: 1--2: invalid integer
|
||||||
|
|
||||||
|
string split --fields=0 "" c
|
||||||
|
# CHECKERR: string split: Invalid fields value '0'
|
||||||
|
|
||||||
|
string split --fields=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 "" abc
|
||||||
|
# CHECKERR: string split: 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999: invalid integer
|
||||||
|
|
||||||
|
string split --fields=1-0 "" d
|
||||||
|
# CHECKERR: string split: Invalid range value for field '1-0'
|
||||||
|
|
||||||
|
string split --fields=0-1 "" e
|
||||||
|
# CHECKERR: string split: Invalid range value for field '0-1'
|
||||||
|
|
||||||
|
string split --fields=-1 "" f
|
||||||
|
# CHECKERR: string split: -1: invalid integer
|
||||||
|
|
||||||
|
string split --fields=1a "" g
|
||||||
|
# CHECKERR: string split: 1a: invalid integer
|
||||||
|
|
||||||
|
string split --fields=a "" h
|
||||||
|
# CHECKERR: string split: a: invalid integer
|
||||||
|
|
||||||
string split --fields=1-3,5,9-7 "" 123456789
|
string split --fields=1-3,5,9-7 "" 123456789
|
||||||
# CHECK: 1
|
# CHECK: 1
|
||||||
# CHECK: 2
|
# CHECK: 2
|
||||||
|
@ -359,6 +400,14 @@ string replace -r "\s*newline\s*" "\n" "put a newline here"
|
||||||
string replace -r -a "(\w)" "\$1\$1" ab
|
string replace -r -a "(\w)" "\$1\$1" ab
|
||||||
# CHECK: aabb
|
# CHECK: aabb
|
||||||
|
|
||||||
|
echo a | string replace b c -q
|
||||||
|
or echo No replace fails
|
||||||
|
# CHECK: No replace fails
|
||||||
|
|
||||||
|
echo a | string replace -r b c -q
|
||||||
|
or echo No replace regex fails
|
||||||
|
# CHECK: No replace regex fails
|
||||||
|
|
||||||
string replace --filter x X abc axc x def jkx
|
string replace --filter x X abc axc x def jkx
|
||||||
or echo Unexpected exit status at line (status --current-line-number)
|
or echo Unexpected exit status at line (status --current-line-number)
|
||||||
# CHECK: aXc
|
# CHECK: aXc
|
||||||
|
@ -468,6 +517,22 @@ string repeat -n 5 --max 4 123 '' 789
|
||||||
# CHECK:
|
# CHECK:
|
||||||
# CHECK: 7897
|
# CHECK: 7897
|
||||||
|
|
||||||
|
# FIXME: handle overflowing nicely
|
||||||
|
# overflow behaviour depends on 32 vs 64 bit
|
||||||
|
|
||||||
|
# count here is isize::MAX
|
||||||
|
# we store what to print as usize, so this will overflow
|
||||||
|
# but we limit it to less than whatever the overflow is
|
||||||
|
# so this should be fine
|
||||||
|
# string repeat -m1 -n 9223372036854775807 aa
|
||||||
|
# DONTCHECK: a
|
||||||
|
|
||||||
|
# count is here (i64::MAX + 1) / 2
|
||||||
|
# we end up overflowing, and the result is 0
|
||||||
|
# but this should work fine, as we limit it way before the overflow
|
||||||
|
# string repeat -m1 -n 4611686018427387904 aaaa
|
||||||
|
# DONTCHECK: a
|
||||||
|
|
||||||
# Historical string repeat behavior is no newline if no output.
|
# Historical string repeat behavior is no newline if no output.
|
||||||
echo -n before
|
echo -n before
|
||||||
string repeat -n 5 ''
|
string repeat -n 5 ''
|
||||||
|
@ -766,6 +831,18 @@ string match -qer asd asd
|
||||||
echo $status
|
echo $status
|
||||||
# CHECK: 0
|
# CHECK: 0
|
||||||
|
|
||||||
|
# should not be able to enable UTF mode
|
||||||
|
string match -r "(*UTF).*" "aaa"
|
||||||
|
# CHECKERR: string match: Regular expression compile error: using UTF is disabled by the application
|
||||||
|
# CHECKERR: string match: (*UTF).*
|
||||||
|
# CHECKERR: string match: ^
|
||||||
|
|
||||||
|
string replace -r "(*UTF).*" "aaa"
|
||||||
|
# CHECKERR: string replace: Regular expression compile error: using UTF is disabled by the application
|
||||||
|
# CHECKERR: string replace: (*UTF).*
|
||||||
|
# CHECKERR: string replace: ^
|
||||||
|
|
||||||
|
|
||||||
string match -eq asd asd
|
string match -eq asd asd
|
||||||
echo $status
|
echo $status
|
||||||
# CHECK: 0
|
# CHECK: 0
|
||||||
|
@ -832,6 +909,12 @@ echo "foo1x foo2x foo3x" | string match -arg 'foo(\d)x'
|
||||||
echo -n abc | string upper
|
echo -n abc | string upper
|
||||||
echo '<eol>'
|
echo '<eol>'
|
||||||
# CHECK: ABC<eol>
|
# CHECK: ABC<eol>
|
||||||
|
|
||||||
|
# newline should not appear from nowhere when command does not split on newline
|
||||||
|
echo -n abc | string collect
|
||||||
|
echo '<eol>'
|
||||||
|
# CHECK: abc<eol>
|
||||||
|
|
||||||
printf \<
|
printf \<
|
||||||
printf my-password | string replace -ra . \*
|
printf my-password | string replace -ra . \*
|
||||||
printf \>\n
|
printf \>\n
|
||||||
|
|
Loading…
Reference in a new issue