From f52569a80085abd4ac5bd97e3154dbf1c25956a0 Mon Sep 17 00:00:00 2001 From: Neeraj Jaiswal Date: Fri, 24 Feb 2023 21:30:05 +0530 Subject: [PATCH] abbr: port abbreviation and abbr builtin to rust --- CMakeLists.txt | 4 +- fish-rust/build.rs | 1 + fish-rust/src/abbrs.rs | 470 ++++++++++++++++++++++++ fish-rust/src/builtins/abbr.rs | 604 +++++++++++++++++++++++++++++++ fish-rust/src/builtins/mod.rs | 1 + fish-rust/src/builtins/shared.rs | 1 + fish-rust/src/common.rs | 19 + fish-rust/src/ffi.rs | 5 + fish-rust/src/lib.rs | 3 + fish-rust/src/parse_constants.rs | 2 +- src/abbrs.cpp | 134 ------- src/abbrs.h | 138 +------ src/builtin.cpp | 6 +- src/builtin.h | 1 + src/builtins/abbr.cpp | 435 ---------------------- src/builtins/abbr.h | 11 - src/complete.cpp | 10 +- src/env.cpp | 17 +- src/fish_tests.cpp | 32 +- src/highlight.cpp | 3 +- src/parse_constants.h | 3 +- src/parser.h | 2 + src/reader.cpp | 19 +- 23 files changed, 1166 insertions(+), 755 deletions(-) create mode 100644 fish-rust/src/abbrs.rs create mode 100644 fish-rust/src/builtins/abbr.rs delete mode 100644 src/abbrs.cpp delete mode 100644 src/builtins/abbr.cpp delete mode 100644 src/builtins/abbr.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b1dc2e5d..39a770d2b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,7 +99,7 @@ endif() # List of sources for builtin functions. set(FISH_BUILTIN_SRCS - src/builtin.cpp src/builtins/abbr.cpp src/builtins/argparse.cpp + src/builtin.cpp src/builtins/argparse.cpp src/builtins/bg.cpp src/builtins/bind.cpp src/builtins/block.cpp src/builtins/builtin.cpp src/builtins/cd.cpp src/builtins/command.cpp src/builtins/commandline.cpp src/builtins/complete.cpp @@ -115,7 +115,7 @@ set(FISH_BUILTIN_SRCS # List of other sources. set(FISH_SRCS - src/ast.cpp src/abbrs.cpp src/autoload.cpp src/color.cpp src/common.cpp src/complete.cpp + src/ast.cpp src/autoload.cpp src/color.cpp src/common.cpp src/complete.cpp src/env.cpp src/env_dispatch.cpp src/env_universal_common.cpp src/event.cpp src/exec.cpp src/expand.cpp src/fallback.cpp src/fish_version.cpp src/flog.cpp src/function.cpp src/highlight.cpp diff --git a/fish-rust/build.rs b/fish-rust/build.rs index 907b3e0cc..95795f251 100644 --- a/fish-rust/build.rs +++ b/fish-rust/build.rs @@ -19,6 +19,7 @@ fn main() -> miette::Result<()> { // This allows "Rust to be used from C++" // This must come before autocxx so that cxx can emit its cxx.h header. let source_files = vec![ + "src/abbrs.rs", "src/fd_monitor.rs", "src/fd_readable_set.rs", "src/fds.rs", diff --git a/fish-rust/src/abbrs.rs b/fish-rust/src/abbrs.rs new file mode 100644 index 000000000..fdc83d226 --- /dev/null +++ b/fish-rust/src/abbrs.rs @@ -0,0 +1,470 @@ +#![allow(clippy::extra_unused_lifetimes, clippy::needless_lifetimes)] +use std::{ + collections::HashSet, + sync::{Arc, Mutex, MutexGuard}, +}; + +use crate::wchar::{wstr, WString}; +use crate::{ + wchar::L, + wchar_ffi::{WCharFromFFI, WCharToFFI}, +}; +use cxx::{CxxWString, UniquePtr}; +use once_cell::sync::Lazy; + +use crate::abbrs::abbrs_ffi::abbrs_replacer_t; +use crate::ffi::re::regex_t; +use crate::parse_constants::SourceRange; + +use self::abbrs_ffi::{abbreviation_t, abbrs_position_t, abbrs_replacement_t}; + +#[cxx::bridge] +mod abbrs_ffi { + extern "C++" { + include!("re.h"); + include!("parse_constants.h"); + + type SourceRange = crate::parse_constants::SourceRange; + } + + enum abbrs_position_t { + command, + anywhere, + } + + struct abbrs_replacer_t { + replacement: UniquePtr, + is_function: bool, + set_cursor_marker: UniquePtr, + has_cursor_marker: bool, + } + + struct abbrs_replacement_t { + range: SourceRange, + text: UniquePtr, + cursor: usize, + has_cursor: bool, + } + + struct abbreviation_t { + key: UniquePtr, + replacement: UniquePtr, + is_regex: bool, + } + + extern "Rust" { + type GlobalAbbrs<'a>; + + #[cxx_name = "abbrs_list"] + fn abbrs_list_ffi() -> Vec; + + #[cxx_name = "abbrs_match"] + fn abbrs_match_ffi(token: &CxxWString, position: abbrs_position_t) + -> Vec; + + #[cxx_name = "abbrs_has_match"] + fn abbrs_has_match_ffi(token: &CxxWString, position: abbrs_position_t) -> bool; + + #[cxx_name = "abbrs_replacement_from"] + fn abbrs_replacement_from_ffi( + range: SourceRange, + text: &CxxWString, + set_cursor_marker: &CxxWString, + has_cursor_marker: bool, + ) -> abbrs_replacement_t; + + #[cxx_name = "abbrs_get_set"] + unsafe fn abbrs_get_set_ffi<'a>() -> Box>; + unsafe fn add<'a>( + self: &mut GlobalAbbrs<'_>, + name: &CxxWString, + key: &CxxWString, + replacement: &CxxWString, + position: abbrs_position_t, + from_universal: bool, + ); + unsafe fn erase<'a>(self: &mut GlobalAbbrs<'_>, name: &CxxWString); + } +} + +static abbrs: Lazy>> = + Lazy::new(|| Arc::new(Mutex::new(Default::default()))); + +pub fn with_abbrs(cb: impl FnOnce(&AbbreviationSet) -> R) -> R { + let abbrs_g = abbrs.lock().unwrap(); + cb(&abbrs_g) +} + +pub fn with_abbrs_mut(cb: impl FnOnce(&mut AbbreviationSet) -> R) -> R { + let mut abbrs_g = abbrs.lock().unwrap(); + cb(&mut abbrs_g) +} + +/// Controls where in the command line abbreviations may expand. +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum Position { + Command, // expand in command position + Anywhere, // expand in any token +} + +impl From for Position { + fn from(value: abbrs_position_t) -> Self { + match value { + abbrs_position_t::anywhere => Position::Anywhere, + abbrs_position_t::command => Position::Command, + _ => panic!("invalid abbrs_position_t"), + } + } +} + +#[derive(Debug)] +pub struct Abbreviation { + // Abbreviation name. This is unique within the abbreviation set. + // This is used as the token to match unless we have a regex. + pub name: WString, + + /// The key (recognized token) - either a literal or a regex pattern. + pub key: WString, + + /// If set, use this regex to recognize tokens. + /// If unset, the key is to be interpreted literally. + /// Note that the fish interface enforces that regexes match the entire token; + /// we accomplish this by surrounding the regex in ^ and $. + pub regex: Option>, + + /// Replacement string. + pub replacement: WString, + + /// If set, the replacement is a function name. + pub replacement_is_function: bool, + + /// Expansion position. + pub position: Position, + + /// If set, then move the cursor to the first instance of this string in the expansion. + pub set_cursor_marker: Option, + + /// Mark if we came from a universal variable. + pub from_universal: bool, +} + +impl Abbreviation { + // Construct from a name, a key which matches a token, a replacement token, a position, and + // whether we are derived from a universal variable. + pub fn new( + name: WString, + key: WString, + replacement: WString, + position: Position, + from_universal: bool, + ) -> Self { + Self { + name, + key, + regex: None, + replacement, + replacement_is_function: false, + position, + set_cursor_marker: None, + from_universal, + } + } + + // \return true if this is a regex abbreviation. + pub fn is_regex(&self) -> bool { + self.regex.is_some() + } + + // \return true if we match a token at a given position. + pub fn matches(&self, token: &wstr, position: Position) -> bool { + if !self.matches_position(position) { + return false; + } + self.regex + .as_ref() + .map(|r| r.matches_ffi(&token.to_ffi())) + .unwrap_or(self.key == token) + } + + // \return if we expand in a given position. + fn matches_position(&self, position: Position) -> bool { + return self.position == Position::Anywhere || self.position == position; + } +} + +/// The result of an abbreviation expansion. +pub struct Replacer { + /// The string to use to replace the incoming token, either literal or as a function name. + replacement: WString, + + /// If true, treat 'replacement' as the name of a function. + is_function: bool, + + /// If set, the cursor should be moved to the first instance of this string in the expansion. + set_cursor_marker: Option, +} + +impl From for abbrs_replacer_t { + fn from(value: Replacer) -> Self { + let has_cursor_marker = value.set_cursor_marker.is_some(); + Self { + replacement: value.replacement.to_ffi(), + is_function: value.is_function, + set_cursor_marker: value.set_cursor_marker.unwrap_or_default().to_ffi(), + has_cursor_marker, + } + } +} + +struct Replacement { + /// The original range of the token in the command line. + range: SourceRange, + + /// The string to replace with. + text: WString, + + /// The new cursor location, or none to use the default. + /// This is relative to the original range. + cursor: Option, +} + +impl Replacement { + /// Construct a replacement from a replacer. + /// The \p range is the range of the text matched by the replacer in the command line. + /// The text is passed in separately as it may be the output of the replacer's function. + fn from(range: SourceRange, mut text: WString, set_cursor_marker: Option) -> Self { + let mut cursor = None; + if let Some(set_cursor_marker) = set_cursor_marker { + let matched = text + .as_char_slice() + .windows(set_cursor_marker.len()) + .position(|w| w == set_cursor_marker.as_char_slice()); + + if let Some(start) = matched { + text.replace_range(start..(start + set_cursor_marker.len()), L!("")); + cursor = Some(start + range.start as usize) + } + } + Self { + range, + text, + cursor, + } + } +} + +#[derive(Default)] +pub struct AbbreviationSet { + /// List of abbreviations, in definition order. + abbrs: Vec, + + /// Set of used abbrevation names. + /// This is to avoid a linear scan when adding new abbreviations. + used_names: HashSet, +} + +impl AbbreviationSet { + /// \return the list of replacers for an input token, in priority order. + /// The \p position is given to describe where the token was found. + pub fn r#match(&self, token: &wstr, position: Position) -> Vec { + let mut result = vec![]; + + // Later abbreviations take precedence so walk backwards. + for abbr in self.abbrs.iter().rev() { + if abbr.matches(token, position) { + result.push(Replacer { + replacement: abbr.replacement.clone(), + is_function: abbr.replacement_is_function, + set_cursor_marker: abbr.set_cursor_marker.clone(), + }); + } + } + return result; + } + + /// \return whether we would have at least one replacer for a given token. + pub fn has_match(&self, token: &wstr, position: Position) -> bool { + self.abbrs.iter().any(|abbr| abbr.matches(token, position)) + } + + /// Add an abbreviation. Any abbreviation with the same name is replaced. + pub fn add(&mut self, abbr: Abbreviation) { + assert!(!abbr.name.is_empty(), "Invalid name"); + let inserted = self.used_names.insert(abbr.name.clone()); + if !inserted { + // Name was already used, do a linear scan to find it. + let index = self + .abbrs + .iter() + .position(|a| a.name == abbr.name) + .expect("Abbreviation not found though its name was present"); + + self.abbrs.remove(index); + } + self.abbrs.push(abbr); + } + + /// Rename an abbreviation. This asserts that the old name is used, and the new name is not; the + /// caller should check these beforehand with has_name(). + pub fn rename(&mut self, old_name: &wstr, new_name: &wstr) { + let erased = self.used_names.remove(old_name); + let inserted = self.used_names.insert(new_name.to_owned()); + assert!( + erased && inserted, + "Old name not found or new name already present" + ); + for abbr in self.abbrs.iter_mut() { + if abbr.name == old_name { + abbr.name = new_name.to_owned(); + break; + } + } + } + + /// Erase an abbreviation by name. + /// \return true if erased, false if not found. + pub fn erase(&mut self, name: &wstr) -> bool { + let erased = self.used_names.remove(name); + if !erased { + return false; + } + for (index, abbr) in self.abbrs.iter().enumerate().rev() { + if abbr.name == name { + self.abbrs.remove(index); + return true; + } + } + panic!("Unable to find named abbreviation"); + } + + /// \return true if we have an abbreviation with the given name. + pub fn has_name(&self, name: &wstr) -> bool { + self.used_names.contains(name) + } + + /// \return a reference to the abbreviation list. + pub fn list(&self) -> &[Abbreviation] { + &self.abbrs + } +} + +/// \return the list of replacers for an input token, in priority order, using the global set. +/// The \p position is given to describe where the token was found. +fn abbrs_match_ffi(token: &CxxWString, position: abbrs_position_t) -> Vec { + with_abbrs(|set| set.r#match(&token.from_ffi(), position.into())) + .into_iter() + .map(|r| r.into()) + .collect() +} + +fn abbrs_has_match_ffi(token: &CxxWString, position: abbrs_position_t) -> bool { + with_abbrs(|set| set.has_match(&token.from_ffi(), position.into())) +} + +fn abbrs_list_ffi() -> Vec { + with_abbrs(|set| -> Vec { + let list = set.list(); + let mut result = Vec::with_capacity(list.len()); + for abbr in list { + result.push(abbreviation_t { + key: abbr.key.to_ffi(), + replacement: abbr.replacement.to_ffi(), + is_regex: abbr.is_regex(), + }) + } + + result + }) +} + +fn abbrs_get_set_ffi<'a>() -> Box> { + let abbrs_g = abbrs.lock().unwrap(); + Box::new(GlobalAbbrs { g: abbrs_g }) +} + +fn abbrs_replacement_from_ffi( + range: SourceRange, + text: &CxxWString, + set_cursor_marker: &CxxWString, + has_cursor_marker: bool, +) -> abbrs_replacement_t { + let cursor_marker = if has_cursor_marker { + Some(set_cursor_marker.from_ffi()) + } else { + None + }; + + let replacement = Replacement::from(range, text.from_ffi(), cursor_marker); + + abbrs_replacement_t { + range, + text: replacement.text.to_ffi(), + cursor: replacement.cursor.unwrap_or_default(), + has_cursor: replacement.cursor.is_some(), + } +} + +pub struct GlobalAbbrs<'a> { + g: MutexGuard<'a, AbbreviationSet>, +} + +impl<'a> GlobalAbbrs<'a> { + fn add( + &mut self, + name: &CxxWString, + key: &CxxWString, + replacement: &CxxWString, + position: abbrs_position_t, + from_universal: bool, + ) { + self.g.add(Abbreviation::new( + name.from_ffi(), + key.from_ffi(), + replacement.from_ffi(), + position.into(), + from_universal, + )); + } + + fn erase(&mut self, name: &CxxWString) { + self.g.erase(&name.from_ffi()); + } +} +use crate::ffi_tests::add_test; +add_test!("rename_abbrs", || { + use crate::wchar::wstr; + use crate::{ + abbrs::{Abbreviation, Position}, + wchar::L, + }; + + with_abbrs_mut(|abbrs_g| { + let mut add = |name: &wstr, repl: &wstr, position: Position| { + abbrs_g.add(Abbreviation { + name: name.into(), + key: name.into(), + regex: None, + replacement: repl.into(), + replacement_is_function: false, + position, + set_cursor_marker: None, + from_universal: false, + }) + }; + add(L!("gc"), L!("git checkout"), Position::Command); + add(L!("foo"), L!("bar"), Position::Command); + add(L!("gx"), L!("git checkout"), Position::Command); + add(L!("yin"), L!("yang"), Position::Anywhere); + + assert!(!abbrs_g.has_name(L!("gcc"))); + assert!(abbrs_g.has_name(L!("gc"))); + + abbrs_g.rename(L!("gc"), L!("gcc")); + assert!(abbrs_g.has_name(L!("gcc"))); + assert!(!abbrs_g.has_name(L!("gc"))); + + assert!(!abbrs_g.erase(L!("gc"))); + assert!(abbrs_g.erase(L!("gcc"))); + assert!(!abbrs_g.erase(L!("gcc"))); + }) +}); diff --git a/fish-rust/src/builtins/abbr.rs b/fish-rust/src/builtins/abbr.rs new file mode 100644 index 000000000..bd4ac9d7f --- /dev/null +++ b/fish-rust/src/builtins/abbr.rs @@ -0,0 +1,604 @@ +use crate::abbrs::{self, Abbreviation, Position}; +use crate::builtins::shared::{ + builtin_missing_argument, builtin_print_error_trailer, builtin_print_help, + builtin_unknown_option, io_streams_t, BUILTIN_ERR_TOO_MANY_ARGUMENTS, STATUS_CMD_ERROR, + STATUS_CMD_OK, STATUS_INVALID_ARGS, +}; +use crate::common::{escape_string, valid_func_name, EscapeStringStyle}; +use crate::env::flags::ENV_UNIVERSAL; +use crate::env::status::{ENV_NOT_FOUND, ENV_OK}; +use crate::ffi::{self, parser_t}; +use crate::re::regex_make_anchored; +use crate::wchar::{wstr, L}; +use crate::wchar_ffi::WCharFromFFI; +use crate::wgetopt::{wgetopter_t, wopt, woption, woption_argument_t}; +use crate::wutil::wgettext_fmt; +use libc::c_int; +pub use widestring::Utf32String as WString; + +const CMD: &wstr = L!("abbr"); + +#[derive(Default, Debug)] +struct Options { + add: bool, + rename: bool, + show: bool, + list: bool, + erase: bool, + query: bool, + function: Option, + regex_pattern: Option, + position: Option, + set_cursor_marker: Option, + args: Vec, +} + +impl Options { + fn validate(&mut self, streams: &mut io_streams_t) -> bool { + // Duplicate options? + let mut cmds = vec![]; + if self.add { + cmds.push(L!("add")) + }; + if self.rename { + cmds.push(L!("rename")) + }; + if self.show { + cmds.push(L!("show")) + }; + if self.list { + cmds.push(L!("list")) + }; + if self.erase { + cmds.push(L!("erase")) + }; + if self.query { + cmds.push(L!("query")) + }; + + if cmds.len() > 1 { + streams.err.append(wgettext_fmt!( + "%ls: Cannot combine options %ls\n", + CMD, + join(&cmds, L!(", ")) + )); + return false; + } + + // If run with no options, treat it like --add if we have arguments, + // or --show if we do not have any arguments. + if cmds.is_empty() { + self.show = self.args.is_empty(); + self.add = !self.args.is_empty(); + } + + if !self.add && self.position.is_some() { + streams.err.append(wgettext_fmt!( + "%ls: --position option requires --add\n", + CMD + )); + return false; + } + if !self.add && self.regex_pattern.is_some() { + streams + .err + .append(wgettext_fmt!("%ls: --regex option requires --add\n", CMD)); + return false; + } + if !self.add && self.function.is_some() { + streams.err.append(wgettext_fmt!( + "%ls: --function option requires --add\n", + CMD + )); + return false; + } + if !self.add && self.set_cursor_marker.is_some() { + streams.err.append(wgettext_fmt!( + "%ls: --set-cursor option requires --add\n", + CMD + )); + return false; + } + if self + .set_cursor_marker + .as_ref() + .map(|m| m.is_empty()) + .unwrap_or(false) + { + streams.err.append(wgettext_fmt!( + "%ls: --set-cursor argument cannot be empty\n", + CMD + )); + return false; + } + + return true; + } +} + +fn join(list: &[&wstr], sep: &wstr) -> WString { + let mut result = WString::new(); + let mut iter = list.iter(); + + let first = match iter.next() { + Some(first) => first, + None => return result, + }; + result.push_utfstr(first); + + for s in iter { + result.push_utfstr(sep); + result.push_utfstr(s); + } + result +} + +// Print abbreviations in a fish-script friendly way. +fn abbr_show(streams: &mut io_streams_t) -> Option { + let style = EscapeStringStyle::Script(Default::default()); + + abbrs::with_abbrs(|abbrs| { + let mut result = WString::new(); + for abbr in abbrs.list() { + result.clear(); + let mut add_arg = |arg: &wstr| { + if !result.is_empty() { + result.push_str(" "); + } + result.push_utfstr(arg); + }; + + add_arg(L!("abbr -a")); + if abbr.is_regex() { + add_arg(L!("--regex")); + add_arg(&escape_string(&abbr.key, style)); + } + if abbr.position != Position::Command { + add_arg(L!("--position")); + add_arg(L!("anywhere")); + } + if let Some(ref set_cursor_marker) = abbr.set_cursor_marker { + add_arg(L!("--set-cursor=")); + add_arg(&escape_string(set_cursor_marker, style)); + } + if abbr.replacement_is_function { + add_arg(L!("--function")); + add_arg(&escape_string(&abbr.replacement, style)); + } + add_arg(L!("--")); + // Literal abbreviations have the name and key as the same. + // Regex abbreviations have a pattern separate from the name. + add_arg(&escape_string(&abbr.name, style)); + if !abbr.replacement_is_function { + add_arg(&escape_string(&abbr.replacement, style)); + } + if abbr.from_universal { + add_arg(L!("# imported from a universal variable, see `help abbr`")); + } + result.push('\n'); + streams.out.append(&result); + } + }); + + return STATUS_CMD_OK; +} + +// Print the list of abbreviation names. +fn abbr_list(opts: &Options, streams: &mut io_streams_t) -> Option { + const subcmd: &wstr = L!("--list"); + if !opts.args.is_empty() { + streams.err.append(wgettext_fmt!( + "%ls %ls: Unexpected argument -- '%ls'\n", + CMD, + subcmd, + opts.args[0] + )); + return STATUS_INVALID_ARGS; + } + abbrs::with_abbrs(|abbrs| { + for abbr in abbrs.list() { + let mut name = abbr.name.clone(); + name.push('\n'); + streams.out.append(name); + } + }); + + return STATUS_CMD_OK; +} + +// Rename an abbreviation, deleting any existing one with the given name. +fn abbr_rename(opts: &Options, streams: &mut io_streams_t) -> Option { + const subcmd: &wstr = L!("--rename"); + + if opts.args.len() != 2 { + streams.err.append(wgettext_fmt!( + "%ls %ls: Requires exactly two arguments\n", + CMD, + subcmd + )); + return STATUS_INVALID_ARGS; + } + let old_name = &opts.args[0]; + let new_name = &opts.args[1]; + if old_name.is_empty() || new_name.is_empty() { + streams.err.append(wgettext_fmt!( + "%ls %ls: Name cannot be empty\n", + CMD, + subcmd + )); + return STATUS_INVALID_ARGS; + } + + if contains_whitespace(new_name) { + streams.err.append(wgettext_fmt!( + "%ls %ls: Abbreviation '%ls' cannot have spaces in the word\n", + CMD, + subcmd, + new_name.as_utfstr() + )); + return STATUS_INVALID_ARGS; + } + abbrs::with_abbrs_mut(|abbrs| -> Option { + if !abbrs.has_name(old_name) { + streams.err.append(wgettext_fmt!( + "%ls %ls: No abbreviation named %ls\n", + CMD, + subcmd, + old_name.as_utfstr() + )); + return STATUS_CMD_ERROR; + } + if abbrs.has_name(new_name) { + streams.err.append(wgettext_fmt!( + "%ls %ls: Abbreviation %ls already exists, cannot rename %ls\n", + CMD, + subcmd, + new_name.as_utfstr(), + old_name.as_utfstr() + )); + return STATUS_INVALID_ARGS; + } + abbrs.rename(old_name, new_name); + STATUS_CMD_OK + }) +} + +fn contains_whitespace(val: &wstr) -> bool { + val.chars().any(char::is_whitespace) +} + +// Test if any args is an abbreviation. +fn abbr_query(opts: &Options) -> Option { + // Return success if any of our args matches an abbreviation. + abbrs::with_abbrs(|abbrs| { + for arg in opts.args.iter() { + if abbrs.has_name(arg) { + return STATUS_CMD_OK; + } + } + return STATUS_CMD_ERROR; + }) +} + +// Add a named abbreviation. +fn abbr_add(opts: &Options, streams: &mut io_streams_t) -> Option { + const subcmd: &wstr = L!("--add"); + + if opts.args.len() < 2 && opts.function.is_none() { + streams.err.append(wgettext_fmt!( + "%ls %ls: Requires at least two arguments\n", + CMD, + subcmd + )); + return STATUS_INVALID_ARGS; + } + + if opts.args.is_empty() || opts.args[0].is_empty() { + streams.err.append(wgettext_fmt!( + "%ls %ls: Name cannot be empty\n", + CMD, + subcmd + )); + return STATUS_INVALID_ARGS; + } + let name = &opts.args[0]; + if name.chars().any(|c| c.is_whitespace()) { + streams.err.append(wgettext_fmt!( + "%ls %ls: Abbreviation '%ls' cannot have spaces in the word\n", + CMD, + subcmd, + name.as_utfstr() + )); + return STATUS_INVALID_ARGS; + } + + let mut regex = None; + + let key = if let Some(ref regex_pattern) = opts.regex_pattern { + // Compile the regex as given; if that succeeds then wrap it in our ^$ so it matches the + // entire token. + let flags = ffi::re::flags_t { icase: false }; + let result = ffi::try_compile(regex_pattern, &flags); + + if result.has_error() { + let error = result.get_error(); + streams.err.append(wgettext_fmt!( + "%ls: Regular expression compile error: %ls\n", + CMD, + &error.message().from_ffi() + )); + streams + .err + .append(wgettext_fmt!("%ls: %ls\n", CMD, regex_pattern.as_utfstr())); + streams + .err + .append(wgettext_fmt!("%ls: %*ls\n", CMD, error.offset, "^")); + return STATUS_INVALID_ARGS; + } + let anchored = regex_make_anchored(regex_pattern); + let mut result = ffi::try_compile(&anchored, &flags); + assert!( + !result.has_error(), + "Anchored compilation should have succeeded" + ); + let re = result.as_mut().get_regex(); + assert!(!re.is_null(), "Anchored compilation should have succeeded"); + + let _ = regex.insert(re); + regex_pattern + } else { + // The name plays double-duty as the token to replace. + name + }; + + if opts.function.is_some() && opts.args.len() > 1 { + streams + .err + .append(wgettext_fmt!(BUILTIN_ERR_TOO_MANY_ARGUMENTS, L!("abbr"))); + return STATUS_INVALID_ARGS; + } + let replacement = if let Some(ref function) = opts.function { + // Abbreviation function names disallow spaces. + // This is to prevent accidental usage of e.g. `--function 'string replace'` + if !valid_func_name(function) || contains_whitespace(function) { + streams.err.append(wgettext_fmt!( + "%ls: Invalid function name: %ls\n", + CMD, + function.as_utfstr() + )); + return STATUS_INVALID_ARGS; + } + function.clone() + } else { + let mut replacement = WString::new(); + for iter in opts.args.iter().skip(1) { + if !replacement.is_empty() { + replacement.push(' ') + }; + replacement.push_utfstr(iter); + } + replacement + }; + + let position = opts.position.unwrap_or(Position::Command); + + // Note historically we have allowed overwriting existing abbreviations. + abbrs::with_abbrs_mut(move |abbrs| { + abbrs.add(Abbreviation { + name: name.clone(), + key: key.clone(), + regex, + replacement, + replacement_is_function: opts.function.is_some(), + position, + set_cursor_marker: opts.set_cursor_marker.clone(), + from_universal: false, + }) + }); + + return STATUS_CMD_OK; +} + +// Erase the named abbreviations. +fn abbr_erase(opts: &Options, parser: &mut parser_t) -> Option { + if opts.args.is_empty() { + // This has historically been a silent failure. + return STATUS_CMD_ERROR; + } + + // Erase each. If any is not found, return ENV_NOT_FOUND which is historical. + abbrs::with_abbrs_mut(|abbrs| -> Option { + let mut result = STATUS_CMD_OK; + for arg in &opts.args { + if !abbrs.erase(arg) { + result = Some(ENV_NOT_FOUND); + } + // Erase the old uvar - this makes `abbr -e` work. + let esc_src = escape_string(arg, EscapeStringStyle::Script(Default::default())); + if !esc_src.is_empty() { + let var_name = WString::from_str("_fish_abbr_") + esc_src.as_utfstr(); + let ret = parser.remove_var(&var_name, ENV_UNIVERSAL); + + if ret == autocxx::c_int(ENV_OK) { + result = STATUS_CMD_OK + }; + } + } + result + }) +} + +pub fn abbr( + parser: &mut parser_t, + streams: &mut io_streams_t, + argv: &mut [&wstr], +) -> Option { + let mut argv_read = Vec::with_capacity(argv.len()); + argv_read.extend_from_slice(argv); + + let cmd = argv[0]; + // Note 1 is returned by wgetopt to indicate a non-option argument. + const NON_OPTION_ARGUMENT: char = 1 as char; + const SET_CURSOR_SHORT: char = 2 as char; + const RENAME_SHORT: char = 3 as char; + + // Note the leading '-' causes wgetopter to return arguments in order, instead of permuting + // them. We need this behavior for compatibility with pre-builtin abbreviations where options + // could be given literally, for example `abbr e emacs -nw`. + const short_options: &wstr = L!("-:af:r:seqgUh"); + + const longopts: &[woption] = &[ + wopt(L!("add"), woption_argument_t::no_argument, 'a'), + wopt(L!("position"), woption_argument_t::required_argument, 'p'), + wopt(L!("regex"), woption_argument_t::required_argument, 'r'), + wopt( + L!("set-cursor"), + woption_argument_t::optional_argument, + SET_CURSOR_SHORT, + ), + wopt(L!("function"), woption_argument_t::required_argument, 'f'), + wopt(L!("rename"), woption_argument_t::no_argument, RENAME_SHORT), + wopt(L!("erase"), woption_argument_t::no_argument, 'e'), + wopt(L!("query"), woption_argument_t::no_argument, 'q'), + wopt(L!("show"), woption_argument_t::no_argument, 's'), + wopt(L!("list"), woption_argument_t::no_argument, 'l'), + wopt(L!("global"), woption_argument_t::no_argument, 'g'), + wopt(L!("universal"), woption_argument_t::no_argument, 'U'), + wopt(L!("help"), woption_argument_t::no_argument, 'h'), + ]; + + let mut opts = Options::default(); + let mut w = wgetopter_t::new(short_options, longopts, argv); + + while let Some(c) = w.wgetopt_long() { + match c { + NON_OPTION_ARGUMENT => { + // If --add is specified (or implied by specifying no other commands), all + // unrecognized options after the *second* non-option argument are considered part + // of the abbreviation expansion itself, rather than options to the abbr command. + // For example, `abbr e emacs -nw` works, because `-nw` occurs after the second + // non-option, and --add is implied. + if let Some(arg) = w.woptarg { + opts.args.push(arg.to_owned()) + }; + if opts.args.len() >= 2 + && !(opts.rename || opts.show || opts.list || opts.erase || opts.query) + { + break; + } + } + 'a' => opts.add = true, + 'p' => { + if opts.position.is_some() { + streams.err.append(wgettext_fmt!( + "%ls: Cannot specify multiple positions\n", + CMD + )); + return STATUS_INVALID_ARGS; + } + if w.woptarg == Some(L!("command")) { + opts.position = Some(Position::Command); + } else if w.woptarg == Some(L!("anywhere")) { + opts.position = Some(Position::Anywhere); + } else { + streams.err.append(wgettext_fmt!( + "%ls: Invalid position '%ls'\n", + CMD, + w.woptarg.unwrap_or_default() + )); + streams + .err + .append(L!("Position must be one of: command, anywhere.\n")); + return STATUS_INVALID_ARGS; + } + } + 'r' => { + if opts.regex_pattern.is_some() { + streams.err.append(wgettext_fmt!( + "%ls: Cannot specify multiple regex patterns\n", + CMD + )); + return STATUS_INVALID_ARGS; + } + opts.regex_pattern = w.woptarg.map(ToOwned::to_owned); + } + SET_CURSOR_SHORT => { + if opts.set_cursor_marker.is_some() { + streams.err.append(wgettext_fmt!( + "%ls: Cannot specify multiple set-cursor options\n", + CMD + )); + return STATUS_INVALID_ARGS; + } + // The default set-cursor indicator is '%'. + let _ = opts + .set_cursor_marker + .insert(w.woptarg.unwrap_or(L!("%")).to_owned()); + } + 'f' => opts.function = w.woptarg.map(ToOwned::to_owned), + RENAME_SHORT => opts.rename = true, + 'e' => opts.erase = true, + 'q' => opts.query = true, + 's' => opts.show = true, + 'l' => opts.list = true, + // Kept for backwards compatibility but ignored. + // This basically does nothing now. + 'g' => {} + + 'U' => { + // Kept and made ineffective, so we warn. + streams.err.append(wgettext_fmt!( + "%ls: Warning: Option '%ls' was removed and is now ignored", + cmd, + argv_read[w.woptind - 1] + )); + builtin_print_error_trailer(parser, streams, cmd); + } + 'h' => { + builtin_print_help(parser, streams, cmd); + return STATUS_CMD_OK; + } + ':' => { + builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1], true); + return STATUS_INVALID_ARGS; + } + '?' => { + builtin_unknown_option(parser, streams, cmd, argv[w.woptind - 1], false); + return STATUS_INVALID_ARGS; + } + _ => { + panic!("unexpected retval from wgeopter.next()"); + } + } + } + + for arg in argv_read[w.woptind..].iter() { + opts.args.push((*arg).into()); + } + + if !opts.validate(streams) { + return STATUS_INVALID_ARGS; + } + + if opts.add { + return abbr_add(&opts, streams); + }; + if opts.show { + return abbr_show(streams); + }; + if opts.list { + return abbr_list(&opts, streams); + }; + if opts.rename { + return abbr_rename(&opts, streams); + }; + if opts.erase { + return abbr_erase(&opts, parser); + }; + if opts.query { + return abbr_query(&opts); + }; + + // validate() should error or ensure at least one path is set. + panic!("unreachable"); +} diff --git a/fish-rust/src/builtins/mod.rs b/fish-rust/src/builtins/mod.rs index da78b3768..42fc971fb 100644 --- a/fish-rust/src/builtins/mod.rs +++ b/fish-rust/src/builtins/mod.rs @@ -1,5 +1,6 @@ pub mod shared; +pub mod abbr; pub mod contains; pub mod echo; pub mod emit; diff --git a/fish-rust/src/builtins/shared.rs b/fish-rust/src/builtins/shared.rs index 3ac94b195..2ba08469a 100644 --- a/fish-rust/src/builtins/shared.rs +++ b/fish-rust/src/builtins/shared.rs @@ -118,6 +118,7 @@ pub fn run_builtin( builtin: RustBuiltin, ) -> Option { match builtin { + RustBuiltin::Abbr => super::abbr::abbr(parser, streams, args), RustBuiltin::Contains => super::contains::contains(parser, streams, args), RustBuiltin::Echo => super::echo::echo(parser, streams, args), RustBuiltin::Emit => super::emit::emit(parser, streams, args), diff --git a/fish-rust/src/common.rs b/fish-rust/src/common.rs index 6c03f45dc..9655ddb5b 100644 --- a/fish-rust/src/common.rs +++ b/fish-rust/src/common.rs @@ -1,4 +1,5 @@ use crate::ffi; +use crate::wchar_ext::WExt; use crate::wchar_ffi::c_str; use crate::wchar_ffi::{wstr, WCharFromFFI, WString}; use std::{ffi::c_uint, mem}; @@ -92,3 +93,21 @@ pub fn escape_string(s: &wstr, style: EscapeStringStyle) -> WString { ffi::escape_string(c_str!(s), flags_int.into(), style).from_ffi() } + +/// Test if the string is a valid function name. +pub fn valid_func_name(name: &wstr) -> bool { + if name.is_empty() { + return false; + }; + if name.char_at(0) == '-' { + return false; + }; + // A function name needs to be a valid path, so no / and no NULL. + if name.find_char('/').is_some() { + return false; + }; + if name.find_char('\0').is_some() { + return false; + }; + true +} diff --git a/fish-rust/src/ffi.rs b/fish-rust/src/ffi.rs index fd200eead..b2174810f 100644 --- a/fish-rust/src/ffi.rs +++ b/fish-rust/src/ffi.rs @@ -38,6 +38,7 @@ include_cpp! { generate!("wperror") generate_pod!("pipes_ffi_t") + generate!("env_stack_t") generate!("make_pipes_ffi") generate!("valid_var_name_char") @@ -102,6 +103,10 @@ impl parser_t { unsafe { &mut *libdata } } + + pub fn remove_var(&mut self, var: &wstr, flags: c_int) -> c_int { + self.pin().remove_var_ffi(&var.to_ffi(), flags) + } } pub fn try_compile(anchored: &wstr, flags: &re::flags_t) -> Pin> { diff --git a/fish-rust/src/lib.rs b/fish-rust/src/lib.rs index 55f55768f..3d6f31e22 100644 --- a/fish-rust/src/lib.rs +++ b/fish-rust/src/lib.rs @@ -34,4 +34,7 @@ mod wchar_ffi; mod wgetopt; mod wutil; +mod abbrs; mod builtins; +mod env; +mod re; diff --git a/fish-rust/src/parse_constants.rs b/fish-rust/src/parse_constants.rs index 0118c8f03..f6c1d04ba 100644 --- a/fish-rust/src/parse_constants.rs +++ b/fish-rust/src/parse_constants.rs @@ -70,7 +70,7 @@ mod parse_constants_ffi { } /// A range of source code. - #[derive(PartialEq, Eq)] + #[derive(PartialEq, Eq, Clone, Copy)] struct SourceRange { start: u32, length: u32, diff --git a/src/abbrs.cpp b/src/abbrs.cpp deleted file mode 100644 index a7b31c323..000000000 --- a/src/abbrs.cpp +++ /dev/null @@ -1,134 +0,0 @@ -#include "config.h" // IWYU pragma: keep - -#include "abbrs.h" - -#include "env.h" -#include "global_safety.h" -#include "wcstringutil.h" - -abbreviation_t::abbreviation_t(wcstring name, wcstring key, wcstring replacement, - abbrs_position_t position, bool from_universal) - : name(std::move(name)), - key(std::move(key)), - replacement(std::move(replacement)), - position(position), - from_universal(from_universal) {} - -bool abbreviation_t::matches_position(abbrs_position_t position) const { - return this->position == abbrs_position_t::anywhere || this->position == position; -} - -bool abbreviation_t::matches(const wcstring &token, abbrs_position_t position) const { - if (!this->matches_position(position)) { - return false; - } - if (this->is_regex()) { - return this->regex->match(token).has_value(); - } else { - return this->key == token; - } -} - -acquired_lock abbrs_get_set() { - static owning_lock abbrs; - return abbrs.acquire(); -} - -abbrs_replacer_list_t abbrs_set_t::match(const wcstring &token, abbrs_position_t position) const { - abbrs_replacer_list_t result{}; - // Later abbreviations take precedence so walk backwards. - for (auto it = abbrs_.rbegin(); it != abbrs_.rend(); ++it) { - const abbreviation_t &abbr = *it; - if (abbr.matches(token, position)) { - result.push_back(abbrs_replacer_t{abbr.replacement, abbr.replacement_is_function, - abbr.set_cursor_marker}); - } - } - return result; -} - -bool abbrs_set_t::has_match(const wcstring &token, abbrs_position_t position) const { - for (const auto &abbr : abbrs_) { - if (abbr.matches(token, position)) { - return true; - } - } - return false; -} - -void abbrs_set_t::add(abbreviation_t &&abbr) { - assert(!abbr.name.empty() && "Invalid name"); - bool inserted = used_names_.insert(abbr.name).second; - if (!inserted) { - // Name was already used, do a linear scan to find it. - auto where = std::find_if(abbrs_.begin(), abbrs_.end(), [&](const abbreviation_t &other) { - return other.name == abbr.name; - }); - assert(where != abbrs_.end() && "Abbreviation not found though its name was present"); - abbrs_.erase(where); - } - abbrs_.push_back(std::move(abbr)); -} - -void abbrs_set_t::rename(const wcstring &old_name, const wcstring &new_name) { - bool erased = this->used_names_.erase(old_name) > 0; - bool inserted = this->used_names_.insert(new_name).second; - assert(erased && inserted && "Old name not found or new name already present"); - (void)erased; - (void)inserted; - for (auto &abbr : abbrs_) { - if (abbr.name == old_name) { - abbr.name = new_name; - break; - } - } -} - -bool abbrs_set_t::erase(const wcstring &name) { - bool erased = this->used_names_.erase(name) > 0; - if (!erased) { - return false; - } - for (auto it = abbrs_.begin(); it != abbrs_.end(); ++it) { - if (it->name == name) { - abbrs_.erase(it); - return true; - } - } - assert(false && "Unable to find named abbreviation"); - return false; -} - -void abbrs_set_t::import_from_uvars(const std::unordered_map &uvars) { - const wchar_t *const prefix = L"_fish_abbr_"; - size_t prefix_len = wcslen(prefix); - const bool from_universal = true; - for (const auto &kv : uvars) { - if (string_prefixes_string(prefix, kv.first)) { - wcstring escaped_name = kv.first.substr(prefix_len); - wcstring name; - if (unescape_string(escaped_name, &name, unescape_flags_t{}, STRING_STYLE_VAR)) { - wcstring key = name; - wcstring replacement = join_strings(kv.second.as_list(), L' '); - this->add(abbreviation_t{std::move(name), std::move(key), std::move(replacement), - abbrs_position_t::command, from_universal}); - } - } - } -} - -// static -abbrs_replacement_t abbrs_replacement_t::from(source_range_t range, wcstring text, - const abbrs_replacer_t &replacer) { - abbrs_replacement_t result{}; - result.range = range; - result.text = std::move(text); - if (replacer.set_cursor_marker.has_value()) { - size_t pos = result.text.find(*replacer.set_cursor_marker); - if (pos != wcstring::npos) { - result.text.erase(pos, replacer.set_cursor_marker->size()); - result.cursor = pos + range.start; - } - } - return result; -} diff --git a/src/abbrs.h b/src/abbrs.h index f257eb511..fab82975c 100644 --- a/src/abbrs.h +++ b/src/abbrs.h @@ -11,139 +11,17 @@ #include "parse_constants.h" #include "re.h" -class env_var_t; +#if INCLUDE_RUST_HEADERS -/// Controls where in the command line abbreviations may expand. -enum class abbrs_position_t : uint8_t { - command, // expand in command position - anywhere, // expand in any token -}; +#include "abbrs.rs.h" -struct abbreviation_t { - // Abbreviation name. This is unique within the abbreviation set. - // This is used as the token to match unless we have a regex. - wcstring name{}; +#else +// Hacks to allow us to compile without Rust headers. +struct abbrs_replacer_t; - /// The key (recognized token) - either a literal or a regex pattern. - wcstring key{}; +struct abbrs_replacement_t; - /// If set, use this regex to recognize tokens. - /// If unset, the key is to be interpreted literally. - /// Note that the fish interface enforces that regexes match the entire token; - /// we accomplish this by surrounding the regex in ^ and $. - maybe_t regex{}; - - /// Replacement string. - wcstring replacement{}; - - /// If set, the replacement is a function name. - bool replacement_is_function{}; - - /// Expansion position. - abbrs_position_t position{abbrs_position_t::command}; - - /// If set, then move the cursor to the first instance of this string in the expansion. - maybe_t set_cursor_marker{}; - - /// Mark if we came from a universal variable. - bool from_universal{}; - - // \return true if this is a regex abbreviation. - bool is_regex() const { return this->regex.has_value(); } - - // \return true if we match a token at a given position. - bool matches(const wcstring &token, abbrs_position_t position) const; - - // Construct from a name, a key which matches a token, a replacement token, a position, and - // whether we are derived from a universal variable. - explicit abbreviation_t(wcstring name, wcstring key, wcstring replacement, - abbrs_position_t position = abbrs_position_t::command, - bool from_universal = false); - - abbreviation_t() = default; - - private: - // \return if we expand in a given position. - bool matches_position(abbrs_position_t position) const; -}; - -/// The result of an abbreviation expansion. -struct abbrs_replacer_t { - /// The string to use to replace the incoming token, either literal or as a function name. - wcstring replacement; - - /// If true, treat 'replacement' as the name of a function. - bool is_function; - - /// If set, the cursor should be moved to the first instance of this string in the expansion. - maybe_t set_cursor_marker; -}; -using abbrs_replacer_list_t = std::vector; - -/// A helper type for replacing a range in a string. -struct abbrs_replacement_t { - /// The original range of the token in the command line. - source_range_t range{}; - - /// The string to replace with. - wcstring text{}; - - /// The new cursor location, or none to use the default. - /// This is relative to the original range. - maybe_t cursor{}; - - /// Construct a replacement from a replacer. - /// The \p range is the range of the text matched by the replacer in the command line. - /// The text is passed in separately as it may be the output of the replacer's function. - static abbrs_replacement_t from(source_range_t range, wcstring text, - const abbrs_replacer_t &replacer); -}; - -class abbrs_set_t { - public: - /// \return the list of replacers for an input token, in priority order. - /// The \p position is given to describe where the token was found. - abbrs_replacer_list_t match(const wcstring &token, abbrs_position_t position) const; - - /// \return whether we would have at least one replacer for a given token. - bool has_match(const wcstring &token, abbrs_position_t position) const; - - /// Add an abbreviation. Any abbreviation with the same name is replaced. - void add(abbreviation_t &&abbr); - - /// Rename an abbreviation. This asserts that the old name is used, and the new name is not; the - /// caller should check these beforehand with has_name(). - void rename(const wcstring &old_name, const wcstring &new_name); - - /// Erase an abbreviation by name. - /// \return true if erased, false if not found. - bool erase(const wcstring &name); - - /// \return true if we have an abbreviation with the given name. - bool has_name(const wcstring &name) const { return used_names_.count(name) > 0; } - - /// \return a reference to the abbreviation list. - const std::vector &list() const { return abbrs_; } - - /// Import from a universal variable set. - void import_from_uvars(const std::unordered_map &uvars); - - private: - /// List of abbreviations, in definition order. - std::vector abbrs_{}; - - /// Set of used abbrevation names. - /// This is to avoid a linear scan when adding new abbreviations. - std::unordered_set used_names_; -}; - -/// \return the global mutable set of abbreviations. -acquired_lock abbrs_get_set(); - -/// \return the list of replacers for an input token, in priority order, using the global set. -/// The \p position is given to describe where the token was found. -inline abbrs_replacer_list_t abbrs_match(const wcstring &token, abbrs_position_t position) { - return abbrs_get_set()->match(token, position); -} +struct abbreviation_t; +#endif #endif diff --git a/src/builtin.cpp b/src/builtin.cpp index c19e14b36..ea05cfc0c 100644 --- a/src/builtin.cpp +++ b/src/builtin.cpp @@ -29,7 +29,6 @@ #include #include -#include "builtins/abbr.h" #include "builtins/argparse.h" #include "builtins/bg.h" #include "builtins/bind.h" @@ -359,7 +358,7 @@ static constexpr builtin_data_t builtin_datas[] = { {L":", &builtin_true, N_(L"Return a successful result")}, {L"[", &builtin_test, N_(L"Test a condition")}, {L"_", &builtin_gettext, N_(L"Translate a string")}, - {L"abbr", &builtin_abbr, N_(L"Manage abbreviations")}, + {L"abbr", &implemented_in_rust, N_(L"Manage abbreviations")}, {L"and", &builtin_generic, N_(L"Run command if last command succeeded")}, {L"argparse", &builtin_argparse, N_(L"Parse options in fish script")}, {L"begin", &builtin_generic, N_(L"Create a block of code")}, @@ -523,6 +522,9 @@ const wchar_t *builtin_get_desc(const wcstring &name) { } static maybe_t try_get_rust_builtin(const wcstring &cmd) { + if (cmd == L"abbr") { + return RustBuiltin::Abbr; + } if (cmd == L"contains") { return RustBuiltin::Contains; } diff --git a/src/builtin.h b/src/builtin.h index 7b74d40e3..cf4727dea 100644 --- a/src/builtin.h +++ b/src/builtin.h @@ -109,6 +109,7 @@ int parse_help_only_cmd_opts(help_only_cmd_opts_t &opts, int *optind, int argc, /// An enum of the builtins implemented in Rust. enum RustBuiltin : int32_t { + Abbr, Contains, Echo, Emit, diff --git a/src/builtins/abbr.cpp b/src/builtins/abbr.cpp deleted file mode 100644 index 7e202712e..000000000 --- a/src/builtins/abbr.cpp +++ /dev/null @@ -1,435 +0,0 @@ -// Implementation of the read builtin. -#include "config.h" // IWYU pragma: keep - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../abbrs.h" -#include "../builtin.h" -#include "../common.h" -#include "../env.h" -#include "../io.h" -#include "../parser.h" -#include "../re.h" -#include "../wcstringutil.h" -#include "../wgetopt.h" -#include "../wutil.h" - -namespace { - -static const wchar_t *const CMD = L"abbr"; - -struct abbr_options_t { - bool add{}; - bool rename{}; - bool show{}; - bool list{}; - bool erase{}; - bool query{}; - maybe_t function; - maybe_t regex_pattern; - maybe_t position{}; - maybe_t set_cursor_marker{}; - - wcstring_list_t args; - - bool validate(io_streams_t &streams) { - // Duplicate options? - wcstring_list_t cmds; - if (add) cmds.push_back(L"add"); - if (rename) cmds.push_back(L"rename"); - if (show) cmds.push_back(L"show"); - if (list) cmds.push_back(L"list"); - if (erase) cmds.push_back(L"erase"); - if (query) cmds.push_back(L"query"); - if (cmds.size() > 1) { - streams.err.append_format(_(L"%ls: Cannot combine options %ls\n"), CMD, - join_strings(cmds, L", ").c_str()); - return false; - } - // If run with no options, treat it like --add if we have arguments, - // or --show if we do not have any arguments. - if (cmds.empty()) { - show = args.empty(); - add = !args.empty(); - } - - if (!add && position.has_value()) { - streams.err.append_format(_(L"%ls: --position option requires --add\n"), CMD); - return false; - } - if (!add && regex_pattern.has_value()) { - streams.err.append_format(_(L"%ls: --regex option requires --add\n"), CMD); - return false; - } - if (!add && function.has_value()) { - streams.err.append_format(_(L"%ls: --function option requires --add\n"), CMD); - return false; - } - if (!add && set_cursor_marker.has_value()) { - streams.err.append_format(_(L"%ls: --set-cursor option requires --add\n"), CMD); - return false; - } - if (set_cursor_marker.has_value() && set_cursor_marker->empty()) { - streams.err.append_format(_(L"%ls: --set-cursor argument cannot be empty\n"), CMD); - return false; - } - - return true; - } -}; - -// Print abbreviations in a fish-script friendly way. -static int abbr_show(const abbr_options_t &, io_streams_t &streams) { - const auto abbrs = abbrs_get_set(); - wcstring_list_t comps{}; - for (const auto &abbr : abbrs->list()) { - comps.clear(); - comps.push_back(L"abbr -a"); - if (abbr.is_regex()) { - comps.push_back(L"--regex"); - comps.push_back(escape_string(abbr.key)); - } - if (abbr.position != abbrs_position_t::command) { - comps.push_back(L"--position"); - comps.push_back(L"anywhere"); - } - if (abbr.set_cursor_marker.has_value()) { - comps.push_back(L"--set-cursor=" + escape_string(*abbr.set_cursor_marker)); - } - if (abbr.replacement_is_function) { - comps.push_back(L"--function"); - comps.push_back(escape_string(abbr.replacement)); - } - comps.push_back(L"--"); - // Literal abbreviations have the name and key as the same. - // Regex abbreviations have a pattern separate from the name. - comps.push_back(escape_string(abbr.name)); - if (!abbr.replacement_is_function) { - comps.push_back(escape_string(abbr.replacement)); - } - if (abbr.from_universal) comps.push_back(_(L"# imported from a universal variable, see `help abbr`")); - wcstring result = join_strings(comps, L' '); - result.push_back(L'\n'); - streams.out.append(result); - } - return STATUS_CMD_OK; -} - -// Print the list of abbreviation names. -static int abbr_list(const abbr_options_t &opts, io_streams_t &streams) { - const wchar_t *const subcmd = L"--list"; - if (opts.args.size() > 0) { - streams.err.append_format(_(L"%ls %ls: Unexpected argument -- '%ls'\n"), CMD, subcmd, - opts.args.front().c_str()); - return STATUS_INVALID_ARGS; - } - const auto abbrs = abbrs_get_set(); - for (const auto &abbr : abbrs->list()) { - wcstring name = abbr.name; - name.push_back(L'\n'); - streams.out.append(name); - } - return STATUS_CMD_OK; -} - -// Rename an abbreviation, deleting any existing one with the given name. -static int abbr_rename(const abbr_options_t &opts, io_streams_t &streams) { - const wchar_t *const subcmd = L"--rename"; - if (opts.args.size() != 2) { - streams.err.append_format(_(L"%ls %ls: Requires exactly two arguments\n"), CMD, subcmd); - return STATUS_INVALID_ARGS; - } - const wcstring &old_name = opts.args[0]; - const wcstring &new_name = opts.args[1]; - if (old_name.empty() || new_name.empty()) { - streams.err.append_format(_(L"%ls %ls: Name cannot be empty\n"), CMD, subcmd); - return STATUS_INVALID_ARGS; - } - - if (std::any_of(new_name.begin(), new_name.end(), iswspace)) { - streams.err.append_format( - _(L"%ls %ls: Abbreviation '%ls' cannot have spaces in the word\n"), CMD, subcmd, - new_name.c_str()); - return STATUS_INVALID_ARGS; - } - auto abbrs = abbrs_get_set(); - - if (!abbrs->has_name(old_name)) { - streams.err.append_format(_(L"%ls %ls: No abbreviation named %ls\n"), CMD, subcmd, - old_name.c_str()); - return STATUS_CMD_ERROR; - } - if (abbrs->has_name(new_name)) { - streams.err.append_format( - _(L"%ls %ls: Abbreviation %ls already exists, cannot rename %ls\n"), CMD, subcmd, - new_name.c_str(), old_name.c_str()); - return STATUS_INVALID_ARGS; - } - abbrs->rename(old_name, new_name); - return STATUS_CMD_OK; -} - -// Test if any args is an abbreviation. -static int abbr_query(const abbr_options_t &opts, io_streams_t &) { - // Return success if any of our args matches an abbreviation. - const auto abbrs = abbrs_get_set(); - for (const auto &arg : opts.args) { - if (abbrs->has_name(arg)) { - return STATUS_CMD_OK; - } - } - return STATUS_CMD_ERROR; -} - -// Add a named abbreviation. -static int abbr_add(const abbr_options_t &opts, io_streams_t &streams) { - const wchar_t *const subcmd = L"--add"; - if (opts.args.size() < 2 && !opts.function.has_value()) { - streams.err.append_format(_(L"%ls %ls: Requires at least two arguments\n"), CMD, subcmd); - return STATUS_INVALID_ARGS; - } - - if (opts.args.empty() || opts.args[0].empty()) { - streams.err.append_format(_(L"%ls %ls: Name cannot be empty\n"), CMD, subcmd); - return STATUS_INVALID_ARGS; - } - const wcstring &name = opts.args[0]; - if (std::any_of(name.begin(), name.end(), iswspace)) { - streams.err.append_format( - _(L"%ls %ls: Abbreviation '%ls' cannot have spaces in the word\n"), CMD, subcmd, - name.c_str()); - return STATUS_INVALID_ARGS; - } - - maybe_t regex; - wcstring key; - if (!opts.regex_pattern.has_value()) { - // The name plays double-duty as the token to replace. - key = name; - } else { - key = *opts.regex_pattern; - re::re_error_t error{}; - // Compile the regex as given; if that succeeds then wrap it in our ^$ so it matches the - // entire token. - if (!re::regex_t::try_compile(*opts.regex_pattern, re::flags_t{}, &error)) { - streams.err.append_format(_(L"%ls: Regular expression compile error: %ls\n"), CMD, - error.message().c_str()); - streams.err.append_format(L"%ls: %ls\n", CMD, opts.regex_pattern->c_str()); - streams.err.append_format(L"%ls: %*ls\n", CMD, static_cast(error.offset), L"^"); - return STATUS_INVALID_ARGS; - } - wcstring anchored = re::make_anchored(*opts.regex_pattern); - regex = re::regex_t::try_compile(anchored, re::flags_t{}, &error); - assert(regex.has_value() && "Anchored compilation should have succeeded"); - } - - if (opts.function.has_value() && opts.args.size() > 1) { - streams.err.append_format(BUILTIN_ERR_TOO_MANY_ARGUMENTS, L"abbr"); - return STATUS_INVALID_ARGS; - } - wcstring replacement; - if (opts.function.has_value()) { - replacement = *opts.function; - } else { - for (auto iter = opts.args.begin() + 1; iter != opts.args.end(); ++iter) { - if (!replacement.empty()) replacement.push_back(L' '); - replacement.append(*iter); - } - } - // Abbreviation function names disallow spaces. - // This is to prevent accidental usage of e.g. `--function 'string replace'` - if (opts.function.has_value() && - (!valid_func_name(replacement) || replacement.find(L' ') != wcstring::npos)) { - streams.err.append_format(_(L"%ls: Invalid function name: %ls\n"), CMD, - replacement.c_str()); - return STATUS_INVALID_ARGS; - } - - abbrs_position_t position = opts.position ? *opts.position : abbrs_position_t::command; - - // Note historically we have allowed overwriting existing abbreviations. - abbreviation_t abbr{std::move(name), std::move(key), std::move(replacement), position}; - abbr.regex = std::move(regex); - abbr.replacement_is_function = opts.function.has_value(); - abbr.set_cursor_marker = opts.set_cursor_marker; - abbrs_get_set()->add(std::move(abbr)); - return STATUS_CMD_OK; -} - -// Erase the named abbreviations. -static int abbr_erase(const abbr_options_t &opts, parser_t &parser, io_streams_t &) { - if (opts.args.empty()) { - // This has historically been a silent failure. - return STATUS_CMD_ERROR; - } - - // Erase each. If any is not found, return ENV_NOT_FOUND which is historical. - int result = STATUS_CMD_OK; - auto abbrs = abbrs_get_set(); - for (const auto &arg : opts.args) { - if (!abbrs->erase(arg)) { - result = ENV_NOT_FOUND; - } - // Erase the old uvar - this makes `abbr -e` work. - wcstring esc_src = escape_string(arg, 0, STRING_STYLE_VAR); - if (!esc_src.empty()) { - wcstring var_name = L"_fish_abbr_" + esc_src; - auto ret = parser.vars().remove(var_name, ENV_UNIVERSAL); - if (ret == ENV_OK) result = STATUS_CMD_OK; - } - - } - return result; -} - -} // namespace - -maybe_t builtin_abbr(parser_t &parser, io_streams_t &streams, const wchar_t **argv) { - const wchar_t *cmd = argv[0]; - abbr_options_t opts; - // Note 1 is returned by wgetopt to indicate a non-option argument. - enum { NON_OPTION_ARGUMENT = 1, SET_CURSOR_SHORT, RENAME_SHORT }; - - // Note the leading '-' causes wgetopter to return arguments in order, instead of permuting - // them. We need this behavior for compatibility with pre-builtin abbreviations where options - // could be given literally, for example `abbr e emacs -nw`. - static const wchar_t *const short_options = L"-:af:r:seqgUh"; - static const struct woption long_options[] = { - {L"add", no_argument, 'a'}, {L"position", required_argument, 'p'}, - {L"regex", required_argument, 'r'}, {L"set-cursor", optional_argument, SET_CURSOR_SHORT}, - {L"function", required_argument, 'f'}, {L"rename", no_argument, RENAME_SHORT}, - {L"erase", no_argument, 'e'}, {L"query", no_argument, 'q'}, - {L"show", no_argument, 's'}, {L"list", no_argument, 'l'}, - {L"global", no_argument, 'g'}, {L"universal", no_argument, 'U'}, - {L"help", no_argument, 'h'}, {}}; - - int argc = builtin_count_args(argv); - int opt; - wgetopter_t w; - bool in_expansion = false; - while (!in_expansion && - (opt = w.wgetopt_long(argc, argv, short_options, long_options, nullptr)) != -1) { - switch (opt) { - case NON_OPTION_ARGUMENT: - // If --add is specified (or implied by specifying no other commands), all - // unrecognized options after the *second* non-option argument are considered part - // of the abbreviation expansion itself, rather than options to the abbr command. - // For example, `abbr e emacs -nw` works, because `-nw` occurs after the second - // non-option, and --add is implied. - opts.args.push_back(w.woptarg); - if (opts.args.size() >= 2 && - !(opts.rename || opts.show || opts.list || opts.erase || opts.query)) { - in_expansion = true; - } - break; - case 'a': - opts.add = true; - break; - case 'p': { - if (opts.position.has_value()) { - streams.err.append_format(_(L"%ls: Cannot specify multiple positions\n"), CMD); - return STATUS_INVALID_ARGS; - } - if (!wcscmp(w.woptarg, L"command")) { - opts.position = abbrs_position_t::command; - } else if (!wcscmp(w.woptarg, L"anywhere")) { - opts.position = abbrs_position_t::anywhere; - } else { - streams.err.append_format(_(L"%ls: Invalid position '%ls'\n" - L"Position must be one of: command, anywhere.\n"), - CMD, w.woptarg); - return STATUS_INVALID_ARGS; - } - break; - } - case 'r': { - if (opts.regex_pattern.has_value()) { - streams.err.append_format(_(L"%ls: Cannot specify multiple regex patterns\n"), - CMD); - return STATUS_INVALID_ARGS; - } - opts.regex_pattern = w.woptarg; - break; - } - case SET_CURSOR_SHORT: { - if (opts.set_cursor_marker.has_value()) { - streams.err.append_format( - _(L"%ls: Cannot specify multiple set-cursor options\n"), CMD); - return STATUS_INVALID_ARGS; - } - // The default set-cursor indicator is '%'. - opts.set_cursor_marker = w.woptarg ? w.woptarg : L"%"; - break; - } - case 'f': - opts.function = wcstring(w.woptarg); - break; - case RENAME_SHORT: - opts.rename = true; - break; - case 'e': - opts.erase = true; - break; - case 'q': - opts.query = true; - break; - case 's': - opts.show = true; - break; - case 'l': - opts.list = true; - break; - case 'g': - // Kept for backwards compatibility but ignored. - // This basically does nothing now. - break; - case 'U': { - // Kept and made ineffective, so we warn. - streams.err.append_format(_(L"%ls: Warning: Option '%ls' was removed and is now ignored"), cmd, argv[w.woptind - 1]); - builtin_print_error_trailer(parser, streams.err, cmd); - break; - } - case 'h': { - builtin_print_help(parser, streams, cmd); - return STATUS_CMD_OK; - } - case ':': { - builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; - } - case '?': { - builtin_unknown_option(parser, streams, cmd, argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; - } - } - } - opts.args.insert(opts.args.end(), argv + w.woptind, argv + argc); - if (!opts.validate(streams)) { - return STATUS_INVALID_ARGS; - } - - if (opts.add) return abbr_add(opts, streams); - if (opts.show) return abbr_show(opts, streams); - if (opts.list) return abbr_list(opts, streams); - if (opts.rename) return abbr_rename(opts, streams); - if (opts.erase) return abbr_erase(opts, parser, streams); - if (opts.query) return abbr_query(opts, streams); - - // validate() should error or ensure at least one path is set. - DIE("unreachable"); - return STATUS_INVALID_ARGS; -} diff --git a/src/builtins/abbr.h b/src/builtins/abbr.h deleted file mode 100644 index 4a1dc883b..000000000 --- a/src/builtins/abbr.h +++ /dev/null @@ -1,11 +0,0 @@ -// Prototypes for executing builtin_abbr function. -#ifndef FISH_BUILTIN_ABBR_H -#define FISH_BUILTIN_ABBR_H - -#include "../maybe.h" - -class parser_t; -struct io_streams_t; - -maybe_t builtin_abbr(parser_t &parser, io_streams_t &streams, const wchar_t **argv); -#endif diff --git a/src/complete.cpp b/src/complete.cpp index f536362bd..522879a21 100644 --- a/src/complete.cpp +++ b/src/complete.cpp @@ -679,11 +679,11 @@ void completer_t::complete_abbr(const wcstring &cmd) { completion_list_t possible_comp; std::unordered_map descs; { - auto abbrs = abbrs_get_set(); - for (const auto &abbr : abbrs->list()) { - if (!abbr.is_regex()) { - possible_comp.emplace_back(abbr.key); - descs[abbr.key] = abbr.replacement; + auto abbrs = abbrs_list(); + for (const auto &abbr : abbrs) { + if (!abbr.is_regex) { + possible_comp.emplace_back(*abbr.key); + descs[*abbr.key] = *abbr.replacement; } } } diff --git a/src/env.cpp b/src/env.cpp index 38d9fc5f6..4eb17ac68 100644 --- a/src/env.cpp +++ b/src/env.cpp @@ -455,7 +455,22 @@ void env_init(const struct config_paths_t *paths, bool do_uvars, bool default_pa // Import any abbreviations from uvars. // Note we do not dynamically react to changes. - abbrs_get_set()->import_from_uvars(table); + const wchar_t *const prefix = L"_fish_abbr_"; + size_t prefix_len = wcslen(prefix); + const bool from_universal = true; + auto abbrs = abbrs_get_set(); + for (const auto &kv : table) { + if (string_prefixes_string(prefix, kv.first)) { + wcstring escaped_name = kv.first.substr(prefix_len); + wcstring name; + if (unescape_string(escaped_name, &name, unescape_flags_t{}, STRING_STYLE_VAR)) { + wcstring key = name; + wcstring replacement = join_strings(kv.second.as_list(), L' '); + abbrs->add(std::move(name), std::move(key), std::move(replacement), + abbrs_position_t::command, from_universal); + } + } + } } } diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index deb7a6275..e9a03c88a 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -2403,15 +2403,11 @@ static void test_ifind_fuzzy() { static void test_abbreviations() { say(L"Testing abbreviations"); { - auto literal_abbr = [](const wchar_t *name, const wchar_t *repl, - abbrs_position_t pos = abbrs_position_t::command) { - return abbreviation_t(name, name /* key */, repl, pos); - }; auto abbrs = abbrs_get_set(); - abbrs->add(literal_abbr(L"gc", L"git checkout")); - abbrs->add(literal_abbr(L"foo", L"bar")); - abbrs->add(literal_abbr(L"gx", L"git checkout")); - abbrs->add(literal_abbr(L"yin", L"yang", abbrs_position_t::anywhere)); + abbrs->add(L"gc", L"gc", L"git checkout", abbrs_position_t::command, false); + abbrs->add(L"foo", L"foo", L"bar", abbrs_position_t::command, false); + abbrs->add(L"gx", L"gx", L"git checkout", abbrs_position_t::command, false); + abbrs->add(L"yin", L"yin", L"yang", abbrs_position_t::anywhere, false); } // Helper to expand an abbreviation, enforcing we have no more than one result. @@ -2423,7 +2419,7 @@ static void test_abbreviations() { if (result.empty()) { return none(); } - return result.front().replacement; + return *result.front().replacement; }; auto cmd = abbrs_position_t::command; @@ -2445,7 +2441,7 @@ static void test_abbreviations() { cmdline, cursor_pos.value_or(cmdline.size()), parser_t::principal_parser())) { wcstring cmdline_expanded = cmdline; std::vector colors{cmdline_expanded.size()}; - apply_edit(&cmdline_expanded, &colors, edit_t{replacement->range, replacement->text}); + apply_edit(&cmdline_expanded, &colors, edit_t{replacement->range, *replacement->text}); return cmdline_expanded; } return none_t(); @@ -2499,19 +2495,6 @@ static void test_abbreviations() { err(L"command yin incorrectly expanded on line %ld to '%ls'", (long)__LINE__, result->c_str()); } - - // Renaming works. - { - auto abbrs = abbrs_get_set(); - do_test(!abbrs->has_name(L"gcc")); - do_test(abbrs->has_name(L"gc")); - abbrs->rename(L"gc", L"gcc"); - do_test(abbrs->has_name(L"gcc")); - do_test(!abbrs->has_name(L"gc")); - do_test(!abbrs->erase(L"gc")); - do_test(abbrs->erase(L"gcc")); - do_test(!abbrs->erase(L"gcc")); - } } /// Test path functions. @@ -3486,7 +3469,8 @@ static void test_complete() { // Test abbreviations. function_add(L"testabbrsonetwothreefour", func_props); - abbrs_get_set()->add(abbreviation_t(L"somename", L"testabbrsonetwothreezero", L"expansion")); + abbrs_get_set()->add(L"somename", L"testabbrsonetwothreezero", L"expansion", + abbrs_position_t::command, false); completions = complete(L"testabbrsonetwothree", {}, parser->context()); do_test(completions.size() == 2); do_test(completions.at(0).completion == L"four"); diff --git a/src/highlight.cpp b/src/highlight.cpp index bfa053d62..89217f7a2 100644 --- a/src/highlight.cpp +++ b/src/highlight.cpp @@ -1333,8 +1333,7 @@ static bool command_is_valid(const wcstring &cmd, statement_decoration_t decorat if (!is_valid && function_ok) is_valid = function_exists_no_autoload(cmd); // Abbreviations - if (!is_valid && abbreviation_ok) - is_valid = abbrs_get_set()->has_match(cmd, abbrs_position_t::command); + if (!is_valid && abbreviation_ok) is_valid = abbrs_has_match(cmd, abbrs_position_t::command); // Regular commands if (!is_valid && command_ok) is_valid = path_get_path(cmd, vars).has_value(); diff --git a/src/parse_constants.h b/src/parse_constants.h index a7c3e75e6..a6e12fc5e 100644 --- a/src/parse_constants.h +++ b/src/parse_constants.h @@ -31,10 +31,11 @@ using parse_error_list_t = ParseErrorList; #include "config.h" -struct source_range_t { +struct SourceRange { source_offset_t start; source_offset_t length; }; +using source_range_t = SourceRange; enum class parse_token_type_t : uint8_t { invalid = 1, diff --git a/src/parser.h b/src/parser.h index 97466a136..4ca7a0480 100644 --- a/src/parser.h +++ b/src/parser.h @@ -395,6 +395,8 @@ class parser_t : public std::enable_shared_from_this { env_stack_t &vars() { return *variables; } const env_stack_t &vars() const { return *variables; } + int remove_var_ffi(const wcstring &key, int mode) { return vars().remove(key, mode); } + /// Get the library data. library_data_t &libdata() { return library_data; } const library_data_t &libdata() const { return library_data; } diff --git a/src/reader.cpp b/src/reader.cpp index 1d2a14bfc..6c4b40d5a 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -1376,16 +1376,17 @@ void reader_data_t::pager_selection_changed() { /// Expand an abbreviation replacer, which may mean running its function. /// \return the replacement, or none to skip it. This may run fish script! -maybe_t expand_replacer(source_range_t range, const wcstring &token, +maybe_t expand_replacer(SourceRange range, const wcstring &token, const abbrs_replacer_t &repl, parser_t &parser) { if (!repl.is_function) { // Literal replacement cannot fail. FLOGF(abbrs, L"Expanded literal abbreviation <%ls> -> <%ls>", token.c_str(), - repl.replacement.c_str()); - return abbrs_replacement_t::from(range, repl.replacement, repl); + (*repl.replacement).c_str()); + return abbrs_replacement_from(range, *repl.replacement, *repl.set_cursor_marker, + repl.has_cursor_marker); } - wcstring cmd = escape_string(repl.replacement); + wcstring cmd = escape_string(*repl.replacement); cmd.push_back(L' '); cmd.append(escape_string(token)); @@ -1398,7 +1399,7 @@ maybe_t expand_replacer(source_range_t range, const wcstrin } wcstring result = join_strings(outputs, L'\n'); FLOGF(abbrs, L"Expanded function abbreviation <%ls> -> <%ls>", token.c_str(), result.c_str()); - return abbrs_replacement_t::from(range, std::move(result), repl); + return abbrs_replacement_from(range, result, *repl.set_cursor_marker, repl.has_cursor_marker); } // Extract all the token ranges in \p str, along with whether they are an undecorated command. @@ -1501,8 +1502,12 @@ bool reader_data_t::expand_abbreviation_at_cursor(size_t cursor_backtrack) { size_t cursor_pos = el->position() - std::min(el->position(), cursor_backtrack); if (auto replacement = reader_expand_abbreviation_at_cursor(el->text(), cursor_pos, this->parser())) { - push_edit(el, edit_t{replacement->range, std::move(replacement->text)}); - update_buff_pos(el, replacement->cursor); + push_edit(el, edit_t{replacement->range, *replacement->text}); + if (replacement->has_cursor) { + update_buff_pos(el, replacement->cursor); + } else { + update_buff_pos(el, none()); + } result = true; } }