Merge pull request #9916 from henrikhorluck/riir/wildcard

Port wildcard to Rust
2025-01-16 06:54:03 +00:00 · 2023-09-15 15:02:55 +02:00 · 2023-09-15 15:02:55 +02:00 · 280e72c152
commit 280e72c152
parent e1f5751ba0 0cc1aef725
12 changed files with 1495 additions and 141 deletions
--- a/fish-rust/build.rs
+++ b/fish-rust/build.rs
@ -87,6 +87,7 @@ fn main() {
        "fish-rust/src/trace.rs",
        "fish-rust/src/util.rs",
        "fish-rust/src/wait_handle.rs",
+        "fish-rust/src/wildcard.rs",
    ];
    cxx_build::bridges(&source_files)
        .flag_if_supported("-std=c++11")
--- a/fish-rust/src/builtins/string/match.rs
+++ b/fish-rust/src/builtins/string/match.rs
@ -6,8 +6,7 @@ use super::*;
 use crate::env::{EnvMode, EnvVar, EnvVarFlags};
 use crate::flog::FLOG;
 use crate::parse_util::parse_util_unescape_wildcards;
-use crate::wchar_ffi::WCharToFFI;
-use crate::wildcard::ANY_STRING;
+use crate::wildcard::{wildcard_match, ANY_STRING};

 #[derive(Default)]
 pub struct Match<'args> {
@ -380,13 +379,11 @@ impl<'opts, 'args> WildCardMatcher<'opts, 'args> {
    fn report_matches(&mut self, arg: &wstr, streams: &mut io_streams_t) {
        // Note: --all is a no-op for glob matching since the pattern is always matched
        // against the entire argument.
-        use crate::ffi::wildcard_match;
-
        let subject = match self.opts.ignore_case {
            true => arg.to_lowercase(),
            false => arg.to_owned(),
        };
-        let m = wildcard_match(&subject.to_ffi(), &self.pattern.to_ffi(), false);
+        let m = wildcard_match(subject, &self.pattern, false);

        if m ^ self.opts.invert_match {
            self.total_matched += 1;
--- a/fish-rust/src/common.rs
+++ b/fish-rust/src/common.rs
@ -1258,7 +1258,7 @@ pub fn assert_is_background_thread() {

 /// Format the specified size (in bytes, kilobytes, etc.) into the specified stringbuffer.
 #[widestrs]
-fn format_size(mut sz: i64) -> WString {
+pub fn format_size(mut sz: i64) -> WString {
    let mut result = WString::new();
    const sz_names: [&wstr; 8] = ["kB"L, "MB"L, "GB"L, "TB"L, "PB"L, "EB"L, "ZB"L, "YB"L];
    if sz < 0 {
--- a/fish-rust/src/complete.rs
+++ b/fish-rust/src/complete.rs
@ -0,0 +1,183 @@
+/// Prototypes for functions related to tab-completion.
+///
+/// These functions are used for storing and retrieving tab-completion data, as well as for
+/// performing tab-completion.
+use crate::wchar::prelude::*;
+use crate::wcstringutil::StringFuzzyMatch;
+use bitflags::bitflags;
+
+/// A set of boolean switches attached to completions; the derived `Default` leaves all of them
+/// unset.
+#[derive(Default, Debug)]
+pub struct CompletionMode {
+    /// If set, skip file completions.
+    pub no_files: bool,
+    /// NOTE(review): presumably the counterpart of `no_files`, i.e. always offer file
+    /// completions - confirm against the callers.
+    pub force_files: bool,
+    /// If set, require a parameter after completion.
+    pub requires_param: bool,
+}
+
+/// Character (a tab) that separates the completion and description on programmable completions.
+pub const PROG_COMPLETE_SEP: char = '\t';
+
+bitflags! {
+    /// Per-completion flags. Each flag occupies one bit and all eight bits of the `u8` are used.
+    #[derive(Default)]
+    pub struct CompleteFlags: u8 {
+    /// Do not insert a space afterwards if this is the only completion. (The default is to try
+    /// to insert a space).
+    const NO_SPACE = 1 << 0;
+    /// This is not the suffix of a token, but replaces it entirely.
+    const REPLACES_TOKEN = 1 << 1;
+    /// This completion may or may not want a space at the end - guess by checking the last
+    /// character of the completion.
+    const AUTO_SPACE = 1 << 2;
+    /// This completion should be inserted as-is, without escaping.
+    const DONT_ESCAPE = 1 << 3;
+    /// If you do escape, don't escape tildes.
+    const DONT_ESCAPE_TILDES = 1 << 4;
+    /// Do not sort supplied completions.
+    const DONT_SORT = 1 << 5;
+    /// This completion looks to have the same string as an existing argument.
+    const DUPLICATES_ARGUMENT = 1 << 6;
+    /// This completes not just a token but replaces the entire commandline.
+    const REPLACES_COMMANDLINE = 1 << 7;
+    }
+}
+
+/// A single completion: its text, its description, how it matched, and its flags.
+#[derive(Debug)]
+pub struct Completion {
+    /// The completion string.
+    pub completion: WString,
+    /// The description for this completion.
+    pub description: WString,
+    /// The fuzzy-match result for this completion; `rank()` is computed from it.
+    pub r#match: StringFuzzyMatch,
+    /// Flags for this completion, see `CompleteFlags`.
+    pub flags: CompleteFlags,
+}
+
+impl Default for Completion {
+    /// Builds an empty completion: default text, description and flags, and an exact match.
+    fn default() -> Self {
+        let exact = StringFuzzyMatch::exact_match();
+        Self {
+            completion: WString::default(),
+            description: WString::default(),
+            r#match: exact,
+            flags: CompleteFlags::default(),
+        }
+    }
+}
+
+impl From<WString> for Completion {
+    /// Wraps a bare string as a completion, leaving every other field at its default value.
+    fn from(completion: WString) -> Completion {
+        let defaults = Self::default();
+        Self {
+            completion,
+            ..defaults
+        }
+    }
+}
+
+impl Completion {
+    /// \return true if the `REPLACES_TOKEN` flag is set on this completion.
+    pub fn replaces_token(&self) -> bool {
+        let wanted = CompleteFlags::REPLACES_TOKEN;
+        self.flags.contains(wanted)
+    }
+
+    /// \return true if the `REPLACES_COMMANDLINE` flag is set on this completion.
+    pub fn replaces_commandline(&self) -> bool {
+        let wanted = CompleteFlags::REPLACES_COMMANDLINE;
+        self.flags.contains(wanted)
+    }
+
+    /// \return the rank of this completion's match. Lower ranks are better completions.
+    pub fn rank(&self) -> u32 {
+        let fuzzy = &self.r#match;
+        fuzzy.rank()
+    }
+
+    /// Insert `prefix` at the front of the completion text, but only when this completion
+    /// replaces the entire token. Otherwise the completion is left untouched.
+    pub fn prepend_token_prefix(&mut self, prefix: impl AsRef<wstr>) {
+        if !self.replaces_token() {
+            return;
+        }
+        let prefix: &wstr = prefix.as_ref();
+        self.completion.insert_utfstr(0, prefix);
+    }
+}
+
+/// A completion receiver accepts completions. It is essentially a wrapper around Vec with
+/// some conveniences.
+#[derive(Default, Debug)]
+pub struct CompletionReceiver {
+    /// Our list of completions.
+    completions: Vec<Completion>,
+    /// The maximum number of completions to hold. Once our list length reaches this, new
+    /// completions are not added. Note 0 has no special significance here (a limit of 0, which
+    /// is what the derived `Default` yields, means nothing can be added) - use
+    /// usize::MAX instead.
+    limit: usize,
+}
+
+// The receiver is only a thin wrapper over a `Vec<Completion>`, so read-only slice access is
+// forwarded straight to the inner vector.
+impl std::ops::Deref for CompletionReceiver {
+    type Target = [Completion];
+
+    fn deref(&self) -> &[Completion] {
+        &self.completions
+    }
+}
+
+impl std::ops::DerefMut for CompletionReceiver {
+    /// Mutable access to the stored completions as a slice: elements may be modified in place.
+    fn deref_mut(&mut self) -> &mut [Completion] {
+        &mut self.completions
+    }
+}
+
+impl CompletionReceiver {
+    /// Create an empty receiver that holds at most `limit` completions.
+    pub fn new(limit: usize) -> Self {
+        Self {
+            limit,
+            ..Default::default()
+        }
+    }
+
+    /// \return how many more completions fit before the limit is reached.
+    /// Panics if the list is somehow longer than the limit, since that is a broken invariant.
+    fn remaining_capacity(&self) -> usize {
+        self.limit
+            .checked_sub(self.completions.len())
+            .expect("length should never be larger than limit")
+    }
+
+    /// Add a completion.
+    /// \return true on success, false if this would overflow the limit.
+    #[must_use]
+    pub fn add(&mut self, comp: impl Into<Completion>) -> bool {
+        if self.completions.len() >= self.limit {
+            return false;
+        }
+        self.completions.push(comp.into());
+        true
+    }
+
+    /// Add a list of completions. Either all of them are added or none are.
+    /// \return true on success, false if this would overflow the limit.
+    #[must_use]
+    pub fn extend(
+        &mut self,
+        iter: impl IntoIterator<Item = Completion, IntoIter = impl ExactSizeIterator<Item = Completion>>,
+    ) -> bool {
+        let iter = iter.into_iter();
+        // Use the checked computation so a broken invariant cannot silently wrap around in
+        // release builds and let an arbitrary number of items through.
+        if iter.len() > self.remaining_capacity() {
+            return false;
+        }
+        self.completions.extend(iter);
+        // this only fails if the ExactSizeIterator impl is bogus
+        assert!(
+            self.completions.len() <= self.limit,
+            "ExactSizeIterator returned more items than it should"
+        );
+        true
+    }
+
+    /// Clear the list of completions. This retains the storage inside `completions` which can be
+    /// useful to prevent allocations.
+    pub fn clear(&mut self) {
+        self.completions.clear();
+    }
+
+    /// \return the list of completions, clearing it.
+    pub fn take(&mut self) -> Vec<Completion> {
+        std::mem::take(&mut self.completions)
+    }
+
+    /// \return a new, empty receiver whose limit is our remaining capacity.
+    /// This is useful for e.g. recursive calls when you want to act on the result before adding it.
+    pub fn subreceiver(&self) -> Self {
+        Self::new(self.remaining_capacity())
+    }
+}
--- a/fish-rust/src/ffi.rs
+++ b/fish-rust/src/ffi.rs
@ -45,7 +45,6 @@ include_cpp! {
    #include "reader.h"
    #include "screen.h"
    #include "tokenizer.h"
-    #include "wildcard.h"
    #include "wutil.h"

    // We need to block these types so when exposing C++ to Rust.
@ -92,7 +91,6 @@ include_cpp! {

    generate!("log_extra_to_flog_file")

-    generate!("wildcard_match")
    generate!("wgettext_ptr")

    generate!("block_t")
--- a/fish-rust/src/flog.rs
+++ b/fish-rust/src/flog.rs
@ -1,7 +1,6 @@
-use crate::ffi::wildcard_match;
 use crate::parse_util::parse_util_unescape_wildcards;
 use crate::wchar::prelude::*;
-use crate::wchar_ffi::WCharToFFI;
+use crate::wildcard::wildcard_match;
 use libc::c_int;
 use std::io::Write;
 use std::os::unix::prelude::*;
@ -212,7 +211,7 @@ fn apply_one_wildcard(wc_esc: &wstr, sense: bool) {
    let wc = parse_util_unescape_wildcards(wc_esc);
    let mut match_found = false;
    for cat in categories::all_categories() {
-        if wildcard_match(&cat.name.to_ffi(), &wc.to_ffi(), false) {
+        if wildcard_match(cat.name, &wc, false) {
            cat.enabled.store(sense, Ordering::Relaxed);
            match_found = true;
        }
--- a/fish-rust/src/lib.rs
+++ b/fish-rust/src/lib.rs
@ -23,6 +23,7 @@ mod ast;
 mod builtins;
 mod color;
 mod compat;
+mod complete;
 mod curses;
 mod env;
 mod env_dispatch;
--- a/fish-rust/src/wcstringutil.rs
+++ b/fish-rust/src/wcstringutil.rs
@ -98,10 +98,10 @@ pub enum CaseFold {
 }

 /// A lightweight value-type describing how closely a string fuzzy-matches another string.
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Debug, Eq, PartialEq, Clone)]
 pub struct StringFuzzyMatch {
-    typ: ContainType,
-    case_fold: CaseFold,
+    pub typ: ContainType,
+    pub case_fold: CaseFold,
 }

 impl StringFuzzyMatch {
--- a/fish-rust/src/wildcard.rs
+++ b/fish-rust/src/wildcard.rs
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@ -2444,28 +2444,6 @@ static void test_autoload() {
    autoload_tester_t::run_test();
 }

-static void test_wildcards() {
-    say(L"Testing wildcards");
-    do_test(!wildcard_has(L""));
-    do_test(wildcard_has(L"*"));
-    do_test(!wildcard_has(L"\\*"));
-    do_test(!wildcard_has(L"\"*\""));
-
-    wcstring wc = L"foo*bar";
-    do_test(wildcard_has(wc) && !wildcard_has_internal(wc));
-    unescape_string_in_place(&wc, UNESCAPE_SPECIAL);
-    do_test(!wildcard_has(wc) && wildcard_has_internal(wc));
-
-    auto saved = feature_test(feature_flag_t::qmark_noglob);
-    feature_set(feature_flag_t::qmark_noglob, false);
-    do_test(wildcard_has(L"?"));
-    do_test(!wildcard_has(L"\\?"));
-    feature_set(feature_flag_t::qmark_noglob, true);
-    do_test(!wildcard_has(L"?"));
-    do_test(!wildcard_has(L"\\?"));
-    feature_set(feature_flag_t::qmark_noglob, saved);
-}
-
 static void test_complete() {
    say(L"Testing complete");

@ -5595,7 +5573,6 @@ static const test_t s_tests[]{
    {TEST_GROUP("word_motion"), test_word_motion},
    {TEST_GROUP("is_potential_path"), test_is_potential_path},
    {TEST_GROUP("colors"), test_colors},
-    {TEST_GROUP("wildcard"), test_wildcards},
    {TEST_GROUP("complete"), test_complete},
    {TEST_GROUP("autoload"), test_autoload},
    {TEST_GROUP("input"), test_input},
--- a/src/wildcard.cpp
+++ b/src/wildcard.cpp
@ -39,101 +39,6 @@ static size_t wildcard_find(const wchar_t *wc) {
    return wcstring::npos;
 }

-bool wildcard_has_internal(const wchar_t *s, size_t len) {
-    for (size_t i = 0; i < len; i++) {
-        wchar_t c = s[i];
-        if (c == ANY_CHAR || c == ANY_STRING || c == ANY_STRING_RECURSIVE) {
-            return true;
-        }
-    }
-    return false;
-}
-
-// Note we want to handle embedded nulls (issue #1631).
-bool wildcard_has(const wchar_t *str, size_t len) {
-    assert(str != nullptr);
-    const wchar_t *end = str + len;
-    bool qmark_is_wild = !feature_test(feature_flag_t::qmark_noglob);
-    // Fast check for * or ?; if none there is no wildcard.
-    // Note some strings contain * but no wildcards, e.g. if they are quoted.
-    if (std::find(str, end, L'*') == end && (!qmark_is_wild || std::find(str, end, L'?') == end)) {
-        return false;
-    }
-    wcstring unescaped;
-    if (auto tmp = unescape_string(wcstring{str, len}, UNESCAPE_SPECIAL)) {
-        unescaped = *tmp;
-    }
-    return wildcard_has_internal(unescaped);
-}
-
-/// Check whether the string str matches the wildcard string wc.
-///
-/// \param str String to be matched.
-/// \param wc The wildcard.
-/// \param leading_dots_fail_to_match Whether files beginning with dots should not be matched
-/// against wildcards.
-bool wildcard_match(const wcstring &str, const wcstring &wc, bool leading_dots_fail_to_match) {
-    // Hackish fix for issue #270. Prevent wildcards from matching . or .., but we must still allow
-    // literal matches.
-    if (leading_dots_fail_to_match && (str == L"." || str == L"..")) {
-        // The string is '.' or '..' so the only possible match is an exact match.
-        return str == wc;
-    }
-
-    // Near Linear implementation as proposed here https://research.swtch.com/glob.
-    const wchar_t *const str_start = str.c_str();
-    const wchar_t *wc_x = wc.c_str();
-    const wchar_t *str_x = str_start;
-    const wchar_t *restart_wc_x = wc.c_str();
-    const wchar_t *restart_str_x = str_start;
-
-    bool restart_is_out_of_str = false;
-    for (; *wc_x != 0 || *str_x != 0;) {
-        bool is_first = (str_x == str_start);
-        if (*wc_x != 0) {
-            if (*wc_x == ANY_STRING || *wc_x == ANY_STRING_RECURSIVE) {
-                // Ignore hidden file
-                if (leading_dots_fail_to_match && is_first && str[0] == L'.') {
-                    return false;
-                }
-
-                // Common case of * at the end. In that case we can early out since we know it will
-                // match.
-                if (wc_x[1] == L'\0') {
-                    return true;
-                }
-                // Try to match at str_x.
-                // If that doesn't work out, restart at str_x+1 next.
-                restart_wc_x = wc_x;
-                restart_str_x = str_x + 1;
-                restart_is_out_of_str = (*str_x == 0);
-                wc_x++;
-                continue;
-            } else if (*wc_x == ANY_CHAR && *str_x != 0) {
-                if (is_first && *str_x == L'.') {
-                    return false;
-                }
-                wc_x++;
-                str_x++;
-                continue;
-            } else if (*str_x != 0 && *str_x == *wc_x) {  // ordinary character
-                wc_x++;
-                str_x++;
-                continue;
-            }
-        }
-        // Mismatch. Maybe restart.
-        if (restart_str_x != str.c_str() && !restart_is_out_of_str) {
-            wc_x = restart_wc_x;
-            str_x = restart_str_x;
-            continue;
-        }
-        return false;
-    }
-    // Matched all of pattern to all of name. Success.
-    return true;
-}
-
 // This does something horrible refactored from an even more horrible function.
 static wcstring resolve_description(const wcstring &full_completion, wcstring *completion,
                                    expand_flags_t expand_flags,
--- a/src/wildcard.h
+++ b/src/wildcard.h
@ -75,6 +75,11 @@ wildcard_result_t wildcard_expand_string(const wcstring &wc, const wcstring &wor
                                         const cancel_checker_t &cancel_checker,
                                         completion_receiver_t *output);

+#if INCLUDE_RUST_HEADERS
+
+#include "wildcard.rs.h"
+
+#else
 /// Test whether the given wildcard matches the string. Does not perform any I/O.
 ///
 /// \param str The string to test
@ -83,18 +88,24 @@ wildcard_result_t wildcard_expand_string(const wcstring &wc, const wcstring &wor
 /// files and are not matched
 ///
 /// \return true if the wildcard matched
-bool wildcard_match(const wcstring &str, const wcstring &wc,
-                    bool leading_dots_fail_to_match = false);
+bool wildcard_match_ffi(const wcstring &str, const wcstring &wc, bool leading_dots_fail_to_match);

 // Check if the string has any unescaped wildcards (e.g. ANY_STRING).
-bool wildcard_has_internal(const wchar_t *s, size_t len);
-inline bool wildcard_has_internal(const wcstring &s) {
-    return wildcard_has_internal(s.c_str(), s.size());
-}
+bool wildcard_has_internal(const wcstring &s);

 /// Check if the specified string contains wildcards (e.g. *).
-bool wildcard_has(const wchar_t *s, size_t len);
-inline bool wildcard_has(const wcstring &s) { return wildcard_has(s.c_str(), s.size()); }
+bool wildcard_has(const wcstring &s);
+
+#endif
+
+/// Convenience wrapper around wildcard_match_ffi() that lets callers omit
+/// \p leading_dots_fail_to_match, which then defaults to false.
+///
+/// \return whatever wildcard_match_ffi() returns for the same arguments.
+inline bool wildcard_match(const wcstring &str, const wcstring &wc,
+                           bool leading_dots_fail_to_match = false) {
+    return wildcard_match_ffi(str, wc, leading_dots_fail_to_match);
+}
+
+/// Pointer-and-length overload: builds a wcstring from the first \p len characters of \p s and
+/// forwards to wildcard_has(const wcstring &).
+inline bool wildcard_has(const wchar_t *s, size_t len) {
+    return wildcard_has(wcstring(s, len));
+}

 /// Test wildcard completion.
 wildcard_result_t wildcard_complete(const wcstring &str, const wchar_t *wc,