fish-shell/fish-rust/src/wildcard.rs

1296 lines
50 KiB
Rust
Raw Normal View History

// Enumeration of all wildcard types.
use std::cmp::Ordering;
use std::collections::HashSet;
use std::io::ErrorKind;
use std::os::unix::prelude::*;
use std::{fs, io};
use cxx::CxxWString;
use libc::{mode_t, ELOOP, S_IXGRP, S_IXOTH, S_IXUSR, X_OK};
use crate::common::{
2023-08-27 17:35:31 +00:00
char_offset, format_size, is_windows_subsystem_for_linux, unescape_string, UnescapeFlags,
UnescapeStringStyle, WILDCARD_RESERVED_BASE,
};
use crate::complete::{CompleteFlags, Completion, CompletionReceiver, PROG_COMPLETE_SEP};
use crate::expand::ExpandFlags;
use crate::fallback::wcscasecmp;
use crate::future_feature_flags::feature_test;
use crate::future_feature_flags::FeatureFlag;
use crate::wchar::prelude::*;
use crate::wchar_ffi::WCharFromFFI;
use crate::wcstringutil::{
string_fuzzy_match_string, string_suffixes_string_case_insensitive, CaseFold,
};
use crate::wutil::{lwstat, waccess, wstat};
use once_cell::sync::Lazy;
// Description strings attached to file completions by file_get_desc(). Each one is built on first
// use by passing its literal through wgettext! (presumably the translation lookup).
static COMPLETE_EXEC_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("command"));
static COMPLETE_EXEC_LINK_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("command link"));
static COMPLETE_CHAR_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("char device"));
static COMPLETE_BLOCK_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("block device"));
static COMPLETE_FIFO_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("fifo"));
static COMPLETE_FILE_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("file"));
static COMPLETE_SYMLINK_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("symlink"));
static COMPLETE_DIRECTORY_SYMLINK_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("dir symlink"));
static COMPLETE_BROKEN_SYMLINK_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("broken symlink"));
static COMPLETE_LOOP_SYMLINK_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("symlink loop"));
static COMPLETE_SOCKET_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("socket"));
static COMPLETE_DIRECTORY_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("directory"));
/// Character representing any character except '/' (slash).
pub const ANY_CHAR: char = char_offset(WILDCARD_RESERVED_BASE, 0);
/// Character representing any character string not containing '/' (slash).
pub const ANY_STRING: char = char_offset(WILDCARD_RESERVED_BASE, 1);
/// Character representing any character string.
pub const ANY_STRING_RECURSIVE: char = char_offset(WILDCARD_RESERVED_BASE, 2);
/// This is a special pseudo-char that is not used other than to mark the
/// end of the special characters so we can sanity check the enum range.
pub const ANY_SENTINEL: char = char_offset(WILDCARD_RESERVED_BASE, 3);
/// Outcome of a wildcard match or of a wildcard expansion.
///
/// The type is a plain fieldless enum, so it is cheap to copy and can be compared and
/// debug-printed (which also makes it usable in `assert_eq!`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WildcardResult {
    /// The wildcard did not match.
    NoMatch,
    /// The wildcard did match.
    Match,
    /// Expansion was cancelled (e.g. control-C).
    Cancel,
    /// Expansion produced too many results.
    Overflow,
}
/// Compute the description to attach to a completion.
///
/// - If `completion` contains `PROG_COMPLETE_SEP`, the text after the separator is returned as the
///   description and `completion` is shortened in place to the text before the separator.
/// - Otherwise, if `description_func` is present and `expand_flags` contains
///   `ExpandFlags::GEN_DESCRIPTIONS`, the function is called with `full_completion`.
/// - Otherwise the description is empty.
fn resolve_description<'f>(
    full_completion: &wstr,
    completion: &mut &wstr,
    expand_flags: ExpandFlags,
    description_func: Option<&'f dyn Fn(&wstr) -> WString>,
) -> WString {
    if let Some(complete_sep_loc) = completion.find_char(PROG_COMPLETE_SEP) {
        // This completion has an embedded description, do not use the generic description.
        let description = completion[complete_sep_loc + 1..].to_owned();
        *completion = &completion[..complete_sep_loc];
        return description;
    }

    match description_func {
        // Only pay for generating a description when the caller asked for descriptions.
        Some(f) if expand_flags.contains(ExpandFlags::GEN_DESCRIPTIONS) => f(full_completion),
        _ => WString::new(),
    }
}
// A transient parameter pack needed by wildcard_complete.
// It bundles the values that stay the same across the recursive calls of
// wildcard_complete_internal.
struct WcCompletePack<'orig, 'f> {
// The complete string passed to wildcard_complete; used as the completion text when the whole
// token is replaced, and as the argument handed to the description function.
pub orig: &'orig wstr,
// Optional callback that produces a description; forwarded to resolve_description.
pub desc_func: Option<&'f dyn Fn(&wstr) -> WString>,
// Expansion flags consulted while matching (leading-dot handling, fuzzy matching, descriptions).
pub expand_flags: ExpandFlags,
}
// Narrow helper for the ANY_STRING loop in wildcard_complete_internal: reports whether any
// completion stored at index `first` or later is a same-case exact-or-prefix match.
fn has_prefix_match(comps: &CompletionReceiver, first: usize) -> bool {
    comps[first..].iter().any(|candidate| {
        let matched = &candidate.r#match;
        matched.is_exact_or_prefix() && matched.case_fold == CaseFold::samecase
    })
}
/// Matches the string against the wildcard, and if the wildcard is a possible completion of the
/// string, the remainder of the string is inserted into the out vector.
///
/// We ignore ANY_STRING_RECURSIVE here. The consequence is that you cannot tab complete **
/// wildcards. This is historic behavior.
/// is_first_call is default false.
#[allow(clippy::unnecessary_unwrap)]
fn wildcard_complete_internal(
s: &wstr,
wc: &wstr,
params: &WcCompletePack,
flags: CompleteFlags,
// it is easier to recurse with this over taking it by value
mut out: Option<&mut CompletionReceiver>,
is_first_call: bool,
) -> WildcardResult {
// Maybe early out for hidden files. We require that the wildcard match these exactly (i.e. a
// dot); ANY_STRING not allowed.
if is_first_call
&& !params
.expand_flags
.contains(ExpandFlags::ALLOW_NONLITERAL_LEADING_DOT)
&& s.char_at(0) == '.'
&& wc.char_at(0) != '.'
{
return WildcardResult::NoMatch;
}
// Locate the next wildcard character position, e.g. ANY_CHAR or ANY_STRING.
let next_wc_char_pos = wc
.chars()
.position(|c| matches!(c, ANY_CHAR | ANY_STRING | ANY_STRING_RECURSIVE));
// Maybe we have no more wildcards at all. This includes the empty string.
if next_wc_char_pos.is_none() {
// Try matching
let Some(m) = string_fuzzy_match_string(wc, s, false) else {
return WildcardResult::NoMatch;
};
// If we're not allowing fuzzy match, then we require a prefix match.
let needs_prefix_match = !params.expand_flags.contains(ExpandFlags::FUZZY_MATCH);
2023-09-15 12:38:49 +00:00
if needs_prefix_match && !m.is_exact_or_prefix() {
return WildcardResult::NoMatch;
}
// The match was successful. If the string is not requested we're done.
let Some(out) = out else {
return WildcardResult::Match;
};
// Wildcard complete.
let full_replacement =
m.requires_full_replacement() || flags.contains(CompleteFlags::REPLACES_TOKEN);
// If we are not replacing the token, be careful to only store the part of the string after
// the wildcard.
assert!(!full_replacement || wc.len() <= s.len());
let mut out_completion = match full_replacement {
true => params.orig,
false => s.slice_from(wc.len()),
};
let out_desc = resolve_description(
params.orig,
&mut out_completion,
params.expand_flags,
params.desc_func,
);
// Note: out_completion may be empty if the completion really is empty, e.g. tab-completing
// 'foo' when a file 'foo' exists.
let local_flags = if full_replacement {
flags | CompleteFlags::REPLACES_TOKEN
} else {
flags
};
if !out.add(Completion {
completion: out_completion.to_owned(),
description: out_desc,
flags: local_flags,
r#match: m,
}) {
return WildcardResult::Overflow;
}
return WildcardResult::Match;
} else if let Some(next_wc_char_pos @ 1..) = next_wc_char_pos {
// The literal portion of a wildcard cannot be longer than the string itself,
// e.g. `abc*` can never match a string that is only two characters long.
if next_wc_char_pos >= s.len() {
return WildcardResult::NoMatch;
}
let (s_pre, s_suf) = s.split_at(next_wc_char_pos);
let (wc_pre, wc_suf) = wc.split_at(next_wc_char_pos);
// Here we have a non-wildcard prefix. Note that we don't do fuzzy matching for stuff before
// a wildcard, so just do case comparison and then recurse.
if s_pre == wc_pre {
// Normal match.
return wildcard_complete_internal(s_suf, wc_suf, params, flags, out, false);
}
if wcscasecmp(s_pre, wc_pre) == Ordering::Equal {
// Case insensitive match.
return wildcard_complete_internal(
s.slice_from(next_wc_char_pos),
wc.slice_from(next_wc_char_pos),
params,
flags | CompleteFlags::REPLACES_TOKEN,
out,
false,
);
}
return WildcardResult::NoMatch;
}
// Our first character is a wildcard.
assert_eq!(next_wc_char_pos, Some(0));
match wc.char_at(0) {
ANY_CHAR => {
if s.is_empty() {
return WildcardResult::NoMatch;
}
return wildcard_complete_internal(
s.slice_from(1),
wc.slice_from(1),
params,
flags,
out,
false,
);
}
ANY_STRING => {
// Hackish. If this is the last character of the wildcard, then just complete with
// the empty string. This fixes cases like "f*<tab>" -> "f*o".
if wc.len() == 1 {
return wildcard_complete_internal(L!(""), L!(""), params, flags, out, false);
}
// Try all submatches. Issue #929: if the recursive call gives us a prefix match,
// just stop. This is sloppy - what we really want to do is say, once we've seen a
// match of a particular type, ignore all matches of that type further down the
// string, such that the wildcard produces the "minimal match.".
let mut has_match = false;
for i in 0..s.len() {
let before_count = out.as_ref().map(|o| o.len()).unwrap_or_default();
let submatch_res = wildcard_complete_internal(
s.slice_from(i),
wc.slice_from(1),
params,
flags,
out.as_deref_mut(),
false,
);
match submatch_res {
WildcardResult::NoMatch => continue,
WildcardResult::Match => {
has_match = true;
// If out is NULL, we don't care about the actual matches. If out is not
// NULL but we have a prefix match, stop there.
let Some(out) = out.as_mut() else {
return WildcardResult::Match;
};
if has_prefix_match(out, before_count) {
return WildcardResult::Match;
}
continue;
}
// Note early return
WildcardResult::Cancel | WildcardResult::Overflow => return submatch_res,
}
}
return match has_match {
true => WildcardResult::Match,
false => WildcardResult::NoMatch,
};
}
// We don't even try with this one.
ANY_STRING_RECURSIVE => WildcardResult::NoMatch,
_ => unreachable!(),
}
}
/// Entry point for wildcard completion of a single string.
///
/// Bundles `s`, `desc_func` and `expand_flags` into the parameter pack and starts the recursive
/// matcher with `is_first_call` set, returning its result unchanged.
pub fn wildcard_complete<'f>(
    s: &wstr,
    wc: &wstr,
    desc_func: Option<&'f dyn Fn(&wstr) -> WString>,
    out: Option<&mut CompletionReceiver>,
    expand_flags: ExpandFlags,
    flags: CompleteFlags,
) -> WildcardResult {
    wildcard_complete_internal(
        s,
        wc,
        &WcCompletePack {
            orig: s,
            desc_func,
            expand_flags,
        },
        flags,
        out,
        true, // is_first_call
    )
}
/// Obtain a description string for the file specified by the filename.
///
/// The returned value is a string constant and should not be free'd.
///
/// \param filename The file for which to find a description string
/// \param lstat_res The result of calling lstat on the file
/// \param lbuf The struct buf output of calling lstat on the file
/// \param stat_res The result of calling stat on the file
/// \param buf The struct buf output of calling stat on the file
/// \param err The errno value after a failed stat call on the file.
fn file_get_desc(
filename: &wstr,
lstat: Option<fs::Metadata>,
stat: Option<io::Result<fs::Metadata>>,
2023-09-15 12:58:54 +00:00
definitely_executable: bool,
) -> &'static wstr {
let Some(lstat) = lstat else {
return *COMPLETE_FILE_DESC;
};
2023-09-15 12:58:54 +00:00
let is_executable = |buf: &fs::Metadata, filename: &wstr| -> bool {
// Weird group permissions and other such issues make it non-trivial to find out if
// we can actually execute a file using the result from stat. It is much safer to
// use the access function, since it tells us exactly what we want to know.
2023-09-15 12:58:54 +00:00
definitely_executable
|| (buf.mode() as mode_t & (S_IXUSR | S_IXGRP | S_IXOTH) != 0)
&& waccess(filename, X_OK) == 0
};
// stat was only queried if lstat succeeded
let stat = stat.unwrap();
if lstat.is_symlink() {
return match stat {
Ok(stat) if stat.is_dir() => *COMPLETE_DIRECTORY_SYMLINK_DESC,
Ok(stat) if is_executable(&stat, filename) => *COMPLETE_EXEC_LINK_DESC,
Ok(_) => *COMPLETE_SYMLINK_DESC,
Err(e) if e.kind() == ErrorKind::NotFound => *COMPLETE_BROKEN_SYMLINK_DESC,
Err(e) if e.raw_os_error().unwrap() == ELOOP => *COMPLETE_LOOP_SYMLINK_DESC,
_ => {
// On unknown errors we do nothing. The file will be given the default 'File'
// description or one based on the suffix.
*COMPLETE_FILE_DESC
}
};
}
let Ok(stat) = stat else {
// Assuming that the metadata was zero if stat-call failed
return *COMPLETE_FILE_DESC;
};
let ft = stat.file_type();
if ft.is_char_device() {
*COMPLETE_CHAR_DESC
} else if ft.is_block_device() {
*COMPLETE_BLOCK_DESC
} else if ft.is_fifo() {
*COMPLETE_FIFO_DESC
} else if ft.is_socket() {
*COMPLETE_SOCKET_DESC
} else if ft.is_dir() {
*COMPLETE_DIRECTORY_DESC
} else if is_executable(&stat, filename) {
*COMPLETE_EXEC_DESC
} else {
*COMPLETE_FILE_DESC
}
}
/// Test if the given file is an executable (if executables_only) or directory (if
/// directories_only). If it matches, call wildcard_complete() with some description that we make
/// up. Note that the filename came from a readdir() call, so we know it exists.
fn wildcard_test_flags_then_complete(
filepath: &wstr,
filename: &wstr,
wc: &wstr,
expand_flags: ExpandFlags,
out: &mut CompletionReceiver,
known_dir: bool,
) -> bool {
let executables_only = expand_flags.contains(ExpandFlags::EXECUTABLES_ONLY);
let need_directory = expand_flags.contains(ExpandFlags::DIRECTORIES_ONLY);
// Fast path: If we need directories, and we already know it is one,
// and we don't need to do anything else, just return it.
// This is a common case for cd completions, and removes the `stat` entirely in case the system
// supports it.
if known_dir && !executables_only && !expand_flags.contains(ExpandFlags::GEN_DESCRIPTIONS) {
return wildcard_complete(
&(filename.to_owned() + L!("/")),
wc,
Some(&|_| L!("").to_owned()),
Some(out),
expand_flags,
CompleteFlags::NO_SPACE,
) == WildcardResult::Match;
}
// Check if it will match before stat().
if wildcard_complete(
filename,
wc,
None,
None,
expand_flags,
CompleteFlags::default(),
) != WildcardResult::Match
{
return false;
}
let lstat: Option<fs::Metadata> = lwstat(filepath).ok();
let stat: Option<io::Result<fs::Metadata>>;
if let Some(md) = &lstat {
if md.is_symlink() {
// In order to differentiate between e.g. broken symlinks and symlink loops, we also
// need to know the error status of wstat.
stat = Some(wstat(filepath));
} else {
stat = Some(Ok(md.clone()));
}
} else {
stat = None;
}
let (file_size, is_directory, is_executable) = if let Some(Ok(md)) = &stat {
(md.len(), md.is_dir(), md.is_file())
} else {
(0, false, false)
};
if need_directory && !is_directory {
return false;
}
if executables_only && (!is_executable || waccess(filepath, X_OK) != 0) {
return false;
}
if executables_only
&& is_windows_subsystem_for_linux()
&& string_suffixes_string_case_insensitive(L!(".dll"), filename)
{
return false;
}
// Compute the description.
let desc = if expand_flags.contains(ExpandFlags::GEN_DESCRIPTIONS) {
2023-09-15 12:58:54 +00:00
let mut desc = file_get_desc(filename, lstat, stat, executables_only).to_owned();
if !is_directory && !is_executable {
if !desc.is_empty() {
desc.push_utfstr(L!(", "));
}
desc.push_utfstr(&format_size(file_size.try_into().unwrap()));
}
Some(desc)
} else {
None
};
// Append a / if this is a directory. Note this requirement may be the only reason we have to
// call stat() in some cases.
2023-09-15 12:38:49 +00:00
let desc_func = |_: &wstr| match desc.as_ref() {
Some(d) => d.to_owned(),
None => WString::new(),
};
2023-09-15 12:38:49 +00:00
let desc_func: Option<&dyn Fn(&wstr) -> WString> = Some(&desc_func);
if is_directory {
return wildcard_complete(
&(filename.to_owned() + L!("/")),
wc,
desc_func,
Some(out),
expand_flags,
CompleteFlags::NO_SPACE,
) == WildcardResult::Match;
}
wildcard_complete(
filename,
wc,
desc_func,
Some(out),
expand_flags,
CompleteFlags::empty(),
) == WildcardResult::Match
}
use expander::WildCardExpander;
mod expander {
use libc::F_OK;
use crate::{
common::scoped_push,
complete::CompleteFlags,
path::append_path_component,
wcstringutil::string_fuzzy_match_string,
wutil::{dir_iter::DirIter, normalize_path, waccess, FileId},
};
use super::*;
pub struct WildCardExpander<'e> {
/// A function to call to check cancellation.
2023-08-27 17:35:31 +00:00
cancel_checker: &'e mut dyn FnMut() -> bool,
/// The working directory to resolve paths against
working_directory: &'e wstr,
/// The set of items we have resolved, used to efficiently avoid duplication.
completion_set: HashSet<WString>,
/// The set of file IDs we have visited, used to avoid symlink loops.
visited_files: HashSet<FileId>,
/// Flags controlling expansion.
flags: ExpandFlags,
/// Resolved items get inserted into here. This is transient of course.
resolved_completions: &'e mut CompletionReceiver,
/// Whether we have been interrupted.
did_interrupt: bool,
/// Whether we have overflowed.
did_overflow: bool,
/// Whether we have successfully added any completions.
did_add: bool,
/// Whether some parent expansion is fuzzy, and therefore completions always prepend their prefix
/// This variable is a little suspicious - it should be passed along, not stored here
/// If we ever try to do parallel wildcard expansion we'll have to remove this
has_fuzzy_ancestor: bool,
}
impl<'e> WildCardExpander<'e> {
pub fn new(
working_directory: &'e wstr,
flags: ExpandFlags,
2023-08-27 17:35:31 +00:00
cancel_checker: &'e mut dyn FnMut() -> bool,
resolved_completions: &'e mut CompletionReceiver,
) -> Self {
Self {
cancel_checker,
working_directory,
completion_set: resolved_completions
.iter()
.map(|c| c.completion.to_owned())
.collect(),
visited_files: HashSet::new(),
flags,
resolved_completions,
did_add: false,
did_interrupt: false,
did_overflow: false,
has_fuzzy_ancestor: false,
}
}
/// The real implementation of wildcard expansion is in this function. Other functions are just
/// wrappers around this one.
///
/// This function traverses the relevant directory tree looking for matches, and recurses when
/// needed to handle wildcards spanning multiple components and recursive wildcards.
///
/// Nothing is returned: results are appended to the receiver held by the expander, and the
/// outcome is reported through the expander's status flags.
///
/// Args:
/// base_dir: the "working directory" against which the wildcard is to be resolved
/// wc: the wildcard string itself, e.g. foo*bar/baz (where * is actually ANY_STRING)
/// effective_prefix: the string that should be prepended for completions that replace their token.
/// This is usually the same thing as the original wildcard, but for fuzzy matching, we
/// expand intermediate segments. effective_prefix is always either empty, or ends with a slash
pub fn expand(&mut self, base_dir: &wstr, wc: &wstr, effective_prefix: &wstr) {
if self.interrupted_or_overflowed() {
return;
}
// Get the current segment and compute interesting properties about it.
// The segment is everything up to the first slash; the remainder (if any) starts after it.
let (wc_segment, wc_remainder) = if let Some(next_slash) = wc.find_char('/') {
let (seg, rem) = wc.split_at(next_slash);
let rem_without_slash = rem.slice_from(1);
(seg, Some(rem_without_slash))
} else {
(wc, None)
};
let is_last_segment = wc_remainder.is_none();
let segment_has_wildcards = wildcard_has_internal(wc_segment);
if wc_segment.is_empty() {
// Empty segment: the wildcard starts with a slash or is empty.
assert!(!segment_has_wildcards);
if is_last_segment {
self.expand_trailing_slash(base_dir, effective_prefix);
} else {
// Keep the slash in the prefix and continue with the rest, in the same directory.
let mut prefix = effective_prefix.to_owned();
prefix.push('/');
self.expand(base_dir, wc_remainder.unwrap(), &prefix);
}
} else if !segment_has_wildcards && !is_last_segment {
// Literal intermediate match. Note that we may not be able to actually read the directory
// (issue #2099).
let wc_remainder = wc_remainder.unwrap(); // TODO: if-let-chains
// Absolute path of the intermediate directory
let intermediate_dirpath: WString = base_dir.to_owned() + wc_segment + L!("/");
// This just trumps everything
let before = self.resolved_completions.len();
let prefix: WString = effective_prefix.to_owned() + wc_segment + L!("/");
self.expand(&intermediate_dirpath, wc_remainder, &prefix);
// Maybe try a fuzzy match (#94) if nothing was found with the literal match. Respect
// EXPAND_NO_DIRECTORY_ABBREVIATIONS (issue #2413).
// Don't do fuzzy matches if the literal segment was valid (#3211)
let allow_fuzzy = self.flags.contains(ExpandFlags::FUZZY_MATCH)
&& !self.flags.contains(ExpandFlags::NO_FUZZY_DIRECTORIES);
if allow_fuzzy
&& self.resolved_completions.len() == before
&& waccess(&intermediate_dirpath, F_OK) != 0
{
assert!(self.flags.contains(ExpandFlags::FOR_COMPLETIONS));
if let Ok(mut base_dir_iter) = self.open_dir(base_dir, false) {
self.expand_literal_intermediate_segment_with_fuzz(
base_dir,
&mut base_dir_iter,
wc_segment,
wc_remainder,
effective_prefix,
);
}
}
} else {
assert!(!wc_segment.is_empty() && (segment_has_wildcards || is_last_segment));
if !is_last_segment && matches!(wc_segment.as_char_slice(), [ANY_STRING_RECURSIVE])
{
// Hack for #7222. This is an intermediate wc segment that is exactly **. The
// tail matches in subdirectories as normal, but also the current directory.
// That is, '**/bar' may match 'bar' and 'foo/bar'.
// Implement this by matching the wildcard tail only, in this directory.
// Note if the segment is not exactly ANY_STRING_RECURSIVE then the segment may only
// match subdirectories.
self.expand(base_dir, wc_remainder.unwrap(), effective_prefix);
if self.interrupted_or_overflowed() {
return;
}
}
// return "." and ".." entries if we're doing completions
// A directory that cannot be opened simply yields no results.
let Ok(mut dir) = self.open_dir(
base_dir, /* return . and .. */
self.flags.contains(ExpandFlags::FOR_COMPLETIONS),
) else {
return;
};
if let Some(wc_remainder) = wc_remainder {
// Not the last segment, nonempty wildcard.
self.expand_intermediate_segment(
base_dir,
&mut dir,
wc_segment,
wc_remainder,
&(effective_prefix.to_owned() + wc_segment + L!("/")),
);
} else {
// Last wildcard segment, nonempty wildcard.
self.expand_last_segment(base_dir, &mut dir, wc_segment, effective_prefix);
}
// Everything below only applies when the segment contains a recursive wildcard.
let Some(asr_idx) = wc_segment.find_char(ANY_STRING_RECURSIVE) else {
return;
};
// Apply the recursive **.
// Construct a "head + any" wildcard for matching stuff in this directory, and an
// "any + tail" wildcard for matching stuff in subdirectories. Note that the
// ANY_STRING_RECURSIVE character is present in both the head and the tail.
let head_any = wc_segment.slice_to(asr_idx + 1);
// wc_segment is a prefix of wc, so the same index is valid in wc.
let any_tail = wc.slice_from(asr_idx);
assert!(head_any.chars().last().unwrap() == ANY_STRING_RECURSIVE);
assert!(any_tail.chars().next().unwrap() == ANY_STRING_RECURSIVE);
// The directory was already iterated above; start over for the recursive pass.
dir.rewind();
self.expand_intermediate_segment(
base_dir,
&mut dir,
head_any,
any_tail,
effective_prefix,
);
}
}
/// Summarize the expansion so far as a single result.
///
/// Interruption takes precedence over overflow, which takes precedence over whether any result
/// was added.
pub fn status_code(&self) -> WildcardResult {
    match (self.did_interrupt, self.did_overflow, self.did_add) {
        (true, _, _) => WildcardResult::Cancel,
        (false, true, _) => WildcardResult::Overflow,
        (false, false, true) => WildcardResult::Match,
        (false, false, false) => WildcardResult::NoMatch,
    }
}
}
impl<'e> WildCardExpander<'e> {
/// Handle a wildcard whose final segment is empty, i.e. one that ends in a slash.
///
/// Outside of completions the directory itself is the single result. For completions every
/// non-hidden entry of the directory is offered (restricted to directories when
/// DIRECTORIES_ONLY is set).
fn expand_trailing_slash(&mut self, base_dir: &wstr, prefix: &wstr) {
    if self.interrupted_or_overflowed() {
        return;
    }

    let for_completions = self.flags.contains(ExpandFlags::FOR_COMPLETIONS);
    if !for_completions {
        // Trailing slash and not accepting incomplete, e.g. `echo /xyz/`. Insert this file, we
        // already know it exists!
        self.add_expansion_result(base_dir.to_owned());
        return;
    }

    // Trailing slashes and accepting incomplete, e.g. `echo /xyz/<tab>`. Everything is added.
    let mut listing = match self.open_dir(base_dir, false) {
        Ok(listing) => listing,
        Err(_) => return,
    };

    let dirs_only = self.flags.contains(ExpandFlags::DIRECTORIES_ONLY);
    loop {
        let Some(Ok(entry)) = listing.next() else {
            break;
        };
        if self.interrupted_or_overflowed() {
            break;
        }

        // Note that is_dir() may cause a stat() call, so it is only asked for when directories
        // are required.
        let known_dir = if dirs_only {
            if !entry.is_dir() {
                continue;
            }
            true
        } else {
            false
        };

        // Skip nameless and hidden entries.
        if entry.name.is_empty() || entry.name.starts_with('.') {
            continue;
        }

        let entry_path = base_dir.to_owned() + entry.name.as_utfstr();
        self.try_add_completion_result(&entry_path, &entry.name, L!(""), prefix, known_dir);
    }
}
/// Expand a non-final wildcard segment inside `base_dir`, whose entries are read from
/// `base_dir_iter`. ANY_STRING_RECURSIVE is treated as ANY_STRING here.
///
/// Every directory entry matching `wc_segment` is descended into, continuing the expansion with
/// `wc_remainder`. `prefix` is the completion prefix; `wc_segment` plus a slash is appended to
/// it for the children.
fn expand_intermediate_segment(
    &mut self,
    base_dir: &wstr,
    base_dir_iter: &mut DirIter,
    wc_segment: &wstr,
    wc_remainder: &wstr,
    prefix: &wstr,
) {
    loop {
        if self.interrupted_or_overflowed() {
            break;
        }
        let Some(Ok(entry)) = base_dir_iter.next() else {
            break;
        };

        // Only matching directories are of interest. Note that it's critical we ignore leading
        // dots in the match, else we may descend into . and ..
        if !wildcard_match(&entry.name, wc_segment, true) || !entry.is_dir() {
            continue;
        }

        let Some(statbuf) = entry.stat() else {
            continue;
        };
        let file_id = FileId::from_stat(&statbuf);
        let first_visit = self.visited_files.insert(file_id.clone());
        if !first_visit {
            // Symlink loop! This directory was already visited, so skip it.
            continue;
        }

        let child_dir: WString = base_dir.to_owned() + entry.name.as_utfstr() + L!("/");
        let child_prefix: WString = prefix.to_owned() + wc_segment + L!("/");
        self.expand(&child_dir, wc_remainder, &child_prefix);

        // Now remove the visited file. This is for #2414: only directories "beneath" us should be
        // considered visited.
        self.visited_files.remove(&file_id);
    }
}
/// Given a directory base_dir, which is opened as base_dir_iter, expand an intermediate literal
/// segment. Use a fuzzy matching algorithm.
///
/// Every directory entry whose name fuzzily (but not same-case exactly) matches wc_segment is
/// descended into with wc_remainder. Completions produced below such a directory are marked as
/// replacing the token and are made at least as fuzzy as the directory match itself.
fn expand_literal_intermediate_segment_with_fuzz(
&mut self,
base_dir: &wstr,
base_dir_iter: &mut DirIter,
wc_segment: &wstr,
wc_remainder: &wstr,
prefix: &wstr,
) {
// Mark that we are fuzzy for the duration of this function
// (presumably scoped_push restores the previous value when `this` is dropped -- confirm).
let mut this = scoped_push(self, |e| &mut e.has_fuzzy_ancestor, true);
while !this.interrupted_or_overflowed() {
let Some(Ok(entry)) = base_dir_iter.next() else {
break;
};
// Don't bother with . and ..
if entry.name == "." || entry.name == ".." {
continue;
}
let Some(m) = string_fuzzy_match_string(wc_segment, &entry.name, false) else {
continue;
};
// Skip same-case exact matches; only inexact matches are handled here.
// (The first port had !n.is_samecase_exact.)
if m.is_samecase_exact() {
continue;
}
// Note is_dir() may trigger a stat call.
if !entry.is_dir() {
continue;
}
// Determine the effective prefix for our children.
// Normally this would be the wildcard segment, but here we know our segment doesn't have
// wildcards ("literal") and we are doing fuzzy expansion, which means we replace the
// segment with files found through fuzzy matching.
let child_prefix: WString = prefix.to_owned() + entry.name.as_utfstr() + L!("/");
let new_full_path: WString = base_dir.to_owned() + entry.name.as_utfstr() + L!("/");
// Ok, this directory matches. Recurse to it. Then mark each resulting completion as fuzzy.
let before = this.resolved_completions.len();
this.expand(&new_full_path, wc_remainder, &child_prefix);
let after = this.resolved_completions.len();
assert!(before <= after);
// Only the completions added by the recursive call above are touched.
for c in this.resolved_completions[before..after].iter_mut() {
// Mark the completion as replacing.
if !c.replaces_token() {
c.flags |= CompleteFlags::REPLACES_TOKEN;
c.prepend_token_prefix(&child_prefix);
}
// And every match must be made at least as fuzzy as ours.
// TODO: justify this, tests do not exercise it yet.
if m.rank() > c.r#match.rank() {
// Our match is fuzzier.
c.r#match = m.clone();
}
}
}
}
/// Given a directory base_dir, which is opened as base_dir_iter, expand the last segment of the
/// wildcard. Treat ANY_STRING_RECURSIVE as ANY_STRING. wc is the wildcard segment to use for
/// matching, wc_remainder is the wildcard for subdirectories, prefix is the prefix for
/// completions.
fn expand_last_segment(
&mut self,
base_dir: &wstr,
base_dir_iter: &mut DirIter,
wc: &wstr,
prefix: &wstr,
) {
let is_dir = false;
let need_dir = self.flags.contains(ExpandFlags::DIRECTORIES_ONLY);
while !self.interrupted_or_overflowed() {
let Some(Ok(entry)) = base_dir_iter.next() else {
break;
};
2023-09-15 12:38:49 +00:00
if need_dir && !entry.is_dir() {
continue;
}
if self.flags.contains(ExpandFlags::FOR_COMPLETIONS) {
self.try_add_completion_result(
&(base_dir.to_owned() + entry.name.as_utfstr()),
&entry.name,
wc,
prefix,
is_dir,
);
} else {
// Normal wildcard expansion, not for completions.
if wildcard_match(
&entry.name,
wc,
true, /* skip files with leading dots */
) {
self.add_expansion_result(base_dir.to_owned() + entry.name.as_utfstr());
}
}
}
}
/// Report whether expansion should stop, either because of cancellation or overflow.
///
/// The cancel checker is polled on every call; once it has reported cancellation the
/// interrupted state stays set.
fn interrupted_or_overflowed(&mut self) -> bool {
    let cancel_requested = (self.cancel_checker)();
    if cancel_requested {
        self.did_interrupt = true;
    }
    self.did_interrupt || self.did_overflow
}
/// Record `result` as an expansion result unless it was already recorded.
///
/// Only valid when not expanding for completions. If the receiver refuses the result, the
/// overflow flag is set.
fn add_expansion_result(&mut self, result: WString) {
    // This function is only for the non-completions case.
    assert!(!self.flags.contains(ExpandFlags::FOR_COMPLETIONS));

    let newly_seen = self.completion_set.insert(result.clone());
    if !newly_seen {
        return;
    }

    let accepted = self.resolved_completions.add(result);
    if !accepted {
        self.did_overflow = true;
    }
}
// Given a start point as an absolute path, for any directory that has exactly one non-hidden
// entity in it which is itself a directory, return that. The result is a relative path. For
// example, if start_point is '/usr' we may return 'local/bin/'.
//
// The result does not have a leading slash, but does have a trailing slash if non-empty.
//
// start_point is extended in place with the components that were descended into.
// Panics if start_point is empty or not absolute.
fn descend_unique_hierarchy(&mut self, start_point: &mut WString) -> WString {
    // The start point must be absolute, i.e. begin with a slash.
    assert!(
        !start_point.is_empty() && start_point.starts_with('/'),
        "start_point must be a non-empty absolute path"
    );

    let mut unique_hierarchy = WString::new();
    let abs_unique_hierarchy = start_point;

    // Ensure we don't fall into a symlink loop.
    // Ideally we would compare both devices and inodes, but devices require a stat call, so we
    // use inodes exclusively.
    let mut visited_inodes: HashSet<libc::ino_t> = HashSet::new();

    loop {
        let mut unique_entry = WString::new();
        let Ok(mut dir) = DirIter::new(abs_unique_hierarchy) else {
            break;
        };

        while let Some(Ok(entry)) = dir.next() {
            if entry.name.is_empty() || entry.name.starts_with('.') {
                // either hidden, or . and .. entries -- skip them
                continue;
            }
            if !visited_inodes.insert(entry.inode) {
                // Either we've visited this inode already or there's multiple files;
                // either way stop.
                break;
            } else if entry.is_dir() && unique_entry.is_empty() {
                // first candidate
                unique_entry = entry.name.to_owned();
            } else {
                // We either have two or more candidates, or the child is not a directory. We're
                // done.
                unique_entry.clear();
                break;
            }
        }

        // We stop if we got two or more entries; also stop if we got zero or were interrupted
        if unique_entry.is_empty() || self.interrupted_or_overflowed() {
            break;
        }

        append_path_component(&mut unique_hierarchy, &unique_entry);
        unique_hierarchy.push('/');

        append_path_component(abs_unique_hierarchy, &unique_entry);
        abs_unique_hierarchy.push('/');
    }

    return unique_hierarchy;
}
/// Try to turn one directory entry into a completion.
///
/// `filepath` is resolved against the working directory for the file checks, `filename` is
/// matched against `wildcard`, and `prefix` is prepended to completions that replace their
/// token. `known_dir` is forwarded to wildcard_test_flags_then_complete. On success `did_add`
/// is set.
fn try_add_completion_result(
    &mut self,
    filepath: &wstr,
    filename: &wstr,
    wildcard: &wstr,
    prefix: &wstr,
    known_dir: bool,
) {
    // This function is only for the completions case.
    assert!(self.flags.contains(ExpandFlags::FOR_COMPLETIONS));

    let mut abs_path = self.working_directory.to_owned();
    append_path_component(&mut abs_path, filepath);
    let logical_paths = self.flags.contains(ExpandFlags::SPECIAL_FOR_CD);
    if logical_paths {
        // We must normalize the path to allow 'cd ..' to operate on logical paths.
        abs_path = normalize_path(&abs_path, true);
    }

    let first_new = self.resolved_completions.len();
    let added = wildcard_test_flags_then_complete(
        &abs_path,
        filename,
        wildcard,
        self.flags,
        self.resolved_completions,
        known_dir,
    );
    if !added {
        return;
    }

    // Hack. We added this completion result based on the last component of the wildcard.
    // Prepend our prefix to each wildcard that replaces its token.
    // Note that prepend_token_prefix is a no-op unless COMPLETE_REPLACES_TOKEN is set
    let end_new = self.resolved_completions.len();
    let fuzzy_ancestor = self.has_fuzzy_ancestor;
    for comp in self.resolved_completions[first_new..end_new].iter_mut() {
        if fuzzy_ancestor && !comp.flags.contains(CompleteFlags::REPLACES_TOKEN) {
            comp.flags |= CompleteFlags::REPLACES_TOKEN;
            comp.prepend_token_prefix(wildcard);
        }
        comp.prepend_token_prefix(prefix);
    }

    // Implement special_for_cd_autosuggestion by descending the deepest unique
    // hierarchy we can, and then appending any components to each new result.
    // Only descend deepest unique for cd autosuggest and not for cd tab completion
    // (issue #4402).
    let cd_autosuggestion = self
        .flags
        .contains(ExpandFlags::SPECIAL_FOR_CD_AUTOSUGGESTION);
    if cd_autosuggestion {
        let unique_hierarchy = self.descend_unique_hierarchy(&mut abs_path);
        if !unique_hierarchy.is_empty() {
            for comp in self.resolved_completions[first_new..end_new].iter_mut() {
                comp.completion.push_utfstr(&unique_hierarchy);
            }
        }
    }

    self.did_add = true;
}
// Open `base_dir`, resolved against our working directory, for iteration.
/// `dotdot` selects the iterator variant that also yields the "." and ".." entries; callers
/// that do not want them pass false.
fn open_dir(&self, base_dir: &wstr, dotdot: bool) -> std::io::Result<DirIter> {
    let mut path = self.working_directory.to_owned();
    append_path_component(&mut path, base_dir);

    let logical_paths = self.flags.contains(ExpandFlags::SPECIAL_FOR_CD);
    if logical_paths {
        // cd operates on logical paths.
        // for example, cd ../<tab> should complete "without resolving symlinks".
        path = normalize_path(&path, true);
    }

    if dotdot {
        DirIter::new_with_dots(&path)
    } else {
        DirIter::new(&path)
    }
}
}
}
/// Expand the wildcard by matching against the filesystem.
///
/// wildcard_expand works by dividing the wildcard into segments at each directory boundary. Each
/// segment is processed separately. All except the last segment are handled by matching the
/// wildcard segment against all subdirectories of matching directories, and recursively calling
/// wildcard_expand for matches. On the last segment, matching is made to any file, and all matches
/// are inserted to the list.
///
/// If wildcard_expand encounters any errors (such as insufficient privileges) during matching, no
/// error messages will be printed and wildcard_expand will continue the matching process.
///
/// \param wc The wildcard string
/// \param working_directory The working directory
/// \param flags flags for the search. Can be any combination of for_completions and
/// executables_only
/// \param output The list in which to put the output
///
2023-08-27 17:35:31 +00:00
pub fn wildcard_expand_string<'closure>(
wc: &wstr,
working_directory: &wstr,
flags: ExpandFlags,
2023-08-27 17:35:31 +00:00
mut cancel_checker: impl FnMut() -> bool + 'closure,
output: &mut CompletionReceiver,
) -> WildcardResult {
// Fuzzy matching only if we're doing completions.
assert!(
flags.contains(ExpandFlags::FOR_COMPLETIONS) || !flags.contains(ExpandFlags::FUZZY_MATCH)
);
// ExpandFlags::SPECIAL_FOR_CD requires expand_flag::DIRECTORIES_ONLY and
// ExpandFlags::FOR_COMPLETIONS and !expand_flag::GEN_DESCRIPTIONS.
assert!(
!(flags.contains(ExpandFlags::SPECIAL_FOR_CD))
|| ((flags.contains(ExpandFlags::DIRECTORIES_ONLY))
&& (flags.contains(ExpandFlags::FOR_COMPLETIONS))
&& (!flags.contains(ExpandFlags::GEN_DESCRIPTIONS)))
);
// Hackish fix for issue #1631. We are about to call c_str(), which will produce a string
// truncated at any embedded nulls. We could fix this by passing around the size, etc. However
// embedded nulls are never allowed in a filename, so we just check for them and return 0 (no
// matches) if there is an embedded null.
if wc.contains('\0') {
return WildcardResult::NoMatch;
}
// We do not support tab-completing recursive (**) wildcards. This is historic behavior.
// Do not descend any directories if there is a ** wildcard.
if flags.contains(ExpandFlags::FOR_COMPLETIONS) && wc.contains(ANY_STRING_RECURSIVE) {
return WildcardResult::NoMatch;
}
// Compute the prefix and base dir. The prefix is what we prepend for filesystem operations
// (i.e. the working directory), the base_dir is the part of the wildcard consumed thus far,
// which we also have to append. The difference is that the base_dir is returned as part of the
// expansion, and the prefix is not.
//
// Check for a leading slash. If we find one, we have an absolute path: the prefix is empty, the
// base dir is /, and the wildcard is the remainder. If we don't find one, the prefix is the
// working directory, the base dir is empty.
let (prefix, base_dir, effective_wc) = if wc.starts_with(L!("/")) {
(L!(""), L!("/"), wc.slice_from(1))
} else {
(working_directory, L!(""), wc)
};
2023-08-27 17:35:31 +00:00
let mut expander = WildCardExpander::new(prefix, flags, &mut cancel_checker, output);
expander.expand(base_dir, effective_wc, base_dir);
return expander.status_code();
}
/// Test whether the given wildcard matches the string. Does not perform any I/O.
///
/// \param str The string to test
/// \param wc The wildcard to test against
/// \param leading_dots_fail_to_match if set, strings with leading dots are assumed to be hidden
/// files and are not matched (default was false)
///
/// \return true if the wildcard matched
#[must_use]
pub fn wildcard_match(
name: impl AsRef<wstr>,
pattern: impl AsRef<wstr>,
leading_dots_fail_to_match: bool,
) -> bool {
let name = name.as_ref();
let pattern = pattern.as_ref();
// Hackish fix for issue #270. Prevent wildcards from matching . or .., but we must still allow
// literal matches.
if leading_dots_fail_to_match && (name == L!(".") || name == L!("..")) {
// The string is '.' or '..' so the only possible match is an exact match.
return name == pattern;
}
// Near Linear implementation as proposed here https://research.swtch.com/glob.
let mut px = 0;
let mut nx = 0;
let mut next_px = 0;
let mut next_nx = 0;
while px < pattern.len() || nx < name.len() {
if px < pattern.len() {
match pattern.char_at(px) {
ANY_STRING | ANY_STRING_RECURSIVE => {
// Ignore hidden file
if leading_dots_fail_to_match && nx == 0 && name.char_at(0) == '.' {
return false;
}
// Common case of * at the end. In that case we can early out since we know it will
// match.
if px == pattern.len() - 1 {
return true;
}
// Try to match at nx.
// If that doesn't work out, restart at nx+1 next.
next_px = px;
next_nx = nx + 1;
px += 1;
continue;
}
ANY_CHAR => {
if nx < name.len() {
if nx == 0 && name.char_at(nx) == '.' {
return false;
}
px += 1;
nx += 1;
continue;
}
}
c => {
// ordinary char
if nx < name.len() && name.char_at(nx) == c {
px += 1;
nx += 1;
continue;
}
}
}
}
// Mismatch. Maybe restart.
if 0 < next_nx && next_nx <= name.len() {
px = next_px;
nx = next_nx;
continue;
}
return false;
}
// Matched all of pattern to all of name. Success.
true
}
// Check if the string has any unescaped wildcards (e.g. ANY_STRING).
#[inline]
#[must_use]
fn wildcard_has_internal(s: impl AsRef<wstr>) -> bool {
s.as_ref()
.chars()
.any(|c| matches!(c, ANY_STRING | ANY_STRING_RECURSIVE | ANY_CHAR))
}
/// Check if the specified string contains wildcards (e.g. *).
///
/// The string is unescaped first, so an escaped `*` or `?` does not count as a wildcard. `?` is
/// only considered when the `qmark_noglob` feature flag is off.
#[must_use]
fn wildcard_has(s: impl AsRef<wstr>) -> bool {
    let text = s.as_ref();
    let qmark_is_wild = !feature_test(FeatureFlag::qmark_noglob);

    // Fast check for * or ?; if none there is no wildcard.
    // Note some strings contain * but no wildcards, e.g. if they are quoted.
    let may_have_wildcard = text.contains('*') || (qmark_is_wild && text.contains('?'));
    if !may_have_wildcard {
        return false;
    }

    // A string that fails to unescape is treated as empty, which has no wildcards.
    let unescaped = unescape_string(text, UnescapeStringStyle::Script(UnescapeFlags::SPECIAL))
        .unwrap_or_default();
    wildcard_has_internal(unescaped)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::future_feature_flags::scoped_test;

    /// Exercise `wildcard_has` and `wildcard_has_internal` on escaped and unescaped input.
    #[test]
    fn test_wildcards() {
        // Empty, bare and backslash-escaped stars.
        assert!(!wildcard_has(L!("")));
        assert!(wildcard_has(L!("*")));
        assert!(!wildcard_has(L!("\\*")));

        // A still-escaped string has a wildcard, but no internal wildcard character yet.
        let escaped = L!("foo*bar");
        assert!(wildcard_has(escaped));
        assert!(!wildcard_has_internal(escaped));

        // After unescaping it is the other way around.
        let unescaped =
            unescape_string(escaped, UnescapeStringStyle::Script(UnescapeFlags::SPECIAL)).unwrap();
        assert!(!wildcard_has(&unescaped));
        assert!(wildcard_has_internal(&unescaped));

        // With qmark_noglob off, a bare ? is a wildcard; an escaped one is not.
        scoped_test(FeatureFlag::qmark_noglob, false, || {
            assert!(wildcard_has(L!("?")));
            assert!(!wildcard_has(L!("\\?")));
        });

        // With qmark_noglob on, ? is never a wildcard.
        scoped_test(FeatureFlag::qmark_noglob, true, || {
            assert!(!wildcard_has(L!("?")));
            assert!(!wildcard_has(L!("\\?")));
        });
    }
}
// cxx bridge that makes the wildcard helpers below callable from C++.
#[cxx::bridge]
mod ffi {
extern "C++" {
// C++ header included on the C++ side of the bridge.
include!("wutil.h");
}
extern "Rust" {
// Each entry declares a Rust function defined in this file; `cxx_name` is the name under
// which it is exposed to C++.
#[cxx_name = "wildcard_match_ffi"]
fn wildcard_match_ffi(
str: &CxxWString,
wc: &CxxWString,
leading_dots_fail_to_match: bool,
) -> bool;
// Exposed to C++ as `wildcard_has`.
#[cxx_name = "wildcard_has"]
fn wildcard_has_ffi(s: &CxxWString) -> bool;
// Exposed to C++ as `wildcard_has_internal`.
#[cxx_name = "wildcard_has_internal"]
fn wildcard_has_internal_ffi(s: &CxxWString) -> bool;
}
}
/// FFI shim for `wildcard_match`: converts both C++ wide strings and forwards the call.
///
/// `str` is the string to test and `wc` is the wildcard to test it against.
fn wildcard_match_ffi(str: &CxxWString, wc: &CxxWString, leading_dots_fail_to_match: bool) -> bool {
    let name = str.from_ffi();
    let pattern = wc.from_ffi();
    wildcard_match(name, pattern, leading_dots_fail_to_match)
}
/// FFI shim for `wildcard_has`: converts the C++ wide string and forwards the call.
fn wildcard_has_ffi(s: &CxxWString) -> bool {
    let converted = s.from_ffi();
    wildcard_has(converted)
}
/// FFI shim for `wildcard_has_internal`: converts the C++ wide string and forwards the call.
fn wildcard_has_internal_ffi(s: &CxxWString) -> bool {
    let converted = s.from_ffi();
    wildcard_has_internal(converted)
}