// Enumeration of all wildcard types. use std::cmp::Ordering; use std::collections::HashSet; use std::io::ErrorKind; use std::os::unix::prelude::*; use std::{fs, io}; use cxx::CxxWString; use libc::{mode_t, ELOOP, S_IXGRP, S_IXOTH, S_IXUSR, X_OK}; use crate::common::{ char_offset, format_size, is_windows_subsystem_for_linux, unescape_string, UnescapeFlags, UnescapeStringStyle, WILDCARD_RESERVED_BASE, }; use crate::complete::{CompleteFlags, Completion, CompletionReceiver, PROG_COMPLETE_SEP}; use crate::expand::ExpandFlags; use crate::fallback::wcscasecmp; use crate::future_feature_flags::feature_test; use crate::future_feature_flags::FeatureFlag; use crate::wchar::prelude::*; use crate::wchar_ffi::WCharFromFFI; use crate::wcstringutil::{ string_fuzzy_match_string, string_suffixes_string_case_insensitive, CaseFold, }; use crate::wutil::{lwstat, waccess, wstat}; use once_cell::sync::Lazy; static COMPLETE_EXEC_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("command")); static COMPLETE_EXEC_LINK_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("command link")); static COMPLETE_CHAR_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("char device")); static COMPLETE_BLOCK_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("block device")); static COMPLETE_FIFO_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("fifo")); static COMPLETE_FILE_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("file")); static COMPLETE_SYMLINK_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("symlink")); static COMPLETE_DIRECTORY_SYMLINK_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("dir symlink")); static COMPLETE_BROKEN_SYMLINK_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("broken symlink")); static COMPLETE_LOOP_SYMLINK_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("symlink loop")); static COMPLETE_SOCKET_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("socket")); static COMPLETE_DIRECTORY_DESC: Lazy<&wstr> = Lazy::new(|| wgettext!("directory")); /// Character representing any character except '/' (slash). 
pub const ANY_CHAR: char = char_offset(WILDCARD_RESERVED_BASE, 0); /// Character representing any character string not containing '/' (slash). pub const ANY_STRING: char = char_offset(WILDCARD_RESERVED_BASE, 1); /// Character representing any character string. pub const ANY_STRING_RECURSIVE: char = char_offset(WILDCARD_RESERVED_BASE, 2); /// This is a special pseudo-char that is not used other than to mark the /// end of the the special characters so we can sanity check the enum range. pub const ANY_SENTINEL: char = char_offset(WILDCARD_RESERVED_BASE, 3); #[derive(PartialEq)] pub enum WildcardResult { /// The wildcard did not match. NoMatch, /// The wildcard did match. Match, /// Expansion was cancelled (e.g. control-C). Cancel, /// Expansion produced too many results. Overflow, } fn resolve_description<'f>( full_completion: &wstr, completion: &mut &wstr, expand_flags: ExpandFlags, description_func: Option<&'f dyn Fn(&wstr) -> WString>, ) -> WString { if let Some(complete_sep_loc) = completion.find_char(PROG_COMPLETE_SEP) { // This completion has an embedded description, do not use the generic description. let description = completion[complete_sep_loc + 1..].to_owned(); *completion = &completion[..complete_sep_loc]; return description; } if let Some(f) = description_func { if expand_flags.contains(ExpandFlags::GEN_DESCRIPTIONS) { return f(full_completion); } } WString::new() } // A transient parameter pack needed by wildcard_complete. struct WcCompletePack<'orig, 'f> { pub orig: &'orig wstr, pub desc_func: Option<&'f dyn Fn(&wstr) -> WString>, pub expand_flags: ExpandFlags, } // Weirdly specific and non-reusable helper function that makes its one call site much clearer. fn has_prefix_match(comps: &CompletionReceiver, first: usize) -> bool { comps[first..] 
.iter() .any(|c| c.r#match.is_exact_or_prefix() && c.r#match.case_fold == CaseFold::samecase) } /// Matches the string against the wildcard, and if the wildcard is a possible completion of the /// string, the remainder of the string is inserted into the out vector. /// /// We ignore ANY_STRING_RECURSIVE here. The consequence is that you cannot tab complete ** /// wildcards. This is historic behavior. /// is_first_call is default false. #[allow(clippy::unnecessary_unwrap)] fn wildcard_complete_internal( s: &wstr, wc: &wstr, params: &WcCompletePack, flags: CompleteFlags, // it is easier to recurse with this over taking it by value mut out: Option<&mut CompletionReceiver>, is_first_call: bool, ) -> WildcardResult { // Maybe early out for hidden files. We require that the wildcard match these exactly (i.e. a // dot); ANY_STRING not allowed. if is_first_call && !params .expand_flags .contains(ExpandFlags::ALLOW_NONLITERAL_LEADING_DOT) && s.char_at(0) == '.' && wc.char_at(0) != '.' { return WildcardResult::NoMatch; } // Locate the next wildcard character position, e.g. ANY_CHAR or ANY_STRING. let next_wc_char_pos = wc .chars() .position(|c| matches!(c, ANY_CHAR | ANY_STRING | ANY_STRING_RECURSIVE)); // Maybe we have no more wildcards at all. This includes the empty string. if next_wc_char_pos.is_none() { // Try matching let Some(m) = string_fuzzy_match_string(wc, s, false) else { return WildcardResult::NoMatch; }; // If we're not allowing fuzzy match, then we require a prefix match. let needs_prefix_match = !params.expand_flags.contains(ExpandFlags::FUZZY_MATCH); if needs_prefix_match && !m.is_exact_or_prefix() { return WildcardResult::NoMatch; } // The match was successful. If the string is not requested we're done. let Some(out) = out else { return WildcardResult::Match; }; // Wildcard complete. 
let full_replacement = m.requires_full_replacement() || flags.contains(CompleteFlags::REPLACES_TOKEN); // If we are not replacing the token, be careful to only store the part of the string after // the wildcard. assert!(!full_replacement || wc.len() <= s.len()); let mut out_completion = match full_replacement { true => params.orig, false => s.slice_from(wc.len()), }; let out_desc = resolve_description( params.orig, &mut out_completion, params.expand_flags, params.desc_func, ); // Note: out_completion may be empty if the completion really is empty, e.g. tab-completing // 'foo' when a file 'foo' exists. let local_flags = if full_replacement { flags | CompleteFlags::REPLACES_TOKEN } else { flags }; if !out.add(Completion { completion: out_completion.to_owned(), description: out_desc, flags: local_flags, r#match: m, }) { return WildcardResult::Overflow; } return WildcardResult::Match; } else if let Some(next_wc_char_pos @ 1..) = next_wc_char_pos { // The literal portion of a wildcard cannot be longer than the string itself, // e.g. `abc*` can never match a string that is only two characters long. if next_wc_char_pos >= s.len() { return WildcardResult::NoMatch; } let (s_pre, s_suf) = s.split_at(next_wc_char_pos); let (wc_pre, wc_suf) = wc.split_at(next_wc_char_pos); // Here we have a non-wildcard prefix. Note that we don't do fuzzy matching for stuff before // a wildcard, so just do case comparison and then recurse. if s_pre == wc_pre { // Normal match. return wildcard_complete_internal(s_suf, wc_suf, params, flags, out, false); } if wcscasecmp(s_pre, wc_pre) == Ordering::Equal { // Case insensitive match. return wildcard_complete_internal( s.slice_from(next_wc_char_pos), wc.slice_from(next_wc_char_pos), params, flags | CompleteFlags::REPLACES_TOKEN, out, false, ); } return WildcardResult::NoMatch; } // Our first character is a wildcard. 
assert_eq!(next_wc_char_pos, Some(0)); match wc.char_at(0) { ANY_CHAR => { if s.is_empty() { return WildcardResult::NoMatch; } return wildcard_complete_internal( s.slice_from(1), wc.slice_from(1), params, flags, out, false, ); } ANY_STRING => { // Hackish. If this is the last character of the wildcard, then just complete with // the empty string. This fixes cases like "f*" -> "f*o". if wc.len() == 1 { return wildcard_complete_internal(L!(""), L!(""), params, flags, out, false); } // Try all submatches. Issue #929: if the recursive call gives us a prefix match, // just stop. This is sloppy - what we really want to do is say, once we've seen a // match of a particular type, ignore all matches of that type further down the // string, such that the wildcard produces the "minimal match.". let mut has_match = false; for i in 0..s.len() { let before_count = out.as_ref().map(|o| o.len()).unwrap_or_default(); let submatch_res = wildcard_complete_internal( s.slice_from(i), wc.slice_from(1), params, flags, out.as_deref_mut(), false, ); match submatch_res { WildcardResult::NoMatch => continue, WildcardResult::Match => { has_match = true; // If out is NULL, we don't care about the actual matches. If out is not // NULL but we have a prefix match, stop there. let Some(out) = out.as_mut() else { return WildcardResult::Match; }; if has_prefix_match(out, before_count) { return WildcardResult::Match; } continue; } // Note early return WildcardResult::Cancel | WildcardResult::Overflow => return submatch_res, } } return match has_match { true => WildcardResult::Match, false => WildcardResult::NoMatch, }; } // We don't even try with this one. 
ANY_STRING_RECURSIVE => WildcardResult::NoMatch,
        _ => unreachable!(),
    }
}

/// Match the string `s` against the wildcard `wc` for completion purposes.
///
/// If `out` is provided, matching completions are appended to it; if it is None, only the
/// match result is computed. `desc_func`, when provided, is used to generate descriptions
/// if ExpandFlags::GEN_DESCRIPTIONS is set.
pub fn wildcard_complete<'f>(
    s: &wstr,
    wc: &wstr,
    desc_func: Option<&'f dyn Fn(&wstr) -> WString>,
    out: Option<&mut CompletionReceiver>,
    expand_flags: ExpandFlags,
    flags: CompleteFlags,
) -> WildcardResult {
    let params = WcCompletePack {
        orig: s,
        desc_func,
        expand_flags,
    };
    wildcard_complete_internal(s, wc, &params, flags, out, true)
}

/// Obtain a description string for the file specified by the filename.
///
/// The returned value is one of the static `COMPLETE_*_DESC` strings.
///
/// \param filename The file for which to find a description string
/// \param lstat The lstat() metadata of the file, or None if lstat failed
/// \param stat The result of calling stat on the file; must be Some whenever lstat is Some
/// \param definitely_executable If true, the file is treated as executable without further checks
fn file_get_desc(
    filename: &wstr,
    lstat: Option<fs::Metadata>,
    stat: Option<io::Result<fs::Metadata>>,
    definitely_executable: bool,
) -> &'static wstr {
    let Some(lstat) = lstat else {
        return *COMPLETE_FILE_DESC;
    };

    let is_executable = |buf: &fs::Metadata, filename: &wstr| -> bool {
        // Weird group permissions and other such issues make it non-trivial to find out if
        // we can actually execute a file using the result from stat. It is much safer to
        // use the access function, since it tells us exactly what we want to know.
        definitely_executable
            || (buf.mode() as mode_t & (S_IXUSR | S_IXGRP | S_IXOTH) != 0)
                && waccess(filename, X_OK) == 0
    };

    // stat was only queried if lstat succeeded
    let stat = stat.expect("stat must be provided whenever lstat succeeded");
    if lstat.is_symlink() {
        return match stat {
            Ok(stat) if stat.is_dir() => *COMPLETE_DIRECTORY_SYMLINK_DESC,
            Ok(stat) if is_executable(&stat, filename) => *COMPLETE_EXEC_LINK_DESC,
            Ok(_) => *COMPLETE_SYMLINK_DESC,
            Err(e) if e.kind() == ErrorKind::NotFound => *COMPLETE_BROKEN_SYMLINK_DESC,
            // Compare against Some(..) rather than unwrapping: an error that carries no OS
            // error code must fall through to the default instead of panicking.
            Err(e) if e.raw_os_error() == Some(ELOOP) => *COMPLETE_LOOP_SYMLINK_DESC,
            _ => {
                // On unknown errors we do nothing. The file will be given the default 'File'
                // description or one based on the suffix.
                *COMPLETE_FILE_DESC
            }
        };
    }

    let Ok(stat) = stat else {
        // Assuming that the metadata was zero if stat-call failed
        return *COMPLETE_FILE_DESC;
    };

    let ft = stat.file_type();
    if ft.is_char_device() {
        *COMPLETE_CHAR_DESC
    } else if ft.is_block_device() {
        *COMPLETE_BLOCK_DESC
    } else if ft.is_fifo() {
        *COMPLETE_FIFO_DESC
    } else if ft.is_socket() {
        *COMPLETE_SOCKET_DESC
    } else if ft.is_dir() {
        *COMPLETE_DIRECTORY_DESC
    } else if is_executable(&stat, filename) {
        *COMPLETE_EXEC_DESC
    } else {
        *COMPLETE_FILE_DESC
    }
}

/// Test if the given file is an executable (if executables_only) or directory (if
/// directories_only). If it matches, call wildcard_complete() with some description that we make
/// up. Note that the filename came from a readdir() call, so we know it exists.
fn wildcard_test_flags_then_complete(
    filepath: &wstr,
    filename: &wstr,
    wc: &wstr,
    expand_flags: ExpandFlags,
    out: &mut CompletionReceiver,
    known_dir: bool,
) -> bool {
    let executables_only = expand_flags.contains(ExpandFlags::EXECUTABLES_ONLY);
    let need_directory = expand_flags.contains(ExpandFlags::DIRECTORIES_ONLY);
    // Fast path: If we need directories, and we already know it is one,
    // and we don't need to do anything else, just return it.
    // This is a common case for cd completions, and removes the `stat` entirely in case the system
    // supports it.
    if known_dir && !executables_only && !expand_flags.contains(ExpandFlags::GEN_DESCRIPTIONS) {
        return wildcard_complete(
            &(filename.to_owned() + L!("/")),
            wc,
            Some(&|_| L!("").to_owned()),
            Some(out),
            expand_flags,
            CompleteFlags::NO_SPACE,
        ) == WildcardResult::Match;
    }
    // Check if it will match before stat().
    if wildcard_complete(
        filename,
        wc,
        None,
        None,
        expand_flags,
        CompleteFlags::default(),
    ) != WildcardResult::Match
    {
        return false;
    }

    let lstat: Option<fs::Metadata> = lwstat(filepath).ok();
    let stat: Option<io::Result<fs::Metadata>>;
    if let Some(md) = &lstat {
        if md.is_symlink() {
            // In order to differentiate between e.g. broken symlinks and symlink loops, we also
            // need to know the error status of wstat.
            stat = Some(wstat(filepath));
        } else {
            stat = Some(Ok(md.clone()));
        }
    } else {
        stat = None;
    }

    let (file_size, is_directory, is_executable) = if let Some(Ok(md)) = &stat {
        (md.len(), md.is_dir(), md.is_file())
    } else {
        (0, false, false)
    };

    if need_directory && !is_directory {
        return false;
    }

    if executables_only && (!is_executable || waccess(filepath, X_OK) != 0) {
        return false;
    }

    if executables_only
        && is_windows_subsystem_for_linux()
        && string_suffixes_string_case_insensitive(L!(".dll"), filename)
    {
        return false;
    }

    // Compute the description.
    let desc = if expand_flags.contains(ExpandFlags::GEN_DESCRIPTIONS) {
        let mut desc = file_get_desc(filename, lstat, stat, executables_only).to_owned();

        if !is_directory && !is_executable {
            if !desc.is_empty() {
                desc.push_utfstr(L!(", "));
            }
            desc.push_utfstr(&format_size(file_size.try_into().unwrap()));
        }

        Some(desc)
    } else {
        None
    };

    // Append a / if this is a directory. Note this requirement may be the only reason we have to
    // call stat() in some cases.
let desc_func = |_: &wstr| match desc.as_ref() {
        Some(d) => d.to_owned(),
        None => WString::new(),
    };
    let desc_func: Option<&dyn Fn(&wstr) -> WString> = Some(&desc_func);
    if is_directory {
        return wildcard_complete(
            &(filename.to_owned() + L!("/")),
            wc,
            desc_func,
            Some(out),
            expand_flags,
            CompleteFlags::NO_SPACE,
        ) == WildcardResult::Match;
    }
    wildcard_complete(
        filename,
        wc,
        desc_func,
        Some(out),
        expand_flags,
        CompleteFlags::empty(),
    ) == WildcardResult::Match
}

use expander::WildCardExpander;
mod expander {
    use libc::F_OK;

    use crate::{
        common::scoped_push,
        complete::CompleteFlags,
        path::append_path_component,
        wcstringutil::string_fuzzy_match_string,
        wutil::{dir_iter::DirIter, normalize_path, waccess, FileId},
    };

    use super::*;

    pub struct WildCardExpander<'e> {
        /// A function to call to check cancellation.
        cancel_checker: &'e mut dyn FnMut() -> bool,
        /// The working directory to resolve paths against
        working_directory: &'e wstr,
        /// The set of items we have resolved, used to efficiently avoid duplication.
        completion_set: HashSet<WString>,
        /// The set of file IDs we have visited, used to avoid symlink loops.
        visited_files: HashSet<FileId>,
        /// Flags controlling expansion.
        flags: ExpandFlags,
        /// Resolved items get inserted into here. This is transient of course.
        resolved_completions: &'e mut CompletionReceiver,
        /// Whether we have been interrupted.
        did_interrupt: bool,
        /// Whether we have overflowed.
        did_overflow: bool,
        /// Whether we have successfully added any completions.
        did_add: bool,
        /// Whether some parent expansion is fuzzy, and therefore completions always prepend their prefix
        /// This variable is a little suspicious - it should be passed along, not stored here
        /// If we ever try to do parallel wildcard expansion we'll have to remove this
        has_fuzzy_ancestor: bool,
    }

    impl<'e> WildCardExpander<'e> {
        /// Create an expander that resolves paths against `working_directory` and appends
        /// results to `resolved_completions`. Completions already present in the receiver seed
        /// the de-duplication set.
        pub fn new(
            working_directory: &'e wstr,
            flags: ExpandFlags,
            cancel_checker: &'e mut dyn FnMut() -> bool,
            resolved_completions: &'e mut CompletionReceiver,
        ) -> Self {
            Self {
                cancel_checker,
                working_directory,
                completion_set: resolved_completions
                    .iter()
                    .map(|c| c.completion.to_owned())
                    .collect(),
                visited_files: HashSet::new(),
                flags,
                resolved_completions,
                did_add: false,
                did_interrupt: false,
                did_overflow: false,
                has_fuzzy_ancestor: false,
            }
        }

        /// The real implementation of wildcard expansion is in this function. Other functions are just
        /// wrappers around this one.
        ///
        /// This function traverses the relevant directory tree looking for matches, and recurses when
        /// needed to handle wildcards spanning multiple components and recursive wildcards.
        ///
        /// Args:
        /// base_dir: the "working directory" against which the wildcard is to be resolved
        /// wc: the wildcard string itself, e.g. foo*bar/baz (where * is actually ANY_STRING)
        /// effective_prefix: the string that should be prepended for completions that replace their token.
        ///    This is usually the same thing as the original wildcard, but for fuzzy matching, we
        ///    expand intermediate segments. effective_prefix is always either empty, or ends with a slash
        pub fn expand(&mut self, base_dir: &wstr, wc: &wstr, effective_prefix: &wstr) {
            if self.interrupted_or_overflowed() {
                return;
            }

            // Get the current segment and compute interesting properties about it.
let (wc_segment, wc_remainder) = if let Some(next_slash) = wc.find_char('/') { let (seg, rem) = wc.split_at(next_slash); let rem_without_slash = rem.slice_from(1); (seg, Some(rem_without_slash)) } else { (wc, None) }; let is_last_segment = wc_remainder.is_none(); let segment_has_wildcards = wildcard_has_internal(wc_segment); if wc_segment.is_empty() { assert!(!segment_has_wildcards); if is_last_segment { self.expand_trailing_slash(base_dir, effective_prefix); } else { let mut prefix = effective_prefix.to_owned(); prefix.push('/'); self.expand(base_dir, wc_remainder.unwrap(), &prefix); } } else if !segment_has_wildcards && !is_last_segment { // Literal intermediate match. Note that we may not be able to actually read the directory // (issue #2099). let wc_remainder = wc_remainder.unwrap(); // TODO: if-let-chains // Absolute path of the intermediate directory let intermediate_dirpath: WString = base_dir.to_owned() + wc_segment + L!("/"); // This just trumps everything let before = self.resolved_completions.len(); let prefix: WString = effective_prefix.to_owned() + wc_segment + L!("/"); self.expand(&intermediate_dirpath, wc_remainder, &prefix); // Maybe try a fuzzy match (#94) if nothing was found with the literal match. Respect // EXPAND_NO_DIRECTORY_ABBREVIATIONS (issue #2413). 
// Don't do fuzzy matches if the literal segment was valid (#3211) let allow_fuzzy = self.flags.contains(ExpandFlags::FUZZY_MATCH) && !self.flags.contains(ExpandFlags::NO_FUZZY_DIRECTORIES); if allow_fuzzy && self.resolved_completions.len() == before && waccess(&intermediate_dirpath, F_OK) != 0 { assert!(self.flags.contains(ExpandFlags::FOR_COMPLETIONS)); if let Ok(mut base_dir_iter) = self.open_dir(base_dir, false) { self.expand_literal_intermediate_segment_with_fuzz( base_dir, &mut base_dir_iter, wc_segment, wc_remainder, effective_prefix, ); } } } else { assert!(!wc_segment.is_empty() && (segment_has_wildcards || is_last_segment)); if !is_last_segment && matches!(wc_segment.as_char_slice(), [ANY_STRING_RECURSIVE]) { // Hack for #7222. This is an intermediate wc segment that is exactly **. The // tail matches in subdirectories as normal, but also the current directory. // That is, '**/bar' may match 'bar' and 'foo/bar'. // Implement this by matching the wildcard tail only, in this directory. // Note if the segment is not exactly ANY_STRING_RECURSIVE then the segment may only // match subdirectories. self.expand(base_dir, wc_remainder.unwrap(), effective_prefix); if self.interrupted_or_overflowed() { return; } } // return "." and ".." entries if we're doing completions let Ok(mut dir) = self.open_dir( base_dir, /* return . and .. */ self.flags.contains(ExpandFlags::FOR_COMPLETIONS), ) else { return; }; if let Some(wc_remainder) = wc_remainder { // Not the last segment, nonempty wildcard. self.expand_intermediate_segment( base_dir, &mut dir, wc_segment, wc_remainder, &(effective_prefix.to_owned() + wc_segment + L!("/")), ); } else { // Last wildcard segment, nonempty wildcard. self.expand_last_segment(base_dir, &mut dir, wc_segment, effective_prefix); } let Some(asr_idx) = wc_segment.find_char(ANY_STRING_RECURSIVE) else { return; }; // Apply the recursive **. 
// Construct a "head + any" wildcard for matching stuff in this directory, and an // "any + tail" wildcard for matching stuff in subdirectories. Note that the // ANY_STRING_RECURSIVE character is present in both the head and the tail. let head_any = wc_segment.slice_to(asr_idx + 1); let any_tail = wc.slice_from(asr_idx); assert!(head_any.chars().last().unwrap() == ANY_STRING_RECURSIVE); assert!(any_tail.chars().next().unwrap() == ANY_STRING_RECURSIVE); dir.rewind(); self.expand_intermediate_segment( base_dir, &mut dir, head_any, any_tail, effective_prefix, ); } } pub fn status_code(&self) -> WildcardResult { if self.did_interrupt { return WildcardResult::Cancel; } else if self.did_overflow { return WildcardResult::Overflow; } else if self.did_add { WildcardResult::Match } else { WildcardResult::NoMatch } } } impl<'e> WildCardExpander<'e> { /// We are a trailing slash - expand at the end. fn expand_trailing_slash(&mut self, base_dir: &wstr, prefix: &wstr) { if self.interrupted_or_overflowed() { return; } if !self.flags.contains(ExpandFlags::FOR_COMPLETIONS) { // Trailing slash and not accepting incomplete, e.g. `echo /xyz/`. Insert this file, we already know it exists! self.add_expansion_result(base_dir.to_owned()); return; } // Trailing slashes and accepting incomplete, e.g. `echo /xyz/`. Everything is added. let Ok(mut dir) = self.open_dir(base_dir, false) else { return; }; // wreaddir_resolving without the out argument is just wreaddir. // So we can use the information in case we need it. let need_dir = self.flags.contains(ExpandFlags::DIRECTORIES_ONLY); while let Some(Ok(entry)) = dir.next() { if self.interrupted_or_overflowed() { break; } // Note that is_dir() may cause a stat() call. 
let known_dir = need_dir && entry.is_dir();
                if need_dir && !known_dir {
                    continue;
                };
                if !entry.name.is_empty() && !entry.name.starts_with('.') {
                    self.try_add_completion_result(
                        &(base_dir.to_owned() + entry.name.as_utfstr()),
                        &entry.name,
                        L!(""),
                        prefix,
                        known_dir,
                    );
                }
            }
        }

        /// Given a directory base_dir, which is opened as base_dir_iter, expand an intermediate segment
        /// of the wildcard. Treat ANY_STRING_RECURSIVE as ANY_STRING. wc_segment is the wildcard
        /// segment for this directory, wc_remainder is the wildcard for subdirectories,
        /// prefix is the prefix for completions.
        fn expand_intermediate_segment(
            &mut self,
            base_dir: &wstr,
            base_dir_iter: &mut DirIter,
            wc_segment: &wstr,
            wc_remainder: &wstr,
            prefix: &wstr,
        ) {
            while !self.interrupted_or_overflowed() {
                let Some(Ok(entry)) = base_dir_iter.next() else {
                    break;
                };
                // Note that it's critical we ignore leading dots here, else we may descend into . and ..
                if !wildcard_match(&entry.name, wc_segment, true) {
                    // Doesn't match the wildcard for this segment, skip it.
                    continue;
                }
                if !entry.is_dir() {
                    continue;
                }

                let Some(statbuf) = entry.stat() else {
                    continue;
                };

                let file_id = FileId::from_stat(&statbuf);
                if !self.visited_files.insert(file_id.clone()) {
                    // Symlink loop! This directory was already visited, so skip it.
                    continue;
                }

                let full_path: WString = base_dir.to_owned() + entry.name.as_utfstr() + L!("/");
                let prefix: WString = prefix.to_owned() + wc_segment + L!("/");
                self.expand(&full_path, wc_remainder, &prefix);

                // Now remove the visited file. This is for #2414: only directories "beneath" us should be
                // considered visited.
                self.visited_files.remove(&file_id);
            }
        }

        /// Given a directory base_dir, which is opened as base_dir_iter, expand an intermediate literal
        /// segment. Use a fuzzy matching algorithm.
        fn expand_literal_intermediate_segment_with_fuzz(
            &mut self,
            base_dir: &wstr,
            base_dir_iter: &mut DirIter,
            wc_segment: &wstr,
            wc_remainder: &wstr,
            prefix: &wstr,
        ) {
            // Mark that we are fuzzy for the duration of this function
            let mut this = scoped_push(self, |e| &mut e.has_fuzzy_ancestor, true);
            while !this.interrupted_or_overflowed() {
                let Some(Ok(entry)) = base_dir_iter.next() else {
                    break;
                };

                // Don't bother with . and ..
                if entry.name == "." || entry.name == ".." {
                    continue;
                }

                let Some(m) = string_fuzzy_match_string(wc_segment, &entry.name, false) else {
                    continue;
                };
                // The first port had !n.is_samecase_exact
                if m.is_samecase_exact() {
                    continue;
                }

                // Note is_dir() may trigger a stat call.
                if !entry.is_dir() {
                    continue;
                }

                // Determine the effective prefix for our children.
                // Normally this would be the wildcard segment, but here we know our segment doesn't have
                // wildcards ("literal") and we are doing fuzzy expansion, which means we replace the
                // segment with files found through fuzzy matching.
                let child_prefix: WString = prefix.to_owned() + entry.name.as_utfstr() + L!("/");
                let new_full_path: WString =
                    base_dir.to_owned() + entry.name.as_utfstr() + L!("/");

                // Ok, this directory matches. Recurse to it. Then mark each resulting completion as fuzzy.
                let before = this.resolved_completions.len();
                this.expand(&new_full_path, wc_remainder, &child_prefix);
                let after = this.resolved_completions.len();

                assert!(before <= after);
                for c in this.resolved_completions[before..after].iter_mut() {
                    // Mark the completion as replacing.
                    if !c.replaces_token() {
                        c.flags |= CompleteFlags::REPLACES_TOKEN;
                        c.prepend_token_prefix(&child_prefix);
                    }

                    // And every match must be made at least as fuzzy as ours.
                    // TODO: justify this, tests do not exercise it yet.
                    if m.rank() > c.r#match.rank() {
                        // Our match is fuzzier.
                        c.r#match = m.clone();
                    }
                }
            }
        }

        /// Given a directory base_dir, which is opened as base_dir_iter, expand the last segment of the
        /// wildcard. Treat ANY_STRING_RECURSIVE as ANY_STRING. wc is the wildcard segment to use for
        /// matching, prefix is the prefix for completions.
        fn expand_last_segment(
            &mut self,
            base_dir: &wstr,
            base_dir_iter: &mut DirIter,
            wc: &wstr,
            prefix: &wstr,
        ) {
            let is_dir = false;
            let need_dir = self.flags.contains(ExpandFlags::DIRECTORIES_ONLY);

            while !self.interrupted_or_overflowed() {
                let Some(Ok(entry)) = base_dir_iter.next() else {
                    break;
                };

                if need_dir && !entry.is_dir() {
                    continue;
                }

                if self.flags.contains(ExpandFlags::FOR_COMPLETIONS) {
                    self.try_add_completion_result(
                        &(base_dir.to_owned() + entry.name.as_utfstr()),
                        &entry.name,
                        wc,
                        prefix,
                        is_dir,
                    );
                } else {
                    // Normal wildcard expansion, not for completions.
                    if wildcard_match(
                        &entry.name,
                        wc,
                        true, /* skip files with leading dots */
                    ) {
                        self.add_expansion_result(base_dir.to_owned() + entry.name.as_utfstr());
                    }
                }
            }
        }

        /// Indicate whether we should cancel wildcard expansion. This latches 'interrupt'.
        fn interrupted_or_overflowed(&mut self) -> bool {
            self.did_interrupt |= (self.cancel_checker)();
            self.did_interrupt || self.did_overflow
        }

        /// Record a plain (non-completion) expansion result, skipping duplicates, and latch
        /// the overflow flag if the receiver refuses it.
        fn add_expansion_result(&mut self, result: WString) {
            // This function is only for the non-completions case.
            assert!(!self.flags.contains(ExpandFlags::FOR_COMPLETIONS));
            #[allow(clippy::collapsible_if)]
            if self.completion_set.insert(result.clone()) {
                if !self.resolved_completions.add(result) {
                    self.did_overflow = true;
                }
            }
        }

        // Given a start point as an absolute path, for any directory that has exactly one non-hidden
        // entity in it which is itself a directory, return that. The result is a relative path. For
        // example, if start_point is '/usr' we may return 'local/bin/'.
        //
        // The result does not have a leading slash, but does have a trailing slash if non-empty.
fn descend_unique_hierarchy(&mut self, start_point: &mut WString) -> WString { assert!(!start_point.is_empty() && !start_point.starts_with('/')); let mut unique_hierarchy = WString::new(); let abs_unique_hierarchy = start_point; // Ensure we don't fall into a symlink loop. // Ideally we would compare both devices and inodes, but devices require a stat call, so we // use inodes exclusively. let mut visited_inodes: HashSet = HashSet::new(); loop { let mut unique_entry = WString::new(); let Ok(mut dir) = DirIter::new(abs_unique_hierarchy) else { break; }; while let Some(Ok(entry)) = dir.next() { if entry.name.is_empty() || entry.name.starts_with('.') { // either hidden, or . and .. entries -- skip them continue; } if !visited_inodes.insert(entry.inode) { // Either we've visited this inode already or there's multiple files; // either way stop. break; } else if entry.is_dir() && unique_entry.is_empty() { // first candidate unique_entry = entry.name.to_owned(); } else { // We either have two or more candidates, or the child is not a directory. We're // done. unique_entry.clear(); break; } } // We stop if we got two or more entries; also stop if we got zero or were interrupted if unique_entry.is_empty() || self.interrupted_or_overflowed() { break; } append_path_component(&mut unique_hierarchy, &unique_entry); unique_hierarchy.push('/'); append_path_component(abs_unique_hierarchy, &unique_entry); abs_unique_hierarchy.push('/'); } return unique_hierarchy; } fn try_add_completion_result( &mut self, filepath: &wstr, filename: &wstr, wildcard: &wstr, prefix: &wstr, known_dir: bool, ) { // This function is only for the completions case. assert!(self.flags.contains(ExpandFlags::FOR_COMPLETIONS)); let mut abs_path = self.working_directory.to_owned(); append_path_component(&mut abs_path, filepath); // We must normalize the path to allow 'cd ..' to operate on logical paths. 
if self.flags.contains(ExpandFlags::SPECIAL_FOR_CD) { abs_path = normalize_path(&abs_path, true); } let before = self.resolved_completions.len(); if wildcard_test_flags_then_complete( &abs_path, filename, wildcard, self.flags, self.resolved_completions, known_dir, ) { // Hack. We added this completion result based on the last component of the wildcard. // Prepend our prefix to each wildcard that replaces its token. // Note that prepend_token_prefix is a no-op unless COMPLETE_REPLACES_TOKEN is set let after = self.resolved_completions.len(); for c in self.resolved_completions[before..after].iter_mut() { if self.has_fuzzy_ancestor && !(c.flags.contains(CompleteFlags::REPLACES_TOKEN)) { c.flags |= CompleteFlags::REPLACES_TOKEN; c.prepend_token_prefix(wildcard); } c.prepend_token_prefix(prefix); } // Implement special_for_cd_autosuggestion by descending the deepest unique // hierarchy we can, and then appending any components to each new result. // Only descend deepest unique for cd autosuggest and not for cd tab completion // (issue #4402). if self .flags .contains(ExpandFlags::SPECIAL_FOR_CD_AUTOSUGGESTION) { let unique_hierarchy = self.descend_unique_hierarchy(&mut abs_path); if !unique_hierarchy.is_empty() { for c in self.resolved_completions[before..after].iter_mut() { c.completion.push_utfstr(&unique_hierarchy); } } } self.did_add = true; } } // Helper to resolve using our prefix. /// dotdot default is false fn open_dir(&self, base_dir: &wstr, dotdot: bool) -> std::io::Result { let mut path = self.working_directory.to_owned(); append_path_component(&mut path, base_dir); if self.flags.contains(ExpandFlags::SPECIAL_FOR_CD) { // cd operates on logical paths. // for example, cd ../ should complete "without resolving symlinks". path = normalize_path(&path, true); } return match dotdot { true => DirIter::new_with_dots(&path), false => DirIter::new(&path), }; } } } /// Expand the wildcard by matching against the filesystem. 
/// /// wildcard_expand works by dividing the wildcard into segments at each directory boundary. Each /// segment is processed separately. All except the last segment are handled by matching the /// wildcard segment against all subdirectories of matching directories, and recursively calling /// wildcard_expand for matches. On the last segment, matching is made to any file, and all matches /// are inserted to the list. /// /// If wildcard_expand encounters any errors (such as insufficient privileges) during matching, no /// error messages will be printed and wildcard_expand will continue the matching process. /// /// \param wc The wildcard string /// \param working_directory The working directory /// \param flags flags for the search. Can be any combination of for_completions and /// executables_only /// \param output The list in which to put the output /// pub fn wildcard_expand_string<'closure>( wc: &wstr, working_directory: &wstr, flags: ExpandFlags, mut cancel_checker: impl FnMut() -> bool + 'closure, output: &mut CompletionReceiver, ) -> WildcardResult { // Fuzzy matching only if we're doing completions. assert!( flags.contains(ExpandFlags::FOR_COMPLETIONS) || !flags.contains(ExpandFlags::FUZZY_MATCH) ); // ExpandFlags::SPECIAL_FOR_CD requires expand_flag::DIRECTORIES_ONLY and // ExpandFlags::FOR_COMPLETIONS and !expand_flag::GEN_DESCRIPTIONS. assert!( !(flags.contains(ExpandFlags::SPECIAL_FOR_CD)) || ((flags.contains(ExpandFlags::DIRECTORIES_ONLY)) && (flags.contains(ExpandFlags::FOR_COMPLETIONS)) && (!flags.contains(ExpandFlags::GEN_DESCRIPTIONS))) ); // Hackish fix for issue #1631. We are about to call c_str(), which will produce a string // truncated at any embedded nulls. We could fix this by passing around the size, etc. However // embedded nulls are never allowed in a filename, so we just check for them and return 0 (no // matches) if there is an embedded null. 
if wc.contains('\0') { return WildcardResult::NoMatch; } // We do not support tab-completing recursive (**) wildcards. This is historic behavior. // Do not descend any directories if there is a ** wildcard. if flags.contains(ExpandFlags::FOR_COMPLETIONS) && wc.contains(ANY_STRING_RECURSIVE) { return WildcardResult::NoMatch; } // Compute the prefix and base dir. The prefix is what we prepend for filesystem operations // (i.e. the working directory), the base_dir is the part of the wildcard consumed thus far, // which we also have to append. The difference is that the base_dir is returned as part of the // expansion, and the prefix is not. // // Check for a leading slash. If we find one, we have an absolute path: the prefix is empty, the // base dir is /, and the wildcard is the remainder. If we don't find one, the prefix is the // working directory, the base dir is empty. let (prefix, base_dir, effective_wc) = if wc.starts_with(L!("/")) { (L!(""), L!("/"), wc.slice_from(1)) } else { (working_directory, L!(""), wc) }; let mut expander = WildCardExpander::new(prefix, flags, &mut cancel_checker, output); expander.expand(base_dir, effective_wc, base_dir); return expander.status_code(); } /// Test whether the given wildcard matches the string. Does not perform any I/O. /// /// \param str The string to test /// \param wc The wildcard to test against /// \param leading_dots_fail_to_match if set, strings with leading dots are assumed to be hidden /// files and are not matched (default was false) /// /// \return true if the wildcard matched #[must_use] pub fn wildcard_match( name: impl AsRef, pattern: impl AsRef, leading_dots_fail_to_match: bool, ) -> bool { let name = name.as_ref(); let pattern = pattern.as_ref(); // Hackish fix for issue #270. Prevent wildcards from matching . or .., but we must still allow // literal matches. if leading_dots_fail_to_match && (name == L!(".") || name == L!("..")) { // The string is '.' or '..' so the only possible match is an exact match. 
return name == pattern; } // Near Linear implementation as proposed here https://research.swtch.com/glob. let mut px = 0; let mut nx = 0; let mut next_px = 0; let mut next_nx = 0; while px < pattern.len() || nx < name.len() { if px < pattern.len() { match pattern.char_at(px) { ANY_STRING | ANY_STRING_RECURSIVE => { // Ignore hidden file if leading_dots_fail_to_match && nx == 0 && name.char_at(0) == '.' { return false; } // Common case of * at the end. In that case we can early out since we know it will // match. if px == pattern.len() - 1 { return true; } // Try to match at nx. // If that doesn't work out, restart at nx+1 next. next_px = px; next_nx = nx + 1; px += 1; continue; } ANY_CHAR => { if nx < name.len() { if nx == 0 && name.char_at(nx) == '.' { return false; } px += 1; nx += 1; continue; } } c => { // ordinary char if nx < name.len() && name.char_at(nx) == c { px += 1; nx += 1; continue; } } } } // Mismatch. Maybe restart. if 0 < next_nx && next_nx <= name.len() { px = next_px; nx = next_nx; continue; } return false; } // Matched all of pattern to all of name. Success. true } // Check if the string has any unescaped wildcards (e.g. ANY_STRING). #[inline] #[must_use] fn wildcard_has_internal(s: impl AsRef) -> bool { s.as_ref() .chars() .any(|c| matches!(c, ANY_STRING | ANY_STRING_RECURSIVE | ANY_CHAR)) } /// Check if the specified string contains wildcards (e.g. *). #[must_use] fn wildcard_has(s: impl AsRef) -> bool { let s = s.as_ref(); let qmark_is_wild = !feature_test(FeatureFlag::qmark_noglob); // Fast check for * or ?; if none there is no wildcard. // Note some strings contain * but no wildcards, e.g. if they are quoted. 
if !s.contains('*') && (!qmark_is_wild || !s.contains('?')) { return false; } let unescaped = unescape_string(s, UnescapeStringStyle::Script(UnescapeFlags::SPECIAL)).unwrap_or_default(); return wildcard_has_internal(unescaped); } #[cfg(test)] mod tests { use super::*; use crate::future_feature_flags::scoped_test; #[test] fn test_wildcards() { assert!(!wildcard_has(L!(""))); assert!(wildcard_has(L!("*"))); assert!(!wildcard_has(L!("\\*"))); let wc = L!("foo*bar"); assert!(wildcard_has(wc) && !wildcard_has_internal(wc)); let wc = unescape_string(wc, UnescapeStringStyle::Script(UnescapeFlags::SPECIAL)).unwrap(); assert!(!wildcard_has(&wc) && wildcard_has_internal(&wc)); scoped_test(FeatureFlag::qmark_noglob, false, || { assert!(wildcard_has(L!("?"))); assert!(!wildcard_has(L!("\\?"))); }); scoped_test(FeatureFlag::qmark_noglob, true, || { assert!(!wildcard_has(L!("?"))); assert!(!wildcard_has(L!("\\?"))); }); } } #[cxx::bridge] mod ffi { extern "C++" { include!("wutil.h"); } extern "Rust" { #[cxx_name = "wildcard_match_ffi"] fn wildcard_match_ffi( str: &CxxWString, wc: &CxxWString, leading_dots_fail_to_match: bool, ) -> bool; #[cxx_name = "wildcard_has"] fn wildcard_has_ffi(s: &CxxWString) -> bool; #[cxx_name = "wildcard_has_internal"] fn wildcard_has_internal_ffi(s: &CxxWString) -> bool; } } fn wildcard_match_ffi(str: &CxxWString, wc: &CxxWString, leading_dots_fail_to_match: bool) -> bool { wildcard_match(str.from_ffi(), wc.from_ffi(), leading_dots_fail_to_match) } fn wildcard_has_ffi(s: &CxxWString) -> bool { wildcard_has(s.from_ffi()) } fn wildcard_has_internal_ffi(s: &CxxWString) -> bool { wildcard_has_internal(s.from_ffi()) }