// Fish needs it's own globbing implementation to support tab-expansion of globbed parameters. Also // provides recursive wildcards using **. #include "config.h" // IWYU pragma: keep #include "wildcard.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "common.h" #include "complete.h" #include "enum_set.h" #include "expand.h" #include "fallback.h" // IWYU pragma: keep #include "future_feature_flags.h" #include "maybe.h" #include "path.h" #include "wcstringutil.h" #include "wutil.h" // IWYU pragma: keep /// Finds an internal (ANY_STRING, etc.) style wildcard, or wcstring::npos. static size_t wildcard_find(const wchar_t *wc) { for (size_t i = 0; wc[i] != L'\0'; i++) { if (wc[i] == ANY_CHAR || wc[i] == ANY_STRING || wc[i] == ANY_STRING_RECURSIVE) { return i; } } return wcstring::npos; } bool wildcard_has_internal(const wchar_t *s, size_t len) { for (size_t i = 0; i < len; i++) { wchar_t c = s[i]; if (c == ANY_CHAR || c == ANY_STRING || c == ANY_STRING_RECURSIVE) { return true; } } return false; } // Note we want to handle embedded nulls (issue #1631). bool wildcard_has(const wchar_t *str, size_t len) { assert(str != nullptr); const wchar_t *end = str + len; bool qmark_is_wild = !feature_test(features_t::qmark_noglob); // Fast check for * or ?; if none there is no wildcard. // Note some strings contain * but no wildcards, e.g. if they are quoted. if (std::find(str, end, L'*') == end && (!qmark_is_wild || std::find(str, end, L'?') == end)) { return false; } wcstring unescaped; unescape_string(str, len, &unescaped, UNESCAPE_SPECIAL); return wildcard_has_internal(unescaped); } /// Check whether the string str matches the wildcard string wc. /// /// \param str String to be matched. /// \param wc The wildcard. /// \param leading_dots_fail_to_match Whether files beginning with dots should not be matched /// against wildcards. bool wildcard_match(const wcstring &str, const wcstring &wc, bool leading_dots_fail_to_match) { // Hackish fix for issue #270. Prevent wildcards from matching . or .., but we must still allow // literal matches. if (leading_dots_fail_to_match && (str == L"." || str == L"..")) { // The string is '.' or '..' so the only possible match is an exact match. return str == wc; } // Near Linear implementation as proposed here https://research.swtch.com/glob. const wchar_t *const str_start = str.c_str(); const wchar_t *wc_x = wc.c_str(); const wchar_t *str_x = str_start; const wchar_t *restart_wc_x = wc.c_str(); const wchar_t *restart_str_x = str_start; bool restart_is_out_of_str = false; for (; *wc_x != 0 || *str_x != 0;) { bool is_first = (str_x == str_start); if (*wc_x != 0) { if (*wc_x == ANY_STRING || *wc_x == ANY_STRING_RECURSIVE) { // Ignore hidden file if (leading_dots_fail_to_match && is_first && str[0] == L'.') { return false; } // Common case of * at the end. In that case we can early out since we know it will // match. if (wc_x[1] == L'\0') { return true; } // Try to match at str_x. // If that doesn't work out, restart at str_x+1 next. restart_wc_x = wc_x; restart_str_x = str_x + 1; restart_is_out_of_str = (*str_x == 0); wc_x++; continue; } else if (*wc_x == ANY_CHAR && *str_x != 0) { if (is_first && *str_x == L'.') { return false; } wc_x++; str_x++; continue; } else if (*str_x != 0 && *str_x == *wc_x) { // ordinary character wc_x++; str_x++; continue; } } // Mismatch. Maybe restart. if (restart_str_x != str.c_str() && !restart_is_out_of_str) { wc_x = restart_wc_x; str_x = restart_str_x; continue; } return false; } // Matched all of pattern to all of name. Success. return true; } // This does something horrible refactored from an even more horrible function. static wcstring resolve_description(const wcstring &full_completion, wcstring *completion, expand_flags_t expand_flags, const description_func_t &desc_func) { size_t complete_sep_loc = completion->find(PROG_COMPLETE_SEP); if (complete_sep_loc != wcstring::npos) { // This completion has an embedded description, do not use the generic description. wcstring description = completion->substr(complete_sep_loc + 1); completion->resize(complete_sep_loc); return description; } if (desc_func && (expand_flags & expand_flag::gen_descriptions)) { return desc_func(full_completion); } return wcstring{}; } namespace { // A transient parameter pack needed by wildcard_complete. struct wc_complete_pack_t { const wcstring &orig; // the original string, transient const description_func_t &desc_func; // function for generating descriptions expand_flags_t expand_flags; wc_complete_pack_t(const wcstring &str, const description_func_t &df, expand_flags_t fl) : orig(str), desc_func(df), expand_flags(fl) {} }; } // namespace // Weirdly specific and non-reusable helper function that makes its one call site much clearer. static bool has_prefix_match(const completion_receiver_t *comps, size_t first) { if (comps != nullptr) { const size_t after_count = comps->size(); for (size_t j = first; j < after_count; j++) { const auto &match = comps->at(j).match; if (match.type <= string_fuzzy_match_t::contain_type_t::prefix && match.case_fold == string_fuzzy_match_t::case_fold_t::samecase) { return true; } } } return false; } /// Matches the string against the wildcard, and if the wildcard is a possible completion of the /// string, the remainder of the string is inserted into the out vector. /// /// We ignore ANY_STRING_RECURSIVE here. The consequence is that you cannot tab complete ** /// wildcards. This is historic behavior. static wildcard_result_t wildcard_complete_internal(const wchar_t *const str, size_t str_len, const wchar_t *const wc, size_t wc_len, const wc_complete_pack_t ¶ms, complete_flags_t flags, completion_receiver_t *out, bool is_first_call = false) { assert(str != nullptr); assert(wc != nullptr); // Maybe early out for hidden files. We require that the wildcard match these exactly (i.e. a // dot); ANY_STRING not allowed. if (is_first_call && str[0] == L'.' && wc[0] != L'.') { return wildcard_result_t::no_match; } // Locate the next wildcard character position, e.g. ANY_CHAR or ANY_STRING. const size_t next_wc_char_pos = wildcard_find(wc); // Maybe we have no more wildcards at all. This includes the empty string. if (next_wc_char_pos == wcstring::npos) { // Try matching. maybe_t match = string_fuzzy_match_string(wc, str); if (!match) return wildcard_result_t::no_match; // If we're not allowing fuzzy match, then we require a prefix match. bool needs_prefix_match = !(params.expand_flags & expand_flag::fuzzy_match); if (needs_prefix_match && !match->is_exact_or_prefix()) { return wildcard_result_t::no_match; } // The match was successful. If the string is not requested we're done. if (out == nullptr) { return wildcard_result_t::match; } // Wildcard complete. bool full_replacement = match->requires_full_replacement() || (flags & COMPLETE_REPLACES_TOKEN); // If we are not replacing the token, be careful to only store the part of the string after // the wildcard. assert(!full_replacement || wc_len <= str_len); wcstring out_completion = full_replacement ? params.orig : str + wc_len; wcstring out_desc = resolve_description(params.orig, &out_completion, params.expand_flags, params.desc_func); // Note: out_completion may be empty if the completion really is empty, e.g. tab-completing // 'foo' when a file 'foo' exists. complete_flags_t local_flags = flags | (full_replacement ? COMPLETE_REPLACES_TOKEN : 0); if (!out->add(std::move(out_completion), std::move(out_desc), local_flags, *match)) { return wildcard_result_t::overflow; } return wildcard_result_t::match; } else if (next_wc_char_pos > 0) { // The literal portion of a wildcard cannot be longer than the string itself, // e.g. `abc*` can never match a string that is only two characters long. if (next_wc_char_pos >= str_len) { return wildcard_result_t::no_match; } // Here we have a non-wildcard prefix. Note that we don't do fuzzy matching for stuff before // a wildcard, so just do case comparison and then recurse. if (std::wcsncmp(str, wc, next_wc_char_pos) == 0) { // Normal match. return wildcard_complete_internal(str + next_wc_char_pos, str_len - next_wc_char_pos, wc + next_wc_char_pos, wc_len - next_wc_char_pos, params, flags, out); } if (wcsncasecmp(str, wc, next_wc_char_pos) == 0) { // Case insensitive match. return wildcard_complete_internal(str + next_wc_char_pos, str_len - next_wc_char_pos, wc + next_wc_char_pos, wc_len - next_wc_char_pos, params, flags | COMPLETE_REPLACES_TOKEN, out); } return wildcard_result_t::no_match; } // Our first character is a wildcard. assert(next_wc_char_pos == 0); switch (wc[0]) { case ANY_CHAR: { if (str[0] == L'\0') { return wildcard_result_t::no_match; } return wildcard_complete_internal(str + 1, str_len - 1, wc + 1, wc_len - 1, params, flags, out); } case ANY_STRING: { // Hackish. If this is the last character of the wildcard, then just complete with // the empty string. This fixes cases like "f*" -> "f*o". if (wc[1] == L'\0') { return wildcard_complete_internal(L"", 0, L"", 0, params, flags, out); } // Try all submatches. Issue #929: if the recursive call gives us a prefix match, // just stop. This is sloppy - what we really want to do is say, once we've seen a // match of a particular type, ignore all matches of that type further down the // string, such that the wildcard produces the "minimal match.". bool has_match = false; for (size_t i = 0; str[i] != L'\0'; i++) { const size_t before_count = out ? out->size() : 0; auto submatch_res = wildcard_complete_internal(str + i, str_len - i, wc + 1, wc_len - 1, params, flags, out); switch (submatch_res) { case wildcard_result_t::no_match: break; case wildcard_result_t::match: has_match = true; // If out is NULL, we don't care about the actual matches. If out is not // NULL but we have a prefix match, stop there. if (out == nullptr || has_prefix_match(out, before_count)) { return wildcard_result_t::match; } break; case wildcard_result_t::cancel: case wildcard_result_t::overflow: // Note early return. return submatch_res; } } return has_match ? wildcard_result_t::match : wildcard_result_t::no_match; } case ANY_STRING_RECURSIVE: { // We don't even try with this one. return wildcard_result_t::no_match; } default: { DIE("unreachable code reached"); } } DIE("unreachable code reached"); } wildcard_result_t wildcard_complete(const wcstring &str, const wchar_t *wc, const std::function &desc_func, completion_receiver_t *out, expand_flags_t expand_flags, complete_flags_t flags) { // Note out may be NULL. assert(wc != nullptr); wc_complete_pack_t params(str, desc_func, expand_flags); return wildcard_complete_internal(str.c_str(), str.size(), wc, std::wcslen(wc), params, flags, out, true /* first call */); } static int fast_waccess(const struct stat &stat_buf, uint8_t mode) { // Cache the effective user id and group id of our own shell process. These can't change on us // because we don't change them. static const uid_t euid = geteuid(); static const gid_t egid = getegid(); // Cache a list of our group memberships. static const std::vector groups = ([&]() { std::vector groups; while (true) { int ngroups = getgroups(0, nullptr); // It is not defined if getgroups(2) includes the effective group of the calling process groups.reserve(ngroups + 1); groups.resize(ngroups, 0); if (getgroups(groups.size(), groups.data()) == -1) { if (errno == EINVAL) { // Race condition, ngroups has changed between the two getgroups() calls continue; } wperror(L"getgroups"); } break; } groups.push_back(egid); std::sort(groups.begin(), groups.end()); return groups; })(); bool have_suid = (stat_buf.st_mode & S_ISUID); if (euid == stat_buf.st_uid || have_suid) { // Check permissions granted to owner if (((stat_buf.st_mode & S_IRWXU) >> 6) & mode) { return 0; } } bool have_sgid = (stat_buf.st_mode & S_ISGID); auto binsearch = std::lower_bound(groups.begin(), groups.end(), stat_buf.st_gid); bool have_group = binsearch != groups.end() && !(stat_buf.st_gid < *binsearch); if (have_group || have_sgid) { // Check permissions granted to group if (((stat_buf.st_mode & S_IRWXG) >> 3) & mode) { return 0; } } if (euid != stat_buf.st_uid && !have_group) { // Check permissions granted to other if ((stat_buf.st_mode & S_IRWXO) & mode) { return 0; } } return -1; } /// Obtain a description string for the file specified by the filename. /// /// The returned value is a string constant and should not be free'd. /// /// \param filename The file for which to find a description string /// \param lstat_res The result of calling lstat on the file /// \param lbuf The struct buf output of calling lstat on the file /// \param stat_res The result of calling stat on the file /// \param buf The struct buf output of calling stat on the file /// \param err The errno value after a failed stat call on the file. static const wchar_t *file_get_desc(int lstat_res, const struct stat &lbuf, int stat_res, const struct stat &buf, int err) { if (lstat_res) { return COMPLETE_FILE_DESC; } if (S_ISLNK(lbuf.st_mode)) { if (!stat_res) { if (S_ISDIR(buf.st_mode)) { return COMPLETE_DIRECTORY_SYMLINK_DESC; } if (buf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH) && fast_waccess(buf, X_OK) == 0) { return COMPLETE_EXEC_LINK_DESC; } return COMPLETE_SYMLINK_DESC; } if (err == ENOENT) return COMPLETE_BROKEN_SYMLINK_DESC; if (err == ELOOP) return COMPLETE_LOOP_SYMLINK_DESC; // On unknown errors we do nothing. The file will be given the default 'File' // description or one based on the suffix. } else if (S_ISCHR(buf.st_mode)) { return COMPLETE_CHAR_DESC; } else if (S_ISBLK(buf.st_mode)) { return COMPLETE_BLOCK_DESC; } else if (S_ISFIFO(buf.st_mode)) { return COMPLETE_FIFO_DESC; } else if (S_ISSOCK(buf.st_mode)) { return COMPLETE_SOCKET_DESC; } else if (S_ISDIR(buf.st_mode)) { return COMPLETE_DIRECTORY_DESC; } else if (buf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH) && fast_waccess(buf, X_OK) == 0) { return COMPLETE_EXEC_DESC; } return COMPLETE_FILE_DESC; } /// Test if the given file is an executable (if executables_only) or directory (if /// directories_only). If it matches, call wildcard_complete() with some description that we make /// up. Note that the filename came from a readdir() call, so we know it exists. static bool wildcard_test_flags_then_complete(const wcstring &filepath, const wcstring &filename, const wchar_t *wc, expand_flags_t expand_flags, completion_receiver_t *out, bool known_dir) { const bool executables_only = expand_flags & expand_flag::executables_only; const bool need_directory = expand_flags & expand_flag::directories_only; // Fast path: If we need directories, and we already know it is one, // and we don't need to do anything else, just return it. // This is a common case for cd completions, and removes the `stat` entirely in case the system // supports it. if (known_dir && !executables_only && !(expand_flags & expand_flag::gen_descriptions)) { return wildcard_complete(filename + L'/', wc, const_desc(L""), out, expand_flags, COMPLETE_NO_SPACE) == wildcard_result_t::match; } // Check if it will match before stat(). if (wildcard_complete(filename, wc, {}, nullptr, expand_flags, 0) != wildcard_result_t::match) { return false; } struct stat lstat_buf = {}, stat_buf = {}; int stat_res = -1; int stat_errno = 0; int lstat_res = lwstat(filepath, &lstat_buf); if (lstat_res >= 0) { if (S_ISLNK(lstat_buf.st_mode)) { stat_res = wstat(filepath, &stat_buf); if (stat_res < 0) { // In order to differentiate between e.g. broken symlinks and symlink loops, we also // need to know the error status of wstat. stat_errno = errno; } } else { stat_buf = lstat_buf; stat_res = lstat_res; } } const long long file_size = stat_res == 0 ? stat_buf.st_size : 0; const bool is_directory = stat_res == 0 && S_ISDIR(stat_buf.st_mode); const bool is_executable = stat_res == 0 && S_ISREG(stat_buf.st_mode); if (need_directory && !is_directory) { return false; } if (executables_only && (!is_executable || fast_waccess(stat_buf, X_OK) != 0)) { return false; } if (executables_only && is_windows_subsystem_for_linux() && string_suffixes_string_case_insensitive(L".dll", filename)) { return false; } // Compute the description. wcstring desc; if (expand_flags & expand_flag::gen_descriptions) { desc = file_get_desc(lstat_res, lstat_buf, stat_res, stat_buf, stat_errno); if (!is_directory && !is_executable && file_size >= 0) { if (!desc.empty()) desc.append(L", "); desc.append(format_size(file_size)); } } // Append a / if this is a directory. Note this requirement may be the only reason we have to // call stat() in some cases. auto desc_func = const_desc(desc); if (is_directory) { return wildcard_complete(filename + L'/', wc, desc_func, out, expand_flags, COMPLETE_NO_SPACE) == wildcard_result_t::match; } return wildcard_complete(filename, wc, desc_func, out, expand_flags, 0) == wildcard_result_t::match; } namespace { class wildcard_expander_t { // A function to call to check cancellation. cancel_checker_t cancel_checker; // The working directory to resolve paths against const wcstring working_directory; // The set of items we have resolved, used to efficiently avoid duplication. std::unordered_set completion_set; // The set of file IDs we have visited, used to avoid symlink loops. std::unordered_set visited_files; // Flags controlling expansion. const expand_flags_t flags; // Resolved items get inserted into here. This is transient of course. completion_receiver_t *resolved_completions; // Whether we have been interrupted. bool did_interrupt{false}; // Whether we have overflowed. bool did_overflow{false}; // Whether we have successfully added any completions. bool did_add{false}; // Whether some parent expansion is fuzzy, and therefore completions always prepend their prefix // This variable is a little suspicious - it should be passed along, not stored here // If we ever try to do parallel wildcard expansion we'll have to remove this bool has_fuzzy_ancestor{false}; /// We are a trailing slash - expand at the end. void expand_trailing_slash(const wcstring &base_dir, const wcstring &prefix); /// Given a directory base_dir, which is opened as base_dir_iter, expand an intermediate segment /// of the wildcard. Treat ANY_STRING_RECURSIVE as ANY_STRING. wc_segment is the wildcard /// segment for this directory, wc_remainder is the wildcard for subdirectories, /// prefix is the prefix for completions. void expand_intermediate_segment(const wcstring &base_dir, dir_iter_t &base_dir_iter, const wcstring &wc_segment, const wchar_t *wc_remainder, const wcstring &prefix); /// Given a directory base_dir, which is opened as base_dir_fp, expand an intermediate literal /// segment. Use a fuzzy matching algorithm. void expand_literal_intermediate_segment_with_fuzz(const wcstring &base_dir, dir_iter_t &base_dir_iter, const wcstring &wc_segment, const wchar_t *wc_remainder, const wcstring &prefix); /// Given a directory base_dir, which is opened as base_dir_iter, expand the last segment of the /// wildcard. Treat ANY_STRING_RECURSIVE as ANY_STRING. wc is the wildcard segment to use for /// matching, wc_remainder is the wildcard for subdirectories, prefix is the prefix for /// completions. void expand_last_segment(const wcstring &base_dir, dir_iter_t &base_dir_iter, const wcstring &wc, const wcstring &prefix); /// Indicate whether we should cancel wildcard expansion. This latches 'interrupt'. bool interrupted_or_overflowed() { did_interrupt = did_interrupt || cancel_checker(); return did_interrupt || did_overflow; } void add_expansion_result(wcstring &&result) { // This function is only for the non-completions case. assert(!(this->flags & expand_flag::for_completions)); if (this->completion_set.insert(result).second) { if (!this->resolved_completions->add(std::move(result))) { this->did_overflow = true; } } } // Given a start point as an absolute path, for any directory that has exactly one non-hidden // entity in it which is itself a directory, return that. The result is a relative path. For // example, if start_point is '/usr' we may return 'local/bin/'. // // The result does not have a leading slash, but does have a trailing slash if non-empty. wcstring descend_unique_hierarchy(const wcstring &start_point) { assert(!start_point.empty() && start_point.at(0) == L'/'); wcstring unique_hierarchy; wcstring abs_unique_hierarchy = start_point; // Ensure we don't fall into a symlink loop. // Ideally we would compare both devices and inodes, but devices require a stat call, so we // use inodes exclusively. std::unordered_set visited_inodes; for (;;) { // We keep track of the single unique_entry entry. If we get more than one, it's not // unique and we stop the descent. wcstring unique_entry; dir_iter_t dir(abs_unique_hierarchy); if (!dir.valid()) { break; } while (const auto *entry = dir.next()) { if (entry->name.empty() || entry->name.at(0) == L'.') { continue; // either hidden, or . and .. entries -- skip them } if (!visited_inodes.insert(entry->inode).second) { // Either we've visited this inode already or there's multiple files; // either way stop. break; } else if (entry->is_dir() && unique_entry.empty()) { unique_entry = entry->name; // first candidate } else { // We either have two or more candidates, or the child is not a directory. We're // done. unique_entry.clear(); break; } } // We stop if we got two or more entries; also stop if we got zero or were interrupted if (unique_entry.empty() || interrupted_or_overflowed()) { break; } // We have an entry in the unique hierarchy! append_path_component(unique_hierarchy, unique_entry); unique_hierarchy.push_back(L'/'); append_path_component(abs_unique_hierarchy, unique_entry); abs_unique_hierarchy.push_back(L'/'); } return unique_hierarchy; } void try_add_completion_result(const wcstring &filepath, const wcstring &filename, const wcstring &wildcard, const wcstring &prefix, bool known_dir) { // This function is only for the completions case. assert(this->flags & expand_flag::for_completions); wcstring abs_path = this->working_directory; append_path_component(abs_path, filepath); // We must normalize the path to allow 'cd ..' to operate on logical paths. if (flags & expand_flag::special_for_cd) abs_path = normalize_path(abs_path); size_t before = this->resolved_completions->size(); if (wildcard_test_flags_then_complete(abs_path, filename, wildcard.c_str(), this->flags, this->resolved_completions, known_dir)) { // Hack. We added this completion result based on the last component of the wildcard. // Prepend our prefix to each wildcard that replaces its token. // Note that prepend_token_prefix is a no-op unless COMPLETE_REPLACES_TOKEN is set size_t after = this->resolved_completions->size(); for (size_t i = before; i < after; i++) { completion_t *c = &this->resolved_completions->at(i); if (this->has_fuzzy_ancestor && !(c->flags & COMPLETE_REPLACES_TOKEN)) { c->flags |= COMPLETE_REPLACES_TOKEN; c->prepend_token_prefix(wildcard); } c->prepend_token_prefix(prefix); } // Implement special_for_cd_autosuggestion by descending the deepest unique // hierarchy we can, and then appending any components to each new result. // Only descend deepest unique for cd autosuggest and not for cd tab completion // (issue #4402). if (flags & expand_flag::special_for_cd_autosuggestion) { wcstring unique_hierarchy = this->descend_unique_hierarchy(abs_path); if (!unique_hierarchy.empty()) { for (size_t i = before; i < after; i++) { completion_t &c = this->resolved_completions->at(i); c.completion.append(unique_hierarchy); } } } this->did_add = true; } } // Helper to resolve using our prefix. dir_iter_t open_dir(const wcstring &base_dir, bool dotdot = false) const { wcstring path = this->working_directory; append_path_component(path, base_dir); if (flags & expand_flag::special_for_cd) { // cd operates on logical paths. // for example, cd ../ should complete "without resolving symlinks". path = normalize_path(path); } return dir_iter_t(path, dotdot); } public: wildcard_expander_t(wcstring wd, expand_flags_t f, cancel_checker_t cancel_checker, completion_receiver_t *r) : cancel_checker(std::move(cancel_checker)), working_directory(std::move(wd)), flags(f), resolved_completions(r) { assert(resolved_completions != nullptr); // Insert initial completions into our set to avoid duplicates. for (const auto &resolved_completion : resolved_completions->get_list()) { this->completion_set.insert(resolved_completion.completion); } } // Do wildcard expansion. This is recursive. void expand(const wcstring &base_dir, const wchar_t *wc, const wcstring &prefix); wildcard_result_t status_code() const { if (this->did_interrupt) { return wildcard_result_t::cancel; } else if (this->did_overflow) { return wildcard_result_t::overflow; } return this->did_add ? wildcard_result_t::match : wildcard_result_t::no_match; } }; void wildcard_expander_t::expand_trailing_slash(const wcstring &base_dir, const wcstring &prefix) { if (interrupted_or_overflowed()) { return; } if (!(flags & expand_flag::for_completions)) { // Trailing slash and not accepting incomplete, e.g. `echo /xyz/`. Insert this file if it // exists. if (waccess(base_dir, F_OK) == 0) { this->add_expansion_result(wcstring{base_dir}); } } else { // Trailing slashes and accepting incomplete, e.g. `echo /xyz/`. Everything is added. dir_iter_t dir = open_dir(base_dir); if (dir.valid()) { // wreaddir_resolving without the out argument is just wreaddir. // So we can use the information in case we need it. bool need_dir = flags & expand_flag::directories_only; wcstring path = base_dir; if (flags & expand_flag::special_for_cd) { path = this->working_directory; append_path_component(path, base_dir); // cd operates on logical paths. // for example, cd ../ should complete "without resolving symlinks". path = normalize_path(path); } while (const auto *entry = dir.next()) { if (interrupted_or_overflowed()) { break; } // Note that is_dir() may cause a stat() call. bool known_dir = need_dir ? entry->is_dir() : false; if (need_dir && !known_dir) continue; if (!entry->name.empty() && entry->name.at(0) != L'.') { this->try_add_completion_result(base_dir + entry->name, entry->name, L"", prefix, known_dir); } } } } } void wildcard_expander_t::expand_intermediate_segment(const wcstring &base_dir, dir_iter_t &base_dir_iter, const wcstring &wc_segment, const wchar_t *wc_remainder, const wcstring &prefix) { std::string narrow; const dir_iter_t::entry_t *entry{}; while (!interrupted_or_overflowed() && (entry = base_dir_iter.next())) { // Note that it's critical we ignore leading dots here, else we may descend into . and .. if (!wildcard_match(entry->name, wc_segment, true)) { // Doesn't match the wildcard for this segment, skip it. continue; } if (!entry->is_dir()) { continue; } auto statbuf = entry->stat(); if (!statbuf) { continue; } const file_id_t file_id = file_id_t::from_stat(*statbuf); if (!this->visited_files.insert(file_id).second) { // Symlink loop! This directory was already visited, so skip it. continue; } // We made it through. Perform normal wildcard expansion on this new directory, starting at // our tail_wc, which includes the ANY_STRING_RECURSIVE guy. wcstring full_path = base_dir + entry->name; full_path.push_back(L'/'); this->expand(full_path, wc_remainder, prefix + wc_segment + L'/'); // Now remove the visited file. This is for #2414: only directories "beneath" us should be // considered visited. this->visited_files.erase(file_id); } } void wildcard_expander_t::expand_literal_intermediate_segment_with_fuzz(const wcstring &base_dir, dir_iter_t &base_dir_iter, const wcstring &wc_segment, const wchar_t *wc_remainder, const wcstring &prefix) { // Mark that we are fuzzy for the duration of this function const scoped_push scoped_fuzzy(&this->has_fuzzy_ancestor, true); const dir_iter_t::entry_t *entry{}; while (!interrupted_or_overflowed() && (entry = base_dir_iter.next())) { // Don't bother with . and .. if (entry->name == L"." || entry->name == L"..") { continue; } const auto match = string_fuzzy_match_string(wc_segment, entry->name); if (!match || match->is_samecase_exact()) continue; // Note is_dir() may trigger a stat call. if (!entry->is_dir()) continue; // Determine the effective prefix for our children. // Normally this would be the wildcard segment, but here we know our segment doesn't have // wildcards ("literal") and we are doing fuzzy expansion, which means we replace the // segment with files found through fuzzy matching. const wcstring child_prefix = prefix + entry->name + L'/'; wcstring new_full_path = base_dir + entry->name; new_full_path.push_back(L'/'); // Ok, this directory matches. Recurse to it. Then mark each resulting completion as fuzzy. const size_t before = this->resolved_completions->size(); this->expand(new_full_path, wc_remainder, child_prefix); const size_t after = this->resolved_completions->size(); assert(before <= after); for (size_t i = before; i < after; i++) { completion_t *c = &this->resolved_completions->at(i); // Mark the completion as replacing. if (!(c->flags & COMPLETE_REPLACES_TOKEN)) { c->flags |= COMPLETE_REPLACES_TOKEN; c->prepend_token_prefix(child_prefix); } // And every match must be made at least as fuzzy as ours. // TODO: justify this, tests do not exercise it yet. if (match->rank() > c->match.rank()) { // Our match is fuzzier. c->match = *match; } } } } void wildcard_expander_t::expand_last_segment(const wcstring &base_dir, dir_iter_t &base_dir_iter, const wcstring &wc, const wcstring &prefix) { bool is_dir = false; bool need_dir = flags & expand_flag::directories_only; const dir_iter_t::entry_t *entry{}; while (!interrupted_or_overflowed() && (entry = base_dir_iter.next())) { if (need_dir && !entry->is_dir()) continue; if (flags & expand_flag::for_completions) { this->try_add_completion_result(base_dir + entry->name, entry->name, wc, prefix, is_dir); } else { // Normal wildcard expansion, not for completions. if (wildcard_match(entry->name, wc, true /* skip files with leading dots */)) { this->add_expansion_result(base_dir + entry->name); } } } } /// The real implementation of wildcard expansion is in this function. Other functions are just /// wrappers around this one. /// /// This function traverses the relevant directory tree looking for matches, and recurses when /// needed to handle wildcards spanning multiple components and recursive wildcards. /// /// Args: /// base_dir: the "working directory" against which the wildcard is to be resolved /// wc: the wildcard string itself, e.g. foo*bar/baz (where * is actually ANY_CHAR) /// effective_prefix: the string that should be prepended for completions that replace their token. /// This is usually the same thing as the original wildcard, but for fuzzy matching, we /// expand intermediate segments. effective_prefix is always either empty, or ends with a slash void wildcard_expander_t::expand(const wcstring &base_dir, const wchar_t *wc, const wcstring &effective_prefix) { assert(wc != nullptr); if (interrupted_or_overflowed()) { return; } // Get the current segment and compute interesting properties about it. const wchar_t *const next_slash = std::wcschr(wc, L'/'); const bool is_last_segment = (next_slash == nullptr); const size_t wc_segment_len = next_slash ? next_slash - wc : std::wcslen(wc); const wcstring wc_segment = wcstring(wc, wc_segment_len); const bool segment_has_wildcards = wildcard_has_internal(wc_segment); // e.g. ANY_STRING. const wchar_t *const wc_remainder = next_slash ? next_slash + 1 : nullptr; if (wc_segment.empty()) { // Handle empty segment. assert(!segment_has_wildcards); //!OCLINT(multiple unary operator) if (is_last_segment) { this->expand_trailing_slash(base_dir, effective_prefix); } else { // Multiple adjacent slashes in the wildcard. Just skip them. this->expand(base_dir, wc_remainder, effective_prefix + L'/'); } } else if (!segment_has_wildcards && !is_last_segment) { // Literal intermediate match. Note that we may not be able to actually read the directory // (issue #2099). assert(next_slash != nullptr); // Absolute path of the intermediate directory const wcstring intermediate_dirpath = base_dir + wc_segment + L'/'; // This just trumps everything. size_t before = this->resolved_completions->size(); this->expand(intermediate_dirpath, wc_remainder, effective_prefix + wc_segment + L'/'); // Maybe try a fuzzy match (#94) if nothing was found with the literal match. Respect // EXPAND_NO_DIRECTORY_ABBREVIATIONS (issue #2413). // Don't do fuzzy matches if the literal segment was valid (#3211) bool allow_fuzzy = this->flags.get(expand_flag::fuzzy_match) && !this->flags.get(expand_flag::no_fuzzy_directories); if (allow_fuzzy && this->resolved_completions->size() == before && waccess(intermediate_dirpath, F_OK) != 0) { assert(this->flags & expand_flag::for_completions); dir_iter_t base_dir_iter = open_dir(base_dir); if (base_dir_iter.valid()) { this->expand_literal_intermediate_segment_with_fuzz( base_dir, base_dir_iter, wc_segment, wc_remainder, effective_prefix); } } } else { assert(!wc_segment.empty() && (segment_has_wildcards || is_last_segment)); if (!is_last_segment && wc_segment == wcstring{ANY_STRING_RECURSIVE}) { // Hack for #7222. This is an intermediate wc segment that is exactly **. The // tail matches in subdirectories as normal, but also the current directory. // That is, '**/bar' may match 'bar' and 'foo/bar'. // Implement this by matching the wildcard tail only, in this directory. // Note if the segment is not exactly ANY_STRING_RECURSIVE then the segment may only // match subdirectories. this->expand(base_dir, wc_remainder, effective_prefix); if (interrupted_or_overflowed()) { return; } } // return "." and ".." entries if we're doing completions dir_iter_t dir = open_dir(base_dir, /* return . and .. */ flags & expand_flag::for_completions); if (dir.valid()) { if (is_last_segment) { // Last wildcard segment, nonempty wildcard. this->expand_last_segment(base_dir, dir, wc_segment, effective_prefix); } else { // Not the last segment, nonempty wildcard. assert(next_slash != nullptr); this->expand_intermediate_segment(base_dir, dir, wc_segment, wc_remainder, effective_prefix + wc_segment + L'/'); } size_t asr_idx = wc_segment.find(ANY_STRING_RECURSIVE); if (asr_idx != wcstring::npos) { // Apply the recursive **. // Construct a "head + any" wildcard for matching stuff in this directory, and an // "any + tail" wildcard for matching stuff in subdirectories. Note that the // ANY_STRING_RECURSIVE character is present in both the head and the tail. const wcstring head_any(wc_segment, 0, asr_idx + 1); const wchar_t *any_tail = wc + asr_idx; assert(head_any.at(head_any.size() - 1) == ANY_STRING_RECURSIVE); assert(any_tail[0] == ANY_STRING_RECURSIVE); dir.rewind(); this->expand_intermediate_segment(base_dir, dir, head_any, any_tail, effective_prefix); } } } } } // namespace wildcard_result_t wildcard_expand_string(const wcstring &wc, const wcstring &working_directory, expand_flags_t flags, const cancel_checker_t &cancel_checker, completion_receiver_t *output) { assert(output != nullptr); // Fuzzy matching only if we're doing completions. assert(flags.get(expand_flag::for_completions) || !flags.get(expand_flag::fuzzy_match)); // expand_flag::special_for_cd requires expand_flag::directories_only and // expand_flag::for_completions and !expand_flag::gen_descriptions. assert(!(flags.get(expand_flag::special_for_cd)) || ((flags.get(expand_flag::directories_only)) && (flags.get(expand_flag::for_completions)) && (!flags.get(expand_flag::gen_descriptions)))); // Hackish fix for issue #1631. We are about to call c_str(), which will produce a string // truncated at any embedded nulls. We could fix this by passing around the size, etc. However // embedded nulls are never allowed in a filename, so we just check for them and return 0 (no // matches) if there is an embedded null. if (wc.find(L'\0') != wcstring::npos) { return wildcard_result_t::no_match; } // We do not support tab-completing recursive (**) wildcards. This is historic behavior. // Do not descend any directories if there is a ** wildcard. if (flags.get(expand_flag::for_completions) && wc.find(ANY_STRING_RECURSIVE) != wcstring::npos) { return wildcard_result_t::no_match; } // Compute the prefix and base dir. The prefix is what we prepend for filesystem operations // (i.e. the working directory), the base_dir is the part of the wildcard consumed thus far, // which we also have to append. The difference is that the base_dir is returned as part of the // expansion, and the prefix is not. // // Check for a leading slash. If we find one, we have an absolute path: the prefix is empty, the // base dir is /, and the wildcard is the remainder. If we don't find one, the prefix is the // working directory, the base dir is empty. wcstring prefix, base_dir, effective_wc; if (string_prefixes_string(L"/", wc)) { base_dir = L"/"; effective_wc = wc.substr(1); } else { prefix = working_directory; effective_wc = wc; } wildcard_expander_t expander(prefix, flags, cancel_checker, output); expander.expand(base_dir, effective_wc.c_str(), base_dir); return expander.status_code(); }