Reduce stat calls for wildcards ending in "/" (#10032)

This teaches expand_intermediate_segment about the case where its
segment is the last one, followed only by a trailing "/".

In that case it does not need the file_id that is used for finding
links (we don't resolve the files we get here), which lets us skip one
stat() call per entry.

This speeds up the case of `...*/` by quite a bit.

If that last component was a directory with 1000 subdirectories we
could skip 1000 stat calls!

One slight weirdness: we refuse to add links to directories that we have already visited, even if they are the last component and we don't actually follow them. That means we can't take the fast path for entries that might be links either, but we do know whether something is a link (if we get d_type), so the fast path still applies in the common cases.

(cherry picked from commit 86803e4442)
This commit is contained in:
Fabian Boehm 2023-10-08 16:46:59 +02:00
parent 18c65df3c7
commit 724b44907e
3 changed files with 27 additions and 3 deletions

View file

@ -722,6 +722,7 @@ void wildcard_expander_t::expand_intermediate_segment(const wcstring &base_dir,
const wcstring &prefix) {
std::string narrow;
const dir_iter_t::entry_t *entry{};
bool is_final = !*wc_remainder && wc_segment.find(ANY_STRING_RECURSIVE) == wcstring::npos;
while (!interrupted_or_overflowed() && (entry = base_dir_iter.next())) {
// Note that it's critical we ignore leading dots here, else we may descend into . and ..
if (!wildcard_match(entry->name, wc_segment, true)) {
@ -733,6 +734,22 @@ void wildcard_expander_t::expand_intermediate_segment(const wcstring &base_dir,
continue;
}
// Fast path: If this entry can't be a link (we know via d_type),
// we don't need to protect against symlink loops.
// This is *not* deduplication, we just don't want a loop.
//
// We only do this when we are the last `*/` component,
// because we're a bit inconsistent on when we will enter loops.
if (is_final && !entry->is_possible_link()) {
// We made it through.
// Perform normal wildcard expansion on this new directory,
// starting at our tail_wc
wcstring full_path = base_dir + entry->name;
full_path.push_back(L'/');
this->expand(full_path, wc_remainder, prefix + wc_segment + L'/');
continue;
}
auto statbuf = entry->stat();
if (!statbuf) {
continue;
@ -744,8 +761,7 @@ void wildcard_expander_t::expand_intermediate_segment(const wcstring &base_dir,
continue;
}
// We made it through. Perform normal wildcard expansion on this new directory, starting at
// our tail_wc, which includes the ANY_STRING_RECURSIVE guy.
// (like the fast path above)
wcstring full_path = base_dir + entry->name;
full_path.push_back(L'/');
this->expand(full_path, wc_remainder, prefix + wc_segment + L'/');

View file

@ -228,10 +228,12 @@ const dir_iter_t::entry_t *dir_iter_t::next() {
entry_.inode = dent->d_ino;
#ifdef HAVE_STRUCT_DIRENT_D_TYPE
auto type = dirent_type_to_entry_type(dent->d_type);
// Do not store symlinks as we will need to resolve them.
// Do not store symlinks as type as we will need to resolve them.
if (type != dir_entry_type_t::lnk) {
entry_.type_ = type;
}
// This entry could be a link if it is a link or unknown.
entry_.possible_link_ = !type.has_value() || type == dir_entry_type_t::lnk;
#endif
return &entry_;
}

View file

@ -221,6 +221,9 @@ class dir_iter_t : noncopyable_t {
/// \return whether this is a directory. This may call stat().
bool is_dir() const { return check_type() == dir_entry_type_t::dir; }
/// \return false if we know this can't be a link via d_type, true if it could be.
bool is_possible_link() const { return possible_link_; }
/// \return the stat buffer for this entry, invoking stat() if necessary.
const maybe_t<struct stat> &stat() const;
@ -239,6 +242,9 @@ class dir_iter_t : noncopyable_t {
// and the type is left as none(). Note this is an unavoidable race.
mutable maybe_t<dir_entry_type_t> type_{};
/// whether this entry could be a link, false if we know definitively it isn't.
bool possible_link_ = true;
// fd of the DIR*, used for fstatat().
int dirfd_{-1};