fish-shell/fish-rust/src/wutil/dir_iter.rs
ridiculousfish 2a13a30807 Clean up DirIter
DirIter had a serious bug where it would crash on an invalid path. Make it more
robust and rationalize its error handling. Move it into its own module and add
tests.
2023-07-16 12:05:29 -07:00

471 lines
15 KiB
Rust

use super::wcstoi;
use super::wopendir;
use crate::common::{cstr2wcstring, wcs2zstring};
use crate::wchar::{wstr, WString};
use libc::{
DT_BLK, DT_CHR, DT_DIR, DT_FIFO, DT_LNK, DT_REG, DT_SOCK, EACCES, EIO, ELOOP, ENAMETOOLONG,
ENODEV, ENOENT, ENOTDIR, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG,
S_IFSOCK,
};
use std::cell::Cell;
use std::io::{self};
use std::os::fd::RawFd;
use std::ptr::NonNull;
use std::rc::Rc;
use std::slice;
pub use wcstoi::*;
/// Types of files that may be in a directory.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DirEntryType {
fifo = 1, // FIFO file
chr, // character device
dir, // directory
blk, // block device
reg, // regular file
lnk, // symlink
sock, // socket
whiteout, // whiteout (from BSD)
}
/// An entry returned by DirIter.
#[derive(Clone)]
pub struct DirEntry {
/// File name of this entry.
pub name: WString,
/// inode of this entry.
pub inode: libc::ino_t,
// Stat buff for this entry, or none if not yet computed.
stat: Cell<Option<libc::stat>>,
// The type of the entry. This is initially none; it may be populated eagerly via readdir()
// on some filesystems, or later via stat(). If stat() fails, the error is silently ignored
// and the type is left as none(). Note this is an unavoidable race.
typ: Cell<Option<DirEntryType>>,
// fd of the DIR*, used for fstatat().
dirfd: Rc<DirFd>,
}
impl DirEntry {
/// \return the type of this entry if it is already available, otherwise none().
pub fn fast_type(&self) -> Option<DirEntryType> {
self.typ.get()
}
/// \return the type of this entry, falling back to stat() if necessary.
/// If stat() fails because the file has disappeared, this will return none().
/// If stat() fails because of a broken symlink, this will return type lnk.
pub fn check_type(&self) -> Option<DirEntryType> {
// Call stat if needed to populate our type, swallowing errors.
if self.typ.get().is_none() {
self.do_stat()
}
self.typ.get()
}
/// \return whether this is a directory. This may call stat().
pub fn is_dir(&self) -> bool {
self.check_type() == Some(DirEntryType::dir)
}
/// \return the stat buff for this entry, invoking stat() if necessary.
pub fn stat(&self) -> Option<libc::stat> {
if self.stat.get().is_none() {
self.do_stat();
}
self.stat.get()
}
// Reset our fields.
fn reset(&mut self) {
self.name.clear();
self.inode = unsafe { std::mem::zeroed() };
self.typ.set(None);
self.stat.set(None);
}
// Populate our stat buffer, and type. Errors are silently ignored.
fn do_stat(&self) {
// We want to set both our type and our stat buffer.
// If we follow symlinks and stat() errors with a bad symlink, set the type to link, but do not
// populate the stat buffer.
let fd = self.dirfd.fd();
if fd < 0 {
return;
}
let narrow = wcs2zstring(&self.name);
let mut s: libc::stat = unsafe { std::mem::zeroed() };
if unsafe { libc::fstatat(fd, narrow.as_ptr(), &mut s, 0) } == 0 {
self.stat.set(Some(s));
self.typ.set(stat_mode_to_entry_type(s.st_mode));
} else {
match errno::errno().0 {
ELOOP => {
self.typ.set(Some(DirEntryType::lnk));
}
EACCES | EIO | ENOENT | ENOTDIR | ENAMETOOLONG | ENODEV => {
// These are "expected" errors.
self.typ.set(None);
}
_ => {
self.typ.set(None);
// This used to print an error, but given that we have seen
// both ENODEV (above) and ENOTCONN,
// and that the error isn't actionable and shows up while typing,
// let's not do that.
// perror("fstatat");
}
}
}
}
}
fn dirent_type_to_entry_type(dt: u8) -> Option<DirEntryType> {
match dt {
DT_FIFO => Some(DirEntryType::fifo),
DT_CHR => Some(DirEntryType::chr),
DT_DIR => Some(DirEntryType::dir),
DT_BLK => Some(DirEntryType::blk),
DT_REG => Some(DirEntryType::reg),
DT_LNK => Some(DirEntryType::lnk),
DT_SOCK => Some(DirEntryType::sock),
// todo! whiteout
_ => None,
}
}
fn stat_mode_to_entry_type(m: libc::mode_t) -> Option<DirEntryType> {
match m & S_IFMT {
S_IFIFO => Some(DirEntryType::fifo),
S_IFCHR => Some(DirEntryType::chr),
S_IFDIR => Some(DirEntryType::dir),
S_IFBLK => Some(DirEntryType::blk),
S_IFREG => Some(DirEntryType::reg),
S_IFLNK => Some(DirEntryType::lnk),
S_IFSOCK => Some(DirEntryType::sock),
_ => {
// todo! whiteout
None
}
}
}
struct DirFd(NonNull<libc::DIR>);
impl DirFd {
/// Return the underlying file descriptor.
#[inline]
fn fd(&self) -> RawFd {
unsafe { libc::dirfd(self.dir()) }
}
/// Return the underlying DIR*.
#[inline]
fn dir(&self) -> *mut libc::DIR {
self.0.as_ptr()
}
}
/// Autoclose wrapper for DIR*.
impl Drop for DirFd {
fn drop(&mut self) {
unsafe {
let _ = libc::closedir(self.dir());
}
}
}
/// Class for iterating over a directory, wrapping readdir().
/// This allows enumerating the contents of a directory, exposing the file type if the filesystem
/// itself exposes that from readdir(). stat() is incurred only if necessary: if the entry is a
/// symlink, or if the caller asks for the stat buffer.
/// Symlinks are followed.
pub struct DirIter {
/// Whether this dir_iter considers the "." and ".." filesystem entries.
withdot: bool,
/// A reference to the underlying directory fd.
dir: Rc<DirFd>,
/// The storage for our entry. This allows us to iterate without allocating.
entry: DirEntry,
}
impl DirIter {
/// Open a directory at a given path.
/// Note opendir is guaranteed to set close-on-exec by POSIX (hooray).
pub fn new(path: &wstr) -> io::Result<Self> {
Self::new_impl(path, false)
}
pub fn new_with_dots(path: &wstr) -> io::Result<Self> {
Self::new_impl(path, true)
}
fn new_impl(path: &wstr, withdot: bool) -> io::Result<Self> {
let dir: *mut libc::DIR = wopendir(path);
let Some(dir) = NonNull::new(dir) else {
return Err(io::Error::last_os_error());
};
let dir = Rc::new(DirFd(dir));
let entry = DirEntry {
name: WString::new(),
inode: 0,
stat: Cell::new(None),
typ: Cell::new(None),
dirfd: dir.clone(),
};
Ok(DirIter {
withdot,
dir,
entry,
})
}
/// \return the underlying file descriptor.
pub fn fd(&self) -> RawFd {
self.dir.fd()
}
/// Rewind the directory to the beginning. This cannot fail.
pub fn rewind(&mut self) {
unsafe { libc::rewinddir(self.dir.dir()) };
}
/// Read the next entry in the directory.
/// This returns an error if readir errors, or Ok(None) if there are no more entries; else an Ok entry.
/// This is slightly more efficient than the Iterator version, as it avoids allocating.
pub fn next(&mut self) -> Option<io::Result<&DirEntry>> {
errno::set_errno(errno::Errno(0));
let dent = unsafe { libc::readdir(self.dir.dir()).as_ref() };
let Some(dent) = dent else {
// readdir distinguishes between EOF and error via errno.
let err = errno::errno().0;
if err == 0 {
return None;
} else {
return Some(Err(io::Error::from_raw_os_error(err)));
}
};
// dent.d_name is c_char; pretend it's u8.
assert!(std::mem::size_of::<libc::c_char>() == std::mem::size_of::<u8>());
let d_name_cchar = &dent.d_name;
let d_name = unsafe {
slice::from_raw_parts(d_name_cchar.as_ptr() as *const u8, d_name_cchar.len())
};
// Skip . and ..,
// unless we've been told not to.
if !self.withdot && (d_name.starts_with(b".\0") || d_name.starts_with(b"..\0")) {
return self.next();
}
self.entry.reset();
let d_name: Vec<u8> = dent.d_name.iter().map(|b| *b as u8).collect();
self.entry.name = cstr2wcstring(&d_name);
#[cfg(any(target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))]
{
self.entry.inode = dent.d_fileno;
}
#[cfg(not(any(target_os = "freebsd", target_os = "netbsd", target_os = "openbsd")))]
{
self.entry.inode = dent.d_ino;
}
let typ = dirent_type_to_entry_type(dent.d_type);
// Do not store symlinks as we will need to resolve them.
if typ != Some(DirEntryType::lnk) {
self.entry.typ.set(typ);
}
Some(Ok(&self.entry))
}
}
impl IntoIterator for DirIter {
type Item = io::Result<DirEntry>;
type IntoIter = Iter;
fn into_iter(self) -> Self::IntoIter {
Iter(self)
}
}
/// A convenient iterator over the entries in a directory.
/// This differs from DirIter::next() in that it allocates.
pub struct Iter(DirIter);
impl Iterator for Iter {
type Item = io::Result<DirEntry>;
fn next(&mut self) -> Option<Self::Item> {
match self.0.next()? {
Ok(entry) => Some(Ok(entry.clone())),
Err(e) => Some(Err(e)),
}
}
}
#[test]
fn test_dir_iter_bad_path() {
// Regression test: DirIter does not crash given a bad path.
use crate::wchar::L;
let dir = DirIter::new(L!("/a/bogus/path/which/does/notexist"));
assert!(dir.is_err());
}
#[test]
fn test_no_dots() {
use crate::wchar::L;
// DirIter does not return . or .. by default.
let dir = DirIter::new(L!(".")).expect("Should be able to open CWD");
for entry in dir {
assert!(entry.is_ok());
let entry = entry.unwrap();
assert_ne!(entry.name, ".");
assert_ne!(entry.name, "..");
}
}
#[test]
fn test_dots() {
use crate::wchar::L;
// DirIter returns . or .. if you ask nicely.
let dir = DirIter::new_with_dots(L!(".")).expect("Should be able to open CWD");
let mut seen_dot = false;
let mut seen_dotdot = false;
for entry in dir {
assert!(entry.is_ok());
let entry = entry.unwrap();
if entry.name == "." {
seen_dot = true;
} else if entry.name == ".." {
seen_dotdot = true;
}
}
assert!(seen_dot);
assert!(seen_dotdot);
}
// Test ported from C++.
#[test]
#[allow(clippy::if_same_then_else)]
fn test_dir_iter() {
use crate::common::charptr2wcstring;
use crate::wchar::L;
use libc::{close, mkfifo, open, rmdir, symlink, unlink, O_CREAT, O_WRONLY};
use std::ffi::CString;
let baditer = DirIter::new(L!("/definitely/not/a/valid/directory/for/sure"));
assert!(baditer.is_err());
let Err(err) = baditer else {
panic!("Expected error");
};
let err = err.raw_os_error().expect("Should have an errno value");
assert!(err == ENOENT || err == EACCES);
let mut t1: [u8; 31] = *b"/tmp/fish_test_dir_iter.XXXXXX\0";
let basepath_narrow = unsafe { libc::mkdtemp(t1.as_mut_ptr().cast()) };
assert!(!basepath_narrow.is_null(), "mkdtemp failed");
let basepath: WString = charptr2wcstring(basepath_narrow);
let makepath = |s: &str| -> CString {
let mut tmp = basepath.clone();
tmp.push('/');
tmp.push_str(s);
wcs2zstring(&tmp)
};
let dirname = "dir";
let regname = "reg";
let reglinkname = "reglink"; // link to regular file
let dirlinkname = "dirlink"; // link to directory
let badlinkname = "badlink"; // link to nowhere
let selflinkname = "selflink"; // link to self
let fifoname = "fifo";
#[rustfmt::skip]
let names = &[
dirname, regname, reglinkname, dirlinkname,
badlinkname, selflinkname, fifoname,
];
let is_link_name = |name: &wstr| -> bool {
name == reglinkname || name == dirlinkname || name == badlinkname || name == selflinkname
};
// Make our different file types
unsafe {
let mut ret = libc::mkdir(makepath(dirname).as_ptr(), 0o700);
assert!(ret == 0);
ret = open(makepath(regname).as_ptr(), O_CREAT | O_WRONLY, 0o600);
assert!(ret >= 0);
close(ret);
ret = symlink(makepath(regname).as_ptr(), makepath(reglinkname).as_ptr());
assert!(ret == 0);
ret = symlink(makepath(dirname).as_ptr(), makepath(dirlinkname).as_ptr());
assert!(ret == 0);
ret = symlink(
b"/this/is/an/invalid/path\0".as_ptr().cast(),
makepath(badlinkname).as_ptr(),
);
assert!(ret == 0);
ret = symlink(
makepath(selflinkname).as_ptr(),
makepath(selflinkname).as_ptr(),
);
assert!(ret == 0);
ret = mkfifo(makepath(fifoname).as_ptr(), 0o600);
assert!(ret == 0);
}
let mut iter1 = DirIter::new(&basepath).expect("Should be able to open directory");
let mut seen = 0;
while let Some(entry) = iter1.next() {
let entry = entry.expect("Should not have gotten error");
seen += 1;
assert!(entry.name != "." && entry.name != "..");
assert!(names.iter().any(|&n| entry.name == n));
let expected = if entry.name == dirname {
Some(DirEntryType::dir)
} else if entry.name == regname {
Some(DirEntryType::reg)
} else if entry.name == reglinkname {
Some(DirEntryType::reg)
} else if entry.name == dirlinkname {
Some(DirEntryType::dir)
} else if entry.name == badlinkname {
None
} else if entry.name == selflinkname {
Some(DirEntryType::lnk)
} else if entry.name == fifoname {
Some(DirEntryType::fifo)
} else {
panic!("Unexpected file type");
};
// Links should never have a fast type if we are resolving them, since we cannot resolve a
// symlink from readdir.
if is_link_name(&entry.name) {
assert!(entry.fast_type().is_none());
}
// If we have a fast type, it should be correct.
assert!(entry.fast_type().is_none() || entry.fast_type() == expected);
assert!(
entry.check_type() == expected,
"Wrong type for {}. Expected {:?}, got {:?}",
entry.name,
expected,
entry.check_type()
);
}
assert_eq!(seen, names.len());
// Clean up.
unsafe {
for name in names {
let _ = unlink(makepath(name).as_ptr().cast());
}
let _ = rmdir(basepath_narrow);
}
}