Merge pull request #3754 from ackerleytng/main

Add `parse_glob` module and update `du` to use `parse_glob`
This commit is contained in:
Sylvestre Ledru 2022-08-10 19:28:40 +02:00 committed by GitHub
commit 8692301ec7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 224 additions and 104 deletions

1
Cargo.lock generated
View file

@ -3102,6 +3102,7 @@ dependencies = [
"data-encoding-macro",
"dns-lookup",
"dunce",
"glob",
"itertools",
"libc",
"nix",

View file

@ -37,6 +37,7 @@ use uucore::display::{print_verbatim, Quotable};
use uucore::error::FromIo;
use uucore::error::{UError, UResult};
use uucore::format_usage;
use uucore::parse_glob;
use uucore::parse_size::{parse_size, ParseSizeError};
use uucore::InvalidEncodingHandling;
#[cfg(windows)]
@ -488,55 +489,28 @@ fn file_as_vec(filename: impl AsRef<Path>) -> Vec<String> {
// Given the --exclude-from and/or --exclude arguments, returns the globset lists
// to ignore the files
fn get_glob_ignore(matches: &ArgMatches) -> UResult<Vec<Pattern>> {
let mut excludes_from = if matches.contains_id(options::EXCLUDE_FROM) {
match matches.values_of(options::EXCLUDE_FROM) {
Some(all_files) => {
let mut exclusion = Vec::<String>::new();
// Read the exclude lists from all the files
// and add them into a vector of string
let files: Vec<String> = all_files.clone().map(|v| v.to_owned()).collect();
for f in files {
exclusion.extend(file_as_vec(&f));
}
exclusion
}
None => Vec::<String>::new(),
}
} else {
Vec::<String>::new()
};
fn build_exclude_patterns(matches: &ArgMatches) -> UResult<Vec<Pattern>> {
let exclude_from_iterator = matches
.values_of(options::EXCLUDE_FROM)
.unwrap_or_default()
.flat_map(|f| file_as_vec(&f));
let mut excludes = if matches.contains_id(options::EXCLUDE) {
match matches.values_of(options::EXCLUDE) {
Some(v) => {
// Read the various arguments
v.clone().map(|v| v.to_owned()).collect()
}
None => Vec::<String>::new(),
}
} else {
Vec::<String>::new()
};
let excludes_iterator = matches
.values_of(options::EXCLUDE)
.unwrap_or_default()
.map(|v| v.to_owned());
// Merge the two lines
excludes.append(&mut excludes_from);
if !&excludes.is_empty() {
let mut builder = Vec::new();
// Create the `Vec` of excludes
for f in excludes {
if matches.contains_id(options::VERBOSE) {
println!("adding {:?} to the exclude list ", &f);
}
match Pattern::new(&f) {
Ok(glob) => builder.push(glob),
Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()),
};
let mut exclude_patterns = Vec::new();
for f in excludes_iterator.chain(exclude_from_iterator) {
if matches.is_present(options::VERBOSE) {
println!("adding {:?} to the exclude list ", &f);
}
match parse_glob::from_str(&f) {
Ok(glob) => exclude_patterns.push(glob),
Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()),
}
Ok(builder)
} else {
Ok(Vec::new())
}
Ok(exclude_patterns)
}
#[uucore::main]
@ -615,85 +589,84 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
"\n"
};
let excludes = get_glob_ignore(&matches)?;
let excludes = build_exclude_patterns(&matches)?;
let mut grand_total = 0;
'loop_file: for path_string in files {
// Skip if we don't want to ignore anything
if !&excludes.is_empty() {
for pattern in &excludes {
{
if pattern.matches(path_string) {
// if the directory is ignored, leave early
if options.verbose {
println!("{} ignored", path_string.quote());
}
continue 'loop_file;
if pattern.matches(path_string) {
// if the directory is ignored, leave early
if options.verbose {
println!("{} ignored", path_string.quote());
}
continue 'loop_file;
}
}
}
let path = PathBuf::from(&path_string);
match Stat::new(path, &options) {
Ok(stat) => {
let mut inodes: HashSet<FileInfo> = HashSet::new();
if let Some(inode) = stat.inode {
inodes.insert(inode);
// Check existence of path provided in argument
if let Ok(stat) = Stat::new(path, &options) {
// Kick off the computation of disk usage from the initial path
let mut inodes: HashSet<FileInfo> = HashSet::new();
if let Some(inode) = stat.inode {
inodes.insert(inode);
}
let iter = du(stat, &options, 0, &mut inodes, &excludes);
// Sum up all the returned `Stat`s and display results
let (_, len) = iter.size_hint();
let len = len.unwrap();
for (index, stat) in iter.enumerate() {
let size = choose_size(&matches, &stat);
if threshold.map_or(false, |threshold| threshold.should_exclude(size)) {
continue;
}
let iter = du(stat, &options, 0, &mut inodes, &excludes);
let (_, len) = iter.size_hint();
let len = len.unwrap();
for (index, stat) in iter.enumerate() {
let size = choose_size(&matches, &stat);
if threshold.map_or(false, |threshold| threshold.should_exclude(size)) {
continue;
}
if matches.contains_id(options::TIME) {
let tm = {
let secs = {
match matches.value_of(options::TIME) {
Some(s) => match s {
"ctime" | "status" => stat.modified,
"access" | "atime" | "use" => stat.accessed,
"birth" | "creation" => stat
.created
.ok_or_else(|| DuError::InvalidTimeArg(s.into()))?,
// below should never happen as clap already restricts the values.
_ => unreachable!("Invalid field for --time"),
},
None => stat.modified,
}
};
DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs(secs))
if matches.is_present(options::TIME) {
let tm = {
let secs = {
match matches.value_of(options::TIME) {
Some(s) => match s {
"ctime" | "status" => stat.modified,
"access" | "atime" | "use" => stat.accessed,
"birth" | "creation" => stat
.created
.ok_or_else(|| DuError::InvalidTimeArg(s.into()))?,
// below should never happen as clap already restricts the values.
_ => unreachable!("Invalid field for --time"),
},
None => stat.modified,
}
};
if !summarize || index == len - 1 {
let time_str = tm.format(time_format_str).to_string();
print!("{}\t{}\t", convert_size(size), time_str);
print_verbatim(stat.path).unwrap();
print!("{}", line_separator);
}
} else if !summarize || index == len - 1 {
print!("{}\t", convert_size(size));
DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs(secs))
};
if !summarize || index == len - 1 {
let time_str = tm.format(time_format_str).to_string();
print!("{}\t{}\t", convert_size(size), time_str);
print_verbatim(stat.path).unwrap();
print!("{}", line_separator);
}
if options.total && index == (len - 1) {
// The last element will be the total size of the path under
// path_string. We add it to the grand total.
grand_total += size;
}
} else if !summarize || index == len - 1 {
print!("{}\t", convert_size(size));
print_verbatim(stat.path).unwrap();
print!("{}", line_separator);
}
if options.total && index == (len - 1) {
// The last element will be the total size of the path under
// path_string. We add it to the grand total.
grand_total += size;
}
}
Err(_) => {
show_error!(
"{}: {}",
path_string.maybe_quote(),
"No such file or directory"
);
}
} else {
show_error!(
"{}: {}",
path_string.maybe_quote(),
"No such file or directory"
);
}
}

View file

@ -23,6 +23,7 @@ clap = "3.2"
dns-lookup = { version="1.0.5", optional=true }
dunce = "1.0.0"
wild = "2.0"
glob = "0.3.0"
# * optional
itertools = { version="0.10.0", optional=true }
thiserror = { version="1.0", optional=true }

View file

@ -29,6 +29,7 @@ pub use crate::mods::ranges;
pub use crate::mods::version_cmp;
// * string parsing modules
pub use crate::parser::parse_glob;
pub use crate::parser::parse_size;
pub use crate::parser::parse_time;

View file

@ -1,2 +1,3 @@
pub mod parse_glob;
pub mod parse_size;
pub mod parse_time;

View file

@ -0,0 +1,109 @@
//! Parsing a glob Pattern from a string.
//!
//! Use the [`from_str`] function to parse a [`Pattern`] from a string.
// cSpell:words fnmatch
use glob::{Pattern, PatternError};
/// Rewrite `[^...]` character-class negations in `glob` as `[!...]`.
///
/// A `^` is only rewritten when it directly follows a `[` and a closing `]`
/// exists at least two characters after the `^` (so that the class contains
/// at least one member, which may itself be `]`). Everything else is returned
/// unchanged, including inputs with an unterminated or empty negated class
/// such as `[^` or `[^]`.
///
/// All indexing is done on `char`s rather than bytes, so inputs containing
/// multi-byte characters are handled without panicking.
fn fix_negation(glob: &str) -> String {
    let mut chars: Vec<char> = glob.chars().collect();

    let mut i = 0;
    // A rewritable class needs at least four characters: `[`, `^`, one member
    // and `]`. Bounding by the character count (not the byte length of `glob`)
    // keeps `chars[i + 1]` and `chars[i + 3..]` in range.
    while i + 3 < chars.len() {
        if chars[i] == '[' && chars[i + 1] == '^' {
            // Skip the first member of the class when looking for the closing
            // bracket: a `]` in that position is a literal member.
            match chars[i + 3..].iter().position(|&c| c == ']') {
                // No `]` remains anywhere in the tail, so no later class can
                // be closed either; nothing more to rewrite.
                None => break,
                Some(offset) => {
                    chars[i + 1] = '!';
                    // Resume just past the closing bracket.
                    i += offset + 4;
                    continue;
                }
            }
        }

        i += 1;
    }

    chars.into_iter().collect()
}
/// Parse a glob [`Pattern`] from a string.
///
/// This function amends the input string to replace any caret or circumflex
/// character (^) used to negate a set of characters with an exclamation mark
/// (!), which adapts rust's glob matching to function the way the GNU utils'
/// fnmatch does. The amended string is then handed to [`Pattern::new`].
///
/// # Errors
///
/// Returns the [`PatternError`] reported by [`Pattern::new`] when the amended
/// string is not a valid glob pattern.
///
/// # Examples
///
/// ```rust
/// use uucore::parse_glob::from_str;
/// assert!(!from_str("[^abc]").unwrap().matches("a"));
/// assert!(from_str("[^abc]").unwrap().matches("x"));
/// ```
pub fn from_str(glob: &str) -> Result<Pattern, PatternError> {
    Pattern::new(&fix_negation(glob))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `fix_negation` turns every `input` into its `expected` form.
    fn assert_fixed(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(fix_negation(input), expected);
        }
    }

    #[test]
    fn test_from_str() {
        let parsed = from_str("[^abc]").unwrap();
        let reference = Pattern::new("[!abc]").unwrap();
        assert_eq!(parsed, reference);
    }

    #[test]
    fn test_fix_negation() {
        assert_fixed(&[
            // Happy/simple case
            ("[^abc]", "[!abc]"),
            // A negation appearing late in a longer glob
            ("foo[abc] bar[^def]", "foo[abc] bar[!def]"),
            // Several negations in one glob
            ("foo[^abc]bar[^def]", "foo[!abc]bar[!def]"),
            // Negation of the single character ]
            ("[^]]", "[!]]"),
            // Negation of the single character ^
            ("[^^]", "[!^]"),
            // Negation of the space character
            ("[^ ]", "[! ]"),
            // Brackets as members of the negated class
            ("[^][]", "[!][]"),
            ("[^[]]", "[![]]"),
            // A non-negated class followed by a negated one
            ("[[]] [^a]", "[[]] [!a]"),
            ("[[] [^a]", "[[] [!a]"),
            ("[]] [^a]", "[]] [!a]"),
        ]);
    }

    #[test]
    fn test_fix_negation_should_not_amend() {
        assert_fixed(&[
            ("abc", "abc"),
            // Glob that specifically matches either [ or ^
            ("[[^]", "[[^]"),
            // Glob that specifically matches either space or ^
            ("[ ^]", "[ ^]"),
            // Globs that specifically match either [, space or ^
            ("[[ ^]", "[[ ^]"),
            ("[ [^]", "[ [^]"),
            // Invalid globs (according to rust's glob implementation) stay as they are
            ("[^]", "[^]"),
            ("[^", "[^"),
            ("[][^]", "[][^]"),
        ]);
    }
}

View file

@ -747,6 +747,40 @@ fn test_du_exclude_mix() {
assert!(result.stdout_str().contains("xcwww"));
}
#[test]
// Disable on Windows because we are looking for /
// And the tests would be more complex if we have to support \ too
#[cfg(not(target_os = "windows"))]
fn test_du_complex_exclude_patterns() {
    let ts = TestScenario::new(util_name!());
    let at = &ts.fixtures;

    // Three sibling directories; only `qzerty` starts with `q`.
    for &dir in &[
        "azerty/xcwww/azeaze",
        "azerty/xcwww/qzerty",
        "azerty/xcwww/amazing",
    ] {
        at.mkdir_all(dir);
    }

    // Negation in glob should work with both ^ and !, so both spellings of
    // the exclude pattern must yield the same listing.
    for &exclude_arg in &["--exclude=azerty/*/[^q]*", "--exclude=azerty/*/[!q]*"] {
        let result = ts.ucmd().arg(exclude_arg).arg("azerty").succeeds();
        let stdout = result.stdout_str();

        assert!(!stdout.contains("amazing"));
        assert!(stdout.contains("qzerty"));
        assert!(!stdout.contains("azeaze"));
        assert!(stdout.contains("xcwww"));
    }
}
#[test]
fn test_du_exclude_several_components() {
let ts = TestScenario::new(util_name!());