mirror of
https://github.com/uutils/coreutils
synced 2025-01-19 00:24:13 +00:00
Merge pull request #3754 from ackerleytng/main
Add `parse_glob` module and update `du` to use `parse_glob`
This commit is contained in:
commit
8692301ec7
7 changed files with 224 additions and 104 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3102,6 +3102,7 @@ dependencies = [
|
|||
"data-encoding-macro",
|
||||
"dns-lookup",
|
||||
"dunce",
|
||||
"glob",
|
||||
"itertools",
|
||||
"libc",
|
||||
"nix",
|
||||
|
|
|
@ -37,6 +37,7 @@ use uucore::display::{print_verbatim, Quotable};
|
|||
use uucore::error::FromIo;
|
||||
use uucore::error::{UError, UResult};
|
||||
use uucore::format_usage;
|
||||
use uucore::parse_glob;
|
||||
use uucore::parse_size::{parse_size, ParseSizeError};
|
||||
use uucore::InvalidEncodingHandling;
|
||||
#[cfg(windows)]
|
||||
|
@ -488,55 +489,28 @@ fn file_as_vec(filename: impl AsRef<Path>) -> Vec<String> {
|
|||
|
||||
// Given the --exclude-from and/or --exclude arguments, returns the globset lists
|
||||
// to ignore the files
|
||||
fn get_glob_ignore(matches: &ArgMatches) -> UResult<Vec<Pattern>> {
|
||||
let mut excludes_from = if matches.contains_id(options::EXCLUDE_FROM) {
|
||||
match matches.values_of(options::EXCLUDE_FROM) {
|
||||
Some(all_files) => {
|
||||
let mut exclusion = Vec::<String>::new();
|
||||
// Read the exclude lists from all the files
|
||||
// and add them into a vector of string
|
||||
let files: Vec<String> = all_files.clone().map(|v| v.to_owned()).collect();
|
||||
for f in files {
|
||||
exclusion.extend(file_as_vec(&f));
|
||||
}
|
||||
exclusion
|
||||
}
|
||||
None => Vec::<String>::new(),
|
||||
}
|
||||
} else {
|
||||
Vec::<String>::new()
|
||||
};
|
||||
fn build_exclude_patterns(matches: &ArgMatches) -> UResult<Vec<Pattern>> {
|
||||
let exclude_from_iterator = matches
|
||||
.values_of(options::EXCLUDE_FROM)
|
||||
.unwrap_or_default()
|
||||
.flat_map(|f| file_as_vec(&f));
|
||||
|
||||
let mut excludes = if matches.contains_id(options::EXCLUDE) {
|
||||
match matches.values_of(options::EXCLUDE) {
|
||||
Some(v) => {
|
||||
// Read the various arguments
|
||||
v.clone().map(|v| v.to_owned()).collect()
|
||||
}
|
||||
None => Vec::<String>::new(),
|
||||
}
|
||||
} else {
|
||||
Vec::<String>::new()
|
||||
};
|
||||
let excludes_iterator = matches
|
||||
.values_of(options::EXCLUDE)
|
||||
.unwrap_or_default()
|
||||
.map(|v| v.to_owned());
|
||||
|
||||
// Merge the two lines
|
||||
excludes.append(&mut excludes_from);
|
||||
if !&excludes.is_empty() {
|
||||
let mut builder = Vec::new();
|
||||
// Create the `Vec` of excludes
|
||||
for f in excludes {
|
||||
if matches.contains_id(options::VERBOSE) {
|
||||
println!("adding {:?} to the exclude list ", &f);
|
||||
}
|
||||
match Pattern::new(&f) {
|
||||
Ok(glob) => builder.push(glob),
|
||||
Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()),
|
||||
};
|
||||
let mut exclude_patterns = Vec::new();
|
||||
for f in excludes_iterator.chain(exclude_from_iterator) {
|
||||
if matches.is_present(options::VERBOSE) {
|
||||
println!("adding {:?} to the exclude list ", &f);
|
||||
}
|
||||
match parse_glob::from_str(&f) {
|
||||
Ok(glob) => exclude_patterns.push(glob),
|
||||
Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()),
|
||||
}
|
||||
Ok(builder)
|
||||
} else {
|
||||
Ok(Vec::new())
|
||||
}
|
||||
Ok(exclude_patterns)
|
||||
}
|
||||
|
||||
#[uucore::main]
|
||||
|
@ -615,85 +589,84 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
"\n"
|
||||
};
|
||||
|
||||
let excludes = get_glob_ignore(&matches)?;
|
||||
let excludes = build_exclude_patterns(&matches)?;
|
||||
|
||||
let mut grand_total = 0;
|
||||
'loop_file: for path_string in files {
|
||||
// Skip if we don't want to ignore anything
|
||||
if !&excludes.is_empty() {
|
||||
for pattern in &excludes {
|
||||
{
|
||||
if pattern.matches(path_string) {
|
||||
// if the directory is ignored, leave early
|
||||
if options.verbose {
|
||||
println!("{} ignored", path_string.quote());
|
||||
}
|
||||
continue 'loop_file;
|
||||
if pattern.matches(path_string) {
|
||||
// if the directory is ignored, leave early
|
||||
if options.verbose {
|
||||
println!("{} ignored", path_string.quote());
|
||||
}
|
||||
continue 'loop_file;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let path = PathBuf::from(&path_string);
|
||||
match Stat::new(path, &options) {
|
||||
Ok(stat) => {
|
||||
let mut inodes: HashSet<FileInfo> = HashSet::new();
|
||||
if let Some(inode) = stat.inode {
|
||||
inodes.insert(inode);
|
||||
// Check existence of path provided in argument
|
||||
if let Ok(stat) = Stat::new(path, &options) {
|
||||
// Kick off the computation of disk usage from the initial path
|
||||
let mut inodes: HashSet<FileInfo> = HashSet::new();
|
||||
if let Some(inode) = stat.inode {
|
||||
inodes.insert(inode);
|
||||
}
|
||||
let iter = du(stat, &options, 0, &mut inodes, &excludes);
|
||||
|
||||
// Sum up all the returned `Stat`s and display results
|
||||
let (_, len) = iter.size_hint();
|
||||
let len = len.unwrap();
|
||||
for (index, stat) in iter.enumerate() {
|
||||
let size = choose_size(&matches, &stat);
|
||||
|
||||
if threshold.map_or(false, |threshold| threshold.should_exclude(size)) {
|
||||
continue;
|
||||
}
|
||||
let iter = du(stat, &options, 0, &mut inodes, &excludes);
|
||||
let (_, len) = iter.size_hint();
|
||||
let len = len.unwrap();
|
||||
for (index, stat) in iter.enumerate() {
|
||||
let size = choose_size(&matches, &stat);
|
||||
|
||||
if threshold.map_or(false, |threshold| threshold.should_exclude(size)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if matches.contains_id(options::TIME) {
|
||||
let tm = {
|
||||
let secs = {
|
||||
match matches.value_of(options::TIME) {
|
||||
Some(s) => match s {
|
||||
"ctime" | "status" => stat.modified,
|
||||
"access" | "atime" | "use" => stat.accessed,
|
||||
"birth" | "creation" => stat
|
||||
.created
|
||||
.ok_or_else(|| DuError::InvalidTimeArg(s.into()))?,
|
||||
// below should never happen as clap already restricts the values.
|
||||
_ => unreachable!("Invalid field for --time"),
|
||||
},
|
||||
None => stat.modified,
|
||||
}
|
||||
};
|
||||
DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs(secs))
|
||||
if matches.is_present(options::TIME) {
|
||||
let tm = {
|
||||
let secs = {
|
||||
match matches.value_of(options::TIME) {
|
||||
Some(s) => match s {
|
||||
"ctime" | "status" => stat.modified,
|
||||
"access" | "atime" | "use" => stat.accessed,
|
||||
"birth" | "creation" => stat
|
||||
.created
|
||||
.ok_or_else(|| DuError::InvalidTimeArg(s.into()))?,
|
||||
// below should never happen as clap already restricts the values.
|
||||
_ => unreachable!("Invalid field for --time"),
|
||||
},
|
||||
None => stat.modified,
|
||||
}
|
||||
};
|
||||
if !summarize || index == len - 1 {
|
||||
let time_str = tm.format(time_format_str).to_string();
|
||||
print!("{}\t{}\t", convert_size(size), time_str);
|
||||
print_verbatim(stat.path).unwrap();
|
||||
print!("{}", line_separator);
|
||||
}
|
||||
} else if !summarize || index == len - 1 {
|
||||
print!("{}\t", convert_size(size));
|
||||
DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs(secs))
|
||||
};
|
||||
if !summarize || index == len - 1 {
|
||||
let time_str = tm.format(time_format_str).to_string();
|
||||
print!("{}\t{}\t", convert_size(size), time_str);
|
||||
print_verbatim(stat.path).unwrap();
|
||||
print!("{}", line_separator);
|
||||
}
|
||||
if options.total && index == (len - 1) {
|
||||
// The last element will be the total size of the path under
|
||||
// path_string. We add it to the grand total.
|
||||
grand_total += size;
|
||||
}
|
||||
} else if !summarize || index == len - 1 {
|
||||
print!("{}\t", convert_size(size));
|
||||
print_verbatim(stat.path).unwrap();
|
||||
print!("{}", line_separator);
|
||||
}
|
||||
if options.total && index == (len - 1) {
|
||||
// The last element will be the total size of the path under
|
||||
// path_string. We add it to the grand total.
|
||||
grand_total += size;
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
show_error!(
|
||||
"{}: {}",
|
||||
path_string.maybe_quote(),
|
||||
"No such file or directory"
|
||||
);
|
||||
}
|
||||
} else {
|
||||
show_error!(
|
||||
"{}: {}",
|
||||
path_string.maybe_quote(),
|
||||
"No such file or directory"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@ clap = "3.2"
|
|||
dns-lookup = { version="1.0.5", optional=true }
|
||||
dunce = "1.0.0"
|
||||
wild = "2.0"
|
||||
glob = "0.3.0"
|
||||
# * optional
|
||||
itertools = { version="0.10.0", optional=true }
|
||||
thiserror = { version="1.0", optional=true }
|
||||
|
|
|
@ -29,6 +29,7 @@ pub use crate::mods::ranges;
|
|||
pub use crate::mods::version_cmp;
|
||||
|
||||
// * string parsing modules
|
||||
pub use crate::parser::parse_glob;
|
||||
pub use crate::parser::parse_size;
|
||||
pub use crate::parser::parse_time;
|
||||
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
pub mod parse_glob;
|
||||
pub mod parse_size;
|
||||
pub mod parse_time;
|
||||
|
|
109
src/uucore/src/lib/parser/parse_glob.rs
Normal file
109
src/uucore/src/lib/parser/parse_glob.rs
Normal file
|
@ -0,0 +1,109 @@
|
|||
//! Parsing a glob Pattern from a string.
|
||||
//!
|
||||
//! Use the [`from_str`] function to parse a [`Pattern`] from a string.
|
||||
|
||||
// cSpell:words fnmatch
|
||||
|
||||
use glob::{Pattern, PatternError};
|
||||
|
||||
/// Rewrite caret-negated character sets (`[^...]`) into the `[!...]` form.
///
/// The input is scanned character by character. Whenever a `[` is directly
/// followed by `^`, and a closing `]` is found at least one character after
/// the `^` (so a `]` right after the caret counts as a member of the set, not
/// as the terminator), the `^` is replaced by `!` and scanning resumes after
/// that closing `]`. Anything else is copied through unchanged, including
/// sets that are never closed.
///
/// All bounds are computed on the number of `char`s, not on the UTF-8 byte
/// length of `glob`, so multi-byte input cannot cause out-of-range indexing.
fn fix_negation(glob: &str) -> String {
    let mut chars = glob.chars().collect::<Vec<_>>();

    let mut i = 0;
    // The shortest negated set is four characters long: `[`, `^`, one member
    // and `]`. Once fewer than four characters remain nothing can match, and
    // this guard also keeps `chars[i + 1]` and `chars[i + 3..]` in range.
    while i + 4 <= chars.len() {
        if chars[i] == '[' && chars[i + 1] == '^' {
            // Start the search at `i + 3` so that the first member of the set
            // is never mistaken for the closing bracket (e.g. `[^]]`).
            if let Some(j) = chars[i + 3..].iter().position(|x| *x == ']') {
                chars[i + 1] = '!';
                // Skip past the closing bracket of the set just rewritten.
                i += j + 4;
                continue;
            }
        }

        i += 1;
    }

    chars.into_iter().collect()
}
|
||||
|
||||
/// Parse a glob Pattern from a string.
|
||||
///
|
||||
/// This function amends the input string to replace any caret or circumflex
|
||||
/// character (^) used to negate a set of characters with an exclamation mark
|
||||
/// (!), which adapts rust's glob matching to function the way the GNU utils'
|
||||
/// fnmatch does.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// use std::time::Duration;
|
||||
/// use uucore::parse_glob::from_str;
|
||||
/// assert!(!from_str("[^abc]").unwrap().matches("a"));
|
||||
/// assert!(from_str("[^abc]").unwrap().matches("x"));
|
||||
/// ```
|
||||
pub fn from_str(glob: &str) -> Result<Pattern, PatternError> {
|
||||
Pattern::new(&fix_negation(glob))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A caret-negated set must compile to the same pattern as its `!` form.
    #[test]
    fn test_from_str() {
        let parsed = from_str("[^abc]").unwrap();
        let expected = Pattern::new("[!abc]").unwrap();
        assert_eq!(parsed, expected);
    }

    /// Inputs whose caret negation must be rewritten, paired with the result.
    #[test]
    fn test_fix_negation() {
        let cases = [
            // Happy/simple case
            ("[^abc]", "[!abc]"),
            // Negation inside a longer glob
            ("foo[abc] bar[^def]", "foo[abc] bar[!def]"),
            // Several negations in one glob
            ("foo[^abc]bar[^def]", "foo[!abc]bar[!def]"),
            // Negation of the single character ]
            ("[^]]", "[!]]"),
            // Negation of the single character ^
            ("[^^]", "[!^]"),
            // Negation of the space character
            ("[^ ]", "[! ]"),
            // Complicated patterns
            ("[^][]", "[!][]"),
            ("[^[]]", "[![]]"),
            // More complex patterns that should be replaced
            ("[[]] [^a]", "[[]] [!a]"),
            ("[[] [^a]", "[[] [!a]"),
            ("[]] [^a]", "[]] [!a]"),
        ];

        for &(input, expected) in &cases {
            assert_eq!(fix_negation(input), expected);
        }
    }

    /// Inputs that must come back exactly as they went in.
    #[test]
    fn test_fix_negation_should_not_amend() {
        let untouched = [
            "abc",
            // Glob that specifically matches either [ or ^
            "[[^]",
            // Glob that specifically matches either space or ^
            "[ ^]",
            // Globs that specifically match either [, space or ^
            "[[ ^]",
            "[ [^]",
            // Invalid globs (according to rust's glob implementation) will remain unamended
            "[^]",
            "[^",
            "[][^]",
        ];

        for &input in &untouched {
            assert_eq!(fix_negation(input), input);
        }
    }
}
|
|
@ -747,6 +747,40 @@ fn test_du_exclude_mix() {
|
|||
assert!(result.stdout_str().contains("xcwww"));
|
||||
}
|
||||
|
||||
#[test]
// Disabled on Windows because the test looks for `/`,
// and it would be more complex if `\` had to be supported too
#[cfg(not(target_os = "windows"))]
fn test_du_complex_exclude_patterns() {
    let ts = TestScenario::new(util_name!());
    let at = &ts.fixtures;

    for dir in [
        "azerty/xcwww/azeaze",
        "azerty/xcwww/qzerty",
        "azerty/xcwww/amazing",
    ] {
        at.mkdir_all(dir);
    }

    // Negation in glob should work with both ^ and !
    for exclude_arg in ["--exclude=azerty/*/[^q]*", "--exclude=azerty/*/[!q]*"] {
        let result = ts.ucmd().arg(exclude_arg).arg("azerty").succeeds();
        let stdout = result.stdout_str();

        assert!(!stdout.contains("amazing"));
        assert!(stdout.contains("qzerty"));
        assert!(!stdout.contains("azeaze"));
        assert!(stdout.contains("xcwww"));
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_du_exclude_several_components() {
|
||||
let ts = TestScenario::new(util_name!());
|
||||
|
|
Loading…
Reference in a new issue