Merge pull request #6252 from sylvestre/hash-error

hashsum: improve the error management to match GNU
This commit is contained in:
Sylvestre Ledru 2024-04-21 09:55:16 +02:00 committed by GitHub
commit 64027e5a57
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 249 additions and 64 deletions

View file

@ -26,7 +26,8 @@ use uucore::sum::{
Blake2b, Blake3, Digest, DigestWriter, Md5, Sha1, Sha224, Sha256, Sha384, Sha3_224, Sha3_256,
Sha3_384, Sha3_512, Sha512, Shake128, Shake256,
};
use uucore::{display::Quotable, show_warning};
use uucore::util_name;
use uucore::{display::Quotable, show_warning_caps};
use uucore::{format_usage, help_about, help_usage};
const NAME: &str = "hashsum";
@ -577,7 +578,6 @@ fn uu_app(binary_name: &str) -> Command {
#[derive(Debug)]
enum HashsumError {
InvalidRegex,
InvalidFormat,
IgnoreNotCheck,
}
@ -588,7 +588,6 @@ impl std::fmt::Display for HashsumError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::InvalidRegex => write!(f, "invalid regular expression"),
Self::InvalidFormat => Ok(()),
Self::IgnoreNotCheck => write!(
f,
"the --ignore-missing option is meaningful only when verifying checksums"
@ -597,14 +596,57 @@ impl std::fmt::Display for HashsumError {
}
}
/// Creates a Regex for parsing lines based on the given format.
/// The default value of `gnu_re` created with this function has to be recreated
/// after the initial line has been parsed, as this line dictates the format
/// for the rest of them, and mixing of formats is disallowed.
fn gnu_re_template(bytes_marker: &str, format_marker: &str) -> Result<Regex, HashsumError> {
Regex::new(&format!(
r"^(?P<digest>[a-fA-F0-9]{bytes_marker}) {format_marker}(?P<fileName>.*)"
))
.map_err(|_| HashsumError::InvalidRegex)
}
fn handle_captures(
caps: &Captures,
bytes_marker: &str,
bsd_reversed: &mut Option<bool>,
gnu_re: &mut Regex,
) -> Result<(String, String, bool), HashsumError> {
if bsd_reversed.is_none() {
let is_bsd_reversed = caps.name("binary").is_none();
let format_marker = if is_bsd_reversed {
""
} else {
r"(?P<binary>[ \*])"
}
.to_string();
*bsd_reversed = Some(is_bsd_reversed);
*gnu_re = gnu_re_template(bytes_marker, &format_marker)?;
}
Ok((
caps.name("fileName").unwrap().as_str().to_string(),
caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
if *bsd_reversed == Some(false) {
caps.name("binary").unwrap().as_str() == "*"
} else {
false
},
))
}
#[allow(clippy::cognitive_complexity)]
fn hashsum<'a, I>(mut options: Options, files: I) -> UResult<()>
where
I: Iterator<Item = &'a OsStr>,
{
let mut bad_format = 0;
let mut correct_format = 0;
let mut failed_cksum = 0;
let mut failed_open_file = 0;
let mut skip_summary = false;
let binary_marker = if options.binary { "*" } else { " " };
for filename in files {
let filename = Path::new(filename);
@ -636,69 +678,32 @@ where
// BSD reversed mode format is similar to the default mode, but doesnt use a character to distinguish binary and text modes.
let mut bsd_reversed = None;
/// Creates a Regex for parsing lines based on the given format.
/// The default value of `gnu_re` created with this function has to be recreated
/// after the initial line has been parsed, as this line dictates the format
/// for the rest of them, and mixing of formats is disallowed.
fn gnu_re_template(
bytes_marker: &str,
format_marker: &str,
) -> Result<Regex, HashsumError> {
Regex::new(&format!(
r"^(?P<digest>[a-fA-F0-9]{bytes_marker}) {format_marker}(?P<fileName>.*)"
))
.map_err(|_| HashsumError::InvalidRegex)
}
let mut gnu_re = gnu_re_template(&bytes_marker, r"(?P<binary>[ \*])?")?;
let bsd_re = Regex::new(&format!(
// it can start with \
r"^(|\\){algorithm} \((?P<fileName>.*)\) = (?P<digest>[a-fA-F0-9]{digest_size})",
r"^(\\)?{algorithm}\s*\((?P<fileName>.*)\)\s*=\s*(?P<digest>[a-fA-F0-9]{digest_size})$",
algorithm = options.algoname,
digest_size = bytes_marker,
))
.map_err(|_| HashsumError::InvalidRegex)?;
fn handle_captures(
caps: &Captures,
bytes_marker: &str,
bsd_reversed: &mut Option<bool>,
gnu_re: &mut Regex,
) -> Result<(String, String, bool), HashsumError> {
if bsd_reversed.is_none() {
let is_bsd_reversed = caps.name("binary").is_none();
let format_marker = if is_bsd_reversed {
""
} else {
r"(?P<binary>[ \*])"
}
.to_string();
*bsd_reversed = Some(is_bsd_reversed);
*gnu_re = gnu_re_template(bytes_marker, &format_marker)?;
}
Ok((
caps.name("fileName").unwrap().as_str().to_string(),
caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
if *bsd_reversed == Some(false) {
caps.name("binary").unwrap().as_str() == "*"
} else {
false
},
))
}
let buffer = file;
// iterate on the lines of the file
for (i, maybe_line) in buffer.lines().enumerate() {
let line = match maybe_line {
Ok(l) => l,
Err(e) => return Err(e.map_err_context(|| "failed to read file".to_string())),
};
if line.is_empty() {
// empty line, skip it
continue;
}
let (ck_filename, sum, binary_check) = match gnu_re.captures(&line) {
Some(caps) => {
handle_captures(&caps, &bytes_marker, &mut bsd_reversed, &mut gnu_re)?
}
None => match bsd_re.captures(&line) {
// if the GNU style parsing failed, try the BSD style
Some(caps) => (
caps.name("fileName").unwrap().as_str().to_string(),
caps.name("digest").unwrap().as_str().to_ascii_lowercase(),
@ -707,11 +712,14 @@ where
None => {
bad_format += 1;
if options.strict {
return Err(HashsumError::InvalidFormat.into());
// if we use strict, the warning "lines are improperly formatted"
// will trigger an exit code of 1
set_exit_code(1);
}
if options.warn {
show_warning!(
"{}: {}: improperly formatted {} checksum line",
eprintln!(
"{}: {}: {}: improperly formatted {} checksum line",
util_name(),
filename.maybe_quote(),
i + 1,
options.algoname
@ -725,7 +733,8 @@ where
let f = match File::open(ck_filename_unescaped) {
Err(_) => {
if options.ignore_missing {
// No need to show or return an error.
// No need to show or return an error
// except when the file doesn't have any successful checks
continue;
}
@ -760,6 +769,7 @@ where
// and display it using uucore::display::print_verbatim(). This is
// easier (and more important) on Unix than on Windows.
if sum == real_sum {
correct_format += 1;
if !options.quiet {
println!("{prefix}{ck_filename}: OK");
}
@ -768,6 +778,7 @@ where
println!("{prefix}{ck_filename}: FAILED");
}
failed_cksum += 1;
set_exit_code(1);
}
}
} else {
@ -793,20 +804,57 @@ where
println!("{}{} {}{}", prefix, sum, binary_marker, escaped_filename);
}
}
if bad_format > 0 && failed_cksum == 0 && correct_format == 0 && !options.status {
// we have only bad format. we didn't have anything correct.
// GNU has a different error message for this (with the filename)
set_exit_code(1);
eprintln!(
"{}: {}: no properly formatted checksum lines found",
util_name(),
filename.maybe_quote(),
);
skip_summary = true;
}
if options.ignore_missing && correct_format == 0 {
// we have only bad format
// and we had ignore-missing
eprintln!(
"{}: {}: no file was verified",
util_name(),
filename.maybe_quote(),
);
skip_summary = true;
set_exit_code(1);
}
}
if !options.status {
if !options.status && !skip_summary {
match bad_format.cmp(&1) {
Ordering::Equal => show_warning!("{} line is improperly formatted", bad_format),
Ordering::Greater => show_warning!("{} lines are improperly formatted", bad_format),
Ordering::Equal => {
show_warning_caps!("{} line is improperly formatted", bad_format)
}
Ordering::Greater => {
show_warning_caps!("{} lines are improperly formatted", bad_format)
}
Ordering::Less => {}
};
if failed_cksum > 0 {
show_warning!("{} computed checksum did NOT match", failed_cksum);
}
match failed_open_file.cmp(&1) {
Ordering::Equal => show_warning!("{} listed file could not be read", failed_open_file),
match failed_cksum.cmp(&1) {
Ordering::Equal => {
show_warning_caps!("{} computed checksum did NOT match", failed_cksum)
}
Ordering::Greater => {
show_warning!("{} listed files could not be read", failed_open_file);
show_warning_caps!("{} computed checksums did NOT match", failed_cksum)
}
Ordering::Less => {}
};
match failed_open_file.cmp(&1) {
Ordering::Equal => {
show_warning_caps!("{} listed file could not be read", failed_open_file)
}
Ordering::Greater => {
show_warning_caps!("{} listed files could not be read", failed_open_file);
}
Ordering::Less => {}
}

View file

@ -182,6 +182,14 @@ macro_rules! show_warning(
})
);
#[macro_export]
macro_rules! show_warning_caps(
($($args:tt)+) => ({
eprint!("{}: WARNING: ", $crate::util_name());
eprintln!($($args)+);
})
);
/// Display an error and [`std::process::exit`]
///
/// Displays the provided error message using [`show_error!`], then invokes

View file

@ -244,7 +244,7 @@ fn test_check_file_not_found_warning() {
.arg(at.subdir.join("testf.sha1"))
.fails()
.stdout_is("sha1sum: testf: No such file or directory\ntestf: FAILED open or read\n")
.stderr_is("sha1sum: warning: 1 listed file could not be read\n");
.stderr_is("sha1sum: WARNING: 1 listed file could not be read\n");
}
// Asterisk `*` is a reserved paths character on win32, nor the path can end with a whitespace.
@ -457,6 +457,24 @@ fn test_with_escape_filename_zero_text() {
assert!(stdout.contains("a\nb"));
}
#[test]
fn test_check_empty_line() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.touch("f");
at.write(
"in.md5",
"d41d8cd98f00b204e9800998ecf8427e f\n\nd41d8cd98f00b204e9800998ecf8427e f\ninvalid\n\n",
);
scene
.ccmd("md5sum")
.arg("--check")
.arg(at.subdir.join("in.md5"))
.succeeds()
.stderr_contains("WARNING: 1 line is improperly formatted");
}
#[test]
#[cfg(not(windows))]
fn test_check_with_escape_filename() {
@ -480,3 +498,114 @@ fn test_check_with_escape_filename() {
.succeeds();
result.stdout_is("\\a\\nb: OK\n");
}
#[test]
fn test_check_strict_error() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("f", "");
at.write(
"in.md5",
"ERR\nERR\nd41d8cd98f00b204e9800998ecf8427e f\nERR\n",
);
scene
.ccmd("md5sum")
.arg("--check")
.arg("--strict")
.arg(at.subdir.join("in.md5"))
.fails()
.stderr_contains("WARNING: 3 lines are improperly formatted");
}
#[test]
fn test_check_warn() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("f", "");
at.write(
"in.md5",
"d41d8cd98f00b204e9800998ecf8427e f\nd41d8cd98f00b204e9800998ecf8427e f\ninvalid\n",
);
scene
.ccmd("md5sum")
.arg("--check")
.arg("--warn")
.arg(at.subdir.join("in.md5"))
.succeeds()
.stderr_contains("in.md5: 3: improperly formatted MD5 checksum line")
.stderr_contains("WARNING: 1 line is improperly formatted");
// with strict, we should fail the execution
scene
.ccmd("md5sum")
.arg("--check")
.arg("--strict")
.arg(at.subdir.join("in.md5"))
.fails();
}
#[test]
fn test_check_status() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("f", "");
at.write("in.md5", "MD5(f)= d41d8cd98f00b204e9800998ecf8427f\n");
scene
.ccmd("md5sum")
.arg("--check")
.arg("--status")
.arg(at.subdir.join("in.md5"))
.fails()
.no_output();
}
#[test]
fn test_check_status_code() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("f", "");
at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427f f\n");
scene
.ccmd("md5sum")
.arg("--check")
.arg("--status")
.arg(at.subdir.join("in.md5"))
.fails()
.stderr_is("")
.stdout_is("");
}
#[test]
fn test_check_no_backslash_no_space() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("f", "");
at.write("in.md5", "MD5(f)= d41d8cd98f00b204e9800998ecf8427e\n");
scene
.ccmd("md5sum")
.arg("--check")
.arg(at.subdir.join("in.md5"))
.succeeds()
.stdout_is("f: OK\n");
}
#[test]
fn test_check_check_ignore_no_file() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("f", "");
at.write("in.md5", "d41d8cd98f00b204e9800998ecf8427f missing\n");
scene
.ccmd("md5sum")
.arg("--check")
.arg("--ignore-missing")
.arg(at.subdir.join("in.md5"))
.fails()
.stderr_contains("in.md5: no file was verified");
}

View file

@ -337,12 +337,12 @@ ls: invalid --time-style argument 'XX'\nPossible values are: [\"full-iso\", \"lo
# "hostid BEFORE --help AFTER " same for this
sed -i -e "s/env \$prog \$BEFORE \$opt > out2/env \$prog \$BEFORE \$opt > out2 #/" -e "s/env \$prog \$BEFORE \$opt AFTER > out3/env \$prog \$BEFORE \$opt AFTER > out3 #/" -e "s/compare exp out2/compare exp out2 #/" -e "s/compare exp out3/compare exp out3 #/" tests/help/help-version-getopt.sh
# The case doesn't really matter here
sed -i -e "s|WARNING: 1 line is improperly formatted|warning: 1 line is improperly formatted|" tests/cksum/md5sum-bsd.sh
# Add debug info + we have less syscall then GNU's. Adjust our check.
# Use GNU sed for /c command
"${SED}" -i -e '/test \$n_stat1 = \$n_stat2 \\/c\
echo "n_stat1 = \$n_stat1"\n\
echo "n_stat2 = \$n_stat2"\n\
test \$n_stat1 -ge \$n_stat2 \\' tests/ls/stat-free-color.sh
# no need to replicate this output with hashsum
sed -i -e "s|Try 'md5sum --help' for more information.\\\n||" tests/cksum/md5sum.pl