From 07434647ad4a09b48d0c8729245814d7eb850c8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Og=C3=B3rek?= Date: Sat, 27 May 2023 23:56:15 +0200 Subject: [PATCH] fix: Correctly detect format when using md5sum with check flag (#4645) --- src/uu/hashsum/src/hashsum.rs | 70 +++++++++++++---- tests/by-util/test_hashsum.rs | 138 ++++++++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+), 14 deletions(-) diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index af05ddbd9..7b571efcd 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -14,6 +14,7 @@ use clap::crate_version; use clap::ArgAction; use clap::{Arg, ArgMatches, Command}; use hex::encode; +use regex::Captures; use regex::Regex; use std::cmp::Ordering; use std::error::Error; @@ -604,22 +605,65 @@ where // regular expression, otherwise we use the `{n}` modifier, // where `n` is the number of bytes. let bytes = options.digest.output_bits() / 4; - let modifier = if bytes > 0 { + let bytes_marker = if bytes > 0 { format!("{{{bytes}}}") } else { "+".to_string() }; - let gnu_re = Regex::new(&format!( - r"^(?P[a-fA-F0-9]{modifier}) (?P[ \*])(?P.*)", - )) - .map_err(|_| HashsumError::InvalidRegex)?; + // BSD reversed mode format is similar to the default mode, but doesn’t use a character to distinguish binary and text modes. + let mut bsd_reversed = None; + + /// Creates a Regex for parsing lines based on the given format. + /// The default value of `gnu_re` created with this function has to be recreated + /// after the initial line has been parsed, as this line dictates the format + /// for the rest of them, and mixing of formats is disallowed. + fn gnu_re_template( + bytes_marker: &str, + format_marker: &str, + ) -> Result { + Regex::new(&format!( + r"^(?P[a-fA-F0-9]{bytes_marker}) {format_marker}(?P.*)" + )) + .map_err(|_| HashsumError::InvalidRegex) + } + let mut gnu_re = gnu_re_template(&bytes_marker, r"(?P[ \*])?")?; let bsd_re = Regex::new(&format!( r"^{algorithm} \((?P.*)\) = (?P[a-fA-F0-9]{digest_size})", algorithm = options.algoname, - digest_size = modifier, + digest_size = bytes_marker, )) .map_err(|_| HashsumError::InvalidRegex)?; + fn handle_captures( + caps: &Captures, + bytes_marker: &str, + bsd_reversed: &mut Option, + gnu_re: &mut Regex, + ) -> Result<(String, String, bool), HashsumError> { + if bsd_reversed.is_none() { + let is_bsd_reversed = caps.name("binary").is_none(); + let format_marker = if is_bsd_reversed { + "" + } else { + r"(?P[ \*])" + } + .to_string(); + + *bsd_reversed = Some(is_bsd_reversed); + *gnu_re = gnu_re_template(bytes_marker, &format_marker)?; + } + + Ok(( + caps.name("fileName").unwrap().as_str().to_string(), + caps.name("digest").unwrap().as_str().to_ascii_lowercase(), + if *bsd_reversed == Some(false) { + caps.name("binary").unwrap().as_str() == "*" + } else { + false + }, + )) + } + let buffer = file; for (i, maybe_line) in buffer.lines().enumerate() { let line = match maybe_line { @@ -627,14 +671,12 @@ where Err(e) => return Err(e.map_err_context(|| "failed to read file".to_string())), }; let (ck_filename, sum, binary_check) = match gnu_re.captures(&line) { - Some(caps) => ( - caps.name("fileName").unwrap().as_str(), - caps.name("digest").unwrap().as_str().to_ascii_lowercase(), - caps.name("binary").unwrap().as_str() == "*", - ), + Some(caps) => { + handle_captures(&caps, &bytes_marker, &mut bsd_reversed, &mut gnu_re)? + } None => match bsd_re.captures(&line) { Some(caps) => ( - caps.name("fileName").unwrap().as_str(), + caps.name("fileName").unwrap().as_str().to_string(), caps.name("digest").unwrap().as_str().to_ascii_lowercase(), true, ), @@ -655,7 +697,7 @@ where } }, }; - let f = match File::open(ck_filename) { + let f = match File::open(ck_filename.clone()) { Err(_) => { failed_open_file += 1; println!( @@ -706,7 +748,7 @@ where ) .map_err_context(|| "failed to read input".to_string())?; if options.tag { - println!("{} ({}) = {}", options.algoname, filename.display(), sum); + println!("{} ({:?}) = {}", options.algoname, filename.display(), sum); } else if options.nonames { println!("{sum}"); } else if options.zero { diff --git a/tests/by-util/test_hashsum.rs b/tests/by-util/test_hashsum.rs index 645f11ef2..3650047b2 100644 --- a/tests/by-util/test_hashsum.rs +++ b/tests/by-util/test_hashsum.rs @@ -209,6 +209,144 @@ fn test_check_file_not_found_warning() { .stderr_is("sha1sum: warning: 1 listed file could not be read\n"); } +// Asterisk `*` is a reserved paths character on win32, nor the path can end with a whitespace. +// ref: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions +#[test] +fn test_check_md5sum() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + #[cfg(not(windows))] + { + for f in &["a", " b", "*c", "dd", " "] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "60b725f10c9c85c70d97880dfe8191b3 a\n\ + bf35d7536c785cf06730d5a40301eba2 b\n\ + f5b61709718c1ecf8db1aea8547d4698 *c\n\ + b064a020db8018f18ff5ae367d01b212 dd\n\ + d784fa8b6d98d27699781bd9a7cf19f0 ", + ); + scene + .ccmd("md5sum") + .arg("--strict") + .arg("-c") + .arg("check.md5sum") + .succeeds() + .stdout_is("a: OK\n b: OK\n*c: OK\ndd: OK\n : OK\n") + .stderr_is(""); + } + #[cfg(windows)] + { + for f in &["a", " b", "dd"] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "60b725f10c9c85c70d97880dfe8191b3 a\n\ + bf35d7536c785cf06730d5a40301eba2 b\n\ + b064a020db8018f18ff5ae367d01b212 dd", + ); + scene + .ccmd("md5sum") + .arg("--strict") + .arg("-c") + .arg("check.md5sum") + .succeeds() + .stdout_is("a: OK\n b: OK\ndd: OK\n") + .stderr_is(""); + } +} + +#[test] +fn test_check_md5sum_reverse_bsd() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + #[cfg(not(windows))] + { + for f in &["a", " b", "*c", "dd", " "] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "60b725f10c9c85c70d97880dfe8191b3 a\n\ + bf35d7536c785cf06730d5a40301eba2 b\n\ + f5b61709718c1ecf8db1aea8547d4698 *c\n\ + b064a020db8018f18ff5ae367d01b212 dd\n\ + d784fa8b6d98d27699781bd9a7cf19f0 ", + ); + scene + .ccmd("md5sum") + .arg("--strict") + .arg("-c") + .arg("check.md5sum") + .succeeds() + .stdout_is("a: OK\n b: OK\n*c: OK\ndd: OK\n : OK\n") + .stderr_is(""); + } + #[cfg(windows)] + { + for f in &["a", " b", "dd"] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "60b725f10c9c85c70d97880dfe8191b3 a\n\ + bf35d7536c785cf06730d5a40301eba2 b\n\ + b064a020db8018f18ff5ae367d01b212 dd", + ); + scene + .ccmd("md5sum") + .arg("--strict") + .arg("-c") + .arg("check.md5sum") + .succeeds() + .stdout_is("a: OK\n b: OK\ndd: OK\n") + .stderr_is(""); + } +} + +#[test] +fn test_check_md5sum_mixed_format() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + #[cfg(not(windows))] + { + for f in &[" b", "*c", "dd", " "] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "bf35d7536c785cf06730d5a40301eba2 b\n\ + f5b61709718c1ecf8db1aea8547d4698 *c\n\ + b064a020db8018f18ff5ae367d01b212 dd\n\ + d784fa8b6d98d27699781bd9a7cf19f0 ", + ); + } + #[cfg(windows)] + { + for f in &[" b", "dd"] { + at.write(f, &format!("{f}\n")); + } + at.write( + "check.md5sum", + "bf35d7536c785cf06730d5a40301eba2 b\n\ + b064a020db8018f18ff5ae367d01b212 dd", + ); + } + scene + .ccmd("md5sum") + .arg("--strict") + .arg("-c") + .arg("check.md5sum") + .fails() + .code_is(1); +} + #[test] fn test_invalid_arg() { new_ucmd!().arg("--definitely-invalid").fails().code_is(1);