mirror of
https://github.com/uutils/coreutils
synced 2024-12-15 15:52:42 +00:00
9944bdedd4
The following are changes to fix #303: 1. hashsum pulls 512KB chunks of the file into memory. This ends up taking 1MB with a secondary buffer allocated for Windows. hashsum is now able to hash files larger than the computer's available memory. 2. Text mode no longer transforms the input to UTF-8. This allows hashing to work on binary files without specifying text mode. On Windows, it converts a Windows newline '\r\n' to the standard newline '\n'. 3. Set default modes: Windows uses binary by default, all other systems use text. Gil Cottle <gcottle@redtown.org>
316 lines
12 KiB
Rust
316 lines
12 KiB
Rust
#![crate_id = "hashsum#1.0.0"]
|
|
|
|
/*
|
|
* This file is part of the uutils coreutils package.
|
|
*
|
|
* (c) Arcterus <arcterus@mail.com>
|
|
* (c) Vsevolod Velichko <torkvemada@sorokdva.net>
|
|
* (c) Gil Cottle <gcottle@redtown.org>
|
|
*
|
|
* For the full copyright and license information, please view the LICENSE
|
|
* file that was distributed with this source code.
|
|
*/
|
|
|
|
#![feature(macro_rules)]
|
|
|
|
extern crate regex;
|
|
|
|
extern crate crypto = "rust-crypto";
|
|
extern crate getopts;
|
|
|
|
use std::io::fs::File;
|
|
use std::io::stdio::stdin_raw;
|
|
use std::io::BufferedReader;
|
|
use std::io::IoError;
|
|
use std::io::EndOfFile;
|
|
use regex::Regex;
|
|
use crypto::digest::Digest;
|
|
use crypto::md5::Md5;
|
|
use crypto::sha1::Sha1;
|
|
use crypto::sha2::{Sha224, Sha256, Sha384, Sha512};
|
|
|
|
#[path = "../common/util.rs"]
|
|
mod util;
|
|
|
|
static NAME: &'static str = "hashsum";
|
|
static VERSION: &'static str = "1.0.0";
|
|
|
|
/// Returns `true` when `program` is one of the algorithm-specific binary
/// names (`md5sum`, `sha1sum`, `sha224sum`, `sha256sum`, `sha384sum`,
/// `sha512sum`). The comparison is exact and case-sensitive.
fn is_custom_binary(program: &str) -> bool {
    static CUSTOM_BINARIES: &'static [&'static str] = &[
        "md5sum", "sha1sum",
        "sha224sum", "sha256sum",
        "sha384sum", "sha512sum"
    ];
    CUSTOM_BINARIES.iter().any(|known| *known == program)
}
|
|
|
|
fn get_algo_opts(program: &str) -> Vec<getopts::OptGroup> {
|
|
if is_custom_binary(program) {
|
|
Vec::new()
|
|
} else {
|
|
vec!(
|
|
getopts::optflag("", "md5", "work with MD5"),
|
|
getopts::optflag("", "sha1", "work with SHA1"),
|
|
getopts::optflag("", "sha224", "work with SHA224"),
|
|
getopts::optflag("", "sha256", "work with SHA256"),
|
|
getopts::optflag("", "sha384", "work with SHA384"),
|
|
getopts::optflag("", "sha512", "work with SHA512")
|
|
)
|
|
}
|
|
}
|
|
|
|
fn detect_algo(program: &str, matches: &getopts::Matches) -> (&str, Box<Digest>) {
|
|
let mut alg: Option<Box<Digest>> = None;
|
|
let mut name: &'static str = "";
|
|
match program {
|
|
"md5sum" => ("MD5", box Md5::new() as Box<Digest>),
|
|
"sha1sum" => ("SHA1", box Sha1::new() as Box<Digest>),
|
|
"sha224sum" => ("SHA224", box Sha224::new() as Box<Digest>),
|
|
"sha256sum" => ("SHA256", box Sha256::new() as Box<Digest>),
|
|
"sha384sum" => ("SHA384", box Sha384::new() as Box<Digest>),
|
|
"sha512sum" => ("SHA512", box Sha512::new() as Box<Digest>),
|
|
_ => {
|
|
{
|
|
let set_or_crash = |n: &'static str, val: Box<Digest>| -> () {
|
|
if alg.is_some() { crash!(1, "You cannot combine multiple hash algorithms!") };
|
|
name = n;
|
|
alg = Some(val);
|
|
};
|
|
if matches.opt_present("md5") { set_or_crash("MD5", box Md5::new()) };
|
|
if matches.opt_present("sha1") { set_or_crash("SHA1", box Sha1::new()) };
|
|
if matches.opt_present("sha224") { set_or_crash("SHA224", box Sha224::new()) };
|
|
if matches.opt_present("sha256") { set_or_crash("SHA256", box Sha256::new()) };
|
|
if matches.opt_present("sha384") { set_or_crash("SHA384", box Sha384::new()) };
|
|
if matches.opt_present("sha512") { set_or_crash("SHA512", box Sha512::new()) };
|
|
}
|
|
if alg.is_none() { crash!(1, "You must specify hash algorithm!") };
|
|
(name, alg.unwrap())
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn uumain(args: Vec<String>) -> int {
|
|
let program = args.get(0).clone();
|
|
let binary = Path::new(program.as_slice());
|
|
let binary_name = binary.filename_str().unwrap();
|
|
|
|
// Default binary in Windows, text mode otherwise
|
|
let binary_flag_default = cfg!(windows);
|
|
|
|
let mut opts: Vec<getopts::OptGroup> = vec!(
|
|
getopts::optflag("b", "binary", format!("read in binary mode{}", if binary_flag_default { " (default)" } else { "" }).as_slice()),
|
|
getopts::optflag("c", "check", "read hashsums from the FILEs and check them"),
|
|
getopts::optflag("", "tag", "create a BSD-style checksum"),
|
|
getopts::optflag("t", "text", format!("read in text mode{}", if binary_flag_default { "" } else { " (default)" }).as_slice()),
|
|
getopts::optflag("q", "quiet", "don't print OK for each successfully verified file"),
|
|
getopts::optflag("s", "status", "don't output anything, status code shows success"),
|
|
getopts::optflag("", "strict", "exit non-zero for improperly formatted checksum lines"),
|
|
getopts::optflag("w", "warn", "warn about improperly formatted checksum lines"),
|
|
getopts::optflag("h", "help", "display this help and exit"),
|
|
getopts::optflag("V", "version", "output version information and exit"),
|
|
);
|
|
|
|
opts.push_all_move(get_algo_opts(binary_name.as_slice()));
|
|
|
|
let matches = match getopts::getopts(args.tail(), opts.as_slice()) {
|
|
Ok(m) => m,
|
|
Err(f) => crash!(1, "{}", f)
|
|
};
|
|
|
|
if matches.opt_present("help") {
|
|
usage(program.as_slice(), binary_name.as_slice(), opts.as_slice());
|
|
} else if matches.opt_present("version") {
|
|
version();
|
|
} else {
|
|
let (name, algo) = detect_algo(binary_name.as_slice(), &matches);
|
|
|
|
let binary_flag = matches.opt_present("binary");
|
|
let text_flag = matches.opt_present("text");
|
|
if binary_flag && text_flag {
|
|
crash!(1, "cannot set binary and text mode at the same time");
|
|
}
|
|
let binary = if binary_flag { true } else if text_flag { false } else { binary_flag_default };
|
|
let check = matches.opt_present("check");
|
|
let tag = matches.opt_present("tag");
|
|
let status = matches.opt_present("status");
|
|
let quiet = matches.opt_present("quiet") || status;
|
|
let strict = matches.opt_present("strict");
|
|
let warn = matches.opt_present("warn") && !status;
|
|
let files = if matches.free.is_empty() {
|
|
vec!("-".to_string())
|
|
} else {
|
|
matches.free
|
|
};
|
|
match hashsum(name, algo, files, binary, check, tag, status, quiet, strict, warn) {
|
|
Ok(()) => return 0,
|
|
Err(e) => return e
|
|
}
|
|
}
|
|
|
|
0
|
|
}
|
|
|
|
/// Prints the program name and version (`NAME` and `VERSION`) on one line
/// of standard output.
fn version() {
    let (name, release) = (NAME, VERSION);
    println!("{} v{}", name, release);
}
|
|
|
|
fn usage(program: &str, binary_name: &str, opts: &[getopts::OptGroup]) {
|
|
version();
|
|
println!("");
|
|
println!("Usage:");
|
|
if is_custom_binary(binary_name) {
|
|
println!(" {} [OPTION]... [FILE]...", program);
|
|
} else {
|
|
println!(" {} {{--md5|--sha1|--sha224|--sha256|--sha384|--sha512}} [OPTION]... [FILE]...", program);
|
|
}
|
|
println!("");
|
|
print!("{}", getopts::usage("Compute and check message digests.", opts));
|
|
}
|
|
|
|
fn hashsum(algoname: &str, mut digest: Box<Digest>, files: Vec<String>, binary: bool, check: bool, tag: bool, status: bool, quiet: bool, strict: bool, warn: bool) -> Result<(), int> {
|
|
let mut bad_format = 0u;
|
|
let mut failed = 0u;
|
|
let binary_marker = if binary {
|
|
"*"
|
|
} else {
|
|
" "
|
|
};
|
|
for filename in files.iter() {
|
|
let filename: &str = filename.as_slice();
|
|
let mut file = BufferedReader::new(
|
|
if filename == "-" {
|
|
box stdin_raw() as Box<Reader>
|
|
} else {
|
|
box safe_unwrap!(File::open(&Path::new(filename))) as Box<Reader>
|
|
}
|
|
);
|
|
if check {
|
|
|
|
// Set up Regexes for line validation and parsing
|
|
let bytes = digest.output_bits() / 4;
|
|
let gnu_re = safe_unwrap!(
|
|
Regex::new(
|
|
format!(
|
|
r"^(?P<digest>[a-fA-F0-9]{{{}}}) (?P<binary>[ \*])(?P<fileName>.*)",
|
|
bytes
|
|
).as_slice()
|
|
)
|
|
);
|
|
let bsd_re = safe_unwrap!(
|
|
Regex::new(
|
|
format!(
|
|
r"^{algorithm} \((?P<fileName>.*)\) = (?P<digest>[a-fA-F0-9]{{{digest_size}}})",
|
|
algorithm = algoname,
|
|
digest_size = bytes
|
|
).as_slice()
|
|
)
|
|
);
|
|
|
|
let mut buffer = file;
|
|
for (i, line) in buffer.lines().enumerate() {
|
|
let line = safe_unwrap!(line);
|
|
let (ck_filename, sum, binary_check) = match gnu_re.captures(line.as_slice()) {
|
|
Some(caps) => (caps.name("fileName"), caps.name("digest").to_ascii().to_lower(), caps.name("binary") == "*"),
|
|
None => match bsd_re.captures(line.as_slice()) {
|
|
Some(caps) => (caps.name("fileName"), caps.name("digest").to_ascii().to_lower(), true),
|
|
None => {
|
|
bad_format += 1;
|
|
if strict {
|
|
return Err(1);
|
|
}
|
|
if warn {
|
|
show_warning!("{}: {}: improperly formatted {} checksum line", filename, i + 1, algoname);
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
};
|
|
let real_sum = safe_unwrap!(digest_reader(&mut digest, &mut safe_unwrap!(File::open(&Path::new(ck_filename))), binary_check))
|
|
.as_slice().to_ascii().to_lower();
|
|
if sum.as_slice() == real_sum.as_slice() {
|
|
if !quiet {
|
|
println!("{}: OK", ck_filename);
|
|
}
|
|
} else {
|
|
if !status {
|
|
println!("{}: FAILED", ck_filename);
|
|
}
|
|
failed += 1;
|
|
}
|
|
}
|
|
} else {
|
|
let sum = safe_unwrap!(digest_reader(&mut digest, &mut file, binary));
|
|
if tag {
|
|
println!("{} ({}) = {}", algoname, filename, sum);
|
|
} else {
|
|
println!("{} {}{}", sum, binary_marker, filename);
|
|
}
|
|
}
|
|
}
|
|
if !status {
|
|
if bad_format == 1 {
|
|
show_warning!("{} line is improperly formatted", bad_format);
|
|
} else if bad_format > 1 {
|
|
show_warning!("{} lines are improperly formatted", bad_format);
|
|
}
|
|
if failed > 0 {
|
|
show_warning!("{} computed checksum did NOT match", failed);
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn digest_reader(digest: &mut Box<Digest>, reader: &mut Reader, binary: bool) -> Result<String, IoError> {
|
|
digest.reset();
|
|
|
|
// Digest file, do not hold too much in memory at any given moment
|
|
let windows = cfg!(windows);
|
|
let mut buffer = [0, ..524288];
|
|
let mut vec = Vec::with_capacity(524288);
|
|
let mut looking_for_newline = false;
|
|
loop {
|
|
match reader.read(buffer) {
|
|
Ok(0) => {},
|
|
Ok(nread) => {
|
|
if windows && !binary {
|
|
// Windows text mode returns '\n' when reading '\r\n'
|
|
for i in range(0, nread) {
|
|
if looking_for_newline {
|
|
if buffer[i] != ('\n' as u8) {
|
|
vec.push('\r' as u8);
|
|
}
|
|
if buffer[i] != ('\r' as u8) {
|
|
vec.push(buffer[i]);
|
|
looking_for_newline = false;
|
|
}
|
|
} else if buffer[i] != ('\r' as u8) {
|
|
vec.push(buffer[i]);
|
|
} else {
|
|
looking_for_newline = true;
|
|
}
|
|
}
|
|
digest.input(vec.as_slice());
|
|
vec.clear();
|
|
} else {
|
|
digest.input(buffer.slice(0, nread));
|
|
}
|
|
},
|
|
Err(e) => match e.kind {
|
|
EndOfFile => {
|
|
break;
|
|
},
|
|
_ => {
|
|
return Err(e);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if windows && looking_for_newline {
|
|
vec.push('\r' as u8);
|
|
digest.input(vec.as_slice());
|
|
}
|
|
|
|
Ok(digest.result_str())
|
|
}
|
|
|