From 0d782e09c146c32066d08cc23b04cb51eb43cc45 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Tue, 28 Feb 2023 10:57:14 +0100 Subject: [PATCH] comm: implement --zero-terminated --- src/uu/comm/src/comm.rs | 108 ++++++++++++++++------ tests/by-util/test_comm.rs | 22 +++-- tests/fixtures/comm/a_nul | Bin 0 -> 3 bytes tests/fixtures/comm/ab_nul.expected | Bin 0 -> 9 bytes tests/fixtures/comm/ab_nul_total.expected | Bin 0 -> 21 bytes tests/fixtures/comm/b_nul | Bin 0 -> 3 bytes 6 files changed, 93 insertions(+), 37 deletions(-) create mode 100644 tests/fixtures/comm/a_nul create mode 100644 tests/fixtures/comm/ab_nul.expected create mode 100644 tests/fixtures/comm/ab_nul_total.expected create mode 100644 tests/fixtures/comm/b_nul diff --git a/src/uu/comm/src/comm.rs b/src/uu/comm/src/comm.rs index 3bc45b6a5..02a9221d4 100644 --- a/src/uu/comm/src/comm.rs +++ b/src/uu/comm/src/comm.rs @@ -8,11 +8,11 @@ // spell-checker:ignore (ToDO) delim mkdelim use std::cmp::Ordering; +use std::fmt::Display; use std::fs::File; use std::io::{self, stdin, BufRead, BufReader, Stdin}; use std::path::Path; -use uucore::error::FromIo; -use uucore::error::UResult; +use uucore::error::{FromIo, UResult}; use uucore::{format_usage, help_about, help_usage}; use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; @@ -29,6 +29,7 @@ mod options { pub const FILE_1: &str = "FILE1"; pub const FILE_2: &str = "FILE2"; pub const TOTAL: &str = "total"; + pub const ZERO_TERMINATED: &str = "zero-terminated"; } fn column_width(col: &str, opts: &ArgMatches) -> usize { @@ -39,23 +40,66 @@ fn column_width(col: &str, opts: &ArgMatches) -> usize { } } -fn ensure_nl(line: &mut String) { - if !line.ends_with('\n') { - line.push('\n'); +#[repr(u8)] +#[derive(Clone, Copy)] +enum LineEnding { + Newline = b'\n', + Nul = 0, +} + +impl From for u8 { + fn from(line_ending: LineEnding) -> Self { + line_ending as Self } } -enum LineReader { +impl From for LineEnding { + fn from(is_zero_terminated: bool) -> Self { + if is_zero_terminated { + Self::Nul + } else { + Self::Newline + } + } +} + +impl Display for LineEnding { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Newline => writeln!(f), + Self::Nul => write!(f, "\0"), + } + } +} + +enum Input { Stdin(Stdin), FileIn(BufReader), } +struct LineReader { + line_ending: LineEnding, + input: Input, +} + impl LineReader { - fn read_line(&mut self, buf: &mut String) -> io::Result { - match *self { - Self::Stdin(ref mut r) => r.read_line(buf), - Self::FileIn(ref mut r) => r.read_line(buf), + fn new(input: Input, line_ending: LineEnding) -> Self { + Self { input, line_ending } + } + + fn read_line(&mut self, buf: &mut Vec) -> io::Result { + let line_ending = self.line_ending.into(); + + let result = match &mut self.input { + Input::Stdin(r) => r.lock().read_until(line_ending, buf), + Input::FileIn(r) => r.read_until(line_ending, buf), + }; + + if !buf.ends_with(&[line_ending]) { + buf.push(line_ending); } + + result } } @@ -71,9 +115,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, opts: &ArgMatches) { let delim_col_2 = delim.repeat(width_col_1); let delim_col_3 = delim.repeat(width_col_1 + width_col_2); - let ra = &mut String::new(); + let ra = &mut Vec::new(); let mut na = a.read_line(ra); - let rb = &mut String::new(); + let rb = &mut Vec::new(); let mut nb = b.read_line(rb); let mut total_col_1 = 0; @@ -96,8 +140,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, opts: &ArgMatches) { match ord { Ordering::Less => { if !opts.get_flag(options::COLUMN_1) { - ensure_nl(ra); - print!("{ra}"); + print!("{}", String::from_utf8_lossy(ra)); } ra.clear(); na = a.read_line(ra); @@ -105,8 +148,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, opts: &ArgMatches) { } Ordering::Greater => { if !opts.get_flag(options::COLUMN_2) { - ensure_nl(rb); - print!("{delim_col_2}{rb}"); + print!("{delim_col_2}{}", String::from_utf8_lossy(rb)); } rb.clear(); nb = b.read_line(rb); @@ -114,8 +156,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, opts: &ArgMatches) { } Ordering::Equal => { if !opts.get_flag(options::COLUMN_3) { - ensure_nl(ra); - print!("{delim_col_3}{ra}"); + print!("{delim_col_3}{}", String::from_utf8_lossy(ra)); } ra.clear(); rb.clear(); @@ -127,17 +168,20 @@ fn comm(a: &mut LineReader, b: &mut LineReader, opts: &ArgMatches) { } if opts.get_flag(options::TOTAL) { - println!("{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}total"); + let line_ending = LineEnding::from(opts.get_flag(options::ZERO_TERMINATED)); + print!("{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}total{line_ending}"); } } -fn open_file(name: &str) -> io::Result { - match name { - "-" => Ok(LineReader::Stdin(stdin())), - _ => { - let f = File::open(Path::new(name))?; - Ok(LineReader::FileIn(BufReader::new(f))) - } +fn open_file(name: &str, line_ending: LineEnding) -> io::Result { + if name == "-" { + Ok(LineReader::new(Input::Stdin(stdin()), line_ending)) + } else { + let f = File::open(Path::new(name))?; + Ok(LineReader::new( + Input::FileIn(BufReader::new(f)), + line_ending, + )) } } @@ -146,10 +190,11 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let args = args.collect_lossy(); let matches = uu_app().try_get_matches_from(args)?; + let line_ending = LineEnding::from(matches.get_flag(options::ZERO_TERMINATED)); let filename1 = matches.get_one::(options::FILE_1).unwrap(); let filename2 = matches.get_one::(options::FILE_2).unwrap(); - let mut f1 = open_file(filename1).map_err_context(|| filename1.to_string())?; - let mut f2 = open_file(filename2).map_err_context(|| filename2.to_string())?; + let mut f1 = open_file(filename1, line_ending).map_err_context(|| filename1.to_string())?; + let mut f2 = open_file(filename2, line_ending).map_err_context(|| filename2.to_string())?; comm(&mut f1, &mut f2, &matches); Ok(()) @@ -187,6 +232,13 @@ pub fn uu_app() -> Command { .default_value(options::DELIMITER_DEFAULT) .hide_default_value(true), ) + .arg( + Arg::new(options::ZERO_TERMINATED) + .long(options::ZERO_TERMINATED) + .short('z') + .help("line delimiter is NUL, not newline") + .action(ArgAction::SetTrue), + ) .arg( Arg::new(options::FILE_1) .required(true) diff --git a/tests/by-util/test_comm.rs b/tests/by-util/test_comm.rs index 053746908..aa0791f97 100644 --- a/tests/by-util/test_comm.rs +++ b/tests/by-util/test_comm.rs @@ -95,19 +95,23 @@ fn output_delimiter_nul() { .stdout_only_fixture("ab_delimiter_nul.expected"); } -// even though (info) documentation suggests this is an option -// in latest GNU Coreutils comm, it actually is not. -// this test is essentially an alarm in case some well-intending -// developer implements it. -//marked as unimplemented as error message not set yet. -#[cfg_attr(not(feature = "test_unimplemented"), ignore)] #[test] fn zero_terminated() { for param in ["-z", "--zero-terminated"] { new_ucmd!() - .args(&[param, "a", "b"]) - .fails() - .stderr_only("error to be defined"); + .args(&[param, "a_nul", "b_nul"]) + .succeeds() + .stdout_only_fixture("ab_nul.expected"); + } +} + +#[test] +fn zero_terminated_with_total() { + for param in ["-z", "--zero-terminated"] { + new_ucmd!() + .args(&[param, "--total", "a_nul", "b_nul"]) + .succeeds() + .stdout_only_fixture("ab_nul_total.expected"); } } diff --git a/tests/fixtures/comm/a_nul b/tests/fixtures/comm/a_nul new file mode 100644 index 0000000000000000000000000000000000000000..3142c801ccd54e1a9bc9d587e5ab16369ead768d GIT binary patch literal 3 KcmYdfr~&{1pa9$e literal 0 HcmV?d00001 diff --git a/tests/fixtures/comm/ab_nul.expected b/tests/fixtures/comm/ab_nul.expected new file mode 100644 index 0000000000000000000000000000000000000000..f826106bb1926f3ce2c6553b72bb0a007c432f4f GIT binary patch literal 9 QcmYdf;7nrR