Merge pull request #4449 from cakebaker/comm_zero_terminated

comm: implement --zero-terminated
This commit is contained in:
Terts Diepraam 2023-03-01 22:52:31 +01:00 committed by GitHub
commit b9bca794cc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 93 additions and 37 deletions

View file

@ -8,11 +8,11 @@
// spell-checker:ignore (ToDO) delim mkdelim // spell-checker:ignore (ToDO) delim mkdelim
use std::cmp::Ordering; use std::cmp::Ordering;
use std::fmt::Display;
use std::fs::File; use std::fs::File;
use std::io::{self, stdin, BufRead, BufReader, Stdin}; use std::io::{self, stdin, BufRead, BufReader, Stdin};
use std::path::Path; use std::path::Path;
use uucore::error::FromIo; use uucore::error::{FromIo, UResult};
use uucore::error::UResult;
use uucore::{format_usage, help_about, help_usage}; use uucore::{format_usage, help_about, help_usage};
use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
@ -29,6 +29,7 @@ mod options {
pub const FILE_1: &str = "FILE1"; pub const FILE_1: &str = "FILE1";
pub const FILE_2: &str = "FILE2"; pub const FILE_2: &str = "FILE2";
pub const TOTAL: &str = "total"; pub const TOTAL: &str = "total";
pub const ZERO_TERMINATED: &str = "zero-terminated";
} }
fn column_width(col: &str, opts: &ArgMatches) -> usize { fn column_width(col: &str, opts: &ArgMatches) -> usize {
@ -39,23 +40,66 @@ fn column_width(col: &str, opts: &ArgMatches) -> usize {
} }
} }
fn ensure_nl(line: &mut String) { #[repr(u8)]
if !line.ends_with('\n') { #[derive(Clone, Copy)]
line.push('\n'); enum LineEnding {
Newline = b'\n',
Nul = 0,
}
impl From<LineEnding> for u8 {
fn from(line_ending: LineEnding) -> Self {
line_ending as Self
} }
} }
enum LineReader { impl From<bool> for LineEnding {
fn from(is_zero_terminated: bool) -> Self {
if is_zero_terminated {
Self::Nul
} else {
Self::Newline
}
}
}
impl Display for LineEnding {
    /// Writes the terminator itself: a newline for `Newline`, a NUL
    /// character for `Nul`. Nothing else is emitted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the literal first so there is a single write call below.
        let terminator = match self {
            Self::Newline => "\n",
            Self::Nul => "\0",
        };
        f.write_str(terminator)
    }
}
enum Input {
Stdin(Stdin), Stdin(Stdin),
FileIn(BufReader<File>), FileIn(BufReader<File>),
} }
impl LineReader { struct LineReader {
fn read_line(&mut self, buf: &mut String) -> io::Result<usize> { line_ending: LineEnding,
match *self { input: Input,
Self::Stdin(ref mut r) => r.read_line(buf),
Self::FileIn(ref mut r) => r.read_line(buf),
} }
impl LineReader {
/// Builds a reader over `input` that treats `line_ending` as the
/// record terminator.
fn new(input: Input, line_ending: LineEnding) -> Self {
    Self {
        line_ending,
        input,
    }
}
fn read_line(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
let line_ending = self.line_ending.into();
let result = match &mut self.input {
Input::Stdin(r) => r.lock().read_until(line_ending, buf),
Input::FileIn(r) => r.read_until(line_ending, buf),
};
if !buf.ends_with(&[line_ending]) {
buf.push(line_ending);
}
result
} }
} }
@ -71,9 +115,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, opts: &ArgMatches) {
let delim_col_2 = delim.repeat(width_col_1); let delim_col_2 = delim.repeat(width_col_1);
let delim_col_3 = delim.repeat(width_col_1 + width_col_2); let delim_col_3 = delim.repeat(width_col_1 + width_col_2);
let ra = &mut String::new(); let ra = &mut Vec::new();
let mut na = a.read_line(ra); let mut na = a.read_line(ra);
let rb = &mut String::new(); let rb = &mut Vec::new();
let mut nb = b.read_line(rb); let mut nb = b.read_line(rb);
let mut total_col_1 = 0; let mut total_col_1 = 0;
@ -96,8 +140,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, opts: &ArgMatches) {
match ord { match ord {
Ordering::Less => { Ordering::Less => {
if !opts.get_flag(options::COLUMN_1) { if !opts.get_flag(options::COLUMN_1) {
ensure_nl(ra); print!("{}", String::from_utf8_lossy(ra));
print!("{ra}");
} }
ra.clear(); ra.clear();
na = a.read_line(ra); na = a.read_line(ra);
@ -105,8 +148,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, opts: &ArgMatches) {
} }
Ordering::Greater => { Ordering::Greater => {
if !opts.get_flag(options::COLUMN_2) { if !opts.get_flag(options::COLUMN_2) {
ensure_nl(rb); print!("{delim_col_2}{}", String::from_utf8_lossy(rb));
print!("{delim_col_2}{rb}");
} }
rb.clear(); rb.clear();
nb = b.read_line(rb); nb = b.read_line(rb);
@ -114,8 +156,7 @@ fn comm(a: &mut LineReader, b: &mut LineReader, opts: &ArgMatches) {
} }
Ordering::Equal => { Ordering::Equal => {
if !opts.get_flag(options::COLUMN_3) { if !opts.get_flag(options::COLUMN_3) {
ensure_nl(ra); print!("{delim_col_3}{}", String::from_utf8_lossy(ra));
print!("{delim_col_3}{ra}");
} }
ra.clear(); ra.clear();
rb.clear(); rb.clear();
@ -127,17 +168,20 @@ fn comm(a: &mut LineReader, b: &mut LineReader, opts: &ArgMatches) {
} }
if opts.get_flag(options::TOTAL) { if opts.get_flag(options::TOTAL) {
println!("{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}total"); let line_ending = LineEnding::from(opts.get_flag(options::ZERO_TERMINATED));
print!("{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}total{line_ending}");
} }
} }
fn open_file(name: &str) -> io::Result<LineReader> { fn open_file(name: &str, line_ending: LineEnding) -> io::Result<LineReader> {
match name { if name == "-" {
"-" => Ok(LineReader::Stdin(stdin())), Ok(LineReader::new(Input::Stdin(stdin()), line_ending))
_ => { } else {
let f = File::open(Path::new(name))?; let f = File::open(Path::new(name))?;
Ok(LineReader::FileIn(BufReader::new(f))) Ok(LineReader::new(
} Input::FileIn(BufReader::new(f)),
line_ending,
))
} }
} }
@ -146,10 +190,11 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let args = args.collect_lossy(); let args = args.collect_lossy();
let matches = uu_app().try_get_matches_from(args)?; let matches = uu_app().try_get_matches_from(args)?;
let line_ending = LineEnding::from(matches.get_flag(options::ZERO_TERMINATED));
let filename1 = matches.get_one::<String>(options::FILE_1).unwrap(); let filename1 = matches.get_one::<String>(options::FILE_1).unwrap();
let filename2 = matches.get_one::<String>(options::FILE_2).unwrap(); let filename2 = matches.get_one::<String>(options::FILE_2).unwrap();
let mut f1 = open_file(filename1).map_err_context(|| filename1.to_string())?; let mut f1 = open_file(filename1, line_ending).map_err_context(|| filename1.to_string())?;
let mut f2 = open_file(filename2).map_err_context(|| filename2.to_string())?; let mut f2 = open_file(filename2, line_ending).map_err_context(|| filename2.to_string())?;
comm(&mut f1, &mut f2, &matches); comm(&mut f1, &mut f2, &matches);
Ok(()) Ok(())
@ -187,6 +232,13 @@ pub fn uu_app() -> Command {
.default_value(options::DELIMITER_DEFAULT) .default_value(options::DELIMITER_DEFAULT)
.hide_default_value(true), .hide_default_value(true),
) )
.arg(
Arg::new(options::ZERO_TERMINATED)
.long(options::ZERO_TERMINATED)
.short('z')
.help("line delimiter is NUL, not newline")
.action(ArgAction::SetTrue),
)
.arg( .arg(
Arg::new(options::FILE_1) Arg::new(options::FILE_1)
.required(true) .required(true)

View file

@ -95,19 +95,23 @@ fn output_delimiter_nul() {
.stdout_only_fixture("ab_delimiter_nul.expected"); .stdout_only_fixture("ab_delimiter_nul.expected");
} }
// even though (info) documentation suggests this is an option
// in latest GNU Coreutils comm, it actually is not.
// this test is essentially an alarm in case some well-intending
// developer implements it.
//marked as unimplemented as error message not set yet.
#[cfg_attr(not(feature = "test_unimplemented"), ignore)]
#[test] #[test]
fn zero_terminated() { fn zero_terminated() {
for param in ["-z", "--zero-terminated"] { for param in ["-z", "--zero-terminated"] {
new_ucmd!() new_ucmd!()
.args(&[param, "a", "b"]) .args(&[param, "a_nul", "b_nul"])
.fails() .succeeds()
.stderr_only("error to be defined"); .stdout_only_fixture("ab_nul.expected");
}
}
#[test]
fn zero_terminated_with_total() {
for param in ["-z", "--zero-terminated"] {
new_ucmd!()
.args(&[param, "--total", "a_nul", "b_nul"])
.succeeds()
.stdout_only_fixture("ab_nul_total.expected");
} }
} }

BIN
tests/fixtures/comm/a_nul vendored Normal file

Binary file not shown.

BIN
tests/fixtures/comm/ab_nul.expected vendored Normal file

Binary file not shown.

Binary file not shown.

BIN
tests/fixtures/comm/b_nul vendored Normal file

Binary file not shown.