uniq: implement group option

This commit is contained in:
Chirag Jadwani 2021-04-04 15:07:29 +05:30
parent 20d071a482
commit 19c6a42de5
8 changed files with 249 additions and 20 deletions

35
Cargo.lock generated
View file

@ -650,6 +650,15 @@ version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3" checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3"
[[package]]
name = "heck"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
dependencies = [
"unicode-segmentation",
]
[[package]] [[package]]
name = "hermit-abi" name = "hermit-abi"
version = "0.1.18" version = "0.1.18"
@ -1352,6 +1361,24 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "strum"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7318c509b5ba57f18533982607f24070a55d353e90d4cae30c467cdb2ad5ac5c"
[[package]]
name = "strum_macros"
version = "0.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee8bc6b87a5112aeeab1f4a9f7ab634fe6cbefc4850006df31267f4cfb9e3149"
dependencies = [
"heck",
"proc-macro2",
"quote 1.0.9",
"syn",
]
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.68" version = "1.0.68"
@ -1499,6 +1526,12 @@ version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06" checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06"
[[package]]
name = "unicode-segmentation"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796"
[[package]] [[package]]
name = "unicode-width" name = "unicode-width"
version = "0.1.8" version = "0.1.8"
@ -2455,6 +2488,8 @@ name = "uu_uniq"
version = "0.0.6" version = "0.0.6"
dependencies = [ dependencies = [
"clap", "clap",
"strum",
"strum_macros",
"uucore", "uucore",
"uucore_procs", "uucore_procs",
] ]

View file

@ -16,6 +16,8 @@ path = "src/uniq.rs"
[dependencies] [dependencies]
clap = "2.33" clap = "2.33"
strum = "0.20"
strum_macros = "0.20"
uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }

View file

@ -13,6 +13,7 @@ use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Result, Write}; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Result, Write};
use std::path::Path; use std::path::Path;
use std::str::FromStr; use std::str::FromStr;
use strum_macros::{AsRefStr, EnumString};
static ABOUT: &str = "Report or omit repeated lines."; static ABOUT: &str = "Report or omit repeated lines.";
static VERSION: &str = env!("CARGO_PKG_VERSION"); static VERSION: &str = env!("CARGO_PKG_VERSION");
@ -26,14 +27,18 @@ pub mod options {
pub static SKIP_CHARS: &str = "skip-chars"; pub static SKIP_CHARS: &str = "skip-chars";
pub static UNIQUE: &str = "unique"; pub static UNIQUE: &str = "unique";
pub static ZERO_TERMINATED: &str = "zero-terminated"; pub static ZERO_TERMINATED: &str = "zero-terminated";
pub static GROUP: &str = "group";
} }
static ARG_FILES: &str = "files"; static ARG_FILES: &str = "files";
/// Placement of the blank delimiter lines that are written around groups of
/// matching lines.
///
/// The `strum` derives together with `serialize_all = "snake_case"` give each
/// variant a lowercase string form (`append`, `prepend`, `separate`, `both`,
/// `none`); those strings are the method names offered on the command line
/// and parsed back with `Delimiters::from_str`.
#[derive(PartialEq, Clone, Copy, AsRefStr, EnumString)]
#[strum(serialize_all = "snake_case")]
enum Delimiters {
    /// A delimiter line is written after the last group (output ends with a
    /// delimiter), in addition to the ones between groups.
    Append,
    /// A delimiter line is written before every group, including the first
    /// (output starts with a delimiter).
    Prepend,
    /// A delimiter line is written only between groups, i.e. once something
    /// has already been printed.
    Separate,
    /// Combination of `Prepend` and `Append`: output both starts and ends
    /// with a delimiter line.
    Both,
    /// No delimiter lines are written at all.
    None,
}
@ -58,22 +63,33 @@ impl Uniq {
) { ) {
let mut lines: Vec<String> = vec![]; let mut lines: Vec<String> = vec![];
let mut first_line_printed = false; let mut first_line_printed = false;
let delimiters = &self.delimiters; let delimiters = self.delimiters;
let line_terminator = self.get_line_terminator(); let line_terminator = self.get_line_terminator();
// Don't print any delimiting lines before, after or between groups if delimiting method is 'none'
let no_delimiters = delimiters == Delimiters::None;
// The 'prepend' and 'both' delimit methods will cause output to start with delimiter line
let prepend_delimiter = delimiters == Delimiters::Prepend || delimiters == Delimiters::Both;
// The 'append' and 'both' delimit methods will cause output to end with delimiter line
let append_delimiter = delimiters == Delimiters::Append || delimiters == Delimiters::Both;
for line in reader.split(line_terminator).map(get_line_string) { for line in reader.split(line_terminator).map(get_line_string) {
if !lines.is_empty() && self.cmp_keys(&lines[0], &line) { if !lines.is_empty() && self.cmp_keys(&lines[0], &line) {
let print_delimiter = delimiters == &Delimiters::Prepend // Print delimiter if delimit method is not 'none' and any line has been output
|| (delimiters == &Delimiters::Separate && first_line_printed); // before or if we need to start output with delimiter
let print_delimiter = !no_delimiters && (prepend_delimiter || first_line_printed);
first_line_printed |= self.print_lines(writer, &lines, print_delimiter); first_line_printed |= self.print_lines(writer, &lines, print_delimiter);
lines.truncate(0); lines.truncate(0);
} }
lines.push(line); lines.push(line);
} }
if !lines.is_empty() { if !lines.is_empty() {
let print_delimiter = delimiters == &Delimiters::Prepend // Print delimiter if delimit method is not 'none' and any line has been output
|| (delimiters == &Delimiters::Separate && first_line_printed); // before or if we need to start output with delimiter
self.print_lines(writer, &lines, print_delimiter); let print_delimiter = !no_delimiters && (prepend_delimiter || first_line_printed);
first_line_printed |= self.print_lines(writer, &lines, print_delimiter);
}
if append_delimiter && first_line_printed {
crash_if_err!(1, writer.write_all(&[line_terminator]));
} }
} }
@ -233,10 +249,30 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
Arg::with_name(options::ALL_REPEATED) Arg::with_name(options::ALL_REPEATED)
.short("D") .short("D")
.long(options::ALL_REPEATED) .long(options::ALL_REPEATED)
.possible_values(&["none", "prepend", "separate"]) .possible_values(&[
.help("print all duplicate lines. Delimiting is done with blank lines") Delimiters::None.as_ref(), Delimiters::Prepend.as_ref(), Delimiters::Separate.as_ref()
])
.help("print all duplicate lines. Delimiting is done with blank lines. [default: none]")
.value_name("delimit-method") .value_name("delimit-method")
.default_value("none"), .min_values(0)
.max_values(1),
)
.arg(
Arg::with_name(options::GROUP)
.long(options::GROUP)
.possible_values(&[
Delimiters::Separate.as_ref(), Delimiters::Prepend.as_ref(),
Delimiters::Append.as_ref(), Delimiters::Both.as_ref()
])
.help("show all items, separating groups with an empty line. [default: separate]")
.value_name("group-method")
.min_values(0)
.max_values(1)
.conflicts_with_all(&[
options::REPEATED,
options::ALL_REPEATED,
options::UNIQUE,
]),
) )
.arg( .arg(
Arg::with_name(options::CHECK_CHARS) Arg::with_name(options::CHECK_CHARS)
@ -314,17 +350,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
let uniq = Uniq { let uniq = Uniq {
repeats_only: matches.is_present(options::REPEATED) repeats_only: matches.is_present(options::REPEATED)
|| matches.occurrences_of(options::ALL_REPEATED) > 0, || matches.is_present(options::ALL_REPEATED),
uniques_only: matches.is_present(options::UNIQUE), uniques_only: matches.is_present(options::UNIQUE),
all_repeated: matches.occurrences_of(options::ALL_REPEATED) > 0, all_repeated: matches.is_present(options::ALL_REPEATED)
delimiters: match matches.value_of(options::ALL_REPEATED).map(String::from) { || matches.is_present(options::GROUP),
Some(ref opt_arg) if opt_arg != "none" => match &(*opt_arg.as_str()) { delimiters: get_delimiter(&matches),
"prepend" => Delimiters::Prepend,
"separate" => Delimiters::Separate,
_ => crash!(1, "Incorrect argument for all-repeated: {}", opt_arg),
},
_ => Delimiters::None,
},
show_counts: matches.is_present(options::COUNT), show_counts: matches.is_present(options::COUNT),
skip_fields: opt_parsed(options::SKIP_FIELDS, &matches), skip_fields: opt_parsed(options::SKIP_FIELDS, &matches),
slice_start: opt_parsed(options::SKIP_CHARS, &matches), slice_start: opt_parsed(options::SKIP_CHARS, &matches),
@ -340,6 +370,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
0 0
} }
/// Works out which delimiting method the parsed command line asks for.
///
/// An explicit value passed to `--all-repeated` is looked up first; if there
/// is none, an explicit value passed to `--group` is used. The chosen string
/// is parsed with `Delimiters::from_str`, and a parse failure goes through
/// `crash_if_err!` with exit code 1.
///
/// When neither option carries a value, a bare `--group` selects
/// `Delimiters::Separate`; in every other case the result is
/// `Delimiters::None`.
fn get_delimiter(matches: &ArgMatches) -> Delimiters {
    // Only consult `--group` when `--all-repeated` did not supply a value.
    let explicit_method = match matches.value_of(options::ALL_REPEATED) {
        Some(method) => Some(method),
        _ => matches.value_of(options::GROUP),
    };

    match explicit_method {
        Some(method) => crash_if_err!(1, Delimiters::from_str(method)),
        // `--group` given without a method falls back to its default.
        _ if matches.is_present(options::GROUP) => Delimiters::Separate,
        _ => Delimiters::None,
    }
}
fn open_input_file(in_file_name: String) -> BufReader<Box<dyn Read + 'static>> { fn open_input_file(in_file_name: String) -> BufReader<Box<dyn Read + 'static>> {
let in_file = if in_file_name == "-" { let in_file = if in_file_name == "-" {
Box::new(stdin()) as Box<dyn Read> Box::new(stdin()) as Box<dyn Read>

View file

@ -147,3 +147,48 @@ fn test_invalid_utf8() {
.failure() .failure()
.stderr_only("uniq: error: invalid utf-8 sequence of 1 bytes from index 0"); .stderr_only("uniq: error: invalid utf-8 sequence of 1 bytes from index 0");
} }
// `--group` without an explicit method must produce the same output as the
// `separate` method (both are checked against `group.expected`).
#[test]
fn test_group() {
    let cli_args = ["--group"];
    let expected_fixture = "group.expected";

    new_ucmd!()
        .args(&cli_args)
        .pipe_in_fixture(INPUT)
        .run()
        .stdout_is_fixture(expected_fixture);
}
// `--group=prepend`: output is compared against the `group-prepend.expected`
// fixture.
#[test]
fn test_group_prepend() {
    let cli_args = ["--group=prepend"];
    let expected_fixture = "group-prepend.expected";

    new_ucmd!()
        .args(&cli_args)
        .pipe_in_fixture(INPUT)
        .run()
        .stdout_is_fixture(expected_fixture);
}
// `--group=append`: output is compared against the `group-append.expected`
// fixture.
#[test]
fn test_group_append() {
    let cli_args = ["--group=append"];
    let expected_fixture = "group-append.expected";

    new_ucmd!()
        .args(&cli_args)
        .pipe_in_fixture(INPUT)
        .run()
        .stdout_is_fixture(expected_fixture);
}
// `--group=both`: output is compared against the `group-both.expected`
// fixture.
#[test]
fn test_group_both() {
    let cli_args = ["--group=both"];
    let expected_fixture = "group-both.expected";

    new_ucmd!()
        .args(&cli_args)
        .pipe_in_fixture(INPUT)
        .run()
        .stdout_is_fixture(expected_fixture);
}
// `--group=separate` spelled out explicitly: checked against the same
// `group.expected` fixture as the bare `--group` test.
#[test]
fn test_group_separate() {
    let cli_args = ["--group=separate"];
    let expected_fixture = "group.expected";

    new_ucmd!()
        .args(&cli_args)
        .pipe_in_fixture(INPUT)
        .run()
        .stdout_is_fixture(expected_fixture);
}

View file

@ -0,0 +1,26 @@
aaaaa
bbbbb ⅱ
bbbbb ⅱ
ccccc ⅲ
ccccc ⅲ
ccccc ⅲ
ddddd ⅲ
ddddd ⅲ
ddddd ⅲ
ddddd ⅲ
eeeee ⅲ
fffff ⅲ
fffff ⅲ
ggggg ⅲ
ggggg ⅲ
ggggg ⅲ
GGGGG ⅲ
GGGGG ⅲ

27
tests/fixtures/uniq/group-both.expected vendored Normal file
View file

@ -0,0 +1,27 @@
aaaaa
bbbbb ⅱ
bbbbb ⅱ
ccccc ⅲ
ccccc ⅲ
ccccc ⅲ
ddddd ⅲ
ddddd ⅲ
ddddd ⅲ
ddddd ⅲ
eeeee ⅲ
fffff ⅲ
fffff ⅲ
ggggg ⅲ
ggggg ⅲ
ggggg ⅲ
GGGGG ⅲ
GGGGG ⅲ

View file

@ -0,0 +1,26 @@
aaaaa
bbbbb ⅱ
bbbbb ⅱ
ccccc ⅲ
ccccc ⅲ
ccccc ⅲ
ddddd ⅲ
ddddd ⅲ
ddddd ⅲ
ddddd ⅲ
eeeee ⅲ
fffff ⅲ
fffff ⅲ
ggggg ⅲ
ggggg ⅲ
ggggg ⅲ
GGGGG ⅲ
GGGGG ⅲ

25
tests/fixtures/uniq/group.expected vendored Normal file
View file

@ -0,0 +1,25 @@
aaaaa
bbbbb ⅱ
bbbbb ⅱ
ccccc ⅲ
ccccc ⅲ
ccccc ⅲ
ddddd ⅲ
ddddd ⅲ
ddddd ⅲ
ddddd ⅲ
eeeee ⅲ
fffff ⅲ
fffff ⅲ
ggggg ⅲ
ggggg ⅲ
ggggg ⅲ
GGGGG ⅲ
GGGGG ⅲ