From 7dc8ff62cc7fcf3ef3dd00cc8195a8edefdffcd8 Mon Sep 17 00:00:00 2001 From: Konstantin Pospelov Date: Wed, 11 Apr 2018 22:55:44 +0300 Subject: [PATCH] join: support headers --- src/join/join.rs | 61 +++++++++++++++---- tests/fixtures/join/header.expected | 4 ++ tests/fixtures/join/header_1.txt | 6 ++ tests/fixtures/join/header_2.txt | 5 ++ .../fixtures/join/header_autoformat.expected | 4 ++ tests/test_join.rs | 35 +++++++++++ 6 files changed, 104 insertions(+), 11 deletions(-) create mode 100644 tests/fixtures/join/header.expected create mode 100644 tests/fixtures/join/header_1.txt create mode 100644 tests/fixtures/join/header_2.txt create mode 100644 tests/fixtures/join/header_autoformat.expected diff --git a/src/join/join.rs b/src/join/join.rs index 6cc2ed13d..c76a99dbe 100644 --- a/src/join/join.rs +++ b/src/join/join.rs @@ -53,6 +53,7 @@ struct Settings { format: Vec, empty: String, check_order: CheckOrder, + headers: bool, } impl Default for Settings { @@ -67,6 +68,7 @@ impl Default for Settings { format: vec![], empty: String::new(), check_order: CheckOrder::Default, + headers: false, } } } @@ -256,13 +258,10 @@ impl<'a> State<'a> { /// Skip the current unpaired line. fn skip_line(&mut self, input: &Input, repr: &Repr) { if self.print_unpaired { - self.print_unpaired_line(&self.seq[0], repr); + self.print_first_line(repr); } - match self.next_line(input) { - Some(line) => self.seq[0] = line, - None => self.seq.clear(), - } + self.reset_next_line(input); } /// Keep reading line sequence until the key does not change, return @@ -285,6 +284,19 @@ impl<'a> State<'a> { return None; } + /// Print lines in the buffers as headers. + fn print_headers(&self, other: &State, repr: &Repr) { + if self.has_line() { + if other.has_line() { + self.combine(other, repr); + } else { + self.print_first_line(repr); + } + } else if other.has_line() { + other.print_first_line(repr); + } + } + /// Combine two line sequences. fn combine(&self, other: &State, repr: &Repr) { let key = self.seq[0].get_field(self.key); @@ -326,6 +338,16 @@ impl<'a> State<'a> { } } + fn reset_read_line(&mut self, input: &Input) { + let line = self.read_line(input.separator); + self.reset(line); + } + + fn reset_next_line(&mut self, input: &Input) { + let line = self.next_line(input); + self.reset(line); + } + fn has_line(&self) -> bool { !self.seq.is_empty() } @@ -342,21 +364,22 @@ impl<'a> State<'a> { fn finalize(&mut self, input: &Input, repr: &Repr) { if self.has_line() && self.print_unpaired { - self.print_unpaired_line(&self.seq[0], repr); + self.print_first_line(repr); while let Some(line) = self.next_line(input) { - self.print_unpaired_line(&line, repr); + self.print_line(&line, repr); } } } + /// Get the next line without the order check. fn read_line(&mut self, sep: Sep) -> Option { let value = self.lines.next()?; self.line_num += 1; Some(Line::new(crash_if_err!(1, value), sep)) } - /// Prepare the next line. + /// Get the next line with the order check. fn next_line(&mut self, input: &Input) -> Option { let line = self.read_line(input.separator)?; @@ -384,7 +407,7 @@ impl<'a> State<'a> { Some(line) } - fn print_unpaired_line(&self, line: &Line, repr: &Repr) { + fn print_line(&self, line: &Line, repr: &Repr) { if repr.uses_format() { repr.print_format(|spec| match spec { &Spec::Key => line.get_field(self.key), @@ -401,6 +424,10 @@ impl<'a> State<'a> { println!(); } + + fn print_first_line(&self, repr: &Repr) { + self.print_line(&self.seq[0], repr); + } } pub fn uumain(args: Vec) -> i32 { @@ -482,6 +509,10 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2", .long("nocheck-order") .help("do not check that the input is correctly sorted"), ) + .arg(Arg::with_name("header").long("header").help( + "treat the first line in each file as field headers, \ + print them without trying to pair them", + )) .arg( Arg::with_name("file1") .required(true) @@ -544,6 +575,10 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2", settings.check_order = CheckOrder::Enabled; } + if matches.is_present("header") { + settings.headers = true; + } + let file1 = matches.value_of("file1").unwrap(); let file2 = matches.value_of("file2").unwrap(); @@ -591,6 +626,12 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 { state1.initialize(settings.separator, settings.autoformat); state2.initialize(settings.separator, settings.autoformat); + if settings.headers { + state1.print_headers(&state2, &repr); + state1.reset_read_line(&input); + state2.reset_read_line(&input); + } + while state1.has_line() && state2.has_line() { let diff = state1.compare(&state2, settings.ignore_case); @@ -604,9 +645,7 @@ fn exec(file1: &str, file2: &str, settings: &Settings) -> i32 { Ordering::Equal => { let next_line1 = state1.extend(&input); let next_line2 = state2.extend(&input); - state1.combine(&state2, &repr); - state1.reset(next_line1); state2.reset(next_line2); } diff --git a/tests/fixtures/join/header.expected b/tests/fixtures/join/header.expected new file mode 100644 index 000000000..160c65679 --- /dev/null +++ b/tests/fixtures/join/header.expected @@ -0,0 +1,4 @@ +id field count +1 a abc 10 +2 b abc 25 +4 d 17 xyz diff --git a/tests/fixtures/join/header_1.txt b/tests/fixtures/join/header_1.txt new file mode 100644 index 000000000..ae34e6715 --- /dev/null +++ b/tests/fixtures/join/header_1.txt @@ -0,0 +1,6 @@ +id field +1 a abc +2 b abc +3 c +4 d +5 c diff --git a/tests/fixtures/join/header_2.txt b/tests/fixtures/join/header_2.txt new file mode 100644 index 000000000..1e7c50e1c --- /dev/null +++ b/tests/fixtures/join/header_2.txt @@ -0,0 +1,5 @@ +id count +1 10 +2 25 +4 17 xyz +7 18 xyz diff --git a/tests/fixtures/join/header_autoformat.expected b/tests/fixtures/join/header_autoformat.expected new file mode 100644 index 000000000..e32bb162a --- /dev/null +++ b/tests/fixtures/join/header_autoformat.expected @@ -0,0 +1,4 @@ +id field count +1 a 10 +2 b 25 +4 d 17 diff --git a/tests/test_join.rs b/tests/test_join.rs index 2ceb7c97d..04ddd411d 100644 --- a/tests/test_join.rs +++ b/tests/test_join.rs @@ -207,3 +207,38 @@ fn wrong_line_order() { .arg("fields_4.txt") .fails().stderr_is("fields_4.txt:5: is not sorted"); } + +#[test] +fn headers() { + new_ucmd!() + .arg("header_1.txt") + .arg("header_2.txt") + .arg("--header") + .succeeds().stdout_only_fixture("header.expected"); +} + +#[test] +fn headers_autoformat() { + new_ucmd!() + .arg("header_1.txt") + .arg("header_2.txt") + .arg("--header") + .arg("-o") + .arg("auto") + .succeeds().stdout_only_fixture("header_autoformat.expected"); +} + +#[test] +fn single_file_with_header() { + new_ucmd!() + .arg("capitalized.txt") + .arg("empty.txt") + .arg("--header") + .succeeds().stdout_is("A 1"); + + new_ucmd!() + .arg("empty.txt") + .arg("capitalized.txt") + .arg("--header") + .succeeds().stdout_is("A 1"); +}