coreutils/tests/by-util/test_paste.rs
Andrew Liebenow c41c601b45
paste: permit the delimiter list to be empty (#6714)
* paste: permit the delimiter list to be empty

Also: refactored the delimiter processing logic

* Extract duplicated code into function

* Address PR comments. Improve code structure.

* Fix additional paste bugs

* Fix additional paste bugs

* Simplify backslash delimiter validation

* Fix Clippy violations
2024-10-10 15:36:30 +02:00

376 lines
8.6 KiB
Rust

// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore bsdutils toybox
use crate::common::util::TestScenario;
/// One table-driven `paste` scenario: the options to pass, the contents of
/// every input file, and the output expected on stdout.
struct TestData<'a> {
    /// Label identifying the scenario.
    name: &'a str,
    /// Command-line arguments placed before the input file names.
    args: &'a [&'a str],
    /// Contents of the input files, one element per file.
    ins: &'a [&'a str],
    /// Exact text expected on standard output.
    out: &'a str,
}
// Table of cases run by `test_data`: each element of `ins` is written to its
// own input file, `args` is passed ahead of the file names, and standard
// output must equal `out`.
const EXAMPLE_DATA: &[TestData] = &[
// Ensure that paste properly handles files lacking a final newline.
TestData {
name: "no-nl-1",
args: &[],
ins: &["a", "b"],
out: "a\tb\n",
},
TestData {
name: "no-nl-2",
args: &[],
ins: &["a\n", "b"],
out: "a\tb\n",
},
TestData {
name: "no-nl-3",
args: &[],
ins: &["a", "b\n"],
out: "a\tb\n",
},
TestData {
name: "no-nl-4",
args: &[],
ins: &["a\n", "b\n"],
out: "a\tb\n",
},
// The same four cases with `-z`, using NUL (`\0`) wherever the cases above
// use a newline.
TestData {
name: "zno-nl-1",
args: &["-z"],
ins: &["a", "b"],
out: "a\tb\0",
},
TestData {
name: "zno-nl-2",
args: &["-z"],
ins: &["a\0", "b"],
out: "a\tb\0",
},
TestData {
name: "zno-nl-3",
args: &["-z"],
ins: &["a", "b\0"],
out: "a\tb\0",
},
TestData {
name: "zno-nl-4",
args: &["-z"],
ins: &["a\0", "b\0"],
out: "a\tb\0",
},
// Same as above, but with two lines in each input file and the
// addition of the -d option to make SPACE be the output
// delimiter.
TestData {
name: "no-nla-1",
args: &["-d", " "],
ins: &["1\na", "2\nb"],
out: "1 2\na b\n",
},
TestData {
name: "no-nla-2",
args: &["-d", " "],
ins: &["1\na\n", "2\nb"],
out: "1 2\na b\n",
},
TestData {
name: "no-nla-3",
args: &["-d", " "],
ins: &["1\na", "2\nb\n"],
out: "1 2\na b\n",
},
TestData {
name: "no-nla-4",
args: &["-d", " "],
ins: &["1\na\n", "2\nb\n"],
out: "1 2\na b\n",
},
// The two-line, SPACE-delimited cases again, with `-z` and NUL (`\0`) in
// place of each newline.
TestData {
name: "zno-nla1",
args: &["-zd", " "],
ins: &["1\0a", "2\0b"],
out: "1 2\0a b\0",
},
TestData {
name: "zno-nla2",
args: &["-zd", " "],
ins: &["1\0a\0", "2\0b"],
out: "1 2\0a b\0",
},
TestData {
name: "zno-nla3",
args: &["-zd", " "],
ins: &["1\0a", "2\0b\0"],
out: "1 2\0a b\0",
},
TestData {
name: "zno-nla4",
args: &["-zd", " "],
ins: &["1\0a\0", "2\0b\0"],
out: "1 2\0a b\0",
},
// The delimiter is a single character that occupies more than one byte in
// UTF-8; it must appear intact between the fields.
TestData {
name: "multibyte-delim",
args: &["-d", "💣"],
ins: &["1\na\n", "2\nb\n"],
out: "1💣2\na💣b\n",
},
// The same multi-byte delimiter combined with `-s`.
TestData {
name: "multibyte-delim-serial",
args: &["-d", "💣", "-s"],
ins: &["1\na\n", "2\nb\n"],
out: "1💣a\n2💣b\n",
},
// Spaces and tabs at the end of input lines appear unchanged in the
// expected output.
TestData {
name: "trailing whitespace",
args: &["-d", "|"],
ins: &["1 \na \n", "2\t\nb\t\n"],
out: "1 |2\t\na |b\t\n",
},
];
#[test]
// An option that paste does not define is expected to fail with code 1.
fn test_invalid_arg() {
    let mut cmd = new_ucmd!();
    cmd.arg("--definitely-invalid").fails().code_is(1);
}
#[test]
// Runs paste over `html_colors.txt` with the delimiter list TAB, NEWLINE and
// compares stdout with `html_colors.expected`, for every combination of the
// short and long spellings of the serial and delimiter options.
fn test_combine_pairs_of_lines() {
    for serial_flag in ["-s", "--serial"] {
        for delimiter_flag in ["-d", "--delimiters"] {
            new_ucmd!()
                .args(&[serial_flag, delimiter_flag, "\t\n", "html_colors.txt"])
                // `succeeds()` rather than `run()`: the exit status must be
                // checked as well, matching `test_multi_stdin`, so that a
                // failing run with correct stdout cannot pass unnoticed.
                .succeeds()
                .stdout_is_fixture("html_colors.expected");
        }
    }
}
#[test]
// Names `-` twice as an input while piping in `html_colors.txt`; the output
// must match the same fixture used by `test_combine_pairs_of_lines`.
fn test_multi_stdin() {
    for delimiter_flag in ["-d", "--delimiters"] {
        let args = [delimiter_flag, "\t\n", "-", "-"];
        let mut cmd = new_ucmd!();
        cmd.args(&args)
            .pipe_in_fixture("html_colors.txt")
            .succeeds()
            .stdout_is_fixture("html_colors.expected");
    }
}
#[test]
// TODO: make this test work on Windows
#[cfg(not(windows))]
fn test_delimiter_list_ending_with_escaped_backslash() {
    for delimiter_flag in ["-d", "--delimiters"] {
        let (at, mut ucmd) = at_and_ucmd!();

        // Create one input file per element and remember the file names.
        let input_files: Vec<String> = ["a\n", "b\n"]
            .iter()
            .enumerate()
            .map(|(i, contents)| {
                let path = format!("in{i}");
                at.write(&path, contents);
                path
            })
            .collect();

        // The delimiter list is two backslash characters; the expected
        // output contains a single backslash between the two fields.
        ucmd.args(&[delimiter_flag, r"\\"])
            .args(&input_files)
            .succeeds()
            .stdout_is("a\\b\n");
    }
}
#[test]
// Each delimiter list below ends in a backslash that is not itself escaped;
// paste must fail and name the offending list in its error message.
fn test_delimiter_list_ending_with_unescaped_backslash() {
    // (delimiter list, text that stderr must contain)
    const CASES: [(&str, &str); 3] = [
        (
            r"\",
            r"delimiter list ends with an unescaped backslash: \",
        ),
        (
            r"\\\",
            r"delimiter list ends with an unescaped backslash: \\\",
        ),
        (
            r"_\",
            r"delimiter list ends with an unescaped backslash: _\",
        ),
    ];

    for delimiter_flag in ["-d", "--delimiters"] {
        for (delimiter_list, expected_message) in CASES {
            new_ucmd!()
                .args(&[delimiter_flag, delimiter_list])
                .fails()
                .stderr_contains(expected_message);
        }
    }
}
#[test]
// With an empty delimiter list and `-s`, the three piped lines are expected
// to come out joined with nothing between them.
fn test_delimiter_list_empty() {
    let piped = "\
A ALPHA 1 _
B BRAVO 2 _
C CHARLIE 3 _
";
    let joined = "\
A ALPHA 1 _B BRAVO 2 _C CHARLIE 3 _
";

    for delimiter_flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[delimiter_flag, "", "-s"])
            .pipe_in(piped)
            .succeeds()
            .stdout_only(joined);
    }
}
// Regression test: this invocation used to panic because of a `usize`
// subtraction whose result would have been negative. The panic could not be
// observed in release builds, since integer overflow checking is not enabled
// there.
#[test]
fn test_delimiter_truncation() {
    let piped = "\
FIRST
SECOND
THIRD
FOURTH
ABCDEFG
";
    let joined = "\
FIRST!SECOND@THIRD#FOURTH!ABCDEFG
";

    for delimiter_flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[delimiter_flag, "!@#", "-s", "-", "-", "-"])
            .pipe_in(piped)
            .succeeds()
            .stdout_only(joined);
    }
}
#[test]
// Input that is not valid UTF-8 must be written back byte for byte.
fn test_non_utf8_input() {
    // 0xC0 is not valid UTF-8
    let raw: &[u8] = b"Non-UTF-8 test: \xC0\x00\xC0.\n";

    let mut cmd = new_ucmd!();
    cmd.pipe_in(raw).succeeds().stdout_only_bytes(raw);
}
#[test]
// A delimiter list of three backslashes must be rejected: nothing on stdout,
// and stderr must end with the error message followed by the list itself.
fn test_three_trailing_backslashes_delimiter() {
    let three_backslashes_string = r"\".repeat(3);
    let expected_tail = format!(
        ": delimiter list ends with an unescaped backslash: {three_backslashes_string}\n"
    );

    for delimiter_flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[delimiter_flag, three_backslashes_string.as_str()])
            .fails()
            .no_stdout()
            .stderr_str_check(|stderr| stderr.ends_with(&expected_tail));
    }
}
// "If any other characters follow the <backslash>, the results are unspecified."
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
// However, other implementations remove the backslash
#[test]
fn test_posix_unspecified_delimiter() {
    // The delimiter list is a backslash followed by `z`; the expected output
    // uses a plain `z` between the joined lines.
    let piped = "\
1
2
3
4
";
    let joined = "\
1z2z3z4
";

    for delimiter_flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[delimiter_flag, r"\z", "-s"])
            .pipe_in(piped)
            .succeeds()
            .stdout_only(joined);
    }
}
// "Empty string (not a null character)"
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
#[test]
fn test_backslash_zero_delimiter() {
    // The delimiter list is `\0`, `z`, `\0`: in the expected output the
    // `\0` positions contribute nothing and only `z` is visible.
    let piped = "\
1
2
3
4
5
6
";
    let joined = "\
12z345z6
";

    for delimiter_flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[delimiter_flag, r"\0z\0", "-s"])
            .pipe_in(piped)
            .succeeds()
            .stdout_only(joined);
    }
}
// As of 2024-10-09, only bsdutils (https://github.com/dcantrell/bsdutils, derived from FreeBSD) and toybox
// handle multibyte delimiter characters in the way a user would likely expect. BusyBox and GNU Core Utilities
// do not.
#[test]
fn test_multi_byte_delimiter() {
    // The middle delimiter, `ß`, is more than one byte long in UTF-8 and
    // must be used as a single delimiter.
    let piped = "\
1
2
3
4
5
6
";
    let joined = "\
1!2ß3@4!5ß6
";

    for delimiter_flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[delimiter_flag, "!ß@", "-s"])
            .pipe_in(piped)
            .succeeds()
            .stdout_only(joined);
    }
}
#[test]
// Runs every entry of `EXAMPLE_DATA`: writes each input to its own file,
// invokes paste with the entry's arguments followed by those files, and
// compares stdout with the expected text.
fn test_data() {
    for example in EXAMPLE_DATA {
        let (at, mut ucmd) = at_and_ucmd!();

        // Files are named `in0`, `in1`, ... in the order the inputs are listed.
        let input_files: Vec<String> = example
            .ins
            .iter()
            .enumerate()
            .map(|(i, contents)| {
                let path = format!("in{i}");
                at.write(&path, contents);
                path
            })
            .collect();

        // Print the case name so it shows up in the captured test output.
        println!("{}", example.name);

        ucmd.args(example.args)
            .args(&input_files)
            .succeeds()
            .stdout_is(example.out);
    }
}