2021-06-03 22:49:06 +00:00
|
|
|
// * This file is part of the uutils coreutils package.
|
|
|
|
// *
|
|
|
|
// * For the full copyright and license information, please view the LICENSE
|
|
|
|
// * file that was distributed with this source code.
|
2022-01-02 04:53:29 +00:00
|
|
|
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes onehundredlines nbbbb
|
2015-11-16 05:25:01 +00:00
|
|
|
|
2023-03-20 13:51:19 +00:00
|
|
|
use crate::common::util::{AtPath, TestScenario};
|
2023-04-10 06:31:31 +00:00
|
|
|
use rand::{thread_rng, Rng, SeedableRng};
|
|
|
|
use regex::Regex;
|
2021-01-18 13:42:44 +00:00
|
|
|
#[cfg(not(windows))]
|
|
|
|
use std::env;
|
2020-04-13 18:36:03 +00:00
|
|
|
use std::path::Path;
|
2021-05-04 11:01:01 +00:00
|
|
|
use std::{
|
|
|
|
fs::{read_dir, File},
|
2022-01-03 00:31:43 +00:00
|
|
|
io::{BufWriter, Read, Write},
|
2021-05-04 11:01:01 +00:00
|
|
|
};
|
2015-11-16 05:25:01 +00:00
|
|
|
|
|
|
|
fn random_chars(n: usize) -> String {
|
2020-04-13 18:36:03 +00:00
|
|
|
thread_rng()
|
|
|
|
.sample_iter(&rand::distributions::Alphanumeric)
|
2022-01-18 01:23:51 +00:00
|
|
|
.map(char::from)
|
2020-04-13 18:36:03 +00:00
|
|
|
.take(n)
|
|
|
|
.collect::<String>()
|
2015-11-16 05:25:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
struct Glob {
|
|
|
|
directory: AtPath,
|
|
|
|
regex: Regex,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Glob {
|
2022-01-30 13:59:31 +00:00
|
|
|
fn new(at: &AtPath, directory: &str, regex: &str) -> Self {
|
|
|
|
Self {
|
2015-11-16 05:25:01 +00:00
|
|
|
directory: AtPath::new(Path::new(&at.plus_as_string(directory))),
|
|
|
|
regex: Regex::new(regex).unwrap(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn count(&self) -> usize {
|
|
|
|
self.collect().len()
|
|
|
|
}
|
|
|
|
|
2021-01-18 13:42:44 +00:00
|
|
|
/// Get all files in `self.directory` that match `self.regex`
|
2015-11-16 05:25:01 +00:00
|
|
|
fn collect(&self) -> Vec<String> {
|
|
|
|
read_dir(Path::new(&self.directory.subdir))
|
|
|
|
.unwrap()
|
|
|
|
.filter_map(|entry| {
|
|
|
|
let path = entry.unwrap().path();
|
2020-04-13 18:36:03 +00:00
|
|
|
let name = self
|
|
|
|
.directory
|
|
|
|
.minus_as_string(path.as_path().to_str().unwrap_or(""));
|
2015-11-16 05:25:01 +00:00
|
|
|
if self.regex.is_match(&name) {
|
|
|
|
Some(name)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.collect()
|
|
|
|
}
|
|
|
|
|
2021-01-18 13:42:44 +00:00
|
|
|
/// Accumulate bytes of all files in `self.collect()`
|
2015-11-16 05:25:01 +00:00
|
|
|
fn collate(&self) -> Vec<u8> {
|
|
|
|
let mut files = self.collect();
|
|
|
|
files.sort();
|
|
|
|
let mut data: Vec<u8> = vec![];
|
2016-11-25 19:14:46 +00:00
|
|
|
for name in &files {
|
2021-05-04 11:01:01 +00:00
|
|
|
data.extend(self.directory.read_bytes(name));
|
2015-11-16 05:25:01 +00:00
|
|
|
}
|
|
|
|
data
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-18 13:42:44 +00:00
|
|
|
/// File handle that user can add random bytes (line-formatted or not) to
|
2015-11-16 05:25:01 +00:00
|
|
|
struct RandomFile {
|
|
|
|
inner: File,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl RandomFile {
|
2021-01-18 13:42:44 +00:00
|
|
|
/// Size of each line that's being generated
|
|
|
|
const LINESIZE: usize = 32;
|
|
|
|
|
|
|
|
/// `create()` file handle located at `at` / `name`
|
2022-01-30 13:59:31 +00:00
|
|
|
fn new(at: &AtPath, name: &str) -> Self {
|
|
|
|
Self {
|
2022-11-15 15:57:08 +00:00
|
|
|
inner: File::create(at.plus(name)).unwrap(),
|
2020-04-13 18:36:03 +00:00
|
|
|
}
|
2015-11-16 05:25:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn add_bytes(&mut self, bytes: usize) {
|
2021-05-04 11:01:01 +00:00
|
|
|
// Note that just writing random characters isn't enough to cover all
|
|
|
|
// cases. We need truly random bytes.
|
|
|
|
let mut writer = BufWriter::new(&self.inner);
|
|
|
|
|
|
|
|
// Seed the rng so as to avoid spurious test failures.
|
|
|
|
let mut rng = rand::rngs::StdRng::seed_from_u64(123);
|
|
|
|
let mut buffer = [0; 1024];
|
|
|
|
let mut remaining_size = bytes;
|
|
|
|
|
|
|
|
while remaining_size > 0 {
|
|
|
|
let to_write = std::cmp::min(remaining_size, buffer.len());
|
|
|
|
let buf = &mut buffer[..to_write];
|
|
|
|
rng.fill(buf);
|
2021-05-29 12:32:35 +00:00
|
|
|
writer.write_all(buf).unwrap();
|
2021-05-04 11:01:01 +00:00
|
|
|
|
|
|
|
remaining_size -= to_write;
|
2015-11-16 05:25:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-18 13:42:44 +00:00
|
|
|
/// Add n lines each of size `RandomFile::LINESIZE`
|
2015-11-16 05:25:01 +00:00
|
|
|
fn add_lines(&mut self, lines: usize) {
|
|
|
|
let mut n = lines;
|
|
|
|
while n > 0 {
|
2022-01-30 13:59:31 +00:00
|
|
|
writeln!(self.inner, "{}", random_chars(Self::LINESIZE)).unwrap();
|
2015-11-16 05:25:01 +00:00
|
|
|
n -= 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-09-10 16:38:14 +00:00
|
|
|
/// An unknown command-line flag must make the command fail with exit code 1.
#[test]
fn test_invalid_arg() {
    let unknown_flag = "--definitely-invalid";
    new_ucmd!().arg(unknown_flag).fails().code_is(1);
}
|
|
|
|
|
2015-11-16 05:25:01 +00:00
|
|
|
/// Splitting a 2000-line file without options must yield exactly two chunks
/// named `x` plus two letters whose concatenation equals the input.
#[test]
fn test_split_default() {
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "split_default";
    RandomFile::new(&at, input).add_lines(2000);

    ucmd.args(&[input]).succeeds();

    let chunks = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
    assert_eq!(chunks.count(), 2);
    assert_eq!(chunks.collate(), at.read_bytes(input));
}
|
|
|
|
|
|
|
|
/// Split 10000 random bytes into 1000-byte chunks with numeric suffixes and
/// the prefix `a`; expects ten chunks of 1000 bytes that reassemble to the
/// input.
#[test]
fn test_split_numeric_prefixed_chunks_by_bytes() {
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "split_num_prefixed_chunks_by_bytes";
    RandomFile::new(&at, input).add_bytes(10000);

    // `-d` is --numeric-suffixes, `-b` is --bytes.
    let args = ["-d", "-b", "1000", input, "a"];
    ucmd.args(&args).succeeds();

    let chunks = Glob::new(&at, ".", r"a\d\d$");
    assert_eq!(chunks.count(), 10);
    for chunk in &chunks.collect() {
        assert_eq!(chunks.directory.metadata(chunk).len(), 1000);
    }
    assert_eq!(chunks.collate(), at.read_bytes(input));
}
|
|
|
|
|
|
|
|
/// Split 10000 random bytes into 1000-byte chunks with alphabetic suffixes
/// and the prefix `b`; expects ten chunks of 1000 bytes that reassemble to
/// the input.
#[test]
fn test_split_str_prefixed_chunks_by_bytes() {
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "split_str_prefixed_chunks_by_bytes";
    RandomFile::new(&at, input).add_bytes(10000);

    // Important that this is less than 1024 since that's our internal buffer
    // size. Good to test that we don't overshoot.
    let args = ["-b", "1000", input, "b"];
    ucmd.args(&args).succeeds();

    let chunks = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
    assert_eq!(chunks.count(), 10);
    for chunk in &chunks.collect() {
        assert_eq!(chunks.directory.metadata(chunk).len(), 1000);
    }
    assert_eq!(chunks.collate(), at.read_bytes(input));
}
|
|
|
|
|
|
|
|
// Checks the case where the requested part size is not a multiple of the
// buffer size: the parts must still have exactly the requested size, i.e.
// `split` must not overshoot.
#[test]
fn test_split_bytes_prime_part_size() {
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "test_split_bytes_prime_part_size";
    RandomFile::new(&at, input).add_bytes(10000);

    // 1753 is prime and greater than the buffer size, 1024.
    ucmd.args(&["-b", "1753", input, "b"]).succeeds();

    let chunks = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
    assert_eq!(chunks.count(), 6);

    // glob.collect() is not guaranteed to return in sorted order, so we sort.
    let mut names = chunks.collect();
    names.sort();

    // The first five parts are full-sized; the sixth holds the remaining
    // 10000 - 5 * 1753 = 1235 bytes.
    for full_part in &names[..5] {
        assert_eq!(chunks.directory.metadata(full_part).len(), 1753);
    }
    assert_eq!(chunks.directory.metadata(&names[5]).len(), 1235);
    assert_eq!(chunks.collate(), at.read_bytes(input));
}
|
|
|
|
|
|
|
|
/// Split a 10000-line file into 1000-line chunks with numeric suffixes and
/// the prefix `c`; expects ten chunks that reassemble to the input.
#[test]
fn test_split_num_prefixed_chunks_by_lines() {
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "split_num_prefixed_chunks_by_lines";
    RandomFile::new(&at, input).add_lines(10000);

    let args = ["-d", "-l", "1000", input, "c"];
    ucmd.args(&args).succeeds();

    let chunks = Glob::new(&at, ".", r"c\d\d$");
    assert_eq!(chunks.count(), 10);
    assert_eq!(chunks.collate(), at.read_bytes(input));
}
|
|
|
|
|
|
|
|
/// Split a 10000-line file into 1000-line chunks with alphabetic suffixes and
/// the prefix `d`; expects ten chunks that reassemble to the input.
#[test]
fn test_split_str_prefixed_chunks_by_lines() {
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "split_str_prefixed_chunks_by_lines";
    RandomFile::new(&at, input).add_lines(10000);

    let args = ["-l", "1000", input, "d"];
    ucmd.args(&args).succeeds();

    let chunks = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$");
    assert_eq!(chunks.count(), 10);
    assert_eq!(chunks.collate(), at.read_bytes(input));
}
|
2020-09-16 15:59:39 +00:00
|
|
|
|
|
|
|
/// `--additional-suffix .txt` must append `.txt` to every chunk name; a
/// 2000-line input is expected to produce two such chunks that reassemble to
/// the input.
#[test]
fn test_split_additional_suffix() {
    let (at, mut ucmd) = at_and_ucmd!();
    let name = "split_additional_suffix";
    RandomFile::new(&at, name).add_lines(2000);
    ucmd.args(&["--additional-suffix", ".txt", name]).succeeds();

    // The dot is escaped so that only a literal `.txt` suffix is accepted;
    // an unescaped `.` would match any character in that position.
    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]\.txt$");
    assert_eq!(glob.count(), 2);
    assert_eq!(glob.collate(), at.read_bytes(name));
}
|
2021-01-18 13:42:44 +00:00
|
|
|
|
2022-02-11 00:16:49 +00:00
|
|
|
/// An additional suffix containing a directory separator must be rejected
/// with a usage error.
#[test]
fn test_additional_suffix_no_slash() {
    let args = ["--additional-suffix", "a/b"];
    new_ucmd!()
        .args(&args)
        .fails()
        .usage_error("invalid suffix 'a/b', contains directory separator");
}
|
|
|
|
|
2021-01-18 13:42:44 +00:00
|
|
|
// note: the test_filter* tests below are unix-only
|
|
|
|
// windows support has been waived for now because of the difficulty of getting
|
|
|
|
// the `cmd` call right
|
|
|
|
// see https://github.com/rust-lang/rust/issues/29494
|
|
|
|
|
|
|
|
/// Like `test_split_default()`, but every chunk is piped through a `--filter`
/// command before it is written.
#[test]
#[cfg(unix)]
fn test_filter() {
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "filtered";
    let line_count = 3;
    RandomFile::new(&at, input).add_lines(line_count);

    // The filter replaces every character with 'i'.
    ucmd.args(&["--filter=sed s/./i/g > $FILE", input])
        .succeeds();

    // The filter command worked if the output holds nothing but 'i'
    // characters and newlines.
    let chunks = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
    let only_i_and_newlines = chunks
        .collate()
        .iter()
        .all(|&byte| byte == b'i' || byte == b'\n');
    assert!(only_i_and_newlines);
}
|
|
|
|
|
|
|
|
/// Ensures that a `$FILE` environment variable set before running `--filter`
/// still has its original value afterwards.
///
/// Implemented like `test_split_default()` but runs a command before writing.
#[test]
#[cfg(unix)]
fn test_filter_with_env_var_set() {
    let (at, mut ucmd) = at_and_ucmd!();
    let name = "filtered";
    let n_lines = 3;
    RandomFile::new(&at, name).add_lines(n_lines);

    // NOTE(review): `env::set_var` changes the environment of the whole test
    // process and the value is not restored afterwards.
    let env_var_value = "some-value";
    env::set_var("FILE", env_var_value);
    ucmd.args(&["--filter=cat > $FILE", name]).succeeds();

    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
    assert_eq!(glob.collate(), at.read_bytes(name));
    // `assert_eq!` reports both values on failure, which shows whether the
    // variable was changed or removed.
    assert_eq!(
        env::var("FILE").unwrap_or_else(|_| "var was unset".to_owned()),
        env_var_value
    );
}
|
|
|
|
|
|
|
|
/// The command must fail when the `--filter` program does not exist.
#[test]
#[cfg(unix)]
fn test_filter_command_fails() {
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "filter-will-fail";
    RandomFile::new(&at, input).add_lines(4);

    let args = ["--filter=/a/path/that/totally/does/not/exist", input];
    ucmd.args(&args).fails();
}
|
2021-06-02 16:37:21 +00:00
|
|
|
|
|
|
|
/// Test if stdout/stderr for '--lines' option is correct
#[test]
fn test_split_lines_number() {
    let scene = TestScenario::new(util_name!());
    scene.fixtures.touch("file");

    // A valid line count produces no output at all.
    scene
        .ucmd()
        .args(&["--lines", "2", "file"])
        .succeeds()
        .no_stderr()
        .no_stdout();

    // A malformed line count is reported on stderr with exit code 1.
    scene
        .ucmd()
        .args(&["--lines", "2fb", "file"])
        .fails()
        .code_is(1)
        .stderr_only("split: invalid number of lines: '2fb'\n");
}
|
|
|
|
|
|
|
|
/// Invalid or too large `-b` sizes must be rejected with exit code 1 and a
/// matching message on stderr.
#[test]
fn test_split_invalid_bytes_size() {
    // Unknown size suffix.
    new_ucmd!()
        .args(&["-b", "1024R"])
        .fails()
        .code_is(1)
        .stderr_only("split: invalid number of bytes: '1024R'\n");

    // Size that is expected not to fit the target's size type.
    #[cfg(not(target_pointer_width = "128"))]
    new_ucmd!()
        .args(&["-b", "1Y"])
        .fails()
        .code_is(1)
        .stderr_only(
            "split: invalid number of bytes: '1Y': Value too large for defined data type\n",
        );

    // On 32-bit targets these sizes must still be accepted.
    #[cfg(target_pointer_width = "32")]
    {
        for size in ["1000G", "10T"] {
            new_ucmd!().args(&["-b", size]).succeeds();
        }
    }
}
|
2022-01-01 18:47:11 +00:00
|
|
|
|
2022-02-22 19:58:41 +00:00
|
|
|
/// On 32-bit targets a chunk count of 5000000000 must be rejected; on other
/// targets the test body is empty.
#[test]
fn test_split_chunks_num_chunks_oversized_32() {
    #[cfg(target_pointer_width = "32")]
    {
        let scene = TestScenario::new(util_name!());
        scene.fixtures.touch("file");

        let args = ["--number", "5000000000", "file"];
        scene
            .ucmd()
            .args(&args)
            .fails()
            .code_is(1)
            .stderr_only("split: Number of chunks too big\n");
    }
}
|
|
|
|
|
|
|
|
/// `--number` without an input file must fail with exit code 1 and a
/// "cannot determine file size" message for `-`.
#[test]
fn test_split_stdin_num_chunks() {
    let expected_error = "split: -: cannot determine file size\n";
    new_ucmd!()
        .args(&["--number=1"])
        .fails()
        .code_is(1)
        .stderr_only(expected_error);
}
|
|
|
|
|
2022-01-01 18:47:11 +00:00
|
|
|
fn file_read(at: &AtPath, filename: &str) -> String {
|
|
|
|
let mut s = String::new();
|
|
|
|
at.open(filename).read_to_string(&mut s).unwrap();
|
|
|
|
s
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Test for the default suffix length behavior: dynamically increasing size.
#[test]
fn test_alphabetic_dynamic_suffix_length() {
    let (at, mut ucmd) = at_and_ucmd!();
    // Split into chunks of one byte each.
    //
    // The input file has (26^2) - 26 + 1 = 651 bytes. This is just
    // enough to force `split` to dynamically increase the length of
    // the filename for the very last chunk.
    //
    // We expect the output files to be named
    //
    //     xaa, xab, xac, ..., xyx, xyy, xyz, xzaaa
    //
    ucmd.args(&["-b", "1", "sixhundredfiftyonebytes.txt"])
        .succeeds();

    // The 25 * 26 two-letter suffixes `aa` through `yz` ...
    for first in 'a'..='y' {
        for second in 'a'..='z' {
            let filename = format!("x{first}{second}");
            assert_eq!(file_read(&at, &filename), "a");
        }
    }
    // ... followed by the first suffix of increased length.
    assert_eq!(file_read(&at, "xzaaa"), "a");
}
|
|
|
|
|
|
|
|
/// Test for the default suffix length behavior: dynamically increasing size.
#[test]
fn test_numeric_dynamic_suffix_length() {
    let (at, mut ucmd) = at_and_ucmd!();
    // Split into chunks of one byte each, use numbers instead of
    // letters as file suffixes.
    //
    // The input file has (10^2) - 10 + 1 = 91 bytes. This is just
    // enough to force `split` to dynamically increase the length of
    // the filename for the very last chunk.
    //
    //     x00, x01, x02, ..., x87, x88, x89, x9000
    //
    ucmd.args(&["-d", "-b", "1", "ninetyonebytes.txt"])
        .succeeds();

    let two_digit_names = (0..90).map(|index| format!("x{index:02}"));
    for filename in two_digit_names {
        assert_eq!(file_read(&at, &filename), "a");
    }
    assert_eq!(file_read(&at, "x9000"), "a");
}
|
|
|
|
|
2022-01-16 15:41:52 +00:00
|
|
|
/// Test for the default suffix length behavior: dynamically increasing size.
#[test]
fn test_hex_dynamic_suffix_length() {
    let (at, mut ucmd) = at_and_ucmd!();
    // Split into chunks of one byte each, use hexadecimal digits
    // instead of letters as file suffixes.
    //
    // The input file has (16^2) - 16 + 1 = 241 bytes. This is just
    // enough to force `split` to dynamically increase the length of
    // the filename for the very last chunk.
    //
    //     x00, x01, x02, ..., xed, xee, xef, xf000
    //
    ucmd.args(&["-x", "-b", "1", "twohundredfortyonebytes.txt"])
        .succeeds();

    let two_digit_names = (0..240).map(|index| format!("x{index:02x}"));
    for filename in two_digit_names {
        assert_eq!(file_read(&at, &filename), "a");
    }
    assert_eq!(file_read(&at, "xf000"), "a");
}
|
|
|
|
|
2022-01-01 18:47:11 +00:00
|
|
|
/// With a fixed suffix length of one (`-a 1`) and one-byte chunks, the
/// command must fail and report that the suffixes are exhausted.
#[test]
fn test_suffixes_exhausted() {
    let args = ["-b", "1", "-a", "1", "asciilowercase.txt"];
    new_ucmd!()
        .args(&args)
        .fails()
        .stderr_only("split: output file suffixes exhausted\n");
}
|
2022-01-16 04:04:46 +00:00
|
|
|
|
|
|
|
/// `--verbose` must print one "creating file" line per output file on stdout.
#[test]
fn test_verbose() {
    let args = ["-b", "5", "--verbose", "asciilowercase.txt"];
    new_ucmd!().args(&args).succeeds().stdout_only(
        "creating file 'xaa'
creating file 'xab'
creating file 'xac'
creating file 'xad'
creating file 'xae'
creating file 'xaf'
",
    );
}
|
2022-01-03 00:31:43 +00:00
|
|
|
|
|
|
|
/// `-n 5` must split the input into five chunks; the last chunk is expected
/// to receive the leftover bytes.
#[test]
fn test_number() {
    let (at, mut ucmd) = at_and_ucmd!();
    // Reads an output chunk into a `String`.
    let read_chunk = |name: &str| -> String {
        let mut contents = String::new();
        at.open(name).read_to_string(&mut contents).unwrap();
        contents
    };

    ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds();

    let expected_chunks = [
        ("xaa", "abcde"),
        ("xab", "fghij"),
        ("xac", "klmno"),
        ("xad", "pqrst"),
        ("xae", "uvwxyz\n"),
    ];
    for (name, expected) in expected_chunks {
        assert_eq!(read_chunk(name), expected);
    }
}
|
2022-02-01 00:04:32 +00:00
|
|
|
|
2022-02-14 18:47:18 +00:00
|
|
|
/// Same expectations as `test_number`, but with `---io-blksize 1024` passed
/// in addition.
#[test]
fn test_split_number_with_io_blksize() {
    let (at, mut ucmd) = at_and_ucmd!();
    // Reads an output chunk into a `String`.
    let read_chunk = |name: &str| -> String {
        let mut contents = String::new();
        at.open(name).read_to_string(&mut contents).unwrap();
        contents
    };

    let args = ["-n", "5", "asciilowercase.txt", "---io-blksize", "1024"];
    ucmd.args(&args).succeeds();

    let expected_chunks = [
        ("xaa", "abcde"),
        ("xab", "fghij"),
        ("xac", "klmno"),
        ("xad", "pqrst"),
        ("xae", "uvwxyz\n"),
    ];
    for (name, expected) in expected_chunks {
        assert_eq!(read_chunk(name), expected);
    }
}
|
|
|
|
|
|
|
|
/// Same expectations as `test_split_default`, but with `---io-blksize 2M`
/// passed in addition.
#[test]
fn test_split_default_with_io_blksize() {
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "split_default_with_io_blksize";
    RandomFile::new(&at, input).add_lines(2000);

    let args = [input, "---io-blksize", "2M"];
    ucmd.args(&args).succeeds();

    let chunks = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
    assert_eq!(chunks.count(), 2);
    assert_eq!(chunks.collate(), at.read_bytes(input));
}
|
|
|
|
|
2022-02-01 00:04:32 +00:00
|
|
|
/// A non-numeric suffix length must fail with nothing on stdout and an
/// "invalid suffix length" message on stderr.
#[test]
fn test_invalid_suffix_length() {
    let expected_error = "invalid suffix length: 'xyz'";
    new_ucmd!()
        .args(&["-a", "xyz"])
        .fails()
        .no_stdout()
        .stderr_contains(expected_error);
}
|
2021-12-31 01:11:03 +00:00
|
|
|
|
|
|
|
/// Splitting by lines must keep each line's trailing newline in the chunk.
#[test]
fn test_include_newlines() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-l", "2", "fivelines.txt"]).succeeds();

    // Two lines per chunk; the last chunk holds the single remaining line.
    let expected_chunks = [("xaa", "1\n2\n"), ("xab", "3\n4\n"), ("xac", "5\n")];
    for (name, expected) in expected_chunks {
        let mut contents = String::new();
        at.open(name).read_to_string(&mut contents).unwrap();
        assert_eq!(contents, expected);
    }
}
|
2022-02-10 02:41:33 +00:00
|
|
|
|
|
|
|
/// Without `-e`, requesting more chunks than input bytes must still create
/// the surplus chunk as an empty file.
#[test]
fn test_allow_empty_files() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-n", "4", "threebytes.txt"])
        .succeeds()
        .no_stdout()
        .no_stderr();

    let expected_chunks = [("xaa", "a"), ("xab", "b"), ("xac", "c"), ("xad", "")];
    for (name, expected) in expected_chunks {
        assert_eq!(at.read(name), expected);
    }
}
|
|
|
|
|
|
|
|
/// With `-e`, a chunk that would be empty must not be created.
#[test]
fn test_elide_empty_files() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-e", "-n", "4", "threebytes.txt"])
        .succeeds()
        .no_stdout()
        .no_stderr();

    let expected_chunks = [("xaa", "a"), ("xab", "b"), ("xac", "c")];
    for (name, expected) in expected_chunks {
        assert_eq!(at.read(name), expected);
    }
    // The fourth chunk would be empty and is therefore elided.
    assert!(!at.plus("xad").exists());
}
|
2022-01-30 22:27:05 +00:00
|
|
|
|
2022-03-19 16:03:10 +00:00
|
|
|
/// With `-e` and `/dev/null` as input, no output file may be created.
#[test]
#[cfg(unix)]
fn test_elide_dev_null() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-e", "-n", "3", "/dev/null"])
        .succeeds()
        .no_stdout()
        .no_stderr();

    for name in ["xaa", "xab", "xac"] {
        assert!(!at.plus(name).exists());
    }
}
|
|
|
|
|
2022-01-30 22:27:05 +00:00
|
|
|
/// `-n l/2` must split the input into two files without breaking lines.
#[test]
fn test_lines() {
    let (at, mut ucmd) = at_and_ucmd!();

    // Reads an output chunk into a `String`.
    let read_chunk = |name: &str| -> String {
        let mut contents = String::new();
        at.open(name).read_to_string(&mut contents).unwrap();
        contents
    };

    // Split into two files without splitting up lines.
    ucmd.args(&["-n", "l/2", "fivelines.txt"]).succeeds();

    for (name, expected) in [("xaa", "1\n2\n3\n"), ("xab", "4\n5\n")] {
        assert_eq!(read_chunk(name), expected);
    }
}
|
2022-01-06 02:06:04 +00:00
|
|
|
|
|
|
|
/// `-n l/3/10` must print only the selected chunk on stdout; for the
/// hundred-line fixture that is expected to be the lines 20 through 29.
#[test]
fn test_lines_kth() {
    let expected_stdout = "20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n";
    new_ucmd!()
        .args(&["-n", "l/3/10", "onehundredlines.txt"])
        .succeeds()
        .stdout_only(expected_stdout);
}
|
2022-01-02 04:53:29 +00:00
|
|
|
|
|
|
|
/// `-C 8` must produce chunks of at most eight bytes, as listed below for the
/// `letters.txt` fixture.
#[test]
fn test_line_bytes() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-C", "8", "letters.txt"]).succeeds();

    let expected_chunks = [
        ("xaa", "aaaaaaaa"),
        ("xab", "a\nbbbb\n"),
        ("xac", "cccc\ndd\n"),
        ("xad", "ee\n"),
    ];
    for (name, expected) in expected_chunks {
        assert_eq!(at.read(name), expected);
    }
}
|
2022-03-20 03:50:02 +00:00
|
|
|
|
|
|
|
/// `-C 2` on piped input that does not end in a newline must produce the
/// chunks listed below.
#[test]
fn test_line_bytes_no_final_newline() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-C", "2"])
        .pipe_in("1\n2222\n3\n4")
        .succeeds()
        .no_stdout()
        .no_stderr();

    let expected_chunks = [
        ("xaa", "1\n"),
        ("xab", "22"),
        ("xac", "22"),
        ("xad", "\n"),
        ("xae", "3\n"),
        ("xaf", "4"),
    ];
    for (name, expected) in expected_chunks {
        assert_eq!(at.read(name), expected);
    }
}
|
2022-03-20 04:19:36 +00:00
|
|
|
|
|
|
|
/// `-C 1` must write one byte per chunk and must not leave an empty trailing
/// file behind.
#[test]
fn test_line_bytes_no_empty_file() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-C", "1"])
        .pipe_in("1\n2222\n3\n4")
        .succeeds()
        .no_stdout()
        .no_stderr();

    let expected_chunks = [
        ("xaa", "1"),
        ("xab", "\n"),
        ("xac", "2"),
        ("xad", "2"),
        ("xae", "2"),
        ("xaf", "2"),
        ("xag", "\n"),
        ("xah", "3"),
        ("xai", "\n"),
        ("xaj", "4"),
    ];
    for (name, expected) in expected_chunks {
        assert_eq!(at.read(name), expected);
    }
    // No further (empty) chunk may exist after the last byte.
    assert!(!at.plus("xak").exists());
}
|
2022-07-15 16:44:25 +00:00
|
|
|
|
2022-10-07 21:46:49 +00:00
|
|
|
/// `-C 3` on piped input must produce the chunks listed below and no extra
/// file after the last one.
#[test]
fn test_line_bytes_no_eof() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-C", "3"])
        .pipe_in("1\n2222\n3\n4")
        .succeeds()
        .no_stdout()
        .no_stderr();

    let expected_chunks = [
        ("xaa", "1\n"),
        ("xab", "222"),
        ("xac", "2\n"),
        ("xad", "3\n"),
        ("xae", "4"),
    ];
    for (name, expected) in expected_chunks {
        assert_eq!(at.read(name), expected);
    }
    assert!(!at.plus("xaf").exists());
}
|
|
|
|
|
2022-07-15 16:44:25 +00:00
|
|
|
/// Output files from an earlier run may be overwritten, but the command must
/// refuse to overwrite its own input file and leave it untouched.
#[test]
fn test_guard_input() {
    let ts = TestScenario::new(util_name!());
    let at = &ts.fixtures;

    // Running the same split twice works: the second run overwrites the
    // chunks written by the first one.
    for _ in 0..2 {
        ts.ucmd()
            .args(&["-C", "6"])
            .pipe_in("1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n")
            .succeeds()
            .no_stdout()
            .no_stderr();
        assert_eq!(at.read("xaa"), "1\n2\n3\n");
    }

    // Using an output file as input must be refused ...
    ts.ucmd()
        .args(&["-C", "6", "xaa"])
        .fails()
        .stderr_only("split: 'xaa' would overwrite input; aborting\n");
    // ... and the file must keep its contents.
    assert_eq!(at.read("xaa"), "1\n2\n3\n");
}
|
2022-08-16 09:02:52 +00:00
|
|
|
|
|
|
|
/// Split 16 KiB of random bytes into 8K parts; expects exactly two parts of
/// 8 * 1024 bytes each that reassemble to the input.
#[test]
fn test_multiple_of_input_chunk() {
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "multiple_of_input_chunk";
    RandomFile::new(&at, input).add_bytes(16 * 1024);

    let args = ["-b", "8K", input, "b"];
    ucmd.args(&args).succeeds();

    let chunks = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
    assert_eq!(chunks.count(), 2);
    for chunk in &chunks.collect() {
        assert_eq!(chunks.directory.metadata(chunk).len(), 8 * 1024);
    }
    assert_eq!(chunks.collate(), at.read_bytes(input));
}
|
2022-09-24 16:49:21 +00:00
|
|
|
|
|
|
|
/// `--numeric-suffixes 9` must number the output files starting at `09`.
#[test]
fn test_numeric_suffix() {
    let (at, mut ucmd) = at_and_ucmd!();
    let args = ["-n", "4", "--numeric-suffixes", "9", "threebytes.txt"];
    ucmd.args(&args).succeeds().no_stdout().no_stderr();

    let expected_chunks = [("x09", "a"), ("x10", "b"), ("x11", "c"), ("x12", "")];
    for (name, expected) in expected_chunks {
        assert_eq!(at.read(name), expected);
    }
}
|
|
|
|
|
|
|
|
/// `--hex-suffixes 9` must number the output files in hexadecimal starting at
/// `09`.
#[test]
fn test_hex_suffix() {
    let (at, mut ucmd) = at_and_ucmd!();
    let args = ["-n", "4", "--hex-suffixes", "9", "threebytes.txt"];
    ucmd.args(&args).succeeds().no_stdout().no_stderr();

    let expected_chunks = [("x09", "a"), ("x0a", "b"), ("x0b", "c"), ("x0c", "")];
    for (name, expected) in expected_chunks {
        assert_eq!(at.read(name), expected);
    }
}
|
2022-01-03 03:38:27 +00:00
|
|
|
|
|
|
|
/// `-n r/2` must distribute the input lines alternately over two files.
#[test]
fn test_round_robin() {
    let (at, mut ucmd) = at_and_ucmd!();

    // Reads an output chunk into a `String`.
    let read_chunk = |name: &str| -> String {
        let mut contents = String::new();
        at.open(name).read_to_string(&mut contents).unwrap();
        contents
    };

    ucmd.args(&["-n", "r/2", "fivelines.txt"]).succeeds();

    for (name, expected) in [("xaa", "1\n3\n5\n"), ("xab", "2\n4\n")] {
        assert_eq!(read_chunk(name), expected);
    }
}
|
2023-07-03 20:56:32 +00:00
|
|
|
|
|
|
|
/// A value of `0` must be rejected for `--lines`, `-C`, `-b` and `-n`, each
/// with nothing on stdout and a matching message on stderr.
#[test]
fn test_split_invalid_input() {
    let scene = TestScenario::new(util_name!());
    scene.fixtures.touch("file");

    // (option, message expected on stderr)
    let cases = [
        ("--lines", "split: invalid number of lines: 0"),
        ("-C", "split: invalid number of bytes: 0"),
        ("-b", "split: invalid number of bytes: 0"),
        ("-n", "split: invalid number of chunks: 0"),
    ];
    for (option, expected_error) in cases {
        scene
            .ucmd()
            .args(&[option, "0", "file"])
            .fails()
            .no_stdout()
            .stderr_contains(expected_error);
    }
}
|