split: add support for -e argument

Add the `-e` flag, which indicates whether to elide (that is, remove)
empty files that would have been created by the `-n` option.

The `-n` command-line argument gives a specific number of chunks into
which the input files will be split. If the number of chunks is
greater than the number of bytes, then empty files will be created for
the excess chunks. But if `-e` is given, then empty files will not be
created.

For example, contrast

    $ printf 'a\n' > f && split -e -n 3 f && cat xaa xab xac
    a
    cat: xac: No such file or directory

with

    $ printf 'a\n' > f && split -n 3 f && cat xaa xab xac
    a
This commit is contained in:
Jeffrey Finkelstein 2022-02-09 21:41:33 -05:00
parent e1a611374a
commit 6718d97f97
3 changed files with 63 additions and 2 deletions

View file

@ -39,6 +39,7 @@ static OPT_VERBOSE: &str = "verbose";
//The ---io-blksize parameter is consumed and ignored.
//The parameter is included to make GNU coreutils tests pass.
static OPT_IO_BLKSIZE: &str = "-io-blksize";
// Name (and long flag) of the `-e` switch: do not generate empty output
// files when splitting with `-n`.
static OPT_ELIDE_EMPTY_FILES: &str = "elide-empty-files";
// Name of the argument whose value is stored as the settings' `input`.
static ARG_INPUT: &str = "input";
// Name of the argument whose value is stored as the settings' `prefix`.
static ARG_PREFIX: &str = "prefix";
@ -128,6 +129,13 @@ pub fn uu_app<'a>() -> App<'a> {
"write to shell COMMAND file name is $FILE (Currently not implemented for Windows)",
),
)
.arg(
Arg::new(OPT_ELIDE_EMPTY_FILES)
.long(OPT_ELIDE_EMPTY_FILES)
.short('e')
.takes_value(false)
.help("do not generate empty output files with '-n'"),
)
.arg(
Arg::new(OPT_NUMERIC_SUFFIXES)
.short('d')
@ -285,6 +293,16 @@ struct Settings {
filter: Option<String>,
strategy: Strategy,
verbose: bool,
/// Whether to *not* produce empty files when using `-n`.
///
/// The `-n` command-line argument gives a specific number of
/// chunks into which the input files will be split. If the number
/// of chunks is greater than the number of bytes, and this is
/// `false`, then empty files will be created for the excess
/// chunks. If this is `true`, then empty files will not be
/// created.
elide_empty_files: bool,
}
/// An error when parsing settings from command-line arguments.
@ -352,6 +370,7 @@ impl Settings {
input: matches.value_of(ARG_INPUT).unwrap().to_owned(),
prefix: matches.value_of(ARG_PREFIX).unwrap().to_owned(),
filter: matches.value_of(OPT_FILTER).map(|s| s.to_owned()),
elide_empty_files: matches.is_present(OPT_ELIDE_EMPTY_FILES),
};
#[cfg(windows)]
if result.filter.is_some() {
@ -616,9 +635,24 @@ where
{
// Get the size of the input file in bytes and compute the number
// of bytes per chunk.
//
// If the requested number of chunks exceeds the number of bytes
// in the file *and* the `elide_empty_files` parameter is enabled,
// then behave as if the number of chunks was set to the number of
// bytes in the file. This ensures that we don't write empty
// files. Otherwise, just write the `num_chunks - num_bytes` empty
// files.
let metadata = metadata(&settings.input).unwrap();
let num_bytes = metadata.len();
let chunk_size = (num_bytes / (num_chunks as u64)) as usize;
let will_have_empty_files = settings.elide_empty_files && num_chunks as u64 > num_bytes;
let (num_chunks, chunk_size) = if will_have_empty_files {
let num_chunks = num_bytes as usize;
let chunk_size = 1;
(num_chunks, chunk_size)
} else {
let chunk_size = ((num_bytes / (num_chunks as u64)) as usize).max(1);
(num_chunks, chunk_size)
};
// This object is responsible for creating the filename for each chunk.
let mut filename_iterator = FilenameIterator::new(

View file

@ -2,7 +2,7 @@
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes
extern crate rand;
extern crate regex;
@ -526,3 +526,29 @@ fn test_include_newlines() {
at.open("xac").read_to_string(&mut s).unwrap();
assert_eq!(s, "5\n");
}
/// Without `-e`, requesting four chunks from the `threebytes.txt` fixture
/// must still yield four output files, the last of which is empty.
#[test]
fn test_allow_empty_files() {
    let (fixtures, mut cmd) = at_and_ucmd!();
    let argv = ["-n", "4", "threebytes.txt"];
    cmd.args(&argv).succeeds().no_stdout().no_stderr();

    // Each output file paired with the content it is expected to hold.
    let expected = [("xaa", "a"), ("xab", "b"), ("xac", "c"), ("xad", "")];
    for &(name, contents) in &expected {
        assert_eq!(fixtures.read(name), contents);
    }
}
/// With `-e`, requesting four chunks from the `threebytes.txt` fixture
/// must yield only the three non-empty output files; the fourth file must
/// not be created at all.
#[test]
fn test_elide_empty_files() {
    let (fixtures, mut cmd) = at_and_ucmd!();
    let argv = ["-e", "-n", "4", "threebytes.txt"];
    cmd.args(&argv).succeeds().no_stdout().no_stderr();

    // Each output file paired with the content it is expected to hold.
    let expected = [("xaa", "a"), ("xab", "b"), ("xac", "c")];
    for &(name, contents) in &expected {
        assert_eq!(fixtures.read(name), contents);
    }

    // The would-be empty chunk must have been elided.
    let surplus = fixtures.plus("xad");
    assert!(!surplus.exists());
}

1
tests/fixtures/split/threebytes.txt vendored Normal file
View file

@ -0,0 +1 @@
abc