mirror of
https://github.com/uutils/coreutils
synced 2024-11-16 01:38:04 +00:00
split: use ByteChunkWriter and LineChunkWriter
Replace `ByteSplitter` and `LineSplitter` with `ByteChunkWriter` and `LineChunkWriter` respectively. This results in a more maintainable design and an increase in the speed of splitting by lines.
This commit is contained in:
parent
ca7af808d5
commit
1d7e1b8732
3 changed files with 65 additions and 59 deletions
|
@ -16,13 +16,14 @@ use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
|
|||
use std::convert::TryFrom;
|
||||
use std::env;
|
||||
use std::fmt;
|
||||
use std::fs::{metadata, remove_file, File};
|
||||
use std::fs::{metadata, File};
|
||||
use std::io::{stdin, BufRead, BufReader, BufWriter, ErrorKind, Read, Write};
|
||||
use std::num::ParseIntError;
|
||||
use std::path::Path;
|
||||
use uucore::display::Quotable;
|
||||
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
||||
use uucore::error::{FromIo, UIoError, UResult, USimpleError, UUsageError};
|
||||
use uucore::parse_size::{parse_size, ParseSizeError};
|
||||
use uucore::uio_error;
|
||||
|
||||
static OPT_BYTES: &str = "bytes";
|
||||
static OPT_LINE_BYTES: &str = "line-bytes";
|
||||
|
@ -739,65 +740,47 @@ fn split(settings: &Settings) -> UResult<()> {
|
|||
Box::new(r) as Box<dyn Read>
|
||||
});
|
||||
|
||||
if let Strategy::Number(num_chunks) = settings.strategy {
|
||||
return split_into_n_chunks_by_byte(settings, &mut reader, num_chunks);
|
||||
}
|
||||
|
||||
let mut splitter: Box<dyn Splitter> = match settings.strategy {
|
||||
Strategy::Lines(chunk_size) => Box::new(LineSplitter::new(chunk_size)),
|
||||
Strategy::Bytes(chunk_size) | Strategy::LineBytes(chunk_size) => {
|
||||
Box::new(ByteSplitter::new(chunk_size))
|
||||
match settings.strategy {
|
||||
Strategy::Number(num_chunks) => {
|
||||
split_into_n_chunks_by_byte(settings, &mut reader, num_chunks)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// This object is responsible for creating the filename for each chunk.
|
||||
let mut filename_iterator = FilenameIterator::new(
|
||||
&settings.prefix,
|
||||
&settings.additional_suffix,
|
||||
settings.suffix_length,
|
||||
settings.numeric_suffix,
|
||||
);
|
||||
loop {
|
||||
// Get a new part file set up, and construct `writer` for it.
|
||||
let filename = filename_iterator
|
||||
.next()
|
||||
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
||||
let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
||||
|
||||
let bytes_consumed = splitter
|
||||
.consume(&mut reader, &mut writer)
|
||||
.map_err_context(|| "input/output error".to_string())?;
|
||||
writer
|
||||
.flush()
|
||||
.map_err_context(|| "error flushing to output file".to_string())?;
|
||||
|
||||
// If we didn't write anything we should clean up the empty file, and
|
||||
// break from the loop.
|
||||
if bytes_consumed == 0 {
|
||||
// The output file is only ever created if --filter isn't used.
|
||||
// Complicated, I know...
|
||||
if settings.filter.is_none() {
|
||||
remove_file(filename)
|
||||
.map_err_context(|| "error removing empty file".to_string())?;
|
||||
Strategy::Lines(chunk_size) => {
|
||||
let mut writer = LineChunkWriter::new(chunk_size, settings)
|
||||
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
||||
match std::io::copy(&mut reader, &mut writer) {
|
||||
Ok(_) => Ok(()),
|
||||
Err(e) => match e.kind() {
|
||||
// TODO Since the writer object controls the creation of
|
||||
// new files, we need to rely on the `std::io::Result`
|
||||
// returned by its `write()` method to communicate any
|
||||
// errors to this calling scope. If a new file cannot be
|
||||
// created because we have exceeded the number of
|
||||
// allowable filenames, we use `ErrorKind::Other` to
|
||||
// indicate that. A special error message needs to be
|
||||
// printed in that case.
|
||||
ErrorKind::Other => Err(USimpleError::new(1, "output file suffixes exhausted")),
|
||||
_ => Err(uio_error!(e, "input/output error")),
|
||||
},
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO It is silly to have the "creating file" message here
|
||||
// after the file has been already created. However, because
|
||||
// of the way the main loop has been written, an extra file
|
||||
// gets created and then deleted in the last iteration of the
|
||||
// loop. So we need to make sure we are not in that case when
|
||||
// printing this message.
|
||||
//
|
||||
// This is only here temporarily while we make some
|
||||
// improvements to the architecture of the main loop in this
|
||||
// function. In the future, it will move to a more appropriate
|
||||
// place---at the point where the file is actually created.
|
||||
if settings.verbose {
|
||||
println!("creating file {}", filename.quote());
|
||||
Strategy::Bytes(chunk_size) | Strategy::LineBytes(chunk_size) => {
|
||||
let mut writer = ByteChunkWriter::new(chunk_size, settings)
|
||||
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
||||
match std::io::copy(&mut reader, &mut writer) {
|
||||
Ok(_) => Ok(()),
|
||||
Err(e) => match e.kind() {
|
||||
// TODO Since the writer object controls the creation of
|
||||
// new files, we need to rely on the `std::io::Result`
|
||||
// returned by its `write()` method to communicate any
|
||||
// errors to this calling scope. If a new file cannot be
|
||||
// created because we have exceeded the number of
|
||||
// allowable filenames, we use `ErrorKind::Other` to
|
||||
// indicate that. A special error message needs to be
|
||||
// printed in that case.
|
||||
ErrorKind::Other => Err(USimpleError::new(1, "output file suffixes exhausted")),
|
||||
_ => Err(uio_error!(e, "input/output error")),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// *
|
||||
// * For the full copyright and license information, please view the LICENSE
|
||||
// * file that was distributed with this source code.
|
||||
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase fghij klmno pqrst uvwxyz
|
||||
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase fghij klmno pqrst uvwxyz fivelines
|
||||
extern crate rand;
|
||||
extern crate regex;
|
||||
|
||||
|
@ -449,3 +449,21 @@ fn test_invalid_suffix_length() {
|
|||
.no_stdout()
|
||||
.stderr_contains("invalid suffix length: 'xyz'");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_include_newlines() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
ucmd.args(&["-l", "2", "fivelines.txt"]).succeeds();
|
||||
|
||||
let mut s = String::new();
|
||||
at.open("xaa").read_to_string(&mut s).unwrap();
|
||||
assert_eq!(s, "1\n2\n");
|
||||
|
||||
let mut s = String::new();
|
||||
at.open("xab").read_to_string(&mut s).unwrap();
|
||||
assert_eq!(s, "3\n4\n");
|
||||
|
||||
let mut s = String::new();
|
||||
at.open("xac").read_to_string(&mut s).unwrap();
|
||||
assert_eq!(s, "5\n");
|
||||
}
|
||||
|
|
5
tests/fixtures/split/fivelines.txt
vendored
Normal file
5
tests/fixtures/split/fivelines.txt
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
Loading…
Reference in a new issue