mirror of
https://github.com/uutils/coreutils
synced 2024-11-16 09:48:03 +00:00
wc: compute number widths using total file sizes
Previously, individual file sizes were used to compute the number width, which would cause misalignment when the total has a greater number of digits, and is different from the behavior of GNU wc ``` $ ./target/debug/wc -w -l -m -c -L deny.toml GNUmakefile 95 422 3110 3110 85 deny.toml 349 865 6996 6996 196 GNUmakefile 444 1287 10106 10106 196 total $ wc -w -l -m -c -L deny.toml GNUmakefile 95 422 3110 3110 85 deny.toml 349 865 6996 6996 196 GNUmakefile 444 1287 10106 10106 196 total ```
This commit is contained in:
parent
d6fd701511
commit
e9131e2b7f
2 changed files with 69 additions and 118 deletions
|
@ -399,46 +399,6 @@ fn word_count_from_input(input: &Input, settings: &Settings) -> CountResult {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compute the number of digits needed to represent any count for this input.
|
|
||||||
///
|
|
||||||
/// If `input` is [`Input::Stdin`], then this function returns
|
|
||||||
/// [`MINIMUM_WIDTH`]. Otherwise, if metadata could not be read from
|
|
||||||
/// `input` then this function returns 1.
|
|
||||||
///
|
|
||||||
/// # Errors
|
|
||||||
///
|
|
||||||
/// This function will return an error if `input` is a [`Input::Path`]
|
|
||||||
/// and there is a problem accessing the metadata of the given `input`.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// A [`Input::Stdin`] gets a default minimum width:
|
|
||||||
///
|
|
||||||
/// ```rust,ignore
|
|
||||||
/// let input = Input::Stdin(StdinKind::Explicit);
|
|
||||||
/// assert_eq!(7, digit_width(input));
|
|
||||||
/// ```
|
|
||||||
fn digit_width(input: &Input) -> io::Result<usize> {
|
|
||||||
match input {
|
|
||||||
Input::Stdin(_) => Ok(MINIMUM_WIDTH),
|
|
||||||
Input::Path(filename) => {
|
|
||||||
let path = Path::new(filename);
|
|
||||||
let metadata = fs::metadata(path)?;
|
|
||||||
if metadata.is_file() {
|
|
||||||
// TODO We are now computing the number of bytes in a file
|
|
||||||
// twice: once here and once in `WordCount::from_line()` (or
|
|
||||||
// in `count_bytes_fast()` if that function is called
|
|
||||||
// instead). See GitHub issue #2201.
|
|
||||||
let num_bytes = metadata.len();
|
|
||||||
let num_digits = num_bytes.to_string().len();
|
|
||||||
Ok(num_digits)
|
|
||||||
} else {
|
|
||||||
Ok(MINIMUM_WIDTH)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Compute the number of digits needed to represent all counts in all inputs.
|
/// Compute the number of digits needed to represent all counts in all inputs.
|
||||||
///
|
///
|
||||||
/// `inputs` may include zero or more [`Input::Stdin`] entries, each of
|
/// `inputs` may include zero or more [`Input::Stdin`] entries, each of
|
||||||
|
@ -446,52 +406,45 @@ fn digit_width(input: &Input) -> io::Result<usize> {
|
||||||
/// entry causes this function to return a width that is at least
|
/// entry causes this function to return a width that is at least
|
||||||
/// [`MINIMUM_WIDTH`].
|
/// [`MINIMUM_WIDTH`].
|
||||||
///
|
///
|
||||||
/// If `input` is empty, then this function returns 1. If file metadata
|
/// If `input` is empty, or if only one number needs to be printed (for just
|
||||||
/// could not be read from any of the [`Input::Path`] inputs and there
|
/// one file) then this function is optimized to return 1 without making any
|
||||||
/// are no [`Input::Stdin`] inputs, then this function returns 1.
|
/// calls to get file metadata.
|
||||||
///
|
///
|
||||||
/// If there is a problem accessing the metadata, this function will
|
/// If file metadata could not be read from any of the [`Input::Path`] input,
|
||||||
/// silently ignore the error and assume that the number of digits
|
/// that input does not affect number width computation
|
||||||
/// needed to display the counts for that file is 1.
|
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// Otherwise, the file sizes in the file metadata are summed and the number of
|
||||||
///
|
/// digits in that total size is returned as the number width
|
||||||
/// An empty slice implies a width of 1:
|
fn compute_number_width(inputs: &[Input], settings: &Settings) -> usize {
|
||||||
///
|
if inputs.is_empty() || (inputs.len() == 1 && settings.number_enabled() == 1) {
|
||||||
/// ```rust,ignore
|
return 1;
|
||||||
/// assert_eq!(1, max_width(&vec![]));
|
}
|
||||||
/// ```
|
|
||||||
///
|
let mut minimum_width = 1;
|
||||||
/// The presence of [`Input::Stdin`] implies a minimum width:
|
let mut total = 0;
|
||||||
///
|
|
||||||
/// ```rust,ignore
|
|
||||||
/// let inputs = vec![Input::Stdin(StdinKind::Explicit)];
|
|
||||||
/// assert_eq!(7, max_width(&inputs));
|
|
||||||
/// ```
|
|
||||||
fn max_width(inputs: &[Input]) -> usize {
|
|
||||||
let mut result = 1;
|
|
||||||
for input in inputs {
|
for input in inputs {
|
||||||
if let Ok(n) = digit_width(input) {
|
match input {
|
||||||
result = result.max(n);
|
Input::Stdin(_) => {
|
||||||
|
minimum_width = MINIMUM_WIDTH;
|
||||||
|
}
|
||||||
|
Input::Path(path) => {
|
||||||
|
if let Ok(meta) = fs::metadata(path) {
|
||||||
|
if meta.is_file() {
|
||||||
|
total += meta.len();
|
||||||
|
} else {
|
||||||
|
minimum_width = MINIMUM_WIDTH;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result
|
|
||||||
|
max(minimum_width, total.to_string().len())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> {
|
fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> {
|
||||||
// Compute the width, in digits, to use when formatting counts.
|
let number_width = compute_number_width(inputs, settings);
|
||||||
//
|
|
||||||
// The width is the number of digits needed to print the number of
|
|
||||||
// bytes in the largest file. This is true regardless of whether
|
|
||||||
// the `settings` indicate that the bytes will be displayed.
|
|
||||||
//
|
|
||||||
// If we only need to display a single number, set this to 0 to
|
|
||||||
// prevent leading spaces.
|
|
||||||
let max_width = if settings.number_enabled() <= 1 {
|
|
||||||
0
|
|
||||||
} else {
|
|
||||||
max_width(inputs)
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut total_word_count = WordCount::default();
|
let mut total_word_count = WordCount::default();
|
||||||
|
|
||||||
|
@ -517,7 +470,7 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> {
|
||||||
};
|
};
|
||||||
total_word_count += word_count;
|
total_word_count += word_count;
|
||||||
let result = word_count.with_title(input.to_title());
|
let result = word_count.with_title(input.to_title());
|
||||||
if let Err(err) = print_stats(settings, &result, max_width) {
|
if let Err(err) = print_stats(settings, &result, number_width) {
|
||||||
show!(USimpleError::new(
|
show!(USimpleError::new(
|
||||||
1,
|
1,
|
||||||
format!(
|
format!(
|
||||||
|
@ -534,7 +487,7 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> {
|
||||||
|
|
||||||
if num_inputs > 1 {
|
if num_inputs > 1 {
|
||||||
let total_result = total_word_count.with_title(Some("total".as_ref()));
|
let total_result = total_word_count.with_title(Some("total".as_ref()));
|
||||||
if let Err(err) = print_stats(settings, &total_result, max_width) {
|
if let Err(err) = print_stats(settings, &total_result, number_width) {
|
||||||
show!(USimpleError::new(
|
show!(USimpleError::new(
|
||||||
1,
|
1,
|
||||||
format!("failed to print total: {}", err)
|
format!("failed to print total: {}", err)
|
||||||
|
@ -547,56 +500,32 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_stats(settings: &Settings, result: &TitledWordCount, min_width: usize) -> io::Result<()> {
|
fn print_stats(
|
||||||
let stdout = io::stdout();
|
settings: &Settings,
|
||||||
let mut stdout_lock = stdout.lock();
|
result: &TitledWordCount,
|
||||||
|
number_width: usize,
|
||||||
let mut is_first: bool = true;
|
) -> io::Result<()> {
|
||||||
|
let mut columns = Vec::new();
|
||||||
|
|
||||||
if settings.show_lines {
|
if settings.show_lines {
|
||||||
if !is_first {
|
columns.push(format!("{:1$}", result.count.lines, number_width));
|
||||||
write!(stdout_lock, " ")?;
|
|
||||||
}
|
|
||||||
write!(stdout_lock, "{:1$}", result.count.lines, min_width)?;
|
|
||||||
is_first = false;
|
|
||||||
}
|
}
|
||||||
if settings.show_words {
|
if settings.show_words {
|
||||||
if !is_first {
|
columns.push(format!("{:1$}", result.count.words, number_width));
|
||||||
write!(stdout_lock, " ")?;
|
|
||||||
}
|
|
||||||
write!(stdout_lock, "{:1$}", result.count.words, min_width)?;
|
|
||||||
is_first = false;
|
|
||||||
}
|
}
|
||||||
if settings.show_chars {
|
if settings.show_chars {
|
||||||
if !is_first {
|
columns.push(format!("{:1$}", result.count.chars, number_width));
|
||||||
write!(stdout_lock, " ")?;
|
|
||||||
}
|
|
||||||
write!(stdout_lock, "{:1$}", result.count.chars, min_width)?;
|
|
||||||
is_first = false;
|
|
||||||
}
|
}
|
||||||
if settings.show_bytes {
|
if settings.show_bytes {
|
||||||
if !is_first {
|
columns.push(format!("{:1$}", result.count.bytes, number_width));
|
||||||
write!(stdout_lock, " ")?;
|
|
||||||
}
|
|
||||||
write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?;
|
|
||||||
is_first = false;
|
|
||||||
}
|
}
|
||||||
if settings.show_max_line_length {
|
if settings.show_max_line_length {
|
||||||
if !is_first {
|
columns.push(format!("{:1$}", result.count.max_line_length, number_width));
|
||||||
write!(stdout_lock, " ")?;
|
|
||||||
}
|
|
||||||
write!(
|
|
||||||
stdout_lock,
|
|
||||||
"{:1$}",
|
|
||||||
result.count.max_line_length, min_width
|
|
||||||
)?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(title) = result.title {
|
if let Some(title) = result.title {
|
||||||
writeln!(stdout_lock, " {}", title.maybe_quote())?;
|
columns.push(title.maybe_quote().to_string());
|
||||||
} else {
|
|
||||||
writeln!(stdout_lock)?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
writeln!(io::stdout().lock(), "{}", columns.join(" "))
|
||||||
}
|
}
|
||||||
|
|
|
@ -181,7 +181,8 @@ fn test_file_one_long_word() {
|
||||||
.stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n");
|
.stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Test that the number of bytes in the file dictate the display width.
|
/// Test that the total size of all the files in the input dictates
|
||||||
|
/// the display width.
|
||||||
///
|
///
|
||||||
/// The width in digits of any count is the width in digits of the
|
/// The width in digits of any count is the width in digits of the
|
||||||
/// number of bytes in the file, regardless of whether the number of
|
/// number of bytes in the file, regardless of whether the number of
|
||||||
|
@ -203,6 +204,27 @@ fn test_file_bytes_dictate_width() {
|
||||||
.args(&["-lw", "emptyfile.txt"])
|
.args(&["-lw", "emptyfile.txt"])
|
||||||
.run()
|
.run()
|
||||||
.stdout_is("0 0 emptyfile.txt\n");
|
.stdout_is("0 0 emptyfile.txt\n");
|
||||||
|
|
||||||
|
// lorem_ipsum.txt contains 772 bytes, and alice_in_wonderland.txt contains
|
||||||
|
// 302 bytes. The total is 1074 bytes, which has a width of 4
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-lwc", "alice_in_wonderland.txt", "lorem_ipsum.txt"])
|
||||||
|
.run()
|
||||||
|
.stdout_is(
|
||||||
|
" 5 57 302 alice_in_wonderland.txt\n 13 109 772 \
|
||||||
|
lorem_ipsum.txt\n 18 166 1074 total\n",
|
||||||
|
);
|
||||||
|
|
||||||
|
// . is a directory, so minimum_width should get set to 7
|
||||||
|
#[cfg(not(windows))]
|
||||||
|
const STDOUT: &str = " 0 0 0 emptyfile.txt\n 0 0 0 \
|
||||||
|
.\n 0 0 0 total\n";
|
||||||
|
#[cfg(windows)]
|
||||||
|
const STDOUT: &str = " 0 0 0 emptyfile.txt\n 0 0 0 total\n";
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-lwc", "emptyfile.txt", "."])
|
||||||
|
.run()
|
||||||
|
.stdout_is(STDOUT);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Test that getting counts from a directory is an error.
|
/// Test that getting counts from a directory is an error.
|
||||||
|
|
Loading…
Reference in a new issue