wc: compute number widths using total file sizes

Previously, individual file sizes were used to compute the number width, which
would cause misalignment when the total has a greater number of digits, and is
different from the behavior of GNU wc

```
$ ./target/debug/wc -w -l -m -c -L deny.toml GNUmakefile
  95  422 3110 3110   85 deny.toml
 349  865 6996 6996  196 GNUmakefile
 444 1287 10106 10106  196 total
$ wc -w -l -m -c -L deny.toml GNUmakefile
   95   422  3110  3110    85 deny.toml
  349   865  6996  6996   196 GNUmakefile
  444  1287 10106 10106   196 total
```
This commit is contained in:
Ackerley Tng 2022-03-23 19:51:27 +08:00 committed by Sylvestre Ledru
parent d6fd701511
commit e9131e2b7f
2 changed files with 69 additions and 118 deletions

View file

@ -399,46 +399,6 @@ fn word_count_from_input(input: &Input, settings: &Settings) -> CountResult {
} }
} }
/// Compute the number of digits needed to represent any count for this input.
///
/// If `input` is [`Input::Stdin`], then this function returns
/// [`MINIMUM_WIDTH`]. Otherwise, if metadata could not be read from
/// `input` then this function returns 1.
///
/// # Errors
///
/// This function will return an error if `input` is a [`Input::Path`]
/// and there is a problem accessing the metadata of the given `input`.
///
/// # Examples
///
/// A [`Input::Stdin`] gets a default minimum width:
///
/// ```rust,ignore
/// let input = Input::Stdin(StdinKind::Explicit);
/// assert_eq!(7, digit_width(input));
/// ```
fn digit_width(input: &Input) -> io::Result<usize> {
match input {
Input::Stdin(_) => Ok(MINIMUM_WIDTH),
Input::Path(filename) => {
let path = Path::new(filename);
let metadata = fs::metadata(path)?;
if metadata.is_file() {
// TODO We are now computing the number of bytes in a file
// twice: once here and once in `WordCount::from_line()` (or
// in `count_bytes_fast()` if that function is called
// instead). See GitHub issue #2201.
let num_bytes = metadata.len();
let num_digits = num_bytes.to_string().len();
Ok(num_digits)
} else {
Ok(MINIMUM_WIDTH)
}
}
}
}
/// Compute the number of digits needed to represent all counts in all inputs. /// Compute the number of digits needed to represent all counts in all inputs.
/// ///
/// `inputs` may include zero or more [`Input::Stdin`] entries, each of /// `inputs` may include zero or more [`Input::Stdin`] entries, each of
@ -446,52 +406,45 @@ fn digit_width(input: &Input) -> io::Result<usize> {
/// entry causes this function to return a width that is at least /// entry causes this function to return a width that is at least
/// [`MINIMUM_WIDTH`]. /// [`MINIMUM_WIDTH`].
/// ///
/// If `input` is empty, then this function returns 1. If file metadata /// If `input` is empty, or if only one number needs to be printed (for just
/// could not be read from any of the [`Input::Path`] inputs and there /// one file) then this function is optimized to return 1 without making any
/// are no [`Input::Stdin`] inputs, then this function returns 1. /// calls to get file metadata.
/// ///
/// If there is a problem accessing the metadata, this function will /// If file metadata could not be read from any of the [`Input::Path`] input,
/// silently ignore the error and assume that the number of digits /// that input does not affect number width computation
/// needed to display the counts for that file is 1.
/// ///
/// # Examples /// Otherwise, the file sizes in the file metadata are summed and the number of
/// /// digits in that total size is returned as the number width
/// An empty slice implies a width of 1: fn compute_number_width(inputs: &[Input], settings: &Settings) -> usize {
/// if inputs.is_empty() || (inputs.len() == 1 && settings.number_enabled() == 1) {
/// ```rust,ignore return 1;
/// assert_eq!(1, max_width(&vec![])); }
/// ```
/// let mut minimum_width = 1;
/// The presence of [`Input::Stdin`] implies a minimum width: let mut total = 0;
///
/// ```rust,ignore
/// let inputs = vec![Input::Stdin(StdinKind::Explicit)];
/// assert_eq!(7, max_width(&inputs));
/// ```
fn max_width(inputs: &[Input]) -> usize {
let mut result = 1;
for input in inputs { for input in inputs {
if let Ok(n) = digit_width(input) { match input {
result = result.max(n); Input::Stdin(_) => {
minimum_width = MINIMUM_WIDTH;
}
Input::Path(path) => {
if let Ok(meta) = fs::metadata(path) {
if meta.is_file() {
total += meta.len();
} else {
minimum_width = MINIMUM_WIDTH;
}
}
}
} }
} }
result
max(minimum_width, total.to_string().len())
} }
fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> {
// Compute the width, in digits, to use when formatting counts. let number_width = compute_number_width(inputs, settings);
//
// The width is the number of digits needed to print the number of
// bytes in the largest file. This is true regardless of whether
// the `settings` indicate that the bytes will be displayed.
//
// If we only need to display a single number, set this to 0 to
// prevent leading spaces.
let max_width = if settings.number_enabled() <= 1 {
0
} else {
max_width(inputs)
};
let mut total_word_count = WordCount::default(); let mut total_word_count = WordCount::default();
@ -517,7 +470,7 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> {
}; };
total_word_count += word_count; total_word_count += word_count;
let result = word_count.with_title(input.to_title()); let result = word_count.with_title(input.to_title());
if let Err(err) = print_stats(settings, &result, max_width) { if let Err(err) = print_stats(settings, &result, number_width) {
show!(USimpleError::new( show!(USimpleError::new(
1, 1,
format!( format!(
@ -534,7 +487,7 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> {
if num_inputs > 1 { if num_inputs > 1 {
let total_result = total_word_count.with_title(Some("total".as_ref())); let total_result = total_word_count.with_title(Some("total".as_ref()));
if let Err(err) = print_stats(settings, &total_result, max_width) { if let Err(err) = print_stats(settings, &total_result, number_width) {
show!(USimpleError::new( show!(USimpleError::new(
1, 1,
format!("failed to print total: {}", err) format!("failed to print total: {}", err)
@ -547,56 +500,32 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> {
Ok(()) Ok(())
} }
fn print_stats(settings: &Settings, result: &TitledWordCount, min_width: usize) -> io::Result<()> { fn print_stats(
let stdout = io::stdout(); settings: &Settings,
let mut stdout_lock = stdout.lock(); result: &TitledWordCount,
number_width: usize,
let mut is_first: bool = true; ) -> io::Result<()> {
let mut columns = Vec::new();
if settings.show_lines { if settings.show_lines {
if !is_first { columns.push(format!("{:1$}", result.count.lines, number_width));
write!(stdout_lock, " ")?;
}
write!(stdout_lock, "{:1$}", result.count.lines, min_width)?;
is_first = false;
} }
if settings.show_words { if settings.show_words {
if !is_first { columns.push(format!("{:1$}", result.count.words, number_width));
write!(stdout_lock, " ")?;
}
write!(stdout_lock, "{:1$}", result.count.words, min_width)?;
is_first = false;
} }
if settings.show_chars { if settings.show_chars {
if !is_first { columns.push(format!("{:1$}", result.count.chars, number_width));
write!(stdout_lock, " ")?;
}
write!(stdout_lock, "{:1$}", result.count.chars, min_width)?;
is_first = false;
} }
if settings.show_bytes { if settings.show_bytes {
if !is_first { columns.push(format!("{:1$}", result.count.bytes, number_width));
write!(stdout_lock, " ")?;
}
write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?;
is_first = false;
} }
if settings.show_max_line_length { if settings.show_max_line_length {
if !is_first { columns.push(format!("{:1$}", result.count.max_line_length, number_width));
write!(stdout_lock, " ")?;
}
write!(
stdout_lock,
"{:1$}",
result.count.max_line_length, min_width
)?;
} }
if let Some(title) = result.title { if let Some(title) = result.title {
writeln!(stdout_lock, " {}", title.maybe_quote())?; columns.push(title.maybe_quote().to_string());
} else {
writeln!(stdout_lock)?;
} }
Ok(()) writeln!(io::stdout().lock(), "{}", columns.join(" "))
} }

View file

@ -181,7 +181,8 @@ fn test_file_one_long_word() {
.stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n"); .stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n");
} }
/// Test that the number of bytes in the file dictate the display width. /// Test that the total size of all the files in the input dictates
/// the display width.
/// ///
/// The width in digits of any count is the width in digits of the /// The width in digits of any count is the width in digits of the
/// number of bytes in the file, regardless of whether the number of /// number of bytes in the file, regardless of whether the number of
@ -203,6 +204,27 @@ fn test_file_bytes_dictate_width() {
.args(&["-lw", "emptyfile.txt"]) .args(&["-lw", "emptyfile.txt"])
.run() .run()
.stdout_is("0 0 emptyfile.txt\n"); .stdout_is("0 0 emptyfile.txt\n");
// lorem_ipsum.txt contains 772 bytes, and alice_in_wonderland.txt contains
// 302 bytes. The total is 1074 bytes, which has a width of 4
new_ucmd!()
.args(&["-lwc", "alice_in_wonderland.txt", "lorem_ipsum.txt"])
.run()
.stdout_is(
" 5 57 302 alice_in_wonderland.txt\n 13 109 772 \
lorem_ipsum.txt\n 18 166 1074 total\n",
);
// . is a directory, so minimum_width should get set to 7
#[cfg(not(windows))]
const STDOUT: &str = " 0 0 0 emptyfile.txt\n 0 0 0 \
.\n 0 0 0 total\n";
#[cfg(windows)]
const STDOUT: &str = " 0 0 0 emptyfile.txt\n 0 0 0 total\n";
new_ucmd!()
.args(&["-lwc", "emptyfile.txt", "."])
.run()
.stdout_is(STDOUT);
} }
/// Test that getting counts from a directory is an error. /// Test that getting counts from a directory is an error.