coreutils/tests/by-util/test_wc.rs

use crate::common::util::*;

// spell-checker:ignore (flags) lwmcL clmwL ; (path) bogusfile emptyfile manyemptylines moby notrailingnewline onelongemptyline onelongword

/// Test that `-c` reports the exact number of bytes piped in on `stdin`
/// for a range of input sizes.
///
/// The sizes include zero, a few small values, and values at and just
/// around multiples of 16 KiB.
#[test]
fn test_count_bytes_large_stdin() {
    // NOTE(review): the 16 KiB neighbors presumably straddle an internal
    // buffer size used by `wc` -- confirm against the implementation.
    for &n in &[
        0,
        1,
        42,
        16 * 1024 - 7,
        16 * 1024 - 1,
        16 * 1024,
        16 * 1024 + 1,
        16 * 1024 + 3,
        32 * 1024,
        64 * 1024,
        80 * 1024,
        96 * 1024,
        112 * 1024,
        128 * 1024,
    ] {
        let data = vec_of_size(n);
        // With a single count requested, the output is the bare number
        // followed by a newline.
        let expected = format!("{}\n", n);
        new_ucmd!()
            .args(&["-c"])
            .pipe_in(data)
            .succeeds()
            // `as_bytes()` already yields a `&[u8]`; no extra borrow needed.
            .stdout_is_bytes(expected.as_bytes());
    }
}

/// Test the default output when reading from `stdin` with no arguments.
///
/// Three counts are printed, each right-aligned in a seven-character
/// field, and no name follows them.
#[test]
fn test_stdin_default() {
    new_ucmd!()
        .pipe_in_fixture("lorem_ipsum.txt")
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("     13     109     772\n");
}

/// Test that naming `stdin` explicitly with `-` echoes the `-` after the
/// counts in the output row.
#[test]
fn test_stdin_explicit() {
    new_ucmd!()
        .pipe_in_fixture("lorem_ipsum.txt")
        .arg("-")
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("     13     109     772 -\n");
}

/// Test all five counts (`-lwmcL`) on the `UTF_8_test.txt` fixture piped
/// in on `stdin`.
///
/// The expected values pin the current behavior, which is known to differ
/// from GNU `wc` (see the TODO below).
#[test]
fn test_utf8() {
    // GNU returns "    300    2086   22219   22781      79"
    //
    // TODO: we should fix the word, character, and byte count to
    // match the behavior of GNU wc
    new_ucmd!()
        .args(&["-lwmcL"])
        .pipe_in_fixture("UTF_8_test.txt")
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("    300    4969   22781   22213      79\n");
}

/// Regression test for `-L` (maximum line length) when the longest line
/// is the last one and has no trailing newline.
///
/// The input is an empty line followed by `123456` without a final
/// newline, so the expected maximum length is 6.
#[test]
fn test_stdin_line_len_regression() {
    new_ucmd!()
        .args(&["-L"])
        .pipe_in("\n123456")
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("6\n");
}

/// Test that `-c` alone on `stdin` prints just the byte count, with no
/// leading padding.
#[test]
fn test_stdin_only_bytes() {
    new_ucmd!()
        .args(&["-c"])
        .pipe_in_fixture("lorem_ipsum.txt")
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("772\n");
}

/// Test all five counts on `stdin`, with each flag passed as a separate
/// argument.
///
/// Every count is right-aligned in a seven-character field.
#[test]
fn test_stdin_all_counts() {
    new_ucmd!()
        .args(&["-c", "-m", "-l", "-L", "-w"])
        .pipe_in_fixture("alice_in_wonderland.txt")
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("      5      57     302     302      66\n");
}

/// Test the default output for a single named file: three counts, each
/// in a four-character field, followed by the file name.
#[test]
fn test_single_default() {
    new_ucmd!()
        .arg("moby_dick.txt")
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("  18  204 1115 moby_dick.txt\n");
}

/// Test that `-l` on a single named file prints the unpadded line count
/// followed by the file name.
#[test]
fn test_single_only_lines() {
    new_ucmd!()
        .args(&["-l", "moby_dick.txt"])
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("18 moby_dick.txt\n");
}

/// Test all five counts on a single named file, with each flag passed as
/// a separate argument.
///
/// Every count is right-aligned in a three-character field and the file
/// name follows the counts.
#[test]
fn test_single_all_counts() {
    new_ucmd!()
        .args(&["-c", "-l", "-L", "-m", "-w", "alice_in_wonderland.txt"])
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("  5  57 302 302  66 alice_in_wonderland.txt\n");
}

/// Test the default output for several named files.
///
/// One row is printed per file, in argument order, followed by a `total`
/// row. All rows share the same four-character column width.
#[test]
fn test_multiple_default() {
    new_ucmd!()
        .args(&[
            "lorem_ipsum.txt",
            "moby_dick.txt",
            "alice_in_wonderland.txt",
        ])
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        // One literal per output row keeps the column alignment visible.
        .stdout_is(concat!(
            "  13  109  772 lorem_ipsum.txt\n",
            "  18  204 1115 moby_dick.txt\n",
            "   5   57  302 alice_in_wonderland.txt\n",
            "  36  370 2189 total\n",
        ));
}

/// Test for an empty file.
///
/// All five counts are zero and are printed without padding.
#[test]
fn test_file_empty() {
    new_ucmd!()
        .args(&["-clmwL", "emptyfile.txt"])
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("0 0 0 0 0 emptyfile.txt\n");
}

/// Test for a file containing a single non-whitespace character
/// *without* a trailing newline.
#[test]
fn test_file_single_line_no_trailing_newline() {
    new_ucmd!()
        .args(&["-clmwL", "notrailingnewline.txt"])
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("1 1 2 2 1 notrailingnewline.txt\n");
}

/// Test for a file that has 100 empty lines (that is, the contents of
/// the file are the newline character repeated one hundred times).
///
/// Every count is right-aligned in a three-character field.
#[test]
fn test_file_many_empty_lines() {
    new_ucmd!()
        .args(&["-clmwL", "manyemptylines.txt"])
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("100   0 100 100   0 manyemptylines.txt\n");
}

/// Test for a file that has one long line comprising only spaces.
///
/// The word count is zero, and every count is right-aligned in a
/// five-character field.
#[test]
fn test_file_one_long_line_only_spaces() {
    new_ucmd!()
        .args(&["-clmwL", "onelongemptyline.txt"])
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("    1     0 10001 10001 10000 onelongemptyline.txt\n");
}

/// Test for a file that has one long line comprising a single "word".
///
/// The word count is one, and every count is right-aligned in a
/// five-character field.
#[test]
fn test_file_one_long_word() {
    new_ucmd!()
        .args(&["-clmwL", "onelongword.txt"])
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("    1     1 10001 10001 10000 onelongword.txt\n");
}

/// Test that the number of bytes in the file dictates the display width.
///
/// The width in digits of any count is the width in digits of the
/// number of bytes in the file, regardless of whether the number of
/// bytes is displayed.
#[test]
fn test_file_bytes_dictate_width() {
    // This file has 10,001 bytes. Five digits are required to
    // represent that. Even though the number of lines is 1 and the
    // number of words is 0, each of those counts is formatted with
    // five characters, filled with whitespace.
    new_ucmd!()
        .args(&["-lw", "onelongemptyline.txt"])
        // `succeeds()` also asserts a zero exit status, which `run()` does not.
        .succeeds()
        .stdout_is("    1     0 onelongemptyline.txt\n");

    // This file has zero bytes. Only one digit is required to
    // represent that.
    new_ucmd!()
        .args(&["-lw", "emptyfile.txt"])
        .succeeds()
        .stdout_is("0 0 emptyfile.txt\n");
}

/// Test that asking for the counts of a directory fails.
///
/// The command must exit unsuccessfully, report `Is a directory` on
/// `stderr`, and still print a row of zero counts for the directory on
/// `stdout`.
#[test]
fn test_read_from_directory_error() {
    // TODO To match GNU `wc`, the `stdout` should be:
    //
    //     "      0       0       0 .\n"
    //
    new_ucmd!()
        .arg(".")
        .fails()
        .stderr_contains(".: Is a directory\n")
        .stdout_is("0 0 0 .\n");
}

/// Test that asking for the counts of a file that does not exist fails
/// and reports `No such file or directory` on `stderr`.
#[test]
fn test_read_from_nonexistent_file() {
    new_ucmd!()
        .arg("bogusfile")
        .fails()
        .stderr_contains("bogusfile: No such file or directory\n");
}
tests ~ fix 'edition=2018' module import errors - ref: <https://users.rust-lang.org/t/imports-can-only-refer-to-extern-crate-names/24388> @@ <https://archive.is/iCaXp> 2020-05-25 17:05:26 +00:00			`use crate::common::util::*;`
rewrite tests for cargo compat, decoupled directory, output handling 2015-11-16 05:25:01 +00:00
tests ~ refactor/polish spelling (comments, names, and exceptions) 2021-05-30 05:10:54 +00:00			`// spell-checker:ignore (flags) lwmcL clmwL ; (path) bogusfile emptyfile manyemptylines moby notrailingnewline onelongemptyline onelongword`

cat: Unrevert splice patch (#2020) * cat: Unrevert splice patch * cat: Add fifo test * cat: Add tests for error cases * cat: Add tests for character devices * wc: Make sure we handle short splice writes * cat: Fix tests for 1.40.0 compiler * cat: Run rustfmt on test_cat.rs * Run 'cargo +1.40.0 update' 2021-04-10 20:19:53 +00:00			`#[test]`
			`fn test_count_bytes_large_stdin() {`
			`for &n in &[`
			`0,`
			`1,`
			`42,`
			`16 * 1024 - 7,`
			`16 * 1024 - 1,`
			`16 * 1024,`
			`16 * 1024 + 1,`
			`16 * 1024 + 3,`
			`32 * 1024,`
			`64 * 1024,`
			`80 * 1024,`
			`96 * 1024,`
			`112 * 1024,`
			`128 * 1024,`
			`] {`
			`let data = vec_of_size(n);`
			`let expected = format!("{}\n", n);`
			`new_ucmd!()`
			`.args(&["-c"])`
			`.pipe_in(data)`
			`.succeeds()`
			`.stdout_is_bytes(&expected.as_bytes());`
			`}`
			`}`

rewrite tests for cargo compat, decoupled directory, output handling 2015-11-16 05:25:01 +00:00			`#[test]`
			`fn test_stdin_default() {`
rustfmt the tests 2020-04-13 18:36:03 +00:00			`new_ucmd!()`
			`.pipe_in_fixture("lorem_ipsum.txt")`
			`.run()`
wc: compute min width to format counts up front Fix two issues with the string formatting width for counts displayed by `wc`. First, the output was previously not using the default minimum width (seven characters) when reading from `stdin`. This commit corrects this behavior to match GNU `wc`. For example, $ cat alice_in_wonderland.txt \| wc 5 57 302 Second, if at least 10^7 bytes were read from `stdin` after reading from a smaller regular file, then every output row would have width 8. This disagrees with GNU `wc`, in which only the `stdin` row and the total row would have width 8. This commit corrects this behavior to match GNU `wc`. For example, $ printf "%.0s0" {1..10000000} \| wc emptyfile.txt - 0 0 0 emptyfile.txt 0 1 10000000 0 1 10000000 total Fixes #2186. 2021-05-07 19:07:17 +00:00			`.stdout_is(" 13 109 772\n");`
rewrite tests for cargo compat, decoupled directory, output handling 2015-11-16 05:25:01 +00:00			`}`

wc: emit '-' in ouput when set on command-line When stdin is explicitly specified on the command-line with '-', emit it in the output stats to match GNU wc output. Fixes #2188. 2021-05-09 13:42:55 +00:00			`#[test]`
			`fn test_stdin_explicit() {`
			`new_ucmd!()`
			`.pipe_in_fixture("lorem_ipsum.txt")`
			`.arg("-")`
			`.run()`
wc: compute min width to format counts up front Fix two issues with the string formatting width for counts displayed by `wc`. First, the output was previously not using the default minimum width (seven characters) when reading from `stdin`. This commit corrects this behavior to match GNU `wc`. For example, $ cat alice_in_wonderland.txt \| wc 5 57 302 Second, if at least 10^7 bytes were read from `stdin` after reading from a smaller regular file, then every output row would have width 8. This disagrees with GNU `wc`, in which only the `stdin` row and the total row would have width 8. This commit corrects this behavior to match GNU `wc`. For example, $ printf "%.0s0" {1..10000000} \| wc emptyfile.txt - 0 0 0 emptyfile.txt 0 1 10000000 0 1 10000000 total Fixes #2186. 2021-05-07 19:07:17 +00:00			`.stdout_is(" 13 109 772 -\n");`
wc: emit '-' in ouput when set on command-line When stdin is explicitly specified on the command-line with '-', emit it in the output stats to match GNU wc output. Fixes #2188. 2021-05-09 13:42:55 +00:00			`}`

bug(wc): Add a test for unexpected behavior (#1723) 2021-02-16 12:36:49 +00:00			`#[test]`
			`fn test_utf8() {`
			`new_ucmd!()`
			`.args(&["-lwmcL"])`
			`.pipe_in_fixture("UTF_8_test.txt")`
			`.run()`
wc: compute min width to format counts up front Fix two issues with the string formatting width for counts displayed by `wc`. First, the output was previously not using the default minimum width (seven characters) when reading from `stdin`. This commit corrects this behavior to match GNU `wc`. For example, $ cat alice_in_wonderland.txt \| wc 5 57 302 Second, if at least 10^7 bytes were read from `stdin` after reading from a smaller regular file, then every output row would have width 8. This disagrees with GNU `wc`, in which only the `stdin` row and the total row would have width 8. This commit corrects this behavior to match GNU `wc`. For example, $ printf "%.0s0" {1..10000000} \| wc emptyfile.txt - 0 0 0 emptyfile.txt 0 1 10000000 0 1 10000000 total Fixes #2186. 2021-05-07 19:07:17 +00:00			`.stdout_is(" 300 4969 22781 22213 79\n");`
			`// GNU returns " 300 2086 22219 22781 79"`
			`//`
			`// TODO: we should fix the word, character, and byte count to`
			`// match the behavior of GNU wc`
bug(wc): Add a test for unexpected behavior (#1723) 2021-02-16 12:36:49 +00:00			`}`

Fixed wc -L no end of line LF bug (#1714) 2021-02-08 20:54:48 +00:00			`#[test]`
			`fn test_stdin_line_len_regression() {`
			`new_ucmd!()`
			`.args(&["-L"])`
			`.pipe_in("\n123456")`
			`.run()`
wc: Don't read() if we only need to count number of bytes (Version 2) (#1851) * wc: Don't read() if we only need to count number of bytes * Resolve a few code review comments * Use write macros instead of print * Fix wc tests in case only one thing is printed * wc: Fix style * wc: Use return value of first splice rather than second * wc: Make main loop more readable * wc: Don't unwrap on failed write to stdout * wc: Increment error count when stats fail to print * Re-add Cargo.lock 2021-03-30 18:53:02 +00:00			`.stdout_is("6\n");`
Fixed wc -L no end of line LF bug (#1714) 2021-02-08 20:54:48 +00:00			`}`

rewrite tests for cargo compat, decoupled directory, output handling 2015-11-16 05:25:01 +00:00			`#[test]`
			`fn test_stdin_only_bytes() {`
rustfmt the tests 2020-04-13 18:36:03 +00:00			`new_ucmd!()`
			`.args(&["-c"])`
			`.pipe_in_fixture("lorem_ipsum.txt")`
			`.run()`
wc: Don't read() if we only need to count number of bytes (Version 2) (#1851) * wc: Don't read() if we only need to count number of bytes * Resolve a few code review comments * Use write macros instead of print * Fix wc tests in case only one thing is printed * wc: Fix style * wc: Use return value of first splice rather than second * wc: Make main loop more readable * wc: Don't unwrap on failed write to stdout * wc: Increment error count when stats fail to print * Re-add Cargo.lock 2021-03-30 18:53:02 +00:00			`.stdout_is("772\n");`
rewrite tests for cargo compat, decoupled directory, output handling 2015-11-16 05:25:01 +00:00			`}`

			`#[test]`
			`fn test_stdin_all_counts() {`
rustfmt the tests 2020-04-13 18:36:03 +00:00			`new_ucmd!()`
			`.args(&["-c", "-m", "-l", "-L", "-w"])`
tests: normalize around chaining asserts Although for some tests this adds characters we still use them there because the brevity cost is now worth the benefit in terms of instant, natural-language readability and recognizability for people not familiar with this tests of this module or even the project 2016-08-13 21:59:21 +00:00			`.pipe_in_fixture("alice_in_wonderland.txt")`
			`.run()`
wc: compute min width to format counts up front Fix two issues with the string formatting width for counts displayed by `wc`. First, the output was previously not using the default minimum width (seven characters) when reading from `stdin`. This commit corrects this behavior to match GNU `wc`. For example, $ cat alice_in_wonderland.txt \| wc 5 57 302 Second, if at least 10^7 bytes were read from `stdin` after reading from a smaller regular file, then every output row would have width 8. This disagrees with GNU `wc`, in which only the `stdin` row and the total row would have width 8. This commit corrects this behavior to match GNU `wc`. For example, $ printf "%.0s0" {1..10000000} \| wc emptyfile.txt - 0 0 0 emptyfile.txt 0 1 10000000 0 1 10000000 total Fixes #2186. 2021-05-07 19:07:17 +00:00			`.stdout_is(" 5 57 302 302 66\n");`
rewrite tests for cargo compat, decoupled directory, output handling 2015-11-16 05:25:01 +00:00			`}`

			`#[test]`
			`fn test_single_default() {`
tests: remove helper function boilerplate via macros 2016-08-23 11:52:43 +00:00			`new_ucmd!()`
rustfmt the tests 2020-04-13 18:36:03 +00:00			`.arg("moby_dick.txt")`
			`.run()`
wc: rm leading space when printing multiple counts Remove the leading space from the output of `wc` when printing two or more types of counts. Fixes #2173. 2021-05-06 00:59:37 +00:00			`.stdout_is(" 18 204 1115 moby_dick.txt\n");`
rewrite tests for cargo compat, decoupled directory, output handling 2015-11-16 05:25:01 +00:00			`}`

			`#[test]`
			`fn test_single_only_lines() {`
tests: remove helper function boilerplate via macros 2016-08-23 11:52:43 +00:00			`new_ucmd!()`
rustfmt the tests 2020-04-13 18:36:03 +00:00			`.args(&["-l", "moby_dick.txt"])`
			`.run()`
wc: Don't read() if we only need to count number of bytes (Version 2) (#1851) * wc: Don't read() if we only need to count number of bytes * Resolve a few code review comments * Use write macros instead of print * Fix wc tests in case only one thing is printed * wc: Fix style * wc: Use return value of first splice rather than second * wc: Make main loop more readable * wc: Don't unwrap on failed write to stdout * wc: Increment error count when stats fail to print * Re-add Cargo.lock 2021-03-30 18:53:02 +00:00			`.stdout_is("18 moby_dick.txt\n");`
rewrite tests for cargo compat, decoupled directory, output handling 2015-11-16 05:25:01 +00:00			`}`

			`#[test]`
			`fn test_single_all_counts() {`
tests: remove helper function boilerplate via macros 2016-08-23 11:52:43 +00:00			`new_ucmd!()`
rustfmt the tests 2020-04-13 18:36:03 +00:00			`.args(&["-c", "-l", "-L", "-m", "-w", "alice_in_wonderland.txt"])`
			`.run()`
wc: rm leading space when printing multiple counts Remove the leading space from the output of `wc` when printing two or more types of counts. Fixes #2173. 2021-05-06 00:59:37 +00:00			`.stdout_is(" 5 57 302 302 66 alice_in_wonderland.txt\n");`
rewrite tests for cargo compat, decoupled directory, output handling 2015-11-16 05:25:01 +00:00			`}`

			`#[test]`
			`fn test_multiple_default() {`
tests: remove helper function boilerplate via macros 2016-08-23 11:52:43 +00:00			`new_ucmd!()`
rustfmt the tests 2020-04-13 18:36:03 +00:00			`.args(&[`
			`"lorem_ipsum.txt",`
			`"moby_dick.txt",`
			`"alice_in_wonderland.txt",`
			`])`
			`.run()`
tests: normalize around chaining asserts Although for some tests this adds characters we still use them there because the brevity cost is now worth the benefit in terms of instant, natural-language readability and recognizability for people not familiar with this tests of this module or even the project 2016-08-13 21:59:21 +00:00			`.stdout_is(`
wc: rm leading space when printing multiple counts Remove the leading space from the output of `wc` when printing two or more types of counts. Fixes #2173. 2021-05-06 00:59:37 +00:00			`" 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \`
			`alice_in_wonderland.txt\n 36 370 2189 total\n",`
rustfmt the tests 2020-04-13 18:36:03 +00:00			`);`
rewrite tests for cargo compat, decoupled directory, output handling 2015-11-16 05:25:01 +00:00			`}`
wc: add tests for edge cases for wc on files 2021-05-04 00:52:32 +00:00
			`/// Test for an empty file.`
			`#[test]`
			`fn test_file_empty() {`
			`new_ucmd!()`
			`.args(&["-clmwL", "emptyfile.txt"])`
			`.run()`
wc: rm leading space when printing multiple counts Remove the leading space from the output of `wc` when printing two or more types of counts. Fixes #2173. 2021-05-06 00:59:37 +00:00			`.stdout_is("0 0 0 0 0 emptyfile.txt\n");`
wc: add tests for edge cases for wc on files 2021-05-04 00:52:32 +00:00			`}`

			`/// Test for an file containing a single non-whitespace character`
			`/// without a trailing newline.`
			`#[test]`
			`fn test_file_single_line_no_trailing_newline() {`
			`new_ucmd!()`
			`.args(&["-clmwL", "notrailingnewline.txt"])`
			`.run()`
wc: rm leading space when printing multiple counts Remove the leading space from the output of `wc` when printing two or more types of counts. Fixes #2173. 2021-05-06 00:59:37 +00:00			`.stdout_is("1 1 2 2 1 notrailingnewline.txt\n");`
wc: add tests for edge cases for wc on files 2021-05-04 00:52:32 +00:00			`}`

			`/// Test for a file that has 100 empty lines (that is, the contents of`
			`/// the file are the newline character repeated one hundred times).`
			`#[test]`
			`fn test_file_many_empty_lines() {`
			`new_ucmd!()`
			`.args(&["-clmwL", "manyemptylines.txt"])`
			`.run()`
wc: rm leading space when printing multiple counts Remove the leading space from the output of `wc` when printing two or more types of counts. Fixes #2173. 2021-05-06 00:59:37 +00:00			`.stdout_is("100 0 100 100 0 manyemptylines.txt\n");`
wc: add tests for edge cases for wc on files 2021-05-04 00:52:32 +00:00			`}`

			`/// Test for a file that has one long line comprising only spaces.`
			`#[test]`
			`fn test_file_one_long_line_only_spaces() {`
			`new_ucmd!()`
			`.args(&["-clmwL", "onelongemptyline.txt"])`
			`.run()`
wc: rm leading space when printing multiple counts Remove the leading space from the output of `wc` when printing two or more types of counts. Fixes #2173. 2021-05-06 00:59:37 +00:00			`.stdout_is(" 1 0 10001 10001 10000 onelongemptyline.txt\n");`
wc: add tests for edge cases for wc on files 2021-05-04 00:52:32 +00:00			`}`

			`/// Test for a file that has one long line comprising a single "word".`
			`#[test]`
			`fn test_file_one_long_word() {`
			`new_ucmd!()`
			`.args(&["-clmwL", "onelongword.txt"])`
			`.run()`
wc: rm leading space when printing multiple counts Remove the leading space from the output of `wc` when printing two or more types of counts. Fixes #2173. 2021-05-06 00:59:37 +00:00			`.stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n");`
wc: add tests for edge cases for wc on files 2021-05-04 00:52:32 +00:00			`}`
wc: correct some error messages for invalid inputs Change the error messages that get printed to `stderr` for compatibility with GNU `wc` when an input is a directory and when an input does not exist. Fixes #2211. 2021-05-15 14:32:03 +00:00
wc: compute min width to format counts up front Fix two issues with the string formatting width for counts displayed by `wc`. First, the output was previously not using the default minimum width (seven characters) when reading from `stdin`. This commit corrects this behavior to match GNU `wc`. For example, $ cat alice_in_wonderland.txt \| wc 5 57 302 Second, if at least 10^7 bytes were read from `stdin` after reading from a smaller regular file, then every output row would have width 8. This disagrees with GNU `wc`, in which only the `stdin` row and the total row would have width 8. This commit corrects this behavior to match GNU `wc`. For example, $ printf "%.0s0" {1..10000000} \| wc emptyfile.txt - 0 0 0 emptyfile.txt 0 1 10000000 0 1 10000000 total Fixes #2186. 2021-05-07 19:07:17 +00:00			`/// Test that the number of bytes in the file dictate the display width.`
			`///`
			`/// The width in digits of any count is the width in digits of the`
			`/// number of bytes in the file, regardless of whether the number of`
			`/// bytes are displayed.`
			`#[test]`
			`fn test_file_bytes_dictate_width() {`
			`// This file has 10,001 bytes. Five digits are required to`
			`// represent that. Even though the number of lines is 1 and the`
			`// number of words is 0, each of those counts is formatted with`
			`// five characters, filled with whitespace.`
			`new_ucmd!()`
			`.args(&["-lw", "onelongemptyline.txt"])`
			`.run()`
			`.stdout_is(" 1 0 onelongemptyline.txt\n");`

			`// This file has zero bytes. Only one digit is required to`
			`// represent that.`
			`new_ucmd!()`
			`.args(&["-lw", "emptyfile.txt"])`
			`.run()`
			`.stdout_is("0 0 emptyfile.txt\n");`
			`}`

wc: correct some error messages for invalid inputs Change the error messages that get printed to `stderr` for compatibility with GNU `wc` when an input is a directory and when an input does not exist. Fixes #2211. 2021-05-15 14:32:03 +00:00			`/// Test that getting counts from a directory is an error.`
			`#[test]`
			`fn test_read_from_directory_error() {`
			// TODO To match GNU `wc`, the `stdout` should be:
			`//`
			`// " 0 0 0 .\n"`
			`//`
			`new_ucmd!()`
			`.args(&["."])`
			`.fails()`
			`.stderr_contains(".: Is a directory\n")`
			`.stdout_is("0 0 0 .\n");`
			`}`

			`/// Test that getting counts from nonexistent file is an error.`
			`#[test]`
			`fn test_read_from_nonexistent_file() {`
			`new_ucmd!()`
			`.args(&["bogusfile"])`
			`.fails()`
			`.stderr_contains("bogusfile: No such file or directory\n");`
			`}`