Merge pull request #6199 from BenWiederhake/dev-wc-ascii-control-is-word

wc: count ASCII control characters as word characters
This commit is contained in:
Sylvestre Ledru 2024-04-07 08:51:12 +02:00 committed by GitHub
commit a75b8a0f67
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 18 additions and 9 deletions

View file

@ -580,9 +580,8 @@ fn process_chunk<
if SHOW_WORDS { if SHOW_WORDS {
if ch.is_whitespace() { if ch.is_whitespace() {
*in_word = false; *in_word = false;
} else if ch.is_ascii_control() {
// These count as characters but do not affect the word state
} else if !(*in_word) { } else if !(*in_word) {
// This also counts control characters! (As of GNU coreutils 9.5)
*in_word = true; *in_word = true;
total.words += 1; total.words += 1;
} }

View file

@ -71,7 +71,7 @@ fn test_utf8_words() {
.arg("-w") .arg("-w")
.pipe_in_fixture("UTF_8_weirdchars.txt") .pipe_in_fixture("UTF_8_weirdchars.txt")
.run() .run()
.stdout_is("87\n"); .stdout_is("89\n");
} }
#[test] #[test]
@ -80,7 +80,7 @@ fn test_utf8_line_length_words() {
.arg("-Lw") .arg("-Lw")
.pipe_in_fixture("UTF_8_weirdchars.txt") .pipe_in_fixture("UTF_8_weirdchars.txt")
.run() .run()
.stdout_is(" 87 48\n"); .stdout_is(" 89 48\n");
} }
#[test] #[test]
@ -98,7 +98,7 @@ fn test_utf8_line_length_chars_words() {
.arg("-Lmw") .arg("-Lmw")
.pipe_in_fixture("UTF_8_weirdchars.txt") .pipe_in_fixture("UTF_8_weirdchars.txt")
.run() .run()
.stdout_is(" 87 442 48\n"); .stdout_is(" 89 442 48\n");
} }
#[test] #[test]
@ -143,7 +143,7 @@ fn test_utf8_chars_words() {
.arg("-mw") .arg("-mw")
.pipe_in_fixture("UTF_8_weirdchars.txt") .pipe_in_fixture("UTF_8_weirdchars.txt")
.run() .run()
.stdout_is(" 87 442\n"); .stdout_is(" 89 442\n");
} }
#[test] #[test]
@ -161,7 +161,7 @@ fn test_utf8_line_length_lines_words() {
.arg("-Llw") .arg("-Llw")
.pipe_in_fixture("UTF_8_weirdchars.txt") .pipe_in_fixture("UTF_8_weirdchars.txt")
.run() .run()
.stdout_is(" 25 87 48\n"); .stdout_is(" 25 89 48\n");
} }
#[test] #[test]
@ -179,7 +179,7 @@ fn test_utf8_lines_words_chars() {
.arg("-mlw") .arg("-mlw")
.pipe_in_fixture("UTF_8_weirdchars.txt") .pipe_in_fixture("UTF_8_weirdchars.txt")
.run() .run()
.stdout_is(" 25 87 442\n"); .stdout_is(" 25 89 442\n");
} }
#[test] #[test]
@ -197,7 +197,17 @@ fn test_utf8_all() {
.arg("-lwmcL") .arg("-lwmcL")
.pipe_in_fixture("UTF_8_weirdchars.txt") .pipe_in_fixture("UTF_8_weirdchars.txt")
.run() .run()
.stdout_is(" 25 87 442 513 48\n"); .stdout_is(" 25 89 442 513 48\n");
}
#[test]
fn test_ascii_control() {
// GNU coreutils "d1" test
new_ucmd!()
.arg("-w")
.pipe_in(*b"\x01\n")
.run()
.stdout_is("1\n");
} }
#[test] #[test]