mirror of
https://github.com/uutils/coreutils
synced 2025-01-21 09:34:44 +00:00
Merge pull request #6199 from BenWiederhake/dev-wc-ascii-control-is-word
wc: count ASCII control characters as word characters
This commit is contained in:
commit
a75b8a0f67
2 changed files with 18 additions and 9 deletions
|
@@ -580,9 +580,8 @@ fn process_chunk<
|
||||||
if SHOW_WORDS {
|
if SHOW_WORDS {
|
||||||
if ch.is_whitespace() {
|
if ch.is_whitespace() {
|
||||||
*in_word = false;
|
*in_word = false;
|
||||||
} else if ch.is_ascii_control() {
|
|
||||||
// These count as characters but do not affect the word state
|
|
||||||
} else if !(*in_word) {
|
} else if !(*in_word) {
|
||||||
|
// This also counts control characters! (As of GNU coreutils 9.5)
|
||||||
*in_word = true;
|
*in_word = true;
|
||||||
total.words += 1;
|
total.words += 1;
|
||||||
}
|
}
|
||||||
|
|
|
@@ -71,7 +71,7 @@ fn test_utf8_words() {
|
||||||
.arg("-w")
|
.arg("-w")
|
||||||
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
||||||
.run()
|
.run()
|
||||||
.stdout_is("87\n");
|
.stdout_is("89\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@@ -80,7 +80,7 @@ fn test_utf8_line_length_words() {
|
||||||
.arg("-Lw")
|
.arg("-Lw")
|
||||||
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
||||||
.run()
|
.run()
|
||||||
.stdout_is(" 87 48\n");
|
.stdout_is(" 89 48\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@@ -98,7 +98,7 @@ fn test_utf8_line_length_chars_words() {
|
||||||
.arg("-Lmw")
|
.arg("-Lmw")
|
||||||
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
||||||
.run()
|
.run()
|
||||||
.stdout_is(" 87 442 48\n");
|
.stdout_is(" 89 442 48\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@@ -143,7 +143,7 @@ fn test_utf8_chars_words() {
|
||||||
.arg("-mw")
|
.arg("-mw")
|
||||||
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
||||||
.run()
|
.run()
|
||||||
.stdout_is(" 87 442\n");
|
.stdout_is(" 89 442\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@@ -161,7 +161,7 @@ fn test_utf8_line_length_lines_words() {
|
||||||
.arg("-Llw")
|
.arg("-Llw")
|
||||||
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
||||||
.run()
|
.run()
|
||||||
.stdout_is(" 25 87 48\n");
|
.stdout_is(" 25 89 48\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@@ -179,7 +179,7 @@ fn test_utf8_lines_words_chars() {
|
||||||
.arg("-mlw")
|
.arg("-mlw")
|
||||||
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
||||||
.run()
|
.run()
|
||||||
.stdout_is(" 25 87 442\n");
|
.stdout_is(" 25 89 442\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@@ -197,7 +197,17 @@ fn test_utf8_all() {
|
||||||
.arg("-lwmcL")
|
.arg("-lwmcL")
|
||||||
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
.pipe_in_fixture("UTF_8_weirdchars.txt")
|
||||||
.run()
|
.run()
|
||||||
.stdout_is(" 25 87 442 513 48\n");
|
.stdout_is(" 25 89 442 513 48\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_ascii_control() {
|
||||||
|
// GNU coreutils "d1" test
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("-w")
|
||||||
|
.pipe_in(*b"\x01\n")
|
||||||
|
.run()
|
||||||
|
.stdout_is("1\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Reference in a new issue