mirror of
https://github.com/nushell/nushell
synced 2025-01-13 05:38:57 +00:00
add unicode-width to str stats
(#14014)
# Description

This PR adds another type of length to `str stats`: `unicode-width`, the displayed width of the string in columns.

```nushell
❯ "\u{ff03}" | str stats
╭───────────────┬───╮
│ lines         │ 1 │
│ words         │ 0 │
│ bytes         │ 3 │
│ chars         │ 1 │
│ graphemes     │ 1 │
│ unicode-width │ 2 │
╰───────────────┴───╯
❯ "Amélie Amelie" | str stats
╭───────────────┬────╮
│ lines         │  1 │
│ words         │  2 │
│ bytes         │ 15 │
│ chars         │ 14 │
│ graphemes     │ 13 │
│ unicode-width │ 13 │
╰───────────────┴────╯
❯ '今天天气真好' | str stats
╭───────────────┬────╮
│ lines         │  1 │
│ words         │  6 │
│ bytes         │ 18 │
│ chars         │  6 │
│ graphemes     │  6 │
│ unicode-width │ 12 │
╰───────────────┴────╯
❯ "Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα." | str stats
╭───────────────┬────╮
│ lines         │  1 │
│ words         │  9 │
│ bytes         │ 96 │
│ chars         │ 50 │
│ graphemes     │ 50 │
│ unicode-width │ 50 │
╰───────────────┴────╯
❯ "\n" | str stats
╭───────────────┬───╮
│ lines         │ 1 │
│ words         │ 0 │
│ bytes         │ 1 │
│ chars         │ 1 │
│ graphemes     │ 1 │
│ unicode-width │ 0 │
╰───────────────┴───╯
```

The idea for this PR came from me wondering if we could replace `#` with `\u{ff03}` in tables.

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. -->

# Tests + Formatting
<!-- Don't forget to add tests that cover your changes.
Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
This commit is contained in:
parent
d6f4e4c4fe
commit
6dc71f5ad0
1 changed file with 15 additions and 2 deletions
|
@ -64,17 +64,19 @@ impl Command for SubCommand {
|
|||
"bytes" => Value::test_int(38),
|
||||
"chars" => Value::test_int(38),
|
||||
"graphemes" => Value::test_int(38),
|
||||
"unicode-width" => Value::test_int(38),
|
||||
})),
|
||||
},
|
||||
Example {
|
||||
description: "Counts unicode characters",
|
||||
example: r#"'今天天气真好' | str stats "#,
|
||||
example: r#"'今天天气真好' | str stats"#,
|
||||
result: Some(Value::test_record(record! {
|
||||
"lines" => Value::test_int(1),
|
||||
"words" => Value::test_int(6),
|
||||
"bytes" => Value::test_int(18),
|
||||
"chars" => Value::test_int(6),
|
||||
"graphemes" => Value::test_int(6),
|
||||
"unicode-width" => Value::test_int(12),
|
||||
})),
|
||||
},
|
||||
Example {
|
||||
|
@ -86,6 +88,7 @@ impl Command for SubCommand {
|
|||
"bytes" => Value::test_int(15),
|
||||
"chars" => Value::test_int(14),
|
||||
"graphemes" => Value::test_int(13),
|
||||
"unicode-width" => Value::test_int(13),
|
||||
})),
|
||||
},
|
||||
]
|
||||
|
@ -139,6 +142,7 @@ fn counter(contents: &str, span: Span) -> Value {
|
|||
"bytes" => get_count(&counts, Counter::Bytes, span),
|
||||
"chars" => get_count(&counts, Counter::CodePoints, span),
|
||||
"graphemes" => get_count(&counts, Counter::GraphemeClusters, span),
|
||||
"unicode-width" => get_count(&counts, Counter::UnicodeWidth, span),
|
||||
};
|
||||
|
||||
Value::record(record, span)
|
||||
|
@ -208,6 +212,7 @@ impl Count for Counter {
|
|||
}
|
||||
Counter::Words => s.unicode_words().count(),
|
||||
Counter::CodePoints => s.chars().count(),
|
||||
Counter::UnicodeWidth => unicode_width::UnicodeWidthStr::width(s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -229,15 +234,19 @@ pub enum Counter {
|
|||
|
||||
/// Counts unicode code points
|
||||
CodePoints,
|
||||
|
||||
/// Counts the displayed width of the string in columns, as computed by `unicode_width::UnicodeWidthStr::width`
|
||||
UnicodeWidth,
|
||||
}
|
||||
|
||||
/// A convenience array of all counter types.
|
||||
pub const ALL_COUNTERS: [Counter; 5] = [
|
||||
pub const ALL_COUNTERS: [Counter; 6] = [
|
||||
Counter::GraphemeClusters,
|
||||
Counter::Bytes,
|
||||
Counter::Lines,
|
||||
Counter::Words,
|
||||
Counter::CodePoints,
|
||||
Counter::UnicodeWidth,
|
||||
];
|
||||
|
||||
impl fmt::Display for Counter {
|
||||
|
@ -248,6 +257,7 @@ impl fmt::Display for Counter {
|
|||
Counter::Lines => "lines",
|
||||
Counter::Words => "words",
|
||||
Counter::CodePoints => "codepoints",
|
||||
Counter::UnicodeWidth => "unicode-width",
|
||||
};
|
||||
|
||||
write!(f, "{s}")
|
||||
|
@ -297,6 +307,7 @@ fn test_one_newline() {
|
|||
correct_counts.insert(Counter::GraphemeClusters, 1);
|
||||
correct_counts.insert(Counter::Bytes, 1);
|
||||
correct_counts.insert(Counter::CodePoints, 1);
|
||||
correct_counts.insert(Counter::UnicodeWidth, 0);
|
||||
|
||||
assert_eq!(correct_counts, counts);
|
||||
}
|
||||
|
@ -336,6 +347,7 @@ fn test_count_counts_lines() {
|
|||
|
||||
// one more than grapheme clusters because of \r\n
|
||||
correct_counts.insert(Counter::CodePoints, 24);
|
||||
correct_counts.insert(Counter::UnicodeWidth, 17);
|
||||
|
||||
assert_eq!(correct_counts, counts);
|
||||
}
|
||||
|
@ -353,6 +365,7 @@ fn test_count_counts_words() {
|
|||
correct_counts.insert(Counter::Bytes, i_can_eat_glass.len());
|
||||
correct_counts.insert(Counter::Words, 9);
|
||||
correct_counts.insert(Counter::CodePoints, 50);
|
||||
correct_counts.insert(Counter::UnicodeWidth, 50);
|
||||
|
||||
assert_eq!(correct_counts, counts);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue