add unicode-width to str stats (#14014)

# Description

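This PR adds another measure of length to `str stats`: `unicode-width`, the displayed width of the string in columns (full-width characters such as `\u{ff03}` count as 2).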
```nushell
❯ "\u{ff03}" | str stats
╭───────────────┬───╮
│ lines         │ 1 │
│ words         │ 0 │
│ bytes         │ 3 │
│ chars         │ 1 │
│ graphemes     │ 1 │
│ unicode-width │ 2 │
╰───────────────┴───╯
❯ "Amélie Amelie" | str stats
╭───────────────┬────╮
│ lines         │ 1  │
│ words         │ 2  │
│ bytes         │ 15 │
│ chars         │ 14 │
│ graphemes     │ 13 │
│ unicode-width │ 13 │
╰───────────────┴────╯
❯ '今天天气真好' | str stats
╭───────────────┬────╮
│ lines         │ 1  │
│ words         │ 6  │
│ bytes         │ 18 │
│ chars         │ 6  │
│ graphemes     │ 6  │
│ unicode-width │ 12 │
╰───────────────┴────╯
❯ "Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα." | str stats
╭───────────────┬────╮
│ lines         │ 1  │
│ words         │ 9  │
│ bytes         │ 96 │
│ chars         │ 50 │
│ graphemes     │ 50 │
│ unicode-width │ 50 │
╰───────────────┴────╯
❯ "\n" | str stats
╭───────────────┬───╮
│ lines         │ 1 │
│ words         │ 0 │
│ bytes         │ 1 │
│ chars         │ 1 │
│ graphemes     │ 1 │
│ unicode-width │ 0 │
╰───────────────┴───╯
```
The idea for this PR came from wondering whether we could replace `#` with
the full-width number sign `\u{ff03}` in tables.

# User-Facing Changes
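<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
The record returned by `str stats` now contains an additional `unicode-width` field, reporting the displayed width of the input string in columns.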

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
Darren Schroeder 2024-10-06 15:17:12 -05:00 committed by GitHub
parent d6f4e4c4fe
commit 6dc71f5ad0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -64,17 +64,19 @@ impl Command for SubCommand {
"bytes" => Value::test_int(38),
"chars" => Value::test_int(38),
"graphemes" => Value::test_int(38),
"unicode-width" => Value::test_int(38),
})),
},
Example {
description: "Counts unicode characters",
example: r#"'今天天气真好' | str stats "#,
example: r#"'今天天气真好' | str stats"#,
result: Some(Value::test_record(record! {
"lines" => Value::test_int(1),
"words" => Value::test_int(6),
"bytes" => Value::test_int(18),
"chars" => Value::test_int(6),
"graphemes" => Value::test_int(6),
"unicode-width" => Value::test_int(12),
})),
},
Example {
@@ -86,6 +88,7 @@ impl Command for SubCommand {
"bytes" => Value::test_int(15),
"chars" => Value::test_int(14),
"graphemes" => Value::test_int(13),
"unicode-width" => Value::test_int(13),
})),
},
]
@@ -139,6 +142,7 @@ fn counter(contents: &str, span: Span) -> Value {
"bytes" => get_count(&counts, Counter::Bytes, span),
"chars" => get_count(&counts, Counter::CodePoints, span),
"graphemes" => get_count(&counts, Counter::GraphemeClusters, span),
"unicode-width" => get_count(&counts, Counter::UnicodeWidth, span),
};
Value::record(record, span)
@@ -208,6 +212,7 @@ impl Count for Counter {
}
Counter::Words => s.unicode_words().count(),
Counter::CodePoints => s.chars().count(),
Counter::UnicodeWidth => unicode_width::UnicodeWidthStr::width(s),
}
}
}
@@ -229,15 +234,19 @@ pub enum Counter {
/// Counts unicode code points
CodePoints,
/// Counts the width of the string
UnicodeWidth,
}
/// A convenience array of all counter types.
pub const ALL_COUNTERS: [Counter; 5] = [
pub const ALL_COUNTERS: [Counter; 6] = [
Counter::GraphemeClusters,
Counter::Bytes,
Counter::Lines,
Counter::Words,
Counter::CodePoints,
Counter::UnicodeWidth,
];
impl fmt::Display for Counter {
@@ -248,6 +257,7 @@ impl fmt::Display for Counter {
Counter::Lines => "lines",
Counter::Words => "words",
Counter::CodePoints => "codepoints",
Counter::UnicodeWidth => "unicode-width",
};
write!(f, "{s}")
@@ -297,6 +307,7 @@ fn test_one_newline() {
correct_counts.insert(Counter::GraphemeClusters, 1);
correct_counts.insert(Counter::Bytes, 1);
correct_counts.insert(Counter::CodePoints, 1);
correct_counts.insert(Counter::UnicodeWidth, 0);
assert_eq!(correct_counts, counts);
}
@@ -336,6 +347,7 @@ fn test_count_counts_lines() {
// one more than grapheme clusters because of \r\n
correct_counts.insert(Counter::CodePoints, 24);
correct_counts.insert(Counter::UnicodeWidth, 17);
assert_eq!(correct_counts, counts);
}
@@ -353,6 +365,7 @@ fn test_count_counts_words() {
correct_counts.insert(Counter::Bytes, i_can_eat_glass.len());
correct_counts.insert(Counter::Words, 9);
correct_counts.insert(Counter::CodePoints, 50);
correct_counts.insert(Counter::UnicodeWidth, 50);
assert_eq!(correct_counts, counts);
}