mirror of
https://github.com/nushell/nushell
synced 2024-12-27 21:43:09 +00:00
Size: count unicode graphemes as single char (#2482)
This commit is contained in:
parent
47c5346934
commit
666e6a7b57
3 changed files with 32 additions and 14 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3093,6 +3093,7 @@ dependencies = [
|
|||
"trash",
|
||||
"typetag",
|
||||
"umask",
|
||||
"unicode-segmentation",
|
||||
"unicode-xid",
|
||||
"url 2.1.1",
|
||||
"users",
|
||||
|
|
|
@ -87,6 +87,7 @@ termcolor = "1.1.0"
|
|||
toml = "0.5.6"
|
||||
typetag = "0.1.5"
|
||||
umask = "1.0.0"
|
||||
unicode-segmentation = "1.6.0"
|
||||
unicode-xid = "0.2.1"
|
||||
uuid_crate = {package = "uuid", version = "0.8.1", features = ["v4"], optional = true}
|
||||
which = {version = "4.0.2", optional = true}
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
extern crate unicode_segmentation;
|
||||
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::prelude::*;
|
||||
use indexmap::indexmap;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{ReturnSuccess, Signature, TaggedDictBuilder, UntaggedValue, Value};
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
pub struct Size;
|
||||
|
||||
|
@ -29,17 +32,30 @@ impl WholeStreamCommand for Size {
|
|||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Count the number of words in a string",
|
||||
example: r#"echo "There are seven words in this sentence" | size"#,
|
||||
result: Some(vec![UntaggedValue::row(indexmap! {
|
||||
"lines".to_string() => UntaggedValue::int(0).into(),
|
||||
"words".to_string() => UntaggedValue::int(7).into(),
|
||||
"chars".to_string() => UntaggedValue::int(38).into(),
|
||||
"bytes".to_string() => UntaggedValue::int(38).into(),
|
||||
})
|
||||
.into()]),
|
||||
}]
|
||||
vec![
|
||||
Example {
|
||||
description: "Count the number of words in a string",
|
||||
example: r#"echo "There are seven words in this sentence" | size"#,
|
||||
result: Some(vec![UntaggedValue::row(indexmap! {
|
||||
"lines".to_string() => UntaggedValue::int(0).into(),
|
||||
"words".to_string() => UntaggedValue::int(7).into(),
|
||||
"chars".to_string() => UntaggedValue::int(38).into(),
|
||||
"bytes".to_string() => UntaggedValue::int(38).into(),
|
||||
})
|
||||
.into()]),
|
||||
},
|
||||
Example {
|
||||
description: "Counts unicode characters correctly in a string",
|
||||
example: r#"echo "Amélie Amelie" | size"#,
|
||||
result: Some(vec![UntaggedValue::row(indexmap! {
|
||||
"lines".to_string() => UntaggedValue::int(0).into(),
|
||||
"words".to_string() => UntaggedValue::int(2).into(),
|
||||
"chars".to_string() => UntaggedValue::int(13).into(),
|
||||
"bytes".to_string() => UntaggedValue::int(15).into(),
|
||||
})
|
||||
.into()]),
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -72,15 +88,15 @@ fn count(contents: &str, tag: impl Into<Tag>) -> Value {
|
|||
let bytes = contents.len() as i64;
|
||||
let mut end_of_word = true;
|
||||
|
||||
for c in contents.chars() {
|
||||
for c in UnicodeSegmentation::graphemes(contents, true) {
|
||||
chars += 1;
|
||||
|
||||
match c {
|
||||
'\n' => {
|
||||
"\n" => {
|
||||
lines += 1;
|
||||
end_of_word = true;
|
||||
}
|
||||
' ' => end_of_word = true,
|
||||
" " => end_of_word = true,
|
||||
_ => {
|
||||
if end_of_word {
|
||||
words += 1;
|
||||
|
|
Loading…
Reference in a new issue