mirror of
https://github.com/nushell/nushell
synced 2024-12-28 14:03:09 +00:00
Size: count Unicode graphemes as single char (#2482)
This commit is contained in:
parent
47c5346934
commit
666e6a7b57
3 changed files with 32 additions and 14 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3093,6 +3093,7 @@ dependencies = [
|
||||||
"trash",
|
"trash",
|
||||||
"typetag",
|
"typetag",
|
||||||
"umask",
|
"umask",
|
||||||
|
"unicode-segmentation",
|
||||||
"unicode-xid",
|
"unicode-xid",
|
||||||
"url 2.1.1",
|
"url 2.1.1",
|
||||||
"users",
|
"users",
|
||||||
|
|
|
@ -87,6 +87,7 @@ termcolor = "1.1.0"
|
||||||
toml = "0.5.6"
|
toml = "0.5.6"
|
||||||
typetag = "0.1.5"
|
typetag = "0.1.5"
|
||||||
umask = "1.0.0"
|
umask = "1.0.0"
|
||||||
|
unicode-segmentation = "1.6.0"
|
||||||
unicode-xid = "0.2.1"
|
unicode-xid = "0.2.1"
|
||||||
uuid_crate = {package = "uuid", version = "0.8.1", features = ["v4"], optional = true}
|
uuid_crate = {package = "uuid", version = "0.8.1", features = ["v4"], optional = true}
|
||||||
which = {version = "4.0.2", optional = true}
|
which = {version = "4.0.2", optional = true}
|
||||||
|
|
|
@ -1,8 +1,11 @@
|
||||||
|
extern crate unicode_segmentation;
|
||||||
|
|
||||||
use crate::commands::WholeStreamCommand;
|
use crate::commands::WholeStreamCommand;
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
use indexmap::indexmap;
|
use indexmap::indexmap;
|
||||||
use nu_errors::ShellError;
|
use nu_errors::ShellError;
|
||||||
use nu_protocol::{ReturnSuccess, Signature, TaggedDictBuilder, UntaggedValue, Value};
|
use nu_protocol::{ReturnSuccess, Signature, TaggedDictBuilder, UntaggedValue, Value};
|
||||||
|
use unicode_segmentation::UnicodeSegmentation;
|
||||||
|
|
||||||
pub struct Size;
|
pub struct Size;
|
||||||
|
|
||||||
|
@ -29,17 +32,30 @@ impl WholeStreamCommand for Size {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn examples(&self) -> Vec<Example> {
|
fn examples(&self) -> Vec<Example> {
|
||||||
vec![Example {
|
vec![
|
||||||
description: "Count the number of words in a string",
|
Example {
|
||||||
example: r#"echo "There are seven words in this sentence" | size"#,
|
description: "Count the number of words in a string",
|
||||||
result: Some(vec![UntaggedValue::row(indexmap! {
|
example: r#"echo "There are seven words in this sentence" | size"#,
|
||||||
"lines".to_string() => UntaggedValue::int(0).into(),
|
result: Some(vec![UntaggedValue::row(indexmap! {
|
||||||
"words".to_string() => UntaggedValue::int(7).into(),
|
"lines".to_string() => UntaggedValue::int(0).into(),
|
||||||
"chars".to_string() => UntaggedValue::int(38).into(),
|
"words".to_string() => UntaggedValue::int(7).into(),
|
||||||
"bytes".to_string() => UntaggedValue::int(38).into(),
|
"chars".to_string() => UntaggedValue::int(38).into(),
|
||||||
})
|
"bytes".to_string() => UntaggedValue::int(38).into(),
|
||||||
.into()]),
|
})
|
||||||
}]
|
.into()]),
|
||||||
|
},
|
||||||
|
Example {
|
||||||
|
description: "Counts unicode characters correctly in a string",
|
||||||
|
example: r#"echo "Amélie Amelie" | size"#,
|
||||||
|
result: Some(vec![UntaggedValue::row(indexmap! {
|
||||||
|
"lines".to_string() => UntaggedValue::int(0).into(),
|
||||||
|
"words".to_string() => UntaggedValue::int(2).into(),
|
||||||
|
"chars".to_string() => UntaggedValue::int(13).into(),
|
||||||
|
"bytes".to_string() => UntaggedValue::int(15).into(),
|
||||||
|
})
|
||||||
|
.into()]),
|
||||||
|
},
|
||||||
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -72,15 +88,15 @@ fn count(contents: &str, tag: impl Into<Tag>) -> Value {
|
||||||
let bytes = contents.len() as i64;
|
let bytes = contents.len() as i64;
|
||||||
let mut end_of_word = true;
|
let mut end_of_word = true;
|
||||||
|
|
||||||
for c in contents.chars() {
|
for c in UnicodeSegmentation::graphemes(contents, true) {
|
||||||
chars += 1;
|
chars += 1;
|
||||||
|
|
||||||
match c {
|
match c {
|
||||||
'\n' => {
|
"\n" => {
|
||||||
lines += 1;
|
lines += 1;
|
||||||
end_of_word = true;
|
end_of_word = true;
|
||||||
}
|
}
|
||||||
' ' => end_of_word = true,
|
" " => end_of_word = true,
|
||||||
_ => {
|
_ => {
|
||||||
if end_of_word {
|
if end_of_word {
|
||||||
words += 1;
|
words += 1;
|
||||||
|
|
Loading…
Reference in a new issue