diff --git a/Cargo.lock b/Cargo.lock
index 03b6548148..65709e251e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -554,6 +554,7 @@ dependencies = [
  "terminal_size",
  "thiserror",
  "trash",
+ "unicode-segmentation",
 ]
 
 [[package]]
diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml
index 90f623468a..5b24859f37 100644
--- a/crates/nu-command/Cargo.toml
+++ b/crates/nu-command/Cargo.toml
@@ -13,7 +13,9 @@ nu-protocol = { path = "../nu-protocol" }
 nu-table = { path = "../nu-table" }
 nu-term-grid = { path = "../nu-term-grid" }
 nu-parser = { path = "../nu-parser" }
+
 trash = { version = "1.3.0", optional = true }
+unicode-segmentation = "1.8.0"
 
 # Potential dependencies for extras
 glob = "0.3.0"
diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index d36d51f67f..1fe5d90ec3 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -57,6 +57,7 @@ pub fn create_default_context() -> Rc> {
             Ps,
             Rm,
             Select,
+            Size,
             Split,
             SplitChars,
             SplitColumn,
diff --git a/crates/nu-command/src/strings/mod.rs b/crates/nu-command/src/strings/mod.rs
index d3df18f05a..bbb78a24d7 100644
--- a/crates/nu-command/src/strings/mod.rs
+++ b/crates/nu-command/src/strings/mod.rs
@@ -1,5 +1,7 @@
 mod build_string;
+mod size;
 mod split;
 
 pub use build_string::BuildString;
+pub use size::Size;
 pub use split::*;
diff --git a/crates/nu-command/src/strings/size.rs b/crates/nu-command/src/strings/size.rs
new file mode 100644
--- /dev/null
+++ b/crates/nu-command/src/strings/size.rs
@@ -0,0 +1,150 @@
+extern crate unicode_segmentation;
+
+// use indexmap::indexmap;
+use unicode_segmentation::UnicodeSegmentation;
+
+use nu_protocol::ast::Call;
+use nu_protocol::engine::{Command, EvaluationContext};
+use nu_protocol::{ShellError, Signature, Span, Type, Value};
+
+/// The `size` command: reports line, word, char and byte counts for string input.
+pub struct Size;
+
+impl Command for Size {
+    fn name(&self) -> &str {
+        "size"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("size")
+    }
+
+    fn usage(&self) -> &str {
+        "Gather word count statistics on the text."
+    }
+
+    fn run(
+        &self,
+        context: &EvaluationContext,
+        call: &Call,
+        input: Value,
+    ) -> Result<Value, ShellError> {
+        size(context, call, input)
+    }
+
+    // TODO(review): disabled examples, still written with `UntaggedValue` / `indexmap!`;
+    // presumably waiting to be ported to the `Value` API used above.
+    // fn examples(&self) -> Vec<Example> {
+    //     vec![
+    //         Example {
+    //             description: "Count the number of words in a string",
+    //             example: r#"echo "There are seven words in this sentence" | size"#,
+    //             result: Some(vec![Value::row(indexmap! {
+    //                     "lines".to_string() => UntaggedValue::int(0).into(),
+    //                     "words".to_string() => UntaggedValue::int(7).into(),
+    //                     "chars".to_string() => UntaggedValue::int(38).into(),
+    //                     "bytes".to_string() => UntaggedValue::int(38).into(),
+    //             })
+    //             .into()]),
+    //         },
+    //         Example {
+    //             description: "Counts Unicode characters correctly in a string",
+    //             // The first word is spelled with `e` + U+0301 (combining acute accent).
+    //             example: r#"echo "Amélie Amelie" | size"#,
+    //             result: Some(vec![UntaggedValue::row(indexmap! {
+    //                     "lines".to_string() => UntaggedValue::int(0).into(),
+    //                     "words".to_string() => UntaggedValue::int(2).into(),
+    //                     "chars".to_string() => UntaggedValue::int(13).into(),
+    //                     "bytes".to_string() => UntaggedValue::int(15).into(),
+    //             })
+    //             .into()]),
+    //         },
+    //     ]
+    // }
+}
+
+/// Applies `count` to `input` through `input.map`, using the call head as the span.
+///
+/// A value that cannot be read as a string is replaced by a `Value::Error` holding a
+/// `ShellError::PipelineMismatch` whose spans both point at the call head.
+fn size(_context: &EvaluationContext, call: &Call, input: Value) -> Result<Value, ShellError> {
+    let span = call.head;
+    input.map(span, move |v| match v.as_string() {
+        Ok(s) => count(&s, span),
+        Err(_) => Value::Error {
+            error: ShellError::PipelineMismatch {
+                expected: Type::String,
+                expected_span: span,
+                origin: span,
+            },
+        },
+    })
+}
+
+/// Counts line breaks, words, grapheme clusters and bytes in `contents`.
+///
+/// * `lines`: number of `\n` / `\r\n` terminators, so text without a newline reports 0.
+/// * `words`: number of runs of non-whitespace grapheme clusters.
+/// * `chars`: number of extended grapheme clusters.
+/// * `bytes`: UTF-8 length of `contents`.
+///
+/// Returns a `Value::Record` whose columns are always `lines`, `words`, `chars`,
+/// `bytes` in that order; the record and each field carry `span`.
+fn count(contents: &str, span: Span) -> Value {
+    let mut lines: i64 = 0;
+    let mut words: i64 = 0;
+    let mut chars: i64 = 0;
+    let bytes = contents.len() as i64;
+    let mut end_of_word = true;
+
+    for c in UnicodeSegmentation::graphemes(contents, true) {
+        chars += 1;
+
+        match c {
+            // With extended clusters enabled, CRLF arrives as the single grapheme
+            // "\r\n", so it has to be matched explicitly to count as a line break.
+            "\n" | "\r\n" => {
+                lines += 1;
+                end_of_word = true;
+            }
+            // Any other whitespace (space, tab, lone "\r", ...) only ends the word.
+            _ if c.chars().all(char::is_whitespace) => end_of_word = true,
+            _ => {
+                if end_of_word {
+                    words += 1;
+                }
+                end_of_word = false;
+            }
+        }
+    }
+
+    // Build the record directly so the columns keep this exact order; a
+    // `HashMap` does not preserve insertion order.
+    let cols = vec![
+        "lines".to_string(),
+        "words".to_string(),
+        "chars".to_string(),
+        "bytes".to_string(),
+    ];
+    let vals = vec![
+        Value::Int { val: lines, span },
+        Value::Int { val: words, span },
+        Value::Int { val: chars, span },
+        Value::Int { val: bytes, span },
+    ];
+
+    Value::Record { cols, vals, span }
+}
+
+// #[cfg(test)]
+// mod tests {
+//     use super::ShellError;
+//     use super::Size;
+
+//     #[test]
+//     fn examples_work_as_expected() -> Result<(), ShellError> {
+//         use crate::examples::test as test_examples;
+
+//         test_examples(Size {})
+//     }
+// }
diff --git a/crates/nu-protocol/src/value/mod.rs b/crates/nu-protocol/src/value/mod.rs
--- a/crates/nu-protocol/src/value/mod.rs
+++ b/crates/nu-protocol/src/value/mod.rs
@@ -11,10 +11,11 @@ use serde::{Deserialize, Serialize};
 pub use stream::*;
 pub use unit::*;
 
+use std::collections::HashMap;
 use std::{cmp::Ordering, fmt::Debug};
 
 use crate::ast::{CellPath, PathMember};
-use crate::{span, BlockId, Span, Type};
+use crate::{span, BlockId, Span, Spanned, Type};
 
 use crate::ShellError;
 
@@ -1032,6 +1033,25 @@ impl Value {
     }
 }
 
+/// Create a `Value::Record` from a spanned hashmap.
+///
+/// `HashMap` iteration order is arbitrary, so the entries are sorted by key
+/// before the record is built; columns therefore always come out in ascending
+/// key order instead of changing from run to run.
+impl From<Spanned<HashMap<String, Value>>> for Value {
+    fn from(input: Spanned<HashMap<String, Value>>) -> Self {
+        let span = input.span;
+
+        let mut entries: Vec<(String, Value)> = input.item.into_iter().collect();
+        // Keys are unique within a map, so an unstable sort is sufficient.
+        entries.sort_unstable_by(|a, b| a.0.cmp(&b.0));
+
+        let (cols, vals): (Vec<String>, Vec<Value>) = entries.into_iter().unzip();
+
+        Value::Record { cols, vals, span }
+    }
+}
+
 /// Format a duration in nanoseconds into a string
 pub fn format_duration(duration: i64) -> String {
     let (sign, duration) = if duration >= 0 {