diff --git a/crates/nu-cli/src/cli.rs b/crates/nu-cli/src/cli.rs index 05add78bc5..5136e875b0 100644 --- a/crates/nu-cli/src/cli.rs +++ b/crates/nu-cli/src/cli.rs @@ -49,8 +49,9 @@ fn load_plugin(path: &std::path::Path, context: &mut Context) -> Result<(), Shel let mut input = String::new(); let result = match reader.read_line(&mut input) { Ok(count) => { - trace!("processing response ({} bytes)", count); - trace!("response: {}", input); + trace!(target: "nu::load", "plugin infrastructure -> config response"); + trace!(target: "nu::load", "plugin infrastructure -> processing response ({} bytes)", count); + trace!(target: "nu::load", "plugin infrastructure -> response: {}", input); let response = serde_json::from_str::>>(&input); match response { @@ -58,13 +59,13 @@ fn load_plugin(path: &std::path::Path, context: &mut Context) -> Result<(), Shel Ok(params) => { let fname = path.to_string_lossy(); - trace!("processing {:?}", params); + trace!(target: "nu::load", "plugin infrastructure -> processing {:?}", params); let name = params.name.clone(); let fname = fname.to_string(); if context.get_command(&name).is_some() { - trace!("plugin {:?} already loaded.", &name); + trace!(target: "nu::load", "plugin infrastructure -> {:?} already loaded.", &name); } else if params.is_filter { context.add_commands(vec![whole_stream_command(PluginCommand::new( name, fname, params, @@ -79,7 +80,7 @@ fn load_plugin(path: &std::path::Path, context: &mut Context) -> Result<(), Shel Err(e) => Err(e), }, Err(e) => { - trace!("incompatible plugin {:?}", input); + trace!(target: "nu::load", "plugin infrastructure -> incompatible {:?}", input); Err(ShellError::untagged_runtime_error(format!( "Error: {:?}", e @@ -188,7 +189,7 @@ pub fn load_plugins(context: &mut Context) -> Result<(), ShellError> { }; if is_valid_name && is_executable { - trace!("Trying {:?}", path.display()); + trace!(target: "nu::load", "plugin infrastructure -> Trying {:?}", path.display()); // we are ok if this plugin load fails let _ = load_plugin(&path, &mut context.clone()); diff --git a/crates/nu-cli/src/commands/group_by.rs b/crates/nu-cli/src/commands/group_by.rs index 4b63ed8233..c762f05a3a 100644 --- a/crates/nu-cli/src/commands/group_by.rs +++ b/crates/nu-cli/src/commands/group_by.rs @@ -4,6 +4,7 @@ use indexmap::indexmap; use nu_errors::ShellError; use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value}; use nu_source::Tagged; +use nu_value_ext::as_string; pub struct GroupBy; @@ -71,6 +72,10 @@ impl WholeStreamCommand for GroupBy { } } +enum Grouper { + ByColumn(Option>), +} + pub async fn group_by( args: CommandArgs, registry: &CommandRegistry, @@ -81,30 +86,84 @@ pub async fn group_by( let values: Vec = input.collect().await; if values.is_empty() { - Err(ShellError::labeled_error( + return Err(ShellError::labeled_error( "Expected table from pipeline", "requires a table input", name, - )) + )); + } + + let values = UntaggedValue::table(&values).into_value(&name); + + match group(&column_name, &values, name) { + Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), + Err(reason) => Err(reason), + } +} + +pub fn suggestions(tried: Tagged<&str>, for_value: &Value) -> ShellError { + let possibilities = for_value.data_descriptors(); + + let mut possible_matches: Vec<_> = possibilities + .iter() + .map(|x| (natural::distance::levenshtein_distance(x, &tried), x)) + .collect(); + + possible_matches.sort(); + + if !possible_matches.is_empty() { + ShellError::labeled_error( + "Unknown column", + format!("did you mean '{}'?", possible_matches[0].1), + tried.tag(), + ) } else { - match crate::utils::data::group(column_name, &values, None, &name) { - Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), - Err(err) => Err(err), - } + ShellError::labeled_error( + "Unknown column", + "row does not contain this column", + tried.tag(), + ) } } pub fn group( - column_name: &Tagged, - values: Vec, + column_name: &Option>, + values: &Value, tag: impl Into, ) -> Result { - crate::utils::data::group(Some(column_name.clone()), &values, None, tag) + let name = tag.into(); + + let grouper = if let Some(column_name) = column_name { + Grouper::ByColumn(Some(column_name.clone())) + } else { + Grouper::ByColumn(None) + }; + + match grouper { + Grouper::ByColumn(Some(column_name)) => { + let block = Box::new(move |row: &Value| { + match row.get_data_by_key(column_name.borrow_spanned()) { + Some(group_key) => Ok(as_string(&group_key)?), + None => Err(suggestions(column_name.borrow_tagged(), &row)), + } + }); + + crate::utils::data::group(&values, &Some(block), &name) + } + Grouper::ByColumn(None) => { + let block = Box::new(move |row: &Value| match as_string(row) { + Ok(group_key) => Ok(group_key), + Err(reason) => Err(reason), + }); + + crate::utils::data::group(&values, &Some(block), &name) + } + } } #[cfg(test)] mod tests { - use crate::commands::group_by::group; + use super::group; use indexmap::IndexMap; use nu_errors::ShellError; use nu_protocol::{UntaggedValue, Value}; @@ -122,7 +181,7 @@ mod tests { UntaggedValue::table(list).into_untagged_value() } - fn nu_releases_commiters() -> Vec { + fn nu_releases_committers() -> Vec { vec![ row( indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, @@ -156,10 +215,11 @@ mod tests { #[test] fn groups_table_by_date_column() -> Result<(), ShellError> { - let for_key = String::from("date").tagged_unknown(); + let for_key = Some(String::from("date").tagged_unknown()); + let sample = table(&nu_releases_committers()); assert_eq!( - group(&for_key, nu_releases_commiters(), Tag::unknown())?, + group(&for_key, &sample, Tag::unknown())?, row(indexmap! { "August 23-2019".into() => table(&[ row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}), @@ -184,10 +244,11 @@ mod tests { #[test] fn groups_table_by_country_column() -> Result<(), ShellError> { - let for_key = String::from("country").tagged_unknown(); + let for_key = Some(String::from("country").tagged_unknown()); + let sample = table(&nu_releases_committers()); assert_eq!( - group(&for_key, nu_releases_commiters(), Tag::unknown())?, + group(&for_key, &sample, Tag::unknown())?, row(indexmap! { "EC".into() => table(&[ row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}), diff --git a/crates/nu-cli/src/commands/group_by_date.rs b/crates/nu-cli/src/commands/group_by_date.rs index f361deb207..b4c0391d86 100644 --- a/crates/nu-cli/src/commands/group_by_date.rs +++ b/crates/nu-cli/src/commands/group_by_date.rs @@ -1,7 +1,7 @@ use crate::commands::WholeStreamCommand; use crate::prelude::*; use nu_errors::ShellError; -use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, Value}; +use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value}; use nu_source::Tagged; pub struct GroupByDate; @@ -55,7 +55,11 @@ impl WholeStreamCommand for GroupByDate { } enum Grouper { - ByDate(Option), + ByDate(Option>), +} + +enum GroupByColumn { + Name(Option>), } pub async fn group_by_date( @@ -80,31 +84,63 @@ pub async fn group_by_date( name, )) } else { - let grouper = if let Some(Tagged { item: fmt, tag: _ }) = format { - Grouper::ByDate(Some(fmt)) + let values = UntaggedValue::table(&values).into_value(&name); + + let grouper_column = if let Some(column_name) = column_name { + GroupByColumn::Name(Some(column_name)) + } else { + GroupByColumn::Name(None) + }; + + let grouper_date = if let Some(date_format) = format { + Grouper::ByDate(Some(date_format)) } else { Grouper::ByDate(None) }; - match grouper { - Grouper::ByDate(None) => { - match crate::utils::data::group( - column_name, - &values, - Some(Box::new(|row: &Value| row.format("%Y-%b-%d"))), - &name, - ) { + match (grouper_date, grouper_column) { + (Grouper::ByDate(None), GroupByColumn::Name(None)) => { + let block = Box::new(move |row: &Value| row.format("%Y-%b-%d")); + + match crate::utils::data::group(&values, &Some(block), &name) { Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), Err(err) => Err(err), } } - Grouper::ByDate(Some(fmt)) => { - match crate::utils::data::group( - column_name, - &values, - Some(Box::new(move |row: &Value| row.format(&fmt))), - &name, - ) { + (Grouper::ByDate(None), GroupByColumn::Name(Some(column_name))) => { + let block = Box::new(move |row: &Value| { + let group_key = match row.get_data_by_key(column_name.borrow_spanned()) { + Some(group_key) => Ok(group_key), + None => Err(suggestions(column_name.borrow_tagged(), &row)), + }; + + group_key?.format("%Y-%b-%d") + }); + + match crate::utils::data::group(&values, &Some(block), &name) { + Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), + Err(err) => Err(err), + } + } + (Grouper::ByDate(Some(fmt)), GroupByColumn::Name(None)) => { + let block = Box::new(move |row: &Value| row.format(&fmt)); + + match crate::utils::data::group(&values, &Some(block), &name) { + Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), + Err(err) => Err(err), + } + } + (Grouper::ByDate(Some(fmt)), GroupByColumn::Name(Some(column_name))) => { + let block = Box::new(move |row: &Value| { + let group_key = match row.get_data_by_key(column_name.borrow_spanned()) { + Some(group_key) => Ok(group_key), + None => Err(suggestions(column_name.borrow_tagged(), &row)), + }; + + group_key?.format(&fmt) + }); + + match crate::utils::data::group(&values, &Some(block), &name) { Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), Err(err) => Err(err), } @@ -113,6 +149,31 @@ pub async fn group_by_date( } } +pub fn suggestions(tried: Tagged<&str>, for_value: &Value) -> ShellError { + let possibilities = for_value.data_descriptors(); + + let mut possible_matches: Vec<_> = possibilities + .iter() + .map(|x| (natural::distance::levenshtein_distance(x, &tried), x)) + .collect(); + + possible_matches.sort(); + + if !possible_matches.is_empty() { + ShellError::labeled_error( + "Unknown column", + format!("did you mean '{}'?", possible_matches[0].1), + tried.tag(), + ) + } else { + ShellError::labeled_error( + "Unknown column", + "row does not contain this column", + tried.tag(), + ) + } +} + #[cfg(test)] mod tests { use super::GroupByDate; diff --git a/crates/nu-cli/src/commands/histogram.rs b/crates/nu-cli/src/commands/histogram.rs index 8a8c3b70ad..1e6fb4244e 100644 --- a/crates/nu-cli/src/commands/histogram.rs +++ b/crates/nu-cli/src/commands/histogram.rs @@ -76,14 +76,14 @@ pub async fn histogram( ) -> Result { let registry = registry.clone(); let name = args.call_info.name_tag.clone(); + let (HistogramArgs { column_name, rest }, input) = args.process(®istry).await?; let values: Vec = input.collect().await; + let values = UntaggedValue::table(&values).into_value(&name); - let Tagged { item: group_by, .. } = column_name.clone(); - - let groups = group(&column_name, values, &name)?; - let group_labels = columns_sorted(Some(group_by.clone()), &groups, &name); - let sorted = t_sort(Some(group_by), None, &groups, &name)?; + let groups = group(&Some(column_name.clone()), &values, &name)?; + let group_labels = columns_sorted(Some(column_name.clone()), &groups, &name); + let sorted = t_sort(Some(column_name.clone()), None, &groups, &name)?; let evaled = evaluate(&sorted, None, &name)?; let reduced = reduce(&evaled, None, &name)?; let maxima = map_max(&reduced, None, &name)?; diff --git a/crates/nu-cli/src/commands/split_by.rs b/crates/nu-cli/src/commands/split_by.rs index 6a35a178c9..ba44d32cdc 100644 --- a/crates/nu-cli/src/commands/split_by.rs +++ b/crates/nu-cli/src/commands/split_by.rs @@ -1,16 +1,15 @@ use crate::commands::WholeStreamCommand; use crate::prelude::*; use nu_errors::ShellError; -use nu_protocol::{ - Signature, SpannedTypeName, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value, -}; +use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, Value}; use nu_source::Tagged; +use nu_value_ext::as_string; pub struct SplitBy; #[derive(Deserialize)] pub struct SplitByArgs { - column_name: Tagged, + column_name: Option>, } #[async_trait] @@ -20,7 +19,7 @@ impl WholeStreamCommand for SplitBy { } fn signature(&self) -> Signature { - Signature::build("split-by").required( + Signature::build("split-by").optional( "column_name", SyntaxShape::String, "the name of the column within the nested table to split by", @@ -53,108 +52,84 @@ pub async fn split_by( return Err(ShellError::labeled_error( "Expected table from pipeline", "requires a table input", - column_name.span(), + name, )); } - match split(&column_name, &values[0], name) { - Ok(split) => Ok(OutputStream::one(split)), + match split(&column_name, &values[0], &name) { + Ok(splits) => Ok(OutputStream::one(ReturnSuccess::value(splits))), Err(err) => Err(err), } } +enum Grouper { + ByColumn(Option>), +} + pub fn split( - column_name: &Tagged, - value: &Value, + column_name: &Option>, + values: &Value, tag: impl Into, ) -> Result { - let origin_tag = tag.into(); + let name = tag.into(); - let mut splits = indexmap::IndexMap::new(); + let grouper = if let Some(column_name) = column_name { + Grouper::ByColumn(Some(column_name.clone())) + } else { + Grouper::ByColumn(None) + }; - match value { - Value { - value: UntaggedValue::Row(group_sets), - .. - } => { - for (group_key, group_value) in group_sets.entries.iter() { - match *group_value { - Value { - value: UntaggedValue::Table(ref dataset), - .. - } => { - let group = crate::commands::group_by::group( - &column_name, - dataset.to_vec(), - &origin_tag, - )?; - - match group { - Value { - value: UntaggedValue::Row(o), - .. - } => { - for (split_label, subset) in o.entries.into_iter() { - match subset { - Value { - value: UntaggedValue::Table(subset), - tag, - } => { - let s = splits - .entry(split_label.clone()) - .or_insert(indexmap::IndexMap::new()); - s.insert( - group_key.clone(), - UntaggedValue::table(&subset).into_value(tag), - ); - } - other => { - return Err(ShellError::type_error( - "a table value", - other.spanned_type_name(), - )) - } - } - } - } - _ => { - return Err(ShellError::type_error( - "a table value", - group.spanned_type_name(), - )) - } - } - } - ref other => { - return Err(ShellError::type_error( - "a table value", - other.spanned_type_name(), - )) - } + match grouper { + Grouper::ByColumn(Some(column_name)) => { + let block = Box::new(move |row: &Value| { + match row.get_data_by_key(column_name.borrow_spanned()) { + Some(group_key) => Ok(as_string(&group_key)?), + None => Err(suggestions(column_name.borrow_tagged(), &row)), } - } + }); + + crate::utils::data::split(&values, &Some(block), &name) } - _ => { - return Err(ShellError::type_error( - "a table value", - value.spanned_type_name(), - )) + Grouper::ByColumn(None) => { + let block = Box::new(move |row: &Value| match as_string(row) { + Ok(group_key) => Ok(group_key), + Err(reason) => Err(reason), + }); + + crate::utils::data::split(&values, &Some(block), &name) } } - - let mut out = TaggedDictBuilder::new(&origin_tag); - - for (k, v) in splits.into_iter() { - out.insert_untagged(k, UntaggedValue::row(v)); - } - - Ok(out.into_value()) } + +pub fn suggestions(tried: Tagged<&str>, for_value: &Value) -> ShellError { + let possibilities = for_value.data_descriptors(); + + let mut possible_matches: Vec<_> = possibilities + .iter() + .map(|x| (natural::distance::levenshtein_distance(x, &tried), x)) + .collect(); + + possible_matches.sort(); + + if !possible_matches.is_empty() { + return ShellError::labeled_error( + "Unknown column", + format!("did you mean '{}'?", possible_matches[0].1), + tried.tag(), + ); + } else { + return ShellError::labeled_error( + "Unknown column", + "row does not contain this column", + tried.tag(), + ); + } +} + #[cfg(test)] mod tests { - + use super::split; use crate::commands::group_by::group; - use crate::commands::split_by::split; use indexmap::IndexMap; use nu_errors::ShellError; use nu_protocol::{UntaggedValue, Value}; @@ -173,11 +148,12 @@ mod tests { } fn nu_releases_grouped_by_date() -> Result { - let key = String::from("date").tagged_unknown(); - group(&key, nu_releases_commiters(), Tag::unknown()) + let key = Some(String::from("date").tagged_unknown()); + let sample = table(&nu_releases_committers()); + group(&key, &sample, Tag::unknown()) } - fn nu_releases_commiters() -> Vec { + fn nu_releases_committers() -> Vec { vec![ row( indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, @@ -211,7 +187,7 @@ mod tests { #[test] fn splits_inner_tables_by_key() -> Result<(), ShellError> { - let for_key = String::from("country").tagged_unknown(); + let for_key = Some(String::from("country").tagged_unknown()); assert_eq!( split(&for_key, &nu_releases_grouped_by_date()?, Tag::unknown())?, @@ -257,7 +233,7 @@ mod tests { #[test] fn errors_if_key_within_some_inner_table_is_missing() { - let for_key = String::from("country").tagged_unknown(); + let for_key = Some(String::from("country").tagged_unknown()); let nu_releases = row(indexmap! { "August 23-2019".into() => table(&[ diff --git a/crates/nu-cli/src/commands/t_sort_by.rs b/crates/nu-cli/src/commands/t_sort_by.rs index 98d76ec27a..a1aac2e91e 100644 --- a/crates/nu-cli/src/commands/t_sort_by.rs +++ b/crates/nu-cli/src/commands/t_sort_by.rs @@ -78,7 +78,7 @@ async fn t_sort_by( let values: Vec = input.collect().await; let column_grouped_by_name = if let Some(grouped_by) = group_by { - Some(grouped_by.item().clone()) + Some(grouped_by) } else { None }; diff --git a/crates/nu-cli/src/utils/data/group.rs b/crates/nu-cli/src/utils/data/group.rs index e7b6161e05..e1f0dd7859 100644 --- a/crates/nu-cli/src/utils/data/group.rs +++ b/crates/nu-cli/src/utils/data/group.rs @@ -1,61 +1,28 @@ use indexmap::IndexMap; use nu_errors::ShellError; use nu_protocol::{TaggedDictBuilder, UntaggedValue, Value}; -use nu_source::{Tag, Tagged, TaggedItem}; -use nu_value_ext::{as_string, get_data_by_key}; +use nu_source::Tag; +use nu_value_ext::as_string; #[allow(clippy::type_complexity)] pub fn group( - column_name: Option>, - values: &[Value], - grouper: Option Result + Send>>, + values: &Value, + grouper: &Option Result + Send>>, tag: impl Into, ) -> Result { let tag = tag.into(); let mut groups: IndexMap> = IndexMap::new(); - for value in values { - let group_key = if let Some(ref column_name) = column_name { - get_data_by_key(&value, column_name.borrow_spanned()) + for value in values.table_entries() { + let group_key = if let Some(ref grouper) = grouper { + grouper(&value) } else { - Some(value.clone()) + as_string(&value) }; - if let Some(group_key) = group_key { - let group_key = if let Some(ref grouper) = grouper { - grouper(&group_key) - } else { - as_string(&group_key) - }; - let group = groups.entry(group_key?).or_insert(vec![]); - group.push((*value).clone()); - } else { - let column_name = column_name.unwrap_or_else(|| String::from("").tagged(&tag)); - - let possibilities = value.data_descriptors(); - - let mut possible_matches: Vec<_> = possibilities - .iter() - .map(|x| (natural::distance::levenshtein_distance(x, &column_name), x)) - .collect(); - - possible_matches.sort(); - - if !possible_matches.is_empty() { - return Err(ShellError::labeled_error( - "Unknown column", - format!("did you mean '{}'?", possible_matches[0].1), - column_name.tag(), - )); - } else { - return Err(ShellError::labeled_error( - "Unknown column", - "row does not contain this column", - column_name.tag(), - )); - } - } + let group = groups.entry(group_key?).or_insert(vec![]); + group.push((*value).clone()); } let mut out = TaggedDictBuilder::new(&tag); diff --git a/crates/nu-cli/src/utils/data/mod.rs b/crates/nu-cli/src/utils/data/mod.rs index f90d93bdbb..8e98210695 100644 --- a/crates/nu-cli/src/utils/data/mod.rs +++ b/crates/nu-cli/src/utils/data/mod.rs @@ -1,3 +1,5 @@ pub mod group; +pub mod split; pub use crate::utils::data::group::group; +pub use crate::utils::data::split::split; diff --git a/crates/nu-cli/src/utils/data_processing.rs b/crates/nu-cli/src/utils/data_processing.rs index 82966903cf..b17f714326 100644 --- a/crates/nu-cli/src/utils/data_processing.rs +++ b/crates/nu-cli/src/utils/data_processing.rs @@ -12,7 +12,7 @@ use num_traits::Zero; const ERR_EMPTY_DATA: &str = "Cannot perform aggregate math operation on empty data"; pub fn columns_sorted( - _group_by_name: Option, + _group_by_name: Option>, value: &Value, tag: impl Into, ) -> Vec> { @@ -61,7 +61,7 @@ pub fn columns_sorted( } pub fn t_sort( - group_by_name: Option, + group_by_name: Option>, split_by_name: Option, value: &Value, tag: impl Into, @@ -454,12 +454,13 @@ mod tests { } fn nu_releases_grouped_by_date() -> Result { - let key = String::from("date").tagged_unknown(); - group(&key, nu_releases_commiters(), Tag::unknown()) + let key = Some(String::from("date").tagged_unknown()); + let sample = table(&nu_releases_committers()); + group(&key, &sample, Tag::unknown()) } fn nu_releases_sorted_by_date() -> Result { - let key = String::from("date"); + let key = String::from("date").tagged(Tag::unknown()); t_sort( Some(key), @@ -481,7 +482,7 @@ mod tests { ) } - fn nu_releases_commiters() -> Vec { + fn nu_releases_committers() -> Vec { vec![ row( indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, @@ -515,7 +516,7 @@ mod tests { #[test] fn show_columns_sorted_given_a_column_to_sort_by() -> Result<(), ShellError> { - let by_column = String::from("date"); + let by_column = String::from("date").tagged(Tag::unknown()); assert_eq!( columns_sorted( @@ -535,7 +536,7 @@ mod tests { #[test] fn sorts_the_tables() -> Result<(), ShellError> { - let group_by = String::from("date"); + let group_by = String::from("date").tagged(Tag::unknown()); assert_eq!( t_sort( diff --git a/crates/nu-protocol/src/value.rs b/crates/nu-protocol/src/value.rs index acef9d86cf..c2315e2e8e 100644 --- a/crates/nu-protocol/src/value.rs +++ b/crates/nu-protocol/src/value.rs @@ -91,6 +91,14 @@ impl UntaggedValue { } } + /// Returns true if this value represents a table + pub fn is_table(&self) -> bool { + match self { + UntaggedValue::Table(_) => true, + _ => false, + } + } + /// Returns true if the value represents something other than Nothing pub fn is_some(&self) -> bool { !self.is_none()