From 21f48577aeb4bc483b5959788376c80e97c63475 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= Date: Sun, 3 Nov 2019 20:55:34 -0500 Subject: [PATCH 1/3] Reductions placeholder. --- src/cli.rs | 11 ++++- src/commands.rs | 16 ++++-- src/commands/reduce_by.rs | 100 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 6 deletions(-) create mode 100644 src/commands/reduce_by.rs diff --git a/src/cli.rs b/src/cli.rs index 483ee332ea..5af74b132b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -323,10 +323,17 @@ pub async fn cli() -> Result<(), Box> { whole_stream_command(Table), whole_stream_command(Version), whole_stream_command(Which), - #[cfg(data_processing_primitives)] - whole_stream_command(SplitBy), ]); + cfg_if::cfg_if! { + if #[cfg(data_processing_primitives)] { + context.add_commands(vec![ + whole_stream_command(SplitBy), + whole_stream_command(ReduceBy), + ]); + } + } + #[cfg(feature = "clipboard")] { context.add_commands(vec![whole_stream_command( diff --git a/src/commands.rs b/src/commands.rs index 7c2c188629..73a4b7244a 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -58,8 +58,12 @@ pub(crate) mod size; pub(crate) mod skip_while; pub(crate) mod sort_by; -#[cfg(data_processing_primitives)] -pub(crate) mod split_by; +cfg_if::cfg_if! { + if #[cfg(data_processing_primitives)] { + pub(crate) mod split_by; + pub(crate) mod reduce_by; + } +} pub(crate) mod split_column; pub(crate) mod split_row; @@ -138,8 +142,12 @@ pub(crate) use size::Size; pub(crate) use skip_while::SkipWhile; pub(crate) use sort_by::SortBy; -#[cfg(data_processing_primitives)] -pub(crate) use split_by::SplitBy; +cfg_if::cfg_if! 
{ + if #[cfg(data_processing_primitives)] { + pub(crate) use split_by::SplitBy; + pub(crate) use reduce_by::ReduceBy; + } +} pub(crate) use split_column::SplitColumn; pub(crate) use split_row::SplitRow; diff --git a/src/commands/reduce_by.rs b/src/commands/reduce_by.rs new file mode 100644 index 0000000000..de64caac15 --- /dev/null +++ b/src/commands/reduce_by.rs @@ -0,0 +1,100 @@ +use crate::commands::WholeStreamCommand; +use crate::data::TaggedDictBuilder; +use crate::parser::hir::SyntaxShape; +use crate::parser::registry; +use crate::data::base::Block; +use crate::prelude::*; + +use log::trace; + +pub struct ReduceBy; + +#[derive(Deserialize)] +pub struct ReduceByArgs { + calculator: Block, +} + +impl WholeStreamCommand for ReduceBy { + fn name(&self) -> &str { + "reduce-by" + } + + fn signature(&self) -> Signature { + Signature::build("reduce-by").required( + "calculator", + SyntaxShape::Block, + "The block used for calculating values", + ) + } + + fn usage(&self) -> &str { + "Crates a new table with the data from the table rows reduced by the block given." + } + + fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + args.process(registry, reduce_by)?.run() + } +} + +pub fn reduce_by( + ReduceByArgs { calculator }: ReduceByArgs, + RunnableContext { input, name, .. }: RunnableContext, +) -> Result { + let stream = async_stream! 
{ + let values: Vec> = input.values.collect().await; + + trace!("{:?}", &calculator); + + if values.is_empty() { + yield Err(ShellError::labeled_error( + "Expected table from pipeline", + "requires a table input", + name + )) + } else { + match reduce(values, &calculator, name) { + Ok(reduced) => yield ReturnSuccess::value(reduced), + Err(err) => yield Err(err) + } + } + }; + + Ok(stream.to_output_stream()) +} + +pub fn reduce( + values: Vec>, + calculator: &Block, + tag: impl Into, +) -> Result, ShellError> { + let tag = tag.into(); + + let mut out = TaggedDictBuilder::new(&tag); + + Ok(out.into_tagged_value()) +} + +#[cfg(test)] +mod tests { + + use crate::commands::reduce_by::reduce; + use crate::data::meta::*; + use crate::Value; + use indexmap::IndexMap; + + fn string(input: impl Into) -> Tagged { + Value::string(input.into()).tagged_unknown() + } + + fn row(entries: IndexMap>) -> Tagged { + Value::row(entries).tagged_unknown() + } + + fn table(list: &Vec>) -> Tagged { + Value::table(list).tagged_unknown() + } +} From 3163b0d362ca771e102ff09843f0cdd3f69b8d4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= Date: Tue, 12 Nov 2019 02:07:43 -0500 Subject: [PATCH 2/3] Data processing mvp histogram. 
--- features.toml | 2 +- src/cli.rs | 4 + src/commands.rs | 8 + src/commands/evaluate_by.rs | 260 ++++++++++++++++++++++++++ src/commands/group_by.rs | 42 ++++- src/commands/histogram.rs | 148 +++++++++++++++ src/commands/map_max_by.rs | 227 +++++++++++++++++++++++ src/commands/reduce_by.rs | 199 +++++++++++++++++--- src/commands/split_by.rs | 68 ++++--- src/commands/t_sort_by.rs | 358 ++++++++++++++++++++++++++++++++++++ src/data/base.rs | 1 + src/data/dict.rs | 2 +- 12 files changed, 1262 insertions(+), 57 deletions(-) create mode 100644 src/commands/evaluate_by.rs create mode 100644 src/commands/histogram.rs create mode 100644 src/commands/map_max_by.rs create mode 100644 src/commands/t_sort_by.rs diff --git a/features.toml b/features.toml index e1cf56e33d..6dd7a26c36 100644 --- a/features.toml +++ b/features.toml @@ -18,4 +18,4 @@ description = "Groundwork so tables can be data processed" reason = """ These will allow take tables and be able to transform, process, and explore. """ -enabled = false \ No newline at end of file +enabled = false diff --git a/src/cli.rs b/src/cli.rs index 5af74b132b..c6995ef711 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -330,6 +330,10 @@ pub async fn cli() -> Result<(), Box> { context.add_commands(vec![ whole_stream_command(SplitBy), whole_stream_command(ReduceBy), + whole_stream_command(EvaluateBy), + whole_stream_command(TSortBy), + whole_stream_command(MapMaxBy), + whole_stream_command(Histogram), ]); } } diff --git a/src/commands.rs b/src/commands.rs index 73a4b7244a..629289b565 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -62,6 +62,10 @@ cfg_if::cfg_if! { if #[cfg(data_processing_primitives)] { pub(crate) mod split_by; pub(crate) mod reduce_by; + pub(crate) mod evaluate_by; + pub(crate) mod t_sort_by; + pub(crate) mod map_max_by; + pub(crate) mod histogram; } } @@ -146,6 +150,10 @@ cfg_if::cfg_if! 
{ if #[cfg(data_processing_primitives)] { pub(crate) use split_by::SplitBy; pub(crate) use reduce_by::ReduceBy; + pub(crate) use evaluate_by::EvaluateBy; + pub(crate) use t_sort_by::TSortBy; + pub(crate) use map_max_by::MapMaxBy; + pub(crate) use histogram::Histogram; } } diff --git a/src/commands/evaluate_by.rs b/src/commands/evaluate_by.rs new file mode 100644 index 0000000000..f4925917c4 --- /dev/null +++ b/src/commands/evaluate_by.rs @@ -0,0 +1,260 @@ +use crate::commands::WholeStreamCommand; +use crate::parser::hir::SyntaxShape; +use crate::prelude::*; +pub struct EvaluateBy; + +#[derive(Deserialize)] +pub struct EvaluateByArgs { + evaluate_with: Option>, +} + +impl WholeStreamCommand for EvaluateBy { + fn name(&self) -> &str { + "evaluate-by" + } + + fn signature(&self) -> Signature { + Signature::build("evaluate-by").named( + "evaluate_with", + SyntaxShape::String, + "the name of the column to evaluate by", + ) + } + + fn usage(&self) -> &str { + "Creates a new table with the data from the tables rows evaluated by the column given." + } + + fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + args.process(registry, evaluate_by)?.run() + } +} + +pub fn evaluate_by( + EvaluateByArgs { evaluate_with }: EvaluateByArgs, + RunnableContext { input, name, .. }: RunnableContext, +) -> Result { + let stream = async_stream! 
{ + let values: Vec> = input.values.collect().await; + + + if values.is_empty() { + yield Err(ShellError::labeled_error( + "Expected table from pipeline", + "requires a table input", + name + )) + } else { + + let evaluate_with = if let Some(evaluator) = evaluate_with { + Some(evaluator.item().clone()) + } else { + None + }; + + match evaluate(&values[0], evaluate_with, name) { + Ok(evaluated) => yield ReturnSuccess::value(evaluated), + Err(err) => yield Err(err) + } + } + }; + + Ok(stream.to_output_stream()) +} + +fn fetch( + key: Option, +) -> Box, Tag) -> Option> + 'static> { + Box::new(move |value: Tagged, tag| match key { + Some(ref key_given) => { + if let Some(Tagged { item, .. }) = value.get_data_by_key(&key_given) { + Some(item.clone().tagged(tag)) + } else { + None + } + } + None => Some(Value::int(1).tagged(tag)), + }) +} + +pub fn evaluate( + values: &Tagged, + evaluator: Option, + tag: impl Into, +) -> Result, ShellError> { + let tag = tag.into(); + + let evaluate_with = match evaluator { + Some(keyfn) => fetch(Some(keyfn)), + None => fetch(None), + }; + + let results: Tagged = match values { + Tagged { + item: Value::Table(datasets), + .. + } => { + let datasets: Vec<_> = datasets + .into_iter() + .map(|subsets| match subsets { + Tagged { + item: Value::Table(subsets), + .. + } => { + let subsets: Vec<_> = subsets + .clone() + .into_iter() + .map(|data| match data { + Tagged { + item: Value::Table(data), + .. 
+ } => { + let data: Vec<_> = data + .into_iter() + .map(|x| evaluate_with(x.clone(), tag.clone()).unwrap()) + .collect(); + Value::Table(data).tagged(&tag) + } + _ => Value::Table(vec![]).tagged(&tag), + }) + .collect(); + Value::Table(subsets).tagged(&tag) + } + _ => Value::Table(vec![]).tagged(&tag), + }) + .collect(); + + Value::Table(datasets.clone()).tagged(&tag) + } + _ => Value::Table(vec![]).tagged(&tag), + }; + + Ok(results) +} + +#[cfg(test)] +mod tests { + + use crate::commands::evaluate_by::{evaluate, fetch}; + use crate::commands::group_by::group; + use crate::commands::t_sort_by::t_sort; + use crate::data::meta::*; + use crate::prelude::*; + use crate::Value; + use indexmap::IndexMap; + + fn int(s: impl Into) -> Tagged { + Value::int(s).tagged_unknown() + } + + fn string(input: impl Into) -> Tagged { + Value::string(input.into()).tagged_unknown() + } + + fn row(entries: IndexMap>) -> Tagged { + Value::row(entries).tagged_unknown() + } + + fn table(list: &Vec>) -> Tagged { + Value::table(list).tagged_unknown() + } + + fn nu_releases_sorted_by_date() -> Tagged { + let key = String::from("date"); + + t_sort( + Some(key), + None, + &nu_releases_grouped_by_date(), + Tag::unknown(), + ) + .unwrap() + } + + fn nu_releases_grouped_by_date() -> Tagged { + let key = String::from("date").tagged_unknown(); + group(&key, nu_releases_commiters(), Tag::unknown()).unwrap() + } + + fn nu_releases_commiters() -> Vec> { + vec![ + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! 
{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")}, + ), + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, + ), + ] + } + + #[test] + fn evaluator_fetches_by_column_if_supplied_a_column_name() { + let subject = row(indexmap! { "name".into() => string("andres") }); + + let evaluator = fetch(Some(String::from("name"))); + + assert_eq!(evaluator(subject, Tag::unknown()), Some(string("andres"))); + } + + #[test] + fn evaluator_returns_1_if_no_column_name_given() { + let subject = row(indexmap! 
{ "name".into() => string("andres") }); + let evaluator = fetch(None); + + assert_eq!( + evaluator(subject, Tag::unknown()), + Some(Value::int(1).tagged_unknown()) + ); + } + + #[test] + fn evaluates_the_tables() { + assert_eq!( + evaluate(&nu_releases_sorted_by_date(), None, Tag::unknown()).unwrap(), + table(&vec![table(&vec![ + table(&vec![int(1), int(1), int(1)]), + table(&vec![int(1), int(1), int(1)]), + table(&vec![int(1), int(1), int(1)]), + ]),]) + ); + } + + #[test] + fn evaluates_the_tables_with_custom_evaluator() { + let eval = String::from("name"); + + assert_eq!( + evaluate(&nu_releases_sorted_by_date(), Some(eval), Tag::unknown()).unwrap(), + table(&vec![table(&vec![ + table(&vec![string("AR"), string("JT"), string("YK")]), + table(&vec![string("AR"), string("YK"), string("JT")]), + table(&vec![string("YK"), string("JT"), string("AR")]), + ]),]) + ); + } +} diff --git a/src/commands/group_by.rs b/src/commands/group_by.rs index 66c1360f5d..07e74841b1 100644 --- a/src/commands/group_by.rs +++ b/src/commands/group_by.rs @@ -131,11 +131,8 @@ mod tests { Value::table(list).tagged_unknown() } - #[test] - fn groups_table_by_key() { - let for_key = String::from("date").tagged_unknown(); - - let nu_releases = vec![ + fn nu_releases_commiters() -> Vec> { + vec![ row( indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, ), @@ -163,10 +160,15 @@ mod tests { row( indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, ), - ]; + ] + } + + #[test] + fn groups_table_by_date_column() { + let for_key = String::from("date").tagged_unknown(); assert_eq!( - group(&for_key, nu_releases, Tag::unknown()).unwrap(), + group(&for_key, nu_releases_commiters(), Tag::unknown()).unwrap(), row(indexmap! 
{ "August 23-2019".into() => table(&vec![ row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}), @@ -186,4 +188,30 @@ mod tests { }) ); } + + #[test] + fn groups_table_by_country_column() { + let for_key = String::from("country").tagged_unknown(); + + assert_eq!( + group(&for_key, nu_releases_commiters(), Tag::unknown()).unwrap(), + row(indexmap! { + "EC".into() => table(&vec![ + row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}), + row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("Sept 24-2019")}), + row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}) + ]), + "NZ".into() => table(&vec![ + row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}), + row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}), + row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("Sept 24-2019")}) + ]), + "US".into() => table(&vec![ + row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}), + row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("Sept 24-2019")}), + row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}), + ]), + }) + ); + } } diff --git a/src/commands/histogram.rs b/src/commands/histogram.rs new file mode 100644 index 0000000000..52d72bdfbf --- /dev/null +++ b/src/commands/histogram.rs @@ -0,0 +1,148 @@ +use crate::commands::WholeStreamCommand; +use crate::commands::group_by::group; +use crate::commands::t_sort_by::columns_sorted; +use 
crate::commands::t_sort_by::t_sort; +use crate::commands::evaluate_by::evaluate; +use crate::commands::reduce_by::reduce; +use crate::commands::map_max_by::map_max; +use crate::data::TaggedDictBuilder; +use crate::errors::ShellError; +use crate::prelude::*; +use num_traits::cast::ToPrimitive; + +pub struct Histogram; + +#[derive(Deserialize)] +pub struct HistogramArgs { + column_name: Tagged, +} + +impl WholeStreamCommand for Histogram { + fn name(&self) -> &str { + "histogram" + } + + fn signature(&self) -> Signature { + Signature::build("histogram").required( + "column_name", + SyntaxShape::String, + "the name of the column to graph by", + ) + } + + fn usage(&self) -> &str { + "Creates a new table with a histogram based on the column name passed in." + } + + fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + args.process(registry, histogram)?.run() + } +} + +pub fn histogram( + HistogramArgs { column_name }: HistogramArgs, + RunnableContext { input, name, .. }: RunnableContext, +) -> Result { + let stream = async_stream! { + let values: Vec> = input.values.collect().await; + + let Tagged { item: group_by, .. } = column_name.clone(); + + let groups = group(&column_name, values, &name)?; + let group_labels = columns_sorted(Some(group_by.clone()), &groups, &name); + let sorted = t_sort(Some(group_by.clone()), None, &groups, &name)?; + let evaled = evaluate(&sorted, None, &name)?; + let reduced = reduce(&evaled, None, &name)?; + let maxima = map_max(&reduced, None, &name)?; + let percents = percentages(&reduced, maxima, &name)?; + + match percents { + Tagged { + item: Value::Table(datasets), + .. + } => { + + let mut idx = 0; + + if let Tagged { item: Value::Table(start), .. 
} = datasets.get(0).unwrap() { + for percentage in start.into_iter() { + let mut fact = TaggedDictBuilder::new(&name); + fact.insert_tagged("committer", group_labels.get(idx).unwrap().clone()); + + if let Tagged { item: Value::Primitive(Primitive::Int(ref num)), .. } = percentage.clone() { + fact.insert("activity", std::iter::repeat("*").take(num.to_i32().unwrap() as usize).collect::()); + } + + idx = idx + 1; + + yield ReturnSuccess::value(fact.into_tagged_value()); + } + } + } + _ => {} + } + }; + + Ok(stream.to_output_stream()) +} + +fn percentages( + values: &Tagged, + max: Tagged, + tag: impl Into, +) -> Result, ShellError> { + let tag = tag.into(); + + let results: Tagged = match values { + Tagged { + item: Value::Table(datasets), + .. + } => { + let datasets: Vec<_> = datasets + .into_iter() + .map(|subsets| { + match subsets { + Tagged { + item: Value::Table(data), + .. + } => { + let data = data + .into_iter() + .map(|d| match d { + Tagged { + item: Value::Primitive(Primitive::Int(n)), + .. + } => { + let max = match max { + Tagged { + item: Value::Primitive(Primitive::Int(ref maxima)), + .. 
+ } => maxima.to_i32().unwrap(), + _ => 0, + }; + + let n = { n.to_i32().unwrap() * 100 / max }; + + Value::number(n).tagged(&tag) + } + _ => Value::number(0).tagged(&tag), + }) + .collect::>(); + Value::Table(data).tagged(&tag) + } + _ => Value::Table(vec![]).tagged(&tag), + } + }) + .collect(); + + Value::Table(datasets).tagged(&tag) + } + other => other.clone(), + }; + + Ok(results) +} diff --git a/src/commands/map_max_by.rs b/src/commands/map_max_by.rs new file mode 100644 index 0000000000..31a02a81b1 --- /dev/null +++ b/src/commands/map_max_by.rs @@ -0,0 +1,227 @@ +use crate::commands::WholeStreamCommand; +use crate::parser::hir::SyntaxShape; +use crate::prelude::*; +use num_traits::cast::ToPrimitive; +pub struct MapMaxBy; + +#[derive(Deserialize)] +pub struct MapMaxByArgs { + column_name: Option>, +} + +impl WholeStreamCommand for MapMaxBy { + fn name(&self) -> &str { + "map-max-by" + } + + fn signature(&self) -> Signature { + Signature::build("map-max-by").named( + "column_name", + SyntaxShape::String, + "the name of the column to map-max the table's rows", + ) + } + + fn usage(&self) -> &str { + "Creates a new table with the data from the tables rows maxed by the column given." + } + + fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + args.process(registry, map_max_by)?.run() + } +} + +pub fn map_max_by( + MapMaxByArgs { column_name }: MapMaxByArgs, + RunnableContext { input, name, .. }: RunnableContext, +) -> Result { + let stream = async_stream! 
{ + let values: Vec> = input.values.collect().await; + + + if values.is_empty() { + yield Err(ShellError::labeled_error( + "Expected table from pipeline", + "requires a table input", + name + )) + } else { + + let map_by_column = if let Some(column_to_map) = column_name { + Some(column_to_map.item().clone()) + } else { + None + }; + + match map_max(&values[0], map_by_column, name) { + Ok(table_maxed) => yield ReturnSuccess::value(table_maxed), + Err(err) => yield Err(err) + } + } + }; + + Ok(stream.to_output_stream()) +} + +pub fn map_max( + values: &Tagged, + _map_by_column_name: Option, + tag: impl Into, +) -> Result, ShellError> { + let tag = tag.into(); + + let results: Tagged = match values { + Tagged { + item: Value::Table(datasets), + .. + } => { + let datasets: Vec<_> = datasets + .into_iter() + .map(|subsets| { + match subsets { + Tagged { + item: Value::Table(data), + .. + } => { + let data = data.into_iter().fold(0, |acc, value| match value { + Tagged { + item: Value::Primitive(Primitive::Int(n)), + .. + } => { + if n.to_i32().unwrap() > acc { + n.to_i32().unwrap() + } else { + acc + } + } + _ => acc, + }); + Value::number(data).tagged(&tag) + } + _ => Value::number(0).tagged(&tag), + } + }) + .collect(); + + let datasets = datasets.iter().fold(0, |max, value| match value { + Tagged { + item: Value::Primitive(Primitive::Int(n)), + .. 
+ } => { + if n.to_i32().unwrap() > max { + n.to_i32().unwrap() + } else { + max + } + } + _ => max, + }); + Value::number(datasets).tagged(&tag) + } + _ => Value::number(-1).tagged(&tag), + }; + + Ok(results) +} + +#[cfg(test)] +mod tests { + + use crate::commands::evaluate_by::evaluate; + use crate::commands::group_by::group; + use crate::commands::map_max_by::map_max; + use crate::commands::reduce_by::reduce; + use crate::commands::t_sort_by::t_sort; + use crate::data::meta::*; + use crate::prelude::*; + use crate::Value; + use indexmap::IndexMap; + + fn int(s: impl Into) -> Tagged { + Value::int(s).tagged_unknown() + } + + fn string(input: impl Into) -> Tagged { + Value::string(input.into()).tagged_unknown() + } + + fn row(entries: IndexMap>) -> Tagged { + Value::row(entries).tagged_unknown() + } + + fn nu_releases_evaluated_by_default_one() -> Tagged { + evaluate(&nu_releases_sorted_by_date(), None, Tag::unknown()).unwrap() + } + + fn nu_releases_reduced_by_sum() -> Tagged { + reduce( + &nu_releases_evaluated_by_default_one(), + Some(String::from("sum")), + Tag::unknown(), + ) + .unwrap() + } + + fn nu_releases_sorted_by_date() -> Tagged { + let key = String::from("date"); + + t_sort( + Some(key), + None, + &nu_releases_grouped_by_date(), + Tag::unknown(), + ) + .unwrap() + } + + fn nu_releases_grouped_by_date() -> Tagged { + let key = String::from("date").tagged_unknown(); + group(&key, nu_releases_commiters(), Tag::unknown()).unwrap() + } + + fn nu_releases_commiters() -> Vec> { + vec![ + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! 
{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")}, + ), + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, + ), + row( + indexmap! {"name".into() => string("JK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, + ), + ] + } + #[test] + fn maps_and_gets_max_value() { + assert_eq!( + map_max(&nu_releases_reduced_by_sum(), None, Tag::unknown()).unwrap(), + int(4) + ); + } +} diff --git a/src/commands/reduce_by.rs b/src/commands/reduce_by.rs index de64caac15..53ddf7e15d 100644 --- a/src/commands/reduce_by.rs +++ b/src/commands/reduce_by.rs @@ -1,17 +1,12 @@ use crate::commands::WholeStreamCommand; -use crate::data::TaggedDictBuilder; use crate::parser::hir::SyntaxShape; -use crate::parser::registry; -use crate::data::base::Block; use crate::prelude::*; - -use log::trace; - +use num_traits::cast::ToPrimitive; pub struct ReduceBy; #[derive(Deserialize)] pub struct ReduceByArgs { - calculator: Block, + reduce_with: Option>, } impl WholeStreamCommand for ReduceBy { @@ -20,15 +15,15 @@ impl WholeStreamCommand for ReduceBy { } fn signature(&self) -> Signature { - Signature::build("reduce-by").required( - "calculator", - SyntaxShape::Block, - "The block used for calculating values", + Signature::build("reduce-by").named( + "reduce_with", + SyntaxShape::String, + "the command 
to reduce by with", ) } fn usage(&self) -> &str { - "Crates a new table with the data from the table rows reduced by the block given." + "Creates a new table with the data from the tables rows reduced by the command given." } fn run( @@ -41,14 +36,12 @@ impl WholeStreamCommand for ReduceBy { } pub fn reduce_by( - ReduceByArgs { calculator }: ReduceByArgs, + ReduceByArgs { reduce_with }: ReduceByArgs, RunnableContext { input, name, .. }: RunnableContext, ) -> Result { let stream = async_stream! { let values: Vec> = input.values.collect().await; - trace!("{:?}", &calculator); - if values.is_empty() { yield Err(ShellError::labeled_error( "Expected table from pipeline", @@ -56,7 +49,14 @@ pub fn reduce_by( name )) } else { - match reduce(values, &calculator, name) { + + let reduce_with = if let Some(reducer) = reduce_with { + Some(reducer.item().clone()) + } else { + None + }; + + match reduce(&values[0], reduce_with, name) { Ok(reduced) => yield ReturnSuccess::value(reduced), Err(err) => yield Err(err) } @@ -66,26 +66,109 @@ pub fn reduce_by( Ok(stream.to_output_stream()) } +fn sum(data: Vec>) -> i32 { + data.into_iter().fold(0, |acc, value| match value { + Tagged { + item: Value::Primitive(Primitive::Int(n)), + .. 
+ } => acc + n.to_i32().unwrap(), + _ => acc, + }) +} + +fn formula( + acc_begin: i32, + calculator: Box>) -> i32 + 'static>, +) -> Box>) -> i32 + 'static> { + Box::new(move |acc, datax| -> i32 { + let result = acc * acc_begin; + result + calculator(datax) + }) +} + +fn reducer_for(command: Reduce) -> Box>) -> i32 + 'static> { + match command { + Reduce::Sum | Reduce::Default => Box::new(formula(0, Box::new(sum))), + } +} + +pub enum Reduce { + Sum, + Default, +} + pub fn reduce( - values: Vec>, - calculator: &Block, + values: &Tagged, + reducer: Option, tag: impl Into, ) -> Result, ShellError> { let tag = tag.into(); - let mut out = TaggedDictBuilder::new(&tag); + let reduce_with = match reducer { + Some(cmd) if cmd == "sum" => reducer_for(Reduce::Sum), + Some(_) | None => reducer_for(Reduce::Default), + }; - Ok(out.into_tagged_value()) + let results: Tagged = match values { + Tagged { + item: Value::Table(datasets), + .. + } => { + let datasets: Vec<_> = datasets + .into_iter() + .map(|subsets| { + let mut acc = 0; + match subsets { + Tagged { + item: Value::Table(data), + .. + } => { + let data = data + .into_iter() + .map(|d| { + if let Tagged { + item: Value::Table(x), + .. 
+ } = d + { + acc = reduce_with(acc, x.clone()); + Value::number(acc).tagged(&tag) + } else { + Value::number(0).tagged(&tag) + } + }) + .collect::>(); + Value::Table(data).tagged(&tag) + } + _ => Value::Table(vec![]).tagged(&tag), + } + }) + .collect(); + + Value::Table(datasets).tagged(&tag) + } + _ => Value::Table(vec![]).tagged(&tag), + }; + + Ok(results) } #[cfg(test)] mod tests { - use crate::commands::reduce_by::reduce; + use crate::commands::evaluate_by::evaluate; + use crate::commands::group_by::group; + use crate::commands::reduce_by::{reduce, reducer_for, Reduce}; + use crate::commands::t_sort_by::t_sort; use crate::data::meta::*; + use crate::prelude::*; use crate::Value; use indexmap::IndexMap; + fn int(s: impl Into) -> Tagged { + Value::int(s).tagged_unknown() + } + fn string(input: impl Into) -> Tagged { Value::string(input.into()).tagged_unknown() } @@ -97,4 +180,78 @@ mod tests { fn table(list: &Vec>) -> Tagged { Value::table(list).tagged_unknown() } + + fn nu_releases_sorted_by_date() -> Tagged { + let key = String::from("date"); + + t_sort( + Some(key), + None, + &nu_releases_grouped_by_date(), + Tag::unknown(), + ) + .unwrap() + } + + fn nu_releases_evaluated_by_default_one() -> Tagged { + evaluate(&nu_releases_sorted_by_date(), None, Tag::unknown()).unwrap() + } + + fn nu_releases_grouped_by_date() -> Tagged { + let key = String::from("date").tagged_unknown(); + group(&key, nu_releases_commiters(), Tag::unknown()).unwrap() + } + + fn nu_releases_commiters() -> Vec> { + vec![ + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! 
{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")}, + ), + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, + ), + ] + } + + #[test] + fn reducer_computes_given_a_sum_command() { + let subject = vec![int(1), int(1), int(1)]; + + let action = reducer_for(Reduce::Sum); + + assert_eq!(action(0, subject), 3); + } + + #[test] + fn reducer_computes() { + assert_eq!( + reduce( + &nu_releases_evaluated_by_default_one(), + Some(String::from("sum")), + Tag::unknown() + ), + Ok(table(&vec![table(&vec![int(3), int(3), int(3)])])) + ); + } } diff --git a/src/commands/split_by.rs b/src/commands/split_by.rs index b995b041d7..1f972a2c55 100644 --- a/src/commands/split_by.rs +++ b/src/commands/split_by.rs @@ -150,6 +150,7 @@ pub fn split( #[cfg(test)] mod tests { + use crate::commands::group_by::group; use crate::commands::split_by::split; use crate::data::meta::*; use crate::Value; @@ -167,30 +168,49 @@ mod tests { Value::table(list).tagged_unknown() } + fn nu_releases_grouped_by_date() -> Tagged { + let key = String::from("date").tagged_unknown(); + group(&key, nu_releases_commiters(), Tag::unknown()).unwrap() + } + + fn nu_releases_commiters() -> Vec> { + vec![ + row( + indexmap! 
{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("Sept 24-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("Sept 24-2019")}, + ), + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("Sept 24-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, + ), + ] + } + #[test] fn splits_inner_tables_by_key() { let for_key = String::from("country").tagged_unknown(); - let nu_releases = row(indexmap! 
{ - "August 23-2019".into() => table(&vec![ - row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}), - row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}), - row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}) - ]), - "Sept 24-2019".into() => table(&vec![ - row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("Sept 24-2019")}), - row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("Sept 24-2019")}), - row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("Sept 24-2019")}) - ]), - "October 10-2019".into() => table(&vec![ - row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}), - row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}), - row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}) - ]) - }); - assert_eq!( - split(&for_key, &nu_releases, Tag::unknown()).unwrap(), + split(&for_key, &nu_releases_grouped_by_date(), Tag::unknown()).unwrap(), Value::row(indexmap! { "EC".into() => row(indexmap! { "August 23-2019".into() => table(&vec![ @@ -235,18 +255,12 @@ mod tests { let nu_releases = row(indexmap! 
{ "August 23-2019".into() => table(&vec![ - row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}), - row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}), - row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}) + row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}) ]), "Sept 24-2019".into() => table(&vec![ - row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("Sept 24-2019")}), - row(indexmap!{"name".into() => Value::string("JT").tagged(Tag::from(Span::new(5,10))), "date".into() => string("Sept 24-2019")}), - row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("Sept 24-2019")}) + row(indexmap!{"name".into() => Value::string("JT").tagged(Tag::from(Span::new(5,10))), "date".into() => string("Sept 24-2019")}) ]), "October 10-2019".into() => table(&vec![ - row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}), - row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}), row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}) ]) }); diff --git a/src/commands/t_sort_by.rs b/src/commands/t_sort_by.rs new file mode 100644 index 0000000000..1df4cce887 --- /dev/null +++ b/src/commands/t_sort_by.rs @@ -0,0 +1,358 @@ +use crate::commands::WholeStreamCommand; +use crate::data::{TaggedDictBuilder, TaggedListBuilder}; +use crate::errors::ShellError; +use crate::prelude::*; +use chrono::{DateTime, NaiveDate, Utc}; + +pub struct TSortBy; + +#[derive(Deserialize)] +pub struct TSortByArgs { + 
#[serde(rename(deserialize = "show-columns"))] + show_columns: bool, + group_by: Option>, + #[allow(unused)] + split_by: Option, +} + +impl WholeStreamCommand for TSortBy { + fn name(&self) -> &str { + "t-sort-by" + } + + fn signature(&self) -> Signature { + Signature::build("t-sort-by") + .switch("show-columns", "Displays the column names sorted") + .named( + "group_by", + SyntaxShape::String, + "the name of the column to group by", + ) + .named( + "split_by", + SyntaxShape::String, + "the name of the column within the grouped by table to split by", + ) + } + + fn usage(&self) -> &str { + "Sort by the given columns." + } + + fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + args.process(registry, t_sort_by)?.run() + } +} + +fn t_sort_by( + TSortByArgs { + show_columns, + group_by, + .. + }: TSortByArgs, + RunnableContext { input, name, .. }: RunnableContext, +) -> Result { + Ok(OutputStream::new(async_stream! { + let values: Vec> = input.values.collect().await; + + let column_grouped_by_name = if let Some(grouped_by) = group_by { + Some(grouped_by.item().clone()) + } else { + None + }; + + if show_columns { + for label in columns_sorted(column_grouped_by_name, &values[0], &name).iter() { + yield ReturnSuccess::value(label.clone()); + } + } else { + match t_sort(column_grouped_by_name, None, &values[0], name) { + Ok(sorted) => yield ReturnSuccess::value(sorted), + Err(err) => yield Err(err) + } + } + })) +} + +pub fn columns_sorted( + _group_by_name: Option, + value: &Tagged, + tag: impl Into, +) -> Vec> { + let origin_tag = tag.into(); + + match value { + Tagged { + item: Value::Row(rows), + .. 
+ } => { + let mut keys: Vec> = + rows.entries + .keys() + .map(|s| s.as_ref()) + .map(|k: &str| { + let date = NaiveDate::parse_from_str(k, "%B %d-%Y"); + + let date = match date { + Ok(parsed) => Value::Primitive(Primitive::Date( + DateTime::::from_utc(parsed.and_hms(12, 34, 56), Utc), + )), + Err(_) => Value::string(k), + }; + + date.tagged_unknown() + }) + .collect(); + + keys.sort(); + + let keys: Vec = keys + .into_iter() + .map(|k| { + Value::string(match k { + Tagged { + item: Value::Primitive(Primitive::Date(d)), + .. + } => format!("{}", d.format("%B %d-%Y")), + _ => k.as_string().unwrap(), + }) + }) + .collect(); + + keys.into_iter().map(|k| k.tagged(&origin_tag)).collect() + } + _ => vec![Value::string("default").tagged(&origin_tag)] + } +} + +pub fn t_sort( + group_by_name: Option, + split_by_name: Option, + value: &Tagged, + tag: impl Into, +) -> Result, ShellError> { + let origin_tag = tag.into(); + + match group_by_name { + Some(column_name) => { + let sorted_labels = columns_sorted(Some(column_name), value, &origin_tag); + + match split_by_name { + None => { + let mut dataset = TaggedDictBuilder::new(&origin_tag); + dataset.insert_tagged("default", value.clone()); + let dataset = dataset.into_tagged_value(); + + let split_labels = match &dataset { + Tagged { + item: Value::Row(rows), + .. + } => { + let mut keys: Vec> = rows + .entries + .keys() + .map(|s| s.as_ref()) + .map(|k: &str| { + let date = NaiveDate::parse_from_str(k, "%B %d-%Y"); + + let date = match date { + Ok(parsed) => Value::Primitive(Primitive::Date( + DateTime::::from_utc( + parsed.and_hms(12, 34, 56), + Utc, + ), + )), + Err(_) => Value::string(k), + }; + + date.tagged_unknown() + }) + .collect(); + + keys.sort(); + + let keys: Vec = keys + .into_iter() + .map(|k| { + Value::string(match k { + Tagged { + item: Value::Primitive(Primitive::Date(d)), + .. 
+ } => format!("{}", d.format("%B %d-%Y")), + _ => k.as_string().unwrap(), + }) + }) + .collect(); + + keys.into_iter().map(|k| k.tagged(&origin_tag)).collect() + } + _ => vec![], + }; + + let results: Vec>> = split_labels + .into_iter() + .map(|split| { + let groups = dataset.get_data_by_key(&split.as_string().unwrap()); + + sorted_labels + .clone() + .into_iter() + .map(|label| { + let label = label.as_string().unwrap(); + + match groups { + Some(Tagged { + item: Value::Row(dict), + .. + }) => dict.get_data_by_key(&label).unwrap().clone(), + _ => Value::Table(vec![]).tagged(&origin_tag), + } + }) + .collect() + }) + .collect(); + + let mut outer = TaggedListBuilder::new(&origin_tag); + + for i in results { + outer.insert_tagged(Value::Table(i).tagged(&origin_tag)); + } + + return Ok(Value::Table(outer.list).tagged(&origin_tag)); + } + Some(_) => return Ok(Value::nothing().tagged(&origin_tag)), + } + } + None => return Ok(Value::nothing().tagged(&origin_tag)), + } +} +#[cfg(test)] +mod tests { + + use crate::commands::group_by::group; + use crate::commands::t_sort_by::{columns_sorted, t_sort}; + use crate::data::meta::*; + use crate::Value; + use indexmap::IndexMap; + + fn string(input: impl Into) -> Tagged { + Value::string(input.into()).tagged_unknown() + } + + fn row(entries: IndexMap>) -> Tagged { + Value::row(entries).tagged_unknown() + } + + fn table(list: &Vec>) -> Tagged { + Value::table(list).tagged_unknown() + } + + fn nu_releases_grouped_by_date() -> Tagged { + let key = String::from("date").tagged_unknown(); + group(&key, nu_releases_commiters(), Tag::unknown()).unwrap() + } + + fn nu_releases_commiters() -> Vec> { + vec![ + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}, + ), + row( + indexmap! 
{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")}, + ), + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}, + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")}, + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, + ), + ] + } + + #[test] + fn show_columns_sorted_given_a_column_to_sort_by() { + let by_column = String::from("date"); + + assert_eq!( + columns_sorted( + Some(by_column), + &nu_releases_grouped_by_date(), + Tag::unknown() + ), + vec![ + string("August 23-2019"), + string("September 24-2019"), + string("October 10-2019") + ] + ) + } + + #[test] + fn sorts_the_tables() { + let group_by = String::from("date"); + + assert_eq!( + t_sort( + Some(group_by), + None, + &nu_releases_grouped_by_date(), + Tag::unknown() + ) + .unwrap(), + table(&vec![table(&vec![ + table(&vec![ + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")} + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")} + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")} + ) + ]), + table(&vec![ + row( + indexmap! 
{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")} + ), + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")} + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")} + ) + ]), + table(&vec![ + row( + indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")} + ), + row( + indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")} + ), + row( + indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")} + ) + ]), + ]),]) + ); + } +} diff --git a/src/data/base.rs b/src/data/base.rs index d877e4a7cc..f0357273e2 100644 --- a/src/data/base.rs +++ b/src/data/base.rs @@ -430,6 +430,7 @@ impl Tagged { Value::Primitive(Primitive::Int(x)) => Ok(format!("{}", x)), Value::Primitive(Primitive::Bytes(x)) => Ok(format!("{}", x)), Value::Primitive(Primitive::Path(x)) => Ok(format!("{}", x.display())), + Value::Primitive(Primitive::Date(x)) => Ok(format!("{}", x.to_rfc3339())), // TODO: this should definitely be more general with better errors other => Err(ShellError::labeled_error( "Expected string", diff --git a/src/data/dict.rs b/src/data/dict.rs index 432170f361..32393f0a0d 100644 --- a/src/data/dict.rs +++ b/src/data/dict.rs @@ -114,7 +114,7 @@ impl Dictionary { #[derive(Debug)] pub struct TaggedListBuilder { tag: Tag, - list: Vec>, + pub list: Vec>, } impl TaggedListBuilder { From 00b3c2036a0295bc34bf4d96a124621acf3b21f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= Date: Tue, 12 Nov 2019 03:38:55 -0500 Subject: [PATCH 3/3] This is part of on-going work with capabilities when working with tables and able to work with them for data processing & viewing 
purposes. At the moment, among the ways to process said tables, we are able to view a histogram of a given column. As usage matures, we may find certain core commands that could be used ergonomically when working with tables on Nu. --- README.md | 2 + src/cli.rs | 4 +- src/commands.rs | 44 ++++++++--------- src/commands/histogram.rs | 97 ++++++++++++++++++++++---------------- src/commands/map_max_by.rs | 40 ++++++++-------- src/commands/t_sort_by.rs | 38 +++++++-------- tests/commands_test.rs | 31 +++++++++++- 7 files changed, 147 insertions(+), 109 deletions(-) diff --git a/README.md b/README.md index 46a4c45ac8..b1ce4feec1 100644 --- a/README.md +++ b/README.md @@ -256,6 +256,7 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat | format pattern | Format table row data as a string following the given pattern | | get column-or-column-path | Open column and get data from the corresponding cells | | group-by column | Creates a new table with the data from the table rows grouped by the column given | +| histogram column ...column-names | Creates a new table with a histogram based on the column name passed in, optionally give the frequency column name | inc (column-or-column-path) | Increment a value or version. Optionally use the column of a table | | insert column-or-column-path value | Insert a new column to the table | | last amount | Show only the last number of rows | @@ -267,6 +268,7 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat | reverse | Reverses the table. | | skip amount | Skip a number of rows | | skip-while condition | Skips rows while the condition matches. | +| split-by column | Creates a new table with the data from the inner tables split by the column given | | sort-by ...columns | Sort by the given columns | | str (column) | Apply string function. 
Optionally use the column of a table | | sum | Sum a column of values | diff --git a/src/cli.rs b/src/cli.rs index c6995ef711..b882d57d69 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -301,6 +301,7 @@ pub async fn cli() -> Result<(), Box> { whole_stream_command(FromYML), whole_stream_command(Pick), whole_stream_command(Get), + whole_stream_command(Histogram), per_item_command(Remove), per_item_command(Fetch), per_item_command(Open), @@ -320,6 +321,7 @@ pub async fn cli() -> Result<(), Box> { per_item_command(Mkdir), per_item_command(Move), whole_stream_command(Save), + whole_stream_command(SplitBy), whole_stream_command(Table), whole_stream_command(Version), whole_stream_command(Which), @@ -328,12 +330,10 @@ pub async fn cli() -> Result<(), Box> { cfg_if::cfg_if! { if #[cfg(data_processing_primitives)] { context.add_commands(vec![ - whole_stream_command(SplitBy), whole_stream_command(ReduceBy), whole_stream_command(EvaluateBy), whole_stream_command(TSortBy), whole_stream_command(MapMaxBy), - whole_stream_command(Histogram), ]); } } diff --git a/src/commands.rs b/src/commands.rs index 629289b565..ee70534640 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -16,6 +16,8 @@ pub(crate) mod debug; pub(crate) mod echo; pub(crate) mod enter; pub(crate) mod env; +#[allow(unused)] +pub(crate) mod evaluate_by; pub(crate) mod exit; pub(crate) mod fetch; pub(crate) mod first; @@ -33,10 +35,13 @@ pub(crate) mod from_yaml; pub(crate) mod get; pub(crate) mod group_by; pub(crate) mod help; +pub(crate) mod histogram; pub(crate) mod history; pub(crate) mod last; pub(crate) mod lines; pub(crate) mod ls; +#[allow(unused)] +pub(crate) mod map_max_by; pub(crate) mod mkdir; pub(crate) mod mv; pub(crate) mod next; @@ -49,6 +54,8 @@ pub(crate) mod post; pub(crate) mod prepend; pub(crate) mod prev; pub(crate) mod pwd; +#[allow(unused)] +pub(crate) mod reduce_by; pub(crate) mod reject; pub(crate) mod reverse; pub(crate) mod rm; @@ -57,20 +64,11 @@ pub(crate) mod shells; pub(crate) mod size; 
pub(crate) mod skip_while; pub(crate) mod sort_by; - -cfg_if::cfg_if! { - if #[cfg(data_processing_primitives)] { - pub(crate) mod split_by; - pub(crate) mod reduce_by; - pub(crate) mod evaluate_by; - pub(crate) mod t_sort_by; - pub(crate) mod map_max_by; - pub(crate) mod histogram; - } -} - +pub(crate) mod split_by; pub(crate) mod split_column; pub(crate) mod split_row; +#[allow(unused)] +pub(crate) mod t_sort_by; pub(crate) mod table; pub(crate) mod tags; pub(crate) mod to_bson; @@ -103,6 +101,8 @@ pub(crate) use debug::Debug; pub(crate) use echo::Echo; pub(crate) use enter::Enter; pub(crate) use env::Env; +#[allow(unused)] +pub(crate) use evaluate_by::EvaluateBy; pub(crate) use exit::Exit; pub(crate) use fetch::Fetch; pub(crate) use first::First; @@ -122,10 +122,13 @@ pub(crate) use from_yaml::FromYML; pub(crate) use get::Get; pub(crate) use group_by::GroupBy; pub(crate) use help::Help; +pub(crate) use histogram::Histogram; pub(crate) use history::History; pub(crate) use last::Last; pub(crate) use lines::Lines; pub(crate) use ls::LS; +#[allow(unused)] +pub(crate) use map_max_by::MapMaxBy; pub(crate) use mkdir::Mkdir; pub(crate) use mv::Move; pub(crate) use next::Next; @@ -137,6 +140,8 @@ pub(crate) use post::Post; pub(crate) use prepend::Prepend; pub(crate) use prev::Previous; pub(crate) use pwd::PWD; +#[allow(unused)] +pub(crate) use reduce_by::ReduceBy; pub(crate) use reject::Reject; pub(crate) use reverse::Reverse; pub(crate) use rm::Remove; @@ -145,20 +150,11 @@ pub(crate) use shells::Shells; pub(crate) use size::Size; pub(crate) use skip_while::SkipWhile; pub(crate) use sort_by::SortBy; - -cfg_if::cfg_if! 
{ - if #[cfg(data_processing_primitives)] { - pub(crate) use split_by::SplitBy; - pub(crate) use reduce_by::ReduceBy; - pub(crate) use evaluate_by::EvaluateBy; - pub(crate) use t_sort_by::TSortBy; - pub(crate) use map_max_by::MapMaxBy; - pub(crate) use histogram::Histogram; - } -} - +pub(crate) use split_by::SplitBy; pub(crate) use split_column::SplitColumn; pub(crate) use split_row::SplitRow; +#[allow(unused)] +pub(crate) use t_sort_by::TSortBy; pub(crate) use table::Table; pub(crate) use tags::Tags; pub(crate) use to_bson::ToBSON; diff --git a/src/commands/histogram.rs b/src/commands/histogram.rs index 52d72bdfbf..6933f28a6f 100644 --- a/src/commands/histogram.rs +++ b/src/commands/histogram.rs @@ -1,10 +1,10 @@ -use crate::commands::WholeStreamCommand; +use crate::commands::evaluate_by::evaluate; use crate::commands::group_by::group; +use crate::commands::map_max_by::map_max; +use crate::commands::reduce_by::reduce; use crate::commands::t_sort_by::columns_sorted; use crate::commands::t_sort_by::t_sort; -use crate::commands::evaluate_by::evaluate; -use crate::commands::reduce_by::reduce; -use crate::commands::map_max_by::map_max; +use crate::commands::WholeStreamCommand; use crate::data::TaggedDictBuilder; use crate::errors::ShellError; use crate::prelude::*; @@ -15,6 +15,7 @@ pub struct Histogram; #[derive(Deserialize)] pub struct HistogramArgs { column_name: Tagged, + rest: Vec>, } impl WholeStreamCommand for Histogram { @@ -23,11 +24,16 @@ impl WholeStreamCommand for Histogram { } fn signature(&self) -> Signature { - Signature::build("histogram").required( - "column_name", - SyntaxShape::String, - "the name of the column to graph by", - ) + Signature::build("histogram") + .required( + "column_name", + SyntaxShape::String, + "the name of the column to graph by", + ) + .rest( + SyntaxShape::Member, + "column name to give the histogram's frequency column", + ) } fn usage(&self) -> &str { @@ -44,7 +50,7 @@ impl WholeStreamCommand for Histogram { } pub fn 
histogram( - HistogramArgs { column_name }: HistogramArgs, + HistogramArgs { column_name, rest }: HistogramArgs, RunnableContext { input, name, .. }: RunnableContext, ) -> Result { let stream = async_stream! { @@ -68,13 +74,24 @@ pub fn histogram( let mut idx = 0; + let column_names_supplied: Vec<_> = rest.iter().map(|f| f.item.clone()).collect(); + + let frequency_column_name = if column_names_supplied.is_empty() { + "frecuency".to_string() + } else { + column_names_supplied[0].clone() + }; + + let column = (*column_name).clone(); + if let Tagged { item: Value::Table(start), .. } = datasets.get(0).unwrap() { for percentage in start.into_iter() { + let mut fact = TaggedDictBuilder::new(&name); - fact.insert_tagged("committer", group_labels.get(idx).unwrap().clone()); + fact.insert_tagged(&column, group_labels.get(idx).unwrap().clone()); if let Tagged { item: Value::Primitive(Primitive::Int(ref num)), .. } = percentage.clone() { - fact.insert("activity", std::iter::repeat("*").take(num.to_i32().unwrap() as usize).collect::()); + fact.insert(&frequency_column_name, std::iter::repeat("*").take(num.to_i32().unwrap() as usize).collect::()); } idx = idx + 1; @@ -104,38 +121,36 @@ fn percentages( } => { let datasets: Vec<_> = datasets .into_iter() - .map(|subsets| { - match subsets { - Tagged { - item: Value::Table(data), - .. - } => { - let data = data - .into_iter() - .map(|d| match d { - Tagged { - item: Value::Primitive(Primitive::Int(n)), - .. - } => { - let max = match max { - Tagged { - item: Value::Primitive(Primitive::Int(ref maxima)), - .. - } => maxima.to_i32().unwrap(), - _ => 0, - }; + .map(|subsets| match subsets { + Tagged { + item: Value::Table(data), + .. + } => { + let data = data + .into_iter() + .map(|d| match d { + Tagged { + item: Value::Primitive(Primitive::Int(n)), + .. + } => { + let max = match max { + Tagged { + item: Value::Primitive(Primitive::Int(ref maxima)), + .. 
+ } => maxima.to_i32().unwrap(), + _ => 0, + }; - let n = { n.to_i32().unwrap() * 100 / max }; + let n = { n.to_i32().unwrap() * 100 / max }; - Value::number(n).tagged(&tag) - } - _ => Value::number(0).tagged(&tag), - }) - .collect::>(); - Value::Table(data).tagged(&tag) - } - _ => Value::Table(vec![]).tagged(&tag), + Value::number(n).tagged(&tag) + } + _ => Value::number(0).tagged(&tag), + }) + .collect::>(); + Value::Table(data).tagged(&tag) } + _ => Value::Table(vec![]).tagged(&tag), }) .collect(); diff --git a/src/commands/map_max_by.rs b/src/commands/map_max_by.rs index 31a02a81b1..ea2fc99219 100644 --- a/src/commands/map_max_by.rs +++ b/src/commands/map_max_by.rs @@ -81,29 +81,27 @@ pub fn map_max( } => { let datasets: Vec<_> = datasets .into_iter() - .map(|subsets| { - match subsets { - Tagged { - item: Value::Table(data), - .. - } => { - let data = data.into_iter().fold(0, |acc, value| match value { - Tagged { - item: Value::Primitive(Primitive::Int(n)), - .. - } => { - if n.to_i32().unwrap() > acc { - n.to_i32().unwrap() - } else { - acc - } + .map(|subsets| match subsets { + Tagged { + item: Value::Table(data), + .. + } => { + let data = data.into_iter().fold(0, |acc, value| match value { + Tagged { + item: Value::Primitive(Primitive::Int(n)), + .. + } => { + if n.to_i32().unwrap() > acc { + n.to_i32().unwrap() + } else { + acc } - _ => acc, - }); - Value::number(data).tagged(&tag) - } - _ => Value::number(0).tagged(&tag), + } + _ => acc, + }); + Value::number(data).tagged(&tag) } + _ => Value::number(0).tagged(&tag), }) .collect(); diff --git a/src/commands/t_sort_by.rs b/src/commands/t_sort_by.rs index 1df4cce887..1c914dbac3 100644 --- a/src/commands/t_sort_by.rs +++ b/src/commands/t_sort_by.rs @@ -57,25 +57,25 @@ fn t_sort_by( RunnableContext { input, name, .. }: RunnableContext, ) -> Result { Ok(OutputStream::new(async_stream! 
{ - let values: Vec> = input.values.collect().await; + let values: Vec> = input.values.collect().await; - let column_grouped_by_name = if let Some(grouped_by) = group_by { - Some(grouped_by.item().clone()) - } else { - None - }; + let column_grouped_by_name = if let Some(grouped_by) = group_by { + Some(grouped_by.item().clone()) + } else { + None + }; - if show_columns { - for label in columns_sorted(column_grouped_by_name, &values[0], &name).iter() { - yield ReturnSuccess::value(label.clone()); - } - } else { - match t_sort(column_grouped_by_name, None, &values[0], name) { - Ok(sorted) => yield ReturnSuccess::value(sorted), - Err(err) => yield Err(err) - } + if show_columns { + for label in columns_sorted(column_grouped_by_name, &values[0], &name).iter() { + yield ReturnSuccess::value(label.clone()); } - })) + } else { + match t_sort(column_grouped_by_name, None, &values[0], name) { + Ok(sorted) => yield ReturnSuccess::value(sorted), + Err(err) => yield Err(err) + } + } + })) } pub fn columns_sorted( @@ -125,7 +125,7 @@ pub fn columns_sorted( keys.into_iter().map(|k| k.tagged(&origin_tag)).collect() } - _ => vec![Value::string("default").tagged(&origin_tag)] + _ => vec![Value::string("default").tagged(&origin_tag)], } } @@ -238,7 +238,7 @@ mod tests { use crate::data::meta::*; use crate::Value; use indexmap::IndexMap; - + fn string(input: impl Into) -> Tagged { Value::string(input.into()).tagged_unknown() } @@ -305,7 +305,7 @@ mod tests { ] ) } - + #[test] fn sorts_the_tables() { let group_by = String::from("date"); diff --git a/tests/commands_test.rs b/tests/commands_test.rs index acd5e8374c..661d14023e 100644 --- a/tests/commands_test.rs +++ b/tests/commands_test.rs @@ -31,6 +31,35 @@ fn group_by() { }) } +#[test] +fn histogram() { + Playground::setup("histogram_test_1", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.csv", + r#" + first_name,last_name,rusty_at + Andrés,Robalino,Ecuador + Jonathan,Turner,Estados 
Unidos + Yehuda,Katz,Estados Unidos + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), h::pipeline( + r#" + open los_tres_caballeros.csv + | histogram rusty_at countries + | where rusty_at == "Ecuador" + | get countries + | echo $it + "# + )); + + assert_eq!(actual, "**************************************************"); + // 50% + }) +} + #[test] fn group_by_errors_if_unknown_column_name() { Playground::setup("group_by_test_2", |dirs, sandbox| { @@ -56,7 +85,6 @@ fn group_by_errors_if_unknown_column_name() { }) } -#[cfg(data_processing_primitives)] #[test] fn split_by() { Playground::setup("split_by_test_1", |dirs, sandbox| { @@ -86,7 +114,6 @@ fn split_by() { }) } -#[cfg(data_processing_primitives)] #[test] fn split_by_errors_if_no_table_given_as_input() { Playground::setup("split_by_test_2", |dirs, sandbox| {