From 6dceabf389406c96767f7b2147df4b3a4074387f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= Date: Fri, 3 Jan 2020 23:00:39 -0500 Subject: [PATCH] Isolate data processing helpers. (#1159) Isolate data processing helpers. Remove unwraps and down to zero unwraps. --- src/commands/evaluate_by.rs | 191 +---------- src/commands/histogram.rs | 47 ++- src/commands/map_max_by.rs | 158 +-------- src/commands/reduce_by.rs | 191 +---------- src/commands/t_sort_by.rs | 248 +------------- src/utils.rs | 2 + src/utils/data_processing.rs | 604 +++++++++++++++++++++++++++++++++++ 7 files changed, 630 insertions(+), 811 deletions(-) create mode 100644 src/utils/data_processing.rs diff --git a/src/commands/evaluate_by.rs b/src/commands/evaluate_by.rs index b982f98cf4..81831cd1c3 100644 --- a/src/commands/evaluate_by.rs +++ b/src/commands/evaluate_by.rs @@ -1,5 +1,6 @@ use crate::commands::WholeStreamCommand; use crate::prelude::*; +use crate::utils::data_processing::{evaluate, fetch}; use nu_errors::ShellError; use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value}; use nu_source::{SpannedItem, Tagged}; @@ -68,193 +69,3 @@ pub fn evaluate_by( Ok(stream.to_output_stream()) } - -fn fetch(key: Option) -> Box Option + 'static> { - Box::new(move |value: Value, tag| match &key { - Some(key_given) => value.get_data_by_key(key_given[..].spanned(tag.span)), - None => Some(UntaggedValue::int(1).into_value(tag)), - }) -} - -pub fn evaluate( - values: &Value, - evaluator: Option, - tag: impl Into, -) -> Result { - let tag = tag.into(); - - let evaluate_with = match evaluator { - Some(keyfn) => fetch(Some(keyfn)), - None => fetch(None), - }; - - let results: Value = match values { - Value { - value: UntaggedValue::Table(datasets), - .. - } => { - let datasets: Vec<_> = datasets - .iter() - .map(|subsets| match subsets { - Value { - value: UntaggedValue::Table(subsets), - .. 
- } => { - let subsets: Vec<_> = subsets - .clone() - .into_iter() - .map(|data| match data { - Value { - value: UntaggedValue::Table(data), - .. - } => { - let data: Vec<_> = data - .into_iter() - .map(|x| evaluate_with(x, tag.clone()).unwrap()) - .collect(); - UntaggedValue::Table(data).into_value(&tag) - } - _ => UntaggedValue::Table(vec![]).into_value(&tag), - }) - .collect(); - UntaggedValue::Table(subsets).into_value(&tag) - } - _ => UntaggedValue::Table(vec![]).into_value(&tag), - }) - .collect(); - - UntaggedValue::Table(datasets).into_value(&tag) - } - _ => UntaggedValue::Table(vec![]).into_value(&tag), - }; - - Ok(results) -} - -#[cfg(test)] -mod tests { - - use crate::commands::evaluate_by::{evaluate, fetch}; - use crate::commands::group_by::group; - use crate::commands::t_sort_by::t_sort; - use crate::data::value; - use crate::prelude::*; - use indexmap::IndexMap; - use nu_errors::ShellError; - use nu_protocol::{UntaggedValue, Value}; - use nu_source::TaggedItem; - - fn int(s: impl Into) -> Value { - UntaggedValue::int(s).into_untagged_value() - } - - fn string(input: impl Into) -> Value { - UntaggedValue::string(input.into()).into_untagged_value() - } - - fn row(entries: IndexMap) -> Value { - UntaggedValue::row(entries).into_untagged_value() - } - - fn table(list: &[Value]) -> Value { - UntaggedValue::table(list).into_untagged_value() - } - - fn nu_releases_sorted_by_date() -> Result { - let key = String::from("date"); - - t_sort( - Some(key), - None, - &nu_releases_grouped_by_date()?, - Tag::unknown(), - ) - } - - fn nu_releases_grouped_by_date() -> Result { - let key = String::from("date").tagged_unknown(); - group(&key, nu_releases_commiters(), Tag::unknown()) - } - - fn nu_releases_commiters() -> Vec { - vec![ - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, - ), - row( - indexmap! 
{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}, - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}, - ), - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")}, - ), - row( - indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}, - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")}, - ), - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}, - ), - row( - indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")}, - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, - ), - ] - } - - #[test] - fn evaluator_fetches_by_column_if_supplied_a_column_name() { - let subject = row(indexmap! { "name".into() => string("andres") }); - - let evaluator = fetch(Some(String::from("name"))); - - assert_eq!(evaluator(subject, Tag::unknown()), Some(string("andres"))); - } - - #[test] - fn evaluator_returns_1_if_no_column_name_given() { - let subject = row(indexmap! 
{ "name".into() => string("andres") }); - let evaluator = fetch(None); - - assert_eq!( - evaluator(subject, Tag::unknown()), - Some(UntaggedValue::int(1).into_untagged_value()) - ); - } - - #[test] - fn evaluates_the_tables() -> Result<(), ShellError> { - assert_eq!( - evaluate(&nu_releases_sorted_by_date()?, None, Tag::unknown())?, - table(&[table(&[ - table(&[int(1), int(1), int(1)]), - table(&[int(1), int(1), int(1)]), - table(&[int(1), int(1), int(1)]), - ]),]) - ); - - Ok(()) - } - - #[test] - fn evaluates_the_tables_with_custom_evaluator() -> Result<(), ShellError> { - let eval = String::from("name"); - - assert_eq!( - evaluate(&nu_releases_sorted_by_date()?, Some(eval), Tag::unknown())?, - table(&[table(&[ - table(&[string("AR"), string("JT"), string("YK")]), - table(&[string("AR"), string("YK"), string("JT")]), - table(&[string("YK"), string("JT"), string("AR")]), - ]),]) - ); - - Ok(()) - } -} diff --git a/src/commands/histogram.rs b/src/commands/histogram.rs index da5ec417cb..a48567f78f 100644 --- a/src/commands/histogram.rs +++ b/src/commands/histogram.rs @@ -1,17 +1,13 @@ -use crate::commands::evaluate_by::evaluate; use crate::commands::group_by::group; -use crate::commands::map_max_by::map_max; -use crate::commands::reduce_by::reduce; -use crate::commands::t_sort_by::columns_sorted; -use crate::commands::t_sort_by::t_sort; use crate::commands::WholeStreamCommand; use crate::prelude::*; +use crate::utils::data_processing::{columns_sorted, evaluate, map_max, reduce, t_sort}; use nu_errors::ShellError; use nu_protocol::{ Primitive, ReturnSuccess, Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value, }; use nu_source::Tagged; -use num_traits::cast::ToPrimitive; +use num_traits::{ToPrimitive, Zero}; pub struct Histogram; @@ -127,31 +123,28 @@ fn percentages(values: &Value, max: Value, tag: impl Into) -> Result { - let data = - data.iter() - .map(|d| match d { + let data = data + .iter() + .map(|d| match d { + Value { + value: 
UntaggedValue::Primitive(Primitive::Int(n)), + .. + } => { + let max = match &max { Value { - value: UntaggedValue::Primitive(Primitive::Int(n)), + value: UntaggedValue::Primitive(Primitive::Int(maxima)), .. - } => { - let max = match max { - Value { - value: - UntaggedValue::Primitive(Primitive::Int( - ref maxima, - )), - .. - } => maxima.to_i32().unwrap(), - _ => 0, - }; + } => maxima.clone(), + _ => Zero::zero(), + }; - let n = { n.to_i32().unwrap() * 100 / max }; + let n = (n * 100) / max; - UntaggedValue::int(n).into_value(&tag) - } - _ => UntaggedValue::int(0).into_value(&tag), - }) - .collect::>(); + UntaggedValue::int(n).into_value(&tag) + } + _ => UntaggedValue::int(0).into_value(&tag), + }) + .collect::>(); UntaggedValue::Table(data).into_value(&tag) } _ => UntaggedValue::Table(vec![]).into_value(&tag), diff --git a/src/commands/map_max_by.rs b/src/commands/map_max_by.rs index fcea4603a5..2f94fe32ef 100644 --- a/src/commands/map_max_by.rs +++ b/src/commands/map_max_by.rs @@ -1,6 +1,7 @@ use crate::commands::WholeStreamCommand; use crate::data::value; use crate::prelude::*; +use crate::utils::data_processing::map_max; use nu_errors::ShellError; use nu_protocol::{Primitive, ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value}; use nu_source::Tagged; @@ -70,160 +71,3 @@ pub fn map_max_by( Ok(stream.to_output_stream()) } - -pub fn map_max( - values: &Value, - _map_by_column_name: Option, - tag: impl Into, -) -> Result { - let tag = tag.into(); - - let results: Value = match values { - Value { - value: UntaggedValue::Table(datasets), - .. - } => { - let datasets: Vec<_> = datasets - .iter() - .map(|subsets| match subsets { - Value { - value: UntaggedValue::Table(data), - .. - } => { - let data = data.iter().fold(0, |acc, value| match value { - Value { - value: UntaggedValue::Primitive(Primitive::Int(n)), - .. 
- } => { - if n.to_i32().unwrap() > acc { - n.to_i32().unwrap() - } else { - acc - } - } - _ => acc, - }); - UntaggedValue::int(data).into_value(&tag) - } - _ => UntaggedValue::int(0).into_value(&tag), - }) - .collect(); - - let datasets = datasets.iter().fold(0, |max, value| match value { - Value { - value: UntaggedValue::Primitive(Primitive::Int(n)), - .. - } => { - if n.to_i32().unwrap() > max { - n.to_i32().unwrap() - } else { - max - } - } - _ => max, - }); - UntaggedValue::int(datasets).into_value(&tag) - } - _ => UntaggedValue::int(-1).into_value(&tag), - }; - - Ok(results) -} - -#[cfg(test)] -mod tests { - - use crate::commands::evaluate_by::evaluate; - use crate::commands::group_by::group; - use crate::commands::map_max_by::map_max; - use crate::commands::reduce_by::reduce; - use crate::commands::t_sort_by::t_sort; - use crate::prelude::*; - use indexmap::IndexMap; - use nu_protocol::{UntaggedValue, Value}; - use nu_source::*; - - fn int(s: impl Into) -> Value { - UntaggedValue::int(s).into_untagged_value() - } - - fn string(input: impl Into) -> Value { - UntaggedValue::string(input.into()).into_untagged_value() - } - - fn row(entries: IndexMap) -> Value { - UntaggedValue::row(entries).into_untagged_value() - } - - fn nu_releases_evaluated_by_default_one() -> Value { - evaluate(&nu_releases_sorted_by_date(), None, Tag::unknown()).unwrap() - } - - fn nu_releases_reduced_by_sum() -> Value { - reduce( - &nu_releases_evaluated_by_default_one(), - Some(String::from("sum")), - Tag::unknown(), - ) - .unwrap() - } - - fn nu_releases_sorted_by_date() -> Value { - let key = String::from("date"); - - t_sort( - Some(key), - None, - &nu_releases_grouped_by_date(), - Tag::unknown(), - ) - .unwrap() - } - - fn nu_releases_grouped_by_date() -> Value { - let key = String::from("date").tagged_unknown(); - group(&key, nu_releases_commiters(), Tag::unknown()).unwrap() - } - - fn nu_releases_commiters() -> Vec { - vec![ - row( - indexmap! 
{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, - ), - row( - indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}, - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}, - ), - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")}, - ), - row( - indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}, - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")}, - ), - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}, - ), - row( - indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")}, - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, - ), - row( - indexmap! 
{"name".into() => string("JK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, - ), - ] - } - #[test] - fn maps_and_gets_max_value() { - assert_eq!( - map_max(&nu_releases_reduced_by_sum(), None, Tag::unknown()).unwrap(), - int(4) - ); - } -} diff --git a/src/commands/reduce_by.rs b/src/commands/reduce_by.rs index e6a7d1a1fa..c36a1bd2fa 100644 --- a/src/commands/reduce_by.rs +++ b/src/commands/reduce_by.rs @@ -1,5 +1,6 @@ use crate::commands::WholeStreamCommand; use crate::prelude::*; +use crate::utils::data_processing::reduce; use nu_errors::ShellError; use nu_protocol::{Primitive, ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value}; use nu_source::Tagged; @@ -68,193 +69,3 @@ pub fn reduce_by( Ok(stream.to_output_stream()) } - -fn sum(data: Vec) -> i32 { - data.into_iter().fold(0, |acc, value| match value { - Value { - value: UntaggedValue::Primitive(Primitive::Int(n)), - .. - } => acc + n.to_i32().unwrap(), - _ => acc, - }) -} - -fn formula( - acc_begin: i32, - calculator: Box) -> i32 + 'static>, -) -> Box) -> i32 + 'static> { - Box::new(move |acc, datax| -> i32 { - let result = acc * acc_begin; - result + calculator(datax) - }) -} - -fn reducer_for(command: Reduce) -> Box) -> i32 + 'static> { - match command { - Reduce::Sum | Reduce::Default => Box::new(formula(0, Box::new(sum))), - } -} - -pub enum Reduce { - Sum, - Default, -} - -pub fn reduce( - values: &Value, - reducer: Option, - tag: impl Into, -) -> Result { - let tag = tag.into(); - - let reduce_with = match reducer { - Some(cmd) if cmd == "sum" => reducer_for(Reduce::Sum), - Some(_) | None => reducer_for(Reduce::Default), - }; - - let results: Value = match values { - Value { - value: UntaggedValue::Table(datasets), - .. - } => { - let datasets: Vec<_> = datasets - .iter() - .map(|subsets| { - let mut acc = 0; - match subsets { - Value { - value: UntaggedValue::Table(data), - .. 
- } => { - let data = data - .iter() - .map(|d| { - if let Value { - value: UntaggedValue::Table(x), - .. - } = d - { - acc = reduce_with(acc, x.clone()); - UntaggedValue::int(acc).into_value(&tag) - } else { - UntaggedValue::int(0).into_value(&tag) - } - }) - .collect::>(); - UntaggedValue::Table(data).into_value(&tag) - } - _ => UntaggedValue::Table(vec![]).into_value(&tag), - } - }) - .collect(); - - UntaggedValue::Table(datasets).into_value(&tag) - } - _ => UntaggedValue::Table(vec![]).into_value(&tag), - }; - - Ok(results) -} - -#[cfg(test)] -mod tests { - - use crate::commands::evaluate_by::evaluate; - use crate::commands::group_by::group; - use crate::commands::reduce_by::{reduce, reducer_for, Reduce}; - use crate::commands::t_sort_by::t_sort; - use crate::prelude::*; - use indexmap::IndexMap; - use nu_protocol::{UntaggedValue, Value}; - use nu_source::*; - - fn int(s: impl Into) -> Value { - UntaggedValue::int(s).into_untagged_value() - } - - fn string(input: impl Into) -> Value { - UntaggedValue::string(input.into()).into_untagged_value() - } - - fn row(entries: IndexMap) -> Value { - UntaggedValue::row(entries).into_untagged_value() - } - - fn table(list: &[Value]) -> Value { - UntaggedValue::table(list).into_untagged_value() - } - - fn nu_releases_sorted_by_date() -> Value { - let key = String::from("date"); - - t_sort( - Some(key), - None, - &nu_releases_grouped_by_date(), - Tag::unknown(), - ) - .unwrap() - } - - fn nu_releases_evaluated_by_default_one() -> Value { - evaluate(&nu_releases_sorted_by_date(), None, Tag::unknown()).unwrap() - } - - fn nu_releases_grouped_by_date() -> Value { - let key = String::from("date").tagged_unknown(); - group(&key, nu_releases_commiters(), Tag::unknown()).unwrap() - } - - fn nu_releases_commiters() -> Vec { - vec![ - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, - ), - row( - indexmap! 
{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}, - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}, - ), - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")}, - ), - row( - indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}, - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")}, - ), - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}, - ), - row( - indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")}, - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, - ), - ] - } - - #[test] - fn reducer_computes_given_a_sum_command() { - let subject = vec![int(1), int(1), int(1)]; - - let action = reducer_for(Reduce::Sum); - - assert_eq!(action(0, subject), 3); - } - - #[test] - fn reducer_computes() { - assert_eq!( - reduce( - &nu_releases_evaluated_by_default_one(), - Some(String::from("sum")), - Tag::unknown() - ), - Ok(table(&[table(&[int(3), int(3), int(3)])])) - ); - } -} diff --git a/src/commands/t_sort_by.rs b/src/commands/t_sort_by.rs index c4ef27f6ff..93abe4f1a0 100644 --- a/src/commands/t_sort_by.rs +++ b/src/commands/t_sort_by.rs @@ -1,6 +1,7 @@ use crate::commands::WholeStreamCommand; use crate::data::TaggedListBuilder; use crate::prelude::*; +use crate::utils::data_processing::{columns_sorted, t_sort}; use chrono::{DateTime, NaiveDate, Utc}; use nu_errors::ShellError; use nu_protocol::{ @@ -82,250 +83,3 @@ fn t_sort_by( } })) } - -pub fn columns_sorted( - _group_by_name: 
Option, - value: &Value, - tag: impl Into, -) -> Vec> { - let origin_tag = tag.into(); - - match value { - Value { - value: UntaggedValue::Row(rows), - .. - } => { - let mut keys: Vec = rows - .entries - .keys() - .map(|s| s.as_ref()) - .map(|k: &str| { - let date = NaiveDate::parse_from_str(k, "%B %d-%Y"); - - let date = match date { - Ok(parsed) => UntaggedValue::Primitive(Primitive::Date( - DateTime::::from_utc(parsed.and_hms(12, 34, 56), Utc), - )), - Err(_) => UntaggedValue::string(k), - }; - - date.into_untagged_value() - }) - .collect(); - - keys.sort(); - - let keys: Vec = keys - .into_iter() - .map(|k| match k { - Value { - value: UntaggedValue::Primitive(Primitive::Date(d)), - .. - } => format!("{}", d.format("%B %d-%Y")), - _ => k.as_string().unwrap(), - }) - .collect(); - - keys.into_iter().map(|k| k.tagged(&origin_tag)).collect() - } - _ => vec!["default".to_owned().tagged(&origin_tag)], - } -} - -pub fn t_sort( - group_by_name: Option, - split_by_name: Option, - value: &Value, - tag: impl Into, -) -> Result { - let origin_tag = tag.into(); - - match group_by_name { - Some(column_name) => { - let sorted_labels: Vec> = - columns_sorted(Some(column_name), value, &origin_tag); - - match split_by_name { - None => { - let mut dataset = TaggedDictBuilder::new(&origin_tag); - dataset.insert_value("default", value.clone()); - let dataset = dataset.into_value(); - - let split_labels: Vec> = match &dataset { - Value { - value: UntaggedValue::Row(rows), - .. - } => { - let mut keys: Vec> = rows - .entries - .keys() - .map(|k| k.clone().tagged_unknown()) - .collect(); - - keys.sort(); - - keys - } - _ => vec![], - }; - - let results: Vec> = split_labels - .iter() - .map(|split| { - let groups = get_data_by_key(&dataset, split.borrow_spanned()); - - sorted_labels - .clone() - .into_iter() - .map(|label| match &groups { - Some(Value { - value: UntaggedValue::Row(dict), - .. 
- }) => dict.get_data_by_key(label.borrow_spanned()).unwrap(), - _ => UntaggedValue::Table(vec![]).into_value(&origin_tag), - }) - .collect() - }) - .collect(); - - let mut outer = TaggedListBuilder::new(&origin_tag); - - for i in results { - outer.push_value(UntaggedValue::Table(i).into_value(&origin_tag)); - } - - Ok(UntaggedValue::Table(outer.list).into_value(&origin_tag)) - } - Some(_) => Ok(UntaggedValue::nothing().into_value(&origin_tag)), - } - } - None => Ok(UntaggedValue::nothing().into_value(&origin_tag)), - } -} -#[cfg(test)] -mod tests { - - use crate::commands::group_by::group; - use crate::commands::t_sort_by::{columns_sorted, t_sort}; - use crate::data::value; - use indexmap::IndexMap; - use nu_protocol::{UntaggedValue, Value}; - use nu_source::*; - - fn string(input: impl Into) -> Value { - UntaggedValue::string(input.into()).into_untagged_value() - } - - fn row(entries: IndexMap) -> Value { - UntaggedValue::row(entries).into_untagged_value() - } - - fn table(list: &[Value]) -> Value { - UntaggedValue::table(list).into_untagged_value() - } - - fn nu_releases_grouped_by_date() -> Value { - let key = String::from("date").tagged_unknown(); - group(&key, nu_releases_commiters(), Tag::unknown()).unwrap() - } - - fn nu_releases_commiters() -> Vec { - vec![ - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, - ), - row( - indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}, - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}, - ), - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")}, - ), - row( - indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}, - ), - row( - indexmap! 
{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")}, - ), - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}, - ), - row( - indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")}, - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, - ), - ] - } - - #[test] - fn show_columns_sorted_given_a_column_to_sort_by() { - let by_column = String::from("date"); - - assert_eq!( - columns_sorted( - Some(by_column), - &nu_releases_grouped_by_date(), - Tag::unknown() - ), - vec![ - "August 23-2019".to_string().tagged_unknown(), - "September 24-2019".to_string().tagged_unknown(), - "October 10-2019".to_string().tagged_unknown() - ] - ) - } - - #[test] - fn sorts_the_tables() { - let group_by = String::from("date"); - - assert_eq!( - t_sort( - Some(group_by), - None, - &nu_releases_grouped_by_date(), - Tag::unknown() - ) - .unwrap(), - table(&[table(&[ - table(&[ - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")} - ), - row( - indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")} - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")} - ) - ]), - table(&[ - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")} - ), - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")} - ), - row( - indexmap! 
{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")} - ) - ]), - table(&[ - row( - indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")} - ), - row( - indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")} - ), - row( - indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")} - ) - ]), - ]),]) - ); - } -} diff --git a/src/utils.rs b/src/utils.rs index e73643d710..3cb42b894b 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,3 +1,5 @@ +pub mod data_processing; + use nu_errors::ShellError; use nu_protocol::{UntaggedValue, Value}; use std::path::{Component, Path, PathBuf}; diff --git a/src/utils/data_processing.rs b/src/utils/data_processing.rs new file mode 100644 index 0000000000..e88a321a05 --- /dev/null +++ b/src/utils/data_processing.rs @@ -0,0 +1,604 @@ +use crate::data::TaggedListBuilder; +use chrono::{DateTime, NaiveDate, Utc}; +use nu_errors::ShellError; +use nu_protocol::{Primitive, TaggedDictBuilder, UntaggedValue, Value}; +use nu_source::{SpannedItem, Tag, Tagged, TaggedItem}; +use nu_value_ext::{get_data_by_key, ValueExt}; +use num_bigint::BigInt; +use num_traits::Zero; + +pub fn columns_sorted( + _group_by_name: Option, + value: &Value, + tag: impl Into, +) -> Vec> { + let origin_tag = tag.into(); + + match value { + Value { + value: UntaggedValue::Row(rows), + .. 
+ } => { + let mut keys: Vec = rows + .entries + .keys() + .map(|s| s.as_ref()) + .map(|k: &str| { + let date = NaiveDate::parse_from_str(k, "%B %d-%Y"); + + let date = match date { + Ok(parsed) => UntaggedValue::Primitive(Primitive::Date( + DateTime::::from_utc(parsed.and_hms(12, 34, 56), Utc), + )), + Err(_) => UntaggedValue::string(k), + }; + + date.into_untagged_value() + }) + .collect(); + + keys.sort(); + + let keys: Vec = keys + .into_iter() + .map(|k| match k { + Value { + value: UntaggedValue::Primitive(Primitive::Date(d)), + .. + } => format!("{}", d.format("%B %d-%Y")), + _ => k.as_string().unwrap_or_else(|_| String::from("")), + }) + .collect(); + + keys.into_iter().map(|k| k.tagged(&origin_tag)).collect() + } + _ => vec!["default".to_owned().tagged(&origin_tag)], + } +} + +pub fn t_sort( + group_by_name: Option, + split_by_name: Option, + value: &Value, + tag: impl Into, +) -> Result { + let origin_tag = tag.into(); + + match group_by_name { + Some(column_name) => { + let sorted_labels: Vec> = + columns_sorted(Some(column_name), value, &origin_tag); + + match split_by_name { + None => { + let mut dataset = TaggedDictBuilder::new(&origin_tag); + dataset.insert_value("default", value.clone()); + let dataset = dataset.into_value(); + + let split_labels: Vec> = match &dataset { + Value { + value: UntaggedValue::Row(rows), + .. + } => { + let mut keys: Vec> = rows + .entries + .keys() + .map(|k| k.clone().tagged_unknown()) + .collect(); + + keys.sort(); + + keys + } + _ => vec![], + }; + + let results: Vec> = split_labels + .iter() + .map(|split| { + let groups = get_data_by_key(&dataset, split.borrow_spanned()); + + sorted_labels + .clone() + .into_iter() + .map(|label| match &groups { + Some(Value { + value: UntaggedValue::Row(dict), + .. 
+ }) => { + dict.get_data_by_key(label.borrow_spanned()).unwrap_or_else( + || UntaggedValue::Table(vec![]).into_value(&origin_tag), + ) + } + _ => UntaggedValue::Table(vec![]).into_value(&origin_tag), + }) + .collect() + }) + .collect(); + + let mut outer = TaggedListBuilder::new(&origin_tag); + + for i in results { + outer.push_value(UntaggedValue::Table(i).into_value(&origin_tag)); + } + + Ok(UntaggedValue::Table(outer.list).into_value(&origin_tag)) + } + Some(_) => Ok(UntaggedValue::nothing().into_value(&origin_tag)), + } + } + None => Ok(UntaggedValue::nothing().into_value(&origin_tag)), + } +} + +pub fn fetch(key: Option) -> Box Option + 'static> { + Box::new(move |value: Value, tag| match &key { + Some(key_given) => value.get_data_by_key(key_given[..].spanned(tag.span)), + None => Some(UntaggedValue::int(1).into_value(tag)), + }) +} + +pub fn evaluate( + values: &Value, + evaluator: Option, + tag: impl Into, +) -> Result { + let tag = tag.into(); + + let evaluate_with = match evaluator { + Some(keyfn) => fetch(Some(keyfn)), + None => fetch(None), + }; + + let results: Value = match values { + Value { + value: UntaggedValue::Table(datasets), + .. + } => { + let datasets: Vec<_> = datasets + .iter() + .map(|subsets| match subsets { + Value { + value: UntaggedValue::Table(subsets), + .. + } => { + let subsets: Vec<_> = subsets + .clone() + .into_iter() + .map(|data| match data { + Value { + value: UntaggedValue::Table(data), + .. 
+                                } => {
+                                    let data: Vec<_> = data
+                                        .into_iter()
+                                        .map(|x| match evaluate_with(x, tag.clone()) {
+                                            Some(val) => val,
+                                            None => UntaggedValue::int(1).into_value(tag.clone()),
+                                        })
+                                        .collect();
+                                    UntaggedValue::Table(data).into_value(&tag)
+                                }
+                                _ => UntaggedValue::Table(vec![]).into_value(&tag),
+                            })
+                            .collect();
+                        UntaggedValue::Table(subsets).into_value(&tag)
+                    }
+                    _ => UntaggedValue::Table(vec![]).into_value(&tag),
+                })
+                .collect();
+
+            UntaggedValue::Table(datasets).into_value(&tag)
+        }
+        _ => UntaggedValue::Table(vec![]).into_value(&tag),
+    };
+
+    Ok(results)
+}
+
+/// Adds up every integer found in `data`.
+///
+/// Values that are not `Primitive::Int` are skipped. The call never fails: the total
+/// (zero when there is nothing to add) is returned as an untagged integer value.
+fn sum(data: Vec<Value>) -> Result<Value, ShellError> {
+    let total = data
+        .into_iter()
+        .fold(BigInt::zero(), |acc, value| match value {
+            Value {
+                value: UntaggedValue::Primitive(Primitive::Int(n)),
+                ..
+            } => acc + n,
+            _ => acc,
+        });
+
+    Ok(UntaggedValue::int(total).into_untagged_value())
+}
+
+/// Wraps `calculator` into a reducer closure.
+///
+/// The closure takes an accumulator plus a batch of values and yields
+/// `acc * acc_begin + calculator(batch)`. When `acc_begin` is zero the incoming
+/// accumulator is cancelled out, so the result is just what `calculator` computed.
+///
+/// If `calculator` returns an error, or a value that is not an integer, the closure
+/// answers `Ok(0)` instead of failing.
+fn formula(
+    acc_begin: BigInt,
+    calculator: Box<dyn Fn(Vec<Value>) -> Result<Value, ShellError> + 'static>,
+) -> Box<dyn Fn(BigInt, Vec<Value>) -> Result<Value, ShellError> + 'static> {
+    Box::new(move |acc, datax| -> Result<Value, ShellError> {
+        // Multiply by reference so `acc_begin` is not cloned on every call.
+        let result = acc * &acc_begin;
+
+        let total = match calculator(datax) {
+            Ok(Value {
+                value: UntaggedValue::Primitive(Primitive::Int(computed)),
+                ..
+            }) => UntaggedValue::int(result + computed),
+            _ => UntaggedValue::int(0),
+        };
+
+        Ok(total.into_untagged_value())
+    })
+}
+
+/// Returns the reducer closure implementing `command`.
+///
+/// `Reduce::Sum` and `Reduce::Default` both resolve to an integer sum.
+pub fn reducer_for(
+    command: Reduce,
+) -> Box<dyn Fn(BigInt, Vec<Value>) -> Result<Value, ShellError> + 'static> {
+    match command {
+        // `formula` already returns a boxed closure; hand it back as is rather than
+        // wrapping it in a second `Box`.
+        Reduce::Sum | Reduce::Default => formula(Zero::zero(), Box::new(sum)),
+    }
+}
+
+/// Reduction strategies accepted by `reducer_for`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Reduce {
+    /// Add up the integer values.
+    Sum,
+    /// Used when no strategy, or an unrecognised one, was requested; behaves like `Sum`.
+    Default,
+}
+
+/// Collapses each innermost table of `values` into a single integer.
+///
+/// `values` is walked as a table of tables of tables. Every innermost table is handed to
+/// the reducer selected by `reducer` (`"sum"` picks `Reduce::Sum`, anything else
+/// `Reduce::Default`) and replaced by the integer it produces.
+///
+/// Shapes that do not match are tolerated rather than reported:
+/// * `values` is not a table: an empty table is returned;
+/// * a second-level entry is not a table: it becomes an empty table;
+/// * a third-level entry is not a table, or the reducer does not yield an integer:
+///   it becomes `0`.
+///
+/// All produced values carry `tag`. The function currently never returns `Err`.
+pub fn reduce(
+    values: &Value,
+    reducer: Option<String>,
+    tag: impl Into<Tag>,
+) -> Result<Value, ShellError> {
+    let tag = tag.into();
+
+    let reduce_with = match reducer {
+        Some(cmd) if cmd == "sum" => reducer_for(Reduce::Sum),
+        _ => reducer_for(Reduce::Default),
+    };
+
+    let datasets = match values {
+        Value {
+            value: UntaggedValue::Table(datasets),
+            ..
+        } => datasets,
+        _ => return Ok(UntaggedValue::Table(vec![]).into_value(&tag)),
+    };
+
+    // Reduces one innermost entry. The reducer takes its input by value, hence the clone.
+    let reduce_one = |entry: &Value| -> Value {
+        let computed = match entry {
+            Value {
+                value: UntaggedValue::Table(rows),
+                ..
+            } => match reduce_with(BigInt::zero(), rows.clone()) {
+                Ok(Value {
+                    value: UntaggedValue::Primitive(Primitive::Int(computed)),
+                    ..
+                }) => computed,
+                _ => BigInt::zero(),
+            },
+            _ => BigInt::zero(),
+        };
+
+        UntaggedValue::int(computed).into_value(&tag)
+    };
+
+    let datasets: Vec<Value> = datasets
+        .iter()
+        .map(|subsets| match subsets {
+            Value {
+                value: UntaggedValue::Table(data),
+                ..
+            } => {
+                let data: Vec<Value> = data.iter().map(&reduce_one).collect();
+                UntaggedValue::Table(data).into_value(&tag)
+            }
+            _ => UntaggedValue::Table(vec![]).into_value(&tag),
+        })
+        .collect();
+
+    Ok(UntaggedValue::Table(datasets).into_value(&tag))
+}
+
+/// Finds the largest integer stored in the nested tables of `values`.
+///
+/// `values` is walked as a table of tables; only integers sitting directly inside a
+/// second-level table are considered. The search starts from zero, so the result is
+/// never negative: an input with no integers, or with only negative ones, yields `0`.
+///
+/// When `values` is not a table at all the integer `-1` is returned.
+///
+/// `_map_by_column_name` is accepted but not used. The result carries `tag`, and the
+/// function currently never returns `Err`.
+pub fn map_max(
+    values: &Value,
+    _map_by_column_name: Option<String>,
+    tag: impl Into<Tag>,
+) -> Result<Value, ShellError> {
+    let tag = tag.into();
+
+    let datasets = match values {
+        Value {
+            value: UntaggedValue::Table(datasets),
+            ..
+        } => datasets,
+        _ => return Ok(UntaggedValue::int(-1).into_value(&tag)),
+    };
+
+    // A single pass over every second-level table gives the same answer as taking the
+    // maximum per table first and then the maximum of those, without the intermediate
+    // vector of values.
+    let max = datasets
+        .iter()
+        .filter_map(|subsets| match subsets {
+            Value {
+                value: UntaggedValue::Table(data),
+                ..
+            } => Some(data),
+            _ => None,
+        })
+        .flatten()
+        .fold(BigInt::zero(), |max, value| match value {
+            Value {
+                value: UntaggedValue::Primitive(Primitive::Int(n)),
+                ..
+            } if *n > max => n.clone(),
+            _ => max,
+        });
+
+    Ok(UntaggedValue::int(max).into_value(&tag))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{columns_sorted, evaluate, fetch, map_max, reduce, reducer_for, t_sort, Reduce};
+    use crate::commands::group_by::group;
+    use indexmap::IndexMap;
+    use nu_errors::ShellError;
+    use nu_protocol::{UntaggedValue, Value};
+    use nu_source::*;
+    use num_bigint::BigInt;
+    use num_traits::Zero;
+
+    fn int(s: impl Into<BigInt>) -> Value {
+        UntaggedValue::int(s).into_untagged_value()
+    }
+
+    fn string(input: impl Into<String>) -> Value {
+        UntaggedValue::string(input.into()).into_untagged_value()
+    }
+
+    fn row(entries: IndexMap<String, Value>) -> Value {
+        UntaggedValue::row(entries).into_untagged_value()
+    }
+
+    fn table(list: &[Value]) -> Value {
+        UntaggedValue::table(list).into_untagged_value()
+    }
+
+    /// Builds one fixture row with the `name`, `country` and `date` columns.
+    fn committer(name: &str, country: &str, date: &str) -> Value {
+        row(
+            indexmap! {"name".into() => string(name), "country".into() => string(country), "date".into() => string(date)},
+        )
+    }
+
+    fn nu_releases_grouped_by_date() -> Result<Value, ShellError> {
+        let key = String::from("date").tagged_unknown();
+        group(&key, nu_releases_commiters(), Tag::unknown())
+    }
+
+    fn nu_releases_sorted_by_date() -> Result<Value, ShellError> {
+        let key = String::from("date");
+
+        t_sort(
+            Some(key),
+            None,
+            &nu_releases_grouped_by_date()?,
+            Tag::unknown(),
+        )
+    }
+
+    fn nu_releases_evaluated_by_default_one() -> Result<Value, ShellError> {
+        evaluate(&nu_releases_sorted_by_date()?, None, Tag::unknown())
+    }
+
+    fn nu_releases_reduced_by_sum() -> Result<Value, ShellError> {
+        reduce(
+            &nu_releases_evaluated_by_default_one()?,
+            Some(String::from("sum")),
+            Tag::unknown(),
+        )
+    }
+
+    fn nu_releases_commiters() -> Vec<Value> {
+        vec![
+            committer("AR", "EC", "August 23-2019"),
+            committer("JT", "NZ", "August 23-2019"),
+            committer("YK", "US", "October 10-2019"),
+            committer("AR", "EC", "September 24-2019"),
+            committer("JT", "NZ", "October 10-2019"),
+            committer("YK", "US", "September 24-2019"),
+            committer("AR", "EC", "October 10-2019"),
+            committer("JT", "NZ", "September 24-2019"),
+            committer("YK", "US", "August 23-2019"),
+        ]
+    }
+
+    #[test]
+    fn show_columns_sorted_given_a_column_to_sort_by() -> Result<(), ShellError> {
+        let by_column = String::from("date");
+
+        assert_eq!(
+            columns_sorted(
+                Some(by_column),
+                &nu_releases_grouped_by_date()?,
+                Tag::unknown()
+            ),
+            vec![
+                "August 23-2019".to_string().tagged_unknown(),
+                "September 24-2019".to_string().tagged_unknown(),
+                "October 10-2019".to_string().tagged_unknown()
+            ]
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn sorts_the_tables() -> Result<(), ShellError> {
+        let group_by = String::from("date");
+
+        assert_eq!(
+            t_sort(
+                Some(group_by),
+                None,
+                &nu_releases_grouped_by_date()?,
+                Tag::unknown()
+            )?,
+            table(&[table(&[
+                table(&[
+                    committer("AR", "EC", "August 23-2019"),
+                    committer("JT", "NZ", "August 23-2019"),
+                    committer("YK", "US", "August 23-2019")
+                ]),
+                table(&[
+                    committer("AR", "EC", "September 24-2019"),
+                    committer("YK", "US", "September 24-2019"),
+                    committer("JT", "NZ", "September 24-2019")
+                ]),
+                table(&[
+                    committer("YK", "US", "October 10-2019"),
+                    committer("JT", "NZ", "October 10-2019"),
+                    committer("AR", "EC", "October 10-2019")
+                ]),
+            ]),])
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn evaluator_fetches_by_column_if_supplied_a_column_name() -> Result<(), ShellError> {
+        let subject = row(indexmap! { "name".into() => string("andres") });
+
+        let evaluator = fetch(Some(String::from("name")));
+
+        assert_eq!(evaluator(subject, Tag::unknown()), Some(string("andres")));
+        Ok(())
+    }
+
+    #[test]
+    fn evaluator_returns_1_if_no_column_name_given() -> Result<(), ShellError> {
+        let subject = row(indexmap! { "name".into() => string("andres") });
+        let evaluator = fetch(None);
+
+        assert_eq!(
+            evaluator(subject, Tag::unknown()),
+            Some(UntaggedValue::int(1).into_untagged_value())
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn evaluates_the_tables() -> Result<(), ShellError> {
+        assert_eq!(
+            evaluate(&nu_releases_sorted_by_date()?, None, Tag::unknown())?,
+            table(&[table(&[
+                table(&[int(1), int(1), int(1)]),
+                table(&[int(1), int(1), int(1)]),
+                table(&[int(1), int(1), int(1)]),
+            ]),])
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn evaluates_the_tables_with_custom_evaluator() -> Result<(), ShellError> {
+        let eval = String::from("name");
+
+        assert_eq!(
+            evaluate(&nu_releases_sorted_by_date()?, Some(eval), Tag::unknown())?,
+            table(&[table(&[
+                table(&[string("AR"), string("JT"), string("YK")]),
+                table(&[string("AR"), string("YK"), string("JT")]),
+                table(&[string("YK"), string("JT"), string("AR")]),
+            ]),])
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn reducer_computes_given_a_sum_command() -> Result<(), ShellError> {
+        let subject = vec![int(1), int(1), int(1)];
+
+        let action = reducer_for(Reduce::Sum);
+
+        assert_eq!(action(Zero::zero(), subject)?, int(3));
+
+        Ok(())
+    }
+
+    #[test]
+    fn reducer_computes() -> Result<(), ShellError> {
+        assert_eq!(
+            reduce(
+                &nu_releases_evaluated_by_default_one()?,
+                Some(String::from("sum")),
+                Tag::unknown()
+            )?,
+            table(&[table(&[int(3), int(3), int(3)])])
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn maps_and_gets_max_value() -> Result<(), ShellError> {
+        assert_eq!(
+            map_max(&nu_releases_reduced_by_sum()?, None, Tag::unknown())?,
+            int(3)
+        );
+
+        Ok(())
+    }
+}