Data processing mvp histogram.

This commit is contained in:
Andrés N. Robalino 2019-11-12 02:07:43 -05:00
parent 21f48577ae
commit 3163b0d362
12 changed files with 1262 additions and 57 deletions

View file

@ -18,4 +18,4 @@ description = "Groundwork so tables can be data processed"
reason = """ reason = """
These will allow take tables and be able to transform, process, and explore. These will allow take tables and be able to transform, process, and explore.
""" """
enabled = false enabled = false

View file

@ -330,6 +330,10 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
context.add_commands(vec![ context.add_commands(vec![
whole_stream_command(SplitBy), whole_stream_command(SplitBy),
whole_stream_command(ReduceBy), whole_stream_command(ReduceBy),
whole_stream_command(EvaluateBy),
whole_stream_command(TSortBy),
whole_stream_command(MapMaxBy),
whole_stream_command(Histogram),
]); ]);
} }
} }

View file

@ -62,6 +62,10 @@ cfg_if::cfg_if! {
if #[cfg(data_processing_primitives)] { if #[cfg(data_processing_primitives)] {
pub(crate) mod split_by; pub(crate) mod split_by;
pub(crate) mod reduce_by; pub(crate) mod reduce_by;
pub(crate) mod evaluate_by;
pub(crate) mod t_sort_by;
pub(crate) mod map_max_by;
pub(crate) mod histogram;
} }
} }
@ -146,6 +150,10 @@ cfg_if::cfg_if! {
if #[cfg(data_processing_primitives)] { if #[cfg(data_processing_primitives)] {
pub(crate) use split_by::SplitBy; pub(crate) use split_by::SplitBy;
pub(crate) use reduce_by::ReduceBy; pub(crate) use reduce_by::ReduceBy;
pub(crate) use evaluate_by::EvaluateBy;
pub(crate) use t_sort_by::TSortBy;
pub(crate) use map_max_by::MapMaxBy;
pub(crate) use histogram::Histogram;
} }
} }

260
src/commands/evaluate_by.rs Normal file
View file

@ -0,0 +1,260 @@
use crate::commands::WholeStreamCommand;
use crate::parser::hir::SyntaxShape;
use crate::prelude::*;
/// Marker type that implements the `evaluate-by` whole-stream command.
pub struct EvaluateBy;
/// Arguments deserialized for an `evaluate-by` invocation.
#[derive(Deserialize)]
pub struct EvaluateByArgs {
/// Optional name of the column whose value replaces each row in the output.
evaluate_with: Option<Tagged<String>>,
}
/// Command metadata and entry point for `evaluate-by`.
impl WholeStreamCommand for EvaluateBy {
    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "evaluate-by"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Creates a new table with the data from the tables rows evaluated by the column given."
    }

    /// A single optional named string argument: `evaluate_with`.
    fn signature(&self) -> Signature {
        let description = "the name of the column to evaluate by";
        Signature::build(self.name()).named("evaluate_with", SyntaxShape::String, description)
    }

    /// Binds the parsed arguments to [`evaluate_by`] and runs it.
    fn run(
        &self,
        args: CommandArgs,
        registry: &CommandRegistry,
    ) -> Result<OutputStream, ShellError> {
        let runnable = args.process(registry, evaluate_by)?;
        runnable.run()
    }
}
/// Runs `evaluate-by` over the pipeline input.
///
/// The whole input is collected first; only the first collected value is
/// evaluated. An empty input yields a labeled error instead of a value.
pub fn evaluate_by(
    EvaluateByArgs { evaluate_with }: EvaluateByArgs,
    RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
    let stream = async_stream! {
        let rows: Vec<Tagged<Value>> = input.values.collect().await;

        match rows.first() {
            None => yield Err(ShellError::labeled_error(
                "Expected table from pipeline",
                "requires a table input",
                name
            )),
            Some(table) => {
                // Strip the tag: `evaluate` only needs the bare column name.
                let column = evaluate_with.map(|evaluator| evaluator.item().clone());

                match evaluate(table, column, name) {
                    Ok(evaluated) => yield ReturnSuccess::value(evaluated),
                    Err(err) => yield Err(err)
                }
            }
        }
    };

    Ok(stream.to_output_stream())
}
/// Builds the per-row evaluator used by [`evaluate`].
///
/// * With `Some(key)`, the evaluator looks `key` up in the row and returns a
///   copy of the value found, re-tagged with the tag passed to the evaluator;
///   it returns `None` when the lookup finds nothing.
/// * With `None`, the evaluator ignores the row and always returns the
///   integer `1`.
fn fetch(
    key: Option<String>,
) -> Box<dyn Fn(Tagged<Value>, Tag) -> Option<Tagged<Value>> + 'static> {
    Box::new(move |value: Tagged<Value>, tag| match key {
        None => Some(Value::int(1).tagged(tag)),
        Some(ref column) => value
            .get_data_by_key(&column)
            .map(|found| found.item.clone().tagged(tag)),
    })
}
pub fn evaluate(
values: &Tagged<Value>,
evaluator: Option<String>,
tag: impl Into<Tag>,
) -> Result<Tagged<Value>, ShellError> {
let tag = tag.into();
let evaluate_with = match evaluator {
Some(keyfn) => fetch(Some(keyfn)),
None => fetch(None),
};
let results: Tagged<Value> = match values {
Tagged {
item: Value::Table(datasets),
..
} => {
let datasets: Vec<_> = datasets
.into_iter()
.map(|subsets| match subsets {
Tagged {
item: Value::Table(subsets),
..
} => {
let subsets: Vec<_> = subsets
.clone()
.into_iter()
.map(|data| match data {
Tagged {
item: Value::Table(data),
..
} => {
let data: Vec<_> = data
.into_iter()
.map(|x| evaluate_with(x.clone(), tag.clone()).unwrap())
.collect();
Value::Table(data).tagged(&tag)
}
_ => Value::Table(vec![]).tagged(&tag),
})
.collect();
Value::Table(subsets).tagged(&tag)
}
_ => Value::Table(vec![]).tagged(&tag),
})
.collect();
Value::Table(datasets.clone()).tagged(&tag)
}
_ => Value::Table(vec![]).tagged(&tag),
};
Ok(results)
}
#[cfg(test)]
mod tests {
    use crate::commands::evaluate_by::{evaluate, fetch};
    use crate::commands::group_by::group;
    use crate::commands::t_sort_by::t_sort;
    use crate::data::meta::*;
    use crate::prelude::*;
    use crate::Value;
    use indexmap::IndexMap;

    // --- value constructors -------------------------------------------------

    fn int(s: impl Into<BigInt>) -> Tagged<Value> {
        Value::int(s).tagged_unknown()
    }

    fn string(input: impl Into<String>) -> Tagged<Value> {
        Value::string(input.into()).tagged_unknown()
    }

    fn row(entries: IndexMap<String, Tagged<Value>>) -> Tagged<Value> {
        Value::row(entries).tagged_unknown()
    }

    fn table(list: &Vec<Tagged<Value>>) -> Tagged<Value> {
        Value::table(list).tagged_unknown()
    }

    /// One fixture row with `name`, `country` and `date` columns, in that order.
    fn committer(name: &str, country: &str, date: &str) -> Tagged<Value> {
        row(indexmap! {
            "name".into() => string(name),
            "country".into() => string(country),
            "date".into() => string(date)
        })
    }

    // --- fixtures -----------------------------------------------------------

    fn nu_releases_commiters() -> Vec<Tagged<Value>> {
        vec![
            committer("AR", "EC", "August 23-2019"),
            committer("JT", "NZ", "August 23-2019"),
            committer("YK", "US", "October 10-2019"),
            committer("AR", "EC", "September 24-2019"),
            committer("JT", "NZ", "October 10-2019"),
            committer("YK", "US", "September 24-2019"),
            committer("AR", "EC", "October 10-2019"),
            committer("JT", "NZ", "September 24-2019"),
            committer("YK", "US", "August 23-2019"),
        ]
    }

    fn nu_releases_grouped_by_date() -> Tagged<Value> {
        let key = String::from("date").tagged_unknown();
        group(&key, nu_releases_commiters(), Tag::unknown()).unwrap()
    }

    fn nu_releases_sorted_by_date() -> Tagged<Value> {
        let grouped = nu_releases_grouped_by_date();
        t_sort(Some(String::from("date")), None, &grouped, Tag::unknown()).unwrap()
    }

    // --- tests --------------------------------------------------------------

    #[test]
    fn evaluator_fetches_by_column_if_supplied_a_column_name() {
        let subject = row(indexmap! { "name".into() => string("andres") });
        let evaluator = fetch(Some(String::from("name")));

        let actual = evaluator(subject, Tag::unknown());

        assert_eq!(actual, Some(string("andres")));
    }

    #[test]
    fn evaluator_returns_1_if_no_column_name_given() {
        let subject = row(indexmap! { "name".into() => string("andres") });
        let evaluator = fetch(None);

        let actual = evaluator(subject, Tag::unknown());

        assert_eq!(actual, Some(Value::int(1).tagged_unknown()));
    }

    #[test]
    fn evaluates_the_tables() {
        let ones = table(&vec![int(1), int(1), int(1)]);
        let expected = table(&vec![table(&vec![ones.clone(), ones.clone(), ones])]);

        let actual = evaluate(&nu_releases_sorted_by_date(), None, Tag::unknown()).unwrap();

        assert_eq!(actual, expected);
    }

    #[test]
    fn evaluates_the_tables_with_custom_evaluator() {
        let expected = table(&vec![table(&vec![
            table(&vec![string("AR"), string("JT"), string("YK")]),
            table(&vec![string("AR"), string("YK"), string("JT")]),
            table(&vec![string("YK"), string("JT"), string("AR")]),
        ])]);

        let actual = evaluate(
            &nu_releases_sorted_by_date(),
            Some(String::from("name")),
            Tag::unknown(),
        )
        .unwrap();

        assert_eq!(actual, expected);
    }
}

View file

@ -131,11 +131,8 @@ mod tests {
Value::table(list).tagged_unknown() Value::table(list).tagged_unknown()
} }
#[test] fn nu_releases_commiters() -> Vec<Tagged<Value>> {
fn groups_table_by_key() { vec![
let for_key = String::from("date").tagged_unknown();
let nu_releases = vec![
row( row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}, indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")},
), ),
@ -163,10 +160,15 @@ mod tests {
row( row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}, indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")},
), ),
]; ]
}
#[test]
fn groups_table_by_date_column() {
let for_key = String::from("date").tagged_unknown();
assert_eq!( assert_eq!(
group(&for_key, nu_releases, Tag::unknown()).unwrap(), group(&for_key, nu_releases_commiters(), Tag::unknown()).unwrap(),
row(indexmap! { row(indexmap! {
"August 23-2019".into() => table(&vec![ "August 23-2019".into() => table(&vec![
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}), row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}),
@ -186,4 +188,30 @@ mod tests {
}) })
); );
} }
#[test]
fn groups_table_by_country_column() {
let for_key = String::from("country").tagged_unknown();
assert_eq!(
group(&for_key, nu_releases_commiters(), Tag::unknown()).unwrap(),
row(indexmap! {
"EC".into() => table(&vec![
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}),
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("Sept 24-2019")}),
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")})
]),
"NZ".into() => table(&vec![
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}),
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}),
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("Sept 24-2019")})
]),
"US".into() => table(&vec![
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}),
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("Sept 24-2019")}),
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}),
]),
})
);
}
} }

148
src/commands/histogram.rs Normal file
View file

@ -0,0 +1,148 @@
use crate::commands::WholeStreamCommand;
use crate::commands::group_by::group;
use crate::commands::t_sort_by::columns_sorted;
use crate::commands::t_sort_by::t_sort;
use crate::commands::evaluate_by::evaluate;
use crate::commands::reduce_by::reduce;
use crate::commands::map_max_by::map_max;
use crate::data::TaggedDictBuilder;
use crate::errors::ShellError;
use crate::prelude::*;
use num_traits::cast::ToPrimitive;
/// Marker type that implements the `histogram` whole-stream command.
pub struct Histogram;
/// Arguments deserialized for a `histogram` invocation.
#[derive(Deserialize)]
pub struct HistogramArgs {
/// Required name of the column the input rows are grouped by.
column_name: Tagged<String>,
}
/// Command metadata and entry point for `histogram`.
impl WholeStreamCommand for Histogram {
    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "histogram"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Creates a new table with a histogram based on the column name passed in."
    }

    /// A single required positional string argument: `column_name`.
    fn signature(&self) -> Signature {
        let description = "the name of the column to graph by";
        Signature::build(self.name()).required("column_name", SyntaxShape::String, description)
    }

    /// Binds the parsed arguments to [`histogram`] and runs it.
    fn run(
        &self,
        args: CommandArgs,
        registry: &CommandRegistry,
    ) -> Result<OutputStream, ShellError> {
        let runnable = args.process(registry, histogram)?;
        runnable.run()
    }
}
/// Runs `histogram` over the pipeline input.
///
/// The collected input is pushed through `group` -> `t_sort` -> `evaluate`
/// -> `reduce` -> `map_max` -> `percentages`. For every percentage in the
/// first resulting dataset one row is emitted with two columns:
///
/// * `committer` - the matching label from `columns_sorted`
/// * `activity`  - a bar of `*` characters, one per percentage point
///
/// Nothing is emitted when the percentages are not a table or the table has
/// no first dataset (previously the latter panicked). Labels and percentages
/// are paired positionally; surplus entries on either side are dropped
/// instead of panicking on a missing label.
///
/// # Errors
///
/// Propagates any error from the pipeline stages listed above.
pub fn histogram(
    HistogramArgs { column_name }: HistogramArgs,
    RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
    let stream = async_stream! {
        let values: Vec<Tagged<Value>> = input.values.collect().await;
        let group_by = column_name.item.clone();

        let groups = group(&column_name, values, &name)?;
        let group_labels = columns_sorted(Some(group_by.clone()), &groups, &name);
        let sorted = t_sort(Some(group_by), None, &groups, &name)?;
        let evaled = evaluate(&sorted, None, &name)?;
        let reduced = reduce(&evaled, None, &name)?;
        let maxima = map_max(&reduced, None, &name)?;
        let percents = percentages(&reduced, maxima, &name)?;

        if let Tagged { item: Value::Table(datasets), .. } = percents {
            if let Some(Tagged { item: Value::Table(start), .. }) = datasets.get(0) {
                for (percentage, label) in start.iter().zip(group_labels.iter()) {
                    let mut fact = TaggedDictBuilder::new(&name);

                    // NOTE(review): the label column is always called
                    // "committer", whatever `column_name` is - confirm
                    // whether it should follow the requested column.
                    fact.insert_tagged("committer", label.clone());

                    if let Tagged { item: Value::Primitive(Primitive::Int(num)), .. } = percentage {
                        // A value that does not fit `usize` (e.g. negative)
                        // draws an empty bar rather than wrapping around.
                        let width = num.to_usize().unwrap_or(0);
                        fact.insert("activity", "*".repeat(width));
                    }

                    yield ReturnSuccess::value(fact.into_tagged_value());
                }
            }
        }
    };

    Ok(stream.to_output_stream())
}
/// Converts every integer in a two-level nested table into its percentage of
/// `max`, using integer arithmetic (`n * 100 / max`).
///
/// * `values` - table of tables of integers; a non-table `values` is returned
///   unchanged (cloned), a non-table subset becomes an empty table, and a
///   non-integer element becomes `0`.
/// * `max` - the reference maximum; anything other than an integer is
///   treated as `0`.
///
/// A percentage that cannot be computed - `max` is `0`, the value does not
/// fit in an `i32`, or the multiplication overflows - is reported as `0`
/// instead of panicking.
fn percentages(
    values: &Tagged<Value>,
    max: Tagged<Value>,
    tag: impl Into<Tag>,
) -> Result<Tagged<Value>, ShellError> {
    let tag = tag.into();

    // The maximum is the same for every element, so resolve it once up front.
    let max = match &max {
        Tagged {
            item: Value::Primitive(Primitive::Int(maxima)),
            ..
        } => maxima.to_i32().unwrap_or(0),
        _ => 0,
    };

    let datasets = match values {
        Tagged {
            item: Value::Table(datasets),
            ..
        } => datasets,
        other => return Ok(other.clone()),
    };

    let datasets: Vec<_> = datasets
        .iter()
        .map(|subsets| match subsets {
            Tagged {
                item: Value::Table(data),
                ..
            } => {
                let data: Vec<_> = data
                    .iter()
                    .map(|d| {
                        let percent = match d {
                            Tagged {
                                item: Value::Primitive(Primitive::Int(n)),
                                ..
                            } => n
                                .to_i32()
                                .and_then(|n| n.checked_mul(100))
                                // `checked_div` yields `None` for a zero divisor.
                                .and_then(|scaled| scaled.checked_div(max))
                                .unwrap_or(0),
                            _ => 0,
                        };
                        Value::number(percent).tagged(&tag)
                    })
                    .collect();
                Value::Table(data).tagged(&tag)
            }
            _ => Value::Table(vec![]).tagged(&tag),
        })
        .collect();

    Ok(Value::Table(datasets).tagged(&tag))
}

227
src/commands/map_max_by.rs Normal file
View file

@ -0,0 +1,227 @@
use crate::commands::WholeStreamCommand;
use crate::parser::hir::SyntaxShape;
use crate::prelude::*;
use num_traits::cast::ToPrimitive;
/// Marker type that implements the `map-max-by` whole-stream command.
pub struct MapMaxBy;
/// Arguments deserialized for a `map-max-by` invocation.
#[derive(Deserialize)]
pub struct MapMaxByArgs {
/// Optional column name; it is forwarded to `map_max`, which does not use it yet.
column_name: Option<Tagged<String>>,
}
/// Command metadata and entry point for `map-max-by`.
impl WholeStreamCommand for MapMaxBy {
    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "map-max-by"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Creates a new table with the data from the tables rows maxed by the column given."
    }

    /// A single optional named string argument: `column_name`.
    fn signature(&self) -> Signature {
        let description = "the name of the column to map-max the table's rows";
        Signature::build(self.name()).named("column_name", SyntaxShape::String, description)
    }

    /// Binds the parsed arguments to [`map_max_by`] and runs it.
    fn run(
        &self,
        args: CommandArgs,
        registry: &CommandRegistry,
    ) -> Result<OutputStream, ShellError> {
        let runnable = args.process(registry, map_max_by)?;
        runnable.run()
    }
}
/// Runs `map-max-by` over the pipeline input.
///
/// The whole input is collected first; only the first collected value is
/// processed. An empty input yields a labeled error instead of a value.
pub fn map_max_by(
    MapMaxByArgs { column_name }: MapMaxByArgs,
    RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
    let stream = async_stream! {
        let rows: Vec<Tagged<Value>> = input.values.collect().await;

        match rows.first() {
            None => yield Err(ShellError::labeled_error(
                "Expected table from pipeline",
                "requires a table input",
                name
            )),
            Some(table) => {
                // Strip the tag: `map_max` only takes the bare column name.
                let map_by_column = column_name.map(|column_to_map| column_to_map.item().clone());

                match map_max(table, map_by_column, name) {
                    Ok(table_maxed) => yield ReturnSuccess::value(table_maxed),
                    Err(err) => yield Err(err)
                }
            }
        }
    };

    Ok(stream.to_output_stream())
}
/// Finds the largest integer in a two-level nested table.
///
/// Every subset that is a table contributes the maximum of its integer
/// elements; non-integer elements and non-table subsets are ignored. The
/// search starts from `0`, so the result is never negative for a table
/// input. A non-table `values` produces `-1`.
///
/// `_map_by_column_name` is accepted but not used.
///
/// # Panics
///
/// Panics if an integer element does not fit in an `i32`.
pub fn map_max(
    values: &Tagged<Value>,
    _map_by_column_name: Option<String>,
    tag: impl Into<Tag>,
) -> Result<Tagged<Value>, ShellError> {
    let tag = tag.into();

    let peak = match values {
        Tagged {
            item: Value::Table(datasets),
            ..
        } => datasets
            .iter()
            .map(|subsets| match subsets {
                // Maximum inside one subset, floored at zero.
                Tagged {
                    item: Value::Table(data),
                    ..
                } => data
                    .iter()
                    .filter_map(|value| match value {
                        Tagged {
                            item: Value::Primitive(Primitive::Int(n)),
                            ..
                        } => Some(n.to_i32().unwrap()),
                        _ => None,
                    })
                    .fold(0, std::cmp::max),
                _ => 0,
            })
            // Maximum across all subsets, floored at zero as well.
            .fold(0, std::cmp::max),
        _ => -1,
    };

    Ok(Value::number(peak).tagged(&tag))
}
#[cfg(test)]
mod tests {
    use crate::commands::evaluate_by::evaluate;
    use crate::commands::group_by::group;
    use crate::commands::map_max_by::map_max;
    use crate::commands::reduce_by::reduce;
    use crate::commands::t_sort_by::t_sort;
    use crate::data::meta::*;
    use crate::prelude::*;
    use crate::Value;
    use indexmap::IndexMap;

    // --- value constructors -------------------------------------------------

    fn int(s: impl Into<BigInt>) -> Tagged<Value> {
        Value::int(s).tagged_unknown()
    }

    fn string(input: impl Into<String>) -> Tagged<Value> {
        Value::string(input.into()).tagged_unknown()
    }

    fn row(entries: IndexMap<String, Tagged<Value>>) -> Tagged<Value> {
        Value::row(entries).tagged_unknown()
    }

    /// One fixture row with `name`, `country` and `date` columns, in that order.
    fn committer(name: &str, country: &str, date: &str) -> Tagged<Value> {
        row(indexmap! {
            "name".into() => string(name),
            "country".into() => string(country),
            "date".into() => string(date)
        })
    }

    // --- fixtures, listed in pipeline order ---------------------------------

    fn nu_releases_commiters() -> Vec<Tagged<Value>> {
        vec![
            committer("AR", "EC", "August 23-2019"),
            committer("JT", "NZ", "August 23-2019"),
            committer("YK", "US", "October 10-2019"),
            committer("AR", "EC", "September 24-2019"),
            committer("JT", "NZ", "October 10-2019"),
            committer("YK", "US", "September 24-2019"),
            committer("AR", "EC", "October 10-2019"),
            committer("JT", "NZ", "September 24-2019"),
            committer("YK", "US", "August 23-2019"),
            committer("JK", "US", "August 23-2019"),
        ]
    }

    fn nu_releases_grouped_by_date() -> Tagged<Value> {
        let key = String::from("date").tagged_unknown();
        group(&key, nu_releases_commiters(), Tag::unknown()).unwrap()
    }

    fn nu_releases_sorted_by_date() -> Tagged<Value> {
        let grouped = nu_releases_grouped_by_date();
        t_sort(Some(String::from("date")), None, &grouped, Tag::unknown()).unwrap()
    }

    fn nu_releases_evaluated_by_default_one() -> Tagged<Value> {
        evaluate(&nu_releases_sorted_by_date(), None, Tag::unknown()).unwrap()
    }

    fn nu_releases_reduced_by_sum() -> Tagged<Value> {
        let evaluated = nu_releases_evaluated_by_default_one();
        reduce(&evaluated, Some(String::from("sum")), Tag::unknown()).unwrap()
    }

    // --- tests --------------------------------------------------------------

    #[test]
    fn maps_and_gets_max_value() {
        let actual = map_max(&nu_releases_reduced_by_sum(), None, Tag::unknown()).unwrap();

        assert_eq!(actual, int(4));
    }
}

View file

@ -1,17 +1,12 @@
use crate::commands::WholeStreamCommand; use crate::commands::WholeStreamCommand;
use crate::data::TaggedDictBuilder;
use crate::parser::hir::SyntaxShape; use crate::parser::hir::SyntaxShape;
use crate::parser::registry;
use crate::data::base::Block;
use crate::prelude::*; use crate::prelude::*;
use num_traits::cast::ToPrimitive;
use log::trace;
pub struct ReduceBy; pub struct ReduceBy;
#[derive(Deserialize)] #[derive(Deserialize)]
pub struct ReduceByArgs { pub struct ReduceByArgs {
calculator: Block, reduce_with: Option<Tagged<String>>,
} }
impl WholeStreamCommand for ReduceBy { impl WholeStreamCommand for ReduceBy {
@ -20,15 +15,15 @@ impl WholeStreamCommand for ReduceBy {
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build("reduce-by").required( Signature::build("reduce-by").named(
"calculator", "reduce_with",
SyntaxShape::Block, SyntaxShape::String,
"The block used for calculating values", "the command to reduce by with",
) )
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
"Crates a new table with the data from the table rows reduced by the block given." "Creates a new table with the data from the tables rows reduced by the command given."
} }
fn run( fn run(
@ -41,14 +36,12 @@ impl WholeStreamCommand for ReduceBy {
} }
pub fn reduce_by( pub fn reduce_by(
ReduceByArgs { calculator }: ReduceByArgs, ReduceByArgs { reduce_with }: ReduceByArgs,
RunnableContext { input, name, .. }: RunnableContext, RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> { ) -> Result<OutputStream, ShellError> {
let stream = async_stream! { let stream = async_stream! {
let values: Vec<Tagged<Value>> = input.values.collect().await; let values: Vec<Tagged<Value>> = input.values.collect().await;
trace!("{:?}", &calculator);
if values.is_empty() { if values.is_empty() {
yield Err(ShellError::labeled_error( yield Err(ShellError::labeled_error(
"Expected table from pipeline", "Expected table from pipeline",
@ -56,7 +49,14 @@ pub fn reduce_by(
name name
)) ))
} else { } else {
match reduce(values, &calculator, name) {
let reduce_with = if let Some(reducer) = reduce_with {
Some(reducer.item().clone())
} else {
None
};
match reduce(&values[0], reduce_with, name) {
Ok(reduced) => yield ReturnSuccess::value(reduced), Ok(reduced) => yield ReturnSuccess::value(reduced),
Err(err) => yield Err(err) Err(err) => yield Err(err)
} }
@ -66,26 +66,109 @@ pub fn reduce_by(
Ok(stream.to_output_stream()) Ok(stream.to_output_stream())
} }
/// Adds up every integer in `data`; values of any other kind are skipped.
///
/// # Panics
///
/// Panics if an integer does not fit in an `i32`.
fn sum(data: Vec<Tagged<Value>>) -> i32 {
    let mut total = 0;

    for value in data {
        if let Tagged {
            item: Value::Primitive(Primitive::Int(n)),
            ..
        } = value
        {
            total = total + n.to_i32().unwrap();
        }
    }

    total
}
/// Wraps `calculator` into a reducer step.
///
/// The returned closure computes `acc * acc_begin + calculator(data)`, so an
/// `acc_begin` of `0` discards the incoming accumulator on every step.
fn formula(
    acc_begin: i32,
    calculator: Box<dyn Fn(Vec<Tagged<Value>>) -> i32 + 'static>,
) -> Box<dyn Fn(i32, Vec<Tagged<Value>>) -> i32 + 'static> {
    Box::new(move |acc: i32, datax: Vec<Tagged<Value>>| -> i32 {
        acc * acc_begin + calculator(datax)
    })
}
/// Selects the reducer step for `command`.
///
/// Both `Reduce::Sum` and `Reduce::Default` map to summing with the
/// accumulator multiplier set to `0`.
fn reducer_for(command: Reduce) -> Box<dyn Fn(i32, Vec<Tagged<Value>>) -> i32 + 'static> {
    match command {
        // `formula` already hands back a boxed closure; no extra box needed.
        Reduce::Sum | Reduce::Default => formula(0, Box::new(sum)),
    }
}
/// Reduction strategies accepted by `reducer_for`.
pub enum Reduce {
/// Sum the integers of each dataset.
Sum,
/// Fallback strategy; `reducer_for` treats it exactly like `Sum`.
Default,
}
pub fn reduce( pub fn reduce(
values: Vec<Tagged<Value>>, values: &Tagged<Value>,
calculator: &Block, reducer: Option<String>,
tag: impl Into<Tag>, tag: impl Into<Tag>,
) -> Result<Tagged<Value>, ShellError> { ) -> Result<Tagged<Value>, ShellError> {
let tag = tag.into(); let tag = tag.into();
let mut out = TaggedDictBuilder::new(&tag); let reduce_with = match reducer {
Some(cmd) if cmd == "sum" => reducer_for(Reduce::Sum),
Some(_) | None => reducer_for(Reduce::Default),
};
Ok(out.into_tagged_value()) let results: Tagged<Value> = match values {
Tagged {
item: Value::Table(datasets),
..
} => {
let datasets: Vec<_> = datasets
.into_iter()
.map(|subsets| {
let mut acc = 0;
match subsets {
Tagged {
item: Value::Table(data),
..
} => {
let data = data
.into_iter()
.map(|d| {
if let Tagged {
item: Value::Table(x),
..
} = d
{
acc = reduce_with(acc, x.clone());
Value::number(acc).tagged(&tag)
} else {
Value::number(0).tagged(&tag)
}
})
.collect::<Vec<_>>();
Value::Table(data).tagged(&tag)
}
_ => Value::Table(vec![]).tagged(&tag),
}
})
.collect();
Value::Table(datasets).tagged(&tag)
}
_ => Value::Table(vec![]).tagged(&tag),
};
Ok(results)
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::commands::reduce_by::reduce; use crate::commands::evaluate_by::evaluate;
use crate::commands::group_by::group;
use crate::commands::reduce_by::{reduce, reducer_for, Reduce};
use crate::commands::t_sort_by::t_sort;
use crate::data::meta::*; use crate::data::meta::*;
use crate::prelude::*;
use crate::Value; use crate::Value;
use indexmap::IndexMap; use indexmap::IndexMap;
fn int(s: impl Into<BigInt>) -> Tagged<Value> {
Value::int(s).tagged_unknown()
}
fn string(input: impl Into<String>) -> Tagged<Value> { fn string(input: impl Into<String>) -> Tagged<Value> {
Value::string(input.into()).tagged_unknown() Value::string(input.into()).tagged_unknown()
} }
@ -97,4 +180,78 @@ mod tests {
fn table(list: &Vec<Tagged<Value>>) -> Tagged<Value> { fn table(list: &Vec<Tagged<Value>>) -> Tagged<Value> {
Value::table(list).tagged_unknown() Value::table(list).tagged_unknown()
} }
fn nu_releases_sorted_by_date() -> Tagged<Value> {
let key = String::from("date");
t_sort(
Some(key),
None,
&nu_releases_grouped_by_date(),
Tag::unknown(),
)
.unwrap()
}
fn nu_releases_evaluated_by_default_one() -> Tagged<Value> {
evaluate(&nu_releases_sorted_by_date(), None, Tag::unknown()).unwrap()
}
fn nu_releases_grouped_by_date() -> Tagged<Value> {
let key = String::from("date").tagged_unknown();
group(&key, nu_releases_commiters(), Tag::unknown()).unwrap()
}
fn nu_releases_commiters() -> Vec<Tagged<Value>> {
vec![
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")},
),
row(
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")},
),
row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")},
),
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")},
),
row(
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")},
),
row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")},
),
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")},
),
row(
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")},
),
row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")},
),
]
}
#[test]
fn reducer_computes_given_a_sum_command() {
let subject = vec![int(1), int(1), int(1)];
let action = reducer_for(Reduce::Sum);
assert_eq!(action(0, subject), 3);
}
#[test]
fn reducer_computes() {
assert_eq!(
reduce(
&nu_releases_evaluated_by_default_one(),
Some(String::from("sum")),
Tag::unknown()
),
Ok(table(&vec![table(&vec![int(3), int(3), int(3)])]))
);
}
} }

View file

@ -150,6 +150,7 @@ pub fn split(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::commands::group_by::group;
use crate::commands::split_by::split; use crate::commands::split_by::split;
use crate::data::meta::*; use crate::data::meta::*;
use crate::Value; use crate::Value;
@ -167,30 +168,49 @@ mod tests {
Value::table(list).tagged_unknown() Value::table(list).tagged_unknown()
} }
fn nu_releases_grouped_by_date() -> Tagged<Value> {
let key = String::from("date").tagged_unknown();
group(&key, nu_releases_commiters(), Tag::unknown()).unwrap()
}
fn nu_releases_commiters() -> Vec<Tagged<Value>> {
vec![
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")},
),
row(
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")},
),
row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")},
),
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("Sept 24-2019")},
),
row(
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")},
),
row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("Sept 24-2019")},
),
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")},
),
row(
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("Sept 24-2019")},
),
row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")},
),
]
}
#[test] #[test]
fn splits_inner_tables_by_key() { fn splits_inner_tables_by_key() {
let for_key = String::from("country").tagged_unknown(); let for_key = String::from("country").tagged_unknown();
let nu_releases = row(indexmap! {
"August 23-2019".into() => table(&vec![
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}),
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}),
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")})
]),
"Sept 24-2019".into() => table(&vec![
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("Sept 24-2019")}),
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("Sept 24-2019")}),
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("Sept 24-2019")})
]),
"October 10-2019".into() => table(&vec![
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}),
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}),
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")})
])
});
assert_eq!( assert_eq!(
split(&for_key, &nu_releases, Tag::unknown()).unwrap(), split(&for_key, &nu_releases_grouped_by_date(), Tag::unknown()).unwrap(),
Value::row(indexmap! { Value::row(indexmap! {
"EC".into() => row(indexmap! { "EC".into() => row(indexmap! {
"August 23-2019".into() => table(&vec![ "August 23-2019".into() => table(&vec![
@ -235,18 +255,12 @@ mod tests {
let nu_releases = row(indexmap! { let nu_releases = row(indexmap! {
"August 23-2019".into() => table(&vec![ "August 23-2019".into() => table(&vec![
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}), row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")})
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}),
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")})
]), ]),
"Sept 24-2019".into() => table(&vec![ "Sept 24-2019".into() => table(&vec![
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("Sept 24-2019")}), row(indexmap!{"name".into() => Value::string("JT").tagged(Tag::from(Span::new(5,10))), "date".into() => string("Sept 24-2019")})
row(indexmap!{"name".into() => Value::string("JT").tagged(Tag::from(Span::new(5,10))), "date".into() => string("Sept 24-2019")}),
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("Sept 24-2019")})
]), ]),
"October 10-2019".into() => table(&vec![ "October 10-2019".into() => table(&vec![
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}),
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}),
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}) row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")})
]) ])
}); });

358
src/commands/t_sort_by.rs Normal file
View file

@ -0,0 +1,358 @@
use crate::commands::WholeStreamCommand;
use crate::data::{TaggedDictBuilder, TaggedListBuilder};
use crate::errors::ShellError;
use crate::prelude::*;
use chrono::{DateTime, NaiveDate, Utc};
/// Unit type that carries the `t-sort-by` command implementation
/// (see its `WholeStreamCommand` impl).
pub struct TSortBy;
/// Arguments of the `t-sort-by` command, deserialized from the command line.
#[derive(Deserialize)]
pub struct TSortByArgs {
/// `--show-columns` switch: emit the sorted column names instead of the
/// sorted data.
#[serde(rename(deserialize = "show-columns"))]
show_columns: bool,
/// Optional name of the column the incoming table was grouped by.
group_by: Option<Tagged<String>>,
// Declared in the command signature, but the command body ignores it for
// now (it always passes `None` as the split column to `t_sort`).
#[allow(unused)]
split_by: Option<String>,
}
impl WholeStreamCommand for TSortBy {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "t-sort-by"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Sort by the given columns."
    }

    /// Declares the `--show-columns` switch and the two optional named
    /// string arguments, `group_by` and `split_by`.
    fn signature(&self) -> Signature {
        let with_switch = Signature::build("t-sort-by")
            .switch("show-columns", "Displays the column names sorted");

        let with_group_by = with_switch.named(
            "group_by",
            SyntaxShape::String,
            "the name of the column to group by",
        );

        with_group_by.named(
            "split_by",
            SyntaxShape::String,
            "the name of the column within the grouped by table to split by",
        )
    }

    /// Binds the parsed arguments to `t_sort_by` and runs the result.
    fn run(
        &self,
        args: CommandArgs,
        registry: &CommandRegistry,
    ) -> Result<OutputStream, ShellError> {
        let runnable = args.process(registry, t_sort_by)?;
        runnable.run()
    }
}
/// Body of the `t-sort-by` command.
///
/// Collects the whole input stream and works on its first value only:
///
/// * with `--show-columns`, yields each sorted column label as its own value;
/// * otherwise yields the single value produced by [`t_sort`], or its error.
///
/// An empty input stream yields an error instead of panicking on the missing
/// first value.
fn t_sort_by(
    TSortByArgs {
        show_columns,
        group_by,
        ..
    }: TSortByArgs,
    RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
    Ok(OutputStream::new(async_stream! {
        let values: Vec<Tagged<Value>> = input.values.collect().await;

        let column_grouped_by_name = group_by.map(|grouped_by| grouped_by.item().clone());

        if values.is_empty() {
            // Nothing arrived through the pipeline, so there is no table to sort.
            yield Err(ShellError::labeled_error(
                "Expected table from pipeline",
                "requires a table input",
                &name,
            ))
        } else if show_columns {
            for label in columns_sorted(column_grouped_by_name, &values[0], &name).iter() {
                yield ReturnSuccess::value(label.clone());
            }
        } else {
            match t_sort(column_grouped_by_name, None, &values[0], name) {
                Ok(sorted) => yield ReturnSuccess::value(sorted),
                Err(err) => yield Err(err)
            }
        }
    }))
}
/// Returns the column names of a row in sorted order, as string values
/// tagged with `tag`.
///
/// Every key is first tried as a date in the `"%B %d-%Y"` format
/// (for example `August 23-2019`). Keys that parse are ordered as dates; keys
/// that do not parse are ordered as plain strings.
///
/// The labels returned are the row's own keys, unmodified, so each one can
/// be used to look the column up again. Re-formatting the parsed date would
/// not guarantee that: `August 5-2019` parses fine but formats back as
/// `August 05-2019`.
///
/// Any value that is not a row yields the single label `"default"`.
///
/// `_group_by_name` is accepted for interface symmetry with [`t_sort`] and is
/// currently unused.
pub fn columns_sorted(
    _group_by_name: Option<String>,
    value: &Tagged<Value>,
    tag: impl Into<Tag>,
) -> Vec<Tagged<Value>> {
    let origin_tag = tag.into();

    match value {
        Tagged {
            item: Value::Row(rows),
            ..
        } => {
            // Pair every key with the value it should be ordered by.
            let mut keys: Vec<(Tagged<Value>, &str)> = rows
                .entries
                .keys()
                .map(|s| s.as_ref())
                .map(|key: &str| {
                    let sort_value = match NaiveDate::parse_from_str(key, "%B %d-%Y") {
                        // The time of day is arbitrary; only the date takes part
                        // in the ordering.
                        Ok(parsed) => Value::Primitive(Primitive::Date(
                            DateTime::<Utc>::from_utc(parsed.and_hms(12, 34, 56), Utc),
                        )),
                        Err(_) => Value::string(key),
                    };

                    (sort_value.tagged_unknown(), key)
                })
                .collect();

            // Stable sort: keys with equal sort values keep their row order.
            keys.sort_by(|left, right| left.0.cmp(&right.0));

            keys.into_iter()
                .map(|(_, key)| Value::string(key).tagged(&origin_tag))
                .collect()
        }
        _ => vec![Value::string("default").tagged(&origin_tag)],
    }
}
/// Reorders the groups of an already grouped table by their sorted labels.
///
/// * `group_by_name` - column the table was grouped by; when `None` the
///   result is `nothing`.
/// * `split_by_name` - splitting is not implemented yet; when `Some` the
///   result is `nothing`.
/// * `value` - the grouped table, a row whose keys are the group labels.
/// * `tag` - tag applied to every value created here.
///
/// The result is a table with one inner table per split (currently always
/// the single `"default"` split). Each inner table holds the group values in
/// the order given by [`columns_sorted`].
///
/// A label with no matching group contributes an empty table rather than
/// aborting with a panic.
///
/// # Errors
///
/// Returns the underlying error if a label cannot be read as a string.
pub fn t_sort(
    group_by_name: Option<String>,
    split_by_name: Option<String>,
    value: &Tagged<Value>,
    tag: impl Into<Tag>,
) -> Result<Tagged<Value>, ShellError> {
    let origin_tag = tag.into();

    // Only "grouped, not split" input is supported so far.
    let column_name = match (group_by_name, split_by_name) {
        (Some(column_name), None) => column_name,
        _ => return Ok(Value::nothing().tagged(&origin_tag)),
    };

    let sorted_labels = columns_sorted(Some(column_name), value, &origin_tag);

    // Wrap the grouped table in a single "default" split so it has the same
    // shape a split table would have.
    let mut dataset = TaggedDictBuilder::new(&origin_tag);
    dataset.insert_tagged("default", value.clone());
    let dataset = dataset.into_tagged_value();

    // The split labels are sorted exactly like the group labels, so reuse the
    // shared helper instead of repeating its logic here.
    let split_labels = match &dataset {
        Tagged {
            item: Value::Row(_),
            ..
        } => columns_sorted(None, &dataset, &origin_tag),
        _ => vec![],
    };

    let mut outer = TaggedListBuilder::new(&origin_tag);

    for split in split_labels {
        let split_name = split.as_string()?;
        let groups = dataset.get_data_by_key(&split_name);

        let mut sorted_groups = Vec::with_capacity(sorted_labels.len());

        for label in &sorted_labels {
            let label = label.as_string()?;

            let group = match groups {
                Some(Tagged {
                    item: Value::Row(dict),
                    ..
                }) => dict
                    .get_data_by_key(&label)
                    .cloned()
                    // A label without a matching group is treated like a
                    // missing split: an empty table.
                    .unwrap_or_else(|| Value::Table(vec![]).tagged(&origin_tag)),
                _ => Value::Table(vec![]).tagged(&origin_tag),
            };

            sorted_groups.push(group);
        }

        outer.insert_tagged(Value::Table(sorted_groups).tagged(&origin_tag));
    }

    Ok(Value::Table(outer.list).tagged(&origin_tag))
}
#[cfg(test)]
mod tests {
    use crate::commands::group_by::group;
    use crate::commands::t_sort_by::{columns_sorted, t_sort};
    use crate::data::meta::*;
    use crate::Value;
    use indexmap::IndexMap;

    /// String value with an unknown tag.
    fn string(input: impl Into<String>) -> Tagged<Value> {
        Value::string(input.into()).tagged_unknown()
    }

    /// Row value with an unknown tag.
    fn row(entries: IndexMap<String, Tagged<Value>>) -> Tagged<Value> {
        Value::row(entries).tagged_unknown()
    }

    /// Table value with an unknown tag.
    fn table(list: &Vec<Tagged<Value>>) -> Tagged<Value> {
        Value::table(list).tagged_unknown()
    }

    /// One fixture row with `name`, `country` and `date` columns, in that order.
    fn committer(name: &str, country: &str, date: &str) -> Tagged<Value> {
        row(indexmap! {
            "name".into() => string(name),
            "country".into() => string(country),
            "date".into() => string(date)
        })
    }

    /// The fixture rows grouped by their `date` column.
    fn nu_releases_grouped_by_date() -> Tagged<Value> {
        let key = String::from("date").tagged_unknown();
        group(&key, nu_releases_commiters(), Tag::unknown()).unwrap()
    }

    /// Fixture rows, deliberately not in date order.
    fn nu_releases_commiters() -> Vec<Tagged<Value>> {
        vec![
            ("AR", "EC", "August 23-2019"),
            ("JT", "NZ", "August 23-2019"),
            ("YK", "US", "October 10-2019"),
            ("AR", "EC", "September 24-2019"),
            ("JT", "NZ", "October 10-2019"),
            ("YK", "US", "September 24-2019"),
            ("AR", "EC", "October 10-2019"),
            ("JT", "NZ", "September 24-2019"),
            ("YK", "US", "August 23-2019"),
        ]
        .into_iter()
        .map(|(name, country, date)| committer(name, country, date))
        .collect()
    }

    #[test]
    fn show_columns_sorted_given_a_column_to_sort_by() {
        let by_column = String::from("date");

        let actual = columns_sorted(
            Some(by_column),
            &nu_releases_grouped_by_date(),
            Tag::unknown(),
        );

        let expected = vec![
            string("August 23-2019"),
            string("September 24-2019"),
            string("October 10-2019"),
        ];

        assert_eq!(actual, expected)
    }

    #[test]
    fn sorts_the_tables() {
        let group_by = String::from("date");

        let actual = t_sort(
            Some(group_by),
            None,
            &nu_releases_grouped_by_date(),
            Tag::unknown(),
        )
        .unwrap();

        // Groups appear in date order; rows inside a group keep fixture order.
        let august = table(&vec![
            committer("AR", "EC", "August 23-2019"),
            committer("JT", "NZ", "August 23-2019"),
            committer("YK", "US", "August 23-2019"),
        ]);
        let september = table(&vec![
            committer("AR", "EC", "September 24-2019"),
            committer("YK", "US", "September 24-2019"),
            committer("JT", "NZ", "September 24-2019"),
        ]);
        let october = table(&vec![
            committer("YK", "US", "October 10-2019"),
            committer("JT", "NZ", "October 10-2019"),
            committer("AR", "EC", "October 10-2019"),
        ]);

        let expected = table(&vec![table(&vec![august, september, october])]);

        assert_eq!(actual, expected);
    }
}

View file

@ -430,6 +430,7 @@ impl Tagged<Value> {
Value::Primitive(Primitive::Int(x)) => Ok(format!("{}", x)), Value::Primitive(Primitive::Int(x)) => Ok(format!("{}", x)),
Value::Primitive(Primitive::Bytes(x)) => Ok(format!("{}", x)), Value::Primitive(Primitive::Bytes(x)) => Ok(format!("{}", x)),
Value::Primitive(Primitive::Path(x)) => Ok(format!("{}", x.display())), Value::Primitive(Primitive::Path(x)) => Ok(format!("{}", x.display())),
Value::Primitive(Primitive::Date(x)) => Ok(format!("{}", x.to_rfc3339())),
// TODO: this should definitely be more general with better errors // TODO: this should definitely be more general with better errors
other => Err(ShellError::labeled_error( other => Err(ShellError::labeled_error(
"Expected string", "Expected string",

View file

@ -114,7 +114,7 @@ impl Dictionary {
#[derive(Debug)] #[derive(Debug)]
pub struct TaggedListBuilder { pub struct TaggedListBuilder {
tag: Tag, tag: Tag,
list: Vec<Tagged<Value>>, pub list: Vec<Tagged<Value>>,
} }
impl TaggedListBuilder { impl TaggedListBuilder {