mirror of
https://github.com/nushell/nushell
synced 2024-12-26 04:53:09 +00:00
Merge pull request #954 from andrasio/reduce
Expose histogram and split-by command.
This commit is contained in:
commit
5bb822dcd4
14 changed files with 1404 additions and 46 deletions
|
@ -256,6 +256,7 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat
|
|||
| format pattern | Format table row data as a string following the given pattern |
|
||||
| get column-or-column-path | Open column and get data from the corresponding cells |
|
||||
| group-by column | Creates a new table with the data from the table rows grouped by the column given |
|
||||
| histogram column ...column-names | Creates a new table with a histogram based on the column name passed in, optionally giving the frequency column name |
|
||||
| inc (column-or-column-path) | Increment a value or version. Optionally use the column of a table |
|
||||
| insert column-or-column-path value | Insert a new column to the table |
|
||||
| last amount | Show only the last number of rows |
|
||||
|
@ -267,6 +268,7 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat
|
|||
| reverse | Reverses the table. |
|
||||
| skip amount | Skip a number of rows |
|
||||
| skip-while condition | Skips rows while the condition matches. |
|
||||
| split-by column | Creates a new table with the data from the inner tables split by the column given |
|
||||
| sort-by ...columns | Sort by the given columns |
|
||||
| str (column) | Apply string function. Optionally use the column of a table |
|
||||
| sum | Sum a column of values |
|
||||
|
|
|
@ -18,4 +18,4 @@ description = "Groundwork so tables can be data processed"
|
|||
reason = """
|
||||
These will allow take tables and be able to transform, process, and explore.
|
||||
"""
|
||||
enabled = false
|
||||
enabled = false
|
||||
|
|
15
src/cli.rs
15
src/cli.rs
|
@ -301,6 +301,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
|
|||
whole_stream_command(FromYML),
|
||||
whole_stream_command(Pick),
|
||||
whole_stream_command(Get),
|
||||
whole_stream_command(Histogram),
|
||||
per_item_command(Remove),
|
||||
per_item_command(Fetch),
|
||||
per_item_command(Open),
|
||||
|
@ -320,13 +321,23 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
|
|||
per_item_command(Mkdir),
|
||||
per_item_command(Move),
|
||||
whole_stream_command(Save),
|
||||
whole_stream_command(SplitBy),
|
||||
whole_stream_command(Table),
|
||||
whole_stream_command(Version),
|
||||
whole_stream_command(Which),
|
||||
#[cfg(data_processing_primitives)]
|
||||
whole_stream_command(SplitBy),
|
||||
]);
|
||||
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(data_processing_primitives)] {
|
||||
context.add_commands(vec![
|
||||
whole_stream_command(ReduceBy),
|
||||
whole_stream_command(EvaluateBy),
|
||||
whole_stream_command(TSortBy),
|
||||
whole_stream_command(MapMaxBy),
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "clipboard")]
|
||||
{
|
||||
context.add_commands(vec![whole_stream_command(
|
||||
|
|
|
@ -16,6 +16,8 @@ pub(crate) mod debug;
|
|||
pub(crate) mod echo;
|
||||
pub(crate) mod enter;
|
||||
pub(crate) mod env;
|
||||
#[allow(unused)]
|
||||
pub(crate) mod evaluate_by;
|
||||
pub(crate) mod exit;
|
||||
pub(crate) mod fetch;
|
||||
pub(crate) mod first;
|
||||
|
@ -33,10 +35,13 @@ pub(crate) mod from_yaml;
|
|||
pub(crate) mod get;
|
||||
pub(crate) mod group_by;
|
||||
pub(crate) mod help;
|
||||
pub(crate) mod histogram;
|
||||
pub(crate) mod history;
|
||||
pub(crate) mod last;
|
||||
pub(crate) mod lines;
|
||||
pub(crate) mod ls;
|
||||
#[allow(unused)]
|
||||
pub(crate) mod map_max_by;
|
||||
pub(crate) mod mkdir;
|
||||
pub(crate) mod mv;
|
||||
pub(crate) mod next;
|
||||
|
@ -49,6 +54,8 @@ pub(crate) mod post;
|
|||
pub(crate) mod prepend;
|
||||
pub(crate) mod prev;
|
||||
pub(crate) mod pwd;
|
||||
#[allow(unused)]
|
||||
pub(crate) mod reduce_by;
|
||||
pub(crate) mod reject;
|
||||
pub(crate) mod reverse;
|
||||
pub(crate) mod rm;
|
||||
|
@ -57,12 +64,11 @@ pub(crate) mod shells;
|
|||
pub(crate) mod size;
|
||||
pub(crate) mod skip_while;
|
||||
pub(crate) mod sort_by;
|
||||
|
||||
#[cfg(data_processing_primitives)]
|
||||
pub(crate) mod split_by;
|
||||
|
||||
pub(crate) mod split_column;
|
||||
pub(crate) mod split_row;
|
||||
#[allow(unused)]
|
||||
pub(crate) mod t_sort_by;
|
||||
pub(crate) mod table;
|
||||
pub(crate) mod tags;
|
||||
pub(crate) mod to_bson;
|
||||
|
@ -95,6 +101,8 @@ pub(crate) use debug::Debug;
|
|||
pub(crate) use echo::Echo;
|
||||
pub(crate) use enter::Enter;
|
||||
pub(crate) use env::Env;
|
||||
#[allow(unused)]
|
||||
pub(crate) use evaluate_by::EvaluateBy;
|
||||
pub(crate) use exit::Exit;
|
||||
pub(crate) use fetch::Fetch;
|
||||
pub(crate) use first::First;
|
||||
|
@ -114,10 +122,13 @@ pub(crate) use from_yaml::FromYML;
|
|||
pub(crate) use get::Get;
|
||||
pub(crate) use group_by::GroupBy;
|
||||
pub(crate) use help::Help;
|
||||
pub(crate) use histogram::Histogram;
|
||||
pub(crate) use history::History;
|
||||
pub(crate) use last::Last;
|
||||
pub(crate) use lines::Lines;
|
||||
pub(crate) use ls::LS;
|
||||
#[allow(unused)]
|
||||
pub(crate) use map_max_by::MapMaxBy;
|
||||
pub(crate) use mkdir::Mkdir;
|
||||
pub(crate) use mv::Move;
|
||||
pub(crate) use next::Next;
|
||||
|
@ -129,6 +140,8 @@ pub(crate) use post::Post;
|
|||
pub(crate) use prepend::Prepend;
|
||||
pub(crate) use prev::Previous;
|
||||
pub(crate) use pwd::PWD;
|
||||
#[allow(unused)]
|
||||
pub(crate) use reduce_by::ReduceBy;
|
||||
pub(crate) use reject::Reject;
|
||||
pub(crate) use reverse::Reverse;
|
||||
pub(crate) use rm::Remove;
|
||||
|
@ -137,12 +150,11 @@ pub(crate) use shells::Shells;
|
|||
pub(crate) use size::Size;
|
||||
pub(crate) use skip_while::SkipWhile;
|
||||
pub(crate) use sort_by::SortBy;
|
||||
|
||||
#[cfg(data_processing_primitives)]
|
||||
pub(crate) use split_by::SplitBy;
|
||||
|
||||
pub(crate) use split_column::SplitColumn;
|
||||
pub(crate) use split_row::SplitRow;
|
||||
#[allow(unused)]
|
||||
pub(crate) use t_sort_by::TSortBy;
|
||||
pub(crate) use table::Table;
|
||||
pub(crate) use tags::Tags;
|
||||
pub(crate) use to_bson::ToBSON;
|
||||
|
|
260
src/commands/evaluate_by.rs
Normal file
260
src/commands/evaluate_by.rs
Normal file
|
@ -0,0 +1,260 @@
|
|||
use crate::commands::WholeStreamCommand;
|
||||
use crate::parser::hir::SyntaxShape;
|
||||
use crate::prelude::*;
|
||||
pub struct EvaluateBy;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct EvaluateByArgs {
|
||||
evaluate_with: Option<Tagged<String>>,
|
||||
}
|
||||
|
||||
impl WholeStreamCommand for EvaluateBy {
|
||||
fn name(&self) -> &str {
|
||||
"evaluate-by"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("evaluate-by").named(
|
||||
"evaluate_with",
|
||||
SyntaxShape::String,
|
||||
"the name of the column to evaluate by",
|
||||
)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a new table with the data from the tables rows evaluated by the column given."
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
args: CommandArgs,
|
||||
registry: &CommandRegistry,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
args.process(registry, evaluate_by)?.run()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn evaluate_by(
|
||||
EvaluateByArgs { evaluate_with }: EvaluateByArgs,
|
||||
RunnableContext { input, name, .. }: RunnableContext,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
let stream = async_stream! {
|
||||
let values: Vec<Tagged<Value>> = input.values.collect().await;
|
||||
|
||||
|
||||
if values.is_empty() {
|
||||
yield Err(ShellError::labeled_error(
|
||||
"Expected table from pipeline",
|
||||
"requires a table input",
|
||||
name
|
||||
))
|
||||
} else {
|
||||
|
||||
let evaluate_with = if let Some(evaluator) = evaluate_with {
|
||||
Some(evaluator.item().clone())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
match evaluate(&values[0], evaluate_with, name) {
|
||||
Ok(evaluated) => yield ReturnSuccess::value(evaluated),
|
||||
Err(err) => yield Err(err)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(stream.to_output_stream())
|
||||
}
|
||||
|
||||
fn fetch(
|
||||
key: Option<String>,
|
||||
) -> Box<dyn Fn(Tagged<Value>, Tag) -> Option<Tagged<Value>> + 'static> {
|
||||
Box::new(move |value: Tagged<Value>, tag| match key {
|
||||
Some(ref key_given) => {
|
||||
if let Some(Tagged { item, .. }) = value.get_data_by_key(&key_given) {
|
||||
Some(item.clone().tagged(tag))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
None => Some(Value::int(1).tagged(tag)),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn evaluate(
|
||||
values: &Tagged<Value>,
|
||||
evaluator: Option<String>,
|
||||
tag: impl Into<Tag>,
|
||||
) -> Result<Tagged<Value>, ShellError> {
|
||||
let tag = tag.into();
|
||||
|
||||
let evaluate_with = match evaluator {
|
||||
Some(keyfn) => fetch(Some(keyfn)),
|
||||
None => fetch(None),
|
||||
};
|
||||
|
||||
let results: Tagged<Value> = match values {
|
||||
Tagged {
|
||||
item: Value::Table(datasets),
|
||||
..
|
||||
} => {
|
||||
let datasets: Vec<_> = datasets
|
||||
.into_iter()
|
||||
.map(|subsets| match subsets {
|
||||
Tagged {
|
||||
item: Value::Table(subsets),
|
||||
..
|
||||
} => {
|
||||
let subsets: Vec<_> = subsets
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(|data| match data {
|
||||
Tagged {
|
||||
item: Value::Table(data),
|
||||
..
|
||||
} => {
|
||||
let data: Vec<_> = data
|
||||
.into_iter()
|
||||
.map(|x| evaluate_with(x.clone(), tag.clone()).unwrap())
|
||||
.collect();
|
||||
Value::Table(data).tagged(&tag)
|
||||
}
|
||||
_ => Value::Table(vec![]).tagged(&tag),
|
||||
})
|
||||
.collect();
|
||||
Value::Table(subsets).tagged(&tag)
|
||||
}
|
||||
_ => Value::Table(vec![]).tagged(&tag),
|
||||
})
|
||||
.collect();
|
||||
|
||||
Value::Table(datasets.clone()).tagged(&tag)
|
||||
}
|
||||
_ => Value::Table(vec![]).tagged(&tag),
|
||||
};
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {

    use crate::commands::evaluate_by::{evaluate, fetch};
    use crate::commands::group_by::group;
    use crate::commands::t_sort_by::t_sort;
    use crate::data::meta::*;
    use crate::prelude::*;
    use crate::Value;
    use indexmap::IndexMap;

    // Small constructors for untagged fixture values.

    fn int(s: impl Into<BigInt>) -> Tagged<Value> {
        Value::int(s).tagged_unknown()
    }

    fn string(input: impl Into<String>) -> Tagged<Value> {
        Value::string(input.into()).tagged_unknown()
    }

    fn row(entries: IndexMap<String, Tagged<Value>>) -> Tagged<Value> {
        Value::row(entries).tagged_unknown()
    }

    fn table(list: &Vec<Tagged<Value>>) -> Tagged<Value> {
        Value::table(list).tagged_unknown()
    }

    /// One fixture row with `name`, `country` and `date` columns.
    fn committer(name: &str, country: &str, date: &str) -> Tagged<Value> {
        row(indexmap! {
            "name".into() => string(name),
            "country".into() => string(country),
            "date".into() => string(date)
        })
    }

    /// The committers fixture, grouped by date and then passed through `t_sort`.
    fn nu_releases_sorted_by_date() -> Tagged<Value> {
        let grouped = nu_releases_grouped_by_date();

        t_sort(Some(String::from("date")), None, &grouped, Tag::unknown()).unwrap()
    }

    /// The committers fixture grouped by its `date` column.
    fn nu_releases_grouped_by_date() -> Tagged<Value> {
        let key = String::from("date").tagged_unknown();
        group(&key, nu_releases_commiters(), Tag::unknown()).unwrap()
    }

    /// Nine fixture rows: three committers across three release dates.
    fn nu_releases_commiters() -> Vec<Tagged<Value>> {
        vec![
            committer("AR", "EC", "August 23-2019"),
            committer("JT", "NZ", "August 23-2019"),
            committer("YK", "US", "October 10-2019"),
            committer("AR", "EC", "September 24-2019"),
            committer("JT", "NZ", "October 10-2019"),
            committer("YK", "US", "September 24-2019"),
            committer("AR", "EC", "October 10-2019"),
            committer("JT", "NZ", "September 24-2019"),
            committer("YK", "US", "August 23-2019"),
        ]
    }

    #[test]
    fn evaluator_fetches_by_column_if_supplied_a_column_name() {
        let subject = row(indexmap! { "name".into() => string("andres") });
        let evaluator = fetch(Some(String::from("name")));

        let fetched = evaluator(subject, Tag::unknown());

        assert_eq!(fetched, Some(string("andres")));
    }

    #[test]
    fn evaluator_returns_1_if_no_column_name_given() {
        let subject = row(indexmap! { "name".into() => string("andres") });
        let evaluator = fetch(None);

        let fetched = evaluator(subject, Tag::unknown());

        assert_eq!(fetched, Some(Value::int(1).tagged_unknown()));
    }

    #[test]
    fn evaluates_the_tables() {
        let ones = table(&vec![int(1), int(1), int(1)]);
        let expected = table(&vec![table(&vec![ones.clone(), ones.clone(), ones])]);

        let actual = evaluate(&nu_releases_sorted_by_date(), None, Tag::unknown()).unwrap();

        assert_eq!(actual, expected);
    }

    #[test]
    fn evaluates_the_tables_with_custom_evaluator() {
        let eval = String::from("name");
        let expected = table(&vec![table(&vec![
            table(&vec![string("AR"), string("JT"), string("YK")]),
            table(&vec![string("AR"), string("YK"), string("JT")]),
            table(&vec![string("YK"), string("JT"), string("AR")]),
        ])]);

        let actual = evaluate(&nu_releases_sorted_by_date(), Some(eval), Tag::unknown()).unwrap();

        assert_eq!(actual, expected);
    }
}
|
|
@ -131,11 +131,8 @@ mod tests {
|
|||
Value::table(list).tagged_unknown()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn groups_table_by_key() {
|
||||
let for_key = String::from("date").tagged_unknown();
|
||||
|
||||
let nu_releases = vec![
|
||||
fn nu_releases_commiters() -> Vec<Tagged<Value>> {
|
||||
vec![
|
||||
row(
|
||||
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")},
|
||||
),
|
||||
|
@ -163,10 +160,15 @@ mod tests {
|
|||
row(
|
||||
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")},
|
||||
),
|
||||
];
|
||||
]
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn groups_table_by_date_column() {
|
||||
let for_key = String::from("date").tagged_unknown();
|
||||
|
||||
assert_eq!(
|
||||
group(&for_key, nu_releases, Tag::unknown()).unwrap(),
|
||||
group(&for_key, nu_releases_commiters(), Tag::unknown()).unwrap(),
|
||||
row(indexmap! {
|
||||
"August 23-2019".into() => table(&vec![
|
||||
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}),
|
||||
|
@ -186,4 +188,30 @@ mod tests {
|
|||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn groups_table_by_country_column() {
|
||||
let for_key = String::from("country").tagged_unknown();
|
||||
|
||||
assert_eq!(
|
||||
group(&for_key, nu_releases_commiters(), Tag::unknown()).unwrap(),
|
||||
row(indexmap! {
|
||||
"EC".into() => table(&vec![
|
||||
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}),
|
||||
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("Sept 24-2019")}),
|
||||
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")})
|
||||
]),
|
||||
"NZ".into() => table(&vec![
|
||||
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}),
|
||||
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}),
|
||||
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("Sept 24-2019")})
|
||||
]),
|
||||
"US".into() => table(&vec![
|
||||
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}),
|
||||
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("Sept 24-2019")}),
|
||||
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}),
|
||||
]),
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
163
src/commands/histogram.rs
Normal file
163
src/commands/histogram.rs
Normal file
|
@ -0,0 +1,163 @@
|
|||
use crate::commands::evaluate_by::evaluate;
|
||||
use crate::commands::group_by::group;
|
||||
use crate::commands::map_max_by::map_max;
|
||||
use crate::commands::reduce_by::reduce;
|
||||
use crate::commands::t_sort_by::columns_sorted;
|
||||
use crate::commands::t_sort_by::t_sort;
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::data::TaggedDictBuilder;
|
||||
use crate::errors::ShellError;
|
||||
use crate::prelude::*;
|
||||
use num_traits::cast::ToPrimitive;
|
||||
|
||||
pub struct Histogram;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct HistogramArgs {
|
||||
column_name: Tagged<String>,
|
||||
rest: Vec<Tagged<String>>,
|
||||
}
|
||||
|
||||
impl WholeStreamCommand for Histogram {
|
||||
fn name(&self) -> &str {
|
||||
"histogram"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("histogram")
|
||||
.required(
|
||||
"column_name",
|
||||
SyntaxShape::String,
|
||||
"the name of the column to graph by",
|
||||
)
|
||||
.rest(
|
||||
SyntaxShape::Member,
|
||||
"column name to give the histogram's frequency column",
|
||||
)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a new table with a histogram based on the column name passed in."
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
args: CommandArgs,
|
||||
registry: &CommandRegistry,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
args.process(registry, histogram)?.run()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn histogram(
|
||||
HistogramArgs { column_name, rest }: HistogramArgs,
|
||||
RunnableContext { input, name, .. }: RunnableContext,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
let stream = async_stream! {
|
||||
let values: Vec<Tagged<Value>> = input.values.collect().await;
|
||||
|
||||
let Tagged { item: group_by, .. } = column_name.clone();
|
||||
|
||||
let groups = group(&column_name, values, &name)?;
|
||||
let group_labels = columns_sorted(Some(group_by.clone()), &groups, &name);
|
||||
let sorted = t_sort(Some(group_by.clone()), None, &groups, &name)?;
|
||||
let evaled = evaluate(&sorted, None, &name)?;
|
||||
let reduced = reduce(&evaled, None, &name)?;
|
||||
let maxima = map_max(&reduced, None, &name)?;
|
||||
let percents = percentages(&reduced, maxima, &name)?;
|
||||
|
||||
match percents {
|
||||
Tagged {
|
||||
item: Value::Table(datasets),
|
||||
..
|
||||
} => {
|
||||
|
||||
let mut idx = 0;
|
||||
|
||||
let column_names_supplied: Vec<_> = rest.iter().map(|f| f.item.clone()).collect();
|
||||
|
||||
let frequency_column_name = if column_names_supplied.is_empty() {
|
||||
"frecuency".to_string()
|
||||
} else {
|
||||
column_names_supplied[0].clone()
|
||||
};
|
||||
|
||||
let column = (*column_name).clone();
|
||||
|
||||
if let Tagged { item: Value::Table(start), .. } = datasets.get(0).unwrap() {
|
||||
for percentage in start.into_iter() {
|
||||
|
||||
let mut fact = TaggedDictBuilder::new(&name);
|
||||
fact.insert_tagged(&column, group_labels.get(idx).unwrap().clone());
|
||||
|
||||
if let Tagged { item: Value::Primitive(Primitive::Int(ref num)), .. } = percentage.clone() {
|
||||
fact.insert(&frequency_column_name, std::iter::repeat("*").take(num.to_i32().unwrap() as usize).collect::<String>());
|
||||
}
|
||||
|
||||
idx = idx + 1;
|
||||
|
||||
yield ReturnSuccess::value(fact.into_tagged_value());
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(stream.to_output_stream())
|
||||
}
|
||||
|
||||
fn percentages(
|
||||
values: &Tagged<Value>,
|
||||
max: Tagged<Value>,
|
||||
tag: impl Into<Tag>,
|
||||
) -> Result<Tagged<Value>, ShellError> {
|
||||
let tag = tag.into();
|
||||
|
||||
let results: Tagged<Value> = match values {
|
||||
Tagged {
|
||||
item: Value::Table(datasets),
|
||||
..
|
||||
} => {
|
||||
let datasets: Vec<_> = datasets
|
||||
.into_iter()
|
||||
.map(|subsets| match subsets {
|
||||
Tagged {
|
||||
item: Value::Table(data),
|
||||
..
|
||||
} => {
|
||||
let data = data
|
||||
.into_iter()
|
||||
.map(|d| match d {
|
||||
Tagged {
|
||||
item: Value::Primitive(Primitive::Int(n)),
|
||||
..
|
||||
} => {
|
||||
let max = match max {
|
||||
Tagged {
|
||||
item: Value::Primitive(Primitive::Int(ref maxima)),
|
||||
..
|
||||
} => maxima.to_i32().unwrap(),
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
let n = { n.to_i32().unwrap() * 100 / max };
|
||||
|
||||
Value::number(n).tagged(&tag)
|
||||
}
|
||||
_ => Value::number(0).tagged(&tag),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Value::Table(data).tagged(&tag)
|
||||
}
|
||||
_ => Value::Table(vec![]).tagged(&tag),
|
||||
})
|
||||
.collect();
|
||||
|
||||
Value::Table(datasets).tagged(&tag)
|
||||
}
|
||||
other => other.clone(),
|
||||
};
|
||||
|
||||
Ok(results)
|
||||
}
|
225
src/commands/map_max_by.rs
Normal file
225
src/commands/map_max_by.rs
Normal file
|
@ -0,0 +1,225 @@
|
|||
use crate::commands::WholeStreamCommand;
|
||||
use crate::parser::hir::SyntaxShape;
|
||||
use crate::prelude::*;
|
||||
use num_traits::cast::ToPrimitive;
|
||||
pub struct MapMaxBy;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct MapMaxByArgs {
|
||||
column_name: Option<Tagged<String>>,
|
||||
}
|
||||
|
||||
impl WholeStreamCommand for MapMaxBy {
|
||||
fn name(&self) -> &str {
|
||||
"map-max-by"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("map-max-by").named(
|
||||
"column_name",
|
||||
SyntaxShape::String,
|
||||
"the name of the column to map-max the table's rows",
|
||||
)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a new table with the data from the tables rows maxed by the column given."
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
args: CommandArgs,
|
||||
registry: &CommandRegistry,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
args.process(registry, map_max_by)?.run()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn map_max_by(
|
||||
MapMaxByArgs { column_name }: MapMaxByArgs,
|
||||
RunnableContext { input, name, .. }: RunnableContext,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
let stream = async_stream! {
|
||||
let values: Vec<Tagged<Value>> = input.values.collect().await;
|
||||
|
||||
|
||||
if values.is_empty() {
|
||||
yield Err(ShellError::labeled_error(
|
||||
"Expected table from pipeline",
|
||||
"requires a table input",
|
||||
name
|
||||
))
|
||||
} else {
|
||||
|
||||
let map_by_column = if let Some(column_to_map) = column_name {
|
||||
Some(column_to_map.item().clone())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
match map_max(&values[0], map_by_column, name) {
|
||||
Ok(table_maxed) => yield ReturnSuccess::value(table_maxed),
|
||||
Err(err) => yield Err(err)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(stream.to_output_stream())
|
||||
}
|
||||
|
||||
pub fn map_max(
|
||||
values: &Tagged<Value>,
|
||||
_map_by_column_name: Option<String>,
|
||||
tag: impl Into<Tag>,
|
||||
) -> Result<Tagged<Value>, ShellError> {
|
||||
let tag = tag.into();
|
||||
|
||||
let results: Tagged<Value> = match values {
|
||||
Tagged {
|
||||
item: Value::Table(datasets),
|
||||
..
|
||||
} => {
|
||||
let datasets: Vec<_> = datasets
|
||||
.into_iter()
|
||||
.map(|subsets| match subsets {
|
||||
Tagged {
|
||||
item: Value::Table(data),
|
||||
..
|
||||
} => {
|
||||
let data = data.into_iter().fold(0, |acc, value| match value {
|
||||
Tagged {
|
||||
item: Value::Primitive(Primitive::Int(n)),
|
||||
..
|
||||
} => {
|
||||
if n.to_i32().unwrap() > acc {
|
||||
n.to_i32().unwrap()
|
||||
} else {
|
||||
acc
|
||||
}
|
||||
}
|
||||
_ => acc,
|
||||
});
|
||||
Value::number(data).tagged(&tag)
|
||||
}
|
||||
_ => Value::number(0).tagged(&tag),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let datasets = datasets.iter().fold(0, |max, value| match value {
|
||||
Tagged {
|
||||
item: Value::Primitive(Primitive::Int(n)),
|
||||
..
|
||||
} => {
|
||||
if n.to_i32().unwrap() > max {
|
||||
n.to_i32().unwrap()
|
||||
} else {
|
||||
max
|
||||
}
|
||||
}
|
||||
_ => max,
|
||||
});
|
||||
Value::number(datasets).tagged(&tag)
|
||||
}
|
||||
_ => Value::number(-1).tagged(&tag),
|
||||
};
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {

    use crate::commands::evaluate_by::evaluate;
    use crate::commands::group_by::group;
    use crate::commands::map_max_by::map_max;
    use crate::commands::reduce_by::reduce;
    use crate::commands::t_sort_by::t_sort;
    use crate::data::meta::*;
    use crate::prelude::*;
    use crate::Value;
    use indexmap::IndexMap;

    // Small constructors for untagged fixture values.

    fn int(s: impl Into<BigInt>) -> Tagged<Value> {
        Value::int(s).tagged_unknown()
    }

    fn string(input: impl Into<String>) -> Tagged<Value> {
        Value::string(input.into()).tagged_unknown()
    }

    fn row(entries: IndexMap<String, Tagged<Value>>) -> Tagged<Value> {
        Value::row(entries).tagged_unknown()
    }

    /// One fixture row with `name`, `country` and `date` columns.
    fn committer(name: &str, country: &str, date: &str) -> Tagged<Value> {
        row(indexmap! {
            "name".into() => string(name),
            "country".into() => string(country),
            "date".into() => string(date)
        })
    }

    /// The sorted fixture evaluated with the default evaluator.
    fn nu_releases_evaluated_by_default_one() -> Tagged<Value> {
        let sorted = nu_releases_sorted_by_date();

        evaluate(&sorted, None, Tag::unknown()).unwrap()
    }

    /// The evaluated fixture reduced with the "sum" reducer.
    fn nu_releases_reduced_by_sum() -> Tagged<Value> {
        let evaluated = nu_releases_evaluated_by_default_one();

        reduce(&evaluated, Some(String::from("sum")), Tag::unknown()).unwrap()
    }

    /// The committers fixture, grouped by date and then passed through `t_sort`.
    fn nu_releases_sorted_by_date() -> Tagged<Value> {
        let grouped = nu_releases_grouped_by_date();

        t_sort(Some(String::from("date")), None, &grouped, Tag::unknown()).unwrap()
    }

    /// The committers fixture grouped by its `date` column.
    fn nu_releases_grouped_by_date() -> Tagged<Value> {
        let key = String::from("date").tagged_unknown();
        group(&key, nu_releases_commiters(), Tag::unknown()).unwrap()
    }

    /// Ten fixture rows; "August 23-2019" has four committers, the other
    /// dates three each.
    fn nu_releases_commiters() -> Vec<Tagged<Value>> {
        vec![
            committer("AR", "EC", "August 23-2019"),
            committer("JT", "NZ", "August 23-2019"),
            committer("YK", "US", "October 10-2019"),
            committer("AR", "EC", "September 24-2019"),
            committer("JT", "NZ", "October 10-2019"),
            committer("YK", "US", "September 24-2019"),
            committer("AR", "EC", "October 10-2019"),
            committer("JT", "NZ", "September 24-2019"),
            committer("YK", "US", "August 23-2019"),
            committer("JK", "US", "August 23-2019"),
        ]
    }

    #[test]
    fn maps_and_gets_max_value() {
        let maximum = map_max(&nu_releases_reduced_by_sum(), None, Tag::unknown()).unwrap();

        assert_eq!(maximum, int(4));
    }
}
|
257
src/commands/reduce_by.rs
Normal file
257
src/commands/reduce_by.rs
Normal file
|
@ -0,0 +1,257 @@
|
|||
use crate::commands::WholeStreamCommand;
|
||||
use crate::parser::hir::SyntaxShape;
|
||||
use crate::prelude::*;
|
||||
use num_traits::cast::ToPrimitive;
|
||||
pub struct ReduceBy;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct ReduceByArgs {
|
||||
reduce_with: Option<Tagged<String>>,
|
||||
}
|
||||
|
||||
impl WholeStreamCommand for ReduceBy {
|
||||
fn name(&self) -> &str {
|
||||
"reduce-by"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("reduce-by").named(
|
||||
"reduce_with",
|
||||
SyntaxShape::String,
|
||||
"the command to reduce by with",
|
||||
)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a new table with the data from the tables rows reduced by the command given."
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
args: CommandArgs,
|
||||
registry: &CommandRegistry,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
args.process(registry, reduce_by)?.run()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reduce_by(
|
||||
ReduceByArgs { reduce_with }: ReduceByArgs,
|
||||
RunnableContext { input, name, .. }: RunnableContext,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
let stream = async_stream! {
|
||||
let values: Vec<Tagged<Value>> = input.values.collect().await;
|
||||
|
||||
if values.is_empty() {
|
||||
yield Err(ShellError::labeled_error(
|
||||
"Expected table from pipeline",
|
||||
"requires a table input",
|
||||
name
|
||||
))
|
||||
} else {
|
||||
|
||||
let reduce_with = if let Some(reducer) = reduce_with {
|
||||
Some(reducer.item().clone())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
match reduce(&values[0], reduce_with, name) {
|
||||
Ok(reduced) => yield ReturnSuccess::value(reduced),
|
||||
Err(err) => yield Err(err)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(stream.to_output_stream())
|
||||
}
|
||||
|
||||
fn sum(data: Vec<Tagged<Value>>) -> i32 {
|
||||
data.into_iter().fold(0, |acc, value| match value {
|
||||
Tagged {
|
||||
item: Value::Primitive(Primitive::Int(n)),
|
||||
..
|
||||
} => acc + n.to_i32().unwrap(),
|
||||
_ => acc,
|
||||
})
|
||||
}
|
||||
|
||||
/// Builds a reducer step from a `calculator`.
///
/// The returned closure computes `acc * acc_begin + calculator(data)`: the
/// incoming accumulator is scaled by `acc_begin` before the calculated value
/// is added, so an `acc_begin` of `0` discards the accumulator.
fn formula(
    acc_begin: i32,
    calculator: Box<dyn Fn(Vec<Tagged<Value>>) -> i32 + 'static>,
) -> Box<dyn Fn(i32, Vec<Tagged<Value>>) -> i32 + 'static> {
    let step = move |acc: i32, datax: Vec<Tagged<Value>>| -> i32 {
        acc * acc_begin + calculator(datax)
    };

    Box::new(step)
}
|
||||
|
||||
fn reducer_for(command: Reduce) -> Box<dyn Fn(i32, Vec<Tagged<Value>>) -> i32 + 'static> {
|
||||
match command {
|
||||
Reduce::Sum | Reduce::Default => Box::new(formula(0, Box::new(sum))),
|
||||
}
|
||||
}
|
||||
|
||||
pub enum Reduce {
|
||||
Sum,
|
||||
Default,
|
||||
}
|
||||
|
||||
pub fn reduce(
|
||||
values: &Tagged<Value>,
|
||||
reducer: Option<String>,
|
||||
tag: impl Into<Tag>,
|
||||
) -> Result<Tagged<Value>, ShellError> {
|
||||
let tag = tag.into();
|
||||
|
||||
let reduce_with = match reducer {
|
||||
Some(cmd) if cmd == "sum" => reducer_for(Reduce::Sum),
|
||||
Some(_) | None => reducer_for(Reduce::Default),
|
||||
};
|
||||
|
||||
let results: Tagged<Value> = match values {
|
||||
Tagged {
|
||||
item: Value::Table(datasets),
|
||||
..
|
||||
} => {
|
||||
let datasets: Vec<_> = datasets
|
||||
.into_iter()
|
||||
.map(|subsets| {
|
||||
let mut acc = 0;
|
||||
match subsets {
|
||||
Tagged {
|
||||
item: Value::Table(data),
|
||||
..
|
||||
} => {
|
||||
let data = data
|
||||
.into_iter()
|
||||
.map(|d| {
|
||||
if let Tagged {
|
||||
item: Value::Table(x),
|
||||
..
|
||||
} = d
|
||||
{
|
||||
acc = reduce_with(acc, x.clone());
|
||||
Value::number(acc).tagged(&tag)
|
||||
} else {
|
||||
Value::number(0).tagged(&tag)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Value::Table(data).tagged(&tag)
|
||||
}
|
||||
_ => Value::Table(vec![]).tagged(&tag),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Value::Table(datasets).tagged(&tag)
|
||||
}
|
||||
_ => Value::Table(vec![]).tagged(&tag),
|
||||
};
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {

    use crate::commands::evaluate_by::evaluate;
    use crate::commands::group_by::group;
    use crate::commands::reduce_by::{reduce, reducer_for, Reduce};
    use crate::commands::t_sort_by::t_sort;
    use crate::data::meta::*;
    use crate::prelude::*;
    use crate::Value;
    use indexmap::IndexMap;

    // Small constructors for untagged test values.

    fn int(s: impl Into<BigInt>) -> Tagged<Value> {
        Value::int(s).tagged_unknown()
    }

    fn string(input: impl Into<String>) -> Tagged<Value> {
        Value::string(input.into()).tagged_unknown()
    }

    fn row(entries: IndexMap<String, Tagged<Value>>) -> Tagged<Value> {
        Value::row(entries).tagged_unknown()
    }

    fn table(list: &Vec<Tagged<Value>>) -> Tagged<Value> {
        Value::table(list).tagged_unknown()
    }

    // One committer row with the columns `name`, `country` and `date`.
    fn committer(name: &str, country: &str, date: &str) -> Tagged<Value> {
        row(indexmap! {
            "name".into() => string(name),
            "country".into() => string(country),
            "date".into() => string(date)
        })
    }

    fn nu_releases_commiters() -> Vec<Tagged<Value>> {
        vec![
            committer("AR", "EC", "August 23-2019"),
            committer("JT", "NZ", "August 23-2019"),
            committer("YK", "US", "October 10-2019"),
            committer("AR", "EC", "September 24-2019"),
            committer("JT", "NZ", "October 10-2019"),
            committer("YK", "US", "September 24-2019"),
            committer("AR", "EC", "October 10-2019"),
            committer("JT", "NZ", "September 24-2019"),
            committer("YK", "US", "August 23-2019"),
        ]
    }

    fn nu_releases_grouped_by_date() -> Tagged<Value> {
        let column = String::from("date").tagged_unknown();
        let grouped = group(&column, nu_releases_commiters(), Tag::unknown());
        grouped.unwrap()
    }

    fn nu_releases_sorted_by_date() -> Tagged<Value> {
        let grouped = nu_releases_grouped_by_date();
        let sorted = t_sort(Some(String::from("date")), None, &grouped, Tag::unknown());
        sorted.unwrap()
    }

    fn nu_releases_evaluated_by_default_one() -> Tagged<Value> {
        let sorted = nu_releases_sorted_by_date();
        evaluate(&sorted, None, Tag::unknown()).unwrap()
    }

    #[test]
    fn reducer_computes_given_a_sum_command() {
        let ones = vec![int(1), int(1), int(1)];
        let summing = reducer_for(Reduce::Sum);

        assert_eq!(summing(0, ones), 3);
    }

    #[test]
    fn reducer_computes() {
        let evaluated = nu_releases_evaluated_by_default_one();
        let expected = table(&vec![table(&vec![int(3), int(3), int(3)])]);

        assert_eq!(
            reduce(&evaluated, Some(String::from("sum")), Tag::unknown()),
            Ok(expected)
        );
    }
}
|
|
@ -150,6 +150,7 @@ pub fn split(
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use crate::commands::group_by::group;
|
||||
use crate::commands::split_by::split;
|
||||
use crate::data::meta::*;
|
||||
use crate::Value;
|
||||
|
@ -167,30 +168,49 @@ mod tests {
|
|||
Value::table(list).tagged_unknown()
|
||||
}
|
||||
|
||||
fn nu_releases_grouped_by_date() -> Tagged<Value> {
|
||||
let key = String::from("date").tagged_unknown();
|
||||
group(&key, nu_releases_commiters(), Tag::unknown()).unwrap()
|
||||
}
|
||||
|
||||
fn nu_releases_commiters() -> Vec<Tagged<Value>> {
|
||||
vec![
|
||||
row(
|
||||
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")},
|
||||
),
|
||||
row(
|
||||
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")},
|
||||
),
|
||||
row(
|
||||
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")},
|
||||
),
|
||||
row(
|
||||
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("Sept 24-2019")},
|
||||
),
|
||||
row(
|
||||
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")},
|
||||
),
|
||||
row(
|
||||
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("Sept 24-2019")},
|
||||
),
|
||||
row(
|
||||
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")},
|
||||
),
|
||||
row(
|
||||
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("Sept 24-2019")},
|
||||
),
|
||||
row(
|
||||
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")},
|
||||
),
|
||||
]
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn splits_inner_tables_by_key() {
|
||||
let for_key = String::from("country").tagged_unknown();
|
||||
|
||||
let nu_releases = row(indexmap! {
|
||||
"August 23-2019".into() => table(&vec![
|
||||
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}),
|
||||
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}),
|
||||
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")})
|
||||
]),
|
||||
"Sept 24-2019".into() => table(&vec![
|
||||
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("Sept 24-2019")}),
|
||||
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("Sept 24-2019")}),
|
||||
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("Sept 24-2019")})
|
||||
]),
|
||||
"October 10-2019".into() => table(&vec![
|
||||
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}),
|
||||
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}),
|
||||
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")})
|
||||
])
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
split(&for_key, &nu_releases, Tag::unknown()).unwrap(),
|
||||
split(&for_key, &nu_releases_grouped_by_date(), Tag::unknown()).unwrap(),
|
||||
Value::row(indexmap! {
|
||||
"EC".into() => row(indexmap! {
|
||||
"August 23-2019".into() => table(&vec![
|
||||
|
@ -235,18 +255,12 @@ mod tests {
|
|||
|
||||
let nu_releases = row(indexmap! {
|
||||
"August 23-2019".into() => table(&vec![
|
||||
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}),
|
||||
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}),
|
||||
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")})
|
||||
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")})
|
||||
]),
|
||||
"Sept 24-2019".into() => table(&vec![
|
||||
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("Sept 24-2019")}),
|
||||
row(indexmap!{"name".into() => Value::string("JT").tagged(Tag::from(Span::new(5,10))), "date".into() => string("Sept 24-2019")}),
|
||||
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("Sept 24-2019")})
|
||||
row(indexmap!{"name".into() => Value::string("JT").tagged(Tag::from(Span::new(5,10))), "date".into() => string("Sept 24-2019")})
|
||||
]),
|
||||
"October 10-2019".into() => table(&vec![
|
||||
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}),
|
||||
row(indexmap!{"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}),
|
||||
row(indexmap!{"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")})
|
||||
])
|
||||
});
|
||||
|
|
358
src/commands/t_sort_by.rs
Normal file
358
src/commands/t_sort_by.rs
Normal file
|
@ -0,0 +1,358 @@
|
|||
use crate::commands::WholeStreamCommand;
|
||||
use crate::data::{TaggedDictBuilder, TaggedListBuilder};
|
||||
use crate::errors::ShellError;
|
||||
use crate::prelude::*;
|
||||
use chrono::{DateTime, NaiveDate, Utc};
|
||||
|
||||
/// Unit type implementing the `t-sort-by` whole-stream command.
pub struct TSortBy;
|
||||
|
||||
/// Arguments accepted by the `t-sort-by` command, filled in by deserialization.
#[derive(Deserialize)]
|
||||
pub struct TSortByArgs {
|
||||
// Deserialized from the `show-columns` switch.
#[serde(rename(deserialize = "show-columns"))]
|
||||
show_columns: bool,
|
||||
// Optional name of the column the input was grouped by.
group_by: Option<Tagged<String>>,
|
||||
// Accepted but not read yet: `t_sort_by` ignores it and passes `None` to `t_sort`.
#[allow(unused)]
|
||||
split_by: Option<String>,
|
||||
}
|
||||
|
||||
impl WholeStreamCommand for TSortBy {
|
||||
fn name(&self) -> &str {
|
||||
"t-sort-by"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("t-sort-by")
|
||||
.switch("show-columns", "Displays the column names sorted")
|
||||
.named(
|
||||
"group_by",
|
||||
SyntaxShape::String,
|
||||
"the name of the column to group by",
|
||||
)
|
||||
.named(
|
||||
"split_by",
|
||||
SyntaxShape::String,
|
||||
"the name of the column within the grouped by table to split by",
|
||||
)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Sort by the given columns."
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
args: CommandArgs,
|
||||
registry: &CommandRegistry,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
args.process(registry, t_sort_by)?.run()
|
||||
}
|
||||
}
|
||||
|
||||
fn t_sort_by(
|
||||
TSortByArgs {
|
||||
show_columns,
|
||||
group_by,
|
||||
..
|
||||
}: TSortByArgs,
|
||||
RunnableContext { input, name, .. }: RunnableContext,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
Ok(OutputStream::new(async_stream! {
|
||||
let values: Vec<Tagged<Value>> = input.values.collect().await;
|
||||
|
||||
let column_grouped_by_name = if let Some(grouped_by) = group_by {
|
||||
Some(grouped_by.item().clone())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if show_columns {
|
||||
for label in columns_sorted(column_grouped_by_name, &values[0], &name).iter() {
|
||||
yield ReturnSuccess::value(label.clone());
|
||||
}
|
||||
} else {
|
||||
match t_sort(column_grouped_by_name, None, &values[0], name) {
|
||||
Ok(sorted) => yield ReturnSuccess::value(sorted),
|
||||
Err(err) => yield Err(err)
|
||||
}
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn columns_sorted(
|
||||
_group_by_name: Option<String>,
|
||||
value: &Tagged<Value>,
|
||||
tag: impl Into<Tag>,
|
||||
) -> Vec<Tagged<Value>> {
|
||||
let origin_tag = tag.into();
|
||||
|
||||
match value {
|
||||
Tagged {
|
||||
item: Value::Row(rows),
|
||||
..
|
||||
} => {
|
||||
let mut keys: Vec<Tagged<Value>> =
|
||||
rows.entries
|
||||
.keys()
|
||||
.map(|s| s.as_ref())
|
||||
.map(|k: &str| {
|
||||
let date = NaiveDate::parse_from_str(k, "%B %d-%Y");
|
||||
|
||||
let date = match date {
|
||||
Ok(parsed) => Value::Primitive(Primitive::Date(
|
||||
DateTime::<Utc>::from_utc(parsed.and_hms(12, 34, 56), Utc),
|
||||
)),
|
||||
Err(_) => Value::string(k),
|
||||
};
|
||||
|
||||
date.tagged_unknown()
|
||||
})
|
||||
.collect();
|
||||
|
||||
keys.sort();
|
||||
|
||||
let keys: Vec<Value> = keys
|
||||
.into_iter()
|
||||
.map(|k| {
|
||||
Value::string(match k {
|
||||
Tagged {
|
||||
item: Value::Primitive(Primitive::Date(d)),
|
||||
..
|
||||
} => format!("{}", d.format("%B %d-%Y")),
|
||||
_ => k.as_string().unwrap(),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
keys.into_iter().map(|k| k.tagged(&origin_tag)).collect()
|
||||
}
|
||||
_ => vec![Value::string("default").tagged(&origin_tag)],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn t_sort(
|
||||
group_by_name: Option<String>,
|
||||
split_by_name: Option<String>,
|
||||
value: &Tagged<Value>,
|
||||
tag: impl Into<Tag>,
|
||||
) -> Result<Tagged<Value>, ShellError> {
|
||||
let origin_tag = tag.into();
|
||||
|
||||
match group_by_name {
|
||||
Some(column_name) => {
|
||||
let sorted_labels = columns_sorted(Some(column_name), value, &origin_tag);
|
||||
|
||||
match split_by_name {
|
||||
None => {
|
||||
let mut dataset = TaggedDictBuilder::new(&origin_tag);
|
||||
dataset.insert_tagged("default", value.clone());
|
||||
let dataset = dataset.into_tagged_value();
|
||||
|
||||
let split_labels = match &dataset {
|
||||
Tagged {
|
||||
item: Value::Row(rows),
|
||||
..
|
||||
} => {
|
||||
let mut keys: Vec<Tagged<Value>> = rows
|
||||
.entries
|
||||
.keys()
|
||||
.map(|s| s.as_ref())
|
||||
.map(|k: &str| {
|
||||
let date = NaiveDate::parse_from_str(k, "%B %d-%Y");
|
||||
|
||||
let date = match date {
|
||||
Ok(parsed) => Value::Primitive(Primitive::Date(
|
||||
DateTime::<Utc>::from_utc(
|
||||
parsed.and_hms(12, 34, 56),
|
||||
Utc,
|
||||
),
|
||||
)),
|
||||
Err(_) => Value::string(k),
|
||||
};
|
||||
|
||||
date.tagged_unknown()
|
||||
})
|
||||
.collect();
|
||||
|
||||
keys.sort();
|
||||
|
||||
let keys: Vec<Value> = keys
|
||||
.into_iter()
|
||||
.map(|k| {
|
||||
Value::string(match k {
|
||||
Tagged {
|
||||
item: Value::Primitive(Primitive::Date(d)),
|
||||
..
|
||||
} => format!("{}", d.format("%B %d-%Y")),
|
||||
_ => k.as_string().unwrap(),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
keys.into_iter().map(|k| k.tagged(&origin_tag)).collect()
|
||||
}
|
||||
_ => vec![],
|
||||
};
|
||||
|
||||
let results: Vec<Vec<Tagged<Value>>> = split_labels
|
||||
.into_iter()
|
||||
.map(|split| {
|
||||
let groups = dataset.get_data_by_key(&split.as_string().unwrap());
|
||||
|
||||
sorted_labels
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(|label| {
|
||||
let label = label.as_string().unwrap();
|
||||
|
||||
match groups {
|
||||
Some(Tagged {
|
||||
item: Value::Row(dict),
|
||||
..
|
||||
}) => dict.get_data_by_key(&label).unwrap().clone(),
|
||||
_ => Value::Table(vec![]).tagged(&origin_tag),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut outer = TaggedListBuilder::new(&origin_tag);
|
||||
|
||||
for i in results {
|
||||
outer.insert_tagged(Value::Table(i).tagged(&origin_tag));
|
||||
}
|
||||
|
||||
return Ok(Value::Table(outer.list).tagged(&origin_tag));
|
||||
}
|
||||
Some(_) => return Ok(Value::nothing().tagged(&origin_tag)),
|
||||
}
|
||||
}
|
||||
None => return Ok(Value::nothing().tagged(&origin_tag)),
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
mod tests {

    use crate::commands::group_by::group;
    use crate::commands::t_sort_by::{columns_sorted, t_sort};
    use crate::data::meta::*;
    use crate::Value;
    use indexmap::IndexMap;

    // Small constructors for untagged test values.

    fn string(input: impl Into<String>) -> Tagged<Value> {
        Value::string(input.into()).tagged_unknown()
    }

    fn row(entries: IndexMap<String, Tagged<Value>>) -> Tagged<Value> {
        Value::row(entries).tagged_unknown()
    }

    fn table(list: &Vec<Tagged<Value>>) -> Tagged<Value> {
        Value::table(list).tagged_unknown()
    }

    // One committer row with the columns `name`, `country` and `date`.
    fn committer(name: &str, country: &str, date: &str) -> Tagged<Value> {
        row(indexmap! {
            "name".into() => string(name),
            "country".into() => string(country),
            "date".into() => string(date)
        })
    }

    fn nu_releases_commiters() -> Vec<Tagged<Value>> {
        vec![
            committer("AR", "EC", "August 23-2019"),
            committer("JT", "NZ", "August 23-2019"),
            committer("YK", "US", "October 10-2019"),
            committer("AR", "EC", "September 24-2019"),
            committer("JT", "NZ", "October 10-2019"),
            committer("YK", "US", "September 24-2019"),
            committer("AR", "EC", "October 10-2019"),
            committer("JT", "NZ", "September 24-2019"),
            committer("YK", "US", "August 23-2019"),
        ]
    }

    fn nu_releases_grouped_by_date() -> Tagged<Value> {
        let column = String::from("date").tagged_unknown();
        let grouped = group(&column, nu_releases_commiters(), Tag::unknown());
        grouped.unwrap()
    }

    #[test]
    fn show_columns_sorted_given_a_column_to_sort_by() {
        let grouped = nu_releases_grouped_by_date();
        let labels = columns_sorted(Some(String::from("date")), &grouped, Tag::unknown());

        assert_eq!(
            labels,
            vec![
                string("August 23-2019"),
                string("September 24-2019"),
                string("October 10-2019")
            ]
        )
    }

    #[test]
    fn sorts_the_tables() {
        let grouped = nu_releases_grouped_by_date();

        // Groups appear in date order; rows keep their order of appearance.
        let august = table(&vec![
            committer("AR", "EC", "August 23-2019"),
            committer("JT", "NZ", "August 23-2019"),
            committer("YK", "US", "August 23-2019"),
        ]);
        let september = table(&vec![
            committer("AR", "EC", "September 24-2019"),
            committer("YK", "US", "September 24-2019"),
            committer("JT", "NZ", "September 24-2019"),
        ]);
        let october = table(&vec![
            committer("YK", "US", "October 10-2019"),
            committer("JT", "NZ", "October 10-2019"),
            committer("AR", "EC", "October 10-2019"),
        ]);

        assert_eq!(
            t_sort(Some(String::from("date")), None, &grouped, Tag::unknown()).unwrap(),
            table(&vec![table(&vec![august, september, october])])
        );
    }
}
|
|
@ -430,6 +430,7 @@ impl Tagged<Value> {
|
|||
Value::Primitive(Primitive::Int(x)) => Ok(format!("{}", x)),
|
||||
Value::Primitive(Primitive::Bytes(x)) => Ok(format!("{}", x)),
|
||||
Value::Primitive(Primitive::Path(x)) => Ok(format!("{}", x.display())),
|
||||
Value::Primitive(Primitive::Date(x)) => Ok(format!("{}", x.to_rfc3339())),
|
||||
// TODO: this should definitely be more general with better errors
|
||||
other => Err(ShellError::labeled_error(
|
||||
"Expected string",
|
||||
|
|
|
@ -114,7 +114,7 @@ impl Dictionary {
|
|||
#[derive(Debug)]
|
||||
pub struct TaggedListBuilder {
|
||||
tag: Tag,
|
||||
list: Vec<Tagged<Value>>,
|
||||
pub list: Vec<Tagged<Value>>,
|
||||
}
|
||||
|
||||
impl TaggedListBuilder {
|
||||
|
|
|
@ -31,6 +31,35 @@ fn group_by() {
|
|||
})
|
||||
}
|
||||
|
||||
// Integration test for the `histogram` command: loads a three-row CSV in which
// "Ecuador" appears once and "Estados Unidos" twice in `rusty_at`, builds a
// histogram over that column with the frequency column named `countries`, and
// checks the bar rendered for the "Ecuador" row.
#[test]
|
||||
fn histogram() {
|
||||
Playground::setup("histogram_test_1", |dirs, sandbox| {
|
||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"los_tres_caballeros.csv",
|
||||
r#"
|
||||
first_name,last_name,rusty_at
|
||||
Andrés,Robalino,Ecuador
|
||||
Jonathan,Turner,Estados Unidos
|
||||
Yehuda,Katz,Estados Unidos
|
||||
"#,
|
||||
)]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), h::pipeline(
|
||||
r#"
|
||||
open los_tres_caballeros.csv
|
||||
| histogram rusty_at countries
|
||||
| where rusty_at == "Ecuador"
|
||||
| get countries
|
||||
| echo $it
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual, "**************************************************");
|
||||
// The expected bar stands for 50% (presumably relative to the most frequent value - TODO confirm).
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn group_by_errors_if_unknown_column_name() {
|
||||
Playground::setup("group_by_test_2", |dirs, sandbox| {
|
||||
|
@ -56,7 +85,6 @@ fn group_by_errors_if_unknown_column_name() {
|
|||
})
|
||||
}
|
||||
|
||||
#[cfg(data_processing_primitives)]
|
||||
#[test]
|
||||
fn split_by() {
|
||||
Playground::setup("split_by_test_1", |dirs, sandbox| {
|
||||
|
@ -86,7 +114,6 @@ fn split_by() {
|
|||
})
|
||||
}
|
||||
|
||||
#[cfg(data_processing_primitives)]
|
||||
#[test]
|
||||
fn split_by_errors_if_no_table_given_as_input() {
|
||||
Playground::setup("split_by_test_2", |dirs, sandbox| {
|
||||
|
|
Loading…
Reference in a new issue