group-by can generate a custom grouping key by evaluating a block. (#2172)

This commit is contained in:
Andrés N. Robalino 2020-07-14 08:45:19 -05:00 committed by GitHub
parent 8551e06d9e
commit f2c4d22739
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 163 additions and 28 deletions

View file

@ -83,7 +83,7 @@ fn is_expanded_it_usage(head: &SpannedExpression) -> bool {
} }
} }
async fn process_row( pub async fn process_row(
block: Arc<Block>, block: Arc<Block>,
scope: Arc<Scope>, scope: Arc<Scope>,
head: Arc<Box<SpannedExpression>>, head: Arc<Box<SpannedExpression>>,

View file

@ -10,7 +10,7 @@ pub struct GroupBy;
#[derive(Deserialize)] #[derive(Deserialize)]
pub struct GroupByArgs { pub struct GroupByArgs {
column_name: Option<Tagged<String>>, grouper: Option<Value>,
} }
#[async_trait] #[async_trait]
@ -21,14 +21,14 @@ impl WholeStreamCommand for GroupBy {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build("group-by").optional( Signature::build("group-by").optional(
"column_name", "grouper",
SyntaxShape::String, SyntaxShape::Any,
"the name of the column to group by", "the grouper value to use",
) )
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
"Creates a new table with the data from the table rows grouped by the column given." "create a new table grouped."
} }
async fn run( async fn run(
@ -42,12 +42,17 @@ impl WholeStreamCommand for GroupBy {
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![ vec![
Example { Example {
description: "Group items by type", description: "group items by column named \"type\"",
example: r#"ls | group-by type"#, example: r#"ls | group-by type"#,
result: None, result: None,
}, },
Example { Example {
description: "Group items by their value", description: "blocks can be used for generating a grouping key (same as above)",
example: r#"ls | group-by { get type }"#,
result: None,
},
Example {
description: "you can also group by raw values by leaving out the argument",
example: "echo [1 3 1 3 2 1 1] | group-by", example: "echo [1 3 1 3 2 1 1] | group-by",
result: Some(vec![UntaggedValue::row(indexmap! { result: Some(vec![UntaggedValue::row(indexmap! {
"1".to_string() => UntaggedValue::Table(vec![ "1".to_string() => UntaggedValue::Table(vec![
@ -68,26 +73,95 @@ impl WholeStreamCommand for GroupBy {
}) })
.into()]), .into()]),
}, },
Example {
description: "write pipelines for a more involved grouping key",
example:
"echo [1 3 1 3 2 1 1] | group-by { echo `({{$it}} - 1) % 3` | calc | str from }",
result: None,
},
] ]
} }
} }
enum Grouper { enum Grouper {
ByColumn(Option<Tagged<String>>), ByColumn(Option<Tagged<String>>),
ByBlock,
} }
pub async fn group_by( pub async fn group_by(
args: CommandArgs, args: CommandArgs,
registry: &CommandRegistry, registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> { ) -> Result<OutputStream, ShellError> {
let registry = registry.clone();
let name = args.call_info.name_tag.clone(); let name = args.call_info.name_tag.clone();
let (GroupByArgs { column_name }, input) = args.process(&registry).await?; let registry = registry.clone();
let head = Arc::new(args.call_info.args.head.clone());
let scope = Arc::new(args.call_info.scope.clone());
let context = Arc::new(Context::from_raw(&args, &registry));
let (GroupByArgs { grouper }, input) = args.process(&registry).await?;
let values: Vec<Value> = input.collect().await; let values: Vec<Value> = input.collect().await;
let mut keys: Vec<Result<String, ShellError>> = vec![];
let mut group_strategy = Grouper::ByColumn(None);
match grouper {
Some(Value {
value: UntaggedValue::Block(block_given),
..
}) => {
let block = Arc::new(block_given);
let error_key = "error";
for value in values.iter() {
let run = block.clone();
let scope = scope.clone();
let head = head.clone();
let context = context.clone();
match crate::commands::each::process_row(run, scope, head, context, value.clone())
.await
{
Ok(mut s) => {
let collection: Vec<Result<ReturnSuccess, ShellError>> =
s.drain_vec().await;
if collection.len() > 1 {
return Err(ShellError::labeled_error(
"expected one value from the block",
"requires a table with one value for grouping",
&name,
));
}
let value = match collection.get(0) {
Some(Ok(return_value)) => {
return_value.raw_value().unwrap_or_else(|| {
UntaggedValue::string(error_key).into_value(&name)
})
}
Some(Err(_)) | None => {
UntaggedValue::string(error_key).into_value(&name)
}
};
keys.push(as_string(&value));
}
Err(_) => {
keys.push(Ok(error_key.into()));
}
}
}
group_strategy = Grouper::ByBlock;
}
Some(other) => {
group_strategy = Grouper::ByColumn(Some(as_string(&other)?.tagged(&name)));
}
_ => {}
}
if values.is_empty() { if values.is_empty() {
return Err(ShellError::labeled_error( return Err(ShellError::labeled_error(
"Expected table from pipeline", "expected table from pipeline",
"requires a table input", "requires a table input",
name, name,
)); ));
@ -95,11 +169,27 @@ pub async fn group_by(
let values = UntaggedValue::table(&values).into_value(&name); let values = UntaggedValue::table(&values).into_value(&name);
match group(&column_name, &values, name) { match group_strategy {
Grouper::ByBlock => {
let map = keys.clone();
let block = Box::new(move |idx: usize, row: &Value| match map.get(idx) {
Some(Ok(key)) => Ok(key.clone()),
Some(Err(reason)) => Err(reason.clone()),
None => as_string(row),
});
match crate::utils::data::group(&values, &Some(block), &name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(reason) => Err(reason), Err(reason) => Err(reason),
} }
} }
Grouper::ByColumn(column_name) => match group(&column_name, &values, name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(reason) => Err(reason),
},
}
}
pub fn suggestions(tried: Tagged<&str>, for_value: &Value) -> ShellError { pub fn suggestions(tried: Tagged<&str>, for_value: &Value) -> ShellError {
let possibilities = for_value.data_descriptors(); let possibilities = for_value.data_descriptors();
@ -141,7 +231,7 @@ pub fn group(
match grouper { match grouper {
Grouper::ByColumn(Some(column_name)) => { Grouper::ByColumn(Some(column_name)) => {
let block = Box::new(move |row: &Value| { let block = Box::new(move |_, row: &Value| {
match row.get_data_by_key(column_name.borrow_spanned()) { match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(as_string(&group_key)?), Some(group_key) => Ok(as_string(&group_key)?),
None => Err(suggestions(column_name.borrow_tagged(), &row)), None => Err(suggestions(column_name.borrow_tagged(), &row)),
@ -151,13 +241,16 @@ pub fn group(
crate::utils::data::group(&values, &Some(block), &name) crate::utils::data::group(&values, &Some(block), &name)
} }
Grouper::ByColumn(None) => { Grouper::ByColumn(None) => {
let block = Box::new(move |row: &Value| match as_string(row) { let block = Box::new(move |_, row: &Value| match as_string(row) {
Ok(group_key) => Ok(group_key), Ok(group_key) => Ok(group_key),
Err(reason) => Err(reason), Err(reason) => Err(reason),
}); });
crate::utils::data::group(&values, &Some(block), &name) crate::utils::data::group(&values, &Some(block), &name)
} }
Grouper::ByBlock => Err(ShellError::unimplemented(
"Block not implemented: This should never happen.",
)),
} }
} }

View file

@ -34,7 +34,7 @@ impl WholeStreamCommand for GroupByDate {
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
"Creates a new table with the data from the table rows grouped by the column given." "creates a table grouped by date."
} }
async fn run( async fn run(
@ -100,7 +100,7 @@ pub async fn group_by_date(
match (grouper_date, grouper_column) { match (grouper_date, grouper_column) {
(Grouper::ByDate(None), GroupByColumn::Name(None)) => { (Grouper::ByDate(None), GroupByColumn::Name(None)) => {
let block = Box::new(move |row: &Value| row.format("%Y-%b-%d")); let block = Box::new(move |_, row: &Value| row.format("%Y-%b-%d"));
match crate::utils::data::group(&values, &Some(block), &name) { match crate::utils::data::group(&values, &Some(block), &name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
@ -108,7 +108,7 @@ pub async fn group_by_date(
} }
} }
(Grouper::ByDate(None), GroupByColumn::Name(Some(column_name))) => { (Grouper::ByDate(None), GroupByColumn::Name(Some(column_name))) => {
let block = Box::new(move |row: &Value| { let block = Box::new(move |_, row: &Value| {
let group_key = match row.get_data_by_key(column_name.borrow_spanned()) { let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(group_key), Some(group_key) => Ok(group_key),
None => Err(suggestions(column_name.borrow_tagged(), &row)), None => Err(suggestions(column_name.borrow_tagged(), &row)),
@ -123,7 +123,7 @@ pub async fn group_by_date(
} }
} }
(Grouper::ByDate(Some(fmt)), GroupByColumn::Name(None)) => { (Grouper::ByDate(Some(fmt)), GroupByColumn::Name(None)) => {
let block = Box::new(move |row: &Value| row.format(&fmt)); let block = Box::new(move |_, row: &Value| row.format(&fmt));
match crate::utils::data::group(&values, &Some(block), &name) { match crate::utils::data::group(&values, &Some(block), &name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
@ -131,7 +131,7 @@ pub async fn group_by_date(
} }
} }
(Grouper::ByDate(Some(fmt)), GroupByColumn::Name(Some(column_name))) => { (Grouper::ByDate(Some(fmt)), GroupByColumn::Name(Some(column_name))) => {
let block = Box::new(move |row: &Value| { let block = Box::new(move |_, row: &Value| {
let group_key = match row.get_data_by_key(column_name.borrow_spanned()) { let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(group_key), Some(group_key) => Ok(group_key),
None => Err(suggestions(column_name.borrow_tagged(), &row)), None => Err(suggestions(column_name.borrow_tagged(), &row)),

View file

@ -81,7 +81,7 @@ pub fn split(
match grouper { match grouper {
Grouper::ByColumn(Some(column_name)) => { Grouper::ByColumn(Some(column_name)) => {
let block = Box::new(move |row: &Value| { let block = Box::new(move |_, row: &Value| {
match row.get_data_by_key(column_name.borrow_spanned()) { match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(as_string(&group_key)?), Some(group_key) => Ok(as_string(&group_key)?),
None => Err(suggestions(column_name.borrow_tagged(), &row)), None => Err(suggestions(column_name.borrow_tagged(), &row)),
@ -91,7 +91,7 @@ pub fn split(
crate::utils::data::split(&values, &Some(block), &name) crate::utils::data::split(&values, &Some(block), &name)
} }
Grouper::ByColumn(None) => { Grouper::ByColumn(None) => {
let block = Box::new(move |row: &Value| match as_string(row) { let block = Box::new(move |_, row: &Value| match as_string(row) {
Ok(group_key) => Ok(group_key), Ok(group_key) => Ok(group_key),
Err(reason) => Err(reason), Err(reason) => Err(reason),
}); });

View file

@ -7,16 +7,16 @@ use nu_value_ext::as_string;
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
pub fn group( pub fn group(
values: &Value, values: &Value,
grouper: &Option<Box<dyn Fn(&Value) -> Result<String, ShellError> + Send>>, grouper: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
tag: impl Into<Tag>, tag: impl Into<Tag>,
) -> Result<Value, ShellError> { ) -> Result<Value, ShellError> {
let tag = tag.into(); let tag = tag.into();
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new(); let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();
for value in values.table_entries() { for (idx, value) in values.table_entries().enumerate() {
let group_key = if let Some(ref grouper) = grouper { let group_key = if let Some(ref grouper) = grouper {
grouper(&value) grouper(idx, &value)
} else { } else {
as_string(&value) as_string(&value)
}; };

View file

@ -7,7 +7,7 @@ use crate::utils::data::group;
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
pub fn split( pub fn split(
value: &Value, value: &Value,
splitter: &Option<Box<dyn Fn(&Value) -> Result<String, ShellError> + Send>>, splitter: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
tag: impl Into<Tag>, tag: impl Into<Tag>,
) -> Result<Value, ShellError> { ) -> Result<Value, ShellError> {
let tag = tag.into(); let tag = tag.into();

View file

@ -31,8 +31,50 @@ fn groups() {
} }
#[test] #[test]
fn errors_if_given_unknown_column_name_is_missing() { fn errors_if_given_unknown_column_name() {
Playground::setup("group_by_test_2", |dirs, sandbox| { Playground::setup("group_by_test_2", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.json",
r#"
{
"nu": {
"committers": [
{"name": "Andrés N. Robalino"},
{"name": "Jonathan Turner"},
{"name": "Yehuda Katz"}
],
"releases": [
{"version": "0.2"}
{"version": "0.8"},
{"version": "0.9999999"}
],
"0xATYKARNU": [
["Th", "e", " "],
["BIG", " ", "UnO"],
["punto", "cero"]
]
}
}
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.json
| group-by { get nu.releases.version }
"#
));
assert!(actual
.err
.contains("requires a table with one value for grouping"));
})
}
#[test]
fn errors_if_block_given_evaluates_more_than_one_row() {
Playground::setup("group_by_test_3", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed( sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.csv", "los_tres_caballeros.csv",
r#" r#"