diff --git a/crates/nu-cli/src/commands/each.rs b/crates/nu-cli/src/commands/each.rs index e2cd376ada..9b3f107d79 100644 --- a/crates/nu-cli/src/commands/each.rs +++ b/crates/nu-cli/src/commands/each.rs @@ -83,7 +83,7 @@ fn is_expanded_it_usage(head: &SpannedExpression) -> bool { } } -async fn process_row( +pub async fn process_row( block: Arc, scope: Arc, head: Arc>, diff --git a/crates/nu-cli/src/commands/group_by.rs b/crates/nu-cli/src/commands/group_by.rs index c762f05a3a..f7af89ce23 100644 --- a/crates/nu-cli/src/commands/group_by.rs +++ b/crates/nu-cli/src/commands/group_by.rs @@ -10,7 +10,7 @@ pub struct GroupBy; #[derive(Deserialize)] pub struct GroupByArgs { - column_name: Option>, + grouper: Option, } #[async_trait] @@ -21,14 +21,14 @@ impl WholeStreamCommand for GroupBy { fn signature(&self) -> Signature { Signature::build("group-by").optional( - "column_name", - SyntaxShape::String, - "the name of the column to group by", + "grouper", + SyntaxShape::Any, + "the grouper value to use", ) } fn usage(&self) -> &str { - "Creates a new table with the data from the table rows grouped by the column given." + "create a new table grouped." } async fn run( @@ -42,12 +42,17 @@ impl WholeStreamCommand for GroupBy { fn examples(&self) -> Vec { vec![ Example { - description: "Group items by type", + description: "group items by column named \"type\"", example: r#"ls | group-by type"#, result: None, }, Example { - description: "Group items by their value", + description: "blocks can be used for generating a grouping key (same as above)", + example: r#"ls | group-by { get type }"#, + result: None, + }, + Example { + description: "you can also group by raw values by leaving out the argument", example: "echo [1 3 1 3 2 1 1] | group-by", result: Some(vec![UntaggedValue::row(indexmap! 
{ "1".to_string() => UntaggedValue::Table(vec![ @@ -68,26 +73,95 @@ impl WholeStreamCommand for GroupBy { }) .into()]), }, + Example { + description: "write pipelines for a more involved grouping key", + example: + "echo [1 3 1 3 2 1 1] | group-by { echo `({{$it}} - 1) % 3` | calc | str from }", + result: None, + }, ] } } enum Grouper { ByColumn(Option>), + ByBlock, } pub async fn group_by( args: CommandArgs, registry: &CommandRegistry, ) -> Result { - let registry = registry.clone(); let name = args.call_info.name_tag.clone(); - let (GroupByArgs { column_name }, input) = args.process(&registry).await?; + let registry = registry.clone(); + let head = Arc::new(args.call_info.args.head.clone()); + let scope = Arc::new(args.call_info.scope.clone()); + let context = Arc::new(Context::from_raw(&args, &registry)); + let (GroupByArgs { grouper }, input) = args.process(&registry).await?; + let values: Vec = input.collect().await; + let mut keys: Vec> = vec![]; + let mut group_strategy = Grouper::ByColumn(None); + + match grouper { + Some(Value { + value: UntaggedValue::Block(block_given), + .. 
+ }) => { + let block = Arc::new(block_given); + let error_key = "error"; + + for value in values.iter() { + let run = block.clone(); + let scope = scope.clone(); + let head = head.clone(); + let context = context.clone(); + + match crate::commands::each::process_row(run, scope, head, context, value.clone()) + .await + { + Ok(mut s) => { + let collection: Vec> = + s.drain_vec().await; + + if collection.len() > 1 { + return Err(ShellError::labeled_error( + "expected one value from the block", + "requires a table with one value for grouping", + &name, + )); + } + + let value = match collection.get(0) { + Some(Ok(return_value)) => { + return_value.raw_value().unwrap_or_else(|| { + UntaggedValue::string(error_key).into_value(&name) + }) + } + Some(Err(_)) | None => { + UntaggedValue::string(error_key).into_value(&name) + } + }; + + keys.push(as_string(&value)); + } + Err(_) => { + keys.push(Ok(error_key.into())); + } + } + } + + group_strategy = Grouper::ByBlock; + } + Some(other) => { + group_strategy = Grouper::ByColumn(Some(as_string(&other)?.tagged(&name))); + } + _ => {} + } if values.is_empty() { return Err(ShellError::labeled_error( - "Expected table from pipeline", + "expected table from pipeline", "requires a table input", name, )); @@ -95,9 +169,25 @@ pub async fn group_by( let values = UntaggedValue::table(&values).into_value(&name); - match group(&column_name, &values, name) { - Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), - Err(reason) => Err(reason), + match group_strategy { + Grouper::ByBlock => { + let map = keys.clone(); + + let block = Box::new(move |idx: usize, row: &Value| match map.get(idx) { + Some(Ok(key)) => Ok(key.clone()), + Some(Err(reason)) => Err(reason.clone()), + None => as_string(row), + }); + + match crate::utils::data::group(&values, &Some(block), &name) { + Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), + Err(reason) => Err(reason), + } + } + Grouper::ByColumn(column_name) => match 
group(&column_name, &values, name) { + Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), + Err(reason) => Err(reason), + }, } } @@ -141,7 +231,7 @@ pub fn group( match grouper { Grouper::ByColumn(Some(column_name)) => { - let block = Box::new(move |row: &Value| { + let block = Box::new(move |_, row: &Value| { match row.get_data_by_key(column_name.borrow_spanned()) { Some(group_key) => Ok(as_string(&group_key)?), None => Err(suggestions(column_name.borrow_tagged(), &row)), @@ -151,13 +241,16 @@ pub fn group( crate::utils::data::group(&values, &Some(block), &name) } Grouper::ByColumn(None) => { - let block = Box::new(move |row: &Value| match as_string(row) { + let block = Box::new(move |_, row: &Value| match as_string(row) { Ok(group_key) => Ok(group_key), Err(reason) => Err(reason), }); crate::utils::data::group(&values, &Some(block), &name) } + Grouper::ByBlock => Err(ShellError::unimplemented( + "Block not implemented: This should never happen.", + )), } } diff --git a/crates/nu-cli/src/commands/group_by_date.rs b/crates/nu-cli/src/commands/group_by_date.rs index b4c0391d86..29f9def7a7 100644 --- a/crates/nu-cli/src/commands/group_by_date.rs +++ b/crates/nu-cli/src/commands/group_by_date.rs @@ -34,7 +34,7 @@ impl WholeStreamCommand for GroupByDate { } fn usage(&self) -> &str { - "Creates a new table with the data from the table rows grouped by the column given." + "creates a table grouped by date." 
} async fn run( @@ -100,7 +100,7 @@ pub async fn group_by_date( match (grouper_date, grouper_column) { (Grouper::ByDate(None), GroupByColumn::Name(None)) => { - let block = Box::new(move |row: &Value| row.format("%Y-%b-%d")); + let block = Box::new(move |_, row: &Value| row.format("%Y-%b-%d")); match crate::utils::data::group(&values, &Some(block), &name) { Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), @@ -108,7 +108,7 @@ pub async fn group_by_date( } } (Grouper::ByDate(None), GroupByColumn::Name(Some(column_name))) => { - let block = Box::new(move |row: &Value| { + let block = Box::new(move |_, row: &Value| { let group_key = match row.get_data_by_key(column_name.borrow_spanned()) { Some(group_key) => Ok(group_key), None => Err(suggestions(column_name.borrow_tagged(), &row)), @@ -123,7 +123,7 @@ pub async fn group_by_date( } } (Grouper::ByDate(Some(fmt)), GroupByColumn::Name(None)) => { - let block = Box::new(move |row: &Value| row.format(&fmt)); + let block = Box::new(move |_, row: &Value| row.format(&fmt)); match crate::utils::data::group(&values, &Some(block), &name) { Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))), @@ -131,7 +131,7 @@ pub async fn group_by_date( } } (Grouper::ByDate(Some(fmt)), GroupByColumn::Name(Some(column_name))) => { - let block = Box::new(move |row: &Value| { + let block = Box::new(move |_, row: &Value| { let group_key = match row.get_data_by_key(column_name.borrow_spanned()) { Some(group_key) => Ok(group_key), None => Err(suggestions(column_name.borrow_tagged(), &row)), diff --git a/crates/nu-cli/src/commands/split_by.rs b/crates/nu-cli/src/commands/split_by.rs index d7c299c993..cfb918fdad 100644 --- a/crates/nu-cli/src/commands/split_by.rs +++ b/crates/nu-cli/src/commands/split_by.rs @@ -81,7 +81,7 @@ pub fn split( match grouper { Grouper::ByColumn(Some(column_name)) => { - let block = Box::new(move |row: &Value| { + let block = Box::new(move |_, row: &Value| { match 
row.get_data_by_key(column_name.borrow_spanned()) { Some(group_key) => Ok(as_string(&group_key)?), None => Err(suggestions(column_name.borrow_tagged(), &row)), @@ -91,7 +91,7 @@ pub fn split( crate::utils::data::split(&values, &Some(block), &name) } Grouper::ByColumn(None) => { - let block = Box::new(move |row: &Value| match as_string(row) { + let block = Box::new(move |_, row: &Value| match as_string(row) { Ok(group_key) => Ok(group_key), Err(reason) => Err(reason), }); diff --git a/crates/nu-cli/src/utils/data/group.rs b/crates/nu-cli/src/utils/data/group.rs index e1f0dd7859..35ce283745 100644 --- a/crates/nu-cli/src/utils/data/group.rs +++ b/crates/nu-cli/src/utils/data/group.rs @@ -7,16 +7,16 @@ use nu_value_ext::as_string; #[allow(clippy::type_complexity)] pub fn group( values: &Value, - grouper: &Option Result + Send>>, + grouper: &Option Result + Send>>, tag: impl Into, ) -> Result { let tag = tag.into(); let mut groups: IndexMap> = IndexMap::new(); - for value in values.table_entries() { + for (idx, value) in values.table_entries().enumerate() { let group_key = if let Some(ref grouper) = grouper { - grouper(&value) + grouper(idx, &value) } else { as_string(&value) }; diff --git a/crates/nu-cli/src/utils/data/split.rs b/crates/nu-cli/src/utils/data/split.rs index 86bfeeb107..a8db2cfefb 100644 --- a/crates/nu-cli/src/utils/data/split.rs +++ b/crates/nu-cli/src/utils/data/split.rs @@ -7,7 +7,7 @@ use crate::utils::data::group; #[allow(clippy::type_complexity)] pub fn split( value: &Value, - splitter: &Option Result + Send>>, + splitter: &Option Result + Send>>, tag: impl Into, ) -> Result { let tag = tag.into(); diff --git a/crates/nu-cli/tests/commands/group_by.rs b/crates/nu-cli/tests/commands/group_by.rs index 5f3661abb6..2c09499ae1 100644 --- a/crates/nu-cli/tests/commands/group_by.rs +++ b/crates/nu-cli/tests/commands/group_by.rs @@ -31,8 +31,50 @@ fn groups() { } #[test] -fn errors_if_given_unknown_column_name_is_missing() { +fn 
errors_if_given_unknown_column_name() { Playground::setup("group_by_test_2", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.json", + r#" + { + "nu": { + "committers": [ + {"name": "Andrés N. Robalino"}, + {"name": "Jonathan Turner"}, + {"name": "Yehuda Katz"} + ], + "releases": [ + {"version": "0.2"} + {"version": "0.8"}, + {"version": "0.9999999"} + ], + "0xATYKARNU": [ + ["Th", "e", " "], + ["BIG", " ", "UnO"], + ["punto", "cero"] + ] + } + } + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open los_tres_caballeros.json + | group-by { get nu.releases.version } + "# + )); + + assert!(actual + .err + .contains("requires a table with one value for grouping")); + }) +} + +#[test] +fn errors_if_block_given_evaluates_more_than_one_row() { + Playground::setup("group_by_test_3", |dirs, sandbox| { sandbox.with_files(vec![FileWithContentToBeTrimmed( "los_tres_caballeros.csv", r#"