group-by can generate a custom grouping key by evaluating a block. (#2172)

2024-12-28 14:03:09 +00:00 · 2020-07-14 08:45:19 -05:00 · 2020-07-14 08:45:19 -05:00 · f2c4d22739
commit f2c4d22739
parent 8551e06d9e
7 changed files with 163 additions and 28 deletions
--- a/crates/nu-cli/src/commands/each.rs
+++ b/crates/nu-cli/src/commands/each.rs
@ -83,7 +83,7 @@ fn is_expanded_it_usage(head: &SpannedExpression) -> bool {
    }
 }

-async fn process_row(
+pub async fn process_row(
    block: Arc<Block>,
    scope: Arc<Scope>,
    head: Arc<Box<SpannedExpression>>,
--- a/crates/nu-cli/src/commands/group_by.rs
+++ b/crates/nu-cli/src/commands/group_by.rs
@ -10,7 +10,7 @@ pub struct GroupBy;

 #[derive(Deserialize)]
 pub struct GroupByArgs {
-    column_name: Option<Tagged<String>>,
+    grouper: Option<Value>,
 }

 #[async_trait]
@ -21,14 +21,14 @@ impl WholeStreamCommand for GroupBy {

    fn signature(&self) -> Signature {
        Signature::build("group-by").optional(
-            "column_name",
-            SyntaxShape::String,
-            "the name of the column to group by",
+            "grouper",
+            SyntaxShape::Any,
+            "the grouper value to use",
        )
    }

    fn usage(&self) -> &str {
-        "Creates a new table with the data from the table rows grouped by the column given."
+        "create a new table grouped."
    }

    async fn run(
@ -42,12 +42,17 @@ impl WholeStreamCommand for GroupBy {
    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
-                description: "Group items by type",
+                description: "group items by column named \"type\"",
                example: r#"ls | group-by type"#,
                result: None,
            },
            Example {
-                description: "Group items by their value",
+                description: "blocks can be used for generating a grouping key (same as above)",
+                example: r#"ls | group-by { get type }"#,
+                result: None,
+            },
+            Example {
+                description: "you can also group by raw values by leaving out the argument",
                example: "echo [1 3 1 3 2 1 1] | group-by",
                result: Some(vec![UntaggedValue::row(indexmap! {
                    "1".to_string() => UntaggedValue::Table(vec![
@ -68,26 +73,95 @@ impl WholeStreamCommand for GroupBy {
                })
                .into()]),
            },
+            Example {
+                description: "write pipelines for a more involved grouping key",
+                example:
+                    "echo [1 3 1 3 2 1 1] | group-by { echo `({{$it}} - 1) % 3` | calc | str from }",
+                result: None,
+            },
        ]
    }
 }

 enum Grouper {
    ByColumn(Option<Tagged<String>>),
+    ByBlock,
 }

 pub async fn group_by(
    args: CommandArgs,
    registry: &CommandRegistry,
 ) -> Result<OutputStream, ShellError> {
-    let registry = registry.clone();
    let name = args.call_info.name_tag.clone();
-    let (GroupByArgs { column_name }, input) = args.process(&registry).await?;
+    let registry = registry.clone();
+    let head = Arc::new(args.call_info.args.head.clone());
+    let scope = Arc::new(args.call_info.scope.clone());
+    let context = Arc::new(Context::from_raw(&args, &registry));
+    let (GroupByArgs { grouper }, input) = args.process(&registry).await?;
+
    let values: Vec<Value> = input.collect().await;
+    let mut keys: Vec<Result<String, ShellError>> = vec![];
+    let mut group_strategy = Grouper::ByColumn(None);
+
+    match grouper {
+        Some(Value {
+            value: UntaggedValue::Block(block_given),
+            ..
+        }) => {
+            let block = Arc::new(block_given);
+            let error_key = "error";
+
+            for value in values.iter() {
+                let run = block.clone();
+                let scope = scope.clone();
+                let head = head.clone();
+                let context = context.clone();
+
+                match crate::commands::each::process_row(run, scope, head, context, value.clone())
+                    .await
+                {
+                    Ok(mut s) => {
+                        let collection: Vec<Result<ReturnSuccess, ShellError>> =
+                            s.drain_vec().await;
+
+                        if collection.len() > 1 {
+                            return Err(ShellError::labeled_error(
+                                "expected one value from the block",
+                                "requires a table with one value for grouping",
+                                &name,
+                            ));
+                        }
+
+                        let value = match collection.get(0) {
+                            Some(Ok(return_value)) => {
+                                return_value.raw_value().unwrap_or_else(|| {
+                                    UntaggedValue::string(error_key).into_value(&name)
+                                })
+                            }
+                            Some(Err(_)) | None => {
+                                UntaggedValue::string(error_key).into_value(&name)
+                            }
+                        };
+
+                        keys.push(as_string(&value));
+                    }
+                    Err(_) => {
+                        keys.push(Ok(error_key.into()));
+                    }
+                }
+            }
+
+            group_strategy = Grouper::ByBlock;
+        }
+        Some(other) => {
+            group_strategy = Grouper::ByColumn(Some(as_string(&other)?.tagged(&name)));
+        }
+        _ => {}
+    }

    if values.is_empty() {
        return Err(ShellError::labeled_error(
-            "Expected table from pipeline",
+            "expected table from pipeline",
            "requires a table input",
            name,
        ));
@ -95,9 +169,25 @@ pub async fn group_by(

    let values = UntaggedValue::table(&values).into_value(&name);

-    match group(&column_name, &values, name) {
-        Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
-        Err(reason) => Err(reason),
+    match group_strategy {
+        Grouper::ByBlock => {
+            let map = keys.clone();
+
+            let block = Box::new(move |idx: usize, row: &Value| match map.get(idx) {
+                Some(Ok(key)) => Ok(key.clone()),
+                Some(Err(reason)) => Err(reason.clone()),
+                None => as_string(row),
+            });
+
+            match crate::utils::data::group(&values, &Some(block), &name) {
+                Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
+                Err(reason) => Err(reason),
+            }
+        }
+        Grouper::ByColumn(column_name) => match group(&column_name, &values, name) {
+            Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
+            Err(reason) => Err(reason),
+        },
    }
 }

@ -141,7 +231,7 @@ pub fn group(

    match grouper {
        Grouper::ByColumn(Some(column_name)) => {
-            let block = Box::new(move |row: &Value| {
+            let block = Box::new(move |_, row: &Value| {
                match row.get_data_by_key(column_name.borrow_spanned()) {
                    Some(group_key) => Ok(as_string(&group_key)?),
                    None => Err(suggestions(column_name.borrow_tagged(), &row)),
@ -151,13 +241,16 @@ pub fn group(
            crate::utils::data::group(&values, &Some(block), &name)
        }
        Grouper::ByColumn(None) => {
-            let block = Box::new(move |row: &Value| match as_string(row) {
+            let block = Box::new(move |_, row: &Value| match as_string(row) {
                Ok(group_key) => Ok(group_key),
                Err(reason) => Err(reason),
            });

            crate::utils::data::group(&values, &Some(block), &name)
        }
+        Grouper::ByBlock => Err(ShellError::unimplemented(
+            "Block not implemented: This should never happen.",
+        )),
    }
 }

--- a/crates/nu-cli/src/commands/group_by_date.rs
+++ b/crates/nu-cli/src/commands/group_by_date.rs
@ -34,7 +34,7 @@ impl WholeStreamCommand for GroupByDate {
    }

    fn usage(&self) -> &str {
-        "Creates a new table with the data from the table rows grouped by the column given."
+        "creates a table grouped by date."
    }

    async fn run(
@ -100,7 +100,7 @@ pub async fn group_by_date(

        match (grouper_date, grouper_column) {
            (Grouper::ByDate(None), GroupByColumn::Name(None)) => {
-                let block = Box::new(move |row: &Value| row.format("%Y-%b-%d"));
+                let block = Box::new(move |_, row: &Value| row.format("%Y-%b-%d"));

                match crate::utils::data::group(&values, &Some(block), &name) {
                    Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
@ -108,7 +108,7 @@ pub async fn group_by_date(
                }
            }
            (Grouper::ByDate(None), GroupByColumn::Name(Some(column_name))) => {
-                let block = Box::new(move |row: &Value| {
+                let block = Box::new(move |_, row: &Value| {
                    let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
                        Some(group_key) => Ok(group_key),
                        None => Err(suggestions(column_name.borrow_tagged(), &row)),
@ -123,7 +123,7 @@ pub async fn group_by_date(
                }
            }
            (Grouper::ByDate(Some(fmt)), GroupByColumn::Name(None)) => {
-                let block = Box::new(move |row: &Value| row.format(&fmt));
+                let block = Box::new(move |_, row: &Value| row.format(&fmt));

                match crate::utils::data::group(&values, &Some(block), &name) {
                    Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
@ -131,7 +131,7 @@ pub async fn group_by_date(
                }
            }
            (Grouper::ByDate(Some(fmt)), GroupByColumn::Name(Some(column_name))) => {
-                let block = Box::new(move |row: &Value| {
+                let block = Box::new(move |_, row: &Value| {
                    let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
                        Some(group_key) => Ok(group_key),
                        None => Err(suggestions(column_name.borrow_tagged(), &row)),
--- a/crates/nu-cli/src/commands/split_by.rs
+++ b/crates/nu-cli/src/commands/split_by.rs
@ -81,7 +81,7 @@ pub fn split(

    match grouper {
        Grouper::ByColumn(Some(column_name)) => {
-            let block = Box::new(move |row: &Value| {
+            let block = Box::new(move |_, row: &Value| {
                match row.get_data_by_key(column_name.borrow_spanned()) {
                    Some(group_key) => Ok(as_string(&group_key)?),
                    None => Err(suggestions(column_name.borrow_tagged(), &row)),
@ -91,7 +91,7 @@ pub fn split(
            crate::utils::data::split(&values, &Some(block), &name)
        }
        Grouper::ByColumn(None) => {
-            let block = Box::new(move |row: &Value| match as_string(row) {
+            let block = Box::new(move |_, row: &Value| match as_string(row) {
                Ok(group_key) => Ok(group_key),
                Err(reason) => Err(reason),
            });
--- a/crates/nu-cli/src/utils/data/group.rs
+++ b/crates/nu-cli/src/utils/data/group.rs
@ -7,16 +7,16 @@ use nu_value_ext::as_string;
 #[allow(clippy::type_complexity)]
 pub fn group(
    values: &Value,
-    grouper: &Option<Box<dyn Fn(&Value) -> Result<String, ShellError> + Send>>,
+    grouper: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
    tag: impl Into<Tag>,
 ) -> Result<Value, ShellError> {
    let tag = tag.into();

    let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();

-    for value in values.table_entries() {
+    for (idx, value) in values.table_entries().enumerate() {
        let group_key = if let Some(ref grouper) = grouper {
-            grouper(&value)
+            grouper(idx, &value)
        } else {
            as_string(&value)
        };
--- a/crates/nu-cli/src/utils/data/split.rs
+++ b/crates/nu-cli/src/utils/data/split.rs
@ -7,7 +7,7 @@ use crate::utils::data::group;
 #[allow(clippy::type_complexity)]
 pub fn split(
    value: &Value,
-    splitter: &Option<Box<dyn Fn(&Value) -> Result<String, ShellError> + Send>>,
+    splitter: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
    tag: impl Into<Tag>,
 ) -> Result<Value, ShellError> {
    let tag = tag.into();
--- a/crates/nu-cli/tests/commands/group_by.rs
+++ b/crates/nu-cli/tests/commands/group_by.rs
@ -31,8 +31,50 @@ fn groups() {
 }

 #[test]
-fn errors_if_given_unknown_column_name_is_missing() {
+fn errors_if_given_unknown_column_name() {
    Playground::setup("group_by_test_2", |dirs, sandbox| {
+        sandbox.with_files(vec![FileWithContentToBeTrimmed(
+            "los_tres_caballeros.json",
+            r#"
+                {
+                    "nu": {
+                        "committers": [
+                            {"name": "Andrés N. Robalino"},
+                            {"name": "Jonathan Turner"},
+                            {"name": "Yehuda Katz"}
+                        ],
+                        "releases": [
+                            {"version": "0.2"}
+                            {"version": "0.8"},
+                            {"version": "0.9999999"}
+                        ],
+                        "0xATYKARNU": [
+                            ["Th", "e", " "],
+                            ["BIG", " ", "UnO"],
+                            ["punto", "cero"]
+                        ]
+                    }
+                }
+            "#,
+        )]);
+
+        let actual = nu!(
+            cwd: dirs.test(), pipeline(
+            r#"
+                open los_tres_caballeros.json
+                | group-by { get nu.releases.version }
+            "#
+        ));
+
+        assert!(actual
+            .err
+            .contains("requires a table with one value for grouping"));
+    })
+}
+
+#[test]
+fn errors_if_block_given_evaluates_more_than_one_row() {
+    Playground::setup("group_by_test_3", |dirs, sandbox| {
        sandbox.with_files(vec![FileWithContentToBeTrimmed(
            "los_tres_caballeros.csv",
            r#"