mirror of
https://github.com/nushell/nushell
synced 2025-01-12 21:29:07 +00:00
add multiple grouper support to group-by
(#14337)
- closes #14330 Related: - #2607 - #14019 - #14316 # Description This PR changes `group-by` to support grouping by multiple `grouper` arguments. # Changes - No grouper: no change in behavior - Single grouper - `--to-table=false`: no change in behavior - `--to-table=true`: - closure grouper: named group0 - cell-path grouper: named after the cell-path - Multiple groupers: - `--to-table=false`: nested groups - `--to-table=true`: one column for each grouper argument, followed by the `items` column - columns corresponding to cell-paths are named after them - columns corresponding to closure groupers are named `group{i}` where `i` is the index of the grouper argument # Examples ```nushell > [1 3 1 3 2 1 1] | group-by ╭───┬───────────╮ │ │ ╭───┬───╮ │ │ 1 │ │ 0 │ 1 │ │ │ │ │ 1 │ 1 │ │ │ │ │ 2 │ 1 │ │ │ │ │ 3 │ 1 │ │ │ │ ╰───┴───╯ │ │ │ ╭───┬───╮ │ │ 3 │ │ 0 │ 3 │ │ │ │ │ 1 │ 3 │ │ │ │ ╰───┴───╯ │ │ │ ╭───┬───╮ │ │ 2 │ │ 0 │ 2 │ │ │ │ ╰───┴───╯ │ ╰───┴───────────╯ > [1 3 1 3 2 1 1] | group-by --to-table ╭─#─┬─group─┬───items───╮ │ 0 │ 1 │ ╭───┬───╮ │ │ │ │ │ 0 │ 1 │ │ │ │ │ │ 1 │ 1 │ │ │ │ │ │ 2 │ 1 │ │ │ │ │ │ 3 │ 1 │ │ │ │ │ ╰───┴───╯ │ │ 1 │ 3 │ ╭───┬───╮ │ │ │ │ │ 0 │ 3 │ │ │ │ │ │ 1 │ 3 │ │ │ │ │ ╰───┴───╯ │ │ 2 │ 2 │ ╭───┬───╮ │ │ │ │ │ 0 │ 2 │ │ │ │ │ ╰───┴───╯ │ ╰─#─┴─group─┴───items───╯ > [1 3 1 3 2 1 1] | group-by { $in >= 2 } ╭───────┬───────────╮ │ │ ╭───┬───╮ │ │ false │ │ 0 │ 1 │ │ │ │ │ 1 │ 1 │ │ │ │ │ 2 │ 1 │ │ │ │ │ 3 │ 1 │ │ │ │ ╰───┴───╯ │ │ │ ╭───┬───╮ │ │ true │ │ 0 │ 3 │ │ │ │ │ 1 │ 3 │ │ │ │ │ 2 │ 2 │ │ │ │ ╰───┴───╯ │ ╰───────┴───────────╯ > [1 3 1 3 2 1 1] | group-by { $in >= 2 } --to-table ╭─#─┬─group0─┬───items───╮ │ 0 │ false │ ╭───┬───╮ │ │ │ │ │ 0 │ 1 │ │ │ │ │ │ 1 │ 1 │ │ │ │ │ │ 2 │ 1 │ │ │ │ │ │ 3 │ 1 │ │ │ │ │ ╰───┴───╯ │ │ 1 │ true │ ╭───┬───╮ │ │ │ │ │ 0 │ 3 │ │ │ │ │ │ 1 │ 3 │ │ │ │ │ │ 2 │ 2 │ │ │ │ │ ╰───┴───╯ │ ╰─#─┴─group0─┴───items───╯ ``` ```nushell let data = [ [name, lang, year]; [andres, rb, "2019"], [jt, rs, "2019"], [storm, 
rs, "2021"] ] > $data ╭─#─┬──name──┬─lang─┬─year─╮ │ 0 │ andres │ rb │ 2019 │ │ 1 │ jt │ rs │ 2019 │ │ 2 │ storm │ rs │ 2021 │ ╰─#─┴──name──┴─lang─┴─year─╯ ``` ```nushell > $data | group-by lang ╭────┬──────────────────────────────╮ │ │ ╭─#─┬──name──┬─lang─┬─year─╮ │ │ rb │ │ 0 │ andres │ rb │ 2019 │ │ │ │ ╰─#─┴──name──┴─lang─┴─year─╯ │ │ │ ╭─#─┬─name──┬─lang─┬─year─╮ │ │ rs │ │ 0 │ jt │ rs │ 2019 │ │ │ │ │ 1 │ storm │ rs │ 2021 │ │ │ │ ╰─#─┴─name──┴─lang─┴─year─╯ │ ╰────┴──────────────────────────────╯ ``` Group column is now named after the grouper, to allow multiple groupers. ```nushell > $data | group-by lang --to-table # column names changed! ╭─#─┬─lang─┬────────────items─────────────╮ │ 0 │ rb │ ╭─#─┬──name──┬─lang─┬─year─╮ │ │ │ │ │ 0 │ andres │ rb │ 2019 │ │ │ │ │ ╰─#─┴──name──┴─lang─┴─year─╯ │ │ 1 │ rs │ ╭─#─┬─name──┬─lang─┬─year─╮ │ │ │ │ │ 0 │ jt │ rs │ 2019 │ │ │ │ │ │ 1 │ storm │ rs │ 2021 │ │ │ │ │ ╰─#─┴─name──┴─lang─┴─year─╯ │ ╰─#─┴─lang─┴────────────items─────────────╯ ``` Grouping by multiple columns makes finer-grained aggregations possible. ```nushell > $data | group-by lang year --to-table ╭─#─┬─lang─┬─year─┬────────────items─────────────╮ │ 0 │ rb │ 2019 │ ╭─#─┬──name──┬─lang─┬─year─╮ │ │ │ │ │ │ 0 │ andres │ rb │ 2019 │ │ │ │ │ │ ╰─#─┴──name──┴─lang─┴─year─╯ │ │ 1 │ rs │ 2019 │ ╭─#─┬─name─┬─lang─┬─year─╮ │ │ │ │ │ │ 0 │ jt │ rs │ 2019 │ │ │ │ │ │ ╰─#─┴─name─┴─lang─┴─year─╯ │ │ 2 │ rs │ 2021 │ ╭─#─┬─name──┬─lang─┬─year─╮ │ │ │ │ │ │ 0 │ storm │ rs │ 2021 │ │ │ │ │ │ ╰─#─┴─name──┴─lang─┴─year─╯ │ ╰─#─┴─lang─┴─year─┴────────────items─────────────╯ ``` Grouping by multiple columns without `--to-table` returns a nested structure. This is equivalent to `$data | group-by year | split-by lang`, making `split-by` obsolete. 
```nushell > $data | group-by lang year ╭────┬─────────────────────────────────────────╮ │ │ ╭──────┬──────────────────────────────╮ │ │ rb │ │ │ ╭─#─┬──name──┬─lang─┬─year─╮ │ │ │ │ │ 2019 │ │ 0 │ andres │ rb │ 2019 │ │ │ │ │ │ │ ╰─#─┴──name──┴─lang─┴─year─╯ │ │ │ │ ╰──────┴──────────────────────────────╯ │ │ │ ╭──────┬─────────────────────────────╮ │ │ rs │ │ │ ╭─#─┬─name─┬─lang─┬─year─╮ │ │ │ │ │ 2019 │ │ 0 │ jt │ rs │ 2019 │ │ │ │ │ │ │ ╰─#─┴─name─┴─lang─┴─year─╯ │ │ │ │ │ │ ╭─#─┬─name──┬─lang─┬─year─╮ │ │ │ │ │ 2021 │ │ 0 │ storm │ rs │ 2021 │ │ │ │ │ │ │ ╰─#─┴─name──┴─lang─┴─year─╯ │ │ │ │ ╰──────┴─────────────────────────────╯ │ ╰────┴─────────────────────────────────────────╯ ``` From #2607: > Here's a couple more examples without much explanation. This one shows adding two grouping keys. I'm always wanting to add more columns when using group-by and it just-work™️ `gb.exe -f movies-2.csv -k 3,2 -s 7 --skip_header` > > ``` > k:3 | k:2 | count | sum:7 > -----------------------+-----------+-------+-------------------- > 20th Century Fox | Drama | 1 | 117.09 > 20th Century Fox | Romance | 1 | 39.66 > CBS | Comedy | 1 | 77.09 > Disney | Animation | 4 | 1264.23 > Disney | Comedy | 4 | 950.27 > Fox | Comedy | 5 | 661.85 > Independent | Comedy | 7 | 399.07 > Independent | Drama | 4 | 69.75 > Independent | Romance | 7 | 1048.75 > Independent | romance | 1 | 29.37 > ... 
> ``` This example can be achieved like this: ```nushell > open movies-2.csv | group-by "Lead Studio" Genre --to-table | insert count {get items | length} | insert sum { get items."Worldwide Gross" | math sum} | reject items | sort-by "Lead Studio" Genre ╭─#──┬──────Lead Studio──────┬───Genre───┬─count─┬───sum───╮ │ 0 │ 20th Century Fox │ Drama │ 1 │ 117.09 │ │ 1 │ 20th Century Fox │ Romance │ 1 │ 39.66 │ │ 2 │ CBS │ Comedy │ 1 │ 77.09 │ │ 3 │ Disney │ Animation │ 4 │ 1264.23 │ │ 4 │ Disney │ Comedy │ 4 │ 950.27 │ │ 5 │ Fox │ Comedy │ 5 │ 661.85 │ │ 6 │ Fox │ comedy │ 1 │ 60.72 │ │ 7 │ Independent │ Comedy │ 7 │ 399.07 │ │ 8 │ Independent │ Drama │ 4 │ 69.75 │ │ 9 │ Independent │ Romance │ 7 │ 1048.75 │ │ 10 │ Independent │ romance │ 1 │ 29.37 │ ... ```
This commit is contained in:
parent
f7832c0e82
commit
b6e84879b6
2 changed files with 226 additions and 60 deletions
|
@ -1,6 +1,6 @@
|
|||
use indexmap::IndexMap;
|
||||
use nu_engine::{command_prelude::*, ClosureEval};
|
||||
use nu_protocol::engine::Closure;
|
||||
use nu_protocol::{engine::Closure, IntoValue};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GroupBy;
|
||||
|
@ -22,7 +22,7 @@ impl Command for GroupBy {
|
|||
"Return a table with \"groups\" and \"items\" columns",
|
||||
None,
|
||||
)
|
||||
.optional(
|
||||
.rest(
|
||||
"grouper",
|
||||
SyntaxShape::OneOf(vec![
|
||||
SyntaxShape::CellPath,
|
||||
|
@ -135,7 +135,89 @@ impl Command for GroupBy {
|
|||
Value::test_string("false"),
|
||||
]),
|
||||
})),
|
||||
}
|
||||
},
|
||||
Example {
|
||||
description: "Group items by multiple columns' values",
|
||||
example: r#"[
|
||||
[name, lang, year];
|
||||
[andres, rb, "2019"],
|
||||
[jt, rs, "2019"],
|
||||
[storm, rs, "2021"]
|
||||
]
|
||||
| group-by lang year"#,
|
||||
result: Some(Value::test_record(record! {
|
||||
"rb" => Value::test_record(record! {
|
||||
"2019" => Value::test_list(
|
||||
vec![Value::test_record(record! {
|
||||
"name" => Value::test_string("andres"),
|
||||
"lang" => Value::test_string("rb"),
|
||||
"year" => Value::test_string("2019"),
|
||||
})],
|
||||
),
|
||||
}),
|
||||
"rs" => Value::test_record(record! {
|
||||
"2019" => Value::test_list(
|
||||
vec![Value::test_record(record! {
|
||||
"name" => Value::test_string("jt"),
|
||||
"lang" => Value::test_string("rs"),
|
||||
"year" => Value::test_string("2019"),
|
||||
})],
|
||||
),
|
||||
"2021" => Value::test_list(
|
||||
vec![Value::test_record(record! {
|
||||
"name" => Value::test_string("storm"),
|
||||
"lang" => Value::test_string("rs"),
|
||||
"year" => Value::test_string("2021"),
|
||||
})],
|
||||
),
|
||||
}),
|
||||
}))
|
||||
},
|
||||
Example {
|
||||
description: "Group items by multiple columns' values",
|
||||
example: r#"[
|
||||
[name, lang, year];
|
||||
[andres, rb, "2019"],
|
||||
[jt, rs, "2019"],
|
||||
[storm, rs, "2021"]
|
||||
]
|
||||
| group-by lang year --to-table"#,
|
||||
result: Some(Value::test_list(vec![
|
||||
Value::test_record(record! {
|
||||
"lang" => Value::test_string("rb"),
|
||||
"year" => Value::test_string("2019"),
|
||||
"items" => Value::test_list(vec![
|
||||
Value::test_record(record! {
|
||||
"name" => Value::test_string("andres"),
|
||||
"lang" => Value::test_string("rb"),
|
||||
"year" => Value::test_string("2019"),
|
||||
})
|
||||
]),
|
||||
}),
|
||||
Value::test_record(record! {
|
||||
"lang" => Value::test_string("rs"),
|
||||
"year" => Value::test_string("2019"),
|
||||
"items" => Value::test_list(vec![
|
||||
Value::test_record(record! {
|
||||
"name" => Value::test_string("jt"),
|
||||
"lang" => Value::test_string("rs"),
|
||||
"year" => Value::test_string("2019"),
|
||||
})
|
||||
]),
|
||||
}),
|
||||
Value::test_record(record! {
|
||||
"lang" => Value::test_string("rs"),
|
||||
"year" => Value::test_string("2021"),
|
||||
"items" => Value::test_list(vec![
|
||||
Value::test_record(record! {
|
||||
"name" => Value::test_string("storm"),
|
||||
"lang" => Value::test_string("rs"),
|
||||
"year" => Value::test_string("2021"),
|
||||
})
|
||||
]),
|
||||
}),
|
||||
]))
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
@ -147,7 +229,7 @@ pub fn group_by(
|
|||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let head = call.head;
|
||||
let grouper: Option<Value> = call.opt(engine_state, stack, 0)?;
|
||||
let groupers: Vec<Value> = call.rest(engine_state, stack, 0)?;
|
||||
let to_table = call.has_flag(engine_state, stack, "to-table")?;
|
||||
let config = engine_state.get_config();
|
||||
|
||||
|
@ -156,29 +238,22 @@ pub fn group_by(
|
|||
return Ok(Value::record(Record::new(), head).into_pipeline_data());
|
||||
}
|
||||
|
||||
let groups = match grouper {
|
||||
Some(grouper) => {
|
||||
let span = grouper.span();
|
||||
match grouper {
|
||||
Value::CellPath { val, .. } => group_cell_path(val, values, config)?,
|
||||
Value::Closure { val, .. } => {
|
||||
group_closure(values, span, *val, engine_state, stack)?
|
||||
}
|
||||
_ => {
|
||||
return Err(ShellError::TypeMismatch {
|
||||
err_message: "unsupported grouper type".to_string(),
|
||||
span,
|
||||
})
|
||||
}
|
||||
}
|
||||
let mut groupers = groupers.into_iter();
|
||||
|
||||
let grouped = if let Some(grouper) = groupers.next() {
|
||||
let mut groups = Grouped::new(&grouper, values, config, engine_state, stack)?;
|
||||
for grouper in groupers {
|
||||
groups.subgroup(&grouper, config, engine_state, stack)?;
|
||||
}
|
||||
None => group_no_grouper(values, config)?,
|
||||
groups
|
||||
} else {
|
||||
Grouped::empty(values, config)
|
||||
};
|
||||
|
||||
let value = if to_table {
|
||||
groups_to_table(groups, head)
|
||||
grouped.into_table(head)
|
||||
} else {
|
||||
groups_to_record(groups, head)
|
||||
grouped.into_record(head)
|
||||
};
|
||||
|
||||
Ok(value.into_pipeline_data())
|
||||
|
@ -207,20 +282,6 @@ fn group_cell_path(
|
|||
Ok(groups)
|
||||
}
|
||||
|
||||
fn group_no_grouper(
|
||||
values: Vec<Value>,
|
||||
config: &nu_protocol::Config,
|
||||
) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
|
||||
let mut groups = IndexMap::<_, Vec<_>>::new();
|
||||
|
||||
for value in values.into_iter() {
|
||||
let key = value.to_abbreviated_string(config);
|
||||
groups.entry(key).or_default().push(value);
|
||||
}
|
||||
|
||||
Ok(groups)
|
||||
}
|
||||
|
||||
fn group_closure(
|
||||
values: Vec<Value>,
|
||||
span: Span,
|
||||
|
@ -244,32 +305,137 @@ fn group_closure(
|
|||
Ok(groups)
|
||||
}
|
||||
|
||||
fn groups_to_record(groups: IndexMap<String, Vec<Value>>, span: Span) -> Value {
|
||||
Value::record(
|
||||
groups
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k, Value::list(v, span)))
|
||||
.collect(),
|
||||
span,
|
||||
)
|
||||
struct Grouped {
|
||||
grouper: Option<String>,
|
||||
groups: Tree,
|
||||
}
|
||||
|
||||
fn groups_to_table(groups: IndexMap<String, Vec<Value>>, span: Span) -> Value {
|
||||
Value::list(
|
||||
groups
|
||||
.into_iter()
|
||||
.map(|(group, items)| {
|
||||
Value::record(
|
||||
record! {
|
||||
"group" => Value::string(group, span),
|
||||
"items" => Value::list(items, span),
|
||||
},
|
||||
enum Tree {
|
||||
Leaf(IndexMap<String, Vec<Value>>),
|
||||
Branch(IndexMap<String, Grouped>),
|
||||
}
|
||||
|
||||
impl Grouped {
|
||||
fn empty(values: Vec<Value>, config: &nu_protocol::Config) -> Self {
|
||||
let mut groups = IndexMap::<_, Vec<_>>::new();
|
||||
|
||||
for value in values.into_iter() {
|
||||
let key = value.to_abbreviated_string(config);
|
||||
groups.entry(key).or_default().push(value);
|
||||
}
|
||||
|
||||
Self {
|
||||
grouper: Some("group".into()),
|
||||
groups: Tree::Leaf(groups),
|
||||
}
|
||||
}
|
||||
|
||||
fn new(
|
||||
grouper: &Value,
|
||||
values: Vec<Value>,
|
||||
config: &nu_protocol::Config,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
) -> Result<Self, ShellError> {
|
||||
let span = grouper.span();
|
||||
let groups = match grouper {
|
||||
Value::CellPath { val, .. } => group_cell_path(val.clone(), values, config)?,
|
||||
Value::Closure { val, .. } => {
|
||||
group_closure(values, span, Closure::clone(val), engine_state, stack)?
|
||||
}
|
||||
_ => {
|
||||
return Err(ShellError::TypeMismatch {
|
||||
err_message: "unsupported grouper type".to_string(),
|
||||
span,
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
span,
|
||||
)
|
||||
})
|
||||
}
|
||||
};
|
||||
let grouper = grouper.as_cell_path().ok().map(CellPath::to_column_name);
|
||||
Ok(Self {
|
||||
grouper,
|
||||
groups: Tree::Leaf(groups),
|
||||
})
|
||||
}
|
||||
|
||||
fn subgroup(
|
||||
&mut self,
|
||||
grouper: &Value,
|
||||
config: &nu_protocol::Config,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
) -> Result<(), ShellError> {
|
||||
let groups = match &mut self.groups {
|
||||
Tree::Leaf(groups) => std::mem::take(groups)
|
||||
.into_iter()
|
||||
.map(|(key, values)| -> Result<_, ShellError> {
|
||||
let leaf = Self::new(grouper, values, config, engine_state, stack)?;
|
||||
Ok((key, leaf))
|
||||
})
|
||||
.collect::<Result<IndexMap<_, _>, ShellError>>()?,
|
||||
Tree::Branch(nested_groups) => {
|
||||
let mut nested_groups = std::mem::take(nested_groups);
|
||||
for v in nested_groups.values_mut() {
|
||||
v.subgroup(grouper, config, engine_state, stack)?;
|
||||
}
|
||||
nested_groups
|
||||
}
|
||||
};
|
||||
self.groups = Tree::Branch(groups);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn into_table(self, head: Span) -> Value {
|
||||
self._into_table(head, 0)
|
||||
.into_iter()
|
||||
.map(|row| row.into_iter().rev().collect::<Record>().into_value(head))
|
||||
.collect::<Vec<_>>()
|
||||
.into_value(head)
|
||||
}
|
||||
|
||||
fn _into_table(self, head: Span, index: usize) -> Vec<Record> {
|
||||
let grouper = self.grouper.unwrap_or_else(|| format!("group{index}"));
|
||||
match self.groups {
|
||||
Tree::Leaf(leaf) => leaf
|
||||
.into_iter()
|
||||
.map(|(group, values)| {
|
||||
[
|
||||
("items".to_string(), values.into_value(head)),
|
||||
(grouper.clone(), group.into_value(head)),
|
||||
]
|
||||
.into_iter()
|
||||
.collect()
|
||||
})
|
||||
.collect::<Vec<Record>>(),
|
||||
Tree::Branch(branch) => branch
|
||||
.into_iter()
|
||||
.flat_map(|(group, items)| {
|
||||
let mut inner = items._into_table(head, index + 1);
|
||||
for row in &mut inner {
|
||||
row.insert(grouper.clone(), group.clone().into_value(head));
|
||||
}
|
||||
inner
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
fn into_record(self, head: Span) -> Value {
|
||||
match self.groups {
|
||||
Tree::Leaf(leaf) => Value::record(
|
||||
leaf.into_iter()
|
||||
.map(|(k, v)| (k, v.into_value(head)))
|
||||
.collect(),
|
||||
head,
|
||||
),
|
||||
Tree::Branch(branch) => {
|
||||
let values = branch
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k, v.into_record(head)))
|
||||
.collect();
|
||||
Value::record(values, head)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -79,7 +79,7 @@ def create-test-record [] nothing -> record<before-each: string, after-each: str
|
|||
| group-by --to-table annotation
|
||||
| update items {|x|
|
||||
$x.items.function_name
|
||||
| if $x.group in ["test", "test-skip"] {
|
||||
| if $x.annotation in ["test", "test-skip"] {
|
||||
$in
|
||||
} else {
|
||||
get 0
|
||||
|
|
Loading…
Reference in a new issue