mirror of
https://github.com/nushell/nushell
synced 2024-11-10 07:04:13 +00:00
Change group-by to accept cell paths (#9020)
Closes #9003. This PR changes `group-by` so that its optional argument is interpreted as a cell path. In turn, this lets users use `?` to ignore rows that are missing the column they wish to group on. For example: ``` > [{foo: 123}, {foo: 234}, {bar: 345}] | group-by foo Error: nu:🐚:column_not_found × Cannot find column ╭─[entry #3:1:1] 1 │ [{foo: 123}, {foo: 234}, {bar: 345}] | group-by foo · ─────┬──── ─┬─ · │ ╰── cannot find column 'foo' · ╰── value originates here ╰──── > [{foo: 123}, {foo: 234}, {bar: 345}] | group-by foo? ╭─────┬───────────────╮ │ 123 │ [table 1 row] │ │ 234 │ [table 1 row] │ ╰─────┴───────────────╯ ``` ~~This removes the ability to pass `group-by` a closure or block (I wasn't able to figure out how to make the 2 features coexist), and so it is a breaking change. I think this is OK; I didn't even know `group-by` could accept a closure or block because there was no example for that functionality.~~
This commit is contained in:
parent
b912d4c1ea
commit
9ce61dc677
4 changed files with 212 additions and 157 deletions
|
@ -10,8 +10,8 @@ pub fn test_examples(cmd: impl Command + 'static) {
|
|||
mod test_examples {
|
||||
use super::super::{
|
||||
Ansi, Date, Enumerate, Flatten, From, Get, Into, IntoString, LetEnv, Math, MathEuler,
|
||||
MathPi, MathRound, ParEach, Path, Random, Sort, SortBy, Split, SplitColumn, SplitRow, Str,
|
||||
StrJoin, StrLength, StrReplace, Update, Url, Values, Wrap,
|
||||
MathPi, MathRound, ParEach, Path, PathParse, Random, Sort, SortBy, Split, SplitColumn,
|
||||
SplitRow, Str, StrJoin, StrLength, StrReplace, Update, Url, Values, Wrap,
|
||||
};
|
||||
use crate::{Each, To};
|
||||
use nu_cmd_lang::example_support::{
|
||||
|
@ -85,6 +85,7 @@ mod test_examples {
|
|||
working_set.add_decl(Box::new(MathRound));
|
||||
working_set.add_decl(Box::new(Mut));
|
||||
working_set.add_decl(Box::new(Path));
|
||||
working_set.add_decl(Box::new(PathParse));
|
||||
working_set.add_decl(Box::new(ParEach));
|
||||
working_set.add_decl(Box::new(Random));
|
||||
working_set.add_decl(Box::new(Sort));
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
use nu_engine::{eval_block, CallExt};
|
||||
use nu_protocol::ast::Call;
|
||||
use nu_protocol::ast::{Call, CellPath};
|
||||
use nu_protocol::engine::{Closure, Command, EngineState, Stack};
|
||||
use nu_protocol::{
|
||||
Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape,
|
||||
Type, Value,
|
||||
Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use indexmap::IndexMap;
|
||||
|
@ -26,7 +25,16 @@ impl Command for GroupBy {
|
|||
Type::List(Box::new(Type::Any)),
|
||||
Type::Record(vec![]),
|
||||
)])
|
||||
.optional("grouper", SyntaxShape::Any, "the grouper value to use")
|
||||
.optional(
|
||||
"grouper",
|
||||
SyntaxShape::OneOf(vec![
|
||||
SyntaxShape::CellPath,
|
||||
SyntaxShape::Block,
|
||||
SyntaxShape::Closure(None),
|
||||
SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
|
||||
]),
|
||||
"the path to the column to group on",
|
||||
)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
|
@ -50,6 +58,33 @@ impl Command for GroupBy {
|
|||
example: r#"ls | group-by type"#,
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Group items by the \"foo\" column's values, ignoring records without a \"foo\" column",
|
||||
example: r#"open cool.json | group-by foo?"#,
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Group using a block which is evaluated against each input value",
|
||||
example: "[foo.txt bar.csv baz.txt] | group-by { path parse | get extension }",
|
||||
result: Some(Value::Record {
|
||||
cols: vec!["txt".to_string(), "csv".to_string()],
|
||||
vals: vec![
|
||||
Value::List {
|
||||
vals: vec![
|
||||
Value::test_string("foo.txt"),
|
||||
Value::test_string("baz.txt"),
|
||||
],
|
||||
span: Span::test_data(),
|
||||
},
|
||||
Value::List {
|
||||
vals: vec![Value::test_string("bar.csv")],
|
||||
span: Span::test_data(),
|
||||
},
|
||||
],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
|
||||
Example {
|
||||
description: "You can also group by raw values by leaving out the argument",
|
||||
example: "['1' '3' '1' '3' '2' '1' '1'] | group-by",
|
||||
|
@ -81,131 +116,163 @@ impl Command for GroupBy {
|
|||
}
|
||||
}
|
||||
|
||||
enum Grouper {
|
||||
ByColumn(Option<Spanned<String>>),
|
||||
ByBlock,
|
||||
}
|
||||
|
||||
pub fn group_by(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let name = call.head;
|
||||
let span = call.head;
|
||||
|
||||
let grouper: Option<Value> = call.opt(engine_state, stack, 0)?;
|
||||
let values: Vec<Value> = input.into_iter().collect();
|
||||
let mut keys: Vec<Result<String, ShellError>> = vec![];
|
||||
let mut group_strategy = Grouper::ByColumn(None);
|
||||
|
||||
if values.is_empty() {
|
||||
return Err(ShellError::GenericError(
|
||||
"expected table from pipeline".into(),
|
||||
"requires a table input".into(),
|
||||
Some(name),
|
||||
Some(span),
|
||||
None,
|
||||
Vec::new(),
|
||||
));
|
||||
}
|
||||
|
||||
let first = values[0].clone();
|
||||
|
||||
let value_list = Value::List {
|
||||
vals: values.clone(),
|
||||
span: name,
|
||||
};
|
||||
|
||||
match grouper {
|
||||
let group_value = match grouper {
|
||||
Some(Value::CellPath { val, span }) => group_cell_path(val, values, span)?,
|
||||
Some(Value::Block { .. }) | Some(Value::Closure { .. }) => {
|
||||
let block: Option<Closure> = call.opt(engine_state, stack, 0)?;
|
||||
let error_key = "error";
|
||||
|
||||
for value in values {
|
||||
if let Some(capture_block) = &block {
|
||||
let mut stack = stack.captures_to_stack(&capture_block.captures);
|
||||
let block = engine_state.get_block(capture_block.block_id);
|
||||
let pipeline = eval_block(
|
||||
engine_state,
|
||||
&mut stack,
|
||||
block,
|
||||
value.into_pipeline_data(),
|
||||
call.redirect_stdout,
|
||||
call.redirect_stderr,
|
||||
);
|
||||
|
||||
match pipeline {
|
||||
Ok(s) => {
|
||||
let collection: Vec<Value> = s.into_iter().collect();
|
||||
|
||||
if collection.len() > 1 {
|
||||
return Err(ShellError::GenericError(
|
||||
"expected one value from the block".into(),
|
||||
"requires a table with one value for grouping".into(),
|
||||
Some(name),
|
||||
None,
|
||||
Vec::new(),
|
||||
));
|
||||
}
|
||||
|
||||
let value = match collection.get(0) {
|
||||
Some(Value::Error { .. }) | None => Value::string(error_key, name),
|
||||
Some(return_value) => return_value.clone(),
|
||||
};
|
||||
|
||||
keys.push(value.as_string());
|
||||
}
|
||||
Err(_) => {
|
||||
keys.push(Ok(error_key.into()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
group_strategy = Grouper::ByBlock;
|
||||
group_closure(&values, span, block, stack, engine_state, call)?
|
||||
}
|
||||
Some(other) => {
|
||||
group_strategy = Grouper::ByColumn(Some(Spanned {
|
||||
item: other.as_string()?,
|
||||
span: name,
|
||||
}));
|
||||
None => group_no_grouper(values, span)?,
|
||||
_ => {
|
||||
return Err(ShellError::TypeMismatch {
|
||||
err_message: "unsupported grouper type".to_string(),
|
||||
span,
|
||||
})
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
let name = if let Ok(span) = first.span() {
|
||||
span
|
||||
} else {
|
||||
name
|
||||
};
|
||||
|
||||
let group_value = match group_strategy {
|
||||
Grouper::ByBlock => {
|
||||
let map = keys;
|
||||
|
||||
let block = Box::new(move |idx: usize, row: &Value| match map.get(idx) {
|
||||
Some(Ok(key)) => Ok(key.clone()),
|
||||
Some(Err(reason)) => Err(reason.clone()),
|
||||
None => row.as_string(),
|
||||
});
|
||||
|
||||
data_group(&value_list, &Some(block), name)
|
||||
}
|
||||
Grouper::ByColumn(column_name) => group(&column_name, &value_list, name),
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(group_value?, None))
|
||||
Ok(PipelineData::Value(group_value, None))
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub fn data_group(
|
||||
values: &Value,
|
||||
grouper: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
|
||||
pub fn group_cell_path(
|
||||
column_name: CellPath,
|
||||
values: Vec<Value>,
|
||||
span: Span,
|
||||
) -> Result<Value, ShellError> {
|
||||
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();
|
||||
|
||||
for (idx, value) in values.clone().into_pipeline_data().into_iter().enumerate() {
|
||||
for value in values.into_iter() {
|
||||
let group_key = value
|
||||
.clone()
|
||||
.follow_cell_path(&column_name.members, false)?;
|
||||
if matches!(group_key, Value::Nothing { .. }) {
|
||||
continue; // likely the result of a failed optional access, ignore this value
|
||||
}
|
||||
|
||||
let group_key = group_key.as_string()?;
|
||||
let group = groups.entry(group_key).or_default();
|
||||
group.push(value);
|
||||
}
|
||||
|
||||
let mut cols = vec![];
|
||||
let mut vals = vec![];
|
||||
|
||||
for (k, v) in groups {
|
||||
cols.push(k.to_string());
|
||||
vals.push(Value::List { vals: v, span });
|
||||
}
|
||||
|
||||
Ok(Value::Record { cols, vals, span })
|
||||
}
|
||||
|
||||
pub fn group_no_grouper(values: Vec<Value>, span: Span) -> Result<Value, ShellError> {
|
||||
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();
|
||||
|
||||
for value in values.into_iter() {
|
||||
let group_key = value.as_string()?;
|
||||
let group = groups.entry(group_key).or_default();
|
||||
group.push(value);
|
||||
}
|
||||
|
||||
let mut cols = vec![];
|
||||
let mut vals = vec![];
|
||||
|
||||
for (k, v) in groups {
|
||||
cols.push(k.to_string());
|
||||
vals.push(Value::List { vals: v, span });
|
||||
}
|
||||
|
||||
Ok(Value::Record { cols, vals, span })
|
||||
}
|
||||
|
||||
// TODO: refactor this, it's a bit of a mess
|
||||
fn group_closure(
|
||||
values: &Vec<Value>,
|
||||
span: Span,
|
||||
block: Option<Closure>,
|
||||
stack: &mut Stack,
|
||||
engine_state: &EngineState,
|
||||
call: &Call,
|
||||
) -> Result<Value, ShellError> {
|
||||
let error_key = "error";
|
||||
let mut keys: Vec<Result<String, ShellError>> = vec![];
|
||||
let value_list = Value::List {
|
||||
vals: values.clone(),
|
||||
span,
|
||||
};
|
||||
|
||||
for value in values {
|
||||
if let Some(capture_block) = &block {
|
||||
let mut stack = stack.captures_to_stack(&capture_block.captures);
|
||||
let block = engine_state.get_block(capture_block.block_id);
|
||||
let pipeline = eval_block(
|
||||
engine_state,
|
||||
&mut stack,
|
||||
block,
|
||||
value.clone().into_pipeline_data(),
|
||||
call.redirect_stdout,
|
||||
call.redirect_stderr,
|
||||
);
|
||||
|
||||
match pipeline {
|
||||
Ok(s) => {
|
||||
let collection: Vec<Value> = s.into_iter().collect();
|
||||
|
||||
if collection.len() > 1 {
|
||||
return Err(ShellError::GenericError(
|
||||
"expected one value from the block".into(),
|
||||
"requires a table with one value for grouping".into(),
|
||||
Some(span),
|
||||
None,
|
||||
Vec::new(),
|
||||
));
|
||||
}
|
||||
|
||||
let value = match collection.get(0) {
|
||||
Some(Value::Error { .. }) | None => Value::string(error_key, span),
|
||||
Some(return_value) => return_value.clone(),
|
||||
};
|
||||
|
||||
keys.push(value.as_string());
|
||||
}
|
||||
Err(_) => {
|
||||
keys.push(Ok(error_key.into()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let map = keys;
|
||||
let block = Box::new(move |idx: usize, row: &Value| match map.get(idx) {
|
||||
Some(Ok(key)) => Ok(key.clone()),
|
||||
Some(Err(reason)) => Err(reason.clone()),
|
||||
None => row.as_string(),
|
||||
});
|
||||
|
||||
let grouper = &Some(block);
|
||||
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();
|
||||
|
||||
for (idx, value) in value_list.into_pipeline_data().into_iter().enumerate() {
|
||||
let group_key = if let Some(ref grouper) = grouper {
|
||||
grouper(idx, &value)
|
||||
} else {
|
||||
|
@ -227,48 +294,6 @@ pub fn data_group(
|
|||
Ok(Value::Record { cols, vals, span })
|
||||
}
|
||||
|
||||
pub fn group(
|
||||
column_name: &Option<Spanned<String>>,
|
||||
values: &Value,
|
||||
span: Span,
|
||||
) -> Result<Value, ShellError> {
|
||||
let name = span;
|
||||
|
||||
let grouper = if let Some(column_name) = column_name {
|
||||
Grouper::ByColumn(Some(column_name.clone()))
|
||||
} else {
|
||||
Grouper::ByColumn(None)
|
||||
};
|
||||
|
||||
match grouper {
|
||||
Grouper::ByColumn(Some(column_name)) => {
|
||||
let block = Box::new(move |_, row: &Value| {
|
||||
if let Value::Error { error } = row {
|
||||
return Err(*error.clone());
|
||||
};
|
||||
match row.get_data_by_key(&column_name.item) {
|
||||
Some(group_key) => Ok(group_key.as_string()?),
|
||||
None => Err(ShellError::CantFindColumn {
|
||||
col_name: column_name.item.to_string(),
|
||||
span: column_name.span,
|
||||
src_span: row.expect_span(),
|
||||
}),
|
||||
}
|
||||
});
|
||||
|
||||
data_group(values, &Some(block), name)
|
||||
}
|
||||
Grouper::ByColumn(None) => {
|
||||
let block = Box::new(move |_, row: &Value| row.as_string());
|
||||
|
||||
data_group(values, &Some(block), name)
|
||||
}
|
||||
Grouper::ByBlock => Err(ShellError::NushellFailed {
|
||||
msg: "Block not implemented: This should never happen.".into(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
|
|
@ -3,7 +3,8 @@ use nu_engine::CallExt;
|
|||
use nu_protocol::ast::Call;
|
||||
use nu_protocol::engine::{Command, EngineState, Stack};
|
||||
use nu_protocol::{
|
||||
Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value,
|
||||
Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape,
|
||||
Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -182,6 +183,36 @@ pub fn split(
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn data_group(
|
||||
values: &Value,
|
||||
grouper: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
|
||||
span: Span,
|
||||
) -> Result<Value, ShellError> {
|
||||
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();
|
||||
|
||||
for (idx, value) in values.clone().into_pipeline_data().into_iter().enumerate() {
|
||||
let group_key = if let Some(ref grouper) = grouper {
|
||||
grouper(idx, &value)
|
||||
} else {
|
||||
value.as_string()
|
||||
};
|
||||
|
||||
let group = groups.entry(group_key?).or_default();
|
||||
group.push(value);
|
||||
}
|
||||
|
||||
let mut cols = vec![];
|
||||
let mut vals = vec![];
|
||||
|
||||
for (k, v) in groups {
|
||||
cols.push(k.to_string());
|
||||
vals.push(Value::List { vals: v, span });
|
||||
}
|
||||
|
||||
Ok(Value::Record { cols, vals, span })
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub fn data_split(
|
||||
value: PipelineData,
|
||||
|
@ -203,7 +234,7 @@ pub fn data_split(
|
|||
_,
|
||||
) => {
|
||||
for (idx, list) in grouped_rows.iter().enumerate() {
|
||||
match super::group_by::data_group(list, splitter, span) {
|
||||
match data_group(list, splitter, span) {
|
||||
Ok(grouped) => {
|
||||
if let Value::Record {
|
||||
vals: li,
|
||||
|
|
|
@ -72,7 +72,7 @@ fn errors_if_given_unknown_column_name() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn errors_if_block_given_evaluates_more_than_one_row() {
|
||||
fn errors_if_column_not_found() {
|
||||
Playground::setup("group_by_test_3", |dirs, sandbox| {
|
||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"los_tres_caballeros.csv",
|
||||
|
@ -92,21 +92,19 @@ fn errors_if_block_given_evaluates_more_than_one_row() {
|
|||
"#
|
||||
));
|
||||
|
||||
assert!(actual.err.contains("value originates here"),);
|
||||
assert!(actual.err.contains("cannot find column"),);
|
||||
assert!(actual.err.contains("did you mean 'type'"),);
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn errors_if_input_empty() {
|
||||
Playground::setup("group_by_empty_test", |dirs, _sandbox| {
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), pipeline(
|
||||
r#"
|
||||
group-by date
|
||||
"#
|
||||
));
|
||||
|
||||
assert!(actual.err.contains("expected table from pipeline"));
|
||||
});
|
||||
let actual = nu!("group-by date");
|
||||
assert!(actual.err.contains("expected table from pipeline"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn optional_cell_path_works() {
|
||||
let actual = nu!("[{foo: 123}, {foo: 234}, {bar: 345}] | group-by foo? | to nuon");
|
||||
let expected = r#"{"123": [[foo]; [123]], "234": [[foo]; [234]]}"#;
|
||||
assert_eq!(actual.out, expected)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue