mirror of
https://github.com/nushell/nushell
synced 2025-01-16 07:04:09 +00:00
group-by can generate custom grouping key by block evaluation. (#2172)
This commit is contained in:
parent
8551e06d9e
commit
f2c4d22739
7 changed files with 163 additions and 28 deletions
|
@ -83,7 +83,7 @@ fn is_expanded_it_usage(head: &SpannedExpression) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
async fn process_row(
|
||||
pub async fn process_row(
|
||||
block: Arc<Block>,
|
||||
scope: Arc<Scope>,
|
||||
head: Arc<Box<SpannedExpression>>,
|
||||
|
|
|
@ -10,7 +10,7 @@ pub struct GroupBy;
|
|||
|
||||
#[derive(Deserialize)]
|
||||
pub struct GroupByArgs {
|
||||
column_name: Option<Tagged<String>>,
|
||||
grouper: Option<Value>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
@ -21,14 +21,14 @@ impl WholeStreamCommand for GroupBy {
|
|||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("group-by").optional(
|
||||
"column_name",
|
||||
SyntaxShape::String,
|
||||
"the name of the column to group by",
|
||||
"grouper",
|
||||
SyntaxShape::Any,
|
||||
"the grouper value to use",
|
||||
)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a new table with the data from the table rows grouped by the column given."
|
||||
"create a new table grouped."
|
||||
}
|
||||
|
||||
async fn run(
|
||||
|
@ -42,12 +42,17 @@ impl WholeStreamCommand for GroupBy {
|
|||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Group items by type",
|
||||
description: "group items by column named \"type\"",
|
||||
example: r#"ls | group-by type"#,
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Group items by their value",
|
||||
description: "blocks can be used for generating a grouping key (same as above)",
|
||||
example: r#"ls | group-by { get type }"#,
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "you can also group by raw values by leaving out the argument",
|
||||
example: "echo [1 3 1 3 2 1 1] | group-by",
|
||||
result: Some(vec![UntaggedValue::row(indexmap! {
|
||||
"1".to_string() => UntaggedValue::Table(vec![
|
||||
|
@ -68,26 +73,95 @@ impl WholeStreamCommand for GroupBy {
|
|||
})
|
||||
.into()]),
|
||||
},
|
||||
Example {
|
||||
description: "write pipelines for a more involved grouping key",
|
||||
example:
|
||||
"echo [1 3 1 3 2 1 1] | group-by { echo `({{$it}} - 1) % 3` | calc | str from }",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
enum Grouper {
|
||||
ByColumn(Option<Tagged<String>>),
|
||||
ByBlock,
|
||||
}
|
||||
|
||||
pub async fn group_by(
|
||||
args: CommandArgs,
|
||||
registry: &CommandRegistry,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
let registry = registry.clone();
|
||||
let name = args.call_info.name_tag.clone();
|
||||
let (GroupByArgs { column_name }, input) = args.process(&registry).await?;
|
||||
let registry = registry.clone();
|
||||
let head = Arc::new(args.call_info.args.head.clone());
|
||||
let scope = Arc::new(args.call_info.scope.clone());
|
||||
let context = Arc::new(Context::from_raw(&args, &registry));
|
||||
let (GroupByArgs { grouper }, input) = args.process(&registry).await?;
|
||||
|
||||
let values: Vec<Value> = input.collect().await;
|
||||
let mut keys: Vec<Result<String, ShellError>> = vec![];
|
||||
let mut group_strategy = Grouper::ByColumn(None);
|
||||
|
||||
match grouper {
|
||||
Some(Value {
|
||||
value: UntaggedValue::Block(block_given),
|
||||
..
|
||||
}) => {
|
||||
let block = Arc::new(block_given);
|
||||
let error_key = "error";
|
||||
|
||||
for value in values.iter() {
|
||||
let run = block.clone();
|
||||
let scope = scope.clone();
|
||||
let head = head.clone();
|
||||
let context = context.clone();
|
||||
|
||||
match crate::commands::each::process_row(run, scope, head, context, value.clone())
|
||||
.await
|
||||
{
|
||||
Ok(mut s) => {
|
||||
let collection: Vec<Result<ReturnSuccess, ShellError>> =
|
||||
s.drain_vec().await;
|
||||
|
||||
if collection.len() > 1 {
|
||||
return Err(ShellError::labeled_error(
|
||||
"expected one value from the block",
|
||||
"requires a table with one value for grouping",
|
||||
&name,
|
||||
));
|
||||
}
|
||||
|
||||
let value = match collection.get(0) {
|
||||
Some(Ok(return_value)) => {
|
||||
return_value.raw_value().unwrap_or_else(|| {
|
||||
UntaggedValue::string(error_key).into_value(&name)
|
||||
})
|
||||
}
|
||||
Some(Err(_)) | None => {
|
||||
UntaggedValue::string(error_key).into_value(&name)
|
||||
}
|
||||
};
|
||||
|
||||
keys.push(as_string(&value));
|
||||
}
|
||||
Err(_) => {
|
||||
keys.push(Ok(error_key.into()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
group_strategy = Grouper::ByBlock;
|
||||
}
|
||||
Some(other) => {
|
||||
group_strategy = Grouper::ByColumn(Some(as_string(&other)?.tagged(&name)));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
if values.is_empty() {
|
||||
return Err(ShellError::labeled_error(
|
||||
"Expected table from pipeline",
|
||||
"expected table from pipeline",
|
||||
"requires a table input",
|
||||
name,
|
||||
));
|
||||
|
@ -95,9 +169,25 @@ pub async fn group_by(
|
|||
|
||||
let values = UntaggedValue::table(&values).into_value(&name);
|
||||
|
||||
match group(&column_name, &values, name) {
|
||||
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
|
||||
Err(reason) => Err(reason),
|
||||
match group_strategy {
|
||||
Grouper::ByBlock => {
|
||||
let map = keys.clone();
|
||||
|
||||
let block = Box::new(move |idx: usize, row: &Value| match map.get(idx) {
|
||||
Some(Ok(key)) => Ok(key.clone()),
|
||||
Some(Err(reason)) => Err(reason.clone()),
|
||||
None => as_string(row),
|
||||
});
|
||||
|
||||
match crate::utils::data::group(&values, &Some(block), &name) {
|
||||
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
|
||||
Err(reason) => Err(reason),
|
||||
}
|
||||
}
|
||||
Grouper::ByColumn(column_name) => match group(&column_name, &values, name) {
|
||||
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
|
||||
Err(reason) => Err(reason),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -141,7 +231,7 @@ pub fn group(
|
|||
|
||||
match grouper {
|
||||
Grouper::ByColumn(Some(column_name)) => {
|
||||
let block = Box::new(move |row: &Value| {
|
||||
let block = Box::new(move |_, row: &Value| {
|
||||
match row.get_data_by_key(column_name.borrow_spanned()) {
|
||||
Some(group_key) => Ok(as_string(&group_key)?),
|
||||
None => Err(suggestions(column_name.borrow_tagged(), &row)),
|
||||
|
@ -151,13 +241,16 @@ pub fn group(
|
|||
crate::utils::data::group(&values, &Some(block), &name)
|
||||
}
|
||||
Grouper::ByColumn(None) => {
|
||||
let block = Box::new(move |row: &Value| match as_string(row) {
|
||||
let block = Box::new(move |_, row: &Value| match as_string(row) {
|
||||
Ok(group_key) => Ok(group_key),
|
||||
Err(reason) => Err(reason),
|
||||
});
|
||||
|
||||
crate::utils::data::group(&values, &Some(block), &name)
|
||||
}
|
||||
Grouper::ByBlock => Err(ShellError::unimplemented(
|
||||
"Block not implemented: This should never happen.",
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ impl WholeStreamCommand for GroupByDate {
|
|||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a new table with the data from the table rows grouped by the column given."
|
||||
"creates a table grouped by date."
|
||||
}
|
||||
|
||||
async fn run(
|
||||
|
@ -100,7 +100,7 @@ pub async fn group_by_date(
|
|||
|
||||
match (grouper_date, grouper_column) {
|
||||
(Grouper::ByDate(None), GroupByColumn::Name(None)) => {
|
||||
let block = Box::new(move |row: &Value| row.format("%Y-%b-%d"));
|
||||
let block = Box::new(move |_, row: &Value| row.format("%Y-%b-%d"));
|
||||
|
||||
match crate::utils::data::group(&values, &Some(block), &name) {
|
||||
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
|
||||
|
@ -108,7 +108,7 @@ pub async fn group_by_date(
|
|||
}
|
||||
}
|
||||
(Grouper::ByDate(None), GroupByColumn::Name(Some(column_name))) => {
|
||||
let block = Box::new(move |row: &Value| {
|
||||
let block = Box::new(move |_, row: &Value| {
|
||||
let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
|
||||
Some(group_key) => Ok(group_key),
|
||||
None => Err(suggestions(column_name.borrow_tagged(), &row)),
|
||||
|
@ -123,7 +123,7 @@ pub async fn group_by_date(
|
|||
}
|
||||
}
|
||||
(Grouper::ByDate(Some(fmt)), GroupByColumn::Name(None)) => {
|
||||
let block = Box::new(move |row: &Value| row.format(&fmt));
|
||||
let block = Box::new(move |_, row: &Value| row.format(&fmt));
|
||||
|
||||
match crate::utils::data::group(&values, &Some(block), &name) {
|
||||
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
|
||||
|
@ -131,7 +131,7 @@ pub async fn group_by_date(
|
|||
}
|
||||
}
|
||||
(Grouper::ByDate(Some(fmt)), GroupByColumn::Name(Some(column_name))) => {
|
||||
let block = Box::new(move |row: &Value| {
|
||||
let block = Box::new(move |_, row: &Value| {
|
||||
let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
|
||||
Some(group_key) => Ok(group_key),
|
||||
None => Err(suggestions(column_name.borrow_tagged(), &row)),
|
||||
|
|
|
@ -81,7 +81,7 @@ pub fn split(
|
|||
|
||||
match grouper {
|
||||
Grouper::ByColumn(Some(column_name)) => {
|
||||
let block = Box::new(move |row: &Value| {
|
||||
let block = Box::new(move |_, row: &Value| {
|
||||
match row.get_data_by_key(column_name.borrow_spanned()) {
|
||||
Some(group_key) => Ok(as_string(&group_key)?),
|
||||
None => Err(suggestions(column_name.borrow_tagged(), &row)),
|
||||
|
@ -91,7 +91,7 @@ pub fn split(
|
|||
crate::utils::data::split(&values, &Some(block), &name)
|
||||
}
|
||||
Grouper::ByColumn(None) => {
|
||||
let block = Box::new(move |row: &Value| match as_string(row) {
|
||||
let block = Box::new(move |_, row: &Value| match as_string(row) {
|
||||
Ok(group_key) => Ok(group_key),
|
||||
Err(reason) => Err(reason),
|
||||
});
|
||||
|
|
|
@ -7,16 +7,16 @@ use nu_value_ext::as_string;
|
|||
#[allow(clippy::type_complexity)]
|
||||
pub fn group(
|
||||
values: &Value,
|
||||
grouper: &Option<Box<dyn Fn(&Value) -> Result<String, ShellError> + Send>>,
|
||||
grouper: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
|
||||
tag: impl Into<Tag>,
|
||||
) -> Result<Value, ShellError> {
|
||||
let tag = tag.into();
|
||||
|
||||
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();
|
||||
|
||||
for value in values.table_entries() {
|
||||
for (idx, value) in values.table_entries().enumerate() {
|
||||
let group_key = if let Some(ref grouper) = grouper {
|
||||
grouper(&value)
|
||||
grouper(idx, &value)
|
||||
} else {
|
||||
as_string(&value)
|
||||
};
|
||||
|
|
|
@ -7,7 +7,7 @@ use crate::utils::data::group;
|
|||
#[allow(clippy::type_complexity)]
|
||||
pub fn split(
|
||||
value: &Value,
|
||||
splitter: &Option<Box<dyn Fn(&Value) -> Result<String, ShellError> + Send>>,
|
||||
splitter: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
|
||||
tag: impl Into<Tag>,
|
||||
) -> Result<Value, ShellError> {
|
||||
let tag = tag.into();
|
||||
|
|
|
@ -31,8 +31,50 @@ fn groups() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn errors_if_given_unknown_column_name_is_missing() {
|
||||
fn errors_if_given_unknown_column_name() {
|
||||
Playground::setup("group_by_test_2", |dirs, sandbox| {
|
||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"los_tres_caballeros.json",
|
||||
r#"
|
||||
{
|
||||
"nu": {
|
||||
"committers": [
|
||||
{"name": "Andrés N. Robalino"},
|
||||
{"name": "Jonathan Turner"},
|
||||
{"name": "Yehuda Katz"}
|
||||
],
|
||||
"releases": [
|
||||
{"version": "0.2"}
|
||||
{"version": "0.8"},
|
||||
{"version": "0.9999999"}
|
||||
],
|
||||
"0xATYKARNU": [
|
||||
["Th", "e", " "],
|
||||
["BIG", " ", "UnO"],
|
||||
["punto", "cero"]
|
||||
]
|
||||
}
|
||||
}
|
||||
"#,
|
||||
)]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), pipeline(
|
||||
r#"
|
||||
open los_tres_caballeros.json
|
||||
| group-by { get nu.releases.version }
|
||||
"#
|
||||
));
|
||||
|
||||
assert!(actual
|
||||
.err
|
||||
.contains("requires a table with one value for grouping"));
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn errors_if_block_given_evaluates_more_than_one_row() {
|
||||
Playground::setup("group_by_test_3", |dirs, sandbox| {
|
||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"los_tres_caballeros.csv",
|
||||
r#"
|
||||
|
|
Loading…
Reference in a new issue