mirror of
https://github.com/nushell/nushell
synced 2024-12-30 15:03:25 +00:00
group-by can generate custom grouping key by block evaluation. (#2172)
This commit is contained in:
parent
8551e06d9e
commit
f2c4d22739
7 changed files with 163 additions and 28 deletions
|
@ -83,7 +83,7 @@ fn is_expanded_it_usage(head: &SpannedExpression) -> bool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn process_row(
|
pub async fn process_row(
|
||||||
block: Arc<Block>,
|
block: Arc<Block>,
|
||||||
scope: Arc<Scope>,
|
scope: Arc<Scope>,
|
||||||
head: Arc<Box<SpannedExpression>>,
|
head: Arc<Box<SpannedExpression>>,
|
||||||
|
|
|
@ -10,7 +10,7 @@ pub struct GroupBy;
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
pub struct GroupByArgs {
|
pub struct GroupByArgs {
|
||||||
column_name: Option<Tagged<String>>,
|
grouper: Option<Value>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
|
@ -21,14 +21,14 @@ impl WholeStreamCommand for GroupBy {
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::build("group-by").optional(
|
Signature::build("group-by").optional(
|
||||||
"column_name",
|
"grouper",
|
||||||
SyntaxShape::String,
|
SyntaxShape::Any,
|
||||||
"the name of the column to group by",
|
"the grouper value to use",
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn usage(&self) -> &str {
|
fn usage(&self) -> &str {
|
||||||
"Creates a new table with the data from the table rows grouped by the column given."
|
"create a new table grouped."
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run(
|
async fn run(
|
||||||
|
@ -42,12 +42,17 @@ impl WholeStreamCommand for GroupBy {
|
||||||
fn examples(&self) -> Vec<Example> {
|
fn examples(&self) -> Vec<Example> {
|
||||||
vec![
|
vec![
|
||||||
Example {
|
Example {
|
||||||
description: "Group items by type",
|
description: "group items by column named \"type\"",
|
||||||
example: r#"ls | group-by type"#,
|
example: r#"ls | group-by type"#,
|
||||||
result: None,
|
result: None,
|
||||||
},
|
},
|
||||||
Example {
|
Example {
|
||||||
description: "Group items by their value",
|
description: "blocks can be used for generating a grouping key (same as above)",
|
||||||
|
example: r#"ls | group-by { get type }"#,
|
||||||
|
result: None,
|
||||||
|
},
|
||||||
|
Example {
|
||||||
|
description: "you can also group by raw values by leaving out the argument",
|
||||||
example: "echo [1 3 1 3 2 1 1] | group-by",
|
example: "echo [1 3 1 3 2 1 1] | group-by",
|
||||||
result: Some(vec![UntaggedValue::row(indexmap! {
|
result: Some(vec![UntaggedValue::row(indexmap! {
|
||||||
"1".to_string() => UntaggedValue::Table(vec![
|
"1".to_string() => UntaggedValue::Table(vec![
|
||||||
|
@ -68,26 +73,95 @@ impl WholeStreamCommand for GroupBy {
|
||||||
})
|
})
|
||||||
.into()]),
|
.into()]),
|
||||||
},
|
},
|
||||||
|
Example {
|
||||||
|
description: "write pipelines for a more involved grouping key",
|
||||||
|
example:
|
||||||
|
"echo [1 3 1 3 2 1 1] | group-by { echo `({{$it}} - 1) % 3` | calc | str from }",
|
||||||
|
result: None,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
enum Grouper {
|
enum Grouper {
|
||||||
ByColumn(Option<Tagged<String>>),
|
ByColumn(Option<Tagged<String>>),
|
||||||
|
ByBlock,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn group_by(
|
pub async fn group_by(
|
||||||
args: CommandArgs,
|
args: CommandArgs,
|
||||||
registry: &CommandRegistry,
|
registry: &CommandRegistry,
|
||||||
) -> Result<OutputStream, ShellError> {
|
) -> Result<OutputStream, ShellError> {
|
||||||
let registry = registry.clone();
|
|
||||||
let name = args.call_info.name_tag.clone();
|
let name = args.call_info.name_tag.clone();
|
||||||
let (GroupByArgs { column_name }, input) = args.process(&registry).await?;
|
let registry = registry.clone();
|
||||||
|
let head = Arc::new(args.call_info.args.head.clone());
|
||||||
|
let scope = Arc::new(args.call_info.scope.clone());
|
||||||
|
let context = Arc::new(Context::from_raw(&args, &registry));
|
||||||
|
let (GroupByArgs { grouper }, input) = args.process(&registry).await?;
|
||||||
|
|
||||||
let values: Vec<Value> = input.collect().await;
|
let values: Vec<Value> = input.collect().await;
|
||||||
|
let mut keys: Vec<Result<String, ShellError>> = vec![];
|
||||||
|
let mut group_strategy = Grouper::ByColumn(None);
|
||||||
|
|
||||||
|
match grouper {
|
||||||
|
Some(Value {
|
||||||
|
value: UntaggedValue::Block(block_given),
|
||||||
|
..
|
||||||
|
}) => {
|
||||||
|
let block = Arc::new(block_given);
|
||||||
|
let error_key = "error";
|
||||||
|
|
||||||
|
for value in values.iter() {
|
||||||
|
let run = block.clone();
|
||||||
|
let scope = scope.clone();
|
||||||
|
let head = head.clone();
|
||||||
|
let context = context.clone();
|
||||||
|
|
||||||
|
match crate::commands::each::process_row(run, scope, head, context, value.clone())
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(mut s) => {
|
||||||
|
let collection: Vec<Result<ReturnSuccess, ShellError>> =
|
||||||
|
s.drain_vec().await;
|
||||||
|
|
||||||
|
if collection.len() > 1 {
|
||||||
|
return Err(ShellError::labeled_error(
|
||||||
|
"expected one value from the block",
|
||||||
|
"requires a table with one value for grouping",
|
||||||
|
&name,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let value = match collection.get(0) {
|
||||||
|
Some(Ok(return_value)) => {
|
||||||
|
return_value.raw_value().unwrap_or_else(|| {
|
||||||
|
UntaggedValue::string(error_key).into_value(&name)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
Some(Err(_)) | None => {
|
||||||
|
UntaggedValue::string(error_key).into_value(&name)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
keys.push(as_string(&value));
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
keys.push(Ok(error_key.into()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
group_strategy = Grouper::ByBlock;
|
||||||
|
}
|
||||||
|
Some(other) => {
|
||||||
|
group_strategy = Grouper::ByColumn(Some(as_string(&other)?.tagged(&name)));
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
if values.is_empty() {
|
if values.is_empty() {
|
||||||
return Err(ShellError::labeled_error(
|
return Err(ShellError::labeled_error(
|
||||||
"Expected table from pipeline",
|
"expected table from pipeline",
|
||||||
"requires a table input",
|
"requires a table input",
|
||||||
name,
|
name,
|
||||||
));
|
));
|
||||||
|
@ -95,11 +169,27 @@ pub async fn group_by(
|
||||||
|
|
||||||
let values = UntaggedValue::table(&values).into_value(&name);
|
let values = UntaggedValue::table(&values).into_value(&name);
|
||||||
|
|
||||||
match group(&column_name, &values, name) {
|
match group_strategy {
|
||||||
|
Grouper::ByBlock => {
|
||||||
|
let map = keys.clone();
|
||||||
|
|
||||||
|
let block = Box::new(move |idx: usize, row: &Value| match map.get(idx) {
|
||||||
|
Some(Ok(key)) => Ok(key.clone()),
|
||||||
|
Some(Err(reason)) => Err(reason.clone()),
|
||||||
|
None => as_string(row),
|
||||||
|
});
|
||||||
|
|
||||||
|
match crate::utils::data::group(&values, &Some(block), &name) {
|
||||||
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
|
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
|
||||||
Err(reason) => Err(reason),
|
Err(reason) => Err(reason),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Grouper::ByColumn(column_name) => match group(&column_name, &values, name) {
|
||||||
|
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
|
||||||
|
Err(reason) => Err(reason),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn suggestions(tried: Tagged<&str>, for_value: &Value) -> ShellError {
|
pub fn suggestions(tried: Tagged<&str>, for_value: &Value) -> ShellError {
|
||||||
let possibilities = for_value.data_descriptors();
|
let possibilities = for_value.data_descriptors();
|
||||||
|
@ -141,7 +231,7 @@ pub fn group(
|
||||||
|
|
||||||
match grouper {
|
match grouper {
|
||||||
Grouper::ByColumn(Some(column_name)) => {
|
Grouper::ByColumn(Some(column_name)) => {
|
||||||
let block = Box::new(move |row: &Value| {
|
let block = Box::new(move |_, row: &Value| {
|
||||||
match row.get_data_by_key(column_name.borrow_spanned()) {
|
match row.get_data_by_key(column_name.borrow_spanned()) {
|
||||||
Some(group_key) => Ok(as_string(&group_key)?),
|
Some(group_key) => Ok(as_string(&group_key)?),
|
||||||
None => Err(suggestions(column_name.borrow_tagged(), &row)),
|
None => Err(suggestions(column_name.borrow_tagged(), &row)),
|
||||||
|
@ -151,13 +241,16 @@ pub fn group(
|
||||||
crate::utils::data::group(&values, &Some(block), &name)
|
crate::utils::data::group(&values, &Some(block), &name)
|
||||||
}
|
}
|
||||||
Grouper::ByColumn(None) => {
|
Grouper::ByColumn(None) => {
|
||||||
let block = Box::new(move |row: &Value| match as_string(row) {
|
let block = Box::new(move |_, row: &Value| match as_string(row) {
|
||||||
Ok(group_key) => Ok(group_key),
|
Ok(group_key) => Ok(group_key),
|
||||||
Err(reason) => Err(reason),
|
Err(reason) => Err(reason),
|
||||||
});
|
});
|
||||||
|
|
||||||
crate::utils::data::group(&values, &Some(block), &name)
|
crate::utils::data::group(&values, &Some(block), &name)
|
||||||
}
|
}
|
||||||
|
Grouper::ByBlock => Err(ShellError::unimplemented(
|
||||||
|
"Block not implemented: This should never happen.",
|
||||||
|
)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,7 +34,7 @@ impl WholeStreamCommand for GroupByDate {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn usage(&self) -> &str {
|
fn usage(&self) -> &str {
|
||||||
"Creates a new table with the data from the table rows grouped by the column given."
|
"creates a table grouped by date."
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run(
|
async fn run(
|
||||||
|
@ -100,7 +100,7 @@ pub async fn group_by_date(
|
||||||
|
|
||||||
match (grouper_date, grouper_column) {
|
match (grouper_date, grouper_column) {
|
||||||
(Grouper::ByDate(None), GroupByColumn::Name(None)) => {
|
(Grouper::ByDate(None), GroupByColumn::Name(None)) => {
|
||||||
let block = Box::new(move |row: &Value| row.format("%Y-%b-%d"));
|
let block = Box::new(move |_, row: &Value| row.format("%Y-%b-%d"));
|
||||||
|
|
||||||
match crate::utils::data::group(&values, &Some(block), &name) {
|
match crate::utils::data::group(&values, &Some(block), &name) {
|
||||||
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
|
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
|
||||||
|
@ -108,7 +108,7 @@ pub async fn group_by_date(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(Grouper::ByDate(None), GroupByColumn::Name(Some(column_name))) => {
|
(Grouper::ByDate(None), GroupByColumn::Name(Some(column_name))) => {
|
||||||
let block = Box::new(move |row: &Value| {
|
let block = Box::new(move |_, row: &Value| {
|
||||||
let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
|
let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
|
||||||
Some(group_key) => Ok(group_key),
|
Some(group_key) => Ok(group_key),
|
||||||
None => Err(suggestions(column_name.borrow_tagged(), &row)),
|
None => Err(suggestions(column_name.borrow_tagged(), &row)),
|
||||||
|
@ -123,7 +123,7 @@ pub async fn group_by_date(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(Grouper::ByDate(Some(fmt)), GroupByColumn::Name(None)) => {
|
(Grouper::ByDate(Some(fmt)), GroupByColumn::Name(None)) => {
|
||||||
let block = Box::new(move |row: &Value| row.format(&fmt));
|
let block = Box::new(move |_, row: &Value| row.format(&fmt));
|
||||||
|
|
||||||
match crate::utils::data::group(&values, &Some(block), &name) {
|
match crate::utils::data::group(&values, &Some(block), &name) {
|
||||||
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
|
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
|
||||||
|
@ -131,7 +131,7 @@ pub async fn group_by_date(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(Grouper::ByDate(Some(fmt)), GroupByColumn::Name(Some(column_name))) => {
|
(Grouper::ByDate(Some(fmt)), GroupByColumn::Name(Some(column_name))) => {
|
||||||
let block = Box::new(move |row: &Value| {
|
let block = Box::new(move |_, row: &Value| {
|
||||||
let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
|
let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
|
||||||
Some(group_key) => Ok(group_key),
|
Some(group_key) => Ok(group_key),
|
||||||
None => Err(suggestions(column_name.borrow_tagged(), &row)),
|
None => Err(suggestions(column_name.borrow_tagged(), &row)),
|
||||||
|
|
|
@ -81,7 +81,7 @@ pub fn split(
|
||||||
|
|
||||||
match grouper {
|
match grouper {
|
||||||
Grouper::ByColumn(Some(column_name)) => {
|
Grouper::ByColumn(Some(column_name)) => {
|
||||||
let block = Box::new(move |row: &Value| {
|
let block = Box::new(move |_, row: &Value| {
|
||||||
match row.get_data_by_key(column_name.borrow_spanned()) {
|
match row.get_data_by_key(column_name.borrow_spanned()) {
|
||||||
Some(group_key) => Ok(as_string(&group_key)?),
|
Some(group_key) => Ok(as_string(&group_key)?),
|
||||||
None => Err(suggestions(column_name.borrow_tagged(), &row)),
|
None => Err(suggestions(column_name.borrow_tagged(), &row)),
|
||||||
|
@ -91,7 +91,7 @@ pub fn split(
|
||||||
crate::utils::data::split(&values, &Some(block), &name)
|
crate::utils::data::split(&values, &Some(block), &name)
|
||||||
}
|
}
|
||||||
Grouper::ByColumn(None) => {
|
Grouper::ByColumn(None) => {
|
||||||
let block = Box::new(move |row: &Value| match as_string(row) {
|
let block = Box::new(move |_, row: &Value| match as_string(row) {
|
||||||
Ok(group_key) => Ok(group_key),
|
Ok(group_key) => Ok(group_key),
|
||||||
Err(reason) => Err(reason),
|
Err(reason) => Err(reason),
|
||||||
});
|
});
|
||||||
|
|
|
@ -7,16 +7,16 @@ use nu_value_ext::as_string;
|
||||||
#[allow(clippy::type_complexity)]
|
#[allow(clippy::type_complexity)]
|
||||||
pub fn group(
|
pub fn group(
|
||||||
values: &Value,
|
values: &Value,
|
||||||
grouper: &Option<Box<dyn Fn(&Value) -> Result<String, ShellError> + Send>>,
|
grouper: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
|
||||||
tag: impl Into<Tag>,
|
tag: impl Into<Tag>,
|
||||||
) -> Result<Value, ShellError> {
|
) -> Result<Value, ShellError> {
|
||||||
let tag = tag.into();
|
let tag = tag.into();
|
||||||
|
|
||||||
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();
|
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();
|
||||||
|
|
||||||
for value in values.table_entries() {
|
for (idx, value) in values.table_entries().enumerate() {
|
||||||
let group_key = if let Some(ref grouper) = grouper {
|
let group_key = if let Some(ref grouper) = grouper {
|
||||||
grouper(&value)
|
grouper(idx, &value)
|
||||||
} else {
|
} else {
|
||||||
as_string(&value)
|
as_string(&value)
|
||||||
};
|
};
|
||||||
|
|
|
@ -7,7 +7,7 @@ use crate::utils::data::group;
|
||||||
#[allow(clippy::type_complexity)]
|
#[allow(clippy::type_complexity)]
|
||||||
pub fn split(
|
pub fn split(
|
||||||
value: &Value,
|
value: &Value,
|
||||||
splitter: &Option<Box<dyn Fn(&Value) -> Result<String, ShellError> + Send>>,
|
splitter: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
|
||||||
tag: impl Into<Tag>,
|
tag: impl Into<Tag>,
|
||||||
) -> Result<Value, ShellError> {
|
) -> Result<Value, ShellError> {
|
||||||
let tag = tag.into();
|
let tag = tag.into();
|
||||||
|
|
|
@ -31,8 +31,50 @@ fn groups() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn errors_if_given_unknown_column_name_is_missing() {
|
fn errors_if_given_unknown_column_name() {
|
||||||
Playground::setup("group_by_test_2", |dirs, sandbox| {
|
Playground::setup("group_by_test_2", |dirs, sandbox| {
|
||||||
|
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||||
|
"los_tres_caballeros.json",
|
||||||
|
r#"
|
||||||
|
{
|
||||||
|
"nu": {
|
||||||
|
"committers": [
|
||||||
|
{"name": "Andrés N. Robalino"},
|
||||||
|
{"name": "Jonathan Turner"},
|
||||||
|
{"name": "Yehuda Katz"}
|
||||||
|
],
|
||||||
|
"releases": [
|
||||||
|
{"version": "0.2"}
|
||||||
|
{"version": "0.8"},
|
||||||
|
{"version": "0.9999999"}
|
||||||
|
],
|
||||||
|
"0xATYKARNU": [
|
||||||
|
["Th", "e", " "],
|
||||||
|
["BIG", " ", "UnO"],
|
||||||
|
["punto", "cero"]
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"#,
|
||||||
|
)]);
|
||||||
|
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
open los_tres_caballeros.json
|
||||||
|
| group-by { get nu.releases.version }
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert!(actual
|
||||||
|
.err
|
||||||
|
.contains("requires a table with one value for grouping"));
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn errors_if_block_given_evaluates_more_than_one_row() {
|
||||||
|
Playground::setup("group_by_test_3", |dirs, sandbox| {
|
||||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||||
"los_tres_caballeros.csv",
|
"los_tres_caballeros.csv",
|
||||||
r#"
|
r#"
|
||||||
|
|
Loading…
Reference in a new issue