From def5869c1c67aa88e28378661f6cc017a3387fb4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?=
Date: Sun, 30 Jan 2022 18:29:21 -0500
Subject: [PATCH] command(split-by) (#897)

---
Reviewer notes (free text after `---`; not part of the commit):
  * Registers a new `split-by` filter: adds `SplitBy` to the default
    context and declares/re-exports it from `filters/mod.rs`.
  * `split_by` reads the optional positional `splitter`; when it is absent
    the command fails with the "expected name" error, so the argument is
    effectively required at run time.
  * `data_split` only accepts a `PipelineData::Value(Value::Record { .. })`
    input; anything else yields the "unsupported input" error. Each value
    of the input record is passed to `super::group_by::data_group`, and the
    resulting groups are re-keyed as { group key => { input column => rows } }.
  * NOTE(review): the author address is missing from the `From:` header and
    the leading whitespace of context lines / the raw-string example was
    rebuilt by convention -- confirm both against the tree before `git am`.

 crates/nu-command/src/default_context.rs  |   1 +
 crates/nu-command/src/filters/mod.rs      |   2 +
 crates/nu-command/src/filters/split_by.rs | 270 ++++++++++++++++++++++
 3 files changed, 273 insertions(+)
 create mode 100644 crates/nu-command/src/filters/split_by.rs

diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index a20f13b864..184fe2d94c 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -71,6 +71,7 @@ pub fn create_default_context(cwd: impl AsRef<Path>) -> EngineState {
             Flatten,
             Get,
             GroupBy,
+            SplitBy,
             Keep,
             Merge,
             Move,
diff --git a/crates/nu-command/src/filters/mod.rs b/crates/nu-command/src/filters/mod.rs
index 31dd2fa1c9..7f5e14cb9d 100644
--- a/crates/nu-command/src/filters/mod.rs
+++ b/crates/nu-command/src/filters/mod.rs
@@ -33,6 +33,7 @@ mod select;
 mod shuffle;
 mod skip;
 mod sort_by;
+mod split_by;
 mod transpose;
 mod uniq;
 mod update;
@@ -76,6 +77,7 @@ pub use select::Select;
 pub use shuffle::Shuffle;
 pub use skip::*;
 pub use sort_by::SortBy;
+pub use split_by::SplitBy;
 pub use transpose::Transpose;
 pub use uniq::*;
 pub use update::Update;
diff --git a/crates/nu-command/src/filters/split_by.rs b/crates/nu-command/src/filters/split_by.rs
new file mode 100644
index 0000000000..e559165e2e
--- /dev/null
+++ b/crates/nu-command/src/filters/split_by.rs
@@ -0,0 +1,270 @@
+use nu_engine::CallExt;
+use nu_protocol::ast::Call;
+use nu_protocol::engine::{Command, EngineState, Stack};
+use nu_protocol::{
+    Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
+};
+
+#[derive(Clone)]
+pub struct SplitBy;
+
+impl Command for SplitBy {
+    fn name(&self) -> &str {
+        "split-by"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("split-by").optional(
+            "splitter",
+            SyntaxShape::Any,
+            "the splitter value to use",
+        )
+    }
+
+    fn usage(&self) -> &str {
+        "Create a new table splitted."
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        split_by(engine_state, stack, call, input)
+    }
+
+    #[allow(clippy::unwrap_used)]
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "split items by column named \"lang\"",
+            example: r#"
+                {
+                    '2019': [
+                      { name: 'andres', lang: 'rb', year: '2019' },
+                      { name: 'jt', lang: 'rs', year: '2019' }
+                    ],
+                    '2021': [
+                      { name: 'storm', lang: 'rs', 'year': '2021' }
+                    ]
+                } | split-by lang
+            "#,
+            result: Some(Value::Record {
+                cols: vec!["rb".to_string(), "rs".to_string()],
+                vals: vec![
+                    Value::Record {
+                        cols: vec!["2019".to_string()],
+                        vals: vec![Value::List {
+                            vals: vec![Value::Record {
+                                cols: vec![
+                                    "name".to_string(),
+                                    "lang".to_string(),
+                                    "year".to_string(),
+                                ],
+                                vals: vec![
+                                    Value::test_string("andres"),
+                                    Value::test_string("rb"),
+                                    Value::test_string("2019"),
+                                ],
+                                span: Span::test_data(),
+                            }],
+                            span: Span::test_data(),
+                        }],
+                        span: Span::test_data(),
+                    },
+                    Value::Record {
+                        cols: vec!["2019".to_string(), "2021".to_string()],
+                        vals: vec![
+                            Value::List {
+                                vals: vec![Value::Record {
+                                    cols: vec![
+                                        "name".to_string(),
+                                        "lang".to_string(),
+                                        "year".to_string(),
+                                    ],
+                                    vals: vec![
+                                        Value::test_string("jt"),
+                                        Value::test_string("rs"),
+                                        Value::test_string("2019"),
+                                    ],
+                                    span: Span::test_data(),
+                                }],
+                                span: Span::test_data(),
+                            },
+                            Value::List {
+                                vals: vec![Value::Record {
+                                    cols: vec![
+                                        "name".to_string(),
+                                        "lang".to_string(),
+                                        "year".to_string(),
+                                    ],
+                                    vals: vec![
+                                        Value::test_string("storm"),
+                                        Value::test_string("rs"),
+                                        Value::test_string("2021"),
+                                    ],
+                                    span: Span::test_data(),
+                                }],
+                                span: Span::test_data(),
+                            },
+                        ],
+                        span: Span::test_data(),
+                    },
+                ],
+                span: Span::test_data(),
+            }),
+        }]
+    }
+}
+
+enum Grouper {
+    ByColumn(Option<Spanned<String>>),
+}
+
+pub fn split_by(
+    engine_state: &EngineState,
+    stack: &mut Stack,
+    call: &Call,
+    input: PipelineData,
+) -> Result<PipelineData, ShellError> {
+    let name = call.head;
+
+    let splitter: Option<Value> = call.opt(engine_state, stack, 0)?;
+
+    match splitter {
+        Some(v) => {
+            let splitter = Some(Spanned {
+                item: v.as_string()?,
+                span: name,
+            });
+            Ok(split(&splitter, input, name)?)
+        }
+        None => Err(ShellError::SpannedLabeledError(
+            "expected name".into(),
+            "requires a column name for splitting".into(),
+            name,
+        )),
+    }
+}
+
+pub fn split(
+    column_name: &Option<Spanned<String>>,
+    values: PipelineData,
+    span: Span,
+) -> Result<PipelineData, ShellError> {
+    let grouper = if let Some(column_name) = column_name {
+        Grouper::ByColumn(Some(column_name.clone()))
+    } else {
+        Grouper::ByColumn(None)
+    };
+
+    match grouper {
+        Grouper::ByColumn(Some(column_name)) => {
+            let block =
+                Box::new(
+                    move |_, row: &Value| match row.get_data_by_key(&column_name.item) {
+                        Some(group_key) => Ok(group_key.as_string()?),
+                        None => Err(ShellError::CantFindColumn(
+                            column_name.span,
+                            row.span().unwrap_or(column_name.span),
+                        )),
+                    },
+                );
+
+            data_split(values, &Some(block), span)
+        }
+        Grouper::ByColumn(None) => {
+            let block = Box::new(move |_, row: &Value| row.as_string());
+
+            data_split(values, &Some(block), span)
+        }
+    }
+}
+
+#[allow(clippy::type_complexity)]
+pub fn data_split(
+    value: PipelineData,
+    splitter: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
+    span: Span,
+) -> Result<PipelineData, ShellError> {
+    let mut splits = indexmap::IndexMap::new();
+
+    let mut cols = vec![];
+    let mut vals = vec![];
+
+    match value {
+        PipelineData::Value(
+            Value::Record {
+                cols,
+                vals: grouped_rows,
+                span,
+            },
+            _,
+        ) => {
+            for (idx, list) in grouped_rows.iter().enumerate() {
+                match super::group_by::data_group(list, splitter, span) {
+                    Ok(grouped) => {
+                        if let Value::Record {
+                            vals: li,
+                            cols: sub_cols,
+                            ..
+                        } = grouped
+                        {
+                            for (inner_idx, subset) in li.iter().enumerate() {
+                                let s = splits
+                                    .entry(sub_cols[inner_idx].clone())
+                                    .or_insert(indexmap::IndexMap::new());
+
+                                s.insert(cols[idx].clone(), subset.clone());
+                            }
+                        }
+                    }
+                    Err(reason) => return Err(reason),
+                }
+            }
+        }
+        _ => {
+            return Err(ShellError::SpannedLabeledError(
+                "unsupported input".into(),
+                "requires a table with one row for splitting".into(),
+                span,
+            ))
+        }
+    }
+
+    for (k, rows) in splits {
+        cols.push(k.to_string());
+
+        let mut sub_cols = vec![];
+        let mut sub_vals = vec![];
+
+        for (k, v) in rows {
+            sub_cols.push(k);
+            sub_vals.push(v);
+        }
+
+        vals.push(Value::Record {
+            cols: sub_cols,
+            vals: sub_vals,
+            span,
+        });
+    }
+
+    Ok(PipelineData::Value(
+        Value::Record { cols, vals, span },
+        None,
+    ))
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(SplitBy {})
+    }
+}