diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index 014ff2ff95..c35462538c 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -164,6 +164,7 @@ pub fn create_default_context() -> EngineState {
             ToTsv,
             ToCsv,
             Touch,
+            Uniq,
             Use,
             Update,
             Where,
diff --git a/crates/nu-command/src/filters/mod.rs b/crates/nu-command/src/filters/mod.rs
index 718a006bf0..7f99013c21 100644
--- a/crates/nu-command/src/filters/mod.rs
+++ b/crates/nu-command/src/filters/mod.rs
@@ -17,6 +17,7 @@ mod reverse;
 mod select;
 mod shuffle;
 mod skip;
+mod uniq;
 mod update;
 mod where_;
 mod wrap;
@@ -41,6 +42,7 @@ pub use reverse::Reverse;
 pub use select::Select;
 pub use shuffle::Shuffle;
 pub use skip::*;
+pub use uniq::*;
 pub use update::Update;
 pub use where_::Where;
 pub use wrap::Wrap;
diff --git a/crates/nu-command/src/filters/uniq.rs b/crates/nu-command/src/filters/uniq.rs
new file mode 100644
index 0000000000..53a60229f7
--- /dev/null
+++ b/crates/nu-command/src/filters/uniq.rs
@@ -0,0 +1,192 @@
+use nu_protocol::ast::Call;
+use nu_protocol::engine::{Command, EngineState, Stack};
+use nu_protocol::{
+    Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Value,
+};
+
+/// The `uniq` filter: collapses repeated input values, optionally counting them.
+#[derive(Clone)]
+pub struct Uniq;
+
+impl Command for Uniq {
+    fn name(&self) -> &str {
+        "uniq"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("uniq")
+            .switch("count", "Count the unique rows", Some('c'))
+            .switch("repeated", "Only return values that occur more than once", Some('d'))
+            .switch(
+                "ignore-case",
+                "Ignore differences in case when comparing",
+                Some('i'),
+            )
+            .switch("unique", "Only return unique values", Some('u'))
+            .category(Category::Filters)
+    }
+
+    fn usage(&self) -> &str {
+        "Return the unique rows."
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        uniq(engine_state, stack, call, input)
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![
+            Example {
+                description: "Remove duplicate rows of a list/table",
+                example: "[2 3 3 4] | uniq",
+                result: Some(Value::List {
+                    vals: vec![Value::test_int(2), Value::test_int(3), Value::test_int(4)],
+                    span: Span::unknown(),
+                }),
+            },
+            Example {
+                description: "Only print duplicate lines, one for each group",
+                example: "[1 2 2] | uniq -d",
+                result: Some(Value::test_int(2)),
+            },
+            Example {
+                description: "Only print unique lines",
+                example: "[1 2 2] | uniq -u",
+                result: Some(Value::test_int(1)),
+            },
+            Example {
+                description: "Ignore differences in case when comparing",
+                example: "['hello' 'goodbye' 'Hello'] | uniq -i",
+                result: Some(Value::List {
+                    vals: vec![Value::test_string("hello"), Value::test_string("goodbye")],
+                    span: Span::unknown(),
+                }),
+            },
+            Example {
+                description: "Remove duplicate rows and show counts of a list/table",
+                example: "[1 2 2] | uniq -c",
+                result: Some(Value::List {
+                    vals: vec![
+                        Value::Record {
+                            cols: vec!["value".to_string(), "count".to_string()],
+                            vals: vec![Value::test_int(1), Value::test_int(1)],
+                            span: Span::unknown(),
+                        },
+                        Value::Record {
+                            cols: vec!["value".to_string(), "count".to_string()],
+                            vals: vec![Value::test_int(2), Value::test_int(2)],
+                            span: Span::unknown(),
+                        },
+                    ],
+                    span: Span::unknown(),
+                }),
+            },
+        ]
+    }
+}
+
+/// Lowercases `value` when it is a string; every other kind of value is returned untouched.
+fn to_lowercase(value: Value) -> Value {
+    match value {
+        Value::String { val, span } => Value::String {
+            val: val.to_lowercase(),
+            span,
+        },
+        other => other,
+    }
+}
+
+/// Groups equal input values in first-seen order and applies the `uniq` flags.
+///
+/// * `--ignore-case` lowercases string values before comparing, so the emitted values are
+///   lowercased too (only a top-level `Value::String` is affected).
+/// * `--repeated` keeps values seen more than once; `--unique` keeps values seen exactly
+///   once. When both flags are given both filters run, which leaves nothing.
+/// * `--count` turns each row into a record with `value` and `count` columns.
+///
+/// A single surviving row is returned as a bare value; any other number of rows is returned
+/// as a list.
+fn uniq(
+    _engine_state: &EngineState,
+    _stack: &mut Stack,
+    call: &Call,
+    input: PipelineData,
+) -> Result<PipelineData, ShellError> {
+    let head = call.head;
+    let should_show_count = call.has_flag("count");
+    let show_repeated = call.has_flag("repeated");
+    let ignore_case = call.has_flag("ignore-case");
+    let only_uniques = call.has_flag("unique");
+
+    // Tally occurrences with a linear `==` scan; new values are appended, so rows stay in
+    // first-seen order.
+    let mut counts: Vec<(Value, i64)> = Vec::new();
+    for line in input.into_iter() {
+        let item = if ignore_case {
+            to_lowercase(line)
+        } else {
+            line
+        };
+
+        match counts.iter_mut().find(|entry| entry.0 == item) {
+            Some(entry) => entry.1 += 1,
+            None => counts.push((item, 1)),
+        }
+    }
+
+    if show_repeated {
+        counts.retain(|entry| entry.1 > 1);
+    }
+    if only_uniques {
+        counts.retain(|entry| entry.1 == 1);
+    }
+
+    let mut rows: Vec<Value> = if should_show_count {
+        counts
+            .into_iter()
+            .map(|(value, count)| Value::Record {
+                cols: vec!["value".to_string(), "count".to_string()],
+                vals: vec![
+                    value,
+                    Value::Int {
+                        val: count,
+                        span: head,
+                    },
+                ],
+                span: head,
+            })
+            .collect()
+    } else {
+        counts.into_iter().map(|(value, _)| value).collect()
+    };
+
+    // keeps the original Nushell semantics: a lone result is not wrapped in a list
+    if rows.len() == 1 {
+        // `len() == 1` guarantees index 0 exists, so `swap_remove(0)` cannot panic.
+        Ok(rows.swap_remove(0).into_pipeline_data())
+    } else {
+        Ok(Value::List {
+            vals: rows,
+            span: head,
+        }
+        .into_pipeline_data())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(Uniq {})
+    }
+}