From ea6912c3f7f59ebe404a35bbede12abf3bf64a61 Mon Sep 17 00:00:00 2001
From: Fernando Herrera
Date: Tue, 21 Dec 2021 23:35:02 +0000
Subject: [PATCH] missing commands (#549)

---
 .../src/dataframe/eager/drop_duplicates.rs    | 106 ++++++++++++++++++
 crates/nu-command/src/dataframe/eager/mod.rs  |   2 +
 2 files changed, 108 insertions(+)
 create mode 100644 crates/nu-command/src/dataframe/eager/drop_duplicates.rs

diff --git a/crates/nu-command/src/dataframe/eager/drop_duplicates.rs b/crates/nu-command/src/dataframe/eager/drop_duplicates.rs
new file mode 100644
index 0000000000..4d61a95e11
--- /dev/null
+++ b/crates/nu-command/src/dataframe/eager/drop_duplicates.rs
@@ -0,0 +1,106 @@
+use nu_engine::CallExt;
+use nu_protocol::{
+    ast::Call,
+    engine::{Command, EngineState, Stack},
+    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
+};
+
+use super::super::values::utils::convert_columns_string;
+use super::super::values::{Column, NuDataFrame};
+
+#[derive(Clone)]
+pub struct DropDuplicates;
+
+impl Command for DropDuplicates {
+    fn name(&self) -> &str {
+        "dfr drop-duplicates"
+    }
+
+    fn usage(&self) -> &str {
+        "Drops duplicate values in dataframe"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build(self.name())
+            .optional(
+                "subset",
+                SyntaxShape::Table,
+                "subset of columns to drop duplicates",
+            )
+            .switch("maintain", "maintain order", Some('m'))
+            .category(Category::Custom("dataframe".into()))
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "drop duplicates",
+            example: "[[a b]; [1 2] [3 4] [1 2]] | dfr to-df | dfr drop-duplicates",
+            result: Some(
+                NuDataFrame::try_from_columns(vec![
+                    Column::new(
+                        "a".to_string(),
+                        vec![Value::test_int(1), Value::test_int(3)],
+                    ),
+                    Column::new(
+                        "b".to_string(),
+                        vec![Value::test_int(2), Value::test_int(4)],
+                    ),
+                ])
+                .expect("simple df for test should not fail")
+                .into_value(Span::test_data()),
+            ),
+        }]
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        command(engine_state, stack, call, input)
+    }
+}
+
+fn command(
+    engine_state: &EngineState,
+    stack: &mut Stack,
+    call: &Call,
+    input: PipelineData,
+) -> Result<PipelineData, ShellError> {
+    let columns: Option<Vec<Value>> = call.opt(engine_state, stack, 0)?;
+    let (subset, col_span) = match columns {
+        Some(cols) => {
+            let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
+            (Some(agg_string), col_span)
+        }
+        None => (None, call.head),
+    };
+
+    let df = NuDataFrame::try_from_pipeline(input, call.head)?;
+
+    let subset_slice = subset.as_ref().map(|cols| &cols[..]);
+
+    df.as_ref()
+        .drop_duplicates(call.has_flag("maintain"), subset_slice)
+        .map_err(|e| {
+            ShellError::SpannedLabeledError(
+                "Error dropping duplicates".into(),
+                e.to_string(),
+                col_span,
+            )
+        })
+        .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
+}
+
+#[cfg(test)]
+mod test {
+    use super::super::super::test_dataframe::test_dataframe;
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        test_dataframe(vec![Box::new(DropDuplicates {})])
+    }
+}
diff --git a/crates/nu-command/src/dataframe/eager/mod.rs b/crates/nu-command/src/dataframe/eager/mod.rs
index 493779916b..1177bd2059 100644
--- a/crates/nu-command/src/dataframe/eager/mod.rs
+++ b/crates/nu-command/src/dataframe/eager/mod.rs
@@ -4,6 +4,7 @@ mod column;
 mod command;
 mod describe;
 mod drop;
+mod drop_duplicates;
 mod drop_nulls;
 mod dtypes;
 mod dummies;
@@ -36,6 +37,7 @@ pub use column::ColumnDF;
 pub use command::Dataframe;
 pub use describe::DescribeDF;
 pub use drop::DropDF;
+pub use drop_duplicates::DropDuplicates;
 pub use drop_nulls::DropNulls;
 pub use dtypes::DataTypes;
 pub use dummies::Dummies;