diff --git a/crates/nu-command/src/commands/dataframe/append.rs b/crates/nu-command/src/commands/dataframe/append.rs new file mode 100644 index 0000000000..a2f23ba4e0 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/append.rs @@ -0,0 +1,138 @@ +use crate::prelude::*; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Axis, Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, Value, +}; +use nu_source::Tagged; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe append" + } + + fn usage(&self) -> &str { + "[DataFrame] Appends a new dataframe" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe append") + .required_named( + "other", + SyntaxShape::Any, + "dataframe to be appended", + Some('o'), + ) + .required_named( + "axis", + SyntaxShape::String, + "row or col axis orientation", + Some('a'), + ) + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Appends a dataframe as new columns", + example: r#"let a = ([[a b]; [1 2] [3 4]] | dataframe to-df); + $a | dataframe append -o $a -a row"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()], + ), + Column::new( + "b".to_string(), + vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()], + ), + Column::new( + "a_x".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()], + ), + Column::new( + "b_x".to_string(), + vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }, + Example { + description: "Appends a dataframe merging at the end of columns", + example: r#"let a = ([[a b]; [1 2] [3 4]] | dataframe to-df); + $a | dataframe append -o $a -a col"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![ + UntaggedValue::int(1).into(), + UntaggedValue::int(3).into(), + UntaggedValue::int(1).into(), + UntaggedValue::int(3).into(), + ], + ), + Column::new( + "b".to_string(), + vec![ + UntaggedValue::int(2).into(), + UntaggedValue::int(4).into(), + UntaggedValue::int(2).into(), + UntaggedValue::int(4).into(), + ], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }, + ] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + let other: Value = args.req_named("other")?; + let axis: Tagged = args.req_named("axis")?; + + let axis = Axis::try_from_str(axis.item.as_str(), &axis.tag.span)?; + + let df_other = match other.value { + UntaggedValue::DataFrame(df) => Ok(df), + _ => Err(ShellError::labeled_error( + "Incorrect type", + "can only append a dataframe to a dataframe", + other.tag.span, + )), + }?; + + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + + let df_new = df.append_df(&df_other, axis, &tag.span)?; + Ok(OutputStream::one(df_new.into_value(tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/mod.rs b/crates/nu-command/src/commands/dataframe/mod.rs index dcb9fcf3bd..e6df70e419 100644 --- a/crates/nu-command/src/commands/dataframe/mod.rs +++ b/crates/nu-command/src/commands/dataframe/mod.rs @@ -1,4 +1,5 @@ pub mod aggregate; +pub mod append; pub mod column; pub mod command; pub mod drop; @@ -31,6 +32,7 @@ pub mod where_; pub mod with_column; pub use aggregate::DataFrame as DataFrameAggregate; +pub use append::DataFrame as DataFrameAppend; pub use column::DataFrame as DataFrameColumn; pub use command::Command as DataFrame; pub use drop::DataFrame as DataFrameDrop; diff --git a/crates/nu-command/src/commands/mod.rs b/crates/nu-command/src/commands/mod.rs index 00ee4a16b1..210c733611 100644 --- a/crates/nu-command/src/commands/mod.rs +++ b/crates/nu-command/src/commands/mod.rs @@ -25,9 +25,9 @@ pub use conversions::*; pub use core_commands::*; #[cfg(feature = "dataframe")] pub use dataframe::{ - DataFrame, DataFrameAggregate, DataFrameAllFalse, DataFrameAllTrue, DataFrameArgMax, - DataFrameArgMin, DataFrameArgSort, DataFrameArgTrue, DataFrameArgUnique, DataFrameColumn, - DataFrameConcatenate, DataFrameContains, DataFrameDTypes, DataFrameDrop, + DataFrame, DataFrameAggregate, DataFrameAllFalse, DataFrameAllTrue, DataFrameAppend, + DataFrameArgMax, DataFrameArgMin, DataFrameArgSort, DataFrameArgTrue, DataFrameArgUnique, + DataFrameColumn, DataFrameConcatenate, DataFrameContains, DataFrameDTypes, DataFrameDrop, DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, DataFrameFilter, DataFrameFirst, DataFrameGet, DataFrameGroupBy, DataFrameIsDuplicated, DataFrameIsIn, DataFrameIsNotNull, DataFrameIsNull, DataFrameIsUnique, DataFrameJoin, DataFrameLast, DataFrameList, DataFrameMelt, diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index 61917cf2cf..e07fd4daef 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -328,6 +328,7 @@ pub fn create_default_context(interactive: bool) -> Result Result { match operator { - Operator::Plus => { - let mut columns: Vec<&str> = Vec::new(); - - let new = lhs - .as_ref() - .get_columns() - .iter() - .chain(rhs.as_ref().get_columns().iter()) - .map(|s| { - let name = if columns.contains(&s.name()) { - format!("{}_{}", s.name(), "x") - } else { - columns.push(s.name()); - s.name().to_string() - }; - - let mut series = s.clone(); - series.rename(name.as_str()); - series - }) - .collect::>(); - - match DataFrame::new(new) { - Ok(df) => Ok(NuDataFrame::dataframe_to_untagged(df)), - Err(e) => Ok(UntaggedValue::Error(ShellError::labeled_error( - "Appending error", - format!("{}", e), - operation_span, - ))), - } - } + Operator::Plus => match lhs.append_df(rhs, Axis::Row, operation_span) { + Ok(df) => Ok(df.into_untagged()), + Err(e) => Ok(UntaggedValue::Error(ShellError::labeled_error( + "Appending error", + format!("{}", e), + operation_span, + ))), + }, _ => Ok(UntaggedValue::Error(ShellError::labeled_error( "Incorrect datatype", "unable to use this datatype for this operation", diff --git a/crates/nu-protocol/src/dataframe/mod.rs b/crates/nu-protocol/src/dataframe/mod.rs index 985c11c93f..cd409d4491 100644 --- a/crates/nu-protocol/src/dataframe/mod.rs +++ b/crates/nu-protocol/src/dataframe/mod.rs @@ -1,8 +1,10 @@ pub mod nu_dataframe; pub mod nu_groupby; +pub mod operations; pub use nu_dataframe::{Column, NuDataFrame}; pub use nu_groupby::NuGroupBy; +pub use operations::Axis; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)] diff --git a/crates/nu-protocol/src/dataframe/operations.rs b/crates/nu-protocol/src/dataframe/operations.rs new file mode 100644 index 0000000000..9140d9474d --- /dev/null +++ b/crates/nu-protocol/src/dataframe/operations.rs @@ -0,0 +1,120 @@ +use nu_errors::ShellError; +use nu_source::Span; +use polars::prelude::{DataFrame, Series}; + +use super::NuDataFrame; + +pub enum Axis { + Row, + Column, +} + +impl Axis { + pub fn try_from_str(axis: &str, span: &Span) -> Result { + match axis { + "row" => Ok(Axis::Row), + "col" => Ok(Axis::Column), + _ => Err(ShellError::labeled_error_with_secondary( + "Wrong axis", + "The selected axis does not exist", + span, + "The only axis options are 'row' or 'col'", + span, + )), + } + } +} + +impl NuDataFrame { + pub fn append_df( + &self, + other: &NuDataFrame, + axis: Axis, + span: &Span, + ) -> Result { + match axis { + Axis::Row => { + let mut columns: Vec<&str> = Vec::new(); + + let new_cols = self + .as_ref() + .get_columns() + .iter() + .chain(other.as_ref().get_columns().iter()) + .map(|s| { + let name = if columns.contains(&s.name()) { + format!("{}_{}", s.name(), "x") + } else { + columns.push(s.name()); + s.name().to_string() + }; + + let mut series = s.clone(); + series.rename(name.as_str()); + series + }) + .collect::>(); + + let df_new = DataFrame::new(new_cols).map_err(|e| { + ShellError::labeled_error("Appending error", format!("{}", e), span) + })?; + + Ok(NuDataFrame::new(df_new)) + } + Axis::Column => { + if self.as_ref().width() != other.as_ref().width() { + return Err(ShellError::labeled_error( + "Appending error", + "Dataframes with different number of columns", + span, + )); + } + + if !self + .as_ref() + .get_column_names() + .iter() + .all(|col| other.as_ref().get_column_names().contains(col)) + { + return Err(ShellError::labeled_error( + "Appending error", + "Dataframes with different columns names", + span, + )); + } + + let new_cols = self + .as_ref() + .get_columns() + .iter() + .map(|s| { + let other_col = other + .as_ref() + .column(s.name()) + .expect("Already checked that dataframes have same columns"); + + let mut tmp = s.clone(); + let res = tmp.append(other_col); + + match res { + Ok(s) => Ok(s.clone()), + Err(e) => Err({ + ShellError::labeled_error( + "Appending error", + format!("Unable to append dataframes: {}", e), + span, + ) + }), + } + }) + .collect::, ShellError>>()?; + + let df_new = DataFrame::new(new_cols).map_err(|e| { + ShellError::labeled_error("Appending error", format!("{}", e), span) + })?; + + Ok(NuDataFrame::new(df_new)) + } + } + } +}