diff --git a/crates/nu-command/src/dataframe/mod.rs b/crates/nu-command/src/dataframe/mod.rs index a4f39e6726..f77f2eb31a 100644 --- a/crates/nu-command/src/dataframe/mod.rs +++ b/crates/nu-command/src/dataframe/mod.rs @@ -1,10 +1,3 @@ -mod describe; -mod dtypes; -mod objects; -mod open; -mod to_df; +mod nu_dataframe; -pub use describe::DescribeDF; -pub use dtypes::DataTypes; -pub use open::OpenDataFrame; -pub use to_df::ToDataFrame; +pub use nu_dataframe::commands::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame}; diff --git a/crates/nu-command/src/dataframe/objects/nu_dataframe/between_values.rs b/crates/nu-command/src/dataframe/nu_dataframe/between_values.rs similarity index 99% rename from crates/nu-command/src/dataframe/objects/nu_dataframe/between_values.rs rename to crates/nu-command/src/dataframe/nu_dataframe/between_values.rs index fe8a5ed4b6..528777bd2c 100644 --- a/crates/nu-command/src/dataframe/objects/nu_dataframe/between_values.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/between_values.rs @@ -18,7 +18,7 @@ pub fn between_dataframes( let operation_span = span(&[left.span()?, right.span()?]); match operator.item { Operator::Plus => match lhs.append_df(rhs, Axis::Row, operation_span) { - Ok(df) => Ok(df.to_value(operation_span)), + Ok(df) => Ok(df.into_value(operation_span)), Err(e) => Err(e), }, _ => Err(ShellError::OperatorMismatch { diff --git a/crates/nu-command/src/dataframe/describe.rs b/crates/nu-command/src/dataframe/nu_dataframe/commands/describe.rs similarity index 67% rename from crates/nu-command/src/dataframe/describe.rs rename to crates/nu-command/src/dataframe/nu_dataframe/commands/describe.rs index dfa5bc7b39..1a0a06af18 100644 --- a/crates/nu-command/src/dataframe/describe.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/commands/describe.rs @@ -1,9 +1,11 @@ -use super::objects::nu_dataframe::NuDataFrame; +use crate::dataframe::nu_dataframe::Column; + +use super::super::NuDataFrame; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, + Category, Example, PipelineData, ShellError, Signature, Span, }; use polars::{ chunked_array::ChunkedArray, @@ -31,8 +33,58 @@ impl Command for DescribeDF { fn examples(&self) -> Vec { vec![Example { description: "dataframe description", - example: "[[a b]; [1 1] [1 1]] | to-df | describe", - result: None, + example: "[[a b]; [1 1] [1 1]] | to df | describe", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "descriptor".to_string(), + vec![ + "count".to_string().into(), + "sum".to_string().into(), + "mean".to_string().into(), + "median".to_string().into(), + "std".to_string().into(), + "min".to_string().into(), + "25%".to_string().into(), + "50%".to_string().into(), + "75%".to_string().into(), + "max".to_string().into(), + ], + ), + Column::new( + "a (i64)".to_string(), + vec![ + 2.0.into(), + 2.0.into(), + 1.0.into(), + 1.0.into(), + 0.0.into(), + 1.0.into(), + 1.0.into(), + 1.0.into(), + 1.0.into(), + 1.0.into(), + ], + ), + Column::new( + "b (i64)".to_string(), + vec![ + 2.0.into(), + 2.0.into(), + 1.0.into(), + 1.0.into(), + 0.0.into(), + 1.0.into(), + 1.0.into(), + 1.0.into(), + 1.0.into(), + 1.0.into(), + ], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::unknown()), + ), }] } @@ -181,3 +233,14 @@ fn command( df, call.head, ))) } + +#[cfg(test)] +mod test { + use super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(DescribeDF {}) + } +} diff --git a/crates/nu-command/src/dataframe/dtypes.rs b/crates/nu-command/src/dataframe/nu_dataframe/commands/dtypes.rs similarity index 65% rename from crates/nu-command/src/dataframe/dtypes.rs rename to crates/nu-command/src/dataframe/nu_dataframe/commands/dtypes.rs index ae0b35ded3..db6e7b50f4 100644 --- a/crates/nu-command/src/dataframe/dtypes.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/commands/dtypes.rs @@ -1,8 +1,8 @@ -use super::objects::nu_dataframe::{Column, NuDataFrame}; +use super::super::{Column, NuDataFrame}; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, Value, + Category, Example, PipelineData, ShellError, Signature, Span, Value, }; #[derive(Clone)] @@ -24,8 +24,21 @@ impl Command for DataTypes { fn examples(&self) -> Vec { vec![Example { description: "drop column a", - example: "[[a b]; [1 2] [3 4]] | to-df | dtypes", - result: None, + example: "[[a b]; [1 2] [3 4]] | to df | dtypes", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "column".to_string(), + vec!["a".to_string().into(), "b".to_string().into()], + ), + Column::new( + "dtype".to_string(), + vec!["i64".to_string().into(), "i64".to_string().into()], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::unknown()), + ), }] } @@ -78,5 +91,16 @@ fn command( let dtypes_col = Column::new("dtype".to_string(), dtypes); let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col])?; - Ok(PipelineData::Value(df.to_value(call.head))) + Ok(PipelineData::Value(df.into_value(call.head))) +} + +#[cfg(test)] +mod test { + use super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(DataTypes {}) + } } diff --git a/crates/nu-command/src/dataframe/nu_dataframe/commands/mod.rs b/crates/nu-command/src/dataframe/nu_dataframe/commands/mod.rs new file mode 100644 index 0000000000..c08d34988a --- /dev/null +++ b/crates/nu-command/src/dataframe/nu_dataframe/commands/mod.rs @@ -0,0 +1,12 @@ +mod describe; +mod dtypes; +mod open; +mod to_df; + +pub use describe::DescribeDF; +pub use dtypes::DataTypes; +pub use open::OpenDataFrame; +pub use to_df::ToDataFrame; + +#[cfg(test)] +mod test_dataframe; diff --git a/crates/nu-command/src/dataframe/open.rs b/crates/nu-command/src/dataframe/nu_dataframe/commands/open.rs similarity index 99% rename from crates/nu-command/src/dataframe/open.rs rename to crates/nu-command/src/dataframe/nu_dataframe/commands/open.rs index af8a8d4819..d309676426 100644 --- a/crates/nu-command/src/dataframe/open.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/commands/open.rs @@ -1,12 +1,11 @@ -use std::{fs::File, path::PathBuf}; - -use super::objects::nu_dataframe::NuDataFrame; +use super::super::NuDataFrame; use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, }; +use std::{fs::File, path::PathBuf}; use polars::prelude::{CsvEncoding, CsvReader, JsonReader, ParquetReader, SerReader}; diff --git a/crates/nu-command/src/dataframe/nu_dataframe/commands/test_dataframe.rs b/crates/nu-command/src/dataframe/nu_dataframe/commands/test_dataframe.rs new file mode 100644 index 0000000000..653f3936f9 --- /dev/null +++ b/crates/nu-command/src/dataframe/nu_dataframe/commands/test_dataframe.rs @@ -0,0 +1,87 @@ +use nu_engine::eval_block; +use nu_parser::parse; +use nu_protocol::{ + engine::{Command, EngineState, Stack, StateWorkingSet}, + PipelineData, Span, Value, CONFIG_VARIABLE_ID, +}; + +use super::ToDataFrame; + +pub fn test_dataframe(cmd: impl Command + 'static) { + let examples = cmd.examples(); + let mut engine_state = Box::new(EngineState::new()); + + let delta = { + // Base functions that are needed for testing + // Try to keep this working set small to keep tests running as fast as possible + let mut working_set = StateWorkingSet::new(&*engine_state); + working_set.add_decl(Box::new(ToDataFrame)); + + // Adding the command that is being tested to the working set + working_set.add_decl(Box::new(cmd)); + + working_set.render() + }; + + let _ = engine_state.merge_delta(delta); + + for example in examples { + // Skip tests that don't have results to compare to + if example.result.is_none() { + continue; + } + let start = std::time::Instant::now(); + + let (block, delta) = { + let mut working_set = StateWorkingSet::new(&*engine_state); + let (output, err) = parse(&mut working_set, None, example.example.as_bytes(), false); + + if let Some(err) = err { + panic!("test parse error in `{}`: {:?}", example.example, err) + } + + (output, working_set.render()) + }; + + let _ = engine_state.merge_delta(delta); + + let mut stack = Stack::new(); + + // Set up our initial config to start from + stack.vars.insert( + CONFIG_VARIABLE_ID, + Value::Record { + cols: vec![], + vals: vec![], + span: Span::unknown(), + }, + ); + + match eval_block( + &engine_state, + &mut stack, + &block, + PipelineData::new(Span::unknown()), + ) { + Err(err) => panic!("test eval error in `{}`: {:?}", example.example, err), + Ok(result) => { + let result = result.into_value(Span::unknown()); + println!("input: {}", example.example); + println!("result: {:?}", result); + println!("done: {:?}", start.elapsed()); + + // Note. Value implements PartialEq for Bool, Int, Float, String and Block + // If the command you are testing requires to compare another case, then + // you need to define its equality in the Value struct + if let Some(expected) = example.result { + if result != expected { + panic!( + "the example result is different to expected value: {:?} != {:?}", + result, expected + ) + } + } + } + } + } +} diff --git a/crates/nu-command/src/dataframe/nu_dataframe/commands/to_df.rs b/crates/nu-command/src/dataframe/nu_dataframe/commands/to_df.rs new file mode 100644 index 0000000000..6ce8845b82 --- /dev/null +++ b/crates/nu-command/src/dataframe/nu_dataframe/commands/to_df.rs @@ -0,0 +1,111 @@ +use super::super::{Column, NuDataFrame}; + +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, Span, +}; + +#[derive(Clone)] +pub struct ToDataFrame; + +impl Command for ToDataFrame { + fn name(&self) -> &str { + "to df" + } + + fn usage(&self) -> &str { + "Converts a List, Table or Dictionary into a dataframe" + } + + fn signature(&self) -> Signature { + Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Takes a dictionary and creates a dataframe", + example: "[[a b];[1 2] [3 4]] | to df", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![1.into(), 3.into()]), + Column::new("b".to_string(), vec![2.into(), 4.into()]), + ]) + .expect("simple df for test should not fail") + .into_value(Span::unknown()), + ), + }, + Example { + description: "Takes a list of tables and creates a dataframe", + example: "[[1 2 a] [3 4 b] [5 6 c]] | to df", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("0".to_string(), vec![1.into(), 3.into(), 5.into()]), + Column::new("1".to_string(), vec![2.into(), 4.into(), 6.into()]), + Column::new( + "2".to_string(), + vec![ + "a".to_string().into(), + "b".to_string().into(), + "c".to_string().into(), + ], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::unknown()), + ), + }, + Example { + description: "Takes a list and creates a dataframe", + example: "[a b c] | to df", + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "0".to_string(), + vec![ + "a".to_string().into(), + "b".to_string().into(), + "c".to_string().into(), + ], + )]) + .expect("simple df for test should not fail") + .into_value(Span::unknown()), + ), + }, + Example { + description: "Takes a list of booleans and creates a dataframe", + example: "[$true $true $false] | to df", + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "0".to_string(), + vec![true.into(), true.into(), false.into()], + )]) + .expect("simple df for test should not fail") + .into_value(Span::unknown()), + ), + }, + ] + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let df = NuDataFrame::try_from_iter(input.into_iter())?; + Ok(PipelineData::Value(NuDataFrame::into_value(df, call.head))) + } +} + +#[cfg(test)] +mod test { + use super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(ToDataFrame {}) + } +} diff --git a/crates/nu-command/src/dataframe/objects/nu_dataframe/conversion.rs b/crates/nu-command/src/dataframe/nu_dataframe/conversion.rs similarity index 99% rename from crates/nu-command/src/dataframe/objects/nu_dataframe/conversion.rs rename to crates/nu-command/src/dataframe/nu_dataframe/conversion.rs index c1be5511aa..daccdedbce 100644 --- a/crates/nu-command/src/dataframe/objects/nu_dataframe/conversion.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/conversion.rs @@ -1,4 +1,5 @@ use super::{DataFrameValue, NuDataFrame}; + use chrono::{DateTime, FixedOffset, NaiveDateTime}; use indexmap::map::{Entry, IndexMap}; use nu_protocol::{ShellError, Span, Value}; diff --git a/crates/nu-command/src/dataframe/objects/nu_dataframe/custom_value.rs b/crates/nu-command/src/dataframe/nu_dataframe/custom_value.rs similarity index 81% rename from crates/nu-command/src/dataframe/objects/nu_dataframe/custom_value.rs rename to crates/nu-command/src/dataframe/nu_dataframe/custom_value.rs index a02acbf561..842d0b1159 100644 --- a/crates/nu-command/src/dataframe/objects/nu_dataframe/custom_value.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/custom_value.rs @@ -48,7 +48,17 @@ impl CustomValue for NuDataFrame { fn follow_path_string(&self, column_name: String, span: Span) -> Result { let column = self.column(&column_name, span)?; - Ok(column.to_value(span)) + Ok(column.into_value(span)) + } + + fn partial_cmp(&self, other: &Value) -> Option { + match other { + Value::CustomValue { val, .. } => val + .as_any() + .downcast_ref::() + .and_then(|other| self.is_equal(other)), + _ => None, + } } fn operation( diff --git a/crates/nu-command/src/dataframe/objects/nu_dataframe/mod.rs b/crates/nu-command/src/dataframe/nu_dataframe/mod.rs similarity index 80% rename from crates/nu-command/src/dataframe/objects/nu_dataframe/mod.rs rename to crates/nu-command/src/dataframe/nu_dataframe/mod.rs index bec15ebabe..8517836df4 100644 --- a/crates/nu-command/src/dataframe/objects/nu_dataframe/mod.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/mod.rs @@ -1,15 +1,17 @@ +pub mod commands; + mod between_values; mod conversion; mod custom_value; mod operations; -use std::{cmp::Ordering, fmt::Display, hash::Hasher}; +use conversion::{Column, ColumnMap}; -pub use conversion::{Column, ColumnMap}; use indexmap::map::IndexMap; use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value}; -use polars::prelude::{DataFrame, PolarsObject, Series}; +use polars::prelude::{DataFrame, DataType, PolarsObject, Series}; use serde::{Deserialize, Serialize}; +use std::{cmp::Ordering, fmt::Display, hash::Hasher}; // DataFrameValue is an encapsulation of Nushell Value that can be used // to define the PolarsObject Trait. The polars object trait allows to @@ -98,7 +100,7 @@ impl NuDataFrame { } } - pub fn to_value(self, span: Span) -> Value { + pub fn into_value(self, span: Span) -> Value { Value::CustomValue { val: Box::new(self), span, @@ -325,4 +327,64 @@ impl NuDataFrame { Ok(values) } + + // Dataframes are considered equal if they have the same shape, column name and values + pub fn is_equal(&self, other: &Self) -> Option { + if self.as_ref().width() == 0 { + // checking for empty dataframe + return None; + } + + if self.as_ref().get_column_names() != other.as_ref().get_column_names() { + // checking both dataframes share the same names + return None; + } + + if self.as_ref().height() != other.as_ref().height() { + // checking both dataframes have the same row size + return None; + } + + // sorting dataframe by the first column + let column_names = self.as_ref().get_column_names(); + let first_col = column_names + .get(0) + .expect("already checked that dataframe is different than 0"); + + // if unable to sort, then unable to compare + let lhs = match self.as_ref().sort(*first_col, false) { + Ok(df) => df, + Err(_) => return None, + }; + + let rhs = match other.as_ref().sort(*first_col, false) { + Ok(df) => df, + Err(_) => return None, + }; + + for name in self.as_ref().get_column_names() { + let self_series = lhs.column(name).expect("name from dataframe names"); + + let other_series = rhs + .column(name) + .expect("already checked that name in other"); + + let self_series = match self_series.dtype() { + // Casting needed to compare other numeric types with nushell numeric type. + // In nushell we only have i64 integer numeric types and any array created + // with nushell untagged primitives will be of type i64 + DataType::UInt32 => match self_series.cast(&DataType::Int64) { + Ok(series) => series, + Err(_) => return None, + }, + _ => self_series.clone(), + }; + + if !self_series.series_equal(other_series) { + return None; + } + } + + Some(Ordering::Equal) + } } diff --git a/crates/nu-command/src/dataframe/objects/nu_dataframe/operations.rs b/crates/nu-command/src/dataframe/nu_dataframe/operations.rs similarity index 100% rename from crates/nu-command/src/dataframe/objects/nu_dataframe/operations.rs rename to crates/nu-command/src/dataframe/nu_dataframe/operations.rs diff --git a/crates/nu-command/src/dataframe/objects/mod.rs b/crates/nu-command/src/dataframe/objects/mod.rs deleted file mode 100644 index cc2a47aa9f..0000000000 --- a/crates/nu-command/src/dataframe/objects/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub(super) mod nu_dataframe; diff --git a/crates/nu-command/src/dataframe/to_df.rs b/crates/nu-command/src/dataframe/to_df.rs deleted file mode 100644 index cadeab7723..0000000000 --- a/crates/nu-command/src/dataframe/to_df.rs +++ /dev/null @@ -1,59 +0,0 @@ -use super::objects::nu_dataframe::NuDataFrame; -use nu_protocol::{ - ast::Call, - engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, -}; - -#[derive(Clone)] -pub struct ToDataFrame; - -impl Command for ToDataFrame { - fn name(&self) -> &str { - "to df" - } - - fn usage(&self) -> &str { - "Converts a List, Table or Dictionary into a dataframe" - } - - fn signature(&self) -> Signature { - Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Takes a dictionary and creates a dataframe", - example: "[[a b];[1 2] [3 4]] | to df", - result: None, - }, - Example { - description: "Takes a list of tables and creates a dataframe", - example: "[[1 2 a] [3 4 b] [5 6 c]] | to df", - result: None, - }, - Example { - description: "Takes a list and creates a dataframe", - example: "[a b c] | to df", - result: None, - }, - Example { - description: "Takes a list of booleans and creates a dataframe", - example: "[$true $true $false] | to df", - result: None, - }, - ] - } - - fn run( - &self, - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let df = NuDataFrame::try_from_iter(input.into_iter())?; - Ok(PipelineData::Value(NuDataFrame::to_value(df, call.head))) - } -} diff --git a/crates/nu-protocol/src/value/custom_value.rs b/crates/nu-protocol/src/value/custom_value.rs index f03cbcf302..92423d5776 100644 --- a/crates/nu-protocol/src/value/custom_value.rs +++ b/crates/nu-protocol/src/value/custom_value.rs @@ -1,4 +1,4 @@ -use std::fmt; +use std::{cmp::Ordering, fmt}; use crate::{ast::Operator, Category, ShellError, Span, Value}; @@ -29,6 +29,9 @@ pub trait CustomValue: fmt::Debug + Send + Sync { fn follow_path_int(&self, count: usize, span: Span) -> Result; fn follow_path_string(&self, column_name: String, span: Span) -> Result; + // ordering with other value + fn partial_cmp(&self, other: &Value) -> Option; + // Definition of an operation between the object that implements the trait // and another Value. // The Operator enum is used to indicate the expected operation diff --git a/crates/nu-protocol/src/value/from.rs b/crates/nu-protocol/src/value/from.rs index f61253b3d6..5185bb9977 100644 --- a/crates/nu-protocol/src/value/from.rs +++ b/crates/nu-protocol/src/value/from.rs @@ -1,5 +1,14 @@ use crate::{ShellError, Span, Value}; +impl From for Value { + fn from(val: String) -> Self { + Value::String { + val, + span: Span::unknown(), + } + } +} + impl From for Value { fn from(val: bool) -> Self { Value::Bool { diff --git a/crates/nu-protocol/src/value/mod.rs b/crates/nu-protocol/src/value/mod.rs index 27d0821bec..8e041bcaea 100644 --- a/crates/nu-protocol/src/value/mod.rs +++ b/crates/nu-protocol/src/value/mod.rs @@ -640,6 +640,7 @@ impl PartialOrd for Value { (Value::Binary { val: lhs, .. }, Value::Binary { val: rhs, .. }) => { lhs.partial_cmp(rhs) } + (Value::CustomValue { val: lhs, .. }, rhs) => lhs.partial_cmp(rhs), (Value::Nothing { .. }, Value::Nothing { .. }) => Some(Ordering::Equal), (_, _) => None, }