mirror of
https://github.com/nushell/nushell
synced 2024-12-27 13:33:16 +00:00
testing suite for dataframes (#379)
This commit is contained in:
parent
e07ce57423
commit
ee239a0d37
17 changed files with 403 additions and 88 deletions
|
@ -1,10 +1,3 @@
|
|||
mod describe;
|
||||
mod dtypes;
|
||||
mod objects;
|
||||
mod open;
|
||||
mod to_df;
|
||||
mod nu_dataframe;
|
||||
|
||||
pub use describe::DescribeDF;
|
||||
pub use dtypes::DataTypes;
|
||||
pub use open::OpenDataFrame;
|
||||
pub use to_df::ToDataFrame;
|
||||
pub use nu_dataframe::commands::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame};
|
||||
|
|
|
@ -18,7 +18,7 @@ pub fn between_dataframes(
|
|||
let operation_span = span(&[left.span()?, right.span()?]);
|
||||
match operator.item {
|
||||
Operator::Plus => match lhs.append_df(rhs, Axis::Row, operation_span) {
|
||||
Ok(df) => Ok(df.to_value(operation_span)),
|
||||
Ok(df) => Ok(df.into_value(operation_span)),
|
||||
Err(e) => Err(e),
|
||||
},
|
||||
_ => Err(ShellError::OperatorMismatch {
|
|
@ -1,9 +1,11 @@
|
|||
use super::objects::nu_dataframe::NuDataFrame;
|
||||
use crate::dataframe::nu_dataframe::Column;
|
||||
|
||||
use super::super::NuDataFrame;
|
||||
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature,
|
||||
Category, Example, PipelineData, ShellError, Signature, Span,
|
||||
};
|
||||
use polars::{
|
||||
chunked_array::ChunkedArray,
|
||||
|
@ -31,8 +33,58 @@ impl Command for DescribeDF {
|
|||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "dataframe description",
|
||||
example: "[[a b]; [1 1] [1 1]] | to-df | describe",
|
||||
result: None,
|
||||
example: "[[a b]; [1 1] [1 1]] | to df | describe",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"descriptor".to_string(),
|
||||
vec![
|
||||
"count".to_string().into(),
|
||||
"sum".to_string().into(),
|
||||
"mean".to_string().into(),
|
||||
"median".to_string().into(),
|
||||
"std".to_string().into(),
|
||||
"min".to_string().into(),
|
||||
"25%".to_string().into(),
|
||||
"50%".to_string().into(),
|
||||
"75%".to_string().into(),
|
||||
"max".to_string().into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"a (i64)".to_string(),
|
||||
vec![
|
||||
2.0.into(),
|
||||
2.0.into(),
|
||||
1.0.into(),
|
||||
1.0.into(),
|
||||
0.0.into(),
|
||||
1.0.into(),
|
||||
1.0.into(),
|
||||
1.0.into(),
|
||||
1.0.into(),
|
||||
1.0.into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b (i64)".to_string(),
|
||||
vec![
|
||||
2.0.into(),
|
||||
2.0.into(),
|
||||
1.0.into(),
|
||||
1.0.into(),
|
||||
0.0.into(),
|
||||
1.0.into(),
|
||||
1.0.into(),
|
||||
1.0.into(),
|
||||
1.0.into(),
|
||||
1.0.into(),
|
||||
],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::unknown()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
|
@ -181,3 +233,14 @@ fn command(
|
|||
df, call.head,
|
||||
)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(DescribeDF {})
|
||||
}
|
||||
}
|
|
@ -1,8 +1,8 @@
|
|||
use super::objects::nu_dataframe::{Column, NuDataFrame};
|
||||
use super::super::{Column, NuDataFrame};
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Value,
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -24,8 +24,21 @@ impl Command for DataTypes {
|
|||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "drop column a",
|
||||
example: "[[a b]; [1 2] [3 4]] | to-df | dtypes",
|
||||
result: None,
|
||||
example: "[[a b]; [1 2] [3 4]] | to df | dtypes",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"column".to_string(),
|
||||
vec!["a".to_string().into(), "b".to_string().into()],
|
||||
),
|
||||
Column::new(
|
||||
"dtype".to_string(),
|
||||
vec!["i64".to_string().into(), "i64".to_string().into()],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::unknown()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
|
@ -78,5 +91,16 @@ fn command(
|
|||
let dtypes_col = Column::new("dtype".to_string(), dtypes);
|
||||
|
||||
let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col])?;
|
||||
Ok(PipelineData::Value(df.to_value(call.head)))
|
||||
Ok(PipelineData::Value(df.into_value(call.head)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(DataTypes {})
|
||||
}
|
||||
}
|
12
crates/nu-command/src/dataframe/nu_dataframe/commands/mod.rs
Normal file
12
crates/nu-command/src/dataframe/nu_dataframe/commands/mod.rs
Normal file
|
@ -0,0 +1,12 @@
|
|||
mod describe;
|
||||
mod dtypes;
|
||||
mod open;
|
||||
mod to_df;
|
||||
|
||||
pub use describe::DescribeDF;
|
||||
pub use dtypes::DataTypes;
|
||||
pub use open::OpenDataFrame;
|
||||
pub use to_df::ToDataFrame;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_dataframe;
|
|
@ -1,12 +1,11 @@
|
|||
use std::{fs::File, path::PathBuf};
|
||||
|
||||
use super::objects::nu_dataframe::NuDataFrame;
|
||||
use super::super::NuDataFrame;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
|
||||
};
|
||||
use std::{fs::File, path::PathBuf};
|
||||
|
||||
use polars::prelude::{CsvEncoding, CsvReader, JsonReader, ParquetReader, SerReader};
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
use nu_engine::eval_block;
|
||||
use nu_parser::parse;
|
||||
use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack, StateWorkingSet},
|
||||
PipelineData, Span, Value, CONFIG_VARIABLE_ID,
|
||||
};
|
||||
|
||||
use super::ToDataFrame;
|
||||
|
||||
pub fn test_dataframe(cmd: impl Command + 'static) {
|
||||
let examples = cmd.examples();
|
||||
let mut engine_state = Box::new(EngineState::new());
|
||||
|
||||
let delta = {
|
||||
// Base functions that are needed for testing
|
||||
// Try to keep this working set small to keep tests running as fast as possible
|
||||
let mut working_set = StateWorkingSet::new(&*engine_state);
|
||||
working_set.add_decl(Box::new(ToDataFrame));
|
||||
|
||||
// Adding the command that is being tested to the working set
|
||||
working_set.add_decl(Box::new(cmd));
|
||||
|
||||
working_set.render()
|
||||
};
|
||||
|
||||
let _ = engine_state.merge_delta(delta);
|
||||
|
||||
for example in examples {
|
||||
// Skip tests that don't have results to compare to
|
||||
if example.result.is_none() {
|
||||
continue;
|
||||
}
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
let (block, delta) = {
|
||||
let mut working_set = StateWorkingSet::new(&*engine_state);
|
||||
let (output, err) = parse(&mut working_set, None, example.example.as_bytes(), false);
|
||||
|
||||
if let Some(err) = err {
|
||||
panic!("test parse error in `{}`: {:?}", example.example, err)
|
||||
}
|
||||
|
||||
(output, working_set.render())
|
||||
};
|
||||
|
||||
let _ = engine_state.merge_delta(delta);
|
||||
|
||||
let mut stack = Stack::new();
|
||||
|
||||
// Set up our initial config to start from
|
||||
stack.vars.insert(
|
||||
CONFIG_VARIABLE_ID,
|
||||
Value::Record {
|
||||
cols: vec![],
|
||||
vals: vec![],
|
||||
span: Span::unknown(),
|
||||
},
|
||||
);
|
||||
|
||||
match eval_block(
|
||||
&engine_state,
|
||||
&mut stack,
|
||||
&block,
|
||||
PipelineData::new(Span::unknown()),
|
||||
) {
|
||||
Err(err) => panic!("test eval error in `{}`: {:?}", example.example, err),
|
||||
Ok(result) => {
|
||||
let result = result.into_value(Span::unknown());
|
||||
println!("input: {}", example.example);
|
||||
println!("result: {:?}", result);
|
||||
println!("done: {:?}", start.elapsed());
|
||||
|
||||
// Note. Value implements PartialEq for Bool, Int, Float, String and Block
|
||||
// If the command you are testing requires to compare another case, then
|
||||
// you need to define its equality in the Value struct
|
||||
if let Some(expected) = example.result {
|
||||
if result != expected {
|
||||
panic!(
|
||||
"the example result is different to expected value: {:?} != {:?}",
|
||||
result, expected
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
111
crates/nu-command/src/dataframe/nu_dataframe/commands/to_df.rs
Normal file
111
crates/nu-command/src/dataframe/nu_dataframe/commands/to_df.rs
Normal file
|
@ -0,0 +1,111 @@
|
|||
use super::super::{Column, NuDataFrame};
|
||||
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToDataFrame;
|
||||
|
||||
impl Command for ToDataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"to df"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a List, Table or Dictionary into a dataframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Takes a dictionary and creates a dataframe",
|
||||
example: "[[a b];[1 2] [3 4]] | to df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![1.into(), 3.into()]),
|
||||
Column::new("b".to_string(), vec![2.into(), 4.into()]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::unknown()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list of tables and creates a dataframe",
|
||||
example: "[[1 2 a] [3 4 b] [5 6 c]] | to df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("0".to_string(), vec![1.into(), 3.into(), 5.into()]),
|
||||
Column::new("1".to_string(), vec![2.into(), 4.into(), 6.into()]),
|
||||
Column::new(
|
||||
"2".to_string(),
|
||||
vec![
|
||||
"a".to_string().into(),
|
||||
"b".to_string().into(),
|
||||
"c".to_string().into(),
|
||||
],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::unknown()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list and creates a dataframe",
|
||||
example: "[a b c] | to df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
"a".to_string().into(),
|
||||
"b".to_string().into(),
|
||||
"c".to_string().into(),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::unknown()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list of booleans and creates a dataframe",
|
||||
example: "[$true $true $false] | to df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![true.into(), true.into(), false.into()],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::unknown()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_iter(input.into_iter())?;
|
||||
Ok(PipelineData::Value(NuDataFrame::into_value(df, call.head)))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(ToDataFrame {})
|
||||
}
|
||||
}
|
|
@ -1,4 +1,5 @@
|
|||
use super::{DataFrameValue, NuDataFrame};
|
||||
|
||||
use chrono::{DateTime, FixedOffset, NaiveDateTime};
|
||||
use indexmap::map::{Entry, IndexMap};
|
||||
use nu_protocol::{ShellError, Span, Value};
|
|
@ -48,7 +48,17 @@ impl CustomValue for NuDataFrame {
|
|||
|
||||
fn follow_path_string(&self, column_name: String, span: Span) -> Result<Value, ShellError> {
|
||||
let column = self.column(&column_name, span)?;
|
||||
Ok(column.to_value(span))
|
||||
Ok(column.into_value(span))
|
||||
}
|
||||
|
||||
fn partial_cmp(&self, other: &Value) -> Option<std::cmp::Ordering> {
|
||||
match other {
|
||||
Value::CustomValue { val, .. } => val
|
||||
.as_any()
|
||||
.downcast_ref::<Self>()
|
||||
.and_then(|other| self.is_equal(other)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn operation(
|
|
@ -1,15 +1,17 @@
|
|||
pub mod commands;
|
||||
|
||||
mod between_values;
|
||||
mod conversion;
|
||||
mod custom_value;
|
||||
mod operations;
|
||||
|
||||
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
|
||||
use conversion::{Column, ColumnMap};
|
||||
|
||||
pub use conversion::{Column, ColumnMap};
|
||||
use indexmap::map::IndexMap;
|
||||
use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value};
|
||||
use polars::prelude::{DataFrame, PolarsObject, Series};
|
||||
use polars::prelude::{DataFrame, DataType, PolarsObject, Series};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
|
||||
|
||||
// DataFrameValue is an encapsulation of Nushell Value that can be used
|
||||
// to define the PolarsObject Trait. The polars object trait allows to
|
||||
|
@ -98,7 +100,7 @@ impl NuDataFrame {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn to_value(self, span: Span) -> Value {
|
||||
pub fn into_value(self, span: Span) -> Value {
|
||||
Value::CustomValue {
|
||||
val: Box::new(self),
|
||||
span,
|
||||
|
@ -325,4 +327,64 @@ impl NuDataFrame {
|
|||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
// Dataframes are considered equal if they have the same shape, column name and values
|
||||
pub fn is_equal(&self, other: &Self) -> Option<Ordering> {
|
||||
if self.as_ref().width() == 0 {
|
||||
// checking for empty dataframe
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.as_ref().get_column_names() != other.as_ref().get_column_names() {
|
||||
// checking both dataframes share the same names
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.as_ref().height() != other.as_ref().height() {
|
||||
// checking both dataframes have the same row size
|
||||
return None;
|
||||
}
|
||||
|
||||
// sorting dataframe by the first column
|
||||
let column_names = self.as_ref().get_column_names();
|
||||
let first_col = column_names
|
||||
.get(0)
|
||||
.expect("already checked that dataframe is different than 0");
|
||||
|
||||
// if unable to sort, then unable to compare
|
||||
let lhs = match self.as_ref().sort(*first_col, false) {
|
||||
Ok(df) => df,
|
||||
Err(_) => return None,
|
||||
};
|
||||
|
||||
let rhs = match other.as_ref().sort(*first_col, false) {
|
||||
Ok(df) => df,
|
||||
Err(_) => return None,
|
||||
};
|
||||
|
||||
for name in self.as_ref().get_column_names() {
|
||||
let self_series = lhs.column(name).expect("name from dataframe names");
|
||||
|
||||
let other_series = rhs
|
||||
.column(name)
|
||||
.expect("already checked that name in other");
|
||||
|
||||
let self_series = match self_series.dtype() {
|
||||
// Casting needed to compare other numeric types with nushell numeric type.
|
||||
// In nushell we only have i64 integer numeric types and any array created
|
||||
// with nushell untagged primitives will be of type i64
|
||||
DataType::UInt32 => match self_series.cast(&DataType::Int64) {
|
||||
Ok(series) => series,
|
||||
Err(_) => return None,
|
||||
},
|
||||
_ => self_series.clone(),
|
||||
};
|
||||
|
||||
if !self_series.series_equal(other_series) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
Some(Ordering::Equal)
|
||||
}
|
||||
}
|
|
@ -1 +0,0 @@
|
|||
pub(super) mod nu_dataframe;
|
|
@ -1,59 +0,0 @@
|
|||
use super::objects::nu_dataframe::NuDataFrame;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToDataFrame;
|
||||
|
||||
impl Command for ToDataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"to df"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a List, Table or Dictionary into a dataframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Takes a dictionary and creates a dataframe",
|
||||
example: "[[a b];[1 2] [3 4]] | to df",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list of tables and creates a dataframe",
|
||||
example: "[[1 2 a] [3 4 b] [5 6 c]] | to df",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list and creates a dataframe",
|
||||
example: "[a b c] | to df",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list of booleans and creates a dataframe",
|
||||
example: "[$true $true $false] | to df",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_iter(input.into_iter())?;
|
||||
Ok(PipelineData::Value(NuDataFrame::to_value(df, call.head)))
|
||||
}
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
use std::fmt;
|
||||
use std::{cmp::Ordering, fmt};
|
||||
|
||||
use crate::{ast::Operator, Category, ShellError, Span, Value};
|
||||
|
||||
|
@ -29,6 +29,9 @@ pub trait CustomValue: fmt::Debug + Send + Sync {
|
|||
fn follow_path_int(&self, count: usize, span: Span) -> Result<Value, ShellError>;
|
||||
fn follow_path_string(&self, column_name: String, span: Span) -> Result<Value, ShellError>;
|
||||
|
||||
// ordering with other value
|
||||
fn partial_cmp(&self, other: &Value) -> Option<Ordering>;
|
||||
|
||||
// Definition of an operation between the object that implements the trait
|
||||
// and another Value.
|
||||
// The Operator enum is used to indicate the expected operation
|
||||
|
|
|
@ -1,5 +1,14 @@
|
|||
use crate::{ShellError, Span, Value};
|
||||
|
||||
impl From<String> for Value {
|
||||
fn from(val: String) -> Self {
|
||||
Value::String {
|
||||
val,
|
||||
span: Span::unknown(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bool> for Value {
|
||||
fn from(val: bool) -> Self {
|
||||
Value::Bool {
|
||||
|
|
|
@ -640,6 +640,7 @@ impl PartialOrd for Value {
|
|||
(Value::Binary { val: lhs, .. }, Value::Binary { val: rhs, .. }) => {
|
||||
lhs.partial_cmp(rhs)
|
||||
}
|
||||
(Value::CustomValue { val: lhs, .. }, rhs) => lhs.partial_cmp(rhs),
|
||||
(Value::Nothing { .. }, Value::Nothing { .. }) => Some(Ordering::Equal),
|
||||
(_, _) => None,
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue