Merged overloaded commands (#9860)

- fixes #9807

# Description

This pull request merges each group of overloaded `dfr` commands that share a name into a single command:

eager:
dfr first -> eager/first.rs
dfr last -> eager/last.rs
dfr into-nu -> eager/to_nu.rs (merged)

lazy:
dfr min -> expressions/expressions_macro.rs lazy_expressions_macro
dfr max -> expressions/expressions_macro.rs lazy_expressions_macro
dfr sum -> expressions/expressions_macro.rs lazy_expressions_macro
dfr mean -> expressions/expressions_macro.rs lazy_expressions_macro
dfr std -> expressions/expressions_macro.rs lazy_expressions_macro
dfr var -> expressions/expressions_macro.rs lazy_expressions_macro

series:
dfr n-unique -> series/n_unique.rs
dfr is-not-null -> series/masks/is_not_null.rs
dfr is-null -> series/masks/is_null.rs

# User-Facing Changes
No user-facing changes.

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
Jack Wright 2023-07-31 05:34:12 -07:00 committed by GitHub
parent ba0f069c31
commit 87abfee268
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 731 additions and 493 deletions

View file

@ -1,4 +1,4 @@
use super::super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame, NuExpression};
use nu_engine::CallExt; use nu_engine::CallExt;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
@ -15,7 +15,7 @@ impl Command for FirstDF {
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
"Show only the first number of rows." "Show only the first number of rows or create a first expression"
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
@ -25,10 +25,16 @@ impl Command for FirstDF {
SyntaxShape::Int, SyntaxShape::Int,
"starting from the front, the number of rows to return", "starting from the front, the number of rows to return",
) )
.input_output_type( .input_output_types(vec![
Type::Custom("dataframe".into()), (
Type::Custom("dataframe".into()), Type::Custom("expression".into()),
) Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into())) .category(Category::Custom("dataframe".into()))
} }
@ -64,6 +70,11 @@ impl Command for FirstDF {
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
}, },
Example {
description: "Creates a first expression from a column",
example: "dfr col a | dfr first",
result: None,
},
] ]
} }
@ -74,8 +85,19 @@ impl Command for FirstDF {
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?; let value = input.into_value(call.head);
command(engine_state, stack, call, df) if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
command(engine_state, stack, call, df)
} else {
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr.into_polars().first().into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
}
} }
} }
@ -97,11 +119,25 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example};
use super::*; use super::*;
use crate::dataframe::lazy::aggregate::LazyAggregate;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
#[test] #[test]
fn test_examples() { fn test_examples_dataframe() {
test_dataframe(vec![Box::new(FirstDF {})]) let mut engine_state = build_test_engine_state(vec![Box::new(FirstDF {})]);
test_dataframe_example(&mut engine_state, &FirstDF.examples()[0]);
test_dataframe_example(&mut engine_state, &FirstDF.examples()[1]);
}
#[test]
fn test_examples_expression() {
let mut engine_state = build_test_engine_state(vec![
Box::new(FirstDF {}),
Box::new(LazyAggregate {}),
Box::new(ToLazyGroupBy {}),
]);
test_dataframe_example(&mut engine_state, &FirstDF.examples()[2]);
} }
} }

View file

@ -1,4 +1,4 @@
use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame}; use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame, NuExpression};
use nu_engine::CallExt; use nu_engine::CallExt;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
@ -21,26 +21,39 @@ impl Command for LastDF {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name()) Signature::build(self.name())
.optional("rows", SyntaxShape::Int, "Number of rows for tail") .optional("rows", SyntaxShape::Int, "Number of rows for tail")
.input_output_type( .input_output_types(vec![
Type::Custom("dataframe".into()), (
Type::Custom("dataframe".into()), Type::Custom("expression".into()),
) Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into())) .category(Category::Custom("dataframe".into()))
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![
description: "Create new dataframe with last rows", Example {
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr last 1", description: "Create new dataframe with last rows",
result: Some( example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr last 1",
NuDataFrame::try_from_columns(vec![ result: Some(
Column::new("a".to_string(), vec![Value::test_int(3)]), NuDataFrame::try_from_columns(vec![
Column::new("b".to_string(), vec![Value::test_int(4)]), Column::new("a".to_string(), vec![Value::test_int(3)]),
]) Column::new("b".to_string(), vec![Value::test_int(4)]),
.expect("simple df for test should not fail") ])
.into_value(Span::test_data()), .expect("simple df for test should not fail")
), .into_value(Span::test_data()),
}] ),
},
Example {
description: "Creates a last expression from a column",
example: "dfr col a | dfr last",
result: None,
},
]
} }
fn run( fn run(
@ -50,8 +63,19 @@ impl Command for LastDF {
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?; let value = input.into_value(call.head);
command(engine_state, stack, call, df) if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
command(engine_state, stack, call, df)
} else {
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr.into_polars().last().into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
}
} }
} }
@ -73,11 +97,24 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example};
use super::*; use super::*;
use crate::dataframe::lazy::aggregate::LazyAggregate;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
#[test] #[test]
fn test_examples() { fn test_examples_dataframe() {
test_dataframe(vec![Box::new(LastDF {})]) let mut engine_state = build_test_engine_state(vec![Box::new(LastDF {})]);
test_dataframe_example(&mut engine_state, &LastDF.examples()[0]);
}
#[test]
fn test_examples_expression() {
let mut engine_state = build_test_engine_state(vec![
Box::new(LastDF {}),
Box::new(LazyAggregate {}),
Box::new(ToLazyGroupBy {}),
]);
test_dataframe_example(&mut engine_state, &LastDF.examples()[1]);
} }
} }

View file

@ -1,7 +1,7 @@
/// Definition of multiple Expression commands using a macro rule /// Definition of multiple Expression commands using a macro rule
/// All of these expressions have an identical body and only require /// All of these expressions have an identical body and only require
/// to have a change in the name, description and expression function /// to have a change in the name, description and expression function
use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
@ -134,6 +134,186 @@ macro_rules! expr_command {
}; };
} }
// The structs defined with this macro back commands that accept both dataframe
// and expression input, replacing separate commands that shared the same name
// Defines a unit struct `$command` implementing `Command` that accepts either
// a dataframe or an expression as pipeline input.
//
// Arguments:
// - `$command`: identifier of the generated struct
// - `$name`: value returned by `Command::name`
// - `$desc`: value returned by `Command::usage`
// - `$examples`: value returned by `Command::examples`; the generated tests
//   index it at 0 and 1, so it must contain at least two examples
// - `$func`: method invoked on the value returned by `into_polars()`
// - `$test`: identifier of the generated test module
// - `$ddof` (second arm only): argument forwarded to `$func`
macro_rules! lazy_expr_command {
// Arm for functions that are called without arguments: `$func()`
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
#[derive(Clone)]
pub struct $command;
impl Command for $command {
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
// Declares expression -> expression and dataframe -> dataframe as the
// supported input/output pairs
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
_engine_state: &EngineState,
_stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value = input.into_value(call.head);
// Dispatch on the input: anything that can be downcast to a NuDataFrame
// takes the lazy frame path, everything else is tried as an expression
if NuDataFrame::can_downcast(&value) {
let lazy = NuLazyFrame::try_from_value(value)?;
// `from_eager` is carried over from the input lazy frame
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func());
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
} else {
// A value that is not an expression either makes `try_from_value`
// fail, and that error is returned to the caller
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr.into_polars().$func().into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
}
}
}
#[cfg(test)]
mod $test {
use super::super::super::test_dataframe::{
build_test_engine_state, test_dataframe_example,
};
use super::*;
use crate::dataframe::lazy::aggregate::LazyAggregate;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
#[test]
fn test_examples_dataframe() {
// the first example should be for the dataframe case
let example = &$command.examples()[0];
let mut engine_state = build_test_engine_state(vec![Box::new($command {})]);
test_dataframe_example(&mut engine_state, &example)
}
#[test]
fn test_examples_expressions() {
// the second example should be for the expression case; the aggregate
// and group-by commands are registered as well for this example
let example = &$command.examples()[1];
let mut engine_state = build_test_engine_state(vec![
Box::new($command {}),
Box::new(LazyAggregate {}),
Box::new(ToLazyGroupBy {}),
]);
test_dataframe_example(&mut engine_state, &example)
}
}
};
// Arm for functions that take one argument: `$func($ddof)`
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddof: expr) => {
#[derive(Clone)]
pub struct $command;
impl Command for $command {
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
// Declares expression -> expression and dataframe -> dataframe as the
// supported input/output pairs
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
_engine_state: &EngineState,
_stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value = input.into_value(call.head);
// Same dispatch as the first arm, with `$ddof` forwarded to `$func`
// in both branches
if NuDataFrame::can_downcast(&value) {
let lazy = NuLazyFrame::try_from_value(value)?;
// `from_eager` is carried over from the input lazy frame
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func($ddof));
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
} else {
// A value that is not an expression either makes `try_from_value`
// fail, and that error is returned to the caller
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr.into_polars().$func($ddof).into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
}
}
}
#[cfg(test)]
mod $test {
use super::super::super::test_dataframe::{
build_test_engine_state, test_dataframe_example,
};
use super::*;
use crate::dataframe::lazy::aggregate::LazyAggregate;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
#[test]
fn test_examples_dataframe() {
// the first example should be for the dataframe case
let example = &$command.examples()[0];
let mut engine_state = build_test_engine_state(vec![Box::new($command {})]);
test_dataframe_example(&mut engine_state, &example)
}
#[test]
fn test_examples_expressions() {
// the second example should be for the expression case; the aggregate
// and group-by commands are registered as well for this example
let example = &$command.examples()[1];
let mut engine_state = build_test_engine_state(vec![
Box::new($command {}),
Box::new(LazyAggregate {}),
Box::new(ToLazyGroupBy {}),
]);
test_dataframe_example(&mut engine_state, &example)
}
}
};
}
// ExprList command // ExprList command
// Expands to a command definition for a list expression // Expands to a command definition for a list expression
expr_command!( expr_command!(
@ -209,81 +389,6 @@ expr_command!(
test_count test_count
); );
// ExprFirst command
// Expands to a command definition for a count expression
expr_command!(
ExprFirst,
"dfr first",
"creates a first expression",
vec![Example {
description: "Creates a first expression from a column",
example: "dfr col a | dfr first",
result: None,
},],
first,
test_first
);
// ExprLast command
// Expands to a command definition for a count expression
expr_command!(
ExprLast,
"dfr last",
"creates a last expression",
vec![Example {
description: "Creates a last expression from a column",
example: "dfr col a | dfr last",
result: None,
},],
last,
test_last
);
// ExprNUnique command
// Expands to a command definition for a n-unique expression
expr_command!(
ExprNUnique,
"dfr n-unique",
"creates a n-unique expression",
vec![Example {
description: "Creates a is n-unique expression from a column",
example: "dfr col a | dfr n-unique",
result: None,
},],
n_unique,
test_nunique
);
// ExprIsNotNull command
// Expands to a command definition for a n-unique expression
expr_command!(
ExprIsNotNull,
"dfr is-not-null",
"creates a is not null expression",
vec![Example {
description: "Creates a is not null expression from a column",
example: "dfr col a | dfr is-not-null",
result: None,
},],
is_not_null,
test_is_not_null
);
// ExprIsNull command
// Expands to a command definition for a n-unique expression
expr_command!(
ExprIsNull,
"dfr is-null",
"creates a is null expression",
vec![Example {
description: "Creates a is null expression from a column",
example: "dfr col a | dfr is-null",
result: None,
},],
is_null,
test_is_null
);
// ExprNot command // ExprNot command
// Expands to a command definition for a not expression // Expands to a command definition for a not expression
expr_command!( expr_command!(
@ -301,124 +406,180 @@ expr_command!(
// ExprMax command // ExprMax command
// Expands to a command definition for max aggregation // Expands to a command definition for max aggregation
expr_command!( lazy_expr_command!(
ExprMax, ExprMax,
"dfr max", "dfr max",
"Creates a max expression", "Creates a max expression or aggregates columns to their max value",
vec![Example { vec![
description: "Max aggregation for a group-by", Example {
example: r#"[[a b]; [one 2] [one 4] [two 1]] description: "Max value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr max",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(6)],),
Column::new("b".to_string(), vec![Value::test_int(4)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Max aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| dfr into-df | dfr into-df
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr max)"#, | dfr agg (dfr col b | dfr max)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
), ),
Column::new( Column::new(
"b".to_string(), "b".to_string(),
vec![Value::test_int(4), Value::test_int(1)], vec![Value::test_int(4), Value::test_int(1)],
), ),
]) ])
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
},], },
],
max, max,
test_max test_max
); );
// ExprMin command // ExprMin command
// Expands to a command definition for min aggregation // Expands to a command definition for min aggregation
expr_command!( lazy_expr_command!(
ExprMin, ExprMin,
"dfr min", "dfr min",
"Creates a min expression", "Creates a min expression or aggregates columns to their min value",
vec![Example { vec![
description: "Min aggregation for a group-by", Example {
example: r#"[[a b]; [one 2] [one 4] [two 1]] description: "Min value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr min",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(1)],),
Column::new("b".to_string(), vec![Value::test_int(1)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Min aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| dfr into-df | dfr into-df
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr min)"#, | dfr agg (dfr col b | dfr min)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
), ),
Column::new( Column::new(
"b".to_string(), "b".to_string(),
vec![Value::test_int(2), Value::test_int(1)], vec![Value::test_int(2), Value::test_int(1)],
), ),
]) ])
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
},], },
],
min, min,
test_min test_min
); );
// ExprSum command // ExprSum command
// Expands to a command definition for sum aggregation // Expands to a command definition for sum aggregation
expr_command!( lazy_expr_command!(
ExprSum, ExprSum,
"dfr sum", "dfr sum",
"Creates a sum expression for an aggregation", "Creates a sum expression for an aggregation or aggregates columns to their sum value",
vec![Example { vec![
description: "Sum aggregation for a group-by", Example {
example: r#"[[a b]; [one 2] [one 4] [two 1]] description: "Sums all columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr sum",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(11)],),
Column::new("b".to_string(), vec![Value::test_int(7)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Sum aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| dfr into-df | dfr into-df
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr sum)"#, | dfr agg (dfr col b | dfr sum)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
), ),
Column::new( Column::new(
"b".to_string(), "b".to_string(),
vec![Value::test_int(6), Value::test_int(1)], vec![Value::test_int(6), Value::test_int(1)],
), ),
]) ])
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
},], },
],
sum, sum,
test_sum test_sum
); );
// ExprMean command // ExprMean command
// Expands to a command definition for mean aggregation // Expands to a command definition for mean aggregation
expr_command!( lazy_expr_command!(
ExprMean, ExprMean,
"dfr mean", "dfr mean",
"Creates a mean expression for an aggregation", "Creates a mean expression for an aggregation or aggregates columns to their mean value",
vec![Example { vec![
description: "Mean aggregation for a group-by", Example {
example: r#"[[a b]; [one 2] [one 4] [two 1]] description: "Mean value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr mean",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(2.0)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Mean aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| dfr into-df | dfr into-df
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr mean)"#, | dfr agg (dfr col b | dfr mean)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
), ),
Column::new( Column::new(
"b".to_string(), "b".to_string(),
vec![Value::test_float(3.0), Value::test_float(1.0)], vec![Value::test_float(3.0), Value::test_float(1.0)],
), ),
]) ])
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
},], },
],
mean, mean,
test_mean test_mean
); );
@ -456,64 +617,93 @@ expr_command!(
// ExprStd command // ExprStd command
// Expands to a command definition for std aggregation // Expands to a command definition for std aggregation
expr_command!( lazy_expr_command!(
ExprStd, ExprStd,
"dfr std", "dfr std",
"Creates a std expression for an aggregation", "Creates a std expression for an aggregation of std value from columns in a dataframe",
vec![Example { vec![
description: "Std aggregation for a group-by", Example {
example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]] description: "Std value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr std",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_float(2.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Std aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]]
| dfr into-df | dfr into-df
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr std)"#, | dfr agg (dfr col b | dfr std)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
), ),
Column::new( Column::new(
"b".to_string(), "b".to_string(),
vec![Value::test_float(0.0), Value::test_float(0.0)], vec![Value::test_float(0.0), Value::test_float(0.0)],
), ),
]) ])
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
},], },
],
std, std,
test_std, test_std,
0 1
); );
// ExprVar command // ExprVar command
// Expands to a command definition for var aggregation // Expands to a command definition for var aggregation
expr_command!( lazy_expr_command!(
ExprVar, ExprVar,
"dfr var", "dfr var",
"Create a var expression for an aggregation", "Create a var expression for an aggregation",
vec![Example { vec![
description: "Var aggregation for a group-by", Example {
example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]] description:
"Var value from columns in a dataframe or aggregates columns to their var value",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr var",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Var aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]]
| dfr into-df | dfr into-df
| dfr group-by a | dfr group-by a
| dfr agg (dfr col b | dfr var)"#, | dfr agg (dfr col b | dfr var)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")], vec![Value::test_string("one"), Value::test_string("two")],
), ),
Column::new( Column::new(
"b".to_string(), "b".to_string(),
vec![Value::test_float(0.0), Value::test_float(0.0)], vec![Value::test_float(0.0), Value::test_float(0.0)],
), ),
]) ])
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
},], },
],
var, var,
test_var, test_var,
0 1
); );

View file

@ -50,12 +50,7 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) {
ExprFlatten, ExprFlatten,
ExprExplode, ExprExplode,
ExprCount, ExprCount,
ExprFirst,
ExprLast,
ExprNUnique,
ExprIsIn, ExprIsIn,
ExprIsNotNull,
ExprIsNull,
ExprNot, ExprNot,
ExprMax, ExprMax,
ExprMin, ExprMin,

View file

@ -157,94 +157,6 @@ lazy_command!(
test_cache test_cache
); );
// LazyMax command
// Expands to a command definition for max aggregation
lazy_command!(
LazyMax,
"dfr max",
"Aggregates columns to their max value",
vec![Example {
description: "Max value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr max",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(6)],),
Column::new("b".to_string(), vec![Value::test_int(4)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},],
max,
test_max
);
// LazyMin command
// Expands to a command definition for min aggregation
lazy_command!(
LazyMin,
"dfr min",
"Aggregates columns to their min value",
vec![Example {
description: "Min value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr min",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(1)],),
Column::new("b".to_string(), vec![Value::test_int(1)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},],
min,
test_min
);
// LazySum command
// Expands to a command definition for sum aggregation
lazy_command!(
LazySum,
"dfr sum",
"Aggregates columns to their sum value",
vec![Example {
description: "Sums all columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr sum",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(11)],),
Column::new("b".to_string(), vec![Value::test_int(7)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},],
sum,
test_sum
);
// LazyMean command
// Expands to a command definition for mean aggregation
lazy_command!(
LazyMean,
"dfr mean",
"Aggregates columns to their mean value",
vec![Example {
description: "Mean value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr mean",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(2.0)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},],
mean,
test_mean
);
// LazyMedian command // LazyMedian command
// Expands to a command definition for median aggregation // Expands to a command definition for median aggregation
lazy_command!( lazy_command!(
@ -266,49 +178,3 @@ lazy_command!(
median, median,
test_median test_median
); );
// LazyStd command
// Expands to a command definition for std aggregation
lazy_command!(
LazyStd,
"dfr std",
"Aggregates columns to their std value",
vec![Example {
description: "Std value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr std",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_float(2.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},],
std,
test_std,
1
);
// LazyVar command
// Expands to a command definition for var aggregation
lazy_command!(
LazyVar,
"dfr var",
"Aggregates columns to their var value",
vec![Example {
description: "Var value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr var",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},],
var,
test_var,
1
);

View file

@ -49,13 +49,7 @@ pub fn add_lazy_decls(working_set: &mut StateWorkingSet) {
LazyFilter, LazyFilter,
LazyJoin, LazyJoin,
LazyQuantile, LazyQuantile,
LazyMax,
LazyMin,
LazySum,
LazyMean,
LazyMedian, LazyMedian,
LazyStd,
LazyVar,
LazyReverse, LazyReverse,
LazySelect, LazySelect,
LazySortBy, LazySortBy,

View file

@ -1,4 +1,4 @@
use super::super::super::values::{Column, NuDataFrame}; use super::super::super::values::{Column, NuDataFrame, NuExpression};
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
@ -20,33 +20,46 @@ impl Command for IsNotNull {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name()) Signature::build(self.name())
.input_output_type( .input_output_types(vec![
Type::Custom("dataframe".into()), (
Type::Custom("dataframe".into()), Type::Custom("expression".into()),
) Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into())) .category(Category::Custom("dataframe".into()))
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![
description: "Create mask where values are not null", Example {
example: r#"let s = ([5 6 0 8] | dfr into-df); description: "Create mask where values are not null",
example: r#"let s = ([5 6 0 8] | dfr into-df);
let res = ($s / $s); let res = ($s / $s);
$res | dfr is-not-null"#, $res | dfr is-not-null"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(vec![Column::new(
"is_not_null".to_string(), "is_not_null".to_string(),
vec![ vec![
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(false), Value::test_bool(false),
Value::test_bool(true), Value::test_bool(true),
], ],
)]) )])
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
}] },
Example {
description: "Creates a is not null expression from a column",
example: "dfr col a | dfr is-not-null",
result: None,
},
]
} }
fn run( fn run(
@ -56,8 +69,19 @@ impl Command for IsNotNull {
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?; let value = input.into_value(call.head);
command(engine_state, stack, call, df) if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
command(engine_state, stack, call, df)
} else {
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr.into_polars().is_not_null().into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
}
} }
} }
@ -76,11 +100,24 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::super::super::test_dataframe::test_dataframe;
use super::*; use super::*;
use crate::dataframe::lazy::aggregate::LazyAggregate;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
use crate::dataframe::test_dataframe::{build_test_engine_state, test_dataframe_example};
#[test] #[test]
fn test_examples() { fn test_examples_dataframe() {
test_dataframe(vec![Box::new(IsNotNull {})]) let mut engine_state = build_test_engine_state(vec![Box::new(IsNotNull {})]);
test_dataframe_example(&mut engine_state, &IsNotNull.examples()[0]);
}
#[test]
fn test_examples_expression() {
let mut engine_state = build_test_engine_state(vec![
Box::new(IsNotNull {}),
Box::new(LazyAggregate {}),
Box::new(ToLazyGroupBy {}),
]);
test_dataframe_example(&mut engine_state, &IsNotNull.examples()[1]);
} }
} }

View file

@ -1,4 +1,4 @@
use super::super::super::values::{Column, NuDataFrame}; use super::super::super::values::{Column, NuDataFrame, NuExpression};
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
@ -20,33 +20,46 @@ impl Command for IsNull {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name()) Signature::build(self.name())
.input_output_type( .input_output_types(vec![
Type::Custom("dataframe".into()), (
Type::Custom("dataframe".into()), Type::Custom("expression".into()),
) Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into())) .category(Category::Custom("dataframe".into()))
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![
description: "Create mask where values are null", Example {
example: r#"let s = ([5 6 0 8] | dfr into-df); description: "Create mask where values are null",
example: r#"let s = ([5 6 0 8] | dfr into-df);
let res = ($s / $s); let res = ($s / $s);
$res | dfr is-null"#, $res | dfr is-null"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(vec![Column::new(
"is_null".to_string(), "is_null".to_string(),
vec![ vec![
Value::test_bool(false), Value::test_bool(false),
Value::test_bool(false), Value::test_bool(false),
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(false), Value::test_bool(false),
], ],
)]) )])
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
}] },
Example {
description: "Creates a is null expression from a column",
example: "dfr col a | dfr is-null",
result: None,
},
]
} }
fn run( fn run(
@ -56,8 +69,19 @@ impl Command for IsNull {
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?; let value = input.into_value(call.head);
command(engine_state, stack, call, df) if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
command(engine_state, stack, call, df)
} else {
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr.into_polars().is_null().into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
}
} }
} }
@ -76,11 +100,24 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::super::super::test_dataframe::test_dataframe;
use super::*; use super::*;
use crate::dataframe::lazy::aggregate::LazyAggregate;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
use crate::dataframe::test_dataframe::{build_test_engine_state, test_dataframe_example};
#[test] #[test]
fn test_examples() { fn test_examples_dataframe() {
test_dataframe(vec![Box::new(IsNull {})]) let mut engine_state = build_test_engine_state(vec![Box::new(IsNull {})]);
test_dataframe_example(&mut engine_state, &IsNull.examples()[0]);
}
#[test]
fn test_examples_expression() {
let mut engine_state = build_test_engine_state(vec![
Box::new(IsNull {}),
Box::new(LazyAggregate {}),
Box::new(ToLazyGroupBy {}),
]);
test_dataframe_example(&mut engine_state, &IsNull.examples()[1]);
} }
} }

View file

@ -1,4 +1,4 @@
use super::super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame, NuExpression};
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
@ -19,26 +19,39 @@ impl Command for NUnique {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name()) Signature::build(self.name())
.input_output_type( .input_output_types(vec![
Type::Custom("dataframe".into()), (
Type::Custom("dataframe".into()), Type::Custom("expression".into()),
) Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into())) .category(Category::Custom("dataframe".into()))
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![
description: "Counts unique values", Example {
example: "[1 1 2 2 3 3 4] | dfr into-df | dfr n-unique", description: "Counts unique values",
result: Some( example: "[1 1 2 2 3 3 4] | dfr into-df | dfr n-unique",
NuDataFrame::try_from_columns(vec![Column::new( result: Some(
"count_unique".to_string(), NuDataFrame::try_from_columns(vec![Column::new(
vec![Value::test_int(4)], "count_unique".to_string(),
)]) vec![Value::test_int(4)],
.expect("simple df for test should not fail") )])
.into_value(Span::test_data()), .expect("simple df for test should not fail")
), .into_value(Span::test_data()),
}] ),
},
Example {
description: "Creates a is n-unique expression from a column",
example: "dfr col a | dfr n-unique",
result: None,
},
]
} }
fn run( fn run(
@ -48,8 +61,19 @@ impl Command for NUnique {
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?; let value = input.into_value(call.head);
command(engine_state, stack, call, df) if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
command(engine_state, stack, call, df)
} else {
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr.into_polars().n_unique().into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
}
} }
} }
@ -77,11 +101,24 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example};
use super::*; use super::*;
use crate::dataframe::lazy::aggregate::LazyAggregate;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
#[test] #[test]
fn test_examples() { fn test_examples_dataframe() {
test_dataframe(vec![Box::new(NUnique {})]) let mut engine_state = build_test_engine_state(vec![Box::new(NUnique {})]);
test_dataframe_example(&mut engine_state, &NUnique.examples()[0]);
}
#[test]
fn test_examples_expression() {
let mut engine_state = build_test_engine_state(vec![
Box::new(NUnique {}),
Box::new(LazyAggregate {}),
Box::new(ToLazyGroupBy {}),
]);
test_dataframe_example(&mut engine_state, &NUnique.examples()[1]);
} }
} }

View file

@ -2,7 +2,7 @@ use nu_engine::eval_block;
use nu_parser::parse; use nu_parser::parse;
use nu_protocol::{ use nu_protocol::{
engine::{Command, EngineState, Stack, StateWorkingSet}, engine::{Command, EngineState, Stack, StateWorkingSet},
PipelineData, Span, Example, PipelineData, Span,
}; };
use super::eager::ToDataFrame; use super::eager::ToDataFrame;
@ -17,6 +17,14 @@ pub fn test_dataframe(cmds: Vec<Box<dyn Command + 'static>>) {
// The first element in the cmds vector must be the one tested // The first element in the cmds vector must be the one tested
let examples = cmds[0].examples(); let examples = cmds[0].examples();
let mut engine_state = build_test_engine_state(cmds.clone());
for example in examples {
test_dataframe_example(&mut engine_state, &example);
}
}
pub fn build_test_engine_state(cmds: Vec<Box<dyn Command + 'static>>) -> Box<EngineState> {
let mut engine_state = Box::new(EngineState::new()); let mut engine_state = Box::new(EngineState::new());
let delta = { let delta = {
@ -41,54 +49,55 @@ pub fn test_dataframe(cmds: Vec<Box<dyn Command + 'static>>) {
.merge_delta(delta) .merge_delta(delta)
.expect("Error merging delta"); .expect("Error merging delta");
for example in examples { engine_state
// Skip tests that don't have results to compare to }
if example.result.is_none() {
continue; pub fn test_dataframe_example(engine_state: &mut Box<EngineState>, example: &Example) {
// Skip tests that don't have results to compare to
if example.result.is_none() {
return;
}
let start = std::time::Instant::now();
let (block, delta) = {
let mut working_set = StateWorkingSet::new(&engine_state);
let output = parse(&mut working_set, None, example.example.as_bytes(), false);
if let Some(err) = working_set.parse_errors.first() {
panic!("test parse error in `{}`: {:?}", example.example, err)
} }
let start = std::time::Instant::now();
let (block, delta) = { (output, working_set.render())
let mut working_set = StateWorkingSet::new(&engine_state); };
let output = parse(&mut working_set, None, example.example.as_bytes(), false);
if let Some(err) = working_set.parse_errors.first() { engine_state
panic!("test parse error in `{}`: {:?}", example.example, err) .merge_delta(delta)
} .expect("Error merging delta");
(output, working_set.render()) let mut stack = Stack::new();
};
engine_state let result = eval_block(
.merge_delta(delta) &engine_state,
.expect("Error merging delta"); &mut stack,
&block,
PipelineData::empty(),
true,
true,
)
.unwrap_or_else(|err| panic!("test eval error in `{}`: {:?}", example.example, err))
.into_value(Span::test_data());
let mut stack = Stack::new(); println!("input: {}", example.example);
println!("result: {result:?}");
println!("done: {:?}", start.elapsed());
let result = eval_block( // Note. Value implements PartialEq for Bool, Int, Float, String and Block
&engine_state, // If the command you are testing requires to compare another case, then
&mut stack, // you need to define its equality in the Value struct
&block, if let Some(expected) = example.result.clone() {
PipelineData::empty(), if result != expected {
true, panic!("the example result is different to expected value: {result:?} != {expected:?}")
true,
)
.unwrap_or_else(|err| panic!("test eval error in `{}`: {:?}", example.example, err))
.into_value(Span::test_data());
println!("input: {}", example.example);
println!("result: {result:?}");
println!("done: {:?}", start.elapsed());
// Note. Value implements PartialEq for Bool, Int, Float, String and Block
// If the command you are testing requires to compare another case, then
// you need to define its equality in the Value struct
if let Some(expected) = example.result {
if result != expected {
panic!(
"the example result is different to expected value: {result:?} != {expected:?}"
)
}
} }
} }
} }