Nushell table list columns -> dataframe list columns. Explode / Flatten dataframe support. (#9951)

# Description
- Adds support for conversion between nushell lists and polars lists
instead of treating them as a polars object.
- Fixed explode and flatten to work both as expressions and as lazy
dataframe commands. The previous item was required to make this work.

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
Co-authored-by: Darren Schroeder <343840+fdncred@users.noreply.github.com>
This commit is contained in:
Jack Wright 2023-08-15 04:54:37 -07:00 committed by GitHub
parent 696b2cda4a
commit 8b160f9850
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 1129 additions and 569 deletions

View file

@ -344,36 +344,6 @@ expr_command!(
test_groups
);
// ExprFlatten command
// Expands to a command definition for a flatten expression
expr_command!(
ExprFlatten,
"dfr flatten",
"creates a flatten expression",
vec![Example {
description: "",
example: "",
result: None,
}],
flatten,
test_flatten
);
// ExprExplode command
// Expands to a command definition for a explode expression
expr_command!(
ExprExplode,
"dfr explode",
"creates an explode expression",
vec![Example {
description: "",
example: "",
result: None,
}],
explode,
test_explode
);
// ExprCount command
// Expands to a command definition for a count expression
expr_command!(

View file

@ -47,8 +47,6 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) {
ExprQuantile,
ExprList,
ExprAggGroups,
ExprFlatten,
ExprExplode,
ExprCount,
ExprIsIn,
ExprNot,

View file

@ -0,0 +1,158 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
/// Command type registered under the name `dfr explode`.
///
/// It carries no state; all behaviour lives in its `Command` implementation.
#[derive(Clone)]
pub struct LazyExplode;
// `Command` implementation for `dfr explode`.
impl Command for LazyExplode {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "dfr explode"
    }

    /// One-line help text for the command.
    fn usage(&self) -> &str {
        "Explodes a dataframe or creates a explode expression."
    }

    /// Declares a rest parameter of column names and two input/output
    /// pairings: expression -> expression and dataframe -> dataframe.
    fn signature(&self) -> Signature {
        // Small factories so each custom type name is spelled only once.
        let expression = || Type::Custom("expression".into());
        let dataframe = || Type::Custom("dataframe".into());

        Signature::build(self.name())
            .rest(
                "columns",
                SyntaxShape::String,
                "columns to explode, only applicable for dataframes",
            )
            .input_output_types(vec![
                (expression(), expression()),
                (dataframe(), dataframe()),
            ])
            .category(Category::Custom("lazyframe".into()))
    }

    /// Two examples: exploding a named column of a dataframe, and exploding
    /// a column expression inside `dfr select`.
    fn examples(&self) -> Vec<Example> {
        // Both expected results contain the same exploded `hobbies` column.
        let hobbies = || {
            Column::new(
                "hobbies".to_owned(),
                vec![
                    Value::test_string("Cycling"),
                    Value::test_string("Knitting"),
                    Value::test_string("Skiing"),
                    Value::test_string("Football"),
                ],
            )
        };

        // Expected output of the dataframe example: one row per hobby, with
        // `id` and `name` repeated.
        let exploded_frame = NuDataFrame::try_from_columns(vec![
            Column::new(
                "id".to_owned(),
                vec![
                    Value::test_int(1),
                    Value::test_int(1),
                    Value::test_int(2),
                    Value::test_int(2),
                ],
            ),
            Column::new(
                "name".to_owned(),
                vec![
                    Value::test_string("Mercy"),
                    Value::test_string("Mercy"),
                    Value::test_string("Bob"),
                    Value::test_string("Bob"),
                ],
            ),
            hobbies(),
        ])
        .expect("simple df for test should not fail")
        .into_value(Span::test_data());

        // Expected output of the expression example: only the exploded column.
        let exploded_column = NuDataFrame::try_from_columns(vec![hobbies()])
            .expect("simple df for test should not fail")
            .into_value(Span::test_data());

        vec![
            Example {
                description: "Explode the specified dataframe",
                example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | dfr into-df | dfr explode hobbies | dfr collect",
                result: Some(exploded_frame),
            },
            Example {
                description: "Select a column and explode the values",
                example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | dfr into-df | dfr select (dfr col hobbies | dfr explode)",
                result: Some(exploded_column),
            },
        ]
    }

    /// Delegates to [`explode`]; the engine state and stack are not used.
    fn run(
        &self,
        _engine_state: &EngineState,
        _stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        explode(call, input)
    }
}
/// Shared implementation behind `dfr explode` (and any caller that forwards
/// its `call`/`input` here).
///
/// The pipeline input is collected into a single value and handled in one of
/// two ways:
///
/// * If `NuDataFrame::can_downcast` accepts the value, it is converted to a
///   `NuLazyFrame`, the columns named by the call's positional arguments are
///   exploded, and the resulting lazy frame is returned.
/// * Otherwise the value is converted to a `NuExpression` and `.explode()` is
///   applied to the underlying polars expression.
///
/// # Errors
///
/// Propagates the `ShellError` from `NuLazyFrame::try_from_value`,
/// `NuExpression::try_from_value`, or `NuLazyFrame::into_value`.
pub(crate) fn explode(call: &Call, input: PipelineData) -> Result<PipelineData, ShellError> {
    let head = call.head;
    let input_value = input.into_value(head);

    // Expression input: wrap the polars `explode()` expression and return early.
    if !NuDataFrame::can_downcast(&input_value) {
        let exploded_expr: NuExpression = NuExpression::try_from_value(input_value)?
            .into_polars()
            .explode()
            .into();
        let result = NuExpression::into_value(exploded_expr, head);
        return Ok(PipelineData::Value(result, None));
    }

    // Dataframe input: explode the requested columns lazily.
    let lazy_frame = NuLazyFrame::try_from_value(input_value)?;

    // NOTE(review): positional arguments for which `as_string` yields `None`
    // are dropped silently by `filter_map` rather than reported as an error.
    let column_names: Vec<String> = call
        .positional_iter()
        .filter_map(|arg| arg.as_string())
        .collect();
    let column_refs: Vec<&str> = column_names.iter().map(String::as_str).collect();

    let exploded_frame = lazy_frame.into_polars().explode(column_refs);
    let result = NuLazyFrame::from(exploded_frame).into_value(head)?;
    Ok(PipelineData::Value(result, None))
}
#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example};
    use super::*;
    use crate::dataframe::lazy::{aggregate::LazyAggregate, groupby::ToLazyGroupBy};

    /// Checks the first example (exploding a dataframe column) with only
    /// `LazyExplode` registered in the test engine state.
    #[test]
    fn test_examples_dataframe() {
        let mut engine_state = build_test_engine_state(vec![Box::new(LazyExplode)]);
        let examples = LazyExplode.examples();
        test_dataframe_example(&mut engine_state, &examples[0]);
    }

    /// Checks the second example (exploding a column expression). Currently
    /// marked `#[ignore]`; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_examples_expression() {
        let mut engine_state = build_test_engine_state(vec![
            Box::new(LazyExplode),
            Box::new(LazyAggregate {}),
            Box::new(ToLazyGroupBy {}),
        ]);
        let examples = LazyExplode.examples();
        test_dataframe_example(&mut engine_state, &examples[1]);
    }
}

View file

@ -0,0 +1,132 @@
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
use crate::dataframe::values::{Column, NuDataFrame};
use super::explode::explode;
/// Command type registered under the name `dfr flatten`.
///
/// It carries no state; its `run` method forwards to the `explode` function
/// imported from the sibling `explode` module.
#[derive(Clone)]
pub struct LazyFlatten;
// `Command` implementation for `dfr flatten`.
impl Command for LazyFlatten {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "dfr flatten"
    }

    /// One-line help text for the command.
    fn usage(&self) -> &str {
        "An alias for dfr explode"
    }

    /// Declares a rest parameter of column names and two input/output
    /// pairings: expression -> expression and dataframe -> dataframe.
    fn signature(&self) -> Signature {
        // Small factories so each custom type name is spelled only once.
        let expression = || Type::Custom("expression".into());
        let dataframe = || Type::Custom("dataframe".into());

        Signature::build(self.name())
            .rest(
                "columns",
                SyntaxShape::String,
                "columns to flatten, only applicable for dataframes",
            )
            .input_output_types(vec![
                (expression(), expression()),
                (dataframe(), dataframe()),
            ])
            .category(Category::Custom("lazyframe".into()))
    }

    /// Two examples: flattening a named column of a dataframe, and flattening
    /// a column expression inside `dfr select`.
    fn examples(&self) -> Vec<Example> {
        // Both expected results contain the same flattened `hobbies` column.
        let hobbies = || {
            Column::new(
                "hobbies".to_owned(),
                vec![
                    Value::test_string("Cycling"),
                    Value::test_string("Knitting"),
                    Value::test_string("Skiing"),
                    Value::test_string("Football"),
                ],
            )
        };

        // Expected output of the dataframe example: one row per hobby, with
        // `id` and `name` repeated.
        let flattened_frame = NuDataFrame::try_from_columns(vec![
            Column::new(
                "id".to_owned(),
                vec![
                    Value::test_int(1),
                    Value::test_int(1),
                    Value::test_int(2),
                    Value::test_int(2),
                ],
            ),
            Column::new(
                "name".to_owned(),
                vec![
                    Value::test_string("Mercy"),
                    Value::test_string("Mercy"),
                    Value::test_string("Bob"),
                    Value::test_string("Bob"),
                ],
            ),
            hobbies(),
        ])
        .expect("simple df for test should not fail")
        .into_value(Span::test_data());

        // Expected output of the expression example: only the flattened column.
        let flattened_column = NuDataFrame::try_from_columns(vec![hobbies()])
            .expect("simple df for test should not fail")
            .into_value(Span::test_data());

        vec![
            Example {
                description: "Flatten the specified dataframe",
                example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | dfr into-df | dfr flatten hobbies | dfr collect",
                result: Some(flattened_frame),
            },
            Example {
                description: "Select a column and flatten the values",
                example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | dfr into-df | dfr select (dfr col hobbies | dfr flatten)",
                result: Some(flattened_column),
            },
        ]
    }

    /// Forwards to the shared `explode` function; the engine state and stack
    /// are not used.
    fn run(
        &self,
        _engine_state: &EngineState,
        _stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        explode(call, input)
    }
}
#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example};
    use super::*;
    use crate::dataframe::lazy::{aggregate::LazyAggregate, groupby::ToLazyGroupBy};

    /// Checks the first example (flattening a dataframe column) with only
    /// `LazyFlatten` registered in the test engine state.
    #[test]
    fn test_examples_dataframe() {
        let mut engine_state = build_test_engine_state(vec![Box::new(LazyFlatten)]);
        let examples = LazyFlatten.examples();
        test_dataframe_example(&mut engine_state, &examples[0]);
    }

    /// Checks the second example (flattening a column expression). Currently
    /// marked `#[ignore]`; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_examples_expression() {
        let mut engine_state = build_test_engine_state(vec![
            Box::new(LazyFlatten),
            Box::new(LazyAggregate {}),
            Box::new(ToLazyGroupBy {}),
        ]);
        let examples = LazyFlatten.examples();
        test_dataframe_example(&mut engine_state, &examples[1]);
    }
}

View file

@ -1,9 +1,11 @@
pub mod aggregate;
mod collect;
mod explode;
mod fetch;
mod fill_nan;
mod fill_null;
mod filter;
mod flatten;
pub mod groupby;
mod join;
mod macro_commands;
@ -27,6 +29,8 @@ use crate::dataframe::lazy::quantile::LazyQuantile;
pub(crate) use crate::dataframe::lazy::select::LazySelect;
use crate::dataframe::lazy::sort_by_expr::LazySortBy;
pub use crate::dataframe::lazy::to_lazy::ToLazyFrame;
pub use explode::LazyExplode;
pub use flatten::LazyFlatten;
pub fn add_lazy_decls(working_set: &mut StateWorkingSet) {
macro_rules! bind_command {
@ -54,6 +58,8 @@ pub fn add_lazy_decls(working_set: &mut StateWorkingSet) {
LazySelect,
LazySortBy,
ToLazyFrame,
ToLazyGroupBy
ToLazyGroupBy,
LazyExplode,
LazyFlatten
);
}