mirror of
https://github.com/nushell/nushell
synced 2025-01-26 11:55:20 +00:00
Nushell table list columns -> dataframe list columns. Explode / Flatten dataframe support. (#9951)
# Description - Adds support for conversion between nushell lists and polars lists instead of treating them as a polars object. - Fixes explode and flatten to work both as expressions and as lazy dataframe commands. The previous item was required to make this work. --------- Co-authored-by: Jack Wright <jack.wright@disqo.com> Co-authored-by: Darren Schroeder <343840+fdncred@users.noreply.github.com>
This commit is contained in:
parent
696b2cda4a
commit
8b160f9850
6 changed files with 1129 additions and 569 deletions
|
@ -344,36 +344,6 @@ expr_command!(
|
|||
test_groups
|
||||
);
|
||||
|
||||
// ExprFlatten command
|
||||
// Expands to a command definition for a flatten expression
|
||||
// The invocation passes, in order: the identifier `ExprFlatten`, the command
// name "dfr flatten", its usage text, a list of examples, and the identifiers
// `flatten` and `test_flatten`.
// NOTE(review): presumably these are the command type, the expression method
// to apply, and the generated test name — confirm against the `expr_command!`
// definition, which is not visible here.
// NOTE(review): the only example is a blank placeholder (empty description and
// example text, no expected result) — confirm whether a real one is needed.
expr_command!(
|
||||
ExprFlatten,
|
||||
"dfr flatten",
|
||||
"creates a flatten expression",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
flatten,
|
||||
test_flatten
|
||||
);
|
||||
|
||||
// ExprExplode command
|
||||
// Expands to a command definition for an explode expression
|
||||
// The invocation passes, in order: the identifier `ExprExplode`, the command
// name "dfr explode", its usage text, a list of examples, and the identifiers
// `explode` and `test_explode`.
// NOTE(review): presumably these are the command type, the expression method
// to apply, and the generated test name — confirm against the `expr_command!`
// definition, which is not visible here.
// NOTE(review): the only example is a blank placeholder (empty description and
// example text, no expected result) — confirm whether a real one is needed.
expr_command!(
|
||||
ExprExplode,
|
||||
"dfr explode",
|
||||
"creates an explode expression",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
explode,
|
||||
test_explode
|
||||
);
|
||||
|
||||
// ExprCount command
|
||||
// Expands to a command definition for a count expression
|
||||
expr_command!(
|
||||
|
|
|
@ -47,8 +47,6 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) {
|
|||
ExprQuantile,
|
||||
ExprList,
|
||||
ExprAggGroups,
|
||||
ExprFlatten,
|
||||
ExprExplode,
|
||||
ExprCount,
|
||||
ExprIsIn,
|
||||
ExprNot,
|
||||
|
|
158
crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs
Normal file
158
crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs
Normal file
|
@ -0,0 +1,158 @@
|
|||
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
|
||||
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyExplode;
|
||||
|
||||
impl Command for LazyExplode {
|
||||
fn name(&self) -> &str {
|
||||
"dfr explode"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Explodes a dataframe or creates a explode expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"columns",
|
||||
SyntaxShape::String,
|
||||
"columns to explode, only applicable for dataframes",
|
||||
)
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Explode the specified dataframe",
|
||||
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | dfr into-df | dfr explode hobbies | dfr collect",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"id".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(2),
|
||||
]),
|
||||
Column::new(
|
||||
"name".to_string(),
|
||||
vec![
|
||||
Value::test_string("Mercy"),
|
||||
Value::test_string("Mercy"),
|
||||
Value::test_string("Bob"),
|
||||
Value::test_string("Bob"),
|
||||
]),
|
||||
Column::new(
|
||||
"hobbies".to_string(),
|
||||
vec![
|
||||
Value::test_string("Cycling"),
|
||||
Value::test_string("Knitting"),
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
]).expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
)
|
||||
},
|
||||
Example {
|
||||
description: "Select a column and explode the values",
|
||||
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | dfr into-df | dfr select (dfr col hobbies | dfr explode)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"hobbies".to_string(),
|
||||
vec![
|
||||
Value::test_string("Cycling"),
|
||||
Value::test_string("Knitting"),
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
]).expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
explode(call, input)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn explode(call: &Call, input: PipelineData) -> Result<PipelineData, ShellError> {
|
||||
let value = input.into_value(call.head);
|
||||
if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuLazyFrame::try_from_value(value)?;
|
||||
let columns: Vec<String> = call
|
||||
.positional_iter()
|
||||
.filter_map(|e| e.as_string())
|
||||
.collect();
|
||||
|
||||
let exploded = df
|
||||
.into_polars()
|
||||
.explode(columns.iter().map(AsRef::as_ref).collect::<Vec<&str>>());
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuLazyFrame::from(exploded).into_value(call.head)?,
|
||||
None,
|
||||
))
|
||||
} else {
|
||||
let expr = NuExpression::try_from_value(value)?;
|
||||
let expr: NuExpression = expr.into_polars().explode().into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuExpression::into_value(expr, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example};
    use super::*;
    use crate::dataframe::lazy::{aggregate::LazyAggregate, groupby::ToLazyGroupBy};

    /// Runs the first example (exploding a dataframe column).
    #[test]
    fn test_examples_dataframe() {
        let mut engine = build_test_engine_state(vec![Box::new(LazyExplode)]);
        let examples = LazyExplode.examples();

        test_dataframe_example(&mut engine, &examples[0]);
    }

    /// Runs the second example (explode used as an expression); currently
    /// ignored.
    #[test]
    #[ignore]
    fn test_examples_expression() {
        let mut engine = build_test_engine_state(vec![
            Box::new(LazyExplode),
            Box::new(LazyAggregate {}),
            Box::new(ToLazyGroupBy {}),
        ]);
        let examples = LazyExplode.examples();

        test_dataframe_example(&mut engine, &examples[1]);
    }
}
|
132
crates/nu-cmd-dataframe/src/dataframe/lazy/flatten.rs
Normal file
132
crates/nu-cmd-dataframe/src/dataframe/lazy/flatten.rs
Normal file
|
@ -0,0 +1,132 @@
|
|||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use crate::dataframe::values::{Column, NuDataFrame};
|
||||
|
||||
use super::explode::explode;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyFlatten;
|
||||
|
||||
impl Command for LazyFlatten {
|
||||
fn name(&self) -> &str {
|
||||
"dfr flatten"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"An alias for dfr explode"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"columns",
|
||||
SyntaxShape::String,
|
||||
"columns to flatten, only applicable for dataframes",
|
||||
)
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Flatten the specified dataframe",
|
||||
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | dfr into-df | dfr flatten hobbies | dfr collect",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"id".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(2),
|
||||
]),
|
||||
Column::new(
|
||||
"name".to_string(),
|
||||
vec![
|
||||
Value::test_string("Mercy"),
|
||||
Value::test_string("Mercy"),
|
||||
Value::test_string("Bob"),
|
||||
Value::test_string("Bob"),
|
||||
]),
|
||||
Column::new(
|
||||
"hobbies".to_string(),
|
||||
vec![
|
||||
Value::test_string("Cycling"),
|
||||
Value::test_string("Knitting"),
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
]).expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
)
|
||||
},
|
||||
Example {
|
||||
description: "Select a column and flatten the values",
|
||||
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | dfr into-df | dfr select (dfr col hobbies | dfr flatten)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"hobbies".to_string(),
|
||||
vec![
|
||||
Value::test_string("Cycling"),
|
||||
Value::test_string("Knitting"),
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
]).expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
explode(call, input)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example};
    use super::*;
    use crate::dataframe::lazy::{aggregate::LazyAggregate, groupby::ToLazyGroupBy};

    /// Runs the first example (flattening a dataframe column).
    #[test]
    fn test_examples_dataframe() {
        let mut engine = build_test_engine_state(vec![Box::new(LazyFlatten)]);
        let examples = LazyFlatten.examples();

        test_dataframe_example(&mut engine, &examples[0]);
    }

    /// Runs the second example (flatten used as an expression); currently
    /// ignored.
    #[test]
    #[ignore]
    fn test_examples_expression() {
        let mut engine = build_test_engine_state(vec![
            Box::new(LazyFlatten),
            Box::new(LazyAggregate {}),
            Box::new(ToLazyGroupBy {}),
        ]);
        let examples = LazyFlatten.examples();

        test_dataframe_example(&mut engine, &examples[1]);
    }
}
|
|
@ -1,9 +1,11 @@
|
|||
pub mod aggregate;
|
||||
mod collect;
|
||||
mod explode;
|
||||
mod fetch;
|
||||
mod fill_nan;
|
||||
mod fill_null;
|
||||
mod filter;
|
||||
mod flatten;
|
||||
pub mod groupby;
|
||||
mod join;
|
||||
mod macro_commands;
|
||||
|
@ -27,6 +29,8 @@ use crate::dataframe::lazy::quantile::LazyQuantile;
|
|||
pub(crate) use crate::dataframe::lazy::select::LazySelect;
|
||||
use crate::dataframe::lazy::sort_by_expr::LazySortBy;
|
||||
pub use crate::dataframe::lazy::to_lazy::ToLazyFrame;
|
||||
pub use explode::LazyExplode;
|
||||
pub use flatten::LazyFlatten;
|
||||
|
||||
pub fn add_lazy_decls(working_set: &mut StateWorkingSet) {
|
||||
macro_rules! bind_command {
|
||||
|
@ -54,6 +58,8 @@ pub fn add_lazy_decls(working_set: &mut StateWorkingSet) {
|
|||
LazySelect,
|
||||
LazySortBy,
|
||||
ToLazyFrame,
|
||||
ToLazyGroupBy
|
||||
ToLazyGroupBy,
|
||||
LazyExplode,
|
||||
LazyFlatten
|
||||
);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue