From dba2f6e0f8054c85c58f777765a917755b21bad7 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Wed, 16 Oct 2024 19:24:14 -0700 Subject: [PATCH] Implemented `polars unnest` (#14104) # Description Provides the ability to decompose struct columns into separate columns for each field: Screenshot 2024-10-16 at 09 57 22 # User-Facing Changes - provides a new command `polars unnest` for decomposing struct fields into separate columns. --- .../src/dataframe/command/data/mod.rs | 2 + .../src/dataframe/command/data/unnest.rs | 145 ++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 crates/nu_plugin_polars/src/dataframe/command/data/unnest.rs diff --git a/crates/nu_plugin_polars/src/dataframe/command/data/mod.rs b/crates/nu_plugin_polars/src/dataframe/command/data/mod.rs index 058b277526..0857fff1da 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/data/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/data/mod.rs @@ -32,6 +32,7 @@ mod sort_by_expr; pub mod sql_context; pub mod sql_expr; mod take; +mod unnest; mod unpivot; mod with_column; use filter::LazyFilter; @@ -109,5 +110,6 @@ pub(crate) fn data_commands() -> Vec &str { + "polars unnest" + } + + fn description(&self) -> &str { + "Decompose struct columns into separate columns for each of their fields. The new columns will be inserted into the dataframe at the location of the struct column." 
+ } + + /* Command signature: built from `self.name()`, with a rest parameter `cols` of string shape (help text: columns to unnest), filed under the custom `dataframe` category. */ fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest("cols", SyntaxShape::String, "columns to unnest") + .category(Category::Custom("dataframe".into())) + } + + /* Usage examples: one on an eager dataframe and one that goes through `polars into-lazy` / `polars collect`. Both unnest the `person` struct column and carry an expected dataframe with `id`, `name` and `age` columns. */ fn examples(&self) -> Vec { + vec![ + Example { + description: "Unnest a dataframe", + example: r#"[[id person]; [1 {name: "Bob", age: 36}] [2 {name: "Betty", age: 63}]] + | polars into-df -s {id: i64, person: {name: str, age: u8}} + | polars unnest person + | polars get id name age + | polars sort-by id"#, + result: Some( + NuDataFrame::from( + df!( + "id" => [1, 2], + "name" => ["Bob", "Betty"], + "age" => [36, 63] + ) + .expect("Should be able to create a simple dataframe"), + ) + .into_value(Span::test_data()), + ), + }, + Example { + description: "Unnest a lazy dataframe", + example: r#"[[id person]; [1 {name: "Bob", age: 36}] [2 {name: "Betty", age: 63}]] + | polars into-df -s {id: i64, person: {name: str, age: u8}} + | polars into-lazy + | polars unnest person + | polars select (polars col id) (polars col name) (polars col age) + | polars collect + | polars sort-by id"#, + result: Some( + NuDataFrame::from( + df!( + "id" => [1, 2], + "name" => ["Bob", "Betty"], + "age" => [36, 63] + ) + .expect("Should be able to create a simple dataframe"), + ) + .into_value(Span::test_data()), + ), + }, + ] + } + + /* Entry point: converts the pipeline input into a `PolarsPluginObject` and dispatches on its variant. `NuDataFrame` goes to `command_eager`, `NuLazyFrame` goes to `command_lazy`, and any other variant yields a `ShellError::GenericError` at `call.head`. The error side is then mapped through `LabeledError::from`. */ fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + match PolarsPluginObject::try_from_pipeline(plugin, input, call.head)? 
{ + PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df), + PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy), + _ => Err(ShellError::GenericError { + error: "Must be a dataframe or lazy dataframe".into(), + msg: "".into(), + span: Some(call.head), + help: None, + inner: vec![], + }), + } + .map_err(LabeledError::from) + } +} + +/* Eager path: reads the rest arguments starting at position 0 as the columns, calls `unnest` on the frame returned by `df.to_polars()`, and wraps a failure in a `ShellError::GenericError` at `call.head` whose text embeds the underlying error. The result is converted into a `NuDataFrame` and handed back through `to_pipeline_data`. NOTE(review): `call.rest::(0)` and the bare `Result` return type show no type arguments here; they look stripped by whatever flattened this patch, so confirm against the upstream file. */ fn command_eager( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let cols = call.rest::(0)?; + let polars = df.to_polars(); + let result: NuDataFrame = polars + .unnest(cols) + .map_err(|e| ShellError::GenericError { + error: format!("Error unnesting dataframe: {e}"), + msg: "".into(), + span: Some(call.head), + help: None, + inner: vec![], + })? + .into(); + result.to_pipeline_data(plugin, engine, call.head) +} + +/* Lazy path: reads the columns the same way as `command_eager`, but the value returned by `unnest` is converted straight into a `NuLazyFrame` with no error mapping at this step, then handed back through `to_pipeline_data`. */ fn command_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuLazyFrame, +) -> Result { + let cols = call.rest::(0)?; + + let polars = df.to_polars(); + let result: NuLazyFrame = polars.unnest(cols).into(); + result.to_pipeline_data(plugin, engine, call.head) +} + +/* Test module: passes `UnnestDF` to `test_polars_plugin_command`. Presumably that helper executes the examples declared by the command and compares their results; confirm against the helper. */ #[cfg(test)] +mod test { + use crate::test::test_polars_plugin_command; + + use super::*; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&UnnestDF) + } +}