From dba2f6e0f8054c85c58f777765a917755b21bad7 Mon Sep 17 00:00:00 2001
From: Jack Wright <56345+ayax79@users.noreply.github.com>
Date: Wed, 16 Oct 2024 19:24:14 -0700
Subject: [PATCH] Implemented `polars unnest` (#14104)
# Description
Provides the ability to decompose struct columns into separate columns for
each field.
# User-Facing Changes
- provides a new command `polars unnest` for decomposing struct fields
into separate columns.
---
.../src/dataframe/command/data/mod.rs | 2 +
.../src/dataframe/command/data/unnest.rs | 145 ++++++++++++++++++
2 files changed, 147 insertions(+)
create mode 100644 crates/nu_plugin_polars/src/dataframe/command/data/unnest.rs
diff --git a/crates/nu_plugin_polars/src/dataframe/command/data/mod.rs b/crates/nu_plugin_polars/src/dataframe/command/data/mod.rs
index 058b277526..0857fff1da 100644
--- a/crates/nu_plugin_polars/src/dataframe/command/data/mod.rs
+++ b/crates/nu_plugin_polars/src/dataframe/command/data/mod.rs
@@ -32,6 +32,7 @@ mod sort_by_expr;
pub mod sql_context;
pub mod sql_expr;
mod take;
+mod unnest;
mod unpivot;
mod with_column;
use filter::LazyFilter;
@@ -109,5 +110,6 @@ pub(crate) fn data_commands() -> Vec &str {
+ "polars unnest"
+ }
+
+ /// Returns the human-readable summary of what this command does.
+ fn description(&self) -> &str {
+ "Decompose struct columns into separate columns for each of their fields. The new columns will be inserted into the dataframe at the location of the struct column."
+ }
+
+    /// Builds the command signature: it is named after `self.name()`, takes a
+    /// rest parameter `cols` of string shape, and is filed under the custom
+    /// `dataframe` category.
+    fn signature(&self) -> Signature {
+        let category = Category::Custom("dataframe".into());
+        let base = Signature::build(self.name());
+        base.rest("cols", SyntaxShape::String, "columns to unnest")
+            .category(category)
+    }
+
+    /// Provides two usage examples: unnesting the `person` struct column of an
+    /// eager dataframe, and doing the same through a lazy dataframe.
+    ///
+    /// Both examples declare the same expected result: a dataframe with the
+    /// columns `id`, `name` and `age`.
+    fn examples(&self) -> Vec<Example> {
+        // The eager and lazy examples share one expected dataframe, so build it
+        // in a single place instead of repeating the `df!` invocation.
+        let expected = || {
+            NuDataFrame::from(
+                df!(
+                    "id" => [1, 2],
+                    "name" => ["Bob", "Betty"],
+                    "age" => [36, 63]
+                )
+                .expect("Should be able to create a simple dataframe"),
+            )
+            .into_value(Span::test_data())
+        };
+        vec![
+            Example {
+                description: "Unnest a dataframe",
+                example: r#"[[id person]; [1 {name: "Bob", age: 36}] [2 {name: "Betty", age: 63}]]
+ | polars into-df -s {id: i64, person: {name: str, age: u8}}
+ | polars unnest person
+ | polars get id name age
+ | polars sort-by id"#,
+                result: Some(expected()),
+            },
+            Example {
+                description: "Unnest a lazy dataframe",
+                example: r#"[[id person]; [1 {name: "Bob", age: 36}] [2 {name: "Betty", age: 63}]]
+ | polars into-df -s {id: i64, person: {name: str, age: u8}}
+ | polars into-lazy
+ | polars unnest person
+ | polars select (polars col id) (polars col name) (polars col age)
+ | polars collect
+ | polars sort-by id"#,
+                result: Some(expected()),
+            },
+        ]
+    }
+
+    /// Entry point of the command.
+    ///
+    /// Reads a polars plugin object from the pipeline input and dispatches on
+    /// its kind: an eager dataframe goes to `command_eager`, a lazy dataframe
+    /// to `command_lazy`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the input cannot be read as a polars plugin
+    /// object, when it is neither a dataframe nor a lazy dataframe, or when
+    /// the selected handler fails. Every `ShellError` is converted into a
+    /// `LabeledError` before being returned.
+    fn run(
+        &self,
+        plugin: &Self::Plugin,
+        engine: &EngineInterface,
+        call: &EvaluatedCall,
+        input: PipelineData,
+    ) -> Result<PipelineData, LabeledError> {
+        match PolarsPluginObject::try_from_pipeline(plugin, input, call.head)? {
+            PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
+            PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
+            // Any other plugin object kind cannot be unnested.
+            _ => Err(ShellError::GenericError {
+                error: "Must be a dataframe or lazy dataframe".into(),
+                msg: "".into(),
+                span: Some(call.head),
+                help: None,
+                inner: vec![],
+            }),
+        }
+        .map_err(LabeledError::from)
+    }
+}
+
+/// Unnests the requested struct columns of an eager dataframe.
+///
+/// The column names are taken from the call's rest arguments (starting at
+/// position 0) and passed to the polars dataframe's `unnest`.
+///
+/// # Errors
+///
+/// Returns a `ShellError` when the rest arguments cannot be read, when polars
+/// reports a failure while unnesting (wrapped in a `GenericError` pointing at
+/// `call.head`), or when the result cannot be turned into pipeline data.
+fn command_eager(
+    plugin: &PolarsPlugin,
+    engine: &EngineInterface,
+    call: &EvaluatedCall,
+    df: NuDataFrame,
+) -> Result<PipelineData, ShellError> {
+    let cols = call.rest::<String>(0)?;
+    let polars = df.to_polars();
+    let result: NuDataFrame = polars
+        .unnest(cols)
+        .map_err(|e| ShellError::GenericError {
+            error: format!("Error unnesting dataframe: {e}"),
+            msg: "".into(),
+            span: Some(call.head),
+            help: None,
+            inner: vec![],
+        })?
+        .into();
+    result.to_pipeline_data(plugin, engine, call.head)
+}
+
+/// Unnests the requested struct columns of a lazy dataframe.
+///
+/// The column names are taken from the call's rest arguments (starting at
+/// position 0). Unlike the eager path, the `unnest` call here returns the new
+/// lazy frame directly, so there is no polars error to map at this point.
+///
+/// # Errors
+///
+/// Returns a `ShellError` when the rest arguments cannot be read or when the
+/// result cannot be turned into pipeline data.
+fn command_lazy(
+    plugin: &PolarsPlugin,
+    engine: &EngineInterface,
+    call: &EvaluatedCall,
+    df: NuLazyFrame,
+) -> Result<PipelineData, ShellError> {
+    let cols = call.rest::<String>(0)?;
+    let polars = df.to_polars();
+    let result: NuLazyFrame = polars.unnest(cols).into();
+    result.to_pipeline_data(plugin, engine, call.head)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::test::test_polars_plugin_command;
+
+    /// Hands `UnnestDF` to the shared `test_polars_plugin_command` helper and
+    /// propagates its result.
+    #[test]
+    fn test_examples() -> Result<(), ShellError> {
+        let command = UnnestDF;
+        test_polars_plugin_command(&command)
+    }
+}