From 1d6ac16530b2eca5088357b2dbac26213908dce2 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Wed, 2 Oct 2024 03:59:14 -0700 Subject: [PATCH] `polars into-df` struct fix (#13977) # Description This fixes an issue with converting to a dataframe when specifying a struct in the schema. Things like the following now work correctly: ```nushell [[foo bar]; [{a: "a_0", b:"b_0"} 1] [{a: "a_1", b: "b_1" } 2]] | polars into-df -s {foo: {a: str, b: str}, bar: u8} ``` --- .../src/dataframe/command/core/to_df.rs | 27 +++++++--- .../values/nu_dataframe/conversion.rs | 50 +++++++++++++++---- 2 files changed, 58 insertions(+), 19 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/command/core/to_df.rs b/crates/nu_plugin_polars/src/dataframe/command/core/to_df.rs index e3a9491411..066e07d01f 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/core/to_df.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/core/to_df.rs @@ -6,6 +6,7 @@ use crate::{ use crate::values::NuDataFrame; +use log::debug; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_protocol::{ Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value, @@ -159,7 +160,7 @@ impl PluginCommand for ToDataFrame { }, Example { description: "Convert to a dataframe and provide a schema", - example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| polars into-df -s {a: u8, b: {a: list}, c: list}", + example: "[[a b c]; [1 {d: [1 2 3]} [10 11 12] ]]| polars into-df -s {a: u8, b: {d: list}, c: list}", result: Some( NuDataFrame::try_from_series_vec(vec![ Series::new("a", &[1u8]), @@ -172,7 +173,7 @@ impl PluginCommand for ToDataFrame { }, { let dtype = DataType::List(Box::new(DataType::String)); - let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))]; + let vals = vec![AnyValue::List(Series::new("c", &[10, 11, 12]))]; Series::from_any_values_and_dtype("c", &vals, &dtype, false) .expect("List series should not fail") 
} @@ -208,6 +209,8 @@ impl PluginCommand for ToDataFrame { .map(|schema| NuSchema::try_from(&schema)) .transpose()?; + debug!("schema: {:?}", maybe_schema); + let maybe_as_columns = call.has_flag("as-columns")?; let df = if !maybe_as_columns { @@ -230,14 +233,22 @@ impl PluginCommand for ToDataFrame { .collect::<Vec<Column>>(); NuDataFrame::try_from_columns(columns, maybe_schema)? } - Err(_) => NuDataFrame::try_from_iter( - plugin, - input.into_iter(), - maybe_schema.clone(), - )?, + Err(e) => { + debug!( + "Failed to build with multiple columns, attempting as series. failure:{e}" + ); + NuDataFrame::try_from_iter( + plugin, + input.into_iter(), + maybe_schema.clone(), + )? + } } } - _ => NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema.clone())?, + _ => { + debug!("Other input: {input:?}"); + NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema.clone())? + } } }; diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs index c01b0ae834..9cf8c30b1d 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::ops::{Deref, DerefMut}; use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc}; @@ -487,25 +488,52 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, ShellError> { let schema = Some(NuSchema::new(Schema::from_iter(fields.clone()))); - let mut structs: Vec<Series> = Vec::new(); + // let mut structs: Vec<Series> = Vec::new(); + let mut structs: HashMap<String, Series> = HashMap::new(); for v in column.values.iter() { let mut column_values: ColumnMap = IndexMap::new(); let record = v.as_record()?; insert_record(&mut column_values, record.clone(), &schema)?; let df = from_parsed_columns(column_values)?; - structs.push(df.as_series(Span::unknown())?); + for name in df.df.get_column_names() { 
let series = df.df.column(name).map_err(|e| ShellError::GenericError { + error: format!( + "Error creating struct, could not get column name {name}: {e}" + ), + msg: "".into(), + span: None, + help: None, + inner: vec![], + })?; + + if let Some(v) = structs.get_mut(name) { + let _ = v.append(series) + .map_err(|e| ShellError::GenericError { + error: format!("Error creating struct, could not append to series for col {name}: {e}"), + msg: "".into(), + span: None, + help: None, + inner: vec![], + })?; + } else { + structs.insert(name.to_string(), series.to_owned()); + } + } } - let chunked = StructChunked::new(column.name(), structs.as_ref()).map_err(|e| { - ShellError::GenericError { - error: format!("Error creating struct: {e}"), - msg: "".into(), - span: None, - help: None, - inner: vec![], - } - })?; + let structs: Vec<Series> = structs.into_values().collect(); + + let chunked = + StructChunked::new(column.name(), structs.as_slice()).map_err(|e| { + ShellError::GenericError { + error: format!("Error creating struct: {e}"), + msg: "".into(), + span: None, + help: None, + inner: vec![], + } + })?; Ok(chunked.into_series()) } _ => Err(ShellError::GenericError {