polars into-df struct fix (#13977)

# Description
This fixes converting input to a dataframe when the schema passed to
`polars into-df` declares a struct column. For example, the following
now works correctly:
```nushell
 [[foo bar]; [{a: "a_0", b:"b_0"} 1] [{a: "a_1", b: "b_1" } 2]] | polars into-df -s {foo: {a: str, b: str}, bar: u8}
```
This commit is contained in:
Jack Wright 2024-10-02 03:59:14 -07:00 committed by GitHub
parent 573a7e2c7b
commit 1d6ac16530
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 58 additions and 19 deletions

View file

@ -6,6 +6,7 @@ use crate::{
use crate::values::NuDataFrame;
use log::debug;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
@ -159,7 +160,7 @@ impl PluginCommand for ToDataFrame {
},
Example {
description: "Convert to a dataframe and provide a schema",
example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| polars into-df -s {a: u8, b: {a: list<u64>}, c: list<str>}",
example: "[[a b c]; [1 {d: [1 2 3]} [10 11 12] ]]| polars into-df -s {a: u8, b: {d: list<u64>}, c: list<u8>}",
result: Some(
NuDataFrame::try_from_series_vec(vec![
Series::new("a", &[1u8]),
@ -172,7 +173,7 @@ impl PluginCommand for ToDataFrame {
},
{
let dtype = DataType::List(Box::new(DataType::String));
let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))];
let vals = vec![AnyValue::List(Series::new("c", &[10, 11, 12]))];
Series::from_any_values_and_dtype("c", &vals, &dtype, false)
.expect("List series should not fail")
}
@ -208,6 +209,8 @@ impl PluginCommand for ToDataFrame {
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
debug!("schema: {:?}", maybe_schema);
let maybe_as_columns = call.has_flag("as-columns")?;
let df = if !maybe_as_columns {
@ -230,14 +233,22 @@ impl PluginCommand for ToDataFrame {
.collect::<Vec<Column>>();
NuDataFrame::try_from_columns(columns, maybe_schema)?
}
Err(_) => NuDataFrame::try_from_iter(
plugin,
input.into_iter(),
maybe_schema.clone(),
)?,
Err(e) => {
debug!(
"Failed to build with multiple columns, attempting as series. failure:{e}"
);
NuDataFrame::try_from_iter(
plugin,
input.into_iter(),
maybe_schema.clone(),
)?
}
}
}
_ => NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema.clone())?,
_ => {
debug!("Other input: {input:?}");
NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema.clone())?
}
}
};

View file

@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::ops::{Deref, DerefMut};
use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc};
@ -487,25 +488,52 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
}
DataType::Struct(fields) => {
let schema = Some(NuSchema::new(Schema::from_iter(fields.clone())));
let mut structs: Vec<Series> = Vec::new();
// let mut structs: Vec<Series> = Vec::new();
let mut structs: HashMap<String, Series> = HashMap::new();
for v in column.values.iter() {
let mut column_values: ColumnMap = IndexMap::new();
let record = v.as_record()?;
insert_record(&mut column_values, record.clone(), &schema)?;
let df = from_parsed_columns(column_values)?;
structs.push(df.as_series(Span::unknown())?);
for name in df.df.get_column_names() {
let series = df.df.column(name).map_err(|e| ShellError::GenericError {
error: format!(
"Error creating struct, could not get column name {name}: {e}"
),
msg: "".into(),
span: None,
help: None,
inner: vec![],
})?;
if let Some(v) = structs.get_mut(name) {
let _ = v.append(series)
.map_err(|e| ShellError::GenericError {
error: format!("Error creating struct, could not append to series for col {name}: {e}"),
msg: "".into(),
span: None,
help: None,
inner: vec![],
})?;
} else {
structs.insert(name.to_string(), series.to_owned());
}
}
}
let chunked = StructChunked::new(column.name(), structs.as_ref()).map_err(|e| {
ShellError::GenericError {
error: format!("Error creating struct: {e}"),
msg: "".into(),
span: None,
help: None,
inner: vec![],
}
})?;
let structs: Vec<Series> = structs.into_values().collect();
let chunked =
StructChunked::new(column.name(), structs.as_slice()).map_err(|e| {
ShellError::GenericError {
error: format!("Error creating struct: {e}"),
msg: "".into(),
span: None,
help: None,
inner: vec![],
}
})?;
Ok(chunked.into_series())
}
_ => Err(ShellError::GenericError {