mirror of
https://github.com/nushell/nushell
synced 2025-01-27 20:35:43 +00:00
polars into-df struct fix (#13977)
# Description

This fixes an issue with converting to a dataframe when specifying a struct in the schema. Things like the following now work correctly:

```nushell
[[foo bar]; [{a: "a_0", b:"b_0"} 1] [{a: "a_1", b: "b_1" } 2]] | polars into-df -s {foo: {a: str, b: str}, bar: u8}
```
This commit is contained in:
parent
573a7e2c7b
commit
1d6ac16530
2 changed files with 58 additions and 19 deletions
|
@ -6,6 +6,7 @@ use crate::{
|
|||
|
||||
use crate::values::NuDataFrame;
|
||||
|
||||
use log::debug;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
||||
|
@ -159,7 +160,7 @@ impl PluginCommand for ToDataFrame {
|
|||
},
|
||||
Example {
|
||||
description: "Convert to a dataframe and provide a schema",
|
||||
example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| polars into-df -s {a: u8, b: {a: list<u64>}, c: list<str>}",
|
||||
example: "[[a b c]; [1 {d: [1 2 3]} [10 11 12] ]]| polars into-df -s {a: u8, b: {d: list<u64>}, c: list<u8>}",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series_vec(vec![
|
||||
Series::new("a", &[1u8]),
|
||||
|
@ -172,7 +173,7 @@ impl PluginCommand for ToDataFrame {
|
|||
},
|
||||
{
|
||||
let dtype = DataType::List(Box::new(DataType::String));
|
||||
let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))];
|
||||
let vals = vec![AnyValue::List(Series::new("c", &[10, 11, 12]))];
|
||||
Series::from_any_values_and_dtype("c", &vals, &dtype, false)
|
||||
.expect("List series should not fail")
|
||||
}
|
||||
|
@ -208,6 +209,8 @@ impl PluginCommand for ToDataFrame {
|
|||
.map(|schema| NuSchema::try_from(&schema))
|
||||
.transpose()?;
|
||||
|
||||
debug!("schema: {:?}", maybe_schema);
|
||||
|
||||
let maybe_as_columns = call.has_flag("as-columns")?;
|
||||
|
||||
let df = if !maybe_as_columns {
|
||||
|
@ -230,14 +233,22 @@ impl PluginCommand for ToDataFrame {
|
|||
.collect::<Vec<Column>>();
|
||||
NuDataFrame::try_from_columns(columns, maybe_schema)?
|
||||
}
|
||||
Err(_) => NuDataFrame::try_from_iter(
|
||||
plugin,
|
||||
input.into_iter(),
|
||||
maybe_schema.clone(),
|
||||
)?,
|
||||
Err(e) => {
|
||||
debug!(
|
||||
"Failed to build with multiple columns, attempting as series. failure:{e}"
|
||||
);
|
||||
NuDataFrame::try_from_iter(
|
||||
plugin,
|
||||
input.into_iter(),
|
||||
maybe_schema.clone(),
|
||||
)?
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema.clone())?,
|
||||
_ => {
|
||||
debug!("Other input: {input:?}");
|
||||
NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema.clone())?
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use std::collections::HashMap;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc};
|
||||
|
@ -487,25 +488,52 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
|
|||
}
|
||||
DataType::Struct(fields) => {
|
||||
let schema = Some(NuSchema::new(Schema::from_iter(fields.clone())));
|
||||
let mut structs: Vec<Series> = Vec::new();
|
||||
// let mut structs: Vec<Series> = Vec::new();
|
||||
let mut structs: HashMap<String, Series> = HashMap::new();
|
||||
|
||||
for v in column.values.iter() {
|
||||
let mut column_values: ColumnMap = IndexMap::new();
|
||||
let record = v.as_record()?;
|
||||
insert_record(&mut column_values, record.clone(), &schema)?;
|
||||
let df = from_parsed_columns(column_values)?;
|
||||
structs.push(df.as_series(Span::unknown())?);
|
||||
for name in df.df.get_column_names() {
|
||||
let series = df.df.column(name).map_err(|e| ShellError::GenericError {
|
||||
error: format!(
|
||||
"Error creating struct, could not get column name {name}: {e}"
|
||||
),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
if let Some(v) = structs.get_mut(name) {
|
||||
let _ = v.append(series)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: format!("Error creating struct, could not append to series for col {name}: {e}"),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
} else {
|
||||
structs.insert(name.to_string(), series.to_owned());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let chunked = StructChunked::new(column.name(), structs.as_ref()).map_err(|e| {
|
||||
ShellError::GenericError {
|
||||
error: format!("Error creating struct: {e}"),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}
|
||||
})?;
|
||||
let structs: Vec<Series> = structs.into_values().collect();
|
||||
|
||||
let chunked =
|
||||
StructChunked::new(column.name(), structs.as_slice()).map_err(|e| {
|
||||
ShellError::GenericError {
|
||||
error: format!("Error creating struct: {e}"),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}
|
||||
})?;
|
||||
Ok(chunked.into_series())
|
||||
}
|
||||
_ => Err(ShellError::GenericError {
|
||||
|
|
Loading…
Reference in a new issue