mirror of
https://github.com/nushell/nushell
synced 2025-01-28 04:45:18 +00:00
`polars into-df` struct fix (#13977)
# Description

This fixes an issue with converting to a dataframe when specifying a struct in the schema. Things like the following now work correctly:

```nushell
[[foo bar]; [{a: "a_0", b:"b_0"} 1] [{a: "a_1", b: "b_1" } 2]] | polars into-df -s {foo: {a: str, b: str}, bar: u8}
```
This commit is contained in:
parent
573a7e2c7b
commit
1d6ac16530
2 changed files with 58 additions and 19 deletions
|
@ -6,6 +6,7 @@ use crate::{
|
||||||
|
|
||||||
use crate::values::NuDataFrame;
|
use crate::values::NuDataFrame;
|
||||||
|
|
||||||
|
use log::debug;
|
||||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||||
use nu_protocol::{
|
use nu_protocol::{
|
||||||
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
||||||
|
@ -159,7 +160,7 @@ impl PluginCommand for ToDataFrame {
|
||||||
},
|
},
|
||||||
Example {
|
Example {
|
||||||
description: "Convert to a dataframe and provide a schema",
|
description: "Convert to a dataframe and provide a schema",
|
||||||
example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| polars into-df -s {a: u8, b: {a: list<u64>}, c: list<str>}",
|
example: "[[a b c]; [1 {d: [1 2 3]} [10 11 12] ]]| polars into-df -s {a: u8, b: {d: list<u64>}, c: list<u8>}",
|
||||||
result: Some(
|
result: Some(
|
||||||
NuDataFrame::try_from_series_vec(vec![
|
NuDataFrame::try_from_series_vec(vec![
|
||||||
Series::new("a", &[1u8]),
|
Series::new("a", &[1u8]),
|
||||||
|
@ -172,7 +173,7 @@ impl PluginCommand for ToDataFrame {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
let dtype = DataType::List(Box::new(DataType::String));
|
let dtype = DataType::List(Box::new(DataType::String));
|
||||||
let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))];
|
let vals = vec![AnyValue::List(Series::new("c", &[10, 11, 12]))];
|
||||||
Series::from_any_values_and_dtype("c", &vals, &dtype, false)
|
Series::from_any_values_and_dtype("c", &vals, &dtype, false)
|
||||||
.expect("List series should not fail")
|
.expect("List series should not fail")
|
||||||
}
|
}
|
||||||
|
@ -208,6 +209,8 @@ impl PluginCommand for ToDataFrame {
|
||||||
.map(|schema| NuSchema::try_from(&schema))
|
.map(|schema| NuSchema::try_from(&schema))
|
||||||
.transpose()?;
|
.transpose()?;
|
||||||
|
|
||||||
|
debug!("schema: {:?}", maybe_schema);
|
||||||
|
|
||||||
let maybe_as_columns = call.has_flag("as-columns")?;
|
let maybe_as_columns = call.has_flag("as-columns")?;
|
||||||
|
|
||||||
let df = if !maybe_as_columns {
|
let df = if !maybe_as_columns {
|
||||||
|
@ -230,14 +233,22 @@ impl PluginCommand for ToDataFrame {
|
||||||
.collect::<Vec<Column>>();
|
.collect::<Vec<Column>>();
|
||||||
NuDataFrame::try_from_columns(columns, maybe_schema)?
|
NuDataFrame::try_from_columns(columns, maybe_schema)?
|
||||||
}
|
}
|
||||||
Err(_) => NuDataFrame::try_from_iter(
|
Err(e) => {
|
||||||
plugin,
|
debug!(
|
||||||
input.into_iter(),
|
"Failed to build with multiple columns, attempting as series. failure:{e}"
|
||||||
maybe_schema.clone(),
|
);
|
||||||
)?,
|
NuDataFrame::try_from_iter(
|
||||||
|
plugin,
|
||||||
|
input.into_iter(),
|
||||||
|
maybe_schema.clone(),
|
||||||
|
)?
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema.clone())?,
|
_ => {
|
||||||
|
debug!("Other input: {input:?}");
|
||||||
|
NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema.clone())?
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::ops::{Deref, DerefMut};
|
use std::ops::{Deref, DerefMut};
|
||||||
|
|
||||||
use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc};
|
use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc};
|
||||||
|
@ -487,25 +488,52 @@ fn typed_column_to_series(name: &str, column: TypedColumn) -> Result<Series, She
|
||||||
}
|
}
|
||||||
DataType::Struct(fields) => {
|
DataType::Struct(fields) => {
|
||||||
let schema = Some(NuSchema::new(Schema::from_iter(fields.clone())));
|
let schema = Some(NuSchema::new(Schema::from_iter(fields.clone())));
|
||||||
let mut structs: Vec<Series> = Vec::new();
|
// let mut structs: Vec<Series> = Vec::new();
|
||||||
|
let mut structs: HashMap<String, Series> = HashMap::new();
|
||||||
|
|
||||||
for v in column.values.iter() {
|
for v in column.values.iter() {
|
||||||
let mut column_values: ColumnMap = IndexMap::new();
|
let mut column_values: ColumnMap = IndexMap::new();
|
||||||
let record = v.as_record()?;
|
let record = v.as_record()?;
|
||||||
insert_record(&mut column_values, record.clone(), &schema)?;
|
insert_record(&mut column_values, record.clone(), &schema)?;
|
||||||
let df = from_parsed_columns(column_values)?;
|
let df = from_parsed_columns(column_values)?;
|
||||||
structs.push(df.as_series(Span::unknown())?);
|
for name in df.df.get_column_names() {
|
||||||
|
let series = df.df.column(name).map_err(|e| ShellError::GenericError {
|
||||||
|
error: format!(
|
||||||
|
"Error creating struct, could not get column name {name}: {e}"
|
||||||
|
),
|
||||||
|
msg: "".into(),
|
||||||
|
span: None,
|
||||||
|
help: None,
|
||||||
|
inner: vec![],
|
||||||
|
})?;
|
||||||
|
|
||||||
|
if let Some(v) = structs.get_mut(name) {
|
||||||
|
let _ = v.append(series)
|
||||||
|
.map_err(|e| ShellError::GenericError {
|
||||||
|
error: format!("Error creating struct, could not append to series for col {name}: {e}"),
|
||||||
|
msg: "".into(),
|
||||||
|
span: None,
|
||||||
|
help: None,
|
||||||
|
inner: vec![],
|
||||||
|
})?;
|
||||||
|
} else {
|
||||||
|
structs.insert(name.to_string(), series.to_owned());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let chunked = StructChunked::new(column.name(), structs.as_ref()).map_err(|e| {
|
let structs: Vec<Series> = structs.into_values().collect();
|
||||||
ShellError::GenericError {
|
|
||||||
error: format!("Error creating struct: {e}"),
|
let chunked =
|
||||||
msg: "".into(),
|
StructChunked::new(column.name(), structs.as_slice()).map_err(|e| {
|
||||||
span: None,
|
ShellError::GenericError {
|
||||||
help: None,
|
error: format!("Error creating struct: {e}"),
|
||||||
inner: vec![],
|
msg: "".into(),
|
||||||
}
|
span: None,
|
||||||
})?;
|
help: None,
|
||||||
|
inner: vec![],
|
||||||
|
}
|
||||||
|
})?;
|
||||||
Ok(chunked.into_series())
|
Ok(chunked.into_series())
|
||||||
}
|
}
|
||||||
_ => Err(ShellError::GenericError {
|
_ => Err(ShellError::GenericError {
|
||||||
|
|
Loading…
Reference in a new issue