mirror of
https://github.com/nushell/nushell
synced 2025-01-12 05:09:04 +00:00
Improve handling of columns with null values (#14588)
Addresses some null-handling issues in #6882.
# Description
This changes how a column's type is guessed when a schema is not specified. New behavior:
1. Use the type of the first non-Value::Nothing value as the column's data type.
2. If the value type changes (ignoring Value::Nothing) in subsequent values, the data type is changed to DataType::Object("Value", None).
3. If a column does not have a value type, DataType::Object("Value", None) is assumed.
This commit is contained in:
parent
05ee7ea9c7
commit
219b44a04f
1 changed files with 256 additions and 291 deletions
|
@ -223,57 +223,30 @@ pub fn insert_value(
|
|||
inner: vec![],
|
||||
})
|
||||
}
|
||||
}
|
||||
// Checking that the type for the value is the same
|
||||
// for the previous value in the column
|
||||
else if col_val.values.is_empty() {
|
||||
if let Some(schema) = maybe_schema {
|
||||
if let Some(field) = schema.schema.get_field(&key) {
|
||||
col_val.column_type = Some(field.dtype().clone());
|
||||
}
|
||||
}
|
||||
|
||||
if col_val.column_type.is_none() {
|
||||
col_val.column_type = Some(value_to_data_type(&value));
|
||||
}
|
||||
col_val.values.push(value);
|
||||
Ok(())
|
||||
} else {
|
||||
let prev_value = &col_val.values[col_val.values.len() - 1];
|
||||
|
||||
match (&prev_value, &value) {
|
||||
(Value::Int { .. }, Value::Int { .. })
|
||||
| (Value::Float { .. }, Value::Float { .. })
|
||||
| (Value::String { .. }, Value::String { .. })
|
||||
| (Value::Bool { .. }, Value::Bool { .. })
|
||||
| (Value::Date { .. }, Value::Date { .. })
|
||||
| (Value::Filesize { .. }, Value::Filesize { .. })
|
||||
| (Value::Binary { .. }, Value::Binary { .. })
|
||||
| (Value::Duration { .. }, Value::Duration { .. }) => col_val.values.push(value),
|
||||
(_, Value::Nothing { .. }) => col_val.values.push(value),
|
||||
(Value::List { .. }, _) => {
|
||||
col_val.column_type = Some(value_to_data_type(&value));
|
||||
col_val.values.push(value);
|
||||
}
|
||||
_ => {
|
||||
let current_data_type = value_to_data_type(&value);
|
||||
if col_val.column_type.is_none() {
|
||||
col_val.column_type = value_to_data_type(&value);
|
||||
} else if let Some(current_data_type) = current_data_type {
|
||||
if col_val.column_type.as_ref() != Some(&current_data_type) {
|
||||
col_val.column_type = Some(DataType::Object("Value", None));
|
||||
}
|
||||
}
|
||||
col_val.values.push(value);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn value_to_data_type(value: &Value) -> DataType {
|
||||
fn value_to_data_type(value: &Value) -> Option<DataType> {
|
||||
match &value {
|
||||
Value::Int { .. } => DataType::Int64,
|
||||
Value::Float { .. } => DataType::Float64,
|
||||
Value::String { .. } => DataType::String,
|
||||
Value::Bool { .. } => DataType::Boolean,
|
||||
Value::Date { .. } => DataType::Date,
|
||||
Value::Duration { .. } => DataType::Duration(TimeUnit::Nanoseconds),
|
||||
Value::Filesize { .. } => DataType::Int64,
|
||||
Value::Binary { .. } => DataType::Binary,
|
||||
Value::Int { .. } => Some(DataType::Int64),
|
||||
Value::Float { .. } => Some(DataType::Float64),
|
||||
Value::String { .. } => Some(DataType::String),
|
||||
Value::Bool { .. } => Some(DataType::Boolean),
|
||||
Value::Date { .. } => Some(DataType::Date),
|
||||
Value::Duration { .. } => Some(DataType::Duration(TimeUnit::Nanoseconds)),
|
||||
Value::Filesize { .. } => Some(DataType::Int64),
|
||||
Value::Binary { .. } => Some(DataType::Binary),
|
||||
Value::List { vals, .. } => {
|
||||
// We need to determine the type inside of the list.
|
||||
// Since Value::List does not have any kind of
|
||||
|
@ -286,16 +259,20 @@ fn value_to_data_type(value: &Value) -> DataType {
|
|||
.filter(|v| !matches!(v, Value::Nothing { .. }))
|
||||
.map(value_to_data_type)
|
||||
.nth(1)
|
||||
.flatten()
|
||||
.unwrap_or(DataType::Object("Value", None));
|
||||
|
||||
DataType::List(Box::new(list_type))
|
||||
Some(DataType::List(Box::new(list_type)))
|
||||
}
|
||||
_ => DataType::Object("Value", None),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Series, ShellError> {
|
||||
if let Some(column_type) = &column.column_type {
|
||||
let column_type = &column
|
||||
.column_type
|
||||
.clone()
|
||||
.unwrap_or(DataType::Object("Value", None));
|
||||
match column_type {
|
||||
DataType::Float32 => {
|
||||
let series_values: Result<Vec<_>, _> = column
|
||||
|
@ -538,11 +515,8 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
|
|||
|
||||
let structs: Vec<Series> = structs.into_values().collect();
|
||||
|
||||
let chunked = StructChunked::from_series(
|
||||
column.name().to_owned(),
|
||||
structs.len(),
|
||||
structs.iter(),
|
||||
)
|
||||
let chunked =
|
||||
StructChunked::from_series(column.name().to_owned(), structs.len(), structs.iter())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: format!("Error creating struct: {e}"),
|
||||
msg: "".into(),
|
||||
|
@ -560,15 +534,6 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
|
|||
inner: vec![],
|
||||
}),
|
||||
}
|
||||
} else {
|
||||
Err(ShellError::GenericError {
|
||||
error: "Passed a type column with no type".into(),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// The ColumnMap has the parsed data from the StreamInput
|
||||
|
|
Loading…
Reference in a new issue