check column type during aggregation (#6058)

* check column type during aggregation

* check first if there is schema
This commit is contained in:
Fernando Herrera 2022-07-16 15:34:12 +01:00 committed by GitHub
parent 57a6465ba0
commit 9d0be7d96f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 89 additions and 11 deletions

View file

@ -6,6 +6,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
}; };
use polars::{datatypes::DataType, prelude::Expr};
#[derive(Clone)] #[derive(Clone)]
pub struct LazyAggregate; pub struct LazyAggregate;
@ -118,12 +119,29 @@ impl Command for LazyAggregate {
let expressions = NuExpression::extract_exprs(value)?; let expressions = NuExpression::extract_exprs(value)?;
let group_by = NuLazyGroupBy::try_from_pipeline(input, call.head)?; let group_by = NuLazyGroupBy::try_from_pipeline(input, call.head)?;
let from_eager = group_by.from_eager;
let group_by = group_by.into_polars(); if let Some(schema) = &group_by.schema {
for expr in &expressions {
if let Some(name) = get_col_name(expr) {
let dtype = schema.get(name.as_str());
if matches!(dtype, Some(DataType::Object(..))) {
return Err(ShellError::GenericError(
"Object type column not supported for aggregation".into(),
format!("Column '{}' is type Object", name),
Some(call.head),
Some("Aggregations cannot be performed on Object type columns. Use dtype command to check column types".into()),
Vec::new(),
));
}
}
}
}
let lazy = NuLazyFrame { let lazy = NuLazyFrame {
lazy: group_by.agg(&expressions).into(), from_eager: group_by.from_eager,
from_eager, lazy: Some(group_by.into_polars().agg(&expressions)),
schema: None,
}; };
let res = lazy.into_value(call.head)?; let res = lazy.into_value(call.head)?;
@ -131,6 +149,57 @@ impl Command for LazyAggregate {
} }
} }
fn get_col_name(expr: &Expr) -> Option<String> {
match expr {
Expr::Column(column) => Some(column.to_string()),
Expr::Agg(agg) => match agg {
polars::prelude::AggExpr::Min(e)
| polars::prelude::AggExpr::Max(e)
| polars::prelude::AggExpr::Median(e)
| polars::prelude::AggExpr::NUnique(e)
| polars::prelude::AggExpr::First(e)
| polars::prelude::AggExpr::Last(e)
| polars::prelude::AggExpr::Mean(e)
| polars::prelude::AggExpr::List(e)
| polars::prelude::AggExpr::Count(e)
| polars::prelude::AggExpr::Sum(e)
| polars::prelude::AggExpr::AggGroups(e)
| polars::prelude::AggExpr::Std(e)
| polars::prelude::AggExpr::Var(e) => get_col_name(e.as_ref()),
polars::prelude::AggExpr::Quantile { expr, .. } => get_col_name(expr.as_ref()),
},
Expr::Reverse(expr)
| Expr::Shift { input: expr, .. }
| Expr::Filter { input: expr, .. }
| Expr::Slice { input: expr, .. }
| Expr::Cast { expr, .. }
| Expr::Sort { expr, .. }
| Expr::Take { expr, .. }
| Expr::SortBy { expr, .. }
| Expr::Exclude(expr, _)
| Expr::Alias(expr, _)
| Expr::KeepName(expr)
| Expr::Not(expr)
| Expr::IsNotNull(expr)
| Expr::IsNull(expr)
| Expr::Duplicated(expr)
| Expr::IsUnique(expr)
| Expr::Explode(expr) => get_col_name(expr.as_ref()),
Expr::Ternary { .. }
| Expr::AnonymousFunction { .. }
| Expr::Function { .. }
| Expr::Columns(_)
| Expr::DtypeColumn(_)
| Expr::Literal(_)
| Expr::BinaryExpr { .. }
| Expr::Window { .. }
| Expr::Wildcard
| Expr::RenameAlias { .. }
| Expr::Count
| Expr::Nth(_) => None,
}
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::test_dataframe;

View file

@ -128,13 +128,11 @@ impl Command for ToLazyGroupBy {
)); ));
} }
let value = input.into_value(call.head); let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let lazy = NuLazyFrame::try_from_value(value)?;
let from_eager = lazy.from_eager;
let group_by = NuLazyGroupBy { let group_by = NuLazyGroupBy {
schema: lazy.schema.clone(),
from_eager: lazy.from_eager,
group_by: Some(lazy.into_polars().groupby(&expressions)), group_by: Some(lazy.into_polars().groupby(&expressions)),
from_eager,
}; };
Ok(PipelineData::Value(group_by.into_value(call.head), None)) Ok(PipelineData::Value(group_by.into_value(call.head), None))

View file

@ -15,6 +15,7 @@ impl CustomValue for NuLazyFrame {
let cloned = NuLazyFrame { let cloned = NuLazyFrame {
lazy: self.lazy.clone(), lazy: self.lazy.clone(),
from_eager: self.from_eager, from_eager: self.from_eager,
schema: self.schema.clone(),
}; };
Value::CustomValue { Value::CustomValue {

View file

@ -3,7 +3,7 @@ mod custom_value;
use super::{NuDataFrame, NuExpression}; use super::{NuDataFrame, NuExpression};
use core::fmt; use core::fmt;
use nu_protocol::{PipelineData, ShellError, Span, Value}; use nu_protocol::{PipelineData, ShellError, Span, Value};
use polars::prelude::{Expr, IntoLazy, LazyFrame}; use polars::prelude::{Expr, IntoLazy, LazyFrame, Schema};
use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde::{Deserialize, Deserializer, Serialize, Serializer};
// Lazyframe wrapper for Nushell operations // Lazyframe wrapper for Nushell operations
@ -12,6 +12,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
#[derive(Default)] #[derive(Default)]
pub struct NuLazyFrame { pub struct NuLazyFrame {
pub lazy: Option<LazyFrame>, pub lazy: Option<LazyFrame>,
pub schema: Option<Schema>,
pub from_eager: bool, pub from_eager: bool,
} }
@ -63,6 +64,7 @@ impl From<LazyFrame> for NuLazyFrame {
Self { Self {
lazy: Some(lazy_frame), lazy: Some(lazy_frame),
from_eager: false, from_eager: false,
schema: None,
} }
} }
} }
@ -72,6 +74,7 @@ impl NuLazyFrame {
Self { Self {
lazy: Some(lazy), lazy: Some(lazy),
from_eager, from_eager,
schema: None,
} }
} }
@ -80,6 +83,7 @@ impl NuLazyFrame {
Self { Self {
lazy: Some(lazy), lazy: Some(lazy),
from_eager: true, from_eager: true,
schema: Some(df.as_ref().schema()),
} }
} }
@ -148,6 +152,7 @@ impl NuLazyFrame {
Some(expr) => Ok(Self { Some(expr) => Ok(Self {
lazy: expr.lazy.clone(), lazy: expr.lazy.clone(),
from_eager: false, from_eager: false,
schema: None,
}), }),
None => Err(ShellError::CantConvert( None => Err(ShellError::CantConvert(
"lazy frame".into(), "lazy frame".into(),
@ -184,6 +189,7 @@ impl NuLazyFrame {
Self { Self {
from_eager: self.from_eager, from_eager: self.from_eager,
lazy: Some(new_frame), lazy: Some(new_frame),
schema: None,
} }
} }
} }

View file

@ -14,6 +14,7 @@ impl CustomValue for NuLazyGroupBy {
fn clone_value(&self, span: nu_protocol::Span) -> Value { fn clone_value(&self, span: nu_protocol::Span) -> Value {
let cloned = NuLazyGroupBy { let cloned = NuLazyGroupBy {
group_by: self.group_by.clone(), group_by: self.group_by.clone(),
schema: self.schema.clone(),
from_eager: self.from_eager, from_eager: self.from_eager,
}; };

View file

@ -2,7 +2,7 @@ mod custom_value;
use core::fmt; use core::fmt;
use nu_protocol::{PipelineData, ShellError, Span, Value}; use nu_protocol::{PipelineData, ShellError, Span, Value};
use polars::prelude::LazyGroupBy; use polars::prelude::{LazyGroupBy, Schema};
use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde::{Deserialize, Deserializer, Serialize, Serializer};
// Lazyframe wrapper for Nushell operations // Lazyframe wrapper for Nushell operations
@ -11,6 +11,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
#[derive(Default)] #[derive(Default)]
pub struct NuLazyGroupBy { pub struct NuLazyGroupBy {
pub group_by: Option<LazyGroupBy>, pub group_by: Option<LazyGroupBy>,
pub schema: Option<Schema>,
pub from_eager: bool, pub from_eager: bool,
} }
@ -66,6 +67,7 @@ impl From<LazyGroupBy> for NuLazyGroupBy {
Self { Self {
group_by: Some(group_by), group_by: Some(group_by),
from_eager: false, from_eager: false,
schema: None,
} }
} }
} }
@ -88,6 +90,7 @@ impl NuLazyGroupBy {
match val.as_any().downcast_ref::<NuLazyGroupBy>() { match val.as_any().downcast_ref::<NuLazyGroupBy>() {
Some(group) => Ok(Self { Some(group) => Ok(Self {
group_by: group.group_by.clone(), group_by: group.group_by.clone(),
schema: group.schema.clone(),
from_eager: group.from_eager, from_eager: group.from_eager,
}), }),
None => Err(ShellError::CantConvert( None => Err(ShellError::CantConvert(