check column type during aggregation (#6058)

* check column type during aggregation

* check first if there is schema
This commit is contained in:
Fernando Herrera 2022-07-16 15:34:12 +01:00 committed by GitHub
parent 57a6465ba0
commit 9d0be7d96f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 89 additions and 11 deletions

View file

@ -6,6 +6,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
use polars::{datatypes::DataType, prelude::Expr};
#[derive(Clone)]
pub struct LazyAggregate;
@ -118,12 +119,29 @@ impl Command for LazyAggregate {
let expressions = NuExpression::extract_exprs(value)?;
let group_by = NuLazyGroupBy::try_from_pipeline(input, call.head)?;
let from_eager = group_by.from_eager;
let group_by = group_by.into_polars();
if let Some(schema) = &group_by.schema {
for expr in &expressions {
if let Some(name) = get_col_name(expr) {
let dtype = schema.get(name.as_str());
if matches!(dtype, Some(DataType::Object(..))) {
return Err(ShellError::GenericError(
"Object type column not supported for aggregation".into(),
format!("Column '{}' is type Object", name),
Some(call.head),
Some("Aggregations cannot be performed on Object type columns. Use dtype command to check column types".into()),
Vec::new(),
));
}
}
}
}
let lazy = NuLazyFrame {
lazy: group_by.agg(&expressions).into(),
from_eager,
from_eager: group_by.from_eager,
lazy: Some(group_by.into_polars().agg(&expressions)),
schema: None,
};
let res = lazy.into_value(call.head)?;
@ -131,6 +149,57 @@ impl Command for LazyAggregate {
}
}
fn get_col_name(expr: &Expr) -> Option<String> {
match expr {
Expr::Column(column) => Some(column.to_string()),
Expr::Agg(agg) => match agg {
polars::prelude::AggExpr::Min(e)
| polars::prelude::AggExpr::Max(e)
| polars::prelude::AggExpr::Median(e)
| polars::prelude::AggExpr::NUnique(e)
| polars::prelude::AggExpr::First(e)
| polars::prelude::AggExpr::Last(e)
| polars::prelude::AggExpr::Mean(e)
| polars::prelude::AggExpr::List(e)
| polars::prelude::AggExpr::Count(e)
| polars::prelude::AggExpr::Sum(e)
| polars::prelude::AggExpr::AggGroups(e)
| polars::prelude::AggExpr::Std(e)
| polars::prelude::AggExpr::Var(e) => get_col_name(e.as_ref()),
polars::prelude::AggExpr::Quantile { expr, .. } => get_col_name(expr.as_ref()),
},
Expr::Reverse(expr)
| Expr::Shift { input: expr, .. }
| Expr::Filter { input: expr, .. }
| Expr::Slice { input: expr, .. }
| Expr::Cast { expr, .. }
| Expr::Sort { expr, .. }
| Expr::Take { expr, .. }
| Expr::SortBy { expr, .. }
| Expr::Exclude(expr, _)
| Expr::Alias(expr, _)
| Expr::KeepName(expr)
| Expr::Not(expr)
| Expr::IsNotNull(expr)
| Expr::IsNull(expr)
| Expr::Duplicated(expr)
| Expr::IsUnique(expr)
| Expr::Explode(expr) => get_col_name(expr.as_ref()),
Expr::Ternary { .. }
| Expr::AnonymousFunction { .. }
| Expr::Function { .. }
| Expr::Columns(_)
| Expr::DtypeColumn(_)
| Expr::Literal(_)
| Expr::BinaryExpr { .. }
| Expr::Window { .. }
| Expr::Wildcard
| Expr::RenameAlias { .. }
| Expr::Count
| Expr::Nth(_) => None,
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;

View file

@ -128,13 +128,11 @@ impl Command for ToLazyGroupBy {
));
}
let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value(value)?;
let from_eager = lazy.from_eager;
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let group_by = NuLazyGroupBy {
schema: lazy.schema.clone(),
from_eager: lazy.from_eager,
group_by: Some(lazy.into_polars().groupby(&expressions)),
from_eager,
};
Ok(PipelineData::Value(group_by.into_value(call.head), None))

View file

@ -15,6 +15,7 @@ impl CustomValue for NuLazyFrame {
let cloned = NuLazyFrame {
lazy: self.lazy.clone(),
from_eager: self.from_eager,
schema: self.schema.clone(),
};
Value::CustomValue {

View file

@ -3,7 +3,7 @@ mod custom_value;
use super::{NuDataFrame, NuExpression};
use core::fmt;
use nu_protocol::{PipelineData, ShellError, Span, Value};
use polars::prelude::{Expr, IntoLazy, LazyFrame};
use polars::prelude::{Expr, IntoLazy, LazyFrame, Schema};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
// Lazyframe wrapper for Nushell operations
@ -12,6 +12,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
#[derive(Default)]
pub struct NuLazyFrame {
pub lazy: Option<LazyFrame>,
pub schema: Option<Schema>,
pub from_eager: bool,
}
@ -63,6 +64,7 @@ impl From<LazyFrame> for NuLazyFrame {
Self {
lazy: Some(lazy_frame),
from_eager: false,
schema: None,
}
}
}
@ -72,6 +74,7 @@ impl NuLazyFrame {
Self {
lazy: Some(lazy),
from_eager,
schema: None,
}
}
@ -80,6 +83,7 @@ impl NuLazyFrame {
Self {
lazy: Some(lazy),
from_eager: true,
schema: Some(df.as_ref().schema()),
}
}
@ -148,6 +152,7 @@ impl NuLazyFrame {
Some(expr) => Ok(Self {
lazy: expr.lazy.clone(),
from_eager: false,
schema: None,
}),
None => Err(ShellError::CantConvert(
"lazy frame".into(),
@ -184,6 +189,7 @@ impl NuLazyFrame {
Self {
from_eager: self.from_eager,
lazy: Some(new_frame),
schema: None,
}
}
}

View file

@ -14,6 +14,7 @@ impl CustomValue for NuLazyGroupBy {
fn clone_value(&self, span: nu_protocol::Span) -> Value {
let cloned = NuLazyGroupBy {
group_by: self.group_by.clone(),
schema: self.schema.clone(),
from_eager: self.from_eager,
};

View file

@ -2,7 +2,7 @@ mod custom_value;
use core::fmt;
use nu_protocol::{PipelineData, ShellError, Span, Value};
use polars::prelude::LazyGroupBy;
use polars::prelude::{LazyGroupBy, Schema};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
// Lazyframe wrapper for Nushell operations
@ -11,6 +11,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
#[derive(Default)]
pub struct NuLazyGroupBy {
pub group_by: Option<LazyGroupBy>,
pub schema: Option<Schema>,
pub from_eager: bool,
}
@ -66,6 +67,7 @@ impl From<LazyGroupBy> for NuLazyGroupBy {
Self {
group_by: Some(group_by),
from_eager: false,
schema: None,
}
}
}
@ -88,6 +90,7 @@ impl NuLazyGroupBy {
match val.as_any().downcast_ref::<NuLazyGroupBy>() {
Some(group) => Ok(Self {
group_by: group.group_by.clone(),
schema: group.schema.clone(),
from_eager: group.from_eager,
}),
None => Err(ShellError::CantConvert(