mirror of
https://github.com/nushell/nushell
synced 2025-01-14 14:14:13 +00:00
check column type during aggregation (#6058)
* check column type during aggregation * check first if there is schema
This commit is contained in:
parent
57a6465ba0
commit
9d0be7d96f
6 changed files with 89 additions and 11 deletions
|
@ -6,6 +6,7 @@ use nu_protocol::{
|
|||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::{datatypes::DataType, prelude::Expr};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyAggregate;
|
||||
|
@ -118,12 +119,29 @@ impl Command for LazyAggregate {
|
|||
let expressions = NuExpression::extract_exprs(value)?;
|
||||
|
||||
let group_by = NuLazyGroupBy::try_from_pipeline(input, call.head)?;
|
||||
let from_eager = group_by.from_eager;
|
||||
|
||||
let group_by = group_by.into_polars();
|
||||
if let Some(schema) = &group_by.schema {
|
||||
for expr in &expressions {
|
||||
if let Some(name) = get_col_name(expr) {
|
||||
let dtype = schema.get(name.as_str());
|
||||
|
||||
if matches!(dtype, Some(DataType::Object(..))) {
|
||||
return Err(ShellError::GenericError(
|
||||
"Object type column not supported for aggregation".into(),
|
||||
format!("Column '{}' is type Object", name),
|
||||
Some(call.head),
|
||||
Some("Aggregations cannot be performed on Object type columns. Use dtype command to check column types".into()),
|
||||
Vec::new(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let lazy = NuLazyFrame {
|
||||
lazy: group_by.agg(&expressions).into(),
|
||||
from_eager,
|
||||
from_eager: group_by.from_eager,
|
||||
lazy: Some(group_by.into_polars().agg(&expressions)),
|
||||
schema: None,
|
||||
};
|
||||
|
||||
let res = lazy.into_value(call.head)?;
|
||||
|
@ -131,6 +149,57 @@ impl Command for LazyAggregate {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_col_name(expr: &Expr) -> Option<String> {
|
||||
match expr {
|
||||
Expr::Column(column) => Some(column.to_string()),
|
||||
Expr::Agg(agg) => match agg {
|
||||
polars::prelude::AggExpr::Min(e)
|
||||
| polars::prelude::AggExpr::Max(e)
|
||||
| polars::prelude::AggExpr::Median(e)
|
||||
| polars::prelude::AggExpr::NUnique(e)
|
||||
| polars::prelude::AggExpr::First(e)
|
||||
| polars::prelude::AggExpr::Last(e)
|
||||
| polars::prelude::AggExpr::Mean(e)
|
||||
| polars::prelude::AggExpr::List(e)
|
||||
| polars::prelude::AggExpr::Count(e)
|
||||
| polars::prelude::AggExpr::Sum(e)
|
||||
| polars::prelude::AggExpr::AggGroups(e)
|
||||
| polars::prelude::AggExpr::Std(e)
|
||||
| polars::prelude::AggExpr::Var(e) => get_col_name(e.as_ref()),
|
||||
polars::prelude::AggExpr::Quantile { expr, .. } => get_col_name(expr.as_ref()),
|
||||
},
|
||||
Expr::Reverse(expr)
|
||||
| Expr::Shift { input: expr, .. }
|
||||
| Expr::Filter { input: expr, .. }
|
||||
| Expr::Slice { input: expr, .. }
|
||||
| Expr::Cast { expr, .. }
|
||||
| Expr::Sort { expr, .. }
|
||||
| Expr::Take { expr, .. }
|
||||
| Expr::SortBy { expr, .. }
|
||||
| Expr::Exclude(expr, _)
|
||||
| Expr::Alias(expr, _)
|
||||
| Expr::KeepName(expr)
|
||||
| Expr::Not(expr)
|
||||
| Expr::IsNotNull(expr)
|
||||
| Expr::IsNull(expr)
|
||||
| Expr::Duplicated(expr)
|
||||
| Expr::IsUnique(expr)
|
||||
| Expr::Explode(expr) => get_col_name(expr.as_ref()),
|
||||
Expr::Ternary { .. }
|
||||
| Expr::AnonymousFunction { .. }
|
||||
| Expr::Function { .. }
|
||||
| Expr::Columns(_)
|
||||
| Expr::DtypeColumn(_)
|
||||
| Expr::Literal(_)
|
||||
| Expr::BinaryExpr { .. }
|
||||
| Expr::Window { .. }
|
||||
| Expr::Wildcard
|
||||
| Expr::RenameAlias { .. }
|
||||
| Expr::Count
|
||||
| Expr::Nth(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
|
|
|
@ -128,13 +128,11 @@ impl Command for ToLazyGroupBy {
|
|||
));
|
||||
}
|
||||
|
||||
let value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value(value)?;
|
||||
let from_eager = lazy.from_eager;
|
||||
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
|
||||
let group_by = NuLazyGroupBy {
|
||||
schema: lazy.schema.clone(),
|
||||
from_eager: lazy.from_eager,
|
||||
group_by: Some(lazy.into_polars().groupby(&expressions)),
|
||||
from_eager,
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(group_by.into_value(call.head), None))
|
||||
|
|
|
@ -15,6 +15,7 @@ impl CustomValue for NuLazyFrame {
|
|||
let cloned = NuLazyFrame {
|
||||
lazy: self.lazy.clone(),
|
||||
from_eager: self.from_eager,
|
||||
schema: self.schema.clone(),
|
||||
};
|
||||
|
||||
Value::CustomValue {
|
||||
|
|
|
@ -3,7 +3,7 @@ mod custom_value;
|
|||
use super::{NuDataFrame, NuExpression};
|
||||
use core::fmt;
|
||||
use nu_protocol::{PipelineData, ShellError, Span, Value};
|
||||
use polars::prelude::{Expr, IntoLazy, LazyFrame};
|
||||
use polars::prelude::{Expr, IntoLazy, LazyFrame, Schema};
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
// Lazyframe wrapper for Nushell operations
|
||||
|
@ -12,6 +12,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
|||
#[derive(Default)]
|
||||
pub struct NuLazyFrame {
|
||||
pub lazy: Option<LazyFrame>,
|
||||
pub schema: Option<Schema>,
|
||||
pub from_eager: bool,
|
||||
}
|
||||
|
||||
|
@ -63,6 +64,7 @@ impl From<LazyFrame> for NuLazyFrame {
|
|||
Self {
|
||||
lazy: Some(lazy_frame),
|
||||
from_eager: false,
|
||||
schema: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -72,6 +74,7 @@ impl NuLazyFrame {
|
|||
Self {
|
||||
lazy: Some(lazy),
|
||||
from_eager,
|
||||
schema: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -80,6 +83,7 @@ impl NuLazyFrame {
|
|||
Self {
|
||||
lazy: Some(lazy),
|
||||
from_eager: true,
|
||||
schema: Some(df.as_ref().schema()),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -148,6 +152,7 @@ impl NuLazyFrame {
|
|||
Some(expr) => Ok(Self {
|
||||
lazy: expr.lazy.clone(),
|
||||
from_eager: false,
|
||||
schema: None,
|
||||
}),
|
||||
None => Err(ShellError::CantConvert(
|
||||
"lazy frame".into(),
|
||||
|
@ -184,6 +189,7 @@ impl NuLazyFrame {
|
|||
Self {
|
||||
from_eager: self.from_eager,
|
||||
lazy: Some(new_frame),
|
||||
schema: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ impl CustomValue for NuLazyGroupBy {
|
|||
fn clone_value(&self, span: nu_protocol::Span) -> Value {
|
||||
let cloned = NuLazyGroupBy {
|
||||
group_by: self.group_by.clone(),
|
||||
schema: self.schema.clone(),
|
||||
from_eager: self.from_eager,
|
||||
};
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ mod custom_value;
|
|||
|
||||
use core::fmt;
|
||||
use nu_protocol::{PipelineData, ShellError, Span, Value};
|
||||
use polars::prelude::LazyGroupBy;
|
||||
use polars::prelude::{LazyGroupBy, Schema};
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
// Lazyframe wrapper for Nushell operations
|
||||
|
@ -11,6 +11,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
|||
#[derive(Default)]
|
||||
pub struct NuLazyGroupBy {
|
||||
pub group_by: Option<LazyGroupBy>,
|
||||
pub schema: Option<Schema>,
|
||||
pub from_eager: bool,
|
||||
}
|
||||
|
||||
|
@ -66,6 +67,7 @@ impl From<LazyGroupBy> for NuLazyGroupBy {
|
|||
Self {
|
||||
group_by: Some(group_by),
|
||||
from_eager: false,
|
||||
schema: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -88,6 +90,7 @@ impl NuLazyGroupBy {
|
|||
match val.as_any().downcast_ref::<NuLazyGroupBy>() {
|
||||
Some(group) => Ok(Self {
|
||||
group_by: group.group_by.clone(),
|
||||
schema: group.schema.clone(),
|
||||
from_eager: group.from_eager,
|
||||
}),
|
||||
None => Err(ShellError::CantConvert(
|
||||
|
|
Loading…
Reference in a new issue