mirror of
https://github.com/nushell/nushell
synced 2025-01-28 12:55:40 +00:00
check column type during aggregation (#6058)
* check column type during aggregation * check first if there is schema
This commit is contained in:
parent
57a6465ba0
commit
9d0be7d96f
6 changed files with 89 additions and 11 deletions
|
@ -6,6 +6,7 @@ use nu_protocol::{
|
||||||
engine::{Command, EngineState, Stack},
|
engine::{Command, EngineState, Stack},
|
||||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||||
};
|
};
|
||||||
|
use polars::{datatypes::DataType, prelude::Expr};
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct LazyAggregate;
|
pub struct LazyAggregate;
|
||||||
|
@ -118,12 +119,29 @@ impl Command for LazyAggregate {
|
||||||
let expressions = NuExpression::extract_exprs(value)?;
|
let expressions = NuExpression::extract_exprs(value)?;
|
||||||
|
|
||||||
let group_by = NuLazyGroupBy::try_from_pipeline(input, call.head)?;
|
let group_by = NuLazyGroupBy::try_from_pipeline(input, call.head)?;
|
||||||
let from_eager = group_by.from_eager;
|
|
||||||
|
|
||||||
let group_by = group_by.into_polars();
|
if let Some(schema) = &group_by.schema {
|
||||||
|
for expr in &expressions {
|
||||||
|
if let Some(name) = get_col_name(expr) {
|
||||||
|
let dtype = schema.get(name.as_str());
|
||||||
|
|
||||||
|
if matches!(dtype, Some(DataType::Object(..))) {
|
||||||
|
return Err(ShellError::GenericError(
|
||||||
|
"Object type column not supported for aggregation".into(),
|
||||||
|
format!("Column '{}' is type Object", name),
|
||||||
|
Some(call.head),
|
||||||
|
Some("Aggregations cannot be performed on Object type columns. Use dtype command to check column types".into()),
|
||||||
|
Vec::new(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let lazy = NuLazyFrame {
|
let lazy = NuLazyFrame {
|
||||||
lazy: group_by.agg(&expressions).into(),
|
from_eager: group_by.from_eager,
|
||||||
from_eager,
|
lazy: Some(group_by.into_polars().agg(&expressions)),
|
||||||
|
schema: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let res = lazy.into_value(call.head)?;
|
let res = lazy.into_value(call.head)?;
|
||||||
|
@ -131,6 +149,57 @@ impl Command for LazyAggregate {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn get_col_name(expr: &Expr) -> Option<String> {
|
||||||
|
match expr {
|
||||||
|
Expr::Column(column) => Some(column.to_string()),
|
||||||
|
Expr::Agg(agg) => match agg {
|
||||||
|
polars::prelude::AggExpr::Min(e)
|
||||||
|
| polars::prelude::AggExpr::Max(e)
|
||||||
|
| polars::prelude::AggExpr::Median(e)
|
||||||
|
| polars::prelude::AggExpr::NUnique(e)
|
||||||
|
| polars::prelude::AggExpr::First(e)
|
||||||
|
| polars::prelude::AggExpr::Last(e)
|
||||||
|
| polars::prelude::AggExpr::Mean(e)
|
||||||
|
| polars::prelude::AggExpr::List(e)
|
||||||
|
| polars::prelude::AggExpr::Count(e)
|
||||||
|
| polars::prelude::AggExpr::Sum(e)
|
||||||
|
| polars::prelude::AggExpr::AggGroups(e)
|
||||||
|
| polars::prelude::AggExpr::Std(e)
|
||||||
|
| polars::prelude::AggExpr::Var(e) => get_col_name(e.as_ref()),
|
||||||
|
polars::prelude::AggExpr::Quantile { expr, .. } => get_col_name(expr.as_ref()),
|
||||||
|
},
|
||||||
|
Expr::Reverse(expr)
|
||||||
|
| Expr::Shift { input: expr, .. }
|
||||||
|
| Expr::Filter { input: expr, .. }
|
||||||
|
| Expr::Slice { input: expr, .. }
|
||||||
|
| Expr::Cast { expr, .. }
|
||||||
|
| Expr::Sort { expr, .. }
|
||||||
|
| Expr::Take { expr, .. }
|
||||||
|
| Expr::SortBy { expr, .. }
|
||||||
|
| Expr::Exclude(expr, _)
|
||||||
|
| Expr::Alias(expr, _)
|
||||||
|
| Expr::KeepName(expr)
|
||||||
|
| Expr::Not(expr)
|
||||||
|
| Expr::IsNotNull(expr)
|
||||||
|
| Expr::IsNull(expr)
|
||||||
|
| Expr::Duplicated(expr)
|
||||||
|
| Expr::IsUnique(expr)
|
||||||
|
| Expr::Explode(expr) => get_col_name(expr.as_ref()),
|
||||||
|
Expr::Ternary { .. }
|
||||||
|
| Expr::AnonymousFunction { .. }
|
||||||
|
| Expr::Function { .. }
|
||||||
|
| Expr::Columns(_)
|
||||||
|
| Expr::DtypeColumn(_)
|
||||||
|
| Expr::Literal(_)
|
||||||
|
| Expr::BinaryExpr { .. }
|
||||||
|
| Expr::Window { .. }
|
||||||
|
| Expr::Wildcard
|
||||||
|
| Expr::RenameAlias { .. }
|
||||||
|
| Expr::Count
|
||||||
|
| Expr::Nth(_) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::super::super::test_dataframe::test_dataframe;
|
use super::super::super::test_dataframe::test_dataframe;
|
||||||
|
|
|
@ -128,13 +128,11 @@ impl Command for ToLazyGroupBy {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let value = input.into_value(call.head);
|
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
|
||||||
let lazy = NuLazyFrame::try_from_value(value)?;
|
|
||||||
let from_eager = lazy.from_eager;
|
|
||||||
|
|
||||||
let group_by = NuLazyGroupBy {
|
let group_by = NuLazyGroupBy {
|
||||||
|
schema: lazy.schema.clone(),
|
||||||
|
from_eager: lazy.from_eager,
|
||||||
group_by: Some(lazy.into_polars().groupby(&expressions)),
|
group_by: Some(lazy.into_polars().groupby(&expressions)),
|
||||||
from_eager,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(PipelineData::Value(group_by.into_value(call.head), None))
|
Ok(PipelineData::Value(group_by.into_value(call.head), None))
|
||||||
|
|
|
@ -15,6 +15,7 @@ impl CustomValue for NuLazyFrame {
|
||||||
let cloned = NuLazyFrame {
|
let cloned = NuLazyFrame {
|
||||||
lazy: self.lazy.clone(),
|
lazy: self.lazy.clone(),
|
||||||
from_eager: self.from_eager,
|
from_eager: self.from_eager,
|
||||||
|
schema: self.schema.clone(),
|
||||||
};
|
};
|
||||||
|
|
||||||
Value::CustomValue {
|
Value::CustomValue {
|
||||||
|
|
|
@ -3,7 +3,7 @@ mod custom_value;
|
||||||
use super::{NuDataFrame, NuExpression};
|
use super::{NuDataFrame, NuExpression};
|
||||||
use core::fmt;
|
use core::fmt;
|
||||||
use nu_protocol::{PipelineData, ShellError, Span, Value};
|
use nu_protocol::{PipelineData, ShellError, Span, Value};
|
||||||
use polars::prelude::{Expr, IntoLazy, LazyFrame};
|
use polars::prelude::{Expr, IntoLazy, LazyFrame, Schema};
|
||||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
|
|
||||||
// Lazyframe wrapper for Nushell operations
|
// Lazyframe wrapper for Nushell operations
|
||||||
|
@ -12,6 +12,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct NuLazyFrame {
|
pub struct NuLazyFrame {
|
||||||
pub lazy: Option<LazyFrame>,
|
pub lazy: Option<LazyFrame>,
|
||||||
|
pub schema: Option<Schema>,
|
||||||
pub from_eager: bool,
|
pub from_eager: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,6 +64,7 @@ impl From<LazyFrame> for NuLazyFrame {
|
||||||
Self {
|
Self {
|
||||||
lazy: Some(lazy_frame),
|
lazy: Some(lazy_frame),
|
||||||
from_eager: false,
|
from_eager: false,
|
||||||
|
schema: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -72,6 +74,7 @@ impl NuLazyFrame {
|
||||||
Self {
|
Self {
|
||||||
lazy: Some(lazy),
|
lazy: Some(lazy),
|
||||||
from_eager,
|
from_eager,
|
||||||
|
schema: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -80,6 +83,7 @@ impl NuLazyFrame {
|
||||||
Self {
|
Self {
|
||||||
lazy: Some(lazy),
|
lazy: Some(lazy),
|
||||||
from_eager: true,
|
from_eager: true,
|
||||||
|
schema: Some(df.as_ref().schema()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -148,6 +152,7 @@ impl NuLazyFrame {
|
||||||
Some(expr) => Ok(Self {
|
Some(expr) => Ok(Self {
|
||||||
lazy: expr.lazy.clone(),
|
lazy: expr.lazy.clone(),
|
||||||
from_eager: false,
|
from_eager: false,
|
||||||
|
schema: None,
|
||||||
}),
|
}),
|
||||||
None => Err(ShellError::CantConvert(
|
None => Err(ShellError::CantConvert(
|
||||||
"lazy frame".into(),
|
"lazy frame".into(),
|
||||||
|
@ -184,6 +189,7 @@ impl NuLazyFrame {
|
||||||
Self {
|
Self {
|
||||||
from_eager: self.from_eager,
|
from_eager: self.from_eager,
|
||||||
lazy: Some(new_frame),
|
lazy: Some(new_frame),
|
||||||
|
schema: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,7 @@ impl CustomValue for NuLazyGroupBy {
|
||||||
fn clone_value(&self, span: nu_protocol::Span) -> Value {
|
fn clone_value(&self, span: nu_protocol::Span) -> Value {
|
||||||
let cloned = NuLazyGroupBy {
|
let cloned = NuLazyGroupBy {
|
||||||
group_by: self.group_by.clone(),
|
group_by: self.group_by.clone(),
|
||||||
|
schema: self.schema.clone(),
|
||||||
from_eager: self.from_eager,
|
from_eager: self.from_eager,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ mod custom_value;
|
||||||
|
|
||||||
use core::fmt;
|
use core::fmt;
|
||||||
use nu_protocol::{PipelineData, ShellError, Span, Value};
|
use nu_protocol::{PipelineData, ShellError, Span, Value};
|
||||||
use polars::prelude::LazyGroupBy;
|
use polars::prelude::{LazyGroupBy, Schema};
|
||||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
|
|
||||||
// Lazyframe wrapper for Nushell operations
|
// Lazyframe wrapper for Nushell operations
|
||||||
|
@ -11,6 +11,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct NuLazyGroupBy {
|
pub struct NuLazyGroupBy {
|
||||||
pub group_by: Option<LazyGroupBy>,
|
pub group_by: Option<LazyGroupBy>,
|
||||||
|
pub schema: Option<Schema>,
|
||||||
pub from_eager: bool,
|
pub from_eager: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,6 +67,7 @@ impl From<LazyGroupBy> for NuLazyGroupBy {
|
||||||
Self {
|
Self {
|
||||||
group_by: Some(group_by),
|
group_by: Some(group_by),
|
||||||
from_eager: false,
|
from_eager: false,
|
||||||
|
schema: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -88,6 +90,7 @@ impl NuLazyGroupBy {
|
||||||
match val.as_any().downcast_ref::<NuLazyGroupBy>() {
|
match val.as_any().downcast_ref::<NuLazyGroupBy>() {
|
||||||
Some(group) => Ok(Self {
|
Some(group) => Ok(Self {
|
||||||
group_by: group.group_by.clone(),
|
group_by: group.group_by.clone(),
|
||||||
|
schema: group.schema.clone(),
|
||||||
from_eager: group.from_eager,
|
from_eager: group.from_eager,
|
||||||
}),
|
}),
|
||||||
None => Err(ShellError::CantConvert(
|
None => Err(ShellError::CantConvert(
|
||||||
|
|
Loading…
Reference in a new issue