mirror of
https://github.com/nushell/nushell
synced 2024-12-26 13:03:07 +00:00
Upgrading to polars 0.35 (#11241)
Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
parent
05d7d6d6ad
commit
31146a7591
14 changed files with 532 additions and 352 deletions
756
Cargo.lock
generated
756
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -19,12 +19,16 @@ nu-protocol = { path = "../nu-protocol", version = "0.87.2" }
|
|||
|
||||
# Potential dependencies for extras
|
||||
chrono = { version = "0.4", features = ["std", "unstable-locales"], default-features = false }
|
||||
fancy-regex = "0.11"
|
||||
chrono-tz = "0.8"
|
||||
fancy-regex = "0.12"
|
||||
indexmap = { version = "2.1" }
|
||||
num = { version = "0.4", optional = true }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
sqlparser = { version = "0.36.1", optional = true }
|
||||
polars-io = { version = "0.33", features = ["avro"], optional = true }
|
||||
sqlparser = { version = "0.39", optional = true }
|
||||
polars-io = { version = "0.35", features = ["avro"], optional = true }
|
||||
polars-arrow = "0.35"
|
||||
polars-ops = "0.35"
|
||||
polars-plan = "0.35"
|
||||
|
||||
[dependencies.polars]
|
||||
features = [
|
||||
|
@ -58,7 +62,7 @@ features = [
|
|||
"to_dummies",
|
||||
]
|
||||
optional = true
|
||||
version = "0.33"
|
||||
version = "0.35"
|
||||
|
||||
[features]
|
||||
dataframe = ["num", "polars", "polars-io", "sqlparser"]
|
||||
|
|
|
@ -150,6 +150,7 @@ fn from_parquet(
|
|||
low_memory: false,
|
||||
cloud_options: None,
|
||||
use_statistics: false,
|
||||
hive_partitioning: false,
|
||||
};
|
||||
|
||||
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
|
||||
|
@ -411,7 +412,7 @@ fn from_csv(
|
|||
Some(d) => d as u8,
|
||||
None => unreachable!(),
|
||||
};
|
||||
csv_reader.with_delimiter(delimiter)
|
||||
csv_reader.with_separator(delimiter)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -472,7 +473,7 @@ fn from_csv(
|
|||
Some(d) => d as u8,
|
||||
None => unreachable!(),
|
||||
};
|
||||
csv_reader.with_delimiter(delimiter)
|
||||
csv_reader.with_separator(delimiter)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -4,6 +4,8 @@ use nu_protocol::{
|
|||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, Type,
|
||||
};
|
||||
use polars::prelude::NamedFrom;
|
||||
use polars::series::Series;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
|
@ -81,7 +83,7 @@ fn command(
|
|||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<Spanned<usize>> = call.get_flag(engine_state, stack, "n-rows")?;
|
||||
let rows: Option<Spanned<i64>> = call.get_flag(engine_state, stack, "n-rows")?;
|
||||
let fraction: Option<Spanned<f64>> = call.get_flag(engine_state, stack, "fraction")?;
|
||||
let seed: Option<u64> = call
|
||||
.get_flag::<i64>(engine_state, stack, "seed")?
|
||||
|
@ -94,7 +96,7 @@ fn command(
|
|||
match (rows, fraction) {
|
||||
(Some(rows), None) => df
|
||||
.as_ref()
|
||||
.sample_n(rows.item, replace, shuffle, seed)
|
||||
.sample_n(&Series::new("s", &[rows.item]), replace, shuffle, seed)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error creating sample".into(),
|
||||
|
@ -106,7 +108,7 @@ fn command(
|
|||
}),
|
||||
(None, Some(frac)) => df
|
||||
.as_ref()
|
||||
.sample_frac(frac.item, replace, shuffle, seed)
|
||||
.sample_frac(&Series::new("frac", &[frac.item]), replace, shuffle, seed)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error creating sample".into(),
|
||||
|
|
|
@ -2,7 +2,8 @@ use crate::dataframe::eager::sql_expr::parse_sql_expr;
|
|||
use polars::error::{ErrString, PolarsError};
|
||||
use polars::prelude::{col, DataFrame, DataType, IntoLazy, LazyFrame};
|
||||
use sqlparser::ast::{
|
||||
Expr as SqlExpr, Select, SelectItem, SetExpr, Statement, TableFactor, Value as SQLValue,
|
||||
Expr as SqlExpr, GroupByExpr, Select, SelectItem, SetExpr, Statement, TableFactor,
|
||||
Value as SQLValue,
|
||||
};
|
||||
use sqlparser::dialect::GenericDialect;
|
||||
use sqlparser::parser::Parser;
|
||||
|
@ -96,8 +97,13 @@ impl SQLContext {
|
|||
.collect::<Result<Vec<_>, PolarsError>>()?;
|
||||
// Check for group by
|
||||
// After projection since there might be number.
|
||||
let group_by = select_stmt
|
||||
.group_by
|
||||
let group_by = match &select_stmt.group_by {
|
||||
GroupByExpr::All =>
|
||||
Err(
|
||||
PolarsError::ComputeError("Group-By Error: Only positive number or expression are supported, not all".into())
|
||||
)?,
|
||||
GroupByExpr::Expressions(expressions) => expressions
|
||||
}
|
||||
.iter()
|
||||
.map(
|
||||
|e|match e {
|
||||
|
|
|
@ -2,8 +2,8 @@ use polars::error::PolarsError;
|
|||
use polars::prelude::{col, lit, DataType, Expr, LiteralValue, PolarsResult as Result, TimeUnit};
|
||||
|
||||
use sqlparser::ast::{
|
||||
BinaryOperator as SQLBinaryOperator, DataType as SQLDataType, Expr as SqlExpr,
|
||||
Function as SQLFunction, Value as SqlValue, WindowType,
|
||||
ArrayElemTypeDef, BinaryOperator as SQLBinaryOperator, DataType as SQLDataType,
|
||||
Expr as SqlExpr, Function as SQLFunction, Value as SqlValue, WindowType,
|
||||
};
|
||||
|
||||
fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> {
|
||||
|
@ -13,7 +13,7 @@ fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> {
|
|||
| SQLDataType::Uuid
|
||||
| SQLDataType::Clob(_)
|
||||
| SQLDataType::Text
|
||||
| SQLDataType::String => DataType::Utf8,
|
||||
| SQLDataType::String(_) => DataType::Utf8,
|
||||
SQLDataType::Float(_) => DataType::Float32,
|
||||
SQLDataType::Real => DataType::Float32,
|
||||
SQLDataType::Double => DataType::Float64,
|
||||
|
@ -31,9 +31,12 @@ fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> {
|
|||
SQLDataType::Time(_, _) => DataType::Time,
|
||||
SQLDataType::Timestamp(_, _) => DataType::Datetime(TimeUnit::Microseconds, None),
|
||||
SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds),
|
||||
SQLDataType::Array(inner_type) => match inner_type {
|
||||
Some(inner_type) => DataType::List(Box::new(map_sql_polars_datatype(inner_type)?)),
|
||||
None => {
|
||||
SQLDataType::Array(array_type_def) => match array_type_def {
|
||||
ArrayElemTypeDef::AngleBracket(inner_type)
|
||||
| ArrayElemTypeDef::SquareBracket(inner_type) => {
|
||||
DataType::List(Box::new(map_sql_polars_datatype(inner_type)?))
|
||||
}
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
"SQL Datatype Array(None) was not supported in polars-sql yet!".into(),
|
||||
))
|
||||
|
@ -114,7 +117,11 @@ pub fn parse_sql_expr(expr: &SqlExpr) -> Result<Expr> {
|
|||
binary_op_(left, right, op)?
|
||||
}
|
||||
SqlExpr::Function(sql_function) => parse_sql_function(sql_function)?,
|
||||
SqlExpr::Cast { expr, data_type } => cast_(parse_sql_expr(expr)?, data_type)?,
|
||||
SqlExpr::Cast {
|
||||
expr,
|
||||
data_type,
|
||||
format: _,
|
||||
} => cast_(parse_sql_expr(expr)?, data_type)?,
|
||||
SqlExpr::Nested(expr) => parse_sql_expr(expr)?,
|
||||
SqlExpr::Value(value) => literal_expr(value)?,
|
||||
_ => {
|
||||
|
|
|
@ -87,9 +87,9 @@ fn command(
|
|||
let writer = CsvWriter::new(&mut file);
|
||||
|
||||
let writer = if no_header {
|
||||
writer.has_header(false)
|
||||
writer.include_header(false)
|
||||
} else {
|
||||
writer.has_header(true)
|
||||
writer.include_header(true)
|
||||
};
|
||||
|
||||
let mut writer = match delimiter {
|
||||
|
@ -109,7 +109,7 @@ fn command(
|
|||
None => unreachable!(),
|
||||
};
|
||||
|
||||
writer.with_delimiter(delimiter)
|
||||
writer.with_separator(delimiter)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -171,7 +171,7 @@ fn get_col_name(expr: &Expr) -> Option<String> {
|
|||
| Expr::Slice { input: expr, .. }
|
||||
| Expr::Cast { expr, .. }
|
||||
| Expr::Sort { expr, .. }
|
||||
| Expr::Take { expr, .. }
|
||||
| Expr::Gather { expr, .. }
|
||||
| Expr::SortBy { expr, .. }
|
||||
| Expr::Exclude(expr, _)
|
||||
| Expr::Alias(expr, _)
|
||||
|
@ -189,6 +189,7 @@ fn get_col_name(expr: &Expr) -> Option<String> {
|
|||
| Expr::RenameAlias { .. }
|
||||
| Expr::Count
|
||||
| Expr::Nth(_)
|
||||
| Expr::SubPlan(_, _)
|
||||
| Expr::Selector(_) => None,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ use nu_protocol::{
|
|||
Value,
|
||||
};
|
||||
use polars::prelude::{DataType, IntoSeries};
|
||||
use polars_ops::prelude::{cum_max, cum_min, cum_sum};
|
||||
|
||||
enum CumType {
|
||||
Min,
|
||||
|
@ -119,10 +120,19 @@ fn command(
|
|||
|
||||
let cum_type = CumType::from_str(&cum_type.item, cum_type.span)?;
|
||||
let mut res = match cum_type {
|
||||
CumType::Max => series.cummax(reverse),
|
||||
CumType::Min => series.cummin(reverse),
|
||||
CumType::Sum => series.cumsum(reverse),
|
||||
};
|
||||
CumType::Max => cum_max(&series, reverse),
|
||||
CumType::Min => cum_min(&series, reverse),
|
||||
CumType::Sum => cum_sum(&series, reverse),
|
||||
}
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error creating cumulative".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let name = format!("{}_{}", series.name(), cum_type.to_str());
|
||||
res.rename(&name);
|
||||
|
|
|
@ -9,6 +9,8 @@ use nu_protocol::{
|
|||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use polars_plan::prelude::lit;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Shift;
|
||||
|
||||
|
@ -98,7 +100,7 @@ fn command_lazy(
|
|||
let lazy: NuLazyFrame = match fill {
|
||||
Some(fill) => {
|
||||
let expr = NuExpression::try_from_value(fill)?.into_polars();
|
||||
lazy.shift_and_fill(shift, expr).into()
|
||||
lazy.shift_and_fill(lit(shift), expr).into()
|
||||
}
|
||||
None => lazy.shift(shift).into(),
|
||||
};
|
||||
|
|
|
@ -73,7 +73,7 @@ fn command(
|
|||
)
|
||||
})?;
|
||||
|
||||
let res = chunked.as_ref().str_lengths().into_series();
|
||||
let res = chunked.as_ref().str_len_bytes().into_series();
|
||||
|
||||
NuDataFrame::try_from_series(vec![res], call.head)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
|
|
|
@ -85,15 +85,7 @@ fn command(
|
|||
)
|
||||
})?;
|
||||
|
||||
let mut res = chunked.str_slice(start, length).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error slicing series".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
let mut res = chunked.str_slice(start, length);
|
||||
res.rename(series.name());
|
||||
|
||||
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
|
||||
|
|
|
@ -9,6 +9,7 @@ pub use operations::Axis;
|
|||
use indexmap::map::IndexMap;
|
||||
use nu_protocol::{did_you_mean, PipelineData, Record, ShellError, Span, Value};
|
||||
use polars::prelude::{DataFrame, DataType, IntoLazy, LazyFrame, PolarsObject, Series};
|
||||
use polars_arrow::util::total_ord::TotalEq;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
|
||||
|
||||
|
@ -61,6 +62,12 @@ impl std::hash::Hash for DataFrameValue {
|
|||
}
|
||||
}
|
||||
|
||||
impl TotalEq for DataFrameValue {
|
||||
fn tot_eq(&self, other: &Self) -> bool {
|
||||
self == other
|
||||
}
|
||||
}
|
||||
|
||||
impl PolarsObject for DataFrameValue {
|
||||
fn type_name() -> &'static str {
|
||||
"object"
|
||||
|
|
|
@ -299,7 +299,11 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
|
|||
},
|
||||
span,
|
||||
)),
|
||||
Expr::Take { expr, idx } => Ok(Value::record(
|
||||
Expr::Gather {
|
||||
expr,
|
||||
idx,
|
||||
returns_scalar: _,
|
||||
} => Ok(Value::record(
|
||||
record! {
|
||||
"expr" => expr_to_value(expr.as_ref(), span)?,
|
||||
"idx" => expr_to_value(idx.as_ref(), span)?,
|
||||
|
@ -401,7 +405,6 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
|
|||
Expr::Window {
|
||||
function,
|
||||
partition_by,
|
||||
order_by,
|
||||
options,
|
||||
} => {
|
||||
let partition_by: Result<Vec<Value>, ShellError> = partition_by
|
||||
|
@ -409,22 +412,21 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
|
|||
.map(|e| expr_to_value(e, span))
|
||||
.collect();
|
||||
|
||||
let order_by = order_by
|
||||
.as_ref()
|
||||
.map(|e| expr_to_value(e.as_ref(), span))
|
||||
.transpose()?
|
||||
.unwrap_or_else(|| Value::nothing(span));
|
||||
|
||||
Ok(Value::record(
|
||||
record! {
|
||||
"function" => expr_to_value(function, span)?,
|
||||
"partition_by" => Value::list(partition_by?, span),
|
||||
"order_by" => order_by,
|
||||
"options" => Value::string(format!("{options:?}"), span),
|
||||
},
|
||||
span,
|
||||
))
|
||||
}
|
||||
Expr::SubPlan(_, _) => Err(ShellError::UnsupportedInput {
|
||||
msg: "Expressions of type SubPlan are not yet supported".to_string(),
|
||||
input: format!("Expression is {expr:?}"),
|
||||
msg_span: span,
|
||||
input_span: Span::unknown(),
|
||||
}),
|
||||
// the parameter polars_plan::dsl::selector::Selector is not publicly exposed.
|
||||
// I am not sure what we can meaningfully do with this at this time.
|
||||
Expr::Selector(_) => Err(ShellError::UnsupportedInput {
|
||||
|
|
Loading…
Reference in a new issue