From 8bd68416e339726436d0ceb63b56d7a76ff25625 Mon Sep 17 00:00:00 2001 From: Fernando Herrera Date: Mon, 16 May 2022 08:27:43 +0100 Subject: [PATCH] Lazy dataframes (#5546) * lazyframe definition * expressions and lazy frames * new alias expression * more expression commands * updated to polars main * more expressions and groupby * more expressions, fetch and sort-by * csv reader * removed open csv * unique function * joining functions * join lazy frames commands with eager commands * corrected tests * Update .gitignore * Update .gitignore Co-authored-by: JT <547158+jntrnr@users.noreply.github.com> --- .gitignore | 1 + Cargo.lock | 90 ++-- crates/nu-command/Cargo.toml | 10 +- .../src/dataframe/eager/aggregate.rs | 403 ------------------ .../src/dataframe/eager/drop_duplicates.rs | 8 +- .../nu-command/src/dataframe/eager/dummies.rs | 4 +- .../src/dataframe/eager/filter_with.rs | 51 ++- .../nu-command/src/dataframe/eager/first.rs | 62 ++- .../nu-command/src/dataframe/eager/groupby.rs | 77 ---- crates/nu-command/src/dataframe/eager/join.rs | 235 ---------- crates/nu-command/src/dataframe/eager/last.rs | 62 ++- crates/nu-command/src/dataframe/eager/list.rs | 4 +- crates/nu-command/src/dataframe/eager/mod.rs | 12 - .../nu-command/src/dataframe/eager/pivot.rs | 198 --------- .../nu-command/src/dataframe/eager/rename.rs | 84 +++- .../nu-command/src/dataframe/eager/sample.rs | 32 +- .../src/dataframe/eager/with_column.rs | 126 ++++-- .../src/dataframe/expressions/alias.rs | 57 +++ .../src/dataframe/expressions/dsl/col.rs | 77 ++++ .../src/dataframe/expressions/dsl/lit.rs | 79 ++++ .../src/dataframe/expressions/dsl/mod.rs | 7 + .../src/dataframe/expressions/dsl/when.rs | 96 +++++ .../expressions/expressions_macro.rs | 109 +++++ .../src/dataframe/expressions/mod.rs | 36 ++ .../src/dataframe/expressions/to_nu.rs | 70 +++ .../src/dataframe/lazy/aggregate.rs | 91 ++++ .../nu-command/src/dataframe/lazy/collect.rs | 48 +++ crates/nu-command/src/dataframe/lazy/fetch.rs | 80 ++++ 
.../nu-command/src/dataframe/lazy/fill_na.rs | 65 +++ .../src/dataframe/lazy/fill_null.rs | 65 +++ .../nu-command/src/dataframe/lazy/groupby.rs | 98 +++++ crates/nu-command/src/dataframe/lazy/join.rs | 139 ++++++ .../src/dataframe/lazy/macro_commands.rs | 232 ++++++++++ crates/nu-command/src/dataframe/lazy/mod.rs | 63 +++ .../nu-command/src/dataframe/lazy/quantile.rs | 67 +++ .../nu-command/src/dataframe/lazy/select.rs | 78 ++++ .../src/dataframe/lazy/sort_by_expr.rs | 100 +++++ .../nu-command/src/dataframe/lazy/to_lazy.rs | 45 ++ crates/nu-command/src/dataframe/mod.rs | 7 + .../src/dataframe/series/date/as_date.rs | 2 +- .../src/dataframe/series/date/as_datetime.rs | 2 +- .../src/dataframe/series/date/get_day.rs | 2 +- .../src/dataframe/series/date/get_hour.rs | 2 +- .../src/dataframe/series/date/get_minute.rs | 2 +- .../src/dataframe/series/date/get_month.rs | 2 +- .../dataframe/series/date/get_nanosecond.rs | 2 +- .../src/dataframe/series/date/get_ordinal.rs | 2 +- .../src/dataframe/series/date/get_second.rs | 2 +- .../src/dataframe/series/date/get_week.rs | 2 +- .../src/dataframe/series/date/get_weekday.rs | 2 +- .../src/dataframe/series/date/get_year.rs | 2 +- .../src/dataframe/series/masks/is_not_null.rs | 71 ++- .../src/dataframe/series/masks/is_null.rs | 71 ++- .../src/dataframe/series/masks/not.rs | 68 ++- .../src/dataframe/series/n_unique.rs | 61 ++- .../nu-command/src/dataframe/series/shift.rs | 53 ++- .../nu-command/src/dataframe/series/unique.rs | 79 +++- .../src/dataframe/series/value_counts.rs | 2 +- crates/nu-command/src/dataframe/utils.rs | 15 + crates/nu-command/src/dataframe/values/mod.rs | 8 +- .../values/nu_dataframe/between_values.rs | 44 +- .../values/nu_dataframe/conversion.rs | 2 +- .../src/dataframe/values/nu_dataframe/mod.rs | 19 +- .../values/nu_expression/custom_value.rs | 149 +++++++ .../src/dataframe/values/nu_expression/mod.rs | 325 ++++++++++++++ .../src/dataframe/values/nu_groupby/mod.rs | 140 ------ 
.../values/nu_lazyframe/custom_value.rs | 53 +++ .../src/dataframe/values/nu_lazyframe/mod.rs | 156 +++++++ .../custom_value.rs | 21 +- .../dataframe/values/nu_lazygroupby/mod.rs | 114 +++++ crates/nu-protocol/src/value/from_value.rs | 25 ++ 71 files changed, 3304 insertions(+), 1364 deletions(-) delete mode 100644 crates/nu-command/src/dataframe/eager/aggregate.rs delete mode 100644 crates/nu-command/src/dataframe/eager/groupby.rs delete mode 100644 crates/nu-command/src/dataframe/eager/join.rs delete mode 100644 crates/nu-command/src/dataframe/eager/pivot.rs create mode 100644 crates/nu-command/src/dataframe/expressions/alias.rs create mode 100644 crates/nu-command/src/dataframe/expressions/dsl/col.rs create mode 100644 crates/nu-command/src/dataframe/expressions/dsl/lit.rs create mode 100644 crates/nu-command/src/dataframe/expressions/dsl/mod.rs create mode 100644 crates/nu-command/src/dataframe/expressions/dsl/when.rs create mode 100644 crates/nu-command/src/dataframe/expressions/expressions_macro.rs create mode 100644 crates/nu-command/src/dataframe/expressions/mod.rs create mode 100644 crates/nu-command/src/dataframe/expressions/to_nu.rs create mode 100644 crates/nu-command/src/dataframe/lazy/aggregate.rs create mode 100644 crates/nu-command/src/dataframe/lazy/collect.rs create mode 100644 crates/nu-command/src/dataframe/lazy/fetch.rs create mode 100644 crates/nu-command/src/dataframe/lazy/fill_na.rs create mode 100644 crates/nu-command/src/dataframe/lazy/fill_null.rs create mode 100644 crates/nu-command/src/dataframe/lazy/groupby.rs create mode 100644 crates/nu-command/src/dataframe/lazy/join.rs create mode 100644 crates/nu-command/src/dataframe/lazy/macro_commands.rs create mode 100644 crates/nu-command/src/dataframe/lazy/mod.rs create mode 100644 crates/nu-command/src/dataframe/lazy/quantile.rs create mode 100644 crates/nu-command/src/dataframe/lazy/select.rs create mode 100644 crates/nu-command/src/dataframe/lazy/sort_by_expr.rs create mode 100644 
crates/nu-command/src/dataframe/lazy/to_lazy.rs create mode 100644 crates/nu-command/src/dataframe/utils.rs create mode 100644 crates/nu-command/src/dataframe/values/nu_expression/custom_value.rs create mode 100644 crates/nu-command/src/dataframe/values/nu_expression/mod.rs delete mode 100644 crates/nu-command/src/dataframe/values/nu_groupby/mod.rs create mode 100644 crates/nu-command/src/dataframe/values/nu_lazyframe/custom_value.rs create mode 100644 crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs rename crates/nu-command/src/dataframe/values/{nu_groupby => nu_lazygroupby}/custom_value.rs (62%) create mode 100644 crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs diff --git a/.gitignore b/.gitignore index 38a1eb9229..9952d38d39 100644 --- a/.gitignore +++ b/.gitignore @@ -23,4 +23,5 @@ debian/nu/ .vscode/* # Helix configuration folder +.helix/* .helix diff --git a/Cargo.lock b/Cargo.lock index 95f2e7481f..3761ebfa84 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,9 +141,9 @@ dependencies = [ [[package]] name = "arrow2" -version = "0.10.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e387b20dd573a96f36b173d9027483898f944d696521afd74e2caa3c813d86e" +checksum = "b040061368d1314b0fd8b8f1fde0671eba1afc63a1c61a4dafaf2d4fc10c96f9" dependencies = [ "arrow-format", "base64", @@ -2980,15 +2980,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "ordered-float" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" -dependencies = [ - "num-traits", -] - [[package]] name = "output_vt100" version = "0.1.3" @@ -3060,22 +3051,20 @@ dependencies = [ [[package]] name = "parquet-format-async-temp" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03abc2f9c83fe9ceec83f47c76cc071bfd56caba33794340330f35623ab1f544" +checksum = 
"488c8b5f43521d019fade4bcc0ce88cce5da5fd26eb1d38b933807041f5930bf" dependencies = [ "async-trait", - "byteorder", "futures", "integer-encoding", - "ordered-float", ] [[package]] name = "parquet2" -version = "0.10.3" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b085f9e78e4842865151b693f6d94bdf7b280af66daa6e3587adeb3106a07e9" +checksum = "98f99f9724402d81faadd9cfa1e8dc78055fd0ddfdbefb7adab3a3a13e893408" dependencies = [ "async-stream", "bitpacking", @@ -3247,33 +3236,35 @@ dependencies = [ [[package]] name = "polars" -version = "0.20.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "656db3b86c338a8a717476eb29436a380ebdf74915a71cff6ecce78d52173e53" +checksum = "b140da767e129c60c41c8e1968ffab5f114bcf823182edb7fa900464a31bf421" dependencies = [ "polars-core", "polars-io", "polars-lazy", + "polars-ops", "polars-time", ] [[package]] name = "polars-arrow" -version = "0.20.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcedf44a7b15b60c69e811c9d343ac459788e961dc4136f002ed1b68a1fada07" +checksum = "6d27df11ee28956bd6f5aed54e7e05ce87b886871995e1da501134627ec89077" dependencies = [ "arrow2", "hashbrown 0.12.0", "num 0.4.0", + "serde", "thiserror", ] [[package]] name = "polars-core" -version = "0.20.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dfed0e21ac4d4c85df45b5864a68cfc5b2a97e9fba8a981be7b09c6f02a7eaa" +checksum = "fdf8d12cb7ec278516228fc86469f98c62ab81ca31e4e76d2c0ccf5a09c70491" dependencies = [ "ahash", "anyhow", @@ -3284,8 +3275,8 @@ dependencies = [ "indexmap", "lazy_static", "num 0.4.0", - "num_cpus", "polars-arrow", + "polars-utils", "rand 0.8.5", "rand_distr", "rayon", @@ -3297,9 +3288,9 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.20.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d8770fb4233ab88affac80c410be090dc7a2c044a9e4e7b942132e94ceeb732b" +checksum = "fdd4b762e5694f359ded21ca0627b5bc95b6eb49f6b330569afc1d20f0564b01" dependencies = [ "ahash", "anyhow", @@ -3311,21 +3302,22 @@ dependencies = [ "memchr", "memmap2", "num 0.4.0", - "num_cpus", "polars-arrow", "polars-core", + "polars-time", "polars-utils", "rayon", "regex", + "serde", "serde_json", "simdutf8", ] [[package]] name = "polars-lazy" -version = "0.20.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca1fed3b88ae1bb9b7f1d7b2958f1655d9c1aed33495d6ba30ff84a0c1e9e9" +checksum = "eedc21001f05611e41bb7439b38d0f4ef9406aa49c17f3b289b5f57d8fa40c59" dependencies = [ "ahash", "glob", @@ -3336,24 +3328,36 @@ dependencies = [ "polars-time", "polars-utils", "rayon", + "serde", ] [[package]] -name = "polars-time" -version = "0.20.0" +name = "polars-ops" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fe48c759ca778a8b6fb30f70e9a81b56f0987a82dc71e61c5b2d3c236b6b8d6" +checksum = "86fae68f0992955f224f09d1f15648a6fb76d8e3b962efac2f97ccc2aa58977a" dependencies = [ - "chrono", - "polars-arrow", "polars-core", ] [[package]] -name = "polars-utils" -version = "0.20.0" +name = "polars-time" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71011e8ed52f123ce23d110b496c8704d0a59c5fd4115cd938e7ff19d4bcb7ca" +checksum = "be499f73749e820f96689c5f9ec59669b7cdd551d864358e2bdaebb5944e4bfb" +dependencies = [ + "chrono", + "lexical", + "polars-arrow", + "polars-core", + "serde", +] + +[[package]] +name = "polars-utils" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f4cd569d383f5f000abbd6d5146550e6cb4e43fac30d1af98699499a440d56" dependencies = [ "parking_lot 0.12.0", "rayon", @@ -5333,18 +5337,18 @@ dependencies = [ [[package]] name = "zstd" -version = "0.10.0+zstd.1.5.2" +version = "0.11.1+zstd.1.5.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b1365becbe415f3f0fcd024e2f7b45bacfb5bdd055f0dc113571394114e7bdd" +checksum = "77a16b8414fde0414e90c612eba70985577451c4c504b99885ebed24762cb81a" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "4.1.4+zstd.1.5.2" +version = "5.0.1+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f7cd17c9af1a4d6c24beb1cc54b17e2ef7b593dc92f19e9d9acad8b182bbaee" +checksum = "7c12659121420dd6365c5c3de4901f97145b79651fb1d25814020ed2ed0585ae" dependencies = [ "libc", "zstd-sys", @@ -5352,9 +5356,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "1.6.3+zstd.1.5.2" +version = "2.0.1+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8" +checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" dependencies = [ "cc", "libc", diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index 16419644f1..78eab5e4fd 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -96,13 +96,15 @@ version = "2.0.2" optional = true [dependencies.polars] -version = "0.20.0" +version = "0.21.1" +# path = "../../../../polars/polars" optional = true features = [ - "default", "parquet", "json", "serde", "object", - "checked_arithmetic", "strings", "cum_agg", "is_in", + "default", "to_dummies", "parquet", "json", "serde", "serde-lazy", + "object", "checked_arithmetic", "strings", "cum_agg", "is_in", "rolling_window", "strings", "rows", "random", - "dtype-datetime" + "dtype-datetime", "dtype-struct", "lazy", "cross_join", + "dynamic_groupby" ] [features] diff --git a/crates/nu-command/src/dataframe/eager/aggregate.rs b/crates/nu-command/src/dataframe/eager/aggregate.rs deleted file mode 100644 index eee308df6d..0000000000 --- a/crates/nu-command/src/dataframe/eager/aggregate.rs +++ /dev/null @@ -1,403 +0,0 @@ 
-use nu_engine::CallExt; -use nu_protocol::{ - ast::Call, - did_you_mean, - engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value, -}; -use polars::{ - frame::groupby::GroupBy, - prelude::{PolarsError, QuantileInterpolOptions}, -}; - -use crate::dataframe::values::NuGroupBy; - -use super::super::values::{Column, NuDataFrame}; - -enum Operation { - Mean, - Sum, - Min, - Max, - First, - Last, - Nunique, - Quantile(f64), - Median, - Var, - Std, - Count, -} - -impl Operation { - fn from_tagged( - name: &Spanned, - quantile: Option>, - ) -> Result { - match name.item.as_ref() { - "mean" => Ok(Operation::Mean), - "sum" => Ok(Operation::Sum), - "min" => Ok(Operation::Min), - "max" => Ok(Operation::Max), - "first" => Ok(Operation::First), - "last" => Ok(Operation::Last), - "nunique" => Ok(Operation::Nunique), - "quantile" => match quantile { - None => Err(ShellError::GenericError( - "Quantile value not fount".into(), - "Quantile operation requires quantile value".into(), - Some(name.span), - None, - Vec::new(), - )), - Some(value) => { - if (value.item < 0.0) | (value.item > 1.0) { - Err(ShellError::GenericError( - "Inappropriate quantile".into(), - "Quantile value should be between 0.0 and 1.0".into(), - Some(value.span), - None, - Vec::new(), - )) - } else { - Ok(Operation::Quantile(value.item)) - } - } - }, - "median" => Ok(Operation::Median), - "var" => Ok(Operation::Var), - "std" => Ok(Operation::Std), - "count" => Ok(Operation::Count), - selection => { - let possibilities = [ - "mean".to_string(), - "sum".to_string(), - "min".to_string(), - "max".to_string(), - "first".to_string(), - "last".to_string(), - "nunique".to_string(), - "quantile".to_string(), - "median".to_string(), - "var".to_string(), - "std".to_string(), - "count".to_string(), - ]; - - match did_you_mean(&possibilities, selection) { - Some(suggestion) => Err(ShellError::DidYouMean(suggestion, name.span)), - None => 
Err(ShellError::GenericError( - "Operation not fount".into(), - "Operation does not exist".into(), - Some(name.span), - Some("Perhaps you want: mean, sum, min, max, first, last, nunique, quantile, median, var, std, or count".into()), - Vec::new(), - )) - } - } - } - } - - fn to_str(&self) -> &'static str { - match self { - Self::Mean => "mean", - Self::Sum => "sum", - Self::Min => "min", - Self::Max => "max", - Self::First => "first", - Self::Last => "last", - Self::Nunique => "nunique", - Self::Quantile(_) => "quantile", - Self::Median => "median", - Self::Var => "var", - Self::Std => "std", - Self::Count => "count", - } - } -} - -#[derive(Clone)] -pub struct Aggregate; - -impl Command for Aggregate { - fn name(&self) -> &str { - "dfr aggregate" - } - - fn usage(&self) -> &str { - "Performs an aggregation operation on a dataframe and groupby object" - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "operation_name", - SyntaxShape::String, - "\n\tDataframes: mean, sum, min, max, quantile, median, var, std -\tGroupBy: mean, sum, min, max, first, last, nunique, quantile, median, var, std, count", - ) - .named( - "quantile", - SyntaxShape::Number, - "quantile value for quantile operation", - Some('q'), - ) - .switch( - "explicit", - "returns explicit names for groupby aggregations", - Some('e'), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Aggregate sum by grouping by column a and summing on col b", - example: - "[[a b]; [one 1] [one 2]] | dfr to-df | dfr group-by a | dfr aggregate sum", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_string("one")]), - Column::new("b".to_string(), vec![Value::test_int(3)]), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Aggregate sum in dataframe columns", - example: "[[a b]; [4 1] [5 
2]] | dfr to-df | dfr aggregate sum", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(9)]), - Column::new("b".to_string(), vec![Value::test_int(3)]), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Aggregate sum in series", - example: "[4 1 5 6] | dfr to-df | dfr aggregate sum", - result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(16)], - )]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let operation: Spanned = call.req(engine_state, stack, 0)?; - let quantile: Option> = call.get_flag(engine_state, stack, "quantile")?; - let op = Operation::from_tagged(&operation, quantile)?; - - match input { - PipelineData::Value(Value::CustomValue { val, span }, _) => { - let df = val.as_any().downcast_ref::(); - let groupby = val.as_any().downcast_ref::(); - - match (df, groupby) { - (Some(df), None) => { - let df = df.as_ref(); - let res = perform_dataframe_aggregation(df, op, operation.span)?; - - Ok(PipelineData::Value( - NuDataFrame::dataframe_into_value(res, span), - None, - )) - } - (None, Some(nu_groupby)) => { - let groupby = nu_groupby.to_groupby()?; - - let res = perform_groupby_aggregation( - groupby, - op, - operation.span, - call.head, - call.has_flag("explicit"), - )?; - - Ok(PipelineData::Value( - NuDataFrame::dataframe_into_value(res, span), - None, - )) - } - _ => Err(ShellError::GenericError( - "Incorrect datatype".into(), - "no groupby or dataframe found in input stream".into(), - Some(call.head), - None, - Vec::new(), - )), - 
} - } - _ => Err(ShellError::GenericError( - "Incorrect datatype".into(), - "no groupby or dataframe found in input stream".into(), - Some(call.head), - None, - Vec::new(), - )), - } -} - -fn perform_groupby_aggregation( - groupby: GroupBy, - operation: Operation, - operation_span: Span, - agg_span: Span, - explicit: bool, -) -> Result { - let mut res = match operation { - Operation::Mean => groupby.mean(), - Operation::Sum => groupby.sum(), - Operation::Min => groupby.min(), - Operation::Max => groupby.max(), - Operation::First => groupby.first(), - Operation::Last => groupby.last(), - Operation::Nunique => groupby.n_unique(), - Operation::Quantile(quantile) => { - groupby.quantile(quantile, QuantileInterpolOptions::default()) - } - Operation::Median => groupby.median(), - Operation::Var => groupby.var(), - Operation::Std => groupby.std(), - Operation::Count => groupby.count(), - } - .map_err(|e| { - let span = match &e { - PolarsError::NotFound(_) => agg_span, - _ => operation_span, - }; - - ShellError::GenericError( - "Error calculating aggregation".into(), - e.to_string(), - Some(span), - None, - Vec::new(), - ) - })?; - - if !explicit { - let col_names = res - .get_column_names() - .iter() - .map(|name| name.to_string()) - .collect::>(); - - for col in col_names { - let from = match operation { - Operation::Mean => "_mean", - Operation::Sum => "_sum", - Operation::Min => "_min", - Operation::Max => "_max", - Operation::First => "_first", - Operation::Last => "_last", - Operation::Nunique => "_n_unique", - Operation::Quantile(_) => "_quantile", - Operation::Median => "_median", - Operation::Var => "_agg_var", - Operation::Std => "_agg_std", - Operation::Count => "_count", - }; - - let new_col = match col.find(from) { - Some(index) => &col[..index], - None => &col[..], - }; - - res.rename(&col, new_col) - .expect("Column is always there. 
Looping with known names"); - } - } - - Ok(res) -} - -fn perform_dataframe_aggregation( - dataframe: &polars::prelude::DataFrame, - operation: Operation, - operation_span: Span, -) -> Result { - match operation { - Operation::Mean => Ok(dataframe.mean()), - Operation::Sum => Ok(dataframe.sum()), - Operation::Min => Ok(dataframe.min()), - Operation::Max => Ok(dataframe.max()), - Operation::Quantile(quantile) => dataframe - .quantile(quantile, QuantileInterpolOptions::default()) - .map_err(|e| { - ShellError::GenericError( - "Error calculating quantile".into(), - e.to_string(), - Some(operation_span), - None, - Vec::new(), - ) - }), - Operation::Median => Ok(dataframe.median()), - Operation::Var => Ok(dataframe.var()), - Operation::Std => Ok(dataframe.std()), - operation => { - let possibilities = [ - "mean".to_string(), - "sum".to_string(), - "min".to_string(), - "max".to_string(), - "quantile".to_string(), - "median".to_string(), - "var".to_string(), - "std".to_string(), - ]; - - match did_you_mean(&possibilities, operation.to_str()) { - Some(suggestion) => Err(ShellError::DidYouMean(suggestion, operation_span)), - None => Err(ShellError::GenericError( - "Operation not fount".into(), - "Operation does not exist".into(), - Some(operation_span), - Some( - "Perhaps you want: mean, sum, min, max, quantile, median, var, or std" - .into(), - ), - Vec::new(), - )), - } - } - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::super::CreateGroupBy; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(Aggregate {}), Box::new(CreateGroupBy {})]) - } -} diff --git a/crates/nu-command/src/dataframe/eager/drop_duplicates.rs b/crates/nu-command/src/dataframe/eager/drop_duplicates.rs index 1928eb606d..b6af6636df 100644 --- a/crates/nu-command/src/dataframe/eager/drop_duplicates.rs +++ b/crates/nu-command/src/dataframe/eager/drop_duplicates.rs @@ -4,7 +4,7 @@ use nu_protocol::{ engine::{Command, 
EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; -use polars::prelude::DistinctKeepStrategy; +use polars::prelude::UniqueKeepStrategy; use super::super::values::utils::convert_columns_string; use super::super::values::{Column, NuDataFrame}; @@ -89,13 +89,13 @@ fn command( let subset_slice = subset.as_ref().map(|cols| &cols[..]); let keep_strategy = if call.has_flag("last") { - DistinctKeepStrategy::Last + UniqueKeepStrategy::Last } else { - DistinctKeepStrategy::First + UniqueKeepStrategy::First }; df.as_ref() - .distinct(subset_slice, keep_strategy) + .unique(subset_slice, keep_strategy) .map_err(|e| { ShellError::GenericError( "Error dropping duplicates".into(), diff --git a/crates/nu-command/src/dataframe/eager/dummies.rs b/crates/nu-command/src/dataframe/eager/dummies.rs index f912cdbb4f..fda678223c 100644 --- a/crates/nu-command/src/dataframe/eager/dummies.rs +++ b/crates/nu-command/src/dataframe/eager/dummies.rs @@ -1,10 +1,10 @@ +use super::super::values::{Column, NuDataFrame}; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Value, }; - -use super::super::values::{Column, NuDataFrame}; +use polars::prelude::DataFrameOps; #[derive(Clone)] pub struct Dummies; diff --git a/crates/nu-command/src/dataframe/eager/filter_with.rs b/crates/nu-command/src/dataframe/eager/filter_with.rs index 84a0142a1b..a93bca818d 100644 --- a/crates/nu-command/src/dataframe/eager/filter_with.rs +++ b/crates/nu-command/src/dataframe/eager/filter_with.rs @@ -4,6 +4,9 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; +use polars::prelude::LazyFrame; + +use crate::dataframe::values::{NuExpression, NuLazyFrame}; use super::super::values::{Column, NuDataFrame}; @@ -16,12 +19,16 @@ impl Command for FilterWith { } fn usage(&self) -> &str { - "Filters dataframe 
using a mask as reference" + "Filters dataframe using a mask or expression as reference" } fn signature(&self) -> Signature { Signature::build(self.name()) - .required("mask", SyntaxShape::Any, "boolean mask used to filter data") + .required( + "mask or expression", + SyntaxShape::Any, + "boolean mask used to filter data", + ) .category(Category::Custom("dataframe".into())) } @@ -48,15 +55,30 @@ impl Command for FilterWith { call: &Call, input: PipelineData, ) -> Result { - command(engine_state, stack, call, input) + let value = input.into_value(call.head); + + if NuLazyFrame::can_downcast(&value) { + let df = NuLazyFrame::try_from_value(value)?; + command_lazy(engine_state, stack, call, df) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command_eager(engine_state, stack, call, df) + } else { + Err(ShellError::CantConvert( + "expression or query".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } } } -fn command( +fn command_eager( engine_state: &EngineState, stack: &mut Stack, call: &Call, - input: PipelineData, + df: NuDataFrame, ) -> Result { let mask_value: Value = call.req(engine_state, stack, 0)?; @@ -72,8 +94,6 @@ fn command( ) })?; - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - df.as_ref() .filter(mask) .map_err(|e| { @@ -88,6 +108,23 @@ fn command( .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) } +fn command_lazy( + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + lazy: NuLazyFrame, +) -> Result { + let expr: Value = call.req(engine_state, stack, 0)?; + let expr = NuExpression::try_from_value(expr)?; + + let lazy = lazy.apply_with_expr(expr, LazyFrame::filter); + + Ok(PipelineData::Value( + NuLazyFrame::into_value(lazy, call.head), + None, + )) +} + #[cfg(test)] mod test { use super::super::super::test_dataframe::test_dataframe; diff --git a/crates/nu-command/src/dataframe/eager/first.rs 
b/crates/nu-command/src/dataframe/eager/first.rs index 1371f869cc..7053b954e0 100644 --- a/crates/nu-command/src/dataframe/eager/first.rs +++ b/crates/nu-command/src/dataframe/eager/first.rs @@ -1,3 +1,5 @@ +use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame}; +use crate::dataframe::values::NuExpression; use nu_engine::CallExt; use nu_protocol::{ ast::Call, @@ -5,8 +7,6 @@ use nu_protocol::{ Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; -use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame}; - #[derive(Clone)] pub struct FirstDF; @@ -16,7 +16,7 @@ impl Command for FirstDF { } fn usage(&self) -> &str { - "Creates new dataframe with first rows" + "Creates new dataframe with first rows or creates a first expression" } fn signature(&self) -> Signature { @@ -26,18 +26,25 @@ impl Command for FirstDF { } fn examples(&self) -> Vec { - vec![Example { - description: "Create new dataframe with head rows", - example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr first 1", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(1)]), - Column::new("b".to_string(), vec![Value::test_int(2)]), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + vec![ + Example { + description: "Create new dataframe with head rows", + example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr first 1", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_int(1)]), + Column::new("b".to_string(), vec![Value::test_int(2)]), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a first expression from a column", + example: "dfr col a | dfr first", + result: None, + }, + ] } fn run( @@ -47,7 +54,27 @@ impl Command for FirstDF { call: &Call, input: PipelineData, ) -> Result { - command(engine_state, stack, call, input) + let 
value = input.into_value(call.head); + + if NuExpression::can_downcast(&value) { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().first().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command(engine_state, stack, call, df) + } else { + Err(ShellError::CantConvert( + "expression or query".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } } } @@ -55,12 +82,11 @@ fn command( engine_state: &EngineState, stack: &mut Stack, call: &Call, - input: PipelineData, + df: NuDataFrame, ) -> Result { let rows: Option = call.opt(engine_state, stack, 0)?; let rows = rows.unwrap_or(DEFAULT_ROWS); - let df = NuDataFrame::try_from_pipeline(input, call.head)?; let res = df.as_ref().head(Some(rows)); Ok(PipelineData::Value( NuDataFrame::dataframe_into_value(res, call.head), diff --git a/crates/nu-command/src/dataframe/eager/groupby.rs b/crates/nu-command/src/dataframe/eager/groupby.rs deleted file mode 100644 index 84218c4dcd..0000000000 --- a/crates/nu-command/src/dataframe/eager/groupby.rs +++ /dev/null @@ -1,77 +0,0 @@ -use nu_engine::CallExt; -use nu_protocol::{ - ast::Call, - engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, -}; - -use super::super::values::{utils::convert_columns_string, NuDataFrame, NuGroupBy}; - -#[derive(Clone)] -pub struct CreateGroupBy; - -impl Command for CreateGroupBy { - fn name(&self) -> &str { - "dfr group-by" - } - - fn usage(&self) -> &str { - "Creates a groupby object that can be used for other aggregations" - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .rest("rest", SyntaxShape::Any, "groupby columns") - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Grouping by column
a", - example: "[[a b]; [one 1] [one 2]] | dfr to-df | dfr group-by a", - result: None, - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - // Extracting the names of the columns to perform the groupby - let columns: Vec = call.rest(engine_state, stack, 0)?; - let (col_string, col_span) = convert_columns_string(columns, call.head)?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - // This is the expensive part of the groupby; to create the - // groups that will be used for grouping the data in the - // dataframe. Once it has been done these values can be stored - // in a NuGroupBy - let groupby = df.as_ref().groupby(&col_string).map_err(|e| { - ShellError::GenericError( - "Error creating groupby".into(), - e.to_string(), - Some(col_span), - None, - Vec::new(), - ) - })?; - - let groups = groupby.get_groups(); - let groupby = NuGroupBy::new(df.as_ref().clone(), col_string, groups); - - Ok(PipelineData::Value(groupby.into_value(call.head), None)) -} diff --git a/crates/nu-command/src/dataframe/eager/join.rs b/crates/nu-command/src/dataframe/eager/join.rs deleted file mode 100644 index 6bdce66fe0..0000000000 --- a/crates/nu-command/src/dataframe/eager/join.rs +++ /dev/null @@ -1,235 +0,0 @@ -use nu_engine::CallExt; -use nu_protocol::{ - ast::Call, - engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value, -}; -use polars::prelude::JoinType; - -use crate::dataframe::values::utils::convert_columns_string; - -use super::super::values::{Column, NuDataFrame}; - -#[derive(Clone)] -pub struct JoinDF; - -impl Command for JoinDF { - fn name(&self) -> &str { - "dfr join" - } - - fn usage(&self) -> &str { - "Joins a dataframe using 
columns as reference" - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("dataframe", SyntaxShape::Any, "right dataframe to join") - .required_named( - "left", - SyntaxShape::Table, - "left column names to perform join", - Some('l'), - ) - .required_named( - "right", - SyntaxShape::Table, - "right column names to perform join", - Some('r'), - ) - .named( - "type", - SyntaxShape::String, - "type of join. Inner by default", - Some('t'), - ) - .named( - "suffix", - SyntaxShape::String, - "suffix for the columns of the right dataframe", - Some('s'), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "inner join dataframe", - example: r#"let right = ([[a b c]; [1 2 5] [3 4 5] [5 6 6]] | dfr to-df); - $right | dfr join $right -l [a b] -r [a b]"#, - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(5), Value::test_int(5), Value::test_int(6)], - ), - Column::new( - "c_right".to_string(), - vec![Value::test_int(5), Value::test_int(5), Value::test_int(6)], - ), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let r_df: Value = call.req(engine_state, stack, 0)?; - let l_col: Vec = call - .get_flag(engine_state, stack, "left")? - .expect("required value in syntax"); - let r_col: Vec = call - .get_flag(engine_state, stack, "right")? 
- .expect("required value in syntax"); - let suffix: Option = call.get_flag(engine_state, stack, "suffix")?; - let join_type_op: Option> = call.get_flag(engine_state, stack, "type")?; - - let join_type = match join_type_op { - None => JoinType::Inner, - Some(val) => match val.item.as_ref() { - "inner" => JoinType::Inner, - "outer" => JoinType::Outer, - "left" => JoinType::Left, - _ => { - return Err(ShellError::GenericError( - "Incorrect join type".into(), - "Invalid join type".into(), - Some(val.span), - Some("Options: inner, outer or left".into()), - Vec::new(), - )) - } - }, - }; - - let (l_col_string, l_col_span) = convert_columns_string(l_col, call.head)?; - let (r_col_string, r_col_span) = convert_columns_string(r_col, call.head)?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let r_df = NuDataFrame::try_from_value(r_df)?; - - check_column_datatypes( - df.as_ref(), - r_df.as_ref(), - &l_col_string, - l_col_span, - &r_col_string, - r_col_span, - )?; - - df.as_ref() - .join( - r_df.as_ref(), - &l_col_string, - &r_col_string, - join_type, - suffix, - ) - .map_err(|e| { - ShellError::GenericError( - "Error joining dataframes".into(), - e.to_string(), - Some(l_col_span), - None, - Vec::new(), - ) - }) - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) -} - -fn check_column_datatypes>( - df_l: &polars::prelude::DataFrame, - df_r: &polars::prelude::DataFrame, - l_cols: &[T], - l_col_span: Span, - r_cols: &[T], - r_col_span: Span, -) -> Result<(), ShellError> { - if l_cols.len() != r_cols.len() { - return Err(ShellError::GenericError( - "Mismatched number of column names".into(), - format!( - "found {} left names vs {} right names", - l_cols.len(), - r_cols.len() - ), - Some(l_col_span), - Some("perhaps you need to change the number of columns to join".into()), - Vec::new(), - )); - } - - for (l, r) in l_cols.iter().zip(r_cols) { - let l_series = df_l.column(l.as_ref()).map_err(|e| { - 
ShellError::GenericError( - "Error selecting the columns".into(), - e.to_string(), - Some(l_col_span), - None, - Vec::new(), - ) - })?; - - let r_series = df_r.column(r.as_ref()).map_err(|e| { - ShellError::GenericError( - "Error selecting the columns".into(), - e.to_string(), - Some(r_col_span), - None, - Vec::new(), - ) - })?; - - if l_series.dtype() != r_series.dtype() { - return Err(ShellError::GenericError( - "Mismatched datatypes".into(), - format!( - "left column type '{}' doesn't match '{}' right column match", - l_series.dtype(), - r_series.dtype() - ), - Some(l_col_span), - Some("perhaps you need to select other column to match".into()), - Vec::new(), - )); - } - } - - Ok(()) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(JoinDF {})]) - } -} diff --git a/crates/nu-command/src/dataframe/eager/last.rs b/crates/nu-command/src/dataframe/eager/last.rs index 39294fae3c..a9cc352d7d 100644 --- a/crates/nu-command/src/dataframe/eager/last.rs +++ b/crates/nu-command/src/dataframe/eager/last.rs @@ -1,3 +1,5 @@ +use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame}; +use crate::dataframe::values::NuExpression; use nu_engine::CallExt; use nu_protocol::{ ast::Call, @@ -5,8 +7,6 @@ use nu_protocol::{ Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; -use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame}; - #[derive(Clone)] pub struct LastDF; @@ -16,7 +16,7 @@ impl Command for LastDF { } fn usage(&self) -> &str { - "Creates new dataframe with tail rows" + "Creates new dataframe with tail rows or creates a last expression" } fn signature(&self) -> Signature { @@ -26,18 +26,25 @@ impl Command for LastDF { } fn examples(&self) -> Vec { - vec![Example { - description: "Create new dataframe with last rows", - example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr last 1", - result: Some( - 
NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(3)]), - Column::new("b".to_string(), vec![Value::test_int(4)]), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + vec![ + Example { + description: "Create new dataframe with last rows", + example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr last 1", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_int(3)]), + Column::new("b".to_string(), vec![Value::test_int(4)]), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a last expression from a column", + example: "dfr col a | dfr last", + result: None, + }, + ] } fn run( @@ -47,7 +54,27 @@ impl Command for LastDF { call: &Call, input: PipelineData, ) -> Result { - command(engine_state, stack, call, input) + let value = input.into_value(call.head); + + if NuExpression::can_downcast(&value) { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().last().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command(engine_state, stack, call, df) + } else { + Err(ShellError::CantConvert( + "expression or query".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } } } @@ -55,12 +82,11 @@ fn command( engine_state: &EngineState, stack: &mut Stack, call: &Call, - input: PipelineData, + df: NuDataFrame, ) -> Result { let rows: Option = call.opt(engine_state, stack, 0)?; let rows = rows.unwrap_or(DEFAULT_ROWS); - let df = NuDataFrame::try_from_pipeline(input, call.head)?; let res = df.as_ref().tail(Some(rows)); Ok(PipelineData::Value( NuDataFrame::dataframe_into_value(res, call.head), diff --git a/crates/nu-command/src/dataframe/eager/list.rs
b/crates/nu-command/src/dataframe/eager/list.rs index fce19cc854..94dd5bea9a 100644 --- a/crates/nu-command/src/dataframe/eager/list.rs +++ b/crates/nu-command/src/dataframe/eager/list.rs @@ -11,7 +11,7 @@ pub struct ListDF; impl Command for ListDF { fn name(&self) -> &str { - "dfr list" + "dfr ls" } fn usage(&self) -> &str { @@ -26,7 +26,7 @@ impl Command for ListDF { vec![Example { description: "Creates a new dataframe and shows it in the dataframe list", example: r#"let test = ([[a b];[1 2] [3 4]] | dfr to-df); - dfr list"#, + dfr ls"#, result: None, }] } diff --git a/crates/nu-command/src/dataframe/eager/mod.rs b/crates/nu-command/src/dataframe/eager/mod.rs index 148e1ece71..4a54e98c41 100644 --- a/crates/nu-command/src/dataframe/eager/mod.rs +++ b/crates/nu-command/src/dataframe/eager/mod.rs @@ -1,4 +1,3 @@ -mod aggregate; mod append; mod column; mod command; @@ -11,13 +10,10 @@ mod dummies; mod filter_with; mod first; mod get; -mod groupby; -mod join; mod last; mod list; mod melt; mod open; -mod pivot; mod rename; mod sample; mod shape; @@ -32,7 +28,6 @@ mod with_column; use nu_protocol::engine::StateWorkingSet; -pub use aggregate::Aggregate; pub use append::AppendDF; pub use column::ColumnDF; pub use command::Dataframe; @@ -45,13 +40,10 @@ pub use dummies::Dummies; pub use filter_with::FilterWith; pub use first::FirstDF; pub use get::GetDF; -pub use groupby::CreateGroupBy; -pub use join::JoinDF; pub use last::LastDF; pub use list::ListDF; pub use melt::MeltDF; pub use open::OpenDataFrame; -pub use pivot::PivotDF; pub use rename::RenameDF; pub use sample::SampleDF; pub use shape::ShapeDF; @@ -76,10 +68,8 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) { // Dataframe commands bind_command!( - Aggregate, AppendDF, ColumnDF, - CreateGroupBy, Dataframe, DataTypes, DescribeDF, @@ -90,12 +80,10 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) { FilterWith, FirstDF, GetDF, - JoinDF, LastDF, ListDF, MeltDF, OpenDataFrame, - PivotDF, RenameDF, 
SampleDF, ShapeDF, diff --git a/crates/nu-command/src/dataframe/eager/pivot.rs b/crates/nu-command/src/dataframe/eager/pivot.rs deleted file mode 100644 index 8c25e2a36d..0000000000 --- a/crates/nu-command/src/dataframe/eager/pivot.rs +++ /dev/null @@ -1,198 +0,0 @@ -use nu_engine::CallExt; -use nu_protocol::{ - ast::Call, - engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, -}; -use polars::prelude::DataType; - -use crate::dataframe::values::NuGroupBy; - -use super::super::values::NuDataFrame; - -enum Operation { - First, - Sum, - Min, - Max, - Mean, - Median, -} - -impl Operation { - fn from_tagged(name: Spanned) -> Result { - match name.item.as_ref() { - "first" => Ok(Operation::First), - "sum" => Ok(Operation::Sum), - "min" => Ok(Operation::Min), - "max" => Ok(Operation::Max), - "mean" => Ok(Operation::Mean), - "median" => Ok(Operation::Median), - _ => Err(ShellError::GenericError( - "Operation not fount".into(), - "Operation does not exist for pivot".into(), - Some(name.span), - Some("Options: first, sum, min, max, mean, median".into()), - Vec::new(), - )), - } - } -} - -#[derive(Clone)] -pub struct PivotDF; - -impl Command for PivotDF { - fn name(&self) -> &str { - "dfr pivot" - } - - fn usage(&self) -> &str { - "Performs a pivot operation on a groupby object" - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "pivot_column", - SyntaxShape::String, - "pivot column to perform pivot", - ) - .required( - "value_column", - SyntaxShape::String, - "value column to perform pivot", - ) - .required("operation", SyntaxShape::String, "aggregate operation") - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Pivot a dataframe on b and aggregation on col c", - example: - "[[a b c]; [one x 1] [two y 2]] | dfr to-df | dfr group-by a | dfr pivot b c sum", - result: None, - }] - } - - fn run( - &self, - 
engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let pivot_col: Spanned = call.req(engine_state, stack, 0)?; - let value_col: Spanned = call.req(engine_state, stack, 1)?; - let operation: Spanned = call.req(engine_state, stack, 2)?; - let op = Operation::from_tagged(operation)?; - - let nu_groupby = NuGroupBy::try_from_pipeline(input, call.head)?; - let df_ref = nu_groupby.as_ref(); - - check_pivot_column(df_ref, &pivot_col)?; - check_value_column(df_ref, &value_col)?; - - let mut groupby = nu_groupby.to_groupby()?; - - let pivot = groupby.pivot(vec![&pivot_col.item], vec![&value_col.item]); - - match op { - Operation::Mean => pivot.mean(), - Operation::Sum => pivot.sum(), - Operation::Min => pivot.min(), - Operation::Max => pivot.max(), - Operation::First => pivot.first(), - Operation::Median => pivot.median(), - } - .map_err(|e| { - ShellError::GenericError( - "Error creating pivot".into(), - e.to_string(), - Some(call.head), - None, - Vec::new(), - ) - }) - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) -} - -fn check_pivot_column( - df: &polars::prelude::DataFrame, - col: &Spanned, -) -> Result<(), ShellError> { - let series = df.column(&col.item).map_err(|e| { - ShellError::GenericError( - "Column not found".into(), - e.to_string(), - Some(col.span), - None, - Vec::new(), - ) - })?; - - match series.dtype() { - DataType::UInt8 - | DataType::UInt16 - | DataType::UInt32 - | DataType::UInt64 - | DataType::Int8 - | DataType::Int16 - | DataType::Int32 - | DataType::Int64 - | DataType::Utf8 => Ok(()), - _ => Err(ShellError::GenericError( - "Pivot error".into(), - format!("Unsupported datatype {}", series.dtype()), - Some(col.span), - None, - Vec::new(), - )), - } -} - -fn check_value_column( - df: 
&polars::prelude::DataFrame, - col: &Spanned, -) -> Result<(), ShellError> { - let series = df.column(&col.item).map_err(|e| { - ShellError::GenericError( - "Column not found".into(), - e.to_string(), - Some(col.span), - None, - Vec::new(), - ) - })?; - - match series.dtype() { - DataType::UInt8 - | DataType::UInt16 - | DataType::UInt32 - | DataType::UInt64 - | DataType::Int8 - | DataType::Int16 - | DataType::Int32 - | DataType::Int64 - | DataType::Float32 - | DataType::Float64 => Ok(()), - _ => Err(ShellError::GenericError( - "Pivot error".into(), - format!("Unsupported datatype {}", series.dtype()), - Some(col.span), - None, - Vec::new(), - )), - } -} diff --git a/crates/nu-command/src/dataframe/eager/rename.rs b/crates/nu-command/src/dataframe/eager/rename.rs index 1e57da4928..117dba47e8 100644 --- a/crates/nu-command/src/dataframe/eager/rename.rs +++ b/crates/nu-command/src/dataframe/eager/rename.rs @@ -5,6 +5,8 @@ use nu_protocol::{ Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; +use crate::dataframe::{utils::extract_strings, values::NuLazyFrame}; + use super::super::values::{Column, NuDataFrame}; #[derive(Clone)] @@ -21,8 +23,16 @@ impl Command for RenameDF { fn signature(&self) -> Signature { Signature::build(self.name()) - .required("from", SyntaxShape::String, "column name to be renamed") - .required("to", SyntaxShape::String, "new column name") + .required( + "columns", + SyntaxShape::Any, + "Column(s) to be renamed. A string or list of strings", + ) + .required( + "new names", + SyntaxShape::Any, + "New names for the selected column(s). 
A string or list of strings", + ) .category(Category::Custom("dataframe".into())) } @@ -54,24 +64,39 @@ impl Command for RenameDF { call: &Call, input: PipelineData, ) -> Result { - command(engine_state, stack, call, input) + let value = input.into_value(call.head); + + if NuLazyFrame::can_downcast(&value) { + let df = NuLazyFrame::try_from_value(value)?; + command_lazy(engine_state, stack, call, df) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command_eager(engine_state, stack, call, df) + } else { + Err(ShellError::CantConvert( + "expression or query".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } } } -fn command( +fn command_eager( engine_state: &EngineState, stack: &mut Stack, call: &Call, - input: PipelineData, + mut df: NuDataFrame, ) -> Result { - let from: String = call.req(engine_state, stack, 0)?; - let to: String = call.req(engine_state, stack, 1)?; + let columns: Value = call.req(engine_state, stack, 0)?; + let columns = extract_strings(columns)?; - let mut df = NuDataFrame::try_from_pipeline(input, call.head)?; + let new_names: Value = call.req(engine_state, stack, 1)?; + let new_names = extract_strings(new_names)?; - df.as_mut() - .rename(&from, &to) - .map_err(|e| { + for (from, to) in columns.iter().zip(new_names.iter()) { + df.as_mut().rename(from, to).map_err(|e| { ShellError::GenericError( "Error renaming".into(), e.to_string(), @@ -79,13 +104,36 @@ fn command( None, Vec::new(), ) - }) - .map(|df| { - PipelineData::Value( - NuDataFrame::dataframe_into_value(df.clone(), call.head), - None, - ) - }) + })?; + } + + Ok(PipelineData::Value(df.into_value(call.head), None)) +} + +fn command_lazy( + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + lazy: NuLazyFrame, +) -> Result { + let columns: Value = call.req(engine_state, stack, 0)?; + let columns = extract_strings(columns)?; + + let new_names: Value = call.req(engine_state, stack, 1)?; + let new_names = 
extract_strings(new_names)?; + + if columns.len() != new_names.len() { + let value: Value = call.req(engine_state, stack, 1)?; + return Err(ShellError::IncompatibleParametersSingle( + "New name list has different size to column list".into(), + value.span()?, + )); + } + + let lazy = lazy.into_polars(); + let lazy: NuLazyFrame = lazy.rename(&columns, &new_names).into(); + + Ok(PipelineData::Value(lazy.into_value(call.head), None)) } #[cfg(test)] diff --git a/crates/nu-command/src/dataframe/eager/sample.rs b/crates/nu-command/src/dataframe/eager/sample.rs index cf4354e456..45cb5dccca 100644 --- a/crates/nu-command/src/dataframe/eager/sample.rs +++ b/crates/nu-command/src/dataframe/eager/sample.rs @@ -33,6 +33,12 @@ impl Command for SampleDF { "fraction of dataframe to be taken", Some('f'), ) + .named( + "seed", + SyntaxShape::Number, + "seed for the selection", + Some('s'), + ) .switch("replace", "sample with replace", Some('e')) .category(Category::Custom("dataframe".into())) } @@ -71,12 +77,15 @@ fn command( ) -> Result { let rows: Option> = call.get_flag(engine_state, stack, "n-rows")?; let fraction: Option> = call.get_flag(engine_state, stack, "fraction")?; + let seed: Option = call + .get_flag::(engine_state, stack, "seed")? 
+ .map(|val| val as u64); let replace: bool = call.has_flag("replace"); let df = NuDataFrame::try_from_pipeline(input, call.head)?; match (rows, fraction) { - (Some(rows), None) => df.as_ref().sample_n(rows.item, replace, 0).map_err(|e| { + (Some(rows), None) => df.as_ref().sample_n(rows.item, replace, seed).map_err(|e| { ShellError::GenericError( "Error creating sample".into(), e.to_string(), @@ -85,15 +94,18 @@ fn command( Vec::new(), ) }), - (None, Some(frac)) => df.as_ref().sample_frac(frac.item, replace, 0).map_err(|e| { - ShellError::GenericError( - "Error creating sample".into(), - e.to_string(), - Some(frac.span), - None, - Vec::new(), - ) - }), + (None, Some(frac)) => df + .as_ref() + .sample_frac(frac.item, replace, seed) + .map_err(|e| { + ShellError::GenericError( + "Error creating sample".into(), + e.to_string(), + Some(frac.span), + None, + Vec::new(), + ) + }), (Some(_), Some(_)) => Err(ShellError::GenericError( "Incompatible flags".into(), "Only one selection criterion allowed".into(), diff --git a/crates/nu-command/src/dataframe/eager/with_column.rs b/crates/nu-command/src/dataframe/eager/with_column.rs index 368ae7012d..a506ac8dbd 100644 --- a/crates/nu-command/src/dataframe/eager/with_column.rs +++ b/crates/nu-command/src/dataframe/eager/with_column.rs @@ -1,12 +1,12 @@ +use super::super::values::{Column, NuDataFrame}; +use crate::dataframe::values::{NuExpression, NuLazyFrame}; use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; -use super::super::values::{Column, NuDataFrame}; - #[derive(Clone)] pub struct WithColumn; @@ -21,35 +21,51 @@ impl Command for WithColumn { fn signature(&self) -> Signature { Signature::build(self.name()) - .required("series", SyntaxShape::Any, "series to be added") - .required_named("name", 
SyntaxShape::String, "column name", Some('n')) + .named("name", SyntaxShape::String, "new column name", Some('n')) + .rest( + "series or expressions", + SyntaxShape::Any, + "series to be added or expressions used to define the new columns", + ) .category(Category::Custom("dataframe".into())) } fn examples(&self) -> Vec { - vec![Example { - description: "Adds a series to the dataframe", - example: - "[[a b]; [1 2] [3 4]] | dfr to-df | dfr with-column ([5 6] | dfr to-df) --name c", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(5), Value::test_int(6)], - ), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + vec![ + Example { + description: "Adds a series to the dataframe", + example: r#"[[a b]; [1 2] [3 4]] + | dfr to-df + | dfr with-column ([5 6] | dfr to-df) --name c"#, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(5), Value::test_int(6)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Adds a column defined by an expression to a lazy dataframe", + example: r#"[[a b]; [1 2] [3 4]] + | dfr to-df + | dfr to-lazy + | dfr with-column ((dfr col a) * 2 | dfr as "c") + | dfr collect"#, + result: None, + }, + ] } fn run( @@ -59,26 +75,41 @@ impl Command for WithColumn { call: &Call, input: PipelineData, ) -> Result { - command(engine_state, stack, call, input) + let value = input.into_value(call.head); + + if NuLazyFrame::can_downcast(&value) { + let df = NuLazyFrame::try_from_value(value)?; +
command_lazy(engine_state, stack, call, df) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command_eager(engine_state, stack, call, df) + } else { + Err(ShellError::CantConvert( + "expression or query".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } } } -fn command( +fn command_eager( engine_state: &EngineState, stack: &mut Stack, call: &Call, - input: PipelineData, + mut df: NuDataFrame, ) -> Result { - let name: Spanned = call - .get_flag(engine_state, stack, "name")? - .expect("required named value"); - let other_value: Value = call.req(engine_state, stack, 0)?; let other_span = other_value.span()?; let mut other = NuDataFrame::try_from_value(other_value)?.as_series(other_span)?; - let series = other.rename(&name.item).clone(); - let mut df = NuDataFrame::try_from_pipeline(input, call.head)?; + let name = match call.get_flag::(engine_state, stack, "name")? { + Some(name) => name, + None => other.name().to_string(), + }; + + let series = other.rename(&name).clone(); df.as_mut() .with_column(series) @@ -99,6 +130,27 @@ fn command( }) } +fn command_lazy( + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + lazy: NuLazyFrame, +) -> Result { + let vals: Vec = call.rest(engine_state, stack, 0)?; + let value = Value::List { + vals, + span: call.head, + }; + let expressions = NuExpression::extract_exprs(value)?; + + let lazy: NuLazyFrame = lazy.into_polars().with_columns(&expressions).into(); + + Ok(PipelineData::Value( + NuLazyFrame::into_value(lazy, call.head), + None, + )) +} + #[cfg(test)] mod test { use super::super::super::test_dataframe::test_dataframe; diff --git a/crates/nu-command/src/dataframe/expressions/alias.rs b/crates/nu-command/src/dataframe/expressions/alias.rs new file mode 100644 index 0000000000..d5dd02def2 --- /dev/null +++ b/crates/nu-command/src/dataframe/expressions/alias.rs @@ -0,0 +1,57 @@ +use super::super::values::NuExpression; + +use 
nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, SyntaxShape, +}; + +#[derive(Clone)] +pub struct ExprAlias; + +impl Command for ExprAlias { + fn name(&self) -> &str { + "dfr as" + } + + fn usage(&self) -> &str { + "Creates an alias expression" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "Alias name", + SyntaxShape::String, + "Alias name for the expression", + ) + .category(Category::Custom("expressions".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Creates an alias expression", + example: "(dfr col a | dfr as new_a)", + result: None, + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let alias: String = call.req(engine_state, stack, 0)?; + + let expr = NuExpression::try_from_pipeline(input, call.head)?; + let expr: NuExpression = expr.into_polars().alias(alias.as_str()).into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } +} diff --git a/crates/nu-command/src/dataframe/expressions/dsl/col.rs b/crates/nu-command/src/dataframe/expressions/dsl/col.rs new file mode 100644 index 0000000000..569c4f1d7f --- /dev/null +++ b/crates/nu-command/src/dataframe/expressions/dsl/col.rs @@ -0,0 +1,77 @@ +use crate::dataframe::values::NuExpression; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, +}; +use polars::prelude::col; + +#[derive(Clone)] +pub struct ExprCol; + +impl Command for ExprCol { + fn name(&self) -> &str { + "dfr col" + } + + fn usage(&self) -> &str { + "Creates a named column expression" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "column name", + SyntaxShape::String, + "Name of
column to be used", + ) + .category(Category::Custom("expressions".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Creates a named column expression and converts it to a nu object", + example: "dfr col col_a | dfr to-nu", + result: Some(Value::Record { + cols: vec!["expr".into(), "value".into()], + vals: vec![ + Value::String { + val: "column".into(), + span: Span::test_data(), + }, + Value::String { + val: "col_a".into(), + span: Span::test_data(), + }, + ], + span: Span::test_data(), + }), + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + _input: PipelineData, + ) -> Result { + let name: String = call.req(engine_state, stack, 0)?; + let expr: NuExpression = col(name.as_str()).into(); + + Ok(PipelineData::Value(expr.into_value(call.head), None)) + } +} + +#[cfg(test)] +mod test { + use super::super::super::super::test_dataframe::test_dataframe; + use super::super::super::ExprToNu; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(ExprCol {}), Box::new(ExprToNu {})]) + } +} diff --git a/crates/nu-command/src/dataframe/expressions/dsl/lit.rs b/crates/nu-command/src/dataframe/expressions/dsl/lit.rs new file mode 100644 index 0000000000..a631f3b437 --- /dev/null +++ b/crates/nu-command/src/dataframe/expressions/dsl/lit.rs @@ -0,0 +1,79 @@ +use crate::dataframe::values::NuExpression; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, +}; + +#[derive(Clone)] +pub struct ExprLit; + +impl Command for ExprLit { + fn name(&self) -> &str { + "dfr lit" + } + + fn usage(&self) -> &str { + "Creates a literal expression" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "literal", + SyntaxShape::Any, + "literal to construct the expression", + ) + .category(Category::Custom("expressions".into())) + } + 
+ fn examples(&self) -> Vec { + vec![Example { + description: "Created a literal expression and converts it to a nu object", + example: "dfr lit 2 | dfr to-nu", + result: Some(Value::Record { + cols: vec!["expr".into(), "value".into()], + vals: vec![ + Value::String { + val: "literal".into(), + span: Span::test_data(), + }, + Value::String { + val: "2i64".into(), + span: Span::test_data(), + }, + ], + span: Span::test_data(), + }), + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + _input: PipelineData, + ) -> Result { + let literal: Value = call.req(engine_state, stack, 0)?; + + let expr = NuExpression::try_from_value(literal)?; + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } +} + +#[cfg(test)] +mod test { + use super::super::super::super::test_dataframe::test_dataframe; + use super::super::super::ExprToNu; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(ExprLit {}), Box::new(ExprToNu {})]) + } +} diff --git a/crates/nu-command/src/dataframe/expressions/dsl/mod.rs b/crates/nu-command/src/dataframe/expressions/dsl/mod.rs new file mode 100644 index 0000000000..b31e6726a8 --- /dev/null +++ b/crates/nu-command/src/dataframe/expressions/dsl/mod.rs @@ -0,0 +1,7 @@ +mod col; +mod lit; +mod when; + +pub(super) use crate::dataframe::expressions::dsl::col::ExprCol; +pub(super) use crate::dataframe::expressions::dsl::lit::ExprLit; +pub(super) use crate::dataframe::expressions::dsl::when::ExprWhen; diff --git a/crates/nu-command/src/dataframe/expressions/dsl/when.rs b/crates/nu-command/src/dataframe/expressions/dsl/when.rs new file mode 100644 index 0000000000..fe58959189 --- /dev/null +++ b/crates/nu-command/src/dataframe/expressions/dsl/when.rs @@ -0,0 +1,96 @@ +use crate::dataframe::values::NuExpression; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, 
Signature, SyntaxShape, Value, +}; +use polars::prelude::when; + +#[derive(Clone)] +pub struct ExprWhen; + +impl Command for ExprWhen { + fn name(&self) -> &str { + "dfr when" + } + + fn usage(&self) -> &str { + "Creates a when expression" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "when predicate", + SyntaxShape::Any, + "Predicate expression used to choose between then and otherwise", + ) + .required_named( + "then", + SyntaxShape::Any, + "Expression that will be applied when predicate is true", + Some('t'), + ) + .required_named( + "otherwise", + SyntaxShape::Any, + "Expression that will be applied when predicate is false", + Some('o'), + ) + .category(Category::Custom("expressions".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Create a new column for the dataframe", + example: r#"[[a b]; [1 2] [3 4]] + | dfr to-df + | dfr to-lazy + | dfr with-column ( + dfr when ((dfr col a) > 2) --then 4 --otherwise 5 | dfr as "c" + ) + | dfr collect"#, + result: None, + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + _input: PipelineData, + ) -> Result { + let predicate: Value = call.req(engine_state, stack, 0)?; + let predicate = NuExpression::try_from_value(predicate)?; + + let then: Value = call + .get_flag(engine_state, stack, "then")? + .expect("it is a required named value"); + let then = NuExpression::try_from_value(then)?; + let otherwise: Value = call + .get_flag(engine_state, stack, "otherwise")?
+ .expect("it is a required named value"); + let otherwise = NuExpression::try_from_value(otherwise)?; + + let expr: NuExpression = when(predicate.into_polars()) + .then(then.into_polars()) + .otherwise(otherwise.into_polars()) + .into(); + + Ok(PipelineData::Value(expr.into_value(call.head), None)) + } +} + +#[cfg(test)] +mod test { + use super::super::super::super::test_dataframe::test_dataframe; + use super::super::super::ExprToNu; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(ExprWhen {}), Box::new(ExprToNu {})]) + } +} diff --git a/crates/nu-command/src/dataframe/expressions/expressions_macro.rs b/crates/nu-command/src/dataframe/expressions/expressions_macro.rs new file mode 100644 index 0000000000..26b9609edd --- /dev/null +++ b/crates/nu-command/src/dataframe/expressions/expressions_macro.rs @@ -0,0 +1,109 @@ +/// Definition of multiple Expression commands using a macro rule +/// All of these expressions have an identical body and only require +/// to have a change in the name, description and expression function +use super::super::values::NuExpression; + +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, +}; + +// The structs defined in this file are structs that form part of other commands +// since they share a similar name +macro_rules! 
expr_command { + ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => { + #[derive(Clone)] + pub struct $command; + + impl Command for $command { + fn name(&self) -> &str { + $name + } + + fn usage(&self) -> &str { + $desc + } + + fn signature(&self) -> Signature { + Signature::build(self.name()).category(Category::Custom("expressions".into())) + } + + fn examples(&self) -> Vec { + $examples + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let expr = NuExpression::try_from_pipeline(input, call.head)?; + let expr: NuExpression = expr.into_polars().$func().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } + } + }; +} + +// ExprList command +// Expands to a command definition for a list expression +expr_command!( + ExprList, + "dfr list", + "Aggregates a group to a Series", + vec![Example { + description: "", + example: "", + result: None, + }], + list +); + +// ExprAggGroups command +// Expands to a command definition for an agg groups expression +expr_command!( + ExprAggGroups, + "dfr agg-groups", + "creates an agg_groups expression", + vec![Example { + description: "", + example: "", + result: None, + }], + agg_groups +); + +// ExprFlatten command +// Expands to a command definition for a flatten expression +expr_command!( + ExprFlatten, + "dfr flatten", + "creates a flatten expression", + vec![Example { + description: "", + example: "", + result: None, + }], + flatten +); + +// ExprExplode command +// Expands to a command definition for an explode expression +expr_command!( + ExprExplode, + "dfr explode", + "creates an explode expression", + vec![Example { + description: "", + example: "", + result: None, + }], + explode +); diff --git a/crates/nu-command/src/dataframe/expressions/mod.rs b/crates/nu-command/src/dataframe/expressions/mod.rs new file mode 100644 index 0000000000..ed1338f432 --- /dev/null +++
b/crates/nu-command/src/dataframe/expressions/mod.rs @@ -0,0 +1,36 @@ +mod alias; +mod dsl; +mod expressions_macro; +mod to_nu; + +use nu_protocol::engine::StateWorkingSet; + +use crate::dataframe::expressions::dsl::*; + +use crate::dataframe::expressions::alias::ExprAlias; +use crate::dataframe::expressions::expressions_macro::*; +use crate::dataframe::expressions::to_nu::ExprToNu; + +pub fn add_expressions(working_set: &mut StateWorkingSet) { + macro_rules! bind_command { + ( $command:expr ) => { + working_set.add_decl(Box::new($command)); + }; + ( $( $command:expr ),* ) => { + $( working_set.add_decl(Box::new($command)); )* + }; + } + + // Dataframe commands + bind_command!( + ExprAlias, + ExprCol, + ExprLit, + ExprToNu, + ExprWhen, + ExprList, + ExprAggGroups, + ExprFlatten, + ExprExplode + ); +} diff --git a/crates/nu-command/src/dataframe/expressions/to_nu.rs b/crates/nu-command/src/dataframe/expressions/to_nu.rs new file mode 100644 index 0000000000..c241b1f1c7 --- /dev/null +++ b/crates/nu-command/src/dataframe/expressions/to_nu.rs @@ -0,0 +1,70 @@ +use super::super::values::NuExpression; + +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, Span, Value, +}; + +#[derive(Clone)] +pub struct ExprToNu; + +impl Command for ExprToNu { + fn name(&self) -> &str { + "dfr to-nu" + } + + fn usage(&self) -> &str { + "Convert expression to a nu value for access and exploration" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()).category(Category::Custom("expressions".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Convert a col expression into a nushell value", + example: "dfr col col_a | dfr to-nu", + result: Some(Value::Record { + cols: vec!["expr".into(), "value".into()], + vals: vec![ + Value::String { + val: "column".into(), + span: Span::test_data(), + }, + Value::String { + val: "col_a".into(), + span: Span::test_data(), + }, + ], 
+ span: Span::test_data(), + }), + }] + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let expr = NuExpression::try_from_pipeline(input, call.head)?; + let value = expr.to_value(call.head); + + Ok(PipelineData::Value(value, None)) + } +} + +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use super::super::ExprCol; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(ExprToNu {}), Box::new(ExprCol {})]) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/aggregate.rs b/crates/nu-command/src/dataframe/lazy/aggregate.rs new file mode 100644 index 0000000000..d421df9be9 --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/aggregate.rs @@ -0,0 +1,91 @@ +use crate::dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy}; + +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, +}; + +#[derive(Clone)] +pub struct LazyAggregate; + +impl Command for LazyAggregate { + fn name(&self) -> &str { + "dfr aggregate" + } + + fn usage(&self) -> &str { + "Performs a series of aggregations from a group by" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest( + "Group by expressions", + SyntaxShape::Any, + "Expression(s) that define the aggregations to be applied", + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Group by and perform an aggregation", + example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] + | dfr to-df + | dfr group-by a + | dfr aggregate [ + ("b" | dfr min | dfr as "b_min") + ("b" | dfr max | dfr as "b_max") + ("b" | dfr sum | dfr as "b_sum") + ]"#, + result: None, + }, + Example { + description: "Group by and perform an aggregation", + example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] + | 
dfr to-df + | dfr to-lazy + | dfr group-by a + | dfr aggregate [ + ("b" | dfr min | dfr as "b_min") + ("b" | dfr max | dfr as "b_max") + ("b" | dfr sum | dfr as "b_sum") + ] + | dfr collect"#, + result: None, + }, + ] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let vals: Vec = call.rest(engine_state, stack, 0)?; + let value = Value::List { + vals, + span: call.head, + }; + let expressions = NuExpression::extract_exprs(value)?; + + let group_by = NuLazyGroupBy::try_from_pipeline(input, call.head)?; + let from_eager = group_by.from_eager; + + let group_by = group_by.into_polars(); + let lazy: NuLazyFrame = group_by.agg(&expressions).into(); + + let res = if from_eager { + lazy.collect(call.head)?.into_value(call.head) + } else { + lazy.into_value(call.head) + }; + + Ok(PipelineData::Value(res, None)) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/collect.rs b/crates/nu-command/src/dataframe/lazy/collect.rs new file mode 100644 index 0000000000..6f93aff437 --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/collect.rs @@ -0,0 +1,48 @@ +use super::super::values::{NuDataFrame, NuLazyFrame}; + +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, +}; + +#[derive(Clone)] +pub struct LazyCollect; + +impl Command for LazyCollect { + fn name(&self) -> &str { + "dfr collect" + } + + fn usage(&self) -> &str { + "Collect lazy dataframe into dataframe" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()).category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "", + example: "", + result: None, + }] + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; + let eager = 
lazy.collect(call.head)?; + + Ok(PipelineData::Value( + NuDataFrame::into_value(eager, call.head), + None, + )) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/fetch.rs b/crates/nu-command/src/dataframe/lazy/fetch.rs new file mode 100644 index 0000000000..e3cabb17cd --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/fetch.rs @@ -0,0 +1,80 @@ +use super::super::values::NuLazyFrame; +use crate::dataframe::values::NuDataFrame; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, SyntaxShape, +}; + +#[derive(Clone)] +pub struct LazyFetch; + +impl Command for LazyFetch { + fn name(&self) -> &str { + "dfr fetch" + } + + fn usage(&self) -> &str { + "collects the lazyframe to the selected rows" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "rows", + SyntaxShape::Int, + "number of rows to be fetched from lazyframe", + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "", + example: "", + result: None, + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let rows: i64 = call.req(engine_state, stack, 0)?; + + let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; + let eager: NuDataFrame = lazy + .into_polars() + .fetch(rows as usize) + .map_err(|e| { + ShellError::GenericError( + "Error fetching rows".into(), + e.to_string(), + Some(call.head), + None, + Vec::new(), + ) + })? 
+ .into(); + + Ok(PipelineData::Value( + NuDataFrame::into_value(eager, call.head), + None, + )) + } +} + +//#[cfg(test)] +//mod test { +// use super::super::super::test_dataframe::test_dataframe; +// use super::*; +// +// #[test] +// fn test_examples() { +// test_dataframe(vec![Box::new(LazyFetch {})]) +// } +//} diff --git a/crates/nu-command/src/dataframe/lazy/fill_na.rs b/crates/nu-command/src/dataframe/lazy/fill_na.rs new file mode 100644 index 0000000000..7c0adbc023 --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/fill_na.rs @@ -0,0 +1,65 @@ +use crate::dataframe::values::{NuExpression, NuLazyFrame}; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, +}; + +#[derive(Clone)] +pub struct LazyFillNA; + +impl Command for LazyFillNA { + fn name(&self) -> &str { + "dfr fill-na" + } + + fn usage(&self) -> &str { + "Replaces NA values with the given expression" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "fill", + SyntaxShape::Any, + "Expression to use to fill the NAN values", + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "", + example: "", + result: None, + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let fill: Value = call.req(engine_state, stack, 0)?; + + let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars(); + let expr = NuExpression::try_from_value(fill)?.into_polars(); + let lazy: NuLazyFrame = lazy.fill_nan(expr).into(); + + Ok(PipelineData::Value(lazy.into_value(call.head), None)) + } +} + +//#[cfg(test)] +//mod test { +// use super::super::super::test_dataframe::test_dataframe; +// use super::*; +// +// #[test] +// fn test_examples() { +// test_dataframe(vec![Box::new(LazyFillNA {})]) +// } +//} 
diff --git a/crates/nu-command/src/dataframe/lazy/fill_null.rs b/crates/nu-command/src/dataframe/lazy/fill_null.rs new file mode 100644 index 0000000000..4fdb041a2d --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/fill_null.rs @@ -0,0 +1,65 @@ +use crate::dataframe::values::{NuExpression, NuLazyFrame}; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, +}; + +#[derive(Clone)] +pub struct LazyFillNull; + +impl Command for LazyFillNull { + fn name(&self) -> &str { + "dfr fill-null" + } + + fn usage(&self) -> &str { + "Replaces NULL values with the given expression" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "fill", + SyntaxShape::Any, + "Expression to use to fill the null values", + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "", + example: "", + result: None, + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let fill: Value = call.req(engine_state, stack, 0)?; + + let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars(); + let expr = NuExpression::try_from_value(fill)?.into_polars(); + let lazy: NuLazyFrame = lazy.fill_null(expr).into(); + + Ok(PipelineData::Value(lazy.into_value(call.head), None)) + } +} + +//#[cfg(test)] +//mod test { +// use super::super::super::test_dataframe::test_dataframe; +// use super::*; +// +// #[test] +// fn test_examples() { +// test_dataframe(vec![Box::new(LazyFillNull {})]) +// } +//} diff --git a/crates/nu-command/src/dataframe/lazy/groupby.rs b/crates/nu-command/src/dataframe/lazy/groupby.rs new file mode 100644 index 0000000000..b05f5df15b --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/groupby.rs @@ -0,0 +1,98 @@ +use crate::dataframe::values::{NuExpression, 
NuLazyFrame, NuLazyGroupBy}; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, +}; +use polars::prelude::Expr; + +#[derive(Clone)] +pub struct ToLazyGroupBy; + +impl Command for ToLazyGroupBy { + fn name(&self) -> &str { + "dfr group-by" + } + + fn usage(&self) -> &str { + "Creates a groupby object that can be used for other aggregations" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest( + "Group by expressions", + SyntaxShape::Any, + "Expression(s) that define the lazy group by", + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Group by and perform an aggregation", + example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] + | dfr to-df + | dfr group-by a + | dfr aggregate [ + ("b" | dfr min | dfr as "b_min") + ("b" | dfr max | dfr as "b_max") + ("b" | dfr sum | dfr as "b_sum") + ]"#, + result: None, + }, + Example { + description: "Group by and perform an aggregation", + example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] + | dfr to-df + | dfr to-lazy + | dfr group-by a + | dfr aggregate [ + ("b" | dfr min | dfr as "b_min") + ("b" | dfr max | dfr as "b_max") + ("b" | dfr sum | dfr as "b_sum") + ] + | dfr collect"#, + result: None, + }, + ] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let vals: Vec = call.rest(engine_state, stack, 0)?; + let value = Value::List { + vals, + span: call.head, + }; + let expressions = NuExpression::extract_exprs(value)?; + + if expressions + .iter() + .any(|expr| !matches!(expr, Expr::Column(..))) + { + let value: Value = call.req(engine_state, stack, 0)?; + return Err(ShellError::IncompatibleParametersSingle( + "Expected only Col expressions".into(), + value.span()?, + )); + } + + let value = input.into_value(call.head); + let (lazy, 
from_eager) = NuLazyFrame::maybe_is_eager(value)?; + + let group_by = NuLazyGroupBy { + group_by: Some(lazy.into_polars().groupby(&expressions)), + from_eager, + }; + + Ok(PipelineData::Value(group_by.into_value(call.head), None)) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/join.rs b/crates/nu-command/src/dataframe/lazy/join.rs new file mode 100644 index 0000000000..032491b887 --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/join.rs @@ -0,0 +1,139 @@ +use crate::dataframe::values::{NuExpression, NuLazyFrame}; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, +}; +use polars::prelude::{Expr, JoinType}; + +#[derive(Clone)] +pub struct LazyJoin; + +impl Command for LazyJoin { + fn name(&self) -> &str { + "dfr join" + } + + fn usage(&self) -> &str { + "Joins a lazy frame with another lazy frame" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("other", SyntaxShape::Any, "LazyFrame to join with") + .required("left_on", SyntaxShape::Any, "Left column(s) to join on") + .required("right_on", SyntaxShape::Any, "Right column(s) to join on") + .switch( + "inner", + "inner join between lazyframes (default)", + Some('i'), + ) + .switch("left", "left join between lazyframes", Some('l')) + .switch("outer", "outer join between lazyframes", Some('o')) + .switch("cross", "cross join between lazyframes", Some('c')) + .named( + "suffix", + SyntaxShape::String, + "Suffix to use on columns with same name", + Some('s'), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Join two lazy dataframes", + example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | dfr to-lazy); + let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [1 "c" "var"] [1 "c" "const"]] | dfr to-lazy); + $df_a | dfr join $df_b a foo | dfr collect"#, +
result: None, + }, + Example { + description: "Join one eager dataframe with a lazy dataframe", + example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | dfr to-df); + let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [1 "c" "var"] [1 "c" "const"]] | dfr to-lazy); + $df_a | dfr join $df_b a foo"#, + result: None, + }, + ] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let left = call.has_flag("left"); + let outer = call.has_flag("outer"); + let cross = call.has_flag("cross"); + + let how = if left { + JoinType::Left + } else if outer { + JoinType::Outer + } else if cross { + JoinType::Cross + } else { + JoinType::Inner + }; + + let other: Value = call.req(engine_state, stack, 0)?; + let (other, _) = NuLazyFrame::maybe_is_eager(other)?; + let other = other.into_polars(); + + let left_on: Value = call.req(engine_state, stack, 1)?; + let left_on = NuExpression::extract_exprs(left_on)?; + + let right_on: Value = call.req(engine_state, stack, 2)?; + let right_on = NuExpression::extract_exprs(right_on)?; + + if left_on.len() != right_on.len() { + let right_on: Value = call.req(engine_state, stack, 2)?; + return Err(ShellError::IncompatibleParametersSingle( + "The right column list has a different size to the left column list".into(), + right_on.span()?, + )); + } + + // Checking that both lists (left at positional 1, right at positional 2) contain only col expressions or strings + for (index, list) in &[(1usize, &left_on), (2, &right_on)] { + if list.iter().any(|expr| !matches!(expr, Expr::Column(..))) { + let value: Value = call.req(engine_state, stack, *index)?; + return Err(ShellError::IncompatibleParametersSingle( + "Expected only a string, col expressions or list of strings".into(), + value.span()?, + )); + } + } + + let suffix: Option = call.get_flag(engine_state, stack, "suffix")?; + let suffix = suffix.unwrap_or_else(|| "_x".into()); + + let value = input.into_value(call.head); + let (lazy,
from_eager) = NuLazyFrame::maybe_is_eager(value)?; + let lazy = lazy.into_polars(); + + let lazy: NuLazyFrame = lazy + .join_builder() + .with(other) + .left_on(left_on) + .right_on(right_on) + .how(how) + .force_parallel(true) + .suffix(suffix) + .finish() + .into(); + + let res = if from_eager { + lazy.collect(call.head)?.into_value(call.head) + } else { + lazy.into_value(call.head) + }; + + Ok(PipelineData::Value(res, None)) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/macro_commands.rs b/crates/nu-command/src/dataframe/lazy/macro_commands.rs new file mode 100644 index 0000000000..7c9bfe42f5 --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/macro_commands.rs @@ -0,0 +1,232 @@ +/// Definition of multiple lazyframe commands using a macro rule +/// All of these commands have an identical body and only require +/// to have a change in the name, description and function +use crate::dataframe::values::{NuExpression, NuLazyFrame}; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, +}; + +macro_rules! 
lazy_command { + ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => { + #[derive(Clone)] + pub struct $command; + + impl Command for $command { + fn name(&self) -> &str { + $name + } + + fn usage(&self) -> &str { + $desc + } + + fn signature(&self) -> Signature { + Signature::build(self.name()).category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + $examples + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars(); + let lazy: NuLazyFrame = lazy.$func().into(); + + Ok(PipelineData::Value(lazy.into_value(call.head), None)) + } + } + }; +} + +// LazyReverse command +// Expands to a command definition for reverse +lazy_command!( + LazyReverse, + "dfr reverse", + "Reverses the LazyFrame", + vec![Example { + description: "", + example: "", + result: None, + }], + reverse +); + +// LazyCache command +// Expands to a command definition for cache +lazy_command!( + LazyCache, + "dfr cache", + "Caches operations in a new LazyFrame", + vec![Example { + description: "", + example: "", + result: None, + }], + cache +); + +// Creates a command that may result in a lazy frame operation or +// lazy frame expression +macro_rules! 
lazy_expr_command { + ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => { + #[derive(Clone)] + pub struct $command; + + impl Command for $command { + fn name(&self) -> &str { + $name + } + + fn usage(&self) -> &str { + $desc + } + + fn signature(&self) -> Signature { + Signature::build(self.name()).category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + $examples + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + + if NuExpression::can_downcast(&value) { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().$func().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } else if NuLazyFrame::can_downcast(&value) { + let lazy = NuLazyFrame::try_from_value(value)?.into_polars(); + let lazy: NuLazyFrame = lazy.$func().into(); + + Ok(PipelineData::Value(lazy.into_value(call.head), None)) + } else { + Err(ShellError::CantConvert( + "expression or lazyframe".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } + } + } + }; +} + +// LazyMax command +// Expands to a command definition for max aggregation +lazy_expr_command!( + LazyMax, + "dfr max", + "Aggregates columns to their max value or creates a max expression", + vec![Example { + description: "", + example: "", + result: None, + }], + max +); + +// LazyMin command +// Expands to a command definition for min aggregation +lazy_expr_command!( + LazyMin, + "dfr min", + "Aggregates columns to their min value or creates a min expression", + vec![Example { + description: "", + example: "", + result: None, + }], + min +); + +// LazySum command +// Expands to a command definition for sum aggregation +lazy_expr_command!( + LazySum, + "dfr sum", + "Aggregates columns to their sum value or creates a sum expression", + vec![Example { + 
description: "", + example: "", + result: None, + }], + sum +); + +// LazyMean command +// Expands to a command definition for mean aggregation +lazy_expr_command!( + LazyMean, + "dfr mean", + "Aggregates columns to their mean value or creates a mean expression", + vec![Example { + description: "", + example: "", + result: None, + }], + mean +); + +// LazyMedian command +// Expands to a command definition for median aggregation +lazy_expr_command!( + LazyMedian, + "dfr median", + "Aggregates columns to their median value or creates a median expression", + vec![Example { + description: "", + example: "", + result: None, + }], + median +); + +// LazyStd command +// Expands to a command definition for std aggregation +lazy_expr_command!( + LazyStd, + "dfr std", + "Aggregates columns to their std value", + vec![Example { + description: "", + example: "", + result: None, + }], + std +); + +// LazyVar command +// Expands to a command definition for var aggregation +lazy_expr_command!( + LazyVar, + "dfr var", + "Aggregates columns to their var value", + vec![Example { + description: "", + example: "", + result: None, + }], + var +); diff --git a/crates/nu-command/src/dataframe/lazy/mod.rs b/crates/nu-command/src/dataframe/lazy/mod.rs new file mode 100644 index 0000000000..f34bc21eba --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/mod.rs @@ -0,0 +1,63 @@ +mod aggregate; +mod collect; +mod fetch; +mod fill_na; +mod fill_null; +mod groupby; +mod join; +mod macro_commands; +mod quantile; +mod select; +mod sort_by_expr; +mod to_lazy; + +use nu_protocol::engine::StateWorkingSet; + +use crate::dataframe::lazy::macro_commands::*; + +use crate::dataframe::lazy::aggregate::LazyAggregate; +use crate::dataframe::lazy::collect::LazyCollect; +use crate::dataframe::lazy::fetch::LazyFetch; +use crate::dataframe::lazy::fill_na::LazyFillNA; +use crate::dataframe::lazy::fill_null::LazyFillNull; +use crate::dataframe::lazy::groupby::ToLazyGroupBy; +use 
crate::dataframe::lazy::join::LazyJoin; +use crate::dataframe::lazy::quantile::LazyQuantile; +use crate::dataframe::lazy::select::LazySelect; +use crate::dataframe::lazy::sort_by_expr::LazySortBy; +use crate::dataframe::lazy::to_lazy::ToLazyFrame; + +pub fn add_lazy_decls(working_set: &mut StateWorkingSet) { + macro_rules! bind_command { + ( $command:expr ) => { + working_set.add_decl(Box::new($command)); + }; + ( $( $command:expr ),* ) => { + $( working_set.add_decl(Box::new($command)); )* + }; + } + + // Dataframe commands + bind_command!( + LazyAggregate, + LazyCache, + LazyCollect, + LazyFetch, + LazyFillNA, + LazyFillNull, + LazyJoin, + LazyQuantile, + LazyMax, + LazyMin, + LazySum, + LazyMean, + LazyMedian, + LazyStd, + LazyVar, + LazyReverse, + LazySelect, + LazySortBy, + ToLazyFrame, + ToLazyGroupBy + ); +} diff --git a/crates/nu-command/src/dataframe/lazy/quantile.rs b/crates/nu-command/src/dataframe/lazy/quantile.rs new file mode 100644 index 0000000000..067ba4602c --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/quantile.rs @@ -0,0 +1,67 @@ +use crate::dataframe::values::NuLazyFrame; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, SyntaxShape, +}; +use polars::prelude::QuantileInterpolOptions; + +#[derive(Clone)] +pub struct LazyQuantile; + +impl Command for LazyQuantile { + fn name(&self) -> &str { + "dfr quantile" + } + + fn usage(&self) -> &str { + "Aggregates the columns to the selected quantile" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "quantile", + SyntaxShape::Number, + "quantile value for quantile operation", + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "", + example: "", + result: None, + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) 
-> Result { + let quantile: f64 = call.req(engine_state, stack, 0)?; + + let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars(); + let lazy: NuLazyFrame = lazy + .quantile(quantile, QuantileInterpolOptions::default()) + .into(); + + Ok(PipelineData::Value(lazy.into_value(call.head), None)) + } +} + +//#[cfg(test)] +//mod test { +// use super::super::super::test_dataframe::test_dataframe; +// use super::*; +// +// #[test] +// fn test_examples() { +// test_dataframe(vec![Box::new(LazyQuantile {})]) +// } +//} diff --git a/crates/nu-command/src/dataframe/lazy/select.rs b/crates/nu-command/src/dataframe/lazy/select.rs new file mode 100644 index 0000000000..31dd3e5607 --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/select.rs @@ -0,0 +1,78 @@ +use crate::dataframe::values::{NuExpression, NuLazyFrame}; + +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, +}; +use polars::prelude::Expr; + +#[derive(Clone)] +pub struct LazySelect; + +impl Command for LazySelect { + fn name(&self) -> &str { + "dfr select" + } + + fn usage(&self) -> &str { + "Selects columns from lazyframe" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "select expressions", + SyntaxShape::Any, + "Expression(s) that define the column selection", + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "", + example: "", + result: None, + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let value: Value = call.req(engine_state, stack, 0)?; + let expressions = NuExpression::extract_exprs(value)?; + + if expressions + .iter() + .any(|expr| !matches!(expr, Expr::Column(..))) + { + let value: Value = call.req(engine_state, stack, 0)?; + return 
Err(ShellError::IncompatibleParametersSingle( + "Expected only Col expressions".into(), + value.span()?, + )); + } + + let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars(); + let lazy: NuLazyFrame = lazy.select(&expressions).into(); + + Ok(PipelineData::Value(lazy.into_value(call.head), None)) + } +} + +//#[cfg(test)] +//mod test { +// use super::super::super::test_dataframe::test_dataframe; +// use super::*; +// +// #[test] +// fn test_examples() { +// test_dataframe(vec![Box::new(LazySelect {})]) +// } +//} diff --git a/crates/nu-command/src/dataframe/lazy/sort_by_expr.rs b/crates/nu-command/src/dataframe/lazy/sort_by_expr.rs new file mode 100644 index 0000000000..257d8d0aac --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/sort_by_expr.rs @@ -0,0 +1,100 @@ +use super::super::values::NuLazyFrame; +use crate::dataframe::values::NuExpression; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, +}; + +#[derive(Clone)] +pub struct LazySortBy; + +impl Command for LazySortBy { + fn name(&self) -> &str { + "dfr sort-by" + } + + fn usage(&self) -> &str { + "sorts a lazy dataframe based on expression(s)" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "sort expression", + SyntaxShape::Any, + "expression(s) to sort the lazy dataframe by", + ) + .named( + "reverse", + SyntaxShape::List(Box::new(SyntaxShape::Boolean)), + "list indicating if reverse search should be done in the column.
Default is false", + Some('r'), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "", + example: "", + result: None, + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let value: Value = call.req(engine_state, stack, 0)?; + let expressions = NuExpression::extract_exprs(value)?; + + let reverse: Option> = call.get_flag(engine_state, stack, "reverse")?; + let reverse = match reverse { + Some(list) => { + if expressions.len() != list.len() { + let span = call + .get_flag::(engine_state, stack, "reverse")? + .expect("already checked and it exists") + .span()?; + return Err(ShellError::GenericError( + "Incorrect list size".into(), + "Size doesn't match expression list".into(), + Some(span), + None, + Vec::new(), + )); + } else { + list + } + } + None => expressions.iter().map(|_| false).collect::>(), + }; + + let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; + let lazy: NuLazyFrame = lazy + .into_polars() + .sort_by_exprs(&expressions, reverse) + .into(); + + Ok(PipelineData::Value( + NuLazyFrame::into_value(lazy, call.head), + None, + )) + } +} + +//#[cfg(test)] +//mod test { +// use super::super::super::test_dataframe::test_dataframe; +// use super::*; +// +// #[test] +// fn test_examples() { +// test_dataframe(vec![Box::new(LazySortBy {})]) +// } +//} diff --git a/crates/nu-command/src/dataframe/lazy/to_lazy.rs b/crates/nu-command/src/dataframe/lazy/to_lazy.rs new file mode 100644 index 0000000000..434f761230 --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/to_lazy.rs @@ -0,0 +1,45 @@ +use super::super::values::{NuDataFrame, NuLazyFrame}; + +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, +}; + +#[derive(Clone)] +pub struct ToLazyFrame; + +impl Command for ToLazyFrame { + fn name(&self) -> &str { + "dfr 
to-lazy" + } + + fn usage(&self) -> &str { + "Converts a dataframe into a lazy dataframe" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()).category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Takes a dictionary and creates a lazy dataframe", + example: "[[a b];[1 2] [3 4]] | dfr to-df | dfr to-lazy", + result: None, + }] + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let df = NuDataFrame::try_from_iter(input.into_iter())?; + let lazy = NuLazyFrame::from_dataframe(df); + + Ok(PipelineData::Value(lazy.into_value(call.head), None)) + } +} diff --git a/crates/nu-command/src/dataframe/mod.rs b/crates/nu-command/src/dataframe/mod.rs index 61abdea795..3f93b3936f 100644 --- a/crates/nu-command/src/dataframe/mod.rs +++ b/crates/nu-command/src/dataframe/mod.rs @@ -1,8 +1,13 @@ mod eager; +mod expressions; +mod lazy; mod series; +mod utils; mod values; pub use eager::add_eager_decls; +pub use expressions::add_expressions; +pub use lazy::add_lazy_decls; pub use series::add_series_decls; use nu_protocol::engine::StateWorkingSet; @@ -10,6 +15,8 @@ use nu_protocol::engine::StateWorkingSet; pub fn add_dataframe_decls(working_set: &mut StateWorkingSet) { add_series_decls(working_set); add_eager_decls(working_set); + add_expressions(working_set); + add_lazy_decls(working_set); } #[cfg(test)] diff --git a/crates/nu-command/src/dataframe/series/date/as_date.rs b/crates/nu-command/src/dataframe/series/date/as_date.rs index f195c736ad..e12260a6e8 100644 --- a/crates/nu-command/src/dataframe/series/date/as_date.rs +++ b/crates/nu-command/src/dataframe/series/date/as_date.rs @@ -6,7 +6,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, SyntaxShape, }; -use polars::prelude::IntoSeries; +use polars::prelude::{IntoSeries, Utf8Methods}; #[derive(Clone)] 
pub struct AsDate; diff --git a/crates/nu-command/src/dataframe/series/date/as_datetime.rs b/crates/nu-command/src/dataframe/series/date/as_datetime.rs index 198ccfd327..e14fc54322 100644 --- a/crates/nu-command/src/dataframe/series/date/as_datetime.rs +++ b/crates/nu-command/src/dataframe/series/date/as_datetime.rs @@ -7,7 +7,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; -use polars::prelude::{IntoSeries, TimeUnit}; +use polars::prelude::{IntoSeries, TimeUnit, Utf8Methods}; #[derive(Clone)] pub struct AsDateTime; diff --git a/crates/nu-command/src/dataframe/series/date/get_day.rs b/crates/nu-command/src/dataframe/series/date/get_day.rs index dc394148da..b3db0d5685 100644 --- a/crates/nu-command/src/dataframe/series/date/get_day.rs +++ b/crates/nu-command/src/dataframe/series/date/get_day.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{DatetimeMethods, IntoSeries}; #[derive(Clone)] pub struct GetDay; diff --git a/crates/nu-command/src/dataframe/series/date/get_hour.rs b/crates/nu-command/src/dataframe/series/date/get_hour.rs index c8061f0409..deda7edb45 100644 --- a/crates/nu-command/src/dataframe/series/date/get_hour.rs +++ b/crates/nu-command/src/dataframe/series/date/get_hour.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{DatetimeMethods, IntoSeries}; #[derive(Clone)] pub struct GetHour; diff --git a/crates/nu-command/src/dataframe/series/date/get_minute.rs b/crates/nu-command/src/dataframe/series/date/get_minute.rs index f64fe9afb0..3a1133e1e0 100644 --- a/crates/nu-command/src/dataframe/series/date/get_minute.rs +++ 
b/crates/nu-command/src/dataframe/series/date/get_minute.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{DatetimeMethods, IntoSeries}; #[derive(Clone)] pub struct GetMinute; diff --git a/crates/nu-command/src/dataframe/series/date/get_month.rs b/crates/nu-command/src/dataframe/series/date/get_month.rs index b3caa4d8d3..e6e842680d 100644 --- a/crates/nu-command/src/dataframe/series/date/get_month.rs +++ b/crates/nu-command/src/dataframe/series/date/get_month.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{DatetimeMethods, IntoSeries}; #[derive(Clone)] pub struct GetMonth; diff --git a/crates/nu-command/src/dataframe/series/date/get_nanosecond.rs b/crates/nu-command/src/dataframe/series/date/get_nanosecond.rs index 37647d824f..3b6ea1e111 100644 --- a/crates/nu-command/src/dataframe/series/date/get_nanosecond.rs +++ b/crates/nu-command/src/dataframe/series/date/get_nanosecond.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{DatetimeMethods, IntoSeries}; #[derive(Clone)] pub struct GetNanosecond; diff --git a/crates/nu-command/src/dataframe/series/date/get_ordinal.rs b/crates/nu-command/src/dataframe/series/date/get_ordinal.rs index 3e5f693752..088a309a7e 100644 --- a/crates/nu-command/src/dataframe/series/date/get_ordinal.rs +++ b/crates/nu-command/src/dataframe/series/date/get_ordinal.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{DatetimeMethods, 
IntoSeries}; #[derive(Clone)] pub struct GetOrdinal; diff --git a/crates/nu-command/src/dataframe/series/date/get_second.rs b/crates/nu-command/src/dataframe/series/date/get_second.rs index 302820773e..da69832a16 100644 --- a/crates/nu-command/src/dataframe/series/date/get_second.rs +++ b/crates/nu-command/src/dataframe/series/date/get_second.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{DatetimeMethods, IntoSeries}; #[derive(Clone)] pub struct GetSecond; diff --git a/crates/nu-command/src/dataframe/series/date/get_week.rs b/crates/nu-command/src/dataframe/series/date/get_week.rs index 4736ac296a..27402492d9 100644 --- a/crates/nu-command/src/dataframe/series/date/get_week.rs +++ b/crates/nu-command/src/dataframe/series/date/get_week.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{DatetimeMethods, IntoSeries}; #[derive(Clone)] pub struct GetWeek; diff --git a/crates/nu-command/src/dataframe/series/date/get_weekday.rs b/crates/nu-command/src/dataframe/series/date/get_weekday.rs index d490ed147b..73ba75c84f 100644 --- a/crates/nu-command/src/dataframe/series/date/get_weekday.rs +++ b/crates/nu-command/src/dataframe/series/date/get_weekday.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{DatetimeMethods, IntoSeries}; #[derive(Clone)] pub struct GetWeekDay; diff --git a/crates/nu-command/src/dataframe/series/date/get_year.rs b/crates/nu-command/src/dataframe/series/date/get_year.rs index 519e61e7d8..4816556325 100644 --- a/crates/nu-command/src/dataframe/series/date/get_year.rs +++ 
b/crates/nu-command/src/dataframe/series/date/get_year.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{DatetimeMethods, IntoSeries}; #[derive(Clone)] pub struct GetYear; diff --git a/crates/nu-command/src/dataframe/series/masks/is_not_null.rs b/crates/nu-command/src/dataframe/series/masks/is_not_null.rs index c8226c13e2..446f31462f 100644 --- a/crates/nu-command/src/dataframe/series/masks/is_not_null.rs +++ b/crates/nu-command/src/dataframe/series/masks/is_not_null.rs @@ -1,5 +1,5 @@ use super::super::super::values::{Column, NuDataFrame}; - +use crate::dataframe::values::NuExpression; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, @@ -16,7 +16,7 @@ impl Command for IsNotNull { } fn usage(&self) -> &str { - "Creates mask where value is not null" + "Creates mask where value is not null or creates a is-not-null expression" } fn signature(&self) -> Signature { @@ -24,25 +24,32 @@ impl Command for IsNotNull { } fn examples(&self) -> Vec { - vec![Example { - description: "Create mask where values are not null", - example: r#"let s = ([5 6 0 8] | dfr to-df); + vec![ + Example { + description: "Create mask where values are not null", + example: r#"let s = ([5 6 0 8] | dfr to-df); let res = ($s / $s); $res | dfr is-not-null"#, - result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "is_not_null".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(false), - Value::test_bool(true), - ], - )]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "is_not_null".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(false), + Value::test_bool(true), + ], + )]) + .expect("simple df for test should not fail") + 
.into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a is not null expression from a column", + example: "dfr col a | dfr is-not-null", + result: None, + }, + ] } fn run( @@ -52,7 +59,27 @@ impl Command for IsNotNull { call: &Call, input: PipelineData, ) -> Result { - command(engine_state, stack, call, input) + let value = input.into_value(call.head); + + if NuExpression::can_downcast(&value) { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().is_not_null().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command(engine_state, stack, call, df) + } else { + Err(ShellError::CantConvert( + "expression or query".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } } } @@ -60,10 +87,8 @@ fn command( _engine_state: &EngineState, _stack: &mut Stack, call: &Call, - input: PipelineData, + df: NuDataFrame, ) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let mut res = df.as_series(call.head)?.is_not_null(); res.rename("is_not_null"); diff --git a/crates/nu-command/src/dataframe/series/masks/is_null.rs b/crates/nu-command/src/dataframe/series/masks/is_null.rs index 397a87fe1d..008b90a363 100644 --- a/crates/nu-command/src/dataframe/series/masks/is_null.rs +++ b/crates/nu-command/src/dataframe/series/masks/is_null.rs @@ -1,5 +1,5 @@ use super::super::super::values::{Column, NuDataFrame}; - +use crate::dataframe::values::NuExpression; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, @@ -16,7 +16,7 @@ impl Command for IsNull { } fn usage(&self) -> &str { - "Creates mask where value is null" + "Creates mask where value is null or creates a is-null expression" } fn signature(&self) -> Signature { @@ -24,25 +24,32 @@ impl Command for IsNull { } fn examples(&self) -> Vec { - vec![Example { - 
description: "Create mask where values are null", - example: r#"let s = ([5 6 0 8] | dfr to-df); + vec![ + Example { + description: "Create mask where values are null", + example: r#"let s = ([5 6 0 8] | dfr to-df); let res = ($s / $s); $res | dfr is-null"#, - result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "is_null".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(true), - Value::test_bool(false), - ], - )]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "is_null".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(true), + Value::test_bool(false), + ], + )]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a is null expression from a column", + example: "dfr col a | dfr is-null", + result: None, + }, + ] } fn run( @@ -52,7 +59,27 @@ impl Command for IsNull { call: &Call, input: PipelineData, ) -> Result { - command(engine_state, stack, call, input) + let value = input.into_value(call.head); + + if NuExpression::can_downcast(&value) { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().is_null().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command(engine_state, stack, call, df) + } else { + Err(ShellError::CantConvert( + "expression or query".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } } } @@ -60,10 +87,8 @@ fn command( _engine_state: &EngineState, _stack: &mut Stack, call: &Call, - input: PipelineData, + df: NuDataFrame, ) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let mut res = df.as_series(call.head)?.is_null(); 
res.rename("is_null"); diff --git a/crates/nu-command/src/dataframe/series/masks/not.rs b/crates/nu-command/src/dataframe/series/masks/not.rs index 5265e17605..72877383a1 100644 --- a/crates/nu-command/src/dataframe/series/masks/not.rs +++ b/crates/nu-command/src/dataframe/series/masks/not.rs @@ -1,5 +1,5 @@ use super::super::super::values::{Column, NuDataFrame}; - +use crate::dataframe::values::NuExpression; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, @@ -18,7 +18,7 @@ impl Command for NotSeries { } fn usage(&self) -> &str { - "Inverts boolean mask" + "Inverts boolean mask or creates a not expression" } fn signature(&self) -> Signature { @@ -26,22 +26,29 @@ impl Command for NotSeries { } fn examples(&self) -> Vec { - vec![Example { - description: "Inverts boolean mask", - example: "[true false true] | dfr to-df | dfr not", - result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(true), - Value::test_bool(false), - ], - )]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + vec![ + Example { + description: "Inverts boolean mask", + example: "[true false true] | dfr to-df | dfr not", + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "0".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(true), + Value::test_bool(false), + ], + )]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a not expression from a column", + example: "dfr col a | dfr not", + result: None, + }, + ] } fn run( @@ -51,7 +58,27 @@ impl Command for NotSeries { call: &Call, input: PipelineData, ) -> Result { - command(engine_state, stack, call, input) + let value = input.into_value(call.head); + + if NuExpression::can_downcast(&value) { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = 
expr.into_polars().not().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command(engine_state, stack, call, df) + } else { + Err(ShellError::CantConvert( + "expression or query".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } } } @@ -59,9 +86,8 @@ fn command( _engine_state: &EngineState, _stack: &mut Stack, call: &Call, - input: PipelineData, + df: NuDataFrame, ) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; let series = df.as_series(call.head)?; let bool = series.bool().map_err(|e| { diff --git a/crates/nu-command/src/dataframe/series/n_unique.rs b/crates/nu-command/src/dataframe/series/n_unique.rs index 0710819a3c..1105b62864 100644 --- a/crates/nu-command/src/dataframe/series/n_unique.rs +++ b/crates/nu-command/src/dataframe/series/n_unique.rs @@ -1,5 +1,5 @@ use super::super::values::{Column, NuDataFrame}; - +use crate::dataframe::values::NuExpression; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, @@ -15,7 +15,7 @@ impl Command for NUnique { } fn usage(&self) -> &str { - "Counts unique values" + "Counts unique values or creates a n-unique expression" } fn signature(&self) -> Signature { @@ -23,18 +23,25 @@ impl Command for NUnique { } fn examples(&self) -> Vec { - vec![Example { - description: "Counts unique values", - example: "[1 1 2 2 3 3 4] | dfr to-df | dfr count-unique", - result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "count_unique".to_string(), - vec![Value::test_int(4)], - )]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + vec![ + Example { + description: "Counts unique values", + example: "[1 1 2 2 3 3 4] | dfr to-df | dfr count-unique", + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "count_unique".to_string(), + vec![Value::test_int(4)], + )]) 
+ .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a is n-unique expression from a column", + example: "dfr col a | dfr n-unique", + result: None, + }, + ] } fn run( @@ -44,7 +51,27 @@ impl Command for NUnique { call: &Call, input: PipelineData, ) -> Result { - command(engine_state, stack, call, input) + let value = input.into_value(call.head); + + if NuExpression::can_downcast(&value) { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().n_unique().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command(engine_state, stack, call, df) + } else { + Err(ShellError::CantConvert( + "expression or query".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } } } @@ -52,10 +79,8 @@ fn command( _engine_state: &EngineState, _stack: &mut Stack, call: &Call, - input: PipelineData, + df: NuDataFrame, ) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let res = df.as_series(call.head)?.n_unique().map_err(|e| { ShellError::GenericError( "Error counting unique values".into(), diff --git a/crates/nu-command/src/dataframe/series/shift.rs b/crates/nu-command/src/dataframe/series/shift.rs index cc35a2ca3f..78bf52c920 100644 --- a/crates/nu-command/src/dataframe/series/shift.rs +++ b/crates/nu-command/src/dataframe/series/shift.rs @@ -1,3 +1,5 @@ +use crate::dataframe::values::{NuExpression, NuLazyFrame}; + use super::super::values::{Column, NuDataFrame}; use nu_engine::CallExt; @@ -22,6 +24,12 @@ impl Command for Shift { fn signature(&self) -> Signature { Signature::build(self.name()) .required("period", SyntaxShape::Int, "shift period") + .named( + "fill", + SyntaxShape::Any, + "Expression to use to fill the null values (lazy df)", + Some('f'), + ) 
.category(Category::Custom("dataframe".into())) } @@ -47,25 +55,60 @@ impl Command for Shift { call: &Call, input: PipelineData, ) -> Result { - command(engine_state, stack, call, input) + let value = input.into_value(call.head); + + if NuLazyFrame::can_downcast(&value) { + let df = NuLazyFrame::try_from_value(value)?; + command_lazy(engine_state, stack, call, df) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command_eager(engine_state, stack, call, df) + } else { + Err(ShellError::CantConvert( + "expression or query".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } } } -fn command( +fn command_eager( engine_state: &EngineState, stack: &mut Stack, call: &Call, - input: PipelineData, + df: NuDataFrame, ) -> Result { let period: i64 = call.req(engine_state, stack, 0)?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; let series = df.as_series(call.head)?.shift(period); NuDataFrame::try_from_series(vec![series], call.head) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } +fn command_lazy( + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + lazy: NuLazyFrame, +) -> Result { + let shift: i64 = call.req(engine_state, stack, 0)?; + let fill: Option = call.get_flag(engine_state, stack, "fill")?; + + let lazy = lazy.into_polars(); + + let lazy: NuLazyFrame = match fill { + Some(fill) => { + let expr = NuExpression::try_from_value(fill)?.into_polars(); + lazy.shift_and_fill(shift, expr).into() + } + None => lazy.shift(shift).into(), + }; + + Ok(PipelineData::Value(lazy.into_value(call.head), None)) +} + #[cfg(test)] mod test { use super::super::super::eager::DropNulls; diff --git a/crates/nu-command/src/dataframe/series/unique.rs b/crates/nu-command/src/dataframe/series/unique.rs index 849c958622..1b849e3989 100644 --- a/crates/nu-command/src/dataframe/series/unique.rs +++ b/crates/nu-command/src/dataframe/series/unique.rs @@ -1,11 +1,14 
@@ +use crate::dataframe::{utils::extract_strings, values::NuLazyFrame}; + use super::super::values::{Column, NuDataFrame}; +use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, Span, Value, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{IntoSeries, UniqueKeepStrategy}; #[derive(Clone)] pub struct Unique; @@ -20,7 +23,24 @@ impl Command for Unique { } fn signature(&self) -> Signature { - Signature::build(self.name()).category(Category::Custom("dataframe".into())) + Signature::build(self.name()) + .named( + "subset", + SyntaxShape::Any, + "Subset of column(s) to use to maintain rows (lazy df)", + Some('s'), + ) + .switch( + "last", + "Keeps last unique value. Default keeps first value (lazy df)", + Some('l'), + ) + .switch( + "maintain-order", + "Keep the same order as the original DataFrame (lazy df)", + Some('k'), + ) + .category(Category::Custom("dataframe".into())) } fn examples(&self) -> Vec { @@ -45,17 +65,31 @@ impl Command for Unique { call: &Call, input: PipelineData, ) -> Result { - command(engine_state, stack, call, input) + let value = input.into_value(call.head); + + if NuLazyFrame::can_downcast(&value) { + let df = NuLazyFrame::try_from_value(value)?; + command_lazy(engine_state, stack, call, df) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command_eager(engine_state, stack, call, df) + } else { + Err(ShellError::CantConvert( + "expression or query".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } } } -fn command( +fn command_eager( _engine_state: &EngineState, _stack: &mut Stack, call: &Call, - input: PipelineData, + df: NuDataFrame, ) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; let series = df.as_series(call.head)?; let res = series.unique().map_err(|e| 
{ @@ -72,6 +106,37 @@ fn command( .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } +fn command_lazy( + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + lazy: NuLazyFrame, +) -> Result { + let last = call.has_flag("last"); + let maintain = call.has_flag("maintain-order"); + + let subset: Option = call.get_flag(engine_state, stack, "subset")?; + let subset = match subset { + Some(value) => Some(extract_strings(value)?), + None => None, + }; + + let strategy = if last { + UniqueKeepStrategy::Last + } else { + UniqueKeepStrategy::First + }; + + let lazy = lazy.into_polars(); + let lazy: NuLazyFrame = if maintain { + lazy.unique_stable(subset, strategy).into() + } else { + lazy.unique(subset, strategy).into() + }; + + Ok(PipelineData::Value(lazy.into_value(call.head), None)) +} + #[cfg(test)] mod test { use super::super::super::test_dataframe::test_dataframe; diff --git a/crates/nu-command/src/dataframe/series/value_counts.rs b/crates/nu-command/src/dataframe/series/value_counts.rs index fb544476d7..5ecbe7a123 100644 --- a/crates/nu-command/src/dataframe/series/value_counts.rs +++ b/crates/nu-command/src/dataframe/series/value_counts.rs @@ -63,7 +63,7 @@ fn command( let df = NuDataFrame::try_from_pipeline(input, call.head)?; let series = df.as_series(call.head)?; - let res = series.value_counts().map_err(|e| { + let res = series.value_counts(false).map_err(|e| { ShellError::GenericError( "Error calculating value counts values".into(), e.to_string(), diff --git a/crates/nu-command/src/dataframe/utils.rs b/crates/nu-command/src/dataframe/utils.rs new file mode 100644 index 0000000000..58f7589f63 --- /dev/null +++ b/crates/nu-command/src/dataframe/utils.rs @@ -0,0 +1,15 @@ +use nu_protocol::{FromValue, ShellError, Value}; + +pub fn extract_strings(value: Value) -> Result, ShellError> { + match ( + ::from_value(&value), + as FromValue>::from_value(&value), + ) { + (Ok(col), Err(_)) => Ok(vec![col]), + (Err(_), Ok(cols)) => 
Ok(cols), + _ => Err(ShellError::IncompatibleParametersSingle( + "Expected a string or list of strings".into(), + value.span()?, + )), + } +} diff --git a/crates/nu-command/src/dataframe/values/mod.rs b/crates/nu-command/src/dataframe/values/mod.rs index b952137a0c..e4f11222fd 100644 --- a/crates/nu-command/src/dataframe/values/mod.rs +++ b/crates/nu-command/src/dataframe/values/mod.rs @@ -1,6 +1,10 @@ mod nu_dataframe; -mod nu_groupby; +mod nu_expression; +mod nu_lazyframe; +mod nu_lazygroupby; pub mod utils; pub use nu_dataframe::{Axis, Column, NuDataFrame}; -pub use nu_groupby::NuGroupBy; +pub use nu_expression::NuExpression; +pub use nu_lazyframe::NuLazyFrame; +pub use nu_lazygroupby::NuLazyGroupBy; diff --git a/crates/nu-command/src/dataframe/values/nu_dataframe/between_values.rs b/crates/nu-command/src/dataframe/values/nu_dataframe/between_values.rs index de075b0d01..f4d8e8ad36 100644 --- a/crates/nu-command/src/dataframe/values/nu_dataframe/between_values.rs +++ b/crates/nu-command/src/dataframe/values/nu_dataframe/between_values.rs @@ -76,39 +76,33 @@ pub(super) fn compute_between_series( } } Operator::Equal => { - let mut res = Series::equal(lhs, rhs).into_series(); let name = format!("eq_{}_{}", lhs.name(), rhs.name()); - res.rename(&name); + let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::equal)?; NuDataFrame::series_to_value(res, operation_span) } Operator::NotEqual => { - let mut res = Series::not_equal(lhs, rhs).into_series(); let name = format!("neq_{}_{}", lhs.name(), rhs.name()); - res.rename(&name); + let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::not_equal)?; NuDataFrame::series_to_value(res, operation_span) } Operator::LessThan => { - let mut res = Series::lt(lhs, rhs).into_series(); let name = format!("lt_{}_{}", lhs.name(), rhs.name()); - res.rename(&name); + let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::lt)?; NuDataFrame::series_to_value(res, 
operation_span) } Operator::LessThanOrEqual => { - let mut res = Series::lt_eq(lhs, rhs).into_series(); let name = format!("lte_{}_{}", lhs.name(), rhs.name()); - res.rename(&name); + let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::lt_eq)?; NuDataFrame::series_to_value(res, operation_span) } Operator::GreaterThan => { - let mut res = Series::gt(lhs, rhs).into_series(); let name = format!("gt_{}_{}", lhs.name(), rhs.name()); - res.rename(&name); + let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::gt)?; NuDataFrame::series_to_value(res, operation_span) } Operator::GreaterThanOrEqual => { - let mut res = Series::gt_eq(lhs, rhs).into_series(); let name = format!("gte_{}_{}", lhs.name(), rhs.name()); - res.rename(&name); + let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::gt_eq)?; NuDataFrame::series_to_value(res, operation_span) } Operator::And => match lhs.dtype() { @@ -179,6 +173,32 @@ pub(super) fn compute_between_series( } } +fn compare_series<'s, F>( + lhs: &'s Series, + rhs: &'s Series, + name: &'s str, + span: Option, + f: F, +) -> Result +where + F: Fn(&'s Series, &'s Series) -> Result, PolarsError>, +{ + let mut res = f(lhs, rhs) + .map_err(|e| { + ShellError::GenericError( + "Equality error".into(), + e.to_string(), + span, + None, + Vec::new(), + ) + })? 
+ .into_series(); + + res.rename(name); + Ok(res) +} + pub(super) fn compute_series_single_value( operator: Spanned, left: &Value, diff --git a/crates/nu-command/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu-command/src/dataframe/values/nu_dataframe/conversion.rs index 494d5b1103..7ae745c939 100644 --- a/crates/nu-command/src/dataframe/values/nu_dataframe/conversion.rs +++ b/crates/nu-command/src/dataframe/values/nu_dataframe/conversion.rs @@ -7,7 +7,7 @@ use polars::chunked_array::object::builder::ObjectChunkedBuilder; use polars::chunked_array::ChunkedArray; use polars::prelude::{ DataFrame, DataType, DatetimeChunked, Int64Type, IntoSeries, NamedFrom, NewChunkedArray, - ObjectType, Series, TimeUnit, + ObjectType, Series, TemporalMethods, TimeUnit, }; use std::ops::{Deref, DerefMut}; diff --git a/crates/nu-command/src/dataframe/values/nu_dataframe/mod.rs b/crates/nu-command/src/dataframe/values/nu_dataframe/mod.rs index 5da8abf473..72fffc2a00 100644 --- a/crates/nu-command/src/dataframe/values/nu_dataframe/mod.rs +++ b/crates/nu-command/src/dataframe/values/nu_dataframe/mod.rs @@ -84,6 +84,12 @@ impl AsMut for NuDataFrame { } } +impl From for NuDataFrame { + fn from(dataframe: DataFrame) -> Self { + Self(dataframe) + } +} + impl NuDataFrame { pub fn new(dataframe: DataFrame) -> Self { Self(dataframe) @@ -132,6 +138,7 @@ impl NuDataFrame { for value in iter { match value { + Value::CustomValue { .. } => return Self::try_from_value(value), Value::List { vals, .. 
} => { let cols = (0..vals.len()) .map(|i| format!("{}", i)) @@ -181,7 +188,7 @@ impl NuDataFrame { pub fn try_from_value(value: Value) -> Result { match value { - Value::CustomValue { val, span } => match val.as_any().downcast_ref::() { + Value::CustomValue { val, span } => match val.as_any().downcast_ref::() { Some(df) => Ok(NuDataFrame(df.0.clone())), None => Err(ShellError::CantConvert( "dataframe".into(), @@ -201,7 +208,15 @@ impl NuDataFrame { pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { let value = input.into_value(span); - NuDataFrame::try_from_value(value) + Self::try_from_value(value) + } + + pub fn can_downcast(value: &Value) -> bool { + if let Value::CustomValue { val, .. } = value { + val.as_any().downcast_ref::().is_some() + } else { + false + } } pub fn column(&self, column: &str, span: Span) -> Result { diff --git a/crates/nu-command/src/dataframe/values/nu_expression/custom_value.rs b/crates/nu-command/src/dataframe/values/nu_expression/custom_value.rs new file mode 100644 index 0000000000..c223bda088 --- /dev/null +++ b/crates/nu-command/src/dataframe/values/nu_expression/custom_value.rs @@ -0,0 +1,149 @@ +use std::ops::{Add, Div, Mul, Rem, Sub}; + +use super::NuExpression; +use nu_protocol::{ast::Operator, CustomValue, ShellError, Span, Type, Value}; +use polars::prelude::Expr; + +// CustomValue implementation for NuDataFrame +impl CustomValue for NuExpression { + fn typetag_name(&self) -> &'static str { + "expression" + } + + fn typetag_deserialize(&self) { + unimplemented!("typetag_deserialize") + } + + fn clone_value(&self, span: nu_protocol::Span) -> Value { + let cloned = NuExpression(self.0.clone()); + + Value::CustomValue { + val: Box::new(cloned), + span, + } + } + + fn value_string(&self) -> String { + self.typetag_name().to_string() + } + + fn to_base_value(&self, span: Span) -> Result { + Ok(self.to_value(span)) + } + + fn to_json(&self) -> nu_json::Value { + nu_json::Value::Null + } + + fn as_any(&self) -> 
&dyn std::any::Any { + self + } + + fn operation( + &self, + lhs_span: Span, + operator: Operator, + op: Span, + right: &Value, + ) -> Result { + compute_with_value(self, lhs_span, operator, op, right) + } +} + +fn compute_with_value( + left: &NuExpression, + lhs_span: Span, + operator: Operator, + op: Span, + right: &Value, +) -> Result { + match right { + Value::CustomValue { + val: rhs, + span: rhs_span, + } => { + let rhs = rhs.as_any().downcast_ref::().ok_or_else(|| { + ShellError::DowncastNotPossible( + "Unable to create expression".to_string(), + *rhs_span, + ) + })?; + + match rhs.as_ref() { + polars::prelude::Expr::Literal(..) => { + with_operator(operator, left, rhs, lhs_span, right.span()?, op) + } + _ => Err(ShellError::TypeMismatch( + "Only literal expressions or number".into(), + right.span()?, + )), + } + } + _ => { + let rhs = NuExpression::try_from_value(right.clone())?; + with_operator(operator, left, &rhs, lhs_span, right.span()?, op) + } + } +} + +fn with_operator( + operator: Operator, + left: &NuExpression, + right: &NuExpression, + lhs_span: Span, + rhs_span: Span, + op_span: Span, +) -> Result { + match operator { + Operator::Plus => apply_arithmetic(left, right, lhs_span, Add::add), + Operator::Minus => apply_arithmetic(left, right, lhs_span, Sub::sub), + Operator::Multiply => apply_arithmetic(left, right, lhs_span, Mul::mul), + Operator::Divide => apply_arithmetic(left, right, lhs_span, Div::div), + Operator::Modulo => apply_arithmetic(left, right, lhs_span, Rem::rem), + Operator::Equal => Ok(left + .clone() + .apply_with_expr(right.clone(), Expr::eq) + .into_value(lhs_span)), + Operator::NotEqual => Ok(left + .clone() + .apply_with_expr(right.clone(), Expr::neq) + .into_value(lhs_span)), + Operator::GreaterThan => Ok(left + .clone() + .apply_with_expr(right.clone(), Expr::gt) + .into_value(lhs_span)), + Operator::GreaterThanOrEqual => Ok(left + .clone() + .apply_with_expr(right.clone(), Expr::gt_eq) + .into_value(lhs_span)), + 
Operator::LessThan => Ok(left + .clone() + .apply_with_expr(right.clone(), Expr::lt) + .into_value(lhs_span)), + Operator::LessThanOrEqual => Ok(left + .clone() + .apply_with_expr(right.clone(), Expr::lt_eq) + .into_value(lhs_span)), + _ => Err(ShellError::OperatorMismatch { + op_span, + lhs_ty: Type::Custom, + lhs_span, + rhs_ty: Type::Custom, + rhs_span, + }), + } +} + +fn apply_arithmetic( + left: &NuExpression, + right: &NuExpression, + span: Span, + f: F, +) -> Result +where + F: Fn(Expr, Expr) -> Expr, +{ + let expr: NuExpression = f(left.as_ref().clone(), right.as_ref().clone()).into(); + + Ok(expr.into_value(span)) +} diff --git a/crates/nu-command/src/dataframe/values/nu_expression/mod.rs b/crates/nu-command/src/dataframe/values/nu_expression/mod.rs new file mode 100644 index 0000000000..eed8e814c6 --- /dev/null +++ b/crates/nu-command/src/dataframe/values/nu_expression/mod.rs @@ -0,0 +1,325 @@ +mod custom_value; + +use core::fmt; +use nu_protocol::{PipelineData, ShellError, Span, Value}; +use polars::prelude::{col, AggExpr, Expr, Literal}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +// Polars Expression wrapper for Nushell operations +// Object is behind and Option to allow easy implementation of +// the Deserialize trait +#[derive(Default, Clone)] +pub struct NuExpression(Option); + +// Mocked serialization of the LazyFrame object +impl Serialize for NuExpression { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_none() + } +} + +// Mocked deserialization of the LazyFrame object +impl<'de> Deserialize<'de> for NuExpression { + fn deserialize(_deserializer: D) -> Result + where + D: Deserializer<'de>, + { + Ok(NuExpression::default()) + } +} + +impl fmt::Debug for NuExpression { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "NuExpression") + } +} + +// Referenced access to the real LazyFrame +impl AsRef for NuExpression { + fn as_ref(&self) -> 
&polars::prelude::Expr { + // The only case when there cannot be an expr is if it is created + // using the default function or if created by deserializing something + self.0.as_ref().expect("there should always be a frame") + } +} + +impl AsMut for NuExpression { + fn as_mut(&mut self) -> &mut polars::prelude::Expr { + // The only case when there cannot be an expr is if it is created + // using the default function or if created by deserializing something + self.0.as_mut().expect("there should always be a frame") + } +} + +impl From for NuExpression { + fn from(expr: Expr) -> Self { + Self(Some(expr)) + } +} + +impl NuExpression { + pub fn into_value(self, span: Span) -> Value { + Value::CustomValue { + val: Box::new(self), + span, + } + } + + pub fn try_from_value(value: Value) -> Result { + match value { + Value::CustomValue { val, span } => match val.as_any().downcast_ref::() { + Some(expr) => Ok(NuExpression(expr.0.clone())), + None => Err(ShellError::CantConvert( + "lazy expression".into(), + "non-dataframe".into(), + span, + None, + )), + }, + Value::String { val, .. } => Ok(col(val.as_str()).into()), + Value::Int { val, .. } => Ok(val.lit().into()), + Value::Bool { val, .. } => Ok(val.lit().into()), + Value::Float { val, .. } => Ok(val.lit().into()), + x => Err(ShellError::CantConvert( + "lazy expression".into(), + x.get_type().to_string(), + x.span()?, + None, + )), + } + } + + pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { + let value = input.into_value(span); + Self::try_from_value(value) + } + + pub fn can_downcast(value: &Value) -> bool { + match value { + Value::CustomValue { val, .. } => val.as_any().downcast_ref::().is_some(), + Value::String { .. } | Value::Int { .. } | Value::Bool { .. } | Value::Float { .. 
} => { + true + } + _ => false, + } + } + + pub fn into_polars(self) -> Expr { + self.0.expect("Expression cannot be none to convert") + } + + pub fn apply_with_expr(self, other: NuExpression, f: F) -> Self + where + F: Fn(Expr, Expr) -> Expr, + { + let expr = self.0.expect("Lazy expression must not be empty to apply"); + let other = other.0.expect("Lazy expression must not be empty to apply"); + + f(expr, other).into() + } + + pub fn to_value(&self, span: Span) -> Value { + expr_to_value(self.as_ref(), span) + } + + // Convenient function to extrac multiple Expr that could be inside a nushell Value + pub fn extract_exprs(value: Value) -> Result, ShellError> { + ExtractedExpr::extract_exprs(value).map(ExtractedExpr::into_exprs) + } +} + +// Enum to represent the parsing of the expressions from Value +enum ExtractedExpr { + Single(Expr), + List(Vec), +} + +impl ExtractedExpr { + fn into_exprs(self) -> Vec { + match self { + Self::Single(expr) => vec![expr], + Self::List(expressions) => expressions + .into_iter() + .flat_map(ExtractedExpr::into_exprs) + .collect(), + } + } + + fn extract_exprs(value: Value) -> Result { + match value { + Value::String { val, .. } => Ok(ExtractedExpr::Single(col(val.as_str()))), + Value::CustomValue { .. } => NuExpression::try_from_value(value) + .map(NuExpression::into_polars) + .map(ExtractedExpr::Single), + Value::List { vals, .. 
} => vals + .into_iter() + .map(Self::extract_exprs) + .collect::, ShellError>>() + .map(ExtractedExpr::List), + x => Err(ShellError::CantConvert( + "expression".into(), + x.get_type().to_string(), + x.span()?, + None, + )), + } + } +} + +pub fn expr_to_value(expr: &Expr, span: Span) -> Value { + let cols = vec!["expr".to_string(), "value".to_string()]; + + match expr { + Expr::Not(_) => todo!(), + Expr::Alias(expr, alias) => { + let expr = expr_to_value(expr.as_ref(), span); + let alias = Value::String { + val: alias.as_ref().into(), + span, + }; + + let cols = vec!["expr".to_string(), "alias".to_string()]; + + Value::Record { + cols, + vals: vec![expr, alias], + span, + } + } + Expr::Column(name) => { + let expr_type = Value::String { + val: "column".into(), + span, + }; + let value = Value::String { + val: name.to_string(), + span, + }; + + let vals = vec![expr_type, value]; + Value::Record { cols, vals, span } + } + Expr::Columns(columns) => { + let expr_type = Value::String { + val: "columns".into(), + span, + }; + let value = Value::List { + vals: columns + .iter() + .map(|col| Value::String { + val: col.clone(), + span, + }) + .collect(), + span, + }; + + let vals = vec![expr_type, value]; + Value::Record { cols, vals, span } + } + Expr::DtypeColumn(_) => todo!(), + Expr::Literal(literal) => { + let expr_type = Value::String { + val: "literal".into(), + span, + }; + let value = Value::String { + val: format!("{:?}", literal), + span, + }; + + let vals = vec![expr_type, value]; + Value::Record { cols, vals, span } + } + Expr::BinaryExpr { left, op, right } => { + let left_val = expr_to_value(left, span); + let right_val = expr_to_value(right, span); + + let operator = Value::String { + val: format!("{:?}", op), + span, + }; + + let cols = vec!["left".to_string(), "op".to_string(), "right".to_string()]; + + Value::Record { + cols, + vals: vec![left_val, operator, right_val], + span, + } + } + Expr::Ternary { + predicate, + truthy, + falsy, + } => { + let 
predicate = expr_to_value(predicate.as_ref(), span); + let truthy = expr_to_value(truthy.as_ref(), span); + let falsy = expr_to_value(falsy.as_ref(), span); + + let cols = vec![ + "predicate".to_string(), + "truthy".to_string(), + "falsy".to_string(), + ]; + + Value::Record { + cols, + vals: vec![predicate, truthy, falsy], + span, + } + } + Expr::Agg(agg_expr) => { + let value = match agg_expr { + AggExpr::Min(expr) + | AggExpr::Max(expr) + | AggExpr::Median(expr) + | AggExpr::NUnique(expr) + | AggExpr::First(expr) + | AggExpr::Last(expr) + | AggExpr::Mean(expr) + | AggExpr::List(expr) + | AggExpr::Count(expr) + | AggExpr::Sum(expr) + | AggExpr::AggGroups(expr) + | AggExpr::Std(expr) + | AggExpr::Var(expr) => expr_to_value(expr.as_ref(), span), + AggExpr::Quantile { .. } => todo!(), + }; + + let expr_type = Value::String { + val: "agg".into(), + span, + }; + + let vals = vec![expr_type, value]; + Value::Record { cols, vals, span } + } + Expr::IsNotNull(_) => todo!(), + Expr::IsNull(_) => todo!(), + Expr::Cast { .. } => todo!(), + Expr::Sort { .. } => todo!(), + Expr::Take { .. } => todo!(), + Expr::SortBy { .. } => todo!(), + Expr::Function { .. } => todo!(), + Expr::Shift { .. } => todo!(), + Expr::Reverse(_) => todo!(), + Expr::Duplicated(_) => todo!(), + Expr::IsUnique(_) => todo!(), + Expr::Explode(_) => todo!(), + Expr::Filter { .. } => todo!(), + Expr::Window { .. } => todo!(), + Expr::Wildcard => todo!(), + Expr::Slice { .. } => todo!(), + Expr::Exclude(_, _) => todo!(), + Expr::KeepName(_) => todo!(), + Expr::RenameAlias { .. } => todo!(), + Expr::Count => todo!(), + Expr::Nth(_) => todo!(), + Expr::AnonymousFunction { .. 
} => todo!(), + } +} diff --git a/crates/nu-command/src/dataframe/values/nu_groupby/mod.rs b/crates/nu-command/src/dataframe/values/nu_groupby/mod.rs deleted file mode 100644 index a1fefc87a7..0000000000 --- a/crates/nu-command/src/dataframe/values/nu_groupby/mod.rs +++ /dev/null @@ -1,140 +0,0 @@ -mod custom_value; - -use nu_protocol::{PipelineData, ShellError, Span, Value}; -use polars::frame::groupby::{GroupBy, GroupsProxy}; -use polars::prelude::{DataFrame, GroupsIdx}; -use serde::{Deserialize, Serialize}; - -#[derive(Clone, Debug, Serialize, Deserialize)] -pub enum NuGroupsProxy { - Idx { - sorted: bool, - all: Vec<(u32, Vec)>, - }, - Slice(Vec<[u32; 2]>), -} - -impl NuGroupsProxy { - fn from_polars(groups: &GroupsProxy) -> Self { - match groups { - GroupsProxy::Idx(indexes) => NuGroupsProxy::Idx { - sorted: indexes.is_sorted(), - all: indexes - .iter() - .map(|(index, values)| (index, values.clone())) - .collect(), - }, - GroupsProxy::Slice(slice) => NuGroupsProxy::Slice(slice.clone()), - } - } - - fn to_polars(&self) -> GroupsProxy { - match self { - Self::Idx { sorted, all } => { - let mut groups: GroupsIdx = all.clone().into(); - if *sorted { - groups.sort() - } - - GroupsProxy::Idx(groups) - } - Self::Slice(slice) => GroupsProxy::Slice(slice.clone()), - } - } -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct NuGroupBy { - dataframe: DataFrame, - by: Vec, - groups: NuGroupsProxy, -} - -impl NuGroupBy { - pub fn new(dataframe: DataFrame, by: Vec, groups: &GroupsProxy) -> Self { - NuGroupBy { - dataframe, - by, - groups: NuGroupsProxy::from_polars(groups), - } - } - - pub fn into_value(self, span: Span) -> Value { - Value::CustomValue { - val: Box::new(self), - span, - } - } - - pub fn try_from_value(value: Value) -> Result { - match value { - Value::CustomValue { val, span } => match val.as_any().downcast_ref::() { - Some(groupby) => Ok(NuGroupBy { - dataframe: groupby.dataframe.clone(), - by: groupby.by.clone(), - groups: groupby.groups.clone(), - 
}), - None => Err(ShellError::CantConvert( - "groupby".into(), - "non-dataframe".into(), - span, - None, - )), - }, - x => Err(ShellError::CantConvert( - "groupby".into(), - x.get_type().to_string(), - x.span()?, - None, - )), - } - } - - pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { - let value = input.into_value(span); - NuGroupBy::try_from_value(value) - } - - pub fn to_groupby(&self) -> Result { - let by = self.dataframe.select_series(&self.by).map_err(|e| { - ShellError::GenericError( - "Error creating groupby".into(), - "".to_string(), - None, - Some(e.to_string()), - Vec::new(), - ) - })?; - - Ok(GroupBy::new( - &self.dataframe, - by, - self.groups.to_polars(), - None, - )) - } - - pub fn print(&self, span: Span) -> Result, ShellError> { - let values = self - .by - .iter() - .map(|col| { - let cols = vec!["group by".to_string()]; - let vals = vec![Value::String { - val: col.into(), - span, - }]; - - Value::Record { cols, vals, span } - }) - .collect::>(); - - Ok(values) - } -} - -impl AsRef for NuGroupBy { - fn as_ref(&self) -> &polars::prelude::DataFrame { - &self.dataframe - } -} diff --git a/crates/nu-command/src/dataframe/values/nu_lazyframe/custom_value.rs b/crates/nu-command/src/dataframe/values/nu_lazyframe/custom_value.rs new file mode 100644 index 0000000000..e968189fc9 --- /dev/null +++ b/crates/nu-command/src/dataframe/values/nu_lazyframe/custom_value.rs @@ -0,0 +1,53 @@ +use super::NuLazyFrame; +use nu_protocol::{CustomValue, ShellError, Span, Value}; + +// CustomValue implementation for NuDataFrame +impl CustomValue for NuLazyFrame { + fn typetag_name(&self) -> &'static str { + "lazyframe" + } + + fn typetag_deserialize(&self) { + unimplemented!("typetag_deserialize") + } + + fn clone_value(&self, span: nu_protocol::Span) -> Value { + let cloned = NuLazyFrame(self.0.clone()); + + Value::CustomValue { + val: Box::new(cloned), + span, + } + } + + fn value_string(&self) -> String { + self.typetag_name().to_string() + } + + 
fn to_base_value(&self, span: Span) -> Result { + let cols = vec!["plan".into(), "optimized_plan".into()]; + let vals = vec![ + Value::String { + val: self.as_ref().describe_plan(), + span, + }, + Value::String { + val: self + .as_ref() + .describe_optimized_plan() + .unwrap_or_else(|_| "".to_string()), + span, + }, + ]; + + Ok(Value::Record { cols, vals, span }) + } + + fn to_json(&self) -> nu_json::Value { + nu_json::Value::Null + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} diff --git a/crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs b/crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs new file mode 100644 index 0000000000..b0d7a2f9fe --- /dev/null +++ b/crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs @@ -0,0 +1,156 @@ +mod custom_value; + +use super::{NuDataFrame, NuExpression}; +use core::fmt; +use nu_protocol::{PipelineData, ShellError, Span, Value}; +use polars::prelude::{Expr, IntoLazy, LazyFrame}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +// Lazyframe wrapper for Nushell operations +// Polars LazyFrame is behind and Option to allow easy implementation of +// the Deserialize trait +#[derive(Default)] +pub struct NuLazyFrame(Option); + +// Mocked serialization of the LazyFrame object +impl Serialize for NuLazyFrame { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_none() + } +} + +// Mocked deserialization of the LazyFrame object +impl<'de> Deserialize<'de> for NuLazyFrame { + fn deserialize(_deserializer: D) -> Result + where + D: Deserializer<'de>, + { + Ok(NuLazyFrame::default()) + } +} + +impl fmt::Debug for NuLazyFrame { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "NuLazyframe") + } +} + +// Referenced access to the real LazyFrame +impl AsRef for NuLazyFrame { + fn as_ref(&self) -> &polars::prelude::LazyFrame { + // The only case when there cannot be a lazy frame is if it is created + // using the default 
function or if created by deserializing something + self.0.as_ref().expect("there should always be a frame") + } +} + +impl AsMut for NuLazyFrame { + fn as_mut(&mut self) -> &mut polars::prelude::LazyFrame { + // The only case when there cannot be a lazy frame is if it is created + // using the default function or if created by deserializing something + self.0.as_mut().expect("there should always be a frame") + } +} + +impl From for NuLazyFrame { + fn from(lazy_frame: LazyFrame) -> Self { + Self(Some(lazy_frame)) + } +} + +impl NuLazyFrame { + pub fn from_dataframe(df: NuDataFrame) -> Self { + let lazy = df.as_ref().clone().lazy(); + Self(Some(lazy)) + } + + pub fn into_value(self, span: Span) -> Value { + Value::CustomValue { + val: Box::new(self), + span, + } + } + + pub fn into_polars(self) -> LazyFrame { + self.0.expect("lazyframe cannot be none to convert") + } + + pub fn collect(self, span: Span) -> Result { + self.0 + .expect("No empty lazy for collect") + .collect() + .map_err(|e| { + ShellError::GenericError( + "Error collecting lazy frame".to_string(), + e.to_string(), + Some(span), + None, + Vec::new(), + ) + }) + .map(NuDataFrame::new) + } + + pub fn try_from_value(value: Value) -> Result { + match value { + Value::CustomValue { val, span } => match val.as_any().downcast_ref::() { + Some(expr) => Ok(Self(expr.0.clone())), + None => Err(ShellError::CantConvert( + "lazy frame".into(), + "non-dataframe".into(), + span, + None, + )), + }, + x => Err(ShellError::CantConvert( + "lazy frame".into(), + x.get_type().to_string(), + x.span()?, + None, + )), + } + } + + pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { + let value = input.into_value(span); + Self::try_from_value(value) + } + + pub fn can_downcast(value: &Value) -> bool { + if let Value::CustomValue { val, .. 
} = value { + val.as_any().downcast_ref::().is_some() + } else { + false + } + } + + pub fn maybe_is_eager(value: Value) -> Result<(Self, bool), ShellError> { + if Self::can_downcast(&value) { + Ok((Self::try_from_value(value)?, false)) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + Ok((NuLazyFrame::from_dataframe(df), true)) + } else { + Err(ShellError::CantConvert( + "lazy or eager dataframe".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } + } + + pub fn apply_with_expr(self, expr: NuExpression, f: F) -> Self + where + F: Fn(LazyFrame, Expr) -> LazyFrame, + { + let df = self.0.expect("Lazy frame must not be empty to apply"); + let expr = expr.into_polars(); + let new_frame = f(df, expr); + + new_frame.into() + } +} diff --git a/crates/nu-command/src/dataframe/values/nu_groupby/custom_value.rs b/crates/nu-command/src/dataframe/values/nu_lazygroupby/custom_value.rs similarity index 62% rename from crates/nu-command/src/dataframe/values/nu_groupby/custom_value.rs rename to crates/nu-command/src/dataframe/values/nu_lazygroupby/custom_value.rs index f60a6bff7a..2ceefb232d 100644 --- a/crates/nu-command/src/dataframe/values/nu_groupby/custom_value.rs +++ b/crates/nu-command/src/dataframe/values/nu_lazygroupby/custom_value.rs @@ -1,10 +1,10 @@ -use super::NuGroupBy; +use super::NuLazyGroupBy; use nu_protocol::{CustomValue, ShellError, Span, Value}; // CustomValue implementation for NuDataFrame -impl CustomValue for NuGroupBy { +impl CustomValue for NuLazyGroupBy { fn typetag_name(&self) -> &'static str { - "groupby" + "lazygroupby" } fn typetag_deserialize(&self) { @@ -12,10 +12,9 @@ impl CustomValue for NuGroupBy { } fn clone_value(&self, span: nu_protocol::Span) -> Value { - let cloned = NuGroupBy { - dataframe: self.dataframe.clone(), - by: self.by.clone(), - groups: self.groups.clone(), + let cloned = NuLazyGroupBy { + group_by: self.group_by.clone(), + from_eager: self.from_eager, }; 
Value::CustomValue { @@ -29,9 +28,13 @@ impl CustomValue for NuGroupBy { } fn to_base_value(&self, span: Span) -> Result { - let vals = self.print(span)?; + let cols = vec!["LazyGroupBy".into()]; + let vals = vec![Value::String { + val: "apply aggregation to complete execution plan".into(), + span, + }]; - Ok(Value::List { vals, span }) + Ok(Value::Record { cols, vals, span }) } fn to_json(&self) -> nu_json::Value { diff --git a/crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs b/crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs new file mode 100644 index 0000000000..1b75d6b5e3 --- /dev/null +++ b/crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs @@ -0,0 +1,114 @@ +mod custom_value; + +use core::fmt; +use nu_protocol::{PipelineData, ShellError, Span, Value}; +use polars::prelude::LazyGroupBy; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +// Lazyframe wrapper for Nushell operations +// Polars LazyFrame is behind and Option to allow easy implementation of +// the Deserialize trait +#[derive(Default)] +pub struct NuLazyGroupBy { + pub group_by: Option, + pub from_eager: bool, +} + +// Mocked serialization of the LazyFrame object +impl Serialize for NuLazyGroupBy { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_none() + } +} + +// Mocked deserialization of the LazyFrame object +impl<'de> Deserialize<'de> for NuLazyGroupBy { + fn deserialize(_deserializer: D) -> Result + where + D: Deserializer<'de>, + { + Ok(NuLazyGroupBy::default()) + } +} + +impl fmt::Debug for NuLazyGroupBy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "NuLazyGroupBy") + } +} + +// Referenced access to the real LazyFrame +impl AsRef for NuLazyGroupBy { + fn as_ref(&self) -> &polars::prelude::LazyGroupBy { + // The only case when there cannot be a lazy frame is if it is created + // using the default function or if created by deserializing something + self.group_by + 
.as_ref() + .expect("there should always be a frame") + } +} + +impl AsMut for NuLazyGroupBy { + fn as_mut(&mut self) -> &mut polars::prelude::LazyGroupBy { + // The only case when there cannot be a lazy frame is if it is created + // using the default function or if created by deserializing something + self.group_by + .as_mut() + .expect("there should always be a frame") + } +} + +impl From for NuLazyGroupBy { + fn from(group_by: LazyGroupBy) -> Self { + Self { + group_by: Some(group_by), + from_eager: false, + } + } +} + +impl NuLazyGroupBy { + pub fn into_value(self, span: Span) -> Value { + Value::CustomValue { + val: Box::new(self), + span, + } + } + + pub fn into_polars(self) -> LazyGroupBy { + self.group_by.expect("GroupBy cannot be none to convert") + } + + pub fn try_from_value(value: Value) -> Result { + match value { + Value::CustomValue { val, span } => { + match val.as_any().downcast_ref::() { + Some(group) => Ok(Self { + group_by: group.group_by.clone(), + from_eager: group.from_eager, + }), + None => Err(ShellError::CantConvert( + "lazy frame".into(), + "non-dataframe".into(), + span, + None, + )), + } + } + x => Err(ShellError::CantConvert( + "lazy groupby".into(), + x.get_type().to_string(), + x.span()?, + None, + )), + } + } + + pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { + let value = input.into_value(span); + Self::try_from_value(value) + } +} diff --git a/crates/nu-protocol/src/value/from_value.rs b/crates/nu-protocol/src/value/from_value.rs index 55d67d5c8b..66b7b89a5d 100644 --- a/crates/nu-protocol/src/value/from_value.rs +++ b/crates/nu-protocol/src/value/from_value.rs @@ -238,6 +238,31 @@ impl FromValue for Vec { } } +impl FromValue for Vec { + fn from_value(v: &Value) -> Result { + match v { + Value::List { vals, .. } => vals + .iter() + .map(|val| match val { + Value::Bool { val, .. 
} => Ok(*val), + c => Err(ShellError::CantConvert( + "bool".into(), + c.get_type().to_string(), + c.span()?, + None, + )), + }) + .collect::, ShellError>>(), + v => Err(ShellError::CantConvert( + "bool".into(), + v.get_type().to_string(), + v.span()?, + None, + )), + } + } +} + impl FromValue for CellPath { fn from_value(v: &Value) -> Result { let span = v.span()?;