diff --git a/Cargo.lock b/Cargo.lock index d4da037dd1..a677cb4f35 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -209,7 +209,7 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" [[package]] name = "arrow" version = "5.0.0-SNAPSHOT" -source = "git+https://github.com/apache/arrow-rs?rev=0f55b828883b3b3afda43ae404b130d374e6f1a1#0f55b828883b3b3afda43ae404b130d374e6f1a1" +source = "git+https://github.com/apache/arrow-rs?rev=9f56afb2d2347310184706f7d5e46af583557bea#9f56afb2d2347310184706f7d5e46af583557bea" dependencies = [ "chrono", "csv", @@ -4404,7 +4404,7 @@ dependencies = [ [[package]] name = "parquet" version = "5.0.0-SNAPSHOT" -source = "git+https://github.com/apache/arrow-rs?rev=0f55b828883b3b3afda43ae404b130d374e6f1a1#0f55b828883b3b3afda43ae404b130d374e6f1a1" +source = "git+https://github.com/apache/arrow-rs?rev=9f56afb2d2347310184706f7d5e46af583557bea#9f56afb2d2347310184706f7d5e46af583557bea" dependencies = [ "arrow", "base64 0.13.0", @@ -4415,6 +4415,7 @@ dependencies = [ "lz4", "num-bigint 0.4.0", "parquet-format", + "rand 0.8.3", "snap", "thrift", "zstd", @@ -4642,8 +4643,8 @@ dependencies = [ [[package]] name = "polars" -version = "0.14.1" -source = "git+https://github.com/pola-rs/polars?rev=9e1506cca9fb646fc55f949ab6345290c3d198a7#9e1506cca9fb646fc55f949ab6345290c3d198a7" +version = "0.14.2" +source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e" dependencies = [ "polars-core", "polars-io", @@ -4652,8 +4653,8 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.14.1" -source = "git+https://github.com/pola-rs/polars?rev=9e1506cca9fb646fc55f949ab6345290c3d198a7#9e1506cca9fb646fc55f949ab6345290c3d198a7" +version = "0.14.2" +source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e" dependencies = [ "arrow", "num 0.4.0", @@ -4662,8 +4663,8 @@ dependencies = [ 
[[package]] name = "polars-core" -version = "0.14.1" -source = "git+https://github.com/pola-rs/polars?rev=9e1506cca9fb646fc55f949ab6345290c3d198a7#9e1506cca9fb646fc55f949ab6345290c3d198a7" +version = "0.14.2" +source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e" dependencies = [ "ahash", "anyhow", @@ -4688,8 +4689,8 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.14.1" -source = "git+https://github.com/pola-rs/polars?rev=9e1506cca9fb646fc55f949ab6345290c3d198a7#9e1506cca9fb646fc55f949ab6345290c3d198a7" +version = "0.14.2" +source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e" dependencies = [ "ahash", "anyhow", @@ -4711,8 +4712,8 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.14.1" -source = "git+https://github.com/pola-rs/polars?rev=9e1506cca9fb646fc55f949ab6345290c3d198a7#9e1506cca9fb646fc55f949ab6345290c3d198a7" +version = "0.14.2" +source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e" dependencies = [ "ahash", "itertools", diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index e787fea862..8da475f894 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -100,10 +100,10 @@ zip = { version = "0.5.9", optional = true } [dependencies.polars] git = "https://github.com/pola-rs/polars" -rev = "9e1506cca9fb646fc55f949ab6345290c3d198a7" -version = "0.14.1" +rev = "f60d86bc0921bd42635e8a33e7aad28ebe62dc3e" +version = "0.14.2" optional = true -features = ["parquet", "json", "random", "pivot", "strings"] +features = ["parquet", "json", "random", "pivot", "strings", "is_in"] [target.'cfg(unix)'.dependencies] umask = "1.0.0" diff --git a/crates/nu-command/src/commands/dataframe/aggregate.rs b/crates/nu-command/src/commands/dataframe/aggregate.rs index 
4279765f5c..2c1d48522d 100644 --- a/crates/nu-command/src/commands/dataframe/aggregate.rs +++ b/crates/nu-command/src/commands/dataframe/aggregate.rs @@ -3,10 +3,13 @@ use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ dataframe::{NuDataFrame, PolarsData}, - Signature, SyntaxShape, UntaggedValue, Value, + Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value, }; use nu_source::Tagged; -use polars::{frame::groupby::GroupBy, prelude::PolarsError}; +use polars::{ + frame::groupby::GroupBy, + prelude::{DataType, PolarsError, Series}, +}; use super::utils::convert_columns; @@ -109,7 +112,7 @@ impl WholeStreamCommand for DataFrame { Example { description: "Aggregate sum by grouping by column a and summing on col b", example: - "[[a b]; [one 1] [one 2]] | dataframe to-df | dataframe groupby [a] | dataframe aggregate sum", + "[[a b]; [one 1] [one 2]] | dataframe to-df | dataframe group-by [a] | dataframe aggregate sum", result: None, }, Example { @@ -117,6 +120,11 @@ impl WholeStreamCommand for DataFrame { example: "[[a b]; [4 1] [5 2]] | dataframe to-df | dataframe aggregate sum", result: None, }, + Example { + description: "Aggregate sum in series", + example: "[4 1 5 6] | dataframe to-series | dataframe aggregate sum", + result: None, + }, ] } } @@ -142,7 +150,7 @@ fn command(mut args: CommandArgs) -> Result { ShellError::labeled_error("Empty stream", "No value found in the stream", &tag) })?; - let res = match value.value { + match value.value { UntaggedValue::DataFrame(PolarsData::GroupBy(nu_groupby)) => { let groupby = nu_groupby.to_groupby()?; @@ -151,12 +159,14 @@ fn command(mut args: CommandArgs) -> Result { None => groupby, }; - perform_groupby_aggregation(groupby, op, &operation.tag, &agg_span) + let res = perform_groupby_aggregation(groupby, op, &operation.tag, &agg_span)?; + + Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => { let df = 
df.as_ref(); - match &selection { + let res = match &selection { Some(cols) => { let df = df .select(cols) @@ -165,16 +175,21 @@ fn command(mut args: CommandArgs) -> Result { perform_dataframe_aggregation(&df, op, &operation.tag) } None => perform_dataframe_aggregation(&df, op, &operation.tag), - } + }?; + + Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) + } + UntaggedValue::DataFrame(PolarsData::Series(series)) => { + let value = perform_series_aggregation(series.as_ref(), op, &operation.tag)?; + + Ok(OutputStream::one(value)) } _ => Err(ShellError::labeled_error( "No groupby or dataframe", "no groupby or found in input stream", &value.tag.span, )), - }?; - - Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) + } } fn perform_groupby_aggregation( @@ -232,3 +247,163 @@ fn perform_dataframe_aggregation( )), } } + +fn perform_series_aggregation( + series: &Series, + operation: Operation, + operation_tag: &Tag, +) -> Result { + match operation { + Operation::Mean => { + let res = match series.mean() { + Some(val) => UntaggedValue::Primitive(val.into()), + None => UntaggedValue::Primitive(0.into()), + }; + + let value = Value { + value: res, + tag: operation_tag.clone(), + }; + + let mut data = TaggedDictBuilder::new(operation_tag.clone()); + data.insert_value("mean", value); + + Ok(data.into_value()) + } + Operation::Median => { + let res = match series.median() { + Some(val) => UntaggedValue::Primitive(val.into()), + None => UntaggedValue::Primitive(0.into()), + }; + + let value = Value { + value: res, + tag: operation_tag.clone(), + }; + + let mut data = TaggedDictBuilder::new(operation_tag.clone()); + data.insert_value("median", value); + + Ok(data.into_value()) + } + Operation::Sum => { + let untagged = match series.dtype() { + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => { + let res: i64 = series.sum().unwrap_or(0); + 
Ok(UntaggedValue::Primitive(res.into())) + } + DataType::Float32 | DataType::Float64 => { + let res: f64 = series.sum().unwrap_or(0.0); + Ok(UntaggedValue::Primitive(res.into())) + } + _ => Err(ShellError::labeled_error( + "Not valid type", + format!( + "this operation can not be performed with series of type {}", + series.dtype() + ), + &operation_tag.span, + )), + }?; + + let value = Value { + value: untagged, + tag: operation_tag.clone(), + }; + + let mut data = TaggedDictBuilder::new(operation_tag.clone()); + data.insert_value("sum", value); + + Ok(data.into_value()) + } + Operation::Max => { + let untagged = match series.dtype() { + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => { + let res: i64 = series.max().unwrap_or(0); + Ok(UntaggedValue::Primitive(res.into())) + } + DataType::Float32 | DataType::Float64 => { + let res: f64 = series.max().unwrap_or(0.0); + Ok(UntaggedValue::Primitive(res.into())) + } + _ => Err(ShellError::labeled_error( + "Not valid type", + format!( + "this operation can not be performed with series of type {}", + series.dtype() + ), + &operation_tag.span, + )), + }?; + + let value = Value { + value: untagged, + tag: operation_tag.clone(), + }; + + let mut data = TaggedDictBuilder::new(operation_tag.clone()); + data.insert_value("max", value); + + Ok(data.into_value()) + } + Operation::Min => { + let untagged = match series.dtype() { + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => { + let res: i64 = series.min().unwrap_or(0); + Ok(UntaggedValue::Primitive(res.into())) + } + DataType::Float32 | DataType::Float64 => { + let res: f64 = series.min().unwrap_or(0.0); + Ok(UntaggedValue::Primitive(res.into())) + } + _ => Err(ShellError::labeled_error( + "Not valid type", + format!( + "this operation can not be 
performed with series of type {}", + series.dtype() + ), + &operation_tag.span, + )), + }?; + + let value = Value { + value: untagged, + tag: operation_tag.clone(), + }; + + let mut data = TaggedDictBuilder::new(operation_tag.clone()); + data.insert_value("min", value); + + Ok(data.into_value()) + } + + _ => Err(ShellError::labeled_error_with_secondary( + "Not valid operation", + "operation not valid for series", + &operation_tag.span, + "Perhaps you want: mean, median, sum, max, min", + &operation_tag.span, + )), + } +} diff --git a/crates/nu-command/src/commands/dataframe/drop_nulls.rs b/crates/nu-command/src/commands/dataframe/drop_nulls.rs index c081efe96f..68558c1e99 100644 --- a/crates/nu-command/src/commands/dataframe/drop_nulls.rs +++ b/crates/nu-command/src/commands/dataframe/drop_nulls.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value}; +use nu_protocol::{ + dataframe::{NuDataFrame, NuSeries, PolarsData}, + Signature, SyntaxShape, UntaggedValue, Value, +}; use super::utils::{convert_columns, parse_polars_error}; @@ -29,35 +32,62 @@ impl WholeStreamCommand for DataFrame { } fn examples(&self) -> Vec { - vec![Example { - description: "drop null values duplicates", - example: "[[a b]; [1 2] [3 4] [1 2]] | dataframe to-df | dataframe drop-nulls", - result: None, - }] + vec![ + Example { + description: "drop null values in dataframe", + example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | dataframe to-df); +let res = ($df.b / $df.b); +let df = ($df | dataframe with-column $res as res); +$df | dataframe drop-nulls +"#, + result: None, + }, + Example { + description: "drop null values in dataframe", + example: r#"let s = ([1 2 0 0 3 4] | dataframe to-series); +($s / $s) | dataframe drop-nulls"#, + result: None, + }, + ] } } fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - // Extracting the 
selection columns of the columns to perform the aggregation - let columns: Option> = args.opt(0)?; - let (subset, col_span) = match columns { - Some(cols) => { - let (agg_string, col_span) = convert_columns(&cols, &tag)?; - (Some(agg_string), col_span) + let value = args.input.next().ok_or_else(|| { + ShellError::labeled_error("Empty stream", "No value found in stream", &tag.span) + })?; + + match value.value { + UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => { + // Extracting the selection columns of the columns to perform the aggregation + let columns: Option> = args.opt(0)?; + let (subset, col_span) = match columns { + Some(cols) => { + let (agg_string, col_span) = convert_columns(&cols, &tag)?; + (Some(agg_string), col_span) + } + None => (None, Span::unknown()), + }; + + let subset_slice = subset.as_ref().map(|cols| &cols[..]); + + let res = df + .as_ref() + .drop_nulls(subset_slice) + .map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?; + + Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } - None => (None, Span::unknown()), - }; - - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - - let subset_slice = subset.as_ref().map(|cols| &cols[..]); - - let res = df - .as_ref() - .drop_nulls(subset_slice) - .map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?; - - Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) + UntaggedValue::DataFrame(PolarsData::Series(series)) => { + let res = series.as_ref().drop_nulls(); + Ok(OutputStream::one(NuSeries::series_to_value(res, tag))) + } + _ => Err(ShellError::labeled_error( + "Incorrect type", + "drop nulls cannot be done with this value", + &value.tag.span, + )), + } } diff --git a/crates/nu-command/src/commands/dataframe/dummies.rs b/crates/nu-command/src/commands/dataframe/dummies.rs index 1542e11bf7..bd5361233b 100644 --- a/crates/nu-command/src/commands/dataframe/dummies.rs +++ b/crates/nu-command/src/commands/dataframe/dummies.rs @@ -1,7 
+1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature}; +use nu_protocol::{ + dataframe::{NuDataFrame, PolarsData}, + Signature, UntaggedValue, +}; use super::utils::parse_polars_error; @@ -25,25 +28,55 @@ impl WholeStreamCommand for DataFrame { } fn examples(&self) -> Vec { - vec![Example { - description: "Create new dataframe with dummy variables", - example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe to-dummies", - result: None, - }] + vec![ + Example { + description: "Create new dataframe with dummy variables from a dataframe", + example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe to-dummies", + result: None, + }, + Example { + description: "Create new dataframe with dummy variables from a series", + example: "[1 2 2 3 3] | dataframe to-series | dataframe to-dummies", + result: None, + }, + ] } } fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let res = df.as_ref().to_dummies().map_err(|e| { - parse_polars_error( - &e, - &tag.span, - Some("The only allowed column types for dummies are String or Int"), - ) + let value = args.input.next().ok_or_else(|| { + ShellError::labeled_error("Empty stream", "No value found in stream", &tag.span) })?; - Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) + match value.value { + UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => { + let res = df.as_ref().to_dummies().map_err(|e| { + parse_polars_error( + &e, + &tag.span, + Some("The only allowed column types for dummies are String or Int"), + ) + })?; + + Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) + } + UntaggedValue::DataFrame(PolarsData::Series(series)) => { + let res = series.as_ref().to_dummies().map_err(|e| { + parse_polars_error( + &e, + &tag.span, + Some("The only allowed column types for dummies are 
String or Int"), + ) + })?; + + Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) + } + _ => Err(ShellError::labeled_error( + "Incorrect type", + "dummies cannot be done with this value", + &value.tag.span, + )), + } } diff --git a/crates/nu-command/src/commands/dataframe/mod.rs b/crates/nu-command/src/commands/dataframe/mod.rs index 487758c8df..337b178473 100644 --- a/crates/nu-command/src/commands/dataframe/mod.rs +++ b/crates/nu-command/src/commands/dataframe/mod.rs @@ -17,7 +17,6 @@ pub mod melt; pub mod pivot; pub mod sample; pub mod select; -pub mod series_rename; pub mod show; pub mod slice; pub mod sort; @@ -49,7 +48,6 @@ pub use melt::DataFrame as DataFrameMelt; pub use pivot::DataFrame as DataFramePivot; pub use sample::DataFrame as DataFrameSample; pub use select::DataFrame as DataFrameSelect; -pub use series_rename::DataFrame as DataFrameSeriesRename; pub use show::DataFrame as DataFrameShow; pub use slice::DataFrame as DataFrameSlice; pub use sort::DataFrame as DataFrameSort; @@ -60,3 +58,24 @@ pub use to_parquet::DataFrame as DataFrameToParquet; pub use to_series::DataFrame as DataFrameToSeries; pub use where_::DataFrame as DataFrameWhere; pub use with_column::DataFrame as DataFrameWithColumn; + +pub mod series; +pub use series::DataFrameAllFalse; +pub use series::DataFrameAllTrue; +pub use series::DataFrameArgMax; +pub use series::DataFrameArgMin; +pub use series::DataFrameArgSort; +pub use series::DataFrameArgTrue; +pub use series::DataFrameArgUnique; +pub use series::DataFrameIsDuplicated; +pub use series::DataFrameIsIn; +pub use series::DataFrameIsNotNull; +pub use series::DataFrameIsNull; +pub use series::DataFrameIsUnique; +pub use series::DataFrameNNull; +pub use series::DataFrameNUnique; +pub use series::DataFrameSeriesRename; +pub use series::DataFrameSet; +pub use series::DataFrameShift; +pub use series::DataFrameUnique; +pub use series::DataFrameValueCounts; diff --git 
a/crates/nu-command/src/commands/dataframe/series/all_false.rs b/crates/nu-command/src/commands/dataframe/series/all_false.rs new file mode 100644 index 0000000000..1a9f800dcb --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/all_false.rs @@ -0,0 +1,67 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{dataframe::NuSeries, Signature, TaggedDictBuilder, UntaggedValue, Value}; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe all-false" + } + + fn usage(&self) -> &str { + "Returns true if all values are false" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe all-false") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Returns true if all values are false", + example: "[$false $false $false] | dataframe to-series | dataframe all-false", + result: None, + }, + Example { + description: "Checks the result from a comparison", + example: r#"let s = ([5 6 2 8] | dataframe to-series); +let res = ($s > 9); +$res | dataframe all-false"#, + result: None, + }, + ] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let bool = series.as_ref().bool().map_err(|e| { + parse_polars_error::<&str>( + &e, + &tag.span, + Some("all-false only works with series of type bool"), + ) + })?; + + let res = bool.all_false(); + + let value = Value { + value: UntaggedValue::Primitive(res.into()), + tag: tag.clone(), + }; + + let mut data = TaggedDictBuilder::new(tag); + data.insert_value("all_false", value); + + Ok(OutputStream::one(data.into_value())) +} diff --git a/crates/nu-command/src/commands/dataframe/series/all_true.rs 
b/crates/nu-command/src/commands/dataframe/series/all_true.rs new file mode 100644 index 0000000000..7b2503be72 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/all_true.rs @@ -0,0 +1,67 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{dataframe::NuSeries, Signature, TaggedDictBuilder, UntaggedValue, Value}; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe all-true" + } + + fn usage(&self) -> &str { + "Returns true if all values are true" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe all-true") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Returns true if all values are true", + example: "[$true $true $true] | dataframe to-series | dataframe all-true", + result: None, + }, + Example { + description: "Checks the result from a comparison", + example: r#"let s = ([5 6 2 8] | dataframe to-series); +let res = ($s > 9); +$res | dataframe all-true"#, + result: None, + }, + ] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let bool = series.as_ref().bool().map_err(|e| { + parse_polars_error::<&str>( + &e, + &tag.span, + Some("all-true only works with series of type bool"), + ) + })?; + + let res = bool.all_true(); + + let value = Value { + value: UntaggedValue::Primitive(res.into()), + tag: tag.clone(), + }; + + let mut data = TaggedDictBuilder::new(tag); + data.insert_value("all_true", value); + + Ok(OutputStream::one(data.into_value())) +} diff --git a/crates/nu-command/src/commands/dataframe/series/arg_max.rs b/crates/nu-command/src/commands/dataframe/series/arg_max.rs new file mode 100644 index 0000000000..dcc3fc6cce --- /dev/null +++ 
b/crates/nu-command/src/commands/dataframe/series/arg_max.rs @@ -0,0 +1,57 @@ +use crate::prelude::*; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value, +}; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe arg-max" + } + + fn usage(&self) -> &str { + "Return index for max value in series" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe arg-max") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns index for max value", + example: "[1 3 2] | dataframe to-series | dataframe arg-max", + result: None, + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = series.as_ref().arg_max(); + + let value = match res { + Some(index) => UntaggedValue::Primitive(Primitive::Int(index as i64)), + None => UntaggedValue::Primitive(Primitive::Nothing), + }; + + let value = Value { + value, + tag: tag.clone(), + }; + + let mut data = TaggedDictBuilder::new(tag); + data.insert_value("arg-max", value); + + Ok(OutputStream::one(data.into_value())) +} diff --git a/crates/nu-command/src/commands/dataframe/series/arg_min.rs b/crates/nu-command/src/commands/dataframe/series/arg_min.rs new file mode 100644 index 0000000000..ddd9209805 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/arg_min.rs @@ -0,0 +1,57 @@ +use crate::prelude::*; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value, +}; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe arg-min" + } + + fn usage(&self) -> &str 
{ + "Return index for min value in series" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe arg-min") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns index for min value", + example: "[1 3 2] | dataframe to-series | dataframe arg-min", + result: None, + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = series.as_ref().arg_min(); + + let value = match res { + Some(index) => UntaggedValue::Primitive(Primitive::Int(index as i64)), + None => UntaggedValue::Primitive(Primitive::Nothing), + }; + + let value = Value { + value, + tag: tag.clone(), + }; + + let mut data = TaggedDictBuilder::new(tag); + data.insert_value("arg-min", value); + + Ok(OutputStream::one(data.into_value())) +} diff --git a/crates/nu-command/src/commands/dataframe/series/arg_sort.rs b/crates/nu-command/src/commands/dataframe/series/arg_sort.rs new file mode 100644 index 0000000000..3dc9c51770 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/arg_sort.rs @@ -0,0 +1,47 @@ +use crate::prelude::*; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{dataframe::NuSeries, Signature}; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe arg-sort" + } + + fn usage(&self) -> &str { + "Returns indexes for a sorted series" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe arg-sort").switch("reverse", "reverse order", Some('r')) + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns indexes for a sorted series", + example: "[1 2 2 3 3] | dataframe to-series | dataframe arg-sort", + result: None, + }] + } +} 
+ +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + let reverse = args.has_flag("reverse"); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = series.as_ref().argsort(reverse); + + Ok(OutputStream::one(NuSeries::series_to_value( + res.into_series(), + tag, + ))) +} diff --git a/crates/nu-command/src/commands/dataframe/series/arg_true.rs b/crates/nu-command/src/commands/dataframe/series/arg_true.rs new file mode 100644 index 0000000000..1aac61d132 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/arg_true.rs @@ -0,0 +1,52 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{dataframe::NuSeries, Signature}; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe arg-true" + } + + fn usage(&self) -> &str { + "Returns indexes where values are true" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe arg-true") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns indexes where values are true", + example: "[$false $true $false] | dataframe to-series | dataframe arg-true", + result: None, + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let bool = series.as_ref().bool().map_err(|e| { + parse_polars_error::<&str>( + &e, + &tag.span, + Some("arg-true only works with series of type bool"), + ) + })?; + + let mut res = bool.arg_true().into_series(); + res.rename("int"); + + Ok(OutputStream::one(NuSeries::series_to_value(res, tag))) +} diff --git a/crates/nu-command/src/commands/dataframe/series/arg_unique.rs 
b/crates/nu-command/src/commands/dataframe/series/arg_unique.rs new file mode 100644 index 0000000000..e40eeb23d4 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/arg_unique.rs @@ -0,0 +1,49 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{dataframe::NuSeries, Signature}; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe arg-unique" + } + + fn usage(&self) -> &str { + "Returns indexes for unique values" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe arg-unique") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns indexes for unique values", + example: "[1 2 2 3 3] | dataframe to-series | dataframe arg-unique", + result: None, + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = series + .as_ref() + .arg_unique() + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + + Ok(OutputStream::one(NuSeries::series_to_value( + res.into_series(), + tag, + ))) +} diff --git a/crates/nu-command/src/commands/dataframe/series/is_duplicated.rs b/crates/nu-command/src/commands/dataframe/series/is_duplicated.rs new file mode 100644 index 0000000000..002a02be9f --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/is_duplicated.rs @@ -0,0 +1,49 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{dataframe::NuSeries, Signature}; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe 
is-duplicated" + } + + fn usage(&self) -> &str { + "Creates mask indicating duplicated values" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe is-duplicated") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Create mask indicating duplicated values", + example: "[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-duplicated", + result: None, + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = series + .as_ref() + .is_duplicated() + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + + Ok(OutputStream::one(NuSeries::series_to_value( + res.into_series(), + tag, + ))) +} diff --git a/crates/nu-command/src/commands/dataframe/series/is_in.rs b/crates/nu-command/src/commands/dataframe/series/is_in.rs new file mode 100644 index 0000000000..64e6cc5cad --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/is_in.rs @@ -0,0 +1,63 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{NuSeries, PolarsData}, + Signature, SyntaxShape, UntaggedValue, Value, +}; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe is-in" + } + + fn usage(&self) -> &str { + "Checks if elements from a series are contained in right series" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe is-in").required("other", SyntaxShape::Any, "right series") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Checks if elements from a series are contained in right series", + example: r#"let other = ([1 3 6] | dataframe 
to-series); +[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-in $other"#, + result: None, + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + let value: Value = args.req(0)?; + + let other = match value.value { + UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series), + _ => Err(ShellError::labeled_error( + "Incorrect type", + "can only search in a series", + value.tag.span, + )), + }?; + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = series + .as_ref() + .is_in(other.as_ref()) + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + + Ok(OutputStream::one(NuSeries::series_to_value( + res.into_series(), + tag, + ))) +} diff --git a/crates/nu-command/src/commands/dataframe/series/is_not_null.rs b/crates/nu-command/src/commands/dataframe/series/is_not_null.rs new file mode 100644 index 0000000000..f3ce8cdd03 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/is_not_null.rs @@ -0,0 +1,48 @@ +use crate::prelude::*; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{dataframe::NuSeries, Signature}; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe is-not-null" + } + + fn usage(&self) -> &str { + "Creates mask where value is not null" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe is-not-null") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Create mask where values are not null", + example: r#"let s = ([5 6 0 8] | dataframe to-series); +let res = ($s / $s); +$res | dataframe is-not-null"#, + result: None, + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = 
series.as_ref().is_not_null(); + + Ok(OutputStream::one(NuSeries::series_to_value( + res.into_series(), + tag, + ))) +} diff --git a/crates/nu-command/src/commands/dataframe/series/is_null.rs b/crates/nu-command/src/commands/dataframe/series/is_null.rs new file mode 100644 index 0000000000..33a3d9fed4 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/is_null.rs @@ -0,0 +1,48 @@ +use crate::prelude::*; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{dataframe::NuSeries, Signature}; +use polars::prelude::IntoSeries; + +/// `dataframe is-null` command: creates a mask where the series value is null; takes no arguments. +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe is-null" + } + + fn usage(&self) -> &str { + "Creates mask where value is null" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe is-null") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Create mask where values are null", + example: r#"let s = ([5 6 0 8] | dataframe to-series); +let res = ($s / $s); +$res | dataframe is-null"#, + result: None, + }] + } +} + +/// Reads a series from the input stream and outputs its `is_null()` mask as a single series value. +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = series.as_ref().is_null(); + + Ok(OutputStream::one(NuSeries::series_to_value( + res.into_series(), + tag, + ))) +} diff --git a/crates/nu-command/src/commands/dataframe/series/is_unique.rs b/crates/nu-command/src/commands/dataframe/series/is_unique.rs new file mode 100644 index 0000000000..02369344c3 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/is_unique.rs @@ -0,0 +1,49 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{dataframe::NuSeries, Signature}; +use polars::prelude::IntoSeries; + +/// `dataframe is-unique` command: creates a mask indicating unique values; takes no arguments. +pub struct DataFrame; + +impl
WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe is-unique" + } + + fn usage(&self) -> &str { + "Creates mask indicating unique values" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe is-unique") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Create mask indicating unique values", + example: "[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-unique", + result: None, + }] + } +} + +/// Reads a series from the input stream and outputs the mask returned by `is_unique()` as a single series value; a polars error is converted with `parse_polars_error` at the command's tag span. +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = series + .as_ref() + .is_unique() + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + + Ok(OutputStream::one(NuSeries::series_to_value( + res.into_series(), + tag, + ))) +} diff --git a/crates/nu-command/src/commands/dataframe/series/mod.rs b/crates/nu-command/src/commands/dataframe/series/mod.rs new file mode 100644 index 0000000000..11517d91a0 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/mod.rs @@ -0,0 +1,39 @@ +// One submodule per series command; each module's `DataFrame` struct is re-exported below under a distinct `DataFrame*` alias. +pub mod all_false; +pub mod all_true; +pub mod arg_max; +pub mod arg_min; +pub mod arg_sort; +pub mod arg_true; +pub mod arg_unique; +pub mod is_duplicated; +pub mod is_in; +pub mod is_not_null; +pub mod is_null; +pub mod is_unique; +pub mod n_null; +pub mod n_unique; +pub mod rename; +pub mod set; +pub mod shift; +pub mod unique; +pub mod value_counts; + +pub use all_false::DataFrame as DataFrameAllFalse; +pub use all_true::DataFrame as DataFrameAllTrue; +pub use arg_max::DataFrame as DataFrameArgMax; +pub use arg_min::DataFrame as DataFrameArgMin; +pub use arg_sort::DataFrame as DataFrameArgSort; +pub use arg_true::DataFrame as DataFrameArgTrue; +pub use arg_unique::DataFrame as DataFrameArgUnique; +pub use is_duplicated::DataFrame as DataFrameIsDuplicated; +pub use is_in::DataFrame as DataFrameIsIn; +pub use
is_not_null::DataFrame as DataFrameIsNotNull; +pub use is_null::DataFrame as DataFrameIsNull; +pub use is_unique::DataFrame as DataFrameIsUnique; +pub use n_null::DataFrame as DataFrameNNull; +pub use n_unique::DataFrame as DataFrameNUnique; +pub use rename::DataFrame as DataFrameSeriesRename; +pub use set::DataFrame as DataFrameSet; +pub use shift::DataFrame as DataFrameShift; +pub use unique::DataFrame as DataFrameUnique; +pub use value_counts::DataFrame as DataFrameValueCounts; diff --git a/crates/nu-command/src/commands/dataframe/series/n_null.rs b/crates/nu-command/src/commands/dataframe/series/n_null.rs new file mode 100644 index 0000000000..6e8eb9f19b --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/n_null.rs @@ -0,0 +1,53 @@ +use crate::prelude::*; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value, +}; + +/// `dataframe count-null` command: counts the null values of the piped series; takes no arguments. +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe count-null" + } + + fn usage(&self) -> &str { + "Counts null values" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe count-null") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Counts null values", + example: r#"let s = ([1 1 0 0 3 3 4] | dataframe to-series); +($s / $s) | dataframe count-null"#, + result: None, + }] + } +} + +/// Reads a series from the input stream and outputs a one-entry row whose `count-null` column holds `null_count()` as an integer. +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = series.as_ref().null_count(); + + let value = Value { + value: UntaggedValue::Primitive(Primitive::Int(res as i64)), + tag: tag.clone(), + }; + + let mut data = TaggedDictBuilder::new(tag); + data.insert_value("count-null", value); + + Ok(OutputStream::one(data.into_value())) +} diff --git
a/crates/nu-command/src/commands/dataframe/series/n_unique.rs b/crates/nu-command/src/commands/dataframe/series/n_unique.rs new file mode 100644 index 0000000000..2181bd44a3 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/n_unique.rs @@ -0,0 +1,55 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value, +}; + +/// `dataframe count-unique` command: counts the unique values of the piped series; takes no arguments. +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe count-unique" + } + + fn usage(&self) -> &str { + "Counts unique values" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe count-unique") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Counts unique values", + example: "[1 1 2 2 3 3 4] | dataframe to-series | dataframe count-unique", + result: None, + }] + } +} + +/// Reads a series from the input stream and outputs a one-entry row whose `count-unique` column holds `n_unique()` as an integer; a polars error is converted with `parse_polars_error` at the command's tag span. +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = series + .as_ref() + .n_unique() + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + + let value = Value { + value: UntaggedValue::Primitive(Primitive::Int(res as i64)), + tag: tag.clone(), + }; + + let mut data = TaggedDictBuilder::new(tag); + data.insert_value("count-unique", value); + + Ok(OutputStream::one(data.into_value())) +} diff --git a/crates/nu-command/src/commands/dataframe/series_rename.rs b/crates/nu-command/src/commands/dataframe/series/rename.rs similarity index 92% rename from crates/nu-command/src/commands/dataframe/series_rename.rs rename to crates/nu-command/src/commands/dataframe/series/rename.rs index d67079f3fe..a3c9730b36 100644 --- a/crates/nu-command/src/commands/dataframe/series_rename.rs +++
b/crates/nu-command/src/commands/dataframe/series/rename.rs @@ -8,7 +8,7 @@ pub struct DataFrame; impl WholeStreamCommand for DataFrame { fn name(&self) -> &str { - "dataframe rename-series" + "dataframe rename" } fn usage(&self) -> &str { @@ -16,7 +16,7 @@ impl WholeStreamCommand for DataFrame { } fn signature(&self) -> Signature { - Signature::build("dataframe rename-series").required( + Signature::build("dataframe rename").required( "name", SyntaxShape::String, "new series name", diff --git a/crates/nu-command/src/commands/dataframe/series/set.rs b/crates/nu-command/src/commands/dataframe/series/set.rs new file mode 100644 index 0000000000..6fa56249d0 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/set.rs @@ -0,0 +1,139 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{dataframe::NuSeries, Primitive, Signature, SyntaxShape, UntaggedValue, Value}; +use polars::prelude::{ChunkSet, DataType, IntoSeries}; + +/// `dataframe set` command: sets a value in the piped series where the mask is true. Takes a required positional `value` and a required named `--mask` (`-m`). +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe set" + } + + fn usage(&self) -> &str { + "Sets value where given mask is true" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe set") + .required("value", SyntaxShape::Any, "value to be inserted in series") + .required_named( + "mask", + SyntaxShape::Any, + "mask indicating insertions", + Some('m'), + ) + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Sets value where given mask is true", + example: r#"let s = ([1 2 2 3 3] | dataframe to-series | dataframe shift 2); +let mask = ($s | dataframe is-null); +$s | dataframe set 0 --mask $mask"#, + result: None, + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + let value: Value = args.req(0)?; + let mask: Value =
args.req_named("mask")?; + + // The mask must be a boolean series; errors about it are labelled at the mask argument's span. + let bool_mask = match &mask.value { + UntaggedValue::DataFrame(nu_protocol::dataframe::PolarsData::Series(series)) => { + match series.as_ref().dtype() { + DataType::Boolean => series + .as_ref() + .bool() + .map_err(|e| parse_polars_error::<&str>(&e, &mask.tag.span, None)), + _ => Err(ShellError::labeled_error( + "Incorrect type", + "can only use bool series as mask", + mask.tag.span, + )), + } + } + _ => Err(ShellError::labeled_error( + "Incorrect type", + "can only use bool series as mask", + mask.tag.span, + )), + }?; + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + // The primitive kind of `value` selects which typed view of the series is used (Int -> i64, Decimal -> f64, String -> utf8). + match &value.value { + UntaggedValue::Primitive(Primitive::Int(val)) => { + let chunked = series.as_ref().i64().map_err(|e| { + parse_polars_error::<&str>( + &e, + &value.tag.span, + Some("The value has to match the series type"), + ) + })?; + + let res = chunked + .set(bool_mask, Some(*val)) + .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?; + + Ok(OutputStream::one(NuSeries::series_to_value( + res.into_series(), + tag, + ))) + } + UntaggedValue::Primitive(Primitive::Decimal(val)) => { + let chunked = series.as_ref().f64().map_err(|e| { + parse_polars_error::<&str>( + &e, + &value.tag.span, + Some("The value has to match the series type"), + ) + })?; + + // Report a failed decimal-to-f64 conversion as an error instead of panicking on user input. + let val = val.to_f64().ok_or_else(|| { + ShellError::labeled_error( + "Incorrect value", + "decimal value cannot be represented as f64", + value.tag.span, + ) + })?; + + let res = chunked + .set(bool_mask, Some(val)) + .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?; + + Ok(OutputStream::one(NuSeries::series_to_value( + res.into_series(), + tag, + ))) + } + UntaggedValue::Primitive(Primitive::String(val)) => { + let chunked = series.as_ref().utf8().map_err(|e| { + parse_polars_error::<&str>( + &e, + &value.tag.span, + Some("The value has to match the series type"), + ) + })?; + + let res = chunked + .set(bool_mask, Some(val.as_ref())) + .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?; + + let mut res = res.into_series(); + res.rename("string"); + +
Ok(OutputStream::one(NuSeries::series_to_value( + res.into_series(), + tag, + ))) + } + _ => Err(ShellError::labeled_error( + "Incorrect type", + format!( + "this value cannot be set in a series of type '{}'", + series.as_ref().dtype() + ), + value.tag.span, + )), + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/shift.rs b/crates/nu-command/src/commands/dataframe/series/shift.rs new file mode 100644 index 0000000000..816a2ef927 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/shift.rs @@ -0,0 +1,48 @@ +use crate::prelude::*; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape}; +use nu_source::Tagged; +use polars::prelude::IntoSeries; + +/// `dataframe shift` command: shifts the values of the piped series by the required integer `period` argument. +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe shift" + } + + fn usage(&self) -> &str { + "Shifts the values by a given period" + } + + fn signature(&self) -> Signature { + // The signature name must match `name()`. + Signature::build("dataframe shift").required("period", SyntaxShape::Int, "shift period") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Shifts the values by a given period", + example: "[1 2 2 3 3] | dataframe to-series | dataframe shift 2", + result: None, + }] + } +} + +/// Reads the required `period` argument and a series from the input stream, then outputs `shift(period)` as a single series value. +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + let period: Tagged = args.req(0)?; + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = series.as_ref().shift(period.item); + + Ok(OutputStream::one(NuSeries::series_to_value( + res.into_series(), + tag, + ))) +} diff --git a/crates/nu-command/src/commands/dataframe/series/unique.rs b/crates/nu-command/src/commands/dataframe/series/unique.rs new file mode 100644 index 0000000000..371e820002 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/unique.rs @@ -0,0 +1,49 @@ +use
crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{dataframe::NuSeries, Signature}; +use polars::prelude::IntoSeries; + +/// `dataframe unique` command: returns the unique values of the piped series; takes no arguments. +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe unique" + } + + fn usage(&self) -> &str { + "Returns unique values from a series" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe unique") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns unique values from a series", + example: "[1 2 2 3 3] | dataframe to-series | dataframe unique", + result: None, + }] + } +} + +/// Reads a series from the input stream and outputs the result of `unique()` as a single series value; a polars error is converted with `parse_polars_error` at the command's tag span. +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let res = series + .as_ref() + .unique() + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + + Ok(OutputStream::one(NuSeries::series_to_value( + res.into_series(), + tag, + ))) +} diff --git a/crates/nu-command/src/commands/dataframe/series/value_counts.rs b/crates/nu-command/src/commands/dataframe/series/value_counts.rs new file mode 100644 index 0000000000..71b797e29e --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/value_counts.rs @@ -0,0 +1,50 @@ +use crate::prelude::*; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{NuDataFrame, NuSeries}, + Signature, +}; + +use crate::commands::dataframe::utils::parse_polars_error; + +/// `dataframe value-counts` command: returns a dataframe with the counts for unique values of the piped series; takes no arguments. +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe value-counts" + } + + fn usage(&self) -> &str { + "Returns a dataframe with the counts for unique values in series" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe value-counts") + } + + fn run(&self, args:
CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Calculates value counts", + example: "[5 5 6 6] | dataframe to-series | dataframe value-counts", + result: None, + }] + } +} + +/// Reads a series from the input stream and outputs the dataframe returned by `value_counts()` as a single dataframe value; a polars error is converted with `parse_polars_error` at the command's tag span. +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + + let df = series + .as_ref() + .value_counts() + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + + Ok(OutputStream::one(NuDataFrame::dataframe_to_value(df, tag))) +} diff --git a/crates/nu-command/src/commands/dataframe/sort.rs b/crates/nu-command/src/commands/dataframe/sort.rs index e1382d8b16..9fa7b845d0 100644 --- a/crates/nu-command/src/commands/dataframe/sort.rs +++ b/crates/nu-command/src/commands/dataframe/sort.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value}; +use nu_protocol::{ + dataframe::{NuDataFrame, NuSeries, PolarsData}, + Signature, SyntaxShape, UntaggedValue, Value, +}; use super::utils::{convert_columns, parse_polars_error}; pub struct DataFrame; @@ -12,12 +15,12 @@ impl WholeStreamCommand for DataFrame { } fn usage(&self) -> &str { - "Creates new sorted dataframe" + "Creates new sorted dataframe or series" } fn signature(&self) -> Signature { Signature::build("dataframe sort") - .required( + .optional( "columns", SyntaxShape::Table, "column names to sort dataframe", @@ -30,27 +33,60 @@ impl WholeStreamCommand for DataFrame { } fn examples(&self) -> Vec { - vec![Example { - description: "Create new sorted dataframe", - example: "[[a b]; [3 4] [1 2]] | dataframe to-df | dataframe sort [a]", - result: None, - }] + vec![ + Example { + description: "Create new sorted dataframe", + example: "[[a b]; [3 4] [1 2]] | dataframe to-df | dataframe sort [a]", + result: None, + }, + Example { + description: "Create
new sorted series", + example: "[3 4 1 2] | dataframe to-series | dataframe sort", + result: None, + }, + ] } } fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let columns: Vec = args.req(0)?; + + // Only the first value of the input stream is taken; its variant decides between dataframe sorting and series sorting. + let value = args.input.next().ok_or_else(|| { + ShellError::labeled_error("Empty stream", "No value found in stream", &tag.span) + })?; + let reverse = args.has_flag("reverse"); - let (col_string, col_span) = convert_columns(&columns, &tag)?; + match value.value { + // Dataframe input: the optional `columns` argument is mandatory in this branch. + UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => { + let columns: Option> = args.opt(0)?; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + match columns { + Some(columns) => { + let (col_string, col_span) = convert_columns(&columns, &tag)?; - let res = df - .as_ref() - .sort(&col_string, reverse) - .map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?; + let res = df + .as_ref() + .sort(&col_string, reverse) + .map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?; - Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) + Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) + } + None => Err(ShellError::labeled_error( + "Missing columns", + "missing column name to perform sort", + &tag.span, + )), + } + } + // Series input: sorted directly; the `columns` argument is not read. + UntaggedValue::DataFrame(PolarsData::Series(series)) => { + let res = series.as_ref().sort(reverse); + Ok(OutputStream::one(NuSeries::series_to_value(res, tag))) + } + _ => Err(ShellError::labeled_error( + "Incorrect type", + "sort cannot be done with this value", + &value.tag.span, + )), + } } diff --git a/crates/nu-command/src/commands/mod.rs b/crates/nu-command/src/commands/mod.rs index 57ec63d156..e9dae6419b 100644 --- a/crates/nu-command/src/commands/mod.rs +++ b/crates/nu-command/src/commands/mod.rs @@ -24,12 +24,16 @@ pub use conversions::*; pub use core_commands::*; #[cfg(feature = "dataframe")] pub use dataframe::{ - DataFrame, DataFrameAggregate, DataFrameColumn, DataFrameDTypes,
DataFrameDrop, - DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, DataFrameFilter, DataFrameGet, - DataFrameGroupBy, DataFrameHead, DataFrameJoin, DataFrameList, DataFrameLoad, DataFrameMelt, - DataFramePivot, DataFrameSample, DataFrameSelect, DataFrameSeriesRename, DataFrameShow, - DataFrameSlice, DataFrameSort, DataFrameTail, DataFrameToCsv, DataFrameToDF, - DataFrameToParquet, DataFrameToSeries, DataFrameWhere, DataFrameWithColumn, + DataFrame, DataFrameAggregate, DataFrameAllFalse, DataFrameAllTrue, DataFrameArgMax, + DataFrameArgMin, DataFrameArgSort, DataFrameArgTrue, DataFrameArgUnique, DataFrameColumn, + DataFrameDTypes, DataFrameDrop, DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, + DataFrameFilter, DataFrameGet, DataFrameGroupBy, DataFrameHead, DataFrameIsDuplicated, + DataFrameIsIn, DataFrameIsNotNull, DataFrameIsNull, DataFrameIsUnique, DataFrameJoin, + DataFrameList, DataFrameLoad, DataFrameMelt, DataFrameNNull, DataFrameNUnique, DataFramePivot, + DataFrameSample, DataFrameSelect, DataFrameSeriesRename, DataFrameSet, DataFrameShift, + DataFrameShow, DataFrameSlice, DataFrameSort, DataFrameTail, DataFrameToCsv, DataFrameToDF, + DataFrameToParquet, DataFrameToSeries, DataFrameUnique, DataFrameValueCounts, DataFrameWhere, + DataFrameWithColumn, }; pub use env::*; pub use filesystem::*; diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index 0b2f2fa68e..400c8efd50 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -291,6 +291,24 @@ pub fn create_default_context(interactive: bool) -> Result