Series commands (#3652)

* new series commands

* clippy corrections
This commit is contained in:
Fernando Herrera 2021-06-19 23:59:39 +01:00 committed by GitHub
parent 51c685aa99
commit b9f1371994
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
31 changed files with 1496 additions and 94 deletions

25
Cargo.lock generated
View file

@ -209,7 +209,7 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "arrow"
version = "5.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow-rs?rev=0f55b828883b3b3afda43ae404b130d374e6f1a1#0f55b828883b3b3afda43ae404b130d374e6f1a1"
source = "git+https://github.com/apache/arrow-rs?rev=9f56afb2d2347310184706f7d5e46af583557bea#9f56afb2d2347310184706f7d5e46af583557bea"
dependencies = [
"chrono",
"csv",
@ -4404,7 +4404,7 @@ dependencies = [
[[package]]
name = "parquet"
version = "5.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow-rs?rev=0f55b828883b3b3afda43ae404b130d374e6f1a1#0f55b828883b3b3afda43ae404b130d374e6f1a1"
source = "git+https://github.com/apache/arrow-rs?rev=9f56afb2d2347310184706f7d5e46af583557bea#9f56afb2d2347310184706f7d5e46af583557bea"
dependencies = [
"arrow",
"base64 0.13.0",
@ -4415,6 +4415,7 @@ dependencies = [
"lz4",
"num-bigint 0.4.0",
"parquet-format",
"rand 0.8.3",
"snap",
"thrift",
"zstd",
@ -4642,8 +4643,8 @@ dependencies = [
[[package]]
name = "polars"
version = "0.14.1"
source = "git+https://github.com/pola-rs/polars?rev=9e1506cca9fb646fc55f949ab6345290c3d198a7#9e1506cca9fb646fc55f949ab6345290c3d198a7"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
dependencies = [
"polars-core",
"polars-io",
@ -4652,8 +4653,8 @@ dependencies = [
[[package]]
name = "polars-arrow"
version = "0.14.1"
source = "git+https://github.com/pola-rs/polars?rev=9e1506cca9fb646fc55f949ab6345290c3d198a7#9e1506cca9fb646fc55f949ab6345290c3d198a7"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
dependencies = [
"arrow",
"num 0.4.0",
@ -4662,8 +4663,8 @@ dependencies = [
[[package]]
name = "polars-core"
version = "0.14.1"
source = "git+https://github.com/pola-rs/polars?rev=9e1506cca9fb646fc55f949ab6345290c3d198a7#9e1506cca9fb646fc55f949ab6345290c3d198a7"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
dependencies = [
"ahash",
"anyhow",
@ -4688,8 +4689,8 @@ dependencies = [
[[package]]
name = "polars-io"
version = "0.14.1"
source = "git+https://github.com/pola-rs/polars?rev=9e1506cca9fb646fc55f949ab6345290c3d198a7#9e1506cca9fb646fc55f949ab6345290c3d198a7"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
dependencies = [
"ahash",
"anyhow",
@ -4711,8 +4712,8 @@ dependencies = [
[[package]]
name = "polars-lazy"
version = "0.14.1"
source = "git+https://github.com/pola-rs/polars?rev=9e1506cca9fb646fc55f949ab6345290c3d198a7#9e1506cca9fb646fc55f949ab6345290c3d198a7"
version = "0.14.2"
source = "git+https://github.com/pola-rs/polars?rev=f60d86bc0921bd42635e8a33e7aad28ebe62dc3e#f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
dependencies = [
"ahash",
"itertools",

View file

@ -100,10 +100,10 @@ zip = { version = "0.5.9", optional = true }
[dependencies.polars]
git = "https://github.com/pola-rs/polars"
rev = "9e1506cca9fb646fc55f949ab6345290c3d198a7"
version = "0.14.1"
rev = "f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
version = "0.14.2"
optional = true
features = ["parquet", "json", "random", "pivot", "strings"]
features = ["parquet", "json", "random", "pivot", "strings", "is_in"]
[target.'cfg(unix)'.dependencies]
umask = "1.0.0"

View file

@ -3,10 +3,13 @@ use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value,
};
use nu_source::Tagged;
use polars::{frame::groupby::GroupBy, prelude::PolarsError};
use polars::{
frame::groupby::GroupBy,
prelude::{DataType, PolarsError, Series},
};
use super::utils::convert_columns;
@ -109,7 +112,7 @@ impl WholeStreamCommand for DataFrame {
Example {
description: "Aggregate sum by grouping by column a and summing on col b",
example:
"[[a b]; [one 1] [one 2]] | dataframe to-df | dataframe groupby [a] | dataframe aggregate sum",
"[[a b]; [one 1] [one 2]] | dataframe to-df | dataframe group-by [a] | dataframe aggregate sum",
result: None,
},
Example {
@ -117,6 +120,11 @@ impl WholeStreamCommand for DataFrame {
example: "[[a b]; [4 1] [5 2]] | dataframe to-df | dataframe aggregate sum",
result: None,
},
Example {
description: "Aggregate sum in series",
example: "[4 1 5 6] | dataframe to-series | dataframe aggregate sum",
result: None,
},
]
}
}
@ -142,7 +150,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
ShellError::labeled_error("Empty stream", "No value found in the stream", &tag)
})?;
let res = match value.value {
match value.value {
UntaggedValue::DataFrame(PolarsData::GroupBy(nu_groupby)) => {
let groupby = nu_groupby.to_groupby()?;
@ -151,12 +159,14 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
None => groupby,
};
perform_groupby_aggregation(groupby, op, &operation.tag, &agg_span)
let res = perform_groupby_aggregation(groupby, op, &operation.tag, &agg_span)?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
let df = df.as_ref();
match &selection {
let res = match &selection {
Some(cols) => {
let df = df
.select(cols)
@ -165,16 +175,21 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
perform_dataframe_aggregation(&df, op, &operation.tag)
}
None => perform_dataframe_aggregation(&df, op, &operation.tag),
}
}?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
let value = perform_series_aggregation(series.as_ref(), op, &operation.tag)?;
Ok(OutputStream::one(value))
}
_ => Err(ShellError::labeled_error(
"No groupby or dataframe",
"no groupby or found in input stream",
&value.tag.span,
)),
}?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
}
fn perform_groupby_aggregation(
@ -232,3 +247,163 @@ fn perform_dataframe_aggregation(
)),
}
}
fn perform_series_aggregation(
series: &Series,
operation: Operation,
operation_tag: &Tag,
) -> Result<Value, ShellError> {
match operation {
Operation::Mean => {
let res = match series.mean() {
Some(val) => UntaggedValue::Primitive(val.into()),
None => UntaggedValue::Primitive(0.into()),
};
let value = Value {
value: res,
tag: operation_tag.clone(),
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value("mean", value);
Ok(data.into_value())
}
Operation::Median => {
let res = match series.median() {
Some(val) => UntaggedValue::Primitive(val.into()),
None => UntaggedValue::Primitive(0.into()),
};
let value = Value {
value: res,
tag: operation_tag.clone(),
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value("median", value);
Ok(data.into_value())
}
Operation::Sum => {
let untagged = match series.dtype() {
DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64 => {
let res: i64 = series.sum().unwrap_or(0);
Ok(UntaggedValue::Primitive(res.into()))
}
DataType::Float32 | DataType::Float64 => {
let res: f64 = series.sum().unwrap_or(0.0);
Ok(UntaggedValue::Primitive(res.into()))
}
_ => Err(ShellError::labeled_error(
"Not valid type",
format!(
"this operation can not be performed with series of type {}",
series.dtype()
),
&operation_tag.span,
)),
}?;
let value = Value {
value: untagged,
tag: operation_tag.clone(),
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value("sum", value);
Ok(data.into_value())
}
Operation::Max => {
let untagged = match series.dtype() {
DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64 => {
let res: i64 = series.max().unwrap_or(0);
Ok(UntaggedValue::Primitive(res.into()))
}
DataType::Float32 | DataType::Float64 => {
let res: f64 = series.max().unwrap_or(0.0);
Ok(UntaggedValue::Primitive(res.into()))
}
_ => Err(ShellError::labeled_error(
"Not valid type",
format!(
"this operation can not be performed with series of type {}",
series.dtype()
),
&operation_tag.span,
)),
}?;
let value = Value {
value: untagged,
tag: operation_tag.clone(),
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value("max", value);
Ok(data.into_value())
}
Operation::Min => {
let untagged = match series.dtype() {
DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64 => {
let res: i64 = series.min().unwrap_or(0);
Ok(UntaggedValue::Primitive(res.into()))
}
DataType::Float32 | DataType::Float64 => {
let res: f64 = series.min().unwrap_or(0.0);
Ok(UntaggedValue::Primitive(res.into()))
}
_ => Err(ShellError::labeled_error(
"Not valid type",
format!(
"this operation can not be performed with series of type {}",
series.dtype()
),
&operation_tag.span,
)),
}?;
let value = Value {
value: untagged,
tag: operation_tag.clone(),
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value("min", value);
Ok(data.into_value())
}
_ => Err(ShellError::labeled_error_with_secondary(
"Not valid operation",
"operation not valid for series",
&operation_tag.span,
"Perhaps you want: mean, median, sum, max, min",
&operation_tag.span,
)),
}
}

View file

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use nu_protocol::{
dataframe::{NuDataFrame, NuSeries, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
};
use super::utils::{convert_columns, parse_polars_error};
@ -29,35 +32,62 @@ impl WholeStreamCommand for DataFrame {
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "drop null values duplicates",
example: "[[a b]; [1 2] [3 4] [1 2]] | dataframe to-df | dataframe drop-nulls",
result: None,
}]
vec![
Example {
description: "drop null values in dataframe",
example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | dataframe to-df);
let res = ($df.b / $df.b);
let df = ($df | dataframe with-column $res as res);
$df | dataframe drop-nulls
"#,
result: None,
},
Example {
description: "drop null values in dataframe",
example: r#"let s = ([1 2 0 0 3 4] | dataframe to-series);
($s / $s) | dataframe drop-nulls"#,
result: None,
},
]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
// Extracting the selection columns of the columns to perform the aggregation
let columns: Option<Vec<Value>> = args.opt(0)?;
let (subset, col_span) = match columns {
Some(cols) => {
let (agg_string, col_span) = convert_columns(&cols, &tag)?;
(Some(agg_string), col_span)
let value = args.input.next().ok_or_else(|| {
ShellError::labeled_error("Empty stream", "No value found in stream", &tag.span)
})?;
match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
// Extracting the selection columns of the columns to perform the aggregation
let columns: Option<Vec<Value>> = args.opt(0)?;
let (subset, col_span) = match columns {
Some(cols) => {
let (agg_string, col_span) = convert_columns(&cols, &tag)?;
(Some(agg_string), col_span)
}
None => (None, Span::unknown()),
};
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
let res = df
.as_ref()
.drop_nulls(subset_slice)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
None => (None, Span::unknown()),
};
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
let res = df
.as_ref()
.drop_nulls(subset_slice)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
let res = series.as_ref().drop_nulls();
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
}
_ => Err(ShellError::labeled_error(
"Incorrect type",
"drop nulls cannot be done with this value",
&value.tag.span,
)),
}
}

View file

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature};
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, UntaggedValue,
};
use super::utils::parse_polars_error;
@ -25,25 +28,55 @@ impl WholeStreamCommand for DataFrame {
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create new dataframe with dummy variables",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe to-dummies",
result: None,
}]
vec![
Example {
description: "Create new dataframe with dummy variables from a dataframe",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe to-dummies",
result: None,
},
Example {
description: "Create new dataframe with dummy variables from a series",
example: "[1 2 2 3 3] | dataframe to-series | dataframe to-dummies",
result: None,
},
]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = df.as_ref().to_dummies().map_err(|e| {
parse_polars_error(
&e,
&tag.span,
Some("The only allowed column types for dummies are String or Int"),
)
let value = args.input.next().ok_or_else(|| {
ShellError::labeled_error("Empty stream", "No value found in stream", &tag.span)
})?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
let res = df.as_ref().to_dummies().map_err(|e| {
parse_polars_error(
&e,
&tag.span,
Some("The only allowed column types for dummies are String or Int"),
)
})?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
let res = series.as_ref().to_dummies().map_err(|e| {
parse_polars_error(
&e,
&tag.span,
Some("The only allowed column types for dummies are String or Int"),
)
})?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
_ => Err(ShellError::labeled_error(
"Incorrect type",
"dummies cannot be done with this value",
&value.tag.span,
)),
}
}

View file

@ -17,7 +17,6 @@ pub mod melt;
pub mod pivot;
pub mod sample;
pub mod select;
pub mod series_rename;
pub mod show;
pub mod slice;
pub mod sort;
@ -49,7 +48,6 @@ pub use melt::DataFrame as DataFrameMelt;
pub use pivot::DataFrame as DataFramePivot;
pub use sample::DataFrame as DataFrameSample;
pub use select::DataFrame as DataFrameSelect;
pub use series_rename::DataFrame as DataFrameSeriesRename;
pub use show::DataFrame as DataFrameShow;
pub use slice::DataFrame as DataFrameSlice;
pub use sort::DataFrame as DataFrameSort;
@ -60,3 +58,24 @@ pub use to_parquet::DataFrame as DataFrameToParquet;
pub use to_series::DataFrame as DataFrameToSeries;
pub use where_::DataFrame as DataFrameWhere;
pub use with_column::DataFrame as DataFrameWithColumn;
pub mod series;
pub use series::DataFrameAllFalse;
pub use series::DataFrameAllTrue;
pub use series::DataFrameArgMax;
pub use series::DataFrameArgMin;
pub use series::DataFrameArgSort;
pub use series::DataFrameArgTrue;
pub use series::DataFrameArgUnique;
pub use series::DataFrameIsDuplicated;
pub use series::DataFrameIsIn;
pub use series::DataFrameIsNotNull;
pub use series::DataFrameIsNull;
pub use series::DataFrameIsUnique;
pub use series::DataFrameNNull;
pub use series::DataFrameNUnique;
pub use series::DataFrameSeriesRename;
pub use series::DataFrameSet;
pub use series::DataFrameShift;
pub use series::DataFrameUnique;
pub use series::DataFrameValueCounts;

View file

@ -0,0 +1,67 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature, TaggedDictBuilder, UntaggedValue, Value};
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe all-false"
}
fn usage(&self) -> &str {
"Returns true if all values are false"
}
fn signature(&self) -> Signature {
Signature::build("dataframe all-false")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Returns true if all values are false",
example: "[$false $false $false] | dataframe to-series | dataframe all-false",
result: None,
},
Example {
description: "Checks the result from a comparison",
example: r#"let s = ([5 6 2 8] | dataframe to-series);
let res = ($s > 9);
$res | dataframe all-false"#,
result: None,
},
]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let bool = series.as_ref().bool().map_err(|e| {
parse_polars_error::<&str>(
&e,
&tag.span,
Some("all-false only works with series of type bool"),
)
})?;
let res = bool.all_false();
let value = Value {
value: UntaggedValue::Primitive(res.into()),
tag: tag.clone(),
};
let mut data = TaggedDictBuilder::new(tag);
data.insert_value("all_false", value);
Ok(OutputStream::one(data.into_value()))
}

View file

@ -0,0 +1,67 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature, TaggedDictBuilder, UntaggedValue, Value};
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe all-true"
}
fn usage(&self) -> &str {
"Returns true if all values are true"
}
fn signature(&self) -> Signature {
Signature::build("dataframe all-true")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Returns true if all values are true",
example: "[$true $true $true] | dataframe to-series | dataframe all-true",
result: None,
},
Example {
description: "Checks the result from a comparison",
example: r#"let s = ([5 6 2 8] | dataframe to-series);
let res = ($s > 9);
$res | dataframe all-true"#,
result: None,
},
]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let bool = series.as_ref().bool().map_err(|e| {
parse_polars_error::<&str>(
&e,
&tag.span,
Some("all-true only works with series of type bool"),
)
})?;
let res = bool.all_true();
let value = Value {
value: UntaggedValue::Primitive(res.into()),
tag: tag.clone(),
};
let mut data = TaggedDictBuilder::new(tag);
data.insert_value("all_true", value);
Ok(OutputStream::one(data.into_value()))
}

View file

@ -0,0 +1,57 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value,
};
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe arg-max"
}
fn usage(&self) -> &str {
"Return index for max value in series"
}
fn signature(&self) -> Signature {
Signature::build("dataframe arg-max")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns index for max value",
example: "[1 3 2] | dataframe to-series | dataframe arg-max",
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().arg_max();
let value = match res {
Some(index) => UntaggedValue::Primitive(Primitive::Int(index as i64)),
None => UntaggedValue::Primitive(Primitive::Nothing),
};
let value = Value {
value,
tag: tag.clone(),
};
let mut data = TaggedDictBuilder::new(tag);
data.insert_value("arg-max", value);
Ok(OutputStream::one(data.into_value()))
}

View file

@ -0,0 +1,57 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value,
};
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe arg-min"
}
fn usage(&self) -> &str {
"Return index for min value in series"
}
fn signature(&self) -> Signature {
Signature::build("dataframe arg-min")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns index for min value",
example: "[1 3 2] | dataframe to-series | dataframe arg-min",
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().arg_min();
let value = match res {
Some(index) => UntaggedValue::Primitive(Primitive::Int(index as i64)),
None => UntaggedValue::Primitive(Primitive::Nothing),
};
let value = Value {
value,
tag: tag.clone(),
};
let mut data = TaggedDictBuilder::new(tag);
data.insert_value("arg-min", value);
Ok(OutputStream::one(data.into_value()))
}

View file

@ -0,0 +1,47 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use polars::prelude::IntoSeries;
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe arg-sort"
}
fn usage(&self) -> &str {
"Returns indexes for a sorted series"
}
fn signature(&self) -> Signature {
Signature::build("dataframe arg-sort").switch("reverse", "reverse order", Some('r'))
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns indexes for a sorted series",
example: "[1 2 2 3 3] | dataframe to-series | dataframe arg-sort",
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let reverse = args.has_flag("reverse");
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().argsort(reverse);
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
}

View file

@ -0,0 +1,52 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use polars::prelude::IntoSeries;
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe arg-true"
}
fn usage(&self) -> &str {
"Returns indexes where values are true"
}
fn signature(&self) -> Signature {
Signature::build("dataframe arg-true")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns indexes where values are true",
example: "[$false $true $false] | dataframe to-series | dataframe arg-true",
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let bool = series.as_ref().bool().map_err(|e| {
parse_polars_error::<&str>(
&e,
&tag.span,
Some("arg-true only works with series of type bool"),
)
})?;
let mut res = bool.arg_true().into_series();
res.rename("int");
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
}

View file

@ -0,0 +1,49 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use polars::prelude::IntoSeries;
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe arg-unique"
}
fn usage(&self) -> &str {
"Returns indexes for unique values"
}
fn signature(&self) -> Signature {
Signature::build("dataframe arg-unique")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns indexes for unique values",
example: "[1 2 2 3 3] | dataframe to-series | dataframe arg-unique",
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series
.as_ref()
.arg_unique()
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
}

View file

@ -0,0 +1,49 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use polars::prelude::IntoSeries;
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe is-duplicated"
}
fn usage(&self) -> &str {
"Creates mask indicating duplicated values"
}
fn signature(&self) -> Signature {
Signature::build("dataframe is-duplicated")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create mask indicating duplicated values",
example: "[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-duplicated",
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series
.as_ref()
.is_duplicated()
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
}

View file

@ -0,0 +1,63 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuSeries, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
};
use polars::prelude::IntoSeries;
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe is-in"
}
fn usage(&self) -> &str {
"Checks if elements from a series are contained in right series"
}
fn signature(&self) -> Signature {
Signature::build("dataframe is-in").required("other", SyntaxShape::Any, "right series")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Checks if elements from a series are contained in right series",
example: r#"let other = ([1 3 6] | dataframe to-series);
[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-in $other"#,
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let value: Value = args.req(0)?;
let other = match value.value {
UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series),
_ => Err(ShellError::labeled_error(
"Incorrect type",
"can only search in a series",
value.tag.span,
)),
}?;
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series
.as_ref()
.is_in(other.as_ref())
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
}

View file

@ -0,0 +1,48 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use polars::prelude::IntoSeries;
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe is-not-null"
}
fn usage(&self) -> &str {
"Creates mask where value is not null"
}
fn signature(&self) -> Signature {
Signature::build("dataframe is-not-null")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create mask where values are not null",
example: r#"let s = ([5 6 0 8] | dataframe to-series);
let res = ($s / $s);
$res | dataframe is-not-null"#,
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().is_not_null();
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
}

View file

@ -0,0 +1,48 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use polars::prelude::IntoSeries;
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe is-null"
}
fn usage(&self) -> &str {
"Creates mask where value is null"
}
fn signature(&self) -> Signature {
Signature::build("dataframe is-null")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create mask where values are null",
example: r#"let s = ([5 6 0 8] | dataframe to-series);
let res = ($s / $s);
$res | dataframe is-null"#,
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().is_null();
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
}

View file

@ -0,0 +1,49 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use polars::prelude::IntoSeries;
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe is-unique"
}
fn usage(&self) -> &str {
"Creates mask indicating unique values"
}
fn signature(&self) -> Signature {
Signature::build("dataframe is-unique")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create mask indicating unique values",
example: "[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-unique",
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series
.as_ref()
.is_unique()
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
}

View file

@ -0,0 +1,39 @@
pub mod all_false;
pub mod all_true;
pub mod arg_max;
pub mod arg_min;
pub mod arg_sort;
pub mod arg_true;
pub mod arg_unique;
pub mod is_duplicated;
pub mod is_in;
pub mod is_not_null;
pub mod is_null;
pub mod is_unique;
pub mod n_null;
pub mod n_unique;
pub mod rename;
pub mod set;
pub mod shift;
pub mod unique;
pub mod value_counts;
pub use all_false::DataFrame as DataFrameAllFalse;
pub use all_true::DataFrame as DataFrameAllTrue;
pub use arg_max::DataFrame as DataFrameArgMax;
pub use arg_min::DataFrame as DataFrameArgMin;
pub use arg_sort::DataFrame as DataFrameArgSort;
pub use arg_true::DataFrame as DataFrameArgTrue;
pub use arg_unique::DataFrame as DataFrameArgUnique;
pub use is_duplicated::DataFrame as DataFrameIsDuplicated;
pub use is_in::DataFrame as DataFrameIsIn;
pub use is_not_null::DataFrame as DataFrameIsNotNull;
pub use is_null::DataFrame as DataFrameIsNull;
pub use is_unique::DataFrame as DataFrameIsUnique;
pub use n_null::DataFrame as DataFrameNNull;
pub use n_unique::DataFrame as DataFrameNUnique;
pub use rename::DataFrame as DataFrameSeriesRename;
pub use set::DataFrame as DataFrameSet;
pub use shift::DataFrame as DataFrameShift;
pub use unique::DataFrame as DataFrameUnique;
pub use value_counts::DataFrame as DataFrameValueCounts;

View file

@ -0,0 +1,53 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value,
};
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe count-null"
}
fn usage(&self) -> &str {
"Counts null values"
}
fn signature(&self) -> Signature {
Signature::build("dataframe count-null")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Counts null values",
example: r#"let s = ([1 1 0 0 3 3 4] | dataframe to-series);
($s / ss) | dataframe count-null"#,
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().null_count();
let value = Value {
value: UntaggedValue::Primitive(Primitive::Int(res as i64)),
tag: tag.clone(),
};
let mut data = TaggedDictBuilder::new(tag);
data.insert_value("count-null", value);
Ok(OutputStream::one(data.into_value()))
}

View file

@ -0,0 +1,55 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value,
};
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe count-unique"
}
fn usage(&self) -> &str {
"Counts unique value"
}
fn signature(&self) -> Signature {
Signature::build("dataframe count-unique")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Counts unique values",
example: "[1 1 2 2 3 3 4] | dataframe to-series | dataframe count-unique",
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series
.as_ref()
.n_unique()
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
let value = Value {
value: UntaggedValue::Primitive(Primitive::Int(res as i64)),
tag: tag.clone(),
};
let mut data = TaggedDictBuilder::new(tag);
data.insert_value("count-unique", value);
Ok(OutputStream::one(data.into_value()))
}

View file

@ -8,7 +8,7 @@ pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe rename-series"
"dataframe rename"
}
fn usage(&self) -> &str {
@ -16,7 +16,7 @@ impl WholeStreamCommand for DataFrame {
}
fn signature(&self) -> Signature {
Signature::build("dataframe rename-series").required(
Signature::build("dataframe rename").required(
"name",
SyntaxShape::String,
"new series name",

View file

@ -0,0 +1,139 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Primitive, Signature, SyntaxShape, UntaggedValue, Value};
use polars::prelude::{ChunkSet, DataType, IntoSeries};
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe set"
}
fn usage(&self) -> &str {
"Sets value where given mask is true"
}
fn signature(&self) -> Signature {
Signature::build("dataframe set")
.required("value", SyntaxShape::Any, "value to be inserted in series")
.required_named(
"mask",
SyntaxShape::Any,
"mask indicating insertions",
Some('m'),
)
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Shifts the values by a given period",
example: r#"let s = ([1 2 2 3 3] | dataframe to-series | dataframe shift 2);
let mask = ($s | dataframe is-null);
$s | dataframe set 0 --mask $mask"#,
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let value: Value = args.req(0)?;
let mask: Value = args.req_named("mask")?;
let bool_mask = match &mask.value {
UntaggedValue::DataFrame(nu_protocol::dataframe::PolarsData::Series(series)) => {
match series.as_ref().dtype() {
DataType::Boolean => series
.as_ref()
.bool()
.map_err(|e| parse_polars_error::<&str>(&e, &mask.tag.span, None)),
_ => Err(ShellError::labeled_error(
"Incorrect type",
"can only use bool series as mask",
value.tag.span,
)),
}
}
_ => Err(ShellError::labeled_error(
"Incorrect type",
"can only use bool series as mask",
value.tag.span,
)),
}?;
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
match &value.value {
UntaggedValue::Primitive(Primitive::Int(val)) => {
let chunked = series.as_ref().i64().map_err(|e| {
parse_polars_error::<&str>(
&e,
&value.tag.span,
Some("The value has to match the set value type"),
)
})?;
let res = chunked
.set(bool_mask, Some(*val))
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
}
UntaggedValue::Primitive(Primitive::Decimal(val)) => {
let chunked = series.as_ref().f64().map_err(|e| {
parse_polars_error::<&str>(
&e,
&value.tag.span,
Some("The value has to match the series type"),
)
})?;
let res = chunked
.set(bool_mask, Some(val.to_f64().unwrap()))
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
}
UntaggedValue::Primitive(Primitive::String(val)) => {
let chunked = series.as_ref().utf8().map_err(|e| {
parse_polars_error::<&str>(
&e,
&value.tag.span,
Some("The value has to match the series type"),
)
})?;
let res = chunked
.set(bool_mask, Some(val.as_ref()))
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
let mut res = res.into_series();
res.rename("string");
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
}
_ => Err(ShellError::labeled_error(
"Incorrect type",
format!(
"this value cannot be set in a series of type '{}'",
series.as_ref().dtype()
),
value.tag.span,
)),
}
}

View file

@ -0,0 +1,48 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape};
use nu_source::Tagged;
use polars::prelude::IntoSeries;
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe shift"
}
fn usage(&self) -> &str {
"Shifts the values by a given period"
}
fn signature(&self) -> Signature {
Signature::build("dataframe unique").required("period", SyntaxShape::Int, "shift period")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Shifts the values by a given period",
example: "[1 2 2 3 3] | dataframe to-series | dataframe shift 2",
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let period: Tagged<i64> = args.req(0)?;
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().shift(period.item);
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
}

View file

@ -0,0 +1,49 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use polars::prelude::IntoSeries;
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe unique"
}
fn usage(&self) -> &str {
"Returns unique values from a series"
}
fn signature(&self) -> Signature {
Signature::build("dataframe unique")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns unique values from a series",
example: "[1 2 2 3 3] | dataframe to-series | dataframe unique",
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let res = series
.as_ref()
.unique()
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
}

View file

@ -0,0 +1,50 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, NuSeries},
Signature,
};
use crate::commands::dataframe::utils::parse_polars_error;
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"dataframe value-counts"
}
fn usage(&self) -> &str {
"Returns a dataframe with the counts for unique values in series"
}
fn signature(&self) -> Signature {
Signature::build("dataframe value-counts")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
command(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Calculates value counts",
example: "[5 5 6 6] | dataframe to-series | dataframe value-counts",
result: None,
}]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let df = series
.as_ref()
.value_counts()
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(df, tag)))
}

View file

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use nu_protocol::{
dataframe::{NuDataFrame, NuSeries, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
};
use super::utils::{convert_columns, parse_polars_error};
pub struct DataFrame;
@ -12,12 +15,12 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Creates new sorted dataframe"
"Creates new sorted dataframe or series"
}
fn signature(&self) -> Signature {
Signature::build("dataframe sort")
.required(
.optional(
"columns",
SyntaxShape::Table,
"column names to sort dataframe",
@ -30,27 +33,60 @@ impl WholeStreamCommand for DataFrame {
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create new sorted dataframe",
example: "[[a b]; [3 4] [1 2]] | dataframe to-df | dataframe sort [a]",
result: None,
}]
vec![
Example {
description: "Create new sorted dataframe",
example: "[[a b]; [3 4] [1 2]] | dataframe to-df | dataframe sort [a]",
result: None,
},
Example {
description: "Create new sorted series",
example: "[3 4 1 2] | dataframe to-series | dataframe sort",
result: None,
},
]
}
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let columns: Vec<Value> = args.req(0)?;
let value = args.input.next().ok_or_else(|| {
ShellError::labeled_error("Empty stream", "No value found in stream", &tag.span)
})?;
let reverse = args.has_flag("reverse");
let (col_string, col_span) = convert_columns(&columns, &tag)?;
match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
let columns: Option<Vec<Value>> = args.opt(0)?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
match columns {
Some(columns) => {
let (col_string, col_span) = convert_columns(&columns, &tag)?;
let res = df
.as_ref()
.sort(&col_string, reverse)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
let res = df
.as_ref()
.sort(&col_string, reverse)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
None => Err(ShellError::labeled_error(
"Missing columns",
"missing column name to perform sort",
&tag.span,
)),
}
}
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
let res = series.as_ref().sort(reverse);
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
}
_ => Err(ShellError::labeled_error(
"Incorrect type",
"sort cannot be done with this value",
&value.tag.span,
)),
}
}

View file

@ -24,12 +24,16 @@ pub use conversions::*;
pub use core_commands::*;
#[cfg(feature = "dataframe")]
pub use dataframe::{
DataFrame, DataFrameAggregate, DataFrameColumn, DataFrameDTypes, DataFrameDrop,
DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, DataFrameFilter, DataFrameGet,
DataFrameGroupBy, DataFrameHead, DataFrameJoin, DataFrameList, DataFrameLoad, DataFrameMelt,
DataFramePivot, DataFrameSample, DataFrameSelect, DataFrameSeriesRename, DataFrameShow,
DataFrameSlice, DataFrameSort, DataFrameTail, DataFrameToCsv, DataFrameToDF,
DataFrameToParquet, DataFrameToSeries, DataFrameWhere, DataFrameWithColumn,
DataFrame, DataFrameAggregate, DataFrameAllFalse, DataFrameAllTrue, DataFrameArgMax,
DataFrameArgMin, DataFrameArgSort, DataFrameArgTrue, DataFrameArgUnique, DataFrameColumn,
DataFrameDTypes, DataFrameDrop, DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies,
DataFrameFilter, DataFrameGet, DataFrameGroupBy, DataFrameHead, DataFrameIsDuplicated,
DataFrameIsIn, DataFrameIsNotNull, DataFrameIsNull, DataFrameIsUnique, DataFrameJoin,
DataFrameList, DataFrameLoad, DataFrameMelt, DataFrameNNull, DataFrameNUnique, DataFramePivot,
DataFrameSample, DataFrameSelect, DataFrameSeriesRename, DataFrameSet, DataFrameShift,
DataFrameShow, DataFrameSlice, DataFrameSort, DataFrameTail, DataFrameToCsv, DataFrameToDF,
DataFrameToParquet, DataFrameToSeries, DataFrameUnique, DataFrameValueCounts, DataFrameWhere,
DataFrameWithColumn,
};
pub use env::*;
pub use filesystem::*;

View file

@ -291,6 +291,24 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(DataFrameWithColumn),
whole_stream_command(DataFrameFilter),
whole_stream_command(DataFrameSeriesRename),
whole_stream_command(DataFrameValueCounts),
whole_stream_command(DataFrameIsNull),
whole_stream_command(DataFrameIsNotNull),
whole_stream_command(DataFrameAllTrue),
whole_stream_command(DataFrameAllFalse),
whole_stream_command(DataFrameArgMax),
whole_stream_command(DataFrameArgMin),
whole_stream_command(DataFrameArgTrue),
whole_stream_command(DataFrameArgUnique),
whole_stream_command(DataFrameArgSort),
whole_stream_command(DataFrameUnique),
whole_stream_command(DataFrameNUnique),
whole_stream_command(DataFrameNNull),
whole_stream_command(DataFrameIsUnique),
whole_stream_command(DataFrameIsDuplicated),
whole_stream_command(DataFrameIsIn),
whole_stream_command(DataFrameShift),
whole_stream_command(DataFrameSet),
]);
#[cfg(feature = "clipboard-cli")]

View file

@ -39,8 +39,8 @@ nu-ansi-term = { version = "0.32.1", path = "../nu-ansi-term" }
[dependencies.polars]
git = "https://github.com/pola-rs/polars"
rev = "9e1506cca9fb646fc55f949ab6345290c3d198a7"
version = "0.14.1"
rev = "f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
version = "0.14.2"
optional = true
features = ["strings", "checked_arithmetic"]

View file

@ -32,8 +32,8 @@ toml = "0.5.8"
[dependencies.polars]
git = "https://github.com/pola-rs/polars"
rev = "9e1506cca9fb646fc55f949ab6345290c3d198a7"
version = "0.14.1"
rev = "f60d86bc0921bd42635e8a33e7aad28ebe62dc3e"
version = "0.14.2"
optional = true
features = ["serde", "rows"]