Polars upgrade 0.23 (#6303)

* more lazy expressions

* upgrade polars and correct functions

* arg-where example

* cargo clippy

* restore modified filter files

* correct string addition with str

* correct string addition with str

* correct message in test
This commit is contained in:
Fernando Herrera 2022-08-12 14:10:36 +02:00 committed by GitHub
parent ff6868b329
commit ae64c58f59
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 583 additions and 121 deletions

77
Cargo.lock generated
View file

@ -130,9 +130,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "arrow-format"
version = "0.6.0"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "216249afef413d7e9e9b4b543e73b3e371ace3a812380af98f1c871521572cdd"
checksum = "8df5d25bc6d676271277120c41ef28760fe0a9f070677a58db621c0f983f9c20"
dependencies = [
"planus",
"serde",
@ -140,16 +140,19 @@ dependencies = [
[[package]]
name = "arrow2"
version = "0.12.0"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5feafd6df4e3f577529e6aa2b9b7cdb3c9fe8e8f66ebc8dc29abbe71a7e968f0"
checksum = "afc54f0b14083abaf6bc71cf1aeccd7831a24b1e29d07683ba9a4a0f6c5d9326"
dependencies = [
"ahash",
"arrow-format",
"base64",
"bytemuck",
"chrono",
"dyn-clone",
"either",
"fallible-streaming-iterator",
"foreign_vec",
"futures",
"hash_hasher",
"indexmap",
@ -1024,6 +1027,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453440c271cf5577fd2a40e4942540cb7d0d2f85e27c8d07dd0023c925a67541"
[[package]]
name = "dyn-clone"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d07a982d1fb29db01e5a59b1918e03da4df7297eaeee7686ac45542fd4e59c8"
[[package]]
name = "ego-tree"
version = "0.6.2"
@ -1204,6 +1213,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
[[package]]
name = "foreign_vec"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673"
[[package]]
name = "form_urlencoded"
version = "1.0.1"
@ -3172,9 +3187,9 @@ dependencies = [
[[package]]
name = "parquet2"
version = "0.13.2"
version = "0.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73fd2690ad041f9296876daef1f2706f6347073bdbcc719090887f1691e4a09d"
checksum = "33e434af3293ba384075a56d4b400ce659868ca7823142194ef204f01ab35e50"
dependencies = [
"async-stream",
"bitpacking",
@ -3381,18 +3396,18 @@ checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
[[package]]
name = "planus"
version = "0.2.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bffebaf174d6cad46a5f0f1bb1c45c6eb509571688bcb18dfab217f3c9f9b151"
checksum = "fc1691dd09e82f428ce8d6310bd6d5da2557c82ff17694d2a32cad7242aea89f"
dependencies = [
"array-init-cursor",
]
[[package]]
name = "polars"
version = "0.22.8"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d175c67e80ceaef7219258cfc3a8686531d9510875b0cefa25404e5b80a7933"
checksum = "a75b1077fda63c0f67acc1cdc8586e7afce419be1e85bf7dfa8935e0e266d6b3"
dependencies = [
"polars-core",
"polars-io",
@ -3403,9 +3418,9 @@ dependencies = [
[[package]]
name = "polars-arrow"
version = "0.22.7"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f66c7d3da2c10a09131294dbe7802fac792f570be639dc6ebf207bfc3e144287"
checksum = "f7b28f858b252436550679609a23be34d62705faf783887f172f845eb58bcb8b"
dependencies = [
"arrow2",
"hashbrown",
@ -3416,13 +3431,14 @@ dependencies = [
[[package]]
name = "polars-core"
version = "0.22.7"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7f15f443a90d5367c4fbbb151e203f03b5b96055c8b928c6bc30655a3644f13"
checksum = "eeaec1ca3ac4829ca24b33743adeeb323a43b5a85515bfce20c2c81799c82790"
dependencies = [
"ahash",
"anyhow",
"arrow2",
"bitflags",
"chrono",
"comfy-table",
"hashbrown",
@ -3437,14 +3453,15 @@ dependencies = [
"regex",
"serde",
"serde_json",
"smartstring",
"thiserror",
]
[[package]]
name = "polars-io"
version = "0.22.7"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "058d0a847ce5009b974c69ec878ed416e306436f21b626543019f738cee12315"
checksum = "51405e46f93e306a3c9280c60ba1101c662e8a6dab33344680d31c3161045f1c"
dependencies = [
"ahash",
"anyhow",
@ -3470,11 +3487,12 @@ dependencies = [
[[package]]
name = "polars-lazy"
version = "0.22.7"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dad86a4ce7e32540ff12089bce6f77270fd133a5b263328a92be61defdd6b151"
checksum = "1340af778bc8124180d8ca1a566f076a5339566a207a42130796048b087fe977"
dependencies = [
"ahash",
"bitflags",
"glob",
"parking_lot",
"polars-arrow",
@ -3489,9 +3507,9 @@ dependencies = [
[[package]]
name = "polars-ops"
version = "0.22.7"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "030ecd473be113cd0264f1bc19de39a844fa12fa565db9dc52c859cbc292cf04"
checksum = "4a1812e5d5e589d5bd23f8d89dcd8bd4508082c50d055b8ff5fafb6f2a519c9a"
dependencies = [
"polars-arrow",
"polars-core",
@ -3499,9 +3517,9 @@ dependencies = [
[[package]]
name = "polars-time"
version = "0.22.7"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94047b20d2da3bcc55c421be187a0c6f316cf1eea7fe7ed7347c1160a32d017c"
checksum = "fc4ebe97d601a4b443337df71d0b7e673fce953654871c3311850ea394d48297"
dependencies = [
"chrono",
"lexical",
@ -3513,9 +3531,9 @@ dependencies = [
[[package]]
name = "polars-utils"
version = "0.22.7"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcd3d0238462d5d9f7fbeaaea46e73ed4d58f6fae8b70d53cbe51d7538cc43f5"
checksum = "4ea836afadcddee3f1a513dae7624f6d7d0d64abb129063ec7476b8347c8725b"
dependencies = [
"parking_lot",
"rayon",
@ -4479,6 +4497,17 @@ version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1"
[[package]]
name = "smartstring"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29"
dependencies = [
"autocfg",
"static_assertions",
"version_check",
]
[[package]]
name = "smawk"
version = "0.3.1"

View file

@ -99,15 +99,14 @@ version = "2.1.3"
optional = true
[dependencies.polars]
version = "0.22.8"
# path = "../../../../polars/polars"
version = "0.23.2"
optional = true
features = [
"default", "to_dummies", "parquet", "json", "serde", "serde-lazy",
"object", "checked_arithmetic", "strings", "cum_agg", "is_in",
"rolling_window", "strings", "rows", "random",
"dtype-datetime", "dtype-struct", "lazy", "cross_join",
"dynamic_groupby", "dtype-categorical", "concat_str"
"dynamic_groupby", "dtype-categorical", "concat_str", "arg_where"
]
[target.'cfg(windows)'.dependencies.windows]

View file

@ -0,0 +1,94 @@
use super::super::values::NuDataFrame;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct ColumnsDF;
impl Command for ColumnsDF {
fn name(&self) -> &str {
"columns"
}
fn usage(&self) -> &str {
"Show dataframe columns"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_type(Type::Custom("dataframe".into()))
.output_type(Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Dataframe columns",
example: "[[a b]; [1 2] [3 4]] | into df | columns",
result: Some(Value::List {
vals: vec![
Value::String {
val: "a".into(),
span: Span::test_data(),
},
Value::String {
val: "b".into(),
span: Span::test_data(),
},
],
span: Span::test_data(),
}),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input)
}
}
#[allow(clippy::needless_collect)]
fn command(
_engine_state: &EngineState,
_stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let names: Vec<Value> = df
.as_ref()
.get_column_names()
.iter()
.map(|v| Value::String {
val: v.to_string(),
span: call.head,
})
.collect();
let names = Value::List {
vals: names,
span: call.head,
};
Ok(PipelineData::Value(names, None))
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(ColumnsDF {})])
}
}

View file

@ -1,4 +1,5 @@
mod append;
mod columns;
mod describe;
mod drop;
mod drop_duplicates;
@ -26,6 +27,7 @@ mod with_column;
use nu_protocol::engine::StateWorkingSet;
pub use append::AppendDF;
pub use columns::ColumnsDF;
pub use describe::DescribeDF;
pub use drop::DropDF;
pub use drop_duplicates::DropDuplicates;
@ -63,6 +65,7 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
// Dataframe commands
bind_command!(
AppendDF,
ColumnsDF,
DataTypes,
DescribeDF,
DropDF,

View file

@ -0,0 +1,76 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
use polars::prelude::arg_where;
#[derive(Clone)]
pub struct ExprArgWhere;
impl Command for ExprArgWhere {
fn name(&self) -> &str {
"arg-where"
}
fn usage(&self) -> &str {
"Creates an expression that returns the arguments where expression is true"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("column name", SyntaxShape::Any, "Expression to evaluate")
.input_type(Type::Any)
.output_type(Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Return a dataframe where the value match the expression",
example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | into df);
$df | select (arg-where ((col b) >= 2) | as b_arg)",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"b_arg".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
_input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value: Value = call.req(engine_state, stack, 0)?;
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = arg_where(expr.into_polars()).into();
Ok(PipelineData::Value(expr.into_value(call.head), None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::expressions::ExprAlias;
use crate::dataframe::lazy::LazySelect;
#[test]
fn test_examples() {
test_dataframe(vec![
Box::new(ExprArgWhere {}),
Box::new(ExprAlias {}),
Box::new(LazySelect {}),
])
}
}

View file

@ -0,0 +1,110 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
use polars::prelude::{lit, DataType};
#[derive(Clone)]
pub struct ExprIsIn;
impl Command for ExprIsIn {
fn name(&self) -> &str {
"is-in"
}
fn usage(&self) -> &str {
"Creates an is-in expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"list",
SyntaxShape::List(Box::new(SyntaxShape::Any)),
"List to check if values are in",
)
.input_type(Type::Custom("expression".into()))
.output_type(Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates a is-in expression",
example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | into df);
$df | with-column (col a | is-in [one two] | as a_in)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![
Value::test_string("one"),
Value::test_string("two"),
Value::test_string("three"),
],
),
Column::new(
"b".to_string(),
vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)],
),
Column::new(
"a_in".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let list: Vec<Value> = call.req(engine_state, stack, 0)?;
let expr = NuExpression::try_from_pipeline(input, call.head)?;
let values = NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)])?;
let list = values.as_series(call.head)?;
if matches!(list.dtype(), DataType::Object(..)) {
return Err(ShellError::IncompatibleParametersSingle(
"Cannot use a mixed list as argument".into(),
call.head,
));
}
let expr: NuExpression = expr.into_polars().is_in(lit(list)).into();
Ok(PipelineData::Value(expr.into_value(call.head), None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::eager::WithColumn;
use crate::dataframe::expressions::alias::ExprAlias;
use crate::dataframe::expressions::col::ExprCol;
#[test]
fn test_examples() {
test_dataframe(vec![
Box::new(ExprIsIn {}),
Box::new(ExprAlias {}),
Box::new(ExprCol {}),
Box::new(WithColumn {}),
])
}
}

View file

@ -1,8 +1,10 @@
mod alias;
mod arg_where;
mod as_nu;
mod col;
mod concat_str;
mod expressions_macro;
mod is_in;
mod lit;
mod otherwise;
mod quantile;
@ -11,10 +13,12 @@ mod when;
use nu_protocol::engine::StateWorkingSet;
pub(crate) use crate::dataframe::expressions::alias::ExprAlias;
use crate::dataframe::expressions::arg_where::ExprArgWhere;
use crate::dataframe::expressions::as_nu::ExprAsNu;
pub(super) use crate::dataframe::expressions::col::ExprCol;
pub(super) use crate::dataframe::expressions::concat_str::ExprConcatStr;
pub(crate) use crate::dataframe::expressions::expressions_macro::*;
pub(super) use crate::dataframe::expressions::is_in::ExprIsIn;
pub(super) use crate::dataframe::expressions::lit::ExprLit;
pub(super) use crate::dataframe::expressions::otherwise::ExprOtherwise;
pub(super) use crate::dataframe::expressions::quantile::ExprQuantile;
@ -33,6 +37,7 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) {
// Dataframe commands
bind_command!(
ExprAlias,
ExprArgWhere,
ExprCol,
ExprConcatStr,
ExprCount,
@ -49,6 +54,7 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) {
ExprFirst,
ExprLast,
ExprNUnique,
ExprIsIn,
ExprIsNotNull,
ExprIsNull,
ExprNot,

View file

@ -0,0 +1,84 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
#[derive(Clone)]
pub struct LazyFilter;
impl Command for LazyFilter {
fn name(&self) -> &str {
"filter"
}
fn usage(&self) -> &str {
"Filter dataframe based in expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"filter expression",
SyntaxShape::Any,
"Expression that define the column selection",
)
.input_type(Type::Custom("dataframe".into()))
.output_type(Type::Custom("dataframe".into()))
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Filter dataframe using an expression",
example: "[[a b]; [6 2] [4 2] [2 2]] | into df | filter ((col a) >= 4)",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value: Value = call.req(engine_state, stack, 0)?;
let expression = NuExpression::try_from_value(value)?;
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.into_polars().filter(expression.into_polars()),
);
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(LazyFilter {})])
}
}

View file

@ -3,6 +3,7 @@ mod collect;
mod fetch;
mod fill_na;
mod fill_null;
mod filter;
pub mod groupby;
mod join;
mod macro_commands;
@ -13,17 +14,17 @@ mod to_lazy;
use nu_protocol::engine::StateWorkingSet;
pub(crate) use crate::dataframe::lazy::macro_commands::*;
use crate::dataframe::lazy::aggregate::LazyAggregate;
pub use crate::dataframe::lazy::collect::LazyCollect;
use crate::dataframe::lazy::fetch::LazyFetch;
use crate::dataframe::lazy::fill_na::LazyFillNA;
use crate::dataframe::lazy::fill_null::LazyFillNull;
use crate::dataframe::lazy::filter::LazyFilter;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
use crate::dataframe::lazy::join::LazyJoin;
pub(crate) use crate::dataframe::lazy::macro_commands::*;
use crate::dataframe::lazy::quantile::LazyQuantile;
use crate::dataframe::lazy::select::LazySelect;
pub(crate) use crate::dataframe::lazy::select::LazySelect;
use crate::dataframe::lazy::sort_by_expr::LazySortBy;
pub use crate::dataframe::lazy::to_lazy::ToLazyFrame;
@ -45,6 +46,7 @@ pub fn add_lazy_decls(working_set: &mut StateWorkingSet) {
LazyFetch,
LazyFillNA,
LazyFillNull,
LazyFilter,
LazyJoin,
LazyQuantile,
LazyMax,

View file

@ -6,8 +6,6 @@ use nu_protocol::{
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
use polars::prelude::Expr;
#[derive(Clone)]
pub struct LazySelect;
@ -61,17 +59,6 @@ impl Command for LazySelect {
};
let expressions = NuExpression::extract_exprs(value)?;
if expressions
.iter()
.any(|expr| !matches!(expr, Expr::Column(..)))
{
let value: Value = call.req(engine_state, stack, 0)?;
return Err(ShellError::IncompatibleParametersSingle(
"Expected only Col expressions".into(),
value.span()?,
));
}
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().select(&expressions));

View file

@ -32,6 +32,11 @@ impl Command for LazySortBy {
"Reverse sorting. Default is false",
Some('r'),
)
.switch(
"nulls-last",
"nulls are shown last in the dataframe",
Some('n'),
)
.input_type(Type::Custom("dataframe".into()))
.output_type(Type::Custom("dataframe".into()))
.category(Category::Custom("lazyframe".into()))
@ -102,6 +107,7 @@ impl Command for LazySortBy {
span: call.head,
};
let expressions = NuExpression::extract_exprs(value)?;
let nulls_last = call.has_flag("nulls-last");
let reverse: Option<Vec<bool>> = call.get_flag(engine_state, stack, "reverse")?;
let reverse = match reverse {
@ -128,7 +134,8 @@ impl Command for LazySortBy {
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.into_polars().sort_by_exprs(&expressions, reverse),
lazy.into_polars()
.sort_by_exprs(&expressions, reverse, nulls_last),
);
Ok(PipelineData::Value(

View file

@ -35,7 +35,7 @@ impl Command for GetWeekDay {
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"0".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
vec![Value::test_int(1), Value::test_int(1)],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),

View file

@ -5,7 +5,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
use polars::prelude::{arg_where, col, IntoLazy};
#[derive(Clone)]
pub struct ArgTrue;
@ -59,23 +59,41 @@ fn command(
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let series = df.as_series(call.head)?;
let bool = series.bool().map_err(|_| {
ShellError::GenericError(
"Error converting to bool".into(),
"all-false only works with series of type bool".into(),
let columns = df.as_ref().get_column_names();
if columns.len() > 1 {
return Err(ShellError::GenericError(
"Error using as series".into(),
"dataframe has more than one column".into(),
Some(call.head),
None,
Vec::new(),
)
})?;
));
}
let mut res = bool.arg_true().into_series();
res.rename("arg_true");
match columns.first() {
Some(column) => {
let expression = arg_where(col(column).eq(true)).alias("arg_true");
let res = df
.as_ref()
.clone()
.lazy()
.select(&[expression])
.collect()
.map_err(|err| {
ShellError::GenericError(
"Error creating index column".into(),
err.to_string(),
Some(call.head),
None,
Vec::new(),
)
})?;
NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
let value = NuDataFrame::dataframe_into_value(res, call.head);
Ok(PipelineData::Value(value, None))
}
_ => todo!(),
}
}
#[cfg(test)]

View file

@ -146,7 +146,7 @@ fn command(
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
}
Value::Float { val, span } => {
let chunked = series.as_ref().f64().map_err(|e| {
let chunked = series.f64().map_err(|e| {
ShellError::GenericError(
"Error casting to f64".into(),
e.to_string(),
@ -169,7 +169,7 @@ fn command(
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
}
Value::String { val, span } => {
let chunked = series.as_ref().utf8().map_err(|e| {
let chunked = series.utf8().map_err(|e| {
ShellError::GenericError(
"Error casting to string".into(),
e.to_string(),

View file

@ -27,26 +27,46 @@ impl Command for IsDuplicated {
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create mask indicating duplicated values",
example: "[5 6 6 6 8 8 8] | into df | is-duplicated",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"is_duplicated".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
vec![
Example {
description: "Create mask indicating duplicated values",
example: "[5 6 6 6 8 8 8] | into df | is-duplicated",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"is_duplicated".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Create mask indicating duplicated rows in a dataframe",
example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | into df | is-duplicated",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"is_duplicated".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
@ -69,7 +89,7 @@ fn command(
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let mut res = df
.as_series(call.head)?
.as_ref()
.is_duplicated()
.map_err(|e| {
ShellError::GenericError(
@ -84,7 +104,7 @@ fn command(
res.rename("is_duplicated");
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
}

View file

@ -27,26 +27,46 @@ impl Command for IsUnique {
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create mask indicating unique values",
example: "[5 6 6 6 8 8 8] | into df | is-unique",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"is_unique".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
vec![
Example {
description: "Create mask indicating unique values",
example: "[5 6 6 6 8 8 8] | into df | is-unique",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"is_unique".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Create mask indicating duplicated rows in a dataframe",
example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | into df | is-unique",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"is_unique".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(true),
],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
@ -68,18 +88,23 @@ fn command(
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let mut res = df.as_series(call.head)?.is_unique().map_err(|e| {
ShellError::GenericError(
"Error finding unique values".into(),
e.to_string(),
Some(call.head),
None,
Vec::new(),
)
})?;
let mut res = df
.as_ref()
.is_unique()
.map_err(|e| {
ShellError::GenericError(
"Error finding unique values".into(),
e.to_string(),
Some(call.head),
None,
Vec::new(),
)
})?
.into_series();
res.rename("is_unique");
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
}

View file

@ -129,7 +129,7 @@ fn command(
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
}
Value::Float { val, span } => {
let chunked = series.as_ref().f64().map_err(|e| {
let chunked = series.f64().map_err(|e| {
ShellError::GenericError(
"Error casting to f64".into(),
e.to_string(),
@ -152,7 +152,7 @@ fn command(
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
}
Value::String { val, span } => {
let chunked = series.as_ref().utf8().map_err(|e| {
let chunked = series.utf8().map_err(|e| {
ShellError::GenericError(
"Error casting to string".into(),
e.to_string(),

View file

@ -6,6 +6,8 @@ use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::SeriesMethods;
#[derive(Clone)]
pub struct ValueCount;
@ -66,7 +68,7 @@ fn command(
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let series = df.as_series(call.head)?;
let res = series.value_counts(false).map_err(|e| {
let res = series.value_counts(false, false).map_err(|e| {
ShellError::GenericError(
"Error calculating value counts values".into(),
e.to_string(),

View file

@ -190,7 +190,7 @@ fn find_with_regex(
(true, true, true) => "(?ims)",
};
let regex = flags.to_string() + &regex;
let regex = flags.to_string() + regex.as_str();
let re = Regex::new(regex.as_str())
.map_err(|e| ShellError::UnsupportedInput(format!("incorrect regex: {}", e), span))?;

View file

@ -199,7 +199,7 @@ impl Iterator for RawStreamLinesAdapter {
if !self.incomplete_line.is_empty() {
if let Some(first) = lines.first() {
let new_incomplete_line =
self.incomplete_line.to_string() + first;
self.incomplete_line.to_string() + first.as_str();
lines.splice(0..1, vec![new_incomplete_line]);
self.incomplete_line = String::new();
}

View file

@ -133,7 +133,7 @@ fn convert_yaml_value_to_nu_value(v: &serde_yaml::Value, span: Span) -> Result<V
.and_then(|e| match e {
(serde_yaml::Value::String(s), serde_yaml::Value::Null) => {
Some(Value::String {
val: "{{ ".to_owned() + s + " }}",
val: "{{ ".to_owned() + s.as_str() + " }}",
span,
})
}

View file

@ -59,7 +59,7 @@ fn try_source_foo_with_double_quotes_in(testdir: &str, playdir: &str) {
sandbox.mkdir(&testdir);
sandbox.with_files(vec![FileWithContent(&foo_file, "echo foo")]);
let cmd = String::from("source ") + r#"""# + &foo_file + r#"""#;
let cmd = String::from("source ") + r#"""# + foo_file.as_str() + r#"""#;
let actual = nu!(cwd: dirs.test(), &cmd);
@ -76,7 +76,7 @@ fn try_source_foo_with_single_quotes_in(testdir: &str, playdir: &str) {
sandbox.mkdir(&testdir);
sandbox.with_files(vec![FileWithContent(&foo_file, "echo foo")]);
let cmd = String::from("source ") + r#"'"# + &foo_file + r#"'"#;
let cmd = String::from("source ") + r#"'"# + foo_file.as_str() + r#"'"#;
let actual = nu!(cwd: dirs.test(), &cmd);
@ -93,7 +93,7 @@ fn try_source_foo_without_quotes_in(testdir: &str, playdir: &str) {
sandbox.mkdir(&testdir);
sandbox.with_files(vec![FileWithContent(&foo_file, "echo foo")]);
let cmd = String::from("source ") + &foo_file;
let cmd = String::from("source ") + foo_file.as_str();
let actual = nu!(cwd: dirs.test(), &cmd);