From 38f3957edf851b1f43b4cad1570f7e81dae44ae2 Mon Sep 17 00:00:00 2001 From: Fernando Herrera Date: Wed, 15 Jun 2022 11:45:03 -0500 Subject: [PATCH] update polars (#5791) --- Cargo.lock | 63 ++++++++++--------- crates/nu-command/Cargo.toml | 4 +- crates/nu-command/src/dataframe/eager/open.rs | 4 +- .../nu-command/src/dataframe/eager/sample.rs | 25 +++++--- .../nu-command/src/dataframe/eager/to_csv.rs | 2 +- .../src/dataframe/series/date/get_weekday.rs | 2 +- .../src/dataframe/series/rolling.rs | 13 ++-- .../dataframe/series/string/concatenate.rs | 2 +- .../src/dataframe/series/string/contains.rs | 2 +- .../src/dataframe/series/string/replace.rs | 2 +- .../dataframe/series/string/replace_all.rs | 2 +- .../dataframe/series/string/str_lengths.rs | 2 +- .../src/dataframe/series/string/str_slice.rs | 2 +- .../dataframe/series/string/to_lowercase.rs | 2 +- .../dataframe/series/string/to_uppercase.rs | 2 +- .../values/nu_dataframe/between_values.rs | 2 +- 16 files changed, 73 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8d77da491f..b06bfa6641 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -131,9 +131,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow-format" -version = "0.4.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2333f8ccf0d597ba779863c57a0b61f635721187fb2fdeabae92691d7d582fe5" +checksum = "216249afef413d7e9e9b4b543e73b3e371ace3a812380af98f1c871521572cdd" dependencies = [ "planus", "serde", @@ -141,26 +141,24 @@ dependencies = [ [[package]] name = "arrow2" -version = "0.11.2" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b040061368d1314b0fd8b8f1fde0671eba1afc63a1c61a4dafaf2d4fc10c96f9" +checksum = "5feafd6df4e3f577529e6aa2b9b7cdb3c9fe8e8f66ebc8dc29abbe71a7e968f0" dependencies = [ "arrow-format", "base64", "bytemuck", "chrono", - "csv-core", "either", "fallible-streaming-iterator", "futures", "hash_hasher", "indexmap", + "json-deserializer", "lexical-core", "multiversion", "num-traits", "parquet2", - "serde", - "serde_json", "simdutf8", "streaming-iterator", "strength_reduce", @@ -1875,6 +1873,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "json-deserializer" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47631885425c482fcf2dc4b182fc973c3c5b81a8f43a028055559bd24cccfa6e" + [[package]] name = "kernel32-sys" version = "0.2.2" @@ -3145,9 +3149,9 @@ dependencies = [ [[package]] name = "parquet2" -version = "0.12.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbacca5619bdee7f942938890451dea1a61f082c682aac913d7b4e326e66d7b4" +checksum = "73fd2690ad041f9296876daef1f2706f6347073bdbcc719090887f1691e4a09d" dependencies = [ "async-stream", "bitpacking", @@ -3319,9 +3323,9 @@ dependencies = [ [[package]] name = "polars" -version = "0.21.1" +version = "0.22.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b140da767e129c60c41c8e1968ffab5f114bcf823182edb7fa900464a31bf421" +checksum = "3d175c67e80ceaef7219258cfc3a8686531d9510875b0cefa25404e5b80a7933" dependencies = [ "polars-core", "polars-io", @@ -3332,9 +3336,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.21.1" +version = "0.22.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d27df11ee28956bd6f5aed54e7e05ce87b886871995e1da501134627ec89077" +checksum = "f66c7d3da2c10a09131294dbe7802fac792f570be639dc6ebf207bfc3e144287" dependencies = [ "arrow2", "hashbrown 0.12.1", @@ -3345,9 +3349,9 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.21.1" +version = "0.22.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdf8d12cb7ec278516228fc86469f98c62ab81ca31e4e76d2c0ccf5a09c70491" +checksum = "f7f15f443a90d5367c4fbbb151e203f03b5b96055c8b928c6bc30655a3644f13" dependencies = [ "ahash", "anyhow", @@ -3356,8 +3360,8 @@ dependencies = [ "comfy-table", "hashbrown 0.12.1", "indexmap", - "lazy_static", "num 0.4.0", + "once_cell", "polars-arrow", "polars-utils", "rand 0.8.5", @@ -3371,20 +3375,21 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.21.1" +version = "0.22.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd4b762e5694f359ded21ca0627b5bc95b6eb49f6b330569afc1d20f0564b01" +checksum = "058d0a847ce5009b974c69ec878ed416e306436f21b626543019f738cee12315" dependencies = [ "ahash", "anyhow", "arrow2", "csv-core", "dirs", - "lazy_static", "lexical", + "lexical-core", "memchr", "memmap2", "num 0.4.0", + "once_cell", "polars-arrow", "polars-core", "polars-time", @@ -3398,9 +3403,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.21.1" +version = "0.22.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eedc21001f05611e41bb7439b38d0f4ef9406aa49c17f3b289b5f57d8fa40c59" +checksum = "dad86a4ce7e32540ff12089bce6f77270fd133a5b263328a92be61defdd6b151" dependencies = [ "ahash", "glob", @@ -3408,6 +3413,7 @@ dependencies = [ "polars-arrow", "polars-core", "polars-io", + "polars-ops", "polars-time", "polars-utils", "rayon", @@ -3416,31 +3422,33 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.21.1" +version = "0.22.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fae68f0992955f224f09d1f15648a6fb76d8e3b962efac2f97ccc2aa58977a" +checksum = "030ecd473be113cd0264f1bc19de39a844fa12fa565db9dc52c859cbc292cf04" dependencies = [ + "polars-arrow", "polars-core", ] [[package]] name = "polars-time" -version = "0.21.1" +version = "0.22.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be499f73749e820f96689c5f9ec59669b7cdd551d864358e2bdaebb5944e4bfb" +checksum = "94047b20d2da3bcc55c421be187a0c6f316cf1eea7fe7ed7347c1160a32d017c" dependencies = [ "chrono", "lexical", "polars-arrow", "polars-core", + "polars-utils", "serde", ] [[package]] name = "polars-utils" -version = "0.21.1" +version = "0.22.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7f4cd569d383f5f000abbd6d5146550e6cb4e43fac30d1af98699499a440d56" +checksum = "fcd3d0238462d5d9f7fbeaaea46e73ed4d58f6fae8b70d53cbe51d7538cc43f5" dependencies = [ "parking_lot 0.12.1", "rayon", @@ -4224,7 +4232,6 @@ version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" dependencies = [ - "indexmap", "itoa 1.0.2", "ryu", "serde", diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index 3f560d21d4..5c94a3b19c 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -97,7 +97,7 @@ version = "2.1.3" optional = true [dependencies.polars] -version = "0.21.1" +version = "0.22.8" # path = "../../../../polars/polars" optional = true features = [ @@ -105,7 +105,7 @@ features = [ "object", "checked_arithmetic", "strings", "cum_agg", "is_in", "rolling_window", "strings", "rows", "random", "dtype-datetime", "dtype-struct", "lazy", "cross_join", - "dynamic_groupby" + "dynamic_groupby", "dtype-categorical" ] [target.'cfg(windows)'.dependencies.windows] diff --git a/crates/nu-command/src/dataframe/eager/open.rs b/crates/nu-command/src/dataframe/eager/open.rs index 9239f79815..876a548cfa 100644 --- a/crates/nu-command/src/dataframe/eager/open.rs +++ b/crates/nu-command/src/dataframe/eager/open.rs @@ -155,7 +155,7 @@ fn from_json( call: &Call, ) -> Result { let file: Spanned = call.req(engine_state, stack, 0)?; - let mut file = File::open(&file.item).map_err(|e| { + let file = File::open(&file.item).map_err(|e| { ShellError::GenericError( "Error opening file".into(), e.to_string(), @@ -165,7 +165,7 @@ fn from_json( ) })?; - let buf_reader = BufReader::new(&mut file); + let buf_reader = BufReader::new(file); let reader = JsonReader::new(buf_reader); reader.finish().map_err(|e| { diff --git a/crates/nu-command/src/dataframe/eager/sample.rs b/crates/nu-command/src/dataframe/eager/sample.rs index 73ec8f8bf0..4f791808f5 100644 --- a/crates/nu-command/src/dataframe/eager/sample.rs +++ b/crates/nu-command/src/dataframe/eager/sample.rs @@ -40,6 +40,7 @@ impl Command for SampleDF { Some('s'), ) .switch("replace", "sample with replace", Some('e')) + .switch("shuffle", "shuffle sample", Some('u')) .category(Category::Custom("dataframe".into())) } @@ -89,22 +90,26 @@ fn command( .get_flag::(engine_state, stack, "seed")? .map(|val| val as u64); let replace: bool = call.has_flag("replace"); + let shuffle: bool = call.has_flag("shuffle"); let df = NuDataFrame::try_from_pipeline(input, call.head)?; match (rows, fraction) { - (Some(rows), None) => df.as_ref().sample_n(rows.item, replace, seed).map_err(|e| { - ShellError::GenericError( - "Error creating sample".into(), - e.to_string(), - Some(rows.span), - None, - Vec::new(), - ) - }), + (Some(rows), None) => df + .as_ref() + .sample_n(rows.item, replace, shuffle, seed) + .map_err(|e| { + ShellError::GenericError( + "Error creating sample".into(), + e.to_string(), + Some(rows.span), + None, + Vec::new(), + ) + }), (None, Some(frac)) => df .as_ref() - .sample_frac(frac.item, replace, seed) + .sample_frac(frac.item, replace, shuffle, seed) .map_err(|e| { ShellError::GenericError( "Error creating sample".into(), diff --git a/crates/nu-command/src/dataframe/eager/to_csv.rs b/crates/nu-command/src/dataframe/eager/to_csv.rs index bb4ffbf14f..30d27241d2 100644 --- a/crates/nu-command/src/dataframe/eager/to_csv.rs +++ b/crates/nu-command/src/dataframe/eager/to_csv.rs @@ -99,7 +99,7 @@ fn command( writer.has_header(true) }; - let writer = match delimiter { + let mut writer = match delimiter { None => writer, Some(d) => { if d.item.len() != 1 { diff --git a/crates/nu-command/src/dataframe/series/date/get_weekday.rs b/crates/nu-command/src/dataframe/series/date/get_weekday.rs index 611320cc3e..0f323b7173 100644 --- a/crates/nu-command/src/dataframe/series/date/get_weekday.rs +++ b/crates/nu-command/src/dataframe/series/date/get_weekday.rs @@ -32,7 +32,7 @@ impl Command for GetWeekDay { result: Some( NuDataFrame::try_from_columns(vec![Column::new( "0".to_string(), - vec![Value::test_int(1), Value::test_int(1)], + vec![Value::test_int(2), Value::test_int(2)], )]) .expect("simple df for test should not fail") .into_value(Span::test_data()), diff --git a/crates/nu-command/src/dataframe/series/rolling.rs b/crates/nu-command/src/dataframe/series/rolling.rs index d250a308e6..a7ebb85981 100644 --- a/crates/nu-command/src/dataframe/series/rolling.rs +++ b/crates/nu-command/src/dataframe/series/rolling.rs @@ -7,7 +7,7 @@ use nu_protocol::{ Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value, }; -use polars::prelude::{DataType, IntoSeries, RollingOptions}; +use polars::prelude::{DataType, Duration, IntoSeries, RollingOptionsImpl, SeriesOpsTime}; enum RollType { Min, @@ -127,7 +127,7 @@ fn command( input: PipelineData, ) -> Result { let roll_type: Spanned = call.req(engine_state, stack, 0)?; - let window_size: usize = call.req(engine_state, stack, 1)?; + let window_size: i64 = call.req(engine_state, stack, 1)?; let df = NuDataFrame::try_from_pipeline(input, call.head)?; let series = df.as_series(call.head)?; @@ -144,11 +144,14 @@ fn command( let roll_type = RollType::from_str(&roll_type.item, roll_type.span)?; - let rolling_opts = RollingOptions { - window_size, - min_periods: window_size, + let rolling_opts = RollingOptionsImpl { + window_size: Duration::new(window_size), + min_periods: window_size as usize, weights: None, center: false, + by: None, + closed_window: None, + tu: None, }; let res = match roll_type { RollType::Max => series.rolling_max(rolling_opts), diff --git a/crates/nu-command/src/dataframe/series/string/concatenate.rs b/crates/nu-command/src/dataframe/series/string/concatenate.rs index 6aa752ce0f..c402652698 100644 --- a/crates/nu-command/src/dataframe/series/string/concatenate.rs +++ b/crates/nu-command/src/dataframe/series/string/concatenate.rs @@ -6,7 +6,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{IntoSeries, Utf8NameSpaceImpl}; #[derive(Clone)] pub struct Concatenate; diff --git a/crates/nu-command/src/dataframe/series/string/contains.rs b/crates/nu-command/src/dataframe/series/string/contains.rs index 68ffb338b3..ceef0e6845 100644 --- a/crates/nu-command/src/dataframe/series/string/contains.rs +++ b/crates/nu-command/src/dataframe/series/string/contains.rs @@ -6,7 +6,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{IntoSeries, Utf8NameSpaceImpl}; #[derive(Clone)] pub struct Contains; diff --git a/crates/nu-command/src/dataframe/series/string/replace.rs b/crates/nu-command/src/dataframe/series/string/replace.rs index 65ca899c67..58759814da 100644 --- a/crates/nu-command/src/dataframe/series/string/replace.rs +++ b/crates/nu-command/src/dataframe/series/string/replace.rs @@ -6,7 +6,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{IntoSeries, Utf8NameSpaceImpl}; #[derive(Clone)] pub struct Replace; diff --git a/crates/nu-command/src/dataframe/series/string/replace_all.rs b/crates/nu-command/src/dataframe/series/string/replace_all.rs index e0cfbbfb4f..08e0797c6e 100644 --- a/crates/nu-command/src/dataframe/series/string/replace_all.rs +++ b/crates/nu-command/src/dataframe/series/string/replace_all.rs @@ -6,7 +6,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{IntoSeries, Utf8NameSpaceImpl}; #[derive(Clone)] pub struct ReplaceAll; diff --git a/crates/nu-command/src/dataframe/series/string/str_lengths.rs b/crates/nu-command/src/dataframe/series/string/str_lengths.rs index b0be9679a4..7fb7865881 100644 --- a/crates/nu-command/src/dataframe/series/string/str_lengths.rs +++ b/crates/nu-command/src/dataframe/series/string/str_lengths.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Type, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{IntoSeries, Utf8NameSpaceImpl}; #[derive(Clone)] pub struct StrLengths; diff --git a/crates/nu-command/src/dataframe/series/string/str_slice.rs b/crates/nu-command/src/dataframe/series/string/str_slice.rs index dfb31e7ee2..ad3507ff8b 100644 --- a/crates/nu-command/src/dataframe/series/string/str_slice.rs +++ b/crates/nu-command/src/dataframe/series/string/str_slice.rs @@ -6,7 +6,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{IntoSeries, Utf8NameSpaceImpl}; #[derive(Clone)] pub struct StrSlice; diff --git a/crates/nu-command/src/dataframe/series/string/to_lowercase.rs b/crates/nu-command/src/dataframe/series/string/to_lowercase.rs index 43c254cd25..ec3291c1d9 100644 --- a/crates/nu-command/src/dataframe/series/string/to_lowercase.rs +++ b/crates/nu-command/src/dataframe/series/string/to_lowercase.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Type, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{IntoSeries, Utf8NameSpaceImpl}; #[derive(Clone)] pub struct ToLowerCase; diff --git a/crates/nu-command/src/dataframe/series/string/to_uppercase.rs b/crates/nu-command/src/dataframe/series/string/to_uppercase.rs index 1b5c86a70b..3aca7b9c3c 100644 --- a/crates/nu-command/src/dataframe/series/string/to_uppercase.rs +++ b/crates/nu-command/src/dataframe/series/string/to_uppercase.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, Type, Value, }; -use polars::prelude::IntoSeries; +use polars::prelude::{IntoSeries, Utf8NameSpaceImpl}; #[derive(Clone)] pub struct ToUpperCase; diff --git a/crates/nu-command/src/dataframe/values/nu_dataframe/between_values.rs b/crates/nu-command/src/dataframe/values/nu_dataframe/between_values.rs index f4d8e8ad36..fb687fb42d 100644 --- a/crates/nu-command/src/dataframe/values/nu_dataframe/between_values.rs +++ b/crates/nu-command/src/dataframe/values/nu_dataframe/between_values.rs @@ -4,7 +4,7 @@ use nu_protocol::{ast::Operator, span, ShellError, Span, Spanned, Value}; use num::Zero; use polars::prelude::{ BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries, - NumOpsDispatchChecked, PolarsError, Series, TimeUnit, + NumOpsDispatchChecked, PolarsError, Series, TimeUnit, Utf8NameSpaceImpl, }; use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub};