mirror of
https://github.com/nushell/nushell
synced 2024-12-26 13:03:07 +00:00
Polars update (#4875)
* update to polars 0.20 * add to date parser for series
This commit is contained in:
parent
3db608eb5c
commit
d6669d3f33
17 changed files with 255 additions and 161 deletions
219
Cargo.lock
generated
219
Cargo.lock
generated
|
@ -102,12 +102,6 @@ version = "0.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76"
|
||||
|
||||
[[package]]
|
||||
name = "arrayref"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.4.12"
|
||||
|
@ -135,15 +129,16 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arrow2"
|
||||
version = "0.9.2"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "185b18ba35287d4e6989e451ab3d184f4699befaa05cf95b0da74152b0c5c24a"
|
||||
checksum = "2e387b20dd573a96f36b173d9027483898f944d696521afd74e2caa3c813d86e"
|
||||
dependencies = [
|
||||
"arrow-format",
|
||||
"base64",
|
||||
"bytemuck",
|
||||
"chrono",
|
||||
"csv",
|
||||
"csv-core",
|
||||
"either",
|
||||
"fallible-streaming-iterator",
|
||||
"futures",
|
||||
"hash_hasher",
|
||||
|
@ -291,17 +286,6 @@ dependencies = [
|
|||
"crunchy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "blake2b_simd"
|
||||
version = "0.5.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"arrayvec 0.5.2",
|
||||
"constant_time_eq",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.9.0"
|
||||
|
@ -527,6 +511,18 @@ dependencies = [
|
|||
"encoding_rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "comfy-table"
|
||||
version = "5.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b103d85ca6e209388771bfb7aa6b68a7aeec4afbf6f0a0264bfbf50360e5212e"
|
||||
dependencies = [
|
||||
"crossterm",
|
||||
"strum 0.23.0",
|
||||
"strum_macros 0.23.1",
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "console"
|
||||
version = "0.15.0"
|
||||
|
@ -548,12 +544,6 @@ version = "0.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb58b6451e8c2a812ad979ed1d83378caa5e927eef2622017a45f251457c2c9d"
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
|
||||
|
||||
[[package]]
|
||||
name = "convert_case"
|
||||
version = "0.4.0"
|
||||
|
@ -821,17 +811,6 @@ dependencies = [
|
|||
"crypto-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"redox_users 0.3.5",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs"
|
||||
version = "4.0.0"
|
||||
|
@ -858,7 +837,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "03d86534ed367a67548dc68113a0f5db55432fdfbb6e6f9d77704397d95d5780"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"redox_users 0.4.0",
|
||||
"redox_users",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
|
@ -869,7 +848,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"redox_users 0.4.0",
|
||||
"redox_users",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
|
@ -1420,6 +1399,15 @@ dependencies = [
|
|||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
|
||||
dependencies = [
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.0"
|
||||
|
@ -2763,7 +2751,7 @@ dependencies = [
|
|||
"cfg-if",
|
||||
"instant",
|
||||
"libc",
|
||||
"redox_syscall 0.2.11",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"winapi",
|
||||
]
|
||||
|
@ -2776,7 +2764,7 @@ checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954"
|
|||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall 0.2.11",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"windows-sys 0.32.0",
|
||||
]
|
||||
|
@ -2796,9 +2784,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "parquet2"
|
||||
version = "0.9.2"
|
||||
version = "0.10.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "45476d276db539ec4076f6abe62392619460fb70a1a8edebcc06e11cd93c0ec3"
|
||||
checksum = "6b085f9e78e4842865151b693f6d94bdf7b280af66daa6e3587adeb3106a07e9"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"bitpacking",
|
||||
|
@ -2970,20 +2958,21 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "polars"
|
||||
version = "0.19.1"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2620069231dd93a27f7fc982c10379394a540775057fc569a669a40c2ad7207d"
|
||||
checksum = "656db3b86c338a8a717476eb29436a380ebdf74915a71cff6ecce78d52173e53"
|
||||
dependencies = [
|
||||
"polars-core",
|
||||
"polars-io",
|
||||
"polars-lazy",
|
||||
"polars-time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-arrow"
|
||||
version = "0.19.1"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "863d723959237470af38fee06ca74c58e52ee3778e4796fbf66e05deb5f925f6"
|
||||
checksum = "fcedf44a7b15b60c69e811c9d343ac459788e961dc4136f002ed1b68a1fada07"
|
||||
dependencies = [
|
||||
"arrow2",
|
||||
"hashbrown 0.12.0",
|
||||
|
@ -2993,20 +2982,21 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "polars-core"
|
||||
version = "0.19.1"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "006c8d5c4b057ec2287766c14293d70c76bfc65e949dd028a76a501098e3253b"
|
||||
checksum = "6dfed0e21ac4d4c85df45b5864a68cfc5b2a97e9fba8a981be7b09c6f02a7eaa"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
"arrow2",
|
||||
"chrono",
|
||||
"comfy-table",
|
||||
"hashbrown 0.12.0",
|
||||
"indexmap",
|
||||
"lazy_static",
|
||||
"num 0.4.0",
|
||||
"num_cpus",
|
||||
"polars-arrow",
|
||||
"polars-time",
|
||||
"prettytable-rs",
|
||||
"rand 0.8.5",
|
||||
"rand_distr",
|
||||
"rayon",
|
||||
|
@ -3014,20 +3004,19 @@ dependencies = [
|
|||
"serde",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
"unsafe_unwrap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-io"
|
||||
version = "0.19.1"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c99b04f1c31a6d0d121242e0b4f725c7c608665772e66621e42ed374a28ea6bc"
|
||||
checksum = "d8770fb4233ab88affac80c410be090dc7a2c044a9e4e7b942132e94ceeb732b"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
"arrow2",
|
||||
"csv-core",
|
||||
"dirs 4.0.0",
|
||||
"dirs",
|
||||
"lazy_static",
|
||||
"lexical",
|
||||
"memchr",
|
||||
|
@ -3039,41 +3028,46 @@ dependencies = [
|
|||
"polars-utils",
|
||||
"rayon",
|
||||
"regex",
|
||||
"serde_json",
|
||||
"simdutf8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-lazy"
|
||||
version = "0.19.1"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3545b1e34365293d761e06d98d535da322ce04c02d638eefade1db61e0aa22b4"
|
||||
checksum = "4eca1fed3b88ae1bb9b7f1d7b2958f1655d9c1aed33495d6ba30ff84a0c1e9e9"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"glob",
|
||||
"parking_lot 0.12.0",
|
||||
"polars-arrow",
|
||||
"polars-core",
|
||||
"polars-io",
|
||||
"polars-time",
|
||||
"polars-utils",
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-time"
|
||||
version = "0.1.1"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0c28b9d7a4badf14d54819ed1c49112e44348a3c7087854414a55b47bb46eea"
|
||||
checksum = "0fe48c759ca778a8b6fb30f70e9a81b56f0987a82dc71e61c5b2d3c236b6b8d6"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"polars-arrow",
|
||||
"polars-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-utils"
|
||||
version = "0.1.0"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03f6f755d66fedfc506fb1df64aa8adb904cd0ef19fd4fb7f339e7ec3619aa65"
|
||||
checksum = "71011e8ed52f123ce23d110b496c8704d0a59c5fd4115cd938e7ff19d4bcb7ca"
|
||||
dependencies = [
|
||||
"parking_lot 0.11.2",
|
||||
"parking_lot 0.12.0",
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -3137,20 +3131,6 @@ dependencies = [
|
|||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prettytable-rs"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fd04b170004fa2daccf418a7f8253aaf033c27760b5f225889024cf66d7ac2e"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"csv",
|
||||
"encode_unicode",
|
||||
"lazy_static",
|
||||
"term",
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-error"
|
||||
version = "1.0.4"
|
||||
|
@ -3423,12 +3403,6 @@ dependencies = [
|
|||
"rand_core 0.3.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.1.57"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.11"
|
||||
|
@ -3438,17 +3412,6 @@ dependencies = [
|
|||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_users"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de0737333e7a9502c789a36d7c7fa6092a49895d4faa31ca5df163857ded2e9d"
|
||||
dependencies = [
|
||||
"getrandom 0.1.16",
|
||||
"redox_syscall 0.1.57",
|
||||
"rust-argon2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_users"
|
||||
version = "0.4.0"
|
||||
|
@ -3456,7 +3419,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64"
|
||||
dependencies = [
|
||||
"getrandom 0.2.5",
|
||||
"redox_syscall 0.2.11",
|
||||
"redox_syscall",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -3471,8 +3434,8 @@ dependencies = [
|
|||
"nu-ansi-term",
|
||||
"serde",
|
||||
"strip-ansi-escapes",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"strum 0.24.0",
|
||||
"strum_macros 0.24.0",
|
||||
"unicode-segmentation",
|
||||
"unicode-width",
|
||||
]
|
||||
|
@ -3573,18 +3536,6 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust-argon2"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b18820d944b33caa75a71378964ac46f58517c92b6ae5f762636247c09e78fb"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"blake2b_simd",
|
||||
"constant_time_eq",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust-embed"
|
||||
version = "6.3.0"
|
||||
|
@ -4086,19 +4037,38 @@ dependencies = [
|
|||
"vte",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.23.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e96acfc1b70604b8b2f1ffa4c57e59176c7dbb05d556c71ecd2f5498a1dee7f8"
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.23.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38"
|
||||
dependencies = [
|
||||
"heck 0.3.3",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6878079b17446e4d3eba6192bb0a2950d5b14f0ed8424b852310e5a94345d0ef"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"heck 0.4.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
|
@ -4212,7 +4182,7 @@ dependencies = [
|
|||
"cfg-if",
|
||||
"fastrand",
|
||||
"libc",
|
||||
"redox_syscall 0.2.11",
|
||||
"redox_syscall",
|
||||
"remove_dir_all",
|
||||
"winapi",
|
||||
]
|
||||
|
@ -4228,17 +4198,6 @@ dependencies = [
|
|||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "term"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edd106a334b7657c10b7c540a0106114feadeb4dc314513e97df481d5d966f42"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"dirs 1.0.5",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.1.3"
|
||||
|
@ -4533,12 +4492,6 @@ version = "0.2.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
|
||||
|
||||
[[package]]
|
||||
name = "unsafe_unwrap"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1230ec65f13e0f9b28d789da20d2d419511893ea9dac2c1f4ef67b8b14e5da80"
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "2.2.2"
|
||||
|
@ -4984,18 +4937,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "zstd"
|
||||
version = "0.9.2+zstd.1.5.1"
|
||||
version = "0.10.0+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2390ea1bf6c038c39674f22d95f0564725fc06034a47129179810b2fc58caa54"
|
||||
checksum = "3b1365becbe415f3f0fcd024e2f7b45bacfb5bdd055f0dc113571394114e7bdd"
|
||||
dependencies = [
|
||||
"zstd-safe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-safe"
|
||||
version = "4.1.3+zstd.1.5.1"
|
||||
version = "4.1.4+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e99d81b99fb3c2c2c794e3fe56c305c63d5173a16a46b5850b07c935ffc7db79"
|
||||
checksum = "2f7cd17c9af1a4d6c24beb1cc54b17e2ef7b593dc92f19e9d9acad8b182bbaee"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"zstd-sys",
|
||||
|
@ -5003,9 +4956,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "1.6.2+zstd.1.5.1"
|
||||
version = "1.6.3+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2daf2f248d9ea44454bfcb2516534e8b8ad2fc91bf818a1885495fc42bc8ac9f"
|
||||
checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
|
|
|
@ -84,7 +84,7 @@ umask = "1.0.0"
|
|||
users = "0.11.0"
|
||||
|
||||
[dependencies.polars]
|
||||
version = "0.19.1"
|
||||
version = "0.20.0"
|
||||
optional = true
|
||||
features = [
|
||||
"default", "parquet", "json", "serde", "object",
|
||||
|
|
|
@ -162,7 +162,7 @@ fn command(
|
|||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let names = ChunkedArray::<Utf8Type>::new_from_opt_slice("descriptor", &labels).into_series();
|
||||
let names = ChunkedArray::<Utf8Type>::from_slice_options("descriptor", &labels).into_series();
|
||||
|
||||
let head = std::iter::once(names);
|
||||
|
||||
|
@ -235,7 +235,7 @@ fn command(
|
|||
descriptors.push(max);
|
||||
|
||||
let name = format!("{} ({})", col.name(), col.dtype());
|
||||
ChunkedArray::<Float64Type>::new_from_opt_slice(&name, &descriptors).into_series()
|
||||
ChunkedArray::<Float64Type>::from_slice_options(&name, &descriptors).into_series()
|
||||
});
|
||||
|
||||
let res = head.chain(tail).collect::<Vec<Series>>();
|
||||
|
|
|
@ -4,6 +4,7 @@ use nu_protocol::{
|
|||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
use polars::prelude::DistinctKeepStrategy;
|
||||
|
||||
use super::super::values::utils::convert_columns_string;
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
@ -28,6 +29,11 @@ impl Command for DropDuplicates {
|
|||
"subset of columns to drop duplicates",
|
||||
)
|
||||
.switch("maintain", "maintain order", Some('m'))
|
||||
.switch(
|
||||
"last",
|
||||
"keeps last duplicate value (by default keeps first)",
|
||||
Some('l'),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
|
@ -82,8 +88,14 @@ fn command(
|
|||
|
||||
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
|
||||
|
||||
let keep_strategy = if call.has_flag("last") {
|
||||
DistinctKeepStrategy::Last
|
||||
} else {
|
||||
DistinctKeepStrategy::First
|
||||
};
|
||||
|
||||
df.as_ref()
|
||||
.drop_duplicates(call.has_flag("maintain"), subset_slice)
|
||||
.distinct(subset_slice, keep_strategy)
|
||||
.map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error dropping duplicates".into(),
|
||||
|
|
|
@ -71,7 +71,7 @@ fn command(
|
|||
let delimiter: Option<Spanned<String>> = call.get_flag(engine_state, stack, "delimiter")?;
|
||||
let no_header: bool = call.has_flag("no-header");
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let mut file = File::create(&file_name.item).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
|
@ -109,7 +109,7 @@ fn command(
|
|||
}
|
||||
};
|
||||
|
||||
writer.finish(df.as_ref()).map_err(|e| {
|
||||
writer.finish(df.as_mut()).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error writing to file".into(),
|
||||
e.to_string(),
|
||||
|
|
|
@ -55,7 +55,7 @@ fn command(
|
|||
) -> Result<PipelineData, ShellError> {
|
||||
let file_name: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let file = File::create(&file_name.item).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
|
@ -65,7 +65,7 @@ fn command(
|
|||
)
|
||||
})?;
|
||||
|
||||
ParquetWriter::new(file).finish(df.as_ref()).map_err(|e| {
|
||||
ParquetWriter::new(file).finish(df.as_mut()).map_err(|e| {
|
||||
ShellError::SpannedLabeledError("Error saving file".into(), e.to_string(), file_name.span)
|
||||
})?;
|
||||
|
||||
|
|
|
@ -60,8 +60,8 @@ fn command(
|
|||
|
||||
let res = series.arg_max();
|
||||
let chunked = match res {
|
||||
Some(index) => UInt32Chunked::new_from_slice("arg_max", &[index as u32]),
|
||||
None => UInt32Chunked::new_from_slice("arg_max", &[]),
|
||||
Some(index) => UInt32Chunked::from_slice("arg_max", &[index as u32]),
|
||||
None => UInt32Chunked::from_slice("arg_max", &[]),
|
||||
};
|
||||
|
||||
let res = chunked.into_series();
|
||||
|
|
|
@ -60,8 +60,8 @@ fn command(
|
|||
|
||||
let res = series.arg_min();
|
||||
let chunked = match res {
|
||||
Some(index) => UInt32Chunked::new_from_slice("arg_min", &[index as u32]),
|
||||
None => UInt32Chunked::new_from_slice("arg_min", &[]),
|
||||
Some(index) => UInt32Chunked::from_slice("arg_min", &[index as u32]),
|
||||
None => UInt32Chunked::from_slice("arg_min", &[]),
|
||||
};
|
||||
|
||||
let res = chunked.into_series();
|
||||
|
|
87
crates/nu-command/src/dataframe/series/date/as_date.rs
Normal file
87
crates/nu-command/src/dataframe/series/date/as_date.rs
Normal file
|
@ -0,0 +1,87 @@
|
|||
use super::super::super::values::NuDataFrame;
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AsDate;
|
||||
|
||||
impl Command for AsDate {
|
||||
fn name(&self) -> &str {
|
||||
"dfr as-date"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
r#"Converts string to date. Format example:
|
||||
"%Y-%m-%d" => 2021-12-31
|
||||
"%d-%m-%Y" => 31-12-2021
|
||||
"%Y%m%d" => 2021319 (2021-03-19)"#
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("format", SyntaxShape::String, "formating date string")
|
||||
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Converts string to date",
|
||||
example: r#"["2021-12-30" "2021-12-31"] | dfr to-df | dfr as-datetime "%Y-%m-%d""#,
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let format: String = call.req(engine_state, stack, 0)?;
|
||||
let not_exact = call.has_flag("not-exact");
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
let casted = series.utf8().map_err(|e| {
|
||||
ShellError::SpannedLabeledError("Error casting to string".into(), e.to_string(), call.head)
|
||||
})?;
|
||||
|
||||
let res = if not_exact {
|
||||
casted.as_date_not_exact(Some(format.as_str()))
|
||||
} else {
|
||||
casted.as_date(Some(format.as_str()))
|
||||
};
|
||||
|
||||
let mut res = res
|
||||
.map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error creating datetime".into(),
|
||||
e.to_string(),
|
||||
call.head,
|
||||
)
|
||||
})?
|
||||
.into_series();
|
||||
|
||||
res.rename("date");
|
||||
|
||||
NuDataFrame::try_from_series(vec![res], call.head)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
|
@ -34,7 +34,7 @@ impl Command for AsDateTime {
|
|||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("format", SyntaxShape::String, "formating date string")
|
||||
.required("format", SyntaxShape::String, "formating date time string")
|
||||
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
@ -45,7 +45,7 @@ impl Command for AsDateTime {
|
|||
example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | dfr to-df | dfr as-datetime "%Y-%m-%d %H:%M:%S""#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"0".to_string(),
|
||||
"datetime".to_string(),
|
||||
vec![
|
||||
Value::Date {
|
||||
val: DateTime::parse_from_str(
|
||||
|
@ -103,7 +103,7 @@ fn command(
|
|||
casted.as_datetime(Some(format.as_str()), TimeUnit::Milliseconds)
|
||||
};
|
||||
|
||||
let res = res
|
||||
let mut res = res
|
||||
.map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error creating datetime".into(),
|
||||
|
@ -113,6 +113,7 @@ fn command(
|
|||
})?
|
||||
.into_series();
|
||||
|
||||
res.rename("datetime");
|
||||
NuDataFrame::try_from_series(vec![res], call.head)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
mod as_date;
|
||||
mod as_datetime;
|
||||
mod get_day;
|
||||
mod get_hour;
|
||||
|
@ -10,6 +11,7 @@ mod get_week;
|
|||
mod get_weekday;
|
||||
mod get_year;
|
||||
|
||||
pub use as_date::AsDate;
|
||||
pub use as_datetime::AsDateTime;
|
||||
pub use get_day::GetDay;
|
||||
pub use get_hour::GetHour;
|
||||
|
|
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
|||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{IntoSeries, SortOptions};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ArgSort;
|
||||
|
@ -22,6 +22,7 @@ impl Command for ArgSort {
|
|||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.switch("reverse", "reverse order", Some('r'))
|
||||
.switch("nulls-last", "nulls ordered last", Some('n'))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
|
@ -85,10 +86,12 @@ fn command(
|
|||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let mut res = df
|
||||
.as_series(call.head)?
|
||||
.argsort(call.has_flag("reverse"))
|
||||
.into_series();
|
||||
let sort_options = SortOptions {
|
||||
descending: call.has_flag("reverse"),
|
||||
nulls_last: call.has_flag("nulls-last"),
|
||||
};
|
||||
|
||||
let mut res = df.as_series(call.head)?.argsort(sort_options).into_series();
|
||||
res.rename("arg_sort");
|
||||
|
||||
NuDataFrame::try_from_series(vec![res], call.head)
|
||||
|
|
|
@ -57,6 +57,7 @@ pub fn add_series_decls(working_set: &mut StateWorkingSet) {
|
|||
ArgSort,
|
||||
ArgTrue,
|
||||
ArgUnique,
|
||||
AsDate,
|
||||
AsDateTime,
|
||||
Concatenate,
|
||||
Contains,
|
||||
|
|
|
@ -530,6 +530,25 @@ where
|
|||
)),
|
||||
}
|
||||
}
|
||||
DataType::Date => {
|
||||
let to_i64 = series.cast(&DataType::Int64);
|
||||
|
||||
match to_i64 {
|
||||
Ok(series) => {
|
||||
let nanosecs_per_day: i64 = 24 * 60 * 60 * 1_000_000_000;
|
||||
let casted = series
|
||||
.i64()
|
||||
.map(|chunked| chunked.mul(nanosecs_per_day))
|
||||
.expect("already checked for casting");
|
||||
compare_casted_i64(Ok(&casted), val, f, span)
|
||||
}
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Unable to cast to f64".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
DataType::Int64 => {
|
||||
let casted = series.i64();
|
||||
compare_casted_i64(casted, val, f, span)
|
||||
|
|
|
@ -430,7 +430,7 @@ pub fn create_column(
|
|||
Ok(Column::new(casted.name().into(), values))
|
||||
}
|
||||
DataType::Time => {
|
||||
let casted = series.time().map_err(|e| {
|
||||
let casted = series.timestamp(TimeUnit::Nanoseconds).map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to time".into(), e.to_string())
|
||||
})?;
|
||||
|
||||
|
@ -596,7 +596,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, Shel
|
|||
});
|
||||
|
||||
let res: DatetimeChunked =
|
||||
ChunkedArray::<Int64Type>::new_from_opt_iter(&name, it)
|
||||
ChunkedArray::<Int64Type>::from_iter_options(&name, it)
|
||||
.into_datetime(TimeUnit::Milliseconds, None);
|
||||
|
||||
df_series.push(res.into_series())
|
||||
|
@ -610,7 +610,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, Shel
|
|||
}
|
||||
});
|
||||
|
||||
let res = ChunkedArray::<Int64Type>::new_from_opt_iter(&name, it);
|
||||
let res = ChunkedArray::<Int64Type>::from_iter_options(&name, it);
|
||||
|
||||
df_series.push(res.into_series())
|
||||
}
|
||||
|
|
|
@ -394,7 +394,7 @@ impl NuDataFrame {
|
|||
// Casting needed to compare other numeric types with nushell numeric type.
|
||||
// In nushell we only have i64 integer numeric types and any array created
|
||||
// with nushell untagged primitives will be of type i64
|
||||
DataType::UInt32 => match self_series.cast(&DataType::Int64) {
|
||||
DataType::UInt32 | DataType::Int32 => match self_series.cast(&DataType::Int64) {
|
||||
Ok(series) => series,
|
||||
Err(_) => return None,
|
||||
},
|
||||
|
|
|
@ -2,26 +2,42 @@ mod custom_value;
|
|||
|
||||
use nu_protocol::{PipelineData, ShellError, Span, Value};
|
||||
use polars::frame::groupby::{GroupBy, GroupsProxy};
|
||||
use polars::prelude::DataFrame;
|
||||
use polars::prelude::{DataFrame, GroupsIdx};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub enum NuGroupsProxy {
|
||||
Idx(Vec<(u32, Vec<u32>)>),
|
||||
Idx {
|
||||
sorted: bool,
|
||||
all: Vec<(u32, Vec<u32>)>,
|
||||
},
|
||||
Slice(Vec<[u32; 2]>),
|
||||
}
|
||||
|
||||
impl NuGroupsProxy {
|
||||
fn from_polars(groups: &GroupsProxy) -> Self {
|
||||
match groups {
|
||||
GroupsProxy::Idx(indexes) => NuGroupsProxy::Idx(indexes.clone()),
|
||||
GroupsProxy::Idx(indexes) => NuGroupsProxy::Idx {
|
||||
sorted: indexes.is_sorted(),
|
||||
all: indexes
|
||||
.iter()
|
||||
.map(|(index, values)| (index, values.clone()))
|
||||
.collect(),
|
||||
},
|
||||
GroupsProxy::Slice(slice) => NuGroupsProxy::Slice(slice.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_polars(&self) -> GroupsProxy {
|
||||
match self {
|
||||
Self::Idx(indexes) => GroupsProxy::Idx(indexes.clone()),
|
||||
Self::Idx { sorted, all } => {
|
||||
let mut groups: GroupsIdx = all.clone().into();
|
||||
if *sorted {
|
||||
groups.sort()
|
||||
}
|
||||
|
||||
GroupsProxy::Idx(groups)
|
||||
}
|
||||
Self::Slice(slice) => GroupsProxy::Slice(slice.clone()),
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue