mirror of
https://github.com/nushell/nushell
synced 2024-11-10 07:04:13 +00:00
Polars upgrade (#4665)
* polars upgrade
* Update describe.rs

Co-authored-by: JT <547158+jntrnr@users.noreply.github.com>
This commit is contained in:
parent
10ceac998e
commit
4ebbe07d27
13 changed files with 236 additions and 128 deletions
122
Cargo.lock
generated
122
Cargo.lock
generated
|
@ -96,6 +96,12 @@ version = "1.0.53"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94a45b455c14666b85fc40a019e8ab9eb75e3a124e05494f5397122bc9eb06e0"
|
||||
|
||||
[[package]]
|
||||
name = "array-init-cursor"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76"
|
||||
|
||||
[[package]]
|
||||
name = "arrayref"
|
||||
version = "0.3.6"
|
||||
|
@ -119,23 +125,26 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
|
|||
|
||||
[[package]]
|
||||
name = "arrow-format"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f7da2d9660bfaebbdb0a44a33b3bd1dcb5a952fafa02c0dfc6a51ea471fef2a"
|
||||
checksum = "2333f8ccf0d597ba779863c57a0b61f635721187fb2fdeabae92691d7d582fe5"
|
||||
dependencies = [
|
||||
"flatbuffers",
|
||||
"planus",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow2"
|
||||
version = "0.8.1"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3452b2ae9727464a31a726c07ffec0c0da3b87831610d9ac99fc691c78b3a44"
|
||||
checksum = "aea88c49c98db9de8c72ccaa0470182857e70faa635f32fc4aa3c9e1a1dfefea"
|
||||
dependencies = [
|
||||
"arrow-format",
|
||||
"base64",
|
||||
"bytemuck",
|
||||
"chrono",
|
||||
"csv",
|
||||
"fallible-streaming-iterator",
|
||||
"futures",
|
||||
"hash_hasher",
|
||||
"indexmap",
|
||||
|
@ -359,6 +368,26 @@ dependencies = [
|
|||
"utf8-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "439989e6b8c38d1b6570a384ef1e49c8848128f5a97f3914baef02920842712f"
|
||||
dependencies = [
|
||||
"bytemuck_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck_derive"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e215f8c2f9f79cb53c8335e687ffd07d5bfcb6fe5fc80723762d0be46e7cc54"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.4.3"
|
||||
|
@ -1008,17 +1037,6 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flatbuffers"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef4c5738bcd7fad10315029c50026f83c9da5e4a21f8ed66826f43e0e2bde5f6"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"smallvec",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.22"
|
||||
|
@ -1349,6 +1367,12 @@ name = "hashbrown"
|
|||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c21d40587b92fa6a6c6e3c1bdbf87d75511db5672f9c93175574b3a00df1758"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"rayon",
|
||||
|
@ -1542,7 +1566,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"hashbrown",
|
||||
"hashbrown 0.11.2",
|
||||
"serde",
|
||||
]
|
||||
|
||||
|
@ -2705,9 +2729,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "parquet2"
|
||||
version = "0.8.1"
|
||||
version = "0.9.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57e98d7da0076cead49c49580cc5771dfe0ba8a93cadff9b47c1681a4a78e1f9"
|
||||
checksum = "45476d276db539ec4076f6abe62392619460fb70a1a8edebcc06e11cd93c0ec3"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"bitpacking",
|
||||
|
@ -2869,10 +2893,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe"
|
||||
|
||||
[[package]]
|
||||
name = "polars"
|
||||
version = "0.18.0"
|
||||
name = "planus"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e9211d1bb8d2d81541e4ab80ce9148a8e2a987d6412c2a48017fbbe24231ea1"
|
||||
checksum = "bffebaf174d6cad46a5f0f1bb1c45c6eb509571688bcb18dfab217f3c9f9b151"
|
||||
dependencies = [
|
||||
"array-init-cursor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars"
|
||||
version = "0.19.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2620069231dd93a27f7fc982c10379394a540775057fc569a669a40c2ad7207d"
|
||||
dependencies = [
|
||||
"polars-core",
|
||||
"polars-io",
|
||||
|
@ -2881,31 +2914,31 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "polars-arrow"
|
||||
version = "0.18.0"
|
||||
version = "0.19.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa5ee9c385bf6643893f98efa80ff5a07169b50f65962c7843c0a13e12f0b0cf"
|
||||
checksum = "863d723959237470af38fee06ca74c58e52ee3778e4796fbf66e05deb5f925f6"
|
||||
dependencies = [
|
||||
"arrow2",
|
||||
"hashbrown 0.12.0",
|
||||
"num 0.4.0",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-core"
|
||||
version = "0.18.0"
|
||||
version = "0.19.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3cb1de44e479ce2764a7a3ad057e16f434efa334feb993284e1a48bb8888c6d1"
|
||||
checksum = "006c8d5c4b057ec2287766c14293d70c76bfc65e949dd028a76a501098e3253b"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
"arrow2",
|
||||
"chrono",
|
||||
"hashbrown",
|
||||
"itertools",
|
||||
"hashbrown 0.12.0",
|
||||
"lazy_static",
|
||||
"num 0.4.0",
|
||||
"num_cpus",
|
||||
"polars-arrow",
|
||||
"polars-time",
|
||||
"prettytable-rs",
|
||||
"rand 0.8.4",
|
||||
"rand_distr",
|
||||
|
@ -2919,9 +2952,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "polars-io"
|
||||
version = "0.18.0"
|
||||
version = "0.19.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8bcb74f52ee9ff84863ae01de6ba25db092a9880302db4bf8f351f65b3ff0d12"
|
||||
checksum = "c99b04f1c31a6d0d121242e0b4f725c7c608665772e66621e42ed374a28ea6bc"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
|
@ -2936,6 +2969,7 @@ dependencies = [
|
|||
"num_cpus",
|
||||
"polars-arrow",
|
||||
"polars-core",
|
||||
"polars-utils",
|
||||
"rayon",
|
||||
"regex",
|
||||
"simdutf8",
|
||||
|
@ -2943,18 +2977,38 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "polars-lazy"
|
||||
version = "0.18.0"
|
||||
version = "0.19.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43f91022ba6463df71ad6eb80ac2307884578d9959e85e1fe9dac18988291d46"
|
||||
checksum = "3545b1e34365293d761e06d98d535da322ce04c02d638eefade1db61e0aa22b4"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"itertools",
|
||||
"glob",
|
||||
"polars-arrow",
|
||||
"polars-core",
|
||||
"polars-io",
|
||||
"polars-utils",
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-time"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0c28b9d7a4badf14d54819ed1c49112e44348a3c7087854414a55b47bb46eea"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"polars-arrow",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-utils"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03f6f755d66fedfc506fb1df64aa8adb904cd0ef19fd4fb7f339e7ec3619aa65"
|
||||
dependencies = [
|
||||
"parking_lot",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.16"
|
||||
|
|
|
@ -85,12 +85,12 @@ umask = "1.0.0"
|
|||
users = "0.11.0"
|
||||
|
||||
[dependencies.polars]
|
||||
version = "0.18.0"
|
||||
version = "0.19.1"
|
||||
optional = true
|
||||
features = [
|
||||
"default", "parquet", "json", "serde", "object",
|
||||
"checked_arithmetic", "strings", "cum_agg", "is_in",
|
||||
"rolling_window", "strings", "pivot", "random"
|
||||
"rolling_window", "strings", "rows", "random"
|
||||
]
|
||||
|
||||
[features]
|
||||
|
|
|
@ -5,7 +5,10 @@ use nu_protocol::{
|
|||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
|
||||
};
|
||||
use polars::{frame::groupby::GroupBy, prelude::PolarsError};
|
||||
use polars::{
|
||||
frame::groupby::GroupBy,
|
||||
prelude::{PolarsError, QuantileInterpolOptions},
|
||||
};
|
||||
|
||||
use crate::dataframe::values::NuGroupBy;
|
||||
|
||||
|
@ -266,7 +269,9 @@ fn perform_groupby_aggregation(
|
|||
Operation::First => groupby.first(),
|
||||
Operation::Last => groupby.last(),
|
||||
Operation::Nunique => groupby.n_unique(),
|
||||
Operation::Quantile(quantile) => groupby.quantile(quantile),
|
||||
Operation::Quantile(quantile) => {
|
||||
groupby.quantile(quantile, QuantileInterpolOptions::default())
|
||||
}
|
||||
Operation::Median => groupby.median(),
|
||||
Operation::Var => groupby.var(),
|
||||
Operation::Std => groupby.std(),
|
||||
|
@ -327,13 +332,15 @@ fn perform_dataframe_aggregation(
|
|||
Operation::Sum => Ok(dataframe.sum()),
|
||||
Operation::Min => Ok(dataframe.min()),
|
||||
Operation::Max => Ok(dataframe.max()),
|
||||
Operation::Quantile(quantile) => dataframe.quantile(quantile).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error calculating quantile".into(),
|
||||
e.to_string(),
|
||||
operation_span,
|
||||
)
|
||||
}),
|
||||
Operation::Quantile(quantile) => dataframe
|
||||
.quantile(quantile, QuantileInterpolOptions::default())
|
||||
.map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error calculating quantile".into(),
|
||||
e.to_string(),
|
||||
operation_span,
|
||||
)
|
||||
}),
|
||||
Operation::Median => Ok(dataframe.median()),
|
||||
Operation::Var => Ok(dataframe.var()),
|
||||
Operation::Std => Ok(dataframe.std()),
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
use polars::{
|
||||
chunked_array::ChunkedArray,
|
||||
prelude::{
|
||||
AnyValue, DataFrame, DataType, Float64Type, IntoSeries, NewChunkedArray, Series, Utf8Type,
|
||||
AnyValue, DataFrame, DataType, Float64Type, IntoSeries, NewChunkedArray,
|
||||
QuantileInterpolOptions, Series, Utf8Type,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -25,7 +27,14 @@ impl Command for DescribeDF {
|
|||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
|
||||
Signature::build(self.name())
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
.named(
|
||||
"quantiles",
|
||||
SyntaxShape::Table,
|
||||
"optional quantiles for describe",
|
||||
Some('q'),
|
||||
)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
|
@ -98,29 +107,62 @@ impl Command for DescribeDF {
|
|||
}
|
||||
|
||||
fn command(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let quantiles: Option<Vec<Value>> = call.get_flag(engine_state, stack, "quantiles")?;
|
||||
let quantiles = quantiles.map(|values| {
|
||||
values
|
||||
.iter()
|
||||
.map(|value| match value {
|
||||
Value::Float { val, span } => {
|
||||
if (&0.0..=&1.0).contains(&val) {
|
||||
Ok(*val)
|
||||
} else {
|
||||
Err(ShellError::SpannedLabeledError(
|
||||
"Incorrect value for quantile".to_string(),
|
||||
"value should be between 0 and 1".to_string(),
|
||||
*span,
|
||||
))
|
||||
}
|
||||
}
|
||||
_ => match value.span() {
|
||||
Ok(span) => Err(ShellError::SpannedLabeledError(
|
||||
"Incorrect value for quantile".to_string(),
|
||||
"value should be a float".to_string(),
|
||||
span,
|
||||
)),
|
||||
Err(e) => Err(e),
|
||||
},
|
||||
})
|
||||
.collect::<Result<Vec<f64>, ShellError>>()
|
||||
});
|
||||
|
||||
let quantiles = match quantiles {
|
||||
Some(quantiles) => quantiles?,
|
||||
None => vec![0.25, 0.50, 0.75],
|
||||
};
|
||||
|
||||
let mut quantiles_labels = quantiles
|
||||
.iter()
|
||||
.map(|q| Some(format!("{}%", q * 100.0)))
|
||||
.collect::<Vec<Option<String>>>();
|
||||
let mut labels = vec![
|
||||
Some("count".to_string()),
|
||||
Some("sum".to_string()),
|
||||
Some("mean".to_string()),
|
||||
Some("median".to_string()),
|
||||
Some("std".to_string()),
|
||||
Some("min".to_string()),
|
||||
];
|
||||
labels.append(&mut quantiles_labels);
|
||||
labels.push(Some("max".to_string()));
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let names = ChunkedArray::<Utf8Type>::new_from_opt_slice(
|
||||
"descriptor",
|
||||
&[
|
||||
Some("count"),
|
||||
Some("sum"),
|
||||
Some("mean"),
|
||||
Some("median"),
|
||||
Some("std"),
|
||||
Some("min"),
|
||||
Some("25%"),
|
||||
Some("50%"),
|
||||
Some("75%"),
|
||||
Some("max"),
|
||||
],
|
||||
)
|
||||
.into_series();
|
||||
let names = ChunkedArray::<Utf8Type>::new_from_opt_slice("descriptor", &labels).into_series();
|
||||
|
||||
let head = std::iter::once(names);
|
||||
|
||||
|
@ -165,32 +207,19 @@ fn command(
|
|||
_ => None,
|
||||
});
|
||||
|
||||
let q_25 = col
|
||||
.quantile_as_series(0.25)
|
||||
.ok()
|
||||
.and_then(|ca| ca.cast(&DataType::Float64).ok())
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
let q_50 = col
|
||||
.quantile_as_series(0.50)
|
||||
.ok()
|
||||
.and_then(|ca| ca.cast(&DataType::Float64).ok())
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
let q_75 = col
|
||||
.quantile_as_series(0.75)
|
||||
.ok()
|
||||
.and_then(|ca| ca.cast(&DataType::Float64).ok())
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
});
|
||||
let mut quantiles = quantiles
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(|q| {
|
||||
col.quantile_as_series(q, QuantileInterpolOptions::default())
|
||||
.ok()
|
||||
.and_then(|ca| ca.cast(&DataType::Float64).ok())
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<Option<f64>>>();
|
||||
|
||||
let max = col
|
||||
.max_as_series()
|
||||
|
@ -201,23 +230,12 @@ fn command(
|
|||
_ => None,
|
||||
});
|
||||
|
||||
let mut descriptors = vec![Some(count), sum, mean, median, std, min];
|
||||
descriptors.append(&mut quantiles);
|
||||
descriptors.push(max);
|
||||
|
||||
let name = format!("{} ({})", col.name(), col.dtype());
|
||||
ChunkedArray::<Float64Type>::new_from_opt_slice(
|
||||
&name,
|
||||
&[
|
||||
Some(count),
|
||||
sum,
|
||||
mean,
|
||||
median,
|
||||
std,
|
||||
min,
|
||||
q_25,
|
||||
q_50,
|
||||
q_75,
|
||||
max,
|
||||
],
|
||||
)
|
||||
.into_series()
|
||||
ChunkedArray::<Float64Type>::new_from_opt_slice(&name, &descriptors).into_series()
|
||||
});
|
||||
|
||||
let res = head.chain(tail).collect::<Vec<Series>>();
|
||||
|
|
|
@ -64,7 +64,7 @@ fn command(
|
|||
ShellError::SpannedLabeledError("Error creating groupby".into(), e.to_string(), col_span)
|
||||
})?;
|
||||
|
||||
let groups = groupby.get_groups().to_vec();
|
||||
let groups = groupby.get_groups();
|
||||
let groupby = NuGroupBy::new(df.as_ref().clone(), col_string, groups);
|
||||
|
||||
Ok(PipelineData::Value(groupby.into_value(call.head), None))
|
||||
|
|
|
@ -5,7 +5,8 @@ use nu_protocol::{
|
|||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
|
||||
};
|
||||
use std::{fs::File, path::PathBuf};
|
||||
|
||||
use std::{fs::File, io::BufReader, path::PathBuf};
|
||||
|
||||
use polars::prelude::{CsvEncoding, CsvReader, JsonReader, ParquetReader, SerReader};
|
||||
|
||||
|
@ -138,12 +139,12 @@ fn from_json(
|
|||
call: &Call,
|
||||
) -> Result<polars::prelude::DataFrame, ShellError> {
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let r = File::open(&file.item).map_err(|e| {
|
||||
let mut file = File::open(&file.item).map_err(|e| {
|
||||
ShellError::SpannedLabeledError("Error opening file".into(), e.to_string(), file.span)
|
||||
})?;
|
||||
|
||||
let reader = JsonReader::new(r);
|
||||
let buf_reader = BufReader::new(&mut file);
|
||||
let reader = JsonReader::new(buf_reader);
|
||||
|
||||
reader.finish().map_err(|e| {
|
||||
ShellError::SpannedLabeledError("Json reader error".into(), format!("{:?}", e), call.head)
|
||||
|
|
|
@ -105,7 +105,7 @@ fn command(
|
|||
|
||||
let mut groupby = nu_groupby.to_groupby()?;
|
||||
|
||||
let pivot = groupby.pivot(&pivot_col.item, &value_col.item);
|
||||
let pivot = groupby.pivot(vec![&pivot_col.item], vec![&value_col.item]);
|
||||
|
||||
match op {
|
||||
Operation::Mean => pivot.mean(),
|
||||
|
|
|
@ -76,14 +76,14 @@ fn command(
|
|||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
match (rows, fraction) {
|
||||
(Some(rows), None) => df.as_ref().sample_n(rows.item, replace).map_err(|e| {
|
||||
(Some(rows), None) => df.as_ref().sample_n(rows.item, replace, 0).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error creating sample".into(),
|
||||
e.to_string(),
|
||||
rows.span,
|
||||
)
|
||||
}),
|
||||
(None, Some(frac)) => df.as_ref().sample_frac(frac.item, replace).map_err(|e| {
|
||||
(None, Some(frac)) => df.as_ref().sample_frac(frac.item, replace, 0).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error creating sample".into(),
|
||||
e.to_string(),
|
||||
|
|
|
@ -82,7 +82,7 @@ fn command(
|
|||
})?;
|
||||
|
||||
let value = Value::Bool {
|
||||
val: bool.all_false(),
|
||||
val: !bool.any(),
|
||||
span: call.head,
|
||||
};
|
||||
|
||||
|
|
|
@ -82,7 +82,7 @@ fn command(
|
|||
})?;
|
||||
|
||||
let value = Value::Bool {
|
||||
val: bool.all_true(),
|
||||
val: bool.all(),
|
||||
span: call.head,
|
||||
};
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ use polars::chunked_array::object::builder::ObjectChunkedBuilder;
|
|||
use polars::chunked_array::ChunkedArray;
|
||||
use polars::prelude::{
|
||||
DataFrame, DataType, DatetimeChunked, Int64Type, IntoSeries, NamedFrom, NewChunkedArray,
|
||||
ObjectType, Series,
|
||||
ObjectType, Series, TimeUnit,
|
||||
};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
|
@ -399,7 +399,7 @@ pub fn create_column(
|
|||
|
||||
Ok(Column::new(casted.name().into(), values))
|
||||
}
|
||||
DataType::Datetime => {
|
||||
DataType::Datetime(_, _) => {
|
||||
let casted = series.datetime().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to datetime".into(), e.to_string())
|
||||
})?;
|
||||
|
@ -596,7 +596,8 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, Shel
|
|||
});
|
||||
|
||||
let res: DatetimeChunked =
|
||||
ChunkedArray::<Int64Type>::new_from_opt_iter(&name, it).into();
|
||||
ChunkedArray::<Int64Type>::new_from_opt_iter(&name, it)
|
||||
.into_datetime(TimeUnit::Milliseconds, None);
|
||||
|
||||
df_series.push(res.into_series())
|
||||
}
|
||||
|
|
|
@ -369,12 +369,12 @@ impl NuDataFrame {
|
|||
.expect("already checked that dataframe is different than 0");
|
||||
|
||||
// if unable to sort, then unable to compare
|
||||
let lhs = match self.as_ref().sort(*first_col, false) {
|
||||
let lhs = match self.as_ref().sort(vec![*first_col], false) {
|
||||
Ok(df) => df,
|
||||
Err(_) => return None,
|
||||
};
|
||||
|
||||
let rhs = match other.as_ref().sort(*first_col, false) {
|
||||
let rhs = match other.as_ref().sort(vec![*first_col], false) {
|
||||
Ok(df) => df,
|
||||
Err(_) => return None,
|
||||
};
|
||||
|
|
|
@ -1,23 +1,45 @@
|
|||
mod custom_value;
|
||||
|
||||
use nu_protocol::{PipelineData, ShellError, Span, Value};
|
||||
use polars::frame::groupby::{GroupBy, GroupTuples};
|
||||
use polars::frame::groupby::{GroupBy, GroupsProxy};
|
||||
use polars::prelude::DataFrame;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Serde-serializable counterpart of polars' `GroupsProxy`, with the same two
/// variants (`Idx` and `Slice`). It is the type stored in `NuGroupBy::groups`.
// NOTE(review): presumably introduced because `GroupsProxy` itself does not
// implement `Serialize`/`Deserialize` while `NuGroupBy` derives both — confirm.
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub enum NuGroupsProxy {
|
||||
/// Groups stored as `(u32, Vec<u32>)` pairs.
// NOTE(review): presumably (first row index, all row indexes of the group) — confirm against polars.
Idx(Vec<(u32, Vec<u32>)>),
|
||||
/// Groups stored as two-element `u32` arrays.
// NOTE(review): presumably `[first, len]` per group — confirm against polars.
Slice(Vec<[u32; 2]>),
|
||||
}
|
||||
|
||||
// Conversions between `NuGroupsProxy` and polars' `GroupsProxy`.
// Both directions clone the underlying vectors.
impl NuGroupsProxy {
|
||||
/// Builds a `NuGroupsProxy` from a borrowed polars `GroupsProxy`,
/// cloning the payload of whichever variant is present.
fn from_polars(groups: &GroupsProxy) -> Self {
|
||||
match groups {
|
||||
GroupsProxy::Idx(indexes) => NuGroupsProxy::Idx(indexes.clone()),
|
||||
GroupsProxy::Slice(slice) => NuGroupsProxy::Slice(slice.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Produces an owned polars `GroupsProxy` with the same variant and a
/// clone of the stored data; `self` is left untouched.
fn to_polars(&self) -> GroupsProxy {
|
||||
match self {
|
||||
Self::Idx(indexes) => GroupsProxy::Idx(indexes.clone()),
|
||||
|
||||
Self::Slice(slice) => GroupsProxy::Slice(slice.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct NuGroupBy {
|
||||
dataframe: DataFrame,
|
||||
by: Vec<String>,
|
||||
groups: GroupTuples,
|
||||
groups: NuGroupsProxy,
|
||||
}
|
||||
|
||||
impl NuGroupBy {
|
||||
pub fn new(dataframe: DataFrame, by: Vec<String>, groups: GroupTuples) -> Self {
|
||||
pub fn new(dataframe: DataFrame, by: Vec<String>, groups: &GroupsProxy) -> Self {
|
||||
NuGroupBy {
|
||||
dataframe,
|
||||
by,
|
||||
groups,
|
||||
groups: NuGroupsProxy::from_polars(groups),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -60,7 +82,12 @@ impl NuGroupBy {
|
|||
ShellError::LabeledError("Error creating groupby".into(), e.to_string())
|
||||
})?;
|
||||
|
||||
Ok(GroupBy::new(&self.dataframe, by, self.groups.clone(), None))
|
||||
Ok(GroupBy::new(
|
||||
&self.dataframe,
|
||||
by,
|
||||
self.groups.to_polars(),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn print(&self, span: Span) -> Result<Vec<Value>, ShellError> {
|
||||
|
|
Loading…
Reference in a new issue