upgrade polars to 0.17 (#4122)

This commit is contained in:
Nico Mandery 2021-11-16 00:01:02 +01:00 committed by GitHub
parent df87d90b8c
commit 16db368232
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 67 additions and 74 deletions

View file

@ -89,7 +89,7 @@ zip = { version="0.5.9", optional=true }
digest = "0.9.0"
[dependencies.polars]
version = "0.16.0"
version = "0.17.0"
optional = true
features = ["parquet", "json", "random", "pivot", "strings", "is_in", "temporal", "cum_agg", "rolling_window"]

View file

@ -121,7 +121,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tail = df.as_ref().get_columns().iter().map(|col| {
let count = col.len() as f64;
let sum = match col.sum_as_series().cast_with_dtype(&DataType::Float64) {
let sum = match col.sum_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
@ -144,7 +144,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
_ => None,
};
let min = match col.min_as_series().cast_with_dtype(&DataType::Float64) {
let min = match col.min_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
@ -153,7 +153,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
};
let q_25 = match col.quantile_as_series(0.25) {
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) {
Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
@ -164,7 +164,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
};
let q_50 = match col.quantile_as_series(0.50) {
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) {
Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
@ -175,7 +175,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
};
let q_75 = match col.quantile_as_series(0.75) {
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) {
Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
@ -185,7 +185,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
Err(_) => None,
};
let max = match col.max_as_series().cast_with_dtype(&DataType::Float64) {
let max = match col.max_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,

View file

@ -44,6 +44,12 @@ impl WholeStreamCommand for DataFrame {
"type of join. Inner by default",
Some('t'),
)
.named(
"suffix",
SyntaxShape::String,
"suffix for the columns of the right dataframe",
Some('s'),
)
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
@ -104,6 +110,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let r_df: Value = args.req(0)?;
let l_col: Vec<Value> = args.req_named("left")?;
let r_col: Vec<Value> = args.req_named("right")?;
let r_suffix: Option<Tagged<String>> = args.get_flag("suffix")?;
let join_type_op: Option<Tagged<String>> = args.get_flag("type")?;
let join_type = match join_type_op {
@ -124,6 +131,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
},
};
let suffix = r_suffix.map(|s| s.item);
let (l_col_string, l_col_span) = convert_columns(&l_col, &tag)?;
let (r_col_string, r_col_span) = convert_columns(&r_col, &tag)?;
@ -142,7 +151,13 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)?;
df.as_ref()
.join(r_df.as_ref(), &l_col_string, &r_col_string, join_type)
.join(
r_df.as_ref(),
&l_col_string,
&r_col_string,
join_type,
suffix,
)
.map_err(|e| parse_polars_error::<&str>(&e, &l_col_span, None))
}
_ => Err(ShellError::labeled_error(

View file

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.day().into_series();

View file

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.hour().into_series();

View file

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.minute().into_series();

View file

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.month().into_series();

View file

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.nanosecond().into_series();

View file

@ -56,7 +56,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.ordinal().into_series();

View file

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.second().into_series();

View file

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.week().into_series();

View file

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.weekday().into_series();

View file

@ -56,7 +56,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.year().into_series();

View file

@ -6,7 +6,7 @@ use nu_protocol::{
Signature, SyntaxShape, UntaggedValue,
};
use nu_source::Tagged;
use polars::prelude::DataType;
use polars::prelude::{DataType, RollingOptions};
enum RollType {
Min,
@ -57,7 +57,6 @@ impl WholeStreamCommand for DataFrame {
Signature::build("dataframe rolling")
.required("type", SyntaxShape::String, "rolling operation")
.required("window", SyntaxShape::Int, "Window size for rolling")
.switch("ignore_nulls", "Ignore nulls in column", Some('i'))
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
@ -112,7 +111,6 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let roll_type: Tagged<String> = args.req(0)?;
let window_size: Tagged<i64> = args.req(1)?;
let ignore_nulls = args.has_flag("ignore_nulls");
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let series = df.as_series(&df_tag.span)?;
@ -126,31 +124,17 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
}
let roll_type = RollType::from_str(&roll_type.item, &roll_type.tag.span)?;
let rolling_opts = RollingOptions {
window_size: window_size.item as usize,
min_periods: window_size.item as usize,
weights: None,
center: false,
};
let res = match roll_type {
RollType::Max => series.rolling_max(
window_size.item as u32,
None,
ignore_nulls,
window_size.item as u32,
),
RollType::Min => series.rolling_min(
window_size.item as u32,
None,
ignore_nulls,
window_size.item as u32,
),
RollType::Sum => series.rolling_sum(
window_size.item as u32,
None,
ignore_nulls,
window_size.item as u32,
),
RollType::Mean => series.rolling_mean(
window_size.item as u32,
None,
ignore_nulls,
window_size.item as u32,
),
RollType::Max => series.rolling_max(rolling_opts),
RollType::Min => series.rolling_min(rolling_opts),
RollType::Sum => series.rolling_sum(rolling_opts),
RollType::Mean => series.rolling_mean(rolling_opts),
};
let mut res = res.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;

View file

@ -78,7 +78,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let casted = match indices.dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices
.as_ref()
.cast_with_dtype(&DataType::UInt32)
.cast(&DataType::UInt32)
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)),
_ => Err(ShellError::labeled_error_with_secondary(
"Incorrect type",

View file

@ -58,7 +58,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.strftime(&fmt.item).into_series();

View file

@ -92,7 +92,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let casted = match series.dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => series
.as_ref()
.cast_with_dtype(&DataType::UInt32)
.cast(&DataType::UInt32)
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)),
_ => Err(ShellError::labeled_error_with_secondary(
"Incorrect type",

View file

@ -73,9 +73,9 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let writer = CsvWriter::new(&mut file);
let writer = if no_header {
writer.has_headers(false)
writer.has_header(false)
} else {
writer.has_headers(true)
writer.has_header(true)
};
let writer = match delimiter {

View file

@ -27,9 +27,9 @@ serde = { version="1.0", features=["derive"] }
serde_bytes = "0.11.5"
[dependencies.polars]
version = "0.16.0"
version = "0.17.0"
optional = true
features = ["default", "serde", "rows", "strings", "checked_arithmetic", "object", "dtype-duration-ns"]
features = ["default", "serde", "rows", "strings", "checked_arithmetic", "object", "dtype-date", "dtype-datetime", "dtype-time"]
[features]
dataframe = ["polars"]

View file

@ -603,7 +603,7 @@ where
{
match series.dtype() {
DataType::UInt32 | DataType::Int32 | DataType::UInt64 => {
let to_i64 = series.cast_with_dtype(&DataType::Int64);
let to_i64 = series.cast(&DataType::Int64);
match to_i64 {
Ok(series) => {
@ -661,7 +661,7 @@ where
{
match series.dtype() {
DataType::Float32 => {
let to_f64 = series.cast_with_dtype(&DataType::Float64);
let to_f64 = series.cast(&DataType::Float64);
match to_f64 {
Ok(series) => {
@ -731,7 +731,7 @@ where
{
match series.dtype() {
DataType::UInt32 | DataType::Int32 | DataType::UInt64 => {
let to_i64 = series.cast_with_dtype(&DataType::Int64);
let to_i64 = series.cast(&DataType::Int64);
match to_i64 {
Ok(series) => {
@ -789,7 +789,7 @@ where
{
match series.dtype() {
DataType::Float32 => {
let to_f64 = series.cast_with_dtype(&DataType::Float64);
let to_f64 = series.cast(&DataType::Float64);
match to_f64 {
Ok(series) => {

View file

@ -8,8 +8,8 @@ use nu_errors::ShellError;
use nu_source::{Span, Tag};
use num_bigint::BigInt;
use polars::prelude::{
DataFrame, DataType, Date64Type, Int64Type, IntoSeries, NamedFrom, NewChunkedArray, ObjectType,
PolarsNumericType, Series, TimeUnit,
DataFrame, DataType, DatetimeChunked, Int64Type, IntoSeries, NamedFrom, NewChunkedArray,
ObjectType, PolarsNumericType, Series,
};
use std::ops::{Deref, DerefMut};
@ -310,8 +310,8 @@ pub fn create_column(
}
}
}
DataType::Date32 => {
let casted = series.date32().map_err(|e| {
DataType::Date => {
let casted = series.date().map_err(|e| {
ShellError::labeled_error(
"Casting error",
format!("casting error: {}", e),
@ -347,8 +347,8 @@ pub fn create_column(
Ok(Column::new(casted.name().into(), values))
}
DataType::Date64 => {
let casted = series.date64().map_err(|e| {
DataType::Datetime => {
let casted = series.datetime().map_err(|e| {
ShellError::labeled_error(
"Casting error",
format!("casting error: {}", e),
@ -384,8 +384,8 @@ pub fn create_column(
Ok(Column::new(casted.name().into(), values))
}
DataType::Time64(timeunit) | DataType::Duration(timeunit) => {
let casted = series.time64_nanosecond().map_err(|e| {
DataType::Time => {
let casted = series.time().map_err(|e| {
ShellError::labeled_error(
"Casting error",
format!("casting error: {}", e),
@ -398,14 +398,7 @@ pub fn create_column(
.skip(from_row)
.take(size)
.map(|v| match v {
Some(a) => {
let nanoseconds = match timeunit {
TimeUnit::Second => a / 1_000_000_000,
TimeUnit::Millisecond => a / 1_000_000,
TimeUnit::Microsecond => a / 1_000,
TimeUnit::Nanosecond => a,
};
Some(nanoseconds) => {
let untagged = if let Some(bigint) = BigInt::from_i64(nanoseconds) {
UntaggedValue::Primitive(Primitive::Duration(bigint))
} else {
@ -633,7 +626,8 @@ pub fn from_parsed_columns(
}
});
let res = ChunkedArray::<Date64Type>::new_from_opt_iter(&name, it);
let res: DatetimeChunked =
ChunkedArray::<Int64Type>::new_from_opt_iter(&name, it).into();
df_series.push(res.into_series())
}

View file

@ -87,7 +87,7 @@ impl PartialEq for NuDataFrame {
// Casting needed to compare other numeric types with nushell numeric type.
// In nushell we only have i64 integer numeric types and any array created
// with nushell untagged primitives will be of type i64
DataType::UInt32 => match self_series.cast_with_dtype(&DataType::Int64) {
DataType::UInt32 => match self_series.cast(&DataType::Int64) {
Ok(series) => series,
Err(_) => return false,
},