str to datetime dfr (#4833)

* str to datetime dfr

* change description
This commit is contained in:
Fernando Herrera 2022-03-13 13:53:13 +00:00 committed by GitHub
parent dfffd45bcd
commit 30bb090cd4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 170 additions and 13 deletions

View file

@ -90,7 +90,8 @@ optional = true
features = [ features = [
"default", "parquet", "json", "serde", "object", "default", "parquet", "json", "serde", "object",
"checked_arithmetic", "strings", "cum_agg", "is_in", "checked_arithmetic", "strings", "cum_agg", "is_in",
"rolling_window", "strings", "rows", "random" "rolling_window", "strings", "rows", "random",
"dtype-datetime"
] ]
[features] [features]

View file

@ -0,0 +1,129 @@
use super::super::super::values::{Column, NuDataFrame};
use chrono::DateTime;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use polars::prelude::{IntoSeries, TimeUnit};
/// Dataframe command (`dfr as-datetime`) that converts a string series into a
/// datetime series using a caller-supplied format string.
#[derive(Clone)]
pub struct AsDateTime;
impl Command for AsDateTime {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "dfr as-datetime"
    }

    /// Help text shown to the user.
    ///
    /// Every sample value on the right-hand side is parseable with the format
    /// on the left-hand side: two-digit years for `%y`, four-digit years for
    /// `%Y`, hours within 00-23 and seconds within the valid range.
    fn usage(&self) -> &str {
        r#"Converts string to datetime. Format example:
        "%y/%m/%d %H:%M:%S" => 21/12/31 12:54:58
        "%y-%m-%d %H:%M:%S" => 21-12-31 23:58:01
        "%y/%m/%d %H:%M:%S" => 21/12/31 23:58:01
        "%y%m%d %H:%M:%S" => 210319 23:58:50
        "%Y/%m/%d %H:%M:%S" => 2021/12/31 12:54:58
        "%Y-%m-%d %H:%M:%S" => 2021-12-31 23:58:01
        "%Y/%m/%d %H:%M:%S" => 2021/12/31 23:58:01
        "%Y%m%d %H:%M:%S" => 20210319 23:58:50
        "%FT%H:%M:%S" => 2019-04-18T02:45:55
        "%FT%H:%M:%S.%6f" => microseconds
        "%FT%H:%M:%S.%9f" => nanoseconds"#
    }

    /// Declares one required positional argument (the format string) and the
    /// optional `--not-exact` / `-n` switch.
    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("format", SyntaxShape::String, "formatting date string")
            .switch(
                "not-exact",
                "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)",
                Some('n'),
            )
            .category(Category::Custom("dataframe".into()))
    }

    /// Single example: two date strings piped through `dfr to-df` and parsed
    /// with `%Y-%m-%d %H:%M:%S`, together with the expected dataframe result.
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Converts string to datetime",
            example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | dfr to-df | dfr as-datetime "%Y-%m-%d %H:%M:%S""#,
            result: Some(
                NuDataFrame::try_from_columns(vec![Column::new(
                    "0".to_string(),
                    vec![
                        Value::Date {
                            // The expected values carry an explicit +0000 offset.
                            val: DateTime::parse_from_str(
                                "2021-12-30 00:00:00 +0000",
                                "%Y-%m-%d %H:%M:%S %z",
                            )
                            .expect("date calculation should not fail in test"),
                            span: Span::test_data(),
                        },
                        Value::Date {
                            val: DateTime::parse_from_str(
                                "2021-12-31 00:00:00 +0000",
                                "%Y-%m-%d %H:%M:%S %z",
                            )
                            .expect("date calculation should not fail in test"),
                            span: Span::test_data(),
                        },
                    ],
                )])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    /// Entry point called by the engine; delegates to the free function `command`.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}
/// Converts the string series coming from the pipeline into a datetime series.
///
/// Reads the required `format` argument (position 0) and the `not-exact`
/// switch from `call`, takes the pipeline input as a single-series dataframe,
/// views that series as UTF-8 strings and parses every value with
/// millisecond time units. The parsed series is returned wrapped in a new
/// dataframe value.
///
/// # Errors
///
/// Returns a `ShellError` when the argument cannot be read, the input is not
/// a dataframe / single series, the series is not a string series, or polars
/// fails to build the datetime series.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let format: String = call.req(engine_state, stack, 0)?;
    let not_exact = call.has_flag("not-exact");

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;
    let series = df.as_series(call.head)?;
    let strings = series.utf8().map_err(|err| {
        ShellError::SpannedLabeledError(
            "Error casting to string".into(),
            err.to_string(),
            call.head,
        )
    })?;

    // Both parsing flavours take the same arguments; only the matching
    // strictness differs.
    let pattern = Some(format.as_str());
    let parsed = if not_exact {
        strings.as_datetime_not_exact(pattern, TimeUnit::Milliseconds)
    } else {
        strings.as_datetime(pattern, TimeUnit::Milliseconds)
    };

    let datetimes = parsed
        .map_err(|err| {
            ShellError::SpannedLabeledError(
                "Error creating datetime".into(),
                err.to_string(),
                call.head,
            )
        })?
        .into_series();

    let out = NuDataFrame::try_from_series(vec![datetimes], call.head)?;
    Ok(PipelineData::Value(
        NuDataFrame::into_value(out, call.head),
        None,
    ))
}
#[cfg(test)]
mod test {
    use super::super::super::super::test_dataframe::test_dataframe;
    use super::AsDateTime;

    /// Hands the command to the shared dataframe test harness.
    #[test]
    fn test_examples() {
        // The boxed command is built inline so it coerces to the harness's
        // expected boxed type.
        test_dataframe(vec![Box::new(AsDateTime)])
    }
}

View file

@ -70,7 +70,7 @@ fn command(
let res = casted.day().into_series(); let res = casted.day().into_series();
NuDataFrame::try_from_series(vec![res.into_series()], call.head) NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View file

@ -70,7 +70,7 @@ fn command(
let res = casted.hour().into_series(); let res = casted.hour().into_series();
NuDataFrame::try_from_series(vec![res.into_series()], call.head) NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View file

@ -70,7 +70,7 @@ fn command(
let res = casted.minute().into_series(); let res = casted.minute().into_series();
NuDataFrame::try_from_series(vec![res.into_series()], call.head) NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View file

@ -70,7 +70,7 @@ fn command(
let res = casted.month().into_series(); let res = casted.month().into_series();
NuDataFrame::try_from_series(vec![res.into_series()], call.head) NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View file

@ -70,7 +70,7 @@ fn command(
let res = casted.nanosecond().into_series(); let res = casted.nanosecond().into_series();
NuDataFrame::try_from_series(vec![res.into_series()], call.head) NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View file

@ -70,7 +70,7 @@ fn command(
let res = casted.ordinal().into_series(); let res = casted.ordinal().into_series();
NuDataFrame::try_from_series(vec![res.into_series()], call.head) NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View file

@ -70,7 +70,7 @@ fn command(
let res = casted.second().into_series(); let res = casted.second().into_series();
NuDataFrame::try_from_series(vec![res.into_series()], call.head) NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View file

@ -70,7 +70,7 @@ fn command(
let res = casted.week().into_series(); let res = casted.week().into_series();
NuDataFrame::try_from_series(vec![res.into_series()], call.head) NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View file

@ -70,7 +70,7 @@ fn command(
let res = casted.weekday().into_series(); let res = casted.weekday().into_series();
NuDataFrame::try_from_series(vec![res.into_series()], call.head) NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View file

@ -70,7 +70,7 @@ fn command(
let res = casted.year().into_series(); let res = casted.year().into_series();
NuDataFrame::try_from_series(vec![res.into_series()], call.head) NuDataFrame::try_from_series(vec![res], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }

View file

@ -1,3 +1,4 @@
mod as_datetime;
mod get_day; mod get_day;
mod get_hour; mod get_hour;
mod get_minute; mod get_minute;
@ -9,6 +10,7 @@ mod get_week;
mod get_weekday; mod get_weekday;
mod get_year; mod get_year;
pub use as_datetime::AsDateTime;
pub use get_day::GetDay; pub use get_day::GetDay;
pub use get_hour::GetHour; pub use get_hour::GetHour;
pub use get_minute::GetMinute; pub use get_minute::GetMinute;

View file

@ -57,6 +57,7 @@ pub fn add_series_decls(working_set: &mut StateWorkingSet) {
ArgSort, ArgSort,
ArgTrue, ArgTrue,
ArgUnique, ArgUnique,
AsDateTime,
Concatenate, Concatenate,
Contains, Contains,
Cumulative, Cumulative,

View file

@ -4,7 +4,7 @@ use nu_protocol::{ast::Operator, span, ShellError, Span, Spanned, Value};
use num::Zero; use num::Zero;
use polars::prelude::{ use polars::prelude::{
BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries, BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries,
NumOpsDispatchChecked, PolarsError, Series, NumOpsDispatchChecked, PolarsError, Series, TimeUnit,
}; };
use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub}; use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub};
@ -270,6 +270,9 @@ pub(super) fn compute_series_single_value(
let equal_pattern = format!("^{}$", val); let equal_pattern = format!("^{}$", val);
contains_series_pat(&lhs, &equal_pattern, lhs_span) contains_series_pat(&lhs, &equal_pattern, lhs_span)
} }
Value::Date { val, .. } => {
compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::equal, lhs_span)
}
_ => Err(ShellError::OperatorMismatch { _ => Err(ShellError::OperatorMismatch {
op_span: operator.span, op_span: operator.span,
lhs_ty: left.get_type(), lhs_ty: left.get_type(),
@ -285,6 +288,12 @@ pub(super) fn compute_series_single_value(
Value::Float { val, .. } => { Value::Float { val, .. } => {
compare_series_decimal(&lhs, *val, ChunkedArray::not_equal, lhs_span) compare_series_decimal(&lhs, *val, ChunkedArray::not_equal, lhs_span)
} }
Value::Date { val, .. } => compare_series_i64(
&lhs,
val.timestamp_millis(),
ChunkedArray::not_equal,
lhs_span,
),
_ => Err(ShellError::OperatorMismatch { _ => Err(ShellError::OperatorMismatch {
op_span: operator.span, op_span: operator.span,
lhs_ty: left.get_type(), lhs_ty: left.get_type(),
@ -298,6 +307,9 @@ pub(super) fn compute_series_single_value(
Value::Float { val, .. } => { Value::Float { val, .. } => {
compare_series_decimal(&lhs, *val, ChunkedArray::lt, lhs_span) compare_series_decimal(&lhs, *val, ChunkedArray::lt, lhs_span)
} }
Value::Date { val, .. } => {
compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::lt, lhs_span)
}
_ => Err(ShellError::OperatorMismatch { _ => Err(ShellError::OperatorMismatch {
op_span: operator.span, op_span: operator.span,
lhs_ty: left.get_type(), lhs_ty: left.get_type(),
@ -311,6 +323,9 @@ pub(super) fn compute_series_single_value(
Value::Float { val, .. } => { Value::Float { val, .. } => {
compare_series_decimal(&lhs, *val, ChunkedArray::lt_eq, lhs_span) compare_series_decimal(&lhs, *val, ChunkedArray::lt_eq, lhs_span)
} }
Value::Date { val, .. } => {
compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::lt_eq, lhs_span)
}
_ => Err(ShellError::OperatorMismatch { _ => Err(ShellError::OperatorMismatch {
op_span: operator.span, op_span: operator.span,
lhs_ty: left.get_type(), lhs_ty: left.get_type(),
@ -324,6 +339,9 @@ pub(super) fn compute_series_single_value(
Value::Float { val, .. } => { Value::Float { val, .. } => {
compare_series_decimal(&lhs, *val, ChunkedArray::gt, lhs_span) compare_series_decimal(&lhs, *val, ChunkedArray::gt, lhs_span)
} }
Value::Date { val, .. } => {
compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::gt, lhs_span)
}
_ => Err(ShellError::OperatorMismatch { _ => Err(ShellError::OperatorMismatch {
op_span: operator.span, op_span: operator.span,
lhs_ty: left.get_type(), lhs_ty: left.get_type(),
@ -337,6 +355,9 @@ pub(super) fn compute_series_single_value(
Value::Float { val, .. } => { Value::Float { val, .. } => {
compare_series_decimal(&lhs, *val, ChunkedArray::gt_eq, lhs_span) compare_series_decimal(&lhs, *val, ChunkedArray::gt_eq, lhs_span)
} }
Value::Date { val, .. } => {
compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::gt_eq, lhs_span)
}
_ => Err(ShellError::OperatorMismatch { _ => Err(ShellError::OperatorMismatch {
op_span: operator.span, op_span: operator.span,
lhs_ty: left.get_type(), lhs_ty: left.get_type(),
@ -491,7 +512,10 @@ where
F: Fn(&ChunkedArray<Int64Type>, i64) -> ChunkedArray<BooleanType>, F: Fn(&ChunkedArray<Int64Type>, i64) -> ChunkedArray<BooleanType>,
{ {
match series.dtype() { match series.dtype() {
DataType::UInt32 | DataType::Int32 | DataType::UInt64 => { DataType::UInt32
| DataType::Int32
| DataType::UInt64
| DataType::Datetime(TimeUnit::Milliseconds, _) => {
let to_i64 = series.cast(&DataType::Int64); let to_i64 = series.cast(&DataType::Int64);
match to_i64 { match to_i64 {