diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs
index b078a39f98..66785ebb9f 100644
--- a/crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs
+++ b/crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs
@@ -29,8 +29,8 @@ fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> {
         SQLDataType::Boolean => DataType::Boolean,
         SQLDataType::Date => DataType::Date,
         SQLDataType::Time(_, _) => DataType::Time,
-        SQLDataType::Timestamp(_, _) => DataType::Datetime(TimeUnit::Milliseconds, None),
-        SQLDataType::Interval => DataType::Duration(TimeUnit::Milliseconds),
+        SQLDataType::Timestamp(_, _) => DataType::Datetime(TimeUnit::Microseconds, None),
+        SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds),
         SQLDataType::Array(inner_type) => match inner_type {
             Some(inner_type) => DataType::List(Box::new(map_sql_polars_datatype(inner_type)?)),
             None => {
diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/datepart.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/datepart.rs
new file mode 100644
index 0000000000..bdecd50913
--- /dev/null
+++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/datepart.rs
@@ -0,0 +1,167 @@
+use super::super::values::NuExpression;
+
+use crate::dataframe::values::{Column, NuDataFrame};
+use chrono::{DateTime, FixedOffset};
+use nu_engine::CallExt;
+use nu_protocol::{
+    ast::Call,
+    engine::{Command, EngineState, Stack},
+    Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Type,
+    Value,
+};
+
+/// `dfr datepart` command: wraps the incoming expression in the polars `dt` accessor
+/// selected by the datepart name (year, month, hour, ...).
+#[derive(Clone)]
+pub struct ExprDatePart;
+
+impl Command for ExprDatePart {
+    fn name(&self) -> &str {
+        "dfr datepart"
+    }
+
+    fn usage(&self) -> &str {
+        "Creates an expression for capturing the specified datepart in a column."
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build(self.name())
+            .required(
+                "Datepart name",
+                SyntaxShape::String,
+                "Part of the date to capture.  Possible values are year, quarter, month, week, weekday, day, hour, minute, second, millisecond, microsecond, nanosecond",
+            )
+            .input_type(Type::Custom("expression".into()))
+            .output_type(Type::Custom("expression".into()))
+            .category(Category::Custom("expression".into()))
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        let dt = DateTime::<FixedOffset>::parse_from_str(
+            "2021-12-30T01:02:03.123456789 +0000",
+            "%Y-%m-%dT%H:%M:%S.%9f %z",
+        )
+        .expect("date calculation should not fail in test");
+        vec![
+            Example {
+                description: "Creates an expression to capture the year date part",
+                example: r#"[["2021-12-30T01:02:03.123456789"]] | dfr into-df | dfr as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | dfr with-column [(dfr col datetime | dfr datepart year | dfr as datetime_year )]"#,
+                result: Some(
+                    NuDataFrame::try_from_columns(vec![
+                        Column::new("datetime".to_string(), vec![Value::test_date(dt)]),
+                        Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]),
+                    ])
+                    .expect("simple df for test should not fail")
+                    .into_value(Span::test_data()),
+                ),
+            },
+            Example {
+                description: "Creates an expression to capture multiple date parts",
+                example: r#"[["2021-12-30T01:02:03.123456789"]] | dfr into-df | dfr as-datetime "%Y-%m-%dT%H:%M:%S.%9f" |
+                dfr with-column [ (dfr col datetime | dfr datepart year | dfr as datetime_year ),
+                (dfr col datetime | dfr datepart month | dfr as datetime_month ),
+                (dfr col datetime | dfr datepart day | dfr as datetime_day ),
+                (dfr col datetime | dfr datepart hour | dfr as datetime_hour ),
+                (dfr col datetime | dfr datepart minute | dfr as datetime_minute ),
+                (dfr col datetime | dfr datepart second | dfr as datetime_second ),
+                (dfr col datetime | dfr datepart nanosecond | dfr as datetime_ns ) ]"#,
+                result: Some(
+                    NuDataFrame::try_from_columns(vec![
+                        Column::new("datetime".to_string(), vec![Value::test_date(dt)]),
+                        Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]),
+                        Column::new("datetime_month".to_string(), vec![Value::test_int(12)]),
+                        Column::new("datetime_day".to_string(), vec![Value::test_int(30)]),
+                        Column::new("datetime_hour".to_string(), vec![Value::test_int(1)]),
+                        Column::new("datetime_minute".to_string(), vec![Value::test_int(2)]),
+                        Column::new("datetime_second".to_string(), vec![Value::test_int(3)]),
+                        Column::new("datetime_ns".to_string(), vec![Value::test_int(123456789)]),
+                    ])
+                    .expect("simple df for test should not fail")
+                    .into_value(Span::test_data()),
+                ),
+            },
+        ]
+    }
+
+    fn search_terms(&self) -> Vec<&str> {
+        vec![
+            "year",
+            "month",
+            "week",
+            "weekday",
+            "quarter",
+            "day",
+            "hour",
+            "minute",
+            "second",
+            "millisecond",
+            "microsecond",
+            "nanosecond",
+        ]
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        let part: Spanned<String> = call.req(engine_state, stack, 0)?;
+
+        let expr = NuExpression::try_from_pipeline(input, call.head)?;
+        let expr_dt = expr.into_polars().dt();
+        // Every name advertised in the signature help must have an arm here.
+        let expr = match part.item.as_str() {
+            "year" => expr_dt.year(),
+            "quarter" => expr_dt.quarter(),
+            "month" => expr_dt.month(),
+            "week" => expr_dt.week(),
+            "weekday" => expr_dt.weekday(),
+            "day" => expr_dt.day(),
+            "hour" => expr_dt.hour(),
+            "minute" => expr_dt.minute(),
+            "second" => expr_dt.second(),
+            "millisecond" => expr_dt.millisecond(),
+            "microsecond" => expr_dt.microsecond(),
+            "nanosecond" => expr_dt.nanosecond(),
+            _ => {
+                // Keep this list in sync with the match arms above.
+                return Err(ShellError::UnsupportedInput(
+                    format!("{} is not a valid datepart, expected one of year, quarter, month, week, weekday, day, hour, minute, second, millisecond, microsecond, nanosecond", part.item),
+                    "value originates from here".to_string(),
+                    call.head,
+                    part.span,
+                ));
+            }
+        }.into();
+
+        Ok(PipelineData::Value(
+            NuExpression::into_value(expr, call.head),
+            None,
+        ))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::super::super::test_dataframe::test_dataframe;
+    use super::*;
+    use crate::dataframe::eager::WithColumn;
+    use crate::dataframe::expressions::ExprAlias;
+    use crate::dataframe::expressions::ExprAsNu;
+    use crate::dataframe::expressions::ExprCol;
+    use crate::dataframe::series::AsDateTime;
+
+    #[test]
+    fn test_examples() {
+        test_dataframe(vec![
+            Box::new(ExprDatePart {}),
+            Box::new(ExprCol {}),
+            Box::new(ExprAsNu {}),
+            Box::new(AsDateTime {}),
+            Box::new(WithColumn {}),
+            Box::new(ExprAlias {}),
+        ])
+    }
+}
diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/mod.rs
index 25bfc4d419..75f9a007f2 100644
--- a/crates/nu-cmd-dataframe/src/dataframe/expressions/mod.rs
+++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/mod.rs
@@ -3,6 +3,7 @@ mod arg_where;
 mod as_nu;
 mod col;
 mod concat_str;
+mod datepart;
 mod expressions_macro;
 mod is_in;
 mod lit;
@@ -17,6 +18,7 @@ use crate::dataframe::expressions::arg_where::ExprArgWhere;
 use crate::dataframe::expressions::as_nu::ExprAsNu;
 pub(super) use crate::dataframe::expressions::col::ExprCol;
 pub(super) use crate::dataframe::expressions::concat_str::ExprConcatStr;
+pub(crate) use crate::dataframe::expressions::datepart::ExprDatePart;
 pub(crate) use crate::dataframe::expressions::expressions_macro::*;
 pub(super) use crate::dataframe::expressions::is_in::ExprIsIn;
 pub(super) use crate::dataframe::expressions::lit::ExprLit;
@@ -64,6 +66,7 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) {
         ExprMean,
         ExprMedian,
         ExprStd,
-        ExprVar
+        ExprVar,
+        ExprDatePart
     );
 }
diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/as_datetime.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/as_datetime.rs
index aaff3bd0a9..ba69b5a9ad 100644
--- a/crates/nu-cmd-dataframe/src/dataframe/series/date/as_datetime.rs
+++ b/crates/nu-cmd-dataframe/src/dataframe/series/date/as_datetime.rs
@@ -46,35 +46,66 @@ impl Command for AsDateTime {
     }

     fn examples(&self) -> Vec<Example> {
-        vec![Example {
-            description: "Converts string to datetime",
-            example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S""#,
-            result: Some(
-                NuDataFrame::try_from_columns(vec![Column::new(
-                    "datetime".to_string(),
-                    vec![
-                        Value::Date {
-                            val: DateTime::parse_from_str(
-                                "2021-12-30 00:00:00 +0000",
-                                "%Y-%m-%d %H:%M:%S %z",
-                            )
-                            .expect("date calculation should not fail in test"),
-                            span: Span::test_data(),
-                        },
-                        Value::Date {
-                            val: DateTime::parse_from_str(
-                                "2021-12-31 00:00:00 +0000",
-                                "%Y-%m-%d %H:%M:%S %z",
-                            )
-                            .expect("date calculation should not fail in test"),
-                            span: Span::test_data(),
-                        },
-                    ],
-                )])
-                .expect("simple df for test should not fail")
-                .into_value(Span::test_data()),
-            ),
-        }]
+        vec![
+            Example {
+                description: "Converts string to datetime",
+                example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S""#,
+                result: Some(
+                    NuDataFrame::try_from_columns(vec![Column::new(
+                        "datetime".to_string(),
+                        vec![
+                            Value::Date {
+                                val: DateTime::parse_from_str(
+                                    "2021-12-30 00:00:00 +0000",
+                                    "%Y-%m-%d %H:%M:%S %z",
+                                )
+                                .expect("date calculation should not fail in test"),
+                                span: Span::test_data(),
+                            },
+                            Value::Date {
+                                val: DateTime::parse_from_str(
+                                    "2021-12-31 00:00:00 +0000",
+                                    "%Y-%m-%d %H:%M:%S %z",
+                                )
+                                .expect("date calculation should not fail in test"),
+                                span: Span::test_data(),
+                            },
+                        ],
+                    )])
+                    .expect("simple df for test should not fail")
+                    .into_value(Span::test_data()),
+                ),
+            },
+            Example {
+                description: "Converts string to datetime with high resolutions",
+                example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S.%9f""#,
+                result: Some(
+                    NuDataFrame::try_from_columns(vec![Column::new(
+                        "datetime".to_string(),
+                        vec![
+                            Value::Date {
+                                val: DateTime::parse_from_str(
+                                    "2021-12-30 00:00:00.123456789 +0000",
+                                    "%Y-%m-%d %H:%M:%S.%9f %z",
+                                )
+                                .expect("date calculation should not fail in test"),
+                                span: Span::test_data(),
+                            },
+                            Value::Date {
+                                val: DateTime::parse_from_str(
+                                    "2021-12-31 00:00:00.123456789 +0000",
+                                    "%Y-%m-%d %H:%M:%S.%9f %z",
+                                )
+                                .expect("date calculation should not fail in test"),
+                                span: Span::test_data(),
+                            },
+                        ],
+                    )])
+                    .expect("simple df for test should not fail")
+                    .into_value(Span::test_data()),
+                ),
+            },
+        ]
     }

     fn run(
@@ -110,11 +141,11 @@ fn command(
     })?;

     let res = if not_exact {
-        casted.as_datetime_not_exact(Some(format.as_str()), TimeUnit::Milliseconds, None)
+        casted.as_datetime_not_exact(Some(format.as_str()), TimeUnit::Nanoseconds, None)
     } else {
         casted.as_datetime(
             Some(format.as_str()),
-            TimeUnit::Milliseconds,
+            TimeUnit::Nanoseconds,
             false,
             false,
             true,
diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/between_values.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/between_values.rs
index ade5ca59a3..88ba19d993 100644
--- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/between_values.rs
+++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/between_values.rs
@@ -6,7 +6,7 @@ use nu_protocol::{
 use num::Zero;
 use polars::prelude::{
     BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries,
-    NumOpsDispatchChecked, PolarsError, Series, TimeUnit, Utf8NameSpaceImpl,
+    NumOpsDispatchChecked, PolarsError, Series, Utf8NameSpaceImpl,
 };
 use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub};

@@ -580,10 +580,7 @@ where
     F: Fn(&ChunkedArray<Int64Type>, i64) -> ChunkedArray<BooleanType>,
 {
     match series.dtype() {
-        DataType::UInt32
-        | DataType::Int32
-        | DataType::UInt64
-        | DataType::Datetime(TimeUnit::Milliseconds, _) => {
+        DataType::UInt32 | DataType::Int32 | DataType::UInt64 | DataType::Datetime(_, _) => {
             let to_i64 = series.cast(&DataType::Int64);

             match to_i64 {
diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs
index 5aa82743a0..06c7e81f4c 100644
--- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs
+++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs
@@ -749,7 +749,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, Shel
             InputType::Date => {
                 let it = column.values.iter().map(|v| {
                     if let Value::Date { val, .. } = &v {
-                        Some(val.timestamp_millis())
+                        Some(val.timestamp_nanos())
                     } else {
                         None
                     }
@@ -757,7 +757,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, Shel
                 });

                 let res: DatetimeChunked = ChunkedArray::<Int64Type>::from_iter_options(&name, it)
-                    .into_datetime(TimeUnit::Milliseconds, None);
+                    .into_datetime(TimeUnit::Nanoseconds, None);

                 df_series.push(res.into_series())
             }
diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs
index 83f931bcb1..5743fa8414 100644
--- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs
+++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs
@@ -1,6 +1,5 @@
 mod custom_value;

-use core::fmt;
 use nu_protocol::{PipelineData, ShellError, Span, Value};
 use polars::prelude::{col, AggExpr, Expr, Literal};
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
@@ -8,7 +7,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
 // Polars Expression wrapper for Nushell operations
 // Object is behind and Option to allow easy implementation of
 // the Deserialize trait
-#[derive(Default, Clone)]
+#[derive(Default, Clone, Debug)]
 pub struct NuExpression(Option<Expr>);

 // Mocked serialization of the LazyFrame object
@@ -31,12 +30,6 @@ impl<'de> Deserialize<'de> for NuExpression {
     }
 }

-impl fmt::Debug for NuExpression {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "NuExpression")
-    }
-}
-
 // Referenced access to the real LazyFrame
 impl AsRef<polars::prelude::Expr> for NuExpression {
     fn as_ref(&self) -> &polars::prelude::Expr {
@@ -132,6 +125,7 @@ impl NuExpression {
     }
 }

 // Enum to represent the parsing of the expressions from Value
+#[derive(Debug)]
 enum ExtractedExpr {
     Single(Expr),