From f879c00f9d7fff55b2f5b211927104eefda5c2ef Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Mon, 29 Jan 2024 11:26:04 -0800 Subject: [PATCH] The ability to specify a schema when using `dfr open` and `dfr into-df` (#11634) # Description There are times where explicitly specifying a schema for a dataframe is needed such as: - Opening CSV and JSON lines files and needing to provide more information to polars to keep it from failing, or wanting to override the default type conversion - When converting a nushell value to a dataframe and wanting to override the default conversion behaviors. This pull request provides: - A flag to allow specifying a schema when using dfr into-df - A flag to allow specifying a schema when using dfr open that works for CSV and JSON types - A new command `dfr schema` which displays schema information and can list the supported schema dtypes A schema is specified by creating a record that maps each column name to its dtype. Example usages: ``` {a:1, b:{a:2}} | dfr into-df -s {a: u8, b: {a: i32}} | dfr schema {a: 1, b: {a: [1 2 3]}, c: [a b c]} | dfr into-df -s {a: u8, b: {a: list}, c: list} | dfr schema dfr open -s {pid: i32, ppid: i32, name: str, status: str, cpu: f64, mem: i64, virtual: i64} /tmp/ps.jsonl | dfr schema ``` Supported dtypes: null bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 str binary date datetime[time_unit: (ms, us, ns) timezone (optional)] duration[time_unit: (ms, us, ns)] time object unknown list[dtype] structs are also supported but are specified via another record: {a: u8, b: {d: str}} Another feature of the `dfr schema` command is that it returns the schema in a format that can be passed back in as the schema argument: Screenshot 2024-01-29 at 10 23 58 --------- Co-authored-by: Jack Wright --- .../src/dataframe/eager/append.rs | 82 +-- .../src/dataframe/eager/drop.rs | 11 +- .../src/dataframe/eager/drop_duplicates.rs | 23 +- 
.../src/dataframe/eager/drop_nulls.rs | 52 +- .../src/dataframe/eager/dtypes.rs | 26 +- .../src/dataframe/eager/filter_with.rs | 22 +- .../src/dataframe/eager/first.rs | 34 +- .../src/dataframe/eager/get.rs | 11 +- .../src/dataframe/eager/last.rs | 11 +- .../src/dataframe/eager/melt.rs | 2 +- .../src/dataframe/eager/mod.rs | 3 + .../src/dataframe/eager/open.rs | 41 ++ .../src/dataframe/eager/query_df.rs | 11 +- .../src/dataframe/eager/rename.rs | 67 +-- .../src/dataframe/eager/schema.rs | 119 +++++ .../src/dataframe/eager/shape.rs | 13 +- .../src/dataframe/eager/slice.rs | 11 +- .../src/dataframe/eager/summary.rs | 97 ++-- .../src/dataframe/eager/take.rs | 34 +- .../src/dataframe/eager/to_df.rs | 147 ++++-- .../src/dataframe/eager/with_column.rs | 70 +-- .../src/dataframe/expressions/arg_where.rs | 11 +- .../src/dataframe/expressions/concat_str.rs | 45 +- .../src/dataframe/expressions/datepart.rs | 11 +- .../expressions/expressions_macro.rs | 227 +++++---- .../src/dataframe/expressions/is_in.rs | 50 +- .../src/dataframe/expressions/otherwise.rs | 39 +- .../src/dataframe/expressions/quantile.rs | 23 +- .../src/dataframe/expressions/when.rs | 39 +- .../src/dataframe/lazy/aggregate.rs | 78 +-- .../src/dataframe/lazy/collect.rs | 23 +- .../src/dataframe/lazy/explode.rs | 4 +- .../src/dataframe/lazy/fetch.rs | 23 +- .../src/dataframe/lazy/fill_nan.rs | 48 +- .../src/dataframe/lazy/fill_null.rs | 23 +- .../src/dataframe/lazy/filter.rs | 23 +- .../src/dataframe/lazy/flatten.rs | 4 +- .../src/dataframe/lazy/groupby.rs | 78 +-- .../src/dataframe/lazy/join.rs | 194 +++---- .../src/dataframe/lazy/macro_commands.rs | 34 +- .../src/dataframe/lazy/quantile.rs | 11 +- .../src/dataframe/lazy/select.rs | 11 +- .../src/dataframe/lazy/sort_by_expr.rs | 4 +- .../src/dataframe/lazy/to_lazy.rs | 22 +- .../src/dataframe/series/all_false.rs | 29 +- .../src/dataframe/series/all_true.rs | 24 +- .../src/dataframe/series/arg_max.rs | 8 +- .../src/dataframe/series/arg_min.rs | 8 +- 
.../src/dataframe/series/cumulative.rs | 23 +- .../src/dataframe/series/date/as_datetime.rs | 90 ++-- .../src/dataframe/series/date/get_day.rs | 11 +- .../src/dataframe/series/date/get_hour.rs | 11 +- .../src/dataframe/series/date/get_minute.rs | 11 +- .../src/dataframe/series/date/get_month.rs | 11 +- .../dataframe/series/date/get_nanosecond.rs | 11 +- .../src/dataframe/series/date/get_ordinal.rs | 11 +- .../src/dataframe/series/date/get_second.rs | 11 +- .../src/dataframe/series/date/get_week.rs | 11 +- .../src/dataframe/series/date/get_weekday.rs | 11 +- .../src/dataframe/series/date/get_year.rs | 11 +- .../src/dataframe/series/indexes/arg_sort.rs | 46 +- .../src/dataframe/series/indexes/arg_true.rs | 11 +- .../dataframe/series/indexes/arg_unique.rs | 11 +- .../dataframe/series/indexes/set_with_idx.rs | 25 +- .../dataframe/series/masks/is_duplicated.rs | 50 +- .../src/dataframe/series/masks/is_in.rs | 27 +- .../src/dataframe/series/masks/is_not_null.rs | 21 +- .../src/dataframe/series/masks/is_null.rs | 21 +- .../src/dataframe/series/masks/is_unique.rs | 50 +- .../src/dataframe/series/masks/not.rs | 19 +- .../src/dataframe/series/masks/set.rs | 23 +- .../src/dataframe/series/n_null.rs | 18 +- .../src/dataframe/series/n_unique.rs | 18 +- .../src/dataframe/series/rolling.rs | 42 +- .../src/dataframe/series/shift.rs | 11 +- .../dataframe/series/string/concatenate.rs | 19 +- .../src/dataframe/series/string/contains.rs | 19 +- .../src/dataframe/series/string/replace.rs | 19 +- .../dataframe/series/string/replace_all.rs | 19 +- .../dataframe/series/string/str_lengths.rs | 11 +- .../src/dataframe/series/string/str_slice.rs | 19 +- .../src/dataframe/series/string/strftime.rs | 17 +- .../dataframe/series/string/to_lowercase.rs | 19 +- .../dataframe/series/string/to_uppercase.rs | 19 +- .../src/dataframe/series/unique.rs | 8 +- .../src/dataframe/series/value_counts.rs | 23 +- .../src/dataframe/values/mod.rs | 2 + .../values/nu_dataframe/conversion.rs | 477 
++++++++++++------ .../src/dataframe/values/nu_dataframe/mod.rs | 20 +- .../src/dataframe/values/nu_schema.rs | 397 +++++++++++++++ 90 files changed, 2408 insertions(+), 1277 deletions(-) create mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/schema.rs create mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_schema.rs diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/append.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/append.rs index 9c7d7b99c2..3607420f45 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/append.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/append.rs @@ -37,24 +37,27 @@ impl Command for AppendDF { example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df); $a | dfr append $a"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "a_x".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b_x".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "a_x".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b_x".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -64,26 +67,29 @@ impl Command for AppendDF { example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df); $a | dfr append $a --col"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![ - Value::test_int(1), - Value::test_int(3), - Value::test_int(1), - Value::test_int(3), - ], - ), - Column::new( - 
"b".to_string(), - vec![ - Value::test_int(2), - Value::test_int(4), - Value::test_int(2), - Value::test_int(4), - ], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![ + Value::test_int(1), + Value::test_int(3), + Value::test_int(1), + Value::test_int(3), + ], + ), + Column::new( + "b".to_string(), + vec![ + Value::test_int(2), + Value::test_int(4), + Value::test_int(2), + Value::test_int(4), + ], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/drop.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/drop.rs index 205985d8ab..3fee7721d3 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/drop.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/drop.rs @@ -35,10 +35,13 @@ impl Command for DropDF { description: "drop column a", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr drop a", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/drop_duplicates.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/drop_duplicates.rs index 3c44065849..12004032ec 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/drop_duplicates.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/drop_duplicates.rs @@ -46,16 +46,19 @@ impl Command for DropDuplicates { description: "drop duplicates", example: "[[a b]; [1 2] [3 4] [1 2]] | dfr into-df | dfr drop-duplicates", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(3), Value::test_int(1)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(4), 
Value::test_int(2)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(3), Value::test_int(1)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(4), Value::test_int(2)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/drop_nulls.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/drop_nulls.rs index 6a15439150..b0a229cd71 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/drop_nulls.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/drop_nulls.rs @@ -43,20 +43,23 @@ impl Command for DropNulls { let a = ($df | dfr with-column $res --name res); $a | dfr drop-nulls"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(1)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(2)], - ), - Column::new( - "res".to_string(), - vec![Value::test_int(1), Value::test_int(1)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(1)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(2)], + ), + Column::new( + "res".to_string(), + vec![Value::test_int(1), Value::test_int(1)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -66,15 +69,18 @@ impl Command for DropNulls { example: r#"let s = ([1 2 0 0 3 4] | dfr into-df); ($s / $s) | dfr drop-nulls"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "div_0_0".to_string(), - vec![ - Value::test_int(1), - Value::test_int(1), - Value::test_int(1), - Value::test_int(1), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "div_0_0".to_string(), + vec![ + Value::test_int(1), + Value::test_int(1), + Value::test_int(1), + Value::test_int(1), + ], + )], + 
None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/dtypes.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/dtypes.rs index 275c4c87c1..1fcf3ba7ca 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/dtypes.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/dtypes.rs @@ -31,16 +31,19 @@ impl Command for DataTypes { description: "Dataframe dtypes", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr dtypes", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "column".to_string(), - vec![Value::test_string("a"), Value::test_string("b")], - ), - Column::new( - "dtype".to_string(), - vec![Value::test_string("i64"), Value::test_string("i64")], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "column".to_string(), + vec![Value::test_string("a"), Value::test_string("b")], + ), + Column::new( + "dtype".to_string(), + vec![Value::test_string("i64"), Value::test_string("i64")], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -79,6 +82,7 @@ fn command( .dtype(); let dtype_str = dtype.to_string(); + dtypes.push(Value::string(dtype_str, call.head)); Value::string(*v, call.head) @@ -88,7 +92,7 @@ fn command( let names_col = Column::new("column".to_string(), names); let dtypes_col = Column::new("dtype".to_string(), dtypes); - NuDataFrame::try_from_columns(vec![names_col, dtypes_col]) + NuDataFrame::try_from_columns(vec![names_col, dtypes_col], None) .map(|df| PipelineData::Value(df.into_value(call.head), None)) } diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs index 9cd0874abb..c04e3d3bd4 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs @@ -43,10 +43,13 @@ impl Command for FilterWith { example: r#"let mask = ([true 
false] | dfr into-df); [[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with $mask"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(1)]), - Column::new("b".to_string(), vec![Value::test_int(2)]), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(1)]), + Column::new("b".to_string(), vec![Value::test_int(2)]), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -55,10 +58,13 @@ impl Command for FilterWith { description: "Filter dataframe using an expression", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with ((dfr col a) > 1)", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(3)]), - Column::new("b".to_string(), vec![Value::test_int(4)]), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(3)]), + Column::new("b".to_string(), vec![Value::test_int(4)]), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs index 6e88c111a5..aa92712eea 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs @@ -44,10 +44,13 @@ impl Command for FirstDF { description: "Return the first row of a dataframe", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(1)]), - Column::new("b".to_string(), vec![Value::test_int(2)]), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(1)]), + Column::new("b".to_string(), vec![Value::test_int(2)]), + ], + None, + ) .expect("should not fail") .into_value(Span::test_data()), ), @@ -56,16 +59,19 @@ impl 
Command for FirstDF { description: "Return the first two rows of a dataframe", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first 2", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ], + None, + ) .expect("should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/get.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/get.rs index 5d6e4aa6f8..236b6c0eee 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/get.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/get.rs @@ -36,10 +36,13 @@ impl Command for GetDF { description: "Returns the selected column", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr get a", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs index b9db26b0dd..4258d17003 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs @@ -40,10 +40,13 @@ impl Command for LastDF { description: "Create new dataframe with last rows", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr last 1", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(3)]), - 
Column::new("b".to_string(), vec![Value::test_int(4)]), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(3)]), + Column::new("b".to_string(), vec![Value::test_int(4)]), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/melt.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/melt.rs index 7e3f67007a..07cc85d265 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/melt.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/melt.rs @@ -106,7 +106,7 @@ impl Command for MeltDF { Value::test_string("c"), ], ), - ]) + ], None) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/mod.rs index 1bc76b3121..7aedf5ebcb 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/mod.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/mod.rs @@ -15,6 +15,7 @@ mod open; mod query_df; mod rename; mod sample; +mod schema; mod shape; mod slice; mod sql_context; @@ -49,6 +50,7 @@ pub use melt::MeltDF; pub use query_df::QueryDf; pub use rename::RenameDF; pub use sample::SampleDF; +pub use schema::SchemaDF; pub use shape::ShapeDF; pub use slice::SliceDF; pub use sql_context::SQLContext; @@ -93,6 +95,7 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) { QueryDf, RenameDF, SampleDF, + SchemaDF, ShapeDF, SliceDF, TakeDF, diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/open.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/open.rs index 29638b8289..b83013c011 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/open.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/open.rs @@ -1,3 +1,5 @@ +use crate::dataframe::values::NuSchema; + use super::super::values::{NuDataFrame, NuLazyFrame}; use nu_engine::CallExt; use nu_protocol::{ @@ -70,6 +72,12 @@ impl Command for OpenDataFrame { 
"Columns to be selected from csv file. CSV and Parquet file", None, ) + .named( + "schema", + SyntaxShape::Record(vec![]), + r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#, + Some('s') + ) .input_output_type(Type::Any, Type::Custom("dataframe".into())) .category(Category::Custom("dataframe".into())) } @@ -305,10 +313,19 @@ fn from_json( help: None, inner: vec![], })?; + let maybe_schema = call + .get_flag(engine_state, stack, "schema")? + .map(|schema| NuSchema::try_from(&schema)) + .transpose()?; let buf_reader = BufReader::new(file); let reader = JsonReader::new(buf_reader); + let reader = match maybe_schema { + Some(schema) => reader.with_schema(schema.into()), + None => reader, + }; + let df: NuDataFrame = reader .finish() .map_err(|e| ShellError::GenericError { @@ -329,6 +346,10 @@ fn from_jsonl( call: &Call, ) -> Result { let infer_schema: Option = call.get_flag(engine_state, stack, "infer-schema")?; + let maybe_schema = call + .get_flag(engine_state, stack, "schema")? + .map(|schema| NuSchema::try_from(&schema)) + .transpose()?; let file: Spanned = call.req(engine_state, stack, 0)?; let file = File::open(&file.item).map_err(|e| ShellError::GenericError { error: "Error opening file".into(), @@ -343,6 +364,11 @@ fn from_jsonl( .with_json_format(JsonFormat::JsonLines) .infer_schema_len(infer_schema); + let reader = match maybe_schema { + Some(schema) => reader.with_schema(schema.into()), + None => reader, + }; + let df: NuDataFrame = reader .finish() .map_err(|e| ShellError::GenericError { @@ -368,6 +394,11 @@ fn from_csv( let skip_rows: Option = call.get_flag(engine_state, stack, "skip-rows")?; let columns: Option> = call.get_flag(engine_state, stack, "columns")?; + let maybe_schema = call + .get_flag(engine_state, stack, "schema")? + .map(|schema| NuSchema::try_from(&schema)) + .transpose()?; + if call.has_flag(engine_state, stack, "lazy")? 
{ let file: String = call.req(engine_state, stack, 0)?; let csv_reader = LazyCsvReader::new(file); @@ -395,6 +426,11 @@ fn from_csv( let csv_reader = csv_reader.has_header(!no_header); + let csv_reader = match maybe_schema { + Some(schema) => csv_reader.with_schema(Some(schema.into())), + None => csv_reader, + }; + let csv_reader = match infer_schema { None => csv_reader, Some(r) => csv_reader.with_infer_schema_length(Some(r)), @@ -452,6 +488,11 @@ fn from_csv( let csv_reader = csv_reader.has_header(!no_header); + let csv_reader = match maybe_schema { + Some(schema) => csv_reader.with_schema(Some(schema.into())), + None => csv_reader, + }; + let csv_reader = match infer_schema { None => csv_reader, Some(r) => csv_reader.infer_schema(Some(r)), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/query_df.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/query_df.rs index 3b475cba8a..82128909ba 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/query_df.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/query_df.rs @@ -44,10 +44,13 @@ impl Command for QueryDf { description: "Query dataframe using SQL", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr query 'select a from df'", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs index c0c2c04001..61db1c8c2f 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs @@ -46,15 +46,18 @@ impl Command for RenameDF { description: "Renames a series", example: "[5 6 7 8] | dfr into-df | dfr rename '0' new_name", result: Some( 
- NuDataFrame::try_from_columns(vec![Column::new( - "new_name".to_string(), - vec![ - Value::test_int(5), - Value::test_int(6), - Value::test_int(7), - Value::test_int(8), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "new_name".to_string(), + vec![ + Value::test_int(5), + Value::test_int(6), + Value::test_int(7), + Value::test_int(8), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -63,16 +66,19 @@ impl Command for RenameDF { description: "Renames a dataframe column", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename a a_new", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a_new".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a_new".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -81,16 +87,19 @@ impl Command for RenameDF { description: "Renames two dataframe columns", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename [a b] [a_new b_new]", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a_new".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b_new".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a_new".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b_new".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/schema.rs 
b/crates/nu-cmd-dataframe/src/dataframe/eager/schema.rs new file mode 100644 index 0000000000..9dd0c9a858 --- /dev/null +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/schema.rs @@ -0,0 +1,119 @@ +use super::super::values::NuDataFrame; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, Record, ShellError, Signature, Span, Type, Value, +}; + +#[derive(Clone)] +pub struct SchemaDF; + +impl Command for SchemaDF { + fn name(&self) -> &str { + "dfr schema" + } + + fn usage(&self) -> &str { + "Show schema for a dataframe." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .switch("datatype-list", "creates a lazy dataframe", Some('l')) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Dataframe schema", + example: r#"[[a b]; [1 "foo"] [3 "bar"]] | dfr into-df | dfr schema"#, + result: Some(Value::record( + Record::from_raw_cols_vals( + vec!["a".to_string(), "b".to_string()], + vec![ + Value::string("i64", Span::test_data()), + Value::string("str", Span::test_data()), + ], + ), + Span::test_data(), + )), + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + if call.has_flag(engine_state, stack, "datatype-list")? 
{ + Ok(PipelineData::Value(datatype_list(Span::unknown()), None)) + } else { + command(engine_state, stack, call, input) + } + } +} + +fn command( + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline(input, call.head)?; + let schema = df.schema(); + let value: Value = schema.into(); + Ok(PipelineData::Value(value, None)) +} + +fn datatype_list(span: Span) -> Value { + let types: Vec = [ + ("null", ""), + ("bool", ""), + ("u8", ""), + ("u16", ""), + ("u32", ""), + ("u64", ""), + ("i8", ""), + ("i16", ""), + ("i32", ""), + ("i64", ""), + ("f32", ""), + ("f64", ""), + ("str", ""), + ("binary", ""), + ("date", ""), + ("datetime", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns. Timezone wildcard is *. Other Timezone examples: UTC, America/Los_Angeles."), + ("duration", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns."), + ("time", ""), + ("object", ""), + ("unknown", ""), + ("list", ""), + ] + .iter() + .map(|(dtype, note)| { + Value::record(Record::from_raw_cols_vals( + vec!["dtype".to_string(), "note".to_string()], + vec![Value::string(*dtype, span), Value::string(*note, span)], + ),span) + }) + .collect(); + Value::list(types, span) +} + +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(SchemaDF {})]) + } +} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/shape.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/shape.rs index 71ca997767..a139ae8504 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/shape.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/shape.rs @@ -34,10 +34,13 @@ impl Command for ShapeDF { description: "Shows row and column shape", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr shape", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("rows".to_string(), 
vec![Value::test_int(2)]), - Column::new("columns".to_string(), vec![Value::test_int(2)]), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("rows".to_string(), vec![Value::test_int(2)]), + Column::new("columns".to_string(), vec![Value::test_int(2)]), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -70,7 +73,7 @@ fn command( let rows_col = Column::new("rows".to_string(), vec![rows]); let cols_col = Column::new("columns".to_string(), vec![cols]); - NuDataFrame::try_from_columns(vec![rows_col, cols_col]) + NuDataFrame::try_from_columns(vec![rows_col, cols_col], None) .map(|df| PipelineData::Value(df.into_value(call.head), None)) } diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/slice.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/slice.rs index 21be93094b..d1f73ffc22 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/slice.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/slice.rs @@ -37,10 +37,13 @@ impl Command for SliceDF { description: "Create new dataframe from a slice of the rows", example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr slice 0 1", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(1)]), - Column::new("b".to_string(), vec![Value::test_int(2)]), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(1)]), + Column::new("b".to_string(), vec![Value::test_int(2)]), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/summary.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/summary.rs index 798f1e70b0..dafb29afee 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/summary.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/summary.rs @@ -46,53 +46,56 @@ impl Command for Summary { description: "list dataframe descriptives", example: "[[a b]; [1 1] [1 1]] | dfr into-df | dfr 
summary", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "descriptor".to_string(), - vec![ - Value::test_string("count"), - Value::test_string("sum"), - Value::test_string("mean"), - Value::test_string("median"), - Value::test_string("std"), - Value::test_string("min"), - Value::test_string("25%"), - Value::test_string("50%"), - Value::test_string("75%"), - Value::test_string("max"), - ], - ), - Column::new( - "a (i64)".to_string(), - vec![ - Value::test_float(2.0), - Value::test_float(2.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(0.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(1.0), - ], - ), - Column::new( - "b (i64)".to_string(), - vec![ - Value::test_float(2.0), - Value::test_float(2.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(0.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(1.0), - ], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "descriptor".to_string(), + vec![ + Value::test_string("count"), + Value::test_string("sum"), + Value::test_string("mean"), + Value::test_string("median"), + Value::test_string("std"), + Value::test_string("min"), + Value::test_string("25%"), + Value::test_string("50%"), + Value::test_string("75%"), + Value::test_string("max"), + ], + ), + Column::new( + "a (i64)".to_string(), + vec![ + Value::test_float(2.0), + Value::test_float(2.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(0.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(1.0), + ], + ), + Column::new( + "b (i64)".to_string(), + vec![ + Value::test_float(2.0), + Value::test_float(2.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(0.0), + Value::test_float(1.0), + 
Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(1.0), + ], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/take.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/take.rs index 97f40f3c01..e7699898e5 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/take.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/take.rs @@ -44,16 +44,19 @@ impl Command for TakeDF { let indices = ([0 2] | dfr into-df); $df | dfr take $indices"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(4), Value::test_int(4)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(4), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -64,10 +67,13 @@ impl Command for TakeDF { let indices = ([0 2] | dfr into-df); $series | dfr take $indices"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(4), Value::test_int(5)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(4), Value::test_int(5)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs index 45bf5ea42b..8a96751b84 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs @@ -1,10 +1,14 @@ +use crate::dataframe::values::NuSchema; + use super::super::values::{Column, NuDataFrame}; +use 
nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, Span, Type, Value, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; +use polars::prelude::*; #[derive(Clone)] pub struct ToDataFrame; @@ -20,6 +24,12 @@ impl Command for ToDataFrame { fn signature(&self) -> Signature { Signature::build(self.name()) + .named( + "schema", + SyntaxShape::Record(vec![]), + r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#, + Some('s'), + ) .input_output_type(Type::Any, Type::Custom("dataframe".into())) .category(Category::Custom("dataframe".into())) } @@ -30,16 +40,19 @@ impl Command for ToDataFrame { description: "Takes a dictionary and creates a dataframe", example: "[[a b];[1 2] [3 4]] | dfr into-df", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -48,24 +61,27 @@ impl Command for ToDataFrame { description: "Takes a list of tables and creates a dataframe", example: "[[1 2 a] [3 4 b] [5 6 c]] | dfr into-df", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "0".to_string(), - vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)], - ), - Column::new( - "1".to_string(), - vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)], - ), - Column::new( - "2".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("b"), - Value::test_string("c"), - ], - ), - ]) + 
NuDataFrame::try_from_columns( + vec![ + Column::new( + "0".to_string(), + vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)], + ), + Column::new( + "1".to_string(), + vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "2".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("b"), + Value::test_string("c"), + ], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -74,14 +90,17 @@ impl Command for ToDataFrame { description: "Takes a list and creates a dataframe", example: "[a b c] | dfr into-df", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("b"), - Value::test_string("c"), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("b"), + Value::test_string("c"), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -90,14 +109,41 @@ impl Command for ToDataFrame { description: "Takes a list of booleans and creates a dataframe", example: "[true true false] | dfr into-df", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(false), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(false), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Convert to a dataframe and provide a schema", + example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| dfr into-df -s {a: u8, b: {a: list}, c: list}", + result: Some( + NuDataFrame::try_from_series(vec![ + Series::new("a", &[1u8]), + { + let dtype = DataType::Struct(vec![Field::new("a", 
DataType::List(Box::new(DataType::UInt64)))]); + let vals = vec![AnyValue::StructOwned( + Box::new((vec![AnyValue::List(Series::new("a", &[1u64, 2, 3]))], vec![Field::new("a", DataType::String)]))); 1]; + Series::from_any_values_and_dtype("b", &vals, &dtype, false) + .expect("Struct series should not fail") + }, + { + let dtype = DataType::List(Box::new(DataType::String)); + let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))]; + Series::from_any_values_and_dtype("c", &vals, &dtype, false) + .expect("List series should not fail") + } + ], Span::test_data()) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -107,12 +153,17 @@ impl Command for ToDataFrame { fn run( &self, - _engine_state: &EngineState, - _stack: &mut Stack, + engine_state: &EngineState, + stack: &mut Stack, call: &Call, input: PipelineData, ) -> Result { - NuDataFrame::try_from_iter(input.into_iter()) + let maybe_schema = call + .get_flag(engine_state, stack, "schema")? + .map(|schema| NuSchema::try_from(&schema)) + .transpose()?; + + NuDataFrame::try_from_iter(input.into_iter(), maybe_schema) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } } diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs index e1d547a3ba..c3c2661b59 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs @@ -42,20 +42,23 @@ impl Command for WithColumn { | dfr into-df | dfr with-column ([5 6] | dfr into-df) --name c"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(5), Value::test_int(6)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + 
"a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(5), Value::test_int(6)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -70,24 +73,27 @@ impl Command for WithColumn { ] | dfr collect"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(2), Value::test_int(6)], - ), - Column::new( - "d".to_string(), - vec![Value::test_int(3), Value::test_int(9)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(2), Value::test_int(6)], + ), + Column::new( + "d".to_string(), + vec![Value::test_int(3), Value::test_int(9)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/arg_where.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/arg_where.rs index f04be2511e..af826f9cd1 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/arg_where.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/arg_where.rs @@ -32,10 +32,13 @@ impl Command for ExprArgWhere { example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | dfr into-df); $df | dfr select (dfr arg-where ((dfr col b) >= 2) | dfr as b_arg)", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "b_arg".to_string(), - vec![Value::test_int(1), Value::test_int(2)], - )]) + NuDataFrame::try_from_columns( + 
vec![Column::new( + "b_arg".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/concat_str.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/concat_str.rs index 789142c463..0bab26b644 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/concat_str.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/concat_str.rs @@ -41,27 +41,30 @@ impl Command for ExprConcatStr { example: r#"let df = ([[a b c]; [one two 1] [three four 2]] | dfr into-df); $df | dfr with-column ((dfr concat-str "-" [(dfr col a) (dfr col b) ((dfr col c) * 2)]) | dfr as concat)"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("three")], - ), - Column::new( - "b".to_string(), - vec![Value::test_string("two"), Value::test_string("four")], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(1), Value::test_int(2)], - ), - Column::new( - "concat".to_string(), - vec![ - Value::test_string("one-two-2"), - Value::test_string("three-four-4"), - ], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("three")], + ), + Column::new( + "b".to_string(), + vec![Value::test_string("two"), Value::test_string("four")], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "concat".to_string(), + vec![ + Value::test_string("one-two-2"), + Value::test_string("three-four-4"), + ], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/datepart.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/datepart.rs index 3947a6a4e9..30542ea0b0 100644 --- 
a/crates/nu-cmd-dataframe/src/dataframe/expressions/datepart.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/datepart.rs @@ -52,10 +52,13 @@ impl Command for ExprDatePart { description: "Creates an expression to capture the year date part", example: r#"[["2021-12-30T01:02:03.123456789"]] | dfr into-df | dfr as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | dfr with-column [(dfr col datetime | dfr datepart year | dfr as datetime_year )]"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("datetime".to_string(), vec![Value::test_date(dt)]), - Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("datetime".to_string(), vec![Value::test_date(dt)]), + Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs index c96c8297f5..6a5585cdc8 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs @@ -407,10 +407,13 @@ lazy_expr_command!( description: "Max value from columns in a dataframe", example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr max", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(6)],), - Column::new("b".to_string(), vec![Value::test_int(4)],), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(6)],), + Column::new("b".to_string(), vec![Value::test_int(4)],), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -422,16 +425,19 @@ lazy_expr_command!( | dfr group-by a | dfr agg (dfr col b | dfr max)"#, result: Some( - 
NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(4), Value::test_int(1)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(4), Value::test_int(1)], + ), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -452,10 +458,13 @@ lazy_expr_command!( description: "Min value from columns in a dataframe", example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr min", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(1)],), - Column::new("b".to_string(), vec![Value::test_int(1)],), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(1)],), + Column::new("b".to_string(), vec![Value::test_int(1)],), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -467,16 +476,19 @@ lazy_expr_command!( | dfr group-by a | dfr agg (dfr col b | dfr min)"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(1)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(1)], + ), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -497,10 +509,13 @@ lazy_expr_command!( description: "Sums all columns in a dataframe", example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr sum", result: Some( - 
NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(11)],), - Column::new("b".to_string(), vec![Value::test_int(7)],), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(11)],), + Column::new("b".to_string(), vec![Value::test_int(7)],), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -512,16 +527,19 @@ lazy_expr_command!( | dfr group-by a | dfr agg (dfr col b | dfr sum)"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(6), Value::test_int(1)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(6), Value::test_int(1)], + ), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -542,10 +560,13 @@ lazy_expr_command!( description: "Mean value from columns in a dataframe", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr mean", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_float(4.0)],), - Column::new("b".to_string(), vec![Value::test_float(2.0)],), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)],), + Column::new("b".to_string(), vec![Value::test_float(2.0)],), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -557,16 +578,19 @@ lazy_expr_command!( | dfr group-by a | dfr agg (dfr col b | dfr mean)"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - 
vec![Value::test_float(3.0), Value::test_float(1.0)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(3.0), Value::test_float(1.0)], + ), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -589,16 +613,19 @@ expr_command!( | dfr group-by a | dfr agg (dfr col b | dfr median)"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_float(3.0), Value::test_float(1.0)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(3.0), Value::test_float(1.0)], + ), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -618,10 +645,13 @@ lazy_expr_command!( description: "Std value from columns in a dataframe", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr std", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_float(2.0)],), - Column::new("b".to_string(), vec![Value::test_float(0.0)],), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_float(2.0)],), + Column::new("b".to_string(), vec![Value::test_float(0.0)],), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -633,16 +663,19 @@ lazy_expr_command!( | dfr group-by a | dfr agg (dfr col b | dfr std)"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_float(0.0), 
Value::test_float(0.0)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(0.0), Value::test_float(0.0)], + ), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -665,10 +698,13 @@ lazy_expr_command!( "Var value from columns in a dataframe or aggregates columns to their var value", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr var", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_float(4.0)],), - Column::new("b".to_string(), vec![Value::test_float(0.0)],), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)],), + Column::new("b".to_string(), vec![Value::test_float(0.0)],), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -680,16 +716,19 @@ lazy_expr_command!( | dfr group-by a | dfr agg (dfr col b | dfr var)"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_float(0.0), Value::test_float(0.0)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(0.0), Value::test_float(0.0)], + ), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/is_in.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/is_in.rs index 587dd21bad..06994fd50b 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/is_in.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/is_in.rs @@ -39,28 +39,31 
@@ impl Command for ExprIsIn { example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | dfr into-df); $df | dfr with-column (dfr col a | dfr is-in [one two] | dfr as a_in)"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![ - Value::test_string("one"), - Value::test_string("two"), - Value::test_string("three"), - ], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], - ), - Column::new( - "a_in".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(false), - ], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![ + Value::test_string("one"), + Value::test_string("two"), + Value::test_string("three"), + ], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], + ), + Column::new( + "a_in".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(false), + ], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -81,7 +84,8 @@ impl Command for ExprIsIn { let list: Vec = call.req(engine_state, stack, 0)?; let expr = NuExpression::try_from_pipeline(input, call.head)?; - let values = NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)])?; + let values = + NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)], None)?; let list = values.as_series(call.head)?; if matches!(list.dtype(), DataType::Object(..)) { diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/otherwise.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/otherwise.rs index e768f316d5..0125d3bded 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/otherwise.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/otherwise.rs @@ -54,24 +54,27 @@ impl Command for ExprOtherwise { ) | dfr collect"#, result: Some( - 
NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)], - ), - Column::new( - "d".to_string(), - vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)], + ), + Column::new( + "d".to_string(), + vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/quantile.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/quantile.rs index 6952c27e68..60cab60739 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/quantile.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/quantile.rs @@ -41,16 +41,19 @@ impl Command for ExprQuantile { | dfr group-by a | dfr agg (dfr col b | dfr quantile 0.5)"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_float(4.0), Value::test_float(1.0)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(4.0), Value::test_float(1.0)], + ), + ], + None, + ) .expect("simple df for test 
should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/when.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/when.rs index 39885852c9..1248acb3bd 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/when.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/when.rs @@ -62,24 +62,27 @@ impl Command for ExprWhen { ) | dfr collect"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)], - ), - Column::new( - "d".to_string(), - vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)], + ), + Column::new( + "d".to_string(), + vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/aggregate.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/aggregate.rs index 4f393b7d5c..562fa5de18 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/aggregate.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/aggregate.rs @@ -47,24 +47,27 @@ impl Command for LazyAggregate { (dfr col b | dfr sum | dfr as "b_sum") ]"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(2)], - ), - 
Column::new( - "b_min".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "b_max".to_string(), - vec![Value::test_int(4), Value::test_int(6)], - ), - Column::new( - "b_sum".to_string(), - vec![Value::test_int(6), Value::test_int(10)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "b_min".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "b_max".to_string(), + vec![Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b_sum".to_string(), + vec![Value::test_int(6), Value::test_int(10)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -81,24 +84,27 @@ impl Command for LazyAggregate { ] | dfr collect"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(2)], - ), - Column::new( - "b_min".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "b_max".to_string(), - vec![Value::test_int(4), Value::test_int(6)], - ), - Column::new( - "b_sum".to_string(), - vec![Value::test_int(6), Value::test_int(10)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "b_min".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "b_max".to_string(), + vec![Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b_sum".to_string(), + vec![Value::test_int(6), Value::test_int(10)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/collect.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/collect.rs index 30a635ee2e..9f919049b2 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/collect.rs +++ 
b/crates/nu-cmd-dataframe/src/dataframe/lazy/collect.rs @@ -33,16 +33,19 @@ impl Command for LazyCollect { description: "drop duplicates", example: "[[a b]; [1 2] [3 4]] | dfr into-lazy | dfr collect", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs index a445a5d09c..c8e4ed3686 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs @@ -69,7 +69,7 @@ impl Command for LazyExplode { Value::test_string("Skiing"), Value::test_string("Football"), ]), - ]).expect("simple df for test should not fail") + ], None).expect("simple df for test should not fail") .into_value(Span::test_data()), ) }, @@ -86,7 +86,7 @@ impl Command for LazyExplode { Value::test_string("Skiing"), Value::test_string("Football"), ]), - ]).expect("simple df for test should not fail") + ], None).expect("simple df for test should not fail") .into_value(Span::test_data()), ), }, diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/fetch.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/fetch.rs index 163a949191..aad5c812e6 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/fetch.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/fetch.rs @@ -38,16 +38,19 @@ impl Command for LazyFetch { description: "Fetch a rows from the dataframe", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr fetch 2", result: Some( - 
NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(6), Value::test_int(4)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(2)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(2)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_nan.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_nan.rs index 10321b0be2..7febcb115f 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_nan.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_nan.rs @@ -38,16 +38,19 @@ impl Command for LazyFillNA { description: "Fills the NaN values with 0", example: "[1 2 NaN 3 NaN] | dfr into-df | dfr fill-nan 0", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_int(1), - Value::test_int(2), - Value::test_int(0), - Value::test_int(3), - Value::test_int(0), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_int(1), + Value::test_int(2), + Value::test_int(0), + Value::test_int(3), + Value::test_int(0), + ], + )], + None, + ) .expect("Df for test should not fail") .into_value(Span::test_data()), ), @@ -56,16 +59,19 @@ impl Command for LazyFillNA { description: "Fills the NaN values of a whole dataframe", example: "[[a b]; [0.2 1] [0.1 NaN]] | dfr into-df | dfr fill-nan 0", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_float(0.2), Value::test_float(0.1)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(1), Value::test_int(0)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_float(0.2), 
Value::test_float(0.1)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(1), Value::test_int(0)], + ), + ], + None, + ) .expect("Df for test should not fail") .into_value(Span::test_data()), ), @@ -123,7 +129,7 @@ impl Command for LazyFillNA { }) .collect::>(); Ok(PipelineData::Value( - NuDataFrame::try_from_columns(dataframe)?.into_value(call.head), + NuDataFrame::try_from_columns(dataframe, None)?.into_value(call.head), None, )) } diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_null.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_null.rs index 445d5366e7..0acf433532 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_null.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_null.rs @@ -37,16 +37,19 @@ impl Command for LazyFillNull { description: "Fills the null values by 0", example: "[1 2 2 3 3] | dfr into-df | dfr shift 2 | dfr fill-null 0", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_int(0), - Value::test_int(0), - Value::test_int(1), - Value::test_int(2), - Value::test_int(2), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_int(0), + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(2), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/filter.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/filter.rs index 5250c8beed..1c4f68c19c 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/filter.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/filter.rs @@ -38,16 +38,19 @@ impl Command for LazyFilter { description: "Filter dataframe using an expression", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr filter ((dfr col a) >= 4)", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(6), 
Value::test_int(4)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(2)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(2)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/flatten.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/flatten.rs index d1cc60acac..1facfc1d29 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/flatten.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/flatten.rs @@ -71,7 +71,7 @@ Example { Value::test_string("Skiing"), Value::test_string("Football"), ]), - ]).expect("simple df for test should not fail") + ], None).expect("simple df for test should not fail") .into_value(Span::test_data()), ) }, @@ -88,7 +88,7 @@ Example { Value::test_string("Skiing"), Value::test_string("Football"), ]), - ]).expect("simple df for test should not fail") + ], None).expect("simple df for test should not fail") .into_value(Span::test_data()), ), }, diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/groupby.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/groupby.rs index 8f8d293f7d..01c27671ae 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/groupby.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/groupby.rs @@ -46,24 +46,27 @@ impl Command for ToLazyGroupBy { (dfr col b | dfr sum | dfr as "b_sum") ]"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(2)], - ), - Column::new( - "b_min".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "b_max".to_string(), - vec![Value::test_int(4), Value::test_int(6)], - ), - Column::new( - "b_sum".to_string(), - vec![Value::test_int(6), Value::test_int(10)], - ), - ]) + 
NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "b_min".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "b_max".to_string(), + vec![Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b_sum".to_string(), + vec![Value::test_int(6), Value::test_int(10)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -80,24 +83,27 @@ impl Command for ToLazyGroupBy { ] | dfr collect"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(2)], - ), - Column::new( - "b_min".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "b_max".to_string(), - vec![Value::test_int(4), Value::test_int(6)], - ), - Column::new( - "b_sum".to_string(), - vec![Value::test_int(6), Value::test_int(10)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "b_min".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "b_max".to_string(), + vec![Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b_sum".to_string(), + vec![Value::test_int(6), Value::test_int(10)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/join.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/join.rs index e6a057bd72..232d7228f1 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/join.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/join.rs @@ -53,53 +53,56 @@ impl Command for LazyJoin { let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr into-lazy); $df_a | dfr join $df_b a foo | dfr collect"#, result: Some( - NuDataFrame::try_from_columns(vec![ - 
Column::new( - "a".to_string(), - vec![ - Value::test_int(1), - Value::test_int(2), - Value::test_int(1), - Value::test_int(1), - ], - ), - Column::new( - "b".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("b"), - Value::test_string("c"), - Value::test_string("c"), - ], - ), - Column::new( - "c".to_string(), - vec![ - Value::test_int(0), - Value::test_int(1), - Value::test_int(2), - Value::test_int(3), - ], - ), - Column::new( - "bar".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("c"), - Value::test_string("a"), - Value::test_string("a"), - ], - ), - Column::new( - "ham".to_string(), - vec![ - Value::test_string("let"), - Value::test_string("var"), - Value::test_string("let"), - Value::test_string("let"), - ], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![ + Value::test_int(1), + Value::test_int(2), + Value::test_int(1), + Value::test_int(1), + ], + ), + Column::new( + "b".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("b"), + Value::test_string("c"), + Value::test_string("c"), + ], + ), + Column::new( + "c".to_string(), + vec![ + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(3), + ], + ), + Column::new( + "bar".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("c"), + Value::test_string("a"), + Value::test_string("a"), + ], + ), + Column::new( + "ham".to_string(), + vec![ + Value::test_string("let"), + Value::test_string("var"), + Value::test_string("let"), + Value::test_string("let"), + ], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -110,53 +113,56 @@ impl Command for LazyJoin { let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr into-lazy); $df_a | dfr join $df_b a foo"#, result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![ - Value::test_int(1), - 
Value::test_int(2), - Value::test_int(1), - Value::test_int(1), - ], - ), - Column::new( - "b".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("b"), - Value::test_string("c"), - Value::test_string("c"), - ], - ), - Column::new( - "c".to_string(), - vec![ - Value::test_int(0), - Value::test_int(1), - Value::test_int(2), - Value::test_int(3), - ], - ), - Column::new( - "bar".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("c"), - Value::test_string("a"), - Value::test_string("a"), - ], - ), - Column::new( - "ham".to_string(), - vec![ - Value::test_string("let"), - Value::test_string("var"), - Value::test_string("let"), - Value::test_string("let"), - ], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![ + Value::test_int(1), + Value::test_int(2), + Value::test_int(1), + Value::test_int(1), + ], + ), + Column::new( + "b".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("b"), + Value::test_string("c"), + Value::test_string("c"), + ], + ), + Column::new( + "c".to_string(), + vec![ + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(3), + ], + ), + Column::new( + "bar".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("c"), + Value::test_string("a"), + Value::test_string("a"), + ], + ), + Column::new( + "ham".to_string(), + vec![ + Value::test_string("let"), + Value::test_string("var"), + Value::test_string("let"), + Value::test_string("let"), + ], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/macro_commands.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/macro_commands.rs index 14bbead9e8..c3fc73fae1 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/macro_commands.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/macro_commands.rs @@ -188,16 +188,19 @@ lazy_command!( description: "Reverses the 
dataframe.", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr reverse", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),], + ), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -231,10 +234,13 @@ lazy_command!( description: "Median value from columns in a dataframe", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr median", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_float(4.0)],), - Column::new("b".to_string(), vec![Value::test_float(2.0)],), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)],), + Column::new("b".to_string(), vec![Value::test_float(2.0)],), + ], + None + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/quantile.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/quantile.rs index 56162330d4..1882477f12 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/quantile.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/quantile.rs @@ -38,10 +38,13 @@ impl Command for LazyQuantile { description: "quantile value from columns in a dataframe", example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr quantile 0.5", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_float(4.0)]), - Column::new("b".to_string(), vec![Value::test_float(2.0)]), - ]) + NuDataFrame::try_from_columns( + 
vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)]), + Column::new("b".to_string(), vec![Value::test_float(2.0)]), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/select.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/select.rs index 25809ee11c..614c029892 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/select.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/select.rs @@ -37,10 +37,13 @@ impl Command for LazySelect { description: "Select a column from the dataframe", example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr select a", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "a".to_string(), - vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/sort_by_expr.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/sort_by_expr.rs index bd14b7a416..a81cbfd5d4 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/sort_by_expr.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/sort_by_expr.rs @@ -60,7 +60,7 @@ impl Command for LazySortBy { "b".to_string(), vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)], ), - ]) + ], None) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -89,7 +89,7 @@ impl Command for LazySortBy { Value::test_int(2), ], ), - ]) + ], None) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/to_lazy.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/to_lazy.rs index 168da2d370..32b7163524 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/to_lazy.rs +++ 
b/crates/nu-cmd-dataframe/src/dataframe/lazy/to_lazy.rs @@ -1,9 +1,12 @@ +use crate::dataframe::values::NuSchema; + use super::super::values::{NuDataFrame, NuLazyFrame}; +use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, Type, Value, + Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Type, Value, }; #[derive(Clone)] @@ -20,6 +23,12 @@ impl Command for ToLazyFrame { fn signature(&self) -> Signature { Signature::build(self.name()) + .named( + "schema", + SyntaxShape::Record(vec![]), + r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#, + Some('s'), + ) .input_output_type(Type::Any, Type::Custom("dataframe".into())) .category(Category::Custom("lazyframe".into())) } @@ -34,12 +43,17 @@ impl Command for ToLazyFrame { fn run( &self, - _engine_state: &EngineState, - _stack: &mut Stack, + engine_state: &EngineState, + stack: &mut Stack, call: &Call, input: PipelineData, ) -> Result { - let df = NuDataFrame::try_from_iter(input.into_iter())?; + let maybe_schema = call + .get_flag(engine_state, stack, "schema")? 
+ .map(|schema| NuSchema::try_from(&schema)) + .transpose()?; + + let df = NuDataFrame::try_from_iter(input.into_iter(), maybe_schema)?; let lazy = NuLazyFrame::from_dataframe(df); let value = Value::custom_value(Box::new(lazy), call.head); diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/all_false.rs b/crates/nu-cmd-dataframe/src/dataframe/series/all_false.rs index b9aa05bcee..cbe1709611 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/all_false.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/all_false.rs @@ -33,10 +33,13 @@ impl Command for AllFalse { description: "Returns true if all values are false", example: "[false false false] | dfr into-df | dfr all-false", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "all_false".to_string(), - vec![Value::test_bool(true)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "all_false".to_string(), + vec![Value::test_bool(true)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -47,10 +50,13 @@ impl Command for AllFalse { let res = ($s > 9); $res | dfr all-false"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "all_false".to_string(), - vec![Value::test_bool(false)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "all_false".to_string(), + vec![Value::test_bool(false)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -88,8 +94,11 @@ fn command( let value = Value::bool(!bool.any(), call.head); - NuDataFrame::try_from_columns(vec![Column::new("all_false".to_string(), vec![value])]) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) + NuDataFrame::try_from_columns( + vec![Column::new("all_false".to_string(), vec![value])], + None, + ) + .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } #[cfg(test)] diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/all_true.rs 
b/crates/nu-cmd-dataframe/src/dataframe/series/all_true.rs index 47be833fb7..564519489d 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/all_true.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/all_true.rs @@ -33,10 +33,13 @@ impl Command for AllTrue { description: "Returns true if all values are true", example: "[true true true] | dfr into-df | dfr all-true", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "all_true".to_string(), - vec![Value::test_bool(true)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "all_true".to_string(), + vec![Value::test_bool(true)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -47,10 +50,13 @@ impl Command for AllTrue { let res = ($s > 9); $res | dfr all-true"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "all_true".to_string(), - vec![Value::test_bool(false)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "all_true".to_string(), + vec![Value::test_bool(false)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -88,7 +94,7 @@ fn command( let value = Value::bool(bool.all(), call.head); - NuDataFrame::try_from_columns(vec![Column::new("all_true".to_string(), vec![value])]) + NuDataFrame::try_from_columns(vec![Column::new("all_true".to_string(), vec![value])], None) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/arg_max.rs b/crates/nu-cmd-dataframe/src/dataframe/series/arg_max.rs index c517eb2acb..9f35af1206 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/arg_max.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/arg_max.rs @@ -37,10 +37,10 @@ impl Command for ArgMax { description: "Returns index for max value", example: "[1 3 2] | dfr into-df | dfr arg-max", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "arg_max".to_string(), 
- vec![Value::test_int(1)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new("arg_max".to_string(), vec![Value::test_int(1)])], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/arg_min.rs b/crates/nu-cmd-dataframe/src/dataframe/series/arg_min.rs index fd7df8f5d2..9a56efc0e3 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/arg_min.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/arg_min.rs @@ -37,10 +37,10 @@ impl Command for ArgMin { description: "Returns index for min value", example: "[1 3 2] | dfr into-df | dfr arg-min", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "arg_min".to_string(), - vec![Value::test_int(0)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new("arg_min".to_string(), vec![Value::test_int(0)])], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/cumulative.rs b/crates/nu-cmd-dataframe/src/dataframe/series/cumulative.rs index accf75e615..a4e0730b4d 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/cumulative.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/cumulative.rs @@ -69,16 +69,19 @@ impl Command for Cumulative { description: "Cumulative sum for a series", example: "[1 2 3 4 5] | dfr into-df | dfr cumulative sum", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0_cumulative_sum".to_string(), - vec![ - Value::test_int(1), - Value::test_int(3), - Value::test_int(6), - Value::test_int(10), - Value::test_int(15), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0_cumulative_sum".to_string(), + vec![ + Value::test_int(1), + Value::test_int(3), + Value::test_int(6), + Value::test_int(10), + Value::test_int(15), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git 
a/crates/nu-cmd-dataframe/src/dataframe/series/date/as_datetime.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/as_datetime.rs index 50e91d08f3..c7b590b22d 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/as_datetime.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/date/as_datetime.rs @@ -53,27 +53,30 @@ impl Command for AsDateTime { description: "Converts string to datetime", example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S""#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "datetime".to_string(), - vec![ - Value::date( - DateTime::parse_from_str( - "2021-12-30 00:00:00 +0000", - "%Y-%m-%d %H:%M:%S %z", - ) - .expect("date calculation should not fail in test"), - Span::test_data(), - ), - Value::date( - DateTime::parse_from_str( - "2021-12-31 00:00:00 +0000", - "%Y-%m-%d %H:%M:%S %z", - ) - .expect("date calculation should not fail in test"), - Span::test_data(), - ), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "datetime".to_string(), + vec![ + Value::date( + DateTime::parse_from_str( + "2021-12-30 00:00:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + Value::date( + DateTime::parse_from_str( + "2021-12-31 00:00:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -82,27 +85,30 @@ impl Command for AsDateTime { description: "Converts string to datetime with high resolutions", example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S.%9f""#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "datetime".to_string(), - vec![ - Value::date( - DateTime::parse_from_str( - "2021-12-30 00:00:00.123456789 +0000", - 
"%Y-%m-%d %H:%M:%S.%9f %z", - ) - .expect("date calculation should not fail in test"), - Span::test_data(), - ), - Value::date( - DateTime::parse_from_str( - "2021-12-31 00:00:00.123456789 +0000", - "%Y-%m-%d %H:%M:%S.%9f %z", - ) - .expect("date calculation should not fail in test"), - Span::test_data(), - ), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "datetime".to_string(), + vec![ + Value::date( + DateTime::parse_from_str( + "2021-12-30 00:00:00.123456789 +0000", + "%Y-%m-%d %H:%M:%S.%9f %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + Value::date( + DateTime::parse_from_str( + "2021-12-31 00:00:00.123456789 +0000", + "%Y-%m-%d %H:%M:%S.%9f %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_day.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_day.rs index f7b5baba0c..e441289a31 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_day.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_day.rs @@ -35,10 +35,13 @@ impl Command for GetDay { let df = ([$dt $dt] | dfr into-df); $df | dfr get-day"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(4), Value::test_int(4)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(4), Value::test_int(4)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_hour.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_hour.rs index 890ab11965..88402a459e 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_hour.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_hour.rs @@ -35,10 
+35,13 @@ impl Command for GetHour { let df = ([$dt $dt] | dfr into-df); $df | dfr get-hour"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(16), Value::test_int(16)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(16), Value::test_int(16)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_minute.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_minute.rs index b005aad595..acf5777a08 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_minute.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_minute.rs @@ -35,10 +35,13 @@ impl Command for GetMinute { let df = ([$dt $dt] | dfr into-df); $df | dfr get-minute"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(39), Value::test_int(39)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(39), Value::test_int(39)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_month.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_month.rs index 7da2b0ed29..820eee58d2 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_month.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_month.rs @@ -35,10 +35,13 @@ impl Command for GetMonth { let df = ([$dt $dt] | dfr into-df); $df | dfr get-month"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(8), Value::test_int(8)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(8), Value::test_int(8)], + )], + None, + ) .expect("simple df for test should not fail") 
.into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_nanosecond.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_nanosecond.rs index 28f97d39ae..4279ac741b 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_nanosecond.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_nanosecond.rs @@ -35,10 +35,13 @@ impl Command for GetNanosecond { let df = ([$dt $dt] | dfr into-df); $df | dfr get-nanosecond"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(0), Value::test_int(0)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(0), Value::test_int(0)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_ordinal.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_ordinal.rs index fb30c8d753..3b38d7ff00 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_ordinal.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_ordinal.rs @@ -35,10 +35,13 @@ impl Command for GetOrdinal { let df = ([$dt $dt] | dfr into-df); $df | dfr get-ordinal"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(217), Value::test_int(217)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(217), Value::test_int(217)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_second.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_second.rs index 16037c0b55..fa01c66d0e 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_second.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_second.rs @@ -35,10 +35,13 @@ impl Command for 
GetSecond { let df = ([$dt $dt] | dfr into-df); $df | dfr get-second"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(18), Value::test_int(18)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(18), Value::test_int(18)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_week.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_week.rs index 5edb0a94d1..cc5f60fad0 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_week.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_week.rs @@ -35,10 +35,13 @@ impl Command for GetWeek { let df = ([$dt $dt] | dfr into-df); $df | dfr get-week"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(32), Value::test_int(32)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(32), Value::test_int(32)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_weekday.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_weekday.rs index 9470101c90..24aa90ace3 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_weekday.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_weekday.rs @@ -35,10 +35,13 @@ impl Command for GetWeekDay { let df = ([$dt $dt] | dfr into-df); $df | dfr get-weekday"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(2), Value::test_int(2)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(2), Value::test_int(2)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), 
diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_year.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_year.rs index 4f12659e3a..22c216ae4b 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_year.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_year.rs @@ -35,10 +35,13 @@ impl Command for GetYear { let df = ([$dt $dt] | dfr into-df); $df | dfr get-year"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(2020), Value::test_int(2020)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(2020), Value::test_int(2020)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_sort.rs b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_sort.rs index 4707ea6eee..e74c64c557 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_sort.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_sort.rs @@ -46,16 +46,19 @@ impl Command for ArgSort { description: "Returns indexes for a sorted series", example: "[1 2 2 3 3] | dfr into-df | dfr arg-sort", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "arg_sort".to_string(), - vec![ - Value::test_int(0), - Value::test_int(1), - Value::test_int(2), - Value::test_int(3), - Value::test_int(4), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "arg_sort".to_string(), + vec![ + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(3), + Value::test_int(4), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -64,16 +67,19 @@ impl Command for ArgSort { description: "Returns indexes for a sorted series", example: "[1 2 2 3 3] | dfr into-df | dfr arg-sort --reverse", result: Some( - 
NuDataFrame::try_from_columns(vec![Column::new( - "arg_sort".to_string(), - vec![ - Value::test_int(3), - Value::test_int(4), - Value::test_int(1), - Value::test_int(2), - Value::test_int(0), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "arg_sort".to_string(), + vec![ + Value::test_int(3), + Value::test_int(4), + Value::test_int(1), + Value::test_int(2), + Value::test_int(0), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_true.rs b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_true.rs index 9ce0127caa..042e17b112 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_true.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_true.rs @@ -37,10 +37,13 @@ impl Command for ArgTrue { description: "Returns indexes where values are true", example: "[false true false] | dfr into-df | dfr arg-true", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "arg_true".to_string(), - vec![Value::test_int(1)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "arg_true".to_string(), + vec![Value::test_int(1)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_unique.rs b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_unique.rs index f22e5c2e5c..65af1e6974 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_unique.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_unique.rs @@ -37,10 +37,13 @@ impl Command for ArgUnique { description: "Returns indexes for unique values", example: "[1 2 2 3 3] | dfr into-df | dfr arg-unique", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "arg_unique".to_string(), - vec![Value::test_int(0), Value::test_int(1), Value::test_int(3)], - )]) + 
NuDataFrame::try_from_columns( + vec![Column::new( + "arg_unique".to_string(), + vec![Value::test_int(0), Value::test_int(1), Value::test_int(3)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/set_with_idx.rs b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/set_with_idx.rs index 94d995a73a..d2bc5a623e 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/set_with_idx.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/set_with_idx.rs @@ -43,17 +43,20 @@ impl Command for SetWithIndex { let indices = ([0 2] | dfr into-df); $series | dfr set-with-idx 6 --indices $indices"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_int(6), - Value::test_int(1), - Value::test_int(6), - Value::test_int(2), - Value::test_int(4), - Value::test_int(3), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_int(6), + Value::test_int(1), + Value::test_int(6), + Value::test_int(2), + Value::test_int(4), + Value::test_int(3), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_duplicated.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_duplicated.rs index 32cf51ca83..4c0c1490e6 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_duplicated.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_duplicated.rs @@ -34,18 +34,21 @@ impl Command for IsDuplicated { description: "Create mask indicating duplicated values", example: "[5 6 6 6 8 8 8] | dfr into-df | dfr is-duplicated", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "is_duplicated".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(true), - 
Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(true), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "is_duplicated".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -55,16 +58,19 @@ impl Command for IsDuplicated { example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | dfr into-df | dfr is-duplicated", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "is_duplicated".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(false), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "is_duplicated".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(false), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_in.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_in.rs index 2e71271861..5b7a4e208d 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_in.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_in.rs @@ -36,18 +36,21 @@ impl Command for IsIn { example: r#"let other = ([1 3 6] | dfr into-df); [5 6 6 6 8 8 8] | dfr into-df | dfr is-in $other"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "is_in".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "is_in".to_string(), + vec![ 
+ Value::test_bool(false), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs index e7fdcec35b..8015e5016c 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs @@ -41,15 +41,18 @@ impl Command for IsNotNull { let res = ($s / $s); $res | dfr is-not-null"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "is_not_null".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(false), - Value::test_bool(true), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "is_not_null".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(false), + Value::test_bool(true), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs index 77e32dfa5b..7ba7790722 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs @@ -41,15 +41,18 @@ impl Command for IsNull { let res = ($s / $s); $res | dfr is-null"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "is_null".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(true), - Value::test_bool(false), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "is_null".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(false), + 
Value::test_bool(true), + Value::test_bool(false), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_unique.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_unique.rs index 9ea33f248f..daf477c9c1 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_unique.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_unique.rs @@ -34,18 +34,21 @@ impl Command for IsUnique { description: "Create mask indicating unique values", example: "[5 6 6 6 8 8 8] | dfr into-df | dfr is-unique", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "is_unique".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "is_unique".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -54,16 +57,19 @@ impl Command for IsUnique { description: "Create mask indicating duplicated rows in a dataframe", example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | dfr into-df | dfr is-unique", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "is_unique".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(true), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "is_unique".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(true), + ], + )], + None, + ) 
.expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/not.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/not.rs index 694c7193ab..448cf15e1f 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/not.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/masks/not.rs @@ -34,14 +34,17 @@ impl Command for NotSeries { description: "Inverts boolean mask", example: "[true false true] | dfr into-df | dfr not", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(true), - Value::test_bool(false), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(true), + Value::test_bool(false), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/set.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/set.rs index 3a7e63f142..a296eb3c90 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/set.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/masks/set.rs @@ -43,16 +43,19 @@ impl Command for SetSeries { let mask = ($s | dfr is-null); $s | dfr set 0 --mask $mask"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_int(0), - Value::test_int(0), - Value::test_int(1), - Value::test_int(2), - Value::test_int(2), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_int(0), + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(2), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/n_null.rs b/crates/nu-cmd-dataframe/src/dataframe/series/n_null.rs 
index 52f61a0a15..79f27c0e41 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/n_null.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/n_null.rs @@ -33,10 +33,13 @@ impl Command for NNull { example: r#"let s = ([1 1 0 0 3 3 4] | dfr into-df); ($s / $s) | dfr count-null"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "count_null".to_string(), - vec![Value::test_int(2)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "count_null".to_string(), + vec![Value::test_int(2)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -65,8 +68,11 @@ fn command( let res = df.as_series(call.head)?.null_count(); let value = Value::int(res as i64, call.head); - NuDataFrame::try_from_columns(vec![Column::new("count_null".to_string(), vec![value])]) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) + NuDataFrame::try_from_columns( + vec![Column::new("count_null".to_string(), vec![value])], + None, + ) + .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } #[cfg(test)] diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs b/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs index 857af76924..ad39a12ef2 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs @@ -38,10 +38,13 @@ impl Command for NUnique { description: "Counts unique values", example: "[1 1 2 2 3 3 4] | dfr into-df | dfr n-unique", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "count_unique".to_string(), - vec![Value::test_int(4)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "count_unique".to_string(), + vec![Value::test_int(4)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -96,8 +99,11 @@ fn command( let value = Value::int(res as i64, call.head); - 
NuDataFrame::try_from_columns(vec![Column::new("count_unique".to_string(), vec![value])]) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) + NuDataFrame::try_from_columns( + vec![Column::new("count_unique".to_string(), vec![value])], + None, + ) + .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } #[cfg(test)] diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/rolling.rs b/crates/nu-cmd-dataframe/src/dataframe/series/rolling.rs index e3159becd8..059c6c56a2 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/rolling.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/rolling.rs @@ -72,15 +72,18 @@ impl Command for Rolling { description: "Rolling sum for a series", example: "[1 2 3 4 5] | dfr into-df | dfr rolling sum 2 | dfr drop-nulls", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0_rolling_sum".to_string(), - vec![ - Value::test_int(3), - Value::test_int(5), - Value::test_int(7), - Value::test_int(9), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0_rolling_sum".to_string(), + vec![ + Value::test_int(3), + Value::test_int(5), + Value::test_int(7), + Value::test_int(9), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), @@ -89,15 +92,18 @@ impl Command for Rolling { description: "Rolling max for a series", example: "[1 2 3 4 5] | dfr into-df | dfr rolling max 2 | dfr drop-nulls", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0_rolling_max".to_string(), - vec![ - Value::test_int(2), - Value::test_int(3), - Value::test_int(4), - Value::test_int(5), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0_rolling_max".to_string(), + vec![ + Value::test_int(2), + Value::test_int(3), + Value::test_int(4), + Value::test_int(5), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git 
a/crates/nu-cmd-dataframe/src/dataframe/series/shift.rs b/crates/nu-cmd-dataframe/src/dataframe/series/shift.rs index 7234039caf..75410097f5 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/shift.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/shift.rs @@ -44,10 +44,13 @@ impl Command for Shift { description: "Shifts the values by a given period", example: "[1 2 2 3 3] | dfr into-df | dfr shift 2 | dfr drop-nulls", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(1), Value::test_int(2), Value::test_int(2)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(1), Value::test_int(2), Value::test_int(2)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/concatenate.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/concatenate.rs index 03e94ea855..762a766d9d 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/concatenate.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/string/concatenate.rs @@ -40,14 +40,17 @@ impl Command for Concatenate { example: r#"let other = ([za xs cd] | dfr into-df); [abc abc abc] | dfr into-df | dfr concatenate $other"#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("abcza"), - Value::test_string("abcxs"), - Value::test_string("abccd"), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("abcza"), + Value::test_string("abcxs"), + Value::test_string("abccd"), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/contains.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/contains.rs index ba05e3458f..5ebc2dbfc3 100644 --- 
a/crates/nu-cmd-dataframe/src/dataframe/series/string/contains.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/string/contains.rs @@ -39,14 +39,17 @@ impl Command for Contains { description: "Returns boolean indicating if pattern was found", example: "[abc acb acb] | dfr into-df | dfr contains ab", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(false), - Value::test_bool(false), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(false), + Value::test_bool(false), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/replace.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/replace.rs index 3e3de9dc16..3f6a89afa2 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/replace.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/string/replace.rs @@ -46,14 +46,17 @@ impl Command for Replace { description: "Replaces string", example: "[abc abc abc] | dfr into-df | dfr replace --pattern ab --replace AB", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("ABc"), - Value::test_string("ABc"), - Value::test_string("ABc"), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("ABc"), + Value::test_string("ABc"), + Value::test_string("ABc"), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/replace_all.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/replace_all.rs index aa82bc82bb..35f53ca60b 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/replace_all.rs +++ 
b/crates/nu-cmd-dataframe/src/dataframe/series/string/replace_all.rs @@ -46,14 +46,17 @@ impl Command for ReplaceAll { description: "Replaces string", example: "[abac abac abac] | dfr into-df | dfr replace-all --pattern a --replace A", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("AbAc"), - Value::test_string("AbAc"), - Value::test_string("AbAc"), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("AbAc"), + Value::test_string("AbAc"), + Value::test_string("AbAc"), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/str_lengths.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/str_lengths.rs index 85f9437161..653893d02c 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/str_lengths.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/string/str_lengths.rs @@ -33,10 +33,13 @@ impl Command for StrLengths { description: "Returns string lengths", example: "[a ab abc] | dfr into-df | dfr str-lengths", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/str_slice.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/str_slice.rs index 79dc5f7938..7e7ed2e875 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/str_slice.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/string/str_slice.rs @@ -36,14 +36,17 @@ impl Command for StrSlice { description: "Creates slices from the strings", 
example: "[abcded abc321 abc123] | dfr into-df | dfr str-slice 1 --length 2", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("bc"), - Value::test_string("bc"), - Value::test_string("bc"), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("bc"), + Value::test_string("bc"), + Value::test_string("bc"), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/strftime.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/strftime.rs index b17ea48af2..f16c1fe6c3 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/strftime.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/string/strftime.rs @@ -37,13 +37,16 @@ impl Command for StrFTime { let df = ([$dt $dt] | dfr into-df); $df | dfr strftime "%Y/%m/%d""#, result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("2020/08/04"), - Value::test_string("2020/08/04"), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("2020/08/04"), + Value::test_string("2020/08/04"), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/to_lowercase.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/to_lowercase.rs index 86f9ec6b40..58f12bc2b0 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/to_lowercase.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/string/to_lowercase.rs @@ -33,14 +33,17 @@ impl Command for ToLowerCase { description: "Modifies strings to lowercase", example: "[Abc aBc abC] | dfr into-df | dfr lowercase", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - 
Value::test_string("abc"), - Value::test_string("abc"), - Value::test_string("abc"), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("abc"), + Value::test_string("abc"), + Value::test_string("abc"), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/to_uppercase.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/to_uppercase.rs index daf260b617..111c7e40ea 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/to_uppercase.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/string/to_uppercase.rs @@ -37,14 +37,17 @@ impl Command for ToUpperCase { description: "Modifies strings to uppercase", example: "[Abc aBc abC] | dfr into-df | dfr uppercase", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("ABC"), - Value::test_string("ABC"), - Value::test_string("ABC"), - ], - )]) + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("ABC"), + Value::test_string("ABC"), + Value::test_string("ABC"), + ], + )], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/unique.rs b/crates/nu-cmd-dataframe/src/dataframe/series/unique.rs index df40e8fa4c..fa09052a32 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/unique.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/unique.rs @@ -53,10 +53,10 @@ impl Command for Unique { description: "Returns unique values from a series", example: "[2 2 2 2 2] | dfr into-df | dfr unique", result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(2)], - )]) + NuDataFrame::try_from_columns( + vec![Column::new("0".to_string(), vec![Value::test_int(2)])], + None, + ) .expect("simple df for 
test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/value_counts.rs b/crates/nu-cmd-dataframe/src/dataframe/series/value_counts.rs index a56c13ec2a..20930e931c 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/value_counts.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/value_counts.rs @@ -34,16 +34,19 @@ impl Command for ValueCount { description: "Calculates value counts", example: "[5 5 5 5 6 6] | dfr into-df | dfr value-counts", result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "0".to_string(), - vec![Value::test_int(5), Value::test_int(6)], - ), - Column::new( - "count".to_string(), - vec![Value::test_int(4), Value::test_int(2)], - ), - ]) + NuDataFrame::try_from_columns( + vec![ + Column::new( + "0".to_string(), + vec![Value::test_int(5), Value::test_int(6)], + ), + Column::new( + "count".to_string(), + vec![Value::test_int(4), Value::test_int(2)], + ), + ], + None, + ) .expect("simple df for test should not fail") .into_value(Span::test_data()), ), diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/mod.rs index 1a5c54a296..f5824b2c46 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/values/mod.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/values/mod.rs @@ -2,6 +2,7 @@ mod nu_dataframe; mod nu_expression; mod nu_lazyframe; mod nu_lazygroupby; +mod nu_schema; mod nu_when; pub mod utils; @@ -9,4 +10,5 @@ pub use nu_dataframe::{Axis, Column, NuDataFrame}; pub use nu_expression::NuExpression; pub use nu_lazyframe::NuLazyFrame; pub use nu_lazygroupby::NuLazyGroupBy; +pub use nu_schema::NuSchema; pub use nu_when::NuWhen; diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs index e232cec301..e7a06c2060 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs +++ 
b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs @@ -9,14 +9,17 @@ use polars::chunked_array::ChunkedArray; use polars::datatypes::AnyValue; use polars::export::arrow::Either; use polars::prelude::{ - DataFrame, DataType, DatetimeChunked, Float64Type, Int64Type, IntoSeries, - ListBooleanChunkedBuilder, ListBuilderTrait, ListPrimitiveChunkedBuilder, - ListStringChunkedBuilder, ListType, NamedFrom, NewChunkedArray, ObjectType, Series, - TemporalMethods, TimeUnit, + DataFrame, DataType, DatetimeChunked, Float32Type, Float64Type, Int16Type, Int32Type, + Int64Type, Int8Type, IntoSeries, ListBooleanChunkedBuilder, ListBuilderTrait, + ListPrimitiveChunkedBuilder, ListStringChunkedBuilder, ListType, NamedFrom, NewChunkedArray, + ObjectType, Schema, Series, StructChunked, TemporalMethods, TimeUnit, UInt16Type, UInt32Type, + UInt64Type, UInt8Type, }; use nu_protocol::{Record, ShellError, Span, Value}; +use crate::dataframe::values::NuSchema; + use super::{DataFrameValue, NuDataFrame}; const NANOS_PER_DAY: i64 = 86_400_000_000_000; @@ -28,6 +31,39 @@ const NANOS_PER_DAY: i64 = 86_400_000_000_000; // practical reasons (~ a few thousand rows). const VALUES_CAPACITY: usize = 10; +macro_rules! 
value_to_primitive { + ($value:ident, u8) => { + $value.as_i64().map(|v| v as u8) + }; + ($value:ident, u16) => { + $value.as_i64().map(|v| v as u16) + }; + ($value:ident, u32) => { + $value.as_i64().map(|v| v as u32) + }; + ($value:ident, u64) => { + $value.as_i64().map(|v| v as u64) + }; + ($value:ident, i8) => { + $value.as_i64().map(|v| v as i8) + }; + ($value:ident, i16) => { + $value.as_i64().map(|v| v as i16) + }; + ($value:ident, i32) => { + $value.as_i64().map(|v| v as i32) + }; + ($value:ident, i64) => { + $value.as_i64() + }; + ($value:ident, f32) => { + $value.as_f64().map(|v| v as f32) + }; + ($value:ident, f64) => { + $value.as_f64() + }; +} + #[derive(Debug)] pub struct Column { name: String, @@ -74,23 +110,10 @@ impl DerefMut for Column { } } -#[derive(Debug)] -pub enum InputType { - Integer, - Float, - String, - Boolean, - Object, - Date, - Duration, - Filesize, - List(Box), -} - #[derive(Debug)] pub struct TypedColumn { column: Column, - column_type: Option, + column_type: Option, } impl TypedColumn { @@ -144,9 +167,13 @@ pub fn add_separator(values: &mut Vec, df: &DataFrame, span: Span) { } // Inserting the values found in a Value::List or Value::Record -pub fn insert_record(column_values: &mut ColumnMap, record: Record) -> Result<(), ShellError> { +pub fn insert_record( + column_values: &mut ColumnMap, + record: Record, + maybe_schema: &Option, +) -> Result<(), ShellError> { for (col, value) in record { - insert_value(value, col, column_values)?; + insert_value(value, col, column_values, maybe_schema)?; } Ok(()) @@ -156,16 +183,26 @@ pub fn insert_value( value: Value, key: String, column_values: &mut ColumnMap, + maybe_schema: &Option, ) -> Result<(), ShellError> { let col_val = match column_values.entry(key.clone()) { - Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key)), + Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key.clone())), Entry::Occupied(entry) => entry.into_mut(), }; // Checking that the type for the 
value is the same // for the previous value in the column if col_val.values.is_empty() { - col_val.column_type = Some(value_to_input_type(&value)); + if let Some(schema) = maybe_schema { + if let Some(field) = schema.schema.get_field(&key) { + col_val.column_type = Some(field.data_type().clone()); + } + } + + if col_val.column_type.is_none() { + col_val.column_type = Some(value_to_data_type(&value)); + } + col_val.values.push(value); } else { let prev_value = &col_val.values[col_val.values.len() - 1]; @@ -179,11 +216,11 @@ pub fn insert_value( | (Value::Filesize { .. }, Value::Filesize { .. }) | (Value::Duration { .. }, Value::Duration { .. }) => col_val.values.push(value), (Value::List { .. }, _) => { - col_val.column_type = Some(value_to_input_type(&value)); + col_val.column_type = Some(value_to_data_type(&value)); col_val.values.push(value); } _ => { - col_val.column_type = Some(InputType::Object); + col_val.column_type = Some(DataType::Object("Value", None)); col_val.values.push(value); } } @@ -192,15 +229,15 @@ pub fn insert_value( Ok(()) } -fn value_to_input_type(value: &Value) -> InputType { +fn value_to_data_type(value: &Value) -> DataType { match &value { - Value::Int { .. } => InputType::Integer, - Value::Float { .. } => InputType::Float, - Value::String { .. } => InputType::String, - Value::Bool { .. } => InputType::Boolean, - Value::Date { .. } => InputType::Date, - Value::Duration { .. } => InputType::Duration, - Value::Filesize { .. } => InputType::Filesize, + Value::Int { .. } => DataType::Int64, + Value::Float { .. } => DataType::Float64, + Value::String { .. } => DataType::String, + Value::Bool { .. } => DataType::Boolean, + Value::Date { .. } => DataType::Date, + Value::Duration { .. } => DataType::Duration(TimeUnit::Nanoseconds), + Value::Filesize { .. } => DataType::Int64, Value::List { vals, .. } => { // We need to determined the type inside of the list. 
// Since Value::List does not have any kind of @@ -211,13 +248,213 @@ fn value_to_input_type(value: &Value) -> InputType { let list_type = vals .iter() .filter(|v| !matches!(v, Value::Nothing { .. })) - .map(value_to_input_type) + .map(value_to_data_type) .nth(1) - .unwrap_or(InputType::Object); + .unwrap_or(DataType::Object("Value", None)); - InputType::List(Box::new(list_type)) + DataType::List(Box::new(list_type)) } - _ => InputType::Object, + _ => DataType::Object("Value", None), + } +} + +fn typed_column_to_series(name: &str, column: TypedColumn) -> Result { + if let Some(column_type) = &column.column_type { + match column_type { + DataType::Float32 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_f64().map(|v| v as f32)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Float64 => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_f64()).collect(); + Ok(Series::new(name, series_values?)) + } + DataType::UInt8 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as u8)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::UInt16 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as u16)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::UInt32 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as u32)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::UInt64 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as u64)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Int8 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as i8)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Int16 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| 
v.as_i64().map(|v| v as i16)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Int32 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as i32)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Int64 => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_i64()).collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Boolean => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_bool()).collect(); + Ok(Series::new(name, series_values?)) + } + DataType::String => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_string()).collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Object(_, _) => value_to_series(name, &column.values), + DataType::Duration(time_unit) => { + //todo - finish type conversion + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| nanos_from_timeunit(v, *time_unit))) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::List(list_type) => { + match input_type_list_to_series(name, list_type.as_ref(), &column.values) { + Ok(series) => Ok(series), + Err(_) => { + // An error case will occur when there are lists of mixed types. + // If this happens, fallback to object list + input_type_list_to_series( + name, + &DataType::Object("unknown", None), + &column.values, + ) + } + } + } + DataType::Date => { + let it = column.values.iter().map(|v| { + if let Value::Date { val, .. } = &v { + Some(val.timestamp_nanos_opt().unwrap_or_default()) + } else { + None + } + }); + + let res: DatetimeChunked = ChunkedArray::::from_iter_options(name, it) + .into_datetime(TimeUnit::Nanoseconds, None); + + Ok(res.into_series()) + } + DataType::Datetime(tu, maybe_tz) => { + let dates = column + .values + .iter() + .map(|v| { + if let Value::Date { val, .. 
} = &v { + // If there is a timezone specified, make sure + // the value is converted to it + Ok(maybe_tz + .as_ref() + .map(|tz| tz.parse::().map(|tz| val.with_timezone(&tz))) + .transpose() + .map_err(|e| ShellError::GenericError { + error: "Error parsing timezone".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })? + .and_then(|dt| dt.timestamp_nanos_opt()) + .map(|nanos| nanos_from_timeunit(nanos, *tu))) + } else { + Ok(None) + } + }) + .collect::>, ShellError>>()?; + + let res: DatetimeChunked = + ChunkedArray::::from_iter_options(name, dates.into_iter()) + .into_datetime(*tu, maybe_tz.clone()); + + Ok(res.into_series()) + } + DataType::Struct(fields) => { + let schema = Some(NuSchema::new(Schema::from_iter(fields.clone()))); + let mut structs: Vec = Vec::new(); + + for v in column.values.iter() { + let mut column_values: ColumnMap = IndexMap::new(); + let record = v.as_record()?; + insert_record(&mut column_values, record.clone(), &schema)?; + let df = from_parsed_columns(column_values)?; + structs.push(df.as_series(Span::unknown())?); + } + + let chunked = StructChunked::new(column.name(), structs.as_ref()).map_err(|e| { + ShellError::GenericError { + error: format!("Error creating struct: {e}"), + msg: "".into(), + span: None, + help: None, + inner: vec![], + } + })?; + Ok(chunked.into_series()) + } + _ => Err(ShellError::GenericError { + error: format!("Error creating dataframe: Unsupported type: {column_type:?}"), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }), + } + } else { + Err(ShellError::GenericError { + error: "Passed a type column with no type".into(), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }) } } @@ -227,80 +464,22 @@ fn value_to_input_type(value: &Value) -> InputType { pub fn from_parsed_columns(column_values: ColumnMap) -> Result { let mut df_series: Vec = Vec::new(); for (name, column) in column_values { - if let Some(column_type) = &column.column_type { - 
match column_type { - InputType::Float => { - let series_values: Result, _> = - column.values.iter().map(|v| v.as_f64()).collect(); - let series = Series::new(&name, series_values?); - df_series.push(series) - } - InputType::Integer | InputType::Filesize | InputType::Duration => { - let series_values: Result, _> = - column.values.iter().map(|v| v.as_i64()).collect(); - let series = Series::new(&name, series_values?); - df_series.push(series) - } - InputType::String => { - let series_values: Result, _> = - column.values.iter().map(|v| v.as_string()).collect(); - let series = Series::new(&name, series_values?); - df_series.push(series) - } - InputType::Boolean => { - let series_values: Result, _> = - column.values.iter().map(|v| v.as_bool()).collect(); - let series = Series::new(&name, series_values?); - df_series.push(series) - } - InputType::Object => { - df_series.push(input_type_object_to_series(&name, &column.values)?) - } - InputType::List(list_type) => { - match input_type_list_to_series(&name, list_type.as_ref(), &column.values) { - Ok(series) => df_series.push(series), - Err(_) => { - // An error case will occur when there are lists of mixed types. - // If this happens, fallback to object list - df_series.push(input_type_list_to_series( - &name, - &InputType::Object, - &column.values, - )?) - } - } - } - InputType::Date => { - let it = column.values.iter().map(|v| { - if let Value::Date { val, .. 
} = &v { - Some(val.timestamp_nanos_opt().unwrap_or_default()) - } else { - None - } - }); - - let res: DatetimeChunked = - ChunkedArray::::from_iter_options(&name, it) - .into_datetime(TimeUnit::Nanoseconds, None); - - df_series.push(res.into_series()) - } - } - } + let series = typed_column_to_series(&name, column)?; + df_series.push(series); } DataFrame::new(df_series) .map(|df| NuDataFrame::new(false, df)) .map_err(|e| ShellError::GenericError { error: "Error creating dataframe".into(), - msg: "".into(), + msg: e.to_string(), span: None, - help: Some(e.to_string()), + help: None, inner: vec![], }) } -fn input_type_object_to_series(name: &str, values: &[Value]) -> Result { +fn value_to_series(name: &str, values: &[Value]) -> Result { let mut builder = ObjectChunkedBuilder::::new(name, values.len()); for v in values { @@ -313,21 +492,45 @@ fn input_type_object_to_series(name: &str, values: &[Value]) -> Result Result { let inconsistent_error = |_| ShellError::GenericError { error: format!( - "column {name} contains a list with inconsistent types: Expecting: {list_type:?}" + "column {name} contains a list with inconsistent types: Expecting: {data_type:?}" ), msg: "".into(), span: None, help: None, inner: vec![], }; - match *list_type { + + macro_rules! primitive_list_series { + ($list_type:ty, $vec_type:tt) => {{ + let mut builder = ListPrimitiveChunkedBuilder::<$list_type>::new( + name, + values.len(), + VALUES_CAPACITY, + data_type.clone(), + ); + + for v in values { + let value_list = v + .as_list()? 
+ .iter() + .map(|v| value_to_primitive!(v, $vec_type)) + .collect::, _>>() + .map_err(inconsistent_error)?; + builder.append_iter_values(value_list.iter().copied()); + } + let res = builder.finish(); + Ok(res.into_series()) + }}; + } + + match *data_type { // list of boolean values - InputType::Boolean => { + DataType::Boolean => { let mut builder = ListBooleanChunkedBuilder::new(name, values.len(), VALUES_CAPACITY); for v in values { let value_list = v @@ -341,52 +544,18 @@ fn input_type_list_to_series( let res = builder.finish(); Ok(res.into_series()) } - // list of values that reduce down to i64 - InputType::Integer | InputType::Filesize | InputType::Duration => { - let logical_type = match list_type { - InputType::Duration => DataType::Duration(TimeUnit::Milliseconds), - _ => DataType::Int64, - }; - - let mut builder = ListPrimitiveChunkedBuilder::::new( - name, - values.len(), - VALUES_CAPACITY, - logical_type, - ); - - for v in values { - let value_list = v - .as_list()? - .iter() - .map(|v| v.as_i64()) - .collect::, _>>() - .map_err(inconsistent_error)?; - builder.append_iter_values(value_list.iter().copied()); - } - let res = builder.finish(); - Ok(res.into_series()) - } - InputType::Float => { - let mut builder = ListPrimitiveChunkedBuilder::::new( - name, - values.len(), - VALUES_CAPACITY, - DataType::Float64, - ); - for v in values { - let value_list = v - .as_list()? 
- .iter() - .map(|v| v.as_f64()) - .collect::, _>>() - .map_err(inconsistent_error)?; - builder.append_iter_values(value_list.iter().copied()); - } - let res = builder.finish(); - Ok(res.into_series()) - } - InputType::String => { + DataType::Duration(_) => primitive_list_series!(Int64Type, i64), + DataType::UInt8 => primitive_list_series!(UInt8Type, u8), + DataType::UInt16 => primitive_list_series!(UInt16Type, u16), + DataType::UInt32 => primitive_list_series!(UInt32Type, u32), + DataType::UInt64 => primitive_list_series!(UInt64Type, u64), + DataType::Int8 => primitive_list_series!(Int8Type, i8), + DataType::Int16 => primitive_list_series!(Int16Type, i16), + DataType::Int32 => primitive_list_series!(Int32Type, i32), + DataType::Int64 => primitive_list_series!(Int64Type, i64), + DataType::Float32 => primitive_list_series!(Float32Type, f32), + DataType::Float64 => primitive_list_series!(Float64Type, f64), + DataType::String => { let mut builder = ListStringChunkedBuilder::new(name, values.len(), VALUES_CAPACITY); for v in values { let value_list = v @@ -400,9 +569,7 @@ fn input_type_list_to_series( let res = builder.finish(); Ok(res.into_series()) } - // Treat lists as objects at this depth as it is expensive to calculate the list type - // We can revisit this later if necessary - InputType::Date => { + DataType::Date => { let mut builder = AnonymousOwnedListBuilder::new( name, values.len(), @@ -434,11 +601,11 @@ fn input_type_list_to_series( let res = builder.finish(); Ok(res.into_series()) } - InputType::List(ref sub_list_type) => { + DataType::List(ref sub_list_type) => { Ok(input_type_list_to_series(name, sub_list_type, values)?) 
} // treat everything else as an object - _ => Ok(input_type_object_to_series(name, values)?), + _ => Ok(value_to_series(name, values)?), } } @@ -1081,7 +1248,7 @@ mod tests { }; let typed_column = TypedColumn { column, - column_type: Some(InputType::List(Box::new(InputType::String))), + column_type: Some(DataType::List(Box::new(DataType::String))), }; let column_map = indexmap!("foo".to_string() => typed_column); diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs index de3bb95e7f..5ddcd840d0 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs @@ -13,7 +13,7 @@ use polars_utils::total_ord::TotalEq; use serde::{Deserialize, Serialize}; use std::{cmp::Ordering, fmt::Display, hash::Hasher}; -use super::{utils::DEFAULT_ROWS, NuLazyFrame}; +use super::{nu_schema::NuSchema, utils::DEFAULT_ROWS, NuLazyFrame}; // DataFrameValue is an encapsulation of Nushell Value that can be used // to define the PolarsObject Trait. The polars object trait allows to @@ -141,7 +141,7 @@ impl NuDataFrame { } } - pub fn try_from_iter(iter: T) -> Result + pub fn try_from_iter(iter: T, maybe_schema: Option) -> Result where T: Iterator, { @@ -161,14 +161,15 @@ impl NuDataFrame { conversion::insert_record( &mut column_values, Record::from_raw_cols_vals(cols, vals), + &maybe_schema, )? } Value::Record { val: record, .. } => { - conversion::insert_record(&mut column_values, record)? + conversion::insert_record(&mut column_values, record, &maybe_schema)? } _ => { let key = "0".to_string(); - conversion::insert_value(value, key, &mut column_values)? + conversion::insert_value(value, key, &mut column_values, &maybe_schema)? 
} } } @@ -188,13 +189,16 @@ impl NuDataFrame { Ok(Self::new(false, dataframe)) } - pub fn try_from_columns(columns: Vec) -> Result { + pub fn try_from_columns( + columns: Vec, + maybe_schema: Option, + ) -> Result { let mut column_values: ColumnMap = IndexMap::new(); for column in columns { let name = column.name().to_string(); for value in column { - conversion::insert_value(value, name.clone(), &mut column_values)?; + conversion::insert_value(value, name.clone(), &mut column_values, &maybe_schema)?; } } @@ -503,4 +507,8 @@ impl NuDataFrame { Some(Ordering::Equal) } + + pub fn schema(&self) -> NuSchema { + NuSchema::new(self.df.schema()) + } } diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_schema.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_schema.rs new file mode 100644 index 0000000000..c75eb74c70 --- /dev/null +++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_schema.rs @@ -0,0 +1,397 @@ +use std::sync::Arc; + +use nu_protocol::{Record, ShellError, Span, Value}; +use polars::prelude::{DataType, Field, Schema, SchemaRef, TimeUnit}; + +#[derive(Debug, Clone)] +pub struct NuSchema { + pub schema: SchemaRef, +} + +impl NuSchema { + pub fn new(schema: Schema) -> Self { + Self { + schema: Arc::new(schema), + } + } +} + +impl TryFrom<&Value> for NuSchema { + type Error = ShellError; + fn try_from(value: &Value) -> Result { + let schema = value_to_schema(value, Span::unknown())?; + Ok(Self::new(schema)) + } +} + +impl From for Value { + fn from(schema: NuSchema) -> Self { + fields_to_value(schema.schema.iter_fields(), Span::unknown()) + } +} + +impl From for SchemaRef { + fn from(val: NuSchema) -> Self { + Arc::clone(&val.schema) + } +} + +fn fields_to_value(fields: impl Iterator, span: Span) -> Value { + let (cols, vals) = fields + .map(|field| { + let val = dtype_to_value(field.data_type(), span); + let col = field.name().to_string(); + (col, val) + }) + .unzip(); + + let record = Record::from_raw_cols_vals(cols, vals); + 
Value::record(record, Span::unknown()) +} + +fn dtype_to_value(dtype: &DataType, span: Span) -> Value { + match dtype { + DataType::Struct(fields) => fields_to_value(fields.iter().cloned(), span), + _ => Value::string(dtype.to_string().replace('[', "<").replace(']', ">"), span), + } +} + +fn value_to_schema(value: &Value, span: Span) -> Result { + let fields = value_to_fields(value, span)?; + let schema = Schema::from_iter(fields); + Ok(schema) +} + +fn value_to_fields(value: &Value, span: Span) -> Result, ShellError> { + let fields = value + .as_record()? + .into_iter() + .map(|(col, val)| match val { + Value::Record { .. } => { + let fields = value_to_fields(val, span)?; + let dtype = DataType::Struct(fields); + Ok(Field::new(col, dtype)) + } + _ => { + let dtype = dtype_str_to_schema(&val.as_string()?, span)?; + Ok(Field::new(col, dtype)) + } + }) + .collect::, ShellError>>()?; + Ok(fields) +} + +fn dtype_str_to_schema(dtype: &str, span: Span) -> Result { + match dtype { + "bool" => Ok(DataType::Boolean), + "u8" => Ok(DataType::UInt8), + "u16" => Ok(DataType::UInt16), + "u32" => Ok(DataType::UInt32), + "u64" => Ok(DataType::UInt64), + "i8" => Ok(DataType::Int8), + "i16" => Ok(DataType::Int16), + "i32" => Ok(DataType::Int32), + "i64" => Ok(DataType::Int64), + "f32" => Ok(DataType::Float32), + "f64" => Ok(DataType::Float64), + "str" => Ok(DataType::String), + "binary" => Ok(DataType::Binary), + "date" => Ok(DataType::Date), + "time" => Ok(DataType::Time), + "null" => Ok(DataType::Null), + "unknown" => Ok(DataType::Unknown), + "object" => Ok(DataType::Object("unknown", None)), + _ if dtype.starts_with("list") => { + let dtype = dtype + .trim_start_matches("list") + .trim_start_matches('<') + .trim_end_matches('>') + .trim(); + let dtype = dtype_str_to_schema(dtype, span)?; + Ok(DataType::List(Box::new(dtype))) + } + _ if dtype.starts_with("datetime") => { + let dtype = dtype + .trim_start_matches("datetime") + .trim_start_matches('<') + .trim_end_matches('>'); + 
let mut split = dtype.split(','); + let next = split + .next() + .ok_or_else(|| ShellError::GenericError { + error: "Invalid polars data type".into(), + msg: "Missing time unit".into(), + span: Some(span), + help: None, + inner: vec![], + })? + .trim(); + let time_unit = str_to_time_unit(next, span)?; + let next = split + .next() + .ok_or_else(|| ShellError::GenericError { + error: "Invalid polars data type".into(), + msg: "Missing time zone".into(), + span: Some(span), + help: None, + inner: vec![], + })? + .trim(); + let timezone = if "*" == next { + None + } else { + Some(next.to_string()) + }; + Ok(DataType::Datetime(time_unit, timezone)) + } + _ if dtype.starts_with("duration") => { + let inner = dtype.trim_start_matches("duration<").trim_end_matches('>'); + let next = inner + .split(',') + .next() + .ok_or_else(|| ShellError::GenericError { + error: "Invalid polars data type".into(), + msg: "Missing time unit".into(), + span: Some(span), + help: None, + inner: vec![], + })? + .trim(); + let time_unit = str_to_time_unit(next, span)?; + Ok(DataType::Duration(time_unit)) + } + _ => Err(ShellError::GenericError { + error: "Invalid polars data type".into(), + msg: format!("Unknown type: {dtype}"), + span: Some(span), + help: None, + inner: vec![], + }), + } +} + +fn str_to_time_unit(ts_string: &str, span: Span) -> Result { + match ts_string { + "ms" => Ok(TimeUnit::Milliseconds), + "us" | "μs" => Ok(TimeUnit::Microseconds), + "ns" => Ok(TimeUnit::Nanoseconds), + _ => Err(ShellError::GenericError { + error: "Invalid polars data type".into(), + msg: "Invalid time unit".into(), + span: Some(span), + help: None, + inner: vec![], + }), + } +} + +#[cfg(test)] +mod test { + + use super::*; + + #[test] + fn test_value_to_schema() { + let value = Value::Record { + val: Record::from_raw_cols_vals( + vec!["name".into(), "age".into(), "address".into()], + vec![ + Value::String { + val: "str".into(), + internal_span: Span::test_data(), + }, + Value::String { + val: 
"i32".into(), + internal_span: Span::test_data(), + }, + Value::Record { + val: Record::from_raw_cols_vals( + vec!["street".into(), "city".into()], + vec![ + Value::String { + val: "str".into(), + internal_span: Span::test_data(), + }, + Value::String { + val: "str".into(), + internal_span: Span::test_data(), + }, + ], + ), + internal_span: Span::test_data(), + }, + ], + ), + internal_span: Span::test_data(), + }; + let schema = value_to_schema(&value, Span::unknown()).unwrap(); + let expected = Schema::from_iter(vec![ + Field::new("name", DataType::String), + Field::new("age", DataType::Int32), + Field::new( + "address", + DataType::Struct(vec![ + Field::new("street", DataType::String), + Field::new("city", DataType::String), + ]), + ), + ]); + assert_eq!(schema, expected); + } + + #[test] + fn test_dtype_str_to_schema_simple_types() { + let dtype = "bool"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Boolean; + assert_eq!(schema, expected); + + let dtype = "u8"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::UInt8; + assert_eq!(schema, expected); + + let dtype = "u16"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::UInt16; + assert_eq!(schema, expected); + + let dtype = "u32"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::UInt32; + assert_eq!(schema, expected); + + let dtype = "u64"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::UInt64; + assert_eq!(schema, expected); + + let dtype = "i8"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Int8; + assert_eq!(schema, expected); + + let dtype = "i16"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Int16; + assert_eq!(schema, expected); + + let dtype = "i32"; + let schema = 
dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Int32; + assert_eq!(schema, expected); + + let dtype = "i64"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Int64; + assert_eq!(schema, expected); + + let dtype = "str"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::String; + assert_eq!(schema, expected); + + let dtype = "binary"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Binary; + assert_eq!(schema, expected); + + let dtype = "date"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Date; + assert_eq!(schema, expected); + + let dtype = "time"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Time; + assert_eq!(schema, expected); + + let dtype = "null"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Null; + assert_eq!(schema, expected); + + let dtype = "unknown"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Unknown; + assert_eq!(schema, expected); + + let dtype = "object"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Object("unknown", None); + assert_eq!(schema, expected); + } + + #[test] + fn test_dtype_str_schema_datetime() { + let dtype = "datetime"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Datetime(TimeUnit::Milliseconds, None); + assert_eq!(schema, expected); + + let dtype = "datetime"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Datetime(TimeUnit::Microseconds, None); + assert_eq!(schema, expected); + + let dtype = "datetime<μs, *>"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = 
DataType::Datetime(TimeUnit::Microseconds, None); + assert_eq!(schema, expected); + + let dtype = "datetime"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Datetime(TimeUnit::Nanoseconds, None); + assert_eq!(schema, expected); + + let dtype = "datetime"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Datetime(TimeUnit::Milliseconds, Some("UTC".into())); + assert_eq!(schema, expected); + + let dtype = "invalid"; + let schema = dtype_str_to_schema(dtype, Span::unknown()); + assert!(schema.is_err()) + } + + #[test] + fn test_dtype_str_schema_duration() { + let dtype = "duration"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Duration(TimeUnit::Milliseconds); + assert_eq!(schema, expected); + + let dtype = "duration"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Duration(TimeUnit::Microseconds); + assert_eq!(schema, expected); + + let dtype = "duration<μs>"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Duration(TimeUnit::Microseconds); + assert_eq!(schema, expected); + + let dtype = "duration"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::Duration(TimeUnit::Nanoseconds); + assert_eq!(schema, expected); + } + + #[test] + fn test_dtype_str_to_schema_list_types() { + let dtype = "list"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::List(Box::new(DataType::Int32)); + assert_eq!(schema, expected); + + let dtype = "list>"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = DataType::List(Box::new(DataType::Duration(TimeUnit::Milliseconds))); + assert_eq!(schema, expected); + + let dtype = "list>"; + let schema = dtype_str_to_schema(dtype, Span::unknown()).unwrap(); + let expected = 
DataType::List(Box::new(DataType::Datetime(TimeUnit::Milliseconds, None))); + assert_eq!(schema, expected); + } +}