From 53beba7acc9eb7b27c303f1732d1d7d8f4e12a50 Mon Sep 17 00:00:00 2001 From: Benjamin Lee Date: Wed, 29 Mar 2023 19:55:38 -0700 Subject: [PATCH] Support passing an empty list to sort, uniq, sort-by, and uniq-by (issue #5957) (#8669) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description Currently, all four of these commands return a (rather-confusing) spanless error when passed an empty list: ``` > [] | sort Error: × no values to work with help: no values to work with ``` This PR changes these commands to always output `[]` if the input is `[]`. ``` > [] | sort ╭────────────╮ │ empty list │ ╰────────────╯ > [] | uniq-by foo ╭────────────╮ │ empty list │ ╰────────────╯ ``` I'm not sure what the original logic was here, but in the case of `sort` and `uniq`, I think the current behavior is straightforwardly wrong. `sort-by` and `uniq-by` are a bit more complicated, since they currently try to perform some validation that the specified column name is present in the input (see #8667 for problems with this validation, where a possible outcome is removing the validation entirely). When passed `[]`, it's not possible to do any validation because there are no records. This opens up the possibility for situations like the following: ``` > [[foo]; [5] [6]] | where foo < 3 | sort-by bar ╭────────────╮ │ empty list │ ╰────────────╯ ``` I think there's a strong argument that `[]` is the best output for these commands as well, since it makes pipelines like `$table | filter $condition | sort-by $column` more predictable. Currently, this pipeline will throw an error if `filter` evaluates to `[]`, but work fine otherwise. This makes it difficult to write reliable code, especially since users are not likely to encounter the `filter -> []` case in testing (issue #5957). The only workaround is to insert manual checks for an empty result. IMO, this is significantly worse than the "you can typo a column name without getting an error" problem shown above. Other commands that take column arguments (`get`, `select`, `rename`, etc) already have `[] -> []`, so there's existing precedent for this behavior. The core question here is "what columns does `[]` have"? The current behavior of `sort-by` is "no columns", while the current behavior of `select` is "all possible columns". Both answers lead to accepting some likely-buggy code without throwing on error, but in order to do better here we would need something like `Value::Table` that tracks columns on empty tables. If other people disagree with this logic, I'm happy to split out the `sort-by` and `uniq-by` changes into another PR. # User-Facing Changes `sort`, `uniq`, `sort-by`, and `uniq-by` now return `[]` instead of throwing an error when input is `[]`. # After Submitting > If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. The existing behavior was not documented, and the new behavior is what you would expect by default, so I don't think we need to update documentation. --------- Co-authored-by: Reilly Wood --- crates/nu-command/src/filters/sort.rs | 16 +++------------- crates/nu-command/src/filters/uniq_by.rs | 14 ++------------ crates/nu-command/src/sort_utils.rs | 16 +++------------- crates/nu-command/tests/commands/sort.rs | 7 +++++++ crates/nu-command/tests/commands/sort_by.rs | 7 +++++++ crates/nu-command/tests/commands/uniq.rs | 7 +++++++ crates/nu-command/tests/commands/uniq_by.rs | 7 +++++++ 7 files changed, 36 insertions(+), 38 deletions(-) diff --git a/crates/nu-command/src/filters/sort.rs b/crates/nu-command/src/filters/sort.rs index 5c440ff5c2..a6a3944e97 100644 --- a/crates/nu-command/src/filters/sort.rs +++ b/crates/nu-command/src/filters/sort.rs @@ -271,22 +271,12 @@ pub fn sort( insensitive: bool, natural: bool, ) -> Result<(), ShellError> { - if vec.is_empty() { - return Err(ShellError::GenericError( - "no values to work with".to_string(), - "".to_string(), - None, - Some("no values to work with".to_string()), - Vec::new(), - )); - } - - match &vec[0] { - Value::Record { + match vec.first() { + Some(Value::Record { cols, vals: _input_vals, .. - } => { + }) => { let columns = cols.clone(); vec.sort_by(|a, b| process(a, b, &columns, span, insensitive, natural)); } diff --git a/crates/nu-command/src/filters/uniq_by.rs b/crates/nu-command/src/filters/uniq_by.rs index 0c64b951a9..c1e65e6a6a 100644 --- a/crates/nu-command/src/filters/uniq_by.rs +++ b/crates/nu-command/src/filters/uniq_by.rs @@ -107,21 +107,11 @@ impl Command for UniqBy { } fn validate(vec: Vec, columns: &Vec, span: Span) -> Result<(), ShellError> { - if vec.is_empty() { - return Err(ShellError::GenericError( - "no values to work with".to_string(), - "".to_string(), - None, - Some("no values to work with".to_string()), - Vec::new(), - )); - } - - if let Value::Record { + if let Some(Value::Record { cols, vals: _input_vals, span: val_span, - } = &vec[0] + }) = vec.first() { if columns.is_empty() { // This uses the same format as the 'requires a column name' error in split_by.rs diff --git a/crates/nu-command/src/sort_utils.rs b/crates/nu-command/src/sort_utils.rs index fca2b49135..92d9e395f2 100644 --- a/crates/nu-command/src/sort_utils.rs +++ b/crates/nu-command/src/sort_utils.rs @@ -61,22 +61,12 @@ pub fn sort( insensitive: bool, natural: bool, ) -> Result<(), ShellError> { - if vec.is_empty() { - return Err(ShellError::GenericError( - "no values to work with".to_string(), - "".to_string(), - None, - Some("no values to work with".to_string()), - Vec::new(), - )); - } - - match &vec[0] { - Value::Record { + match vec.first() { + Some(Value::Record { cols, vals: _input_vals, span: val_span, - } => { + }) => { if sort_columns.is_empty() { // This uses the same format as the 'requires a column name' error in split_by.rs return Err(ShellError::GenericError( diff --git a/crates/nu-command/tests/commands/sort.rs b/crates/nu-command/tests/commands/sort.rs index c72b3fa497..c99e140586 100644 --- a/crates/nu-command/tests/commands/sort.rs +++ b/crates/nu-command/tests/commands/sort.rs @@ -118,3 +118,10 @@ fn sort_record_values_insensitive_reverse() { assert_eq!(actual.out, r#"{"2": zed, "3": ABE, "1": abe}"#); } + +#[test] +fn sort_empty() { + let actual = nu!("[] | sort | to nuon"); + + assert_eq!(actual.out, "[]"); +} diff --git a/crates/nu-command/tests/commands/sort_by.rs b/crates/nu-command/tests/commands/sort_by.rs index 914f29e610..c39bb9e66f 100644 --- a/crates/nu-command/tests/commands/sort_by.rs +++ b/crates/nu-command/tests/commands/sort_by.rs @@ -43,6 +43,13 @@ fn by_invalid_column() { assert!(actual.err.contains("value originates here")); } +#[test] +fn sort_by_empty() { + let actual = nu!("[] | sort-by foo | to nuon"); + + assert_eq!(actual.out, "[]"); +} + #[test] fn ls_sort_by_name_sensitive() { let actual = nu!( diff --git a/crates/nu-command/tests/commands/uniq.rs b/crates/nu-command/tests/commands/uniq.rs index 9246287ef9..90ff7d5c31 100644 --- a/crates/nu-command/tests/commands/uniq.rs +++ b/crates/nu-command/tests/commands/uniq.rs @@ -61,6 +61,13 @@ fn uniq_values() { }) } +#[test] +fn uniq_empty() { + let actual = nu!("[] | uniq | to nuon"); + + assert_eq!(actual.out, "[]"); +} + #[test] fn nested_json_structures() { Playground::setup("uniq_test_3", |dirs, sandbox| { diff --git a/crates/nu-command/tests/commands/uniq_by.rs b/crates/nu-command/tests/commands/uniq_by.rs index 20100bbfa1..08cca2d556 100644 --- a/crates/nu-command/tests/commands/uniq_by.rs +++ b/crates/nu-command/tests/commands/uniq_by.rs @@ -126,6 +126,13 @@ fn table() { assert_eq!(actual.out, expected.out); } +#[test] +fn uniq_by_empty() { + let actual = nu!("[] | uniq-by foo | to nuon"); + + assert_eq!(actual.out, "[]"); +} + #[test] fn uniq_by_multiple_columns() { let actual = nu!(