nushell/crates/nu-std/testing.nu
Bahex b6e84879b6
add multiple grouper support to group-by (#14337)
- closes #14330 

Related:
- #2607 
- #14019
- #14316 

# Description
This PR changes `group-by` to support grouping by multiple `grouper`
arguments.

# Changes

- No grouper: no change in behavior 
- Single grouper
  - `--to-table=false`: no change in behavior
  - `--to-table=true`:
    - closure grouper: named group0
    - cell-path grouper: named after the cell-path
- Multiple groupers:
  - `--to-table=false`: nested groups
  - `--to-table=true`: one column for each grouper argument, followed by
    the `items` column
    - columns corresponding to cell-paths are named after them
    - columns corresponding to closure groupers are named `group{i}` where
      `i` is the index of the grouper argument

# Examples
```nushell
> [1 3 1 3 2 1 1] | group-by
╭───┬───────────╮
│   │ ╭───┬───╮ │
│ 1 │ │ 0 │ 1 │ │
│   │ │ 1 │ 1 │ │
│   │ │ 2 │ 1 │ │
│   │ │ 3 │ 1 │ │
│   │ ╰───┴───╯ │
│   │ ╭───┬───╮ │
│ 3 │ │ 0 │ 3 │ │
│   │ │ 1 │ 3 │ │
│   │ ╰───┴───╯ │
│   │ ╭───┬───╮ │
│ 2 │ │ 0 │ 2 │ │
│   │ ╰───┴───╯ │
╰───┴───────────╯

> [1 3 1 3 2 1 1] | group-by --to-table
╭─#─┬─group─┬───items───╮
│ 0 │ 1     │ ╭───┬───╮ │
│   │       │ │ 0 │ 1 │ │
│   │       │ │ 1 │ 1 │ │
│   │       │ │ 2 │ 1 │ │
│   │       │ │ 3 │ 1 │ │
│   │       │ ╰───┴───╯ │
│ 1 │ 3     │ ╭───┬───╮ │
│   │       │ │ 0 │ 3 │ │
│   │       │ │ 1 │ 3 │ │
│   │       │ ╰───┴───╯ │
│ 2 │ 2     │ ╭───┬───╮ │
│   │       │ │ 0 │ 2 │ │
│   │       │ ╰───┴───╯ │
╰─#─┴─group─┴───items───╯

> [1 3 1 3 2 1 1] | group-by { $in >= 2 }
╭───────┬───────────╮
│       │ ╭───┬───╮ │
│ false │ │ 0 │ 1 │ │
│       │ │ 1 │ 1 │ │
│       │ │ 2 │ 1 │ │
│       │ │ 3 │ 1 │ │
│       │ ╰───┴───╯ │
│       │ ╭───┬───╮ │
│ true  │ │ 0 │ 3 │ │
│       │ │ 1 │ 3 │ │
│       │ │ 2 │ 2 │ │
│       │ ╰───┴───╯ │
╰───────┴───────────╯

> [1 3 1 3 2 1 1] | group-by { $in >= 2 } --to-table
╭─#─┬─group0─┬───items───╮
│ 0 │ false  │ ╭───┬───╮ │
│   │        │ │ 0 │ 1 │ │
│   │        │ │ 1 │ 1 │ │
│   │        │ │ 2 │ 1 │ │
│   │        │ │ 3 │ 1 │ │
│   │        │ ╰───┴───╯ │
│ 1 │ true   │ ╭───┬───╮ │
│   │        │ │ 0 │ 3 │ │
│   │        │ │ 1 │ 3 │ │
│   │        │ │ 2 │ 2 │ │
│   │        │ ╰───┴───╯ │
╰─#─┴─group0─┴───items───╯
```

```nushell
let data = [
    [name, lang, year];
    [andres, rb, "2019"],
    [jt, rs, "2019"],
    [storm, rs, "2021"]
]

> $data
╭─#─┬──name──┬─lang─┬─year─╮
│ 0 │ andres │ rb   │ 2019 │
│ 1 │ jt     │ rs   │ 2019 │
│ 2 │ storm  │ rs   │ 2021 │
╰─#─┴──name──┴─lang─┴─year─╯
```

```nushell
> $data | group-by lang
╭────┬──────────────────────────────╮
│    │ ╭─#─┬──name──┬─lang─┬─year─╮ │
│ rb │ │ 0 │ andres │ rb   │ 2019 │ │
│    │ ╰─#─┴──name──┴─lang─┴─year─╯ │
│    │ ╭─#─┬─name──┬─lang─┬─year─╮  │
│ rs │ │ 0 │ jt    │ rs   │ 2019 │  │
│    │ │ 1 │ storm │ rs   │ 2021 │  │
│    │ ╰─#─┴─name──┴─lang─┴─year─╯  │
╰────┴──────────────────────────────╯
```

Group column is now named after the grouper, to allow multiple groupers.
```nushell
> $data | group-by lang --to-table  # column names changed!
╭─#─┬─lang─┬────────────items─────────────╮
│ 0 │ rb   │ ╭─#─┬──name──┬─lang─┬─year─╮ │
│   │      │ │ 0 │ andres │ rb   │ 2019 │ │
│   │      │ ╰─#─┴──name──┴─lang─┴─year─╯ │
│ 1 │ rs   │ ╭─#─┬─name──┬─lang─┬─year─╮  │
│   │      │ │ 0 │ jt    │ rs   │ 2019 │  │
│   │      │ │ 1 │ storm │ rs   │ 2021 │  │
│   │      │ ╰─#─┴─name──┴─lang─┴─year─╯  │
╰─#─┴─lang─┴────────────items─────────────╯
```

Grouping by multiple columns makes finer grained aggregations possible.
```nushell
> $data | group-by lang year --to-table
╭─#─┬─lang─┬─year─┬────────────items─────────────╮
│ 0 │ rb   │ 2019 │ ╭─#─┬──name──┬─lang─┬─year─╮ │
│   │      │      │ │ 0 │ andres │ rb   │ 2019 │ │
│   │      │      │ ╰─#─┴──name──┴─lang─┴─year─╯ │
│ 1 │ rs   │ 2019 │ ╭─#─┬─name─┬─lang─┬─year─╮   │
│   │      │      │ │ 0 │ jt   │ rs   │ 2019 │   │
│   │      │      │ ╰─#─┴─name─┴─lang─┴─year─╯   │
│ 2 │ rs   │ 2021 │ ╭─#─┬─name──┬─lang─┬─year─╮  │
│   │      │      │ │ 0 │ storm │ rs   │ 2021 │  │
│   │      │      │ ╰─#─┴─name──┴─lang─┴─year─╯  │
╰─#─┴─lang─┴─year─┴────────────items─────────────╯
```

Grouping by multiple columns without `--to-table` returns a nested
structure.
This is equivalent to `$data | group-by year | split-by lang`, making
`split-by` obsolete.
```nushell
> $data | group-by lang year
╭────┬─────────────────────────────────────────╮
│    │ ╭──────┬──────────────────────────────╮ │
│ rb │ │      │ ╭─#─┬──name──┬─lang─┬─year─╮ │ │
│    │ │ 2019 │ │ 0 │ andres │ rb   │ 2019 │ │ │
│    │ │      │ ╰─#─┴──name──┴─lang─┴─year─╯ │ │
│    │ ╰──────┴──────────────────────────────╯ │
│    │ ╭──────┬─────────────────────────────╮  │
│ rs │ │      │ ╭─#─┬─name─┬─lang─┬─year─╮  │  │
│    │ │ 2019 │ │ 0 │ jt   │ rs   │ 2019 │  │  │
│    │ │      │ ╰─#─┴─name─┴─lang─┴─year─╯  │  │
│    │ │      │ ╭─#─┬─name──┬─lang─┬─year─╮ │  │
│    │ │ 2021 │ │ 0 │ storm │ rs   │ 2021 │ │  │
│    │ │      │ ╰─#─┴─name──┴─lang─┴─year─╯ │  │
│    │ ╰──────┴─────────────────────────────╯  │
╰────┴─────────────────────────────────────────╯
```

From #2607:
> Here's a couple more examples without much explanation. This one shows
adding two grouping keys. I'm always wanting to add more columns when
using group-by and it just-work™️ `gb.exe -f movies-2.csv -k 3,2 -s 7
--skip_header`
> 
> ```
>  k:3                   | k:2       | count | sum:7
> -----------------------+-----------+-------+--------------------
>  20th Century Fox      | Drama     | 1     | 117.09
>  20th Century Fox      | Romance   | 1     | 39.66
>  CBS                   | Comedy    | 1     | 77.09
>  Disney                | Animation | 4     | 1264.23
>  Disney                | Comedy    | 4     | 950.27
>  Fox                   | Comedy    | 5     | 661.85
>  Independent           | Comedy    | 7     | 399.07
>  Independent           | Drama     | 4     | 69.75
>  Independent           | Romance   | 7     | 1048.75
>  Independent           | romance   | 1     | 29.37
> ...
> ```

This example can be achieved like this:
```nushell
> open movies-2.csv
  | group-by "Lead Studio" Genre --to-table
  | insert count {get items | length}
  | insert sum { get items."Worldwide Gross" | math sum}
  | reject items
  | sort-by "Lead Studio" Genre
╭─#──┬──────Lead Studio──────┬───Genre───┬─count─┬───sum───╮
│ 0  │ 20th Century Fox      │ Drama     │     1 │  117.09 │
│ 1  │ 20th Century Fox      │ Romance   │     1 │   39.66 │
│ 2  │ CBS                   │ Comedy    │     1 │   77.09 │
│ 3  │ Disney                │ Animation │     4 │ 1264.23 │
│ 4  │ Disney                │ Comedy    │     4 │  950.27 │
│ 5  │ Fox                   │ Comedy    │     5 │  661.85 │
│ 6  │ Fox                   │ comedy    │     1 │   60.72 │
│ 7  │ Independent           │ Comedy    │     7 │  399.07 │
│ 8  │ Independent           │ Drama     │     4 │   69.75 │
│ 9  │ Independent           │ Romance   │     7 │ 1048.75 │
│ 10 │ Independent           │ romance   │     1 │   29.37 │
...
```
2024-11-15 06:40:49 -06:00

385 lines
13 KiB
Text

use std/log
# `export-env` block: evaluated when this module is imported
export-env {
# Place NU_FORMAT... environment variables in module-scope
# (done by re-exporting everything from std/log inside this block)
export use std/log *
}
# Completion source for `--threads`: the integers from 1 up to the number of entries reported by `sys cpu`
def "nu-complete threads" [] {
    let cpu_count = (sys cpu | length)
    seq 1 $cpu_count
}
# Lookup table from the annotation text written in a test module to the internal name used by the runner
#
# Keeping the mapping in one place lets an annotation be renamed without touching the rest of the code
# Functions without an annotation, or with one that is not a key of this record, are dropped while the module is scanned
# `test` and `test-skip` may annotate any number of functions in a module, their names are collected in a list
# Every other annotation is expected at most once per module file
# Needing several before- or after- functions is a hint that the test suite should be redesigned
def valid-annotations [] {
    {
        '#[test]': 'test',
        '#[ignore]': 'test-skip',
        '#[before-each]': 'before-each',
        '#[before-all]': 'before-all',
        '#[after-each]': 'after-each',
        '#[after-all]': 'after-all',
    }
}
# Scans a module file and returns the annotated function names together with their annotations
#
# A line is considered when it starts with `def` and is not the first line of the file
# The line directly above it, trimmed, is taken as its annotation
# Only rows whose annotation is one of the keys of `valid-annotations` are kept
# The function name is the second space-separated token of the `def` line
def get-annotated [
    file: path
] path -> table<function_name: string, annotation: string> {
    let known_annotations = (valid-annotations | columns)
    let numbered_lines = (open $file | lines | enumerate | flatten)

    $numbered_lines
    | where {|line| ($line.item starts-with "def") and ($line.index > 0)}
    | insert annotation {|line|
        ($numbered_lines | get ($line.index - 1)).item | str trim
    }
    | where {|line| $line.annotation in $known_annotations}
    | reject index
    | update item {|line|
        $line.item
        | split column --collapse-empty ' '
        | get column2.0
    }
    | rename function_name
}
# Takes table of function names and their annotations such as the one returned by get-annotated (read from `$in`)
#
# Returns a record where keys are internal names of valid annotations and values are corresponding function names
# Annotations that allow multiple functions (`test`, `test-skip`) are of type list<string>
# Other annotations are of type string; only the first function found for them is kept
# Result gets merged with the template record so that the output shape remains consistent regardless of the table content
def create-test-record [] nothing -> record<before-each: string, after-each: string, before-all: string, after-all: string, test: list<string>, test-skip: list<string>> {
    let input = $in
    # Default for every key of the declared return shape.
    # `test` must be present as well: without it a module that has annotated functions but no
    # `#[test]` one produces a record with no `test` field, and consumers reading `.test` fail.
    let template_record = {
        before-each: '',
        before-all: '',
        after-each: '',
        after-all: '',
        test: [],
        test-skip: []
    }
    let test_record = (
        $input
        # translate the annotation text into its internal name
        | update annotation {|x|
            valid-annotations
            | get $x.annotation
        }
        | group-by --to-table annotation
        | update items {|x|
            $x.items.function_name
            | if $x.annotation in ["test", "test-skip"] {
                $in
            } else {
                get 0
            }
        }
        # turn the (annotation, items) rows into a single record keyed by annotation
        | transpose --ignore-titles -r -d
    )
    $template_record
    | merge $test_record
}
# Raises an error built from a record with the fields `msg`, `label` and `span`
#
# The message is wrapped in red ansi codes, `label` becomes the label text and `span` its location
def throw-error [error: record] {
    let colored_msg = $"(ansi red)($error.msg)(ansi reset)"
    let label = {
        text: ($error.label)
        span: $error.span
    }
    error make {msg: $colored_msg, label: $label}
}
# show a test record in a pretty way
#
# `$in` must be a record with at least the fields `name`, `test` and `result`
# (`name` is printed first, then `test`; `result` selects the color and the marker).
#
# the output would be like
# - "<indentation>x <name> <test>" all in red, where x is `char failed`, for any result other than pass or skip
# - "<indentation>s <name> <test>" all in yellow if the result is "skip"
# - "<indentation>  <name> <test>" all in green if the result is "pass"
def show-pretty-test [indent: int = 4] {
    let test = $in
    # Pad an empty string to exactly `indent` spaces. A `1..$indent` range counts downwards
    # when `indent` is 0 and would yield two spaces instead of none. Negative values are treated as 0.
    let padding = ("" | fill --character " " --width ([$indent 0] | math max))
    let color = (match $test.result {
        "pass" => { ansi green },
        "skip" => { ansi yellow },
        _ => { ansi red }
    })
    let marker = (match $test.result {
        "pass" => " ",
        "skip" => "s",
        _ => { char failed }
    })
    [
        $padding
        $color
        $marker
        " "
        $"($test.name) ($test.test)"
        (ansi reset)
    ] | str join
}
# Takes a test record and returns the execution result
#
# Fields read from `test`: `file`, `before-each`, `test` and `after-each`
# The last three are spliced verbatim, as source text, into a generated wrapper function
# The returned value is the record produced by `complete` for the spawned subprocess
#
# Test is executed via following steps:
# * Public function with random name is generated that runs specified test in try/catch block
# * Module file is opened
# * Random public function is appended to the end of the file
# * Modified file is saved under random name
# * Nu subprocess is spawned
# * Inside subprocess the modified file is imported and random function called
# * Output of the random function is serialized into nuon and returned to parent process
# * Modified file is removed
def run-test [
test: record
] {
# Random names for the temporary module file and for the wrapper function
let test_file_name = (random chars --length 10)
let test_function_name = (random chars --length 10)
# The temporary module is created in the same directory as the file under test
let rendered_module_path = ({parent: ($test.file|path dirname), stem: $test_file_name, extension: nu}| path join)
# Source of the wrapper function: the `before-each` text is expected to define `$context`,
# which is piped into the test; the `after-each` text is inserted on both the success and the failure path
# NOTE(review): `$err | get raw` presumably re-raises the caught error so the subprocess exits non-zero - confirm
let test_function = $"
export def ($test_function_name) [] {
($test.before-each)
try {
$context | ($test.test)
($test.after-each)
} catch { |err|
($test.after-each)
$err | get raw
}
}
"
# Write a copy of the module with the wrapper appended; the original file is left untouched
open $test.file
| lines
| append ($test_function)
| str join (char nl)
| save $rendered_module_path
# Run the wrapper in a fresh nu process without config; `complete` captures its output and exit code
let result = (
^$nu.current-exe --no-config-file -c $"use ($rendered_module_path) *; ($test_function_name)|to nuon"
| complete
)
# Remove the temporary module before handing the result back
rm $rendered_module_path
return $result
}
# Executes every test of a single module record and reports one row per test
#
# Columns of the returned table:
# * file - path to file under test
# * name - name of the module under test
# * test - name of specific test
# * result - test execution result ('pass', 'fail' or 'skip')
def run-tests-for-module [
    module: record<file: path name: string before-each: string after-each: string before-all: string after-all: string test: list test-skip: list>
    threads: int
] -> table<file: path, name: string, test: string, result: string> {
    # before-all runs once per module; its stdout becomes the global context
    let global_context = if ($module.before-all | is-empty) {
        {}
    } else {
        log info $"Running before-all for module ($module.name)"
        run-test {
            file: $module.file,
            before-each: 'let context = {}',
            after-each: '',
            test: $module.before-all
        }
        | if $in.exit_code != 0 {
            throw-error {
                msg: "Before-all failed"
                label: "Failure in test setup"
                span: (metadata $in | get span)
            }
        } else {
            $in.stdout
        }
    }

    # skipped tests are never executed, so their rows can be produced up front
    let skipped_tests = if ($module.test-skip | is-empty) {
        []
    } else {
        $module
        | update test $module.test-skip
        | reject test-skip
        | flatten
        | insert result 'skip'
    }

    # source snippets injected into the generated wrapper, identical for every test of the module
    let before_each_code = if ($module.before-each | is-empty) {
        $"let context = ($global_context)"
    } else {
        $"let context = \(($global_context)|merge \(($module.before-each)\)\)"
    }
    let after_each_code = if ($module.after-each | is-empty) {
        ''
    } else {
        $"$context | ($module.after-each)"
    }

    let tests = (
        $module
        | reject test-skip
        | flatten test
        | update before-each {|_| $before_each_code}
        | update after-each {|_| $after_each_code}
        | par-each --threads $threads {|test|
            log info $"Running ($test.test) in module ($module.name)"
            log debug $"Global context is ($global_context)"
            let outcome = (run-test $test)
            $test | insert result (if $outcome.exit_code == 0 { 'pass' } else { 'fail' })
        }
        | append $skipped_tests
        | select file name test result
    )

    # after-all receives the global context; its outcome is not inspected
    if not ($module.after-all | is-empty) {
        log info $"Running after-all for module ($module.name)"
        run-test {
            file: $module.file,
            before-each: $"let context = ($global_context)",
            after-each: '',
            test: $module.after-all
        }
    }

    $tests
}
# Run tests for nushell code
#
# By default all detected tests are executed
# Test list can be filtered out by specifying either path to search for, name of the module to run tests for or specific test name
# In order for a function to be recognized as a test by the test runner the line directly above its `def` must be the annotation `#[test]`
# Following annotations are supported by the test runner:
# * #[test] - test case to be executed during test run
# * #[ignore] - test case to be skipped during test run
# * #[before-all] - function to run at the beginning of test run. Returns a global context record that is piped into every test function
# * #[before-each] - function to run before every test case. Returns a per-test context record that is merged with global context and piped into test functions
# * #[after-each] - function to run after every test case. Receives the context record just like the test cases
# * #[after-all] - function to run after all test cases have been executed. Receives the global context record
export def run-tests [
    --path: path, # Path to look for tests. Default: current directory.
    --module: string, # Test module to run. Default: all test modules found.
    --test: string, # Pattern to use to include tests. Default: all tests found in the files.
    --exclude: string, # Pattern to use to exclude tests. Default: no tests are excluded
    --exclude-module: string, # Pattern to use to exclude test modules. Default: No modules are excluded
    --list, # list the selected tests without running them.
    --threads: int@"nu-complete threads", # Amount of threads to use for parallel execution. Default: All threads are utilized
] {
    let available_threads = (sys cpu | length)

    # Clamp the requested thread count into 1..$available_threads
    # Can't use pattern matching here due to https://github.com/nushell/nushell/issues/9198
    let threads = (if $threads == null {
        $available_threads
    } else if $threads < 1 {
        1
    } else if $threads <= $available_threads {
        $threads
    } else {
        $available_threads
    })

    # `**/<module>.nu`, or `**/*.nu` when no module was requested
    let module_search_pattern = ('**' | path join ({
        stem: ($module | default "*")
        extension: nu
    } | path join))

    let path = if $path == null {
        $env.PWD
    } else {
        if not ($path | path exists) {
            throw-error {
                msg: "directory_not_found"
                label: "no such directory"
                span: (metadata $path | get span)
            }
        }
        $path
    }

    if not ($module | is-empty) {
        # The pattern has to be converted with `into glob`, exactly like the listing below,
        # otherwise `ls` receives a plain string and looks for a file literally named `**/<module>.nu`
        try { ls ($path | path join $module_search_pattern | into glob) | null } catch {
            throw-error {
                msg: "module_not_found"
                label: $"no such module in ($path)"
                span: (metadata $module | get span)
            }
        }
    }

    let modules = (
        ls ($path | path join $module_search_pattern | into glob)
        | par-each --threads $threads {|row|
            {
                file: $row.name
                name: ($row.name | path parse | get stem)
                commands: (get-annotated $row.name)
            }
        }
        # keep only files that contain at least one annotated function
        | filter {|x| ($x.commands|length) > 0}
        | upsert commands {|module|
            $module.commands
            | create-test-record
        }
        | flatten
        | filter {|x| ($x.test|length) > 0}
        | filter {|x| if ($exclude_module|is-empty) {true} else {$x.name !~ $exclude_module}}
        | filter {|x| if ($test|is-empty) {true} else {$x.test|any {|y| $y =~ $test}}}
        | filter {|x| if ($module|is-empty) {true} else {$module == $x.name}}
        # narrow the test list of every remaining module down to the selected tests
        | update test {|x|
            $x.test
            | filter {|y| if ($test|is-empty) {true} else {$y =~ $test}}
            | filter {|y| if ($exclude|is-empty) {true} else {$y !~ $exclude}}
        }
    )

    if $list {
        return $modules
    }

    if ($modules | is-empty) {
        error make --unspanned {msg: "no test to run"}
    }

    let results = (
        $modules
        | par-each --threads $threads {|module|
            run-tests-for-module $module $threads
        }
        | flatten
    )

    # Any failed test turns the whole run into an error that lists every result
    if ($results | any {|x| $x.result == fail}) {
        let text = ([
            $"(ansi purple)some tests did not pass (char lparen)see complete errors below(char rparen):(ansi reset)"
            ""
            ($results | par-each --threads $threads {|test| ($test | show-pretty-test 4)} | str join "\n")
            ""
        ] | str join "\n")
        error make --unspanned { msg: $text }
    }
}