replace Criterion with Divan (#12000)

# Description  This is a trial of replacing the current Criterion microbenchmark tool with [Divan](https://nikolaivazquez.com/blog/divan/), a new and more straightforward microbenchmark suite. Divan states that it is robust to noise, which even allows it to be used in CI settings. By default it has no external dependencies and is very fast to run: its sampling method requires fewer samples, making it a lot faster than Criterion. The output is also nicely displayed, making it easy to get a quick overview of the performance. ![image](https://github.com/nushell/nushell/assets/17986183/587a1fb1-1da3-402c-b668-a27fde9a0657) # User-Facing Changes  # Tests + Formatting  # After Submitting
2024-12-26 04:53:09 +00:00 · 2024-03-01 19:09:21 +01:00 · 2024-03-01 19:09:21 +01:00 · 9ff92c6878
commit 9ff92c6878
parent 38a42905ae
4 changed files with 187 additions and 273 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -98,12 +98,6 @@ dependencies = [
 "libc",
 ]

-[[package]]
-name = "anes"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
-
 [[package]]
 name = "ansi-str"
 version = "0.8.0"
@ -599,12 +593,6 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53"

-[[package]]
-name = "cast"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
-
 [[package]]
 name = "castaway"
 version = "0.2.2"
@ -723,33 +711,6 @@ dependencies = [
 "stacker",
 ]

-[[package]]
-name = "ciborium"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
-dependencies = [
- "ciborium-io",
- "ciborium-ll",
- "serde",
-]
-
-[[package]]
-name = "ciborium-io"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
-
-[[package]]
-name = "ciborium-ll"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
-dependencies = [
- "ciborium-io",
- "half",
-]
-
 [[package]]
 name = "clang-sys"
 version = "1.7.0"
@ -840,6 +801,12 @@ dependencies = [
 "static_assertions",
 ]

+[[package]]
+name = "condtype"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af"
+
 [[package]]
 name = "console"
 version = "0.15.8"
@ -942,42 +909,6 @@ dependencies = [
 "cfg-if",
 ]

-[[package]]
-name = "criterion"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
-dependencies = [
- "anes",
- "cast",
- "ciborium",
- "clap",
- "criterion-plot",
- "is-terminal",
- "itertools 0.10.5",
- "num-traits",
- "once_cell",
- "oorandom",
- "plotters",
- "rayon",
- "regex",
- "serde",
- "serde_derive",
- "serde_json",
- "tinytemplate",
- "walkdir",
-]
-
-[[package]]
-name = "criterion-plot"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
-dependencies = [
- "cast",
- "itertools 0.10.5",
-]
-
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.11"
@ -1216,6 +1147,31 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "divan"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0d567df2c9c2870a43f3f2bd65aaeb18dbce1c18f217c3e564b4fbaeb3ee56c"
+dependencies = [
+ "cfg-if",
+ "clap",
+ "condtype",
+ "divan-macros",
+ "libc",
+ "regex-lite",
+]
+
+[[package]]
+name = "divan-macros"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "27540baf49be0d484d8f0130d7d8da3011c32a44d4fc873368154f1510e574a2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+]
+
 [[package]]
 name = "dlib"
 version = "0.5.2"
@ -1751,16 +1707,6 @@ dependencies = [
 "tracing",
 ]

-[[package]]
-name = "half"
-version = "2.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872"
-dependencies = [
- "cfg-if",
- "crunchy",
-]
-
 [[package]]
 name = "halfbrown"
 version = "0.2.4"
@ -2064,17 +2010,6 @@ dependencies = [
 "once_cell",
 ]

-[[package]]
-name = "is-terminal"
-version = "0.4.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455"
-dependencies = [
- "hermit-abi",
- "rustix",
- "windows-sys 0.52.0",
-]
-
 [[package]]
 name = "is-wsl"
 version = "0.4.0"
@ -2106,15 +2041,6 @@ dependencies = [
 "winapi",
 ]

-[[package]]
-name = "itertools"
-version = "0.10.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
-dependencies = [
- "either",
-]
-
 [[package]]
 name = "itertools"
 version = "0.11.0"
@ -2791,9 +2717,9 @@ name = "nu"
 version = "0.90.2"
 dependencies = [
 "assert_cmd",
- "criterion",
 "crossterm",
 "ctrlc",
+ "divan",
 "log",
 "miette",
 "mimalloc",
@ -3551,12 +3477,6 @@ version = "1.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"

-[[package]]
-name = "oorandom"
-version = "11.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
-
 [[package]]
 name = "open"
 version = "5.0.1"
@ -3914,34 +3834,6 @@ dependencies = [
 "array-init-cursor",
 ]

-[[package]]
-name = "plotters"
-version = "0.3.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45"
-dependencies = [
- "num-traits",
- "plotters-backend",
- "plotters-svg",
- "wasm-bindgen",
- "web-sys",
-]
-
-[[package]]
-name = "plotters-backend"
-version = "0.3.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609"
-
-[[package]]
-name = "plotters-svg"
-version = "0.3.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab"
-dependencies = [
- "plotters-backend",
-]
-
 [[package]]
 name = "polars"
 version = "0.37.0"
@ -4753,6 +4645,12 @@ dependencies = [
 "regex-syntax",
 ]

+[[package]]
+name = "regex-lite"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e"
+
 [[package]]
 name = "regex-syntax"
 version = "0.8.2"
@ -5808,16 +5706,6 @@ dependencies = [
 "crunchy",
 ]

-[[package]]
-name = "tinytemplate"
-version = "1.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
-dependencies = [
- "serde",
- "serde_json",
-]
-
 [[package]]
 name = "tinyvec"
 version = "1.6.0"
@ -6506,16 +6394,6 @@ dependencies = [
 "pkg-config",
 ]

-[[package]]
-name = "web-sys"
-version = "0.3.67"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed"
-dependencies = [
- "js-sys",
- "wasm-bindgen",
-]
-
 [[package]]
 name = "which"
 version = "6.0.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -102,7 +102,7 @@ nix = { version = "0.27", default-features = false, features = [
 [dev-dependencies]
 nu-test-support = { path = "./crates/nu-test-support", version = "0.90.2" }
 assert_cmd = "2.0"
-criterion = "0.5"
+divan = "0.1.14"
 pretty_assertions = "1.4"
 rstest = { version = "0.18", default-features = false }
 serial_test = "3.0"
@ -182,7 +182,6 @@ bench = false
 reedline = { git = "https://github.com/nushell/reedline", branch = "main" }
 # nu-ansi-term = {git = "https://github.com/nushell/nu-ansi-term.git", branch = "main"}

-# Criterion benchmarking setup
 # Run all benchmarks with `cargo bench`
 # Run individual benchmarks like `cargo bench -- <regex>` e.g. `cargo bench -- parse`
 [[bench]]
--- a/benches/README.md
+++ b/benches/README.md
@ -1,6 +1,6 @@
-# Criterion benchmarks
+# Divan benchmarks

-These are benchmarks using [Criterion](https://github.com/bheisler/criterion.rs), a microbenchmarking tool for Rust.
+These are benchmarks using [Divan](https://github.com/nvzqz/divan), a microbenchmarking tool for Rust.

 Run all benchmarks with `cargo bench`

--- a/benches/benchmarks.rs
+++ b/benches/benchmarks.rs
@ -1,4 +1,3 @@
-use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
 use nu_cli::eval_source;
 use nu_parser::parse;
 use nu_plugin::{Encoder, EncodingType, PluginCallResponse, PluginOutput};
@ -8,6 +7,11 @@ use nu_protocol::{
 use nu_utils::{get_default_config, get_default_env};
 use std::path::{Path, PathBuf};

+fn main() {
+    // Run registered benchmarks.
+    divan::main();
+}
+
 fn load_bench_commands() -> EngineState {
    nu_command::add_shell_command_context(nu_cmd_lang::create_default_context())
 }
@ -31,41 +35,7 @@ fn get_home_path(engine_state: &EngineState) -> PathBuf {
        .unwrap_or_default()
 }

-// FIXME: All benchmarks live in this 1 file to speed up build times when benchmarking.
-// When the *_benchmarks functions were in different files, `cargo bench` would build
-// an executable for every single one - incredibly slowly. Would be nice to figure out
-// a way to split things up again.
-
-fn parser_benchmarks(c: &mut Criterion) {
-    let mut engine_state = load_bench_commands();
-    let home_path = get_home_path(&engine_state);
-
-    // parsing config.nu breaks without PWD set, so set a valid path
-    engine_state.add_env_var(
-        "PWD".into(),
-        Value::string(home_path.to_string_lossy(), Span::test_data()),
-    );
-
-    let default_env = get_default_env().as_bytes();
-    c.bench_function("parse_default_env_file", |b| {
-        b.iter_batched(
-            || nu_protocol::engine::StateWorkingSet::new(&engine_state),
-            |mut working_set| parse(&mut working_set, None, default_env, false),
-            BatchSize::SmallInput,
-        )
-    });
-
-    let default_config = get_default_config().as_bytes();
-    c.bench_function("parse_default_config_file", |b| {
-        b.iter_batched(
-            || nu_protocol::engine::StateWorkingSet::new(&engine_state),
-            |mut working_set| parse(&mut working_set, None, default_config, false),
-            BatchSize::SmallInput,
-        )
-    });
-}
-
-fn eval_benchmarks(c: &mut Criterion) {
+fn setup_engine() -> EngineState {
    let mut engine_state = load_bench_commands();
    let home_path = get_home_path(&engine_state);

@ -79,33 +49,78 @@ fn eval_benchmarks(c: &mut Criterion) {
        .expect("Failed to create nushell constant.");
    engine_state.set_variable_const_val(NU_VARIABLE_ID, nu_const);

-    c.bench_function("eval default_env.nu", |b| {
-        b.iter(|| {
-            let mut stack = nu_protocol::engine::Stack::new();
-            eval_source(
-                &mut engine_state,
-                &mut stack,
-                get_default_env().as_bytes(),
-                "default_env.nu",
-                PipelineData::empty(),
-                false,
-            )
-        })
-    });
+    engine_state
+}

-    c.bench_function("eval default_config.nu", |b| {
-        b.iter(|| {
-            let mut stack = nu_protocol::engine::Stack::new();
+// FIXME: All benchmarks live in this 1 file to speed up build times when benchmarking.
+// When the *_benchmarks functions were in different files, `cargo bench` would build
+// an executable for every single one - incredibly slowly. Would be nice to figure out
+// a way to split things up again.
+
+#[divan::bench_group()]
+mod parser_benchmarks {
+    use super::*;
+
+    #[divan::bench()]
+    fn parse_default_config_file(bencher: divan::Bencher) {
+        let engine_state = setup_engine();
+        let default_env = get_default_config().as_bytes();
+
+        bencher
+            .with_inputs(|| nu_protocol::engine::StateWorkingSet::new(&engine_state))
+            .bench_refs(|mut working_set| parse(&mut working_set, None, default_env, false))
+    }
+
+    #[divan::bench()]
+    fn parse_default_env_file(bencher: divan::Bencher) {
+        let engine_state = setup_engine();
+        let default_env = get_default_env().as_bytes();
+
+        bencher
+            .with_inputs(|| nu_protocol::engine::StateWorkingSet::new(&engine_state))
+            .bench_refs(|mut working_set| parse(&mut working_set, None, default_env, false))
+    }
+}
+
+#[divan::bench_group()]
+mod eval_benchmarks {
+    use super::*;
+
+    #[divan::bench()]
+    fn eval_default_env(bencher: divan::Bencher) {
+        let default_env = get_default_env().as_bytes();
+        let fname = "default_env.nu";
+        bencher
+            .with_inputs(|| (setup_engine(), nu_protocol::engine::Stack::new()))
+            .bench_values(|(mut engine_state, mut stack)| {
                eval_source(
                    &mut engine_state,
                    &mut stack,
-                get_default_config().as_bytes(),
-                "default_config.nu",
+                    default_env,
+                    fname,
                    PipelineData::empty(),
                    false,
                )
            })
-    });
+    }
+
+    #[divan::bench()]
+    fn eval_default_config(bencher: divan::Bencher) {
+        let default_env = get_default_config().as_bytes();
+        let fname = "default_config.nu";
+        bencher
+            .with_inputs(|| (setup_engine(), nu_protocol::engine::Stack::new()))
+            .bench_values(|(mut engine_state, mut stack)| {
+                eval_source(
+                    &mut engine_state,
+                    &mut stack,
+                    default_env,
+                    fname,
+                    PipelineData::empty(),
+                    false,
+                )
+            })
+    }
 }

 // generate a new table data with `row_cnt` rows, `col_cnt` columns.
@ -119,54 +134,76 @@ fn encoding_test_data(row_cnt: usize, col_cnt: usize) -> Value {
    Value::list(vec![record; row_cnt], Span::test_data())
 }

-fn encoding_benchmarks(c: &mut Criterion) {
-    let mut group = c.benchmark_group("Encoding");
-    let test_cnt_pairs = [(100, 5), (10000, 15)];
-    for (row_cnt, col_cnt) in test_cnt_pairs.into_iter() {
-        for fmt in ["json", "msgpack"] {
-            group.bench_function(&format!("{fmt} encode {row_cnt} * {col_cnt}"), |b| {
-                let mut res = vec![];
+#[divan::bench_group()]
+mod encoding_benchmarks {
+    use super::*;
+
+    #[divan::bench(args = [(100, 5), (10000, 15)])]
+    fn json_encode(bencher: divan::Bencher, (row_cnt, col_cnt): (usize, usize)) {
        let test_data = PluginOutput::CallResponse(
            0,
            PluginCallResponse::value(encoding_test_data(row_cnt, col_cnt)),
        );
-                let encoder = EncodingType::try_from_bytes(fmt.as_bytes()).unwrap();
-                b.iter(|| encoder.encode(&test_data, &mut res))
-            });
+        let encoder = EncodingType::try_from_bytes(b"json").unwrap();
+        bencher
+            .with_inputs(|| (vec![]))
+            .bench_values(|mut res| encoder.encode(&test_data, &mut res))
    }
+
+    #[divan::bench(args = [(100, 5), (10000, 15)])]
+    fn msgpack_encode(bencher: divan::Bencher, (row_cnt, col_cnt): (usize, usize)) {
+        let test_data = PluginOutput::CallResponse(
+            0,
+            PluginCallResponse::value(encoding_test_data(row_cnt, col_cnt)),
+        );
+        let encoder = EncodingType::try_from_bytes(b"msgpack").unwrap();
+        bencher
+            .with_inputs(|| (vec![]))
+            .bench_values(|mut res| encoder.encode(&test_data, &mut res))
    }
-    group.finish();
 }

-fn decoding_benchmarks(c: &mut Criterion) {
-    let mut group = c.benchmark_group("Decoding");
-    let test_cnt_pairs = [(100, 5), (10000, 15)];
-    for (row_cnt, col_cnt) in test_cnt_pairs.into_iter() {
-        for fmt in ["json", "msgpack"] {
-            group.bench_function(&format!("{fmt} decode for {row_cnt} * {col_cnt}"), |b| {
-                let mut res = vec![];
+#[divan::bench_group()]
+mod decoding_benchmarks {
+    use super::*;
+
+    #[divan::bench(args = [(100, 5), (10000, 15)])]
+    fn json_decode(bencher: divan::Bencher, (row_cnt, col_cnt): (usize, usize)) {
        let test_data = PluginOutput::CallResponse(
            0,
            PluginCallResponse::value(encoding_test_data(row_cnt, col_cnt)),
        );
-                let encoder = EncodingType::try_from_bytes(fmt.as_bytes()).unwrap();
+        let encoder = EncodingType::try_from_bytes(b"json").unwrap();
+        let mut res = vec![];
        encoder.encode(&test_data, &mut res).unwrap();
-                let mut binary_data = std::io::Cursor::new(res);
-                b.iter(|| -> Result<Option<PluginOutput>, _> {
+        bencher
+            .with_inputs(|| {
+                let mut binary_data = std::io::Cursor::new(res.clone());
                binary_data.set_position(0);
+                binary_data
+            })
+            .bench_values(|mut binary_data| -> Result<Option<PluginOutput>, _> {
                encoder.decode(&mut binary_data)
            })
-            });
    }
-    }
-    group.finish();
-}

-criterion_group!(
-    benches,
-    parser_benchmarks,
-    eval_benchmarks,
-    encoding_benchmarks,
-    decoding_benchmarks
-);
-criterion_main!(benches);
+    #[divan::bench(args = [(100, 5), (10000, 15)])]
+    fn msgpack_decode(bencher: divan::Bencher, (row_cnt, col_cnt): (usize, usize)) {
+        let test_data = PluginOutput::CallResponse(
+            0,
+            PluginCallResponse::value(encoding_test_data(row_cnt, col_cnt)),
+        );
+        let encoder = EncodingType::try_from_bytes(b"msgpack").unwrap();
+        let mut res = vec![];
+        encoder.encode(&test_data, &mut res).unwrap();
+        bencher
+            .with_inputs(|| {
+                let mut binary_data = std::io::Cursor::new(res.clone());
+                binary_data.set_position(0);
+                binary_data
+            })
+            .bench_values(|mut binary_data| -> Result<Option<PluginOutput>, _> {
+                encoder.decode(&mut binary_data)
+            })
+    }
+}