Add tee command for operating on copies of streams (#11928)

[Related conversation on Discord](https://discord.com/channels/601130461678272522/615329862395101194/1209951539901366292) # Description  This is inspired by the Unix tee command, but significantly more powerful. Rather than just writing to a file, you can do any kind of stream operation that Nushell supports within the closure. The equivalent of Unix `tee -a file.txt` would be, for example, `command | tee { save -a file.txt }` - but of course this is Nushell, and you can do the same with structured data to JSON objects, or even just run any other command on the system with it. A `--stderr` flag is provided for operating on the stderr stream from external programs. This may produce unexpected results if the stderr stream is not then also printed by something else - nushell currently doesn't. See #11929 for the fix for that. # User-Facing Changes  If someone was using the system `tee` command, they might be surprised to find that it's different. # Tests + Formatting  - 🟢 `toolkit fmt` - 🟢 `toolkit clippy` - 🟢 `toolkit test` - 🟢 `toolkit test stdlib` # After Submitting
2024-12-27 05:23:11 +00:00 · 2024-02-28 15:08:31 -08:00 · 2024-02-28 15:08:31 -08:00 · e69a02d379
commit e69a02d379
parent 0126620c19
6 changed files with 397 additions and 2 deletions
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@ -79,6 +79,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState {
            Sort,
            SortBy,
            SplitList,
            Tee,
            Transpose,
            Uniq,
            UniqBy,
--- a/crates/nu-command/src/example_test.rs
+++ b/crates/nu-command/src/example_test.rs
@ -10,8 +10,8 @@ pub fn test_examples(cmd: impl Command + 'static) {
 mod test_examples {
    use super::super::{
        Ansi, Date, Enumerate, Filter, First, Flatten, From, Get, Into, IntoDatetime, IntoString,
-        Math, MathRound, ParEach, Path, PathParse, Random, Seq, Sort, SortBy, Split, SplitColumn,
+        Math, MathRound, MathSum, ParEach, Path, PathParse, Random, Seq, Sort, SortBy, Split,
-        SplitRow, Str, StrJoin, StrLength, StrReplace, Update, Url, Values, Wrap,
+        SplitColumn, SplitRow, Str, StrJoin, StrLength, StrReplace, Update, Url, Values, Wrap,
    };
    use crate::{Default, Each, To};
    use nu_cmd_lang::example_support::{
@ -83,6 +83,7 @@ mod test_examples {
            working_set.add_decl(Box::new(Let));
            working_set.add_decl(Box::new(Math));
            working_set.add_decl(Box::new(MathRound));
            working_set.add_decl(Box::new(MathSum));
            working_set.add_decl(Box::new(Mut));
            working_set.add_decl(Box::new(Path));
            working_set.add_decl(Box::new(PathParse));
--- a/crates/nu-command/src/filters/mod.rs
+++ b/crates/nu-command/src/filters/mod.rs
@ -39,6 +39,7 @@ mod sort;
 mod sort_by;
 mod split_by;
 mod take;
 mod tee;
 mod transpose;
 mod uniq;
 mod uniq_by;
@ -92,6 +93,7 @@ pub use sort::Sort;
 pub use sort_by::SortBy;
 pub use split_by::SplitBy;
 pub use take::*;
 pub use tee::Tee;
 pub use transpose::Transpose;
 pub use uniq::*;
 pub use uniq_by::UniqBy;
--- a/crates/nu-command/src/filters/tee.rs
+++ b/crates/nu-command/src/filters/tee.rs
@ -0,0 +1,341 @@
 use std::{sync::mpsc, thread};
 use nu_engine::{eval_block_with_early_return, CallExt};
 use nu_protocol::{
    ast::Call,
    engine::{Closure, Command, EngineState, Stack},
    Category, Example, IntoInterruptiblePipelineData, PipelineData, RawStream, ShellError,
    Signature, Spanned, SyntaxShape, Type, Value,
 };
 #[derive(Clone)]
 pub struct Tee;
 impl Command for Tee {
    fn name(&self) -> &str {
        "tee"
    }
    fn usage(&self) -> &str {
        "Copy a stream to another command in parallel."
    }
    fn extra_usage(&self) -> &str {
        r#"This is useful for doing something else with a stream while still continuing to
 use it in your pipeline."#
    }
    fn signature(&self) -> Signature {
        Signature::build("tee")
            .input_output_type(Type::Any, Type::Any)
            .switch(
                "stderr",
                "For external commands: copy the standard error stream instead.",
                Some('e'),
            )
            .required(
                "closure",
                SyntaxShape::Closure(None),
                "The other command to send the stream to.",
            )
            .category(Category::Filters)
    }
    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                example: "http get http://example.org/ | tee { save example.html }",
                description: "Save a webpage to a file while also printing it",
                result: None,
            },
            Example {
                example: "do { nu --commands 'print -e error; print ok' } | \
                    tee --stderr { save error.log } | complete",
                description: "Save error messages from an external command to a file without \
                    redirecting them",
                result: None,
            },
            Example {
                example: "1..100 | tee { each { print } } | math sum | wrap sum",
                description: "Print numbers and their sum",
                result: None,
            },
        ]
    }
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let use_stderr = call.has_flag(engine_state, stack, "stderr")?;
        let Spanned {
            item: Closure { block_id, captures },
            span: closure_span,
        } = call.req(engine_state, stack, 0)?;
        let closure_engine_state = engine_state.clone();
        let mut closure_stack = stack.captures_to_stack(captures);
        let metadata = input.metadata();
        let metadata_clone = metadata.clone();
        match input {
            // Handle external streams specially, to make sure they pass through
            PipelineData::ExternalStream {
                stdout,
                stderr,
                exit_code,
                span,
                metadata,
                trim_end_newline,
            } => {
                let known_size = if use_stderr {
                    stderr.as_ref().and_then(|s| s.known_size)
                } else {
                    stdout.as_ref().and_then(|s| s.known_size)
                };
                let with_stream = move |rx: mpsc::Receiver<Result<Vec<u8>, ShellError>>| {
                    let iter = rx.into_iter();
                    let input_from_channel = PipelineData::ExternalStream {
                        stdout: Some(RawStream::new(
                            Box::new(iter),
                            closure_engine_state.ctrlc.clone(),
                            span,
                            known_size,
                        )),
                        stderr: None,
                        exit_code: None,
                        span,
                        metadata: metadata_clone,
                        trim_end_newline,
                    };
                    let result = eval_block_with_early_return(
                        &closure_engine_state,
                        &mut closure_stack,
                        closure_engine_state.get_block(block_id),
                        input_from_channel,
                        false,
                        false,
                    );
                    // Make sure to drain any iterator produced to avoid unexpected behavior
                    result.and_then(|data| data.drain())
                };
                if use_stderr {
                    if let Some(stderr) = stderr {
                        let raw_stream = RawStream::new(
                            Box::new(tee(stderr.stream, with_stream).map(flatten_result)),
                            stderr.ctrlc,
                            stderr.span,
                            stderr.known_size,
                        );
                        Ok(PipelineData::ExternalStream {
                            stdout,
                            stderr: Some(raw_stream),
                            exit_code,
                            span,
                            metadata,
                            trim_end_newline,
                        })
                    } else {
                        // Throw an error if the stream doesn't have stderr. This is probably the
                        // user's mistake (e.g., forgetting to use `do`)
                        Err(ShellError::GenericError {
                            error: "Stream passed to `tee --stderr` does not have stderr".into(),
                            msg: "this stream does not contain stderr".into(),
                            span: Some(span),
                            help: Some(
                                "if this is an external command, you probably need to wrap \
                                it in `do { ... }`"
                                    .into(),
                            ),
                            inner: vec![],
                        })
                    }
                } else {
                    let stdout = stdout.map(|stdout| {
                        RawStream::new(
                            Box::new(tee(stdout.stream, with_stream).map(flatten_result)),
                            stdout.ctrlc,
                            stdout.span,
                            stdout.known_size,
                        )
                    });
                    Ok(PipelineData::ExternalStream {
                        stdout,
                        stderr,
                        exit_code,
                        span,
                        metadata,
                        trim_end_newline,
                    })
                }
            }
            // --stderr is not allowed if the input is not an external stream
            _ if use_stderr => Err(ShellError::UnsupportedInput {
                msg: "--stderr can only be used on external streams".into(),
                input: "the input to `tee` is not an external stream".into(),
                msg_span: call.head,
                input_span: input.span().unwrap_or(call.head),
            }),
            // Handle others with the plain iterator
            _ => {
                let teed = tee(input.into_iter(), move |rx| {
                    let input_from_channel = rx.into_pipeline_data_with_metadata(
                        metadata_clone,
                        closure_engine_state.ctrlc.clone(),
                    );
                    let result = eval_block_with_early_return(
                        &closure_engine_state,
                        &mut closure_stack,
                        closure_engine_state.get_block(block_id),
                        input_from_channel,
                        false,
                        false,
                    );
                    // Make sure to drain any iterator produced to avoid unexpected behavior
                    result.and_then(|data| data.drain())
                })
                .map(move |result| result.unwrap_or_else(|err| Value::error(err, closure_span)))
                .into_pipeline_data_with_metadata(metadata, engine_state.ctrlc.clone());
                Ok(teed)
            }
        }
    }
 }
 fn panic_error() -> ShellError {
    ShellError::NushellFailed {
        msg: "A panic occurred on a thread spawned by `tee`".into(),
    }
 }
 fn flatten_result<T, E>(result: Result<Result<T, E>, E>) -> Result<T, E> {
    result.unwrap_or_else(Err)
 }
 /// Copies the iterator to a channel on another thread. If an error is produced on that thread,
 /// it is embedded in the resulting iterator as an `Err` as soon as possible. When the iterator
 /// finishes, it waits for the other thread to finish, also handling any error produced at that
 /// point.
 fn tee<T>(
    input: impl Iterator<Item = T>,
    with_cloned_stream: impl FnOnce(mpsc::Receiver<T>) -> Result<(), ShellError> + Send + 'static,
 ) -> impl Iterator<Item = Result<T, ShellError>>
 where
    T: Clone + Send + 'static,
 {
    // For sending the values to the other thread
    let (tx, rx) = mpsc::channel();
    let mut thread = Some(
        thread::Builder::new()
            .name("stderr consumer".into())
            .spawn(move || with_cloned_stream(rx))
            .expect("could not create thread"),
    );
    let mut iter = input.into_iter();
    let mut tx = Some(tx);
    std::iter::from_fn(move || {
        if thread.as_ref().is_some_and(|t| t.is_finished()) {
            // Check for an error from the other thread
            let result = thread
                .take()
                .expect("thread was taken early")
                .join()
                .unwrap_or_else(|_| Err(panic_error()));
            if let Err(err) = result {
                // Embed the error early
                return Some(Err(err));
            }
        }
        // Get a value from the iterator
        if let Some(value) = iter.next() {
            // Send a copy, ignoring any error if the channel is closed
            let _ = tx.as_ref().map(|tx| tx.send(value.clone()));
            Some(Ok(value))
        } else {
            // Close the channel so the stream ends for the other thread
            drop(tx.take());
            // Wait for the other thread, and embed any error produced
            thread.take().and_then(|t| {
                t.join()
                    .unwrap_or_else(|_| Err(panic_error()))
                    .err()
                    .map(Err)
            })
        }
    })
 }
 #[test]
 fn tee_copies_values_to_other_thread_and_passes_them_through() {
    let (tx, rx) = mpsc::channel();
    let expected_values = vec![1, 2, 3, 4];
    let my_result = tee(expected_values.clone().into_iter(), move |rx| {
        for val in rx {
            let _ = tx.send(val);
        }
        Ok(())
    })
    .collect::<Result<Vec<i32>, ShellError>>()
    .expect("should not produce error");
    assert_eq!(expected_values, my_result);
    let other_threads_result = rx.into_iter().collect::<Vec<_>>();
    assert_eq!(expected_values, other_threads_result);
 }
 #[test]
 fn tee_forwards_errors_back_immediately() {
    use std::time::Duration;
    let slow_input = (0..100).inspect(|_| std::thread::sleep(Duration::from_millis(1)));
    let iter = tee(slow_input, |_| {
        Err(ShellError::IOError { msg: "test".into() })
    });
    for result in iter {
        if let Ok(val) = result {
            // should not make it to the end
            assert!(val < 99, "the error did not come early enough");
        } else {
            // got the error
            return;
        }
    }
    panic!("never received the error");
 }
 #[test]
 fn tee_waits_for_the_other_thread() {
    use std::sync::{
        atomic::{AtomicBool, Ordering},
        Arc,
    };
    use std::time::Duration;
    let waited = Arc::new(AtomicBool::new(false));
    let waited_clone = waited.clone();
    let iter = tee(0..100, move |_| {
        std::thread::sleep(Duration::from_millis(10));
        waited_clone.store(true, Ordering::Relaxed);
        Err(ShellError::IOError { msg: "test".into() })
    });
    let last = iter.last();
    assert!(waited.load(Ordering::Relaxed), "failed to wait");
    assert!(
        last.is_some_and(|res| res.is_err()),
        "failed to return error from wait"
    );
 }
--- a/crates/nu-command/tests/commands/mod.rs
+++ b/crates/nu-command/tests/commands/mod.rs
@ -104,6 +104,7 @@ mod split_row;
 mod str_;
 mod table;
 mod take;
 mod tee;
 mod terminal;
 mod to_text;
 mod touch;
--- a/crates/nu-command/tests/commands/tee.rs
+++ b/crates/nu-command/tests/commands/tee.rs
@ -0,0 +1,49 @@
 use nu_test_support::{fs::file_contents, nu, playground::Playground};
 #[test]
 fn tee_save_values_to_file() {
    Playground::setup("tee_save_values_to_file_test", |dirs, _sandbox| {
        let output = nu!(
            cwd: dirs.test(),
            r#"1..5 | tee { save copy.txt } | to text"#
        );
        assert_eq!("12345", output.out);
        assert_eq!(
            "1\n2\n3\n4\n5\n",
            file_contents(dirs.test().join("copy.txt"))
        );
    })
 }
 #[test]
 fn tee_save_stdout_to_file() {
    Playground::setup("tee_save_stdout_to_file_test", |dirs, _sandbox| {
        let output = nu!(
            cwd: dirs.test(),
            r#"
                $env.FOO = "teststring"
                nu --testbin echo_env FOO | tee { save copy.txt }
            "#
        );
        assert_eq!("teststring", output.out);
        assert_eq!("teststring\n", file_contents(dirs.test().join("copy.txt")));
    })
 }
 #[test]
 fn tee_save_stderr_to_file() {
    Playground::setup("tee_save_stderr_to_file_test", |dirs, _sandbox| {
        let output = nu!(
            cwd: dirs.test(),
            "\
                $env.FOO = \"teststring\"; \
                do { nu --testbin echo_env_stderr FOO } | \
                    tee --stderr { save copy.txt } | \
                    complete | \
                    get stderr
            "
        );
        assert_eq!("teststring", output.out);
        assert_eq!("teststring\n", file_contents(dirs.test().join("copy.txt")));
    })
 }