Make assignment and const consistent with let/mut (#13385)

# Description This makes assignment operations and `const` behave the same way `let` and `mut` do, absorbing the rest of the pipeline. Changes the lexer to be able to recognize assignment operators as a separate token, and then makes the lite parser continue to push spans into the same command regardless of any redirections or pipes if an assignment operator is encountered. Because the pipeline is no longer split up by the lite parser at this point, it's trivial to just parse the right hand side as if it were a subexpression not contained within parentheses. # User-Facing Changes Big breaking change. These are all now possible: ```nushell const path = 'a' | path join 'b' mut x = 2 $x = random int $x = [1 2 3] | math sum $env.FOO = random chars ``` In the past, these would have led to (an attempt at) bare word string parsing. So while `$env.FOO = bar` would have previously set the environment variable `FOO` to the string `"bar"`, it now tries to run the command named `bar`, hence the major breaking change. However, this is desirable because it is very consistent - if you see the `=`, you can just assume it absorbs everything else to the right of it. # Tests + Formatting Added tests for the new behaviour. Adjusted some existing tests that depended on the right hand side of assignments being parsed as barewords. # After Submitting - [ ] release notes (breaking change!)
2024-11-10 07:04:13 +00:00 · 2024-07-30 16:55:22 -07:00 · 2024-07-30 16:55:22 -07:00 · 8e2917b9ae
commit 8e2917b9ae
parent 3c3ec7891c
13 changed files with 247 additions and 91 deletions
--- a/crates/nu-command/tests/commands/complete.rs
+++ b/crates/nu-command/tests/commands/complete.rs
@ -95,13 +95,13 @@ fn capture_error_with_both_stdout_stderr_messages_not_hang_nushell() {

 #[test]
 fn combined_pipe_redirection() {
-    let actual = nu!("$env.FOO = hello; $env.BAR = world; nu --testbin echo_env_mixed out-err FOO BAR o+e>| complete | get stdout");
+    let actual = nu!("$env.FOO = 'hello'; $env.BAR = 'world'; nu --testbin echo_env_mixed out-err FOO BAR o+e>| complete | get stdout");
    assert_eq!(actual.out, "helloworld");
 }

 #[test]
 fn err_pipe_redirection() {
    let actual =
-        nu!("$env.FOO = hello; nu --testbin echo_env_stderr FOO e>| complete | get stdout");
+        nu!("$env.FOO = 'hello'; nu --testbin echo_env_stderr FOO e>| complete | get stdout");
    assert_eq!(actual.out, "hello");
 }
--- a/crates/nu-command/tests/commands/let_.rs
+++ b/crates/nu-command/tests/commands/let_.rs
@ -23,6 +23,13 @@ fn let_takes_pipeline() {
    assert_eq!(actual.out, "11");
 }

+#[test]
+fn let_takes_pipeline_with_declared_type() {
+    let actual = nu!(r#"let x: list<string> = [] | append "hello world"; print $x.0"#);
+
+    assert_eq!(actual.out, "hello world");
+}
+
 #[test]
 fn let_pipeline_allows_in() {
    let actual =
@ -38,6 +45,13 @@ fn mut_takes_pipeline() {
    assert_eq!(actual.out, "11");
 }

+#[test]
+fn mut_takes_pipeline_with_declared_type() {
+    let actual = nu!(r#"mut x: list<string> = [] | append "hello world"; print $x.0"#);
+
+    assert_eq!(actual.out, "hello world");
+}
+
 #[test]
 fn mut_pipeline_allows_in() {
    let actual =
--- a/crates/nu-command/tests/commands/rm.rs
+++ b/crates/nu-command/tests/commands/rm.rs
@ -144,7 +144,7 @@ fn errors_if_attempting_to_delete_home() {
    Playground::setup("rm_test_8", |dirs, _| {
        let actual = nu!(
            cwd: dirs.root(),
-            "$env.HOME = myhome ; rm -rf ~"
+            "$env.HOME = 'myhome' ; rm -rf ~"
        );

        assert!(actual.err.contains("please use -I or -i"));
--- a/crates/nu-command/tests/commands/table.rs
+++ b/crates/nu-command/tests/commands/table.rs
@ -2567,7 +2567,7 @@ fn theme_cmd(theme: &str, footer: bool, then: &str) -> String {
        with_footer = "$env.config.footer_mode = \"always\"".to_string();
    }

-    format!("$env.config.table.mode = {theme}; $env.config.table.header_on_separator = true; {with_footer}; {then}")
+    format!("$env.config.table.mode = \"{theme}\"; $env.config.table.header_on_separator = true; {with_footer}; {then}")
 }

 #[test]
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@ -6,6 +6,7 @@ pub enum TokenContents {
    Comment,
    Pipe,
    PipePipe,
+    AssignmentOperator,
    ErrGreaterPipe,
    OutErrGreaterPipe,
    Semicolon,
@ -69,6 +70,12 @@ fn is_item_terminator(
            || special_tokens.contains(&c))
 }

+/// Assignment operators have special handling distinct from math expressions, as they cause the
+/// rest of the pipeline to be consumed.
+pub fn is_assignment_operator(bytes: &[u8]) -> bool {
+    matches!(bytes, b"=" | b"+=" | b"++=" | b"-=" | b"*=" | b"/=")
+}
+
 // A special token is one that is a byte that stands alone as its own token. For example
 // when parsing a signature you may want to have `:` be able to separate tokens and also
 // to be handled as its own token to notify you you're about to parse a type in the example
@ -297,6 +304,10 @@ pub fn lex_item(

    let mut err = None;
    let output = match &input[(span.start - span_offset)..(span.end - span_offset)] {
+        bytes if is_assignment_operator(bytes) => Token {
+            contents: TokenContents::AssignmentOperator,
+            span,
+        },
        b"out>" | b"o>" => Token {
            contents: TokenContents::OutGreaterThan,
            span,
--- a/crates/nu-parser/src/lite_parser.rs
+++ b/crates/nu-parser/src/lite_parser.rs
@ -196,10 +196,43 @@ pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option<ParseError>) {
    let mut last_token = TokenContents::Eol;
    let mut file_redirection = None;
    let mut curr_comment: Option<Vec<Span>> = None;
+    let mut is_assignment = false;
    let mut error = None;

    for (idx, token) in tokens.iter().enumerate() {
-        if let Some((source, append, span)) = file_redirection.take() {
+        if is_assignment {
+            match &token.contents {
+                // Consume until semicolon or terminating EOL. Assignments absorb pipelines and
+                // redirections.
+                TokenContents::Eol => {
+                    // Handle `[Command] [Pipe] ([Comment] | [Eol])+ [Command]`
+                    //
+                    // `[Eol]` branch checks if previous token is `[Pipe]` to construct pipeline
+                    // and so `[Comment] | [Eol]` should be ignore to make it work
+                    let actual_token = last_non_comment_token(tokens, idx);
+                    if actual_token != Some(TokenContents::Pipe) {
+                        is_assignment = false;
+                        pipeline.push(&mut command);
+                        block.push(&mut pipeline);
+                    }
+
+                    if last_token == TokenContents::Eol {
+                        // Clear out the comment as we're entering a new comment
+                        curr_comment = None;
+                    }
+                }
+                TokenContents::Semicolon => {
+                    is_assignment = false;
+                    pipeline.push(&mut command);
+                    block.push(&mut pipeline);
+                }
+                TokenContents::Comment => {
+                    command.comments.push(token.span);
+                    curr_comment = None;
+                }
+                _ => command.push(token.span),
+            }
+        } else if let Some((source, append, span)) = file_redirection.take() {
            match &token.contents {
                TokenContents::PipePipe => {
                    error = error.or(Some(ParseError::ShellOrOr(token.span)));
@ -218,6 +251,11 @@ pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option<ParseError>) {
                        command.push(token.span)
                    }
                }
+                TokenContents::AssignmentOperator => {
+                    error = error.or(Some(ParseError::Expected("redirection target", token.span)));
+                    command.push(span);
+                    command.push(token.span);
+                }
                TokenContents::OutGreaterThan
                | TokenContents::OutGreaterGreaterThan
                | TokenContents::ErrGreaterThan
@ -280,6 +318,15 @@ pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option<ParseError>) {
                    }
                    command.push(token.span);
                }
+                TokenContents::AssignmentOperator => {
+                    // When in assignment mode, we'll just consume pipes or redirections as part of
+                    // the command.
+                    is_assignment = true;
+                    if let Some(curr_comment) = curr_comment.take() {
+                        command.comments = curr_comment;
+                    }
+                    command.push(token.span);
+                }
                TokenContents::OutGreaterThan => {
                    error = error.or(command.check_accepts_redirection(token.span));
                    file_redirection = Some((RedirectionSource::Stdout, false, token.span));
--- a/crates/nu-parser/src/parse_keywords.rs
+++ b/crates/nu-parser/src/parse_keywords.rs
@ -35,8 +35,8 @@ use crate::{
    lite_parser::{lite_parse, LiteCommand},
    parser::{
        check_call, garbage, garbage_pipeline, parse, parse_call, parse_expression,
-        parse_full_signature, parse_import_pattern, parse_internal_call, parse_multispan_value,
-        parse_string, parse_value, parse_var_with_opt_type, trim_quotes, ParsedInternalCall,
+        parse_full_signature, parse_import_pattern, parse_internal_call, parse_string, parse_value,
+        parse_var_with_opt_type, trim_quotes, ParsedInternalCall,
    },
    unescape_unquote_string, Token, TokenContents,
 };
@ -3171,9 +3171,6 @@ pub fn parse_const(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipelin
    // }

    if let Some(decl_id) = working_set.find_decl(b"const") {
-        let cmd = working_set.get_decl(decl_id);
-        let call_signature = cmd.signature().call_signature();
-
        if spans.len() >= 4 {
            // This is a bit of by-hand parsing to get around the issue where we want to parse in the reverse order
            // so that the var-id created by the variable isn't visible in the expression that init it
@ -3181,18 +3178,29 @@ pub fn parse_const(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipelin
                let item = working_set.get_span_contents(*span.1);
                // const x = 'f', = at least start from index 2
                if item == b"=" && spans.len() > (span.0 + 1) && span.0 > 1 {
-                    let mut idx = span.0;
+                    // Parse the rvalue as a subexpression
+                    let rvalue_span = Span::concat(&spans[(span.0 + 1)..]);

-                    let rvalue = parse_multispan_value(
-                        working_set,
-                        spans,
-                        &mut idx,
-                        &SyntaxShape::Keyword(b"=".to_vec(), Box::new(SyntaxShape::MathExpression)),
+                    let (rvalue_tokens, rvalue_error) = lex(
+                        working_set.get_span_contents(rvalue_span),
+                        rvalue_span.start,
+                        &[],
+                        &[],
+                        false,
+                    );
+                    working_set.parse_errors.extend(rvalue_error);
+
+                    trace!("parsing: const right-hand side subexpression");
+                    let rvalue_block =
+                        parse_block(working_set, &rvalue_tokens, rvalue_span, false, true);
+                    let rvalue_ty = rvalue_block.output_type();
+                    let rvalue_block_id = working_set.add_block(Arc::new(rvalue_block));
+                    let rvalue = Expression::new(
+                        working_set,
+                        Expr::Subexpression(rvalue_block_id),
+                        rvalue_span,
+                        rvalue_ty,
                    );
-                    if idx < (spans.len() - 1) {
-                        working_set
-                            .error(ParseError::ExtraPositional(call_signature, spans[idx + 1]));
-                    }

                    let mut idx = 0;

--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@ -1,5 +1,5 @@
 use crate::{
-    lex::{lex, lex_signature},
+    lex::{is_assignment_operator, lex, lex_signature},
    lite_parser::{lite_parse, LiteCommand, LitePipeline, LiteRedirection, LiteRedirectionTarget},
    parse_keywords::*,
    parse_patterns::parse_pattern,
@ -1458,7 +1458,8 @@ fn parse_binary_with_base(
                    | TokenContents::ErrGreaterThan
                    | TokenContents::ErrGreaterGreaterThan
                    | TokenContents::OutErrGreaterThan
-                    | TokenContents::OutErrGreaterGreaterThan => {
+                    | TokenContents::OutErrGreaterGreaterThan
+                    | TokenContents::AssignmentOperator => {
                        working_set.error(ParseError::Expected("binary", span));
                        return garbage(working_set, span);
                    }
@ -3409,7 +3410,7 @@ pub fn parse_signature_helper(working_set: &mut StateWorkingSet, span: Span) ->
    for token in &output {
        match token {
            Token {
-                contents: crate::TokenContents::Item,
+                contents: crate::TokenContents::Item | crate::TokenContents::AssignmentOperator,
                span,
            } => {
                let span = *span;
@ -4829,7 +4830,7 @@ pub fn parse_value(
    }
 }

-pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expression {
+pub fn parse_assignment_operator(working_set: &mut StateWorkingSet, span: Span) -> Expression {
    let contents = working_set.get_span_contents(span);

    let operator = match contents {
@ -4839,6 +4840,95 @@ pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expressi
        b"-=" => Operator::Assignment(Assignment::MinusAssign),
        b"*=" => Operator::Assignment(Assignment::MultiplyAssign),
        b"/=" => Operator::Assignment(Assignment::DivideAssign),
+        _ => {
+            working_set.error(ParseError::Expected("assignment operator", span));
+            return garbage(working_set, span);
+        }
+    };
+
+    Expression::new(working_set, Expr::Operator(operator), span, Type::Any)
+}
+
+pub fn parse_assignment_expression(
+    working_set: &mut StateWorkingSet,
+    spans: &[Span],
+) -> Expression {
+    trace!("parsing: assignment expression");
+    let expr_span = Span::concat(spans);
+
+    // Assignment always has the most precedence, and its right-hand side can be a pipeline
+    let Some(op_index) = spans
+        .iter()
+        .position(|span| is_assignment_operator(working_set.get_span_contents(*span)))
+    else {
+        working_set.error(ParseError::Expected("assignment expression", expr_span));
+        return garbage(working_set, expr_span);
+    };
+
+    let lhs_spans = &spans[0..op_index];
+    let op_span = spans[op_index];
+    let rhs_spans = &spans[(op_index + 1)..];
+
+    if lhs_spans.is_empty() {
+        working_set.error(ParseError::Expected(
+            "left hand side of assignment",
+            op_span,
+        ));
+        return garbage(working_set, expr_span);
+    }
+
+    if rhs_spans.is_empty() {
+        working_set.error(ParseError::Expected(
+            "right hand side of assignment",
+            op_span,
+        ));
+        return garbage(working_set, expr_span);
+    }
+
+    // Parse the lhs and operator as usual for a math expression
+    let mut lhs = parse_expression(working_set, lhs_spans);
+    let mut operator = parse_assignment_operator(working_set, op_span);
+
+    // Re-parse the right-hand side as a subexpression
+    let rhs_span = Span::concat(rhs_spans);
+
+    let (rhs_tokens, rhs_error) = lex(
+        working_set.get_span_contents(rhs_span),
+        rhs_span.start,
+        &[],
+        &[],
+        true,
+    );
+    working_set.parse_errors.extend(rhs_error);
+
+    trace!("parsing: assignment right-hand side subexpression");
+    let rhs_block = parse_block(working_set, &rhs_tokens, rhs_span, false, true);
+    let rhs_ty = rhs_block.output_type();
+    let rhs_block_id = working_set.add_block(Arc::new(rhs_block));
+    let mut rhs = Expression::new(
+        working_set,
+        Expr::Subexpression(rhs_block_id),
+        rhs_span,
+        rhs_ty,
+    );
+
+    let (result_ty, err) = math_result_type(working_set, &mut lhs, &mut operator, &mut rhs);
+    if let Some(err) = err {
+        working_set.parse_errors.push(err);
+    }
+
+    Expression::new(
+        working_set,
+        Expr::BinaryOp(Box::new(lhs), Box::new(operator), Box::new(rhs)),
+        expr_span,
+        result_ty,
+    )
+}
+
+pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expression {
+    let contents = working_set.get_span_contents(span);
+
+    let operator = match contents {
        b"==" => Operator::Comparison(Comparison::Equal),
        b"!=" => Operator::Comparison(Comparison::NotEqual),
        b"<" => Operator::Comparison(Comparison::LessThan),
@ -4954,6 +5044,10 @@ pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expressi
            ));
            return garbage(working_set, span);
        }
+        op if is_assignment_operator(op) => {
+            working_set.error(ParseError::Expected("a non-assignment operator", span));
+            return garbage(working_set, span);
+        }
        _ => {
            working_set.error(ParseError::Expected("operator", span));
            return garbage(working_set, span);
@ -5258,7 +5352,12 @@ pub fn parse_expression(working_set: &mut StateWorkingSet, spans: &[Span]) -> Ex
        return garbage(working_set, Span::concat(spans));
    }

-    let output = if is_math_expression_like(working_set, spans[pos]) {
+    let output = if spans[pos..]
+        .iter()
+        .any(|span| is_assignment_operator(working_set.get_span_contents(*span)))
+    {
+        parse_assignment_expression(working_set, &spans[pos..])
+    } else if is_math_expression_like(working_set, spans[pos]) {
        parse_math_expression(working_set, &spans[pos..], None)
    } else {
        let bytes = working_set.get_span_contents(spans[pos]).to_vec();
@ -5690,69 +5789,24 @@ pub(crate) fn redirecting_builtin_error(
 }

 pub fn parse_pipeline(working_set: &mut StateWorkingSet, pipeline: &LitePipeline) -> Pipeline {
-    let first_command = pipeline.commands.first();
-    let first_command_name = first_command
-        .and_then(|command| command.parts.first())
-        .map(|span| working_set.get_span_contents(*span));
-
    if pipeline.commands.len() > 1 {
-        // Special case: allow "let" or "mut" to consume the whole pipeline, if this is a pipeline
-        // with multiple commands
-        if matches!(first_command_name, Some(b"let" | b"mut")) {
-            // Merge the pipeline into one command
-            let first_command = first_command.expect("must be Some");
+        // Parse a normal multi command pipeline
+        let elements: Vec<_> = pipeline
+            .commands
+            .iter()
+            .enumerate()
+            .map(|(index, element)| {
+                let element = parse_pipeline_element(working_set, element);
+                // Handle $in for pipeline elements beyond the first one
+                if index > 0 && element.has_in_variable(working_set) {
+                    wrap_element_with_collect(working_set, element.clone())
+                } else {
+                    element
+                }
+            })
+            .collect();

-            let remainder_span = first_command
-                .parts_including_redirection()
-                .skip(3)
-                .chain(
-                    pipeline.commands[1..]
-                        .iter()
-                        .flat_map(|command| command.parts_including_redirection()),
-                )
-                .reduce(Span::append);
-
-            let parts = first_command
-                .parts
-                .iter()
-                .take(3) // the let/mut start itself
-                .copied()
-                .chain(remainder_span) // everything else
-                .collect();
-
-            let comments = pipeline
-                .commands
-                .iter()
-                .flat_map(|command| command.comments.iter())
-                .copied()
-                .collect();
-
-            let new_command = LiteCommand {
-                pipe: None,
-                comments,
-                parts,
-                redirection: None,
-            };
-            parse_builtin_commands(working_set, &new_command)
-        } else {
-            // Parse a normal multi command pipeline
-            let elements: Vec<_> = pipeline
-                .commands
-                .iter()
-                .enumerate()
-                .map(|(index, element)| {
-                    let element = parse_pipeline_element(working_set, element);
-                    // Handle $in for pipeline elements beyond the first one
-                    if index > 0 && element.has_in_variable(working_set) {
-                        wrap_element_with_collect(working_set, element.clone())
-                    } else {
-                        element
-                    }
-                })
-                .collect();
-
-            Pipeline { elements }
-        }
+        Pipeline { elements }
    } else {
        // If there's only one command in the pipeline, this could be a builtin command
        parse_builtin_commands(working_set, &pipeline.commands[0])
--- a/crates/nu-parser/tests/test_parser.rs
+++ b/crates/nu-parser/tests/test_parser.rs
@ -1177,9 +1177,9 @@ fn test_nothing_comparison_eq() {
 #[rstest]
 #[case(b"let a = 1 err> /dev/null")]
 #[case(b"let a = 1 out> /dev/null")]
+#[case(b"let a = 1 out+err> /dev/null")]
 #[case(b"mut a = 1 err> /dev/null")]
 #[case(b"mut a = 1 out> /dev/null")]
-#[case(b"let a = 1 out+err> /dev/null")]
 #[case(b"mut a = 1 out+err> /dev/null")]
 fn test_redirection_with_letmut(#[case] phase: &[u8]) {
    let engine_state = EngineState::new();
--- a/tests/const_/mod.rs
+++ b/tests/const_/mod.rs
@ -415,3 +415,9 @@ fn const_raw_string() {
    let actual = nu!(r#"const x = r#'abc'#; $x"#);
    assert_eq!(actual.out, "abc");
 }
+
+#[test]
+fn const_takes_pipeline() {
+    let actual = nu!(r#"const list = 'bar_baz_quux' | split row '_'; $list | length"#);
+    assert_eq!(actual.out, "3");
+}
--- a/tests/plugins/env.rs
+++ b/tests/plugins/env.rs
@ -6,9 +6,9 @@ fn get_env_by_name() {
        cwd: ".",
        plugin: ("nu_plugin_example"),
        r#"
-            $env.FOO = bar
+            $env.FOO = 'bar'
            example env FOO | print
-            $env.FOO = baz
+            $env.FOO = 'baz'
            example env FOO | print
        "#
    );
@ -21,7 +21,7 @@ fn get_envs() {
    let result = nu_with_plugins!(
        cwd: ".",
        plugin: ("nu_plugin_example"),
-        "$env.BAZ = foo; example env | get BAZ"
+        "$env.BAZ = 'foo'; example env | get BAZ"
    );
    assert!(result.status.success());
    assert_eq!("foo", result.out);
--- a/tests/repl/test_config.rs
+++ b/tests/repl/test_config.rs
@ -20,7 +20,7 @@ fn mutate_nu_config_nested_ls() -> TestResult {
 fn mutate_nu_config_nested_table() -> TestResult {
    run_test_std(
        r#"
-            $env.config.table.trim.methodology = wrapping
+            $env.config.table.trim.methodology = 'wrapping'
            $env.config.table.trim.wrapping_try_keep_words = false
            $env.config.table.trim.wrapping_try_keep_words
        "#,
--- a/tests/repl/test_parser.rs
+++ b/tests/repl/test_parser.rs
@ -295,6 +295,22 @@ fn assign_expressions() -> TestResult {
    )
 }

+#[test]
+fn assign_takes_pipeline() -> TestResult {
+    run_test(
+        r#"mut foo = 'bar'; $foo = $foo | str upcase | str reverse; $foo"#,
+        "RAB",
+    )
+}
+
+#[test]
+fn append_assign_takes_pipeline() -> TestResult {
+    run_test(
+        r#"mut foo = 'bar'; $foo ++= $foo | str upcase; $foo"#,
+        "barBAR",
+    )
+}
+
 #[test]
 fn string_interpolation_paren_test() -> TestResult {
    run_test(r#"$"('(')(')')""#, "()")