Make assignment and const consistent with let/mut (#13385)

# Description

This makes assignment operations and `const` behave the same way `let`
and `mut` do, absorbing the rest of the pipeline.

Changes the lexer to be able to recognize assignment operators as a
separate token, and then makes the lite parser continue to push spans
into the same command regardless of any redirections or pipes if an
assignment operator is encountered. Because the pipeline is no longer
split up by the lite parser at this point, it's trivial to just parse
the right hand side as if it were a subexpression not contained within
parentheses.

# User-Facing Changes
Big breaking change. These are all now possible:

```nushell
const path = 'a' | path join 'b'

mut x = 2
$x = random int
$x = [1 2 3] | math sum

$env.FOO = random chars
```

In the past, these would have led to (an attempt at) bare word string
parsing. So while `$env.FOO = bar` would have previously set the
environment variable `FOO` to the string `"bar"`, it now tries to run
the command named `bar`, hence the major breaking change.

However, this is desirable because it is very consistent - if you see
the `=`, you can just assume it absorbs everything else to the right of
it.

# Tests + Formatting
Added tests for the new behaviour. Adjusted some existing tests that
depended on the right hand side of assignments being parsed as
barewords.

# After Submitting
- [ ] release notes (breaking change!)
This commit is contained in:
Devyn Cairns 2024-07-30 16:55:22 -07:00 committed by GitHub
parent 3c3ec7891c
commit 8e2917b9ae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 247 additions and 91 deletions

View file

@ -95,13 +95,13 @@ fn capture_error_with_both_stdout_stderr_messages_not_hang_nushell() {
#[test]
fn combined_pipe_redirection() {
let actual = nu!("$env.FOO = hello; $env.BAR = world; nu --testbin echo_env_mixed out-err FOO BAR o+e>| complete | get stdout");
let actual = nu!("$env.FOO = 'hello'; $env.BAR = 'world'; nu --testbin echo_env_mixed out-err FOO BAR o+e>| complete | get stdout");
assert_eq!(actual.out, "helloworld");
}
#[test]
fn err_pipe_redirection() {
let actual =
nu!("$env.FOO = hello; nu --testbin echo_env_stderr FOO e>| complete | get stdout");
nu!("$env.FOO = 'hello'; nu --testbin echo_env_stderr FOO e>| complete | get stdout");
assert_eq!(actual.out, "hello");
}

View file

@ -23,6 +23,13 @@ fn let_takes_pipeline() {
assert_eq!(actual.out, "11");
}
#[test]
fn let_takes_pipeline_with_declared_type() {
let actual = nu!(r#"let x: list<string> = [] | append "hello world"; print $x.0"#);
assert_eq!(actual.out, "hello world");
}
#[test]
fn let_pipeline_allows_in() {
let actual =
@ -38,6 +45,13 @@ fn mut_takes_pipeline() {
assert_eq!(actual.out, "11");
}
#[test]
fn mut_takes_pipeline_with_declared_type() {
let actual = nu!(r#"mut x: list<string> = [] | append "hello world"; print $x.0"#);
assert_eq!(actual.out, "hello world");
}
#[test]
fn mut_pipeline_allows_in() {
let actual =

View file

@ -144,7 +144,7 @@ fn errors_if_attempting_to_delete_home() {
Playground::setup("rm_test_8", |dirs, _| {
let actual = nu!(
cwd: dirs.root(),
"$env.HOME = myhome ; rm -rf ~"
"$env.HOME = 'myhome' ; rm -rf ~"
);
assert!(actual.err.contains("please use -I or -i"));

View file

@ -2567,7 +2567,7 @@ fn theme_cmd(theme: &str, footer: bool, then: &str) -> String {
with_footer = "$env.config.footer_mode = \"always\"".to_string();
}
format!("$env.config.table.mode = {theme}; $env.config.table.header_on_separator = true; {with_footer}; {then}")
format!("$env.config.table.mode = \"{theme}\"; $env.config.table.header_on_separator = true; {with_footer}; {then}")
}
#[test]

View file

@ -6,6 +6,7 @@ pub enum TokenContents {
Comment,
Pipe,
PipePipe,
AssignmentOperator,
ErrGreaterPipe,
OutErrGreaterPipe,
Semicolon,
@ -69,6 +70,12 @@ fn is_item_terminator(
|| special_tokens.contains(&c))
}
/// Assignment operators have special handling distinct from math expressions, as they cause the
/// rest of the pipeline to be consumed.
pub fn is_assignment_operator(bytes: &[u8]) -> bool {
matches!(bytes, b"=" | b"+=" | b"++=" | b"-=" | b"*=" | b"/=")
}
// A special token is one that is a byte that stands alone as its own token. For example
// when parsing a signature you may want to have `:` be able to separate tokens and also
// to be handled as its own token to notify you you're about to parse a type in the example
@ -297,6 +304,10 @@ pub fn lex_item(
let mut err = None;
let output = match &input[(span.start - span_offset)..(span.end - span_offset)] {
bytes if is_assignment_operator(bytes) => Token {
contents: TokenContents::AssignmentOperator,
span,
},
b"out>" | b"o>" => Token {
contents: TokenContents::OutGreaterThan,
span,

View file

@ -196,10 +196,43 @@ pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option<ParseError>) {
let mut last_token = TokenContents::Eol;
let mut file_redirection = None;
let mut curr_comment: Option<Vec<Span>> = None;
let mut is_assignment = false;
let mut error = None;
for (idx, token) in tokens.iter().enumerate() {
if let Some((source, append, span)) = file_redirection.take() {
if is_assignment {
match &token.contents {
// Consume until semicolon or terminating EOL. Assignments absorb pipelines and
// redirections.
TokenContents::Eol => {
// Handle `[Command] [Pipe] ([Comment] | [Eol])+ [Command]`
//
// `[Eol]` branch checks if previous token is `[Pipe]` to construct pipeline
// and so `[Comment] | [Eol]` should be ignore to make it work
let actual_token = last_non_comment_token(tokens, idx);
if actual_token != Some(TokenContents::Pipe) {
is_assignment = false;
pipeline.push(&mut command);
block.push(&mut pipeline);
}
if last_token == TokenContents::Eol {
// Clear out the comment as we're entering a new comment
curr_comment = None;
}
}
TokenContents::Semicolon => {
is_assignment = false;
pipeline.push(&mut command);
block.push(&mut pipeline);
}
TokenContents::Comment => {
command.comments.push(token.span);
curr_comment = None;
}
_ => command.push(token.span),
}
} else if let Some((source, append, span)) = file_redirection.take() {
match &token.contents {
TokenContents::PipePipe => {
error = error.or(Some(ParseError::ShellOrOr(token.span)));
@ -218,6 +251,11 @@ pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option<ParseError>) {
command.push(token.span)
}
}
TokenContents::AssignmentOperator => {
error = error.or(Some(ParseError::Expected("redirection target", token.span)));
command.push(span);
command.push(token.span);
}
TokenContents::OutGreaterThan
| TokenContents::OutGreaterGreaterThan
| TokenContents::ErrGreaterThan
@ -280,6 +318,15 @@ pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option<ParseError>) {
}
command.push(token.span);
}
TokenContents::AssignmentOperator => {
// When in assignment mode, we'll just consume pipes or redirections as part of
// the command.
is_assignment = true;
if let Some(curr_comment) = curr_comment.take() {
command.comments = curr_comment;
}
command.push(token.span);
}
TokenContents::OutGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection = Some((RedirectionSource::Stdout, false, token.span));

View file

@ -35,8 +35,8 @@ use crate::{
lite_parser::{lite_parse, LiteCommand},
parser::{
check_call, garbage, garbage_pipeline, parse, parse_call, parse_expression,
parse_full_signature, parse_import_pattern, parse_internal_call, parse_multispan_value,
parse_string, parse_value, parse_var_with_opt_type, trim_quotes, ParsedInternalCall,
parse_full_signature, parse_import_pattern, parse_internal_call, parse_string, parse_value,
parse_var_with_opt_type, trim_quotes, ParsedInternalCall,
},
unescape_unquote_string, Token, TokenContents,
};
@ -3171,9 +3171,6 @@ pub fn parse_const(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipelin
// }
if let Some(decl_id) = working_set.find_decl(b"const") {
let cmd = working_set.get_decl(decl_id);
let call_signature = cmd.signature().call_signature();
if spans.len() >= 4 {
// This is a bit of by-hand parsing to get around the issue where we want to parse in the reverse order
// so that the var-id created by the variable isn't visible in the expression that init it
@ -3181,18 +3178,29 @@ pub fn parse_const(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipelin
let item = working_set.get_span_contents(*span.1);
// const x = 'f', = at least start from index 2
if item == b"=" && spans.len() > (span.0 + 1) && span.0 > 1 {
let mut idx = span.0;
// Parse the rvalue as a subexpression
let rvalue_span = Span::concat(&spans[(span.0 + 1)..]);
let rvalue = parse_multispan_value(
working_set,
spans,
&mut idx,
&SyntaxShape::Keyword(b"=".to_vec(), Box::new(SyntaxShape::MathExpression)),
let (rvalue_tokens, rvalue_error) = lex(
working_set.get_span_contents(rvalue_span),
rvalue_span.start,
&[],
&[],
false,
);
working_set.parse_errors.extend(rvalue_error);
trace!("parsing: const right-hand side subexpression");
let rvalue_block =
parse_block(working_set, &rvalue_tokens, rvalue_span, false, true);
let rvalue_ty = rvalue_block.output_type();
let rvalue_block_id = working_set.add_block(Arc::new(rvalue_block));
let rvalue = Expression::new(
working_set,
Expr::Subexpression(rvalue_block_id),
rvalue_span,
rvalue_ty,
);
if idx < (spans.len() - 1) {
working_set
.error(ParseError::ExtraPositional(call_signature, spans[idx + 1]));
}
let mut idx = 0;

View file

@ -1,5 +1,5 @@
use crate::{
lex::{lex, lex_signature},
lex::{is_assignment_operator, lex, lex_signature},
lite_parser::{lite_parse, LiteCommand, LitePipeline, LiteRedirection, LiteRedirectionTarget},
parse_keywords::*,
parse_patterns::parse_pattern,
@ -1458,7 +1458,8 @@ fn parse_binary_with_base(
| TokenContents::ErrGreaterThan
| TokenContents::ErrGreaterGreaterThan
| TokenContents::OutErrGreaterThan
| TokenContents::OutErrGreaterGreaterThan => {
| TokenContents::OutErrGreaterGreaterThan
| TokenContents::AssignmentOperator => {
working_set.error(ParseError::Expected("binary", span));
return garbage(working_set, span);
}
@ -3409,7 +3410,7 @@ pub fn parse_signature_helper(working_set: &mut StateWorkingSet, span: Span) ->
for token in &output {
match token {
Token {
contents: crate::TokenContents::Item,
contents: crate::TokenContents::Item | crate::TokenContents::AssignmentOperator,
span,
} => {
let span = *span;
@ -4829,7 +4830,7 @@ pub fn parse_value(
}
}
pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expression {
pub fn parse_assignment_operator(working_set: &mut StateWorkingSet, span: Span) -> Expression {
let contents = working_set.get_span_contents(span);
let operator = match contents {
@ -4839,6 +4840,95 @@ pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expressi
b"-=" => Operator::Assignment(Assignment::MinusAssign),
b"*=" => Operator::Assignment(Assignment::MultiplyAssign),
b"/=" => Operator::Assignment(Assignment::DivideAssign),
_ => {
working_set.error(ParseError::Expected("assignment operator", span));
return garbage(working_set, span);
}
};
Expression::new(working_set, Expr::Operator(operator), span, Type::Any)
}
pub fn parse_assignment_expression(
working_set: &mut StateWorkingSet,
spans: &[Span],
) -> Expression {
trace!("parsing: assignment expression");
let expr_span = Span::concat(spans);
// Assignment always has the most precedence, and its right-hand side can be a pipeline
let Some(op_index) = spans
.iter()
.position(|span| is_assignment_operator(working_set.get_span_contents(*span)))
else {
working_set.error(ParseError::Expected("assignment expression", expr_span));
return garbage(working_set, expr_span);
};
let lhs_spans = &spans[0..op_index];
let op_span = spans[op_index];
let rhs_spans = &spans[(op_index + 1)..];
if lhs_spans.is_empty() {
working_set.error(ParseError::Expected(
"left hand side of assignment",
op_span,
));
return garbage(working_set, expr_span);
}
if rhs_spans.is_empty() {
working_set.error(ParseError::Expected(
"right hand side of assignment",
op_span,
));
return garbage(working_set, expr_span);
}
// Parse the lhs and operator as usual for a math expression
let mut lhs = parse_expression(working_set, lhs_spans);
let mut operator = parse_assignment_operator(working_set, op_span);
// Re-parse the right-hand side as a subexpression
let rhs_span = Span::concat(rhs_spans);
let (rhs_tokens, rhs_error) = lex(
working_set.get_span_contents(rhs_span),
rhs_span.start,
&[],
&[],
true,
);
working_set.parse_errors.extend(rhs_error);
trace!("parsing: assignment right-hand side subexpression");
let rhs_block = parse_block(working_set, &rhs_tokens, rhs_span, false, true);
let rhs_ty = rhs_block.output_type();
let rhs_block_id = working_set.add_block(Arc::new(rhs_block));
let mut rhs = Expression::new(
working_set,
Expr::Subexpression(rhs_block_id),
rhs_span,
rhs_ty,
);
let (result_ty, err) = math_result_type(working_set, &mut lhs, &mut operator, &mut rhs);
if let Some(err) = err {
working_set.parse_errors.push(err);
}
Expression::new(
working_set,
Expr::BinaryOp(Box::new(lhs), Box::new(operator), Box::new(rhs)),
expr_span,
result_ty,
)
}
pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expression {
let contents = working_set.get_span_contents(span);
let operator = match contents {
b"==" => Operator::Comparison(Comparison::Equal),
b"!=" => Operator::Comparison(Comparison::NotEqual),
b"<" => Operator::Comparison(Comparison::LessThan),
@ -4954,6 +5044,10 @@ pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expressi
));
return garbage(working_set, span);
}
op if is_assignment_operator(op) => {
working_set.error(ParseError::Expected("a non-assignment operator", span));
return garbage(working_set, span);
}
_ => {
working_set.error(ParseError::Expected("operator", span));
return garbage(working_set, span);
@ -5258,7 +5352,12 @@ pub fn parse_expression(working_set: &mut StateWorkingSet, spans: &[Span]) -> Ex
return garbage(working_set, Span::concat(spans));
}
let output = if is_math_expression_like(working_set, spans[pos]) {
let output = if spans[pos..]
.iter()
.any(|span| is_assignment_operator(working_set.get_span_contents(*span)))
{
parse_assignment_expression(working_set, &spans[pos..])
} else if is_math_expression_like(working_set, spans[pos]) {
parse_math_expression(working_set, &spans[pos..], None)
} else {
let bytes = working_set.get_span_contents(spans[pos]).to_vec();
@ -5690,69 +5789,24 @@ pub(crate) fn redirecting_builtin_error(
}
pub fn parse_pipeline(working_set: &mut StateWorkingSet, pipeline: &LitePipeline) -> Pipeline {
let first_command = pipeline.commands.first();
let first_command_name = first_command
.and_then(|command| command.parts.first())
.map(|span| working_set.get_span_contents(*span));
if pipeline.commands.len() > 1 {
// Special case: allow "let" or "mut" to consume the whole pipeline, if this is a pipeline
// with multiple commands
if matches!(first_command_name, Some(b"let" | b"mut")) {
// Merge the pipeline into one command
let first_command = first_command.expect("must be Some");
// Parse a normal multi command pipeline
let elements: Vec<_> = pipeline
.commands
.iter()
.enumerate()
.map(|(index, element)| {
let element = parse_pipeline_element(working_set, element);
// Handle $in for pipeline elements beyond the first one
if index > 0 && element.has_in_variable(working_set) {
wrap_element_with_collect(working_set, element.clone())
} else {
element
}
})
.collect();
let remainder_span = first_command
.parts_including_redirection()
.skip(3)
.chain(
pipeline.commands[1..]
.iter()
.flat_map(|command| command.parts_including_redirection()),
)
.reduce(Span::append);
let parts = first_command
.parts
.iter()
.take(3) // the let/mut start itself
.copied()
.chain(remainder_span) // everything else
.collect();
let comments = pipeline
.commands
.iter()
.flat_map(|command| command.comments.iter())
.copied()
.collect();
let new_command = LiteCommand {
pipe: None,
comments,
parts,
redirection: None,
};
parse_builtin_commands(working_set, &new_command)
} else {
// Parse a normal multi command pipeline
let elements: Vec<_> = pipeline
.commands
.iter()
.enumerate()
.map(|(index, element)| {
let element = parse_pipeline_element(working_set, element);
// Handle $in for pipeline elements beyond the first one
if index > 0 && element.has_in_variable(working_set) {
wrap_element_with_collect(working_set, element.clone())
} else {
element
}
})
.collect();
Pipeline { elements }
}
Pipeline { elements }
} else {
// If there's only one command in the pipeline, this could be a builtin command
parse_builtin_commands(working_set, &pipeline.commands[0])

View file

@ -1177,9 +1177,9 @@ fn test_nothing_comparison_eq() {
#[rstest]
#[case(b"let a = 1 err> /dev/null")]
#[case(b"let a = 1 out> /dev/null")]
#[case(b"let a = 1 out+err> /dev/null")]
#[case(b"mut a = 1 err> /dev/null")]
#[case(b"mut a = 1 out> /dev/null")]
#[case(b"let a = 1 out+err> /dev/null")]
#[case(b"mut a = 1 out+err> /dev/null")]
fn test_redirection_with_letmut(#[case] phase: &[u8]) {
let engine_state = EngineState::new();

View file

@ -415,3 +415,9 @@ fn const_raw_string() {
let actual = nu!(r#"const x = r#'abc'#; $x"#);
assert_eq!(actual.out, "abc");
}
#[test]
fn const_takes_pipeline() {
let actual = nu!(r#"const list = 'bar_baz_quux' | split row '_'; $list | length"#);
assert_eq!(actual.out, "3");
}

View file

@ -6,9 +6,9 @@ fn get_env_by_name() {
cwd: ".",
plugin: ("nu_plugin_example"),
r#"
$env.FOO = bar
$env.FOO = 'bar'
example env FOO | print
$env.FOO = baz
$env.FOO = 'baz'
example env FOO | print
"#
);
@ -21,7 +21,7 @@ fn get_envs() {
let result = nu_with_plugins!(
cwd: ".",
plugin: ("nu_plugin_example"),
"$env.BAZ = foo; example env | get BAZ"
"$env.BAZ = 'foo'; example env | get BAZ"
);
assert!(result.status.success());
assert_eq!("foo", result.out);

View file

@ -20,7 +20,7 @@ fn mutate_nu_config_nested_ls() -> TestResult {
fn mutate_nu_config_nested_table() -> TestResult {
run_test_std(
r#"
$env.config.table.trim.methodology = wrapping
$env.config.table.trim.methodology = 'wrapping'
$env.config.table.trim.wrapping_try_keep_words = false
$env.config.table.trim.wrapping_try_keep_words
"#,

View file

@ -295,6 +295,22 @@ fn assign_expressions() -> TestResult {
)
}
#[test]
fn assign_takes_pipeline() -> TestResult {
run_test(
r#"mut foo = 'bar'; $foo = $foo | str upcase | str reverse; $foo"#,
"RAB",
)
}
#[test]
fn append_assign_takes_pipeline() -> TestResult {
run_test(
r#"mut foo = 'bar'; $foo ++= $foo | str upcase; $foo"#,
"barBAR",
)
}
#[test]
fn string_interpolation_paren_test() -> TestResult {
run_test(r#"$"('(')(')')""#, "()")