From ef05d1419d4f1f0c8bb643230b9708f696afbe60 Mon Sep 17 00:00:00 2001 From: YizhePKU Date: Thu, 21 Mar 2024 23:37:52 +0800 Subject: [PATCH] Fix: missing parse error when extra tokens are given to let bindings (#12238) Manual checks are added to `parse_let`, `parse_mut`, and `parse_const`. `parse_var_with_opt_type` is also fixed to update `spans_idx` correctly. Fixes #12125. It's technically a fix, but I'd rather not merge this directly. I'm making this PR to bring into attention the code quality of the parser code. For example: * Inconsistent usage of `spans_idx`. What is its purpose, and which parsing functions need it? I suspect it's possible to remove the usage of `spans_idx` entirely. * Lacking documentation for top-level functions. What does `mutable` mean for `parse_var_with_opt_type()`? * Inconsistent error reporting. Usage of both `working_set.error()` and `working_set.parse_errors.push()`. Using `ParseError::Expected` for an invalid variable name when there's `ParseError::VariableNotValid` (from `parser.rs:5237`). Checking variable names manually when there's `is_variable()` (from `parser.rs:2905`). * `span()` is a terrible name for a function that flattens a bunch of spans into one (from `nu-protocal/src/span.rs:92`). The top-level comment (`Used when you have a slice of spans of at least size 1`) doesn't help either. I've only looked at a small portion of the parser code; I expect there are a lot more. These issues made it much harder to fix a simple bug like #12125. I believe we should invest some effort to cleanup the parser code, which will ease maintainance in the future. I'll willing to help if there is interest. --- crates/nu-parser/src/parse_keywords.rs | 13 ++++++++++++- crates/nu-parser/src/parser.rs | 8 ++++++-- tests/parsing/mod.rs | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/crates/nu-parser/src/parse_keywords.rs b/crates/nu-parser/src/parse_keywords.rs index 3aa133d177..4a67c98be8 100644 --- a/crates/nu-parser/src/parse_keywords.rs +++ b/crates/nu-parser/src/parse_keywords.rs @@ -2975,9 +2975,12 @@ pub fn parse_let(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline }; let mut idx = 0; - let (lvalue, explicit_type) = parse_var_with_opt_type(working_set, &spans[1..(span.0)], &mut idx, false); + // check for extra tokens after the identifier + if idx + 1 < span.0 - 1 { + working_set.error(ParseError::ExtraTokens(spans[idx + 2])); + } let var_name = String::from_utf8_lossy(working_set.get_span_contents(lvalue.span)) @@ -3083,6 +3086,10 @@ pub fn parse_const(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipelin let (lvalue, explicit_type) = parse_var_with_opt_type(working_set, &spans[1..(span.0)], &mut idx, false); + // check for extra tokens after the identifier + if idx + 1 < span.0 - 1 { + working_set.error(ParseError::ExtraTokens(spans[idx + 2])); + } let var_name = String::from_utf8_lossy(working_set.get_span_contents(lvalue.span)) @@ -3235,6 +3242,10 @@ pub fn parse_mut(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline let (lvalue, explicit_type) = parse_var_with_opt_type(working_set, &spans[1..(span.0)], &mut idx, true); + // check for extra tokens after the identifier + if idx + 1 < span.0 - 1 { + working_set.error(ParseError::ExtraTokens(spans[idx + 2])); + } let var_name = String::from_utf8_lossy(working_set.get_span_contents(lvalue.span)) diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 6fb5e67900..b937252d47 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -2893,6 +2893,10 @@ pub fn parse_import_pattern(working_set: &mut StateWorkingSet, spans: &[Span]) - } } +/// Parse `spans[spans_idx..]` into a variable, with optional type annotation. +/// If the name of the variable ends with a colon (no space in-between allowed), then a type annotation +/// can appear after the variable, in which case the colon is stripped from the name of the variable. +/// `spans_idx` is updated to point to the last span that has been parsed. pub fn parse_var_with_opt_type( working_set: &mut StateWorkingSet, spans: &[Span], @@ -2928,7 +2932,7 @@ pub fn parse_var_with_opt_type( } let ty = parse_type(working_set, &type_bytes, tokens[0].span); - *spans_idx += spans.len() - *spans_idx - 1; + *spans_idx = spans.len() - 1; let var_name = bytes[0..(bytes.len() - 1)].to_vec(); @@ -2996,7 +3000,7 @@ pub fn parse_var_with_opt_type( ( Expression { expr: Expr::VarDecl(id), - span: span(&spans[*spans_idx..*spans_idx + 1]), + span: spans[*spans_idx], ty: Type::Any, custom_completion: None, }, diff --git a/tests/parsing/mod.rs b/tests/parsing/mod.rs index 63e38d0d2c..be9f3aa4f6 100644 --- a/tests/parsing/mod.rs +++ b/tests/parsing/mod.rs @@ -331,3 +331,21 @@ fn parse_let_signature(#[case] phrase: &str) { let actual = nu!(phrase); assert!(actual.err.is_empty()); } + +#[test] +fn parse_let_signature_missing_colon() { + let actual = nu!("let a int = 1"); + assert!(actual.err.contains("nu::parser::extra_tokens")); +} + +#[test] +fn parse_mut_signature_missing_colon() { + let actual = nu!("mut a record = {a: 1 b: 1}"); + assert!(actual.err.contains("nu::parser::extra_tokens")); +} + +#[test] +fn parse_const_signature_missing_colon() { + let actual = nu!("const a string = 'Hello World\n'"); + assert!(actual.err.contains("nu::parser::extra_tokens")); +}