From 93b5f3f421dcfa8e42a6df7dcb1c239652dc48cc Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 25 Jun 2021 17:50:24 +1200 Subject: [PATCH] Make lexing configurable wrt newlines (#3682) --- crates/nu-cli/src/app/options_parser.rs | 2 +- crates/nu-cli/src/cli.rs | 2 +- crates/nu-cli/src/shell.rs | 2 +- crates/nu-command/src/examples.rs | 2 +- crates/nu-completion/src/completer.rs | 3 +- crates/nu-completion/src/engine.rs | 2 +- crates/nu-parser/src/flag.rs | 2 +- crates/nu-parser/src/lex/lexer.rs | 24 +++++++-- crates/nu-parser/src/lex/tests.rs | 60 ++++++++++----------- crates/nu-parser/src/lib.rs | 2 +- crates/nu-parser/src/parse.rs | 20 ++++--- crates/nu-parser/src/parse/def.rs | 5 +- crates/nu-parser/src/parse/def/signature.rs | 8 ++- tests/shell/pipeline/commands/internal.rs | 31 +++++++++++ 14 files changed, 111 insertions(+), 54 deletions(-) diff --git a/crates/nu-cli/src/app/options_parser.rs b/crates/nu-cli/src/app/options_parser.rs index 54d5cc84d6..28512fe5bb 100644 --- a/crates/nu-cli/src/app/options_parser.rs +++ b/crates/nu-cli/src/app/options_parser.rs @@ -39,7 +39,7 @@ impl OptionsParser for NuParser { fn parse(&self, input: &str) -> Result { let options = Options::default(); - let (lite_result, _err) = nu_parser::lex(input, 0); + let (lite_result, _err) = nu_parser::lex(input, 0, nu_parser::NewlineMode::Normal); let (lite_result, _err) = nu_parser::parse_block(lite_result); let (parsed, err) = nu_parser::classify_block(&lite_result, &self.context.scope); diff --git a/crates/nu-cli/src/cli.rs b/crates/nu-cli/src/cli.rs index 42ac5dc7dc..6622a6ab94 100644 --- a/crates/nu-cli/src/cli.rs +++ b/crates/nu-cli/src/cli.rs @@ -463,7 +463,7 @@ mod tests { #[quickcheck] fn quickcheck_parse(data: String) -> bool { - let (tokens, err) = nu_parser::lex(&data, 0); + let (tokens, err) = nu_parser::lex(&data, 0, nu_parser::NewlineMode::Normal); let (lite_block, err2) = nu_parser::parse_block(tokens); if err.is_none() && err2.is_none() { let context = EvaluationContext::basic(); diff --git a/crates/nu-cli/src/shell.rs b/crates/nu-cli/src/shell.rs index 998b94a2b0..0371abe8f1 100644 --- a/crates/nu-cli/src/shell.rs +++ b/crates/nu-cli/src/shell.rs @@ -149,7 +149,7 @@ impl rustyline::validate::Validator for NuValidator { ) -> rustyline::Result { let src = ctx.input(); - let (tokens, err) = nu_parser::lex(src, 0); + let (tokens, err) = nu_parser::lex(src, 0, nu_parser::NewlineMode::Normal); if let Some(err) = err { if let nu_errors::ParseErrorReason::Eof { .. } = err.reason() { return Ok(rustyline::validate::ValidationResult::Incomplete); diff --git a/crates/nu-command/src/examples.rs b/crates/nu-command/src/examples.rs index 74d81835fd..8714b2406a 100644 --- a/crates/nu-command/src/examples.rs +++ b/crates/nu-command/src/examples.rs @@ -214,7 +214,7 @@ fn parse_line(line: &str, ctx: &EvaluationContext) -> Result (usize, Vec) { use engine::LocationType; - let tokens = nu_parser::lex(line, 0).0; + let tokens = nu_parser::lex(line, 0, NewlineMode::Normal).0; let locations = Some(nu_parser::parse_block(tokens).0) .map(|block| nu_parser::classify_block(&block, context.scope())) diff --git a/crates/nu-completion/src/engine.rs b/crates/nu-completion/src/engine.rs index fba8465968..78b67acee2 100644 --- a/crates/nu-completion/src/engine.rs +++ b/crates/nu-completion/src/engine.rs @@ -336,7 +336,7 @@ mod tests { scope: &dyn ParserScope, pos: usize, ) -> Vec { - let (tokens, _) = lex(line, 0); + let (tokens, _) = lex(line, 0, nu_parser::NewlineMode::Normal); let (lite_block, _) = parse_block(tokens); scope.enter_scope(); diff --git a/crates/nu-parser/src/flag.rs b/crates/nu-parser/src/flag.rs index 2cab639fcc..a68f323156 100644 --- a/crates/nu-parser/src/flag.rs +++ b/crates/nu-parser/src/flag.rs @@ -94,7 +94,7 @@ mod tests { #[test] fn parses_longform_flag_containing_equal_sign() { let input = "bundle add rails --group=development"; - let (tokens, _) = lex(input, 0); + let (tokens, _) = lex(input, 0, lex::lexer::NewlineMode::Normal); let (root_node, _) = parse_block(tokens); assert_eq!(root_node.block.len(), 1); diff --git a/crates/nu-parser/src/lex/lexer.rs b/crates/nu-parser/src/lex/lexer.rs index 994717acc9..4cad0b8be0 100644 --- a/crates/nu-parser/src/lex/lexer.rs +++ b/crates/nu-parser/src/lex/lexer.rs @@ -26,6 +26,14 @@ impl Token { } } +#[derive(Debug, Eq, PartialEq, Clone)] +pub enum NewlineMode { + /// Treat newlines as a group separator + Normal, + /// Treat newlines as just another whitespace + Whitespace, +} + #[derive(Clone, Copy)] enum BlockKind { Paren, @@ -427,7 +435,11 @@ pub fn parse_block(tokens: Vec) -> (LiteBlock, Option) { /// Breaks the input string into a vector of tokens. This tokenization only tries to classify separators like /// semicolons, pipes, etc from external bare values (values that haven't been classified further) /// Takes in a string and and offset, which is used to offset the spans created (for when this function is used to parse inner strings) -pub fn lex(input: &str, span_offset: usize) -> (Vec, Option) { +pub fn lex( + input: &str, + span_offset: usize, + newline_mode: NewlineMode, +) -> (Vec, Option) { // Break the input slice into an iterator of Unicode characters. let mut char_indices = input.char_indices().peekable(); let mut error = None; @@ -489,10 +501,12 @@ pub fn lex(input: &str, span_offset: usize) -> (Vec, Option) let idx = *idx; let _ = char_indices.next(); - output.push(Token::new( - TokenContents::Eol, - Span::new(span_offset + idx, span_offset + idx + 1), - )); + if newline_mode == NewlineMode::Normal { + output.push(Token::new( + TokenContents::Eol, + Span::new(span_offset + idx, span_offset + idx + 1), + )); + } } else if *c == '#' { // If the next character is `#`, we're at the beginning of a line // comment. The comment continues until the next newline. diff --git a/crates/nu-parser/src/lex/tests.rs b/crates/nu-parser/src/lex/tests.rs index 175bfbae5e..ab9acf16a1 100644 --- a/crates/nu-parser/src/lex/tests.rs +++ b/crates/nu-parser/src/lex/tests.rs @@ -15,7 +15,7 @@ mod bare { fn simple_1() { let input = "foo bar baz"; - let (result, err) = lex(input, 0); + let (result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); assert_eq!(result[0].span, span(0, 3)); @@ -25,7 +25,7 @@ mod bare { fn simple_2() { let input = "'foo bar' baz"; - let (result, err) = lex(input, 0); + let (result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); assert_eq!(result[0].span, span(0, 9)); @@ -35,7 +35,7 @@ mod bare { fn simple_3() { let input = "'foo\" bar' baz"; - let (result, err) = lex(input, 0); + let (result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); assert_eq!(result[0].span, span(0, 10)); @@ -45,7 +45,7 @@ mod bare { fn simple_4() { let input = "[foo bar] baz"; - let (result, err) = lex(input, 0); + let (result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); assert_eq!(result[0].span, span(0, 9)); @@ -55,7 +55,7 @@ mod bare { fn simple_5() { let input = "'foo 'bar baz"; - let (result, err) = lex(input, 0); + let (result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); assert_eq!(result[0].span, span(0, 9)); @@ -65,7 +65,7 @@ mod bare { fn simple_6() { let input = "''foo baz"; - let (result, err) = lex(input, 0); + let (result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); assert_eq!(result[0].span, span(0, 5)); @@ -75,7 +75,7 @@ mod bare { fn simple_7() { let input = "'' foo"; - let (result, err) = lex(input, 0); + let (result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); assert_eq!(result[0].span, span(0, 2)); @@ -85,7 +85,7 @@ mod bare { fn simple_8() { let input = " '' foo"; - let (result, err) = lex(input, 0); + let (result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); assert_eq!(result[0].span, span(1, 3)); @@ -95,7 +95,7 @@ mod bare { fn simple_9() { let input = " 'foo' foo"; - let (result, err) = lex(input, 0); + let (result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); assert_eq!(result[0].span, span(1, 6)); @@ -105,7 +105,7 @@ mod bare { fn simple_10() { let input = "[foo, bar]"; - let (result, err) = lex(input, 0); + let (result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); assert_eq!(result[0].span, span(0, 10)); @@ -118,7 +118,7 @@ mod bare { def e [] {echo hi} "#; - let (result, err) = lex(input, 0); + let (result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); //result[0] == EOL @@ -141,7 +141,7 @@ def e [] {echo hi} def e2 [] {echo hello} "#; - let (result, err) = lex(input, 0); + let (result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); let span1 = span(2, 11); @@ -166,7 +166,7 @@ def e2 [] {echo hello} # shouldn't return error echo hi }"#; - let (_result, err) = lex(input, 0); + let (_result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); } @@ -176,7 +176,7 @@ def e2 [] {echo hello} # should "not return error echo hi }"#; - let (_result, err) = lex(input, 0); + let (_result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); } @@ -186,7 +186,7 @@ def e2 [] {echo hello} # should not [return error echo hi }"#; - let (_result, err) = lex(input, 0); + let (_result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); } @@ -196,7 +196,7 @@ def e2 [] {echo hello} # should not return {error echo hi }"#; - let (_result, err) = lex(input, 0); + let (_result, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_none()); } @@ -204,7 +204,7 @@ def e2 [] {echo hello} fn ignore_future() { let input = "foo 'bar"; - let (result, _) = lex(input, 0); + let (result, _) = lex(input, 0, NewlineMode::Normal); assert_eq!(result[0].span, span(0, 3)); } @@ -213,7 +213,7 @@ def e2 [] {echo hello} fn invalid_1() { let input = "'foo bar"; - let (_, err) = lex(input, 0); + let (_, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_some()); } @@ -222,7 +222,7 @@ def e2 [] {echo hello} fn invalid_2() { let input = "'bar"; - let (_, err) = lex(input, 0); + let (_, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_some()); } @@ -231,7 +231,7 @@ def e2 [] {echo hello} fn invalid_4() { let input = " 'bar"; - let (_, err) = lex(input, 0); + let (_, err) = lex(input, 0, NewlineMode::Normal); assert!(err.is_some()); } @@ -244,7 +244,7 @@ mod lite_parse { #[test] fn pipeline() { - let (result, err) = lex("cmd1 | cmd2 ; deploy", 0); + let (result, err) = lex("cmd1 | cmd2 ; deploy", 0, NewlineMode::Normal); assert!(err.is_none()); let (result, err) = parse_block(result); assert!(err.is_none()); @@ -255,7 +255,7 @@ mod lite_parse { #[test] fn simple_1() { - let (result, err) = lex("foo", 0); + let (result, err) = lex("foo", 0, NewlineMode::Normal); assert!(err.is_none()); let (result, err) = parse_block(result); assert!(err.is_none()); @@ -271,7 +271,7 @@ mod lite_parse { #[test] fn simple_offset() { - let (result, err) = lex("foo", 10); + let (result, err) = lex("foo", 10, NewlineMode::Normal); assert!(err.is_none()); let (result, err) = parse_block(result); assert!(err.is_none()); @@ -286,7 +286,7 @@ mod lite_parse { #[test] fn incomplete_result() { - let (result, err) = lex("my_command \"foo' --test", 10); + let (result, err) = lex("my_command \"foo' --test", 10, NewlineMode::Normal); assert!(matches!( err.unwrap().reason(), nu_errors::ParseErrorReason::Eof { .. } @@ -314,7 +314,7 @@ mod lite_parse { # * It's much better :) def my_echo [arg] { echo $arg } "#; - let (result, err) = lex(code, 0); + let (result, err) = lex(code, 0, NewlineMode::Normal); assert!(err.is_none()); let (result, err) = parse_block(result); assert!(err.is_none()); @@ -352,7 +352,7 @@ def my_echo [arg] { echo $arg } # * It's even better! def my_echo2 [arg] { echo $arg } "#; - let (result, err) = lex(code, 0); + let (result, err) = lex(code, 0, NewlineMode::Normal); assert!(err.is_none()); let (result, err) = parse_block(result); assert!(err.is_none()); @@ -404,7 +404,7 @@ def my_echo2 [arg] { echo $arg } echo 42 "#; - let (result, err) = lex(code, 0); + let (result, err) = lex(code, 0, NewlineMode::Normal); assert!(err.is_none()); // assert_eq!(format!("{:?}", result), ""); let (result, err) = parse_block(result); @@ -425,7 +425,7 @@ echo 42 echo 42 "#; - let (result, err) = lex(code, 0); + let (result, err) = lex(code, 0, NewlineMode::Normal); assert!(err.is_none()); // assert_eq!(format!("{:?}", result), ""); let (result, err) = parse_block(result); @@ -445,7 +445,7 @@ fn no_discarded_white_space_start_of_comment() { # Starting space is not discarded echo 42 "#; - let (result, err) = lex(code, 0); + let (result, err) = lex(code, 0, NewlineMode::Normal); assert!(err.is_none()); // assert_eq!(format!("{:?}", result), ""); let (result, err) = parse_block(result); @@ -479,7 +479,7 @@ fn multiple_discarded_white_space_start_of_comment() { # Discard 2 spaces echo 42 "#; - let (result, err) = lex(code, 0); + let (result, err) = lex(code, 0, NewlineMode::Normal); assert!(err.is_none()); // assert_eq!(format!("{:?}", result), ""); let (result, err) = parse_block(result); diff --git a/crates/nu-parser/src/lib.rs b/crates/nu-parser/src/lib.rs index 7952c3d6b1..f576d188fc 100644 --- a/crates/nu-parser/src/lib.rs +++ b/crates/nu-parser/src/lib.rs @@ -10,7 +10,7 @@ mod parse; mod scope; mod shapes; -pub use lex::lexer::{lex, parse_block}; +pub use lex::lexer::{lex, parse_block, NewlineMode}; pub use lex::tokens::{LiteBlock, LiteCommand, LiteGroup, LitePipeline}; pub use parse::{classify_block, garbage, parse, parse_full_column_path, parse_math_expression}; pub use scope::ParserScope; diff --git a/crates/nu-parser/src/parse.rs b/crates/nu-parser/src/parse.rs index ae1db7cc94..e57a31b3a4 100644 --- a/crates/nu-parser/src/parse.rs +++ b/crates/nu-parser/src/parse.rs @@ -18,7 +18,7 @@ use nu_protocol::{NamedType, PositionalType, Signature, SyntaxShape, UnspannedPa use nu_source::{HasSpan, Span, Spanned, SpannedItem}; use num_bigint::BigInt; -use crate::parse::def::parse_parameter; +use crate::{lex::lexer::NewlineMode, parse::def::parse_parameter}; use crate::{ lex::lexer::{lex, parse_block}, ParserScope, @@ -489,7 +489,7 @@ fn parse_subexpression( .collect(); // We haven't done much with the inner string, so let's go ahead and work with it - let (tokens, err) = lex(&string, lite_arg.span.start() + 1); + let (tokens, err) = lex(&string, lite_arg.span.start() + 1, NewlineMode::Whitespace); if error.is_none() { error = err; }; @@ -792,7 +792,11 @@ fn parse_table( error = err; } - let (tokens, err) = lex(&string, lite_inner.parts[0].span.start() + 1); + let (tokens, err) = lex( + &string, + lite_inner.parts[0].span.start() + 1, + NewlineMode::Whitespace, + ); if err.is_some() { return (garbage(lite_inner.span()), err); } @@ -816,7 +820,7 @@ fn parse_table( if error.is_none() { error = err; } - let (tokens, err) = lex(&string, arg.span.start() + 1); + let (tokens, err) = lex(&string, arg.span.start() + 1, NewlineMode::Whitespace); if err.is_some() { return (garbage(arg.span), err); } @@ -1005,7 +1009,8 @@ fn parse_arg( let string: String = chars.collect(); // We haven't done much with the inner string, so let's go ahead and work with it - let (tokens, err) = lex(&string, lite_arg.span.start() + 1); + let (tokens, err) = + lex(&string, lite_arg.span.start() + 1, NewlineMode::Whitespace); if err.is_some() { return (garbage(lite_arg.span), err); } @@ -1071,7 +1076,8 @@ fn parse_arg( let string: String = chars.into_iter().collect(); // We haven't done much with the inner string, so let's go ahead and work with it - let (mut tokens, err) = lex(&string, lite_arg.span.start() + 1); + let (mut tokens, err) = + lex(&string, lite_arg.span.start() + 1, NewlineMode::Normal); if error.is_none() { error = err; } @@ -2172,7 +2178,7 @@ pub fn parse( scope: &dyn ParserScope, ) -> (Arc, Option) { let mut error = None; - let (output, err) = lex(input, span_offset); + let (output, err) = lex(input, span_offset, NewlineMode::Normal); if error.is_none() { error = err; } diff --git a/crates/nu-parser/src/parse/def.rs b/crates/nu-parser/src/parse/def.rs index bf8d134949..b868e0c4fd 100644 --- a/crates/nu-parser/src/parse/def.rs +++ b/crates/nu-parser/src/parse/def.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use crate::{ - lex::tokens::LiteCommand, + lex::{lexer::NewlineMode, tokens::LiteCommand}, parse::{classify_block, util::trim_quotes}, }; @@ -52,7 +52,8 @@ pub(crate) fn parse_definition(call: &LiteCommand, scope: &dyn ParserScope) -> O scope.enter_scope(); - let (tokens, err) = lex(&string, call.parts[3].span.start() + 1); + let (tokens, err) = + lex(&string, call.parts[3].span.start() + 1, NewlineMode::Normal); if err.is_some() { return err; }; diff --git a/crates/nu-parser/src/parse/def/signature.rs b/crates/nu-parser/src/parse/def/signature.rs index e10f1b001a..e806013ff9 100644 --- a/crates/nu-parser/src/parse/def/signature.rs +++ b/crates/nu-parser/src/parse/def/signature.rs @@ -18,7 +18,7 @@ use nu_protocol::{NamedType, PositionalType, Signature, SyntaxShape}; use nu_source::{Span, Spanned}; use crate::lex::{ - lexer::{lex, Token}, + lexer::{lex, NewlineMode, Token}, tokens::TokenContents, }; @@ -58,7 +58,11 @@ pub fn parse_signature( "signature vec span start: {}", signature_vec.span.start() + 1 ); - let (tokens, error) = lex(&string, signature_vec.span.start() + 1); + let (tokens, error) = lex( + &string, + signature_vec.span.start() + 1, + NewlineMode::Whitespace, + ); err = err.or(error); //After normal lexing, tokens also need to be split on ',' and ':' diff --git a/tests/shell/pipeline/commands/internal.rs b/tests/shell/pipeline/commands/internal.rs index 22e1592aff..8b45c23d83 100644 --- a/tests/shell/pipeline/commands/internal.rs +++ b/tests/shell/pipeline/commands/internal.rs @@ -1028,6 +1028,37 @@ fn pipeline_params_inner() { assert_eq!(actual.out, "126"); } +#[test] +fn better_table_lex() { + let actual = nu!( + cwd: ".", pipeline( + r#" + let table = [ + [name, size]; + [small, 7] + [medium, 10] + [large, 12] + ]; + $table.1.size + "#) + ); + + assert_eq!(actual.out, "10"); +} + +#[test] +fn better_subexpr_lex() { + let actual = nu!( + cwd: ".", pipeline( + r#" + (echo boo + sam | str length | math sum) + "#) + ); + + assert_eq!(actual.out, "6"); +} + mod parse { use nu_test_support::nu;