From 90863439d136022dab8cfd0416c80294bb71c7b6 Mon Sep 17 00:00:00 2001 From: WindSoilder Date: Thu, 15 Jun 2023 19:11:42 +0800 Subject: [PATCH] allow comment in multiple line pipeline (#9436) # Description - fixes: #5517 - fixes: #9250 For the following commands: ``` ls # | le | length ``` I found that it generates a bad lite parsing result: ``` LiteBlock { block: [ LitePipeline { commands: [ Command(None, LiteCommand { comments: [], parts: [Span { start: 138600, end: 138602 }] }) ] }, LitePipeline { commands: [ Command(Some(Span { start: 138610, end: 138611 }), LiteCommand { comments: [Span { start: 138603, end: 138609 }], parts: [Span { start: 138612, end: 138618 }] }) ] } ] } ``` The result should contain only one `LitePipeline`; the second `LitePipeline` is generated because of the `Eol` lex token: ``` [ Token { contents: Item, span: Span { start: 138600, end: 138602 } }, Token { contents: Eol, span: Span { start: 138602, end: 138603 } }, // it generates the second LitePipeline Token { contents: Comment, span: Span { start: 138603, end: 138609 } }, Token { contents: Pipe, span: Span { start: 138610, end: 138611 } }, Token { contents: Item, span: Span { start: 138612, end: 138618 } } ] ``` To fix the issue, I remove the `Eol` token that sits right before a `Comment` when the line after the comment continues the pipeline with `|`; the lexer then generates a good `LiteBlock`, and everything works fine. 
### After the fix: Token: ``` [ Token { contents: Item, span: Span { start: 138618, end: 138620 } }, Token { contents: Comment, span: Span { start: 138622, end: 138628 } }, Token { contents: Pipe, span: Span { start: 138629, end: 138630 } }, Token { contents: Item, span: Span { start: 138631, end: 138637 } } ] ``` LiteBlock: ``` LiteBlock { block: [ LitePipeline { commands: [ Command( None, LiteCommand { comments: [Span { start: 138622, end: 138628 }], parts: [Span { start: 138618, end: 138620 }] } ), Command( Some(Span { start: 138629, end: 138630 }), LiteCommand { comments: [], parts: [Span { start: 138631, end: 138637 }] })] }] } ``` # User-Facing Changes # Tests + Formatting # After Submitting --- crates/nu-parser/src/lex.rs | 19 ++++++++++++++++++- src/tests/test_parser.rs | 10 ++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index 40f2baaac4..34701efdf4 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -388,7 +388,24 @@ fn lex_internal( *prev = Token::new( TokenContents::Pipe, Span::new(span_offset + idx, span_offset + idx + 1), - ) + ); + // And this is a continuation of the previous line if previous line is a + // comment line (combined with EOL + Comment) + // + // Initially, the last one token is TokenContents::Pipe, we don't need to + // check it, so the beginning offset is 2. 
+ let mut offset = 2; + while output.len() > offset { + let index = output.len() - offset; + if output[index].contents == TokenContents::Comment + && output[index - 1].contents == TokenContents::Eol + { + output.remove(index - 1); + offset += 1; + } else { + break; + } + } } _ => { output.push(Token::new( diff --git a/src/tests/test_parser.rs b/src/tests/test_parser.rs index 510c4220f0..3382aa9c7a 100644 --- a/src/tests/test_parser.rs +++ b/src/tests/test_parser.rs @@ -442,6 +442,16 @@ fn unary_not_6() -> TestResult { ) } +#[test] +fn comment_in_multiple_pipelines() -> TestResult { + run_test( + r#"[[name, present]; [abc, true], [def, false]] + # | where not present + | get name.0"#, + "abc", + ) +} + #[test] fn date_literal() -> TestResult { run_test(r#"2022-09-10 | date to-record | get day"#, "10")