From 90863439d136022dab8cfd0416c80294bb71c7b6 Mon Sep 17 00:00:00 2001
From: WindSoilder <WindSoilder@outlook.com>
Date: Thu, 15 Jun 2023 19:11:42 +0800
Subject: [PATCH] allow comment in multiple line pipeline (#9436)

<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx

you can also mention related issues, PRs or discussions!
-->

# Description
- fixes: #5517
- fixes: #9250

For the following commands:
```
ls
# | le
| length
```

I found that lite parsing produces an incorrect result:
```
LiteBlock {
    block: [
        LitePipeline {
            commands: [
                Command(None, LiteCommand { comments: [], parts: [Span { start: 138600, end: 138602 }] })
            ]
        },
        LitePipeline {
            commands: [
                Command(Some(Span { start: 138610, end: 138611 }),
                LiteCommand { comments: [Span { start: 138603, end: 138609 }], parts: [Span { start: 138612, end: 138618 }] })
            ]
        }
    ]
}
```

The result should contain only one `LitePipeline`; the second
`LitePipeline` is generated because of the `Eol` lex token:
```
[
    Token { contents: Item, span: Span { start: 138600, end: 138602 } },
    Token { contents: Eol, span: Span { start: 138602, end: 138603 } },    // it generates the second LitePipeline
    Token { contents: Comment, span: Span { start: 138603, end: 138609 } },
    Token { contents: Pipe, span: Span { start: 138610, end: 138611 } },
    Token { contents: Item, span: Span { start: 138612, end: 138618 } }
]
```

To fix the issue, I remove the `Eol` token when a `Comment` comes right
after it and the pipeline continues with `|` on a following line. The
lexer output then produces a correct `LiteBlock` with a single pipeline.

### After the fix:
Token:
```
[
  Token { contents: Item, span: Span { start: 138618, end: 138620 } },
  Token { contents: Comment, span: Span { start: 138622, end: 138628 } },
  Token { contents: Pipe, span: Span { start: 138629, end: 138630 } },
  Token { contents: Item, span: Span { start: 138631, end: 138637 } }
]
```

LiteBlock:
```
LiteBlock {
  block: [
    LitePipeline {
      commands: [
        Command(
            None,
            LiteCommand {
                comments: [Span { start: 138622, end: 138628 }],
                parts: [Span { start: 138618, end: 138620 }]
            }
        ),
        Command(
            Some(Span { start: 138629, end: 138630 }),
            LiteCommand { comments: [], parts: [Span { start: 138631, end: 138637 }] })] }] }
```
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.

Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect -A clippy::result_large_err` to check that
you're using the standard code style
- `cargo test --workspace` to check that all tests pass
- `cargo run -- crates/nu-std/tests/run.nu` to run the tests for the
standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
---
 crates/nu-parser/src/lex.rs | 19 ++++++++++++++++++-
 src/tests/test_parser.rs    | 10 ++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs
index 40f2baaac4..34701efdf4 100644
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@@ -388,7 +388,24 @@ fn lex_internal(
                         *prev = Token::new(
                             TokenContents::Pipe,
                             Span::new(span_offset + idx, span_offset + idx + 1),
-                        )
+                        );
+                        // And this is a continuation of the previous line if previous line is a
+                        // comment line (combined with EOL + Comment)
+                        //
+                        // Initially, the last one token is TokenContents::Pipe, we don't need to
+                        // check it, so the beginning offset is 2.
+                        let mut offset = 2;
+                        while output.len() > offset {
+                            let index = output.len() - offset;
+                            if output[index].contents == TokenContents::Comment
+                                && output[index - 1].contents == TokenContents::Eol
+                            {
+                                output.remove(index - 1);
+                                offset += 1;
+                            } else {
+                                break;
+                            }
+                        }
                     }
                     _ => {
                         output.push(Token::new(
diff --git a/src/tests/test_parser.rs b/src/tests/test_parser.rs
index 510c4220f0..3382aa9c7a 100644
--- a/src/tests/test_parser.rs
+++ b/src/tests/test_parser.rs
@@ -442,6 +442,16 @@ fn unary_not_6() -> TestResult {
     )
 }
 
+#[test]
+fn comment_in_multiple_pipelines() -> TestResult {
+    run_test(
+        r#"[[name, present]; [abc, true], [def, false]]
+        # | where not present
+        | get name.0"#,
+        "abc",
+    )
+}
+
 #[test]
 fn date_literal() -> TestResult {
     run_test(r#"2022-09-10 | date to-record | get day"#, "10")