Add better comment skipping (#359)

This commit is contained in:
JT 2021-11-22 07:13:09 +13:00 committed by GitHub
parent d30dfc63c4
commit 143855b662
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 58 additions and 32 deletions

View file

@@ -207,6 +207,7 @@ pub fn lex(
span_offset: usize,
additional_whitespace: &[u8],
special_tokens: &[u8],
skip_comment: bool,
) -> (Vec<Token>, Option<ParseError>) {
let mut error = None;
@@ -277,24 +278,26 @@ pub fn lex(
while let Some(input) = input.get(curr_offset) {
curr_offset += 1;
if *input == b'\n' || *input == b'\r' {
output.push(Token::new(
TokenContents::Comment,
Span::new(start, curr_offset - 1),
));
if !skip_comment {
output.push(Token::new(
TokenContents::Comment,
Span::new(start, curr_offset - 1),
));
// Adding an end of line token after a comment
// This helps during lite_parser to avoid losing a command
// in a statement
output.push(Token::new(
TokenContents::Eol,
Span::new(curr_offset - 1, curr_offset),
));
// Adding an end of line token after a comment
// This helps during lite_parser to avoid losing a command
// in a statement
output.push(Token::new(
TokenContents::Eol,
Span::new(curr_offset - 1, curr_offset),
));
}
start = curr_offset;
break;
}
}
if start != curr_offset {
if start != curr_offset && !skip_comment {
output.push(Token::new(
TokenContents::Comment,
Span::new(span_offset + start, span_offset + curr_offset),

View file

@@ -488,7 +488,7 @@ pub fn parse_module_block(
let source = working_set.get_span_contents(span);
let (output, err) = lex(source, span.start, &[], &[]);
let (output, err) = lex(source, span.start, &[], &[], true);
error = error.or(err);
let (output, err) = lite_parse(&output);

View file

@@ -1311,7 +1311,7 @@ pub fn parse_full_cell_path(
let source = working_set.get_span_contents(span);
let mut error = None;
let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.']);
let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.'], true);
error = error.or(err);
let mut tokens = tokens.into_iter().peekable();
@@ -1336,7 +1336,7 @@ pub fn parse_full_cell_path(
let source = working_set.get_span_contents(span);
let (output, err) = lex(source, span.start, &[b'\n', b'\r'], &[]);
let (output, err) = lex(source, span.start, &[b'\n', b'\r'], &[], true);
error = error.or(err);
let (output, err) = lite_parse(&output);
@@ -2062,7 +2062,7 @@ pub fn parse_signature_helper(
let mut error = None;
let source = working_set.get_span_contents(span);
let (output, err) = lex(source, span.start, &[b'\n', b'\r', b','], &[b':']);
let (output, err) = lex(source, span.start, &[b'\n', b'\r', b','], &[b':'], false);
error = error.or(err);
let mut args: Vec<Arg> = vec![];
@@ -2391,7 +2391,7 @@ pub fn parse_list_expression(
let span = Span { start, end };
let source = working_set.get_span_contents(span);
let (output, err) = lex(source, span.start, &[b'\n', b'\r', b','], &[]);
let (output, err) = lex(source, span.start, &[b'\n', b'\r', b','], &[], true);
error = error.or(err);
let (output, err) = lite_parse(&output);
@@ -2463,7 +2463,7 @@ pub fn parse_table_expression(
let source = working_set.get_span_contents(span);
let (output, err) = lex(source, start, &[b'\n', b'\r', b','], &[]);
let (output, err) = lex(source, start, &[b'\n', b'\r', b','], &[], true);
error = error.or(err);
let (output, err) = lite_parse(&output);
@@ -2578,7 +2578,7 @@ pub fn parse_block_expression(
let source = working_set.get_span_contents(span);
let (output, err) = lex(source, start, &[], &[]);
let (output, err) = lex(source, start, &[], &[], true);
error = error.or(err);
working_set.enter_scope();
@@ -2797,7 +2797,7 @@ pub fn parse_value(
let source = working_set.get_span_contents(span);
let mut error = None;
let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.']);
let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.'], true);
error = error.or(err);
let tokens = tokens.into_iter().peekable();
@@ -3232,7 +3232,7 @@ pub fn parse_record(
let span = Span { start, end };
let source = working_set.get_span_contents(span);
let (tokens, err) = lex(source, start, &[b'\n', b'\r', b','], &[b':']);
let (tokens, err) = lex(source, start, &[b'\n', b'\r', b','], &[b':'], true);
error = error.or(err);
let mut output = vec![];
@@ -3598,7 +3598,7 @@ pub fn parse(
working_set.add_file(name, contents);
let (output, err) = lex(contents, span_offset, &[], &[]);
let (output, err) = lex(contents, span_offset, &[], &[], true);
error = error.or(err);
let (output, err) = lite_parse(&output);

View file

@@ -5,7 +5,7 @@ use nu_protocol::Span;
fn lex_basic() {
let file = b"let x = 4";
let output = lex(file, 0, &[], &[]);
let output = lex(file, 0, &[], &[], true);
assert!(output.1.is_none());
}
@@ -14,7 +14,7 @@ fn lex_basic() {
fn lex_newline() {
let file = b"let x = 300\nlet y = 500;";
let output = lex(file, 0, &[], &[]);
let output = lex(file, 0, &[], &[], true);
assert!(output.0.contains(&Token {
contents: TokenContents::Eol,
@@ -26,7 +26,7 @@ fn lex_newline() {
fn lex_empty() {
let file = b"";
let output = lex(file, 0, &[], &[]);
let output = lex(file, 0, &[], &[], true);
assert!(output.0.is_empty());
assert!(output.1.is_none());
@@ -37,7 +37,7 @@ fn lex_parenthesis() {
// The whole parenthesis is an item for the lexer
let file = b"let x = (300 + (322 * 444));";
let output = lex(file, 0, &[], &[]);
let output = lex(file, 0, &[], &[], true);
assert_eq!(
output.0.get(3).unwrap(),
@@ -52,7 +52,7 @@ fn lex_parenthesis() {
fn lex_comment() {
let file = b"let x = 300 # a comment \n $x + 444";
let output = lex(file, 0, &[], &[]);
let output = lex(file, 0, &[], &[], false);
assert_eq!(
output.0.get(4).unwrap(),
@@ -67,7 +67,7 @@ fn lex_comment() {
fn lex_is_incomplete() {
let file = b"let x = 300 | ;";
let output = lex(file, 0, &[], &[]);
let output = lex(file, 0, &[], &[], true);
let err = output.1.unwrap();
assert!(matches!(err, ParseError::ExtraTokens(_)));
@@ -77,7 +77,7 @@ fn lex_is_incomplete() {
fn lex_incomplete_paren() {
let file = b"let x = (300 + ( 4 + 1)";
let output = lex(file, 0, &[], &[]);
let output = lex(file, 0, &[], &[], true);
let err = output.1.unwrap();
assert!(matches!(err, ParseError::UnexpectedEof(v, _) if v == ")"));
@@ -87,7 +87,7 @@ fn lex_incomplete_paren() {
fn lex_incomplete_quote() {
let file = b"let x = '300 + 4 + 1";
let output = lex(file, 0, &[], &[]);
let output = lex(file, 0, &[], &[], true);
let err = output.1.unwrap();
assert!(matches!(err, ParseError::UnexpectedEof(v, _) if v == "'"));
@@ -102,7 +102,7 @@ fn lex_comments() {
// let y = 1 # comment
let file = b"let z = 4 #comment \n let x = 4 # comment\n let y = 1 # comment";
let output = lex(file, 0, &[], &[]);
let output = lex(file, 0, &[], &[], false);
assert_eq!(
output.0.get(4).unwrap(),

View file

@@ -2,7 +2,7 @@ use nu_parser::{lex, lite_parse, LiteBlock, ParseError};
use nu_protocol::Span;
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
let (output, err) = lex(input, 0, &[], &[]);
let (output, err) = lex(input, 0, &[], &[], false);
if let Some(err) = err {
return Err(err);
}

View file

@@ -1120,3 +1120,26 @@ fn config_var_2() -> TestResult {
"40.0 KB",
)
}
// Regression test added in this commit: a `# foo` comment on its own line
// inside a block/record literal must be skipped by the lexer (skip_comment =
// true) so the closing `}` still terminates the block and `$x.y` resolves.
#[test]
fn comment_skipping_1() -> TestResult {
run_test(
r#"let x = {
y: 20
# foo
}; $x.y"#,
"20",
)
}
// Companion regression test: a comment *between* two record entries must not
// swallow the entry that follows it — `z: 40` after `# foo` should still be
// parsed, so `$x.z` evaluates to 40.
#[test]
fn comment_skipping_2() -> TestResult {
run_test(
r#"let x = {
y: 20
# foo
z: 40
}; $x.z"#,
"40",
)
}