From 4604e12eb0e914eb6b660ec2739b4c9e00641f47 Mon Sep 17 00:00:00 2001 From: RobbingDaHood <16130319+RobbingDaHood@users.noreply.github.com> Date: Wed, 18 Dec 2024 08:53:02 +0100 Subject: [PATCH] =?UTF-8?q?Reapply=20"For=20`#`=20to=20start=20a=20comment?= =?UTF-8?q?,=20then=20it=20either=20need=20to=20be=20the=20first=20chara?= =?UTF-8?q?=E2=80=A6"=20(#14606)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit d94b344342d055434868dca8860fc0a9d1bbdbfb. --- crates/nu-parser/src/lex.rs | 12 ++++----- crates/nu-parser/tests/test_lex.rs | 23 +++++++++++++++++ tests/repl/test_parser.rs | 40 ++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 6 deletions(-) diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index f0802fcd7a..6d1adf28ef 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -51,7 +51,7 @@ impl BlockKind { } // A baseline token is terminated if it's not nested inside of a paired -// delimiter and the next character is one of: `|`, `;`, `#` or any +// delimiter and the next character is one of: `|`, `;` or any // whitespace. fn is_item_terminator( block_level: &[BlockKind], @@ -115,6 +115,7 @@ pub fn lex_item( // character (whitespace, `|`, `;` or `#`) is encountered, the baseline // token is done. // - Otherwise, accumulate the character into the current baseline token. + let mut previous_char = None; while let Some(c) = input.get(*curr_offset) { let c = *c; @@ -147,11 +148,9 @@ pub fn lex_item( // Also need to check to make sure we aren't escaped quote_start = None; } - } else if c == b'#' { - if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) { - break; - } - in_comment = true; + } else if c == b'#' && !in_comment { + // To start a comment, It either need to be the first character of the token or prefixed with space. + in_comment = previous_char.map(|pc| pc == b' ').unwrap_or(true); } else if c == b'\n' || c == b'\r' { in_comment = false; if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) { @@ -254,6 +253,7 @@ pub fn lex_item( } *curr_offset += 1; + previous_char = Some(c); } let span = Span::new(span_offset + token_start, span_offset + *curr_offset); diff --git a/crates/nu-parser/tests/test_lex.rs b/crates/nu-parser/tests/test_lex.rs index a14843f3f0..54ff674bb9 100644 --- a/crates/nu-parser/tests/test_lex.rs +++ b/crates/nu-parser/tests/test_lex.rs @@ -159,6 +159,29 @@ fn lex_comment() { ); } +#[test] +fn lex_not_comment_needs_space_in_front_of_hashtag() { + let file = b"1..10 | each {echo test#testing }"; + + let output = lex(file, 0, &[], &[], false); + + assert!(output.1.is_none()); +} + +#[test] +fn lex_comment_with_space_in_front_of_hashtag() { + let file = b"1..10 | each {echo test #testing }"; + + let output = lex(file, 0, &[], &[], false); + + assert!(output.1.is_some()); + assert!(matches!( + output.1.unwrap(), + ParseError::UnexpectedEof(missing_token, span) if missing_token == "}" + && span == Span::new(33, 34) + )); +} + #[test] fn lex_is_incomplete() { let file = b"let x = 300 | ;"; diff --git a/tests/repl/test_parser.rs b/tests/repl/test_parser.rs index f77d431110..ddc790324a 100644 --- a/tests/repl/test_parser.rs +++ b/tests/repl/test_parser.rs @@ -169,6 +169,41 @@ fn comment_skipping_in_pipeline_3() -> TestResult { ) } +#[test] +fn still_string_if_hashtag_is_middle_of_string() -> TestResult { + run_test(r#"echo test#testing"#, "test#testing") +} + +#[test] +fn non_comment_hashtag_in_comment_does_not_stop_comment() -> TestResult { + run_test(r#"# command_bar_text: { fg: '#C4C9C6' },"#, "") +} + +#[test] +fn non_comment_hashtag_in_comment_does_not_stop_comment_in_block() -> TestResult { + run_test( + r#"{ + explore: { + # command_bar_text: { fg: '#C4C9C6' }, + } + } | get explore | is-empty"#, + "true", + ) +} + +#[test] +fn still_string_if_hashtag_is_middle_of_string_inside_each() -> TestResult { + run_test( + r#"1..1 | each {echo test#testing } | get 0"#, + "test#testing", + ) +} + +#[test] +fn still_string_if_hashtag_is_middle_of_string_inside_each_also_with_dot() -> TestResult { + run_test(r#"1..1 | each {echo '.#testing' } | get 0"#, ".#testing") +} + #[test] fn bad_var_name() -> TestResult { fail_test(r#"let $"foo bar" = 4"#, "can't contain") @@ -282,6 +317,11 @@ fn raw_string_with_equals() -> TestResult { ) } +#[test] +fn raw_string_with_hashtag() -> TestResult { + run_test(r#"r##' one # two '##"#, "one # two") +} + #[test] fn list_quotes_with_equals() -> TestResult { run_test(