14523 all comments should be prefixed with space tab or be beginning of token (#14616)

This PR should close
1. #10327 
1. #13667 
1. #13810 
1. #14129 

# Description
This got reverted https://github.com/nushell/nushell/pull/14606 because
the previous changes only considered space a whitespace and forgot about
tabs. I now added a check for any whitespace, even if it is only those
two that would be relevant.

The added test failed before the changes. 

For `#` to start a comment, then it either need to be the first
character of the token or prefixed with ` ` (space).

So now you can do this:
``` 
~/Projects/nushell> 1..10 | each {echo test#testing }                                                                                                                     12/05/2024 05:37:19 PM
╭───┬──────────────╮
│ 0 │ test#testing │
│ 1 │ test#testing │
│ 2 │ test#testing │
│ 3 │ test#testing │
│ 4 │ test#testing │
│ 5 │ test#testing │
│ 6 │ test#testing │
│ 7 │ test#testing │
│ 8 │ test#testing │
│ 9 │ test#testing │
╰───┴──────────────╯
```  

# User-Facing Changes
It is a breaking change if anyone expected comments to start in the
middle of a string without any prefixing ` ` (space).

# Tests + Formatting
Did all: 
- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

# After Submitting
I cant see that I need to update anything in [the
documentation](https://github.com/nushell/nushell.github.io) but please
point me in the direction if there is anything.
This commit is contained in:
RobbingDaHood 2024-12-25 14:31:51 +01:00 committed by GitHub
parent 469e23cae4
commit b1da50774a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 86 additions and 6 deletions

View file

@ -51,7 +51,7 @@ impl BlockKind {
}
// A baseline token is terminated if it's not nested inside of a paired
// delimiter and the next character is one of: `|`, `;`, `#` or any
// delimiter and the next character is one of: `|`, `;` or any
// whitespace.
fn is_item_terminator(
block_level: &[BlockKind],
@ -115,6 +115,7 @@ pub fn lex_item(
// character (whitespace, `|`, `;` or `#`) is encountered, the baseline
// token is done.
// - Otherwise, accumulate the character into the current baseline token.
let mut previous_char = None;
while let Some(c) = input.get(*curr_offset) {
let c = *c;
@ -147,11 +148,12 @@ pub fn lex_item(
// Also need to check to make sure we aren't escaped
quote_start = None;
}
} else if c == b'#' {
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
break;
}
in_comment = true;
} else if c == b'#' && !in_comment {
// To start a comment, It either need to be the first character of the token or prefixed with whitespace.
in_comment = previous_char
.map(char::from)
.map(char::is_whitespace)
.unwrap_or(true);
} else if c == b'\n' || c == b'\r' {
in_comment = false;
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
@ -254,6 +256,7 @@ pub fn lex_item(
}
*curr_offset += 1;
previous_char = Some(c);
}
let span = Span::new(span_offset + token_start, span_offset + *curr_offset);

View file

@ -159,6 +159,43 @@ fn lex_comment() {
);
}
#[test]
fn lex_not_comment_needs_space_in_front_of_hashtag() {
let file = b"1..10 | each {echo test#testing }";
let output = lex(file, 0, &[], &[], false);
assert!(output.1.is_none());
}
#[test]
fn lex_comment_with_space_in_front_of_hashtag() {
let file = b"1..10 | each {echo test #testing }";
let output = lex(file, 0, &[], &[], false);
assert!(output.1.is_some());
assert!(matches!(
output.1.unwrap(),
ParseError::UnexpectedEof(missing_token, span) if missing_token == "}"
&& span == Span::new(33, 34)
));
}
#[test]
fn lex_comment_with_tab_in_front_of_hashtag() {
let file = b"1..10 | each {echo test\t#testing }";
let output = lex(file, 0, &[], &[], false);
assert!(output.1.is_some());
assert!(matches!(
output.1.unwrap(),
ParseError::UnexpectedEof(missing_token, span) if missing_token == "}"
&& span == Span::new(33, 34)
));
}
#[test]
fn lex_is_incomplete() {
let file = b"let x = 300 | ;";

View file

@ -169,6 +169,41 @@ fn comment_skipping_in_pipeline_3() -> TestResult {
)
}
#[test]
fn still_string_if_hashtag_is_middle_of_string() -> TestResult {
run_test(r#"echo test#testing"#, "test#testing")
}
#[test]
fn non_comment_hashtag_in_comment_does_not_stop_comment() -> TestResult {
run_test(r#"# command_bar_text: { fg: '#C4C9C6' },"#, "")
}
#[test]
fn non_comment_hashtag_in_comment_does_not_stop_comment_in_block() -> TestResult {
run_test(
r#"{
explore: {
# command_bar_text: { fg: '#C4C9C6' },
}
} | get explore | is-empty"#,
"true",
)
}
#[test]
fn still_string_if_hashtag_is_middle_of_string_inside_each() -> TestResult {
run_test(
r#"1..1 | each {echo test#testing } | get 0"#,
"test#testing",
)
}
#[test]
fn still_string_if_hashtag_is_middle_of_string_inside_each_also_with_dot() -> TestResult {
run_test(r#"1..1 | each {echo '.#testing' } | get 0"#, ".#testing")
}
#[test]
fn bad_var_name() -> TestResult {
fail_test(r#"let $"foo bar" = 4"#, "can't contain")
@ -282,6 +317,11 @@ fn raw_string_with_equals() -> TestResult {
)
}
#[test]
fn raw_string_with_hashtag() -> TestResult {
run_test(r#"r##' one # two '##"#, "one # two")
}
#[test]
fn list_quotes_with_equals() -> TestResult {
run_test(