From 3b3beaab31de7b5f2a7e9c17d39e4fd2ee7a56ac Mon Sep 17 00:00:00 2001 From: Chayim Refael Friedman Date: Sun, 27 Oct 2024 00:47:13 +0300 Subject: [PATCH] Correctly handle `#""` in edition <2024 --- crates/parser/src/lexed_str.rs | 13 ++++++++++--- crates/parser/src/tests.rs | 17 +++++++++++++---- .../ok/guarded_str_prefix_edition_2021.rast | 4 ++++ .../lexer/ok/guarded_str_prefix_edition_2021.rs | 3 +++ 4 files changed, 30 insertions(+), 7 deletions(-) create mode 100644 crates/parser/test_data/lexer/ok/guarded_str_prefix_edition_2021.rast create mode 100644 crates/parser/test_data/lexer/ok/guarded_str_prefix_edition_2021.rs diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs index 5322463a71..3c0eb1b42a 100644 --- a/crates/parser/src/lexed_str.rs +++ b/crates/parser/src/lexed_str.rs @@ -39,7 +39,9 @@ impl<'a> LexedStr<'a> { conv.offset = shebang_len; }; - for token in rustc_lexer::tokenize(&text[conv.offset..]) { + // Re-create the tokenizer from scratch every token because `GuardedStrPrefix` is one token in the lexer + // but we want to split it to two in edition <2024. + while let Some(token) = rustc_lexer::tokenize(&text[conv.offset..]).next() { let token_text = &text[conv.offset..][..token.len as usize]; conv.extend_token(&token.kind, token_text); @@ -158,7 +160,7 @@ impl<'a> Converter<'a> { } } - fn extend_token(&mut self, kind: &rustc_lexer::TokenKind, token_text: &str) { + fn extend_token(&mut self, kind: &rustc_lexer::TokenKind, mut token_text: &str) { // A note on an intended tradeoff: // We drop some useful information here (see patterns with double dots `..`) // Storing that info in `SyntaxKind` is not possible due to its layout requirements of @@ -189,10 +191,15 @@ impl<'a> Converter<'a> { rustc_lexer::TokenKind::RawIdent => IDENT, rustc_lexer::TokenKind::GuardedStrPrefix if self.edition.at_least_2024() => { + // FIXME: rustc does something better for recovery. err = "Invalid string literal (reserved syntax)"; ERROR } - rustc_lexer::TokenKind::GuardedStrPrefix => POUND, + rustc_lexer::TokenKind::GuardedStrPrefix => { + // The token is `#"` or `##`, split it into two. + token_text = &token_text[1..]; + POUND + } rustc_lexer::TokenKind::Literal { kind, .. } => { self.extend_literal(token_text.len(), kind); diff --git a/crates/parser/src/tests.rs b/crates/parser/src/tests.rs index e7bccb6685..4b19ddc752 100644 --- a/crates/parser/src/tests.rs +++ b/crates/parser/src/tests.rs @@ -15,11 +15,20 @@ use crate::{Edition, LexedStr, TopEntryPoint}; #[path = "../test_data/generated/runner.rs"] mod runner; +fn infer_edition(file_path: &Path) -> Edition { + let file_content = std::fs::read_to_string(file_path).unwrap(); + if let Some(edition) = file_content.strip_prefix("//@ edition: ") { + edition[..4].parse().expect("invalid edition directive") + } else { + Edition::CURRENT + } +} + #[test] fn lex_ok() { for case in TestCase::list("lexer/ok") { let _guard = stdx::panic_context::enter(format!("{:?}", case.rs)); - let actual = lex(&case.text); + let actual = lex(&case.text, infer_edition(&case.rs)); expect_file![case.rast].assert_eq(&actual) } } @@ -28,13 +37,13 @@ fn lex_ok() { fn lex_err() { for case in TestCase::list("lexer/err") { let _guard = stdx::panic_context::enter(format!("{:?}", case.rs)); - let actual = lex(&case.text); + let actual = lex(&case.text, infer_edition(&case.rs)); expect_file![case.rast].assert_eq(&actual) } } -fn lex(text: &str) -> String { - let lexed = LexedStr::new(Edition::CURRENT, text); +fn lex(text: &str, edition: Edition) -> String { + let lexed = LexedStr::new(edition, text); let mut res = String::new(); for i in 0..lexed.len() { diff --git a/crates/parser/test_data/lexer/ok/guarded_str_prefix_edition_2021.rast b/crates/parser/test_data/lexer/ok/guarded_str_prefix_edition_2021.rast new file mode 100644 index 0000000000..1bdd672044 --- /dev/null +++ b/crates/parser/test_data/lexer/ok/guarded_str_prefix_edition_2021.rast @@ -0,0 +1,4 @@ +COMMENT "//@ edition: 2021" +WHITESPACE "\n\n" +POUND "#" +STRING "\"foo\"" diff --git a/crates/parser/test_data/lexer/ok/guarded_str_prefix_edition_2021.rs b/crates/parser/test_data/lexer/ok/guarded_str_prefix_edition_2021.rs new file mode 100644 index 0000000000..f00f949f0d --- /dev/null +++ b/crates/parser/test_data/lexer/ok/guarded_str_prefix_edition_2021.rs @@ -0,0 +1,3 @@ +//@ edition: 2021 + +#"foo" \ No newline at end of file