From 6fa6efe90fc8a79395cacb5c71315f0e2b32e623 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 3 Feb 2023 17:18:48 +0100 Subject: [PATCH] fix: Fix parsing of nested tuple field accesses in a cursed way --- crates/parser/src/event.rs | 10 +- crates/parser/src/grammar/expressions.rs | 106 ++++++++++++------ crates/parser/src/lib.rs | 2 +- crates/parser/src/output.rs | 12 ++ crates/parser/src/parser.rs | 32 ++++++ crates/parser/src/shortcuts.rs | 54 ++++++++- crates/parser/src/tests/prefix_entries.rs | 4 + .../parser/inline/ok/0011_field_expr.rast | 33 ++++++ .../parser/inline/ok/0011_field_expr.rs | 2 + .../inline/ok/0107_method_call_expr.rast | 43 +++++++ .../parser/inline/ok/0107_method_call_expr.rs | 2 + .../parser/inline/ok/0137_await_expr.rast | 35 ++++++ .../parser/inline/ok/0137_await_expr.rs | 2 + 13 files changed, 298 insertions(+), 39 deletions(-) diff --git a/crates/parser/src/event.rs b/crates/parser/src/event.rs index b0e70e7943..fb2616cf01 100644 --- a/crates/parser/src/event.rs +++ b/crates/parser/src/event.rs @@ -72,9 +72,12 @@ pub(crate) enum Event { /// `n_raw_tokens = 2` is used to produced a single `>>`. Token { kind: SyntaxKind, + // Consider custom enum here? n_raw_tokens: u8, }, - + FloatSplitHack { + has_pseudo_dot: bool, + }, Error { msg: String, }, @@ -125,6 +128,11 @@ pub(super) fn process(mut events: Vec) -> Output { Event::Token { kind, n_raw_tokens } => { res.token(kind, n_raw_tokens); } + Event::FloatSplitHack { has_pseudo_dot } => { + res.float_split_hack(has_pseudo_dot); + let ev = mem::replace(&mut events[i + 1], Event::tombstone()); + assert!(matches!(ev, Event::Finish), "{ev:?}"); + } Event::Error { msg } => res.error(msg), } } diff --git a/crates/parser/src/grammar/expressions.rs b/crates/parser/src/grammar/expressions.rs index 8932330b82..7516ac3c4b 100644 --- a/crates/parser/src/grammar/expressions.rs +++ b/crates/parser/src/grammar/expressions.rs @@ -379,7 +379,7 @@ fn postfix_expr( // } T!['('] if allow_calls => call_expr(p, lhs), T!['['] if allow_calls => index_expr(p, lhs), - T![.] => match postfix_dot_expr(p, lhs) { + T![.] => match postfix_dot_expr::(p, lhs) { Ok(it) => it, Err(it) => { lhs = it; @@ -393,35 +393,44 @@ fn postfix_expr( block_like = BlockLike::NotBlock; } return (lhs, block_like); +} - fn postfix_dot_expr( - p: &mut Parser<'_>, - lhs: CompletedMarker, - ) -> Result { +fn postfix_dot_expr( + p: &mut Parser<'_>, + lhs: CompletedMarker, +) -> Result { + if !FLOAT_RECOVERY { assert!(p.at(T![.])); - if p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])) { - return Ok(method_call_expr(p, lhs)); - } - - // test await_expr - // fn foo() { - // x.await; - // x.0.await; - // x.0().await?.hello(); - // } - if p.nth(1) == T![await] { - let m = lhs.precede(p); - p.bump(T![.]); - p.bump(T![await]); - return Ok(m.complete(p, AWAIT_EXPR)); - } - - if p.at(T![..=]) || p.at(T![..]) { - return Err(lhs); - } - - Ok(field_expr(p, lhs)) } + let nth1 = if FLOAT_RECOVERY { 0 } else { 1 }; + let nth2 = if FLOAT_RECOVERY { 1 } else { 2 }; + + if p.nth(nth1) == IDENT && (p.nth(nth2) == T!['('] || p.nth_at(nth2, T![::])) { + return Ok(method_call_expr::(p, lhs)); + } + + // test await_expr + // fn foo() { + // x.await; + // x.0.await; + // x.0().await?.hello(); + // x.0.0.await; + // x.0. await; + // } + if p.nth(nth1) == T![await] { + let m = lhs.precede(p); + if !FLOAT_RECOVERY { + p.bump(T![.]); + } + p.bump(T![await]); + return Ok(m.complete(p, AWAIT_EXPR)); + } + + if p.at(T![..=]) || p.at(T![..]) { + return Err(lhs); + } + + field_expr::(p, lhs) } // test call_expr @@ -455,11 +464,22 @@ fn index_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker { // fn foo() { // x.foo(); // y.bar::(1, 2,); +// x.0.0.call(); +// x.0. call(); // } -fn method_call_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker { - assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::]))); +fn method_call_expr( + p: &mut Parser<'_>, + lhs: CompletedMarker, +) -> CompletedMarker { + if FLOAT_RECOVERY { + assert!(p.nth(0) == IDENT && (p.nth(1) == T!['('] || p.nth_at(1, T![::]))); + } else { + assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::]))); + } let m = lhs.precede(p); - p.bump_any(); + if !FLOAT_RECOVERY { + p.bump(T![.]); + } name_ref(p); generic_args::opt_generic_arg_list(p, true); if p.at(T!['(']) { @@ -472,21 +492,35 @@ fn method_call_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker // fn foo() { // x.foo; // x.0.bar; +// x.0.1; +// x.0. bar; // x.0(); // } -fn field_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker { - assert!(p.at(T![.])); +fn field_expr( + p: &mut Parser<'_>, + lhs: CompletedMarker, +) -> Result { + if !FLOAT_RECOVERY { + assert!(p.at(T![.])); + } let m = lhs.precede(p); - p.bump(T![.]); + if !FLOAT_RECOVERY { + p.bump(T![.]); + } if p.at(IDENT) || p.at(INT_NUMBER) { name_ref_or_index(p); } else if p.at(FLOAT_NUMBER) { - // FIXME: How to recover and instead parse INT + T![.]? - p.bump_any(); + return match p.split_float(m) { + (true, m) => { + let lhs = m.complete(p, FIELD_EXPR); + postfix_dot_expr::(p, lhs) + } + (false, m) => Ok(m.complete(p, FIELD_EXPR)), + }; } else { p.error("expected field name or number"); } - m.complete(p, FIELD_EXPR) + Ok(m.complete(p, FIELD_EXPR)) } // test try_expr diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 87be479277..f20d32d6cf 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -102,7 +102,7 @@ impl TopEntryPoint { match step { Step::Enter { .. } => depth += 1, Step::Exit => depth -= 1, - Step::Token { .. } | Step::Error { .. } => (), + Step::FloatSplit { .. } | Step::Token { .. } | Step::Error { .. } => (), } } assert!(!first, "no tree at all"); diff --git a/crates/parser/src/output.rs b/crates/parser/src/output.rs index 3de6c0aba8..9587c8cb1b 100644 --- a/crates/parser/src/output.rs +++ b/crates/parser/src/output.rs @@ -25,6 +25,7 @@ pub struct Output { #[derive(Debug)] pub enum Step<'a> { Token { kind: SyntaxKind, n_input_tokens: u8 }, + FloatSplit { has_pseudo_dot: bool }, Enter { kind: SyntaxKind }, Exit, Error { msg: &'a str }, @@ -44,6 +45,7 @@ impl Output { const TOKEN_EVENT: u8 = 0; const ENTER_EVENT: u8 = 1; const EXIT_EVENT: u8 = 2; + const SPLIT_EVENT: u8 = 3; pub fn iter(&self) -> impl Iterator> { self.event.iter().map(|&event| { @@ -67,6 +69,9 @@ impl Output { Step::Enter { kind } } Self::EXIT_EVENT => Step::Exit, + Self::SPLIT_EVENT => { + Step::FloatSplit { has_pseudo_dot: event & Self::N_INPUT_TOKEN_MASK != 0 } + } _ => unreachable!(), } }) @@ -79,6 +84,13 @@ impl Output { self.event.push(e) } + pub(crate) fn float_split_hack(&mut self, has_pseudo_dot: bool) { + let e = (Self::SPLIT_EVENT as u32) << Self::TAG_SHIFT + | ((has_pseudo_dot as u32) << Self::N_INPUT_TOKEN_SHIFT) + | Self::EVENT_MASK; + self.event.push(e); + } + pub(crate) fn enter_node(&mut self, kind: SyntaxKind) { let e = ((kind as u16 as u32) << Self::KIND_SHIFT) | ((Self::ENTER_EVENT as u32) << Self::TAG_SHIFT) diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index 48aecb35be..0f4fa60229 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -181,6 +181,38 @@ impl<'t> Parser<'t> { self.do_bump(kind, 1); } + /// Advances the parser by one token + pub(crate) fn split_float(&mut self, marker: Marker) -> (bool, Marker) { + assert!(self.at(SyntaxKind::FLOAT_NUMBER)); + // we have parse `.` + // ``.0.1 + // here we need to insert an extra event + // + // ``. 0. 1; + // here we need to change the follow up parse, the return value will cause us to emulate a dot + // the actual splitting happens later + let has_pseudo_dot = !self.inp.is_joint(self.pos); + let marker = if !has_pseudo_dot { + let new_pos = self.start(); + let idx = marker.pos as usize; + match &mut self.events[idx] { + Event::Start { forward_parent, kind } => { + *kind = SyntaxKind::FIELD_EXPR; + *forward_parent = Some(new_pos.pos - marker.pos); + } + _ => unreachable!(), + } + // NOTE: This brings the start / finish pairs out of balance! + std::mem::forget(marker); + new_pos + } else { + marker + }; + self.pos += 1 as usize; + self.push_event(Event::FloatSplitHack { has_pseudo_dot }); + (has_pseudo_dot, marker) + } + /// Advances the parser by one token, remapping its kind. /// This is useful to create contextual keywords from /// identifiers. For example, the lexer creates a `union` diff --git a/crates/parser/src/shortcuts.rs b/crates/parser/src/shortcuts.rs index 2be4050d13..18a6f838fa 100644 --- a/crates/parser/src/shortcuts.rs +++ b/crates/parser/src/shortcuts.rs @@ -44,7 +44,17 @@ impl<'a> LexedStr<'a> { } res.push(kind); } - was_joint = true; + if kind == SyntaxKind::FLOAT_NUMBER { + // we set jointness for floating point numbers as a hack to inform the + // parser about whether we have a `0.` or `0.1` style float + if self.text(i).split_once('.').map_or(false, |(_, it)| it.is_empty()) { + was_joint = false; + } else { + was_joint = true; + } + } else { + was_joint = true; + } } } res @@ -63,6 +73,7 @@ impl<'a> LexedStr<'a> { Step::Token { kind, n_input_tokens: n_raw_tokens } => { builder.token(kind, n_raw_tokens) } + Step::FloatSplit { has_pseudo_dot } => builder.float_split(has_pseudo_dot), Step::Enter { kind } => builder.enter(kind), Step::Exit => builder.exit(), Step::Error { msg } => { @@ -109,6 +120,16 @@ impl Builder<'_, '_> { self.do_token(kind, n_tokens as usize); } + fn float_split(&mut self, has_pseudo_dot: bool) { + match mem::replace(&mut self.state, State::Normal) { + State::PendingEnter => unreachable!(), + State::PendingExit => (self.sink)(StrStep::Exit), + State::Normal => (), + } + self.eat_trivias(); + self.do_float_split(has_pseudo_dot); + } + fn enter(&mut self, kind: SyntaxKind) { match mem::replace(&mut self.state, State::Normal) { State::PendingEnter => { @@ -164,6 +185,37 @@ impl Builder<'_, '_> { self.pos += n_tokens; (self.sink)(StrStep::Token { kind, text }); } + + fn do_float_split(&mut self, has_pseudo_dot: bool) { + let text = &self.lexed.range_text(self.pos..self.pos + 1); + self.pos += 1; + match text.split_once('.') { + Some((left, right)) => { + assert!(!left.is_empty()); + (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF }); + (self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: left }); + (self.sink)(StrStep::Exit); + + // here we move the exit up, the original exit has been deleted in process + (self.sink)(StrStep::Exit); + + (self.sink)(StrStep::Token { kind: SyntaxKind::DOT, text: "." }); + + if has_pseudo_dot { + assert!(right.is_empty()); + self.state = State::Normal; + } else { + (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF }); + (self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: right }); + (self.sink)(StrStep::Exit); + + // the parser creates an unbalanced start node, we are required to close it here + self.state = State::PendingExit; + } + } + None => unreachable!(), + } + } } fn n_attached_trivias<'a>( diff --git a/crates/parser/src/tests/prefix_entries.rs b/crates/parser/src/tests/prefix_entries.rs index e626b4f27e..40f92e5880 100644 --- a/crates/parser/src/tests/prefix_entries.rs +++ b/crates/parser/src/tests/prefix_entries.rs @@ -51,6 +51,9 @@ fn expr() { check(PrefixEntryPoint::Expr, "-1", "-1"); check(PrefixEntryPoint::Expr, "fn foo() {}", "fn"); check(PrefixEntryPoint::Expr, "#[attr] ()", "#[attr] ()"); + check(PrefixEntryPoint::Expr, "foo.0", "foo.0"); + check(PrefixEntryPoint::Expr, "foo.0.1", "foo.0.1"); + check(PrefixEntryPoint::Expr, "foo.0. foo", "foo.0. foo"); } #[test] @@ -88,6 +91,7 @@ fn check(entry: PrefixEntryPoint, input: &str, prefix: &str) { for step in entry.parse(&input).iter() { match step { Step::Token { n_input_tokens, .. } => n_tokens += n_input_tokens as usize, + Step::FloatSplit { .. } => n_tokens += 1, Step::Enter { .. } | Step::Exit | Step::Error { .. } => (), } } diff --git a/crates/parser/test_data/parser/inline/ok/0011_field_expr.rast b/crates/parser/test_data/parser/inline/ok/0011_field_expr.rast index 8498724b9e..dd27dc4896 100644 --- a/crates/parser/test_data/parser/inline/ok/0011_field_expr.rast +++ b/crates/parser/test_data/parser/inline/ok/0011_field_expr.rast @@ -40,6 +40,39 @@ SOURCE_FILE IDENT "bar" SEMICOLON ";" WHITESPACE "\n " + EXPR_STMT + FIELD_EXPR + FIELD_EXPR + PATH_EXPR + PATH + PATH_SEGMENT + NAME_REF + IDENT "x" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + NAME_REF + INT_NUMBER "1" + SEMICOLON ";" + WHITESPACE "\n " + EXPR_STMT + FIELD_EXPR + FIELD_EXPR + PATH_EXPR + PATH + PATH_SEGMENT + NAME_REF + IDENT "x" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + WHITESPACE " " + NAME_REF + IDENT "bar" + SEMICOLON ";" + WHITESPACE "\n " EXPR_STMT CALL_EXPR FIELD_EXPR diff --git a/crates/parser/test_data/parser/inline/ok/0011_field_expr.rs b/crates/parser/test_data/parser/inline/ok/0011_field_expr.rs index b8da2ddc30..98dbe45a7e 100644 --- a/crates/parser/test_data/parser/inline/ok/0011_field_expr.rs +++ b/crates/parser/test_data/parser/inline/ok/0011_field_expr.rs @@ -1,5 +1,7 @@ fn foo() { x.foo; x.0.bar; + x.0.1; + x.0. bar; x.0(); } diff --git a/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast b/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast index dcbcfe1231..b28b8eb673 100644 --- a/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast +++ b/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast @@ -58,6 +58,49 @@ SOURCE_FILE COMMA "," R_PAREN ")" SEMICOLON ";" + WHITESPACE "\n " + EXPR_STMT + METHOD_CALL_EXPR + FIELD_EXPR + FIELD_EXPR + PATH_EXPR + PATH + PATH_SEGMENT + NAME_REF + IDENT "x" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + NAME_REF + IDENT "call" + ARG_LIST + L_PAREN "(" + R_PAREN ")" + SEMICOLON ";" + WHITESPACE "\n " + EXPR_STMT + METHOD_CALL_EXPR + FIELD_EXPR + PATH_EXPR + PATH + PATH_SEGMENT + NAME_REF + IDENT "x" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + WHITESPACE " " + NAME_REF + IDENT "call" + ARG_LIST + L_PAREN "(" + R_PAREN ")" + SEMICOLON ";" WHITESPACE "\n" R_CURLY "}" WHITESPACE "\n" diff --git a/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs b/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs index 1a3aa35ae8..48bb6381e8 100644 --- a/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs +++ b/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs @@ -1,4 +1,6 @@ fn foo() { x.foo(); y.bar::(1, 2,); + x.0.0.call(); + x.0. call(); } diff --git a/crates/parser/test_data/parser/inline/ok/0137_await_expr.rast b/crates/parser/test_data/parser/inline/ok/0137_await_expr.rast index 9d37ada0da..af713a2207 100644 --- a/crates/parser/test_data/parser/inline/ok/0137_await_expr.rast +++ b/crates/parser/test_data/parser/inline/ok/0137_await_expr.rast @@ -65,6 +65,41 @@ SOURCE_FILE L_PAREN "(" R_PAREN ")" SEMICOLON ";" + WHITESPACE "\n " + EXPR_STMT + AWAIT_EXPR + FIELD_EXPR + FIELD_EXPR + PATH_EXPR + PATH + PATH_SEGMENT + NAME_REF + IDENT "x" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + AWAIT_KW "await" + SEMICOLON ";" + WHITESPACE "\n " + EXPR_STMT + AWAIT_EXPR + FIELD_EXPR + PATH_EXPR + PATH + PATH_SEGMENT + NAME_REF + IDENT "x" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + WHITESPACE " " + AWAIT_KW "await" + SEMICOLON ";" WHITESPACE "\n" R_CURLY "}" WHITESPACE "\n" diff --git a/crates/parser/test_data/parser/inline/ok/0137_await_expr.rs b/crates/parser/test_data/parser/inline/ok/0137_await_expr.rs index d2ba89ca60..fe9a3211bb 100644 --- a/crates/parser/test_data/parser/inline/ok/0137_await_expr.rs +++ b/crates/parser/test_data/parser/inline/ok/0137_await_expr.rs @@ -2,4 +2,6 @@ fn foo() { x.await; x.0.await; x.0().await?.hello(); + x.0.0.await; + x.0. await; }