//! See [`Parser`]. use std::cell::Cell; use drop_bomb::DropBomb; use limit::Limit; use crate::{ event::Event, input::Input, SyntaxKind::{self, EOF, ERROR, TOMBSTONE}, TokenSet, T, }; /// `Parser` struct provides the low-level API for /// navigating through the stream of tokens and /// constructing the parse tree. The actual parsing /// happens in the [`grammar`](super::grammar) module. /// /// However, the result of this `Parser` is not a real /// tree, but rather a flat stream of events of the form /// "start expression, consume number literal, /// finish expression". See `Event` docs for more. pub(crate) struct Parser<'t> { inp: &'t Input, pos: usize, events: Vec, steps: Cell, } static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000); impl<'t> Parser<'t> { pub(super) fn new(inp: &'t Input) -> Parser<'t> { Parser { inp, pos: 0, events: Vec::new(), steps: Cell::new(0) } } pub(crate) fn finish(self) -> Vec { self.events } /// Returns the kind of the current token. /// If parser has already reached the end of input, /// the special `EOF` kind is returned. pub(crate) fn current(&self) -> SyntaxKind { self.nth(0) } /// Lookahead operation: returns the kind of the next nth /// token. pub(crate) fn nth(&self, n: usize) -> SyntaxKind { assert!(n <= 3); let steps = self.steps.get(); assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck"); self.steps.set(steps + 1); self.inp.kind(self.pos + n) } /// Checks if the current token is `kind`. pub(crate) fn at(&self, kind: SyntaxKind) -> bool { self.nth_at(0, kind) } pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool { match kind { T![-=] => self.at_composite2(n, T![-], T![=]), T![->] => self.at_composite2(n, T![-], T![>]), T![::] => self.at_composite2(n, T![:], T![:]), T![!=] => self.at_composite2(n, T![!], T![=]), T![..] => self.at_composite2(n, T![.], T![.]), T![*=] => self.at_composite2(n, T![*], T![=]), T![/=] => self.at_composite2(n, T![/], T![=]), T![&&] => self.at_composite2(n, T![&], T![&]), T![&=] => self.at_composite2(n, T![&], T![=]), T![%=] => self.at_composite2(n, T![%], T![=]), T![^=] => self.at_composite2(n, T![^], T![=]), T![+=] => self.at_composite2(n, T![+], T![=]), T![<<] => self.at_composite2(n, T![<], T![<]), T![<=] => self.at_composite2(n, T![<], T![=]), T![==] => self.at_composite2(n, T![=], T![=]), T![=>] => self.at_composite2(n, T![=], T![>]), T![>=] => self.at_composite2(n, T![>], T![=]), T![>>] => self.at_composite2(n, T![>], T![>]), T![|=] => self.at_composite2(n, T![|], T![=]), T![||] => self.at_composite2(n, T![|], T![|]), T![...] => self.at_composite3(n, T![.], T![.], T![.]), T![..=] => self.at_composite3(n, T![.], T![.], T![=]), T![<<=] => self.at_composite3(n, T![<], T![<], T![=]), T![>>=] => self.at_composite3(n, T![>], T![>], T![=]), _ => self.inp.kind(self.pos + n) == kind, } } /// Consume the next token if `kind` matches. pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { if !self.at(kind) { return false; } let n_raw_tokens = match kind { T![-=] | T![->] | T![::] | T![!=] | T![..] | T![*=] | T![/=] | T![&&] | T![&=] | T![%=] | T![^=] | T![+=] | T![<<] | T![<=] | T![==] | T![=>] | T![>=] | T![>>] | T![|=] | T![||] => 2, T![...] | T![..=] | T![<<=] | T![>>=] => 3, _ => 1, }; self.do_bump(kind, n_raw_tokens); true } fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool { self.inp.kind(self.pos + n) == k1 && self.inp.kind(self.pos + n + 1) == k2 && self.inp.is_joint(self.pos + n) } fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool { self.inp.kind(self.pos + n) == k1 && self.inp.kind(self.pos + n + 1) == k2 && self.inp.kind(self.pos + n + 2) == k3 && self.inp.is_joint(self.pos + n) && self.inp.is_joint(self.pos + n + 1) } /// Checks if the current token is in `kinds`. pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool { kinds.contains(self.current()) } /// Checks if the current token is contextual keyword `kw`. pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool { self.inp.contextual_kind(self.pos) == kw } /// Checks if the nth token is contextual keyword `kw`. pub(crate) fn nth_at_contextual_kw(&self, n: usize, kw: SyntaxKind) -> bool { self.inp.contextual_kind(self.pos + n) == kw } /// Starts a new node in the syntax tree. All nodes and tokens /// consumed between the `start` and the corresponding `Marker::complete` /// belong to the same node. pub(crate) fn start(&mut self) -> Marker { let pos = self.events.len() as u32; self.push_event(Event::tombstone()); Marker::new(pos) } /// Consume the next token. Panics if the parser isn't currently at `kind`. pub(crate) fn bump(&mut self, kind: SyntaxKind) { assert!(self.eat(kind)); } /// Advances the parser by one token pub(crate) fn bump_any(&mut self) { let kind = self.nth(0); if kind == EOF { return; } self.do_bump(kind, 1); } /// Advances the parser by one token pub(crate) fn split_float(&mut self, mut marker: Marker) -> (bool, Marker) { assert!(self.at(SyntaxKind::FLOAT_NUMBER)); // we have parse `.` // ``.0.1 // here we need to insert an extra event // // ``. 0. 1; // here we need to change the follow up parse, the return value will cause us to emulate a dot // the actual splitting happens later let ends_in_dot = !self.inp.is_joint(self.pos); if !ends_in_dot { let new_marker = self.start(); let idx = marker.pos as usize; match &mut self.events[idx] { Event::Start { forward_parent, kind } => { *kind = SyntaxKind::FIELD_EXPR; *forward_parent = Some(new_marker.pos - marker.pos); } _ => unreachable!(), } marker.bomb.defuse(); marker = new_marker; }; self.pos += 1; self.push_event(Event::FloatSplitHack { ends_in_dot }); (ends_in_dot, marker) } /// Advances the parser by one token, remapping its kind. /// This is useful to create contextual keywords from /// identifiers. For example, the lexer creates a `union` /// *identifier* token, but the parser remaps it to the /// `union` keyword, and keyword is what ends up in the /// final tree. pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { if self.nth(0) == EOF { // FIXME: panic!? return; } self.do_bump(kind, 1); } /// Emit error with the `message` /// FIXME: this should be much more fancy and support /// structured errors with spans and notes, like rustc /// does. pub(crate) fn error>(&mut self, message: T) { let msg = message.into(); self.push_event(Event::Error { msg }); } /// Consume the next token if it is `kind` or emit an error /// otherwise. pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { if self.eat(kind) { return true; } self.error(format!("expected {kind:?}")); false } /// Create an error node and consume the next token. pub(crate) fn err_and_bump(&mut self, message: &str) { self.err_recover(message, TokenSet::EMPTY); } /// Create an error node and consume the next token. pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) { match self.current() { T!['{'] | T!['}'] => { self.error(message); return; } _ => (), } if self.at_ts(recovery) { self.error(message); return; } let m = self.start(); self.error(message); self.bump_any(); m.complete(self, ERROR); } fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { self.pos += n_raw_tokens as usize; self.steps.set(0); self.push_event(Event::Token { kind, n_raw_tokens }); } fn push_event(&mut self, event: Event) { self.events.push(event); } } /// See [`Parser::start`]. pub(crate) struct Marker { pos: u32, bomb: DropBomb, } impl Marker { fn new(pos: u32) -> Marker { Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") } } /// Finishes the syntax tree node and assigns `kind` to it, /// and mark the create a `CompletedMarker` for possible future /// operation like `.precede()` to deal with forward_parent. pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker { self.bomb.defuse(); let idx = self.pos as usize; match &mut p.events[idx] { Event::Start { kind: slot, .. } => { *slot = kind; } _ => unreachable!(), } p.push_event(Event::Finish); CompletedMarker::new(self.pos, kind) } /// Abandons the syntax tree node. All its children /// are attached to its parent instead. pub(crate) fn abandon(mut self, p: &mut Parser<'_>) { self.bomb.defuse(); let idx = self.pos as usize; if idx == p.events.len() - 1 { match p.events.pop() { Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), _ => unreachable!(), } } } } pub(crate) struct CompletedMarker { pos: u32, kind: SyntaxKind, } impl CompletedMarker { fn new(pos: u32, kind: SyntaxKind) -> Self { CompletedMarker { pos, kind } } /// This method allows to create a new node which starts /// *before* the current one. That is, parser could start /// node `A`, then complete it, and then after parsing the /// whole `A`, decide that it should have started some node /// `B` before starting `A`. `precede` allows to do exactly /// that. See also docs about /// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent). /// /// Given completed events `[START, FINISH]` and its corresponding /// `CompletedMarker(pos: 0, _)`. /// Append a new `START` events as `[START, FINISH, NEWSTART]`, /// then mark `NEWSTART` as `START`'s parent with saving its relative /// distance to `NEWSTART` into forward_parent(=2 in this case); pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker { let new_pos = p.start(); let idx = self.pos as usize; match &mut p.events[idx] { Event::Start { forward_parent, .. } => { *forward_parent = Some(new_pos.pos - self.pos); } _ => unreachable!(), } new_pos } /// Extends this completed marker *to the left* up to `m`. pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker { m.bomb.defuse(); let idx = m.pos as usize; match &mut p.events[idx] { Event::Start { forward_parent, .. } => { *forward_parent = Some(self.pos - m.pos); } _ => unreachable!(), } self } pub(crate) fn kind(&self) -> SyntaxKind { self.kind } }