2021-05-22 14:20:22 +00:00
|
|
|
//! See [`Parser`].
|
2019-09-30 08:58:53 +00:00
|
|
|
|
2019-02-20 19:44:06 +00:00
|
|
|
use std::cell::Cell;
|
|
|
|
|
2018-12-06 17:49:36 +00:00
|
|
|
use drop_bomb::DropBomb;
|
2021-12-06 09:47:36 +00:00
|
|
|
use limit::Limit;
|
2018-12-06 17:49:36 +00:00
|
|
|
|
2018-10-15 16:55:32 +00:00
|
|
|
use crate::{
|
2019-02-21 10:27:45 +00:00
|
|
|
event::Event,
|
2021-12-25 18:59:02 +00:00
|
|
|
input::Input,
|
2021-10-23 17:43:45 +00:00
|
|
|
SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
|
2021-11-14 19:13:44 +00:00
|
|
|
TokenSet, T,
|
2018-07-31 20:38:19 +00:00
|
|
|
};
|
2018-02-11 13:53:57 +00:00
|
|
|
|
2018-02-11 14:58:22 +00:00
|
|
|
/// `Parser` struct provides the low-level API for
|
|
|
|
/// navigating through the stream of tokens and
|
|
|
|
/// constructing the parse tree. The actual parsing
|
2021-08-04 03:57:31 +00:00
|
|
|
/// happens in the [`grammar`](super::grammar) module.
|
2018-02-11 14:58:22 +00:00
|
|
|
///
|
|
|
|
/// However, the result of this `Parser` is not a real
|
|
|
|
/// tree, but rather a flat stream of events of the form
|
|
|
|
/// "start expression, consume number literal,
|
|
|
|
/// finish expression". See `Event` docs for more.
|
2019-02-20 19:44:06 +00:00
|
|
|
pub(crate) struct Parser<'t> {
|
2021-12-25 18:59:02 +00:00
|
|
|
inp: &'t Input,
|
2021-11-14 19:13:44 +00:00
|
|
|
pos: usize,
|
2019-02-20 19:44:06 +00:00
|
|
|
events: Vec<Event>,
|
|
|
|
steps: Cell<u32>,
|
|
|
|
}
|
2018-02-11 13:53:57 +00:00
|
|
|
|
2021-12-06 09:47:36 +00:00
|
|
|
/// Limit applied to the parser's lookup counter. `Parser::nth` checks its
/// counter against this value on every call and panics with
/// "the parser seems stuck" when the check fails.
static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000);
|
|
|
|
|
2018-02-11 13:53:57 +00:00
|
|
|
impl<'t> Parser<'t> {
|
2021-12-25 18:59:02 +00:00
|
|
|
pub(super) fn new(inp: &'t Input) -> Parser<'t> {
|
|
|
|
Parser { inp, pos: 0, events: Vec::new(), steps: Cell::new(0) }
|
2019-02-20 19:44:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) fn finish(self) -> Vec<Event> {
|
|
|
|
self.events
|
|
|
|
}
|
|
|
|
|
2018-02-11 14:58:22 +00:00
|
|
|
/// Returns the kind of the current token.
|
|
|
|
/// If parser has already reached the end of input,
|
|
|
|
/// the special `EOF` kind is returned.
|
2018-02-11 13:53:57 +00:00
|
|
|
pub(crate) fn current(&self) -> SyntaxKind {
|
|
|
|
self.nth(0)
|
|
|
|
}
|
|
|
|
|
2018-02-11 14:58:22 +00:00
|
|
|
/// Lookahead operation: returns the kind of the next nth
|
|
|
|
/// token.
|
2019-02-20 19:58:56 +00:00
|
|
|
pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
|
2019-05-25 12:31:53 +00:00
|
|
|
assert!(n <= 3);
|
|
|
|
|
2019-02-20 19:44:06 +00:00
|
|
|
let steps = self.steps.get();
|
2021-12-06 09:47:36 +00:00
|
|
|
assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck");
|
2019-02-20 19:44:06 +00:00
|
|
|
self.steps.set(steps + 1);
|
2019-04-13 10:38:31 +00:00
|
|
|
|
2021-12-25 18:59:02 +00:00
|
|
|
self.inp.kind(self.pos + n)
|
2018-02-11 13:53:57 +00:00
|
|
|
}
|
|
|
|
|
2018-02-11 14:58:22 +00:00
|
|
|
/// Checks if the current token is `kind`.
|
2018-02-11 13:53:57 +00:00
|
|
|
pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
|
2019-09-09 11:52:31 +00:00
|
|
|
self.nth_at(0, kind)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
|
|
|
|
match kind {
|
|
|
|
T![-=] => self.at_composite2(n, T![-], T![=]),
|
|
|
|
T![->] => self.at_composite2(n, T![-], T![>]),
|
|
|
|
T![::] => self.at_composite2(n, T![:], T![:]),
|
|
|
|
T![!=] => self.at_composite2(n, T![!], T![=]),
|
|
|
|
T![..] => self.at_composite2(n, T![.], T![.]),
|
|
|
|
T![*=] => self.at_composite2(n, T![*], T![=]),
|
|
|
|
T![/=] => self.at_composite2(n, T![/], T![=]),
|
|
|
|
T![&&] => self.at_composite2(n, T![&], T![&]),
|
|
|
|
T![&=] => self.at_composite2(n, T![&], T![=]),
|
|
|
|
T![%=] => self.at_composite2(n, T![%], T![=]),
|
|
|
|
T![^=] => self.at_composite2(n, T![^], T![=]),
|
|
|
|
T![+=] => self.at_composite2(n, T![+], T![=]),
|
|
|
|
T![<<] => self.at_composite2(n, T![<], T![<]),
|
|
|
|
T![<=] => self.at_composite2(n, T![<], T![=]),
|
|
|
|
T![==] => self.at_composite2(n, T![=], T![=]),
|
|
|
|
T![=>] => self.at_composite2(n, T![=], T![>]),
|
|
|
|
T![>=] => self.at_composite2(n, T![>], T![=]),
|
|
|
|
T![>>] => self.at_composite2(n, T![>], T![>]),
|
|
|
|
T![|=] => self.at_composite2(n, T![|], T![=]),
|
|
|
|
T![||] => self.at_composite2(n, T![|], T![|]),
|
|
|
|
|
|
|
|
T![...] => self.at_composite3(n, T![.], T![.], T![.]),
|
|
|
|
T![..=] => self.at_composite3(n, T![.], T![.], T![=]),
|
|
|
|
T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
|
|
|
|
T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
|
|
|
|
|
2021-12-25 18:59:02 +00:00
|
|
|
_ => self.inp.kind(self.pos + n) == kind,
|
2019-09-09 11:52:31 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Consume the next token if `kind` matches.
|
|
|
|
pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
|
|
|
|
if !self.at(kind) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
let n_raw_tokens = match kind {
|
|
|
|
T![-=]
|
|
|
|
| T![->]
|
|
|
|
| T![::]
|
|
|
|
| T![!=]
|
|
|
|
| T![..]
|
|
|
|
| T![*=]
|
|
|
|
| T![/=]
|
|
|
|
| T![&&]
|
|
|
|
| T![&=]
|
|
|
|
| T![%=]
|
|
|
|
| T![^=]
|
|
|
|
| T![+=]
|
|
|
|
| T![<<]
|
|
|
|
| T![<=]
|
|
|
|
| T![==]
|
|
|
|
| T![=>]
|
|
|
|
| T![>=]
|
|
|
|
| T![>>]
|
|
|
|
| T![|=]
|
|
|
|
| T![||] => 2,
|
|
|
|
|
|
|
|
T![...] | T![..=] | T![<<=] | T![>>=] => 3,
|
|
|
|
_ => 1,
|
|
|
|
};
|
|
|
|
self.do_bump(kind, n_raw_tokens);
|
|
|
|
true
|
|
|
|
}
|
|
|
|
|
|
|
|
fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
|
2021-12-25 18:59:02 +00:00
|
|
|
self.inp.kind(self.pos + n) == k1
|
|
|
|
&& self.inp.kind(self.pos + n + 1) == k2
|
|
|
|
&& self.inp.is_joint(self.pos + n)
|
2019-09-09 11:52:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
|
2021-12-25 18:59:02 +00:00
|
|
|
self.inp.kind(self.pos + n) == k1
|
|
|
|
&& self.inp.kind(self.pos + n + 1) == k2
|
|
|
|
&& self.inp.kind(self.pos + n + 2) == k3
|
|
|
|
&& self.inp.is_joint(self.pos + n)
|
|
|
|
&& self.inp.is_joint(self.pos + n + 1)
|
2018-02-11 13:53:57 +00:00
|
|
|
}
|
|
|
|
|
2018-12-18 21:15:14 +00:00
|
|
|
/// Checks if the current token is in `kinds`.
|
2018-09-08 07:38:53 +00:00
|
|
|
pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
|
|
|
|
kinds.contains(self.current())
|
|
|
|
}
|
|
|
|
|
2018-02-11 14:58:22 +00:00
|
|
|
/// Checks if the current token is contextual keyword with text `t`.
|
2021-11-14 19:13:44 +00:00
|
|
|
pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool {
|
2021-12-25 18:59:02 +00:00
|
|
|
self.inp.contextual_kind(self.pos) == kw
|
2018-02-11 13:53:57 +00:00
|
|
|
}
|
|
|
|
|
2018-02-11 14:58:22 +00:00
|
|
|
/// Starts a new node in the syntax tree. All nodes and tokens
|
|
|
|
/// consumed between the `start` and the corresponding `Marker::complete`
|
|
|
|
/// belong to the same node.
|
2018-02-11 13:53:57 +00:00
|
|
|
pub(crate) fn start(&mut self) -> Marker {
|
2019-02-20 19:44:06 +00:00
|
|
|
let pos = self.events.len() as u32;
|
|
|
|
self.push_event(Event::tombstone());
|
|
|
|
Marker::new(pos)
|
2018-02-11 13:53:57 +00:00
|
|
|
}
|
|
|
|
|
2019-09-09 11:52:31 +00:00
|
|
|
/// Consume the next token if `kind` matches.
|
|
|
|
pub(crate) fn bump(&mut self, kind: SyntaxKind) {
|
|
|
|
assert!(self.eat(kind));
|
|
|
|
}
|
|
|
|
|
2020-02-22 12:16:13 +00:00
|
|
|
/// Advances the parser by one token
|
2019-09-09 21:59:29 +00:00
|
|
|
pub(crate) fn bump_any(&mut self) {
|
2019-02-20 19:44:06 +00:00
|
|
|
let kind = self.nth(0);
|
|
|
|
if kind == EOF {
|
|
|
|
return;
|
|
|
|
}
|
2021-10-03 12:39:43 +00:00
|
|
|
self.do_bump(kind, 1);
|
2019-09-09 22:03:00 +00:00
|
|
|
}
|
|
|
|
|
2018-02-11 14:58:22 +00:00
|
|
|
/// Advances the parser by one token, remapping its kind.
|
|
|
|
/// This is useful to create contextual keywords from
|
2021-08-22 15:48:15 +00:00
|
|
|
/// identifiers. For example, the lexer creates a `union`
|
2018-02-11 14:58:22 +00:00
|
|
|
/// *identifier* token, but the parser remaps it to the
|
|
|
|
/// `union` keyword, and keyword is what ends up in the
|
|
|
|
/// final tree.
|
2018-02-11 13:53:57 +00:00
|
|
|
pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
|
2019-02-20 19:44:06 +00:00
|
|
|
if self.nth(0) == EOF {
|
2019-03-23 07:53:48 +00:00
|
|
|
// FIXME: panic!?
|
2019-02-20 19:44:06 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
self.do_bump(kind, 1);
|
2018-02-11 13:53:57 +00:00
|
|
|
}
|
|
|
|
|
2018-02-11 14:58:22 +00:00
|
|
|
/// Emit error with the `message`
|
2019-03-23 07:53:48 +00:00
|
|
|
/// FIXME: this should be much more fancy and support
|
2018-02-11 14:58:22 +00:00
|
|
|
/// structured errors with spans and notes, like rustc
|
|
|
|
/// does.
|
2018-02-11 13:53:57 +00:00
|
|
|
pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
|
internal: replace TreeSink with a data structure
The general theme of this is to make parser a better independent
library.
The specific thing we do here is replacing callback based TreeSink with
a data structure. That is, rather than calling user-provided tree
construction methods, the parser now spits out a very bare-bones tree,
effectively a log of a DFS traversal.
This makes the parser usable without any *specifc* tree sink, and allows
us to, eg, move tests into this crate.
Now, it's also true that this is a distinction without a difference, as
the old and the new interface are equivalent in expressiveness. Still,
this new thing seems somewhat simpler. But yeah, I admit I don't have a
suuper strong motivation here, just a hunch that this is better.
2021-12-19 14:36:23 +00:00
|
|
|
let msg = message.into();
|
2021-10-03 12:39:43 +00:00
|
|
|
self.push_event(Event::Error { msg });
|
2018-02-11 13:53:57 +00:00
|
|
|
}
|
|
|
|
|
2018-02-11 14:58:22 +00:00
|
|
|
/// Consume the next token if it is `kind` or emit an error
|
|
|
|
/// otherwise.
|
|
|
|
pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
|
|
|
|
if self.eat(kind) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
self.error(format!("expected {:?}", kind));
|
|
|
|
false
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create an error node and consume the next token.
|
2018-02-11 13:53:57 +00:00
|
|
|
pub(crate) fn err_and_bump(&mut self, message: &str) {
|
2021-10-23 17:43:45 +00:00
|
|
|
self.err_recover(message, TokenSet::EMPTY);
|
2018-08-28 08:12:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Create an error node and consume the next token.
|
2018-08-31 10:35:48 +00:00
|
|
|
pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
|
2019-09-10 18:28:27 +00:00
|
|
|
match self.current() {
|
2021-10-23 17:43:45 +00:00
|
|
|
T!['{'] | T!['}'] => {
|
2019-09-10 18:28:27 +00:00
|
|
|
self.error(message);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
|
|
|
|
if self.at_ts(recovery) {
|
2018-08-28 08:12:42 +00:00
|
|
|
self.error(message);
|
2019-09-10 18:28:27 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
let m = self.start();
|
|
|
|
self.error(message);
|
|
|
|
self.bump_any();
|
|
|
|
m.complete(self, ERROR);
|
2018-02-11 13:53:57 +00:00
|
|
|
}
|
2019-02-20 19:44:06 +00:00
|
|
|
|
|
|
|
fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
|
2021-11-14 19:13:44 +00:00
|
|
|
self.pos += n_raw_tokens as usize;
|
2019-02-20 19:44:06 +00:00
|
|
|
self.push_event(Event::Token { kind, n_raw_tokens });
|
|
|
|
}
|
|
|
|
|
|
|
|
fn push_event(&mut self, event: Event) {
|
2021-10-03 12:39:43 +00:00
|
|
|
self.events.push(event);
|
2019-02-20 19:44:06 +00:00
|
|
|
}
|
2018-02-11 13:53:57 +00:00
|
|
|
}
|
|
|
|
|
2021-08-04 03:57:31 +00:00
|
|
|
/// See [`Parser::start`].
|
2018-08-01 08:58:19 +00:00
|
|
|
pub(crate) struct Marker {
|
|
|
|
pos: u32,
|
|
|
|
bomb: DropBomb,
|
|
|
|
}
|
2018-02-11 13:53:57 +00:00
|
|
|
|
|
|
|
impl Marker {
|
2018-08-01 08:58:19 +00:00
|
|
|
fn new(pos: u32) -> Marker {
|
2019-02-08 11:49:43 +00:00
|
|
|
Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") }
|
2018-08-01 08:58:19 +00:00
|
|
|
}
|
|
|
|
|
2019-01-01 08:09:51 +00:00
|
|
|
/// Finishes the syntax tree node and assigns `kind` to it,
|
|
|
|
/// and mark the create a `CompletedMarker` for possible future
|
|
|
|
/// operation like `.precede()` to deal with forward_parent.
|
2022-07-20 13:02:08 +00:00
|
|
|
pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker {
|
2018-08-01 08:58:19 +00:00
|
|
|
self.bomb.defuse();
|
2019-02-20 19:44:06 +00:00
|
|
|
let idx = self.pos as usize;
|
2020-08-12 08:14:08 +00:00
|
|
|
match &mut p.events[idx] {
|
|
|
|
Event::Start { kind: slot, .. } => {
|
2019-02-20 19:44:06 +00:00
|
|
|
*slot = kind;
|
|
|
|
}
|
|
|
|
_ => unreachable!(),
|
|
|
|
}
|
|
|
|
p.push_event(Event::Finish);
|
2021-09-25 15:07:51 +00:00
|
|
|
CompletedMarker::new(self.pos, kind)
|
2018-02-11 13:53:57 +00:00
|
|
|
}
|
|
|
|
|
2018-02-11 14:58:22 +00:00
|
|
|
/// Abandons the syntax tree node. All its children
|
|
|
|
/// are attached to its parent instead.
|
2022-07-20 13:02:08 +00:00
|
|
|
pub(crate) fn abandon(mut self, p: &mut Parser<'_>) {
|
2018-08-01 08:58:19 +00:00
|
|
|
self.bomb.defuse();
|
2019-02-20 19:44:06 +00:00
|
|
|
let idx = self.pos as usize;
|
|
|
|
if idx == p.events.len() - 1 {
|
|
|
|
match p.events.pop() {
|
|
|
|
Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (),
|
|
|
|
_ => unreachable!(),
|
|
|
|
}
|
|
|
|
}
|
2018-02-11 13:53:57 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-19 09:44:23 +00:00
|
|
|
pub(crate) struct CompletedMarker {
|
2021-09-25 15:07:51 +00:00
|
|
|
pos: u32,
|
2019-03-19 09:44:23 +00:00
|
|
|
kind: SyntaxKind,
|
|
|
|
}
|
2018-02-11 13:53:57 +00:00
|
|
|
|
|
|
|
impl CompletedMarker {
|
2021-09-25 15:07:51 +00:00
|
|
|
fn new(pos: u32, kind: SyntaxKind) -> Self {
|
|
|
|
CompletedMarker { pos, kind }
|
2019-01-01 08:09:51 +00:00
|
|
|
}
|
|
|
|
|
2018-02-11 14:58:22 +00:00
|
|
|
/// This method allows to create a new node which starts
|
|
|
|
/// *before* the current one. That is, parser could start
|
|
|
|
/// node `A`, then complete it, and then after parsing the
|
|
|
|
/// whole `A`, decide that it should have started some node
|
|
|
|
/// `B` before starting `A`. `precede` allows to do exactly
|
2021-08-04 03:57:31 +00:00
|
|
|
/// that. See also docs about
|
|
|
|
/// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent).
|
2019-01-01 08:09:51 +00:00
|
|
|
///
|
|
|
|
/// Given completed events `[START, FINISH]` and its corresponding
|
|
|
|
/// `CompletedMarker(pos: 0, _)`.
|
|
|
|
/// Append a new `START` events as `[START, FINISH, NEWSTART]`,
|
|
|
|
/// then mark `NEWSTART` as `START`'s parent with saving its relative
|
|
|
|
/// distance to `NEWSTART` into forward_parent(=2 in this case);
|
2022-07-20 13:02:08 +00:00
|
|
|
pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker {
|
2019-02-20 19:44:06 +00:00
|
|
|
let new_pos = p.start();
|
2021-09-25 15:07:51 +00:00
|
|
|
let idx = self.pos as usize;
|
2020-08-12 08:14:08 +00:00
|
|
|
match &mut p.events[idx] {
|
|
|
|
Event::Start { forward_parent, .. } => {
|
2021-09-25 15:07:51 +00:00
|
|
|
*forward_parent = Some(new_pos.pos - self.pos);
|
2019-02-20 19:44:06 +00:00
|
|
|
}
|
|
|
|
_ => unreachable!(),
|
|
|
|
}
|
|
|
|
new_pos
|
2018-02-11 13:53:57 +00:00
|
|
|
}
|
2018-08-07 13:32:09 +00:00
|
|
|
|
2021-09-25 15:07:51 +00:00
|
|
|
/// Extends this completed marker *to the left* up to `m`.
|
2022-07-20 13:02:08 +00:00
|
|
|
pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker {
|
2021-09-25 15:07:51 +00:00
|
|
|
m.bomb.defuse();
|
2021-09-25 16:11:45 +00:00
|
|
|
let idx = m.pos as usize;
|
|
|
|
match &mut p.events[idx] {
|
|
|
|
Event::Start { forward_parent, .. } => {
|
|
|
|
*forward_parent = Some(self.pos - m.pos);
|
|
|
|
}
|
|
|
|
_ => unreachable!(),
|
|
|
|
}
|
2021-09-25 16:51:54 +00:00
|
|
|
self
|
2019-03-19 09:44:23 +00:00
|
|
|
}
|
|
|
|
|
2018-08-07 13:32:09 +00:00
|
|
|
pub(crate) fn kind(&self) -> SyntaxKind {
|
2019-03-19 09:44:23 +00:00
|
|
|
self.kind
|
2018-08-07 13:32:09 +00:00
|
|
|
}
|
2018-02-11 13:53:57 +00:00
|
|
|
}
|