rust-analyzer/crates/ra_parser/src/parser.rs

430 lines
14 KiB
Rust
Raw Normal View History

2019-02-20 19:44:06 +00:00
use std::cell::Cell;
2018-12-06 17:49:36 +00:00
use drop_bomb::DropBomb;
2018-10-15 16:55:32 +00:00
use crate::{
2019-02-20 19:44:06 +00:00
SyntaxKind::{self, ERROR, EOF, TOMBSTONE},
2019-02-21 10:27:45 +00:00
TokenSource, ParseError, TokenSet,
event::Event,
2018-07-31 20:38:19 +00:00
};
2018-02-11 14:58:22 +00:00
/// `Parser` struct provides the low-level API for
/// navigating through the stream of tokens and
/// constructing the parse tree. The actual parsing
/// happens in the `grammar` module.
///
/// However, the result of this `Parser` is not a real
/// tree, but rather a flat stream of events of the form
/// "start expression, consume number literal,
/// finish expression". See `Event` docs for more.
2019-02-20 19:44:06 +00:00
pub(crate) struct Parser<'t> {
token_source: &'t dyn TokenSource,
2019-02-20 19:58:56 +00:00
token_pos: usize,
2019-02-20 19:44:06 +00:00
events: Vec<Event>,
steps: Cell<u32>,
}
impl<'t> Parser<'t> {
2019-02-20 19:44:06 +00:00
pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> {
2019-02-20 19:58:56 +00:00
Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) }
2019-02-20 19:44:06 +00:00
}
pub(crate) fn finish(self) -> Vec<Event> {
self.events
}
2018-02-11 14:58:22 +00:00
/// Kind of the token the parser is currently looking at.
/// Once the input is exhausted this is the special `EOF` kind.
pub(crate) fn current(&self) -> SyntaxKind {
    let lookahead = 0;
    self.nth(lookahead)
}
2019-02-20 12:03:31 +00:00
/// Kinds of the current token and the one after it, provided the token
/// source reports the current token as joint to the next one (i.e. the
/// two are not separated by trivia). Otherwise `None`.
///
/// Handy for recognising two-token operators such as `>>`.
pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> {
    // Both lookaheads are always performed, even if the tokens turn out not to be joint.
    let first = self.nth(0);
    let second = self.nth(1);
    if !self.token_source.is_token_joint_to_next(self.token_pos) {
        return None;
    }
    Some((first, second))
}
/// Kinds of the current token and the two after it, provided the token
/// source reports both the current token and its successor as joint to
/// their next token (i.e. no trivia separates the three). Otherwise `None`.
///
/// Handy for recognising three-token operators such as `>>=`.
pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> {
    // All three lookaheads are always performed before jointness is consulted.
    let first = self.nth(0);
    let second = self.nth(1);
    let third = self.nth(2);

    let source = self.token_source;
    let pos = self.token_pos;
    if !source.is_token_joint_to_next(pos) {
        return None;
    }
    if !source.is_token_joint_to_next(pos + 1) {
        return None;
    }
    Some((first, second, third))
}
2018-02-11 14:58:22 +00:00
/// Lookahead operation: returns the kind of the next nth
/// token.
2019-02-20 19:58:56 +00:00
pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
2019-02-20 19:44:06 +00:00
let steps = self.steps.get();
assert!(steps <= 10_000_000, "the parser seems stuck");
self.steps.set(steps + 1);
// It is beecause the Dollar will appear between nth
// Following code skips through it
let mut non_dollars_count = 0;
let mut i = 0;
loop {
let mut kind = self.token_source.token_kind(self.token_pos + i);
if let Some((composited, step)) = self.is_composite(kind, i) {
kind = composited;
i += step;
} else {
i += 1;
}
match kind {
EOF => return EOF,
SyntaxKind::L_DOLLAR | SyntaxKind::R_DOLLAR => {}
_ if non_dollars_count == n => return kind,
_ => non_dollars_count += 1,
}
}
}
2018-02-11 14:58:22 +00:00
/// Returns `true` when the current token has kind `kind`.
pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
    let current = self.current();
    current == kind
}
/// Checks if the current token is in `kinds`.
2018-09-08 07:38:53 +00:00
pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
kinds.contains(self.current())
}
2018-02-11 14:58:22 +00:00
/// Checks if the current token is contextual keyword with text `t`.
2019-02-20 19:44:06 +00:00
pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool {
2019-02-20 19:58:56 +00:00
self.token_source.is_keyword(self.token_pos, kw)
}
2018-02-11 14:58:22 +00:00
/// Starts a new node in the syntax tree. All nodes and tokens
/// consumed between the `start` and the corresponding `Marker::complete`
/// belong to the same node.
pub(crate) fn start(&mut self) -> Marker {
2019-02-20 19:44:06 +00:00
let pos = self.events.len() as u32;
self.push_event(Event::tombstone());
Marker::new(pos)
}
/// Advances the parser by one token unconditionally
/// Mainly use in `token_tree` parsing
pub(crate) fn bump_raw(&mut self) {
2019-05-01 14:39:47 +00:00
let mut kind = self.token_source.token_kind(self.token_pos);
// Skip dollars, do_bump will eat these later
let mut i = 0;
while kind == SyntaxKind::L_DOLLAR || kind == SyntaxKind::R_DOLLAR {
kind = self.token_source.token_kind(self.token_pos + i);
i += 1;
}
if kind == EOF {
return;
}
self.do_bump(kind, 1);
}
/// Advances the parser by one token with composite puncts handled
pub(crate) fn bump(&mut self) {
2019-02-20 19:44:06 +00:00
let kind = self.nth(0);
if kind == EOF {
return;
}
use SyntaxKind::*;
// Handle parser composites
match kind {
DOTDOTDOT | DOTDOTEQ => {
self.bump_compound(kind, 3);
}
DOTDOT | COLONCOLON | EQEQ | FAT_ARROW | NEQ | THIN_ARROW => {
self.bump_compound(kind, 2);
}
_ => {
self.do_bump(kind, 1);
}
}
}
2018-02-11 14:58:22 +00:00
/// Advances the parser by one token, remapping its kind.
/// This is useful to create contextual keywords from
/// identifiers. For example, the lexer creates an `union`
/// *identifier* token, but the parser remaps it to the
/// `union` keyword, and keyword is what ends up in the
/// final tree.
pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
2019-02-20 19:44:06 +00:00
if self.nth(0) == EOF {
2019-03-23 07:53:48 +00:00
// FIXME: panic!?
2019-02-20 19:44:06 +00:00
return;
}
self.do_bump(kind, 1);
}
2018-08-05 13:09:25 +00:00
/// Advances the parser by `n` raw tokens and records them as one token
/// of kind `kind`.
///
/// Used to build compound tokens out of their parts; e.g. `<<` is two
/// consecutive `<` tokens remapped to a single one.
pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) {
    let n_raw_tokens = n;
    self.do_bump(kind, n_raw_tokens);
}
2018-02-11 14:58:22 +00:00
/// Emit error with the `message`
2019-03-23 07:53:48 +00:00
/// FIXME: this should be much more fancy and support
2018-02-11 14:58:22 +00:00
/// structured errors with spans and notes, like rustc
/// does.
pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
2019-02-20 19:44:06 +00:00
let msg = ParseError(message.into());
self.push_event(Event::Error { msg })
}
2018-12-31 12:53:43 +00:00
/// Consumes the current token if it has kind `kind`.
/// Returns whether a token was consumed.
pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
    let matched = self.at(kind);
    if matched {
        self.bump();
    }
    matched
}
2018-02-11 14:58:22 +00:00
/// Consumes the current token if it has kind `kind`; otherwise records
/// an "expected ..." error and leaves the token in place.
/// Returns whether the expected token was found.
pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
    let found = self.eat(kind);
    if !found {
        self.error(format!("expected {:?}", kind));
    }
    found
}
/// Create an error node and consume the next token.
pub(crate) fn err_and_bump(&mut self, message: &str) {
2019-01-18 08:02:30 +00:00
self.err_recover(message, TokenSet::empty());
2018-08-28 08:12:42 +00:00
}
/// Create an error node and consume the next token.
2018-08-31 10:35:48 +00:00
pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
if self.at(SyntaxKind::L_CURLY) || self.at(SyntaxKind::R_CURLY) || self.at_ts(recovery) {
2018-08-28 08:12:42 +00:00
self.error(message);
} else {
let m = self.start();
self.error(message);
2018-08-26 06:12:18 +00:00
self.bump();
2018-08-28 08:12:42 +00:00
m.complete(self, ERROR);
2018-08-28 08:17:08 +00:00
};
}
2019-02-20 19:44:06 +00:00
fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
self.eat_dollars();
2019-02-20 19:58:56 +00:00
self.token_pos += usize::from(n_raw_tokens);
2019-02-20 19:44:06 +00:00
self.push_event(Event::Token { kind, n_raw_tokens });
}
/// Appends `event` to the end of the event list.
fn push_event(&mut self, event: Event) {
    self.events.push(event);
}
/// Helper that checks whether the raw token `n` positions ahead, whose
/// kind is `kind`, is the first piece of a composite punctuation token.
///
/// Returns the composite kind together with the number of raw tokens it
/// spans, or `None` if the token stands on its own. Pieces are only glued
/// when the token source reports each of them as joint to the next one,
/// so e.g. `! =` and `- >` are not turned into `!=` and `->`.
fn is_composite(&self, kind: SyntaxKind, n: usize) -> Option<(SyntaxKind, usize)> {
    use SyntaxKind::*;

    // We assume that dollars do not occur between the pieces of a
    // multi-token punctuation.
    let pos = self.token_pos + n;
    let jn1 = self.token_source.is_token_joint_to_next(pos);
    let la2 = self.token_source.token_kind(pos + 1);
    let jn2 = self.token_source.is_token_joint_to_next(pos + 1);
    let la3 = self.token_source.token_kind(pos + 2);

    match kind {
        // Longest matches first: `...` and `..=` before `..`.
        DOT if jn1 && la2 == DOT && jn2 && la3 == DOT => Some((DOTDOTDOT, 3)),
        DOT if jn1 && la2 == DOT && jn2 && la3 == EQ => Some((DOTDOTEQ, 3)),
        DOT if jn1 && la2 == DOT => Some((DOTDOT, 2)),

        COLON if jn1 && la2 == COLON => Some((COLONCOLON, 2)),
        EQ if jn1 && la2 == EQ => Some((EQEQ, 2)),
        EQ if jn1 && la2 == R_ANGLE => Some((FAT_ARROW, 2)),

        EXCL if jn1 && la2 == EQ => Some((NEQ, 2)),
        MINUS if jn1 && la2 == R_ANGLE => Some((THIN_ARROW, 2)),
        _ => None,
    }
}
/// Consumes every `L_DOLLAR` / `R_DOLLAR` token at the current position,
/// recording a one-raw-token event for each, and stops at the first
/// token of any other kind.
fn eat_dollars(&mut self) {
    loop {
        let kind = self.token_source.token_kind(self.token_pos);
        let is_dollar = kind == SyntaxKind::L_DOLLAR || kind == SyntaxKind::R_DOLLAR;
        if !is_dollar {
            break;
        }
        self.token_pos += 1;
        self.push_event(Event::Token { kind, n_raw_tokens: 1 });
    }
}
/// Consumes the run of `L_DOLLAR` tokens at the current position,
/// recording an event for each, and returns how many were consumed.
pub(crate) fn eat_l_dollars(&mut self) -> usize {
    let mut eaten = 0;
    while self.token_source.token_kind(self.token_pos) == SyntaxKind::L_DOLLAR {
        self.token_pos += 1;
        self.push_event(Event::Token { kind: SyntaxKind::L_DOLLAR, n_raw_tokens: 1 });
        eaten += 1;
    }
    eaten
}
/// Consumes up to `max_count` `R_DOLLAR` tokens at the current position,
/// recording an event for each, and returns how many were consumed.
///
/// Stops early at the first token that is not an `R_DOLLAR`. With
/// `max_count == 0` nothing is consumed.
pub(crate) fn eat_r_dollars(&mut self, max_count: usize) -> usize {
    let mut ate_count = 0;
    // Check the budget *before* eating so that `max_count` is a true upper bound.
    while ate_count < max_count
        && self.token_source.token_kind(self.token_pos) == SyntaxKind::R_DOLLAR
    {
        self.token_pos += 1;
        self.push_event(Event::Token { kind: SyntaxKind::R_DOLLAR, n_raw_tokens: 1 });
        ate_count += 1;
    }
    ate_count
}
pub(crate) fn at_l_dollar(&self) -> bool {
let kind = self.token_source.token_kind(self.token_pos);
(kind == SyntaxKind::L_DOLLAR)
}
pub(crate) fn at_r_dollar(&self) -> bool {
let kind = self.token_source.token_kind(self.token_pos);
(kind == SyntaxKind::R_DOLLAR)
}
}
2018-02-11 14:58:22 +00:00
/// See `Parser::start`.
2018-08-01 08:58:19 +00:00
pub(crate) struct Marker {
pos: u32,
bomb: DropBomb,
}
impl Marker {
2018-08-01 08:58:19 +00:00
fn new(pos: u32) -> Marker {
2019-02-08 11:49:43 +00:00
Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") }
2018-08-01 08:58:19 +00:00
}
2019-01-01 08:09:51 +00:00
/// Finishes the syntax tree node and assigns `kind` to it,
/// and mark the create a `CompletedMarker` for possible future
/// operation like `.precede()` to deal with forward_parent.
2018-08-01 08:58:19 +00:00
pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker {
self.bomb.defuse();
2019-02-20 19:44:06 +00:00
let idx = self.pos as usize;
match p.events[idx] {
Event::Start { kind: ref mut slot, .. } => {
*slot = kind;
}
_ => unreachable!(),
}
let finish_pos = p.events.len() as u32;
2019-02-20 19:44:06 +00:00
p.push_event(Event::Finish);
CompletedMarker::new(self.pos, finish_pos, kind)
}
2018-02-11 14:58:22 +00:00
/// Abandons the syntax tree node. All its children
/// are attached to its parent instead.
2018-08-01 08:58:19 +00:00
pub(crate) fn abandon(mut self, p: &mut Parser) {
self.bomb.defuse();
2019-02-20 19:44:06 +00:00
let idx = self.pos as usize;
if idx == p.events.len() - 1 {
match p.events.pop() {
Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (),
_ => unreachable!(),
}
}
}
}
/// Handle for a syntax node that has already been completed.
/// Produced by `Marker::complete`.
pub(crate) struct CompletedMarker {
    /// Index of the node's start event in the parser's event list.
    start_pos: u32,
    /// Index of the node's finish event in the parser's event list.
    finish_pos: u32,
    /// Kind the node was completed with.
    kind: SyntaxKind,
}
impl CompletedMarker {
fn new(start_pos: u32, finish_pos: u32, kind: SyntaxKind) -> Self {
CompletedMarker { start_pos, finish_pos, kind }
2019-01-01 08:09:51 +00:00
}
2018-02-11 14:58:22 +00:00
/// This method allows to create a new node which starts
/// *before* the current one. That is, parser could start
/// node `A`, then complete it, and then after parsing the
/// whole `A`, decide that it should have started some node
/// `B` before starting `A`. `precede` allows to do exactly
/// that. See also docs about `forward_parent` in `Event::Start`.
2019-01-01 08:09:51 +00:00
///
/// Given completed events `[START, FINISH]` and its corresponding
/// `CompletedMarker(pos: 0, _)`.
/// Append a new `START` events as `[START, FINISH, NEWSTART]`,
/// then mark `NEWSTART` as `START`'s parent with saving its relative
/// distance to `NEWSTART` into forward_parent(=2 in this case);
pub(crate) fn precede(self, p: &mut Parser) -> Marker {
2019-02-20 19:44:06 +00:00
let new_pos = p.start();
let idx = self.start_pos as usize;
2019-02-20 19:44:06 +00:00
match p.events[idx] {
Event::Start { ref mut forward_parent, .. } => {
*forward_parent = Some(new_pos.pos - self.start_pos);
2019-02-20 19:44:06 +00:00
}
_ => unreachable!(),
}
new_pos
}
2018-08-07 13:32:09 +00:00
/// Undo this completion and turns into a `Marker`
pub(crate) fn undo_completion(self, p: &mut Parser) -> Marker {
let start_idx = self.start_pos as usize;
let finish_idx = self.finish_pos as usize;
match p.events[start_idx] {
Event::Start { ref mut kind, forward_parent: None } => *kind = TOMBSTONE,
_ => unreachable!(),
}
match p.events[finish_idx] {
ref mut slot @ Event::Finish => *slot = Event::tombstone(),
_ => unreachable!(),
}
Marker::new(self.start_pos)
}
2018-08-07 13:32:09 +00:00
pub(crate) fn kind(&self) -> SyntaxKind {
self.kind
2018-08-07 13:32:09 +00:00
}
}