rust-analyzer/crates/ra_syntax/src/parsing/event.rs

248 lines
7.9 KiB
Rust
Raw Normal View History

2018-07-29 12:16:07 +00:00
//! This module provides a way to construct a syntax tree from parser events.
//! It is intended to be completely decoupled from the
//! parser, so that the tree representation and the parser
//! algorithm can evolve independently.
//!
//! The `TreeSink` trait is the bridge between the parser and the
//! tree builder: the parser produces a stream of events like
//! `start node`, `finish node`, and `EventProcessor` replays
//! this stream into a `TreeSink`, which builds the real tree.
use std::mem;
2018-10-15 16:55:32 +00:00
use crate::{
SmolStr,
2018-10-08 14:33:13 +00:00
SyntaxKind::{self, *},
TextRange, TextUnit,
parsing::{
2019-02-20 20:17:07 +00:00
ParseError, TreeSink,
lexer::Token,
},
2018-07-28 10:07:10 +00:00
};
2018-08-07 15:28:30 +00:00
2018-02-04 10:53:47 +00:00
/// `Parser` produces a flat list of `Event`s.
/// They are converted to a tree-structure in
/// a separate pass, via `TreeBuilder`.
#[derive(Debug)]
pub(crate) enum Event {
/// This event signifies the start of the node.
/// It should be either abandoned (in which case the
/// `kind` is `TOMBSTONE`, and the event is ignored),
/// or completed via a `Finish` event.
///
/// All tokens between a `Start` and a `Finish` would
/// become the children of the respective node.
///
/// For left-recursive syntactic constructs, the parser produces
/// a child node before it sees a parent. `forward_parent`
2019-01-01 08:09:51 +00:00
/// saves the position of current event's parent.
2018-02-04 10:53:47 +00:00
///
/// Consider this path
///
/// foo::bar
///
/// The events for it would look like this:
///
///
/// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH
/// | /\
/// | |
/// +------forward-parent------+
///
/// And the tree would look like this
///
/// +--PATH---------+
/// | | |
/// | | |
/// | '::' 'bar'
/// |
/// PATH
/// |
/// 'foo'
///
2018-02-11 14:58:22 +00:00
/// See also `CompletedMarker::precede`.
2018-02-04 10:53:47 +00:00
Start {
kind: SyntaxKind,
forward_parent: Option<u32>,
},
/// Complete the previous `Start` event
Finish,
/// Produce a single leaf-element.
/// `n_raw_tokens` is used to glue complex contextual tokens.
/// For example, lexer tokenizes `>>` as `>`, `>`, and
/// `n_raw_tokens = 2` is used to produced a single `>>`.
Token {
kind: SyntaxKind,
n_raw_tokens: u8,
},
Error {
msg: ParseError,
2018-02-04 10:53:47 +00:00
},
}
2019-01-01 08:09:51 +00:00
impl Event {
pub(crate) fn tombstone() -> Self {
2019-02-08 11:49:43 +00:00
Event::Start { kind: TOMBSTONE, forward_parent: None }
2019-01-01 08:09:51 +00:00
}
}
2019-02-20 18:08:59 +00:00
pub(super) struct EventProcessor<'a, S: TreeSink> {
2018-10-08 12:44:00 +00:00
sink: S,
text_pos: TextUnit,
text: &'a str,
token_pos: usize,
tokens: &'a [Token],
events: &'a mut [Event],
}
2018-08-08 22:57:51 +00:00
2019-02-20 18:08:59 +00:00
impl<'a, S: TreeSink> EventProcessor<'a, S> {
pub(super) fn new(
sink: S,
text: &'a str,
tokens: &'a [Token],
events: &'a mut [Event],
) -> EventProcessor<'a, S> {
2019-02-08 11:49:43 +00:00
EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events }
2018-08-08 22:57:51 +00:00
}
2018-10-08 12:44:00 +00:00
2019-01-01 08:09:51 +00:00
/// Generate the syntax tree with the control of events.
2019-02-20 19:52:32 +00:00
pub(crate) fn process(mut self) -> S {
2018-10-08 12:44:00 +00:00
let mut forward_parents = Vec::new();
2018-02-04 10:53:47 +00:00
2018-10-08 12:44:00 +00:00
for i in 0..self.events.len() {
2019-01-01 08:09:51 +00:00
match mem::replace(&mut self.events[i], Event::tombstone()) {
2019-02-08 11:49:43 +00:00
Event::Start { kind: TOMBSTONE, .. } => (),
2018-02-04 10:53:47 +00:00
2019-02-08 11:49:43 +00:00
Event::Start { kind, forward_parent } => {
2019-01-01 08:09:51 +00:00
// For events[A, B, C], B is A's forward_parent, C is B's forward_parent,
// in the normal control flow, the parent-child relation: `A -> B -> C`,
// while with the magic forward_parent, it writes: `C <- B <- A`.
// append `A` into parents.
2018-10-08 12:44:00 +00:00
forward_parents.push(kind);
let mut idx = i;
let mut fp = forward_parent;
while let Some(fwd) = fp {
idx += fwd as usize;
2019-01-01 08:09:51 +00:00
// append `A`'s forward_parent `B`
fp = match mem::replace(&mut self.events[idx], Event::tombstone()) {
2019-02-08 11:49:43 +00:00
Event::Start { kind, forward_parent } => {
2018-10-08 12:44:00 +00:00
forward_parents.push(kind);
forward_parent
}
2018-10-08 12:44:00 +00:00
_ => unreachable!(),
};
2019-01-01 08:09:51 +00:00
// append `B`'s forward_parent `C` in the next stage.
2018-10-08 12:44:00 +00:00
}
2019-01-01 08:09:51 +00:00
2018-10-08 12:44:00 +00:00
for kind in forward_parents.drain(..).rev() {
2018-10-08 14:33:13 +00:00
self.start(kind);
2018-10-08 12:44:00 +00:00
}
2018-02-04 10:53:47 +00:00
}
2018-10-08 12:44:00 +00:00
Event::Finish => {
2019-01-01 08:09:51 +00:00
let is_last = i == self.events.len() - 1;
self.finish(is_last);
}
2018-10-08 14:33:13 +00:00
Event::Token { kind, n_raw_tokens } => {
2018-12-31 12:53:43 +00:00
self.eat_trivias();
2018-10-08 12:46:14 +00:00
let n_raw_tokens = n_raw_tokens as usize;
let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens]
.iter()
.map(|it| it.len)
.sum::<TextUnit>();
self.leaf(kind, len, n_raw_tokens);
2018-02-04 10:53:47 +00:00
}
2019-02-20 20:17:07 +00:00
Event::Error { msg } => self.sink.error(msg),
2018-02-04 10:53:47 +00:00
}
2018-10-08 12:44:00 +00:00
}
self.sink
}
2019-01-01 08:09:51 +00:00
/// Add the node into syntax tree but discard the comments/whitespaces.
2018-10-08 14:33:13 +00:00
fn start(&mut self, kind: SyntaxKind) {
2018-11-07 15:38:43 +00:00
if kind == SOURCE_FILE {
2018-12-31 12:53:43 +00:00
self.sink.start_branch(kind);
2018-10-08 14:33:13 +00:00
return;
}
2019-02-08 11:49:43 +00:00
let n_trivias =
self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
2018-10-08 14:33:13 +00:00
let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
let mut trivia_end =
self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextUnit>();
2018-10-08 14:33:13 +00:00
let n_attached_trivias = {
let leading_trivias = leading_trivias.iter().rev().map(|it| {
let next_end = trivia_end - it.len;
let range = TextRange::from_to(next_end, trivia_end);
trivia_end = next_end;
(it.kind, &self.text[range])
});
2018-10-08 14:33:13 +00:00
n_attached_trivias(kind, leading_trivias)
};
self.eat_n_trivias(n_trivias - n_attached_trivias);
2018-12-31 12:53:43 +00:00
self.sink.start_branch(kind);
2018-10-08 14:33:13 +00:00
self.eat_n_trivias(n_attached_trivias);
}
2019-01-01 08:09:51 +00:00
fn finish(&mut self, is_last: bool) {
if is_last {
2018-12-31 12:53:43 +00:00
self.eat_trivias()
2018-10-08 14:33:13 +00:00
}
2018-12-31 12:53:43 +00:00
self.sink.finish_branch();
2018-10-08 14:33:13 +00:00
}
2018-12-31 12:53:43 +00:00
fn eat_trivias(&mut self) {
2018-10-08 12:44:00 +00:00
while let Some(&token) = self.tokens.get(self.token_pos) {
if !token.kind.is_trivia() {
break;
2018-02-04 14:06:43 +00:00
}
2018-10-08 12:44:00 +00:00
self.leaf(token.kind, token.len, 1);
2018-02-04 10:53:47 +00:00
}
}
2018-10-08 12:44:00 +00:00
2018-10-08 14:33:13 +00:00
fn eat_n_trivias(&mut self, n: usize) {
for _ in 0..n {
let token = self.tokens[self.token_pos];
assert!(token.kind.is_trivia());
self.leaf(token.kind, token.len, 1);
}
}
2018-10-08 12:44:00 +00:00
fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) {
let range = TextRange::offset_len(self.text_pos, len);
let text: SmolStr = self.text[range].into();
self.text_pos += len;
self.token_pos += n_tokens;
self.sink.leaf(kind, text);
}
2018-02-04 10:53:47 +00:00
}
2018-10-08 14:33:13 +00:00
fn n_attached_trivias<'a>(
kind: SyntaxKind,
trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
) -> usize {
2018-10-08 14:33:13 +00:00
match kind {
2019-01-23 20:59:19 +00:00
CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF
2019-01-25 19:29:56 +00:00
| MODULE | NAMED_FIELD_DEF => {
2018-10-08 14:33:13 +00:00
let mut res = 0;
for (i, (kind, text)) in trivias.enumerate() {
match kind {
WHITESPACE => {
if text.contains("\n\n") {
break;
}
}
COMMENT => {
res = i + 1;
}
_ => (),
}
}
res
}
_ => 0,
}
}