2018-07-29 12:16:07 +00:00
|
|
|
//! This module provides a way to construct a `File`.
|
|
|
|
//! It is intended to be completely decoupled from the
|
|
|
|
//! parser, so that the tree representation
//! and the parser algorithm can evolve independently.
|
|
|
|
//!
|
2019-02-20 18:08:59 +00:00
|
|
|
//! The parser produces a stream of events like `start node` and
//! `finish node`, and `process` converts this stream into an `Output`.
|
2019-02-20 12:47:32 +00:00
|
|
|
use std::mem;
|
|
|
|
|
2018-10-15 16:55:32 +00:00
|
|
|
use crate::{
|
2021-12-25 18:59:02 +00:00
|
|
|
output::Output,
|
2018-10-08 14:33:13 +00:00
|
|
|
SyntaxKind::{self, *},
|
2018-07-28 10:07:10 +00:00
|
|
|
};
|
2018-08-07 15:28:30 +00:00
|
|
|
|
2018-02-04 10:53:47 +00:00
|
|
|
/// `Parser` produces a flat list of `Event`s.
|
|
|
|
/// They are converted to a tree-structure in
|
|
|
|
/// a separate pass, via `TreeBuilder`.
|
|
|
|
#[derive(Debug)]
|
|
|
|
pub(crate) enum Event {
|
|
|
|
/// This event signifies the start of the node.
|
|
|
|
/// It should be either abandoned (in which case the
|
|
|
|
/// `kind` is `TOMBSTONE`, and the event is ignored),
|
|
|
|
/// or completed via a `Finish` event.
|
|
|
|
///
|
|
|
|
/// All tokens between a `Start` and a `Finish` would
|
|
|
|
/// become the children of the respective node.
|
|
|
|
///
|
|
|
|
/// For left-recursive syntactic constructs, the parser produces
|
|
|
|
/// a child node before it sees a parent. `forward_parent`
|
2019-01-01 08:09:51 +00:00
|
|
|
/// saves the position of current event's parent.
|
2018-02-04 10:53:47 +00:00
|
|
|
///
|
|
|
|
/// Consider this path
|
|
|
|
///
|
|
|
|
/// foo::bar
|
|
|
|
///
|
|
|
|
/// The events for it would look like this:
|
|
|
|
///
|
2021-01-18 21:44:40 +00:00
|
|
|
/// ```text
|
2019-05-15 12:35:47 +00:00
|
|
|
/// START(PATH) IDENT('foo') FINISH START(PATH) T![::] IDENT('bar') FINISH
|
2018-02-04 10:53:47 +00:00
|
|
|
/// | /\
|
|
|
|
/// | |
|
|
|
|
/// +------forward-parent------+
|
2021-01-18 21:44:40 +00:00
|
|
|
/// ```
|
2018-02-04 10:53:47 +00:00
|
|
|
///
|
|
|
|
/// And the tree would look like this
|
|
|
|
///
|
2021-01-18 21:44:40 +00:00
|
|
|
/// ```text
|
2018-02-04 10:53:47 +00:00
|
|
|
/// +--PATH---------+
|
|
|
|
/// | | |
|
|
|
|
/// | | |
|
|
|
|
/// | '::' 'bar'
|
|
|
|
/// |
|
|
|
|
/// PATH
|
|
|
|
/// |
|
|
|
|
/// 'foo'
|
2021-01-18 21:44:40 +00:00
|
|
|
/// ```
|
2018-02-04 10:53:47 +00:00
|
|
|
///
|
2018-02-11 14:58:22 +00:00
|
|
|
/// See also `CompletedMarker::precede`.
|
2018-02-04 10:53:47 +00:00
|
|
|
Start {
|
|
|
|
kind: SyntaxKind,
|
|
|
|
forward_parent: Option<u32>,
|
|
|
|
},
|
|
|
|
|
|
|
|
/// Complete the previous `Start` event
|
|
|
|
Finish,
|
|
|
|
|
|
|
|
/// Produce a single leaf-element.
|
|
|
|
/// `n_raw_tokens` is used to glue complex contextual tokens.
|
|
|
|
/// For example, lexer tokenizes `>>` as `>`, `>`, and
|
|
|
|
/// `n_raw_tokens = 2` is used to produced a single `>>`.
|
|
|
|
Token {
|
|
|
|
kind: SyntaxKind,
|
|
|
|
n_raw_tokens: u8,
|
|
|
|
},
|
2023-02-13 11:55:14 +00:00
|
|
|
/// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a float literal
|
|
|
|
/// instead of an integer literal followed by a dot as the lexer has no contextual knowledge.
|
|
|
|
/// This event instructs whatever consumes the events to split the float literal into
|
|
|
|
/// the corresponding parts.
|
|
|
|
FloatSplitHack {
|
|
|
|
ends_in_dot: bool,
|
|
|
|
},
|
2018-02-04 10:53:47 +00:00
|
|
|
Error {
|
internal: replace TreeSink with a data structure
The general theme of this is to make parser a better independent
library.
The specific thing we do here is replacing callback based TreeSink with
a data structure. That is, rather than calling user-provided tree
construction methods, the parser now spits out a very bare-bones tree,
effectively a log of a DFS traversal.
This makes the parser usable without any *specifc* tree sink, and allows
us to, eg, move tests into this crate.
Now, it's also true that this is a distinction without a difference, as
the old and the new interface are equivalent in expressiveness. Still,
this new thing seems somewhat simpler. But yeah, I admit I don't have a
suuper strong motivation here, just a hunch that this is better.
2021-12-19 14:36:23 +00:00
|
|
|
msg: String,
|
2018-02-04 10:53:47 +00:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2019-01-01 08:09:51 +00:00
|
|
|
impl Event {
|
|
|
|
pub(crate) fn tombstone() -> Self {
|
2019-02-08 11:49:43 +00:00
|
|
|
Event::Start { kind: TOMBSTONE, forward_parent: None }
|
2019-01-01 08:09:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-21 09:03:42 +00:00
|
|
|
/// Generate the syntax tree with the control of events.
|
2021-12-25 18:59:02 +00:00
|
|
|
pub(super) fn process(mut events: Vec<Event>) -> Output {
|
|
|
|
let mut res = Output::default();
|
2019-02-21 09:03:42 +00:00
|
|
|
let mut forward_parents = Vec::new();
|
|
|
|
|
|
|
|
for i in 0..events.len() {
|
|
|
|
match mem::replace(&mut events[i], Event::tombstone()) {
|
|
|
|
Event::Start { kind, forward_parent } => {
|
|
|
|
// For events[A, B, C], B is A's forward_parent, C is B's forward_parent,
|
|
|
|
// in the normal control flow, the parent-child relation: `A -> B -> C`,
|
|
|
|
// while with the magic forward_parent, it writes: `C <- B <- A`.
|
|
|
|
|
|
|
|
// append `A` into parents.
|
|
|
|
forward_parents.push(kind);
|
|
|
|
let mut idx = i;
|
|
|
|
let mut fp = forward_parent;
|
|
|
|
while let Some(fwd) = fp {
|
|
|
|
idx += fwd as usize;
|
|
|
|
// append `A`'s forward_parent `B`
|
|
|
|
fp = match mem::replace(&mut events[idx], Event::tombstone()) {
|
|
|
|
Event::Start { kind, forward_parent } => {
|
2021-09-25 16:11:45 +00:00
|
|
|
forward_parents.push(kind);
|
2019-02-21 09:03:42 +00:00
|
|
|
forward_parent
|
|
|
|
}
|
|
|
|
_ => unreachable!(),
|
|
|
|
};
|
|
|
|
// append `B`'s forward_parent `C` in the next stage.
|
2018-02-04 10:53:47 +00:00
|
|
|
}
|
2018-10-08 14:33:13 +00:00
|
|
|
|
2019-02-21 12:24:42 +00:00
|
|
|
for kind in forward_parents.drain(..).rev() {
|
2021-09-25 16:11:45 +00:00
|
|
|
if kind != TOMBSTONE {
|
internal: replace TreeSink with a data structure
The general theme of this is to make parser a better independent
library.
The specific thing we do here is replacing callback based TreeSink with
a data structure. That is, rather than calling user-provided tree
construction methods, the parser now spits out a very bare-bones tree,
effectively a log of a DFS traversal.
This makes the parser usable without any *specifc* tree sink, and allows
us to, eg, move tests into this crate.
Now, it's also true that this is a distinction without a difference, as
the old and the new interface are equivalent in expressiveness. Still,
this new thing seems somewhat simpler. But yeah, I admit I don't have a
suuper strong motivation here, just a hunch that this is better.
2021-12-19 14:36:23 +00:00
|
|
|
res.enter_node(kind);
|
2021-09-25 16:11:45 +00:00
|
|
|
}
|
2018-10-08 14:33:13 +00:00
|
|
|
}
|
|
|
|
}
|
internal: replace TreeSink with a data structure
The general theme of this is to make parser a better independent
library.
The specific thing we do here is replacing callback based TreeSink with
a data structure. That is, rather than calling user-provided tree
construction methods, the parser now spits out a very bare-bones tree,
effectively a log of a DFS traversal.
This makes the parser usable without any *specifc* tree sink, and allows
us to, eg, move tests into this crate.
Now, it's also true that this is a distinction without a difference, as
the old and the new interface are equivalent in expressiveness. Still,
this new thing seems somewhat simpler. But yeah, I admit I don't have a
suuper strong motivation here, just a hunch that this is better.
2021-12-19 14:36:23 +00:00
|
|
|
Event::Finish => res.leave_node(),
|
2019-02-21 09:03:42 +00:00
|
|
|
Event::Token { kind, n_raw_tokens } => {
|
internal: replace TreeSink with a data structure
The general theme of this is to make parser a better independent
library.
The specific thing we do here is replacing callback based TreeSink with
a data structure. That is, rather than calling user-provided tree
construction methods, the parser now spits out a very bare-bones tree,
effectively a log of a DFS traversal.
This makes the parser usable without any *specifc* tree sink, and allows
us to, eg, move tests into this crate.
Now, it's also true that this is a distinction without a difference, as
the old and the new interface are equivalent in expressiveness. Still,
this new thing seems somewhat simpler. But yeah, I admit I don't have a
suuper strong motivation here, just a hunch that this is better.
2021-12-19 14:36:23 +00:00
|
|
|
res.token(kind, n_raw_tokens);
|
2019-02-21 09:03:42 +00:00
|
|
|
}
|
2023-02-13 11:55:14 +00:00
|
|
|
Event::FloatSplitHack { ends_in_dot } => {
|
|
|
|
res.float_split_hack(ends_in_dot);
|
|
|
|
let ev = mem::replace(&mut events[i + 1], Event::tombstone());
|
|
|
|
assert!(matches!(ev, Event::Finish), "{ev:?}");
|
|
|
|
}
|
internal: replace TreeSink with a data structure
The general theme of this is to make parser a better independent
library.
The specific thing we do here is replacing callback based TreeSink with
a data structure. That is, rather than calling user-provided tree
construction methods, the parser now spits out a very bare-bones tree,
effectively a log of a DFS traversal.
This makes the parser usable without any *specifc* tree sink, and allows
us to, eg, move tests into this crate.
Now, it's also true that this is a distinction without a difference, as
the old and the new interface are equivalent in expressiveness. Still,
this new thing seems somewhat simpler. But yeah, I admit I don't have a
suuper strong motivation here, just a hunch that this is better.
2021-12-19 14:36:23 +00:00
|
|
|
Event::Error { msg } => res.error(msg),
|
2018-10-08 14:33:13 +00:00
|
|
|
}
|
|
|
|
}
|
internal: replace TreeSink with a data structure
The general theme of this is to make parser a better independent
library.
The specific thing we do here is replacing callback based TreeSink with
a data structure. That is, rather than calling user-provided tree
construction methods, the parser now spits out a very bare-bones tree,
effectively a log of a DFS traversal.
This makes the parser usable without any *specifc* tree sink, and allows
us to, eg, move tests into this crate.
Now, it's also true that this is a distinction without a difference, as
the old and the new interface are equivalent in expressiveness. Still,
this new thing seems somewhat simpler. But yeah, I admit I don't have a
suuper strong motivation here, just a hunch that this is better.
2021-12-19 14:36:23 +00:00
|
|
|
|
|
|
|
res
|
2018-10-08 14:33:13 +00:00
|
|
|
}
|