2019-02-21 12:24:42 +00:00
|
|
|
//! The Rust parser.
|
|
|
|
//!
|
2021-12-12 16:06:40 +00:00
|
|
|
//! NOTE: The crate is undergoing refactors, don't believe everything the docs
|
|
|
|
//! say :-)
|
|
|
|
//!
|
2019-02-21 12:24:42 +00:00
|
|
|
//! The parser doesn't know about concrete representation of tokens and syntax
|
2021-12-12 16:06:40 +00:00
|
|
|
//! trees. Abstract [`Input`] and [`Output`] data structures are used instead. As
|
|
|
|
//! a consequence, this crate does not contain a lexer.
|
2019-02-21 12:24:42 +00:00
|
|
|
//!
|
2021-08-04 03:57:31 +00:00
|
|
|
//! The [`Parser`] struct from the [`parser`] module is a cursor into the
|
|
|
|
//! sequence of tokens. Parsing routines use [`Parser`] to inspect current
|
|
|
|
//! state and advance the parsing.
|
2019-02-21 12:24:42 +00:00
|
|
|
//!
|
2021-08-04 03:57:31 +00:00
|
|
|
//! The actual parsing happens in the [`grammar`] module.
|
2019-02-21 12:24:42 +00:00
|
|
|
//!
|
2021-08-04 03:57:31 +00:00
|
|
|
//! Tests for this crate live in the `syntax` crate.
|
|
|
|
//!
|
|
|
|
//! [`Parser`]: crate::parser::Parser
|
|
|
|
#![allow(rustdoc::private_intra_doc_links)]
|
2021-09-06 15:42:07 +00:00
|
|
|
|
2021-12-18 12:31:50 +00:00
|
|
|
mod lexed_str;
|
2019-02-21 10:27:45 +00:00
|
|
|
mod token_set;
|
|
|
|
mod syntax_kind;
|
|
|
|
mod event;
|
|
|
|
mod parser;
|
|
|
|
mod grammar;
|
2021-12-25 18:59:02 +00:00
|
|
|
mod input;
|
|
|
|
mod output;
|
2021-12-26 13:47:10 +00:00
|
|
|
mod shortcuts;
|
2019-02-21 10:27:45 +00:00
|
|
|
|
2021-12-12 18:32:58 +00:00
|
|
|
#[cfg(test)]
|
|
|
|
mod tests;
|
|
|
|
|
2019-02-21 10:27:45 +00:00
|
|
|
pub(crate) use token_set::TokenSet;
|
|
|
|
|
internal: replace TreeSink with a data structure
The general theme of this is to make parser a better independent
library.
The specific thing we do here is replacing callback based TreeSink with
a data structure. That is, rather than calling user-provided tree
construction methods, the parser now spits out a very bare-bones tree,
effectively a log of a DFS traversal.
This makes the parser usable without any *specifc* tree sink, and allows
us to, eg, move tests into this crate.
Now, it's also true that this is a distinction without a difference, as
the old and the new interface are equivalent in expressiveness. Still,
this new thing seems somewhat simpler. But yeah, I admit I don't have a
suuper strong motivation here, just a hunch that this is better.
2021-12-19 14:36:23 +00:00
|
|
|
pub use crate::{
|
2021-12-25 18:59:02 +00:00
|
|
|
input::Input,
|
internal: replace TreeSink with a data structure
The general theme of this is to make parser a better independent
library.
The specific thing we do here is replacing callback based TreeSink with
a data structure. That is, rather than calling user-provided tree
construction methods, the parser now spits out a very bare-bones tree,
effectively a log of a DFS traversal.
This makes the parser usable without any *specifc* tree sink, and allows
us to, eg, move tests into this crate.
Now, it's also true that this is a distinction without a difference, as
the old and the new interface are equivalent in expressiveness. Still,
this new thing seems somewhat simpler. But yeah, I admit I don't have a
suuper strong motivation here, just a hunch that this is better.
2021-12-19 14:36:23 +00:00
|
|
|
lexed_str::LexedStr,
|
2021-12-25 18:59:02 +00:00
|
|
|
output::{Output, Step},
|
2021-12-26 13:47:10 +00:00
|
|
|
shortcuts::StrStep,
|
internal: replace TreeSink with a data structure
The general theme of this is to make parser a better independent
library.
The specific thing we do here is replacing callback based TreeSink with
a data structure. That is, rather than calling user-provided tree
construction methods, the parser now spits out a very bare-bones tree,
effectively a log of a DFS traversal.
This makes the parser usable without any *specifc* tree sink, and allows
us to, eg, move tests into this crate.
Now, it's also true that this is a distinction without a difference, as
the old and the new interface are equivalent in expressiveness. Still,
this new thing seems somewhat simpler. But yeah, I admit I don't have a
suuper strong motivation here, just a hunch that this is better.
2021-12-19 14:36:23 +00:00
|
|
|
syntax_kind::SyntaxKind,
|
|
|
|
};
|
2019-02-21 10:27:45 +00:00
|
|
|
|
2021-12-27 12:17:48 +00:00
|
|
|
/// Parses a syntactic construct at the *start* of the input.
///
/// This is used by the macro-by-example parser to implement things like
/// `$i:item`.
///
/// Note that this is generally non-optional -- the result is intentionally not
/// `Option<Output>`. The way MBE works, by the time we *try* to parse `$e:expr`
/// we have already committed to an expression. In other words, this API by
/// design can't be used to implement "rollback and try another alternative"
/// logic.
// Derives mirror `ParserEntryPoint`: a field-less selector enum is cheap to
// copy and is naturally comparable and hashable.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum PrefixEntryPoint {
    /// Selects the `grammar::entry::prefix::vis` routine.
    Vis,
    /// Selects the `grammar::entry::prefix::block` routine.
    Block,
}
|
|
|
|
|
|
|
|
impl PrefixEntryPoint {
|
2021-12-27 12:22:44 +00:00
|
|
|
pub fn parse(&self, input: &Input) -> Output {
|
2021-12-27 12:17:48 +00:00
|
|
|
let entry_point: fn(&'_ mut parser::Parser) = match self {
|
|
|
|
PrefixEntryPoint::Vis => grammar::entry::prefix::vis,
|
2021-12-27 12:39:17 +00:00
|
|
|
PrefixEntryPoint::Block => grammar::entry::prefix::block,
|
2021-12-27 12:17:48 +00:00
|
|
|
};
|
|
|
|
let mut p = parser::Parser::new(input);
|
|
|
|
entry_point(&mut p);
|
|
|
|
let events = p.finish();
|
|
|
|
event::process(events)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-06 15:34:03 +00:00
|
|
|
/// Selects the top-level syntactic construct the rust-analyzer parser should
/// start from.
///
/// The primary consumers of this API are declarative macros: matchers such as
/// `$x:expr` are implemented by calling into the parser with a non-standard
/// entry point.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ParserEntryPoint {
    SourceFile,
    Path,
    Expr,
    Statement,
    StatementOptionalSemi,
    Type,
    Pattern,
    Item,
    Block,
    // NOTE(review): a `Visibility` variant used to sit here and is currently
    // disabled. Visibility parsing appears to be served by
    // `PrefixEntryPoint::Vis` instead -- confirm before re-adding it.
    MetaItem,
    Items,
    Statements,
    Attr,
}
|
|
|
|
|
2021-09-06 15:34:03 +00:00
|
|
|
/// Parse given tokens into the given sink as a rust file.
|
2021-12-25 18:59:02 +00:00
|
|
|
pub fn parse_source_file(inp: &Input) -> Output {
|
|
|
|
parse(inp, ParserEntryPoint::SourceFile)
|
2021-09-06 15:34:03 +00:00
|
|
|
}
|
|
|
|
|
2021-12-25 18:59:02 +00:00
|
|
|
/// Parses the given [`Input`] into [`Output`] assuming that the top-level
|
|
|
|
/// syntactic construct is the given [`ParserEntryPoint`].
|
|
|
|
///
|
|
|
|
/// Both input and output here are fairly abstract. The overall flow is that the
|
|
|
|
/// caller has some "real" tokens, converts them to [`Input`], parses them to
|
|
|
|
/// [`Output`], and then converts that into a "real" tree. The "real" tree is
|
|
|
|
/// made of "real" tokens, so this all hinges on rather tight coordination of
|
|
|
|
/// indices between the four stages.
|
|
|
|
pub fn parse(inp: &Input, entry_point: ParserEntryPoint) -> Output {
|
2021-09-06 15:34:03 +00:00
|
|
|
let entry_point: fn(&'_ mut parser::Parser) = match entry_point {
|
|
|
|
ParserEntryPoint::SourceFile => grammar::entry_points::source_file,
|
|
|
|
ParserEntryPoint::Path => grammar::entry_points::path,
|
|
|
|
ParserEntryPoint::Expr => grammar::entry_points::expr,
|
|
|
|
ParserEntryPoint::Type => grammar::entry_points::type_,
|
|
|
|
ParserEntryPoint::Pattern => grammar::entry_points::pattern,
|
|
|
|
ParserEntryPoint::Item => grammar::entry_points::item,
|
|
|
|
ParserEntryPoint::Block => grammar::entry_points::block_expr,
|
2021-12-27 12:22:44 +00:00
|
|
|
// ParserEntryPoint::Visibility => grammar::entry_points::visibility,
|
2021-09-06 15:34:03 +00:00
|
|
|
ParserEntryPoint::MetaItem => grammar::entry_points::meta_item,
|
|
|
|
ParserEntryPoint::Statement => grammar::entry_points::stmt,
|
|
|
|
ParserEntryPoint::StatementOptionalSemi => grammar::entry_points::stmt_optional_semi,
|
|
|
|
ParserEntryPoint::Items => grammar::entry_points::macro_items,
|
|
|
|
ParserEntryPoint::Statements => grammar::entry_points::macro_stmts,
|
|
|
|
ParserEntryPoint::Attr => grammar::entry_points::attr,
|
2019-09-02 15:51:03 +00:00
|
|
|
};
|
2021-09-06 15:34:03 +00:00
|
|
|
|
2021-12-25 18:59:02 +00:00
|
|
|
let mut p = parser::Parser::new(inp);
|
2021-09-06 15:34:03 +00:00
|
|
|
entry_point(&mut p);
|
|
|
|
let events = p.finish();
|
internal: replace TreeSink with a data structure
The general theme of this is to make parser a better independent
library.
The specific thing we do here is replacing callback based TreeSink with
a data structure. That is, rather than calling user-provided tree
construction methods, the parser now spits out a very bare-bones tree,
effectively a log of a DFS traversal.
This makes the parser usable without any *specifc* tree sink, and allows
us to, eg, move tests into this crate.
Now, it's also true that this is a distinction without a difference, as
the old and the new interface are equivalent in expressiveness. Still,
this new thing seems somewhat simpler. But yeah, I admit I don't have a
suuper strong motivation here, just a hunch that this is better.
2021-12-19 14:36:23 +00:00
|
|
|
event::process(events)
|
2019-04-18 19:49:56 +00:00
|
|
|
}
|
|
|
|
|
2019-02-21 12:24:42 +00:00
|
|
|
/// A parsing function for a specific braced-block.
|
2019-02-21 10:27:45 +00:00
|
|
|
pub struct Reparser(fn(&mut parser::Parser));
|
|
|
|
|
|
|
|
impl Reparser {
|
2019-02-21 12:24:42 +00:00
|
|
|
/// If the node is a braced block, return the corresponding `Reparser`.
|
2019-02-21 10:27:45 +00:00
|
|
|
pub fn for_node(
|
|
|
|
node: SyntaxKind,
|
|
|
|
first_child: Option<SyntaxKind>,
|
|
|
|
parent: Option<SyntaxKind>,
|
|
|
|
) -> Option<Reparser> {
|
|
|
|
grammar::reparser(node, first_child, parent).map(Reparser)
|
|
|
|
}
|
|
|
|
|
2019-02-21 12:24:42 +00:00
|
|
|
/// Re-parse given tokens using this `Reparser`.
|
|
|
|
///
|
|
|
|
/// Tokens must start with `{`, end with `}` and form a valid brace
|
|
|
|
/// sequence.
|
2021-12-25 18:59:02 +00:00
|
|
|
pub fn parse(self, tokens: &Input) -> Output {
|
2019-02-21 10:37:32 +00:00
|
|
|
let Reparser(r) = self;
|
2021-11-14 19:13:44 +00:00
|
|
|
let mut p = parser::Parser::new(tokens);
|
2019-02-21 10:37:32 +00:00
|
|
|
r(&mut p);
|
|
|
|
let events = p.finish();
|
internal: replace TreeSink with a data structure
The general theme of this is to make parser a better independent
library.
The specific thing we do here is replacing callback based TreeSink with
a data structure. That is, rather than calling user-provided tree
construction methods, the parser now spits out a very bare-bones tree,
effectively a log of a DFS traversal.
This makes the parser usable without any *specifc* tree sink, and allows
us to, eg, move tests into this crate.
Now, it's also true that this is a distinction without a difference, as
the old and the new interface are equivalent in expressiveness. Still,
this new thing seems somewhat simpler. But yeah, I admit I don't have a
suuper strong motivation here, just a hunch that this is better.
2021-12-19 14:36:23 +00:00
|
|
|
event::process(events)
|
2019-02-21 10:37:32 +00:00
|
|
|
}
|
2019-02-21 10:27:45 +00:00
|
|
|
}
|