rust-analyzer/crates/parser/src/lib.rs

//! The Rust parser.
//!
//! NOTE: The crate is undergoing refactors, don't believe everything the docs
//! say :-)
//!
//! The parser doesn't know about concrete representation of tokens and syntax
//! trees. It consumes an abstract [`Input`] and produces an abstract [`Output`]
//! instead; lexing is exposed separately through [`LexedStr`].
//!
//! The [`Parser`] struct from the [`parser`] module is a cursor into the
//! sequence of tokens.  Parsing routines use [`Parser`] to inspect current
//! state and advance the parsing.
//!
//! The actual parsing happens in the [`grammar`] module.
//!
//! Tests for this crate live in the `syntax` crate.
//!
//! [`Parser`]: crate::parser::Parser
#![allow(rustdoc::private_intra_doc_links)]

mod lexed_str;
mod token_set;
mod syntax_kind;
mod event;
mod parser;
mod grammar;
mod input;
mod output;
mod shortcuts;

#[cfg(test)]
mod tests;

pub(crate) use token_set::TokenSet;

pub use crate::{
    input::Input,
    lexed_str::LexedStr,
    output::{Output, Step},
    shortcuts::StrStep,
    syntax_kind::SyntaxKind,
};

/// Parse a syntactic construct at the *start* of the input.
///
/// This is used by the macro-by-example parser to implement things like
/// `$i:item`.
///
/// Note that this is generally non-optional -- the result is intentionally not
/// `Option<Output>`. The way MBE works, by the time we *try* to parse `$e:expr`
/// we have already committed to an expression. In other words, this API by
/// design can't be used to implement "rollback and try another alternative"
/// logic.
///
/// The enum is a plain field-less tag, so it derives the same set of traits as
/// [`ParserEntryPoint`] and can be freely copied, compared and hashed.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum PrefixEntryPoint {
    /// Visibility prefix; dispatches to `grammar::entry::prefix::vis`.
    Vis,
    /// Block prefix; dispatches to `grammar::entry::prefix::block`.
    Block,
}

impl PrefixEntryPoint {
    /// Runs the grammar function selected by `self` over `input`.
    ///
    /// A fresh `parser::Parser` is created for `input`, the grammar function
    /// is run against it, and the events it recorded are converted into an
    /// [`Output`] by `event::process`.
    pub fn parse(&self, input: &Input) -> Output {
        let mut cursor = parser::Parser::new(input);

        // Every variant maps to exactly one grammar routine; the match is
        // exhaustive on purpose so a new variant fails to compile here.
        let grammar_fn: fn(&'_ mut parser::Parser) = match self {
            Self::Vis => grammar::entry::prefix::vis,
            Self::Block => grammar::entry::prefix::block,
        };
        grammar_fn(&mut cursor);

        event::process(cursor.finish())
    }
}

/// rust-analyzer parser allows you to choose one of the possible entry points.
///
/// The primary consumers of this API are declarative macros: `$x:expr` matchers
/// are implemented by calling into the parser with a non-standard entry point.
///
/// Each variant is mapped to one function of `grammar::entry_points` by
/// [`parse`].
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum ParserEntryPoint {
    SourceFile,
    Path,
    Expr,
    Statement,
    StatementOptionalSemi,
    Type,
    Pattern,
    Item,
    Block,
    // NOTE: there is intentionally no `Visibility` variant; visibility is
    // parsed through `PrefixEntryPoint::Vis` instead.
    MetaItem,
    Items,
    Statements,
    Attr,
}

/// Parses the given [`Input`] as a Rust source file.
///
/// Shorthand for [`parse`] with [`ParserEntryPoint::SourceFile`]; the result
/// is returned as an [`Output`] value.
pub fn parse_source_file(inp: &Input) -> Output {
    parse(inp, ParserEntryPoint::SourceFile)
}

/// Parses `inp` into an [`Output`], treating the given [`ParserEntryPoint`] as
/// the top-level syntactic construct.
///
/// Both [`Input`] and [`Output`] are fairly abstract. The overall flow is that
/// the caller has some "real" tokens, converts them to [`Input`], parses them
/// to [`Output`], and then converts that into a "real" tree. The "real" tree is
/// made of "real" tokens, so this all hinges on rather tight coordination of
/// indices between the four stages.
pub fn parse(inp: &Input, entry_point: ParserEntryPoint) -> Output {
    let mut cursor = parser::Parser::new(inp);

    // Arms are listed in the declaration order of `ParserEntryPoint`. There is
    // no arm for visibility: that is handled by `PrefixEntryPoint::Vis`.
    let grammar_fn: fn(&'_ mut parser::Parser) = match entry_point {
        ParserEntryPoint::SourceFile => grammar::entry_points::source_file,
        ParserEntryPoint::Path => grammar::entry_points::path,
        ParserEntryPoint::Expr => grammar::entry_points::expr,
        ParserEntryPoint::Statement => grammar::entry_points::stmt,
        ParserEntryPoint::StatementOptionalSemi => grammar::entry_points::stmt_optional_semi,
        ParserEntryPoint::Type => grammar::entry_points::type_,
        ParserEntryPoint::Pattern => grammar::entry_points::pattern,
        ParserEntryPoint::Item => grammar::entry_points::item,
        ParserEntryPoint::Block => grammar::entry_points::block_expr,
        ParserEntryPoint::MetaItem => grammar::entry_points::meta_item,
        ParserEntryPoint::Items => grammar::entry_points::macro_items,
        ParserEntryPoint::Statements => grammar::entry_points::macro_stmts,
        ParserEntryPoint::Attr => grammar::entry_points::attr,
    };
    grammar_fn(&mut cursor);

    // The parser only records events; `event::process` turns them into the
    // final `Output`.
    event::process(cursor.finish())
}

/// A parsing function for a specific braced-block.
///
/// This is a thin wrapper around a function pointer that drives a
/// `parser::Parser`. The field is private: values are obtained from
/// [`Reparser::for_node`] and executed with [`Reparser::parse`].
pub struct Reparser(fn(&mut parser::Parser));

impl Reparser {
    /// If the node is a braced block, return the corresponding `Reparser`.
    pub fn for_node(
        node: SyntaxKind,
        first_child: Option<SyntaxKind>,
        parent: Option<SyntaxKind>,
    ) -> Option<Reparser> {
        grammar::reparser(node, first_child, parent).map(Reparser)
    }

    /// Re-parse given tokens using this `Reparser`.
    ///
    /// Tokens must start with `{`, end with `}` and form a valid brace
    /// sequence.
    pub fn parse(self, tokens: &Input) -> Output {
        let Reparser(r) = self;
        let mut p = parser::Parser::new(tokens);
        r(&mut p);
        let events = p.finish();
        event::process(events)
    }
}
docs 2019-02-21 12:24:42 +00:00			`//! The Rust parser.`
			`//!`
port mbe to soa tokens 2021-12-12 16:06:40 +00:00			`//! NOTE: The crate is undergoing refactors, don't believe everything the docs`
			`//! say :-)`
			`//!`
docs 2019-02-21 12:24:42 +00:00			`//! The parser doesn't know about concrete representation of tokens and syntax`
port mbe to soa tokens 2021-12-12 16:06:40 +00:00			//! trees. Abstract [`TokenSource`] and [`TreeSink`] traits are used instead. As
			`//! a consequence, this crate does not contain a lexer.`
docs 2019-02-21 12:24:42 +00:00			`//!`
tree-wide: fix rustdoc warnings, add some links 2021-08-04 03:57:31 +00:00			//! The [`Parser`] struct from the [`parser`] module is a cursor into the
			//! sequence of tokens. Parsing routines use [`Parser`] to inspect current
			`//! state and advance the parsing.`
docs 2019-02-21 12:24:42 +00:00			`//!`
tree-wide: fix rustdoc warnings, add some links 2021-08-04 03:57:31 +00:00			//! The actual parsing happens in the [`grammar`] module.
docs 2019-02-21 12:24:42 +00:00			`//!`
tree-wide: fix rustdoc warnings, add some links 2021-08-04 03:57:31 +00:00			//! Tests for this crate live in the `syntax` crate.
			`//!`
			//! [`Parser`]: crate::parser::Parser
			`#![allow(rustdoc::private_intra_doc_links)]`
minor: modernize 2021-09-06 15:42:07 +00:00
soa all the things 2021-12-18 12:31:50 +00:00			`mod lexed_str;`
move parser to a separate crate 2019-02-21 10:27:45 +00:00			`mod token_set;`
			`mod syntax_kind;`
			`mod event;`
			`mod parser;`
			`mod grammar;`
internal: rename 2021-12-25 18:59:02 +00:00			`mod input;`
			`mod output;`
internal: move ws attachment logic to the parser crate This has to re-introduce the `sink` pattern, because doing this purely with iterators is awkward :( Maaaybe the event vector was a false start? But, anyway, I like the current factoring more -- it sort-of obvious that we do want to keep ws-attachment business in the parser, and that we also don't want that to depend on the particular tree structure. I think `shortcuts` module achieves that. 2021-12-26 13:47:10 +00:00			`mod shortcuts;`
move parser to a separate crate 2019-02-21 10:27:45 +00:00
move lexing to the parser crate 2021-12-12 18:32:58 +00:00			`#[cfg(test)]`
			`mod tests;`

move parser to a separate crate 2019-02-21 10:27:45 +00:00			`pub(crate) use token_set::TokenSet;`

internal: replace TreeSink with a data structure The general theme of this is to make parser a better independent library. The specific thing we do here is replacing callback based TreeSink with a data structure. That is, rather than calling user-provided tree construction methods, the parser now spits out a very bare-bones tree, effectively a log of a DFS traversal. This makes the parser usable without any specifc tree sink, and allows us to, eg, move tests into this crate. Now, it's also true that this is a distinction without a difference, as the old and the new interface are equivalent in expressiveness. Still, this new thing seems somewhat simpler. But yeah, I admit I don't have a suuper strong motivation here, just a hunch that this is better. 2021-12-19 14:36:23 +00:00			`pub use crate::{`
internal: rename 2021-12-25 18:59:02 +00:00			`input::Input,`
internal: replace TreeSink with a data structure The general theme of this is to make parser a better independent library. The specific thing we do here is replacing callback based TreeSink with a data structure. That is, rather than calling user-provided tree construction methods, the parser now spits out a very bare-bones tree, effectively a log of a DFS traversal. This makes the parser usable without any specifc tree sink, and allows us to, eg, move tests into this crate. Now, it's also true that this is a distinction without a difference, as the old and the new interface are equivalent in expressiveness. Still, this new thing seems somewhat simpler. But yeah, I admit I don't have a suuper strong motivation here, just a hunch that this is better. 2021-12-19 14:36:23 +00:00			`lexed_str::LexedStr,`
internal: rename 2021-12-25 18:59:02 +00:00			`output::{Output, Step},`
internal: move ws attachment logic to the parser crate This has to re-introduce the `sink` pattern, because doing this purely with iterators is awkward :( Maaaybe the event vector was a false start? But, anyway, I like the current factoring more -- it sort-of obvious that we do want to keep ws-attachment business in the parser, and that we also don't want that to depend on the particular tree structure. I think `shortcuts` module achieves that. 2021-12-26 13:47:10 +00:00			`shortcuts::StrStep,`
internal: replace TreeSink with a data structure The general theme of this is to make parser a better independent library. The specific thing we do here is replacing callback based TreeSink with a data structure. That is, rather than calling user-provided tree construction methods, the parser now spits out a very bare-bones tree, effectively a log of a DFS traversal. This makes the parser usable without any specifc tree sink, and allows us to, eg, move tests into this crate. Now, it's also true that this is a distinction without a difference, as the old and the new interface are equivalent in expressiveness. Still, this new thing seems somewhat simpler. But yeah, I admit I don't have a suuper strong motivation here, just a hunch that this is better. 2021-12-19 14:36:23 +00:00			`syntax_kind::SyntaxKind,`
			`};`
move parser to a separate crate 2019-02-21 10:27:45 +00:00
internal: add prefix entry points 2021-12-27 12:17:48 +00:00			`/// Parse a syntactic construct at the start of the input.`
			`///`
			/// This is used by macro-by-example parser to implement things like `$i:item`.
			`///`
			`/// Note that this is generally non-optional -- the result is intentionally not`
			/// `Option<Output>`. The way MBE work, by the time we try to parse `$e:expr`
			`/// we already commit to expression. In other words, this API by design can't be`
			`/// used to implement "rollback and try another alternative" logic.`
internal: move visibility to a prefix entry point 2021-12-27 12:22:44 +00:00			`#[derive(Debug)]`
internal: add prefix entry points 2021-12-27 12:17:48 +00:00			`pub enum PrefixEntryPoint {`
			`Vis,`
internal: move block to prefix entry point 2021-12-27 12:39:17 +00:00			`Block,`
internal: add prefix entry points 2021-12-27 12:17:48 +00:00			`}`

			`impl PrefixEntryPoint {`
internal: move visibility to a prefix entry point 2021-12-27 12:22:44 +00:00			`pub fn parse(&self, input: &Input) -> Output {`
internal: add prefix entry points 2021-12-27 12:17:48 +00:00			`let entry_point: fn(&'_ mut parser::Parser) = match self {`
			`PrefixEntryPoint::Vis => grammar::entry::prefix::vis,`
internal: move block to prefix entry point 2021-12-27 12:39:17 +00:00			`PrefixEntryPoint::Block => grammar::entry::prefix::block,`
internal: add prefix entry points 2021-12-27 12:17:48 +00:00			`};`
			`let mut p = parser::Parser::new(input);`
			`entry_point(&mut p);`
			`let events = p.finish();`
			`event::process(events)`
			`}`
			`}`

internal: make name consistent with usage 2021-09-06 15:34:03 +00:00			`/// rust-analyzer parser allows you to choose one of the possible entry points.`
			`///`
			/// The primary consumer of this API are declarative macros, `$x:expr` matchers
			`/// are implemented by calling into the parser with non-standard entry point.`
Implement concat macro 2020-03-02 06:05:15 +00:00			`#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]`
internal: make name consistent with usage 2021-09-06 15:34:03 +00:00			`pub enum ParserEntryPoint {`
			`SourceFile,`
simplify 2019-09-02 15:51:03 +00:00			`Path,`
			`Expr,`
			`Statement,`
parser,syntax: Add separate parser for stmt with optional semicolon Adjusting `grammar::fragments::stmt` to Optional or Yes will break original functionality and tests. 2020-11-17 19:02:46 +00:00			`StatementOptionalSemi,`
simplify 2019-09-02 15:51:03 +00:00			`Type,`
			`Pattern,`
			`Item,`
			`Block,`
internal: move visibility to a prefix entry point 2021-12-27 12:22:44 +00:00			`// Visibility,`
simplify 2019-09-02 15:51:03 +00:00			`MetaItem,`
			`Items,`
			`Statements,`
Implement `RawAttr::filter` 2020-12-18 17:58:42 +00:00			`Attr,`
simplify 2019-09-02 15:51:03 +00:00			`}`

internal: make name consistent with usage 2021-09-06 15:34:03 +00:00			`/// Parse given tokens into the given sink as a rust file.`
internal: rename 2021-12-25 18:59:02 +00:00			`pub fn parse_source_file(inp: &Input) -> Output {`
			`parse(inp, ParserEntryPoint::SourceFile)`
internal: make name consistent with usage 2021-09-06 15:34:03 +00:00			`}`

internal: rename 2021-12-25 18:59:02 +00:00			/// Parses the given [`Input`] into [`Output`] assuming that the top-level
			/// syntactic construct is the given [`ParserEntryPoint`].
			`///`
			`/// Both input and output here are fairly abstract. The overall flow is that the`
			/// caller has some "real" tokens, converts them to [`Input`], parses them to
			/// [`Output`], and then converts that into a "real" tree. The "real" tree is
			`/// made of "real" tokens, so this all hinges on rather tight coordination of`
			`/// indices between the four stages.`
			`pub fn parse(inp: &Input, entry_point: ParserEntryPoint) -> Output {`
internal: make name consistent with usage 2021-09-06 15:34:03 +00:00			`let entry_point: fn(&'_ mut parser::Parser) = match entry_point {`
			`ParserEntryPoint::SourceFile => grammar::entry_points::source_file,`
			`ParserEntryPoint::Path => grammar::entry_points::path,`
			`ParserEntryPoint::Expr => grammar::entry_points::expr,`
			`ParserEntryPoint::Type => grammar::entry_points::type_,`
			`ParserEntryPoint::Pattern => grammar::entry_points::pattern,`
			`ParserEntryPoint::Item => grammar::entry_points::item,`
			`ParserEntryPoint::Block => grammar::entry_points::block_expr,`
internal: move visibility to a prefix entry point 2021-12-27 12:22:44 +00:00			`// ParserEntryPoint::Visibility => grammar::entry_points::visibility,`
internal: make name consistent with usage 2021-09-06 15:34:03 +00:00			`ParserEntryPoint::MetaItem => grammar::entry_points::meta_item,`
			`ParserEntryPoint::Statement => grammar::entry_points::stmt,`
			`ParserEntryPoint::StatementOptionalSemi => grammar::entry_points::stmt_optional_semi,`
			`ParserEntryPoint::Items => grammar::entry_points::macro_items,`
			`ParserEntryPoint::Statements => grammar::entry_points::macro_stmts,`
			`ParserEntryPoint::Attr => grammar::entry_points::attr,`
simplify 2019-09-02 15:51:03 +00:00			`};`
internal: make name consistent with usage 2021-09-06 15:34:03 +00:00
internal: rename 2021-12-25 18:59:02 +00:00			`let mut p = parser::Parser::new(inp);`
internal: make name consistent with usage 2021-09-06 15:34:03 +00:00			`entry_point(&mut p);`
			`let events = p.finish();`
internal: replace TreeSink with a data structure The general theme of this is to make parser a better independent library. The specific thing we do here is replacing callback based TreeSink with a data structure. That is, rather than calling user-provided tree construction methods, the parser now spits out a very bare-bones tree, effectively a log of a DFS traversal. This makes the parser usable without any specifc tree sink, and allows us to, eg, move tests into this crate. Now, it's also true that this is a distinction without a difference, as the old and the new interface are equivalent in expressiveness. Still, this new thing seems somewhat simpler. But yeah, I admit I don't have a suuper strong motivation here, just a hunch that this is better. 2021-12-19 14:36:23 +00:00			`event::process(events)`
Add expr, pat, ty and macro_stmts 2019-04-18 19:49:56 +00:00			`}`

docs 2019-02-21 12:24:42 +00:00			`/// A parsing function for a specific braced-block.`
move parser to a separate crate 2019-02-21 10:27:45 +00:00			`pub struct Reparser(fn(&mut parser::Parser));`

			`impl Reparser {`
docs 2019-02-21 12:24:42 +00:00			/// If the node is a braced block, return the corresponding `Reparser`.
move parser to a separate crate 2019-02-21 10:27:45 +00:00			`pub fn for_node(`
			`node: SyntaxKind,`
			`first_child: Option<SyntaxKind>,`
			`parent: Option<SyntaxKind>,`
			`) -> Option<Reparser> {`
			`grammar::reparser(node, first_child, parent).map(Reparser)`
			`}`

docs 2019-02-21 12:24:42 +00:00			/// Re-parse given tokens using this `Reparser`.
			`///`
			/// Tokens must start with `{`, end with `}` and form a valid brace
			`/// sequence.`
internal: rename 2021-12-25 18:59:02 +00:00			`pub fn parse(self, tokens: &Input) -> Output {`
fix compilation 2019-02-21 10:37:32 +00:00			`let Reparser(r) = self;`
Switch parser to use tokens 2021-11-14 19:13:44 +00:00			`let mut p = parser::Parser::new(tokens);`
fix compilation 2019-02-21 10:37:32 +00:00			`r(&mut p);`
			`let events = p.finish();`
internal: replace TreeSink with a data structure The general theme of this is to make parser a better independent library. The specific thing we do here is replacing callback based TreeSink with a data structure. That is, rather than calling user-provided tree construction methods, the parser now spits out a very bare-bones tree, effectively a log of a DFS traversal. This makes the parser usable without any specifc tree sink, and allows us to, eg, move tests into this crate. Now, it's also true that this is a distinction without a difference, as the old and the new interface are equivalent in expressiveness. Still, this new thing seems somewhat simpler. But yeah, I admit I don't have a suuper strong motivation here, just a hunch that this is better. 2021-12-19 14:36:23 +00:00			`event::process(events)`
fix compilation 2019-02-21 10:37:32 +00:00			`}`
move parser to a separate crate 2019-02-21 10:27:45 +00:00			`}`