mirror of
https://github.com/rust-lang/rust-analyzer
synced 2024-11-15 01:17:27 +00:00
Merge #870
870: docs r=matklad a=matklad Co-authored-by: Aleksey Kladov <aleksey.kladov@gmail.com>
This commit is contained in:
commit
782cb43c14
11 changed files with 143 additions and 98 deletions
|
@ -113,12 +113,11 @@ pub(super) fn process(sink: &mut dyn TreeSink, mut events: Vec<Event>) {
|
|||
// append `B`'s forward_parent `C` in the next stage.
|
||||
}
|
||||
|
||||
for (j, kind) in forward_parents.drain(..).rev().enumerate() {
|
||||
let is_root_node = i == 0 && j == 0;
|
||||
sink.start_branch(kind, is_root_node);
|
||||
for kind in forward_parents.drain(..).rev() {
|
||||
sink.start_branch(kind);
|
||||
}
|
||||
}
|
||||
Event::Finish => sink.finish_branch(i == events.len() - 1),
|
||||
Event::Finish => sink.finish_branch(),
|
||||
Event::Token { kind, n_raw_tokens } => {
|
||||
sink.leaf(kind, n_raw_tokens);
|
||||
}
|
||||
|
|
|
@ -1,3 +1,17 @@
|
|||
//! The Rust parser.
|
||||
//!
|
||||
//! The parser doesn't know about concrete representation of tokens and syntax
|
||||
//! trees. Abstract `TokenSource` and `TreeSink` traits are used instead. As a
|
||||
//! consequence, this crates does not contain a lexer.
|
||||
//!
|
||||
//! The `Parser` struct from the `parser` module is a cursor into the sequence
|
||||
//! of tokens. Parsing routines use `Parser` to inspect current state and
|
||||
//! advance the parsing.
|
||||
//!
|
||||
//! The actual parsing happens in the `grammar` module.
|
||||
//!
|
||||
//! Tests for this crate live in `ra_syntax` crate.
|
||||
|
||||
#[macro_use]
|
||||
mod token_set;
|
||||
mod syntax_kind;
|
||||
|
@ -12,30 +26,34 @@ pub use syntax_kind::SyntaxKind;
|
|||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct ParseError(pub String);
|
||||
|
||||
/// `TokenSource` abstracts the source of the tokens parser operates one.
|
||||
///
|
||||
/// Hopefully this will allow us to treat text and token trees in the same way!
|
||||
pub trait TokenSource {
|
||||
/// What is the current token?
|
||||
fn token_kind(&self, pos: usize) -> SyntaxKind;
|
||||
/// Is the current token joined to the next one (`> >` vs `>>`).
|
||||
fn is_token_joint_to_next(&self, pos: usize) -> bool;
|
||||
/// Is the current token a specified keyword?
|
||||
fn is_keyword(&self, pos: usize, kw: &str) -> bool;
|
||||
}
|
||||
|
||||
/// `TreeSink` abstracts details of a particular syntax tree implementation.
|
||||
pub trait TreeSink {
|
||||
/// Adds new leaf to the current branch.
|
||||
fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8);
|
||||
|
||||
/// Start new branch and make it current.
|
||||
fn start_branch(&mut self, kind: SyntaxKind, root: bool);
|
||||
fn start_branch(&mut self, kind: SyntaxKind);
|
||||
|
||||
/// Finish current branch and restore previous
|
||||
/// branch as current.
|
||||
fn finish_branch(&mut self, root: bool);
|
||||
fn finish_branch(&mut self);
|
||||
|
||||
fn error(&mut self, error: ParseError);
|
||||
}
|
||||
|
||||
/// `TokenSource` abstracts the source of the tokens parser operates one.
|
||||
///
|
||||
/// Hopefully this will allow us to treat text and token trees in the same way!
|
||||
pub trait TokenSource {
|
||||
fn token_kind(&self, pos: usize) -> SyntaxKind;
|
||||
fn is_token_joint_to_next(&self, pos: usize) -> bool;
|
||||
fn is_keyword(&self, pos: usize, kw: &str) -> bool;
|
||||
}
|
||||
|
||||
/// Parse given tokens into the given sink as a rust file.
|
||||
pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) {
|
||||
let mut p = parser::Parser::new(token_source);
|
||||
grammar::root(&mut p);
|
||||
|
@ -43,9 +61,11 @@ pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) {
|
|||
event::process(tree_sink, events);
|
||||
}
|
||||
|
||||
/// A parsing function for a specific braced-block.
|
||||
pub struct Reparser(fn(&mut parser::Parser));
|
||||
|
||||
impl Reparser {
|
||||
/// If the node is a braced block, return the corresponding `Reparser`.
|
||||
pub fn for_node(
|
||||
node: SyntaxKind,
|
||||
first_child: Option<SyntaxKind>,
|
||||
|
@ -54,6 +74,10 @@ impl Reparser {
|
|||
grammar::reparser(node, first_child, parent).map(Reparser)
|
||||
}
|
||||
|
||||
/// Re-parse given tokens using this `Reparser`.
|
||||
///
|
||||
/// Tokens must start with `{`, end with `}` and form a valid brace
|
||||
/// sequence.
|
||||
pub fn parse(self, token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) {
|
||||
let Reparser(r) = self;
|
||||
let mut p = parser::Parser::new(token_source);
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use crate::SyntaxKind;
|
||||
|
||||
/// A bit-set of `SyntaxKind`s
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct TokenSet(u128);
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ pub fn find_covering_node(root: &SyntaxNode, range: TextRange) -> &SyntaxNode {
|
|||
SyntaxNode::from_repr(root.0.covering_node(range))
|
||||
}
|
||||
|
||||
// Replace with `std::iter::successors` in `1.34.0`
|
||||
pub fn generate<T>(seed: Option<T>, step: impl Fn(&T) -> Option<T>) -> impl Iterator<Item = T> {
|
||||
::itertools::unfold(seed, move |slot| {
|
||||
slot.take().map(|curr| {
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
//! Abstract Syntax Tree, layered on top of untyped `SyntaxNode`s
|
||||
mod generated;
|
||||
|
||||
use std::marker::PhantomData;
|
||||
|
|
|
@ -1,20 +1,21 @@
|
|||
//! An experimental implementation of [Rust RFC#2256 libsyntax2.0][rfc#2256].
|
||||
//! Syntax Tree library used throughout the rust analyzer.
|
||||
//!
|
||||
//! The intent is to be an IDE-ready parser, i.e. one that offers
|
||||
//! Properties:
|
||||
//! - easy and fast incremental re-parsing
|
||||
//! - graceful handling of errors
|
||||
//! - full-fidelity representation (*any* text can be precisely represented as
|
||||
//! a syntax tree)
|
||||
//!
|
||||
//! - easy and fast incremental re-parsing,
|
||||
//! - graceful handling of errors, and
|
||||
//! - maintains all information in the source file.
|
||||
//! For more information, see the [RFC]. Current implementation is inspired by
|
||||
//! the [Swift] one.
|
||||
//!
|
||||
//! For more information, see [the RFC][rfc#2265], or [the working draft][RFC.md].
|
||||
//! The most interesting modules here are `syntax_node` (which defines concrete
|
||||
//! syntax tree) and `ast` (which defines abstract syntax tree on top of the
|
||||
//! CST). The actual parser live in a separate `ra_parser` crate, thought the
|
||||
//! lexer lives in this crate.
|
||||
//!
|
||||
//! [rfc#2256]: <https://github.com/rust-lang/rfcs/pull/2256>
|
||||
//! [RFC.md]: <https://github.com/matklad/libsyntax2/blob/master/docs/RFC.md>
|
||||
|
||||
#![forbid(missing_debug_implementations, unconditional_recursion, future_incompatible)]
|
||||
#![deny(bad_style, missing_docs)]
|
||||
#![allow(missing_docs)]
|
||||
//#![warn(unreachable_pub)] // rust-lang/rust#47816
|
||||
//! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
|
||||
//! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>
|
||||
|
||||
mod syntax_node;
|
||||
mod syntax_text;
|
||||
|
|
|
@ -1,17 +1,18 @@
|
|||
mod builder;
|
||||
//! Lexing, bridging to ra_parser (which does the actual parsing) and
|
||||
//! incremental reparsing.
|
||||
|
||||
mod lexer;
|
||||
mod input;
|
||||
mod builder;
|
||||
mod reparsing;
|
||||
|
||||
use ra_parser::{parse, ParseError};
|
||||
|
||||
use crate::{
|
||||
SyntaxKind, SyntaxError,
|
||||
SyntaxError,
|
||||
syntax_node::GreenNode,
|
||||
parsing::{
|
||||
builder::TreeBuilder,
|
||||
input::ParserInput,
|
||||
},
|
||||
syntax_node::GreenNode,
|
||||
};
|
||||
|
||||
pub use self::lexer::{tokenize, Token};
|
||||
|
@ -22,37 +23,6 @@ pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
|
|||
let tokens = tokenize(&text);
|
||||
let token_source = ParserInput::new(text, &tokens);
|
||||
let mut tree_sink = TreeBuilder::new(text, &tokens);
|
||||
parse(&token_source, &mut tree_sink);
|
||||
ra_parser::parse(&token_source, &mut tree_sink);
|
||||
tree_sink.finish()
|
||||
}
|
||||
|
||||
/// `TreeSink` abstracts details of a particular syntax tree implementation.
|
||||
trait TreeSink {
|
||||
type Tree;
|
||||
|
||||
/// Adds new leaf to the current branch.
|
||||
fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8);
|
||||
|
||||
/// Start new branch and make it current.
|
||||
fn start_branch(&mut self, kind: SyntaxKind, root: bool);
|
||||
|
||||
/// Finish current branch and restore previous
|
||||
/// branch as current.
|
||||
fn finish_branch(&mut self, root: bool);
|
||||
|
||||
fn error(&mut self, error: ParseError);
|
||||
|
||||
/// Complete tree building. Make sure that
|
||||
/// `start_branch` and `finish_branch` calls
|
||||
/// are paired!
|
||||
fn finish(self) -> Self::Tree;
|
||||
}
|
||||
|
||||
/// `TokenSource` abstracts the source of the tokens parser operates one.
|
||||
///
|
||||
/// Hopefully this will allow us to treat text and token trees in the same way!
|
||||
trait TokenSource {
|
||||
fn token_kind(&self, pos: usize) -> SyntaxKind;
|
||||
fn is_token_joint_to_next(&self, pos: usize) -> bool;
|
||||
fn is_keyword(&self, pos: usize, kw: &str) -> bool;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
use std::mem;
|
||||
|
||||
use ra_parser::{TreeSink, ParseError};
|
||||
use rowan::GreenNodeBuilder;
|
||||
|
||||
use crate::{
|
||||
SmolStr, SyntaxError, SyntaxErrorKind, TextUnit, TextRange,
|
||||
|
@ -7,19 +10,32 @@ use crate::{
|
|||
syntax_node::{GreenNode, RaTypes},
|
||||
};
|
||||
|
||||
use rowan::GreenNodeBuilder;
|
||||
|
||||
/// Bridges the parser with our specific syntax tree representation.
|
||||
///
|
||||
/// `TreeBuilder` also handles attachment of trivia (whitespace) to nodes.
|
||||
pub(crate) struct TreeBuilder<'a> {
|
||||
text: &'a str,
|
||||
tokens: &'a [Token],
|
||||
text_pos: TextUnit,
|
||||
token_pos: usize,
|
||||
state: State,
|
||||
errors: Vec<SyntaxError>,
|
||||
inner: GreenNodeBuilder<RaTypes>,
|
||||
}
|
||||
|
||||
enum State {
|
||||
PendingStart,
|
||||
Normal,
|
||||
PendingFinish,
|
||||
}
|
||||
|
||||
impl<'a> TreeSink for TreeBuilder<'a> {
|
||||
fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) {
|
||||
match mem::replace(&mut self.state, State::Normal) {
|
||||
State::PendingStart => unreachable!(),
|
||||
State::PendingFinish => self.inner.finish_internal(),
|
||||
State::Normal => (),
|
||||
}
|
||||
self.eat_trivias();
|
||||
let n_tokens = n_tokens as usize;
|
||||
let len = self.tokens[self.token_pos..self.token_pos + n_tokens]
|
||||
|
@ -29,11 +45,18 @@ impl<'a> TreeSink for TreeBuilder<'a> {
|
|||
self.do_leaf(kind, len, n_tokens);
|
||||
}
|
||||
|
||||
fn start_branch(&mut self, kind: SyntaxKind, root: bool) {
|
||||
if root {
|
||||
self.inner.start_internal(kind);
|
||||
return;
|
||||
fn start_branch(&mut self, kind: SyntaxKind) {
|
||||
match mem::replace(&mut self.state, State::Normal) {
|
||||
State::PendingStart => {
|
||||
self.inner.start_internal(kind);
|
||||
// No need to attach trivias to previous node: there is no
|
||||
// previous node.
|
||||
return;
|
||||
}
|
||||
State::PendingFinish => self.inner.finish_internal(),
|
||||
State::Normal => (),
|
||||
}
|
||||
|
||||
let n_trivias =
|
||||
self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
|
||||
let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
|
||||
|
@ -54,11 +77,12 @@ impl<'a> TreeSink for TreeBuilder<'a> {
|
|||
self.eat_n_trivias(n_attached_trivias);
|
||||
}
|
||||
|
||||
fn finish_branch(&mut self, root: bool) {
|
||||
if root {
|
||||
self.eat_trivias()
|
||||
fn finish_branch(&mut self) {
|
||||
match mem::replace(&mut self.state, State::PendingFinish) {
|
||||
State::PendingStart => unreachable!(),
|
||||
State::PendingFinish => self.inner.finish_internal(),
|
||||
State::Normal => (),
|
||||
}
|
||||
self.inner.finish_internal();
|
||||
}
|
||||
|
||||
fn error(&mut self, error: ParseError) {
|
||||
|
@ -74,12 +98,21 @@ impl<'a> TreeBuilder<'a> {
|
|||
tokens,
|
||||
text_pos: 0.into(),
|
||||
token_pos: 0,
|
||||
state: State::PendingStart,
|
||||
errors: Vec::new(),
|
||||
inner: GreenNodeBuilder::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn finish(self) -> (GreenNode, Vec<SyntaxError>) {
|
||||
pub(super) fn finish(mut self) -> (GreenNode, Vec<SyntaxError>) {
|
||||
match mem::replace(&mut self.state, State::Normal) {
|
||||
State::PendingFinish => {
|
||||
self.eat_trivias();
|
||||
self.inner.finish_internal()
|
||||
}
|
||||
State::PendingStart | State::Normal => unreachable!(),
|
||||
}
|
||||
|
||||
(self.inner.finish(), self.errors)
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,26 @@ use crate::{
|
|||
parsing::lexer::Token,
|
||||
};
|
||||
|
||||
pub(crate) struct ParserInput<'t> {
|
||||
text: &'t str,
|
||||
/// start position of each token(expect whitespace and comment)
|
||||
/// ```non-rust
|
||||
/// struct Foo;
|
||||
/// ^------^---
|
||||
/// | | ^-
|
||||
/// 0 7 10
|
||||
/// ```
|
||||
/// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
|
||||
start_offsets: Vec<TextUnit>,
|
||||
/// non-whitespace/comment tokens
|
||||
/// ```non-rust
|
||||
/// struct Foo {}
|
||||
/// ^^^^^^ ^^^ ^^
|
||||
/// ```
|
||||
/// tokens: `[struct, Foo, {, }]`
|
||||
tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
impl<'t> TokenSource for ParserInput<'t> {
|
||||
fn token_kind(&self, pos: usize) -> SyntaxKind {
|
||||
if !(pos < self.tokens.len()) {
|
||||
|
@ -28,26 +48,6 @@ impl<'t> TokenSource for ParserInput<'t> {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) struct ParserInput<'t> {
|
||||
text: &'t str,
|
||||
/// start position of each token(expect whitespace and comment)
|
||||
/// ```non-rust
|
||||
/// struct Foo;
|
||||
/// ^------^---
|
||||
/// | | ^-
|
||||
/// 0 7 10
|
||||
/// ```
|
||||
/// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
|
||||
start_offsets: Vec<TextUnit>,
|
||||
/// non-whitespace/comment tokens
|
||||
/// ```non-rust
|
||||
/// struct Foo {}
|
||||
/// ^^^^^^ ^^^ ^^
|
||||
/// ```
|
||||
/// tokens: `[struct, Foo, {, }]`
|
||||
tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
impl<'t> ParserInput<'t> {
|
||||
/// Generate input from tokens(expect comment and whitespace).
|
||||
pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
|
||||
|
|
|
@ -1,11 +1,18 @@
|
|||
//! Implementation of incremental re-parsing.
|
||||
//!
|
||||
//! We use two simple strategies for this:
|
||||
//! - if the edit modifies only a single token (like changing an identifier's
|
||||
//! letter), we replace only this token.
|
||||
//! - otherwise, we search for the nearest `{}` block which contains the edit
|
||||
//! and try to parse only this block.
|
||||
|
||||
use ra_text_edit::AtomTextEdit;
|
||||
use ra_parser::Reparser;
|
||||
|
||||
use crate::{
|
||||
SyntaxKind::*, TextRange, TextUnit,
|
||||
SyntaxKind::*, TextRange, TextUnit, SyntaxError,
|
||||
algo,
|
||||
syntax_node::{GreenNode, SyntaxNode},
|
||||
syntax_error::SyntaxError,
|
||||
parsing::{
|
||||
input::ParserInput,
|
||||
builder::TreeBuilder,
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
//! This module defines Concrete Syntax Tree (CST), used by rust-analyzer.
|
||||
//!
|
||||
//! The CST includes comments and whitespace, provides a single node type,
|
||||
//! `SyntaxNode`, and a basic traversal API (parent, children, siblings).
|
||||
//!
|
||||
//! The *real* implementation is in the (language-agnostic) `rowan` crate, this
|
||||
//! modules just wraps its API.
|
||||
|
||||
use std::{fmt, borrow::Borrow};
|
||||
|
||||
use rowan::{Types, TransparentNewType};
|
||||
|
|
Loading…
Reference in a new issue