2019-02-21 12:24:42 +00:00
|
|
|
//! Syntax Tree library used throughout the rust analyzer.
|
2018-01-28 01:29:14 +00:00
|
|
|
//!
|
2019-02-21 12:24:42 +00:00
|
|
|
//! Properties:
|
|
|
|
//! - easy and fast incremental re-parsing
|
|
|
|
//! - graceful handling of errors
|
|
|
|
//! - full-fidelity representation (*any* text can be precisely represented as
|
|
|
|
//! a syntax tree)
|
2018-01-28 01:29:14 +00:00
|
|
|
//!
|
2019-02-21 12:24:42 +00:00
|
|
|
//! For more information, see the [RFC]. Current implementation is inspired by
|
|
|
|
//! the [Swift] one.
|
2018-01-28 01:29:14 +00:00
|
|
|
//!
|
2019-02-21 12:24:42 +00:00
|
|
|
//! The most interesting modules here are `syntax_node` (which defines concrete
|
|
|
|
//! syntax tree) and `ast` (which defines abstract syntax tree on top of the
|
2019-06-15 07:24:02 +00:00
|
|
|
//! CST). The actual parser lives in a separate `ra_parser` crate, though the
|
2019-02-21 12:24:42 +00:00
|
|
|
//! lexer lives in this crate.
|
2018-01-28 01:29:14 +00:00
|
|
|
//!
|
2019-02-21 14:35:45 +00:00
|
|
|
//! See `api_walkthrough` test in this file for a quick API tour!
|
|
|
|
//!
|
2019-02-21 12:24:42 +00:00
|
|
|
//! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
|
|
|
|
//! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>
|
2018-01-28 01:29:14 +00:00
|
|
|
|
2019-02-20 13:24:39 +00:00
|
|
|
mod syntax_node;
|
|
|
|
mod syntax_text;
|
|
|
|
mod syntax_error;
|
2019-02-20 12:47:32 +00:00
|
|
|
mod parsing;
|
2018-11-04 14:06:38 +00:00
|
|
|
mod validation;
|
2019-01-23 14:37:10 +00:00
|
|
|
mod ptr;
|
2019-02-20 13:24:39 +00:00
|
|
|
|
|
|
|
pub mod algo;
|
|
|
|
pub mod ast;
|
2019-03-21 17:05:12 +00:00
|
|
|
#[doc(hidden)]
|
|
|
|
pub mod fuzz;
|
2018-07-29 12:16:07 +00:00
|
|
|
|
2019-07-04 20:05:17 +00:00
|
|
|
use std::{fmt::Write, sync::Arc};
|
2019-05-05 08:31:27 +00:00
|
|
|
|
|
|
|
use ra_text_edit::AtomTextEdit;
|
|
|
|
|
|
|
|
use crate::syntax_node::GreenNode;
|
|
|
|
|
2018-10-15 16:55:32 +00:00
|
|
|
pub use crate::{
|
2018-08-25 08:40:17 +00:00
|
|
|
ast::AstNode,
|
2019-07-04 20:05:17 +00:00
|
|
|
parsing::{classify_literal, tokenize, Token},
|
|
|
|
ptr::{AstPtr, SyntaxNodePtr},
|
|
|
|
syntax_error::{Location, SyntaxError, SyntaxErrorKind},
|
|
|
|
syntax_node::{
|
|
|
|
Direction, InsertPosition, SyntaxElement, SyntaxNode, SyntaxToken, SyntaxTreeBuilder,
|
|
|
|
TreeArc, WalkEvent,
|
|
|
|
},
|
2019-02-20 13:16:14 +00:00
|
|
|
syntax_text::SyntaxText,
|
2018-07-29 12:16:07 +00:00
|
|
|
};
|
2019-07-04 20:05:17 +00:00
|
|
|
pub use ra_parser::SyntaxKind;
|
|
|
|
pub use ra_parser::T;
|
|
|
|
pub use rowan::{SmolStr, TextRange, TextUnit};
|
2018-07-29 12:16:07 +00:00
|
|
|
|
2019-05-05 08:31:27 +00:00
|
|
|
/// `Parse` is the result of the parsing: a syntax tree and a collection of
|
|
|
|
/// errors.
|
|
|
|
///
|
|
|
|
/// Note that we always produce a syntax tree, even for completely invalid
|
|
|
|
/// files.
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
|
|
pub struct Parse {
|
2019-07-12 16:41:13 +00:00
|
|
|
tree: TreeArc<SourceFile>,
|
|
|
|
errors: Arc<Vec<SyntaxError>>,
|
2019-05-05 08:31:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Parse {
|
2019-07-12 16:41:13 +00:00
|
|
|
pub fn tree(&self) -> &SourceFile {
|
|
|
|
&*self.tree
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn errors(&self) -> &[SyntaxError] {
|
|
|
|
&*self.errors
|
|
|
|
}
|
|
|
|
|
2019-05-05 08:31:27 +00:00
|
|
|
pub fn ok(self) -> Result<TreeArc<SourceFile>, Arc<Vec<SyntaxError>>> {
|
|
|
|
if self.errors.is_empty() {
|
|
|
|
Ok(self.tree)
|
|
|
|
} else {
|
|
|
|
Err(self.errors)
|
|
|
|
}
|
|
|
|
}
|
2019-05-28 13:59:22 +00:00
|
|
|
|
2019-05-28 14:34:28 +00:00
|
|
|
pub fn reparse(&self, edit: &AtomTextEdit) -> Parse {
|
|
|
|
self.incremental_reparse(edit).unwrap_or_else(|| self.full_reparse(edit))
|
|
|
|
}
|
|
|
|
|
2019-05-28 13:59:22 +00:00
|
|
|
pub fn debug_dump(&self) -> String {
|
|
|
|
let mut buf = self.tree.syntax().debug_dump();
|
|
|
|
for err in self.errors.iter() {
|
2019-05-29 07:12:08 +00:00
|
|
|
writeln!(buf, "error {:?}: {}", err.location(), err.kind()).unwrap();
|
2019-05-28 13:59:22 +00:00
|
|
|
}
|
|
|
|
buf
|
|
|
|
}
|
2019-05-28 14:34:28 +00:00
|
|
|
|
|
|
|
fn incremental_reparse(&self, edit: &AtomTextEdit) -> Option<Parse> {
|
|
|
|
// FIXME: validation errors are not handled here
|
|
|
|
parsing::incremental_reparse(self.tree.syntax(), edit, self.errors.to_vec()).map(
|
|
|
|
|(green_node, errors, _reparsed_range)| Parse {
|
|
|
|
tree: SourceFile::new(green_node),
|
|
|
|
errors: Arc::new(errors),
|
|
|
|
},
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn full_reparse(&self, edit: &AtomTextEdit) -> Parse {
|
|
|
|
let text = edit.apply(self.tree.syntax().text().to_string());
|
|
|
|
SourceFile::parse(&text)
|
|
|
|
}
|
2019-05-05 08:31:27 +00:00
|
|
|
}
|
2018-08-25 09:10:35 +00:00
|
|
|
|
2019-01-07 13:15:47 +00:00
|
|
|
/// `SourceFile` represents a parse tree for a single Rust file.
|
|
|
|
pub use crate::ast::SourceFile;
|
2018-08-25 08:40:17 +00:00
|
|
|
|
2019-01-07 13:15:47 +00:00
|
|
|
impl SourceFile {
|
2019-05-28 13:34:23 +00:00
|
|
|
fn new(green: GreenNode) -> TreeArc<SourceFile> {
|
2019-05-05 08:33:07 +00:00
|
|
|
let root = SyntaxNode::new(green);
|
2018-09-08 15:34:41 +00:00
|
|
|
if cfg!(debug_assertions) {
|
2019-02-21 12:51:22 +00:00
|
|
|
validation::validate_block_structure(&root);
|
2018-09-08 15:34:41 +00:00
|
|
|
}
|
2018-11-07 15:38:43 +00:00
|
|
|
assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
|
2019-01-11 16:59:06 +00:00
|
|
|
TreeArc::cast(root)
|
2018-08-25 08:40:17 +00:00
|
|
|
}
|
2019-01-14 18:30:21 +00:00
|
|
|
|
2019-05-28 14:34:28 +00:00
|
|
|
pub fn parse(text: &str) -> Parse {
|
2019-05-28 14:09:45 +00:00
|
|
|
let (green, mut errors) = parsing::parse_text(text);
|
2019-05-28 13:59:22 +00:00
|
|
|
let tree = SourceFile::new(green);
|
2019-05-28 14:09:45 +00:00
|
|
|
errors.extend(validation::validate(&tree));
|
2019-05-28 13:59:22 +00:00
|
|
|
Parse { tree, errors: Arc::new(errors) }
|
|
|
|
}
|
2018-08-24 16:27:30 +00:00
|
|
|
}
|
2019-02-21 12:51:22 +00:00
|
|
|
|
2019-02-21 14:35:45 +00:00
|
|
|
/// A guided tour of the crate's API, written as a test.
///
/// The assertions are there to document what each call returns rather than
/// to exercise edge cases.
#[test]
fn api_walkthrough() {
    use ast::{ModuleItemOwner, NameOwner};

    // NOTE: the whitespace inside this literal is significant -- the text
    // range asserted for the `1 + 1` expression below (32..37) is computed
    // from it: 1 (newline) + 8 (indent) + 10 (`fn foo() {`) + 1 (newline)
    // + 12 (indent) = 32.
    let source_code = "
        fn foo() {
            1 + 1
        }
    ";
    // `SourceFile` is the main entry point.
    //
    // The `parse` method returns a `Parse` -- a pair of syntax tree and a list
    // of errors. That is, syntax tree is constructed even in presence of errors.
    let parse = SourceFile::parse(source_code);
    assert!(parse.errors.is_empty());

    // Due to the way ownership is set up, owned syntax nodes always live behind
    // a `TreeArc` smart pointer. `TreeArc` is roughly an `std::sync::Arc` which
    // points to the whole file instead of an individual node.
    let file: TreeArc<SourceFile> = parse.tree;

    // `SourceFile` is the root of the syntax tree. We can iterate file's items:
    let mut func = None;
    for item in file.items() {
        match item.kind() {
            ast::ModuleItemKind::FnDef(f) => func = Some(f),
            _ => unreachable!(),
        }
    }
    // The returned items are always references.
    let func: &ast::FnDef = func.unwrap();

    // All nodes implement `ToOwned` trait, with `Owned = TreeArc<Self>`.
    // `to_owned` is a cheap operation: atomic increment.
    let _owned_func: TreeArc<ast::FnDef> = func.to_owned();

    // Each AST node has a bunch of getters for children. All getters return
    // `Option`s though, to account for incomplete code. Some getters are common
    // for several kinds of node. In this case, a trait like `ast::NameOwner`
    // usually exists. By convention, all ast types should be used with `ast::`
    // qualifier.
    let name: Option<&ast::Name> = func.name();
    let name = name.unwrap();
    assert_eq!(name.text(), "foo");

    // Let's get the `1 + 1` expression!
    let block: &ast::Block = func.body().unwrap();
    let expr: &ast::Expr = block.expr().unwrap();

    // "Enum"-like nodes are represented using the "kind" pattern. It allows us
    // to match exhaustively against all flavors of nodes, while maintaining
    // internal representation flexibility. The drawback is that one can't write
    // nested matches as one pattern.
    let bin_expr: &ast::BinExpr = match expr.kind() {
        ast::ExprKind::BinExpr(e) => e,
        _ => unreachable!(),
    };

    // Besides the "typed" AST API, there's an untyped CST one as well.
    // To switch from AST to CST, call `.syntax()` method:
    let expr_syntax: &SyntaxNode = expr.syntax();

    // Note how `expr` and `bin_expr` are in fact the same node underneath:
    assert!(std::ptr::eq(expr_syntax, bin_expr.syntax()));

    // To go from CST to AST, `AstNode::cast` function is used:
    let expr = match ast::Expr::cast(expr_syntax) {
        Some(e) => e,
        None => unreachable!(),
    };

    // Note how expr is also a reference!
    let expr: &ast::Expr = expr;

    // This is possible because the underlying representation is the same:
    assert_eq!(
        expr as *const ast::Expr as *const u8,
        expr_syntax as *const SyntaxNode as *const u8
    );

    // Each syntax node has two properties. The first is a `SyntaxKind`:
    assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);

    // And the second is a text range:
    assert_eq!(expr_syntax.range(), TextRange::from_to(32.into(), 37.into()));

    // You can get node's text as a `SyntaxText` object, which will traverse the
    // tree collecting tokens' text:
    let text: SyntaxText<'_> = expr_syntax.text();
    assert_eq!(text.to_string(), "1 + 1");

    // There's a bunch of traversal methods on `SyntaxNode`:
    assert_eq!(expr_syntax.parent(), Some(block.syntax()));
    assert_eq!(block.syntax().first_child_or_token().map(|it| it.kind()), Some(T!['{']));
    assert_eq!(
        expr_syntax.next_sibling_or_token().map(|it| it.kind()),
        Some(SyntaxKind::WHITESPACE)
    );

    // As well as some iterator helpers:
    let f = expr_syntax.ancestors().find_map(ast::FnDef::cast);
    assert_eq!(f, Some(&*func));
    assert!(expr_syntax.siblings_with_tokens(Direction::Next).any(|it| it.kind() == T!['}']));
    assert_eq!(
        expr_syntax.descendants_with_tokens().count(),
        8, // 5 tokens `1`, ` `, `+`, ` `, `1`
           // 2 child literal expressions: `1`, `1`
           // 1 the node itself: `1 + 1`
    );

    // There's also a `preorder` method with a more fine-grained iteration control:
    let mut buf = String::new();
    let mut indent = 0;
    for event in expr_syntax.preorder_with_tokens() {
        match event {
            WalkEvent::Enter(node) => {
                let text = match node {
                    SyntaxElement::Node(it) => it.text().to_string(),
                    SyntaxElement::Token(it) => it.text().to_string(),
                };
                // `{:indent$}` pads the single space to `indent` columns, so
                // every nesting level shifts the line right by two spaces.
                buf += &format!("{:indent$}{:?} {:?}\n", " ", text, node.kind(), indent = indent);
                indent += 2;
            }
            WalkEvent::Leave(_) => indent -= 2,
        }
    }
    assert_eq!(indent, 0);
    // The nesting inside the expected dump is significant: it mirrors the
    // two-space-per-level indentation produced by the loop above.
    assert_eq!(
        buf.trim(),
        r#"
"1 + 1" BIN_EXPR
  "1" LITERAL
    "1" INT_NUMBER
  " " WHITESPACE
  "+" PLUS
  " " WHITESPACE
  "1" LITERAL
    "1" INT_NUMBER
"#
        .trim()
    );

    // To recursively process the tree, there are three approaches:
    // 1. explicitly call getter methods on AST nodes.
    // 2. use descendants and `AstNode::cast`.
    // 3. use descendants and the visitor.
    //
    // Here's what the second one looks like:
    let exprs_cast: Vec<String> = file
        .syntax()
        .descendants()
        .filter_map(ast::Expr::cast)
        .map(|expr| expr.syntax().text().to_string())
        .collect();

    // An alternative is to use a visitor. The visitor does not do traversal
    // automatically (so it's more akin to a generic lambda) and is constructed
    // from closures. This seems more flexible than a single generated visitor
    // trait.
    use algo::visit::{visitor, Visitor};
    let mut exprs_visit = Vec::new();
    for node in file.syntax().descendants() {
        if let Some(result) =
            visitor().visit::<ast::Expr, _>(|expr| expr.syntax().text().to_string()).accept(node)
        {
            exprs_visit.push(result);
        }
    }
    assert_eq!(exprs_cast, exprs_visit);
}
|