mirror of
https://github.com/rust-lang/rust-analyzer
synced 2024-12-25 20:43:21 +00:00
Minor tweaks
This commit is contained in:
parent
80c3e57f96
commit
2c74af7ddc
1 changed files with 128 additions and 119 deletions
233
rfc.md
233
rfc.md
|
@ -38,8 +38,8 @@ be `0.1.0`.
|
||||||
|
|
||||||
## Reusability
|
## Reusability
|
||||||
|
|
||||||
In theory, parsing can be a pure function, which takes a `&str` as an
|
In theory, the parser can be a pure function, which takes a `&str` as
|
||||||
input, and produces a `ParseTree` as an output.
|
an input, and produces a `ParseTree` as an output.
|
||||||
|
|
||||||
This is great for reusability: for example, you can compile this
|
This is great for reusability: for example, you can compile this
|
||||||
function to WASM and use it for fast client-side validation of syntax
|
function to WASM and use it for fast client-side validation of syntax
|
||||||
|
@ -64,7 +64,7 @@ Unfortunately, the current libsyntax is far from this ideal. For
|
||||||
example, even the lexer makes use of the `FileMap` which is
|
example, even the lexer makes use of the `FileMap` which is
|
||||||
essentially a global state of the compiler which represents all known
|
essentially a global state of the compiler which represents all known
|
||||||
files. As a data point, it turned out to be easier to move `rustfmt`
|
files. As a data point, it turned out to be easier to move `rustfmt`
|
||||||
inside of main `rustc` repository than to move libsyntax outside!
|
into the main `rustc` repository than to move libsyntax outside!
|
||||||
|
|
||||||
|
|
||||||
## IDE support
|
## IDE support
|
||||||
|
@ -86,9 +86,8 @@ necessary to correctly handle certain code-editing actions like
|
||||||
autoindentation or joining lines. An IDE must also be able to produce
|
autoindentation or joining lines. An IDE must also be able to produce
|
||||||
partial parse trees when some input is missing or invalid.
|
partial parse trees when some input is missing or invalid.
|
||||||
|
|
||||||
Currently rustc uses the AST approach, which preserves the source code
|
Currently rustc uses the AST approach, and preserves some of the
|
||||||
information to some extent by storing spans in the AST.
|
source code information in the form of spans in the AST.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Guide-level explanation
|
# Guide-level explanation
|
||||||
|
@ -114,8 +113,8 @@ compiler.
|
||||||
## Untyped Tree
|
## Untyped Tree
|
||||||
|
|
||||||
The main idea is to store the minimal amount of information in the
|
The main idea is to store the minimal amount of information in the
|
||||||
tree itself, and instead lean heavily on the source code string for
|
tree itself, and instead lean heavily on the source code for the
|
||||||
the actual data about identifier names, constant values etc.
|
actual data about identifier names, constant values etc.
|
||||||
|
|
||||||
All nodes in the tree are of the same type and store a constant for
|
All nodes in the tree are of the same type and store a constant for
|
||||||
the syntactic category of the element and a range in the source code.
|
the syntactic category of the element and a range in the source code.
|
||||||
|
@ -129,70 +128,70 @@ syntactic categories
|
||||||
pub struct NodeKind(u16);
|
pub struct NodeKind(u16);
|
||||||
|
|
||||||
pub struct File {
|
pub struct File {
|
||||||
text: String,
|
text: String,
|
||||||
nodes: Vec<NodeData>,
|
nodes: Vec<NodeData>,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct NodeData {
|
struct NodeData {
|
||||||
kind: NodeKind,
|
kind: NodeKind,
|
||||||
range: (u32, u32),
|
range: (u32, u32),
|
||||||
parent: Option<u32>,
|
parent: Option<u32>,
|
||||||
first_child: Option<u32>,
|
first_child: Option<u32>,
|
||||||
next_sibling: Option<u32>,
|
next_sibling: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub struct Node<'f> {
|
pub struct Node<'f> {
|
||||||
file: &'f File,
|
file: &'f File,
|
||||||
idx: u32,
|
idx: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Children<'f> {
|
pub struct Children<'f> {
|
||||||
next: Option<Node<'f>>,
|
next: Option<Node<'f>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl File {
|
impl File {
|
||||||
pub fn root<'f>(&'f self) -> Node<'f> {
|
pub fn root<'f>(&'f self) -> Node<'f> {
|
||||||
assert!(!self.nodes.is_empty());
|
assert!(!self.nodes.is_empty());
|
||||||
Node { file: self, idx: 0 }
|
Node { file: self, idx: 0 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'f> Node<'f> {
|
impl<'f> Node<'f> {
|
||||||
pub fn kind(&self) -> NodeKind {
|
pub fn kind(&self) -> NodeKind {
|
||||||
self.data().kind
|
self.data().kind
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn text(&self) -> &'f str {
|
pub fn text(&self) -> &'f str {
|
||||||
let (start, end) = self.data().range;
|
let (start, end) = self.data().range;
|
||||||
&self.file.text[start as usize..end as usize]
|
&self.file.text[start as usize..end as usize]
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parent(&self) -> Option<Node<'f>> {
|
pub fn parent(&self) -> Option<Node<'f>> {
|
||||||
self.as_node(self.data().parent)
|
self.as_node(self.data().parent)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn children(&self) -> Children<'f> {
|
pub fn children(&self) -> Children<'f> {
|
||||||
Children { next: self.as_node(self.data().first_child) }
|
Children { next: self.as_node(self.data().first_child) }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn data(&self) -> &'f NodeData {
|
fn data(&self) -> &'f NodeData {
|
||||||
&self.file.nodes[self.idx as usize]
|
&self.file.nodes[self.idx as usize]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn as_node(&self, idx: Option<u32>) -> Option<Node<'f>> {
|
fn as_node(&self, idx: Option<u32>) -> Option<Node<'f>> {
|
||||||
idx.map(|idx| Node { file: self.file, idx })
|
idx.map(|idx| Node { file: self.file, idx })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'f> Iterator for Children<'f> {
|
impl<'f> Iterator for Children<'f> {
|
||||||
type Item = Node<'f>;
|
type Item = Node<'f>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Node<'f>> {
|
fn next(&mut self) -> Option<Node<'f>> {
|
||||||
let next = self.next;
|
let next = self.next;
|
||||||
self.next = next.and_then(|node| node.as_node(node.data().next_sibling));
|
self.next = next.and_then(|node| node.as_node(node.data().next_sibling));
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const ERROR: NodeKind = NodeKind(0);
|
pub const ERROR: NodeKind = NodeKind(0);
|
||||||
|
@ -215,10 +214,10 @@ Here is a rust snippet and the corresponding parse tree:
|
||||||
|
|
||||||
```rust
|
```rust
|
||||||
struct Foo {
|
struct Foo {
|
||||||
field1: u32,
|
field1: u32,
|
||||||
&
|
&
|
||||||
// non-doc comment
|
// non-doc comment
|
||||||
field2:
|
field2:
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -227,30 +226,30 @@ struct Foo {
|
||||||
FILE
|
FILE
|
||||||
STRUCT_DEF
|
STRUCT_DEF
|
||||||
STRUCT_KW
|
STRUCT_KW
|
||||||
WHITESPACE
|
WHITESPACE
|
||||||
IDENT
|
IDENT
|
||||||
WHITESPACE
|
WHITESPACE
|
||||||
L_CURLY
|
L_CURLY
|
||||||
WHITESPACE
|
WHITESPACE
|
||||||
FIELD_DEF
|
FIELD_DEF
|
||||||
IDENT
|
IDENT
|
||||||
COLON
|
COLON
|
||||||
WHITESPACE
|
WHITESPACE
|
||||||
TYPE_REF
|
TYPE_REF
|
||||||
IDENT
|
IDENT
|
||||||
COMMA
|
COMMA
|
||||||
WHITESPACE
|
WHITESPACE
|
||||||
ERROR
|
ERROR
|
||||||
AMP
|
AMP
|
||||||
WHITESPACE
|
WHITESPACE
|
||||||
FIELD_DEF
|
FIELD_DEF
|
||||||
LINE_COMMENT
|
LINE_COMMENT
|
||||||
WHITESPACE
|
WHITESPACE
|
||||||
IDENT
|
IDENT
|
||||||
COLON
|
COLON
|
||||||
ERROR
|
ERROR
|
||||||
WHITESPACE
|
WHITESPACE
|
||||||
R_CURLY
|
R_CURLY
|
||||||
```
|
```
|
||||||
|
|
||||||
Note several features of the tree:
|
Note several features of the tree:
|
||||||
|
@ -273,23 +272,27 @@ Note several features of the tree:
|
||||||
It's hard to work with this raw parse tree, because it is untyped:
|
It's hard to work with this raw parse tree, because it is untyped:
|
||||||
node containing a struct definition has the same API as the node for
|
node containing a struct definition has the same API as the node for
|
||||||
the struct field. But it's possible to add a strongly typed layer on
|
the struct field. But it's possible to add a strongly typed layer on
|
||||||
top of this raw tree, and get a zero-cost typed AST. Here is an
|
top of this raw tree, and get a zero-cost AST. Here is an example
|
||||||
example which adds type-safe wrappers for structs and fields:
|
which adds type-safe wrappers for structs and fields:
|
||||||
|
|
||||||
```rust
|
```rust
|
||||||
|
// generic infrastructure
|
||||||
|
|
||||||
pub trait AstNode<'f>: Copy + 'f {
|
pub trait AstNode<'f>: Copy + 'f {
|
||||||
fn new(node: Node<'f>) -> Option<Self>;
|
fn new(node: Node<'f>) -> Option<Self>;
|
||||||
fn node(&self) -> Node<'f>;
|
fn node(&self) -> Node<'f>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn child_of_kind<'f>(node: Node<'f>, kind: NodeKind) -> Option<Node<'f>> {
|
pub fn child_of_kind<'f>(node: Node<'f>, kind: NodeKind) -> Option<Node<'f>> {
|
||||||
node.children().find(|child| child.kind() == kind)
|
node.children().find(|child| child.kind() == kind)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ast_children<'f, A: AstNode<'f>>(node: Node<'f>) -> Box<Iterator<Item=A> + 'f> {
|
pub fn ast_children<'f, A: AstNode<'f>>(node: Node<'f>) -> Box<Iterator<Item=A> + 'f> {
|
||||||
Box::new(node.children().filter_map(A::new))
|
Box::new(node.children().filter_map(A::new))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AST elements, specific to Rust
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub struct StructDef<'f>(Node<'f>);
|
pub struct StructDef<'f>(Node<'f>);
|
||||||
|
|
||||||
|
@ -300,48 +303,51 @@ pub struct FieldDef<'f>(Node<'f>);
|
||||||
pub struct TypeRef<'f>(Node<'f>);
|
pub struct TypeRef<'f>(Node<'f>);
|
||||||
|
|
||||||
pub trait NameOwner<'f>: AstNode<'f> {
|
pub trait NameOwner<'f>: AstNode<'f> {
|
||||||
fn name_ident(&self) -> Node<'f> {
|
fn name_ident(&self) -> Node<'f> {
|
||||||
child_of_kind(self.node(), IDENT).unwrap()
|
child_of_kind(self.node(), IDENT).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn name(&self) -> &'f str { self.name_ident().text() }
|
fn name(&self) -> &'f str { self.name_ident().text() }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
impl<'f> AstNode<'f> for StructDef<'f> {
|
impl<'f> AstNode<'f> for StructDef<'f> {
|
||||||
fn new(node: Node<'f>) -> Option<Self> {
|
fn new(node: Node<'f>) -> Option<Self> {
|
||||||
if node.kind() == STRUCT_DEF { Some(StructDef(node)) } else { None }
|
if node.kind() == STRUCT_DEF { Some(StructDef(node)) } else { None }
|
||||||
}
|
}
|
||||||
fn node(&self) -> Node<'f> { self.0 }
|
fn node(&self) -> Node<'f> { self.0 }
|
||||||
}
|
|
||||||
|
|
||||||
impl<'f> AstNode<'f> for FieldDef<'f> {
|
|
||||||
fn new(node: Node<'f>) -> Option<Self> {
|
|
||||||
if node.kind() == FIELD_DEF { Some(FieldDef(node)) } else { None }
|
|
||||||
}
|
|
||||||
fn node(&self) -> Node<'f> { self.0 }
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'f> AstNode<'f> for TypeRef<'f> {
|
|
||||||
fn new(node: Node<'f>) -> Option<Self> {
|
|
||||||
if node.kind() == TYPE_REF { Some(TypeRef(node)) } else { None }
|
|
||||||
}
|
|
||||||
fn node(&self) -> Node<'f> { self.0 }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'f> NameOwner<'f> for StructDef<'f> {}
|
impl<'f> NameOwner<'f> for StructDef<'f> {}
|
||||||
impl<'f> NameOwner<'f> for FieldDef<'f> {}
|
|
||||||
|
|
||||||
impl<'f> StructDef<'f> {
|
impl<'f> StructDef<'f> {
|
||||||
pub fn fields(&self) -> Box<Iterator<Item=FieldDef<'f>> + 'f> {
|
pub fn fields(&self) -> Box<Iterator<Item=FieldDef<'f>> + 'f> {
|
||||||
ast_children(self.node())
|
ast_children(self.node())
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
impl<'f> AstNode<'f> for FieldDef<'f> {
|
||||||
|
fn new(node: Node<'f>) -> Option<Self> {
|
||||||
|
if node.kind() == FIELD_DEF { Some(FieldDef(node)) } else { None }
|
||||||
|
}
|
||||||
|
fn node(&self) -> Node<'f> { self.0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'f> FieldDef<'f> {
|
impl<'f> FieldDef<'f> {
|
||||||
pub fn type_ref(&self) -> Option<TypeRef<'f>> {
|
pub fn type_ref(&self) -> Option<TypeRef<'f>> {
|
||||||
ast_children(self.node()).next()
|
ast_children(self.node()).next()
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'f> NameOwner<'f> for FieldDef<'f> {}
|
||||||
|
|
||||||
|
|
||||||
|
impl<'f> AstNode<'f> for TypeRef<'f> {
|
||||||
|
fn new(node: Node<'f>) -> Option<Self> {
|
||||||
|
if node.kind() == TYPE_REF { Some(TypeRef(node)) } else { None }
|
||||||
|
}
|
||||||
|
fn node(&self) -> Node<'f> { self.0 }
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -371,9 +377,11 @@ plan is suggested:
|
||||||
* RFC discussion about the theoretical feasibility of the proposal.
|
* RFC discussion about the theoretical feasibility of the proposal.
|
||||||
|
|
||||||
* Implementation of the proposal as a completely separate crates.io
|
* Implementation of the proposal as a completely separate crates.io
|
||||||
crate.
|
crate, by refactoring existing libsyntax source code to produce a
|
||||||
|
new tree.
|
||||||
|
|
||||||
* A prototype implementation of the macro expansion on top of the new syntax tree.
|
* A prototype implementation of the macro expansion on top of the new
|
||||||
|
syntax tree.
|
||||||
|
|
||||||
* Additional round of discussion/RFC about merging with the mainline
|
* Additional round of discussion/RFC about merging with the mainline
|
||||||
compiler.
|
compiler.
|
||||||
|
@ -392,6 +400,7 @@ plan is suggested:
|
||||||
- Incrementally add more information about source code to the current AST.
|
- Incrementally add more information about source code to the current AST.
|
||||||
- Move the current libsyntax to crates.io as is.
|
- Move the current libsyntax to crates.io as is.
|
||||||
- Explore alternative representations for the parse tree.
|
- Explore alternative representations for the parse tree.
|
||||||
|
- Use a parser generator instead of a hand-written parser.
|
||||||
|
|
||||||
# Unresolved questions
|
# Unresolved questions
|
||||||
[unresolved]: #unresolved-questions
|
[unresolved]: #unresolved-questions
|
||||||
|
|
Loading…
Reference in a new issue