// Provenance (patch envelope, kept verbatim from the original text):
//
//   From 6ff019c25f027be1bf2896ce82659dc8d99515f8 Mon Sep 17 00:00:00 2001
//   From: Aleksey Kladov
//   Date: Fri, 22 Dec 2017 16:28:04 +0300
//   Subject: [PATCH] Add minimal syntax tree implementation
//   ---
//   minirust.rs | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++
//   rfc.md | 22 +++++++++++++++--
//   2 files changed, 88 insertions(+), 2 deletions(-)
//   create mode 100644 minirust.rs
//
//   diff --git a/minirust.rs b/minirust.rs
//   new file mode 100644
//   index 0000000000..d92c03bea8
//   --- /dev/null
//   +++ b/minirust.rs
//   @@ -0,0 +1,68 @@
//
// The generic arguments below (`Vec<NodeData>`, `Option<u32>`,
// `Option<Node<'f>>`) were missing from the text as received and have been
// reconstructed from how each value is used.

/// Tag for the syntactic category of a node.
///
/// `Copy` is required because [`Node::kind`] hands the tag out by value from
/// behind a shared reference.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct NodeKind(u16);

/// A source text together with the flat table of nodes describing it.
///
/// Nodes refer to each other by their index in `nodes`; index 0 is the root
/// (see [`File::root`]).
pub struct File {
    text: String,
    nodes: Vec<NodeData>,
}

/// Storage for one tree node. All links are indices into `File::nodes`.
struct NodeData {
    kind: NodeKind,
    /// `(start, end)` offsets into `File::text`; used as a half-open slice range.
    range: (u32, u32),
    parent: Option<u32>,
    first_child: Option<u32>,
    next_sibling: Option<u32>,
}

/// A cheap, copyable handle to one node of a [`File`].
#[derive(Clone, Copy)]
pub struct Node<'f> {
    file: &'f File,
    idx: u32,
}

/// Iterator over the direct children of a node, in sibling order.
pub struct Children<'f> {
    next: Option<Node<'f>>,
}

impl File {
    /// Returns the root node, i.e. the node stored at index 0.
    ///
    /// # Panics
    ///
    /// Panics if the file contains no nodes.
    pub fn root<'f>(&'f self) -> Node<'f> {
        assert!(!self.nodes.is_empty());
        Node { file: self, idx: 0 }
    }
}

impl<'f> Node<'f> {
    /// Returns the syntactic category of this node.
    pub fn kind(&self) -> NodeKind {
        self.data().kind
    }

    /// Returns the slice of the file's text covered by this node.
    ///
    /// # Panics
    ///
    /// Panics if the stored range is out of bounds for the text or does not
    /// fall on `char` boundaries.
    pub fn text(&self) -> &'f str {
        let (start, end) = self.data().range;
        &self.file.text[start as usize..end as usize]
    }

    /// Returns the parent node, or `None` when no parent is recorded.
    pub fn parent(&self) -> Option<Node<'f>> {
        self.as_node(self.data().parent)
    }

    /// Returns an iterator over this node's direct children.
    pub fn children(&self) -> Children<'f> {
        Children {
            next: self.as_node(self.data().first_child),
        }
    }

    /// Looks up this node's record; panics if `idx` is outside the node table.
    fn data(&self) -> &'f NodeData {
        &self.file.nodes[self.idx as usize]
    }

    /// Wraps an optional raw index into a handle on the same file.
    fn as_node(&self, idx: Option<u32>) -> Option<Node<'f>> {
        idx.map(|idx| Node {
            file: self.file,
            idx,
        })
    }
}

impl<'f> Iterator for Children<'f> {
    type Item = Node<'f>;

    fn next(&mut self) -> Option<Node<'f>> {
        let current = self.next?;
        // Advance along the sibling chain before yielding the current node.
        self.next = current.as_node(current.data().next_sibling);
        Some(current)
    }
}

// Start of the next file's diff in the original patch (kept verbatim):
//
//   diff --git a/rfc.md b/rfc.md
//   index 9b7c799911..1476cbaf21 100644
//   --- a/rfc.md
//   +++ b/rfc.md
//   @@ -80,7 +80,10 @@ simpler ones.
In contrast, for IDEs it is crucial to have a lossless view of the source code because, for example, it's important to preserve comments -during refactorings. +during refactorings. Ideally, IDEs should be able to incrementally +relex and reparse the file as the user types, because a syntax tree is +necessary to correctly handle certain code-editing actions like +autoindentation or joining lines. Currently rustc uses the AST approach, which preserves the source code information to some extent by storing spans in the AST. @@ -98,7 +101,7 @@ Not applicable. This section proposes a new syntax tree data structure, which should be suitable for both compiler and IDE. It is heavily inspired by [PSI] -data structure which used in [IntelliJ] based IDEs and in the Kotlin +data structure which is used in [IntelliJ] based IDEs and in the [Kotlin] compiler. @@ -107,6 +110,21 @@ compiler. [Kotlin]: https://kotlinlang.org/ +The main idea is to store the minimal amount of information in the +tree itself, and instead lean heavily on the source code string for +the actual data about identifier names, constant values, etc. + +All nodes in the tree are of the same type and store a constant for +the syntactic category of the element and a range in the source code. + +Here is a minimal implementation of this data structure: + + +```Rust +``` + + + # Drawbacks [drawbacks]: #drawbacks