From 9ce4db545efba697f20ab5cecbefc0589c7146ca Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Sun, 31 Dec 2017 17:54:33 +0300
Subject: [PATCH] Parser: groundwork

---
 src/lexer/mod.rs  | 11 +++++++
 src/lib.rs        |  4 +--
 src/parser/mod.rs |  5 ++++
 src/text.rs       | 59 ++++++++++++++++++++++++++++++++++++
 src/tree.rs       | 74 +++++++++++++++++++++++++++++++++++++++++++++--
 tests/lexer.rs    | 14 +--------
 6 files changed, 150 insertions(+), 17 deletions(-)
 create mode 100644 src/parser/mod.rs

diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index f46746bee4..7c42597630 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -16,6 +16,17 @@ use self::strings::{is_string_literal_start, scan_char, scan_byte_char_or_string
 mod comments;
 use self::comments::{scan_shebang, scan_comment};
 
+pub fn tokenize(text: &str) -> Vec<Token> {
+    let mut text = text;
+    let mut acc = Vec::new();
+    while !text.is_empty() {
+        let token = next_token(text);
+        acc.push(token);
+        let len: u32 = token.len.into();
+        text = &text[len as usize..];
+    }
+    acc
+}
 pub fn next_token(text: &str) -> Token {
     assert!(!text.is_empty());
     let mut ptr = Ptr::new(text);
diff --git a/src/lib.rs b/src/lib.rs
index 3b9dbc8f74..82213e2b39 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,6 +5,6 @@ mod tree;
 mod lexer;
 
 pub mod syntax_kinds;
-pub use text::TextUnit;
+pub use text::{TextUnit, TextRange};
 pub use tree::{SyntaxKind, Token};
-pub use lexer::next_token;
+pub use lexer::{next_token, tokenize};
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
new file mode 100644
index 0000000000..da902e2b7e
--- /dev/null
+++ b/src/parser/mod.rs
@@ -0,0 +1,5 @@
+use {Token, File};
+
+pub fn parse(tokens: &[Token]) -> File {
+    unimplemented!()
+}
\ No newline at end of file
diff --git a/src/text.rs b/src/text.rs
index c3ef1ac8ea..ee0dc83983 100644
--- a/src/text.rs
+++ b/src/text.rs
@@ -56,4 +56,63 @@ impl ops::SubAssign<TextUnit> for TextUnit {
     fn sub_assign(&mut self, rhs: TextUnit) {
         self.0 -= rhs.0
     }
 }
+
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct TextRange {
+    start: TextUnit,
+    end: TextUnit,
+}
+
+impl fmt::Debug for TextRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        <Self as fmt::Display>::fmt(self, f)
+    }
+}
+
+impl fmt::Display for TextRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "[{}; {})", self.start(), self.end())
+    }
+}
+
+
+impl TextRange {
+    pub fn empty() -> TextRange {
+        TextRange::from_to(TextUnit::new(0), TextUnit::new(0))
+    }
+
+    pub fn from_to(from: TextUnit, to: TextUnit) -> TextRange {
+        assert!(from <= to, "Invalid text range [{}; {})", from, to);
+        TextRange { start: from, end: to }
+    }
+
+    pub fn from_len(from: TextUnit, len: TextUnit) -> TextRange {
+        TextRange::from_to(from, from + len)
+    }
+
+    pub fn start(&self) -> TextUnit {
+        self.start
+    }
+
+    pub fn end(&self) -> TextUnit {
+        self.end
+    }
+
+    pub fn len(&self) -> TextUnit {
+        self.end - self.start
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.start() == self.end()
+    }
+}
+
+impl ops::Index<TextRange> for str {
+    type Output = str;
+
+    fn index(&self, index: TextRange) -> &str {
+        &self[index.start().0 as usize..index.end().0 as usize]
+    }
+}
\ No newline at end of file
diff --git a/src/tree.rs b/src/tree.rs
index 0924f38d0d..2ac25e795e 100644
--- a/src/tree.rs
+++ b/src/tree.rs
@@ -1,4 +1,4 @@
-use text::{TextUnit};
+use text::{TextUnit, TextRange};
 use syntax_kinds::syntax_info;
 
 use std::fmt;
@@ -28,4 +28,74 @@ pub(crate) struct SyntaxInfo {
 pub struct Token {
     pub kind: SyntaxKind,
     pub len: TextUnit,
-}
\ No newline at end of file
+}
+
+pub struct File {
+    text: String,
+    nodes: Vec<NodeData>,
+}
+
+impl File {
+    pub fn root<'f>(&'f self) -> Node<'f> {
+        assert!(!self.nodes.is_empty());
+        Node { file: self, idx: NodeIdx(0) }
+    }
+}
+
+#[derive(Clone, Copy)]
+pub struct Node<'f> {
+    file: &'f File,
+    idx: NodeIdx,
+}
+
+impl<'f> Node<'f> {
+    pub fn kind(&self) -> SyntaxKind {
+        self.data().kind
+    }
+
+    pub fn text(&self) -> &'f str {
+        let range = self.data().range;
+        &self.file.text.as_str()[range]
+    }
+
+    pub fn parent(&self) -> Option<Node<'f>> {
+        self.as_node(self.data().parent)
+    }
+
+    pub fn children(&self) -> Children<'f> {
+        Children { next: self.as_node(self.data().first_child) }
+    }
+
+    fn data(&self) -> &'f NodeData {
+        &self.file.nodes[self.idx.0 as usize]
+    }
+
+    fn as_node(&self, idx: Option<NodeIdx>) -> Option<Node<'f>> {
+        idx.map(|idx| Node { file: self.file, idx })
+    }
+}
+
+pub struct Children<'f> {
+    next: Option<Node<'f>>,
+}
+
+impl<'f> Iterator for Children<'f> {
+    type Item = Node<'f>;
+
+    fn next(&mut self) -> Option<Node<'f>> {
+        let next = self.next;
+        self.next = next.and_then(|node| node.as_node(node.data().next_sibling));
+        next
+    }
+}
+
+#[derive(Clone, Copy)]
+struct NodeIdx(u32);
+
+struct NodeData {
+    kind: SyntaxKind,
+    range: TextRange,
+    parent: Option<NodeIdx>,
+    first_child: Option<NodeIdx>,
+    next_sibling: Option<NodeIdx>,
+}
diff --git a/tests/lexer.rs b/tests/lexer.rs
index 6a9bab66bd..beca19c24c 100644
--- a/tests/lexer.rs
+++ b/tests/lexer.rs
@@ -7,7 +7,7 @@ use std::path::{PathBuf, Path};
 use std::fs::read_dir;
 use std::fmt::Write;
 
-use libsyntax2::{Token, next_token};
+use libsyntax2::{Token, tokenize};
 
 #[test]
 fn lexer_tests() {
@@ -53,18 +53,6 @@ fn lexer_test_case(path: &Path) {
     assert_diff!(expected, actual, "\n", 0)
 }
 
-fn tokenize(text: &str) -> Vec<Token> {
-    let mut text = text;
-    let mut acc = Vec::new();
-    while !text.is_empty() {
-        let token = next_token(text);
-        acc.push(token);
-        let len: u32 = token.len.into();
-        text = &text[len as usize..];
-    }
-    acc
-}
-
 fn dump_tokens(tokens: &[Token], text: &str) -> String {
     let mut acc = String::new();
     let mut offset = 0;
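
A minimal usage sketch of what this patch exposes, for readers following along. It is not part of the patch: the `main` function, the sample input string, and the `extern crate libsyntax2;` wiring are illustrative assumptions (the crate name is taken from `tests/lexer.rs`), and it stops at the token level because `parser::parse` is still `unimplemented!()`. It relies only on items visible in the diff: `tokenize`, `TextRange::empty`/`from_len`/`start`, the `TextUnit` addition that `from_len` itself uses, and the new `impl ops::Index<TextRange> for str`.

extern crate libsyntax2;

use libsyntax2::{tokenize, TextRange};

fn main() {
    let text = "fn main() {}";

    // `tokenize` (added in this patch) repeatedly calls `next_token`,
    // consuming `token.len` bytes of the remaining input on each step.
    let tokens = tokenize(text);

    // Recover each token's text via `TextRange` and the new
    // `impl ops::Index<TextRange> for str`.
    let mut offset = TextRange::empty().start(); // a zero-valued `TextUnit`
    for token in &tokens {
        let range = TextRange::from_len(offset, token.len);
        println!("{} {:?}", range, &text[range]);
        offset = offset + token.len; // the same `+` that `from_len` uses internally
    }
}

The flat `Vec<NodeData>`/`NodeIdx` layout added to `tree.rs` is in the same spirit: `Node<'f>` is a cheap `Copy` handle (a `File` reference plus an index) rather than an owning subtree, which is what lets `children()` hand out an iterator of node handles once `parse` starts populating `NodeData`.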