Mirror of https://github.com/rust-lang/rust-analyzer (synced 2024-12-26 13:03:31 +00:00)

Parser: groundwork

commit 9ce4db545e (parent 98a58bf806)

6 changed files with 150 additions and 17 deletions
@@ -16,6 +16,17 @@ use self::strings::{is_string_literal_start, scan_char, scan_byte_char_or_string
 mod comments;
 use self::comments::{scan_shebang, scan_comment};
 
+pub fn tokenize(text: &str) -> Vec<Token> {
+    let mut text = text;
+    let mut acc = Vec::new();
+    while !text.is_empty() {
+        let token = next_token(text);
+        acc.push(token);
+        let len: u32 = token.len.into();
+        text = &text[len as usize..];
+    }
+    acc
+}
 pub fn next_token(text: &str) -> Token {
     assert!(!text.is_empty());
     let mut ptr = Ptr::new(text);
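The new tokenize function simply drives next_token in a loop, slicing the consumed
prefix off the input until nothing is left. A hypothetical usage sketch, not part of
this commit, assuming the crate is pulled in as libsyntax2 the way the test suite does:

    // Sum of token lengths equals the input length in bytes, since tokenize
    // advances by exactly `token.len` bytes on every iteration.
    extern crate libsyntax2;
    use libsyntax2::{tokenize, Token};

    fn main() {
        let text = "fn main() {}";
        let tokens: Vec<Token> = tokenize(text);
        let total: u32 = tokens.iter().map(|t| { let len: u32 = t.len.into(); len }).sum();
        assert_eq!(total, text.len() as u32);
    }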
@@ -5,6 +5,6 @@ mod tree;
 mod lexer;
 
 pub mod syntax_kinds;
-pub use text::TextUnit;
+pub use text::{TextUnit, TextRange};
 pub use tree::{SyntaxKind, Token};
-pub use lexer::next_token;
+pub use lexer::{next_token, tokenize};
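With these two re-exports the crate root now exposes the new range type and the
whole-input lexer entry point alongside the existing single-token one. A small
downstream sketch (not from the commit) of the resulting public surface:

    extern crate libsyntax2;
    use libsyntax2::{Token, next_token, tokenize};  // TextUnit and TextRange are re-exported too

    fn main() {
        let first: Token = next_token("struct S;");   // one token at a time
        let all: Vec<Token> = tokenize("struct S;");  // the whole input
        // The first token of the batch run matches the single-step run.
        let first_len: u32 = first.len.into();
        let batch_first_len: u32 = all[0].len.into();
        assert_eq!(first_len, batch_first_len);
    }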
src/parser/mod.rs: new file, 5 lines

@@ -0,0 +1,5 @@
+use {Token, File};
+
+pub fn parse(tokens: &[Token]) -> File {
+    unimplemented!()
+}
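parse is only a placeholder for now; calling it panics via unimplemented!(). A rough
in-crate sketch of how the two stages are meant to compose once it is filled in (it
assumes a `mod parser;` declaration at the crate root, which this diff does not show,
and neither parse nor File is re-exported from lib.rs yet):

    // In-crate sketch only, e.g. somewhere inside libsyntax2 itself.
    use {Token, File};
    use lexer::tokenize;
    use parser::parse;   // assumption: `mod parser;` exists at the crate root

    fn parse_text(text: &str) -> File {
        let tokens: Vec<Token> = tokenize(text);
        parse(&tokens)   // currently panics: the body is unimplemented!()
    }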
src/text.rs: 59 changed lines

@@ -57,3 +57,62 @@ impl ops::SubAssign<TextUnit> for TextUnit {
         self.0 -= rhs.0
     }
 }
+
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct TextRange {
+    start: TextUnit,
+    end: TextUnit,
+}
+
+impl fmt::Debug for TextRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        <Self as fmt::Display>::fmt(self, f)
+    }
+}
+
+impl fmt::Display for TextRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "[{}; {})", self.start(), self.end())
+    }
+}
+
+
+impl TextRange {
+    pub fn empty() -> TextRange {
+        TextRange::from_to(TextUnit::new(0), TextUnit::new(0))
+    }
+
+    pub fn from_to(from: TextUnit, to: TextUnit) -> TextRange {
+        assert!(from <= to, "Invalid text range [{}; {})", from, to);
+        TextRange { start: from, end: to }
+    }
+
+    pub fn from_len(from: TextUnit, len: TextUnit) -> TextRange {
+        TextRange::from_to(from, from + len)
+    }
+
+    pub fn start(&self) -> TextUnit {
+        self.start
+    }
+
+    pub fn end(&self) -> TextUnit {
+        self.end
+    }
+
+    pub fn len(&self) -> TextUnit {
+        self.end - self.start
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.start() == self.end()
+    }
+}
+
+impl ops::Index<TextRange> for str {
+    type Output = str;
+
+    fn index(&self, index: TextRange) -> &str {
+        &self[index.start().0 as usize..index.end().0 as usize]
+    }
+}
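TextRange pairs naturally with the lexer: each token's length can be turned into a
half-open range over the source, and the new Index impl lets such a range slice a
&str directly. A rough sketch, not from the commit, which assumes TextUnit::new is
public (this hunk does not show that part of text.rs):

    extern crate libsyntax2;
    use libsyntax2::{tokenize, TextRange, TextUnit};

    fn main() {
        let text = "let x = 92;";
        let mut offset = TextUnit::new(0);   // assumption: TextUnit::new is pub
        for token in tokenize(text) {
            let range = TextRange::from_len(offset, token.len);
            // `impl ops::Index<TextRange> for str` makes this slice work.
            println!("{:?} {:?}", range, &text[range]);
            offset = offset + token.len;     // `Add` is implied by `from + len` in from_len
        }
    }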
src/tree.rs: 72 changed lines

@@ -1,4 +1,4 @@
-use text::{TextUnit};
+use text::{TextUnit, TextRange};
 use syntax_kinds::syntax_info;
 
 use std::fmt;

@@ -29,3 +29,73 @@ pub struct Token {
     pub kind: SyntaxKind,
     pub len: TextUnit,
 }
+
+pub struct File {
+    text: String,
+    nodes: Vec<NodeData>,
+}
+
+impl File {
+    pub fn root<'f>(&'f self) -> Node<'f> {
+        assert!(!self.nodes.is_empty());
+        Node { file: self, idx: NodeIdx(0) }
+    }
+}
+
+#[derive(Clone, Copy)]
+pub struct Node<'f> {
+    file: &'f File,
+    idx: NodeIdx,
+}
+
+impl<'f> Node<'f> {
+    pub fn kind(&self) -> SyntaxKind {
+        self.data().kind
+    }
+
+    pub fn text(&self) -> &'f str {
+        let range = self.data().range;
+        &self.file.text.as_str()[range]
+    }
+
+    pub fn parent(&self) -> Option<Node<'f>> {
+        self.as_node(self.data().parent)
+    }
+
+    pub fn children(&self) -> Children<'f> {
+        Children { next: self.as_node(self.data().first_child) }
+    }
+
+    fn data(&self) -> &'f NodeData {
+        &self.file.nodes[self.idx.0 as usize]
+    }
+
+    fn as_node(&self, idx: Option<NodeIdx>) -> Option<Node<'f>> {
+        idx.map(|idx| Node { file: self.file, idx })
+    }
+}
+
+pub struct Children<'f> {
+    next: Option<Node<'f>>,
+}
+
+impl<'f> Iterator for Children<'f> {
+    type Item = Node<'f>;
+
+    fn next(&mut self) -> Option<Node<'f>> {
+        let next = self.next;
+        self.next = next.and_then(|node| node.as_node(node.data().next_sibling));
+        next
+    }
+}
+
+#[derive(Clone, Copy)]
+struct NodeIdx(u32);
+
+struct NodeData {
+    kind: SyntaxKind,
+    range: TextRange,
+    parent: Option<NodeIdx>,
+    first_child: Option<NodeIdx>,
+    next_sibling: Option<NodeIdx>,
+}
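Nothing constructs a File yet (that is what parse will eventually do), but the
read-side API is already usable: the root node is the NodeData at index 0, and
Children walks sibling links. An in-crate sketch of a traversal over it (File and
Node are not re-exported from lib.rs in this commit):

    // In-crate sketch: collect the kind and text of the root's direct children.
    use tree::{File, SyntaxKind};

    fn top_level_kinds(file: &File) -> Vec<(SyntaxKind, String)> {
        file.root()
            .children()
            .map(|node| (node.kind(), node.text().to_string()))
            .collect()
    }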
@@ -7,7 +7,7 @@ use std::path::{PathBuf, Path};
 use std::fs::read_dir;
 use std::fmt::Write;
 
-use libsyntax2::{Token, next_token};
+use libsyntax2::{Token, tokenize};
 
 #[test]
 fn lexer_tests() {

@@ -53,18 +53,6 @@ fn lexer_test_case(path: &Path) {
     assert_diff!(expected, actual, "\n", 0)
 }
-
-fn tokenize(text: &str) -> Vec<Token> {
-    let mut text = text;
-    let mut acc = Vec::new();
-    while !text.is_empty() {
-        let token = next_token(text);
-        acc.push(token);
-        let len: u32 = token.len.into();
-        text = &text[len as usize..];
-    }
-    acc
-}
 
 fn dump_tokens(tokens: &[Token], text: &str) -> String {
     let mut acc = String::new();
     let mut offset = 0;