Mirror of https://github.com/rust-lang/rust-analyzer (synced 2024-12-26 13:03:31 +00:00)

Parser: groundwork

commit 9ce4db545e (parent 98a58bf806)

6 changed files with 150 additions and 17 deletions
@@ -16,6 +16,17 @@ use self::strings::{is_string_literal_start, scan_char, scan_byte_char_or_string
 mod comments;
 use self::comments::{scan_shebang, scan_comment};
 
+pub fn tokenize(text: &str) -> Vec<Token> {
+    let mut text = text;
+    let mut acc = Vec::new();
+    while !text.is_empty() {
+        let token = next_token(text);
+        acc.push(token);
+        let len: u32 = token.len.into();
+        text = &text[len as usize..];
+    }
+    acc
+}
 pub fn next_token(text: &str) -> Token {
     assert!(!text.is_empty());
     let mut ptr = Ptr::new(text);
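The new tokenize function simply drives next_token in a loop, slicing the consumed
prefix off the input until nothing is left. A hypothetical usage sketch, not part of
this commit, assuming the crate is pulled in as libsyntax2 the way the test suite does:

    // Sum of token lengths equals the input length in bytes, since tokenize
    // advances by exactly `token.len` bytes on every iteration.
    extern crate libsyntax2;
    use libsyntax2::{tokenize, Token};

    fn main() {
        let text = "fn main() {}";
        let tokens: Vec<Token> = tokenize(text);
        let total: u32 = tokens.iter().map(|t| { let len: u32 = t.len.into(); len }).sum();
        assert_eq!(total, text.len() as u32);
    }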
@@ -5,6 +5,6 @@ mod tree;
 mod lexer;
 
 pub mod syntax_kinds;
-pub use text::TextUnit;
+pub use text::{TextUnit, TextRange};
 pub use tree::{SyntaxKind, Token};
-pub use lexer::next_token;
+pub use lexer::{next_token, tokenize};
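With these two re-exports the crate root now exposes the new range type and the
whole-input lexer entry point alongside the existing single-token one. A small
downstream sketch (not from the commit) of the resulting public surface:

    extern crate libsyntax2;
    use libsyntax2::{Token, next_token, tokenize};  // TextUnit and TextRange are re-exported too

    fn main() {
        let first: Token = next_token("struct S;");   // one token at a time
        let all: Vec<Token> = tokenize("struct S;");  // the whole input
        // The first token of the batch run matches the single-step run.
        let first_len: u32 = first.len.into();
        let batch_first_len: u32 = all[0].len.into();
        assert_eq!(first_len, batch_first_len);
    }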
src/parser/mod.rs: new file, 5 lines

@@ -0,0 +1,5 @@
+use {Token, File};
+
+pub fn parse(tokens: &[Token]) -> File {
+    unimplemented!()
+}
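parse is only a placeholder for now; calling it panics via unimplemented!(). A rough
in-crate sketch of how the two stages are meant to compose once it is filled in (it
assumes a `mod parser;` declaration at the crate root, which this diff does not show,
and neither parse nor File is re-exported from lib.rs yet):

    // In-crate sketch only, e.g. somewhere inside libsyntax2 itself.
    use {Token, File};
    use lexer::tokenize;
    use parser::parse;   // assumption: `mod parser;` exists at the crate root

    fn parse_text(text: &str) -> File {
        let tokens: Vec<Token> = tokenize(text);
        parse(&tokens)   // currently panics: the body is unimplemented!()
    }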
src/text.rs: 59 changed lines

@@ -57,3 +57,62 @@ impl ops::SubAssign<TextUnit> for TextUnit {
         self.0 -= rhs.0
     }
 }
+
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct TextRange {
+    start: TextUnit,
+    end: TextUnit,
+}
+
+impl fmt::Debug for TextRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        <Self as fmt::Display>::fmt(self, f)
+    }
+}
+
+impl fmt::Display for TextRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "[{}; {})", self.start(), self.end())
+    }
+}
+
+
+impl TextRange {
+    pub fn empty() -> TextRange {
+        TextRange::from_to(TextUnit::new(0), TextUnit::new(0))
+    }
+
+    pub fn from_to(from: TextUnit, to: TextUnit) -> TextRange {
+        assert!(from <= to, "Invalid text range [{}; {})", from, to);
+        TextRange { start: from, end: to }
+    }
+
+    pub fn from_len(from: TextUnit, len: TextUnit) -> TextRange {
+        TextRange::from_to(from, from + len)
+    }
+
+    pub fn start(&self) -> TextUnit {
+        self.start
+    }
+
+    pub fn end(&self) -> TextUnit {
+        self.end
+    }
+
+    pub fn len(&self) -> TextUnit {
+        self.end - self.start
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.start() == self.end()
+    }
+}
+
+impl ops::Index<TextRange> for str {
+    type Output = str;
+
+    fn index(&self, index: TextRange) -> &str {
+        &self[index.start().0 as usize..index.end().0 as usize]
+    }
+}
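TextRange pairs naturally with the lexer: each token's length can be turned into a
half-open range over the source, and the new Index impl lets such a range slice a
&str directly. A rough sketch, not from the commit, which assumes TextUnit::new is
public (this hunk does not show that part of text.rs):

    extern crate libsyntax2;
    use libsyntax2::{tokenize, TextRange, TextUnit};

    fn main() {
        let text = "let x = 92;";
        let mut offset = TextUnit::new(0);   // assumption: TextUnit::new is pub
        for token in tokenize(text) {
            let range = TextRange::from_len(offset, token.len);
            // `impl ops::Index<TextRange> for str` makes this slice work.
            println!("{:?} {:?}", range, &text[range]);
            offset = offset + token.len;     // `Add` is implied by `from + len` in from_len
        }
    }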
src/tree.rs: 72 changed lines

@@ -1,4 +1,4 @@
-use text::{TextUnit};
+use text::{TextUnit, TextRange};
 use syntax_kinds::syntax_info;
 
 use std::fmt;

@@ -29,3 +29,73 @@ pub struct Token {
     pub kind: SyntaxKind,
     pub len: TextUnit,
 }
+
+pub struct File {
+    text: String,
+    nodes: Vec<NodeData>,
+}
+
+impl File {
+    pub fn root<'f>(&'f self) -> Node<'f> {
+        assert!(!self.nodes.is_empty());
+        Node { file: self, idx: NodeIdx(0) }
+    }
+}
+
+#[derive(Clone, Copy)]
+pub struct Node<'f> {
+    file: &'f File,
+    idx: NodeIdx,
+}
+
+impl<'f> Node<'f> {
+    pub fn kind(&self) -> SyntaxKind {
+        self.data().kind
+    }
+
+    pub fn text(&self) -> &'f str {
+        let range = self.data().range;
+        &self.file.text.as_str()[range]
+    }
+
+    pub fn parent(&self) -> Option<Node<'f>> {
+        self.as_node(self.data().parent)
+    }
+
+    pub fn children(&self) -> Children<'f> {
+        Children { next: self.as_node(self.data().first_child) }
+    }
+
+    fn data(&self) -> &'f NodeData {
+        &self.file.nodes[self.idx.0 as usize]
+    }
+
+    fn as_node(&self, idx: Option<NodeIdx>) -> Option<Node<'f>> {
+        idx.map(|idx| Node { file: self.file, idx })
+    }
+}
+
+pub struct Children<'f> {
+    next: Option<Node<'f>>,
+}
+
+impl<'f> Iterator for Children<'f> {
+    type Item = Node<'f>;
+
+    fn next(&mut self) -> Option<Node<'f>> {
+        let next = self.next;
+        self.next = next.and_then(|node| node.as_node(node.data().next_sibling));
+        next
+    }
+}
+
+#[derive(Clone, Copy)]
+struct NodeIdx(u32);
+
+struct NodeData {
+    kind: SyntaxKind,
+    range: TextRange,
+    parent: Option<NodeIdx>,
+    first_child: Option<NodeIdx>,
+    next_sibling: Option<NodeIdx>,
+}
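Nothing constructs a File yet (that is what parse will eventually do), but the
read-side API is already usable: the root node is the NodeData at index 0, and
Children walks sibling links. An in-crate sketch of a traversal over it (File and
Node are not re-exported from lib.rs in this commit):

    // In-crate sketch: collect the kind and text of the root's direct children.
    use tree::{File, SyntaxKind};

    fn top_level_kinds(file: &File) -> Vec<(SyntaxKind, String)> {
        file.root()
            .children()
            .map(|node| (node.kind(), node.text().to_string()))
            .collect()
    }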
@@ -7,7 +7,7 @@ use std::path::{PathBuf, Path};
 use std::fs::read_dir;
 use std::fmt::Write;
 
-use libsyntax2::{Token, next_token};
+use libsyntax2::{Token, tokenize};
 
 #[test]
 fn lexer_tests() {

@@ -53,18 +53,6 @@ fn lexer_test_case(path: &Path) {
     assert_diff!(expected, actual, "\n", 0)
 }
-
-fn tokenize(text: &str) -> Vec<Token> {
-    let mut text = text;
-    let mut acc = Vec::new();
-    while !text.is_empty() {
-        let token = next_token(text);
-        acc.push(token);
-        let len: u32 = token.len.into();
-        text = &text[len as usize..];
-    }
-    acc
-}
 
 fn dump_tokens(tokens: &[Token], text: &str) -> String {
     let mut acc = String::new();
     let mut offset = 0;