Lexer scaffold

This commit is contained in:
Aleksey Kladov 2017-12-29 00:56:36 +03:00
parent 45fce4b3ef
commit 5e1e8ed34a
6 changed files with 117 additions and 11 deletions

10
src/lexer.rs Normal file
View file

@ -0,0 +1,10 @@
use {Token, TextUnit};
use syntax_kinds::*;
/// Lexes a single token from the start of `text`.
///
/// This is a scaffold: the token always spans exactly the first character of
/// `text` and is always tagged `IDENT`, whatever that character is.
///
/// # Panics
///
/// Panics if `text` is empty, because there is no first character to measure.
pub fn next_token(text: &str) -> Token {
    let first = text.chars().next().unwrap();
    let len = TextUnit::len_of_char(first);
    Token { kind: IDENT, len }
}

View file

@ -1,7 +1,8 @@
#[cfg(test)] mod text;
mod tests { mod tree;
#[test] mod lexer;
fn it_works() {
assert_eq!(2 + 2, 4); pub mod syntax_kinds;
} pub use text::TextUnit;
} pub use tree::{SyntaxKind, Token};
pub use lexer::next_token;

16
src/syntax_kinds.rs Normal file
View file

@ -0,0 +1,16 @@
use tree::{SyntaxKind, SyntaxInfo};
/// Syntax kind with raw id 1, registered under the name `"IDENT"`.
pub const IDENT: SyntaxKind = SyntaxKind(1);
/// Syntax kind with raw id 2, registered under the name `"WHITESPACE"`.
pub const WHITESPACE: SyntaxKind = SyntaxKind(2);

static IDENT_INFO: SyntaxInfo = SyntaxInfo {
    name: "IDENT",
};

// Every kind constant declared above needs an entry here; a kind without one
// makes `syntax_info` panic.
static WHITESPACE_INFO: SyntaxInfo = SyntaxInfo {
    name: "WHITESPACE",
};

/// Returns the static metadata registered for `kind`.
///
/// # Panics
///
/// Panics if `kind` is not one of the constants declared in this module.
pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {
    match kind {
        IDENT => &IDENT_INFO,
        WHITESPACE => &WHITESPACE_INFO,
        // Report the raw id rather than the kind itself: the message must not
        // depend on any formatting of the kind that would need this lookup.
        _ => unreachable!("no SyntaxInfo registered for syntax kind {}", kind.0),
    }
}

32
src/text.rs Normal file
View file

@ -0,0 +1,32 @@
use std::fmt;
/// A text length or offset stored as a `u32`.
///
/// `len_of_char` measures characters by their UTF-8 encoded size, so values
/// built that way are byte counts.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TextUnit(u32);

impl TextUnit {
    /// Returns the number of bytes `c` occupies when encoded as UTF-8.
    pub fn len_of_char(c: char) -> TextUnit {
        let width = c.len_utf8();
        TextUnit::new(width as u32)
    }

    /// Wraps a raw `u32` value.
    pub fn new(val: u32) -> TextUnit {
        TextUnit(val)
    }
}

/// `Debug` output is identical to `Display` output: the bare number.
impl fmt::Debug for TextUnit {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

/// Formats the wrapped number exactly as `u32` formats itself.
impl fmt::Display for TextUnit {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let TextUnit(raw) = *self;
        fmt::Display::fmt(&raw, f)
    }
}

/// Unwraps the raw `u32` value.
impl From<TextUnit> for u32 {
    fn from(tu: TextUnit) -> u32 {
        let TextUnit(raw) = tu;
        raw
    }
}

31
src/tree.rs Normal file
View file

@ -0,0 +1,31 @@
use text::{TextUnit};
use syntax_kinds::syntax_info;
use std::fmt;
/// Tag identifying the kind of a syntax element.
///
/// Wraps a raw `u32` id. The field is `pub(crate)`, so only code inside this
/// crate can construct new kinds. `Debug` is not derived here; it is
/// implemented by hand elsewhere in this file.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SyntaxKind(pub(crate) u32);
impl SyntaxKind {
/// Returns the static `SyntaxInfo` for this kind.
///
/// Delegates to `syntax_kinds::syntax_info`, which panics for a kind it has
/// no entry for.
fn info(self) -> &'static SyntaxInfo {
syntax_info(self)
}
}
/// Debug-prints a kind as its registered name rather than its raw id.
impl fmt::Debug for SyntaxKind {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let info = self.info();
        f.write_str(info.name)
    }
}
/// Static metadata describing a `SyntaxKind`.
pub(crate) struct SyntaxInfo {
/// Name of the kind; this is the text the `Debug` impl of `SyntaxKind` writes.
pub name: &'static str,
}
/// A lexed token: what kind it is and how long it is.
///
/// The token does not carry its text, only the length.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token {
/// Kind tag of the token.
pub kind: SyntaxKind,
/// Length of the token as a `TextUnit`.
pub len: TextUnit,
}

View file

@ -1,9 +1,13 @@
extern crate file; extern crate file;
#[macro_use(assert_diff)] #[macro_use(assert_diff)]
extern crate difference; extern crate difference;
extern crate libsyntax2;
use std::path::{PathBuf, Path}; use std::path::{PathBuf, Path};
use std::fs::read_dir; use std::fs::read_dir;
use std::fmt::Write;
use libsyntax2::{Token, next_token};
#[test] #[test]
fn lexer_tests() { fn lexer_tests() {
@ -46,10 +50,22 @@ fn lexer_test_case(path: &Path) {
) )
} }
fn tokenize(text: &str) -> Vec<()> { fn tokenize(text: &str) -> Vec<Token> {
Vec::new() let mut text = text;
let mut acc = Vec::new();
while !text.is_empty() {
let token = next_token(text);
acc.push(token);
let len: u32 = token.len.into();
text = &text[len as usize..];
}
acc
} }
fn dump_tokens(tokens: &[()]) -> String { fn dump_tokens(tokens: &[Token]) -> String {
"IDENT 5\nKEYWORD 1\nIDENT 5\n".to_string() let mut acc = String::new();
for token in tokens {
write!(acc, "{:?} {}\n", token.kind, token.len).unwrap()
}
acc
} }