mirror of
https://github.com/rust-lang/rust-analyzer
synced 2024-12-25 20:43:21 +00:00
Lexer scaffold
This commit is contained in:
parent
45fce4b3ef
commit
5e1e8ed34a
6 changed files with 117 additions and 11 deletions
10
src/lexer.rs
Normal file
10
src/lexer.rs
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
use {Token, TextUnit};
|
||||||
|
use syntax_kinds::*;
|
||||||
|
|
||||||
|
pub fn next_token(text: &str) -> Token {
|
||||||
|
let c = text.chars().next().unwrap();
|
||||||
|
Token {
|
||||||
|
kind: IDENT,
|
||||||
|
len: TextUnit::len_of_char(c),
|
||||||
|
}
|
||||||
|
}
|
15
src/lib.rs
15
src/lib.rs
|
@ -1,7 +1,8 @@
|
||||||
#[cfg(test)]
|
mod text;
|
||||||
mod tests {
|
mod tree;
|
||||||
#[test]
|
mod lexer;
|
||||||
fn it_works() {
|
|
||||||
assert_eq!(2 + 2, 4);
|
pub mod syntax_kinds;
|
||||||
}
|
pub use text::TextUnit;
|
||||||
}
|
pub use tree::{SyntaxKind, Token};
|
||||||
|
pub use lexer::next_token;
|
||||||
|
|
16
src/syntax_kinds.rs
Normal file
16
src/syntax_kinds.rs
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
use tree::{SyntaxKind, SyntaxInfo};
|
||||||
|
|
||||||
|
pub const IDENT: SyntaxKind = SyntaxKind(1);
|
||||||
|
pub const WHITESPACE: SyntaxKind = SyntaxKind(2);
|
||||||
|
|
||||||
|
|
||||||
|
static IDENT_INFO: SyntaxInfo = SyntaxInfo {
|
||||||
|
name: "IDENT",
|
||||||
|
};
|
||||||
|
|
||||||
|
pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {
|
||||||
|
match kind {
|
||||||
|
IDENT => &IDENT_INFO,
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
32
src/text.rs
Normal file
32
src/text.rs
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
/// A length of source text, measured in UTF-8 bytes.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TextUnit(u32);

impl TextUnit {
    /// Length of `c` when encoded as UTF-8 (1..=4 bytes).
    pub fn len_of_char(c: char) -> TextUnit {
        TextUnit(c.len_utf8() as u32)
    }

    /// Wraps a raw byte count.
    pub fn new(val: u32) -> TextUnit {
        TextUnit(val)
    }
}

impl fmt::Debug for TextUnit {
    // Debug and Display render identically: just the number.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

impl fmt::Display for TextUnit {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl From<TextUnit> for u32 {
    fn from(tu: TextUnit) -> u32 {
        tu.0
    }
}
|
31
src/tree.rs
Normal file
31
src/tree.rs
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
use text::{TextUnit};
|
||||||
|
use syntax_kinds::syntax_info;
|
||||||
|
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct SyntaxKind(pub(crate) u32);
|
||||||
|
|
||||||
|
impl SyntaxKind {
|
||||||
|
fn info(self) -> &'static SyntaxInfo {
|
||||||
|
syntax_info(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for SyntaxKind {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
let name = self.info().name;
|
||||||
|
f.write_str(name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Static metadata describing one `SyntaxKind`.
pub(crate) struct SyntaxInfo {
    // Human-readable name; `SyntaxKind`'s `Debug` impl prints this.
    pub name: &'static str,
}
|
||||||
|
|
||||||
|
/// A single lexed token: its kind plus the length of text it covers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token {
    // What sort of token this is (e.g. `IDENT`).
    pub kind: SyntaxKind,
    // Length of the token's text in UTF-8 bytes.
    pub len: TextUnit,
}
|
|
@ -1,9 +1,13 @@
|
||||||
extern crate file;
|
extern crate file;
|
||||||
#[macro_use(assert_diff)]
|
#[macro_use(assert_diff)]
|
||||||
extern crate difference;
|
extern crate difference;
|
||||||
|
extern crate libsyntax2;
|
||||||
|
|
||||||
use std::path::{PathBuf, Path};
|
use std::path::{PathBuf, Path};
|
||||||
use std::fs::read_dir;
|
use std::fs::read_dir;
|
||||||
|
use std::fmt::Write;
|
||||||
|
|
||||||
|
use libsyntax2::{Token, next_token};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn lexer_tests() {
|
fn lexer_tests() {
|
||||||
|
@ -46,10 +50,22 @@ fn lexer_test_case(path: &Path) {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tokenize(text: &str) -> Vec<()> {
|
fn tokenize(text: &str) -> Vec<Token> {
|
||||||
Vec::new()
|
let mut text = text;
|
||||||
|
let mut acc = Vec::new();
|
||||||
|
while !text.is_empty() {
|
||||||
|
let token = next_token(text);
|
||||||
|
acc.push(token);
|
||||||
|
let len: u32 = token.len.into();
|
||||||
|
text = &text[len as usize..];
|
||||||
|
}
|
||||||
|
acc
|
||||||
}
|
}
|
||||||
|
|
||||||
fn dump_tokens(tokens: &[()]) -> String {
|
fn dump_tokens(tokens: &[Token]) -> String {
|
||||||
"IDENT 5\nKEYWORD 1\nIDENT 5\n".to_string()
|
let mut acc = String::new();
|
||||||
|
for token in tokens {
|
||||||
|
write!(acc, "{:?} {}\n", token.kind, token.len).unwrap()
|
||||||
|
}
|
||||||
|
acc
|
||||||
}
|
}
|
Loading…
Reference in a new issue