Parser: guess what? Groundwork!

This commit is contained in:
Aleksey Kladov 2018-01-01 18:58:46 +03:00
parent 0af33a2587
commit cb362626f3
9 changed files with 199 additions and 58 deletions

View file

@ -1,4 +1,12 @@
Grammar( Grammar(
keywords: [
"use",
"fn",
"struct",
"enum",
"trait",
"impl",
],
tokens: [ tokens: [
"ERROR", "ERROR",
"IDENT", "IDENT",
@ -53,6 +61,6 @@ Grammar(
"SHEBANG", "SHEBANG",
], ],
nodes: [ nodes: [
"FILE" "FILE",
] ]
) )

View file

@ -17,6 +17,7 @@ fn main() {
#[derive(Deserialize)] #[derive(Deserialize)]
struct Grammar { struct Grammar {
keywords: Vec<String>,
tokens: Vec<String>, tokens: Vec<String>,
nodes: Vec<String>, nodes: Vec<String>,
} }
@ -33,8 +34,10 @@ impl Grammar {
acc.push_str("use tree::{SyntaxKind, SyntaxInfo};\n"); acc.push_str("use tree::{SyntaxKind, SyntaxInfo};\n");
acc.push_str("\n"); acc.push_str("\n");
let syntax_kinds: Vec<&String> = let syntax_kinds: Vec<String> =
self.tokens.iter().chain(self.nodes.iter()) self.keywords.iter().map(|kw| kw_token(kw))
.chain(self.tokens.iter().cloned())
.chain(self.nodes.iter().cloned())
.collect(); .collect();
for (idx, kind) in syntax_kinds.iter().enumerate() { for (idx, kind) in syntax_kinds.iter().enumerate() {
@ -60,6 +63,14 @@ impl Grammar {
acc.push_str("pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {\n"); acc.push_str("pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {\n");
acc.push_str(" &INFOS[kind.0 as usize]\n"); acc.push_str(" &INFOS[kind.0 as usize]\n");
acc.push_str("}\n\n");
acc.push_str("pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {\n");
acc.push_str(" match ident {\n");
for kw in self.keywords.iter() {
write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap();
}
acc.push_str(" _ => None,\n");
acc.push_str(" }\n");
acc.push_str("}\n"); acc.push_str("}\n");
acc acc
} }
@ -77,4 +88,8 @@ fn generated_file() -> PathBuf {
fn scream(word: &str) -> String { fn scream(word: &str) -> String {
word.chars().map(|c| c.to_ascii_uppercase()).collect() word.chars().map(|c| c.to_ascii_uppercase()).collect()
}
fn kw_token(keyword: &str) -> String {
format!("{}_KW", scream(keyword))
} }

View file

@ -187,6 +187,9 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
return if c == '_' { UNDERSCORE } else { IDENT }; return if c == '_' { UNDERSCORE } else { IDENT };
} }
ptr.bump_while(is_ident_continue); ptr.bump_while(is_ident_continue);
if let Some(kind) = ident_to_keyword(ptr.current_token_text()) {
return kind;
}
IDENT IDENT
} }

View file

@ -59,6 +59,11 @@ impl<'s> Ptr<'s> {
} }
} }
/// Returns the leading `self.len` bytes of `self.text`.
///
/// The lexer calls this right after consuming an identifier to obtain the
/// text it then looks up in the keyword table.
///
/// Panics (via `str` slicing) if `len` is past the end of `text` or does not
/// fall on a character boundary.
pub fn current_token_text(&self) -> &str {
    let consumed: u32 = self.len.into();
    let end = consumed as usize;
    &self.text[..end]
}
fn chars(&self) -> Chars { fn chars(&self) -> Chars {
let len: u32 = self.len.into(); let len: u32 = self.len.into();
self.text[len as usize ..].chars() self.text[len as usize ..].chars()

View file

@ -3,8 +3,68 @@ use super::parser::Parser;
use syntax_kinds::*; use syntax_kinds::*;
// Items //
pub fn file(p: &mut Parser) { pub fn file(p: &mut Parser) {
p.start(FILE); p.start(FILE);
//TODO: parse_shebang shebang(p);
inner_attributes(p);
mod_items(p);
p.finish();
}
// Outcome of a grammar rule: carries no payload either way, only success or failure.
type Result = ::std::result::Result<(), ()>;
// Shorthand for the successful outcome.
const OK: Result = Ok(());
// Shorthand for the failed outcome.
const ERR: Result = Err(());
/// Placeholder for shebang handling; currently does nothing and ignores the parser.
fn shebang(_: &mut Parser) {
//TODO: not implemented yet
}
/// Placeholder for inner-attribute handling; currently does nothing and ignores the parser.
fn inner_attributes(_: &mut Parser) {
//TODO: not implemented yet
}
/// Drives item parsing until the parser reports end of input.
///
/// Each round first calls `skip_until_item`, then stops if the input is
/// exhausted. Otherwise it attempts `item`; when that fails, one token is
/// consumed through `skip_one_token` so the loop always makes progress.
fn mod_items(p: &mut Parser) {
    loop {
        skip_until_item(p);
        if p.is_eof() {
            break;
        }
        match item(p) {
            Ok(()) => {}
            // Recovery: swallow a single token and try again from the next one.
            Err(()) => skip_one_token(p),
        }
    }
}
/// Attempts to parse a single item.
///
/// Runs `outer_attributes` and then `visibility`, stopping at the first
/// failure. No item kinds are recognised yet, so the result is always `ERR`.
fn item(p: &mut Parser) -> Result {
    outer_attributes(p)
        .and_then(|()| visibility(p))
        // Nothing after the prefix is parsed yet, so a clean prefix still fails.
        .and_then(|()| ERR)
}
// Paths, types, attributes, and stuff //
/// Stub for outer-attribute parsing: ignores the parser and always returns `OK`.
fn outer_attributes(_: &mut Parser) -> Result {
OK
}
/// Stub for visibility parsing: ignores the parser and always returns `OK`.
fn visibility(_: &mut Parser) -> Result {
OK
}
// Expressions //
// Error recovery and higher-order utils //
/// Placeholder for error recovery that skips ahead to the next item; currently does nothing.
fn skip_until_item(_: &mut Parser) {
//TODO: not implemented yet
}
fn skip_one_token(p: &mut Parser) {
p.start(ERROR);
p.bump().unwrap();
p.finish(); p.finish();
} }

View file

@ -34,10 +34,14 @@ impl<'t> Parser<'t> {
} }
pub(crate) fn into_events(self) -> Vec<Event> { pub(crate) fn into_events(self) -> Vec<Event> {
assert!(self.pos == self.non_ws_tokens.len()); assert!(self.is_eof());
self.events self.events
} }
/// Returns `true` once `pos` has reached the end of `non_ws_tokens`,
/// i.e. there is no further token for the parser to consume.
pub(crate) fn is_eof(&self) -> bool {
    let total = self.non_ws_tokens.len();
    self.pos == total
}
pub(crate) fn start(&mut self, kind: SyntaxKind) { pub(crate) fn start(&mut self, kind: SyntaxKind) {
self.event(Event::Start { kind }); self.event(Event::Start { kind });
} }
@ -46,6 +50,15 @@ impl<'t> Parser<'t> {
self.event(Event::Finish); self.event(Event::Finish);
} }
/// Consumes the token at the current position and returns its kind.
///
/// Returns `None` without advancing when the parser is already at the end
/// of input. Otherwise `pos` is moved forward by one, and the kind is read
/// from `raw_tokens` using the index stored in the `non_ws_tokens` entry.
pub(crate) fn bump(&mut self) -> Option<SyntaxKind> {
    if self.is_eof() {
        None
    } else {
        // First field of the entry is the position of this token in `raw_tokens`.
        let raw_idx = self.non_ws_tokens[self.pos].0;
        self.pos += 1;
        let kind = self.raw_tokens[raw_idx].kind;
        Some(kind)
    }
}
fn event(&mut self, event: Event) { fn event(&mut self, event: Event) {
self.events.push(event) self.events.push(event)
} }

View file

@ -1,60 +1,72 @@
// Generated from grammar.ron // Generated from grammar.ron
use tree::{SyntaxKind, SyntaxInfo}; use tree::{SyntaxKind, SyntaxInfo};
pub const ERROR: SyntaxKind = SyntaxKind(0); pub const USE_KW: SyntaxKind = SyntaxKind(0);
pub const IDENT: SyntaxKind = SyntaxKind(1); pub const FN_KW: SyntaxKind = SyntaxKind(1);
pub const UNDERSCORE: SyntaxKind = SyntaxKind(2); pub const STRUCT_KW: SyntaxKind = SyntaxKind(2);
pub const WHITESPACE: SyntaxKind = SyntaxKind(3); pub const ENUM_KW: SyntaxKind = SyntaxKind(3);
pub const INT_NUMBER: SyntaxKind = SyntaxKind(4); pub const TRAIT_KW: SyntaxKind = SyntaxKind(4);
pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5); pub const IMPL_KW: SyntaxKind = SyntaxKind(5);
pub const SEMI: SyntaxKind = SyntaxKind(6); pub const ERROR: SyntaxKind = SyntaxKind(6);
pub const COMMA: SyntaxKind = SyntaxKind(7); pub const IDENT: SyntaxKind = SyntaxKind(7);
pub const DOT: SyntaxKind = SyntaxKind(8); pub const UNDERSCORE: SyntaxKind = SyntaxKind(8);
pub const DOTDOT: SyntaxKind = SyntaxKind(9); pub const WHITESPACE: SyntaxKind = SyntaxKind(9);
pub const DOTDOTDOT: SyntaxKind = SyntaxKind(10); pub const INT_NUMBER: SyntaxKind = SyntaxKind(10);
pub const DOTDOTEQ: SyntaxKind = SyntaxKind(11); pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(11);
pub const L_PAREN: SyntaxKind = SyntaxKind(12); pub const SEMI: SyntaxKind = SyntaxKind(12);
pub const R_PAREN: SyntaxKind = SyntaxKind(13); pub const COMMA: SyntaxKind = SyntaxKind(13);
pub const L_CURLY: SyntaxKind = SyntaxKind(14); pub const DOT: SyntaxKind = SyntaxKind(14);
pub const R_CURLY: SyntaxKind = SyntaxKind(15); pub const DOTDOT: SyntaxKind = SyntaxKind(15);
pub const L_BRACK: SyntaxKind = SyntaxKind(16); pub const DOTDOTDOT: SyntaxKind = SyntaxKind(16);
pub const R_BRACK: SyntaxKind = SyntaxKind(17); pub const DOTDOTEQ: SyntaxKind = SyntaxKind(17);
pub const L_ANGLE: SyntaxKind = SyntaxKind(18); pub const L_PAREN: SyntaxKind = SyntaxKind(18);
pub const R_ANGLE: SyntaxKind = SyntaxKind(19); pub const R_PAREN: SyntaxKind = SyntaxKind(19);
pub const AT: SyntaxKind = SyntaxKind(20); pub const L_CURLY: SyntaxKind = SyntaxKind(20);
pub const POUND: SyntaxKind = SyntaxKind(21); pub const R_CURLY: SyntaxKind = SyntaxKind(21);
pub const TILDE: SyntaxKind = SyntaxKind(22); pub const L_BRACK: SyntaxKind = SyntaxKind(22);
pub const QUESTION: SyntaxKind = SyntaxKind(23); pub const R_BRACK: SyntaxKind = SyntaxKind(23);
pub const COLON: SyntaxKind = SyntaxKind(24); pub const L_ANGLE: SyntaxKind = SyntaxKind(24);
pub const COLONCOLON: SyntaxKind = SyntaxKind(25); pub const R_ANGLE: SyntaxKind = SyntaxKind(25);
pub const DOLLAR: SyntaxKind = SyntaxKind(26); pub const AT: SyntaxKind = SyntaxKind(26);
pub const EQ: SyntaxKind = SyntaxKind(27); pub const POUND: SyntaxKind = SyntaxKind(27);
pub const EQEQ: SyntaxKind = SyntaxKind(28); pub const TILDE: SyntaxKind = SyntaxKind(28);
pub const FAT_ARROW: SyntaxKind = SyntaxKind(29); pub const QUESTION: SyntaxKind = SyntaxKind(29);
pub const NEQ: SyntaxKind = SyntaxKind(30); pub const COLON: SyntaxKind = SyntaxKind(30);
pub const NOT: SyntaxKind = SyntaxKind(31); pub const COLONCOLON: SyntaxKind = SyntaxKind(31);
pub const LIFETIME: SyntaxKind = SyntaxKind(32); pub const DOLLAR: SyntaxKind = SyntaxKind(32);
pub const CHAR: SyntaxKind = SyntaxKind(33); pub const EQ: SyntaxKind = SyntaxKind(33);
pub const BYTE: SyntaxKind = SyntaxKind(34); pub const EQEQ: SyntaxKind = SyntaxKind(34);
pub const STRING: SyntaxKind = SyntaxKind(35); pub const FAT_ARROW: SyntaxKind = SyntaxKind(35);
pub const RAW_STRING: SyntaxKind = SyntaxKind(36); pub const NEQ: SyntaxKind = SyntaxKind(36);
pub const BYTE_STRING: SyntaxKind = SyntaxKind(37); pub const NOT: SyntaxKind = SyntaxKind(37);
pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(38); pub const LIFETIME: SyntaxKind = SyntaxKind(38);
pub const PLUS: SyntaxKind = SyntaxKind(39); pub const CHAR: SyntaxKind = SyntaxKind(39);
pub const MINUS: SyntaxKind = SyntaxKind(40); pub const BYTE: SyntaxKind = SyntaxKind(40);
pub const STAR: SyntaxKind = SyntaxKind(41); pub const STRING: SyntaxKind = SyntaxKind(41);
pub const SLASH: SyntaxKind = SyntaxKind(42); pub const RAW_STRING: SyntaxKind = SyntaxKind(42);
pub const CARET: SyntaxKind = SyntaxKind(43); pub const BYTE_STRING: SyntaxKind = SyntaxKind(43);
pub const PERCENT: SyntaxKind = SyntaxKind(44); pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(44);
pub const AMPERSAND: SyntaxKind = SyntaxKind(45); pub const PLUS: SyntaxKind = SyntaxKind(45);
pub const PIPE: SyntaxKind = SyntaxKind(46); pub const MINUS: SyntaxKind = SyntaxKind(46);
pub const THIN_ARROW: SyntaxKind = SyntaxKind(47); pub const STAR: SyntaxKind = SyntaxKind(47);
pub const COMMENT: SyntaxKind = SyntaxKind(48); pub const SLASH: SyntaxKind = SyntaxKind(48);
pub const DOC_COMMENT: SyntaxKind = SyntaxKind(49); pub const CARET: SyntaxKind = SyntaxKind(49);
pub const SHEBANG: SyntaxKind = SyntaxKind(50); pub const PERCENT: SyntaxKind = SyntaxKind(50);
pub const FILE: SyntaxKind = SyntaxKind(51); pub const AMPERSAND: SyntaxKind = SyntaxKind(51);
pub const PIPE: SyntaxKind = SyntaxKind(52);
pub const THIN_ARROW: SyntaxKind = SyntaxKind(53);
pub const COMMENT: SyntaxKind = SyntaxKind(54);
pub const DOC_COMMENT: SyntaxKind = SyntaxKind(55);
pub const SHEBANG: SyntaxKind = SyntaxKind(56);
pub const FILE: SyntaxKind = SyntaxKind(57);
static INFOS: [SyntaxInfo; 52] = [ static INFOS: [SyntaxInfo; 58] = [
SyntaxInfo { name: "USE_KW" },
SyntaxInfo { name: "FN_KW" },
SyntaxInfo { name: "STRUCT_KW" },
SyntaxInfo { name: "ENUM_KW" },
SyntaxInfo { name: "TRAIT_KW" },
SyntaxInfo { name: "IMPL_KW" },
SyntaxInfo { name: "ERROR" }, SyntaxInfo { name: "ERROR" },
SyntaxInfo { name: "IDENT" }, SyntaxInfo { name: "IDENT" },
SyntaxInfo { name: "UNDERSCORE" }, SyntaxInfo { name: "UNDERSCORE" },
@ -112,3 +124,15 @@ static INFOS: [SyntaxInfo; 52] = [
pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {
&INFOS[kind.0 as usize] &INFOS[kind.0 as usize]
} }
// Maps the exact text of an identifier to its keyword kind.
// Returns `None` for any text that is not one of the listed keywords.
// This function lives in generated code; change the generator or the
// grammar's keyword list rather than editing the arms by hand.
pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {
match ident {
"use" => Some(USE_KW),
"fn" => Some(FN_KW),
"struct" => Some(STRUCT_KW),
"enum" => Some(ENUM_KW),
"trait" => Some(TRAIT_KW),
"impl" => Some(IMPL_KW),
_ => None,
}
}

View file

@ -0,0 +1 @@
fn use struct trait enum impl

View file

@ -0,0 +1,12 @@
FN_KW 2 "fn"
WHITESPACE 1 " "
USE_KW 3 "use"
WHITESPACE 1 " "
STRUCT_KW 6 "struct"
WHITESPACE 1 " "
TRAIT_KW 5 "trait"
WHITESPACE 1 " "
ENUM_KW 4 "enum"
WHITESPACE 1 " "
IMPL_KW 4 "impl"
WHITESPACE 1 "\n"