mirror of
https://github.com/rust-lang/rust-analyzer
synced 2025-01-13 05:38:46 +00:00
Parser: guess what? Groundwork!
This commit is contained in:
parent
0af33a2587
commit
cb362626f3
9 changed files with 199 additions and 58 deletions
10
grammar.ron
10
grammar.ron
|
@ -1,4 +1,12 @@
|
|||
Grammar(
|
||||
keywords: [
|
||||
"use",
|
||||
"fn",
|
||||
"struct",
|
||||
"enum",
|
||||
"trait",
|
||||
"impl",
|
||||
],
|
||||
tokens: [
|
||||
"ERROR",
|
||||
"IDENT",
|
||||
|
@ -53,6 +61,6 @@ Grammar(
|
|||
"SHEBANG",
|
||||
],
|
||||
nodes: [
|
||||
"FILE"
|
||||
"FILE",
|
||||
]
|
||||
)
|
|
@ -17,6 +17,7 @@ fn main() {
|
|||
|
||||
#[derive(Deserialize)]
|
||||
struct Grammar {
|
||||
keywords: Vec<String>,
|
||||
tokens: Vec<String>,
|
||||
nodes: Vec<String>,
|
||||
}
|
||||
|
@ -33,8 +34,10 @@ impl Grammar {
|
|||
acc.push_str("use tree::{SyntaxKind, SyntaxInfo};\n");
|
||||
acc.push_str("\n");
|
||||
|
||||
let syntax_kinds: Vec<&String> =
|
||||
self.tokens.iter().chain(self.nodes.iter())
|
||||
let syntax_kinds: Vec<String> =
|
||||
self.keywords.iter().map(|kw| kw_token(kw))
|
||||
.chain(self.tokens.iter().cloned())
|
||||
.chain(self.nodes.iter().cloned())
|
||||
.collect();
|
||||
|
||||
for (idx, kind) in syntax_kinds.iter().enumerate() {
|
||||
|
@ -60,6 +63,14 @@ impl Grammar {
|
|||
|
||||
acc.push_str("pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {\n");
|
||||
acc.push_str(" &INFOS[kind.0 as usize]\n");
|
||||
acc.push_str("}\n\n");
|
||||
acc.push_str("pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {\n");
|
||||
acc.push_str(" match ident {\n");
|
||||
for kw in self.keywords.iter() {
|
||||
write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap();
|
||||
}
|
||||
acc.push_str(" _ => None,\n");
|
||||
acc.push_str(" }\n");
|
||||
acc.push_str("}\n");
|
||||
acc
|
||||
}
|
||||
|
@ -78,3 +89,7 @@ fn generated_file() -> PathBuf {
|
|||
/// Returns `word` with every ASCII lowercase letter upper-cased.
///
/// Non-ASCII characters are passed through unchanged.
fn scream(word: &str) -> String {
    word.to_ascii_uppercase()
}
|
||||
|
||||
fn kw_token(keyword: &str) -> String {
|
||||
format!("{}_KW", scream(keyword))
|
||||
}
|
|
@ -187,6 +187,9 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
|
|||
return if c == '_' { UNDERSCORE } else { IDENT };
|
||||
}
|
||||
ptr.bump_while(is_ident_continue);
|
||||
if let Some(kind) = ident_to_keyword(ptr.current_token_text()) {
|
||||
return kind;
|
||||
}
|
||||
IDENT
|
||||
}
|
||||
|
||||
|
|
|
@ -59,6 +59,11 @@ impl<'s> Ptr<'s> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn current_token_text(&self) -> &str {
|
||||
let len: u32 = self.len.into();
|
||||
&self.text[..len as usize]
|
||||
}
|
||||
|
||||
fn chars(&self) -> Chars {
|
||||
let len: u32 = self.len.into();
|
||||
self.text[len as usize ..].chars()
|
||||
|
|
|
@ -3,8 +3,68 @@ use super::parser::Parser;
|
|||
|
||||
use syntax_kinds::*;
|
||||
|
||||
// Items //
|
||||
|
||||
pub fn file(p: &mut Parser) {
|
||||
p.start(FILE);
|
||||
//TODO: parse_shebang
|
||||
shebang(p);
|
||||
inner_attributes(p);
|
||||
mod_items(p);
|
||||
p.finish();
|
||||
}
|
||||
|
||||
/// Outcome of a single parsing routine; carries no payload either way.
type Result = ::std::result::Result<(), ()>;
/// Shorthand for a successful parse step.
const OK: Result = Ok(());
/// Shorthand for a failed parse step.
const ERR: Result = Err(());
|
||||
|
||||
fn shebang(_: &mut Parser) {
|
||||
//TODO
|
||||
}
|
||||
|
||||
fn inner_attributes(_: &mut Parser) {
|
||||
//TODO
|
||||
}
|
||||
|
||||
fn mod_items(p: &mut Parser) {
|
||||
loop {
|
||||
skip_until_item(p);
|
||||
if p.is_eof() {
|
||||
return;
|
||||
}
|
||||
if item(p).is_err() {
|
||||
skip_one_token(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn item(p: &mut Parser) -> Result {
|
||||
outer_attributes(p)?;
|
||||
visibility(p)?;
|
||||
ERR
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Paths, types, attributes, and stuff //
|
||||
|
||||
fn outer_attributes(_: &mut Parser) -> Result {
|
||||
OK
|
||||
}
|
||||
|
||||
fn visibility(_: &mut Parser) -> Result {
|
||||
OK
|
||||
}
|
||||
|
||||
// Expressions //
|
||||
|
||||
// Error recovery and high-order utils //
|
||||
|
||||
fn skip_until_item(_: &mut Parser) {
|
||||
//TODO
|
||||
}
|
||||
|
||||
fn skip_one_token(p: &mut Parser) {
|
||||
p.start(ERROR);
|
||||
p.bump().unwrap();
|
||||
p.finish();
|
||||
}
|
|
@ -34,10 +34,14 @@ impl<'t> Parser<'t> {
|
|||
}
|
||||
|
||||
/// Consumes the parser and returns the events it recorded.
///
/// # Panics
///
/// Panics if the parser has not reached end of input.
pub(crate) fn into_events(self) -> Vec<Event> {
    assert!(self.is_eof());
    self.events
}
|
||||
|
||||
pub(crate) fn is_eof(&self) -> bool {
|
||||
self.pos == self.non_ws_tokens.len()
|
||||
}
|
||||
|
||||
pub(crate) fn start(&mut self, kind: SyntaxKind) {
|
||||
self.event(Event::Start { kind });
|
||||
}
|
||||
|
@ -46,6 +50,15 @@ impl<'t> Parser<'t> {
|
|||
self.event(Event::Finish);
|
||||
}
|
||||
|
||||
/// Advances past the current non-whitespace token and returns its kind.
///
/// Returns `None`, without moving, when the parser is at end of input.
pub(crate) fn bump(&mut self) -> Option<SyntaxKind> {
    if self.is_eof() {
        return None;
    }
    // The first tuple field of a `non_ws_tokens` entry indexes `raw_tokens`.
    let idx = self.non_ws_tokens[self.pos].0;
    self.pos += 1;
    Some(self.raw_tokens[idx].kind)
}
|
||||
|
||||
/// Appends `event` to the recorded event list.
fn event(&mut self, event: Event) {
    self.events.push(event)
}
|
||||
|
|
|
@ -1,60 +1,72 @@
|
|||
// Generated from grammar.ron
|
||||
use tree::{SyntaxKind, SyntaxInfo};
|
||||
|
||||
pub const ERROR: SyntaxKind = SyntaxKind(0);
|
||||
pub const IDENT: SyntaxKind = SyntaxKind(1);
|
||||
pub const UNDERSCORE: SyntaxKind = SyntaxKind(2);
|
||||
pub const WHITESPACE: SyntaxKind = SyntaxKind(3);
|
||||
pub const INT_NUMBER: SyntaxKind = SyntaxKind(4);
|
||||
pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5);
|
||||
pub const SEMI: SyntaxKind = SyntaxKind(6);
|
||||
pub const COMMA: SyntaxKind = SyntaxKind(7);
|
||||
pub const DOT: SyntaxKind = SyntaxKind(8);
|
||||
pub const DOTDOT: SyntaxKind = SyntaxKind(9);
|
||||
pub const DOTDOTDOT: SyntaxKind = SyntaxKind(10);
|
||||
pub const DOTDOTEQ: SyntaxKind = SyntaxKind(11);
|
||||
pub const L_PAREN: SyntaxKind = SyntaxKind(12);
|
||||
pub const R_PAREN: SyntaxKind = SyntaxKind(13);
|
||||
pub const L_CURLY: SyntaxKind = SyntaxKind(14);
|
||||
pub const R_CURLY: SyntaxKind = SyntaxKind(15);
|
||||
pub const L_BRACK: SyntaxKind = SyntaxKind(16);
|
||||
pub const R_BRACK: SyntaxKind = SyntaxKind(17);
|
||||
pub const L_ANGLE: SyntaxKind = SyntaxKind(18);
|
||||
pub const R_ANGLE: SyntaxKind = SyntaxKind(19);
|
||||
pub const AT: SyntaxKind = SyntaxKind(20);
|
||||
pub const POUND: SyntaxKind = SyntaxKind(21);
|
||||
pub const TILDE: SyntaxKind = SyntaxKind(22);
|
||||
pub const QUESTION: SyntaxKind = SyntaxKind(23);
|
||||
pub const COLON: SyntaxKind = SyntaxKind(24);
|
||||
pub const COLONCOLON: SyntaxKind = SyntaxKind(25);
|
||||
pub const DOLLAR: SyntaxKind = SyntaxKind(26);
|
||||
pub const EQ: SyntaxKind = SyntaxKind(27);
|
||||
pub const EQEQ: SyntaxKind = SyntaxKind(28);
|
||||
pub const FAT_ARROW: SyntaxKind = SyntaxKind(29);
|
||||
pub const NEQ: SyntaxKind = SyntaxKind(30);
|
||||
pub const NOT: SyntaxKind = SyntaxKind(31);
|
||||
pub const LIFETIME: SyntaxKind = SyntaxKind(32);
|
||||
pub const CHAR: SyntaxKind = SyntaxKind(33);
|
||||
pub const BYTE: SyntaxKind = SyntaxKind(34);
|
||||
pub const STRING: SyntaxKind = SyntaxKind(35);
|
||||
pub const RAW_STRING: SyntaxKind = SyntaxKind(36);
|
||||
pub const BYTE_STRING: SyntaxKind = SyntaxKind(37);
|
||||
pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(38);
|
||||
pub const PLUS: SyntaxKind = SyntaxKind(39);
|
||||
pub const MINUS: SyntaxKind = SyntaxKind(40);
|
||||
pub const STAR: SyntaxKind = SyntaxKind(41);
|
||||
pub const SLASH: SyntaxKind = SyntaxKind(42);
|
||||
pub const CARET: SyntaxKind = SyntaxKind(43);
|
||||
pub const PERCENT: SyntaxKind = SyntaxKind(44);
|
||||
pub const AMPERSAND: SyntaxKind = SyntaxKind(45);
|
||||
pub const PIPE: SyntaxKind = SyntaxKind(46);
|
||||
pub const THIN_ARROW: SyntaxKind = SyntaxKind(47);
|
||||
pub const COMMENT: SyntaxKind = SyntaxKind(48);
|
||||
pub const DOC_COMMENT: SyntaxKind = SyntaxKind(49);
|
||||
pub const SHEBANG: SyntaxKind = SyntaxKind(50);
|
||||
pub const FILE: SyntaxKind = SyntaxKind(51);
|
||||
pub const USE_KW: SyntaxKind = SyntaxKind(0);
|
||||
pub const FN_KW: SyntaxKind = SyntaxKind(1);
|
||||
pub const STRUCT_KW: SyntaxKind = SyntaxKind(2);
|
||||
pub const ENUM_KW: SyntaxKind = SyntaxKind(3);
|
||||
pub const TRAIT_KW: SyntaxKind = SyntaxKind(4);
|
||||
pub const IMPL_KW: SyntaxKind = SyntaxKind(5);
|
||||
pub const ERROR: SyntaxKind = SyntaxKind(6);
|
||||
pub const IDENT: SyntaxKind = SyntaxKind(7);
|
||||
pub const UNDERSCORE: SyntaxKind = SyntaxKind(8);
|
||||
pub const WHITESPACE: SyntaxKind = SyntaxKind(9);
|
||||
pub const INT_NUMBER: SyntaxKind = SyntaxKind(10);
|
||||
pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(11);
|
||||
pub const SEMI: SyntaxKind = SyntaxKind(12);
|
||||
pub const COMMA: SyntaxKind = SyntaxKind(13);
|
||||
pub const DOT: SyntaxKind = SyntaxKind(14);
|
||||
pub const DOTDOT: SyntaxKind = SyntaxKind(15);
|
||||
pub const DOTDOTDOT: SyntaxKind = SyntaxKind(16);
|
||||
pub const DOTDOTEQ: SyntaxKind = SyntaxKind(17);
|
||||
pub const L_PAREN: SyntaxKind = SyntaxKind(18);
|
||||
pub const R_PAREN: SyntaxKind = SyntaxKind(19);
|
||||
pub const L_CURLY: SyntaxKind = SyntaxKind(20);
|
||||
pub const R_CURLY: SyntaxKind = SyntaxKind(21);
|
||||
pub const L_BRACK: SyntaxKind = SyntaxKind(22);
|
||||
pub const R_BRACK: SyntaxKind = SyntaxKind(23);
|
||||
pub const L_ANGLE: SyntaxKind = SyntaxKind(24);
|
||||
pub const R_ANGLE: SyntaxKind = SyntaxKind(25);
|
||||
pub const AT: SyntaxKind = SyntaxKind(26);
|
||||
pub const POUND: SyntaxKind = SyntaxKind(27);
|
||||
pub const TILDE: SyntaxKind = SyntaxKind(28);
|
||||
pub const QUESTION: SyntaxKind = SyntaxKind(29);
|
||||
pub const COLON: SyntaxKind = SyntaxKind(30);
|
||||
pub const COLONCOLON: SyntaxKind = SyntaxKind(31);
|
||||
pub const DOLLAR: SyntaxKind = SyntaxKind(32);
|
||||
pub const EQ: SyntaxKind = SyntaxKind(33);
|
||||
pub const EQEQ: SyntaxKind = SyntaxKind(34);
|
||||
pub const FAT_ARROW: SyntaxKind = SyntaxKind(35);
|
||||
pub const NEQ: SyntaxKind = SyntaxKind(36);
|
||||
pub const NOT: SyntaxKind = SyntaxKind(37);
|
||||
pub const LIFETIME: SyntaxKind = SyntaxKind(38);
|
||||
pub const CHAR: SyntaxKind = SyntaxKind(39);
|
||||
pub const BYTE: SyntaxKind = SyntaxKind(40);
|
||||
pub const STRING: SyntaxKind = SyntaxKind(41);
|
||||
pub const RAW_STRING: SyntaxKind = SyntaxKind(42);
|
||||
pub const BYTE_STRING: SyntaxKind = SyntaxKind(43);
|
||||
pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(44);
|
||||
pub const PLUS: SyntaxKind = SyntaxKind(45);
|
||||
pub const MINUS: SyntaxKind = SyntaxKind(46);
|
||||
pub const STAR: SyntaxKind = SyntaxKind(47);
|
||||
pub const SLASH: SyntaxKind = SyntaxKind(48);
|
||||
pub const CARET: SyntaxKind = SyntaxKind(49);
|
||||
pub const PERCENT: SyntaxKind = SyntaxKind(50);
|
||||
pub const AMPERSAND: SyntaxKind = SyntaxKind(51);
|
||||
pub const PIPE: SyntaxKind = SyntaxKind(52);
|
||||
pub const THIN_ARROW: SyntaxKind = SyntaxKind(53);
|
||||
pub const COMMENT: SyntaxKind = SyntaxKind(54);
|
||||
pub const DOC_COMMENT: SyntaxKind = SyntaxKind(55);
|
||||
pub const SHEBANG: SyntaxKind = SyntaxKind(56);
|
||||
pub const FILE: SyntaxKind = SyntaxKind(57);
|
||||
|
||||
static INFOS: [SyntaxInfo; 52] = [
|
||||
static INFOS: [SyntaxInfo; 58] = [
|
||||
SyntaxInfo { name: "USE_KW" },
|
||||
SyntaxInfo { name: "FN_KW" },
|
||||
SyntaxInfo { name: "STRUCT_KW" },
|
||||
SyntaxInfo { name: "ENUM_KW" },
|
||||
SyntaxInfo { name: "TRAIT_KW" },
|
||||
SyntaxInfo { name: "IMPL_KW" },
|
||||
SyntaxInfo { name: "ERROR" },
|
||||
SyntaxInfo { name: "IDENT" },
|
||||
SyntaxInfo { name: "UNDERSCORE" },
|
||||
|
@ -112,3 +124,15 @@ static INFOS: [SyntaxInfo; 52] = [
|
|||
pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {
|
||||
&INFOS[kind.0 as usize]
|
||||
}
|
||||
|
||||
pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {
|
||||
match ident {
|
||||
"use" => Some(USE_KW),
|
||||
"fn" => Some(FN_KW),
|
||||
"struct" => Some(STRUCT_KW),
|
||||
"enum" => Some(ENUM_KW),
|
||||
"trait" => Some(TRAIT_KW),
|
||||
"impl" => Some(IMPL_KW),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
1
tests/data/lexer/0011_keywords.rs
Normal file
1
tests/data/lexer/0011_keywords.rs
Normal file
|
@ -0,0 +1 @@
|
|||
fn use struct trait enum impl
|
12
tests/data/lexer/0011_keywords.txt
Normal file
12
tests/data/lexer/0011_keywords.txt
Normal file
|
@ -0,0 +1,12 @@
|
|||
FN_KW 2 "fn"
|
||||
WHITESPACE 1 " "
|
||||
USE_KW 3 "use"
|
||||
WHITESPACE 1 " "
|
||||
STRUCT_KW 6 "struct"
|
||||
WHITESPACE 1 " "
|
||||
TRAIT_KW 5 "trait"
|
||||
WHITESPACE 1 " "
|
||||
ENUM_KW 4 "enum"
|
||||
WHITESPACE 1 " "
|
||||
IMPL_KW 4 "impl"
|
||||
WHITESPACE 1 "\n"
|
Loading…
Reference in a new issue