mirror of
https://github.com/rust-lang/rust-analyzer
synced 2025-01-13 05:38:46 +00:00
Parser: guess what? Groundwork!
This commit is contained in:
parent
0af33a2587
commit
cb362626f3
9 changed files with 199 additions and 58 deletions
10
grammar.ron
10
grammar.ron
|
@ -1,4 +1,12 @@
|
||||||
Grammar(
|
Grammar(
|
||||||
|
keywords: [
|
||||||
|
"use",
|
||||||
|
"fn",
|
||||||
|
"struct",
|
||||||
|
"enum",
|
||||||
|
"trait",
|
||||||
|
"impl",
|
||||||
|
],
|
||||||
tokens: [
|
tokens: [
|
||||||
"ERROR",
|
"ERROR",
|
||||||
"IDENT",
|
"IDENT",
|
||||||
|
@ -53,6 +61,6 @@ Grammar(
|
||||||
"SHEBANG",
|
"SHEBANG",
|
||||||
],
|
],
|
||||||
nodes: [
|
nodes: [
|
||||||
"FILE"
|
"FILE",
|
||||||
]
|
]
|
||||||
)
|
)
|
|
@ -17,6 +17,7 @@ fn main() {
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
struct Grammar {
|
struct Grammar {
|
||||||
|
keywords: Vec<String>,
|
||||||
tokens: Vec<String>,
|
tokens: Vec<String>,
|
||||||
nodes: Vec<String>,
|
nodes: Vec<String>,
|
||||||
}
|
}
|
||||||
|
@ -33,8 +34,10 @@ impl Grammar {
|
||||||
acc.push_str("use tree::{SyntaxKind, SyntaxInfo};\n");
|
acc.push_str("use tree::{SyntaxKind, SyntaxInfo};\n");
|
||||||
acc.push_str("\n");
|
acc.push_str("\n");
|
||||||
|
|
||||||
let syntax_kinds: Vec<&String> =
|
let syntax_kinds: Vec<String> =
|
||||||
self.tokens.iter().chain(self.nodes.iter())
|
self.keywords.iter().map(|kw| kw_token(kw))
|
||||||
|
.chain(self.tokens.iter().cloned())
|
||||||
|
.chain(self.nodes.iter().cloned())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
for (idx, kind) in syntax_kinds.iter().enumerate() {
|
for (idx, kind) in syntax_kinds.iter().enumerate() {
|
||||||
|
@ -60,6 +63,14 @@ impl Grammar {
|
||||||
|
|
||||||
acc.push_str("pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {\n");
|
acc.push_str("pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {\n");
|
||||||
acc.push_str(" &INFOS[kind.0 as usize]\n");
|
acc.push_str(" &INFOS[kind.0 as usize]\n");
|
||||||
|
acc.push_str("}\n\n");
|
||||||
|
acc.push_str("pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {\n");
|
||||||
|
acc.push_str(" match ident {\n");
|
||||||
|
for kw in self.keywords.iter() {
|
||||||
|
write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap();
|
||||||
|
}
|
||||||
|
acc.push_str(" _ => None,\n");
|
||||||
|
acc.push_str(" }\n");
|
||||||
acc.push_str("}\n");
|
acc.push_str("}\n");
|
||||||
acc
|
acc
|
||||||
}
|
}
|
||||||
|
@ -77,4 +88,8 @@ fn generated_file() -> PathBuf {
|
||||||
|
|
||||||
fn scream(word: &str) -> String {
|
fn scream(word: &str) -> String {
|
||||||
word.chars().map(|c| c.to_ascii_uppercase()).collect()
|
word.chars().map(|c| c.to_ascii_uppercase()).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn kw_token(keyword: &str) -> String {
|
||||||
|
format!("{}_KW", scream(keyword))
|
||||||
}
|
}
|
|
@ -187,6 +187,9 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
|
||||||
return if c == '_' { UNDERSCORE } else { IDENT };
|
return if c == '_' { UNDERSCORE } else { IDENT };
|
||||||
}
|
}
|
||||||
ptr.bump_while(is_ident_continue);
|
ptr.bump_while(is_ident_continue);
|
||||||
|
if let Some(kind) = ident_to_keyword(ptr.current_token_text()) {
|
||||||
|
return kind;
|
||||||
|
}
|
||||||
IDENT
|
IDENT
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -59,6 +59,11 @@ impl<'s> Ptr<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn current_token_text(&self) -> &str {
|
||||||
|
let len: u32 = self.len.into();
|
||||||
|
&self.text[..len as usize]
|
||||||
|
}
|
||||||
|
|
||||||
fn chars(&self) -> Chars {
|
fn chars(&self) -> Chars {
|
||||||
let len: u32 = self.len.into();
|
let len: u32 = self.len.into();
|
||||||
self.text[len as usize ..].chars()
|
self.text[len as usize ..].chars()
|
||||||
|
|
|
@ -3,8 +3,68 @@ use super::parser::Parser;
|
||||||
|
|
||||||
use syntax_kinds::*;
|
use syntax_kinds::*;
|
||||||
|
|
||||||
|
// Items //
|
||||||
|
|
||||||
pub fn file(p: &mut Parser) {
|
pub fn file(p: &mut Parser) {
|
||||||
p.start(FILE);
|
p.start(FILE);
|
||||||
//TODO: parse_shebang
|
shebang(p);
|
||||||
|
inner_attributes(p);
|
||||||
|
mod_items(p);
|
||||||
|
p.finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
type Result = ::std::result::Result<(), ()>;
|
||||||
|
const OK: Result = Ok(());
|
||||||
|
const ERR: Result = Err(());
|
||||||
|
|
||||||
|
fn shebang(_: &mut Parser) {
|
||||||
|
//TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
fn inner_attributes(_: &mut Parser) {
|
||||||
|
//TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mod_items(p: &mut Parser) {
|
||||||
|
loop {
|
||||||
|
skip_until_item(p);
|
||||||
|
if p.is_eof() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if item(p).is_err() {
|
||||||
|
skip_one_token(p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn item(p: &mut Parser) -> Result {
|
||||||
|
outer_attributes(p)?;
|
||||||
|
visibility(p)?;
|
||||||
|
ERR
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Paths, types, attributes, and stuff //
|
||||||
|
|
||||||
|
fn outer_attributes(_: &mut Parser) -> Result {
|
||||||
|
OK
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visibility(_: &mut Parser) -> Result {
|
||||||
|
OK
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expressions //
|
||||||
|
|
||||||
|
// Error recovery and high-order utils //
|
||||||
|
|
||||||
|
fn skip_until_item(_: &mut Parser) {
|
||||||
|
//TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
fn skip_one_token(p: &mut Parser) {
|
||||||
|
p.start(ERROR);
|
||||||
|
p.bump().unwrap();
|
||||||
p.finish();
|
p.finish();
|
||||||
}
|
}
|
|
@ -34,10 +34,14 @@ impl<'t> Parser<'t> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn into_events(self) -> Vec<Event> {
|
pub(crate) fn into_events(self) -> Vec<Event> {
|
||||||
assert!(self.pos == self.non_ws_tokens.len());
|
assert!(self.is_eof());
|
||||||
self.events
|
self.events
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn is_eof(&self) -> bool {
|
||||||
|
self.pos == self.non_ws_tokens.len()
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn start(&mut self, kind: SyntaxKind) {
|
pub(crate) fn start(&mut self, kind: SyntaxKind) {
|
||||||
self.event(Event::Start { kind });
|
self.event(Event::Start { kind });
|
||||||
}
|
}
|
||||||
|
@ -46,6 +50,15 @@ impl<'t> Parser<'t> {
|
||||||
self.event(Event::Finish);
|
self.event(Event::Finish);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn bump(&mut self) -> Option<SyntaxKind> {
|
||||||
|
if self.is_eof() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let idx = self.non_ws_tokens[self.pos].0;
|
||||||
|
self.pos += 1;
|
||||||
|
Some(self.raw_tokens[idx].kind)
|
||||||
|
}
|
||||||
|
|
||||||
fn event(&mut self, event: Event) {
|
fn event(&mut self, event: Event) {
|
||||||
self.events.push(event)
|
self.events.push(event)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,60 +1,72 @@
|
||||||
// Generated from grammar.ron
|
// Generated from grammar.ron
|
||||||
use tree::{SyntaxKind, SyntaxInfo};
|
use tree::{SyntaxKind, SyntaxInfo};
|
||||||
|
|
||||||
pub const ERROR: SyntaxKind = SyntaxKind(0);
|
pub const USE_KW: SyntaxKind = SyntaxKind(0);
|
||||||
pub const IDENT: SyntaxKind = SyntaxKind(1);
|
pub const FN_KW: SyntaxKind = SyntaxKind(1);
|
||||||
pub const UNDERSCORE: SyntaxKind = SyntaxKind(2);
|
pub const STRUCT_KW: SyntaxKind = SyntaxKind(2);
|
||||||
pub const WHITESPACE: SyntaxKind = SyntaxKind(3);
|
pub const ENUM_KW: SyntaxKind = SyntaxKind(3);
|
||||||
pub const INT_NUMBER: SyntaxKind = SyntaxKind(4);
|
pub const TRAIT_KW: SyntaxKind = SyntaxKind(4);
|
||||||
pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5);
|
pub const IMPL_KW: SyntaxKind = SyntaxKind(5);
|
||||||
pub const SEMI: SyntaxKind = SyntaxKind(6);
|
pub const ERROR: SyntaxKind = SyntaxKind(6);
|
||||||
pub const COMMA: SyntaxKind = SyntaxKind(7);
|
pub const IDENT: SyntaxKind = SyntaxKind(7);
|
||||||
pub const DOT: SyntaxKind = SyntaxKind(8);
|
pub const UNDERSCORE: SyntaxKind = SyntaxKind(8);
|
||||||
pub const DOTDOT: SyntaxKind = SyntaxKind(9);
|
pub const WHITESPACE: SyntaxKind = SyntaxKind(9);
|
||||||
pub const DOTDOTDOT: SyntaxKind = SyntaxKind(10);
|
pub const INT_NUMBER: SyntaxKind = SyntaxKind(10);
|
||||||
pub const DOTDOTEQ: SyntaxKind = SyntaxKind(11);
|
pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(11);
|
||||||
pub const L_PAREN: SyntaxKind = SyntaxKind(12);
|
pub const SEMI: SyntaxKind = SyntaxKind(12);
|
||||||
pub const R_PAREN: SyntaxKind = SyntaxKind(13);
|
pub const COMMA: SyntaxKind = SyntaxKind(13);
|
||||||
pub const L_CURLY: SyntaxKind = SyntaxKind(14);
|
pub const DOT: SyntaxKind = SyntaxKind(14);
|
||||||
pub const R_CURLY: SyntaxKind = SyntaxKind(15);
|
pub const DOTDOT: SyntaxKind = SyntaxKind(15);
|
||||||
pub const L_BRACK: SyntaxKind = SyntaxKind(16);
|
pub const DOTDOTDOT: SyntaxKind = SyntaxKind(16);
|
||||||
pub const R_BRACK: SyntaxKind = SyntaxKind(17);
|
pub const DOTDOTEQ: SyntaxKind = SyntaxKind(17);
|
||||||
pub const L_ANGLE: SyntaxKind = SyntaxKind(18);
|
pub const L_PAREN: SyntaxKind = SyntaxKind(18);
|
||||||
pub const R_ANGLE: SyntaxKind = SyntaxKind(19);
|
pub const R_PAREN: SyntaxKind = SyntaxKind(19);
|
||||||
pub const AT: SyntaxKind = SyntaxKind(20);
|
pub const L_CURLY: SyntaxKind = SyntaxKind(20);
|
||||||
pub const POUND: SyntaxKind = SyntaxKind(21);
|
pub const R_CURLY: SyntaxKind = SyntaxKind(21);
|
||||||
pub const TILDE: SyntaxKind = SyntaxKind(22);
|
pub const L_BRACK: SyntaxKind = SyntaxKind(22);
|
||||||
pub const QUESTION: SyntaxKind = SyntaxKind(23);
|
pub const R_BRACK: SyntaxKind = SyntaxKind(23);
|
||||||
pub const COLON: SyntaxKind = SyntaxKind(24);
|
pub const L_ANGLE: SyntaxKind = SyntaxKind(24);
|
||||||
pub const COLONCOLON: SyntaxKind = SyntaxKind(25);
|
pub const R_ANGLE: SyntaxKind = SyntaxKind(25);
|
||||||
pub const DOLLAR: SyntaxKind = SyntaxKind(26);
|
pub const AT: SyntaxKind = SyntaxKind(26);
|
||||||
pub const EQ: SyntaxKind = SyntaxKind(27);
|
pub const POUND: SyntaxKind = SyntaxKind(27);
|
||||||
pub const EQEQ: SyntaxKind = SyntaxKind(28);
|
pub const TILDE: SyntaxKind = SyntaxKind(28);
|
||||||
pub const FAT_ARROW: SyntaxKind = SyntaxKind(29);
|
pub const QUESTION: SyntaxKind = SyntaxKind(29);
|
||||||
pub const NEQ: SyntaxKind = SyntaxKind(30);
|
pub const COLON: SyntaxKind = SyntaxKind(30);
|
||||||
pub const NOT: SyntaxKind = SyntaxKind(31);
|
pub const COLONCOLON: SyntaxKind = SyntaxKind(31);
|
||||||
pub const LIFETIME: SyntaxKind = SyntaxKind(32);
|
pub const DOLLAR: SyntaxKind = SyntaxKind(32);
|
||||||
pub const CHAR: SyntaxKind = SyntaxKind(33);
|
pub const EQ: SyntaxKind = SyntaxKind(33);
|
||||||
pub const BYTE: SyntaxKind = SyntaxKind(34);
|
pub const EQEQ: SyntaxKind = SyntaxKind(34);
|
||||||
pub const STRING: SyntaxKind = SyntaxKind(35);
|
pub const FAT_ARROW: SyntaxKind = SyntaxKind(35);
|
||||||
pub const RAW_STRING: SyntaxKind = SyntaxKind(36);
|
pub const NEQ: SyntaxKind = SyntaxKind(36);
|
||||||
pub const BYTE_STRING: SyntaxKind = SyntaxKind(37);
|
pub const NOT: SyntaxKind = SyntaxKind(37);
|
||||||
pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(38);
|
pub const LIFETIME: SyntaxKind = SyntaxKind(38);
|
||||||
pub const PLUS: SyntaxKind = SyntaxKind(39);
|
pub const CHAR: SyntaxKind = SyntaxKind(39);
|
||||||
pub const MINUS: SyntaxKind = SyntaxKind(40);
|
pub const BYTE: SyntaxKind = SyntaxKind(40);
|
||||||
pub const STAR: SyntaxKind = SyntaxKind(41);
|
pub const STRING: SyntaxKind = SyntaxKind(41);
|
||||||
pub const SLASH: SyntaxKind = SyntaxKind(42);
|
pub const RAW_STRING: SyntaxKind = SyntaxKind(42);
|
||||||
pub const CARET: SyntaxKind = SyntaxKind(43);
|
pub const BYTE_STRING: SyntaxKind = SyntaxKind(43);
|
||||||
pub const PERCENT: SyntaxKind = SyntaxKind(44);
|
pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(44);
|
||||||
pub const AMPERSAND: SyntaxKind = SyntaxKind(45);
|
pub const PLUS: SyntaxKind = SyntaxKind(45);
|
||||||
pub const PIPE: SyntaxKind = SyntaxKind(46);
|
pub const MINUS: SyntaxKind = SyntaxKind(46);
|
||||||
pub const THIN_ARROW: SyntaxKind = SyntaxKind(47);
|
pub const STAR: SyntaxKind = SyntaxKind(47);
|
||||||
pub const COMMENT: SyntaxKind = SyntaxKind(48);
|
pub const SLASH: SyntaxKind = SyntaxKind(48);
|
||||||
pub const DOC_COMMENT: SyntaxKind = SyntaxKind(49);
|
pub const CARET: SyntaxKind = SyntaxKind(49);
|
||||||
pub const SHEBANG: SyntaxKind = SyntaxKind(50);
|
pub const PERCENT: SyntaxKind = SyntaxKind(50);
|
||||||
pub const FILE: SyntaxKind = SyntaxKind(51);
|
pub const AMPERSAND: SyntaxKind = SyntaxKind(51);
|
||||||
|
pub const PIPE: SyntaxKind = SyntaxKind(52);
|
||||||
|
pub const THIN_ARROW: SyntaxKind = SyntaxKind(53);
|
||||||
|
pub const COMMENT: SyntaxKind = SyntaxKind(54);
|
||||||
|
pub const DOC_COMMENT: SyntaxKind = SyntaxKind(55);
|
||||||
|
pub const SHEBANG: SyntaxKind = SyntaxKind(56);
|
||||||
|
pub const FILE: SyntaxKind = SyntaxKind(57);
|
||||||
|
|
||||||
static INFOS: [SyntaxInfo; 52] = [
|
static INFOS: [SyntaxInfo; 58] = [
|
||||||
|
SyntaxInfo { name: "USE_KW" },
|
||||||
|
SyntaxInfo { name: "FN_KW" },
|
||||||
|
SyntaxInfo { name: "STRUCT_KW" },
|
||||||
|
SyntaxInfo { name: "ENUM_KW" },
|
||||||
|
SyntaxInfo { name: "TRAIT_KW" },
|
||||||
|
SyntaxInfo { name: "IMPL_KW" },
|
||||||
SyntaxInfo { name: "ERROR" },
|
SyntaxInfo { name: "ERROR" },
|
||||||
SyntaxInfo { name: "IDENT" },
|
SyntaxInfo { name: "IDENT" },
|
||||||
SyntaxInfo { name: "UNDERSCORE" },
|
SyntaxInfo { name: "UNDERSCORE" },
|
||||||
|
@ -112,3 +124,15 @@ static INFOS: [SyntaxInfo; 52] = [
|
||||||
pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {
|
pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {
|
||||||
&INFOS[kind.0 as usize]
|
&INFOS[kind.0 as usize]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {
|
||||||
|
match ident {
|
||||||
|
"use" => Some(USE_KW),
|
||||||
|
"fn" => Some(FN_KW),
|
||||||
|
"struct" => Some(STRUCT_KW),
|
||||||
|
"enum" => Some(ENUM_KW),
|
||||||
|
"trait" => Some(TRAIT_KW),
|
||||||
|
"impl" => Some(IMPL_KW),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
1
tests/data/lexer/0011_keywords.rs
Normal file
1
tests/data/lexer/0011_keywords.rs
Normal file
|
@ -0,0 +1 @@
|
||||||
|
fn use struct trait enum impl
|
12
tests/data/lexer/0011_keywords.txt
Normal file
12
tests/data/lexer/0011_keywords.txt
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
FN_KW 2 "fn"
|
||||||
|
WHITESPACE 1 " "
|
||||||
|
USE_KW 3 "use"
|
||||||
|
WHITESPACE 1 " "
|
||||||
|
STRUCT_KW 6 "struct"
|
||||||
|
WHITESPACE 1 " "
|
||||||
|
TRAIT_KW 5 "trait"
|
||||||
|
WHITESPACE 1 " "
|
||||||
|
ENUM_KW 4 "enum"
|
||||||
|
WHITESPACE 1 " "
|
||||||
|
IMPL_KW 4 "impl"
|
||||||
|
WHITESPACE 1 "\n"
|
Loading…
Reference in a new issue