Support contextual tokens

This commit is contained in:
Aleksey Kladov 2018-02-04 16:46:26 +03:00
parent 852543212b
commit 85c42fba12
11 changed files with 155 additions and 72 deletions

View file

@ -27,6 +27,11 @@ Grammar(
"mut", "mut",
"unsafe", "unsafe",
], ],
contextual_keywords: [
"auto",
"default",
"union",
],
tokens: [ tokens: [
"ERROR", "ERROR",
"IDENT", "IDENT",

View file

@ -1,4 +1,4 @@
use {File, FileBuilder, Sink, SyntaxKind, Token}; use {File, FileBuilder, Sink, SyntaxKind, Token, TextUnit};
use syntax_kinds::TOMBSTONE; use syntax_kinds::TOMBSTONE;
use super::is_insignificant; use super::is_insignificant;
@ -120,18 +120,25 @@ pub(super) fn to_file(text: String, tokens: &[Token], events: Vec<Event>) -> Fil
builder.finish_internal() builder.finish_internal()
} }
&Event::Token { &Event::Token {
kind: _, kind,
mut n_raw_tokens, mut n_raw_tokens,
} => loop { } => {
let token = tokens[idx]; // FIXME: currently, we attach whitespace to some random node
if !is_insignificant(token.kind) { // this should be done in a sensible manner instead
n_raw_tokens -= 1; loop {
let token = tokens[idx];
if !is_insignificant(token.kind) {
break;
}
builder.leaf(token.kind, token.len);
idx += 1
} }
idx += 1; let mut len = TextUnit::new(0);
builder.leaf(token.kind, token.len); for _ in 0..n_raw_tokens {
if n_raw_tokens == 0 { len += tokens[idx].len;
break; idx += 1;
} }
builder.leaf(kind, len);
}, },
&Event::Error { ref message } => builder.error().message(message.clone()).emit(), &Event::Error { ref message } => builder.error().message(message.clone()).emit(),
} }

View file

@ -81,7 +81,6 @@ fn item(p: &mut Parser) {
CONST_ITEM CONST_ITEM
} }
}, },
// TODO: auto trait
// test unsafe_trait // test unsafe_trait
// unsafe trait T {} // unsafe trait T {}
UNSAFE_KW if la == TRAIT_KW => { UNSAFE_KW if la == TRAIT_KW => {
@ -89,7 +88,16 @@ fn item(p: &mut Parser) {
traits::trait_item(p); traits::trait_item(p);
TRAIT_ITEM TRAIT_ITEM
} }
// TODO: default impl
// test unsafe_auto_trait
// unsafe auto trait T {}
UNSAFE_KW if p.at_kw(1, "auto") && p.nth(2) == TRAIT_KW => {
p.bump();
p.bump_remap(AUTO_KW);
traits::trait_item(p);
TRAIT_ITEM
}
// test unsafe_impl // test unsafe_impl
// unsafe impl Foo {} // unsafe impl Foo {}
UNSAFE_KW if la == IMPL_KW => { UNSAFE_KW if la == IMPL_KW => {
@ -97,6 +105,16 @@ fn item(p: &mut Parser) {
traits::impl_item(p); traits::impl_item(p);
IMPL_ITEM IMPL_ITEM
} }
// test unsafe_default_impl
// unsafe default impl Foo {}
UNSAFE_KW if p.at_kw(1, "default") && p.nth(2) == IMPL_KW => {
p.bump();
p.bump_remap(DEFAULT_KW);
traits::impl_item(p);
IMPL_ITEM
}
MOD_KW => { MOD_KW => {
mod_item(p); mod_item(p);
MOD_ITEM MOD_ITEM

View file

@ -46,9 +46,10 @@ impl<'t> ParserInput<'t> {
if !(idx < self.tokens.len()) { if !(idx < self.tokens.len()) {
return ""; return "";
} }
let start_offset = self.start_offsets[idx]; let range = TextRange::from_len(
let end_offset = self.tokens[idx].len; self.start_offsets[idx],
let range = TextRange::from_to(start_offset, end_offset); self.tokens[idx].len
);
&self.text[range] &self.text[range]
} }
} }

View file

@ -1,6 +1,6 @@
use super::Event; use super::Event;
use super::input::{InputPosition, ParserInput}; use super::input::{InputPosition, ParserInput};
use SyntaxKind::{self, EOF, TOMBSTONE}; use SyntaxKind::{self, EOF, TOMBSTONE, IDENT};
pub(crate) struct Marker { pub(crate) struct Marker {
pos: u32, pos: u32,
@ -145,14 +145,31 @@ impl<'t> Parser<'t> {
}); });
} }
/// Consumes the current token and records it in the event stream under
/// `kind` rather than the kind reported by the parser input.
///
/// The emitted `Event::Token` always covers exactly one raw token. If the
/// parser is already at `EOF`, nothing is consumed and no event is emitted.
pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
if self.current() == EOF {
// TODO: panic!?
return;
}
// Step past the token, then log it with the caller-chosen kind.
self.pos += 1;
self.event(Event::Token {
kind,
n_raw_tokens: 1,
});
}
pub(crate) fn nth(&self, n: u32) -> SyntaxKind { pub(crate) fn nth(&self, n: u32) -> SyntaxKind {
self.inp.kind(self.pos + n) self.inp.kind(self.pos + n)
} }
/// Checks whether the token `n` positions ahead of the current one is an
/// `IDENT` whose source text is exactly `t`.
///
/// `n == 0` refers to the current token.
pub(crate) fn at_kw(&self, n: u32, t: &str) -> bool {
self.nth(n) == IDENT && self.inp.text(self.pos + n) == t
}
pub(crate) fn current(&self) -> SyntaxKind { pub(crate) fn current(&self) -> SyntaxKind {
self.nth(0) self.nth(0)
} }
fn event(&mut self, event: Event) { fn event(&mut self, event: Event) {
self.events.push(event) self.events.push(event)
} }

View file

@ -6,32 +6,6 @@ use tree::SyntaxInfo;
/// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SyntaxKind { pub enum SyntaxKind {
USE_KW,
FN_KW,
STRUCT_KW,
ENUM_KW,
TRAIT_KW,
IMPL_KW,
TRUE_KW,
FALSE_KW,
AS_KW,
EXTERN_KW,
CRATE_KW,
MOD_KW,
PUB_KW,
SELF_KW,
SUPER_KW,
IN_KW,
WHERE_KW,
FOR_KW,
LOOP_KW,
WHILE_KW,
IF_KW,
MATCH_KW,
CONST_KW,
STATIC_KW,
MUT_KW,
UNSAFE_KW,
ERROR, ERROR,
IDENT, IDENT,
UNDERSCORE, UNDERSCORE,
@ -83,6 +57,35 @@ pub enum SyntaxKind {
COMMENT, COMMENT,
DOC_COMMENT, DOC_COMMENT,
SHEBANG, SHEBANG,
USE_KW,
FN_KW,
STRUCT_KW,
ENUM_KW,
TRAIT_KW,
IMPL_KW,
TRUE_KW,
FALSE_KW,
AS_KW,
EXTERN_KW,
CRATE_KW,
MOD_KW,
PUB_KW,
SELF_KW,
SUPER_KW,
IN_KW,
WHERE_KW,
FOR_KW,
LOOP_KW,
WHILE_KW,
IF_KW,
MATCH_KW,
CONST_KW,
STATIC_KW,
MUT_KW,
UNSAFE_KW,
AUTO_KW,
DEFAULT_KW,
UNION_KW,
FILE, FILE,
STRUCT_ITEM, STRUCT_ITEM,
ENUM_ITEM, ENUM_ITEM,
@ -123,32 +126,6 @@ pub(crate) use self::SyntaxKind::*;
impl SyntaxKind { impl SyntaxKind {
pub(crate) fn info(self) -> &'static SyntaxInfo { pub(crate) fn info(self) -> &'static SyntaxInfo {
match self { match self {
USE_KW => &SyntaxInfo { name: "USE_KW" },
FN_KW => &SyntaxInfo { name: "FN_KW" },
STRUCT_KW => &SyntaxInfo { name: "STRUCT_KW" },
ENUM_KW => &SyntaxInfo { name: "ENUM_KW" },
TRAIT_KW => &SyntaxInfo { name: "TRAIT_KW" },
IMPL_KW => &SyntaxInfo { name: "IMPL_KW" },
TRUE_KW => &SyntaxInfo { name: "TRUE_KW" },
FALSE_KW => &SyntaxInfo { name: "FALSE_KW" },
AS_KW => &SyntaxInfo { name: "AS_KW" },
EXTERN_KW => &SyntaxInfo { name: "EXTERN_KW" },
CRATE_KW => &SyntaxInfo { name: "CRATE_KW" },
MOD_KW => &SyntaxInfo { name: "MOD_KW" },
PUB_KW => &SyntaxInfo { name: "PUB_KW" },
SELF_KW => &SyntaxInfo { name: "SELF_KW" },
SUPER_KW => &SyntaxInfo { name: "SUPER_KW" },
IN_KW => &SyntaxInfo { name: "IN_KW" },
WHERE_KW => &SyntaxInfo { name: "WHERE_KW" },
FOR_KW => &SyntaxInfo { name: "FOR_KW" },
LOOP_KW => &SyntaxInfo { name: "LOOP_KW" },
WHILE_KW => &SyntaxInfo { name: "WHILE_KW" },
IF_KW => &SyntaxInfo { name: "IF_KW" },
MATCH_KW => &SyntaxInfo { name: "MATCH_KW" },
CONST_KW => &SyntaxInfo { name: "CONST_KW" },
STATIC_KW => &SyntaxInfo { name: "STATIC_KW" },
MUT_KW => &SyntaxInfo { name: "MUT_KW" },
UNSAFE_KW => &SyntaxInfo { name: "UNSAFE_KW" },
ERROR => &SyntaxInfo { name: "ERROR" }, ERROR => &SyntaxInfo { name: "ERROR" },
IDENT => &SyntaxInfo { name: "IDENT" }, IDENT => &SyntaxInfo { name: "IDENT" },
UNDERSCORE => &SyntaxInfo { name: "UNDERSCORE" }, UNDERSCORE => &SyntaxInfo { name: "UNDERSCORE" },
@ -200,6 +177,35 @@ impl SyntaxKind {
COMMENT => &SyntaxInfo { name: "COMMENT" }, COMMENT => &SyntaxInfo { name: "COMMENT" },
DOC_COMMENT => &SyntaxInfo { name: "DOC_COMMENT" }, DOC_COMMENT => &SyntaxInfo { name: "DOC_COMMENT" },
SHEBANG => &SyntaxInfo { name: "SHEBANG" }, SHEBANG => &SyntaxInfo { name: "SHEBANG" },
USE_KW => &SyntaxInfo { name: "USE_KW" },
FN_KW => &SyntaxInfo { name: "FN_KW" },
STRUCT_KW => &SyntaxInfo { name: "STRUCT_KW" },
ENUM_KW => &SyntaxInfo { name: "ENUM_KW" },
TRAIT_KW => &SyntaxInfo { name: "TRAIT_KW" },
IMPL_KW => &SyntaxInfo { name: "IMPL_KW" },
TRUE_KW => &SyntaxInfo { name: "TRUE_KW" },
FALSE_KW => &SyntaxInfo { name: "FALSE_KW" },
AS_KW => &SyntaxInfo { name: "AS_KW" },
EXTERN_KW => &SyntaxInfo { name: "EXTERN_KW" },
CRATE_KW => &SyntaxInfo { name: "CRATE_KW" },
MOD_KW => &SyntaxInfo { name: "MOD_KW" },
PUB_KW => &SyntaxInfo { name: "PUB_KW" },
SELF_KW => &SyntaxInfo { name: "SELF_KW" },
SUPER_KW => &SyntaxInfo { name: "SUPER_KW" },
IN_KW => &SyntaxInfo { name: "IN_KW" },
WHERE_KW => &SyntaxInfo { name: "WHERE_KW" },
FOR_KW => &SyntaxInfo { name: "FOR_KW" },
LOOP_KW => &SyntaxInfo { name: "LOOP_KW" },
WHILE_KW => &SyntaxInfo { name: "WHILE_KW" },
IF_KW => &SyntaxInfo { name: "IF_KW" },
MATCH_KW => &SyntaxInfo { name: "MATCH_KW" },
CONST_KW => &SyntaxInfo { name: "CONST_KW" },
STATIC_KW => &SyntaxInfo { name: "STATIC_KW" },
MUT_KW => &SyntaxInfo { name: "MUT_KW" },
UNSAFE_KW => &SyntaxInfo { name: "UNSAFE_KW" },
AUTO_KW => &SyntaxInfo { name: "AUTO_KW" },
DEFAULT_KW => &SyntaxInfo { name: "DEFAULT_KW" },
UNION_KW => &SyntaxInfo { name: "UNION_KW" },
FILE => &SyntaxInfo { name: "FILE" }, FILE => &SyntaxInfo { name: "FILE" },
STRUCT_ITEM => &SyntaxInfo { name: "STRUCT_ITEM" }, STRUCT_ITEM => &SyntaxInfo { name: "STRUCT_ITEM" },
ENUM_ITEM => &SyntaxInfo { name: "ENUM_ITEM" }, ENUM_ITEM => &SyntaxInfo { name: "ENUM_ITEM" },

View file

@ -0,0 +1 @@
unsafe auto trait T {}

View file

@ -0,0 +1,13 @@
FILE@[0; 23)
TRAIT_ITEM@[0; 23)
UNSAFE_KW@[0; 6)
WHITESPACE@[6; 7)
AUTO_KW@[7; 11)
WHITESPACE@[11; 12)
TRAIT_KW@[12; 17)
WHITESPACE@[17; 18)
IDENT@[18; 19) "T"
WHITESPACE@[19; 20)
L_CURLY@[20; 21)
R_CURLY@[21; 22)
WHITESPACE@[22; 23)

View file

@ -0,0 +1 @@
unsafe default impl Foo {}

View file

@ -0,0 +1,13 @@
FILE@[0; 27)
IMPL_ITEM@[0; 27)
UNSAFE_KW@[0; 6)
WHITESPACE@[6; 7)
DEFAULT_KW@[7; 14)
WHITESPACE@[14; 15)
IMPL_KW@[15; 19)
WHITESPACE@[19; 20)
IDENT@[20; 23) "Foo"
WHITESPACE@[23; 24)
L_CURLY@[24; 25)
R_CURLY@[25; 26)
WHITESPACE@[26; 27)

View file

@ -20,6 +20,7 @@ fn main() {
#[derive(Deserialize)] #[derive(Deserialize)]
struct Grammar { struct Grammar {
keywords: Vec<String>, keywords: Vec<String>,
contextual_keywords: Vec<String>,
tokens: Vec<String>, tokens: Vec<String>,
nodes: Vec<String>, nodes: Vec<String>,
} }
@ -38,10 +39,9 @@ impl Grammar {
acc.push_str("use tree::SyntaxInfo;\n"); acc.push_str("use tree::SyntaxInfo;\n");
acc.push_str("\n"); acc.push_str("\n");
let syntax_kinds: Vec<String> = self.keywords let syntax_kinds: Vec<String> =self.tokens.iter().cloned()
.iter() .chain(self.keywords.iter().map(|kw| kw_token(kw)))
.map(|kw| kw_token(kw)) .chain(self.contextual_keywords.iter().map(|kw| kw_token(kw)))
.chain(self.tokens.iter().cloned())
.chain(self.nodes.iter().cloned()) .chain(self.nodes.iter().cloned())
.collect(); .collect();
@ -86,6 +86,7 @@ impl Grammar {
// fn ident_to_keyword // fn ident_to_keyword
acc.push_str("pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {\n"); acc.push_str("pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {\n");
acc.push_str(" match ident {\n"); acc.push_str(" match ident {\n");
// NB: no contextual_keywords here!
for kw in self.keywords.iter() { for kw in self.keywords.iter() {
write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap(); write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap();
} }