Switch parser to use tokens

This commit is contained in:
Aleksey Kladov 2021-11-14 22:13:44 +03:00
parent d5ad0f3ca0
commit 26bfd6023f
6 changed files with 47 additions and 53 deletions

View file

@ -296,10 +296,7 @@ fn lhs(p: &mut Parser, r: Restrictions) -> Option<(CompletedMarker, BlockLike)>
T![&] => {
m = p.start();
p.bump(T![&]);
if p.at(IDENT)
&& p.at_contextual_kw("raw")
&& (p.nth_at(1, T![mut]) || p.nth_at(1, T![const]))
{
if p.at_contextual_kw(T![raw]) && (p.nth_at(1, T![mut]) || p.nth_at(1, T![const])) {
p.bump_remap(T![raw]);
p.bump_any();
} else {

View file

@ -122,14 +122,14 @@ pub(super) fn opt_item(p: &mut Parser, m: Marker) -> Result<(), Marker> {
has_mods = true;
abi(p);
}
if p.at(IDENT) && p.at_contextual_kw("auto") && p.nth(1) == T![trait] {
if p.at_contextual_kw(T![auto]) && p.nth(1) == T![trait] {
p.bump_remap(T![auto]);
has_mods = true;
}
// test default_item
// default impl T for Foo {}
if p.at(IDENT) && p.at_contextual_kw("default") {
if p.at_contextual_kw(T![default]) {
match p.nth(1) {
T![fn] | T![type] | T![const] | T![impl] => {
p.bump_remap(T![default]);
@ -176,7 +176,7 @@ pub(super) fn opt_item(p: &mut Parser, m: Marker) -> Result<(), Marker> {
// test existential_type
// existential type Foo: Fn() -> usize;
if p.at(IDENT) && p.at_contextual_kw("existential") && p.nth(1) == T![type] {
if p.at_contextual_kw(T![existential]) && p.nth(1) == T![type] {
p.bump_remap(T![existential]);
has_mods = true;
}
@ -224,10 +224,10 @@ fn opt_item_without_modifiers(p: &mut Parser, m: Marker) -> Result<(), Marker> {
T![type] => type_alias(p, m),
T![struct] => adt::strukt(p, m),
T![enum] => adt::enum_(p, m),
IDENT if p.at_contextual_kw("union") && p.nth(1) == IDENT => adt::union(p, m),
IDENT if p.at_contextual_kw(T![union]) && p.nth(1) == IDENT => adt::union(p, m),
T![macro] => macro_def(p, m),
IDENT if p.at_contextual_kw("macro_rules") && p.nth(1) == BANG => macro_rules(p, m),
IDENT if p.at_contextual_kw(T![macro_rules]) && p.nth(1) == BANG => macro_rules(p, m),
T![const] if (la == IDENT || la == T![_] || la == T![mut]) => consts::konst(p, m),
T![static] => consts::static_(p, m),
@ -319,7 +319,7 @@ pub(crate) fn extern_item_list(p: &mut Parser) {
}
fn macro_rules(p: &mut Parser, m: Marker) {
assert!(p.at_contextual_kw("macro_rules"));
assert!(p.at_contextual_kw(T![macro_rules]));
p.bump_remap(T![macro_rules]);
p.expect(T![!]);

View file

@ -10,7 +10,7 @@ pub(super) fn strukt(p: &mut Parser, m: Marker) {
// test union_item
// union U { i: i32, f: f32 }
pub(super) fn union(p: &mut Parser, m: Marker) {
assert!(p.at_contextual_kw("union"));
assert!(p.at_contextual_kw(T![union]));
p.bump_remap(T![union]);
struct_or_union(p, m, false);
}

View file

@ -26,6 +26,8 @@ pub(crate) use token_set::TokenSet;
pub use syntax_kind::SyntaxKind;
use crate::tokens::Tokens;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ParseError(pub Box<String>);
@ -53,6 +55,7 @@ pub struct Token {
/// Is the current token joined to the next one (`> >` vs `>>`).
pub is_jointed_to_next: bool,
pub contextual_kw: SyntaxKind,
}
/// `TreeSink` abstracts details of a particular syntax tree implementation.
@ -93,15 +96,11 @@ pub enum ParserEntryPoint {
}
/// Parse given tokens into the given sink as a rust file.
pub fn parse_source_file(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
parse(token_source, tree_sink, ParserEntryPoint::SourceFile);
pub fn parse_source_file(tokens: &Tokens, tree_sink: &mut dyn TreeSink) {
parse(tokens, tree_sink, ParserEntryPoint::SourceFile);
}
pub fn parse(
token_source: &mut dyn TokenSource,
tree_sink: &mut dyn TreeSink,
entry_point: ParserEntryPoint,
) {
pub fn parse(tokens: &Tokens, tree_sink: &mut dyn TreeSink, entry_point: ParserEntryPoint) {
let entry_point: fn(&'_ mut parser::Parser) = match entry_point {
ParserEntryPoint::SourceFile => grammar::entry_points::source_file,
ParserEntryPoint::Path => grammar::entry_points::path,
@ -119,7 +118,7 @@ pub fn parse(
ParserEntryPoint::Attr => grammar::entry_points::attr,
};
let mut p = parser::Parser::new(token_source);
let mut p = parser::Parser::new(tokens);
entry_point(&mut p);
let events = p.finish();
event::process(tree_sink, events);
@ -142,9 +141,9 @@ impl Reparser {
///
/// Tokens must start with `{`, end with `}` and form a valid brace
/// sequence.
pub fn parse(self, token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
pub fn parse(self, tokens: &Tokens, tree_sink: &mut dyn TreeSink) {
let Reparser(r) = self;
let mut p = parser::Parser::new(token_source);
let mut p = parser::Parser::new(tokens);
r(&mut p);
let events = p.finish();
event::process(tree_sink, events);

View file

@ -7,9 +7,10 @@ use limit::Limit;
use crate::{
event::Event,
tokens::Tokens,
ParseError,
SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
TokenSet, TokenSource, T,
TokenSet, T,
};
/// `Parser` struct provides the low-level API for
@ -22,7 +23,8 @@ use crate::{
/// "start expression, consume number literal,
/// finish expression". See `Event` docs for more.
pub(crate) struct Parser<'t> {
token_source: &'t mut dyn TokenSource,
tokens: &'t Tokens,
pos: usize,
events: Vec<Event>,
steps: Cell<u32>,
}
@ -30,8 +32,8 @@ pub(crate) struct Parser<'t> {
static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000);
impl<'t> Parser<'t> {
pub(super) fn new(token_source: &'t mut dyn TokenSource) -> Parser<'t> {
Parser { token_source, events: Vec::new(), steps: Cell::new(0) }
pub(super) fn new(tokens: &'t Tokens) -> Parser<'t> {
Parser { tokens, pos: 0, events: Vec::new(), steps: Cell::new(0) }
}
pub(crate) fn finish(self) -> Vec<Event> {
@ -54,7 +56,7 @@ impl<'t> Parser<'t> {
assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck");
self.steps.set(steps + 1);
self.token_source.lookahead_nth(n).kind
self.tokens.get(self.pos + n).kind
}
/// Checks if the current token is `kind`.
@ -90,7 +92,7 @@ impl<'t> Parser<'t> {
T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
_ => self.token_source.lookahead_nth(n).kind == kind,
_ => self.tokens.get(self.pos + n).kind == kind,
}
}
@ -129,24 +131,24 @@ impl<'t> Parser<'t> {
}
fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
let t1 = self.token_source.lookahead_nth(n);
let t1 = self.tokens.get(self.pos + n);
if t1.kind != k1 || !t1.is_jointed_to_next {
return false;
}
let t2 = self.token_source.lookahead_nth(n + 1);
let t2 = self.tokens.get(self.pos + n + 1);
t2.kind == k2
}
fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
let t1 = self.token_source.lookahead_nth(n);
let t1 = self.tokens.get(self.pos + n);
if t1.kind != k1 || !t1.is_jointed_to_next {
return false;
}
let t2 = self.token_source.lookahead_nth(n + 1);
let t2 = self.tokens.get(self.pos + n + 1);
if t2.kind != k2 || !t2.is_jointed_to_next {
return false;
}
let t3 = self.token_source.lookahead_nth(n + 2);
let t3 = self.tokens.get(self.pos + n + 2);
t3.kind == k3
}
@ -156,8 +158,8 @@ impl<'t> Parser<'t> {
}
/// Checks if the current token is contextual keyword with text `t`.
pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool {
self.token_source.is_keyword(kw)
pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool {
self.tokens.get(self.pos).contextual_kw == kw
}
/// Starts a new node in the syntax tree. All nodes and tokens
@ -243,10 +245,7 @@ impl<'t> Parser<'t> {
}
fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
for _ in 0..n_raw_tokens {
self.token_source.bump();
}
self.pos += n_raw_tokens as usize;
self.push_event(Event::Token { kind, n_raw_tokens });
}

View file

@ -1,9 +1,8 @@
use crate::SyntaxKind;
use crate::{SyntaxKind, Token};
#[allow(non_camel_case_types)]
type bits = u64;
pub type IdentKind = u8;
/// Main input to the parser.
///
/// A sequence of tokens represented internally as a struct of arrays.
@ -11,17 +10,17 @@ pub type IdentKind = u8;
pub struct Tokens {
kind: Vec<SyntaxKind>,
joint: Vec<bits>,
ident_kind: Vec<IdentKind>,
contextual_kw: Vec<SyntaxKind>,
}
impl Tokens {
pub fn push(&mut self, was_joint: bool, kind: SyntaxKind) {
self.push_impl(was_joint, kind, 0)
self.push_impl(was_joint, kind, SyntaxKind::EOF)
}
pub fn push_ident(&mut self, ident_kind: IdentKind) {
self.push_impl(false, SyntaxKind::IDENT, ident_kind)
pub fn push_ident(&mut self, contextual_kw: SyntaxKind) {
self.push_impl(false, SyntaxKind::IDENT, contextual_kw)
}
fn push_impl(&mut self, was_joint: bool, kind: SyntaxKind, ctx: IdentKind) {
fn push_impl(&mut self, was_joint: bool, kind: SyntaxKind, contextual_kw: SyntaxKind) {
let idx = self.len();
if idx % (bits::BITS as usize) == 0 {
self.joint.push(0);
@ -30,7 +29,7 @@ impl Tokens {
self.set_joint(idx - 1);
}
self.kind.push(kind);
self.ident_kind.push(ctx);
self.contextual_kw.push(contextual_kw);
}
fn set_joint(&mut self, n: usize) {
let (idx, b_idx) = self.bit_index(n);
@ -49,18 +48,18 @@ impl Tokens {
pub fn len(&self) -> usize {
self.kind.len()
}
pub(crate) fn get(&self, idx: usize) -> (SyntaxKind, bool, IdentKind) {
pub(crate) fn get(&self, idx: usize) -> Token {
if idx > self.len() {
return self.eof();
}
let kind = self.kind[idx];
let joint = self.get_joint(idx);
let ident_kind = self.ident_kind[idx];
(kind, joint, ident_kind)
let is_jointed_to_next = self.get_joint(idx);
let contextual_kw = self.contextual_kw[idx];
Token { kind, is_jointed_to_next, contextual_kw }
}
#[cold]
fn eof(&self) -> (SyntaxKind, bool, IdentKind) {
(SyntaxKind::EOF, false, 0)
fn eof(&self) -> Token {
Token { kind: SyntaxKind::EOF, is_jointed_to_next: false, contextual_kw: SyntaxKind::EOF }
}
}