Switch parser to use tokens

This commit is contained in:
Aleksey Kladov 2021-11-14 22:13:44 +03:00
parent d5ad0f3ca0
commit 26bfd6023f
6 changed files with 47 additions and 53 deletions

View file

@@ -296,10 +296,7 @@ fn lhs(p: &mut Parser, r: Restrictions) -> Option<(CompletedMarker, BlockLike)>
 T![&] => {
     m = p.start();
     p.bump(T![&]);
-    if p.at(IDENT)
-        && p.at_contextual_kw("raw")
-        && (p.nth_at(1, T![mut]) || p.nth_at(1, T![const]))
-    {
+    if p.at_contextual_kw(T![raw]) && (p.nth_at(1, T![mut]) || p.nth_at(1, T![const])) {
         p.bump_remap(T![raw]);
         p.bump_any();
     } else {

View file

@@ -122,14 +122,14 @@ pub(super) fn opt_item(p: &mut Parser, m: Marker) -> Result<(), Marker> {
     has_mods = true;
     abi(p);
 }
-if p.at(IDENT) && p.at_contextual_kw("auto") && p.nth(1) == T![trait] {
+if p.at_contextual_kw(T![auto]) && p.nth(1) == T![trait] {
     p.bump_remap(T![auto]);
     has_mods = true;
 }
 // test default_item
 // default impl T for Foo {}
-if p.at(IDENT) && p.at_contextual_kw("default") {
+if p.at_contextual_kw(T![default]) {
     match p.nth(1) {
         T![fn] | T![type] | T![const] | T![impl] => {
             p.bump_remap(T![default]);
@@ -176,7 +176,7 @@ pub(super) fn opt_item(p: &mut Parser, m: Marker) -> Result<(), Marker> {
 // test existential_type
 // existential type Foo: Fn() -> usize;
-if p.at(IDENT) && p.at_contextual_kw("existential") && p.nth(1) == T![type] {
+if p.at_contextual_kw(T![existential]) && p.nth(1) == T![type] {
     p.bump_remap(T![existential]);
     has_mods = true;
 }
@@ -224,10 +224,10 @@ fn opt_item_without_modifiers(p: &mut Parser, m: Marker) -> Result<(), Marker> {
 T![type] => type_alias(p, m),
 T![struct] => adt::strukt(p, m),
 T![enum] => adt::enum_(p, m),
-IDENT if p.at_contextual_kw("union") && p.nth(1) == IDENT => adt::union(p, m),
+IDENT if p.at_contextual_kw(T![union]) && p.nth(1) == IDENT => adt::union(p, m),
 T![macro] => macro_def(p, m),
-IDENT if p.at_contextual_kw("macro_rules") && p.nth(1) == BANG => macro_rules(p, m),
+IDENT if p.at_contextual_kw(T![macro_rules]) && p.nth(1) == BANG => macro_rules(p, m),
 T![const] if (la == IDENT || la == T![_] || la == T![mut]) => consts::konst(p, m),
 T![static] => consts::static_(p, m),
@@ -319,7 +319,7 @@ pub(crate) fn extern_item_list(p: &mut Parser) {
 }
 fn macro_rules(p: &mut Parser, m: Marker) {
-    assert!(p.at_contextual_kw("macro_rules"));
+    assert!(p.at_contextual_kw(T![macro_rules]));
     p.bump_remap(T![macro_rules]);
     p.expect(T![!]);

View file

@@ -10,7 +10,7 @@ pub(super) fn strukt(p: &mut Parser, m: Marker) {
 // test union_item
 // struct U { i: i32, f: f32 }
 pub(super) fn union(p: &mut Parser, m: Marker) {
-    assert!(p.at_contextual_kw("union"));
+    assert!(p.at_contextual_kw(T![union]));
     p.bump_remap(T![union]);
     struct_or_union(p, m, false);
 }

View file

@@ -26,6 +26,8 @@ pub(crate) use token_set::TokenSet;
 pub use syntax_kind::SyntaxKind;
+use crate::tokens::Tokens;
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct ParseError(pub Box<String>);
@@ -53,6 +55,7 @@ pub struct Token {
     /// Is the current token joined to the next one (`> >` vs `>>`).
     pub is_jointed_to_next: bool,
+    pub contextual_kw: SyntaxKind,
 }
 /// `TreeSink` abstracts details of a particular syntax tree implementation.
@@ -93,15 +96,11 @@ pub enum ParserEntryPoint {
 }
 /// Parse given tokens into the given sink as a rust file.
-pub fn parse_source_file(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
-    parse(token_source, tree_sink, ParserEntryPoint::SourceFile);
+pub fn parse_source_file(tokens: &Tokens, tree_sink: &mut dyn TreeSink) {
+    parse(tokens, tree_sink, ParserEntryPoint::SourceFile);
 }
-pub fn parse(
-    token_source: &mut dyn TokenSource,
-    tree_sink: &mut dyn TreeSink,
-    entry_point: ParserEntryPoint,
-) {
+pub fn parse(tokens: &Tokens, tree_sink: &mut dyn TreeSink, entry_point: ParserEntryPoint) {
     let entry_point: fn(&'_ mut parser::Parser) = match entry_point {
         ParserEntryPoint::SourceFile => grammar::entry_points::source_file,
         ParserEntryPoint::Path => grammar::entry_points::path,
@@ -119,7 +118,7 @@ pub fn parse(
     ParserEntryPoint::Attr => grammar::entry_points::attr,
 };
-let mut p = parser::Parser::new(token_source);
+let mut p = parser::Parser::new(tokens);
 entry_point(&mut p);
 let events = p.finish();
 event::process(tree_sink, events);
@@ -142,9 +141,9 @@ impl Reparser {
 ///
 /// Tokens must start with `{`, end with `}` and form a valid brace
 /// sequence.
-pub fn parse(self, token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
+pub fn parse(self, tokens: &Tokens, tree_sink: &mut dyn TreeSink) {
     let Reparser(r) = self;
-    let mut p = parser::Parser::new(token_source);
+    let mut p = parser::Parser::new(tokens);
     r(&mut p);
     let events = p.finish();
     event::process(tree_sink, events);

View file

@@ -7,9 +7,10 @@ use limit::Limit;
 use crate::{
     event::Event,
+    tokens::Tokens,
     ParseError,
     SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
-    TokenSet, TokenSource, T,
+    TokenSet, T,
 };
 /// `Parser` struct provides the low-level API for
@@ -22,7 +23,8 @@ use crate::{
 /// "start expression, consume number literal,
 /// finish expression". See `Event` docs for more.
 pub(crate) struct Parser<'t> {
-    token_source: &'t mut dyn TokenSource,
+    tokens: &'t Tokens,
+    pos: usize,
     events: Vec<Event>,
     steps: Cell<u32>,
 }
@@ -30,8 +32,8 @@ pub(crate) struct Parser<'t> {
 static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000);
 impl<'t> Parser<'t> {
-    pub(super) fn new(token_source: &'t mut dyn TokenSource) -> Parser<'t> {
-        Parser { token_source, events: Vec::new(), steps: Cell::new(0) }
+    pub(super) fn new(tokens: &'t Tokens) -> Parser<'t> {
+        Parser { tokens, pos: 0, events: Vec::new(), steps: Cell::new(0) }
     }
     pub(crate) fn finish(self) -> Vec<Event> {
@@ -54,7 +56,7 @@ impl<'t> Parser<'t> {
     assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck");
     self.steps.set(steps + 1);
-    self.token_source.lookahead_nth(n).kind
+    self.tokens.get(self.pos + n).kind
 }
 /// Checks if the current token is `kind`.
@@ -90,7 +92,7 @@ impl<'t> Parser<'t> {
         T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
         T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
-        _ => self.token_source.lookahead_nth(n).kind == kind,
+        _ => self.tokens.get(self.pos + n).kind == kind,
     }
 }
@@ -129,24 +131,24 @@ impl<'t> Parser<'t> {
 }
 fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
-    let t1 = self.token_source.lookahead_nth(n);
+    let t1 = self.tokens.get(self.pos + n);
     if t1.kind != k1 || !t1.is_jointed_to_next {
         return false;
     }
-    let t2 = self.token_source.lookahead_nth(n + 1);
+    let t2 = self.tokens.get(self.pos + n + 1);
     t2.kind == k2
 }
 fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
-    let t1 = self.token_source.lookahead_nth(n);
+    let t1 = self.tokens.get(self.pos + n);
     if t1.kind != k1 || !t1.is_jointed_to_next {
         return false;
     }
-    let t2 = self.token_source.lookahead_nth(n + 1);
+    let t2 = self.tokens.get(self.pos + n + 1);
     if t2.kind != k2 || !t2.is_jointed_to_next {
         return false;
     }
-    let t3 = self.token_source.lookahead_nth(n + 2);
+    let t3 = self.tokens.get(self.pos + n + 2);
     t3.kind == k3
 }
@@ -156,8 +158,8 @@ impl<'t> Parser<'t> {
 }
 /// Checks if the current token is contextual keyword with text `t`.
-pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool {
-    self.token_source.is_keyword(kw)
+pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool {
+    self.tokens.get(self.pos).contextual_kw == kw
 }
 /// Starts a new node in the syntax tree. All nodes and tokens
@@ -243,10 +245,7 @@ impl<'t> Parser<'t> {
 }
 fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
-    for _ in 0..n_raw_tokens {
-        self.token_source.bump();
-    }
+    self.pos += n_raw_tokens as usize;
     self.push_event(Event::Token { kind, n_raw_tokens });
 }

View file

@@ -1,9 +1,8 @@
-use crate::SyntaxKind;
+use crate::{SyntaxKind, Token};
+#[allow(non_camel_case_types)]
 type bits = u64;
-pub type IdentKind = u8;
 /// Main input to the parser.
 ///
 /// A sequence of tokens represented internally as a struct of arrays.
@@ -11,17 +10,17 @@ pub type IdentKind = u8;
 pub struct Tokens {
     kind: Vec<SyntaxKind>,
     joint: Vec<bits>,
-    ident_kind: Vec<IdentKind>,
+    contextual_kw: Vec<SyntaxKind>,
 }
 impl Tokens {
     pub fn push(&mut self, was_joint: bool, kind: SyntaxKind) {
-        self.push_impl(was_joint, kind, 0)
+        self.push_impl(was_joint, kind, SyntaxKind::EOF)
     }
-    pub fn push_ident(&mut self, ident_kind: IdentKind) {
-        self.push_impl(false, SyntaxKind::IDENT, ident_kind)
+    pub fn push_ident(&mut self, contextual_kw: SyntaxKind) {
+        self.push_impl(false, SyntaxKind::IDENT, contextual_kw)
     }
-    fn push_impl(&mut self, was_joint: bool, kind: SyntaxKind, ctx: IdentKind) {
+    fn push_impl(&mut self, was_joint: bool, kind: SyntaxKind, contextual_kw: SyntaxKind) {
         let idx = self.len();
         if idx % (bits::BITS as usize) == 0 {
             self.joint.push(0);
@@ -30,7 +29,7 @@ impl Tokens {
         self.set_joint(idx - 1);
     }
     self.kind.push(kind);
-    self.ident_kind.push(ctx);
+    self.contextual_kw.push(contextual_kw);
 }
 fn set_joint(&mut self, n: usize) {
     let (idx, b_idx) = self.bit_index(n);
@@ -49,18 +48,18 @@ impl Tokens {
     pub fn len(&self) -> usize {
         self.kind.len()
     }
-    pub(crate) fn get(&self, idx: usize) -> (SyntaxKind, bool, IdentKind) {
+    pub(crate) fn get(&self, idx: usize) -> Token {
         if idx > self.len() {
             return self.eof();
         }
         let kind = self.kind[idx];
-        let joint = self.get_joint(idx);
-        let ident_kind = self.ident_kind[idx];
-        (kind, joint, ident_kind)
+        let is_jointed_to_next = self.get_joint(idx);
+        let contextual_kw = self.contextual_kw[idx];
+        Token { kind, is_jointed_to_next, contextual_kw }
     }
     #[cold]
-    fn eof(&self) -> (SyntaxKind, bool, IdentKind) {
-        (SyntaxKind::EOF, false, 0)
+    fn eof(&self) -> Token {
+        Token { kind: SyntaxKind::EOF, is_jointed_to_next: false, contextual_kw: SyntaxKind::EOF }
     }
 }