mirror of
https://github.com/rust-lang/rust-analyzer
synced 2025-01-14 14:13:58 +00:00
Switch parser to use tokens
This commit is contained in:
parent
d5ad0f3ca0
commit
26bfd6023f
6 changed files with 47 additions and 53 deletions
|
@ -296,10 +296,7 @@ fn lhs(p: &mut Parser, r: Restrictions) -> Option<(CompletedMarker, BlockLike)>
|
||||||
T![&] => {
|
T![&] => {
|
||||||
m = p.start();
|
m = p.start();
|
||||||
p.bump(T![&]);
|
p.bump(T![&]);
|
||||||
if p.at(IDENT)
|
if p.at_contextual_kw(T![raw]) && (p.nth_at(1, T![mut]) || p.nth_at(1, T![const])) {
|
||||||
&& p.at_contextual_kw("raw")
|
|
||||||
&& (p.nth_at(1, T![mut]) || p.nth_at(1, T![const]))
|
|
||||||
{
|
|
||||||
p.bump_remap(T![raw]);
|
p.bump_remap(T![raw]);
|
||||||
p.bump_any();
|
p.bump_any();
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -122,14 +122,14 @@ pub(super) fn opt_item(p: &mut Parser, m: Marker) -> Result<(), Marker> {
|
||||||
has_mods = true;
|
has_mods = true;
|
||||||
abi(p);
|
abi(p);
|
||||||
}
|
}
|
||||||
if p.at(IDENT) && p.at_contextual_kw("auto") && p.nth(1) == T![trait] {
|
if p.at_contextual_kw(T![auto]) && p.nth(1) == T![trait] {
|
||||||
p.bump_remap(T![auto]);
|
p.bump_remap(T![auto]);
|
||||||
has_mods = true;
|
has_mods = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// test default_item
|
// test default_item
|
||||||
// default impl T for Foo {}
|
// default impl T for Foo {}
|
||||||
if p.at(IDENT) && p.at_contextual_kw("default") {
|
if p.at_contextual_kw(T![default]) {
|
||||||
match p.nth(1) {
|
match p.nth(1) {
|
||||||
T![fn] | T![type] | T![const] | T![impl] => {
|
T![fn] | T![type] | T![const] | T![impl] => {
|
||||||
p.bump_remap(T![default]);
|
p.bump_remap(T![default]);
|
||||||
|
@ -176,7 +176,7 @@ pub(super) fn opt_item(p: &mut Parser, m: Marker) -> Result<(), Marker> {
|
||||||
|
|
||||||
// test existential_type
|
// test existential_type
|
||||||
// existential type Foo: Fn() -> usize;
|
// existential type Foo: Fn() -> usize;
|
||||||
if p.at(IDENT) && p.at_contextual_kw("existential") && p.nth(1) == T![type] {
|
if p.at_contextual_kw(T![existential]) && p.nth(1) == T![type] {
|
||||||
p.bump_remap(T![existential]);
|
p.bump_remap(T![existential]);
|
||||||
has_mods = true;
|
has_mods = true;
|
||||||
}
|
}
|
||||||
|
@ -224,10 +224,10 @@ fn opt_item_without_modifiers(p: &mut Parser, m: Marker) -> Result<(), Marker> {
|
||||||
T![type] => type_alias(p, m),
|
T![type] => type_alias(p, m),
|
||||||
T![struct] => adt::strukt(p, m),
|
T![struct] => adt::strukt(p, m),
|
||||||
T![enum] => adt::enum_(p, m),
|
T![enum] => adt::enum_(p, m),
|
||||||
IDENT if p.at_contextual_kw("union") && p.nth(1) == IDENT => adt::union(p, m),
|
IDENT if p.at_contextual_kw(T![union]) && p.nth(1) == IDENT => adt::union(p, m),
|
||||||
|
|
||||||
T![macro] => macro_def(p, m),
|
T![macro] => macro_def(p, m),
|
||||||
IDENT if p.at_contextual_kw("macro_rules") && p.nth(1) == BANG => macro_rules(p, m),
|
IDENT if p.at_contextual_kw(T![macro_rules]) && p.nth(1) == BANG => macro_rules(p, m),
|
||||||
|
|
||||||
T![const] if (la == IDENT || la == T![_] || la == T![mut]) => consts::konst(p, m),
|
T![const] if (la == IDENT || la == T![_] || la == T![mut]) => consts::konst(p, m),
|
||||||
T![static] => consts::static_(p, m),
|
T![static] => consts::static_(p, m),
|
||||||
|
@ -319,7 +319,7 @@ pub(crate) fn extern_item_list(p: &mut Parser) {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn macro_rules(p: &mut Parser, m: Marker) {
|
fn macro_rules(p: &mut Parser, m: Marker) {
|
||||||
assert!(p.at_contextual_kw("macro_rules"));
|
assert!(p.at_contextual_kw(T![macro_rules]));
|
||||||
p.bump_remap(T![macro_rules]);
|
p.bump_remap(T![macro_rules]);
|
||||||
p.expect(T![!]);
|
p.expect(T![!]);
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ pub(super) fn strukt(p: &mut Parser, m: Marker) {
|
||||||
// test union_item
|
// test union_item
|
||||||
// struct U { i: i32, f: f32 }
|
// struct U { i: i32, f: f32 }
|
||||||
pub(super) fn union(p: &mut Parser, m: Marker) {
|
pub(super) fn union(p: &mut Parser, m: Marker) {
|
||||||
assert!(p.at_contextual_kw("union"));
|
assert!(p.at_contextual_kw(T![union]));
|
||||||
p.bump_remap(T![union]);
|
p.bump_remap(T![union]);
|
||||||
struct_or_union(p, m, false);
|
struct_or_union(p, m, false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,8 @@ pub(crate) use token_set::TokenSet;
|
||||||
|
|
||||||
pub use syntax_kind::SyntaxKind;
|
pub use syntax_kind::SyntaxKind;
|
||||||
|
|
||||||
|
use crate::tokens::Tokens;
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
pub struct ParseError(pub Box<String>);
|
pub struct ParseError(pub Box<String>);
|
||||||
|
|
||||||
|
@ -53,6 +55,7 @@ pub struct Token {
|
||||||
|
|
||||||
/// Is the current token joined to the next one (`> >` vs `>>`).
|
/// Is the current token joined to the next one (`> >` vs `>>`).
|
||||||
pub is_jointed_to_next: bool,
|
pub is_jointed_to_next: bool,
|
||||||
|
pub contextual_kw: SyntaxKind,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// `TreeSink` abstracts details of a particular syntax tree implementation.
|
/// `TreeSink` abstracts details of a particular syntax tree implementation.
|
||||||
|
@ -93,15 +96,11 @@ pub enum ParserEntryPoint {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse given tokens into the given sink as a rust file.
|
/// Parse given tokens into the given sink as a rust file.
|
||||||
pub fn parse_source_file(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
|
pub fn parse_source_file(tokens: &Tokens, tree_sink: &mut dyn TreeSink) {
|
||||||
parse(token_source, tree_sink, ParserEntryPoint::SourceFile);
|
parse(tokens, tree_sink, ParserEntryPoint::SourceFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse(
|
pub fn parse(tokens: &Tokens, tree_sink: &mut dyn TreeSink, entry_point: ParserEntryPoint) {
|
||||||
token_source: &mut dyn TokenSource,
|
|
||||||
tree_sink: &mut dyn TreeSink,
|
|
||||||
entry_point: ParserEntryPoint,
|
|
||||||
) {
|
|
||||||
let entry_point: fn(&'_ mut parser::Parser) = match entry_point {
|
let entry_point: fn(&'_ mut parser::Parser) = match entry_point {
|
||||||
ParserEntryPoint::SourceFile => grammar::entry_points::source_file,
|
ParserEntryPoint::SourceFile => grammar::entry_points::source_file,
|
||||||
ParserEntryPoint::Path => grammar::entry_points::path,
|
ParserEntryPoint::Path => grammar::entry_points::path,
|
||||||
|
@ -119,7 +118,7 @@ pub fn parse(
|
||||||
ParserEntryPoint::Attr => grammar::entry_points::attr,
|
ParserEntryPoint::Attr => grammar::entry_points::attr,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut p = parser::Parser::new(token_source);
|
let mut p = parser::Parser::new(tokens);
|
||||||
entry_point(&mut p);
|
entry_point(&mut p);
|
||||||
let events = p.finish();
|
let events = p.finish();
|
||||||
event::process(tree_sink, events);
|
event::process(tree_sink, events);
|
||||||
|
@ -142,9 +141,9 @@ impl Reparser {
|
||||||
///
|
///
|
||||||
/// Tokens must start with `{`, end with `}` and form a valid brace
|
/// Tokens must start with `{`, end with `}` and form a valid brace
|
||||||
/// sequence.
|
/// sequence.
|
||||||
pub fn parse(self, token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
|
pub fn parse(self, tokens: &Tokens, tree_sink: &mut dyn TreeSink) {
|
||||||
let Reparser(r) = self;
|
let Reparser(r) = self;
|
||||||
let mut p = parser::Parser::new(token_source);
|
let mut p = parser::Parser::new(tokens);
|
||||||
r(&mut p);
|
r(&mut p);
|
||||||
let events = p.finish();
|
let events = p.finish();
|
||||||
event::process(tree_sink, events);
|
event::process(tree_sink, events);
|
||||||
|
|
|
@ -7,9 +7,10 @@ use limit::Limit;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
event::Event,
|
event::Event,
|
||||||
|
tokens::Tokens,
|
||||||
ParseError,
|
ParseError,
|
||||||
SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
|
SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
|
||||||
TokenSet, TokenSource, T,
|
TokenSet, T,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// `Parser` struct provides the low-level API for
|
/// `Parser` struct provides the low-level API for
|
||||||
|
@ -22,7 +23,8 @@ use crate::{
|
||||||
/// "start expression, consume number literal,
|
/// "start expression, consume number literal,
|
||||||
/// finish expression". See `Event` docs for more.
|
/// finish expression". See `Event` docs for more.
|
||||||
pub(crate) struct Parser<'t> {
|
pub(crate) struct Parser<'t> {
|
||||||
token_source: &'t mut dyn TokenSource,
|
tokens: &'t Tokens,
|
||||||
|
pos: usize,
|
||||||
events: Vec<Event>,
|
events: Vec<Event>,
|
||||||
steps: Cell<u32>,
|
steps: Cell<u32>,
|
||||||
}
|
}
|
||||||
|
@ -30,8 +32,8 @@ pub(crate) struct Parser<'t> {
|
||||||
static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000);
|
static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000);
|
||||||
|
|
||||||
impl<'t> Parser<'t> {
|
impl<'t> Parser<'t> {
|
||||||
pub(super) fn new(token_source: &'t mut dyn TokenSource) -> Parser<'t> {
|
pub(super) fn new(tokens: &'t Tokens) -> Parser<'t> {
|
||||||
Parser { token_source, events: Vec::new(), steps: Cell::new(0) }
|
Parser { tokens, pos: 0, events: Vec::new(), steps: Cell::new(0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn finish(self) -> Vec<Event> {
|
pub(crate) fn finish(self) -> Vec<Event> {
|
||||||
|
@ -54,7 +56,7 @@ impl<'t> Parser<'t> {
|
||||||
assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck");
|
assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck");
|
||||||
self.steps.set(steps + 1);
|
self.steps.set(steps + 1);
|
||||||
|
|
||||||
self.token_source.lookahead_nth(n).kind
|
self.tokens.get(self.pos + n).kind
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks if the current token is `kind`.
|
/// Checks if the current token is `kind`.
|
||||||
|
@ -90,7 +92,7 @@ impl<'t> Parser<'t> {
|
||||||
T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
|
T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
|
||||||
T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
|
T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
|
||||||
|
|
||||||
_ => self.token_source.lookahead_nth(n).kind == kind,
|
_ => self.tokens.get(self.pos + n).kind == kind,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -129,24 +131,24 @@ impl<'t> Parser<'t> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
|
fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
|
||||||
let t1 = self.token_source.lookahead_nth(n);
|
let t1 = self.tokens.get(self.pos + n);
|
||||||
if t1.kind != k1 || !t1.is_jointed_to_next {
|
if t1.kind != k1 || !t1.is_jointed_to_next {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
let t2 = self.token_source.lookahead_nth(n + 1);
|
let t2 = self.tokens.get(self.pos + n + 1);
|
||||||
t2.kind == k2
|
t2.kind == k2
|
||||||
}
|
}
|
||||||
|
|
||||||
fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
|
fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
|
||||||
let t1 = self.token_source.lookahead_nth(n);
|
let t1 = self.tokens.get(self.pos + n);
|
||||||
if t1.kind != k1 || !t1.is_jointed_to_next {
|
if t1.kind != k1 || !t1.is_jointed_to_next {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
let t2 = self.token_source.lookahead_nth(n + 1);
|
let t2 = self.tokens.get(self.pos + n + 1);
|
||||||
if t2.kind != k2 || !t2.is_jointed_to_next {
|
if t2.kind != k2 || !t2.is_jointed_to_next {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
let t3 = self.token_source.lookahead_nth(n + 2);
|
let t3 = self.tokens.get(self.pos + n + 2);
|
||||||
t3.kind == k3
|
t3.kind == k3
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,8 +158,8 @@ impl<'t> Parser<'t> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks if the current token is contextual keyword with text `t`.
|
/// Checks if the current token is contextual keyword with text `t`.
|
||||||
pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool {
|
pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool {
|
||||||
self.token_source.is_keyword(kw)
|
self.tokens.get(self.pos).contextual_kw == kw
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Starts a new node in the syntax tree. All nodes and tokens
|
/// Starts a new node in the syntax tree. All nodes and tokens
|
||||||
|
@ -243,10 +245,7 @@ impl<'t> Parser<'t> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
|
fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
|
||||||
for _ in 0..n_raw_tokens {
|
self.pos += n_raw_tokens as usize;
|
||||||
self.token_source.bump();
|
|
||||||
}
|
|
||||||
|
|
||||||
self.push_event(Event::Token { kind, n_raw_tokens });
|
self.push_event(Event::Token { kind, n_raw_tokens });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
use crate::SyntaxKind;
|
use crate::{SyntaxKind, Token};
|
||||||
|
|
||||||
|
#[allow(non_camel_case_types)]
|
||||||
type bits = u64;
|
type bits = u64;
|
||||||
|
|
||||||
pub type IdentKind = u8;
|
|
||||||
|
|
||||||
/// Main input to the parser.
|
/// Main input to the parser.
|
||||||
///
|
///
|
||||||
/// A sequence of tokens represented internally as a struct of arrays.
|
/// A sequence of tokens represented internally as a struct of arrays.
|
||||||
|
@ -11,17 +10,17 @@ pub type IdentKind = u8;
|
||||||
pub struct Tokens {
|
pub struct Tokens {
|
||||||
kind: Vec<SyntaxKind>,
|
kind: Vec<SyntaxKind>,
|
||||||
joint: Vec<bits>,
|
joint: Vec<bits>,
|
||||||
ident_kind: Vec<IdentKind>,
|
contextual_kw: Vec<SyntaxKind>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Tokens {
|
impl Tokens {
|
||||||
pub fn push(&mut self, was_joint: bool, kind: SyntaxKind) {
|
pub fn push(&mut self, was_joint: bool, kind: SyntaxKind) {
|
||||||
self.push_impl(was_joint, kind, 0)
|
self.push_impl(was_joint, kind, SyntaxKind::EOF)
|
||||||
}
|
}
|
||||||
pub fn push_ident(&mut self, ident_kind: IdentKind) {
|
pub fn push_ident(&mut self, contextual_kw: SyntaxKind) {
|
||||||
self.push_impl(false, SyntaxKind::IDENT, ident_kind)
|
self.push_impl(false, SyntaxKind::IDENT, contextual_kw)
|
||||||
}
|
}
|
||||||
fn push_impl(&mut self, was_joint: bool, kind: SyntaxKind, ctx: IdentKind) {
|
fn push_impl(&mut self, was_joint: bool, kind: SyntaxKind, contextual_kw: SyntaxKind) {
|
||||||
let idx = self.len();
|
let idx = self.len();
|
||||||
if idx % (bits::BITS as usize) == 0 {
|
if idx % (bits::BITS as usize) == 0 {
|
||||||
self.joint.push(0);
|
self.joint.push(0);
|
||||||
|
@ -30,7 +29,7 @@ impl Tokens {
|
||||||
self.set_joint(idx - 1);
|
self.set_joint(idx - 1);
|
||||||
}
|
}
|
||||||
self.kind.push(kind);
|
self.kind.push(kind);
|
||||||
self.ident_kind.push(ctx);
|
self.contextual_kw.push(contextual_kw);
|
||||||
}
|
}
|
||||||
fn set_joint(&mut self, n: usize) {
|
fn set_joint(&mut self, n: usize) {
|
||||||
let (idx, b_idx) = self.bit_index(n);
|
let (idx, b_idx) = self.bit_index(n);
|
||||||
|
@ -49,18 +48,18 @@ impl Tokens {
|
||||||
pub fn len(&self) -> usize {
|
pub fn len(&self) -> usize {
|
||||||
self.kind.len()
|
self.kind.len()
|
||||||
}
|
}
|
||||||
pub(crate) fn get(&self, idx: usize) -> (SyntaxKind, bool, IdentKind) {
|
pub(crate) fn get(&self, idx: usize) -> Token {
|
||||||
if idx > self.len() {
|
if idx > self.len() {
|
||||||
return self.eof();
|
return self.eof();
|
||||||
}
|
}
|
||||||
let kind = self.kind[idx];
|
let kind = self.kind[idx];
|
||||||
let joint = self.get_joint(idx);
|
let is_jointed_to_next = self.get_joint(idx);
|
||||||
let ident_kind = self.ident_kind[idx];
|
let contextual_kw = self.contextual_kw[idx];
|
||||||
(kind, joint, ident_kind)
|
Token { kind, is_jointed_to_next, contextual_kw }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cold]
|
#[cold]
|
||||||
fn eof(&self) -> (SyntaxKind, bool, IdentKind) {
|
fn eof(&self) -> Token {
|
||||||
(SyntaxKind::EOF, false, 0)
|
Token { kind: SyntaxKind::EOF, is_jointed_to_next: false, contextual_kw: SyntaxKind::EOF }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue