mirror of
https://github.com/rust-lang/rust-analyzer
synced 2025-01-12 05:08:52 +00:00
NFA parser for mbe matcher
This commit is contained in:
parent
7f57a01b3d
commit
cff2201c30
9 changed files with 592 additions and 175 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -878,6 +878,7 @@ dependencies = [
|
|||
"profile",
|
||||
"rustc-hash",
|
||||
"smallvec",
|
||||
"stdx",
|
||||
"syntax",
|
||||
"test_utils",
|
||||
"tt",
|
||||
|
|
|
@ -18,6 +18,7 @@ syntax = { path = "../syntax", version = "0.0.0" }
|
|||
parser = { path = "../parser", version = "0.0.0" }
|
||||
tt = { path = "../tt", version = "0.0.0" }
|
||||
test_utils = { path = "../test_utils", version = "0.0.0" }
|
||||
stdx = { path = "../stdx", version = "0.0.0" }
|
||||
|
||||
[dev-dependencies]
|
||||
profile = { path = "../profile", version = "0.0.0" }
|
||||
|
|
|
@ -40,18 +40,12 @@ fn benchmark_expand_macro_rules() {
|
|||
.into_iter()
|
||||
.map(|(id, tt)| {
|
||||
let res = rules[&id].expand(&tt);
|
||||
if res.err.is_some() {
|
||||
// FIXME:
|
||||
// Currently `invocation_fixtures` will generate some correct invocations but
|
||||
// cannot be expanded by mbe. We ignore errors here.
|
||||
// See: https://github.com/rust-analyzer/rust-analyzer/issues/4777
|
||||
eprintln!("err from {} {:?}", id, res.err);
|
||||
}
|
||||
assert!(res.err.is_none());
|
||||
res.value.token_trees.len()
|
||||
})
|
||||
.sum()
|
||||
};
|
||||
assert_eq!(hash, 66995);
|
||||
assert_eq!(hash, 69413);
|
||||
}
|
||||
|
||||
fn macro_rules_fixtures() -> FxHashMap<String, MacroRules> {
|
||||
|
@ -77,7 +71,7 @@ fn macro_rules_fixtures_tt() -> FxHashMap<String, tt::Subtree> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
// Generate random invocation fixtures from rules
|
||||
/// Generate random invocation fixtures from rules
|
||||
fn invocation_fixtures(rules: &FxHashMap<String, MacroRules>) -> Vec<(String, tt::Subtree)> {
|
||||
let mut seed = 123456789;
|
||||
let mut res = Vec::new();
|
||||
|
@ -86,11 +80,31 @@ fn invocation_fixtures(rules: &FxHashMap<String, MacroRules>) -> Vec<(String, tt
|
|||
for rule in &it.rules {
|
||||
// Generate twice
|
||||
for _ in 0..2 {
|
||||
// The input are generated by filling the `Op` randomly.
|
||||
// However, there are some cases generated are ambiguous for expanding, for example:
|
||||
// ```rust
|
||||
// macro_rules! m {
|
||||
// ($($t:ident),* as $ty:ident) => {}
|
||||
// }
|
||||
// m!(as u32); // error: local ambiguity: multiple parsing options: built-in NTs ident ('t') or 1 other option.
|
||||
// ```
|
||||
//
|
||||
// So we just skip any error cases and try again
|
||||
let mut try_cnt = 0;
|
||||
loop {
|
||||
let mut subtree = tt::Subtree::default();
|
||||
for op in rule.lhs.iter() {
|
||||
collect_from_op(op, &mut subtree, &mut seed);
|
||||
}
|
||||
if it.expand(&subtree).err.is_none() {
|
||||
res.push((name.clone(), subtree));
|
||||
break;
|
||||
}
|
||||
try_cnt += 1;
|
||||
if try_cnt > 100 {
|
||||
panic!("invocaton fixture {} cannot be generated.\n", name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
mod matcher;
|
||||
mod transcriber;
|
||||
|
||||
use rustc_hash::FxHashMap;
|
||||
use smallvec::SmallVec;
|
||||
use syntax::SmolStr;
|
||||
|
||||
use crate::{ExpandError, ExpandResult};
|
||||
|
@ -28,10 +28,10 @@ pub(crate) fn expand_rules(
|
|||
return ExpandResult::ok(value);
|
||||
}
|
||||
}
|
||||
// Use the rule if we matched more tokens, or had fewer errors
|
||||
// Use the rule if we matched more tokens, or bound variables count
|
||||
if let Some((prev_match, _)) = &match_ {
|
||||
if (new_match.unmatched_tts, new_match.err_count)
|
||||
< (prev_match.unmatched_tts, prev_match.err_count)
|
||||
if (new_match.unmatched_tts, -(new_match.bound_count as i32))
|
||||
< (prev_match.unmatched_tts, -(prev_match.bound_count as i32))
|
||||
{
|
||||
match_ = Some((new_match, rule));
|
||||
}
|
||||
|
@ -94,19 +94,19 @@ pub(crate) fn expand_rules(
|
|||
/// In other words, `Bindings` is a *multi* mapping from `SmolStr` to
|
||||
/// `tt::TokenTree`, where the index to select a particular `TokenTree` among
|
||||
/// many is not a plain `usize`, but an `&[usize]`.
|
||||
#[derive(Debug, Default)]
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq)]
|
||||
struct Bindings {
|
||||
inner: FxHashMap<SmolStr, Binding>,
|
||||
inner: SmallVec<[(SmolStr, Binding); 4]>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
enum Binding {
|
||||
Fragment(Fragment),
|
||||
Nested(Vec<Binding>),
|
||||
Empty,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
enum Fragment {
|
||||
/// token fragments are just copy-pasted into the output
|
||||
Tokens(tt::TokenTree),
|
||||
|
|
|
@ -1,14 +1,74 @@
|
|||
//! FIXME: write short doc here
|
||||
//! An NFA-based parser, which is porting from rustc mbe parsing code
|
||||
//!
|
||||
//! See https://github.com/rust-lang/rust/blob/70b18bc2cbac4712020019f5bf57c00905373205/compiler/rustc_expand/src/mbe/macro_parser.rs
|
||||
//! Here is a quick intro to how the parser works, copied from rustc:
|
||||
//!
|
||||
//! A 'position' is a dot in the middle of a matcher, usually represented as a
|
||||
//! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`.
|
||||
//!
|
||||
//! The parser walks through the input a character at a time, maintaining a list
|
||||
//! of threads consistent with the current position in the input string: `cur_items`.
|
||||
//!
|
||||
//! As it processes them, it fills up `eof_items` with threads that would be valid if
|
||||
//! the macro invocation is now over, `bb_items` with threads that are waiting on
|
||||
//! a Rust non-terminal like `$e:expr`, and `next_items` with threads that are waiting
|
||||
//! on a particular token. Most of the logic concerns moving the · through the
|
||||
//! repetitions indicated by Kleene stars. The rules for moving the · without
|
||||
//! consuming any input are called epsilon transitions. It only advances or calls
|
||||
//! out to the real Rust parser when no `cur_items` threads remain.
|
||||
//!
|
||||
//! Example:
|
||||
//!
|
||||
//! ```text, ignore
|
||||
//! Start parsing a a a a b against [· a $( a )* a b].
|
||||
//!
|
||||
//! Remaining input: a a a a b
|
||||
//! next: [· a $( a )* a b]
|
||||
//!
|
||||
//! - - - Advance over an a. - - -
|
||||
//!
|
||||
//! Remaining input: a a a b
|
||||
//! cur: [a · $( a )* a b]
|
||||
//! Descend/Skip (first item).
|
||||
//! next: [a $( · a )* a b] [a $( a )* · a b].
|
||||
//!
|
||||
//! - - - Advance over an a. - - -
|
||||
//!
|
||||
//! Remaining input: a a b
|
||||
//! cur: [a $( a · )* a b] [a $( a )* a · b]
|
||||
//! Follow epsilon transition: Finish/Repeat (first item)
|
||||
//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
|
||||
//!
|
||||
//! - - - Advance over an a. - - - (this looks exactly like the last step)
|
||||
//!
|
||||
//! Remaining input: a b
|
||||
//! cur: [a $( a · )* a b] [a $( a )* a · b]
|
||||
//! Follow epsilon transition: Finish/Repeat (first item)
|
||||
//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
|
||||
//!
|
||||
//! - - - Advance over an a. - - - (this looks exactly like the last step)
|
||||
//!
|
||||
//! Remaining input: b
|
||||
//! cur: [a $( a · )* a b] [a $( a )* a · b]
|
||||
//! Follow epsilon transition: Finish/Repeat (first item)
|
||||
//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
|
||||
//!
|
||||
//! - - - Advance over a b. - - -
|
||||
//!
|
||||
//! Remaining input: ''
|
||||
//! eof: [a $( a )* a b ·]
|
||||
//! ```
|
||||
|
||||
use crate::{
|
||||
expander::{Binding, Bindings, Fragment},
|
||||
parser::{Op, RepeatKind, Separator},
|
||||
parser::{Op, OpDelimited, OpDelimitedIter, RepeatKind, Separator},
|
||||
tt_iter::TtIter,
|
||||
ExpandError, MetaTemplate,
|
||||
};
|
||||
|
||||
use super::ExpandResult;
|
||||
use parser::FragmentKind::*;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use syntax::SmolStr;
|
||||
|
||||
impl Bindings {
|
||||
|
@ -16,19 +76,19 @@ impl Bindings {
|
|||
// FIXME: Do we have a better way to represent an empty token ?
|
||||
// Insert an empty subtree for empty token
|
||||
let tt = tt::Subtree::default().into();
|
||||
self.inner.insert(name.clone(), Binding::Fragment(Fragment::Tokens(tt)));
|
||||
self.inner.push((name.clone(), Binding::Fragment(Fragment::Tokens(tt))));
|
||||
}
|
||||
|
||||
fn push_empty(&mut self, name: &SmolStr) {
|
||||
self.inner.insert(name.clone(), Binding::Empty);
|
||||
self.inner.push((name.clone(), Binding::Empty));
|
||||
}
|
||||
|
||||
fn push_nested(&mut self, idx: usize, nested: Bindings) -> Result<(), ExpandError> {
|
||||
for (key, value) in nested.inner {
|
||||
if !self.inner.contains_key(&key) {
|
||||
self.inner.insert(key.clone(), Binding::Nested(Vec::new()));
|
||||
if self.get_mut(&key).is_none() {
|
||||
self.inner.push((key.clone(), Binding::Nested(Vec::new())));
|
||||
}
|
||||
match self.inner.get_mut(&key) {
|
||||
match self.get_mut(&key) {
|
||||
Some(Binding::Nested(it)) => {
|
||||
// insert empty nested bindings before this one
|
||||
while it.len() < idx {
|
||||
|
@ -46,6 +106,14 @@ impl Bindings {
|
|||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_mut(&mut self, name: &str) -> Option<&mut Binding> {
|
||||
self.inner.iter_mut().find_map(|(n, b)| if n == name { Some(b) } else { None })
|
||||
}
|
||||
|
||||
fn bindings(&self) -> impl Iterator<Item = &Binding> {
|
||||
self.inner.iter().map(|(_, b)| b)
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! err {
|
||||
|
@ -57,7 +125,7 @@ macro_rules! err {
|
|||
};
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub(super) struct Match {
|
||||
pub(super) bindings: Bindings,
|
||||
/// We currently just keep the first error and count the rest to compare matches.
|
||||
|
@ -65,6 +133,8 @@ pub(super) struct Match {
|
|||
pub(super) err_count: usize,
|
||||
/// How many top-level token trees were left to match.
|
||||
pub(super) unmatched_tts: usize,
|
||||
/// The number of bound variables
|
||||
pub(super) bound_count: usize,
|
||||
}
|
||||
|
||||
impl Match {
|
||||
|
@ -76,72 +146,373 @@ impl Match {
|
|||
}
|
||||
|
||||
/// Matching errors are added to the `Match`.
|
||||
pub(super) fn match_(pattern: &MetaTemplate, src: &tt::Subtree) -> Match {
|
||||
let mut res = Match::default();
|
||||
let mut src = TtIter::new(src);
|
||||
pub(super) fn match_(pattern: &MetaTemplate, input: &tt::Subtree) -> Match {
|
||||
let mut res = match_loop(pattern, &input);
|
||||
res.bound_count = count(res.bindings.bindings());
|
||||
return res;
|
||||
|
||||
match_tokens(&mut res, pattern, &mut src);
|
||||
|
||||
if src.len() > 0 {
|
||||
res.unmatched_tts += src.len();
|
||||
res.add_err(err!("leftover tokens"));
|
||||
fn count<'a>(bindings: impl Iterator<Item = &'a Binding>) -> usize {
|
||||
bindings
|
||||
.map(|it| match it {
|
||||
Binding::Fragment(_) => 1,
|
||||
Binding::Empty => 1,
|
||||
Binding::Nested(it) => count(it.iter()),
|
||||
})
|
||||
.sum()
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
|
||||
fn match_tokens(res: &mut Match, pattern: &MetaTemplate, src: &mut TtIter) {
|
||||
for op in pattern.iter() {
|
||||
match op {
|
||||
Op::Leaf(lhs) => {
|
||||
if let Err(err) = match_leaf(lhs, src) {
|
||||
res.add_err(err);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Op::Subtree { tokens, delimiter: delim } => {
|
||||
let rhs = match src.expect_subtree() {
|
||||
Ok(s) => s,
|
||||
Err(()) => {
|
||||
res.add_err(err!("expected subtree"));
|
||||
continue;
|
||||
#[derive(Debug, Clone)]
|
||||
struct MatchState<'t> {
|
||||
/// The position of the "dot" in this matcher
|
||||
dot: OpDelimitedIter<'t>,
|
||||
|
||||
/// Token subtree stack
|
||||
/// When matching against matchers with nested delimited submatchers (e.g., `pat ( pat ( .. )
|
||||
/// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does
|
||||
/// that where the bottom of the stack is the outermost matcher.
|
||||
stack: SmallVec<[OpDelimitedIter<'t>; 4]>,
|
||||
|
||||
/// The "parent" matcher position if we are in a repetition. That is, the matcher position just
|
||||
/// before we enter the repetition.
|
||||
up: Option<Box<MatchState<'t>>>,
|
||||
|
||||
/// The separator if we are in a repetition.
|
||||
sep: Option<Separator>,
|
||||
|
||||
/// The KleeneOp of this sequence if we are in a repetition.
|
||||
sep_kind: Option<RepeatKind>,
|
||||
|
||||
/// Number of tokens of seperator parsed
|
||||
sep_parsed: Option<usize>,
|
||||
|
||||
/// Matched meta variables bindings
|
||||
bindings: SmallVec<[Bindings; 4]>,
|
||||
|
||||
/// Cached result of meta variable parsing
|
||||
meta_result: Option<(TtIter<'t>, ExpandResult<Option<Fragment>>)>,
|
||||
|
||||
/// Is error occuried in this state, will `poised` to "parent"
|
||||
is_error: bool,
|
||||
}
|
||||
|
||||
/// Process the matcher positions of `cur_items` until it is empty. In the process, this will
|
||||
/// produce more items in `next_items`, `eof_items`, and `bb_items`.
|
||||
///
|
||||
/// For more info about the how this happens, see the module-level doc comments and the inline
|
||||
/// comments of this function.
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// - `src`: the current token of the parser.
|
||||
/// - `stack`: the "parent" frames of the token tree
|
||||
/// - `res`: the match result to store errors
|
||||
/// - `cur_items`: the set of current items to be processed. This should be empty by the end of a
|
||||
/// successful execution of this function.
|
||||
/// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in
|
||||
/// the function `parse`.
|
||||
/// - `eof_items`: the set of items that would be valid if this was the EOF.
|
||||
/// - `bb_items`: the set of items that are waiting for the black-box parser.
|
||||
/// - `error_items`: the set of items in errors, used for error-resilient parsing
|
||||
fn match_loop_inner<'t>(
|
||||
src: TtIter<'t>,
|
||||
stack: &[TtIter<'t>],
|
||||
res: &mut Match,
|
||||
cur_items: &mut SmallVec<[MatchState<'t>; 1]>,
|
||||
bb_items: &mut SmallVec<[MatchState<'t>; 1]>,
|
||||
next_items: &mut Vec<MatchState<'t>>,
|
||||
eof_items: &mut SmallVec<[MatchState<'t>; 1]>,
|
||||
error_items: &mut SmallVec<[MatchState<'t>; 1]>,
|
||||
) {
|
||||
macro_rules! try_push {
|
||||
($items: expr, $it:expr) => {
|
||||
if $it.is_error {
|
||||
error_items.push($it);
|
||||
} else {
|
||||
$items.push($it);
|
||||
}
|
||||
};
|
||||
if delim.map(|it| it.kind) != rhs.delimiter_kind() {
|
||||
res.add_err(err!("mismatched delimiter"));
|
||||
continue;
|
||||
}
|
||||
let mut src = TtIter::new(rhs);
|
||||
match_tokens(res, tokens, &mut src);
|
||||
if src.len() > 0 {
|
||||
res.add_err(err!("leftover tokens"));
|
||||
|
||||
while let Some(mut item) = cur_items.pop() {
|
||||
while item.dot.is_eof() {
|
||||
match item.stack.pop() {
|
||||
Some(frame) => {
|
||||
item.dot = frame;
|
||||
item.dot.next();
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
Op::Var { name, kind, .. } => {
|
||||
let kind = match kind {
|
||||
Some(k) => k,
|
||||
let op = match item.dot.peek() {
|
||||
None => {
|
||||
res.add_err(ExpandError::UnexpectedToken);
|
||||
// We are at or past the end of the matcher of `item`.
|
||||
if item.up.is_some() {
|
||||
if item.sep_parsed.is_none() {
|
||||
// Get the `up` matcher
|
||||
let mut new_pos = *item.up.clone().unwrap();
|
||||
// Add matches from this repetition to the `matches` of `up`
|
||||
if let Some(bindings) = new_pos.bindings.last_mut() {
|
||||
for (i, b) in item.bindings.iter_mut().enumerate() {
|
||||
bindings.push_nested(i, b.clone()).unwrap();
|
||||
}
|
||||
}
|
||||
// Move the "dot" past the repetition in `up`
|
||||
new_pos.dot.next();
|
||||
new_pos.is_error = new_pos.is_error || item.is_error;
|
||||
cur_items.push(new_pos);
|
||||
}
|
||||
|
||||
// Check if we need a separator.
|
||||
// We check the separator one by one
|
||||
let sep_idx = *item.sep_parsed.as_ref().unwrap_or(&0);
|
||||
let sep_len = item.sep.as_ref().map_or(0, Separator::tt_count);
|
||||
if item.sep.is_some() && sep_idx != sep_len {
|
||||
let sep = item.sep.as_ref().unwrap();
|
||||
if src.clone().expect_separator(&sep, sep_idx) {
|
||||
item.dot.next();
|
||||
item.sep_parsed = Some(sep_idx + 1);
|
||||
try_push!(next_items, item);
|
||||
}
|
||||
}
|
||||
// We don't need a separator. Move the "dot" back to the beginning of the matcher
|
||||
// and try to match again UNLESS we are only allowed to have _one_ repetition.
|
||||
else if item.sep_kind != Some(RepeatKind::ZeroOrOne) {
|
||||
item.dot = item.dot.reset();
|
||||
item.sep_parsed = None;
|
||||
item.bindings.push(Bindings::default());
|
||||
cur_items.push(item);
|
||||
}
|
||||
} else {
|
||||
// If we are not in a repetition, then being at the end of a matcher means that we have
|
||||
// reached the potential end of the input.
|
||||
try_push!(eof_items, item);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
Some(it) => it,
|
||||
};
|
||||
let ExpandResult { value: matched, err: match_err } =
|
||||
match_meta_var(kind.as_str(), src);
|
||||
match matched {
|
||||
Some(fragment) => {
|
||||
res.bindings.inner.insert(name.clone(), Binding::Fragment(fragment));
|
||||
|
||||
// We are in the middle of a matcher.
|
||||
match op {
|
||||
OpDelimited::Op(Op::Repeat { tokens, kind, separator }) => {
|
||||
if matches!(kind, RepeatKind::ZeroOrMore | RepeatKind::ZeroOrOne) {
|
||||
let mut new_item = item.clone();
|
||||
new_item.dot.next();
|
||||
let mut vars = Vec::new();
|
||||
let bindings = new_item.bindings.last_mut().unwrap();
|
||||
collect_vars(&mut vars, tokens);
|
||||
for var in vars {
|
||||
bindings.push_empty(&var);
|
||||
}
|
||||
cur_items.push(new_item);
|
||||
}
|
||||
cur_items.push(MatchState {
|
||||
dot: tokens.iter_delimited(None),
|
||||
stack: Default::default(),
|
||||
up: Some(Box::new(item)),
|
||||
sep: separator.clone(),
|
||||
sep_kind: Some(*kind),
|
||||
sep_parsed: None,
|
||||
bindings: smallvec![Bindings::default()],
|
||||
meta_result: None,
|
||||
is_error: false,
|
||||
})
|
||||
}
|
||||
OpDelimited::Op(Op::Subtree { tokens, delimiter }) => {
|
||||
if let Ok(subtree) = src.clone().expect_subtree() {
|
||||
if subtree.delimiter_kind() == delimiter.map(|it| it.kind) {
|
||||
item.stack.push(item.dot);
|
||||
item.dot = tokens.iter_delimited(delimiter.as_ref());
|
||||
cur_items.push(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
OpDelimited::Op(Op::Var { kind, name, .. }) => {
|
||||
if let Some(kind) = kind {
|
||||
let mut fork = src.clone();
|
||||
let match_res = match_meta_var(kind.as_str(), &mut fork);
|
||||
match match_res.err {
|
||||
None => {
|
||||
// Some meta variables are optional (e.g. vis)
|
||||
if match_res.value.is_some() {
|
||||
item.meta_result = Some((fork, match_res));
|
||||
try_push!(bb_items, item);
|
||||
} else {
|
||||
item.bindings.last_mut().unwrap().push_optional(name);
|
||||
item.dot.next();
|
||||
cur_items.push(item);
|
||||
}
|
||||
}
|
||||
Some(err) => {
|
||||
res.add_err(err);
|
||||
match match_res.value {
|
||||
Some(fragment) => {
|
||||
item.bindings
|
||||
.last_mut()
|
||||
.unwrap()
|
||||
.inner
|
||||
.push((name.clone(), Binding::Fragment(fragment)));
|
||||
}
|
||||
None if match_err.is_none() => res.bindings.push_optional(name),
|
||||
_ => {}
|
||||
}
|
||||
if let Some(err) = match_err {
|
||||
item.is_error = true;
|
||||
error_items.push(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
OpDelimited::Op(Op::Leaf(leaf)) => {
|
||||
if let Err(err) = match_leaf(&leaf, &mut src.clone()) {
|
||||
res.add_err(err);
|
||||
item.is_error = true;
|
||||
} else {
|
||||
item.dot.next();
|
||||
}
|
||||
try_push!(next_items, item);
|
||||
}
|
||||
OpDelimited::Open => {
|
||||
if matches!(src.clone().next(), Some(tt::TokenTree::Subtree(..))) {
|
||||
item.dot.next();
|
||||
try_push!(next_items, item);
|
||||
}
|
||||
}
|
||||
OpDelimited::Close => {
|
||||
let is_delim_closed = src.peek_n(0).is_none() && !stack.is_empty();
|
||||
if is_delim_closed {
|
||||
item.dot.next();
|
||||
try_push!(next_items, item);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match {
|
||||
let mut src = TtIter::new(src);
|
||||
let mut stack: SmallVec<[TtIter; 1]> = SmallVec::new();
|
||||
let mut res = Match::default();
|
||||
let mut error_reover_item = None;
|
||||
|
||||
let mut cur_items = smallvec![MatchState {
|
||||
dot: pattern.iter_delimited(None),
|
||||
stack: Default::default(),
|
||||
up: None,
|
||||
sep: None,
|
||||
sep_kind: None,
|
||||
sep_parsed: None,
|
||||
bindings: smallvec![Bindings::default()],
|
||||
is_error: false,
|
||||
meta_result: None,
|
||||
}];
|
||||
|
||||
let mut next_items = vec![];
|
||||
|
||||
loop {
|
||||
let mut bb_items = SmallVec::new();
|
||||
let mut eof_items = SmallVec::new();
|
||||
let mut error_items = SmallVec::new();
|
||||
|
||||
stdx::always!(next_items.is_empty());
|
||||
|
||||
match_loop_inner(
|
||||
src.clone(),
|
||||
&stack,
|
||||
&mut res,
|
||||
&mut cur_items,
|
||||
&mut bb_items,
|
||||
&mut next_items,
|
||||
&mut eof_items,
|
||||
&mut error_items,
|
||||
);
|
||||
stdx::always!(cur_items.is_empty());
|
||||
|
||||
if error_items.len() > 0 {
|
||||
error_reover_item = error_items.pop();
|
||||
} else if eof_items.len() > 0 {
|
||||
error_reover_item = Some(eof_items[0].clone());
|
||||
}
|
||||
|
||||
// We need to do some post processing after the `match_loop_inner`.
|
||||
// If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise,
|
||||
// either the parse is ambiguous (which should never happen) or there is a syntax error.
|
||||
if src.peek_n(0).is_none() && stack.is_empty() {
|
||||
if eof_items.len() == 1 {
|
||||
// remove all errors, because it is the correct answer !
|
||||
res = Match::default();
|
||||
res.bindings = eof_items[0].bindings[0].clone();
|
||||
} else {
|
||||
// Error recovery
|
||||
if error_reover_item.is_some() {
|
||||
res.bindings = error_reover_item.unwrap().bindings[0].clone();
|
||||
}
|
||||
res.add_err(ExpandError::UnexpectedToken);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
// If there are no possible next positions AND we aren't waiting for the black-box parser,
|
||||
// then there is a syntax error.
|
||||
//
|
||||
// Another possibility is that we need to call out to parse some rust nonterminal
|
||||
// (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong.
|
||||
if (bb_items.is_empty() && next_items.is_empty())
|
||||
|| (!bb_items.is_empty() && !next_items.is_empty())
|
||||
|| bb_items.len() > 1
|
||||
{
|
||||
res.unmatched_tts += src.len();
|
||||
while let Some(it) = stack.pop() {
|
||||
src = it;
|
||||
res.unmatched_tts += src.len();
|
||||
}
|
||||
res.add_err(err!("leftover tokens"));
|
||||
|
||||
if let Some(mut error_reover_item) = error_reover_item {
|
||||
res.bindings = error_reover_item.bindings.remove(0);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
// Dump all possible `next_items` into `cur_items` for the next iteration.
|
||||
else if !next_items.is_empty() {
|
||||
// Now process the next token
|
||||
cur_items.extend(next_items.drain(..));
|
||||
|
||||
match src.next() {
|
||||
Some(tt::TokenTree::Subtree(subtree)) => {
|
||||
stack.push(src.clone());
|
||||
src = TtIter::new(subtree);
|
||||
}
|
||||
None if !stack.is_empty() => src = stack.pop().unwrap(),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
// Finally, we have the case where we need to call the black-box parser to get some
|
||||
// nonterminal.
|
||||
else {
|
||||
stdx::always!(bb_items.len() == 1);
|
||||
let mut item = bb_items.pop().unwrap();
|
||||
|
||||
if let Some(OpDelimited::Op(Op::Var { name, .. })) = item.dot.peek() {
|
||||
let (iter, match_res) = item.meta_result.take().unwrap();
|
||||
let bindings = item.bindings.last_mut().unwrap();
|
||||
match match_res.value {
|
||||
Some(fragment) => {
|
||||
bindings.inner.push((name.clone(), Binding::Fragment(fragment)));
|
||||
}
|
||||
None if match_res.err.is_none() => bindings.push_optional(name),
|
||||
_ => {}
|
||||
}
|
||||
if let Some(err) = match_res.err {
|
||||
res.add_err(err);
|
||||
}
|
||||
src = iter.clone();
|
||||
item.dot.next();
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
Op::Repeat { tokens: subtree, kind, separator } => {
|
||||
match_repeat(res, subtree, *kind, separator, src);
|
||||
}
|
||||
cur_items.push(item);
|
||||
}
|
||||
stdx::always!(!cur_items.is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -173,73 +544,6 @@ fn match_leaf(lhs: &tt::Leaf, src: &mut TtIter) -> Result<(), ExpandError> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn match_repeat(
|
||||
res: &mut Match,
|
||||
pattern: &MetaTemplate,
|
||||
kind: RepeatKind,
|
||||
separator: &Option<Separator>,
|
||||
src: &mut TtIter,
|
||||
) {
|
||||
// Dirty hack to make macro-expansion terminate.
|
||||
// This should be replaced by a proper macro-by-example implementation
|
||||
let mut limit = 65536;
|
||||
let mut counter = 0;
|
||||
|
||||
for i in 0.. {
|
||||
let mut fork = src.clone();
|
||||
|
||||
if let Some(separator) = &separator {
|
||||
if i != 0 && !fork.eat_separator(separator) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let mut nested = Match::default();
|
||||
match_tokens(&mut nested, pattern, &mut fork);
|
||||
if nested.err.is_none() {
|
||||
limit -= 1;
|
||||
if limit == 0 {
|
||||
log::warn!(
|
||||
"match_lhs exceeded repeat pattern limit => {:#?}\n{:#?}\n{:#?}\n{:#?}",
|
||||
pattern,
|
||||
src,
|
||||
kind,
|
||||
separator
|
||||
);
|
||||
break;
|
||||
}
|
||||
*src = fork;
|
||||
|
||||
if let Err(err) = res.bindings.push_nested(counter, nested.bindings) {
|
||||
res.add_err(err);
|
||||
}
|
||||
counter += 1;
|
||||
if counter == 1 {
|
||||
if let RepeatKind::ZeroOrOne = kind {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
match (kind, counter) {
|
||||
(RepeatKind::OneOrMore, 0) => {
|
||||
res.add_err(ExpandError::UnexpectedToken);
|
||||
}
|
||||
(_, 0) => {
|
||||
// Collect all empty variables in subtrees
|
||||
let mut vars = Vec::new();
|
||||
collect_vars(&mut vars, pattern);
|
||||
for var in vars {
|
||||
res.bindings.push_empty(&var)
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
fn match_meta_var(kind: &str, input: &mut TtIter) -> ExpandResult<Option<Fragment>> {
|
||||
let fragment = match kind {
|
||||
"path" => Path,
|
||||
|
@ -303,14 +607,14 @@ fn collect_vars(buf: &mut Vec<SmolStr>, pattern: &MetaTemplate) {
|
|||
}
|
||||
|
||||
impl<'a> TtIter<'a> {
|
||||
fn eat_separator(&mut self, separator: &Separator) -> bool {
|
||||
fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool {
|
||||
let mut fork = self.clone();
|
||||
let ok = match separator {
|
||||
Separator::Ident(lhs) => match fork.expect_ident() {
|
||||
Separator::Ident(lhs) if idx == 0 => match fork.expect_ident() {
|
||||
Ok(rhs) => rhs.text == lhs.text,
|
||||
_ => false,
|
||||
},
|
||||
Separator::Literal(lhs) => match fork.expect_literal() {
|
||||
Separator::Literal(lhs) if idx == 0 => match fork.expect_literal() {
|
||||
Ok(rhs) => match rhs {
|
||||
tt::Leaf::Literal(rhs) => rhs.text == lhs.text,
|
||||
tt::Leaf::Ident(rhs) => rhs.text == lhs.text,
|
||||
|
@ -318,10 +622,11 @@ impl<'a> TtIter<'a> {
|
|||
},
|
||||
_ => false,
|
||||
},
|
||||
Separator::Puncts(lhss) => lhss.iter().all(|lhs| match fork.expect_punct() {
|
||||
Ok(rhs) => rhs.char == lhs.char,
|
||||
Separator::Puncts(lhss) if idx < lhss.len() => match fork.expect_punct() {
|
||||
Ok(rhs) => rhs.char == lhss[idx].char,
|
||||
_ => false,
|
||||
},
|
||||
_ => false,
|
||||
}),
|
||||
};
|
||||
if ok {
|
||||
*self = fork;
|
||||
|
|
|
@ -13,11 +13,15 @@ use crate::{
|
|||
|
||||
impl Bindings {
|
||||
fn contains(&self, name: &str) -> bool {
|
||||
self.inner.contains_key(name)
|
||||
self.inner.iter().any(|(n, _)| n == name)
|
||||
}
|
||||
|
||||
fn get(&self, name: &str, nesting: &mut [NestingState]) -> Result<&Fragment, ExpandError> {
|
||||
let mut b = self.inner.get(name).ok_or_else(|| {
|
||||
let mut b: &Binding = self
|
||||
.inner
|
||||
.iter()
|
||||
.find_map(|(n, b)| if n == name { Some(b) } else { None })
|
||||
.ok_or_else(|| {
|
||||
ExpandError::BindingError(format!("could not find binding `{}`", name))
|
||||
})?;
|
||||
for nesting_state in nesting.iter_mut() {
|
||||
|
|
|
@ -21,7 +21,7 @@ use test_utils::mark;
|
|||
pub use tt::{Delimiter, DelimiterKind, Punct};
|
||||
|
||||
use crate::{
|
||||
parser::{parse_pattern, parse_template, Op},
|
||||
parser::{parse_pattern, parse_template, MetaTemplate, Op},
|
||||
tt_iter::TtIter,
|
||||
};
|
||||
|
||||
|
@ -94,15 +94,6 @@ struct Rule {
|
|||
rhs: MetaTemplate,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
struct MetaTemplate(Vec<Op>);
|
||||
|
||||
impl<'a> MetaTemplate {
|
||||
fn iter(&self) -> impl Iterator<Item = &Op> {
|
||||
self.0.iter()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
struct Shift(u32);
|
||||
|
||||
|
|
|
@ -5,7 +5,75 @@ use smallvec::SmallVec;
|
|||
use syntax::SmolStr;
|
||||
use tt::Delimiter;
|
||||
|
||||
use crate::{tt_iter::TtIter, MetaTemplate, ParseError};
|
||||
use crate::{tt_iter::TtIter, ParseError};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct MetaTemplate(pub(crate) Vec<Op>);
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub(crate) enum OpDelimited<'a> {
|
||||
Op(&'a Op),
|
||||
Open,
|
||||
Close,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub(crate) struct OpDelimitedIter<'a> {
|
||||
inner: &'a Vec<Op>,
|
||||
delimited: Option<&'a Delimiter>,
|
||||
idx: usize,
|
||||
}
|
||||
|
||||
impl<'a> OpDelimitedIter<'a> {
|
||||
pub(crate) fn is_eof(&self) -> bool {
|
||||
let len = self.inner.len() + if self.delimited.is_some() { 2 } else { 0 };
|
||||
self.idx >= len
|
||||
}
|
||||
|
||||
pub(crate) fn peek(&self) -> Option<OpDelimited<'a>> {
|
||||
match self.delimited {
|
||||
None => self.inner.get(self.idx).map(OpDelimited::Op),
|
||||
Some(_) => match self.idx {
|
||||
0 => Some(OpDelimited::Open),
|
||||
i if i == self.inner.len() + 1 => Some(OpDelimited::Close),
|
||||
i => self.inner.get(i - 1).map(OpDelimited::Op),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn reset(&self) -> Self {
|
||||
Self { inner: &self.inner, idx: 0, delimited: self.delimited }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for OpDelimitedIter<'a> {
|
||||
type Item = OpDelimited<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let res = self.peek();
|
||||
self.idx += 1;
|
||||
res
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let len = self.inner.len() + if self.delimited.is_some() { 2 } else { 0 };
|
||||
let remain = len.checked_sub(self.idx).unwrap_or(0);
|
||||
(remain, Some(remain))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> MetaTemplate {
|
||||
pub(crate) fn iter(&self) -> impl Iterator<Item = &Op> {
|
||||
self.0.iter()
|
||||
}
|
||||
|
||||
pub(crate) fn iter_delimited(
|
||||
&'a self,
|
||||
delimited: Option<&'a Delimiter>,
|
||||
) -> OpDelimitedIter<'a> {
|
||||
OpDelimitedIter { inner: &self.0, idx: 0, delimited }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum Op {
|
||||
|
@ -47,6 +115,16 @@ impl PartialEq for Separator {
|
|||
}
|
||||
}
|
||||
|
||||
impl Separator {
|
||||
pub(crate) fn tt_count(&self) -> usize {
|
||||
match self {
|
||||
Separator::Literal(_) => 1,
|
||||
Separator::Ident(_) => 1,
|
||||
Separator::Puncts(it) => it.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse_template(template: &tt::Subtree) -> Result<Vec<Op>, ParseError> {
|
||||
parse_inner(&template, Mode::Template).into_iter().collect()
|
||||
}
|
||||
|
|
|
@ -456,6 +456,17 @@ fn test_match_group_with_multichar_sep() {
|
|||
.assert_expand_items("foo! (fn baz {true true} );", "fn baz () -> bool {true &&true}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_match_group_with_multichar_sep2() {
|
||||
parse_macro(
|
||||
r#"
|
||||
macro_rules! foo {
|
||||
(fn $name:ident {$($i:literal)&&*} ) => ( fn $name() -> bool { $($i)&&*} );
|
||||
}"#,
|
||||
)
|
||||
.assert_expand_items("foo! (fn baz {true && true} );", "fn baz () -> bool {true &&true}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_match_group_zero_match() {
|
||||
parse_macro(
|
||||
|
@ -1267,6 +1278,18 @@ macro_rules! m {
|
|||
.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_match_is_not_greedy() {
|
||||
parse_macro(
|
||||
r#"
|
||||
macro_rules! foo {
|
||||
($($i:ident $(,)*),*) => {};
|
||||
}
|
||||
"#,
|
||||
)
|
||||
.assert_expand_items(r#"foo!(a,b);"#, r#""#);
|
||||
}
|
||||
|
||||
// The following tests are based on real world situations
|
||||
#[test]
|
||||
fn test_vec() {
|
||||
|
|
Loading…
Reference in a new issue