7513: NFA parser for mbe matcher r=matklad a=edwin0cheng

An almost straight port of the rustc one, but a little bit slower :(

```
rust-analyzer analysis-stats -q . 
```

From:
```log
Database loaded:     636.11ms, 277minstr
  crates: 36, mods: 594, decls: 11527, fns: 9017
Item Collection:     10.99s, 60ginstr
  exprs: 249618, ??ty: 2699 (1%), ?ty: 2101 (0%), !ty: 932
Inference:           28.94s, 123ginstr
Total:               39.93s, 184ginstr
```

To:
```log
Database loaded:     630.90ms, 277minstr
  crates: 36, mods: 594, decls: 11528, fns: 9018
Item Collection:     13.70s, 77ginstr
  exprs: 249482, ??ty: 2699 (1%), ?ty: 2101 (0%), !ty: 932
Inference:           30.27s, 133ginstr
Total:               43.97s, 211ginstr
```

Fixes #4777

Co-authored-by: Edwin Cheng <edwin0cheng@gmail.com>
commit 91bf5fa827 by bors[bot], 2021-03-02 13:20:47 +00:00, committed by GitHub
9 changed files with 592 additions and 175 deletions

Cargo.lock (generated)

@@ -879,6 +879,7 @@ dependencies = [
"profile",
"rustc-hash",
"smallvec",
"stdx",
"syntax",
"test_utils",
"tt",


@@ -18,6 +18,7 @@ syntax = { path = "../syntax", version = "0.0.0" }
parser = { path = "../parser", version = "0.0.0" }
tt = { path = "../tt", version = "0.0.0" }
test_utils = { path = "../test_utils", version = "0.0.0" }
stdx = { path = "../stdx", version = "0.0.0" }
[dev-dependencies]
profile = { path = "../profile" }


@@ -40,18 +40,12 @@ fn benchmark_expand_macro_rules() {
.into_iter()
.map(|(id, tt)| {
let res = rules[&id].expand(&tt);
if res.err.is_some() {
// FIXME:
// Currently `invocation_fixtures` will generate some correct invocations but
// cannot be expanded by mbe. We ignore errors here.
// See: https://github.com/rust-analyzer/rust-analyzer/issues/4777
eprintln!("err from {} {:?}", id, res.err);
}
assert!(res.err.is_none());
res.value.token_trees.len()
})
.sum()
};
assert_eq!(hash, 66995);
assert_eq!(hash, 69413);
}
fn macro_rules_fixtures() -> FxHashMap<String, MacroRules> {
@@ -77,7 +71,7 @@ fn macro_rules_fixtures_tt() -> FxHashMap<String, tt::Subtree> {
.collect()
}
// Generate random invocation fixtures from rules
/// Generate random invocation fixtures from rules
fn invocation_fixtures(rules: &FxHashMap<String, MacroRules>) -> Vec<(String, tt::Subtree)> {
let mut seed = 123456789;
let mut res = Vec::new();
@@ -86,11 +80,31 @@ fn invocation_fixtures(rules: &FxHashMap<String, MacroRules>) -> Vec<(String, tt
for rule in &it.rules {
// Generate twice
for _ in 0..2 {
let mut subtree = tt::Subtree::default();
for op in rule.lhs.iter() {
collect_from_op(op, &mut subtree, &mut seed);
// The input is generated by filling the `Op`s randomly.
// However, some of the generated cases are ambiguous to expand, for example:
// ```rust
// macro_rules! m {
// ($($t:ident),* as $ty:ident) => {}
// }
// m!(as u32); // error: local ambiguity: multiple parsing options: built-in NTs ident ('t') or 1 other option.
// ```
//
// So we just skip any error cases and try again
let mut try_cnt = 0;
loop {
let mut subtree = tt::Subtree::default();
for op in rule.lhs.iter() {
collect_from_op(op, &mut subtree, &mut seed);
}
if it.expand(&subtree).err.is_none() {
res.push((name.clone(), subtree));
break;
}
try_cnt += 1;
if try_cnt > 100 {
panic!("invocaton fixture {} cannot be generated.\n", name);
}
}
res.push((name.clone(), subtree));
}
}
}

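For readers who only want the shape of the benchmark change above: a random invocation is regenerated until the rule expands it without error, and generation gives up loudly after a bounded number of attempts. Below is a minimal, self-contained sketch of that retry-with-cap pattern; `generate`, `is_valid` and `pick_fixture` are illustrative stand-ins, not functions from the crate.

```rust
// Toy retry-with-cap loop, mirroring the fixture generation in the diff above.
fn generate(seed: &mut u64) -> u64 {
    // xorshift-style step standing in for the random `Op` filling
    *seed ^= *seed << 13;
    *seed ^= *seed >> 7;
    *seed ^= *seed << 17;
    *seed
}

fn is_valid(candidate: u64) -> bool {
    // placeholder for "the macro rule expands this invocation without error"
    candidate % 3 != 0
}

fn pick_fixture(seed: &mut u64) -> u64 {
    let mut try_cnt = 0;
    loop {
        let candidate = generate(seed);
        if is_valid(candidate) {
            return candidate;
        }
        try_cnt += 1;
        if try_cnt > 100 {
            panic!("fixture cannot be generated");
        }
    }
}

fn main() {
    let mut seed = 123_456_789u64;
    assert!(is_valid(pick_fixture(&mut seed)));
}
```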

@@ -5,7 +5,7 @@
mod matcher;
mod transcriber;
use rustc_hash::FxHashMap;
use smallvec::SmallVec;
use syntax::SmolStr;
use crate::{ExpandError, ExpandResult};
@@ -28,10 +28,10 @@ pub(crate) fn expand_rules(
return ExpandResult::ok(value);
}
}
// Use the rule if we matched more tokens, or had fewer errors
// Use the rule if we matched more tokens or bound more variables
if let Some((prev_match, _)) = &match_ {
if (new_match.unmatched_tts, new_match.err_count)
< (prev_match.unmatched_tts, prev_match.err_count)
if (new_match.unmatched_tts, -(new_match.bound_count as i32))
< (prev_match.unmatched_tts, -(prev_match.bound_count as i32))
{
match_ = Some((new_match, rule));
}
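The selection tweak above is easy to miss in the diff: the candidate match with fewer leftover token trees wins, and ties are broken in favour of the match that bound more metavariables. Negating the bound count lets a single lexicographic tuple comparison express both criteria. A standalone sketch of that ordering, with illustrative names rather than the crate's types:

```rust
struct MatchStats {
    unmatched_tts: usize,
    bound_count: usize,
}

// `new` is preferred over `prev` if it left fewer token trees unmatched,
// or left the same amount unmatched but bound more variables.
fn is_better(new: &MatchStats, prev: &MatchStats) -> bool {
    (new.unmatched_tts, -(new.bound_count as i64))
        < (prev.unmatched_tts, -(prev.bound_count as i64))
}

fn main() {
    let prev = MatchStats { unmatched_tts: 2, bound_count: 1 };
    let new = MatchStats { unmatched_tts: 2, bound_count: 3 };
    assert!(is_better(&new, &prev)); // same leftovers, more bindings
}
```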
@@ -94,19 +94,19 @@ pub(crate) fn expand_rules(
/// In other words, `Bindings` is a *multi* mapping from `SmolStr` to
/// `tt::TokenTree`, where the index to select a particular `TokenTree` among
/// many is not a plain `usize`, but an `&[usize]`.
#[derive(Debug, Default)]
#[derive(Debug, Default, Clone, PartialEq, Eq)]
struct Bindings {
inner: FxHashMap<SmolStr, Binding>,
inner: SmallVec<[(SmolStr, Binding); 4]>,
}
#[derive(Debug)]
#[derive(Debug, Clone, PartialEq, Eq)]
enum Binding {
Fragment(Fragment),
Nested(Vec<Binding>),
Empty,
}
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
enum Fragment {
/// token fragments are just copy-pasted into the output
Tokens(tt::TokenTree),

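The `Bindings` change in this file swaps a hash map for a small association list: a macro rule typically binds only a handful of metavariables, so a linear scan over a `SmallVec` of `(name, binding)` pairs is cheap, and so is cloning the whole structure for each NFA thread. A toy sketch of that lookup style, with plain `String`s standing in for `SmolStr`/`Binding`:

```rust
// Small association list: lookups scan linearly from the front.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
struct Bindings {
    inner: Vec<(String, String)>, // the PR uses SmallVec<[(SmolStr, Binding); 4]>
}

impl Bindings {
    fn push(&mut self, name: &str, value: &str) {
        self.inner.push((name.to_string(), value.to_string()));
    }

    fn get_mut(&mut self, name: &str) -> Option<&mut String> {
        self.inner
            .iter_mut()
            .find_map(|(n, v)| if n.as_str() == name { Some(v) } else { None })
    }
}

fn main() {
    let mut b = Bindings::default();
    b.push("t", "ident");
    assert!(b.get_mut("t").is_some());
    assert!(b.get_mut("missing").is_none());
}
```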

@@ -1,14 +1,74 @@
//! FIXME: write short doc here
//! An NFA-based parser, ported from the rustc mbe parsing code
//!
//! See https://github.com/rust-lang/rust/blob/70b18bc2cbac4712020019f5bf57c00905373205/compiler/rustc_expand/src/mbe/macro_parser.rs
//! Here is a quick intro to how the parser works, copied from rustc:
//!
//! A 'position' is a dot in the middle of a matcher, usually represented as a
//! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`.
//!
//! The parser walks through the input a character at a time, maintaining a list
//! of threads consistent with the current position in the input string: `cur_items`.
//!
//! As it processes them, it fills up `eof_items` with threads that would be valid if
//! the macro invocation is now over, `bb_items` with threads that are waiting on
//! a Rust non-terminal like `$e:expr`, and `next_items` with threads that are waiting
//! on a particular token. Most of the logic concerns moving the · through the
//! repetitions indicated by Kleene stars. The rules for moving the · without
//! consuming any input are called epsilon transitions. It only advances or calls
//! out to the real Rust parser when no `cur_items` threads remain.
//!
//! Example:
//!
//! ```text, ignore
//! Start parsing a a a a b against [· a $( a )* a b].
//!
//! Remaining input: a a a a b
//! next: [· a $( a )* a b]
//!
//! - - - Advance over an a. - - -
//!
//! Remaining input: a a a b
//! cur: [a · $( a )* a b]
//! Descend/Skip (first item).
//! next: [a $( · a )* a b] [a $( a )* · a b].
//!
//! - - - Advance over an a. - - -
//!
//! Remaining input: a a b
//! cur: [a $( a · )* a b] [a $( a )* a · b]
//! Follow epsilon transition: Finish/Repeat (first item)
//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
//!
//! - - - Advance over an a. - - - (this looks exactly like the last step)
//!
//! Remaining input: a b
//! cur: [a $( a · )* a b] [a $( a )* a · b]
//! Follow epsilon transition: Finish/Repeat (first item)
//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
//!
//! - - - Advance over an a. - - - (this looks exactly like the last step)
//!
//! Remaining input: b
//! cur: [a $( a · )* a b] [a $( a )* a · b]
//! Follow epsilon transition: Finish/Repeat (first item)
//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
//!
//! - - - Advance over a b. - - -
//!
//! Remaining input: ''
//! eof: [a $( a )* a b ·]
//! ```
use crate::{
expander::{Binding, Bindings, Fragment},
parser::{Op, RepeatKind, Separator},
parser::{Op, OpDelimited, OpDelimitedIter, RepeatKind, Separator},
tt_iter::TtIter,
ExpandError, MetaTemplate,
};
use super::ExpandResult;
use parser::FragmentKind::*;
use smallvec::{smallvec, SmallVec};
use syntax::SmolStr;
impl Bindings {
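The module doc above, and the `cur_items`/`next_items`/`eof_items` sets used throughout the new matcher, boil down to breadth-first NFA simulation: keep every position the dot could be in and advance all of them over each input token. The snippet below is a deliberately tiny illustration of that idea for the hard-coded pattern `a (a)* a b`; it is not the PR's matcher and knows nothing about token trees, metavariables or separators.

```rust
// Positions are indices into a linear encoding of `a (a)* a b`:
// 0: expect the leading 'a'   1: expect an 'a' inside the repetition
// 2: expect the trailing 'a'  3: expect 'b'    4: done
fn matches(input: &[char]) -> bool {
    let mut cur: Vec<usize> = vec![0]; // all live "threads" (cf. cur_items)
    for &tok in input {
        let mut next = Vec::new(); // threads after consuming `tok` (cf. next_items)
        for pos in cur {
            match pos {
                // after the leading 'a', either enter the repetition or skip it
                0 if tok == 'a' => next.extend_from_slice(&[1, 2]),
                // after one repetition element, either loop or leave the repetition
                1 if tok == 'a' => next.extend_from_slice(&[1, 2]),
                2 if tok == 'a' => next.push(3),
                3 if tok == 'b' => next.push(4),
                _ => {} // this thread dies
            }
        }
        if next.is_empty() {
            return false; // no thread survived: syntax error
        }
        next.sort_unstable();
        next.dedup();
        cur = next;
    }
    cur.contains(&4) // accept iff some thread consumed the whole pattern
}

fn main() {
    assert!(matches(&['a', 'a', 'a', 'a', 'b']));
    assert!(matches(&['a', 'a', 'b'])); // zero repetitions
    assert!(!matches(&['a', 'b', 'b']));
}
```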
@@ -16,19 +76,19 @@ impl Bindings {
// FIXME: Do we have a better way to represent an empty token ?
// Insert an empty subtree for empty token
let tt = tt::Subtree::default().into();
self.inner.insert(name.clone(), Binding::Fragment(Fragment::Tokens(tt)));
self.inner.push((name.clone(), Binding::Fragment(Fragment::Tokens(tt))));
}
fn push_empty(&mut self, name: &SmolStr) {
self.inner.insert(name.clone(), Binding::Empty);
self.inner.push((name.clone(), Binding::Empty));
}
fn push_nested(&mut self, idx: usize, nested: Bindings) -> Result<(), ExpandError> {
for (key, value) in nested.inner {
if !self.inner.contains_key(&key) {
self.inner.insert(key.clone(), Binding::Nested(Vec::new()));
if self.get_mut(&key).is_none() {
self.inner.push((key.clone(), Binding::Nested(Vec::new())));
}
match self.inner.get_mut(&key) {
match self.get_mut(&key) {
Some(Binding::Nested(it)) => {
// insert empty nested bindings before this one
while it.len() < idx {
@@ -46,6 +106,14 @@ impl Bindings {
}
Ok(())
}
fn get_mut(&mut self, name: &str) -> Option<&mut Binding> {
self.inner.iter_mut().find_map(|(n, b)| if n == name { Some(b) } else { None })
}
fn bindings(&self) -> impl Iterator<Item = &Binding> {
self.inner.iter().map(|(_, b)| b)
}
}
macro_rules! err {
@@ -57,7 +125,7 @@ macro_rules! err {
};
}
#[derive(Debug, Default)]
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(super) struct Match {
pub(super) bindings: Bindings,
/// We currently just keep the first error and count the rest to compare matches.
@@ -65,6 +133,8 @@ pub(super) struct Match {
pub(super) err_count: usize,
/// How many top-level token trees were left to match.
pub(super) unmatched_tts: usize,
/// The number of bound variables
pub(super) bound_count: usize,
}
impl Match {
@@ -76,72 +146,373 @@ impl Match {
}
/// Matching errors are added to the `Match`.
pub(super) fn match_(pattern: &MetaTemplate, src: &tt::Subtree) -> Match {
let mut res = Match::default();
let mut src = TtIter::new(src);
pub(super) fn match_(pattern: &MetaTemplate, input: &tt::Subtree) -> Match {
let mut res = match_loop(pattern, &input);
res.bound_count = count(res.bindings.bindings());
return res;
match_tokens(&mut res, pattern, &mut src);
if src.len() > 0 {
res.unmatched_tts += src.len();
res.add_err(err!("leftover tokens"));
fn count<'a>(bindings: impl Iterator<Item = &'a Binding>) -> usize {
bindings
.map(|it| match it {
Binding::Fragment(_) => 1,
Binding::Empty => 1,
Binding::Nested(it) => count(it.iter()),
})
.sum()
}
res
}
fn match_tokens(res: &mut Match, pattern: &MetaTemplate, src: &mut TtIter) {
for op in pattern.iter() {
match op {
Op::Leaf(lhs) => {
if let Err(err) = match_leaf(lhs, src) {
res.add_err(err);
continue;
}
#[derive(Debug, Clone)]
struct MatchState<'t> {
/// The position of the "dot" in this matcher
dot: OpDelimitedIter<'t>,
/// Token subtree stack
/// When matching against matchers with nested delimited submatchers (e.g., `pat ( pat ( .. )
/// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does
/// that where the bottom of the stack is the outermost matcher.
stack: SmallVec<[OpDelimitedIter<'t>; 4]>,
/// The "parent" matcher position if we are in a repetition. That is, the matcher position just
/// before we enter the repetition.
up: Option<Box<MatchState<'t>>>,
/// The separator if we are in a repetition.
sep: Option<Separator>,
/// The KleeneOp of this sequence if we are in a repetition.
sep_kind: Option<RepeatKind>,
/// Number of separator tokens parsed so far
sep_parsed: Option<usize>,
/// Matched meta variables bindings
bindings: SmallVec<[Bindings; 4]>,
/// Cached result of meta variable parsing
meta_result: Option<(TtIter<'t>, ExpandResult<Option<Fragment>>)>,
/// Whether an error occurred in this state; if so, it is propagated ("poisoned") to the parent
is_error: bool,
}
/// Process the matcher positions of `cur_items` until it is empty. In the process, this will
/// produce more items in `next_items`, `eof_items`, and `bb_items`.
///
/// For more info about how this happens, see the module-level doc comments and the inline
/// comments of this function.
///
/// # Parameters
///
/// - `src`: the current token of the parser.
/// - `stack`: the "parent" frames of the token tree
/// - `res`: the match result to store errors
/// - `cur_items`: the set of current items to be processed. This should be empty by the end of a
/// successful execution of this function.
/// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in
/// the function `parse`.
/// - `eof_items`: the set of items that would be valid if this was the EOF.
/// - `bb_items`: the set of items that are waiting for the black-box parser.
/// - `error_items`: the set of items that hit an error, used for error-resilient parsing
fn match_loop_inner<'t>(
src: TtIter<'t>,
stack: &[TtIter<'t>],
res: &mut Match,
cur_items: &mut SmallVec<[MatchState<'t>; 1]>,
bb_items: &mut SmallVec<[MatchState<'t>; 1]>,
next_items: &mut Vec<MatchState<'t>>,
eof_items: &mut SmallVec<[MatchState<'t>; 1]>,
error_items: &mut SmallVec<[MatchState<'t>; 1]>,
) {
macro_rules! try_push {
($items: expr, $it:expr) => {
if $it.is_error {
error_items.push($it);
} else {
$items.push($it);
}
Op::Subtree { tokens, delimiter: delim } => {
let rhs = match src.expect_subtree() {
Ok(s) => s,
Err(()) => {
res.add_err(err!("expected subtree"));
continue;
}
};
if delim.map(|it| it.kind) != rhs.delimiter_kind() {
res.add_err(err!("mismatched delimiter"));
continue;
};
}
while let Some(mut item) = cur_items.pop() {
while item.dot.is_eof() {
match item.stack.pop() {
Some(frame) => {
item.dot = frame;
item.dot.next();
}
let mut src = TtIter::new(rhs);
match_tokens(res, tokens, &mut src);
if src.len() > 0 {
res.add_err(err!("leftover tokens"));
}
}
Op::Var { name, kind, .. } => {
let kind = match kind {
Some(k) => k,
None => {
res.add_err(ExpandError::UnexpectedToken);
continue;
}
};
let ExpandResult { value: matched, err: match_err } =
match_meta_var(kind.as_str(), src);
match matched {
Some(fragment) => {
res.bindings.inner.insert(name.clone(), Binding::Fragment(fragment));
}
None if match_err.is_none() => res.bindings.push_optional(name),
_ => {}
}
if let Some(err) = match_err {
res.add_err(err);
}
}
Op::Repeat { tokens: subtree, kind, separator } => {
match_repeat(res, subtree, *kind, separator, src);
None => break,
}
}
let op = match item.dot.peek() {
None => {
// We are at or past the end of the matcher of `item`.
if item.up.is_some() {
if item.sep_parsed.is_none() {
// Get the `up` matcher
let mut new_pos = *item.up.clone().unwrap();
// Add matches from this repetition to the `matches` of `up`
if let Some(bindings) = new_pos.bindings.last_mut() {
for (i, b) in item.bindings.iter_mut().enumerate() {
bindings.push_nested(i, b.clone()).unwrap();
}
}
// Move the "dot" past the repetition in `up`
new_pos.dot.next();
new_pos.is_error = new_pos.is_error || item.is_error;
cur_items.push(new_pos);
}
// Check if we need a separator.
// We check the separator tokens one by one
let sep_idx = *item.sep_parsed.as_ref().unwrap_or(&0);
let sep_len = item.sep.as_ref().map_or(0, Separator::tt_count);
if item.sep.is_some() && sep_idx != sep_len {
let sep = item.sep.as_ref().unwrap();
if src.clone().expect_separator(&sep, sep_idx) {
item.dot.next();
item.sep_parsed = Some(sep_idx + 1);
try_push!(next_items, item);
}
}
// We don't need a separator. Move the "dot" back to the beginning of the matcher
// and try to match again UNLESS we are only allowed to have _one_ repetition.
else if item.sep_kind != Some(RepeatKind::ZeroOrOne) {
item.dot = item.dot.reset();
item.sep_parsed = None;
item.bindings.push(Bindings::default());
cur_items.push(item);
}
} else {
// If we are not in a repetition, then being at the end of a matcher means that we have
// reached the potential end of the input.
try_push!(eof_items, item);
}
continue;
}
Some(it) => it,
};
// We are in the middle of a matcher.
match op {
OpDelimited::Op(Op::Repeat { tokens, kind, separator }) => {
if matches!(kind, RepeatKind::ZeroOrMore | RepeatKind::ZeroOrOne) {
let mut new_item = item.clone();
new_item.dot.next();
let mut vars = Vec::new();
let bindings = new_item.bindings.last_mut().unwrap();
collect_vars(&mut vars, tokens);
for var in vars {
bindings.push_empty(&var);
}
cur_items.push(new_item);
}
cur_items.push(MatchState {
dot: tokens.iter_delimited(None),
stack: Default::default(),
up: Some(Box::new(item)),
sep: separator.clone(),
sep_kind: Some(*kind),
sep_parsed: None,
bindings: smallvec![Bindings::default()],
meta_result: None,
is_error: false,
})
}
OpDelimited::Op(Op::Subtree { tokens, delimiter }) => {
if let Ok(subtree) = src.clone().expect_subtree() {
if subtree.delimiter_kind() == delimiter.map(|it| it.kind) {
item.stack.push(item.dot);
item.dot = tokens.iter_delimited(delimiter.as_ref());
cur_items.push(item);
}
}
}
OpDelimited::Op(Op::Var { kind, name, .. }) => {
if let Some(kind) = kind {
let mut fork = src.clone();
let match_res = match_meta_var(kind.as_str(), &mut fork);
match match_res.err {
None => {
// Some meta variables are optional (e.g. vis)
if match_res.value.is_some() {
item.meta_result = Some((fork, match_res));
try_push!(bb_items, item);
} else {
item.bindings.last_mut().unwrap().push_optional(name);
item.dot.next();
cur_items.push(item);
}
}
Some(err) => {
res.add_err(err);
match match_res.value {
Some(fragment) => {
item.bindings
.last_mut()
.unwrap()
.inner
.push((name.clone(), Binding::Fragment(fragment)));
}
_ => {}
}
item.is_error = true;
error_items.push(item);
}
}
}
}
OpDelimited::Op(Op::Leaf(leaf)) => {
if let Err(err) = match_leaf(&leaf, &mut src.clone()) {
res.add_err(err);
item.is_error = true;
} else {
item.dot.next();
}
try_push!(next_items, item);
}
OpDelimited::Open => {
if matches!(src.clone().next(), Some(tt::TokenTree::Subtree(..))) {
item.dot.next();
try_push!(next_items, item);
}
}
OpDelimited::Close => {
let is_delim_closed = src.peek_n(0).is_none() && !stack.is_empty();
if is_delim_closed {
item.dot.next();
try_push!(next_items, item);
}
}
}
}
}
fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match {
let mut src = TtIter::new(src);
let mut stack: SmallVec<[TtIter; 1]> = SmallVec::new();
let mut res = Match::default();
let mut error_recover_item = None;
let mut cur_items = smallvec![MatchState {
dot: pattern.iter_delimited(None),
stack: Default::default(),
up: None,
sep: None,
sep_kind: None,
sep_parsed: None,
bindings: smallvec![Bindings::default()],
is_error: false,
meta_result: None,
}];
let mut next_items = vec![];
loop {
let mut bb_items = SmallVec::new();
let mut eof_items = SmallVec::new();
let mut error_items = SmallVec::new();
stdx::always!(next_items.is_empty());
match_loop_inner(
src.clone(),
&stack,
&mut res,
&mut cur_items,
&mut bb_items,
&mut next_items,
&mut eof_items,
&mut error_items,
);
stdx::always!(cur_items.is_empty());
if error_items.len() > 0 {
error_recover_item = error_items.pop();
} else if eof_items.len() > 0 {
error_recover_item = Some(eof_items[0].clone());
}
// We need to do some post processing after the `match_loop_inner`.
// If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise,
// either the parse is ambiguous (which should never happen) or there is a syntax error.
if src.peek_n(0).is_none() && stack.is_empty() {
if eof_items.len() == 1 {
// remove all errors, because it is the correct answer!
res = Match::default();
res.bindings = eof_items[0].bindings[0].clone();
} else {
// Error recovery
if error_recover_item.is_some() {
res.bindings = error_recover_item.unwrap().bindings[0].clone();
}
res.add_err(ExpandError::UnexpectedToken);
}
return res;
}
// If there are no possible next positions AND we aren't waiting for the black-box parser,
// then there is a syntax error.
//
// Another possibility is that we need to call out to parse some rust nonterminal
// (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong.
if (bb_items.is_empty() && next_items.is_empty())
|| (!bb_items.is_empty() && !next_items.is_empty())
|| bb_items.len() > 1
{
res.unmatched_tts += src.len();
while let Some(it) = stack.pop() {
src = it;
res.unmatched_tts += src.len();
}
res.add_err(err!("leftover tokens"));
if let Some(mut error_recover_item) = error_recover_item {
res.bindings = error_recover_item.bindings.remove(0);
}
return res;
}
// Dump all possible `next_items` into `cur_items` for the next iteration.
else if !next_items.is_empty() {
// Now process the next token
cur_items.extend(next_items.drain(..));
match src.next() {
Some(tt::TokenTree::Subtree(subtree)) => {
stack.push(src.clone());
src = TtIter::new(subtree);
}
None if !stack.is_empty() => src = stack.pop().unwrap(),
_ => (),
}
}
// Finally, we have the case where we need to call the black-box parser to get some
// nonterminal.
else {
stdx::always!(bb_items.len() == 1);
let mut item = bb_items.pop().unwrap();
if let Some(OpDelimited::Op(Op::Var { name, .. })) = item.dot.peek() {
let (iter, match_res) = item.meta_result.take().unwrap();
let bindings = item.bindings.last_mut().unwrap();
match match_res.value {
Some(fragment) => {
bindings.inner.push((name.clone(), Binding::Fragment(fragment)));
}
None if match_res.err.is_none() => bindings.push_optional(name),
_ => {}
}
if let Some(err) = match_res.err {
res.add_err(err);
}
src = iter.clone();
item.dot.next();
} else {
unreachable!()
}
cur_items.push(item);
}
stdx::always!(!cur_items.is_empty());
}
}
@@ -173,73 +544,6 @@ fn match_leaf(lhs: &tt::Leaf, src: &mut TtIter) -> Result<(), ExpandError> {
Ok(())
}
fn match_repeat(
res: &mut Match,
pattern: &MetaTemplate,
kind: RepeatKind,
separator: &Option<Separator>,
src: &mut TtIter,
) {
// Dirty hack to make macro-expansion terminate.
// This should be replaced by a proper macro-by-example implementation
let mut limit = 65536;
let mut counter = 0;
for i in 0.. {
let mut fork = src.clone();
if let Some(separator) = &separator {
if i != 0 && !fork.eat_separator(separator) {
break;
}
}
let mut nested = Match::default();
match_tokens(&mut nested, pattern, &mut fork);
if nested.err.is_none() {
limit -= 1;
if limit == 0 {
log::warn!(
"match_lhs exceeded repeat pattern limit => {:#?}\n{:#?}\n{:#?}\n{:#?}",
pattern,
src,
kind,
separator
);
break;
}
*src = fork;
if let Err(err) = res.bindings.push_nested(counter, nested.bindings) {
res.add_err(err);
}
counter += 1;
if counter == 1 {
if let RepeatKind::ZeroOrOne = kind {
break;
}
}
} else {
break;
}
}
match (kind, counter) {
(RepeatKind::OneOrMore, 0) => {
res.add_err(ExpandError::UnexpectedToken);
}
(_, 0) => {
// Collect all empty variables in subtrees
let mut vars = Vec::new();
collect_vars(&mut vars, pattern);
for var in vars {
res.bindings.push_empty(&var)
}
}
_ => (),
}
}
fn match_meta_var(kind: &str, input: &mut TtIter) -> ExpandResult<Option<Fragment>> {
let fragment = match kind {
"path" => Path,
@@ -303,14 +607,14 @@ fn collect_vars(buf: &mut Vec<SmolStr>, pattern: &MetaTemplate) {
}
impl<'a> TtIter<'a> {
fn eat_separator(&mut self, separator: &Separator) -> bool {
fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool {
let mut fork = self.clone();
let ok = match separator {
Separator::Ident(lhs) => match fork.expect_ident() {
Separator::Ident(lhs) if idx == 0 => match fork.expect_ident() {
Ok(rhs) => rhs.text == lhs.text,
_ => false,
},
Separator::Literal(lhs) => match fork.expect_literal() {
Separator::Literal(lhs) if idx == 0 => match fork.expect_literal() {
Ok(rhs) => match rhs {
tt::Leaf::Literal(rhs) => rhs.text == lhs.text,
tt::Leaf::Ident(rhs) => rhs.text == lhs.text,
@@ -318,10 +622,11 @@ impl<'a> TtIter<'a> {
},
_ => false,
},
Separator::Puncts(lhss) => lhss.iter().all(|lhs| match fork.expect_punct() {
Ok(rhs) => rhs.char == lhs.char,
Separator::Puncts(lhss) if idx < lhss.len() => match fork.expect_punct() {
Ok(rhs) => rhs.char == lhss[idx].char,
_ => false,
}),
},
_ => false,
};
if ok {
*self = fork;

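One small piece of the rewrite that is easy to miss: `eat_separator` became `expect_separator(&sep, idx)`, because a multi-token separator such as `&&` is now consumed one token per NFA step, with `sep_parsed` recording how far into the separator a thread is. A toy sketch of that per-index check, using plain chars in place of punct tokens (not the crate's API):

```rust
// Check only the idx-th token of a (possibly multi-token) separator.
fn expect_separator_token(sep: &[char], idx: usize, input: char) -> bool {
    sep.get(idx).map_or(false, |&expected| expected == input)
}

fn main() {
    let sep = ['&', '&'];
    // the matcher consumes `&&` across two steps: idx 0, then idx 1
    assert!(expect_separator_token(&sep, 0, '&'));
    assert!(expect_separator_token(&sep, 1, '&'));
    // once idx reaches sep.len(), the separator is fully parsed
    assert!(!expect_separator_token(&sep, 2, '&'));
}
```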

@@ -13,13 +13,17 @@ use crate::{
impl Bindings {
fn contains(&self, name: &str) -> bool {
self.inner.contains_key(name)
self.inner.iter().any(|(n, _)| n == name)
}
fn get(&self, name: &str, nesting: &mut [NestingState]) -> Result<&Fragment, ExpandError> {
let mut b = self.inner.get(name).ok_or_else(|| {
ExpandError::BindingError(format!("could not find binding `{}`", name))
})?;
let mut b: &Binding = self
.inner
.iter()
.find_map(|(n, b)| if n == name { Some(b) } else { None })
.ok_or_else(|| {
ExpandError::BindingError(format!("could not find binding `{}`", name))
})?;
for nesting_state in nesting.iter_mut() {
nesting_state.hit = true;
b = match b {


@@ -21,7 +21,7 @@ use test_utils::mark;
pub use tt::{Delimiter, DelimiterKind, Punct};
use crate::{
parser::{parse_pattern, parse_template, Op},
parser::{parse_pattern, parse_template, MetaTemplate, Op},
tt_iter::TtIter,
};
@@ -94,15 +94,6 @@ struct Rule {
rhs: MetaTemplate,
}
#[derive(Clone, Debug, PartialEq, Eq)]
struct MetaTemplate(Vec<Op>);
impl<'a> MetaTemplate {
fn iter(&self) -> impl Iterator<Item = &Op> {
self.0.iter()
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Shift(u32);


@@ -5,7 +5,75 @@ use smallvec::SmallVec;
use syntax::SmolStr;
use tt::Delimiter;
use crate::{tt_iter::TtIter, MetaTemplate, ParseError};
use crate::{tt_iter::TtIter, ParseError};
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct MetaTemplate(pub(crate) Vec<Op>);
#[derive(Debug, Clone, Copy)]
pub(crate) enum OpDelimited<'a> {
Op(&'a Op),
Open,
Close,
}
#[derive(Debug, Clone, Copy)]
pub(crate) struct OpDelimitedIter<'a> {
inner: &'a Vec<Op>,
delimited: Option<&'a Delimiter>,
idx: usize,
}
impl<'a> OpDelimitedIter<'a> {
pub(crate) fn is_eof(&self) -> bool {
let len = self.inner.len() + if self.delimited.is_some() { 2 } else { 0 };
self.idx >= len
}
pub(crate) fn peek(&self) -> Option<OpDelimited<'a>> {
match self.delimited {
None => self.inner.get(self.idx).map(OpDelimited::Op),
Some(_) => match self.idx {
0 => Some(OpDelimited::Open),
i if i == self.inner.len() + 1 => Some(OpDelimited::Close),
i => self.inner.get(i - 1).map(OpDelimited::Op),
},
}
}
pub(crate) fn reset(&self) -> Self {
Self { inner: &self.inner, idx: 0, delimited: self.delimited }
}
}
impl<'a> Iterator for OpDelimitedIter<'a> {
type Item = OpDelimited<'a>;
fn next(&mut self) -> Option<Self::Item> {
let res = self.peek();
self.idx += 1;
res
}
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.inner.len() + if self.delimited.is_some() { 2 } else { 0 };
let remain = len.checked_sub(self.idx).unwrap_or(0);
(remain, Some(remain))
}
}
impl<'a> MetaTemplate {
pub(crate) fn iter(&self) -> impl Iterator<Item = &Op> {
self.0.iter()
}
pub(crate) fn iter_delimited(
&'a self,
delimited: Option<&'a Delimiter>,
) -> OpDelimitedIter<'a> {
OpDelimitedIter { inner: &self.0, idx: 0, delimited }
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum Op {
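`OpDelimitedIter`, introduced above, lets the matcher walk an explicitly delimited pattern as `Open`, then its ops, then `Close`, purely by index arithmetic over the underlying `Vec<Op>`. A standalone sketch of that indexing with toy types (`Item`/`nth` are illustrative, not the crate's):

```rust
#[derive(Debug, PartialEq)]
enum Item<'a> {
    Open,
    Op(&'a str),
    Close,
}

// Index 0 maps to Open, 1..=len to the ops, len + 1 to Close when delimited;
// without a delimiter the index addresses the ops directly.
fn nth<'a>(ops: &[&'a str], delimited: bool, idx: usize) -> Option<Item<'a>> {
    if !delimited {
        return ops.get(idx).copied().map(Item::Op);
    }
    match idx {
        0 => Some(Item::Open),
        i if i == ops.len() + 1 => Some(Item::Close),
        i => ops.get(i - 1).copied().map(Item::Op),
    }
}

fn main() {
    let ops = ["$x:ident", ","];
    assert_eq!(nth(&ops, true, 0), Some(Item::Open));
    assert_eq!(nth(&ops, true, 1), Some(Item::Op("$x:ident")));
    assert_eq!(nth(&ops, true, 3), Some(Item::Close));
    assert_eq!(nth(&ops, true, 4), None);
    assert_eq!(nth(&ops, false, 0), Some(Item::Op("$x:ident")));
}
```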
@@ -47,6 +115,16 @@ impl PartialEq for Separator {
}
}
impl Separator {
pub(crate) fn tt_count(&self) -> usize {
match self {
Separator::Literal(_) => 1,
Separator::Ident(_) => 1,
Separator::Puncts(it) => it.len(),
}
}
}
pub(crate) fn parse_template(template: &tt::Subtree) -> Result<Vec<Op>, ParseError> {
parse_inner(&template, Mode::Template).into_iter().collect()
}


@@ -456,6 +456,17 @@ fn test_match_group_with_multichar_sep() {
.assert_expand_items("foo! (fn baz {true true} );", "fn baz () -> bool {true &&true}");
}
#[test]
fn test_match_group_with_multichar_sep2() {
parse_macro(
r#"
macro_rules! foo {
(fn $name:ident {$($i:literal)&&*} ) => ( fn $name() -> bool { $($i)&&*} );
}"#,
)
.assert_expand_items("foo! (fn baz {true && true} );", "fn baz () -> bool {true &&true}");
}
#[test]
fn test_match_group_zero_match() {
parse_macro(
@@ -1267,6 +1278,18 @@ macro_rules! m {
.is_some());
}
#[test]
fn test_match_is_not_greedy() {
parse_macro(
r#"
macro_rules! foo {
($($i:ident $(,)*),*) => {};
}
"#,
)
.assert_expand_items(r#"foo!(a,b);"#, r#""#);
}
// The following tests are based on real world situations
#[test]
fn test_vec() {