From cff2201c30bda7b346e3b47875d95a2cf9cafaa3 Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Tue, 2 Feb 2021 04:42:37 +0800 Subject: [PATCH] NFA parser for mbe matcher --- Cargo.lock | 1 + crates/mbe/Cargo.toml | 1 + crates/mbe/src/benchmark.rs | 40 +- crates/mbe/src/expander.rs | 16 +- crates/mbe/src/expander/matcher.rs | 583 +++++++++++++++++++------ crates/mbe/src/expander/transcriber.rs | 12 +- crates/mbe/src/lib.rs | 11 +- crates/mbe/src/parser.rs | 80 +++- crates/mbe/src/tests.rs | 23 + 9 files changed, 592 insertions(+), 175 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dbd7a746eb..3d2bf8f057 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -878,6 +878,7 @@ dependencies = [ "profile", "rustc-hash", "smallvec", + "stdx", "syntax", "test_utils", "tt", diff --git a/crates/mbe/Cargo.toml b/crates/mbe/Cargo.toml index 0abba35845..46f11ad384 100644 --- a/crates/mbe/Cargo.toml +++ b/crates/mbe/Cargo.toml @@ -18,6 +18,7 @@ syntax = { path = "../syntax", version = "0.0.0" } parser = { path = "../parser", version = "0.0.0" } tt = { path = "../tt", version = "0.0.0" } test_utils = { path = "../test_utils", version = "0.0.0" } +stdx = { path = "../stdx", version = "0.0.0" } [dev-dependencies] profile = { path = "../profile", version = "0.0.0" } diff --git a/crates/mbe/src/benchmark.rs b/crates/mbe/src/benchmark.rs index 0d0acd5897..13db5eef18 100644 --- a/crates/mbe/src/benchmark.rs +++ b/crates/mbe/src/benchmark.rs @@ -40,18 +40,12 @@ fn benchmark_expand_macro_rules() { .into_iter() .map(|(id, tt)| { let res = rules[&id].expand(&tt); - if res.err.is_some() { - // FIXME: - // Currently `invocation_fixtures` will generate some correct invocations but - // cannot be expanded by mbe. We ignore errors here. - // See: https://github.com/rust-analyzer/rust-analyzer/issues/4777 - eprintln!("err from {} {:?}", id, res.err); - } + assert!(res.err.is_none()); res.value.token_trees.len() }) .sum() }; - assert_eq!(hash, 66995); + assert_eq!(hash, 69413); } fn macro_rules_fixtures() -> FxHashMap { @@ -77,7 +71,7 @@ fn macro_rules_fixtures_tt() -> FxHashMap { .collect() } -// Generate random invocation fixtures from rules +/// Generate random invocation fixtures from rules fn invocation_fixtures(rules: &FxHashMap) -> Vec<(String, tt::Subtree)> { let mut seed = 123456789; let mut res = Vec::new(); @@ -86,11 +80,31 @@ fn invocation_fixtures(rules: &FxHashMap) -> Vec<(String, tt for rule in &it.rules { // Generate twice for _ in 0..2 { - let mut subtree = tt::Subtree::default(); - for op in rule.lhs.iter() { - collect_from_op(op, &mut subtree, &mut seed); + // The input are generated by filling the `Op` randomly. + // However, there are some cases generated are ambiguous for expanding, for example: + // ```rust + // macro_rules! m { + // ($($t:ident),* as $ty:ident) => {} + // } + // m!(as u32); // error: local ambiguity: multiple parsing options: built-in NTs ident ('t') or 1 other option. + // ``` + // + // So we just skip any error cases and try again + let mut try_cnt = 0; + loop { + let mut subtree = tt::Subtree::default(); + for op in rule.lhs.iter() { + collect_from_op(op, &mut subtree, &mut seed); + } + if it.expand(&subtree).err.is_none() { + res.push((name.clone(), subtree)); + break; + } + try_cnt += 1; + if try_cnt > 100 { + panic!("invocaton fixture {} cannot be generated.\n", name); + } } - res.push((name.clone(), subtree)); } } } diff --git a/crates/mbe/src/expander.rs b/crates/mbe/src/expander.rs index e7e14b3cce..2efff8f52e 100644 --- a/crates/mbe/src/expander.rs +++ b/crates/mbe/src/expander.rs @@ -5,7 +5,7 @@ mod matcher; mod transcriber; -use rustc_hash::FxHashMap; +use smallvec::SmallVec; use syntax::SmolStr; use crate::{ExpandError, ExpandResult}; @@ -28,10 +28,10 @@ pub(crate) fn expand_rules( return ExpandResult::ok(value); } } - // Use the rule if we matched more tokens, or had fewer errors + // Use the rule if we matched more tokens, or bound variables count if let Some((prev_match, _)) = &match_ { - if (new_match.unmatched_tts, new_match.err_count) - < (prev_match.unmatched_tts, prev_match.err_count) + if (new_match.unmatched_tts, -(new_match.bound_count as i32)) + < (prev_match.unmatched_tts, -(prev_match.bound_count as i32)) { match_ = Some((new_match, rule)); } @@ -94,19 +94,19 @@ pub(crate) fn expand_rules( /// In other words, `Bindings` is a *multi* mapping from `SmolStr` to /// `tt::TokenTree`, where the index to select a particular `TokenTree` among /// many is not a plain `usize`, but an `&[usize]`. -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, PartialEq, Eq)] struct Bindings { - inner: FxHashMap, + inner: SmallVec<[(SmolStr, Binding); 4]>, } -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq)] enum Binding { Fragment(Fragment), Nested(Vec), Empty, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] enum Fragment { /// token fragments are just copy-pasted into the output Tokens(tt::TokenTree), diff --git a/crates/mbe/src/expander/matcher.rs b/crates/mbe/src/expander/matcher.rs index e3bd4c09a0..9d3d280552 100644 --- a/crates/mbe/src/expander/matcher.rs +++ b/crates/mbe/src/expander/matcher.rs @@ -1,14 +1,74 @@ -//! FIXME: write short doc here +//! An NFA-based parser, which is porting from rustc mbe parsing code +//! +//! See https://github.com/rust-lang/rust/blob/70b18bc2cbac4712020019f5bf57c00905373205/compiler/rustc_expand/src/mbe/macro_parser.rs +//! Here is a quick intro to how the parser works, copied from rustc: +//! +//! A 'position' is a dot in the middle of a matcher, usually represented as a +//! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. +//! +//! The parser walks through the input a character at a time, maintaining a list +//! of threads consistent with the current position in the input string: `cur_items`. +//! +//! As it processes them, it fills up `eof_items` with threads that would be valid if +//! the macro invocation is now over, `bb_items` with threads that are waiting on +//! a Rust non-terminal like `$e:expr`, and `next_items` with threads that are waiting +//! on a particular token. Most of the logic concerns moving the · through the +//! repetitions indicated by Kleene stars. The rules for moving the · without +//! consuming any input are called epsilon transitions. It only advances or calls +//! out to the real Rust parser when no `cur_items` threads remain. +//! +//! Example: +//! +//! ```text, ignore +//! Start parsing a a a a b against [· a $( a )* a b]. +//! +//! Remaining input: a a a a b +//! next: [· a $( a )* a b] +//! +//! - - - Advance over an a. - - - +//! +//! Remaining input: a a a b +//! cur: [a · $( a )* a b] +//! Descend/Skip (first item). +//! next: [a $( · a )* a b] [a $( a )* · a b]. +//! +//! - - - Advance over an a. - - - +//! +//! Remaining input: a a b +//! cur: [a $( a · )* a b] [a $( a )* a · b] +//! Follow epsilon transition: Finish/Repeat (first item) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over an a. - - - (this looks exactly like the last step) +//! +//! Remaining input: a b +//! cur: [a $( a · )* a b] [a $( a )* a · b] +//! Follow epsilon transition: Finish/Repeat (first item) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over an a. - - - (this looks exactly like the last step) +//! +//! Remaining input: b +//! cur: [a $( a · )* a b] [a $( a )* a · b] +//! Follow epsilon transition: Finish/Repeat (first item) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over a b. - - - +//! +//! Remaining input: '' +//! eof: [a $( a )* a b ·] +//! ``` use crate::{ expander::{Binding, Bindings, Fragment}, - parser::{Op, RepeatKind, Separator}, + parser::{Op, OpDelimited, OpDelimitedIter, RepeatKind, Separator}, tt_iter::TtIter, ExpandError, MetaTemplate, }; use super::ExpandResult; use parser::FragmentKind::*; +use smallvec::{smallvec, SmallVec}; use syntax::SmolStr; impl Bindings { @@ -16,19 +76,19 @@ impl Bindings { // FIXME: Do we have a better way to represent an empty token ? // Insert an empty subtree for empty token let tt = tt::Subtree::default().into(); - self.inner.insert(name.clone(), Binding::Fragment(Fragment::Tokens(tt))); + self.inner.push((name.clone(), Binding::Fragment(Fragment::Tokens(tt)))); } fn push_empty(&mut self, name: &SmolStr) { - self.inner.insert(name.clone(), Binding::Empty); + self.inner.push((name.clone(), Binding::Empty)); } fn push_nested(&mut self, idx: usize, nested: Bindings) -> Result<(), ExpandError> { for (key, value) in nested.inner { - if !self.inner.contains_key(&key) { - self.inner.insert(key.clone(), Binding::Nested(Vec::new())); + if self.get_mut(&key).is_none() { + self.inner.push((key.clone(), Binding::Nested(Vec::new()))); } - match self.inner.get_mut(&key) { + match self.get_mut(&key) { Some(Binding::Nested(it)) => { // insert empty nested bindings before this one while it.len() < idx { @@ -46,6 +106,14 @@ impl Bindings { } Ok(()) } + + fn get_mut(&mut self, name: &str) -> Option<&mut Binding> { + self.inner.iter_mut().find_map(|(n, b)| if n == name { Some(b) } else { None }) + } + + fn bindings(&self) -> impl Iterator { + self.inner.iter().map(|(_, b)| b) + } } macro_rules! err { @@ -57,7 +125,7 @@ macro_rules! err { }; } -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default, PartialEq, Eq)] pub(super) struct Match { pub(super) bindings: Bindings, /// We currently just keep the first error and count the rest to compare matches. @@ -65,6 +133,8 @@ pub(super) struct Match { pub(super) err_count: usize, /// How many top-level token trees were left to match. pub(super) unmatched_tts: usize, + /// The number of bound variables + pub(super) bound_count: usize, } impl Match { @@ -76,72 +146,373 @@ impl Match { } /// Matching errors are added to the `Match`. -pub(super) fn match_(pattern: &MetaTemplate, src: &tt::Subtree) -> Match { - let mut res = Match::default(); - let mut src = TtIter::new(src); +pub(super) fn match_(pattern: &MetaTemplate, input: &tt::Subtree) -> Match { + let mut res = match_loop(pattern, &input); + res.bound_count = count(res.bindings.bindings()); + return res; - match_tokens(&mut res, pattern, &mut src); - - if src.len() > 0 { - res.unmatched_tts += src.len(); - res.add_err(err!("leftover tokens")); + fn count<'a>(bindings: impl Iterator) -> usize { + bindings + .map(|it| match it { + Binding::Fragment(_) => 1, + Binding::Empty => 1, + Binding::Nested(it) => count(it.iter()), + }) + .sum() } - - res } -fn match_tokens(res: &mut Match, pattern: &MetaTemplate, src: &mut TtIter) { - for op in pattern.iter() { - match op { - Op::Leaf(lhs) => { - if let Err(err) = match_leaf(lhs, src) { - res.add_err(err); - continue; - } +#[derive(Debug, Clone)] +struct MatchState<'t> { + /// The position of the "dot" in this matcher + dot: OpDelimitedIter<'t>, + + /// Token subtree stack + /// When matching against matchers with nested delimited submatchers (e.g., `pat ( pat ( .. ) + /// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does + /// that where the bottom of the stack is the outermost matcher. + stack: SmallVec<[OpDelimitedIter<'t>; 4]>, + + /// The "parent" matcher position if we are in a repetition. That is, the matcher position just + /// before we enter the repetition. + up: Option>>, + + /// The separator if we are in a repetition. + sep: Option, + + /// The KleeneOp of this sequence if we are in a repetition. + sep_kind: Option, + + /// Number of tokens of seperator parsed + sep_parsed: Option, + + /// Matched meta variables bindings + bindings: SmallVec<[Bindings; 4]>, + + /// Cached result of meta variable parsing + meta_result: Option<(TtIter<'t>, ExpandResult>)>, + + /// Is error occuried in this state, will `poised` to "parent" + is_error: bool, +} + +/// Process the matcher positions of `cur_items` until it is empty. In the process, this will +/// produce more items in `next_items`, `eof_items`, and `bb_items`. +/// +/// For more info about the how this happens, see the module-level doc comments and the inline +/// comments of this function. +/// +/// # Parameters +/// +/// - `src`: the current token of the parser. +/// - `stack`: the "parent" frames of the token tree +/// - `res`: the match result to store errors +/// - `cur_items`: the set of current items to be processed. This should be empty by the end of a +/// successful execution of this function. +/// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in +/// the function `parse`. +/// - `eof_items`: the set of items that would be valid if this was the EOF. +/// - `bb_items`: the set of items that are waiting for the black-box parser. +/// - `error_items`: the set of items in errors, used for error-resilient parsing +fn match_loop_inner<'t>( + src: TtIter<'t>, + stack: &[TtIter<'t>], + res: &mut Match, + cur_items: &mut SmallVec<[MatchState<'t>; 1]>, + bb_items: &mut SmallVec<[MatchState<'t>; 1]>, + next_items: &mut Vec>, + eof_items: &mut SmallVec<[MatchState<'t>; 1]>, + error_items: &mut SmallVec<[MatchState<'t>; 1]>, +) { + macro_rules! try_push { + ($items: expr, $it:expr) => { + if $it.is_error { + error_items.push($it); + } else { + $items.push($it); } - Op::Subtree { tokens, delimiter: delim } => { - let rhs = match src.expect_subtree() { - Ok(s) => s, - Err(()) => { - res.add_err(err!("expected subtree")); - continue; - } - }; - if delim.map(|it| it.kind) != rhs.delimiter_kind() { - res.add_err(err!("mismatched delimiter")); - continue; + }; + } + + while let Some(mut item) = cur_items.pop() { + while item.dot.is_eof() { + match item.stack.pop() { + Some(frame) => { + item.dot = frame; + item.dot.next(); } - let mut src = TtIter::new(rhs); - match_tokens(res, tokens, &mut src); - if src.len() > 0 { - res.add_err(err!("leftover tokens")); - } - } - Op::Var { name, kind, .. } => { - let kind = match kind { - Some(k) => k, - None => { - res.add_err(ExpandError::UnexpectedToken); - continue; - } - }; - let ExpandResult { value: matched, err: match_err } = - match_meta_var(kind.as_str(), src); - match matched { - Some(fragment) => { - res.bindings.inner.insert(name.clone(), Binding::Fragment(fragment)); - } - None if match_err.is_none() => res.bindings.push_optional(name), - _ => {} - } - if let Some(err) = match_err { - res.add_err(err); - } - } - Op::Repeat { tokens: subtree, kind, separator } => { - match_repeat(res, subtree, *kind, separator, src); + None => break, } } + let op = match item.dot.peek() { + None => { + // We are at or past the end of the matcher of `item`. + if item.up.is_some() { + if item.sep_parsed.is_none() { + // Get the `up` matcher + let mut new_pos = *item.up.clone().unwrap(); + // Add matches from this repetition to the `matches` of `up` + if let Some(bindings) = new_pos.bindings.last_mut() { + for (i, b) in item.bindings.iter_mut().enumerate() { + bindings.push_nested(i, b.clone()).unwrap(); + } + } + // Move the "dot" past the repetition in `up` + new_pos.dot.next(); + new_pos.is_error = new_pos.is_error || item.is_error; + cur_items.push(new_pos); + } + + // Check if we need a separator. + // We check the separator one by one + let sep_idx = *item.sep_parsed.as_ref().unwrap_or(&0); + let sep_len = item.sep.as_ref().map_or(0, Separator::tt_count); + if item.sep.is_some() && sep_idx != sep_len { + let sep = item.sep.as_ref().unwrap(); + if src.clone().expect_separator(&sep, sep_idx) { + item.dot.next(); + item.sep_parsed = Some(sep_idx + 1); + try_push!(next_items, item); + } + } + // We don't need a separator. Move the "dot" back to the beginning of the matcher + // and try to match again UNLESS we are only allowed to have _one_ repetition. + else if item.sep_kind != Some(RepeatKind::ZeroOrOne) { + item.dot = item.dot.reset(); + item.sep_parsed = None; + item.bindings.push(Bindings::default()); + cur_items.push(item); + } + } else { + // If we are not in a repetition, then being at the end of a matcher means that we have + // reached the potential end of the input. + try_push!(eof_items, item); + } + continue; + } + Some(it) => it, + }; + + // We are in the middle of a matcher. + match op { + OpDelimited::Op(Op::Repeat { tokens, kind, separator }) => { + if matches!(kind, RepeatKind::ZeroOrMore | RepeatKind::ZeroOrOne) { + let mut new_item = item.clone(); + new_item.dot.next(); + let mut vars = Vec::new(); + let bindings = new_item.bindings.last_mut().unwrap(); + collect_vars(&mut vars, tokens); + for var in vars { + bindings.push_empty(&var); + } + cur_items.push(new_item); + } + cur_items.push(MatchState { + dot: tokens.iter_delimited(None), + stack: Default::default(), + up: Some(Box::new(item)), + sep: separator.clone(), + sep_kind: Some(*kind), + sep_parsed: None, + bindings: smallvec![Bindings::default()], + meta_result: None, + is_error: false, + }) + } + OpDelimited::Op(Op::Subtree { tokens, delimiter }) => { + if let Ok(subtree) = src.clone().expect_subtree() { + if subtree.delimiter_kind() == delimiter.map(|it| it.kind) { + item.stack.push(item.dot); + item.dot = tokens.iter_delimited(delimiter.as_ref()); + cur_items.push(item); + } + } + } + OpDelimited::Op(Op::Var { kind, name, .. }) => { + if let Some(kind) = kind { + let mut fork = src.clone(); + let match_res = match_meta_var(kind.as_str(), &mut fork); + match match_res.err { + None => { + // Some meta variables are optional (e.g. vis) + if match_res.value.is_some() { + item.meta_result = Some((fork, match_res)); + try_push!(bb_items, item); + } else { + item.bindings.last_mut().unwrap().push_optional(name); + item.dot.next(); + cur_items.push(item); + } + } + Some(err) => { + res.add_err(err); + match match_res.value { + Some(fragment) => { + item.bindings + .last_mut() + .unwrap() + .inner + .push((name.clone(), Binding::Fragment(fragment))); + } + _ => {} + } + item.is_error = true; + error_items.push(item); + } + } + } + } + OpDelimited::Op(Op::Leaf(leaf)) => { + if let Err(err) = match_leaf(&leaf, &mut src.clone()) { + res.add_err(err); + item.is_error = true; + } else { + item.dot.next(); + } + try_push!(next_items, item); + } + OpDelimited::Open => { + if matches!(src.clone().next(), Some(tt::TokenTree::Subtree(..))) { + item.dot.next(); + try_push!(next_items, item); + } + } + OpDelimited::Close => { + let is_delim_closed = src.peek_n(0).is_none() && !stack.is_empty(); + if is_delim_closed { + item.dot.next(); + try_push!(next_items, item); + } + } + } + } +} + +fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match { + let mut src = TtIter::new(src); + let mut stack: SmallVec<[TtIter; 1]> = SmallVec::new(); + let mut res = Match::default(); + let mut error_reover_item = None; + + let mut cur_items = smallvec![MatchState { + dot: pattern.iter_delimited(None), + stack: Default::default(), + up: None, + sep: None, + sep_kind: None, + sep_parsed: None, + bindings: smallvec![Bindings::default()], + is_error: false, + meta_result: None, + }]; + + let mut next_items = vec![]; + + loop { + let mut bb_items = SmallVec::new(); + let mut eof_items = SmallVec::new(); + let mut error_items = SmallVec::new(); + + stdx::always!(next_items.is_empty()); + + match_loop_inner( + src.clone(), + &stack, + &mut res, + &mut cur_items, + &mut bb_items, + &mut next_items, + &mut eof_items, + &mut error_items, + ); + stdx::always!(cur_items.is_empty()); + + if error_items.len() > 0 { + error_reover_item = error_items.pop(); + } else if eof_items.len() > 0 { + error_reover_item = Some(eof_items[0].clone()); + } + + // We need to do some post processing after the `match_loop_inner`. + // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise, + // either the parse is ambiguous (which should never happen) or there is a syntax error. + if src.peek_n(0).is_none() && stack.is_empty() { + if eof_items.len() == 1 { + // remove all errors, because it is the correct answer ! + res = Match::default(); + res.bindings = eof_items[0].bindings[0].clone(); + } else { + // Error recovery + if error_reover_item.is_some() { + res.bindings = error_reover_item.unwrap().bindings[0].clone(); + } + res.add_err(ExpandError::UnexpectedToken); + } + return res; + } + + // If there are no possible next positions AND we aren't waiting for the black-box parser, + // then there is a syntax error. + // + // Another possibility is that we need to call out to parse some rust nonterminal + // (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong. + if (bb_items.is_empty() && next_items.is_empty()) + || (!bb_items.is_empty() && !next_items.is_empty()) + || bb_items.len() > 1 + { + res.unmatched_tts += src.len(); + while let Some(it) = stack.pop() { + src = it; + res.unmatched_tts += src.len(); + } + res.add_err(err!("leftover tokens")); + + if let Some(mut error_reover_item) = error_reover_item { + res.bindings = error_reover_item.bindings.remove(0); + } + return res; + } + // Dump all possible `next_items` into `cur_items` for the next iteration. + else if !next_items.is_empty() { + // Now process the next token + cur_items.extend(next_items.drain(..)); + + match src.next() { + Some(tt::TokenTree::Subtree(subtree)) => { + stack.push(src.clone()); + src = TtIter::new(subtree); + } + None if !stack.is_empty() => src = stack.pop().unwrap(), + _ => (), + } + } + // Finally, we have the case where we need to call the black-box parser to get some + // nonterminal. + else { + stdx::always!(bb_items.len() == 1); + let mut item = bb_items.pop().unwrap(); + + if let Some(OpDelimited::Op(Op::Var { name, .. })) = item.dot.peek() { + let (iter, match_res) = item.meta_result.take().unwrap(); + let bindings = item.bindings.last_mut().unwrap(); + match match_res.value { + Some(fragment) => { + bindings.inner.push((name.clone(), Binding::Fragment(fragment))); + } + None if match_res.err.is_none() => bindings.push_optional(name), + _ => {} + } + if let Some(err) = match_res.err { + res.add_err(err); + } + src = iter.clone(); + item.dot.next(); + } else { + unreachable!() + } + cur_items.push(item); + } + stdx::always!(!cur_items.is_empty()); } } @@ -173,73 +544,6 @@ fn match_leaf(lhs: &tt::Leaf, src: &mut TtIter) -> Result<(), ExpandError> { Ok(()) } -fn match_repeat( - res: &mut Match, - pattern: &MetaTemplate, - kind: RepeatKind, - separator: &Option, - src: &mut TtIter, -) { - // Dirty hack to make macro-expansion terminate. - // This should be replaced by a proper macro-by-example implementation - let mut limit = 65536; - let mut counter = 0; - - for i in 0.. { - let mut fork = src.clone(); - - if let Some(separator) = &separator { - if i != 0 && !fork.eat_separator(separator) { - break; - } - } - - let mut nested = Match::default(); - match_tokens(&mut nested, pattern, &mut fork); - if nested.err.is_none() { - limit -= 1; - if limit == 0 { - log::warn!( - "match_lhs exceeded repeat pattern limit => {:#?}\n{:#?}\n{:#?}\n{:#?}", - pattern, - src, - kind, - separator - ); - break; - } - *src = fork; - - if let Err(err) = res.bindings.push_nested(counter, nested.bindings) { - res.add_err(err); - } - counter += 1; - if counter == 1 { - if let RepeatKind::ZeroOrOne = kind { - break; - } - } - } else { - break; - } - } - - match (kind, counter) { - (RepeatKind::OneOrMore, 0) => { - res.add_err(ExpandError::UnexpectedToken); - } - (_, 0) => { - // Collect all empty variables in subtrees - let mut vars = Vec::new(); - collect_vars(&mut vars, pattern); - for var in vars { - res.bindings.push_empty(&var) - } - } - _ => (), - } -} - fn match_meta_var(kind: &str, input: &mut TtIter) -> ExpandResult> { let fragment = match kind { "path" => Path, @@ -303,14 +607,14 @@ fn collect_vars(buf: &mut Vec, pattern: &MetaTemplate) { } impl<'a> TtIter<'a> { - fn eat_separator(&mut self, separator: &Separator) -> bool { + fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool { let mut fork = self.clone(); let ok = match separator { - Separator::Ident(lhs) => match fork.expect_ident() { + Separator::Ident(lhs) if idx == 0 => match fork.expect_ident() { Ok(rhs) => rhs.text == lhs.text, _ => false, }, - Separator::Literal(lhs) => match fork.expect_literal() { + Separator::Literal(lhs) if idx == 0 => match fork.expect_literal() { Ok(rhs) => match rhs { tt::Leaf::Literal(rhs) => rhs.text == lhs.text, tt::Leaf::Ident(rhs) => rhs.text == lhs.text, @@ -318,10 +622,11 @@ impl<'a> TtIter<'a> { }, _ => false, }, - Separator::Puncts(lhss) => lhss.iter().all(|lhs| match fork.expect_punct() { - Ok(rhs) => rhs.char == lhs.char, + Separator::Puncts(lhss) if idx < lhss.len() => match fork.expect_punct() { + Ok(rhs) => rhs.char == lhss[idx].char, _ => false, - }), + }, + _ => false, }; if ok { *self = fork; diff --git a/crates/mbe/src/expander/transcriber.rs b/crates/mbe/src/expander/transcriber.rs index 78368a33e5..ad9953a7d2 100644 --- a/crates/mbe/src/expander/transcriber.rs +++ b/crates/mbe/src/expander/transcriber.rs @@ -13,13 +13,17 @@ use crate::{ impl Bindings { fn contains(&self, name: &str) -> bool { - self.inner.contains_key(name) + self.inner.iter().any(|(n, _)| n == name) } fn get(&self, name: &str, nesting: &mut [NestingState]) -> Result<&Fragment, ExpandError> { - let mut b = self.inner.get(name).ok_or_else(|| { - ExpandError::BindingError(format!("could not find binding `{}`", name)) - })?; + let mut b: &Binding = self + .inner + .iter() + .find_map(|(n, b)| if n == name { Some(b) } else { None }) + .ok_or_else(|| { + ExpandError::BindingError(format!("could not find binding `{}`", name)) + })?; for nesting_state in nesting.iter_mut() { nesting_state.hit = true; b = match b { diff --git a/crates/mbe/src/lib.rs b/crates/mbe/src/lib.rs index 4c298f85fb..f3d2da55a7 100644 --- a/crates/mbe/src/lib.rs +++ b/crates/mbe/src/lib.rs @@ -21,7 +21,7 @@ use test_utils::mark; pub use tt::{Delimiter, DelimiterKind, Punct}; use crate::{ - parser::{parse_pattern, parse_template, Op}, + parser::{parse_pattern, parse_template, MetaTemplate, Op}, tt_iter::TtIter, }; @@ -94,15 +94,6 @@ struct Rule { rhs: MetaTemplate, } -#[derive(Clone, Debug, PartialEq, Eq)] -struct MetaTemplate(Vec); - -impl<'a> MetaTemplate { - fn iter(&self) -> impl Iterator { - self.0.iter() - } -} - #[derive(Clone, Copy, Debug, PartialEq, Eq)] struct Shift(u32); diff --git a/crates/mbe/src/parser.rs b/crates/mbe/src/parser.rs index f891ec29c6..8671322e1f 100644 --- a/crates/mbe/src/parser.rs +++ b/crates/mbe/src/parser.rs @@ -5,7 +5,75 @@ use smallvec::SmallVec; use syntax::SmolStr; use tt::Delimiter; -use crate::{tt_iter::TtIter, MetaTemplate, ParseError}; +use crate::{tt_iter::TtIter, ParseError}; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct MetaTemplate(pub(crate) Vec); + +#[derive(Debug, Clone, Copy)] +pub(crate) enum OpDelimited<'a> { + Op(&'a Op), + Open, + Close, +} + +#[derive(Debug, Clone, Copy)] +pub(crate) struct OpDelimitedIter<'a> { + inner: &'a Vec, + delimited: Option<&'a Delimiter>, + idx: usize, +} + +impl<'a> OpDelimitedIter<'a> { + pub(crate) fn is_eof(&self) -> bool { + let len = self.inner.len() + if self.delimited.is_some() { 2 } else { 0 }; + self.idx >= len + } + + pub(crate) fn peek(&self) -> Option> { + match self.delimited { + None => self.inner.get(self.idx).map(OpDelimited::Op), + Some(_) => match self.idx { + 0 => Some(OpDelimited::Open), + i if i == self.inner.len() + 1 => Some(OpDelimited::Close), + i => self.inner.get(i - 1).map(OpDelimited::Op), + }, + } + } + + pub(crate) fn reset(&self) -> Self { + Self { inner: &self.inner, idx: 0, delimited: self.delimited } + } +} + +impl<'a> Iterator for OpDelimitedIter<'a> { + type Item = OpDelimited<'a>; + + fn next(&mut self) -> Option { + let res = self.peek(); + self.idx += 1; + res + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.inner.len() + if self.delimited.is_some() { 2 } else { 0 }; + let remain = len.checked_sub(self.idx).unwrap_or(0); + (remain, Some(remain)) + } +} + +impl<'a> MetaTemplate { + pub(crate) fn iter(&self) -> impl Iterator { + self.0.iter() + } + + pub(crate) fn iter_delimited( + &'a self, + delimited: Option<&'a Delimiter>, + ) -> OpDelimitedIter<'a> { + OpDelimitedIter { inner: &self.0, idx: 0, delimited } + } +} #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) enum Op { @@ -47,6 +115,16 @@ impl PartialEq for Separator { } } +impl Separator { + pub(crate) fn tt_count(&self) -> usize { + match self { + Separator::Literal(_) => 1, + Separator::Ident(_) => 1, + Separator::Puncts(it) => it.len(), + } + } +} + pub(crate) fn parse_template(template: &tt::Subtree) -> Result, ParseError> { parse_inner(&template, Mode::Template).into_iter().collect() } diff --git a/crates/mbe/src/tests.rs b/crates/mbe/src/tests.rs index f1eadcd1ee..5c641ebf28 100644 --- a/crates/mbe/src/tests.rs +++ b/crates/mbe/src/tests.rs @@ -456,6 +456,17 @@ fn test_match_group_with_multichar_sep() { .assert_expand_items("foo! (fn baz {true true} );", "fn baz () -> bool {true &&true}"); } +#[test] +fn test_match_group_with_multichar_sep2() { + parse_macro( + r#" + macro_rules! foo { + (fn $name:ident {$($i:literal)&&*} ) => ( fn $name() -> bool { $($i)&&*} ); + }"#, + ) + .assert_expand_items("foo! (fn baz {true && true} );", "fn baz () -> bool {true &&true}"); +} + #[test] fn test_match_group_zero_match() { parse_macro( @@ -1267,6 +1278,18 @@ macro_rules! m { .is_some()); } +#[test] +fn test_match_is_not_greedy() { + parse_macro( + r#" +macro_rules! foo { + ($($i:ident $(,)*),*) => {}; +} +"#, + ) + .assert_expand_items(r#"foo!(a,b);"#, r#""#); +} + // The following tests are based on real world situations #[test] fn test_vec() {