rust-analyzer/crates/mbe/src/expander.rs

//! This module takes a (parsed) definition of `macro_rules` invocation, a
//! `tt::TokenTree` representing an argument of macro invocation, and produces a
//! `tt::TokenTree` for the result of the expansion.

mod matcher;
mod transcriber;

use rustc_hash::FxHashMap;
use span::{Edition, Span};
use syntax::SmolStr;

use crate::{parser::MetaVarKind, ExpandError, ExpandResult, MatchedArmIndex};

/// Expands a macro invocation by trying every rule in `rules` against `input`.
///
/// Rules are tried in order. The first rule whose left-hand side matches
/// without error *and* whose right-hand side transcribes without error wins,
/// and its transcription is returned together with its arm index.
///
/// If no rule succeeds cleanly, the "best" erroneous match is transcribed
/// instead so that callers still get a usable (if partial) token tree. A match
/// is better when it leaves fewer token trees unmatched; ties are broken in
/// favour of the match that bound more variables. The returned error is the
/// match error if there is one, otherwise the transcription error.
///
/// If `rules` is empty, an empty invisibly-delimited subtree spanned at
/// `call_site` is returned together with `ExpandError::NoMatchingRule`.
///
/// The arm index is `None` when no rule was selected, or when the index does
/// not fit into a `u32`.
pub(crate) fn expand_rules(
    rules: &[crate::Rule],
    input: &tt::Subtree<Span>,
    marker: impl Fn(&mut Span) + Copy,
    new_meta_vars: bool,
    call_site: Span,
    def_site_edition: Edition,
) -> ExpandResult<(tt::Subtree<Span>, MatchedArmIndex)> {
    // Best erroneous candidate seen so far: (match, rule, arm index).
    let mut match_: Option<(matcher::Match, &crate::Rule, usize)> = None;
    for (idx, rule) in rules.iter().enumerate() {
        let new_match = matcher::match_(&rule.lhs, input, def_site_edition);

        if new_match.err.is_none() {
            // If we find a rule that applies without errors, we're done.
            // Unconditionally returning the transcription here makes the
            // `test_repeat_bad_var` test fail.
            let ExpandResult { value, err: transcribe_err } = transcriber::transcribe(
                &rule.rhs,
                &new_match.bindings,
                marker,
                new_meta_vars,
                call_site,
            );
            if transcribe_err.is_none() {
                // Checked conversion, consistent with the fallback path below;
                // a plain `as u32` would silently truncate the index.
                return ExpandResult::ok((value, idx.try_into().ok()));
            }
        }

        // Use the rule if it left fewer tokens unmatched or, on a tie, bound
        // more variables. `Reverse` flips the ordering of the bound count
        // without a lossy signed cast.
        let is_better = match &match_ {
            Some((prev_match, _, _)) => {
                (new_match.unmatched_tts, std::cmp::Reverse(new_match.bound_count))
                    < (prev_match.unmatched_tts, std::cmp::Reverse(prev_match.bound_count))
            }
            // Nothing recorded yet: the first candidate is always kept.
            None => true,
        };
        if is_better {
            match_ = Some((new_match, rule, idx));
        }
    }

    if let Some((match_, rule, idx)) = match_ {
        // if we got here, there was no match without errors
        let ExpandResult { value, err: transcribe_err } =
            transcriber::transcribe(&rule.rhs, &match_.bindings, marker, new_meta_vars, call_site);
        ExpandResult { value: (value, idx.try_into().ok()), err: match_.err.or(transcribe_err) }
    } else {
        // No candidate was ever recorded, which only happens for an empty rule list.
        ExpandResult::new(
            (
                tt::Subtree {
                    delimiter: tt::Delimiter::invisible_spanned(call_site),
                    token_trees: Box::default(),
                },
                None,
            ),
            ExpandError::NoMatchingRule,
        )
    }
}

/// The actual algorithm for expansion is not too hard, but is pretty tricky.
/// The `Bindings` structure is the key to understanding what we are doing here.
///
/// On the high level, it stores a mapping from meta variables to the bits of
/// syntax they should be substituted with. For example, if `$e:expr` is matched
/// with `1 + 1` by macro_rules, the `Binding` will store `$e -> 1 + 1`.
///
/// The tricky bit is dealing with repetitions (`$()*`). Consider this example:
///
/// ```not_rust
/// macro_rules! foo {
///     ($($ i:ident $($ e:expr),*);*) => {
///         $(fn $ i() { $($ e);*; })*
///     }
/// }
/// foo! { foo 1,2,3; bar 4,5,6 }
/// ```
///
/// Here, the `$i` meta variable is matched first with `foo` and then with
/// `bar`, and `$e` is matched in turn with `1`, `2`, `3`, `4`, `5`, `6`.
///
/// To represent such "multi-mappings", we use a recursive structure: we map
/// variables not to values, but to *lists* of values or other lists (that is,
/// to trees).
///
/// For the above example, the bindings would store
///
/// ```not_rust
/// i -> [foo, bar]
/// e -> [[1, 2, 3], [4, 5, 6]]
/// ```
///
/// We construct `Bindings` in the `match_lhs`. The interesting case is
/// `TokenTree::Repeat`, where we use `push_nested` to create the desired
/// nesting structure.
///
/// The other side of the puzzle is `expand_subtree`, where we use the bindings
/// to substitute meta variables in the output template. When expanding, we
/// maintain a `nesting` stack of indices which tells us which occurrence from
/// the `Bindings` we should take. We push to the stack when we enter a
/// repetition.
///
/// In other words, `Bindings` is a *multi* mapping from `SmolStr` to
/// `tt::TokenTree`, where the index to select a particular `TokenTree` among
/// many is not a plain `usize`, but a `&[usize]`.
///
/// NOTE(review): `match_lhs`, `push_nested` and `expand_subtree` are not
/// visible in this file; confirm these names still exist in the `matcher` and
/// `transcriber` submodules.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
struct Bindings {
    // Meta variable name -> the (possibly nested) binding recorded for it.
    inner: FxHashMap<SmolStr, Binding>,
}

/// The value stored for one meta variable inside `Bindings`.
///
/// Because `Nested` holds further `Binding`s, a value of this type is a tree
/// whose shape mirrors the repetition depth at which the variable was matched.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Binding {
    /// A single matched piece of syntax.
    Fragment(Fragment),
    /// A list of bindings; elements may themselves be `Nested`, which is how
    /// variables matched inside (nested) repetitions are represented.
    Nested(Vec<Binding>),
    /// NOTE(review): presumably marks a variable that has nothing bound to it
    /// (e.g. an unused variable) -- confirm against the matcher.
    Empty,
    /// NOTE(review): presumably marks a variable of the given kind whose
    /// fragment failed to match, so that a default for that kind can be
    /// substituted on transcription -- confirm against the transcriber.
    Missing(MetaVarKind),
}

/// A piece of matched syntax bound to a meta variable, tagged with how it has
/// to be treated when it is written back into the expansion output.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Fragment {
    /// NOTE(review): presumably a fragment that contributes no tokens to the
    /// output -- confirm against the transcriber.
    Empty,
    /// token fragments are just copy-pasted into the output
    Tokens(tt::TokenTree<Span>),
    /// Expr ast fragments are surrounded with `()` on insertion to preserve
    /// precedence. Note that this impl is different from the one currently in
    /// `rustc` -- `rustc` doesn't translate fragments into token trees at all.
    ///
    /// At one point in time, we tried to use "fake" delimiters here à la
    /// proc-macro delimiter=none. As we later discovered, "none" delimiters are
    /// tricky to handle in the parser, and rustc doesn't handle those either.
    Expr(tt::Subtree<Span>),
    /// There are roughly two types of paths: paths in expression context, where a
    /// separator `::` between an identifier and its following generic argument list
    /// is mandatory, and paths in type context, where `::` can be omitted.
    ///
    /// Unlike rustc, we need to transform the parsed fragments back into tokens
    /// during transcription. When the matched path fragment is a type-context path
    /// and is transcribed as an expression-context path, verbatim transcription
    /// would cause a syntax error. We need to fix it up just before transcribing;
    /// see `transcriber::fix_up_and_push_path_tt()`.
    Path(tt::Subtree<Span>),
}
simplify 2019-09-02 15:51:03 +00:00			//! This module takes a (parsed) definition of `macro_rules` invocation, a
			//! `tt::TokenTree` representing an argument of macro invocation, and produces a
			//! `tt::TokenTree` for the result of the expansion.

split mbe expander code into two modules 2019-09-16 23:06:14 +00:00			`mod matcher;`
			`mod transcriber;`

Add bindings builder for speed up matching 2021-03-13 12:17:54 +00:00			`use rustc_hash::FxHashMap;`
internal: Thread edition through to parsing/tt-to-syntax-tree routines for macros 2024-04-14 14:02:38 +00:00			`use span::{Edition, Span};`
Rename ra_syntax -> syntax 2020-08-12 16:26:51 +00:00			`use syntax::SmolStr;`
more expand boilerplate 2019-01-31 10:59:25 +00:00
Cleanup 2024-04-18 09:00:22 +00:00			`use crate::{parser::MetaVarKind, ExpandError, ExpandResult, MatchedArmIndex};`
wip 2020-03-14 19:24:18 +00:00
Remove span generics from most of the mbe crate 2024-03-19 16:06:50 +00:00			`pub(crate) fn expand_rules(`
			`rules: &[crate::Rule],`
			`input: &tt::Subtree<Span>,`
			`marker: impl Fn(&mut Span) + Copy,`
Try to support pre and post-change metavars 2023-12-19 11:53:10 +00:00			`new_meta_vars: bool,`
Remove span generics from most of the mbe crate 2024-03-19 16:06:50 +00:00			`call_site: Span,`
internal: Thread edition through to parsing/tt-to-syntax-tree routines for macros 2024-04-14 14:02:38 +00:00			`def_site_edition: Edition,`
Cleanup 2024-04-18 09:00:22 +00:00			`) -> ExpandResult<(tt::Subtree<Span>, MatchedArmIndex)> {`
Render matched macro arm on hover of macro calls 2023-12-08 17:24:24 +00:00			`let mut match_: Option<(matcher::Match, &crate::Rule, usize)> = None;`
			`for (idx, rule) in rules.iter().enumerate() {`
internal: Thread edition through to parsing/tt-to-syntax-tree routines for macros 2024-04-14 14:02:38 +00:00			`let new_match = matcher::match_(&rule.lhs, input, def_site_edition);`
Simplify mbe match error. Handle parse error in rule parsing instead of match in mbe 2021-01-29 16:21:43 +00:00
Some more refactoring 2020-03-16 17:38:10 +00:00			`if new_match.err.is_none() {`
Some cleanup 2020-03-16 17:04:07 +00:00			`// If we find a rule that applies without errors, we're done.`
			`// Unconditionally returning the transcription here makes the`
			// `test_repeat_bad_var` test fail.
Remove usages of Span::DUMMY 2023-12-20 11:53:46 +00:00			`let ExpandResult { value, err: transcribe_err } = transcriber::transcribe(`
			`&rule.rhs,`
			`&new_match.bindings,`
			`marker,`
			`new_meta_vars,`
			`call_site,`
			`);`
Fix performance problem 2020-03-15 13:37:30 +00:00			`if transcribe_err.is_none() {`
Render matched macro arm on hover of macro calls 2023-12-08 17:24:24 +00:00			`return ExpandResult::ok((value, Some(idx as u32)));`
Fix performance problem 2020-03-15 13:37:30 +00:00			`}`
Make MBE expansion more resilient (WIP) 2020-03-13 12:03:31 +00:00			`}`
NFA parser for mbe matcher 2021-02-01 20:42:37 +00:00			`// Use the rule if we matched more tokens, or bound variables count`
Render matched macro arm on hover of macro calls 2023-12-08 17:24:24 +00:00			`if let Some((prev_match, _, _)) = &match_ {`
NFA parser for mbe matcher 2021-02-01 20:42:37 +00:00			`if (new_match.unmatched_tts, -(new_match.bound_count as i32))`
			`< (prev_match.unmatched_tts, -(prev_match.bound_count as i32))`
Fix performance problem 2020-03-15 13:37:30 +00:00			`{`
Render matched macro arm on hover of macro calls 2023-12-08 17:24:24 +00:00			`match_ = Some((new_match, rule, idx));`
Fix performance problem 2020-03-15 13:37:30 +00:00			`}`
			`} else {`
Render matched macro arm on hover of macro calls 2023-12-08 17:24:24 +00:00			`match_ = Some((new_match, rule, idx));`
Attempt to implement ranking of rules when none matches perfectly (wip) 2020-03-13 14:18:17 +00:00			`}`
Make MBE expansion more resilient (WIP) 2020-03-13 12:03:31 +00:00			`}`
Render matched macro arm on hover of macro calls 2023-12-08 17:24:24 +00:00			`if let Some((match_, rule, idx)) = match_ {`
Fix performance problem 2020-03-15 13:37:30 +00:00			`// if we got here, there was no match without errors`
Use named fields in `ExpandResult` 2020-11-26 15:04:23 +00:00			`let ExpandResult { value, err: transcribe_err } =`
Remove usages of Span::DUMMY 2023-12-20 11:53:46 +00:00			`transcriber::transcribe(&rule.rhs, &match_.bindings, marker, new_meta_vars, call_site);`
Cleanup 2024-04-18 09:00:22 +00:00			`ExpandResult { value: (value, idx.try_into().ok()), err: match_.err.or(transcribe_err) }`
Fix performance problem 2020-03-15 13:37:30 +00:00			`} else {`
Option begone part 2 2023-04-16 17:20:48 +00:00			`ExpandResult::new(`
Render matched macro arm on hover of macro calls 2023-12-08 17:24:24 +00:00			`(`
			`tt::Subtree {`
			`delimiter: tt::Delimiter::invisible_spanned(call_site),`
			`token_trees: Box::default(),`
			`},`
			`None,`
			`),`
Make tt generic over the span data 2023-01-31 10:49:49 +00:00			`ExpandError::NoMatchingRule,`
			`)`
Fix performance problem 2020-03-15 13:37:30 +00:00			`}`
more expand boilerplate 2019-01-31 10:59:25 +00:00			`}`

explain the magic 2019-01-31 20:01:34 +00:00			`/// The actual algorithm for expansion is not too hard, but is pretty tricky.`
			/// `Bindings` structure is the key to understanding what we are doing here.
			`///`
			`/// On the high level, it stores mapping from meta variables to the bits of`
			/// syntax it should be substituted with. For example, if `$e:expr` is matched
			/// with `1 + 1` by macro_rules, the `Binding` will store `$e -> 1 + 1`.
			`///`
			/// The tricky bit is dealing with repetitions (`$()*`). Consider this example:
			`///`
avoid 'ignored' in test output 2019-02-08 10:55:45 +00:00			/// ```not_rust
explain the magic 2019-01-31 20:01:34 +00:00			`/// macro_rules! foo {`
			`/// ($($ i:ident $($ e:expr),*);*) => {`
			`/// $(fn $ i() { $($ e);*; })*`
			`/// }`
			`/// }`
			`/// foo! { foo 1,2,3; bar 4,5,6 }`
			/// ```
			`///`
			/// Here, the `$i` meta variable is matched first with `foo` and then with
			/// `bar`, and `$e` is matched in turn with `1`, `2`, `3`, `4`, `5`, `6`.
			`///`
			`/// To represent such "multi-mappings", we use a recursive structures: we map`
			`/// variables not to values, but to lists of values or other lists (that is,`
			`/// to the trees).`
			`///`
			`/// For the above example, the bindings would store`
			`///`
avoid 'ignored' in test output 2019-02-08 10:55:45 +00:00			/// ```not_rust
explain the magic 2019-01-31 20:01:34 +00:00			`/// i -> [foo, bar]`
			`/// e -> [[1, 2, 3], [4, 5, 6]]`
			/// ```
			`///`
			/// We construct `Bindings` in the `match_lhs`. The interesting case is
			/// `TokenTree::Repeat`, where we use `push_nested` to create the desired
			`/// nesting structure.`
			`///`
			/// The other side of the puzzle is `expand_subtree`, where we use the bindings
			`/// to substitute meta variables in the output template. When expanding, we`
Fix some typos 2019-02-11 16:18:27 +00:00			/// maintain a `nesting` stack of indices which tells us which occurrence from
explain the magic 2019-01-31 20:01:34 +00:00			/// the `Bindings` we should take. We push to the stack when we enter a
			`/// repetition.`
			`///`
			/// In other words, `Bindings` is a multi mapping from `SmolStr` to
			/// `tt::TokenTree`, where the index to select a particular `TokenTree` among
Fix two more “a”/“an” typos (this time the other way) 2021-08-22 15:35:52 +00:00			/// many is not a plain `usize`, but a `&[usize]`.
Remove span generics from most of the mbe crate 2024-03-19 16:06:50 +00:00			`#[derive(Debug, Default, Clone, PartialEq, Eq)]`
			`struct Bindings {`
			`inner: FxHashMap<SmolStr, Binding>,`
more expand boilerplate 2019-01-31 10:59:25 +00:00			`}`

NFA parser for mbe matcher 2021-02-01 20:42:37 +00:00			`#[derive(Debug, Clone, PartialEq, Eq)]`
Remove span generics from most of the mbe crate 2024-03-19 16:06:50 +00:00			`enum Binding {`
			`Fragment(Fragment),`
			`Nested(Vec<Binding>),`
Mark unused mbe variable as `Binding::Empty` 2019-05-03 17:14:25 +00:00			`Empty,`
Expand unmatched mbe fragments to reasonable default token trees Currently we expand unmatched fragments by not replacing them at all, leaving us with `$ident`. This trips up the parser or subsequent macro calls. Instead it makes more sense to replace these with some reasonable default depending on the fragment kind which should make more recursive macro calls work better for completions. 2022-10-10 12:25:14 +00:00			`Missing(MetaVarKind),`
binders boilerplate 2019-01-31 12:22:55 +00:00			`}`

NFA parser for mbe matcher 2021-02-01 20:42:37 +00:00			`#[derive(Debug, Clone, PartialEq, Eq)]`
Remove span generics from most of the mbe crate 2024-03-19 16:06:50 +00:00			`enum Fragment {`
Remove Delimiter::DUMMY_INVISIBLE 2023-12-20 13:00:14 +00:00			`Empty,`
add fragmets to expansion 2019-09-10 17:09:43 +00:00			`/// token fragments are just copy-pasted into the output`
Remove span generics from most of the mbe crate 2024-03-19 16:06:50 +00:00			`Tokens(tt::TokenTree<Span>),`
internal: replace L_DOLLAR/R_DOLLAR with parenthesis hack The general problem we are dealing with here is this: ``` macro_rules! thrice { ($e:expr) => { $e * 3} } fn main() { let x = thrice!(1 + 2); } ``` we really want this to print 9 rather than 7. The way rustc solves this is rather ad-hoc. In rustc, token trees are allowed to include whole AST fragments, so 1+2 is passed through macro expansion as a single unit. This is a significant violation of token tree model. In rust-analyzer, we intended to handle this in a more elegant way, using token trees with "invisible" delimiters. The idea was is that we introduce a new kind of parenthesis, "left $"/"right $", and let the parser intelligently handle this. The idea was inspired by the relevant comment in the proc_macro crate: https://doc.rust-lang.org/stable/proc_macro/enum.Delimiter.html#variant.None > An implicit delimiter, that may, for example, appear around tokens > coming from a “macro variable” $var. It is important to preserve > operator priorities in cases like $var * 3 where $var is 1 + 2. > Implicit delimiters might not survive roundtrip of a token stream > through a string. Now that we are older and wiser, we conclude that the idea doesn't work. _First_, the comment in the proc-macro crate is wishful thinking. Rustc currently completely ignores none delimiters. It solves the (1 + 2) * 3 problem by having magical token trees which can't be duplicated: * https://rust-lang.zulipchat.com/#narrow/stream/185405-t-compiler.2Frust-analyzer/topic/TIL.20that.20token.20streams.20are.20magic * https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler/topic/Handling.20of.20Delimiter.3A.3ANone.20by.20the.20parser _Second_, it's not like our implementation in rust-analyzer works. We special-case expressions (as opposed to treating all kinds of $var captures the same) and we don't know how parser error recovery should work with these dollar-parenthesis. 
So, in this PR we simplify the whole thing away by not pretending that we are doing something proper and instead just explicitly special-casing expressions by wrapping them into real `()`. In the future, to maintain bug-parity with `rustc` what we are going to do is probably adding an explicit `CAPTURED_EXPR` token which we can explicitly account for in the parser. If/when rustc starts handling delimiter=none properly, we'll port that logic as well, in addition to special handling. 2021-10-23 17:08:42 +00:00			/// Expr ast fragments are surrounded with `()` on insertion to preserve
			`/// precedence. Note that this impl is different from the one currently in`
			/// `rustc` -- `rustc` doesn't translate fragments into token trees at all.
			`///`
Remove repetitive words Signed-off-by: cui fliter <imcusg@gmail.com> 2023-10-05 09:41:50 +00:00			`/// At one point in time, we tried to use "fake" delimiters here à la`
internal: replace L_DOLLAR/R_DOLLAR with parenthesis hack The general problem we are dealing with here is this: ``` macro_rules! thrice { ($e:expr) => { $e * 3} } fn main() { let x = thrice!(1 + 2); } ``` we really want this to print 9 rather than 7. The way rustc solves this is rather ad-hoc. In rustc, token trees are allowed to include whole AST fragments, so 1+2 is passed through macro expansion as a single unit. This is a significant violation of token tree model. In rust-analyzer, we intended to handle this in a more elegant way, using token trees with "invisible" delimiters. The idea was is that we introduce a new kind of parenthesis, "left $"/"right $", and let the parser intelligently handle this. The idea was inspired by the relevant comment in the proc_macro crate: https://doc.rust-lang.org/stable/proc_macro/enum.Delimiter.html#variant.None > An implicit delimiter, that may, for example, appear around tokens > coming from a “macro variable” $var. It is important to preserve > operator priorities in cases like $var * 3 where $var is 1 + 2. > Implicit delimiters might not survive roundtrip of a token stream > through a string. Now that we are older and wiser, we conclude that the idea doesn't work. _First_, the comment in the proc-macro crate is wishful thinking. Rustc currently completely ignores none delimiters. It solves the (1 + 2) * 3 problem by having magical token trees which can't be duplicated: * https://rust-lang.zulipchat.com/#narrow/stream/185405-t-compiler.2Frust-analyzer/topic/TIL.20that.20token.20streams.20are.20magic * https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler/topic/Handling.20of.20Delimiter.3A.3ANone.20by.20the.20parser _Second_, it's not like our implementation in rust-analyzer works. We special-case expressions (as opposed to treating all kinds of $var captures the same) and we don't know how parser error recovery should work with these dollar-parenthesis. 
So, in this PR we simplify the whole thing away by not pretending that we are doing something proper and instead just explicitly special-casing expressions by wrapping them into real `()`. In the future, to maintain bug-parity with `rustc` what we are going to do is probably adding an explicit `CAPTURED_EXPR` token which we can explicitly account for in the parser. If/when rustc starts handling delimiter=none properly, we'll port that logic as well, in addition to special handling. 2021-10-23 17:08:42 +00:00			`/// proc-macro delimiter=none. As we later discovered, "none" delimiters are`
			`/// tricky to handle in the parser, and rustc doesn't handle those either.`
Remove span generics from most of the mbe crate 2024-03-19 16:06:50 +00:00			`Expr(tt::Subtree<Span>),`
Fixup path fragments upon MBE transcription 2023-07-30 14:36:42 +00:00			`/// There are roughly two types of paths: paths in expression context, where a`
			/// separator `::` between an identifier and its following generic argument list
			/// is mandatory, and paths in type context, where `::` can be omitted.
			`///`
			`/// Unlike rustc, we need to transform the parsed fragments back into tokens`
			`/// during transcription. When the matched path fragment is a type-context path`
			`/// and is transcribed as an expression-context path, verbatim transcription`
			`/// would cause a syntax error. We need to fix it up just before transcribing;`
			/// see `transcriber::fix_up_and_push_path_tt()`.
Remove span generics from most of the mbe crate 2024-03-19 16:06:50 +00:00			`Path(tt::Subtree<Span>),`
add fragmets to expansion 2019-09-10 17:09:43 +00:00			`}`