2019-09-02 15:51:03 +00:00
|
|
|
//! This module takes a (parsed) definition of `macro_rules` invocation, a
|
|
|
|
//! `tt::TokenTree` representing an argument of macro invocation, and produces a
|
|
|
|
//! `tt::TokenTree` for the result of the expansion.
|
|
|
|
|
2019-09-16 23:06:14 +00:00
|
|
|
mod matcher;
|
|
|
|
mod transcriber;
|
|
|
|
|
2021-03-13 12:17:54 +00:00
|
|
|
use rustc_hash::FxHashMap;
|
2024-04-14 14:02:38 +00:00
|
|
|
use span::{Edition, Span};
|
2020-08-12 16:26:51 +00:00
|
|
|
use syntax::SmolStr;
|
2019-01-31 10:59:25 +00:00
|
|
|
|
2024-04-18 09:00:22 +00:00
|
|
|
use crate::{parser::MetaVarKind, ExpandError, ExpandResult, MatchedArmIndex};
|
2020-03-14 19:24:18 +00:00
|
|
|
|
2024-03-19 16:06:50 +00:00
|
|
|
pub(crate) fn expand_rules(
|
|
|
|
rules: &[crate::Rule],
|
|
|
|
input: &tt::Subtree<Span>,
|
|
|
|
marker: impl Fn(&mut Span) + Copy,
|
2023-12-19 11:53:10 +00:00
|
|
|
new_meta_vars: bool,
|
2024-03-19 16:06:50 +00:00
|
|
|
call_site: Span,
|
2024-04-14 14:02:38 +00:00
|
|
|
def_site_edition: Edition,
|
2024-04-18 09:00:22 +00:00
|
|
|
) -> ExpandResult<(tt::Subtree<Span>, MatchedArmIndex)> {
|
2023-12-08 17:24:24 +00:00
|
|
|
let mut match_: Option<(matcher::Match, &crate::Rule, usize)> = None;
|
|
|
|
for (idx, rule) in rules.iter().enumerate() {
|
2024-04-14 14:02:38 +00:00
|
|
|
let new_match = matcher::match_(&rule.lhs, input, def_site_edition);
|
2021-01-29 16:21:43 +00:00
|
|
|
|
2020-03-16 17:38:10 +00:00
|
|
|
if new_match.err.is_none() {
|
2020-03-16 17:04:07 +00:00
|
|
|
// If we find a rule that applies without errors, we're done.
|
|
|
|
// Unconditionally returning the transcription here makes the
|
|
|
|
// `test_repeat_bad_var` test fail.
|
2023-12-20 11:53:46 +00:00
|
|
|
let ExpandResult { value, err: transcribe_err } = transcriber::transcribe(
|
|
|
|
&rule.rhs,
|
|
|
|
&new_match.bindings,
|
|
|
|
marker,
|
|
|
|
new_meta_vars,
|
|
|
|
call_site,
|
|
|
|
);
|
2020-03-15 13:37:30 +00:00
|
|
|
if transcribe_err.is_none() {
|
2023-12-08 17:24:24 +00:00
|
|
|
return ExpandResult::ok((value, Some(idx as u32)));
|
2020-03-15 13:37:30 +00:00
|
|
|
}
|
2020-03-13 12:03:31 +00:00
|
|
|
}
|
2021-02-01 20:42:37 +00:00
|
|
|
// Use the rule if we matched more tokens, or bound variables count
|
2023-12-08 17:24:24 +00:00
|
|
|
if let Some((prev_match, _, _)) = &match_ {
|
2021-02-01 20:42:37 +00:00
|
|
|
if (new_match.unmatched_tts, -(new_match.bound_count as i32))
|
|
|
|
< (prev_match.unmatched_tts, -(prev_match.bound_count as i32))
|
2020-03-15 13:37:30 +00:00
|
|
|
{
|
2023-12-08 17:24:24 +00:00
|
|
|
match_ = Some((new_match, rule, idx));
|
2020-03-15 13:37:30 +00:00
|
|
|
}
|
|
|
|
} else {
|
2023-12-08 17:24:24 +00:00
|
|
|
match_ = Some((new_match, rule, idx));
|
2020-03-13 14:18:17 +00:00
|
|
|
}
|
2020-03-13 12:03:31 +00:00
|
|
|
}
|
2023-12-08 17:24:24 +00:00
|
|
|
if let Some((match_, rule, idx)) = match_ {
|
2020-03-15 13:37:30 +00:00
|
|
|
// if we got here, there was no match without errors
|
2020-11-26 15:04:23 +00:00
|
|
|
let ExpandResult { value, err: transcribe_err } =
|
2023-12-20 11:53:46 +00:00
|
|
|
transcriber::transcribe(&rule.rhs, &match_.bindings, marker, new_meta_vars, call_site);
|
2024-04-18 09:00:22 +00:00
|
|
|
ExpandResult { value: (value, idx.try_into().ok()), err: match_.err.or(transcribe_err) }
|
2020-03-15 13:37:30 +00:00
|
|
|
} else {
|
2023-04-16 17:20:48 +00:00
|
|
|
ExpandResult::new(
|
2023-12-08 17:24:24 +00:00
|
|
|
(
|
|
|
|
tt::Subtree {
|
|
|
|
delimiter: tt::Delimiter::invisible_spanned(call_site),
|
|
|
|
token_trees: Box::default(),
|
|
|
|
},
|
|
|
|
None,
|
|
|
|
),
|
2023-01-31 10:49:49 +00:00
|
|
|
ExpandError::NoMatchingRule,
|
|
|
|
)
|
2020-03-15 13:37:30 +00:00
|
|
|
}
|
2019-01-31 10:59:25 +00:00
|
|
|
}
|
|
|
|
|
2019-01-31 20:01:34 +00:00
|
|
|
/// The actual algorithm for expansion is not too hard, but is pretty tricky.
/// `Bindings` structure is the key to understanding what we are doing here.
///
/// On the high level, it stores mapping from meta variables to the bits of
/// syntax it should be substituted with. For example, if `$e:expr` is matched
/// with `1 + 1` by macro_rules, the `Binding` will store `$e -> 1 + 1`.
///
/// The tricky bit is dealing with repetitions (`$()*`). Consider this example:
///
/// ```not_rust
/// macro_rules! foo {
///     ($($ i:ident $($ e:expr),*);*) => {
///         $(fn $ i() { $($ e);*; })*
///     }
/// }
/// foo! { foo 1,2,3; bar 4,5,6 }
/// ```
///
/// Here, the `$i` meta variable is matched first with `foo` and then with
/// `bar`, and `$e` is matched in turn with `1`, `2`, `3`, `4`, `5`, `6`.
///
/// To represent such "multi-mappings", we use recursive structures: we map
/// variables not to values, but to *lists* of values or other lists (that is,
/// to the trees).
///
/// For the above example, the bindings would store
///
/// ```not_rust
/// i -> [foo, bar]
/// e -> [[1, 2, 3], [4, 5, 6]]
/// ```
///
/// We construct `Bindings` in the `match_lhs`. The interesting case is
/// `TokenTree::Repeat`, where we use `push_nested` to create the desired
/// nesting structure.
///
/// The other side of the puzzle is `expand_subtree`, where we use the bindings
/// to substitute meta variables in the output template. When expanding, we
/// maintain a `nesting` stack of indices which tells us which occurrence from
/// the `Bindings` we should take. We push to the stack when we enter a
/// repetition.
///
/// In other words, `Bindings` is a *multi* mapping from `SmolStr` to
/// `tt::TokenTree`, where the index to select a particular `TokenTree` among
/// many is not a plain `usize`, but a `&[usize]`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
struct Bindings {
    // Maps each meta-variable name to its (possibly nested) captured binding.
    inner: FxHashMap<SmolStr, Binding>,
}
|
|
|
|
|
2021-02-01 20:42:37 +00:00
|
|
|
/// One node in the (possibly nested) capture tree stored in `Bindings`; see
/// the documentation on `Bindings` for how nesting mirrors `$()*` repetitions.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Binding {
    /// A single captured fragment of syntax.
    Fragment(Fragment),
    /// One level of repetition: the bindings captured by each iteration, in
    /// order.
    Nested(Vec<Binding>),
    /// No captured fragments — presumably a repetition that matched zero
    /// times; confirm against `matcher`.
    Empty,
    /// A meta variable of the given kind with no captured value — NOTE(review):
    /// looks like this is used for error recovery; confirm against callers.
    Missing(MetaVarKind),
}
|
|
|
|
|
2021-02-01 20:42:37 +00:00
|
|
|
/// A single unit of matched syntax, distinguishing the capture kinds that
/// need special treatment when transcribed back into tokens.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Fragment {
    /// An empty fragment — nothing is emitted for it on transcription
    /// (presumably; confirm in `transcriber`).
    Empty,
    /// token fragments are just copy-pasted into the output
    Tokens(tt::TokenTree<Span>),
    /// Expr ast fragments are surrounded with `()` on insertion to preserve
    /// precedence. Note that this impl is different from the one currently in
    /// `rustc` -- `rustc` doesn't translate fragments into token trees at all.
    ///
    /// At one point in time, we tried to use "fake" delimiters here à la
    /// proc-macro delimiter=none. As we later discovered, "none" delimiters are
    /// tricky to handle in the parser, and rustc doesn't handle those either.
    Expr(tt::Subtree<Span>),
    /// There are roughly two types of paths: paths in expression context, where a
    /// separator `::` between an identifier and its following generic argument list
    /// is mandatory, and paths in type context, where `::` can be omitted.
    ///
    /// Unlike rustc, we need to transform the parsed fragments back into tokens
    /// during transcription. When the matched path fragment is a type-context path
    /// and is transcribed as an expression-context path, verbatim transcription
    /// would cause a syntax error. We need to fix it up just before transcribing;
    /// see `transcriber::fix_up_and_push_path_tt()`.
    Path(tt::Subtree<Span>),
}
|