Don't mutate syntax trees when preparing proc-macro input

Lukas Wirth 2021-08-25 18:57:24 +02:00
parent ce4670f299
commit d6134b6802
7 changed files with 58 additions and 136 deletions
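
The change in a sentence: instead of rewriting the freshly parsed syntax tree before macro expansion (cloning the item and splicing whitespace over the attributes to drop), the tree is left untouched and the attributes' text range is passed down to the token-tree conversion as an optional "censor" range, where the covered tokens are simply skipped. The same idea in miniature (a standalone sketch with a made-up Token type and offsets, not rust-analyzer's real API):

    struct Token {
        text: &'static str,
        range: (u32, u32), // byte range in the original source
    }

    /// Yield only the tokens lying outside the censored byte range,
    /// the way `Convertor` below skips real syntax tokens.
    fn censored<'a>(
        tokens: &'a [Token],
        censor: Option<(u32, u32)>,
    ) -> impl Iterator<Item = &'a Token> + 'a {
        tokens.iter().filter(move |tok| match censor {
            Some((start, end)) => !(start <= tok.range.0 && tok.range.1 <= end),
            None => true,
        })
    }

    fn main() {
        // `#[derive(Foo)] struct S;`, censoring the attribute at bytes 0..14.
        let tokens = [
            Token { text: "#[derive(Foo)]", range: (0, 14) },
            Token { text: "struct", range: (15, 21) },
            Token { text: "S", range: (22, 23) },
            Token { text: ";", range: (23, 24) },
        ];
        for tok in censored(&tokens, Some((0, 14))) {
            print!("{} ", tok.text); // prints: struct S ;
        }
    }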

Cargo.lock (generated)

@@ -511,6 +511,7 @@ dependencies = [
  "cov-mark",
  "either",
  "expect-test",
+ "itertools",
  "la-arena",
  "limit",
  "log",

crates/hir_expand/Cargo.toml

@@ -14,6 +14,7 @@ log = "0.4.8"
 either = "1.5.3"
 rustc-hash = "1.0.0"
 la-arena = { version = "0.2.0", path = "../../lib/arena" }
+itertools = "0.10.0"

 base_db = { path = "../base_db", version = "0.0.0" }
 cfg = { path = "../cfg", version = "0.0.0" }

crates/hir_expand/src/db.rs

@@ -3,19 +3,20 @@
 use std::sync::Arc;

 use base_db::{salsa, SourceDatabase};
+use itertools::Itertools;
 use limit::Limit;
 use mbe::{ExpandError, ExpandResult};
 use parser::{FragmentKind, T};
 use syntax::{
-    algo::diff,
-    ast::{self, NameOwner},
-    AstNode, GreenNode, Parse, SyntaxNode, SyntaxToken,
+    ast::{self, AttrsOwner, NameOwner},
+    AstNode, GreenNode, Parse, SyntaxNode, SyntaxToken, TextRange,
 };

 use crate::{
-    ast_id_map::AstIdMap, hygiene::HygieneFrame, input::process_macro_input, BuiltinAttrExpander,
-    BuiltinDeriveExpander, BuiltinFnLikeExpander, HirFileId, HirFileIdRepr, MacroCallId,
-    MacroCallKind, MacroCallLoc, MacroDefId, MacroDefKind, MacroFile, ProcMacroExpander,
+    ast_id_map::AstIdMap, hygiene::HygieneFrame, BuiltinAttrExpander, BuiltinDeriveExpander,
+    BuiltinFnLikeExpander, HirFileId, HirFileIdRepr, MacroCallId, MacroCallKind, MacroCallLoc,
+    MacroDefId, MacroDefKind, MacroFile, ProcMacroExpander,
 };

 /// Total limit on the number of tokens produced by any macro invocation.
@@ -257,9 +258,28 @@ fn parse_macro_expansion(
 fn macro_arg(db: &dyn AstDatabase, id: MacroCallId) -> Option<Arc<(tt::Subtree, mbe::TokenMap)>> {
     let arg = db.macro_arg_text(id)?;
-    let (mut tt, tmap) = mbe::syntax_node_to_token_tree(&SyntaxNode::new_root(arg));
+    let loc = db.lookup_intern_macro(id);
+
+    let node = SyntaxNode::new_root(arg);
+    let censor = match loc.kind {
+        MacroCallKind::FnLike { .. } => None,
+        MacroCallKind::Derive { derive_attr_index, .. } => match ast::Item::cast(node.clone()) {
+            Some(item) => item
+                .attrs()
+                .map(|attr| attr.syntax().text_range())
+                .take(derive_attr_index as usize + 1)
+                .fold1(TextRange::cover),
+            None => None,
+        },
+        MacroCallKind::Attr { invoc_attr_index, .. } => match ast::Item::cast(node.clone()) {
+            Some(item) => {
+                item.attrs().nth(invoc_attr_index as usize).map(|attr| attr.syntax().text_range())
+            }
+            None => None,
+        },
+    };
+    let (mut tt, tmap) = mbe::syntax_node_to_token_tree_censored(&node, censor);
-    let loc: MacroCallLoc = db.lookup_intern_macro(id);

     if loc.def.is_proc_macro() {
         // proc macros expect their inputs without parentheses, MBEs expect it with them included
         tt.delimiter = None;
     }
@@ -271,7 +291,6 @@ fn macro_arg(db: &dyn AstDatabase, id: MacroCallId) -> Option<Arc<(tt::Subtree,
 fn macro_arg_text(db: &dyn AstDatabase, id: MacroCallId) -> Option<GreenNode> {
     let loc = db.lookup_intern_macro(id);
     let arg = loc.kind.arg(db)?;
-    let arg = process_macro_input(&loc.kind, arg);
     if matches!(loc.kind, MacroCallKind::FnLike { .. }) {
         let first = arg.first_child_or_token().map_or(T![.], |it| it.kind());
         let last = arg.last_child_or_token().map_or(T![.], |it| it.kind());
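
Note how macro_arg computes the censor range above: for an attribute macro it is just the invoking attribute's own range, while for a derive it is the cover of the first derive_attr_index + 1 attribute ranges. A sketch of that fold, with plain (u32, u32) pairs standing in for TextRange and std's reduce standing in for itertools' fold1 (offsets taken from the deleted test fixture further down):

    /// The smallest range containing both inputs, i.e. what `TextRange::cover` does.
    fn cover(a: (u32, u32), b: (u32, u32)) -> (u32, u32) {
        (a.0.min(b.0), a.1.max(b.1))
    }

    fn main() {
        // Ranges of `#[allow(unused)]`, `#[derive(Copy)]`, `#[derive(Hello)]`.
        let attr_ranges = [(0u32, 16u32), (17, 32), (33, 49)];
        let derive_attr_index = 2; // expansion was triggered by the third attribute

        // `.map(range).take(index + 1).fold1(TextRange::cover)` in miniature:
        let censor = attr_ranges.iter().copied().take(derive_attr_index + 1).reduce(cover);
        assert_eq!(censor, Some((0, 49)));
    }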

crates/hir_expand/src/input.rs (deleted)

@@ -1,120 +0,0 @@
-//! Macro input conditioning.
-
-use syntax::{
-    ast::{self, make, AttrsOwner},
-    AstNode, SyntaxNode,
-};
-
-use crate::{
-    name::{name, AsName},
-    MacroCallKind,
-};
-
-pub(crate) fn process_macro_input(macro_call_kind: &MacroCallKind, node: SyntaxNode) -> SyntaxNode {
-    match macro_call_kind {
-        MacroCallKind::FnLike { .. } => node,
-        MacroCallKind::Derive { derive_attr_index, .. } => {
-            let item = match ast::Item::cast(node.clone()) {
-                Some(item) => item,
-                None => return node,
-            };
-
-            remove_derives_up_to(item, *derive_attr_index as usize).syntax().clone()
-        }
-        MacroCallKind::Attr { invoc_attr_index, .. } => {
-            let item = match ast::Item::cast(node.clone()) {
-                Some(item) => item,
-                None => return node,
-            };
-
-            remove_attr_invoc(item, *invoc_attr_index as usize).syntax().clone()
-        }
-    }
-}
-
-/// Removes `#[derive]` attributes from `item`, up to `attr_index`.
-fn remove_derives_up_to(item: ast::Item, attr_index: usize) -> ast::Item {
-    let item = item.clone_for_update();
-    for attr in item.attrs().take(attr_index + 1) {
-        if let Some(name) =
-            attr.path().and_then(|path| path.as_single_segment()).and_then(|seg| seg.name_ref())
-        {
-            if name.as_name() == name![derive] {
-                replace_attr(&item, &attr);
-            }
-        }
-    }
-    item
-}
-
-/// Removes the attribute invoking an attribute macro from `item`.
-fn remove_attr_invoc(item: ast::Item, attr_index: usize) -> ast::Item {
-    let item = item.clone_for_update();
-    let attr = item
-        .attrs()
-        .nth(attr_index)
-        .unwrap_or_else(|| panic!("cannot find attribute #{}", attr_index));
-    replace_attr(&item, &attr);
-    item
-}
-
-fn replace_attr(item: &ast::Item, attr: &ast::Attr) {
-    let syntax_index = attr.syntax().index();
-    let ws = make::tokens::whitespace(&" ".repeat(u32::from(attr.syntax().text().len()) as usize));
-    item.syntax().splice_children(syntax_index..syntax_index + 1, vec![ws.into()]);
-}
-
-#[cfg(test)]
-mod tests {
-    use base_db::{fixture::WithFixture, SourceDatabase};
-    use expect_test::{expect, Expect};
-
-    use crate::test_db::TestDB;
-
-    use super::*;
-
-    fn test_remove_derives_up_to(attr: usize, ra_fixture: &str, expect: Expect) {
-        let (db, file_id) = TestDB::with_single_file(ra_fixture);
-        let parsed = db.parse(file_id);
-
-        let mut items: Vec<_> =
-            parsed.syntax_node().descendants().filter_map(ast::Item::cast).collect();
-        assert_eq!(items.len(), 1);
-
-        let item = remove_derives_up_to(items.pop().unwrap(), attr);
-        let res: String =
-            item.syntax().children_with_tokens().map(|e| format!("{:?}\n", e)).collect();
-        expect.assert_eq(&res);
-    }
-
-    #[test]
-    fn remove_derive() {
-        test_remove_derives_up_to(
-            2,
-            r#"
-#[allow(unused)]
-#[derive(Copy)]
-#[derive(Hello)]
-#[derive(Clone)]
-struct A {
-    bar: u32
-}
-"#,
-            expect![[r#"
-                Node(ATTR@0..16)
-                Token(WHITESPACE@16..17 "\n")
-                Token(WHITESPACE@17..32 "               ")
-                Token(WHITESPACE@32..33 "\n")
-                Token(WHITESPACE@33..49 "                ")
-                Token(WHITESPACE@49..50 "\n")
-                Node(ATTR@50..66)
-                Token(WHITESPACE@66..67 "\n")
-                Token(STRUCT_KW@67..73 "struct")
-                Token(WHITESPACE@73..74 " ")
-                Node(NAME@74..75)
-                Token(WHITESPACE@75..76 " ")
-                Node(RECORD_FIELD_LIST@76..92)
-            "#]],
-        );
-    }
-}
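
For contrast, the module deleted above implemented the old, mutating strategy: clone the item with clone_for_update, then splice a whitespace token of exactly the removed attribute's length over each dropped attribute so that every later offset stayed valid; the WHITESPACE@17..32 and WHITESPACE@33..49 tokens in the expect test are those placeholders. The same-length-blanking trick on a plain string (a hypothetical helper, only to show why the lengths must match):

    /// Replace `src[start..end]` with spaces of identical length so every
    /// offset after the span keeps pointing at the same text.
    fn blank_span(src: &str, start: usize, end: usize) -> String {
        let mut out = String::with_capacity(src.len());
        out.push_str(&src[..start]);
        out.extend(std::iter::repeat(' ').take(end - start));
        out.push_str(&src[end..]);
        out
    }

    fn main() {
        let src = "#[derive(Copy)] struct A;";
        let blanked = blank_span(src, 0, 15);
        assert_eq!(blanked.len(), src.len()); // total length unchanged
        assert_eq!(&blanked[16..22], "struct"); // later offsets still line up
    }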

crates/hir_expand/src/lib.rs

@@ -14,7 +14,6 @@ pub mod builtin_macro;
 pub mod proc_macro;
 pub mod quote;
 pub mod eager;
-mod input;

 use base_db::ProcMacroKind;
 use either::Either;

crates/mbe/src/lib.rs

@@ -67,7 +67,7 @@ impl fmt::Display for ExpandError {
 pub use crate::{
     syntax_bridge::{
         parse_exprs_with_sep, parse_to_token_tree, syntax_node_to_token_tree,
-        token_tree_to_syntax_node,
+        syntax_node_to_token_tree_censored, token_tree_to_syntax_node,
     },
     token_map::TokenMap,
 };

crates/mbe/src/syntax_bridge.rs

@@ -1,5 +1,7 @@
 //! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].

+use std::iter;
+
 use parser::{FragmentKind, ParseError, TreeSink};
 use rustc_hash::FxHashMap;
 use syntax::{
@@ -13,11 +15,20 @@ use tt::buffer::{Cursor, TokenBuffer};
 use crate::{subtree_source::SubtreeTokenSource, tt_iter::TtIter};
 use crate::{ExpandError, TokenMap};

-/// Convert the syntax node to a `TokenTree` (what macro
+/// Convert the syntax node to a `TokenTree` with the censored nodes excluded (what macro
 /// will consume).
 pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) {
+    syntax_node_to_token_tree_censored(node, None)
+}
+
+/// Convert the syntax node to a `TokenTree` with the censored nodes excluded (what macro
+/// will consume).
+pub fn syntax_node_to_token_tree_censored(
+    node: &SyntaxNode,
+    censor: Option<TextRange>,
+) -> (tt::Subtree, TokenMap) {
     let global_offset = node.text_range().start();
-    let mut c = Convertor::new(node, global_offset);
+    let mut c = Convertor::new(node, global_offset, censor);
     let subtree = convert_tokens(&mut c);
     c.id_alloc.map.shrink_to_fit();
     (subtree, c.id_alloc.map)
@@ -446,16 +457,24 @@ impl<'a> TokenConvertor for RawConvertor<'a> {
 struct Convertor {
     id_alloc: TokenIdAlloc,
     current: Option<SyntaxToken>,
+    censor: Option<TextRange>,
     range: TextRange,
     punct_offset: Option<(SyntaxToken, TextSize)>,
 }

 impl Convertor {
-    fn new(node: &SyntaxNode, global_offset: TextSize) -> Convertor {
+    fn new(node: &SyntaxNode, global_offset: TextSize, censor: Option<TextRange>) -> Convertor {
+        let first = node.first_token();
+        let current = match censor {
+            Some(censor) => iter::successors(first, |token| token.next_token())
+                .find(|token| !censor.contains_range(token.text_range())),
+            None => first,
+        };
         Convertor {
             id_alloc: { TokenIdAlloc { map: TokenMap::default(), global_offset, next_id: 0 } },
-            current: node.first_token(),
+            current,
             range: node.text_range(),
+            censor,
             punct_offset: None,
         }
     }
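
Convertor::new (and bump in the next hunk) step past censored tokens with iter::successors, which lazily walks the next_token() chain. The pattern in isolation, with a slice index playing the role of the token cursor (self-contained sketch, not the real cursor type):

    use std::iter;

    fn main() {
        // Token ranges for `#[derive(Foo)] struct S;`, censoring the attribute.
        let ranges: [(u32, u32); 4] = [(0, 14), (15, 21), (22, 23), (23, 24)];
        let censor = (0u32, 14u32);
        let contained = |r: (u32, u32)| censor.0 <= r.0 && r.1 <= censor.1;

        // successors(first, next) yields first, next(first), next(next(first)), ...
        // exactly like hopping along `token.next_token()` links.
        let first_kept = iter::successors(Some(0usize), |&i| {
            if i + 1 < ranges.len() { Some(i + 1) } else { None }
        })
        .find(|&i| !contained(ranges[i]));

        assert_eq!(first_kept, Some(1)); // the `struct` token is kept
    }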
@@ -512,8 +531,11 @@ impl TokenConvertor for Convertor {
        if !&self.range.contains_range(curr.text_range()) {
            return None;
        }
-        self.current = curr.next_token();
+        self.current = match self.censor {
+            Some(censor) => iter::successors(curr.next_token(), |token| token.next_token())
+                .find(|token| !censor.contains_range(token.text_range())),
+            None => curr.next_token(),
+        };
        let token = if curr.kind().is_punct() {
            let range = curr.text_range();
            let range = TextRange::at(range.start(), TextSize::of('.'));
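
Putting the pieces together, a hypothetical caller-side sketch of the new entry point (assuming the workspace's syntax and mbe crates as dependencies; AttrsOwner was the attribute-access trait at the time of this commit):

    use syntax::{
        ast::{self, AttrsOwner},
        AstNode, SourceFile,
    };

    fn main() {
        let file = SourceFile::parse("#[derive(Clone)] struct S;").tree();
        let item = file.syntax().descendants().find_map(ast::Item::cast).unwrap();

        // Censor the attribute's range instead of splicing it out of the tree.
        let censor = item.attrs().next().map(|attr| attr.syntax().text_range());

        let (subtree, _token_map) =
            mbe::syntax_node_to_token_tree_censored(item.syntax(), censor);
        println!("{:?}", subtree); // token tree for `struct S ;`, attribute skipped
    }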