Search for inactive cfg attributes and empty macro expansion through the entire block
Jason Newcomb 2023-03-07 22:51:35 -05:00
parent 392e9551d4
commit 0b3c2ed811
5 changed files with 229 additions and 74 deletions
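
The shape of the problem, condensed from the tests added at the bottom of this commit: after expansion both arm bodies below are just `{ x }`, because `m!` expands to nothing, so the old check (which only compared source tokens for entirely empty blocks) saw the arms as identical. The new logic compares the source tokens throughout each block, so the differing `m!(foo)` / `m!(bar)` text keeps the arms distinct. Illustrative sketch, not part of the diff:

macro_rules! m {
    (foo) => {};
    (bar) => {};
}

fn f(n: u32) -> u32 {
    let x = 0;
    match n {
        // Both arms lower to the same HIR, but their source tokens differ.
        0 => {
            m!(foo);
            x
        },
        1 => {
            m!(bar);
            x
        },
        _ => 1,
    }
}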

View file

@@ -25,9 +25,9 @@ mod wild_in_or_pats;
use clippy_utils::msrvs::{self, Msrv};
use clippy_utils::source::{snippet_opt, walk_span_to_context};
use clippy_utils::{higher, in_constant, is_span_match};
use clippy_utils::{higher, in_constant, is_span_match, tokenize_with_text};
use rustc_hir::{Arm, Expr, ExprKind, Local, MatchSource, Pat};
use rustc_lexer::{tokenize, TokenKind};
use rustc_lexer::TokenKind;
use rustc_lint::{LateContext, LateLintPass, LintContext};
use rustc_middle::lint::in_external_macro;
use rustc_session::{declare_tool_lint, impl_lint_pass};
@@ -1147,12 +1147,7 @@ fn span_contains_cfg(cx: &LateContext<'_>, s: Span) -> bool {
// Assume true. This would require either an invalid span, or one which crosses file boundaries.
return true;
};
let mut pos = 0usize;
let mut iter = tokenize(&snip).map(|t| {
let start = pos;
pos += t.len as usize;
(t.kind, start..pos)
});
let mut iter = tokenize_with_text(&snip);
// Search for the token sequence [`#`, `[`, `cfg`]
while iter.any(|(t, _)| matches!(t, TokenKind::Pound)) {
@@ -1163,7 +1158,7 @@ fn span_contains_cfg(cx: &LateContext<'_>, s: Span) -> bool {
)
});
if matches!(iter.next(), Some((TokenKind::OpenBracket, _)))
&& matches!(iter.next(), Some((TokenKind::Ident, range)) if &snip[range.clone()] == "cfg")
&& matches!(iter.next(), Some((TokenKind::Ident, "cfg")))
{
return true;
}
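
For reference, a self-contained sketch of the simplified scan, written directly against `rustc_lexer` with the text-pairing inlined (the function name is invented; the real code works on a `Span` through `span_contains_cfg` and the new `tokenize_with_text` helper):

use rustc_lexer::{tokenize, TokenKind};

// Standalone analogue of `span_contains_cfg`: pair each token with its text,
// then look for the sequence `#`, `[`, `cfg`, skipping whitespace and comments.
fn snippet_contains_cfg(snip: &str) -> bool {
    let mut pos = 0usize;
    let mut iter = tokenize(snip).map(|t| {
        let end = pos + t.len as usize;
        let text = snip.get(pos..end).unwrap_or_default();
        pos = end;
        (t.kind, text)
    });
    while iter.any(|(t, _)| matches!(t, TokenKind::Pound)) {
        let mut rest = iter.by_ref().skip_while(|(t, _)| {
            matches!(
                t,
                TokenKind::Whitespace | TokenKind::LineComment { .. } | TokenKind::BlockComment { .. }
            )
        });
        if matches!(rest.next(), Some((TokenKind::OpenBracket, _)))
            && matches!(rest.next(), Some((TokenKind::Ident, "cfg")))
        {
            return true;
        }
    }
    false
}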

View file

@@ -1,6 +1,7 @@
use crate::consts::constant_simple;
use crate::macros::macro_backtrace;
use crate::source::snippet_opt;
use crate::source::{get_source_text, snippet_opt, walk_span_to_context, SpanRange};
use crate::tokenize_with_text;
use rustc_ast::ast::InlineAsmTemplatePiece;
use rustc_data_structures::fx::FxHasher;
use rustc_hir::def::Res;
@@ -13,8 +14,9 @@ use rustc_hir::{
use rustc_lexer::{tokenize, TokenKind};
use rustc_lint::LateContext;
use rustc_middle::ty::TypeckResults;
use rustc_span::{sym, Symbol};
use rustc_span::{sym, BytePos, Symbol, SyntaxContext};
use std::hash::{Hash, Hasher};
use std::ops::Range;
/// Callback that is called when two expressions are not equal in the sense of `SpanlessEq`, but
/// other conditions would make them equal.
@@ -127,51 +129,83 @@ impl HirEqInterExpr<'_, '_, '_> {
/// Checks whether two blocks are the same.
fn eq_block(&mut self, left: &Block<'_>, right: &Block<'_>) -> bool {
match (left.stmts, left.expr, right.stmts, right.expr) {
([], None, [], None) => {
// For empty blocks, check to see if the tokens are equal. This will catch the case where a macro
// expanded to nothing, or the cfg attribute was used.
let (Some(left), Some(right)) = (
snippet_opt(self.inner.cx, left.span),
snippet_opt(self.inner.cx, right.span),
) else { return true };
let mut left_pos = 0;
let left = tokenize(&left)
.map(|t| {
let end = left_pos + t.len as usize;
let s = &left[left_pos..end];
left_pos = end;
(t, s)
})
.filter(|(t, _)| {
!matches!(
t.kind,
TokenKind::LineComment { .. } | TokenKind::BlockComment { .. } | TokenKind::Whitespace
)
})
.map(|(_, s)| s);
let mut right_pos = 0;
let right = tokenize(&right)
.map(|t| {
let end = right_pos + t.len as usize;
let s = &right[right_pos..end];
right_pos = end;
(t, s)
})
.filter(|(t, _)| {
!matches!(
t.kind,
TokenKind::LineComment { .. } | TokenKind::BlockComment { .. } | TokenKind::Whitespace
)
})
.map(|(_, s)| s);
left.eq(right)
},
_ => {
over(left.stmts, right.stmts, |l, r| self.eq_stmt(l, r))
&& both(&left.expr, &right.expr, |l, r| self.eq_expr(l, r))
},
use TokenKind::{BlockComment, LineComment, Semi, Whitespace};
if left.stmts.len() != right.stmts.len() {
return false;
}
let lspan = left.span.data();
let rspan = right.span.data();
if lspan.ctxt != SyntaxContext::root() && rspan.ctxt != SyntaxContext::root() {
// Don't try to check in between statements inside macros.
return over(left.stmts, right.stmts, |left, right| self.eq_stmt(left, right))
&& both(&left.expr, &right.expr, |left, right| self.eq_expr(left, right));
}
if lspan.ctxt != rspan.ctxt {
return false;
}
let mut lstart = lspan.lo;
let mut rstart = rspan.lo;
for (left, right) in left.stmts.iter().zip(right.stmts) {
if !self.eq_stmt(left, right) {
return false;
}
let Some(lstmt_span) = walk_span_to_context(left.span, lspan.ctxt) else {
return false;
};
let Some(rstmt_span) = walk_span_to_context(right.span, rspan.ctxt) else {
return false;
};
let lstmt_span = lstmt_span.data();
let rstmt_span = rstmt_span.data();
if lstmt_span.lo < lstart && rstmt_span.lo < rstart {
// Can happen when macros expand to multiple statements, or rearrange statements.
// Nothing in between the statements to check in this case.
continue;
} else if lstmt_span.lo < lstart || rstmt_span.lo < rstart {
// Only one of the blocks had a weird macro.
return false;
}
if !eq_span_tokens(self.inner.cx, lstart..lstmt_span.lo, rstart..rstmt_span.lo, |t| {
!matches!(t, Whitespace | LineComment { .. } | BlockComment { .. } | Semi)
}) {
return false;
}
lstart = lstmt_span.hi;
rstart = rstmt_span.hi;
}
let (lend, rend) = match (left.expr, right.expr) {
(Some(left), Some(right)) => {
if !self.eq_expr(left, right) {
return false;
}
let Some(lexpr_span) = walk_span_to_context(left.span, lspan.ctxt) else {
return false;
};
let Some(rexpr_span) = walk_span_to_context(right.span, rspan.ctxt) else {
return false;
};
(lexpr_span.lo(), rexpr_span.lo())
},
(None, None) => (lspan.hi, rspan.hi),
(Some(_), None) | (None, Some(_)) => return false,
};
if lend < lstart && rend < rstart {
// Can happen when macros rearrange the input.
// Nothing in between the statements to check in this case.
return true;
} else if lend < lstart || rend < rstart {
// Only one of the blocks had a weird macro
return false;
}
eq_span_tokens(self.inner.cx, lstart..lend, rstart..rend, |t| {
!matches!(t, Whitespace | LineComment { .. } | BlockComment { .. } | Semi)
})
}
fn should_ignore(&mut self, expr: &Expr<'_>) -> bool {
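
Why comparing the raw tokens between statement spans works: `cfg` stripping and macro expansion both happen before HIR lowering, so a `#[cfg]`-gated statement or a macro call that expands to nothing leaves no HIR statement at all; its only trace is source text sitting in the gap between the spans of the surviving statements (or before the trailing expression). A small illustration with made-up cfg and macro names:

macro_rules! m { () => {}; } // expands to nothing

fn a() -> u32 {
    let mut x = 0;
    #[cfg(loom)] // assumed inactive here
    {
        x = 5;
    }
    x
}

fn b() -> u32 {
    let mut x = 0;
    m!(); // empty expansion
    x
}

// Both bodies end up with one HIR statement (`let mut x = 0;`) plus the tail
// expression `x`, but the filtered gap tokens differ (`# [ cfg ( loom ) ] { x = 5 }`
// vs `m ! ( )`), so the rewritten `eq_block` no longer treats the blocks as equal.
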
@@ -1038,3 +1072,33 @@ pub fn hash_expr(cx: &LateContext<'_>, e: &Expr<'_>) -> u64 {
h.hash_expr(e);
h.finish()
}
fn eq_span_tokens(
cx: &LateContext<'_>,
left: impl SpanRange,
right: impl SpanRange,
pred: impl Fn(TokenKind) -> bool,
) -> bool {
fn f(cx: &LateContext<'_>, left: Range<BytePos>, right: Range<BytePos>, pred: impl Fn(TokenKind) -> bool) -> bool {
if let Some(lsrc) = get_source_text(cx, left)
&& let Some(lsrc) = lsrc.as_str()
&& let Some(rsrc) = get_source_text(cx, right)
&& let Some(rsrc) = rsrc.as_str()
{
let pred = |t: &(_, _)| pred(t.0);
let map = |(_, x)| x;
let ltok = tokenize_with_text(lsrc)
.filter(pred)
.map(map);
let rtok = tokenize_with_text(rsrc)
.filter(pred)
.map(map);
ltok.eq(rtok)
} else {
// Unable to access the source. Conservatively assume the blocks aren't equal.
false
}
}
f(cx, left.into_range(), right.into_range(), pred)
}
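
A string-level analogue of `eq_span_tokens`, assuming the new `clippy_utils::tokenize_with_text` helper defined further down in this commit (the real function resolves two source ranges through `get_source_text` instead of taking plain strings):

use clippy_utils::tokenize_with_text;
use rustc_lexer::TokenKind;

// Keep only the tokens that matter for equality.
fn keep(t: &(TokenKind, &str)) -> bool {
    use TokenKind::{BlockComment, LineComment, Semi, Whitespace};
    !matches!(t.0, Whitespace | LineComment { .. } | BlockComment { .. } | Semi)
}

// Two snippets are "token-equal" if the surviving tokens have identical text.
fn snippets_token_eq(left: &str, right: &str) -> bool {
    tokenize_with_text(left)
        .filter(keep)
        .map(|(_, s)| s)
        .eq(tokenize_with_text(right).filter(keep).map(|(_, s)| s))
}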

View file

@@ -77,6 +77,7 @@ use std::sync::OnceLock;
use std::sync::{Mutex, MutexGuard};
use if_chain::if_chain;
use itertools::Itertools;
use rustc_ast::ast::{self, LitKind, RangeLimits};
use rustc_ast::Attribute;
use rustc_data_structures::fx::FxHashMap;
@@ -2490,6 +2491,17 @@ pub fn walk_to_expr_usage<'tcx, T>(
None
}
/// Tokenizes the input while keeping the text associated with each token.
pub fn tokenize_with_text(s: &str) -> impl Iterator<Item = (TokenKind, &str)> {
let mut pos = 0;
tokenize(s).map(move |t| {
let end = pos + t.len;
let range = pos as usize..end as usize;
pos = end;
(t.kind, s.get(range).unwrap_or_default())
})
}
/// Checks whether a given span has any comment token
/// This checks for all types of comment: line "//", block "/**", doc "///" "//!"
pub fn span_contains_comment(sm: &SourceMap, span: Span) -> bool {
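
A quick look at what the helper yields (a sketch; `rustc_lexer` classifies keywords as plain `Ident` tokens, and the payloads of `Literal`/`LineComment` are elided in the comment):

use clippy_utils::tokenize_with_text;
use rustc_lexer::TokenKind;

fn demo() {
    let pairs: Vec<_> = tokenize_with_text("let x = 1; // note")
        .filter(|(k, _)| !matches!(k, TokenKind::Whitespace))
        .collect();
    // roughly: (Ident, "let"), (Ident, "x"), (Eq, "="), (Literal { .. }, "1"),
    //          (Semi, ";"), (LineComment { .. }, "// note")
    assert_eq!(pairs.len(), 6);
}
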
@@ -2506,23 +2518,11 @@ pub fn span_contains_comment(sm: &SourceMap, span: Span) -> bool {
/// Comments are returned wrapped with their relevant delimiters
pub fn span_extract_comment(sm: &SourceMap, span: Span) -> String {
let snippet = sm.span_to_snippet(span).unwrap_or_default();
let mut comments_buf: Vec<String> = Vec::new();
let mut index: usize = 0;
for token in tokenize(&snippet) {
let token_range = index..(index + token.len as usize);
index += token.len as usize;
match token.kind {
TokenKind::BlockComment { .. } | TokenKind::LineComment { .. } => {
if let Some(comment) = snippet.get(token_range) {
comments_buf.push(comment.to_string());
}
},
_ => (),
}
}
comments_buf.join("\n")
let res = tokenize_with_text(&snippet)
.filter(|(t, _)| matches!(t, TokenKind::BlockComment { .. } | TokenKind::LineComment { .. }))
.map(|(_, s)| s)
.join("\n");
res
}
pub fn span_find_starting_semi(sm: &SourceMap, span: Span) -> Span {
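
The behavior of `span_extract_comment` is unchanged: every comment is returned with its delimiters, joined by newlines. A string-level sketch of the same filter (function names invented for illustration):

use clippy_utils::tokenize_with_text;
use itertools::Itertools;
use rustc_lexer::TokenKind;

// Keep only comment tokens and join their text, delimiters included.
fn extract_comments(snippet: &str) -> String {
    tokenize_with_text(snippet)
        .filter(|(t, _)| matches!(t, TokenKind::BlockComment { .. } | TokenKind::LineComment { .. }))
        .map(|(_, s)| s)
        .join("\n")
}

fn demo() {
    let src = "let x = 1; // first\n/* second */ let y = 2;";
    assert_eq!(extract_comments(src), "// first\n/* second */");
}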

View file

@@ -2,14 +2,64 @@
#![allow(clippy::module_name_repetitions)]
use rustc_data_structures::sync::Lrc;
use rustc_errors::Applicability;
use rustc_hir::{Expr, ExprKind};
use rustc_lint::{LateContext, LintContext};
use rustc_session::Session;
use rustc_span::hygiene;
use rustc_span::source_map::{original_sp, SourceMap};
use rustc_span::{hygiene, SourceFile};
use rustc_span::{BytePos, Pos, Span, SpanData, SyntaxContext, DUMMY_SP};
use std::borrow::Cow;
use std::ops::Range;
/// A type which can be converted to the range portion of a `Span`.
pub trait SpanRange {
fn into_range(self) -> Range<BytePos>;
}
impl SpanRange for Span {
fn into_range(self) -> Range<BytePos> {
let data = self.data();
data.lo..data.hi
}
}
impl SpanRange for SpanData {
fn into_range(self) -> Range<BytePos> {
self.lo..self.hi
}
}
impl SpanRange for Range<BytePos> {
fn into_range(self) -> Range<BytePos> {
self
}
}
pub struct SourceFileRange {
pub sf: Lrc<SourceFile>,
pub range: Range<usize>,
}
impl SourceFileRange {
/// Attempts to get the text from the source file. This can fail if the source text isn't
/// loaded.
pub fn as_str(&self) -> Option<&str> {
self.sf.src.as_ref().and_then(|x| x.get(self.range.clone()))
}
}
/// Gets the source file, and range in the file, of the given span. Returns `None` if the span
/// extends through multiple files, or is malformed.
pub fn get_source_text(cx: &impl LintContext, sp: impl SpanRange) -> Option<SourceFileRange> {
fn f(sm: &SourceMap, sp: Range<BytePos>) -> Option<SourceFileRange> {
let start = sm.lookup_byte_offset(sp.start);
let end = sm.lookup_byte_offset(sp.end);
if !Lrc::ptr_eq(&start.sf, &end.sf) || start.pos > end.pos {
return None;
}
let range = start.pos.to_usize()..end.pos.to_usize();
Some(SourceFileRange { sf: start.sf, range })
}
f(cx.sess().source_map(), sp.into_range())
}
/// Like `snippet_block`, but add braces if the expr is not an `ExprKind::Block`.
pub fn expr_block<T: LintContext>(
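
A hedged usage sketch for the new API: `get_source_text` accepts anything implementing `SpanRange` (a `Span`, a `SpanData`, or a raw `Range<BytePos>`) and returns a `SourceFileRange` whose `as_str` only succeeds when the file's source text is loaded. The helper name below is invented; `cx` and `expr` are what a `LateLintPass` callback would provide:

use clippy_utils::source::get_source_text;
use rustc_hir::Expr;
use rustc_lint::LateContext;

// Does the source behind `expr.span` start with an opening parenthesis?
// Falls back to `false` when the text is unavailable (e.g. spans from other crates).
fn starts_with_paren(cx: &LateContext<'_>, expr: &Expr<'_>) -> bool {
    get_source_text(cx, expr.span)
        .and_then(|src| src.as_str().map(|s| s.trim_start().starts_with('(')))
        .unwrap_or(false)
}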

View file

@@ -53,4 +53,50 @@ mod issue4244 {
}
}
fn main() {}
macro_rules! m {
(foo) => {};
(bar) => {};
}
fn main() {
let x = 0;
let _ = match 0 {
0 => {
m!(foo);
x
},
1 => {
m!(bar);
x
},
_ => 1,
};
let _ = match 0 {
0 => {
let mut x = 0;
#[cfg(not_enabled)]
{
x = 5;
}
#[cfg(not(not_enabled))]
{
x = 6;
}
x
},
1 => {
let mut x = 0;
#[cfg(also_not_enabled)]
{
x = 5;
}
#[cfg(not(also_not_enabled))]
{
x = 6;
}
x
},
_ => 0,
};
}