From 986f40fab0828257016477174f9a6036a4a7f4d3 Mon Sep 17 00:00:00 2001 From: Alex Macleod Date: Thu, 26 Jan 2023 15:30:44 +0000 Subject: [PATCH] `invalid_regex`: Show full error when string value doesn't match source --- clippy_lints/src/regex.rs | 89 +++++++++++++++++++-------------------- tests/ui/regex.rs | 4 ++ tests/ui/regex.stderr | 66 +++++++++++++++++++++-------- 3 files changed, 97 insertions(+), 62 deletions(-) diff --git a/clippy_lints/src/regex.rs b/clippy_lints/src/regex.rs index 1fda58fa5..9e6c6c73d 100644 --- a/clippy_lints/src/regex.rs +++ b/clippy_lints/src/regex.rs @@ -1,5 +1,8 @@ +use std::fmt::Display; + use clippy_utils::consts::{constant, Constant}; use clippy_utils::diagnostics::{span_lint, span_lint_and_help}; +use clippy_utils::source::snippet_opt; use clippy_utils::{match_def_path, paths}; use if_chain::if_chain; use rustc_ast::ast::{LitKind, StrStyle}; @@ -77,13 +80,45 @@ impl<'tcx> LateLintPass<'tcx> for Regex { } } -#[must_use] -fn str_span(base: Span, c: regex_syntax::ast::Span, offset: u8) -> Span { - let offset = u32::from(offset); - let end = base.lo() + BytePos(u32::try_from(c.end.offset).expect("offset too large") + offset); - let start = base.lo() + BytePos(u32::try_from(c.start.offset).expect("offset too large") + offset); - assert!(start <= end); - Span::new(start, end, base.ctxt(), base.parent()) +fn lint_syntax_error(cx: &LateContext<'_>, error: ®ex_syntax::Error, unescaped: &str, base: Span, offset: u8) { + let parts: Option<(_, _, &dyn Display)> = match &error { + regex_syntax::Error::Parse(e) => Some((e.span(), e.auxiliary_span(), e.kind())), + regex_syntax::Error::Translate(e) => Some((e.span(), None, e.kind())), + _ => None, + }; + + let convert_span = |regex_span: ®ex_syntax::ast::Span| { + let offset = u32::from(offset); + let start = base.lo() + BytePos(u32::try_from(regex_span.start.offset).expect("offset too large") + offset); + let end = base.lo() + BytePos(u32::try_from(regex_span.end.offset).expect("offset too large") + offset); + + Span::new(start, end, base.ctxt(), base.parent()) + }; + + if let Some((primary, auxiliary, kind)) = parts + && let Some(literal_snippet) = snippet_opt(cx, base) + && let Some(inner) = literal_snippet.get(offset as usize..) + // Only convert to native rustc spans if the parsed regex matches the + // source snippet exactly, to ensure the span offsets are correct + && inner.get(..unescaped.len()) == Some(unescaped) + { + let spans = if let Some(auxiliary) = auxiliary { + vec![convert_span(primary), convert_span(auxiliary)] + } else { + vec![convert_span(primary)] + }; + + span_lint(cx, INVALID_REGEX, spans, &format!("regex syntax error: {kind}")); + } else { + span_lint_and_help( + cx, + INVALID_REGEX, + base, + &error.to_string(), + None, + "consider using a raw string literal: `r\"..\"`", + ); + } } fn const_str<'tcx>(cx: &LateContext<'tcx>, e: &'tcx Expr<'_>) -> Option { @@ -155,25 +190,7 @@ fn check_regex<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'_>, utf8: bool) { span_lint_and_help(cx, TRIVIAL_REGEX, expr.span, "trivial regex", None, repl); } }, - Err(regex_syntax::Error::Parse(e)) => { - span_lint( - cx, - INVALID_REGEX, - str_span(expr.span, *e.span(), offset), - &format!("regex syntax error: {}", e.kind()), - ); - }, - Err(regex_syntax::Error::Translate(e)) => { - span_lint( - cx, - INVALID_REGEX, - str_span(expr.span, *e.span(), offset), - &format!("regex syntax error: {}", e.kind()), - ); - }, - Err(e) => { - span_lint(cx, INVALID_REGEX, expr.span, &format!("regex syntax error: {e}")); - }, + Err(e) => lint_syntax_error(cx, &e, r, expr.span, offset), } } } else if let Some(r) = const_str(cx, expr) { @@ -183,25 +200,7 @@ fn check_regex<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'_>, utf8: bool) { span_lint_and_help(cx, TRIVIAL_REGEX, expr.span, "trivial regex", None, repl); } }, - Err(regex_syntax::Error::Parse(e)) => { - span_lint( - cx, - INVALID_REGEX, - expr.span, - &format!("regex syntax error on position {}: {}", e.span().start.offset, e.kind()), - ); - }, - Err(regex_syntax::Error::Translate(e)) => { - span_lint( - cx, - INVALID_REGEX, - expr.span, - &format!("regex syntax error on position {}: {}", e.span().start.offset, e.kind()), - ); - }, - Err(e) => { - span_lint(cx, INVALID_REGEX, expr.span, &format!("regex syntax error: {e}")); - }, + Err(e) => span_lint(cx, INVALID_REGEX, expr.span, &e.to_string()), } } } diff --git a/tests/ui/regex.rs b/tests/ui/regex.rs index f0e1a8128..ab8ac97a0 100644 --- a/tests/ui/regex.rs +++ b/tests/ui/regex.rs @@ -36,6 +36,10 @@ fn syntax_error() { let raw_string_error = Regex::new(r"[...\/...]"); let raw_string_error = Regex::new(r#"[...\/...]"#); + + let escaped_string_span = Regex::new("\\b\\c"); + + let aux_span = Regex::new("(?ixi)"); } fn trivial_regex() { diff --git a/tests/ui/regex.stderr b/tests/ui/regex.stderr index 2424644c6..c2440f39e 100644 --- a/tests/ui/regex.stderr +++ b/tests/ui/regex.stderr @@ -29,7 +29,10 @@ error: regex syntax error: invalid character class range, the start must be <= t LL | let some_unicode = Regex::new("[é-è]"); | ^^^ -error: regex syntax error on position 0: unclosed group +error: regex parse error: + ( + ^ + error: unclosed group --> $DIR/regex.rs:18:33 | LL | let some_regex = Regex::new(OPENING_PAREN); @@ -43,25 +46,37 @@ LL | let binary_pipe_in_wrong_position = BRegex::new("|"); | = help: the regex is unlikely to be useful as it is -error: regex syntax error on position 0: unclosed group +error: regex parse error: + ( + ^ + error: unclosed group --> $DIR/regex.rs:21:41 | LL | let some_binary_regex = BRegex::new(OPENING_PAREN); | ^^^^^^^^^^^^^ -error: regex syntax error on position 0: unclosed group +error: regex parse error: + ( + ^ + error: unclosed group --> $DIR/regex.rs:22:56 | LL | let some_binary_regex_builder = BRegexBuilder::new(OPENING_PAREN); | ^^^^^^^^^^^^^ -error: regex syntax error on position 0: unclosed group +error: regex parse error: + ( + ^ + error: unclosed group --> $DIR/regex.rs:34:37 | LL | let set_error = RegexSet::new(&[OPENING_PAREN, r"[a-z]+/.(com|org|net)"]); | ^^^^^^^^^^^^^ -error: regex syntax error on position 0: unclosed group +error: regex parse error: + ( + ^ + error: unclosed group --> $DIR/regex.rs:35:39 | LL | let bset_error = BRegexSet::new(&[OPENING_PAREN, r"[a-z]+/.(com|org|net)"]); @@ -79,8 +94,25 @@ error: regex syntax error: unrecognized escape sequence LL | let raw_string_error = Regex::new(r#"[...//...]"#); | ^^ +error: regex parse error: + /b/c + ^^ + error: unrecognized escape sequence + --> $DIR/regex.rs:40:42 + | +LL | let escaped_string_span = Regex::new("/b/c"); + | ^^^^^^^^ + | + = help: consider using a raw string literal: `r".."` + +error: regex syntax error: duplicate flag + --> $DIR/regex.rs:42:34 + | +LL | let aux_span = Regex::new("(?ixi)"); + | ^ ^ + error: trivial regex - --> $DIR/regex.rs:42:33 + --> $DIR/regex.rs:46:33 | LL | let trivial_eq = Regex::new("^foobar$"); | ^^^^^^^^^^ @@ -88,7 +120,7 @@ LL | let trivial_eq = Regex::new("^foobar$"); = help: consider using `==` on `str`s error: trivial regex - --> $DIR/regex.rs:44:48 + --> $DIR/regex.rs:48:48 | LL | let trivial_eq_builder = RegexBuilder::new("^foobar$"); | ^^^^^^^^^^ @@ -96,7 +128,7 @@ LL | let trivial_eq_builder = RegexBuilder::new("^foobar$"); = help: consider using `==` on `str`s error: trivial regex - --> $DIR/regex.rs:46:42 + --> $DIR/regex.rs:50:42 | LL | let trivial_starts_with = Regex::new("^foobar"); | ^^^^^^^^^ @@ -104,7 +136,7 @@ LL | let trivial_starts_with = Regex::new("^foobar"); = help: consider using `str::starts_with` error: trivial regex - --> $DIR/regex.rs:48:40 + --> $DIR/regex.rs:52:40 | LL | let trivial_ends_with = Regex::new("foobar$"); | ^^^^^^^^^ @@ -112,7 +144,7 @@ LL | let trivial_ends_with = Regex::new("foobar$"); = help: consider using `str::ends_with` error: trivial regex - --> $DIR/regex.rs:50:39 + --> $DIR/regex.rs:54:39 | LL | let trivial_contains = Regex::new("foobar"); | ^^^^^^^^ @@ -120,7 +152,7 @@ LL | let trivial_contains = Regex::new("foobar"); = help: consider using `str::contains` error: trivial regex - --> $DIR/regex.rs:52:39 + --> $DIR/regex.rs:56:39 | LL | let trivial_contains = Regex::new(NOT_A_REAL_REGEX); | ^^^^^^^^^^^^^^^^ @@ -128,7 +160,7 @@ LL | let trivial_contains = Regex::new(NOT_A_REAL_REGEX); = help: consider using `str::contains` error: trivial regex - --> $DIR/regex.rs:54:40 + --> $DIR/regex.rs:58:40 | LL | let trivial_backslash = Regex::new("a/.b"); | ^^^^^^^ @@ -136,7 +168,7 @@ LL | let trivial_backslash = Regex::new("a/.b"); = help: consider using `str::contains` error: trivial regex - --> $DIR/regex.rs:57:36 + --> $DIR/regex.rs:61:36 | LL | let trivial_empty = Regex::new(""); | ^^ @@ -144,7 +176,7 @@ LL | let trivial_empty = Regex::new(""); = help: the regex is unlikely to be useful as it is error: trivial regex - --> $DIR/regex.rs:59:36 + --> $DIR/regex.rs:63:36 | LL | let trivial_empty = Regex::new("^"); | ^^^ @@ -152,7 +184,7 @@ LL | let trivial_empty = Regex::new("^"); = help: the regex is unlikely to be useful as it is error: trivial regex - --> $DIR/regex.rs:61:36 + --> $DIR/regex.rs:65:36 | LL | let trivial_empty = Regex::new("^$"); | ^^^^ @@ -160,12 +192,12 @@ LL | let trivial_empty = Regex::new("^$"); = help: consider using `str::is_empty` error: trivial regex - --> $DIR/regex.rs:63:44 + --> $DIR/regex.rs:67:44 | LL | let binary_trivial_empty = BRegex::new("^$"); | ^^^^ | = help: consider using `str::is_empty` -error: aborting due to 23 previous errors +error: aborting due to 25 previous errors