From 4f11f84dee891e80680f47d48cc33a0cb0229080 Mon Sep 17 00:00:00 2001 From: mcarton Date: Wed, 25 May 2016 17:15:19 +0200 Subject: [PATCH] Lint binary regexes --- CHANGELOG.md | 6 +- src/regex.rs | 108 ++++++++++++++++++++++-------------- src/utils/paths.rs | 6 +- tests/compile-fail/regex.rs | 33 ++++++++++- 4 files changed, 109 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a1e7322e..f17b1dd08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,13 @@ # Change Log All notable changes to this project will be documented in this file. +## 0.0.70 — TBD +* [`invalid_regex`] and [`trivial_regex`] can now warn on `RegexSet::new` and + byte regexes + ## 0.0.69 — 2016-05-20 * Rustup to *rustc 1.10.0-nightly (476fe6eef 2016-05-21)* -* `used_underscore_binding` has been made `Allow` temporarily +* [`used_underscore_binding`] has been made `Allow` temporarily ## 0.0.68 — 2016-05-17 * Rustup to *rustc 1.10.0-nightly (cd6a40017 2016-05-16)* diff --git a/src/regex.rs b/src/regex.rs index e1b4237b9..8876a649e 100644 --- a/src/regex.rs +++ b/src/regex.rs @@ -9,7 +9,7 @@ use std::error::Error; use syntax::ast::{LitKind, NodeId}; use syntax::codemap::{Span, BytePos}; use syntax::parse::token::InternedString; -use utils::{is_expn_of, match_path, match_type, paths, span_lint, span_help_and_lint}; +use utils::{is_expn_of, match_def_path, match_type, paths, span_lint, span_help_and_lint}; /// **What it does:** This lint checks `Regex::new(_)` invocations for correct regex syntax. /// @@ -81,7 +81,7 @@ impl LateLintPass for RegexPass { span, "`regex!(_)` found. \ Please use `Regex::new(_)`, which is faster for now."); - self.spans.insert(span); + self.spans.insert(span); } self.last = Some(block.id); }} @@ -96,46 +96,18 @@ impl LateLintPass for RegexPass { fn check_expr(&mut self, cx: &LateContext, expr: &Expr) { if_let_chain!{[ let ExprCall(ref fun, ref args) = expr.node, - let ExprPath(_, ref path) = fun.node, - match_path(path, &paths::REGEX_NEW) && args.len() == 1 + args.len() == 1, + let Some(def) = cx.tcx.def_map.borrow().get(&fun.id), ], { - if let ExprLit(ref lit) = args[0].node { - if let LitKind::Str(ref r, _) = lit.node { - match regex_syntax::Expr::parse(r) { - Ok(r) => { - if let Some(repl) = is_trivial_regex(&r) { - span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span, - "trivial regex", - &format!("consider using {}", repl)); - } - } - Err(e) => { - span_lint(cx, - INVALID_REGEX, - str_span(args[0].span, &r, e.position()), - &format!("regex syntax error: {}", - e.description())); - } - } - } - } else if let Some(r) = const_str(cx, &*args[0]) { - match regex_syntax::Expr::parse(&r) { - Ok(r) => { - if let Some(repl) = is_trivial_regex(&r) { - span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span, - "trivial regex", - &format!("consider using {}", repl)); - } - } - Err(e) => { - span_lint(cx, - INVALID_REGEX, - args[0].span, - &format!("regex syntax error on position {}: {}", - e.position(), - e.description())); - } - } + let def_id = def.def_id(); + if match_def_path(cx, def_id, &paths::REGEX_NEW) { + check_regex(cx, &args[0], true); + } else if match_def_path(cx, def_id, &paths::REGEX_BYTES_NEW) { + check_regex(cx, &args[0], false); + } else if match_def_path(cx, def_id, &paths::REGEX_SET_NEW) { + check_set(cx, &args[0], true); + } else if match_def_path(cx, def_id, &paths::REGEX_BYTES_SET_NEW) { + check_set(cx, &args[0], false); } }} } @@ -193,3 +165,57 @@ fn is_trivial_regex(s: ®ex_syntax::Expr) -> Option<&'static str> { _ => None, } } + +fn check_set(cx: &LateContext, expr: &Expr, utf8: bool) { + if_let_chain! {[ + let ExprAddrOf(_, ref expr) = expr.node, + let ExprVec(ref exprs) = expr.node, + ], { + for expr in exprs { + check_regex(cx, expr, utf8); + } + }} +} + +fn check_regex(cx: &LateContext, expr: &Expr, utf8: bool) { + let builder = regex_syntax::ExprBuilder::new().unicode(utf8); + + if let ExprLit(ref lit) = expr.node { + if let LitKind::Str(ref r, _) = lit.node { + match builder.parse(r) { + Ok(r) => { + if let Some(repl) = is_trivial_regex(&r) { + span_help_and_lint(cx, TRIVIAL_REGEX, expr.span, + "trivial regex", + &format!("consider using {}", repl)); + } + } + Err(e) => { + span_lint(cx, + INVALID_REGEX, + str_span(expr.span, r, e.position()), + &format!("regex syntax error: {}", + e.description())); + } + } + } + } else if let Some(r) = const_str(cx, expr) { + match builder.parse(&r) { + Ok(r) => { + if let Some(repl) = is_trivial_regex(&r) { + span_help_and_lint(cx, TRIVIAL_REGEX, expr.span, + "trivial regex", + &format!("consider using {}", repl)); + } + } + Err(e) => { + span_lint(cx, + INVALID_REGEX, + expr.span, + &format!("regex syntax error on position {}: {}", + e.position(), + e.description())); + } + } + } +} diff --git a/src/utils/paths.rs b/src/utils/paths.rs index 3db1e1c55..b0ce8b4a2 100644 --- a/src/utils/paths.rs +++ b/src/utils/paths.rs @@ -46,7 +46,11 @@ pub const RANGE_TO_INCLUSIVE: [&'static str; 3] = ["core", "ops", "RangeToInclus pub const RANGE_TO_INCLUSIVE_STD: [&'static str; 3] = ["std", "ops", "RangeToInclusive"]; pub const RANGE_TO_STD: [&'static str; 3] = ["std", "ops", "RangeTo"]; pub const REGEX: [&'static str; 3] = ["regex", "re_unicode", "Regex"]; -pub const REGEX_NEW: [&'static str; 3] = ["regex", "Regex", "new"]; +pub const REGEX_BYTES: [&'static str; 3] = ["regex", "re_bytes", "Regex"]; +pub const REGEX_BYTES_NEW: [&'static str; 4] = ["regex", "re_bytes", "Regex", "new"]; +pub const REGEX_BYTES_SET_NEW: [&'static str; 5] = ["regex", "re_set", "bytes", "RegexSet", "new"]; +pub const REGEX_NEW: [&'static str; 4] = ["regex", "re_unicode", "Regex", "new"]; +pub const REGEX_SET_NEW: [&'static str; 5] = ["regex", "re_set", "unicode", "RegexSet", "new"]; pub const RESULT: [&'static str; 3] = ["core", "result", "Result"]; pub const STRING: [&'static str; 3] = ["collections", "string", "String"]; pub const TRANSMUTE: [&'static str; 4] = ["core", "intrinsics", "", "transmute"]; diff --git a/tests/compile-fail/regex.rs b/tests/compile-fail/regex.rs index 9cd2bc809..d9f262fb0 100644 --- a/tests/compile-fail/regex.rs +++ b/tests/compile-fail/regex.rs @@ -6,7 +6,8 @@ extern crate regex; -use regex::Regex; +use regex::{Regex, RegexSet}; +use regex::bytes::{Regex as BRegex, RegexSet as BRegexSet}; const OPENING_PAREN : &'static str = "("; const NOT_A_REAL_REGEX : &'static str = "foobar"; @@ -22,8 +23,33 @@ fn syntax_error() { let some_regex = Regex::new(OPENING_PAREN); //~^ERROR: regex syntax error on position 0: unclosed + let binary_pipe_in_wrong_position = BRegex::new("|"); + //~^ERROR: regex syntax error: empty alternate + let some_binary_regex = BRegex::new(OPENING_PAREN); + //~^ERROR: regex syntax error on position 0: unclosed + let closing_paren = ")"; let not_linted = Regex::new(closing_paren); + + let set = RegexSet::new(&[ + r"[a-z]+@[a-z]+\.(com|org|net)", + r"[a-z]+\.(com|org|net)", + ]); + let bset = BRegexSet::new(&[ + r"[a-z]+@[a-z]+\.(com|org|net)", + r"[a-z]+\.(com|org|net)", + ]); + + let set_error = RegexSet::new(&[ + OPENING_PAREN, + //~^ERROR: regex syntax error on position 0: unclosed + r"[a-z]+\.(com|org|net)", + ]); + let bset_error = BRegexSet::new(&[ + OPENING_PAREN, + //~^ERROR: regex syntax error on position 0: unclosed + r"[a-z]+\.(com|org|net)", + ]); } fn trivial_regex() { @@ -64,12 +90,17 @@ fn trivial_regex() { //~^ERROR: trivial regex //~|HELP consider using `str::is_empty` + let binary_trivial_empty = BRegex::new("^$"); + //~^ERROR: trivial regex + //~|HELP consider using `str::is_empty` + // non-trivial regexes let non_trivial_dot = Regex::new("a.b"); let non_trivial_eq = Regex::new("^foo|bar$"); let non_trivial_starts_with = Regex::new("^foo|bar"); let non_trivial_ends_with = Regex::new("^foo|bar"); let non_trivial_ends_with = Regex::new("foo|bar"); + let non_trivial_binary = BRegex::new("foo|bar"); } fn main() {