Lint binary regexes

This commit is contained in:
mcarton 2016-05-25 17:15:19 +02:00
parent 77ed899941
commit 4f11f84dee
4 changed files with 109 additions and 44 deletions

View file

@ -1,9 +1,13 @@
# Change Log
All notable changes to this project will be documented in this file.
## 0.0.70 — TBD
* [`invalid_regex`] and [`trivial_regex`] can now warn on `RegexSet::new` and
byte regexes
## 0.0.69 — 2016-05-20
* Rustup to *rustc 1.10.0-nightly (476fe6eef 2016-05-21)*
* `used_underscore_binding` has been made `Allow` temporarily
* [`used_underscore_binding`] has been made `Allow` temporarily
## 0.0.68 — 2016-05-17
* Rustup to *rustc 1.10.0-nightly (cd6a40017 2016-05-16)*

View file

@ -9,7 +9,7 @@ use std::error::Error;
use syntax::ast::{LitKind, NodeId};
use syntax::codemap::{Span, BytePos};
use syntax::parse::token::InternedString;
use utils::{is_expn_of, match_path, match_type, paths, span_lint, span_help_and_lint};
use utils::{is_expn_of, match_def_path, match_type, paths, span_lint, span_help_and_lint};
/// **What it does:** This lint checks `Regex::new(_)` invocations for correct regex syntax.
///
@ -81,7 +81,7 @@ impl LateLintPass for RegexPass {
span,
"`regex!(_)` found. \
Please use `Regex::new(_)`, which is faster for now.");
self.spans.insert(span);
self.spans.insert(span);
}
self.last = Some(block.id);
}}
@ -96,46 +96,18 @@ impl LateLintPass for RegexPass {
fn check_expr(&mut self, cx: &LateContext, expr: &Expr) {
if_let_chain!{[
let ExprCall(ref fun, ref args) = expr.node,
let ExprPath(_, ref path) = fun.node,
match_path(path, &paths::REGEX_NEW) && args.len() == 1
args.len() == 1,
let Some(def) = cx.tcx.def_map.borrow().get(&fun.id),
], {
if let ExprLit(ref lit) = args[0].node {
if let LitKind::Str(ref r, _) = lit.node {
match regex_syntax::Expr::parse(r) {
Ok(r) => {
if let Some(repl) = is_trivial_regex(&r) {
span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span,
"trivial regex",
&format!("consider using {}", repl));
}
}
Err(e) => {
span_lint(cx,
INVALID_REGEX,
str_span(args[0].span, &r, e.position()),
&format!("regex syntax error: {}",
e.description()));
}
}
}
} else if let Some(r) = const_str(cx, &*args[0]) {
match regex_syntax::Expr::parse(&r) {
Ok(r) => {
if let Some(repl) = is_trivial_regex(&r) {
span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span,
"trivial regex",
&format!("consider using {}", repl));
}
}
Err(e) => {
span_lint(cx,
INVALID_REGEX,
args[0].span,
&format!("regex syntax error on position {}: {}",
e.position(),
e.description()));
}
}
let def_id = def.def_id();
if match_def_path(cx, def_id, &paths::REGEX_NEW) {
check_regex(cx, &args[0], true);
} else if match_def_path(cx, def_id, &paths::REGEX_BYTES_NEW) {
check_regex(cx, &args[0], false);
} else if match_def_path(cx, def_id, &paths::REGEX_SET_NEW) {
check_set(cx, &args[0], true);
} else if match_def_path(cx, def_id, &paths::REGEX_BYTES_SET_NEW) {
check_set(cx, &args[0], false);
}
}}
}
@ -193,3 +165,57 @@ fn is_trivial_regex(s: &regex_syntax::Expr) -> Option<&'static str> {
_ => None,
}
}
fn check_set(cx: &LateContext, expr: &Expr, utf8: bool) {
if_let_chain! {[
let ExprAddrOf(_, ref expr) = expr.node,
let ExprVec(ref exprs) = expr.node,
], {
for expr in exprs {
check_regex(cx, expr, utf8);
}
}}
}
fn check_regex(cx: &LateContext, expr: &Expr, utf8: bool) {
let builder = regex_syntax::ExprBuilder::new().unicode(utf8);
if let ExprLit(ref lit) = expr.node {
if let LitKind::Str(ref r, _) = lit.node {
match builder.parse(r) {
Ok(r) => {
if let Some(repl) = is_trivial_regex(&r) {
span_help_and_lint(cx, TRIVIAL_REGEX, expr.span,
"trivial regex",
&format!("consider using {}", repl));
}
}
Err(e) => {
span_lint(cx,
INVALID_REGEX,
str_span(expr.span, r, e.position()),
&format!("regex syntax error: {}",
e.description()));
}
}
}
} else if let Some(r) = const_str(cx, expr) {
match builder.parse(&r) {
Ok(r) => {
if let Some(repl) = is_trivial_regex(&r) {
span_help_and_lint(cx, TRIVIAL_REGEX, expr.span,
"trivial regex",
&format!("consider using {}", repl));
}
}
Err(e) => {
span_lint(cx,
INVALID_REGEX,
expr.span,
&format!("regex syntax error on position {}: {}",
e.position(),
e.description()));
}
}
}
}

View file

@ -46,7 +46,11 @@ pub const RANGE_TO_INCLUSIVE: [&'static str; 3] = ["core", "ops", "RangeToInclus
pub const RANGE_TO_INCLUSIVE_STD: [&'static str; 3] = ["std", "ops", "RangeToInclusive"];
pub const RANGE_TO_STD: [&'static str; 3] = ["std", "ops", "RangeTo"];
pub const REGEX: [&'static str; 3] = ["regex", "re_unicode", "Regex"];
pub const REGEX_NEW: [&'static str; 3] = ["regex", "Regex", "new"];
pub const REGEX_BYTES: [&'static str; 3] = ["regex", "re_bytes", "Regex"];
pub const REGEX_BYTES_NEW: [&'static str; 4] = ["regex", "re_bytes", "Regex", "new"];
pub const REGEX_BYTES_SET_NEW: [&'static str; 5] = ["regex", "re_set", "bytes", "RegexSet", "new"];
pub const REGEX_NEW: [&'static str; 4] = ["regex", "re_unicode", "Regex", "new"];
pub const REGEX_SET_NEW: [&'static str; 5] = ["regex", "re_set", "unicode", "RegexSet", "new"];
pub const RESULT: [&'static str; 3] = ["core", "result", "Result"];
pub const STRING: [&'static str; 3] = ["collections", "string", "String"];
pub const TRANSMUTE: [&'static str; 4] = ["core", "intrinsics", "", "transmute"];

View file

@ -6,7 +6,8 @@
extern crate regex;
use regex::Regex;
use regex::{Regex, RegexSet};
use regex::bytes::{Regex as BRegex, RegexSet as BRegexSet};
const OPENING_PAREN : &'static str = "(";
const NOT_A_REAL_REGEX : &'static str = "foobar";
@ -22,8 +23,33 @@ fn syntax_error() {
let some_regex = Regex::new(OPENING_PAREN);
//~^ERROR: regex syntax error on position 0: unclosed
let binary_pipe_in_wrong_position = BRegex::new("|");
//~^ERROR: regex syntax error: empty alternate
let some_binary_regex = BRegex::new(OPENING_PAREN);
//~^ERROR: regex syntax error on position 0: unclosed
let closing_paren = ")";
let not_linted = Regex::new(closing_paren);
let set = RegexSet::new(&[
r"[a-z]+@[a-z]+\.(com|org|net)",
r"[a-z]+\.(com|org|net)",
]);
let bset = BRegexSet::new(&[
r"[a-z]+@[a-z]+\.(com|org|net)",
r"[a-z]+\.(com|org|net)",
]);
let set_error = RegexSet::new(&[
OPENING_PAREN,
//~^ERROR: regex syntax error on position 0: unclosed
r"[a-z]+\.(com|org|net)",
]);
let bset_error = BRegexSet::new(&[
OPENING_PAREN,
//~^ERROR: regex syntax error on position 0: unclosed
r"[a-z]+\.(com|org|net)",
]);
}
fn trivial_regex() {
@ -64,12 +90,17 @@ fn trivial_regex() {
//~^ERROR: trivial regex
//~|HELP consider using `str::is_empty`
let binary_trivial_empty = BRegex::new("^$");
//~^ERROR: trivial regex
//~|HELP consider using `str::is_empty`
// non-trivial regexes
let non_trivial_dot = Regex::new("a.b");
let non_trivial_eq = Regex::new("^foo|bar$");
let non_trivial_starts_with = Regex::new("^foo|bar");
let non_trivial_ends_with = Regex::new("^foo|bar");
let non_trivial_ends_with = Regex::new("foo|bar");
let non_trivial_binary = BRegex::new("foo|bar");
}
fn main() {