From 3d85cc24e7c9a62359ca837570d5e08105d22036 Mon Sep 17 00:00:00 2001 From: llogiq Date: Fri, 5 Feb 2016 00:36:06 +0100 Subject: [PATCH 1/2] new regex syntax lint, fixes #597 --- Cargo.toml | 1 + README.md | 4 ++- src/lib.rs | 8 ++++-- src/regex.rs | 53 +++++++++++++++++++++++++++++++++++++ src/utils.rs | 1 + tests/compile-fail/regex.rs | 16 +++++++++++ tests/compile-test.rs | 2 +- 7 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 src/regex.rs create mode 100644 tests/compile-fail/regex.rs diff --git a/Cargo.toml b/Cargo.toml index b74db8f95..c0d600e41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ plugin = true [dependencies] unicode-normalization = "0.1" semver = "0.2.1" +regex-syntax = "0.2.2" [dev-dependencies] compiletest_rs = "0.0.11" diff --git a/README.md b/README.md index 3f99b73c4..05cc5031f 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ A collection of lints to catch common mistakes and improve your Rust code. [Jump to usage instructions](#usage) ##Lints -There are 109 lints included in this crate: +There are 111 lints included in this crate: name | default | meaning ---------------------------------------------------------------------------------------------------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ @@ -46,6 +46,7 @@ name [identity_op](https://github.com/Manishearth/rust-clippy/wiki#identity_op) | warn | using identity operations, e.g. `x + 0` or `y / 1` [ineffective_bit_mask](https://github.com/Manishearth/rust-clippy/wiki#ineffective_bit_mask) | warn | expressions where a bit mask will be rendered useless by a comparison, e.g. `(x | 1) > 2` [inline_always](https://github.com/Manishearth/rust-clippy/wiki#inline_always) | warn | `#[inline(always)]` is a bad idea in most cases +[invalid_regex](https://github.com/Manishearth/rust-clippy/wiki#invalid_regex) | deny | finds invalid regular expressions in `Regex::new(_)` invocations [items_after_statements](https://github.com/Manishearth/rust-clippy/wiki#items_after_statements) | warn | finds blocks where an item comes after a statement [iter_next_loop](https://github.com/Manishearth/rust-clippy/wiki#iter_next_loop) | warn | for-looping over `_.next()` which is probably not intended [len_without_is_empty](https://github.com/Manishearth/rust-clippy/wiki#len_without_is_empty) | warn | traits and impls that have `.len()` but not `.is_empty()` @@ -85,6 +86,7 @@ name [range_zip_with_len](https://github.com/Manishearth/rust-clippy/wiki#range_zip_with_len) | warn | zipping iterator with a range when enumerate() would do [redundant_closure](https://github.com/Manishearth/rust-clippy/wiki#redundant_closure) | warn | using redundant closures, i.e. `|a| foo(a)` (which can be written as just `foo`) [redundant_pattern](https://github.com/Manishearth/rust-clippy/wiki#redundant_pattern) | warn | using `name @ _` in a pattern +[regex_macro](https://github.com/Manishearth/rust-clippy/wiki#regex_macro) | allow | finds use of `regex!(_)`, suggests `Regex::new(_)` instead [result_unwrap_used](https://github.com/Manishearth/rust-clippy/wiki#result_unwrap_used) | allow | using `Result.unwrap()`, which might be better handled [reverse_range_loop](https://github.com/Manishearth/rust-clippy/wiki#reverse_range_loop) | warn | Iterating over an empty range, such as `10..0` or `5..5` [search_is_some](https://github.com/Manishearth/rust-clippy/wiki#search_is_some) | warn | using an iterator search followed by `is_some()`, which is more succinctly expressed as a call to `any()` diff --git a/src/lib.rs b/src/lib.rs index 44dd9fa8c..87d23c96d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,9 @@ extern crate unicode_normalization; // for semver check in attrs.rs extern crate semver; +// for regex checking +extern crate regex_syntax; + extern crate rustc_plugin; use rustc_plugin::Registry; @@ -82,6 +85,7 @@ pub mod derive; pub mod print; pub mod vec; pub mod drop_ref; +pub mod regex; mod reexport { pub use syntax::ast::{Name, NodeId}; @@ -150,7 +154,7 @@ pub fn plugin_registrar(reg: &mut Registry) { reg.register_late_lint_pass(box vec::UselessVec); reg.register_late_lint_pass(box drop_ref::DropRefPass); reg.register_late_lint_pass(box types::AbsurdUnsignedComparisons); - + reg.register_late_lint_pass(box regex::RegexPass); reg.register_lint_group("clippy_pedantic", vec![ matches::SINGLE_MATCH_ELSE, @@ -163,7 +167,6 @@ pub fn plugin_registrar(reg: &mut Registry) { shadow::SHADOW_REUSE, shadow::SHADOW_SAME, shadow::SHADOW_UNRELATED, - strings::STRING_ADD, strings::STRING_ADD_ASSIGN, types::CAST_POSSIBLE_TRUNCATION, types::CAST_POSSIBLE_WRAP, @@ -250,6 +253,7 @@ pub fn plugin_registrar(reg: &mut Registry) { ptr_arg::PTR_ARG, ranges::RANGE_STEP_BY_ZERO, ranges::RANGE_ZIP_WITH_LEN, + regex::INVALID_REGEX, returns::LET_AND_RETURN, returns::NEEDLESS_RETURN, strings::STRING_LIT_AS_BYTES, diff --git a/src/regex.rs b/src/regex.rs new file mode 100644 index 000000000..e3363ad1d --- /dev/null +++ b/src/regex.rs @@ -0,0 +1,53 @@ +use regex_syntax; +use std::error::Error; +use syntax::codemap::{Span, BytePos, Pos}; +use rustc_front::hir::*; +use rustc::middle::const_eval::{eval_const_expr_partial, ConstVal}; +use rustc::middle::const_eval::EvalHint::ExprTypeChecked; +use rustc::lint::*; + +use utils::{match_path, REGEX_NEW_PATH, span_lint}; + +/// **What it does:** This lint checks `Regex::new(_)` invocations for correct regex syntax. It is `deny` by default. +/// +/// **Why is this bad?** This will lead to a runtime panic. +/// +/// **Known problems:** None. +/// +/// **Example:** `Regex::new("|")` +declare_lint! { + pub INVALID_REGEX, + Deny, + "finds invalid regular expressions in `Regex::new(_)` invocations" +} + +#[derive(Copy,Clone)] +pub struct RegexPass; + +impl LintPass for RegexPass { + fn get_lints(&self) -> LintArray { + lint_array!(INVALID_REGEX) + } +} + +impl LateLintPass for RegexPass { + fn check_expr(&mut self, cx: &LateContext, expr: &Expr) { + if_let_chain!{[ + let ExprCall(ref fun, ref args) = expr.node, + let ExprPath(_, ref path) = fun.node, + match_path(path, ®EX_NEW_PATH) && args.len() == 1, + let Ok(ConstVal::Str(r)) = eval_const_expr_partial(cx.tcx, + &*args[0], + ExprTypeChecked, + None), + let Err(e) = regex_syntax::Expr::parse(&r) + ], { + let lo = args[0].span.lo + BytePos::from_usize(e.position()); + let span = Span{ lo: lo, hi: lo, expn_id: args[0].span.expn_id }; + span_lint(cx, + INVALID_REGEX, + span, + &format!("Regex syntax error: {}", e.description())); + }} + } +} diff --git a/src/utils.rs b/src/utils.rs index 78fb45cd6..8f542431d 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -36,6 +36,7 @@ pub const LL_PATH: [&'static str; 3] = ["collections", "linked_list", "LinkedLis pub const MUTEX_PATH: [&'static str; 4] = ["std", "sync", "mutex", "Mutex"]; pub const OPEN_OPTIONS_PATH: [&'static str; 3] = ["std", "fs", "OpenOptions"]; pub const OPTION_PATH: [&'static str; 3] = ["core", "option", "Option"]; +pub const REGEX_NEW_PATH: [&'static str; 3] = ["regex", "Regex", "new"]; pub const RESULT_PATH: [&'static str; 3] = ["core", "result", "Result"]; pub const STRING_PATH: [&'static str; 3] = ["collections", "string", "String"]; pub const VEC_FROM_ELEM_PATH: [&'static str; 3] = ["std", "vec", "from_elem"]; diff --git a/tests/compile-fail/regex.rs b/tests/compile-fail/regex.rs new file mode 100644 index 000000000..e2be26a99 --- /dev/null +++ b/tests/compile-fail/regex.rs @@ -0,0 +1,16 @@ +#![feature(plugin)] +#![plugin(clippy)] + +#![allow(unused)] +#![deny(invalid_regex)] + +extern crate regex; + +use regex::Regex; + +fn main() { + let pipe_in_wrong_position = Regex::new("|"); + //~^ERROR: Regex syntax error: empty alternate + let wrong_char_range = Regex::new("[z-a]"); + //~^ERROR: Regex syntax error: invalid character class range +} diff --git a/tests/compile-test.rs b/tests/compile-test.rs index 602937a40..92d2671ea 100644 --- a/tests/compile-test.rs +++ b/tests/compile-test.rs @@ -7,7 +7,7 @@ fn run_mode(mode: &'static str) { let mut config = compiletest::default_config(); let cfg_mode = mode.parse().ok().expect("Invalid mode"); - config.target_rustcflags = Some("-L target/debug/".to_owned()); + config.target_rustcflags = Some("-L target/debug/ -L target/debug/deps".to_owned()); if let Ok(name) = var::<&str>("TESTNAME") { let s : String = name.to_owned(); config.filter = Some(s) From a14514f7c8a9a93ed09f6c1ee3e3a21560ab66b8 Mon Sep 17 00:00:00 2001 From: llogiq Date: Fri, 5 Feb 2016 16:48:35 +0100 Subject: [PATCH 2/2] fixed span position and README --- README.md | 3 +- src/lib.rs | 1 + src/regex.rs | 56 ++++++++++++++++++++++++++++--------- tests/compile-fail/regex.rs | 10 ++++++- 4 files changed, 54 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 05cc5031f..eb843daab 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ A collection of lints to catch common mistakes and improve your Rust code. [Jump to usage instructions](#usage) ##Lints -There are 111 lints included in this crate: +There are 110 lints included in this crate: name | default | meaning ---------------------------------------------------------------------------------------------------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ @@ -86,7 +86,6 @@ name [range_zip_with_len](https://github.com/Manishearth/rust-clippy/wiki#range_zip_with_len) | warn | zipping iterator with a range when enumerate() would do [redundant_closure](https://github.com/Manishearth/rust-clippy/wiki#redundant_closure) | warn | using redundant closures, i.e. `|a| foo(a)` (which can be written as just `foo`) [redundant_pattern](https://github.com/Manishearth/rust-clippy/wiki#redundant_pattern) | warn | using `name @ _` in a pattern -[regex_macro](https://github.com/Manishearth/rust-clippy/wiki#regex_macro) | allow | finds use of `regex!(_)`, suggests `Regex::new(_)` instead [result_unwrap_used](https://github.com/Manishearth/rust-clippy/wiki#result_unwrap_used) | allow | using `Result.unwrap()`, which might be better handled [reverse_range_loop](https://github.com/Manishearth/rust-clippy/wiki#reverse_range_loop) | warn | Iterating over an empty range, such as `10..0` or `5..5` [search_is_some](https://github.com/Manishearth/rust-clippy/wiki#search_is_some) | warn | using an iterator search followed by `is_some()`, which is more succinctly expressed as a call to `any()` diff --git a/src/lib.rs b/src/lib.rs index 87d23c96d..a77f6829b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -167,6 +167,7 @@ pub fn plugin_registrar(reg: &mut Registry) { shadow::SHADOW_REUSE, shadow::SHADOW_SAME, shadow::SHADOW_UNRELATED, + strings::STRING_ADD, strings::STRING_ADD_ASSIGN, types::CAST_POSSIBLE_TRUNCATION, types::CAST_POSSIBLE_WRAP, diff --git a/src/regex.rs b/src/regex.rs index e3363ad1d..259bbb1b6 100644 --- a/src/regex.rs +++ b/src/regex.rs @@ -1,6 +1,8 @@ use regex_syntax; use std::error::Error; -use syntax::codemap::{Span, BytePos, Pos}; +use syntax::ast::Lit_::LitStr; +use syntax::codemap::{Span, BytePos}; +use syntax::parse::token::InternedString; use rustc_front::hir::*; use rustc::middle::const_eval::{eval_const_expr_partial, ConstVal}; use rustc::middle::const_eval::EvalHint::ExprTypeChecked; @@ -35,19 +37,47 @@ impl LateLintPass for RegexPass { if_let_chain!{[ let ExprCall(ref fun, ref args) = expr.node, let ExprPath(_, ref path) = fun.node, - match_path(path, ®EX_NEW_PATH) && args.len() == 1, - let Ok(ConstVal::Str(r)) = eval_const_expr_partial(cx.tcx, - &*args[0], - ExprTypeChecked, - None), - let Err(e) = regex_syntax::Expr::parse(&r) + match_path(path, ®EX_NEW_PATH) && args.len() == 1 ], { - let lo = args[0].span.lo + BytePos::from_usize(e.position()); - let span = Span{ lo: lo, hi: lo, expn_id: args[0].span.expn_id }; - span_lint(cx, - INVALID_REGEX, - span, - &format!("Regex syntax error: {}", e.description())); + if let ExprLit(ref lit) = args[0].node { + if let LitStr(ref r, _) = lit.node { + if let Err(e) = regex_syntax::Expr::parse(r) { + span_lint(cx, + INVALID_REGEX, + str_span(args[0].span, &r, e.position()), + &format!("Regex syntax error: {}", + e.description())); + } + } + } else { + if_let_chain!{[ + let Some(r) = const_str(cx, &*args[0]), + let Err(e) = regex_syntax::Expr::parse(&r) + ], { + span_lint(cx, + INVALID_REGEX, + args[0].span, + &format!("Regex syntax error on position {}: {}", + e.position(), + e.description())); + }} + } }} } } + +#[allow(cast_possible_truncation)] +fn str_span(base: Span, s: &str, c: usize) -> Span { + let lo = match s.char_indices().nth(c) { + Some((b, _)) => base.lo + BytePos(b as u32), + _ => base.hi + }; + Span{ lo: lo, hi: lo, ..base } +} + +fn const_str(cx: &LateContext, e: &Expr) -> Option { + match eval_const_expr_partial(cx.tcx, e, ExprTypeChecked, None) { + Ok(ConstVal::Str(r)) => Some(r), + _ => None + } +} diff --git a/tests/compile-fail/regex.rs b/tests/compile-fail/regex.rs index e2be26a99..34dfc1ef2 100644 --- a/tests/compile-fail/regex.rs +++ b/tests/compile-fail/regex.rs @@ -8,9 +8,17 @@ extern crate regex; use regex::Regex; +const OPENING_PAREN : &'static str = "("; + fn main() { let pipe_in_wrong_position = Regex::new("|"); //~^ERROR: Regex syntax error: empty alternate - let wrong_char_range = Regex::new("[z-a]"); + let wrong_char_ranice = Regex::new("[z-a]"); //~^ERROR: Regex syntax error: invalid character class range + + let some_regex = Regex::new(OPENING_PAREN); + //~^ERROR: Regex syntax error on position 0: unclosed + + let closing_paren = ")"; + let not_linted = Regex::new(closing_paren); }