2016-02-04 23:36:06 +00:00
use regex_syntax ;
2016-04-07 15:46:48 +00:00
use rustc ::hir ::* ;
2016-02-24 16:38:57 +00:00
use rustc ::lint ::* ;
2016-03-31 15:05:43 +00:00
use rustc ::middle ::const_val ::ConstVal ;
use rustc_const_eval ::EvalHint ::ExprTypeChecked ;
use rustc_const_eval ::eval_const_expr_partial ;
2016-02-08 22:48:04 +00:00
use std ::collections ::HashSet ;
2016-02-24 16:38:57 +00:00
use std ::error ::Error ;
2016-02-14 15:55:02 +00:00
use syntax ::ast ::{ LitKind , NodeId } ;
2016-02-05 15:48:35 +00:00
use syntax ::codemap ::{ Span , BytePos } ;
use syntax ::parse ::token ::InternedString ;
2016-05-25 15:15:19 +00:00
use utils ::{ is_expn_of , match_def_path , match_type , paths , span_lint , span_help_and_lint } ;
2016-02-04 23:36:06 +00:00
2016-02-05 23:41:54 +00:00
/// **What it does:** This lint checks `Regex::new(_)` invocations for correct regex syntax.
2016-02-04 23:36:06 +00:00
///
/// **Why is this bad?** This will lead to a runtime panic.
///
/// **Known problems:** None.
///
/// **Example:** `Regex::new("|")`
declare_lint! {
pub INVALID_REGEX ,
Deny ,
" finds invalid regular expressions in `Regex::new(_)` invocations "
}
2016-02-05 22:10:48 +00:00
/// **What it does:** This lint checks for `Regex::new(_)` invocations with a trivial regex.
///
/// **Why is this bad?** The regex can likely be replaced by `==`, `str::starts_with`,
/// `str::ends_with`, `str::contains` or other `str` methods.
///
/// **Known problems:** None.
///
/// **Example:** `Regex::new("^foobar")`
declare_lint! {
    pub TRIVIAL_REGEX,
    Warn,
    " finds trivial regular expressions in `Regex::new(_)` invocations "
}
2016-02-07 21:50:54 +00:00
/// **What it does:** This lint checks for usage of `regex!(_)`, which as of now is usually
/// slower than `Regex::new(_)` unless called in a loop (which is a bad idea anyway).
///
/// **Why is this bad?** Performance, at least for now. The macro version is likely to catch up
/// long-term, but for now the dynamic version is faster.
///
/// **Known problems:** None.
///
/// **Example:** `regex!("foo|bar")`
declare_lint! {
    pub REGEX_MACRO,
    Warn,
    " finds use of `regex!(_)`, suggests `Regex::new(_)` instead "
}
2016-02-14 15:55:02 +00:00
#[ derive(Clone, Default) ]
pub struct RegexPass {
spans : HashSet < Span > ,
2016-02-24 16:38:57 +00:00
last : Option < NodeId > ,
2016-02-14 15:55:02 +00:00
}
2016-02-04 23:36:06 +00:00
impl LintPass for RegexPass {
fn get_lints ( & self ) -> LintArray {
2016-02-07 21:50:54 +00:00
lint_array! ( INVALID_REGEX , REGEX_MACRO , TRIVIAL_REGEX )
2016-02-04 23:36:06 +00:00
}
}
impl LateLintPass for RegexPass {
2016-02-14 15:55:02 +00:00
fn check_crate ( & mut self , _ : & LateContext , _ : & Crate ) {
self . spans . clear ( ) ;
2016-02-07 21:50:54 +00:00
}
2016-02-14 15:55:02 +00:00
fn check_block ( & mut self , cx : & LateContext , block : & Block ) {
if_let_chain! { [
self . last . is_none ( ) ,
let Some ( ref expr ) = block . expr ,
2016-04-14 22:09:37 +00:00
match_type ( cx , cx . tcx . expr_ty ( expr ) , & paths ::REGEX ) ,
let Some ( span ) = is_expn_of ( cx , expr . span , " regex " ) ,
2016-02-14 15:55:02 +00:00
] , {
if ! self . spans . contains ( & span ) {
span_lint ( cx ,
REGEX_MACRO ,
span ,
" `regex!(_)` found. \
Please use ` Regex ::new ( _ ) ` , which is faster for now . " );
2016-05-25 15:15:19 +00:00
self . spans . insert ( span ) ;
2016-02-14 15:55:02 +00:00
}
self . last = Some ( block . id ) ;
} }
}
2016-02-24 16:38:57 +00:00
2016-02-14 15:55:02 +00:00
fn check_block_post ( & mut self , _ : & LateContext , block : & Block ) {
if self . last . map_or ( false , | id | block . id = = id ) {
2016-02-24 16:38:57 +00:00
self . last = None ;
2016-02-14 15:55:02 +00:00
}
}
2016-02-07 21:50:54 +00:00
2016-02-04 23:36:06 +00:00
fn check_expr ( & mut self , cx : & LateContext , expr : & Expr ) {
if_let_chain! { [
let ExprCall ( ref fun , ref args ) = expr . node ,
2016-05-25 15:15:19 +00:00
args . len ( ) = = 1 ,
let Some ( def ) = cx . tcx . def_map . borrow ( ) . get ( & fun . id ) ,
2016-02-04 23:36:06 +00:00
] , {
2016-05-25 15:15:19 +00:00
let def_id = def . def_id ( ) ;
if match_def_path ( cx , def_id , & paths ::REGEX_NEW ) {
check_regex ( cx , & args [ 0 ] , true ) ;
} else if match_def_path ( cx , def_id , & paths ::REGEX_BYTES_NEW ) {
check_regex ( cx , & args [ 0 ] , false ) ;
} else if match_def_path ( cx , def_id , & paths ::REGEX_SET_NEW ) {
check_set ( cx , & args [ 0 ] , true ) ;
} else if match_def_path ( cx , def_id , & paths ::REGEX_BYTES_SET_NEW ) {
check_set ( cx , & args [ 0 ] , false ) ;
2016-02-05 15:48:35 +00:00
}
2016-02-04 23:36:06 +00:00
} }
}
}
2016-02-05 15:48:35 +00:00
#[ allow(cast_possible_truncation) ]
fn str_span ( base : Span , s : & str , c : usize ) -> Span {
2016-05-07 22:56:23 +00:00
let mut si = s . char_indices ( ) . skip ( c ) ;
match ( si . next ( ) , si . next ( ) ) {
( Some ( ( l , _ ) ) , Some ( ( h , _ ) ) ) = > {
Span {
lo : base . lo + BytePos ( l as u32 ) ,
hi : base . lo + BytePos ( h as u32 ) ,
.. base
}
}
_ = > base ,
2016-02-29 11:19:32 +00:00
}
2016-02-05 15:48:35 +00:00
}
fn const_str ( cx : & LateContext , e : & Expr ) -> Option < InternedString > {
match eval_const_expr_partial ( cx . tcx , e , ExprTypeChecked , None ) {
Ok ( ConstVal ::Str ( r ) ) = > Some ( r ) ,
2016-02-24 16:38:57 +00:00
_ = > None ,
2016-02-05 15:48:35 +00:00
}
}
2016-02-05 22:10:48 +00:00
2016-02-06 17:06:39 +00:00
/// Classifies a parsed regex as trivial and, if so, returns the help text to show.
///
/// Recognised shapes are: an empty regex or a lone start/end anchor, a bare literal, and
/// concatenations of exactly two or three parts made up of a literal and/or the text anchors.
/// Any other expression yields `None`.
fn is_trivial_regex(s: &regex_syntax::Expr) -> Option<&'static str> {
    use regex_syntax::Expr;

    // Single-node expressions are decided immediately; only concatenations need a closer look.
    let parts = match *s {
        Expr::Empty | Expr::StartText | Expr::EndText => {
            return Some(" the regex is unlikely to be useful as it is ");
        }
        Expr::Literal { .. } => return Some(" consider using `str::contains` "),
        Expr::Concat(ref exprs) => exprs,
        _ => return None,
    };

    match parts.len() {
        2 => {
            match (&parts[0], &parts[1]) {
                (&Expr::StartText, &Expr::EndText) => Some(" consider using `str::is_empty` "),
                (&Expr::StartText, &Expr::Literal { .. }) => Some(" consider using `str::starts_with` "),
                (&Expr::Literal { .. }, &Expr::EndText) => Some(" consider using `str::ends_with` "),
                _ => None,
            }
        }
        3 => {
            match (&parts[0], &parts[1], &parts[2]) {
                (&Expr::StartText, &Expr::Literal { .. }, &Expr::EndText) => {
                    Some(" consider using `==` on `str`s ")
                }
                _ => None,
            }
        }
        _ => None,
    }
}
2016-05-25 15:15:19 +00:00
fn check_set ( cx : & LateContext , expr : & Expr , utf8 : bool ) {
if_let_chain! { [
let ExprAddrOf ( _ , ref expr ) = expr . node ,
let ExprVec ( ref exprs ) = expr . node ,
] , {
for expr in exprs {
check_regex ( cx , expr , utf8 ) ;
}
} }
}
fn check_regex ( cx : & LateContext , expr : & Expr , utf8 : bool ) {
let builder = regex_syntax ::ExprBuilder ::new ( ) . unicode ( utf8 ) ;
if let ExprLit ( ref lit ) = expr . node {
if let LitKind ::Str ( ref r , _ ) = lit . node {
match builder . parse ( r ) {
Ok ( r ) = > {
if let Some ( repl ) = is_trivial_regex ( & r ) {
span_help_and_lint ( cx , TRIVIAL_REGEX , expr . span ,
" trivial regex " ,
& format! ( " consider using {} " , repl ) ) ;
}
}
Err ( e ) = > {
span_lint ( cx ,
INVALID_REGEX ,
str_span ( expr . span , r , e . position ( ) ) ,
& format! ( " regex syntax error: {} " ,
e . description ( ) ) ) ;
}
}
}
} else if let Some ( r ) = const_str ( cx , expr ) {
match builder . parse ( & r ) {
Ok ( r ) = > {
if let Some ( repl ) = is_trivial_regex ( & r ) {
span_help_and_lint ( cx , TRIVIAL_REGEX , expr . span ,
" trivial regex " ,
& format! ( " consider using {} " , repl ) ) ;
}
}
Err ( e ) = > {
span_lint ( cx ,
INVALID_REGEX ,
expr . span ,
& format! ( " regex syntax error on position {} : {} " ,
e . position ( ) ,
e . description ( ) ) ) ;
}
}
}
}