Implement 'disallowed_script_idents' lint

This commit is contained in:
Igor Aleksanov 2021-06-30 19:06:33 +03:00
parent b286b38a29
commit 018be41dee
8 changed files with 152 additions and 2 deletions

View file

@ -2487,6 +2487,7 @@ Released 2018-09-13
[`derive_hash_xor_eq`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_hash_xor_eq
[`derive_ord_xor_partial_ord`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_ord_xor_partial_ord
[`disallowed_method`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_method
[`disallowed_script_idents`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_script_idents
[`disallowed_type`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_type
[`diverging_sub_expression`]: https://rust-lang.github.io/rust-clippy/master/index.html#diverging_sub_expression
[`doc_markdown`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_markdown

View file

@ -23,6 +23,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0", optional = true }
toml = "0.5.3"
unicode-normalization = "0.1"
unicode-script = { version = "0.5.3", default-features = false }
semver = "0.11"
rustc-semver = "1.1.0"
# NOTE: cargo requires serde feat in its url dep

View file

@ -0,0 +1,112 @@
use clippy_utils::diagnostics::span_lint;
use rustc_ast::ast;
use rustc_data_structures::fx::FxHashSet;
use rustc_lint::{EarlyContext, EarlyLintPass, Level};
use rustc_session::{declare_tool_lint, impl_lint_pass};
use unicode_script::{Script, UnicodeScript};
declare_clippy_lint! {
/// **What it does:** Checks for usage of unicode scripts other than those explicitly allowed
/// by the lint config.
///
/// This lint doesn't take into account non-text scripts such as `Unknown` and `Linear_A`.
/// It also ignores the `Common` script type.
/// While configuring, be sure to use official script name [aliases] from
/// [the list of supported scripts][supported_scripts].
///
/// See also: [`non_ascii_idents`].
///
/// [aliases]: http://www.unicode.org/reports/tr24/tr24-31.html#Script_Value_Aliases
/// [supported_scripts]: https://www.unicode.org/iso15924/iso15924-codes.html
///
/// **Why is this bad?** It may be not desired to have many different scripts for
/// identifiers in the codebase.
///
/// Note that if you only want to allow plain English, you might want to use
/// built-in [`non_ascii_idents`] lint instead.
///
/// [`non_ascii_idents`]: https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html#non-ascii-idents
///
/// **Known problems:** None.
///
/// **Example:**
/// ```rust
/// // Assuming that `clippy.toml` contains the following line:
/// // allowed-locales = ["Latin", "Cyrillic"]
/// let counter = 10; // OK, latin is allowed.
/// let счётчик = 10; // OK, cyrillic is allowed.
/// let zähler = 10; // OK, it's still latin.
/// let カウンタ = 10; // Will spawn the lint.
/// ```
pub DISALLOWED_SCRIPT_IDENTS,
restriction,
"usage of non-allowed Unicode scripts"
}
#[derive(Clone, Debug)]
pub struct DisallowedScriptIdents {
whitelist: FxHashSet<Script>,
}
impl DisallowedScriptIdents {
pub fn new(whitelist: &[String]) -> Self {
let whitelist = whitelist
.iter()
.map(String::as_str)
.filter_map(Script::from_full_name)
.collect();
Self { whitelist }
}
}
impl_lint_pass!(DisallowedScriptIdents => [DISALLOWED_SCRIPT_IDENTS]);
impl EarlyLintPass for DisallowedScriptIdents {
fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
// Implementation is heavily inspired by the implementation of [`non_ascii_idents`] lint:
// https://github.com/rust-lang/rust/blob/master/compiler/rustc_lint/src/non_ascii_idents.rs
let check_disallowed_script_idents = cx.builder.lint_level(DISALLOWED_SCRIPT_IDENTS).0 != Level::Allow;
if !check_disallowed_script_idents {
return;
}
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
// Sort by `Span` so that error messages make sense with respect to the
// order of identifier locations in the code.
let mut symbols: Vec<_> = symbols.iter().collect();
symbols.sort_unstable_by_key(|k| k.1);
for (symbol, &span) in &symbols {
// Note: `symbol.as_str()` is an expensive operation, thus should not be called
// more than once for a single symbol.
let symbol_str = symbol.as_str();
if symbol_str.is_ascii() {
continue;
}
for c in symbol_str.chars() {
// We want to iterate through all the scripts associated with this character
// and check whether at least of one scripts is in the whitelist.
let forbidden_script = c
.script_extension()
.iter()
.find(|script| !self.whitelist.contains(script));
if let Some(script) = forbidden_script {
span_lint(
cx,
DISALLOWED_SCRIPT_IDENTS,
span,
&format!(
"identifier `{}` has a Unicode script that is not allowed by configuration: {}",
symbol_str,
script.full_name()
),
);
// We don't want to spawn warning multiple times over a single identifier.
break;
}
}
}
}
}

View file

@ -187,6 +187,7 @@ mod default_numeric_fallback;
mod dereference;
mod derive;
mod disallowed_method;
mod disallowed_script_idents;
mod disallowed_type;
mod doc;
mod double_comparison;
@ -585,6 +586,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
derive::EXPL_IMPL_CLONE_ON_COPY,
derive::UNSAFE_DERIVE_DESERIALIZE,
disallowed_method::DISALLOWED_METHOD,
disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS,
disallowed_type::DISALLOWED_TYPE,
doc::DOC_MARKDOWN,
doc::MISSING_ERRORS_DOC,
@ -995,6 +997,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
LintId::of(create_dir::CREATE_DIR),
LintId::of(dbg_macro::DBG_MACRO),
LintId::of(default_numeric_fallback::DEFAULT_NUMERIC_FALLBACK),
LintId::of(disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS),
LintId::of(else_if_without_else::ELSE_IF_WITHOUT_ELSE),
LintId::of(exhaustive_items::EXHAUSTIVE_ENUMS),
LintId::of(exhaustive_items::EXHAUSTIVE_STRUCTS),
@ -2082,7 +2085,8 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
store.register_late_pass(move || box disallowed_type::DisallowedType::new(&disallowed_types));
let import_renames = conf.enforced_import_renames.clone();
store.register_late_pass(move || box missing_enforced_import_rename::ImportRename::new(import_renames.clone()));
let scripts = conf.allowed_scripts.clone();
store.register_early_pass(move || box disallowed_script_idents::DisallowedScriptIdents::new(&scripts));
}
#[rustfmt::skip]

View file

@ -212,6 +212,8 @@ define_Conf! {
(standard_macro_braces: Vec<crate::nonstandard_macro_braces::MacroMatcher> = Vec::new()),
/// Lint: MISSING_ENFORCED_IMPORT_RENAMES. The list of imports to always rename, a fully qualified path followed by the rename.
(enforced_import_renames: Vec<crate::utils::conf::Rename> = Vec::new()),
/// Lint: RESTRICTED_SCRIPTS. The list of unicode scripts allowed to be used in the scope.
(allowed_scripts: Vec<String> = vec!["Latin".to_string()]),
}
/// Search for the configuration file.

View file

@ -1,4 +1,4 @@
error: error reading Clippy's configuration file `$DIR/clippy.toml`: unknown field `foobar`, expected one of `avoid-breaking-exported-api`, `msrv`, `blacklisted-names`, `cognitive-complexity-threshold`, `cyclomatic-complexity-threshold`, `doc-valid-idents`, `too-many-arguments-threshold`, `type-complexity-threshold`, `single-char-binding-names-threshold`, `too-large-for-stack`, `enum-variant-name-threshold`, `enum-variant-size-threshold`, `verbose-bit-mask-threshold`, `literal-representation-threshold`, `trivial-copy-size-limit`, `pass-by-value-size-limit`, `too-many-lines-threshold`, `array-size-threshold`, `vec-box-size-threshold`, `max-trait-bounds`, `max-struct-bools`, `max-fn-params-bools`, `warn-on-all-wildcard-imports`, `disallowed-methods`, `disallowed-types`, `unreadable-literal-lint-fractions`, `upper-case-acronyms-aggressive`, `cargo-ignore-publish`, `standard-macro-braces`, `enforced-import-renames`, `third-party` at line 5 column 1
error: error reading Clippy's configuration file `$DIR/clippy.toml`: unknown field `foobar`, expected one of `avoid-breaking-exported-api`, `msrv`, `blacklisted-names`, `cognitive-complexity-threshold`, `cyclomatic-complexity-threshold`, `doc-valid-idents`, `too-many-arguments-threshold`, `type-complexity-threshold`, `single-char-binding-names-threshold`, `too-large-for-stack`, `enum-variant-name-threshold`, `enum-variant-size-threshold`, `verbose-bit-mask-threshold`, `literal-representation-threshold`, `trivial-copy-size-limit`, `pass-by-value-size-limit`, `too-many-lines-threshold`, `array-size-threshold`, `vec-box-size-threshold`, `max-trait-bounds`, `max-struct-bools`, `max-fn-params-bools`, `warn-on-all-wildcard-imports`, `disallowed-methods`, `disallowed-types`, `unreadable-literal-lint-fractions`, `upper-case-acronyms-aggressive`, `cargo-ignore-publish`, `standard-macro-braces`, `enforced-import-renames`, `allowed-scripts`, `third-party` at line 5 column 1
error: aborting due to previous error

View file

@ -0,0 +1,10 @@
#![deny(clippy::disallowed_script_idents)]
#![allow(dead_code)]
fn main() {
let counter = 10; // OK, latin is allowed.
let zähler = 10; // OK, it's still latin.
let счётчик = 10; // Cyrillic is not allowed by default.
let = 10; // Same for japanese.
}

View file

@ -0,0 +1,20 @@
error: identifier `счётчик` has a Unicode script that is not allowed by configuration: Cyrillic
--> $DIR/disallowed_script_idents.rs:8:9
|
LL | let счётчик = 10; // Cyrillic is not allowed by default.
| ^^^^^^^
|
note: the lint level is defined here
--> $DIR/disallowed_script_idents.rs:1:9
|
LL | #![deny(clippy::disallowed_script_idents)]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
error: identifier `カウンタ` has a Unicode script that is not allowed by configuration: Katakana
--> $DIR/disallowed_script_idents.rs:9:9
|
LL | let カウンタ = 10; // Same for japanese.
| ^^^^^^^^
error: aborting due to 2 previous errors