rust-clippy/clippy_lints/src/tabs_in_doc_comments.rs


use clippy_utils::diagnostics::span_lint_and_sugg;
use rustc_ast::ast;
use rustc_errors::Applicability;
use rustc_lint::{EarlyContext, EarlyLintPass};
use rustc_session::{declare_lint_pass, declare_tool_lint};
use rustc_span::source_map::{BytePos, Span};

declare_clippy_lint! {
    /// ### What it does
    /// Checks doc comments for usage of tab characters.
    ///
    /// ### Why is this bad?
    /// The Rust style guide promotes spaces instead of tabs for indentation.
    /// To keep a consistent view of the source, doc comments should not contain tabs either.
    /// In addition, explanatory ASCII diagrams containing tabs can be displayed incorrectly
    /// when the display settings of the author and reader differ.
    ///
    /// ### Example
    /// ```rust
    /// ///
    /// /// Struct to hold two strings:
    /// ///	- first one
    /// ///	- second one
    /// pub struct DoubleString {
    ///     ///
    ///     ///	- First String:
    ///     ///		- needs to be inside here
    ///     first_string: String,
    ///     ///
    ///     ///	- Second String:
    ///     ///		- needs to be inside here
    ///     second_string: String,
    /// }
    /// ```
    ///
    /// Will be converted to:
    /// ```rust
    /// ///
    /// /// Struct to hold two strings:
    /// ///    - first one
    /// ///    - second one
    /// pub struct DoubleString {
    ///     ///
    ///     ///    - First String:
    ///     ///        - needs to be inside here
    ///     first_string: String,
    ///     ///
    ///     ///    - Second String:
    ///     ///        - needs to be inside here
    ///     second_string: String,
    /// }
    /// ```
    #[clippy::version = "1.41.0"]
    pub TABS_IN_DOC_COMMENTS,
    style,
    "using tabs in doc comments is not recommended"
}

declare_lint_pass!(TabsInDocComments => [TABS_IN_DOC_COMMENTS]);

impl TabsInDocComments {
    fn warn_if_tabs_in_doc(cx: &EarlyContext<'_>, attr: &ast::Attribute) {
        if let ast::AttrKind::DocComment(_, comment) = attr.kind {
            let comment = comment.as_str();

            for (lo, hi) in get_chunks_of_tabs(comment) {
                // +3 skips the opening delimiter
                let new_span = Span::new(
                    attr.span.lo() + BytePos(3 + lo),
                    attr.span.lo() + BytePos(3 + hi),
                    attr.span.ctxt(),
                    attr.span.parent(),
                );
                span_lint_and_sugg(
                    cx,
                    TABS_IN_DOC_COMMENTS,
                    new_span,
                    "using tabs in doc comments is not recommended",
                    "consider using four spaces per tab",
                    "    ".repeat((hi - lo) as usize),
                    Applicability::MaybeIncorrect,
                );
            }
        }
    }
}

impl EarlyLintPass for TabsInDocComments {
    fn check_attribute(&mut self, cx: &EarlyContext<'_>, attribute: &ast::Attribute) {
        Self::warn_if_tabs_in_doc(cx, attribute);
    }
}

///
/// Scans the string for groups of tabs and returns the start (inclusive) and end (exclusive)
/// positions of all groups.
///
/// e.g. "sd\tasd\t\taa" will be converted to [(2, 3), (6, 8)] as
///
/// 012 3456 7 89
///   ^-^  ^---^
fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> {
    let line_length_way_to_long = "doc comment longer than 2^32 chars";
    let mut spans: Vec<(u32, u32)> = vec![];
    let mut current_start: u32 = 0;

    // tracker to decide if the last group of tabs is not closed by a non-tab character
    let mut is_active = false;

    // Note that we specifically need the char _byte_ indices here, not the positional indexes
    // within the char array, to deal with multi-byte characters properly. `char_indices` does
    // exactly that. It provides an iterator over tuples of the form `(byte position, char)`.
    let char_indices: Vec<_> = the_str.char_indices().collect();

    if let [(_, '\t')] = char_indices.as_slice() {
        return vec![(0, 1)];
    }

    for entry in char_indices.windows(2) {
        match entry {
            [(_, '\t'), (_, '\t')] => {
                // either the string starts with a double tab, then we have to set it active,
                // otherwise is_active is true anyway
                is_active = true;
            },
            [(_, _), (index_b, '\t')] => {
                // as ['\t', '\t'] is excluded, this has to be the start of a tab group,
                // set indices accordingly
                is_active = true;
                current_start = u32::try_from(*index_b).unwrap();
            },
            [(_, '\t'), (index_b, _)] => {
                // this now has to be an end of the group, hence we have to push a new tuple
                is_active = false;
                spans.push((current_start, u32::try_from(*index_b).unwrap()));
            },
            _ => {},
        }
    }

    // only possible when tabs are at the end, insert last group
    if is_active {
        spans.push((
            current_start,
            u32::try_from(char_indices.last().unwrap().0 + 1).expect(line_length_way_to_long),
        ));
    }

    spans
}

#[cfg(test)]
mod tests_for_get_chunks_of_tabs {
    use super::get_chunks_of_tabs;

    #[test]
    fn test_unicode_han_string() {
        let res = get_chunks_of_tabs(" \u{4f4d}\t");

        assert_eq!(res, vec![(4, 5)]);
    }

    #[test]
    fn test_empty_string() {
        let res = get_chunks_of_tabs("");

        assert_eq!(res, vec![]);
    }

    #[test]
    fn test_simple() {
        let res = get_chunks_of_tabs("sd\t\t\taa");

        assert_eq!(res, vec![(2, 5)]);
    }

    #[test]
    fn test_only_t() {
        let res = get_chunks_of_tabs("\t\t");

        assert_eq!(res, vec![(0, 2)]);
    }

    #[test]
    fn test_only_one_t() {
        let res = get_chunks_of_tabs("\t");

        assert_eq!(res, vec![(0, 1)]);
    }

    #[test]
    fn test_double() {
        let res = get_chunks_of_tabs("sd\tasd\t\taa");

        assert_eq!(res, vec![(2, 3), (6, 8)]);
    }

    #[test]
    fn test_start() {
        let res = get_chunks_of_tabs("\t\taa");

        assert_eq!(res, vec![(0, 2)]);
    }

    #[test]
    fn test_end() {
        let res = get_chunks_of_tabs("aa\t\t");

        assert_eq!(res, vec![(2, 4)]);
    }

    #[test]
    fn test_start_single() {
        let res = get_chunks_of_tabs("\taa");

        assert_eq!(res, vec![(0, 1)]);
    }

    #[test]
    fn test_end_single() {
        let res = get_chunks_of_tabs("aa\t");

        assert_eq!(res, vec![(2, 3)]);
    }

    #[test]
    fn test_no_tabs() {
        let res = get_chunks_of_tabs("dsfs");

        assert_eq!(res, vec![]);
    }
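
    // A minimal additional sketch (the test name and input below are illustrative, not part
    // of the original suite): it demonstrates the byte-index behaviour described above
    // `char_indices` in `get_chunks_of_tabs`. The returned offsets are byte positions, so a
    // multi-byte character before a tab group shifts the reported span by its UTF-8 width.
    // U+4F4D occupies three bytes, so the two tabs start at byte 4 and end (exclusively) at
    // byte 6.
    #[test]
    fn test_tab_group_after_multibyte_char() {
        let res = get_chunks_of_tabs("a\u{4f4d}\t\tb");

        assert_eq!(res, vec![(4, 6)]);
    }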
}