Merge pull request #962 from Manishearth/fix-doc

Fix DOC_MARKDOWN and multiline links and quotes
llogiq 2016-06-04 09:24:39 +02:00
commit 66c03f980e
2 changed files with 182 additions and 90 deletions
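
Background for the change: DOC_MARKDOWN used to run its check on each `///` line of a doc comment separately, so an inline code span or a link that continues on the next line was cut in half and could trigger spurious warnings. A minimal sketch of the kind of doc comment involved (hypothetical item name and link target, mirroring the new test cases at the end of this diff):

    // After this change the whole doc comment is collected first and checked as
    // one stream of lines, so the code span and the link below are seen in full.
    /// `foo_bar
    /// baz_quz`
    /// [link text continued
    /// on the next line](https://doc.rust-lang.org/std/iter/trait.Iterator.html)
    fn multiline_doc_example() {}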


@@ -51,6 +51,8 @@ impl EarlyLintPass for Doc {
}
pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
let mut docs = vec![];
let mut in_multiline = false;
for attr in attrs {
if attr.node.is_sugared_doc {
@@ -64,39 +66,21 @@ pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [a
// check for multiline code blocks
if real_doc.trim_left().starts_with("```") {
in_multiline = !in_multiline;
}
if !in_multiline {
check_doc(cx, valid_idents, real_doc, span);
} else if !in_multiline {
docs.push((real_doc, span));
}
}
}
}
}
}
macro_rules! jump_to {
// Get the next character's first byte, UTF-8-friendly.
(@next_char, $chars: expr, $len: expr) => {{
if let Some(&(pos, _)) = $chars.peek() {
pos
} else {
$len
}
}};
// Jump to the next `$c`. If no such character is found, give up.
($chars: expr, $c: expr, $len: expr) => {{
if $chars.find(|&(_, c)| c == $c).is_some() {
jump_to!(@next_char, $chars, $len)
}
else {
return;
}
}};
if !docs.is_empty() {
let _ = check_doc(cx, valid_idents, &docs);
}
}
#[allow(while_let_loop)] // #362
pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], doc: &str, span: Span) {
pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(&str, Span)]) -> Result<(), ()> {
// In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
// There really is no markdown specification that would disambiguate this properly. This is
// what GitHub and Rustdoc do:
@@ -108,8 +92,8 @@ pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], doc: &str, span: Sp
// (_baz_) → (<em>baz</em>)
// foo _ bar _ baz → foo _ bar _ baz
/// Character that can appear in a word
fn is_word_char(c: char) -> bool {
/// Character that can appear in a path
fn is_path_char(c: char) -> bool {
match c {
t if t.is_alphanumeric() => true,
':' | '_' => true,
@@ -117,81 +101,180 @@ pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], doc: &str, span: Sp
}
}
#[allow(cast_possible_truncation)]
fn word_span(mut span: Span, begin: usize, end: usize) -> Span {
debug_assert_eq!(end as u32 as usize, end);
debug_assert_eq!(begin as u32 as usize, begin);
span.hi = span.lo + BytePos(end as u32);
span.lo = span.lo + BytePos(begin as u32);
span
#[derive(Clone, Debug)]
/// This type is used to iterate through the documentation characters while keeping track of
/// the span at the same time.
struct Parser<'a> {
/// First byte of the current potential match
current_word_begin: usize,
/// List of lines and their associated span
docs: &'a [(&'a str, Span)],
/// Index of the current line we are parsing
line: usize,
/// Whether we are in a link
link: bool,
/// Whether we are at the beginning of a line
new_line: bool,
/// Whether we reached the end of a line the last time `next` was called
reset: bool,
/// The position of the current character within the current line
pos: usize,
}
let mut new_line = true;
let len = doc.len();
let mut chars = doc.char_indices().peekable();
let mut current_word_begin = 0;
impl<'a> Parser<'a> {
fn advance_begin(&mut self) {
self.current_word_begin = self.pos;
}
fn line(&self) -> (&'a str, Span) {
self.docs[self.line]
}
fn peek(&self) -> Option<char> {
self.line().0[self.pos..].chars().next()
}
#[allow(while_let_on_iterator)] // borrowck complains about for
fn jump_to(&mut self, n: char) -> Result<(), ()> {
while let Some((_, c)) = self.next() {
if c == n {
self.advance_begin();
return Ok(());
}
}
Err(())
}
fn next_line(&mut self) {
self.pos = 0;
self.current_word_begin = 0;
self.line += 1;
self.new_line = true;
}
fn put_back(&mut self, c: char) {
self.pos -= c.len_utf8();
}
#[allow(cast_possible_truncation)]
fn word(&self) -> (&'a str, Span) {
let begin = self.current_word_begin;
let end = self.pos;
debug_assert_eq!(end as u32 as usize, end);
debug_assert_eq!(begin as u32 as usize, begin);
let (doc, mut span) = self.line();
span.hi = span.lo + BytePos(end as u32);
span.lo = span.lo + BytePos(begin as u32);
(&doc[begin..end], span)
}
}
impl<'a> Iterator for Parser<'a> {
type Item = (bool, char);
fn next(&mut self) -> Option<(bool, char)> {
while self.line < self.docs.len() {
if self.reset {
self.line += 1;
self.reset = false;
self.pos = 0;
self.current_word_begin = 0;
}
let mut chars = self.line().0[self.pos..].chars();
let c = chars.next();
if let Some(c) = c {
self.pos += c.len_utf8();
let new_line = self.new_line;
self.new_line = c == '\n' || (self.new_line && c.is_whitespace());
return Some((new_line, c));
} else if self.line == self.docs.len() - 1 {
return None;
} else {
self.new_line = true;
self.reset = true;
self.pos += 1;
return Some((true, '\n'));
}
}
None
}
}
let mut parser = Parser {
current_word_begin: 0,
docs: docs,
line: 0,
link: false,
new_line: true,
reset: false,
pos: 0,
};
loop {
match chars.next() {
Some((_, c)) => {
match parser.next() {
Some((new_line, c)) => {
match c {
'#' if new_line => { // don't warn on titles
current_word_begin = jump_to!(chars, '\n', len);
parser.next_line();
}
'`' => {
current_word_begin = jump_to!(chars, '`', len);
try!(parser.jump_to('`'));
}
'[' => {
let end = jump_to!(chars, ']', len);
let link_text = &doc[current_word_begin + 1..end];
let word_span = word_span(span, current_word_begin + 1, end + 1);
// Check for a reference definition `[foo]:` at the beginning of a line
let mut link = true;
match chars.peek() {
Some(&(_, c)) => {
// Trying to parse a link. Let's ignore the link.
// FIXME: how does markdown handle such a link?
// https://en.wikipedia.org/w/index.php?title=)
match c {
'(' => { // inline link
current_word_begin = jump_to!(chars, ')', len);
check_doc(cx, valid_idents, link_text, word_span);
}
'[' => { // reference link
current_word_begin = jump_to!(chars, ']', len);
check_doc(cx, valid_idents, link_text, word_span);
}
':' => { // reference link
current_word_begin = jump_to!(chars, '\n', len);
}
_ => { // automatic reference link
current_word_begin = jump_to!(@next_char, chars, len);
check_doc(cx, valid_idents, link_text, word_span);
}
if new_line {
let mut lookup_parser = parser.clone();
if let Some(_) = lookup_parser.find(|&(_, c)| c == ']') {
if let Some((_, ':')) = lookup_parser.next() {
lookup_parser.next_line();
parser = lookup_parser;
link = false;
}
}
None => return,
}
parser.advance_begin();
parser.link = link;
}
']' if parser.link => {
parser.link = false;
match parser.peek() {
Some('(') => try!(parser.jump_to(')')),
Some('[') => try!(parser.jump_to(']')),
Some(_) => continue,
None => return Err(()),
}
}
// anything that's neither alphanumeric nor '_' is not part of an ident anyway
c if !c.is_alphanumeric() && c != '_' => {
current_word_begin = jump_to!(@next_char, chars, len);
c if !is_path_char(c) => {
parser.advance_begin();
}
_ => {
let end = match chars.find(|&(_, c)| !is_word_char(c)) {
Some((end, _)) => end,
None => len,
};
let word_span = word_span(span, current_word_begin, end);
check_word(cx, valid_idents, &doc[current_word_begin..end], word_span);
current_word_begin = jump_to!(@next_char, chars, len);
if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) {
parser.put_back(c);
}
let (word, span) = parser.word();
check_word(cx, valid_idents, word, span);
parser.advance_begin();
}
}
new_line = c == '\n' || (new_line && c.is_whitespace());
}
None => break,
}
}
Ok(())
}
fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {


@@ -12,6 +12,8 @@
/// Markdown is _weird_. I mean _really weird_. This \_ is ok. So is `_`. But not Foo::some_fun
//~^ ERROR: you should put `Foo::some_fun` between ticks
/// which should be reported only once despite being __doubly bad__.
/// Here be ::is::a::global:path.
//~^ ERROR: you should put `is::a::global:path` between ticks
/// be_sure_we_got_to_the_end_of_it
//~^ ERROR: you should put `be_sure_we_got_to_the_end_of_it` between ticks
fn foo_bar() {
@@ -66,18 +68,18 @@ fn test_units() {
//~^ ERROR: you should put `foo_` between ticks
/// foo_💣
/// foo_❤
/// [ßdummy textß][foo_ß]
/// [dummy text][foo_]
/// [💣dummy tex💣t][foo_💣]
/// [❤dummy text❤][foo_❤️]
/// [ßdummy textß](foo_ß)
/// [dummy text](foo_)
/// [💣dummy tex💣t](foo_💣)
/// [❤dummy text❤](foo_❤️)
/// [foo_ß]: dummy text
/// [foo_]: dummy text
/// [foo_💣]: dummy text
/// [foo_❤]: dummy text
/// [ßdummy textß][foo_1ß]
/// [dummy text][foo_2]
/// [💣dummy tex💣t][foo3_💣]
/// [❤dummy text❤][foo_4❤️]
/// [ßdummy textß](foo_5ß)
/// [dummy text](foo_6)
/// [💣dummy tex💣t](fo7o_💣)
/// [❤dummy text❤](foo_8❤️)
/// [foo1_ß]: dummy text
/// [foo2_]: dummy text
/// [foo3_💣]: dummy text
/// [foo4_❤]: dummy text
/// be_sure_we_got_to_the_end_of_it
//~^ ERROR: you should put `be_sure_we_got_to_the_end_of_it` between ticks
fn test_unicode() {
@@ -141,3 +143,10 @@ fn issue900() {
//~^ ERROR: you should put `be_sure_we_got_to_the_end_of_it` between ticks
fn issue883() {
}
/// `foo_bar
/// baz_quz`
/// [foo
/// bar](https://doc.rust-lang.org/stable/std/iter/trait.IteratorFooBar.html)
fn multiline() {
}
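
The heart of the new Parser in the first file above is that it walks the characters of all collected doc lines as a single stream, emitting a synthetic '\n' between lines, which is what lets a code span or link continue across `///` boundaries as in the `multiline()` test just above. A rough standalone sketch of that idea, with the span bookkeeping left out (a simplified illustration, not the actual clippy code; `flatten` is a hypothetical helper):

    // Walk several doc-comment lines as one character stream, remembering the
    // line index and byte offset of every character and inserting a synthetic
    // '\n' between lines. The real Parser additionally carries each line's Span
    // so the lint can point at the exact offending word.
    fn flatten(docs: &[&str]) -> Vec<(usize, usize, char)> {
        let mut out = Vec::new();
        for (line, doc) in docs.iter().enumerate() {
            for (pos, c) in doc.char_indices() {
                out.push((line, pos, c));
            }
            out.push((line, doc.len(), '\n'));
        }
        out
    }

    fn main() {
        // The two halves of the multi-line code span from the test case above.
        let docs = ["`foo_bar", "baz_quz`"];
        for (line, pos, c) in flatten(&docs) {
            println!("line {}, byte {}: {:?}", line, pos, c);
        }
    }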