Rewrote summary parser from a state machine to use recursive descent

2024-12-12 05:42:35 +00:00 · 2017-12-11 15:17:20 +11:00 · 2017-12-11 15:17:20 +11:00 · f5e9b857de
commit f5e9b857de
parent 4f4e86db3a
4 changed files with 412 additions and 367 deletions
--- a/src/book/summary.rs
+++ b/src/book/summary.rs
@ -1,4 +1,5 @@
 use std::fmt::{self, Display, Formatter};
+use std::iter::FromIterator;
 use std::ops::{Deref, DerefMut};
 use std::path::{Path, PathBuf};
 use pulldown_cmark::{self, Event, Tag};
@ -123,31 +124,7 @@ impl From<Link> for SummaryItem {
    }
 }

-#[derive(Debug, Copy, Clone, PartialEq)]
-enum State {
-    Begin,
-    PrefixChapters,
-    /// Numbered chapters, including the nesting level.
-    NumberedChapters(u32),
-    SuffixChapters,
-    End,
-}
-
-/// A state machine parser for parsing a `SUMMARY.md` file.
-///
-/// The parser has roughly 5 states,
-///
-/// - **Begin:** the initial state
-/// - **Prefix Chapters:** Parsing the prefix chapters
-/// - **Numbered Chapters:** Parsing the numbered chapters, using a `usize` to
-///   indicate the nesting level (because chapters can have sub-chapters)
-/// - **Suffix Chapters:** pretty much identical to the Prefix Chapters
-/// - **End:** The final state
-///
-/// The `parse()` method then continually invokes `step()` until it reaches the
-/// `End` state. Parsing is guaranteed to (eventually) finish because the next
-/// `Event` is read from the underlying `pulldown_cmark::Parser` and passed
-/// into the current state's associated method.
+/// A recursive descent parser for a `SUMMARY.md`.
 ///
 ///
 /// # Grammar
@ -174,9 +151,8 @@ enum State {
 /// > **Note:** the `TEXT` terminal is "normal" text, and should (roughly)
 /// > match the following regex: "[^<>\n[]]+".
 struct SummaryParser<'a> {
+    src: &'a str,
    stream: pulldown_cmark::Parser<'a>,
-    summary: Summary,
-    state: State,
 }

 /// Reads `Events` from the provided stream until the corresponding
@ -187,6 +163,12 @@ struct SummaryParser<'a> {
 /// use pattern matching and you won't get errors because `take_while()`
 /// moves `$stream` out of self.
 macro_rules! collect_events {
+    ($stream:expr, start $delimiter:pat) => {
+        collect_events!($stream, Event::Start($delimiter))
+    };
+    ($stream:expr, end $delimiter:pat) => {
+        collect_events!($stream, Event::End($delimiter))
+    };
    ($stream:expr, $delimiter:pat) => {
        {
            let mut events = Vec::new();
@ -195,7 +177,7 @@ macro_rules! collect_events {
                let event = $stream.next();
                trace!("Next event: {:?}", event);
                match event {
-                    Some(Event::End($delimiter)) => break,
+                    Some($delimiter) => break,
                    Some(other) => events.push(other),
                    None => {
                        debug!("Reached end of stream without finding the closing pattern, {}", stringify!($delimiter));
@ -212,186 +194,182 @@ macro_rules! collect_events {
 impl<'a> SummaryParser<'a> {
    fn new(text: &str) -> SummaryParser {
        let pulldown_parser = pulldown_cmark::Parser::new(text);
-        let intermediate_summary = Summary::default();

        SummaryParser {
+            src: text,
            stream: pulldown_parser,
-            summary: intermediate_summary,
-            state: State::Begin,
        }
    }

+    fn current_location(&self) -> (usize, usize) {
+        let byte_offset = self.stream.get_offset();
+
+        let line = 0;
+        let col = 0;
+        (line, col)
+    }
+
    /// Parse the text the `SummaryParser` was created with.
    fn parse(mut self) -> Result<Summary> {
-        self.summary.title = self.parse_title();
+        let title = self.parse_title();

-        if let Some(ref title) = self.summary.title {
-            debug!("[*] Title is {:?}", title);
-        }
+        let prefix_chapters = self.parse_affix(true)
+            .chain_err(|| "There was an error parsing the prefix chapters")?;
+        let numbered_chapters = self.parse_numbered()
+            .chain_err(|| "There was an error parsing the numbered chapters")?;
+        let suffix_chapters = self.parse_affix(false)
+            .chain_err(|| "There was an error parsing the suffix chapters")?;

-        while self.state != State::End {
-            self.step()?;
-        }
-
-        Ok(self.summary)
+        Ok(Summary {
+            title,
+            prefix_chapters,
+            numbered_chapters,
+            suffix_chapters,
+        })
    }

-    fn step(&mut self) -> Result<()> {
-        if let Some(next_event) = self.stream.next() {
-            trace!("[*] Current state: {:?}, next event: {:?}", self.state, next_event);
+    /// Parse the affix chapters. This expects the first event (start of
+    /// paragraph) to have already been consumed by the previous parser.
+    fn parse_affix(&mut self, is_prefix: bool) -> Result<Vec<SummaryItem>> {
+        let mut items = Vec::new();
+        debug!(
+            "[*] Parsing {} items",
+            if is_prefix { "prefix" } else { "suffix" }
+        );

-            match self.state {
-                State::Begin => self.step_start(next_event)?,
-                State::PrefixChapters | State::SuffixChapters => self.step_affix(next_event)?,
-                State::NumberedChapters(_) => self.step_numbered(next_event)?,
-                State::End => {},
-            }
-        } else {
-            trace!("[*] Reached end of SUMMARY.md");
-            self.state = State::End;
-        }
+        loop {
+            let next = self.stream.next();
+            trace!("[*] Next event: {:?}", next);

-        Ok(())
-    }
-
-    /// The very first state, we should see a `Begin Paragraph` token or
-    /// it's an error...
-    fn step_start(&mut self, event: Event<'a>) -> Result<()> {
-        match event {
-            Event::Start(Tag::Paragraph) => self.state = State::PrefixChapters,
-            Event::Start(Tag::List(_)) => self.state = State::NumberedChapters(0),
-            other => bail!("Expected a start of paragraph but got {:?}", other),
-        }
-
-        Ok(())
-    }
-
-    /// Try to step through an "affix" section (recognising prefix and suffix 
-    /// chapters).
-    ///
-    /// If we encounter a link or horizontal line, it'll get added to the 
-    /// section. If we encounter a list, we'll either change to 
-    /// `State::NumberedChapter` (for prefix) or throw an error (suffix chapters).
-    ///
-    /// Anything else will be ignored.
-    fn step_affix(&mut self, event: Event<'a>) -> Result<()> {
-
-        match event {
-            Event::Start(tag) => self.handle_start_tag_in_affix_chapter(tag)?,
-            Event::End(Tag::Rule) => {
-                debug!("[*] Found an affix chapter separator");
-                self.affix_chapter_list().push(SummaryItem::Separator);
-            },
-            other => {
-                trace!("[*] Skipping unexpected token in summary: {:?}", other);
-            },
-        }
-
-        Ok(())
-    }
-
-    /// A helper function to get the `SummaryItem` list we should add items to
-    /// when parsing an affix chapter (i.e. prefix or suffix chapters). 
-    fn affix_chapter_list(&mut self) -> &mut Vec<SummaryItem> {
-        match self.state {
-            State::PrefixChapters => &mut self.summary.prefix_chapters,
-            State::SuffixChapters => &mut self.summary.suffix_chapters,
-            other => panic!("affix_chapter_list() called with invalid state: {:?}", other),
-        }
-    }
-
-    fn handle_start_tag_in_affix_chapter(&mut self, tag: Tag) -> Result<()> {
-        match tag {
-                Tag::Link(location, _) => {
-                    let content = collect_events!(self.stream, Tag::Link(_, _));
-                    let text = stringify_events(content);
-                    let link = Link::new(text, location.as_ref());
-
-                    debug!("[*] Found an affix chapter: {:?}", link.name);
-                    self.affix_chapter_list().push(SummaryItem::Link(link));
-                },
-                Tag::List(_) => {
-                    match self.state {
-                        State::PrefixChapters => {
-                            debug!("[*] Changing from prefix chapters to numbered chapters");
-                            self.state = State::NumberedChapters(0);
-                        },
-                        State::SuffixChapters => bail!("Suffix chapters can't be followed by a list"),
-                        _ => unreachable!(),
-                    }
-                },
-                other => trace!("[*] Skipping unknown start tag while parsing affix chapters: {:?}", other),
-        }
-
-        Ok(())
-    }
-
-    /// Parse the numbered chapters.
-    ///
-    /// If the event is the start of a list item, consume the entire item and
-    /// add a new link to the summary with `push_numbered_section`.
-    ///
-    /// If the event is the start of a new list, bump the nesting level.
-    ///
-    /// If the event is the end of a list, decrement the nesting level. When
-    /// the nesting level would go negative, we've finished the numbered
-    /// section and need to parse the suffix section.
-    ///
-    /// Otherwise, ignore the event.
-    fn step_numbered(&mut self, event: Event) -> Result<()> {
-        match event {
-            Event::Start(Tag::Item) => {
-                let it = self.parse_item().chain_err(
-                    || "List items should only contain links",
-                )?;
-
-                debug!("[*] Found a chapter: {:?} ({})", it.name, it.location.display());
-                let section_number = self.push_numbered_section(SummaryItem::Link(it));
-                trace!("[*] Section number is {}", section_number);
-            },
-            Event::End(Tag::Rule) => {
-                debug!("[*] Found a numbered chapter separator");
-                self.summary.numbered_chapters.push(SummaryItem::Separator);
-                self.state = State::NumberedChapters(0);
-            },
-            Event::Start(Tag::List(_)) => {
-                if let State::NumberedChapters(n) = self.state {
-                    self.state = State::NumberedChapters(n + 1);
-                    trace!("[*] Nesting level increased to {}", n + 1);
-                }
-            },
-            Event::End(Tag::List(_)) => {
-                if let State::NumberedChapters(n) = self.state {
-                    if n == 0 {
-                        trace!("[*] Finished parsing the numbered chapters");
-                        self.state = State::SuffixChapters;
+            match next {
+                Some(Event::Start(Tag::List(..))) => {
+                    if is_prefix {
+                        // we've finished prefix chapters and are at the start
+                        // of the numbered section.
+                        break;
                    } else {
-                        trace!("[*] Nesting level decreased to {}", n - 1);
-                        self.state = State::NumberedChapters(n - 1);
+                        bail!(self.parse_error("Suffix chapters cannot be followed by a list"));
                    }
                }
-            },
-            Event::End(Tag::Item) => { /* Ignore */ },
-            other => {
-                trace!("[*] ignoring token: {:?}", other);
-            },
+                Some(Event::Start(Tag::Link(href, _))) => {
+                    let link = self.parse_link(href.to_string())?;
+                    items.push(SummaryItem::Link(link));
+                }
+                Some(Event::Start(Tag::Rule)) => items.push(SummaryItem::Separator),
+                Some(_) => {}
+                None => break,
+            }
        }

-        Ok(())
+        Ok(items)
    }

-    /// Parse a single item (`[Some Chapter Name](./path/to/chapter.md)`).
-    fn parse_item(&mut self) -> Result<Link> {
-        let next = self.stream.next();
-        trace!("Parsing an item, next event is {:?}", next);
+    fn parse_link(&mut self, href: String) -> Result<Link> {
+        let link_content = collect_events!(self.stream, end Tag::Link(..));
+        let name = stringify_events(link_content);

-        if let Some(Event::Start(Tag::Link(dest, _))) = next {
-            let content = collect_events!(self.stream, Tag::Link(..));
+        Ok(Link {
+            name: name,
+            location: PathBuf::from(href.to_string()),
+            number: None,
+            nested_items: Vec::new(),
+        })
+    }

-            Ok(Link::new(stringify_events(content), dest.as_ref()))
-        } else {
-            bail!("Expected a link, got {:?}", next)
+    /// Parse the numbered chapters. This assumes the opening list tag has
+    /// already been consumed by a previous parser.
+    fn parse_numbered(&mut self) -> Result<Vec<SummaryItem>> {
+        let mut items = Vec::new();
+        let root_number = SectionNumber::default();
+
+        // we need to do this funny loop-match-if-let dance because a rule will
+        // close off any currently running list. Therefore we try to read the
+        // list items before the rule, then if we encounter a rule we'll add a
+        // separator and try to resume parsing numbered chapters if we start a
+        // list immediately afterwards.
+        //
+        // If you can think of a better way to do this then please make a PR :)
+
+        loop {
+            let bunch_of_items = self.parse_nested_numbered(&root_number)?;
+            items.extend(bunch_of_items);
+
+            match self.stream.next() {
+                Some(Event::Start(Tag::Rule)) => {
+                    items.push(SummaryItem::Separator);
+
+                    if let Some(Event::Start(Tag::List(..))) = self.stream.next() {
+                        continue;
+                    } else {
+                        break;
+                    }
+                }
+                _ => {
+                    break;
+                }
+            }
        }
+
+        Ok(items)
+    }
+
+    fn parse_nested_numbered(&mut self, parent: &SectionNumber) -> Result<Vec<SummaryItem>> {
+        debug!("[*] Parsing numbered chapters at level {}", parent);
+        let mut items = Vec::new();
+
+        loop {
+            let next = self.stream.next();
+            trace!("[*] Next event: {:?}", next);
+
+            match next {
+                Some(Event::Start(Tag::Item)) => match self.stream.next() {
+                    Some(Event::Start(Tag::Link(href, _))) => {
+                        let mut link = self.parse_link(href.to_string())?;
+
+                        let mut number = parent.clone();
+                        number.0.push(items.len() as u32 + 1);
+                        trace!(
+                            "[*] Found chapter: {} {} ({})",
+                            number,
+                            link.name,
+                            link.location.display()
+                        );
+
+                        link.number = Some(number);
+                        items.push(SummaryItem::Link(link));
+                    }
+                    other => {
+                        warn!("Expected a start of a link, actually got {:?}", other);
+                        bail!(self.parse_error(
+                            "The link items for nested chapters must only contain a hyperlink"
+                        ));
+                    }
+                },
+                Some(Event::Start(Tag::List(..))) => {
+                    // recurse
+                    let (_, last_item) = get_last_link(&mut items)?;
+                    let last_item_number = last_item
+                        .number
+                        .as_ref()
+                        .expect("All numbered chapters have numbers");
+
+                    let sub_items = self.parse_nested_numbered(last_item_number)?;
+
+                    last_item.nested_items = sub_items;
+                }
+                Some(Event::End(Tag::List(..))) => break,
+                Some(_) => {}
+                None => break,
+            }
+        }
+
+        Ok(items)
+    }
+
+    fn parse_error<D: Display>(&self, msg: D) -> Error {
+        let (line, col) = self.current_location();
+
+        ErrorKind::ParseError(line, col, msg.to_string()).into()
    }

    /// Try to parse the title line.
@ -399,7 +377,7 @@ impl<'a> SummaryParser<'a> {
        if let Some(Event::Start(Tag::Header(1))) = self.stream.next() {
            debug!("[*] Found a h1 in the SUMMARY");

-            let tags = collect_events!(self.stream, Tag::Header(1));
+            let tags = collect_events!(self.stream, end Tag::Header(1));

            // TODO: How do we deal with headings like "# My **awesome** summary"?
            // for now, I'm just going to scan through and concatenate the
@ -409,64 +387,11 @@ impl<'a> SummaryParser<'a> {
            None
        }
    }
-
-    /// Push a new section at the end of the current nesting level.
-    fn push_numbered_section(&mut self, item: SummaryItem) -> SectionNumber {
-        if let State::NumberedChapters(level) = self.state {
-            push_item_at_nesting_level(
-                &mut self.summary.numbered_chapters,
-                item,
-                level as usize,
-                SectionNumber::default(),
-            ).chain_err(|| {
-                format!("The parser should always ensure we add the next \
-                item at the correct level ({}:{})", module_path!(), line!())
-            })
-                .unwrap()
-        } else {
-            // this method should only ever be called when parsing a numbered
-            // section, therefore if we ever get here something has gone
-            // hideously wrong...
-            error!("Calling push_numbered_section() when not in a numbered section");
-            error!("Current state: {:?}", self.state);
-            error!("Item: {:?}", item);
-            error!("Summary:");
-            error!("{:#?}", self.summary);
-            panic!("Entered unreachable code, this is a bug");
-        }
-    }
-}
-
-/// Given a particular level (e.g. 3), go that many levels down the `Link`'s
-/// nested items then append the provided item to the last `Link` in the
-/// list.
-fn push_item_at_nesting_level(links: &mut Vec<SummaryItem>, mut item: SummaryItem, level: usize, mut section_number: SectionNumber)
-    -> Result<SectionNumber> {
-    if level == 0 {
-        // set the section number, if applicable
-        section_number.push(links.len() as u32 + 1);
-
-        if let SummaryItem::Link(ref mut l) = item {
-            l.number = Some(section_number.clone());
-        }
-
-        links.push(item);
-        Ok(section_number)
-    } else {
-        let (index, last_link) = get_last_link(links).chain_err(|| {
-            format!("The list of links needs to be {} levels deeper (current position {})", 
-                level, section_number)
-        })?;
-
-        section_number.push(index as u32 + 1);
-        push_item_at_nesting_level(&mut last_link.nested_items, item, level - 1, section_number)
-    }
 }

 /// Gets a pointer to the last `Link` in a list of `SummaryItem`s, and its
 /// index.
 fn get_last_link(links: &mut [SummaryItem]) -> Result<(usize, &mut Link)> {
-    // TODO: This should probably be integrated into `Link::push_item()`
    links
        .iter_mut()
        .enumerate()
@ -496,10 +421,14 @@ pub struct SectionNumber(pub Vec<u32>);

 impl Display for SectionNumber {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        for item in &self.0 {
-            write!(f, "{}.", item)?;
+        if self.0.is_empty() {
+            write!(f, "0")
+        } else {
+            for item in &self.0 {
+                write!(f, "{}.", item)?;
+            }
+            Ok(())
        }
-        Ok(())
    }
 }

@ -516,6 +445,12 @@ impl DerefMut for SectionNumber {
    }
 }

+impl FromIterator<u32> for SectionNumber {
+    fn from_iter<I: IntoIterator<Item = u32>>(it: I) -> Self {
+        SectionNumber(it.into_iter().collect())
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@ -556,23 +491,6 @@ mod tests {
        assert_eq!(got, should_be);
    }

-    #[test]
-    fn parse_a_single_item() {
-        let src = "[A Chapter](./path/to/chapter)";
-        let should_be = Link {
-            name: String::from("A Chapter"),
-            location: PathBuf::from("./path/to/chapter"),
-            number: None,
-            nested_items: Vec::new(),
-        };
-
-        let mut parser = SummaryParser::new(src);
-        let _ = parser.stream.next(); // skip the opening paragraph tag
-        let got = parser.parse_item().unwrap();
-
-        assert_eq!(got, should_be);
-    }
-
    #[test]
    fn convert_markdown_events_to_a_string() {
        let src = "Hello *World*, `this` is some text [and a link](./path/to/link)";
@ -585,136 +503,123 @@ mod tests {
    }

    #[test]
-    fn can_step_past_first_token() {
-        let src = "hello world";
-        let should_be = State::PrefixChapters;
-
+    fn parse_some_prefix_items() {
+        let src = "[First](./first.md)\n[Second](./second.md)\n";
        let mut parser = SummaryParser::new(src);
-        assert_eq!(parser.state, State::Begin);
-        parser.step().unwrap();
-        assert_eq!(parser.state, should_be);
+
+        let should_be = vec![
+            SummaryItem::Link(Link {
+                name: String::from("First"),
+                location: PathBuf::from("./first.md"),
+                ..Default::default()
+            }),
+            SummaryItem::Link(Link {
+                name: String::from("Second"),
+                location: PathBuf::from("./second.md"),
+                ..Default::default()
+            }),
+        ];
+
+        let _ = parser.stream.next(); // step past first event
+        let got = parser.parse_affix(true).unwrap();
+
+        assert_eq!(got, should_be);
    }

    #[test]
-    fn first_token_must_be_open_paragraph() {
-        let src = "hello world";
-
+    fn parse_prefix_items_with_a_separator() {
+        let src = "[First](./first.md)\n\n---\n\n[Second](./second.md)\n";
        let mut parser = SummaryParser::new(src);
-        let _ = parser.stream.next(); // manually step past the Start Paragraph
-        assert!(parser.step().is_err());
+
+        let _ = parser.stream.next(); // step past first event
+        let got = parser.parse_affix(true).unwrap();
+
+        assert_eq!(got.len(), 3);
+        assert_eq!(got[1], SummaryItem::Separator);
    }

    #[test]
-    fn can_parse_prefix_chapter_links() {
-        let src = "[Hello World](./foo/bar/baz)";
+    fn suffix_items_cannot_be_followed_by_a_list() {
+        let src = "[First](./first.md)\n- [Second](./second.md)\n";
+        let mut parser = SummaryParser::new(src);
+
+        let _ = parser.stream.next(); // step past first event
+        let got = parser.parse_affix(false);
+
+        assert!(got.is_err());
+    }
+
+    #[test]
+    fn parse_a_link() {
+        let src = "[First](./first.md)";
        let should_be = Link {
-            name: String::from("Hello World"),
-            location: PathBuf::from("./foo/bar/baz"),
-            number: None,
-            nested_items: Vec::new(),
+            name: String::from("First"),
+            location: PathBuf::from("./first.md"),
+            ..Default::default()
        };

        let mut parser = SummaryParser::new(src);
-        parser.state = State::PrefixChapters;
-        assert!(parser.summary.prefix_chapters.is_empty());
+        let _ = parser.stream.next(); // skip past start of paragraph

-        let _ = parser.stream.next(); // manually step past the Start Paragraph
-        parser.step().unwrap();
+        let href = match parser.stream.next() {
+            Some(Event::Start(Tag::Link(href, _))) => href.to_string(),
+            other => panic!("Unreachable, {:?}", other),
+        };

-        assert_eq!(parser.summary.prefix_chapters.len(), 1);
-        assert_eq!(parser.summary.prefix_chapters[0], SummaryItem::Link(should_be));
-        assert_eq!(parser.state, State::PrefixChapters);
-    }
-
-    #[test]
-    fn can_parse_prefix_chapter_horizontal_rules() {
-        let src = "---";
-        let should_be = SummaryItem::Separator;
-
-        let mut parser = SummaryParser::new(src);
-        parser.state = State::PrefixChapters;
-        assert!(parser.summary.prefix_chapters.is_empty());
-
-        let _ = parser.stream.next(); // manually step past the Start Paragraph
-        parser.step().unwrap();
-
-        assert_eq!(parser.summary.prefix_chapters.len(), 1);
-        assert_eq!(parser.summary.prefix_chapters[0], should_be);
-        assert_eq!(parser.state, State::PrefixChapters);
-    }
-
-    #[test]
-    fn step_from_prefix_chapters_to_numbered() {
-        let src = "- foo";
-
-        let mut parser = SummaryParser::new(src);
-        parser.state = State::PrefixChapters;
-
-        // let _ = parser.stream.next(); // manually step past the Start Paragraph
-        parser.step().unwrap();
-
-        assert_eq!(parser.state, State::NumberedChapters(0));
-    }
-
-    #[test]
-    fn push_item_onto_empty_link() {
-        let root = Link::new("First", "/");
-        let mut links = vec![SummaryItem::Link(root)];
-
-        assert_eq!(links[0].maybe_link_mut().unwrap().nested_items.len(), 0);
-        let got = push_item_at_nesting_level(&mut links, SummaryItem::Separator, 1, SectionNumber::default()).unwrap();
-        assert_eq!(links[0].maybe_link_mut().unwrap().nested_items.len(), 1);
-        assert_eq!(*got, vec![1, 1]);
-    }
-
-    #[test]
-    fn push_item_onto_complex_link() {
-        let mut root = Link::new("First", "/first");
-        root.nested_items.push(SummaryItem::Separator);
-
-        let mut child = Link::new("Second", "/first/second");
-        child.nested_items.push(SummaryItem::Link(
-            Link::new("Third", "/first/second/third"),
-        ));
-        root.nested_items.push(SummaryItem::Link(child));
-        root.nested_items.push(SummaryItem::Separator);
-
-        let mut links = vec![SummaryItem::Link(root)];
-
-        // FIXME: This crap for getting a deeply nested member is just plain ugly :(
-        assert_eq!(links[0].maybe_link_mut().unwrap()
-            .nested_items[1].maybe_link_mut()
-            .unwrap()
-            .nested_items[0].maybe_link_mut()
-            .unwrap()
-            .nested_items.len() , 0);
-        let got = push_item_at_nesting_level(
-            &mut links,
-            SummaryItem::Link(Link::new("Dummy", "")),
-            3,
-            SectionNumber::default(),
-        ).unwrap();
-        assert_eq!(links[0].maybe_link_mut().unwrap()
-            .nested_items[1].maybe_link_mut()
-            .unwrap()
-            .nested_items[0].maybe_link_mut()
-            .unwrap()
-            .nested_items.len() , 1);
-        println!("{:#?}", links);
-        assert_eq!(*got, vec![1, 2, 1, 1]);
+        let got = parser.parse_link(href).unwrap();
+        assert_eq!(got, should_be);
    }

    #[test]
    fn parse_a_numbered_chapter() {
-        let src = "- [First](./second)";
+        let src = "- [First](./first.md)\n";
+        let link = Link {
+            name: String::from("First"),
+            location: PathBuf::from("./first.md"),
+            number: Some(SectionNumber(vec![1])),
+            ..Default::default()
+        };
+        let should_be = vec![SummaryItem::Link(link)];
+
        let mut parser = SummaryParser::new(src);
        let _ = parser.stream.next();

-        assert_eq!(parser.summary.numbered_chapters.len(), 0);
+        let got = parser.parse_numbered().unwrap();

-        parser.state = State::NumberedChapters(0);
-        parser.step().unwrap();
+        assert_eq!(got, should_be);
+    }

-        assert_eq!(parser.summary.numbered_chapters.len(), 1);
+    #[test]
+    fn parse_nested_numbered_chapters() {
+        let src = "- [First](./first.md)\n  - [Nested](./nested.md)\n- [Second](./second.md)";
+
+        let should_be = vec![
+            SummaryItem::Link(Link {
+                name: String::from("First"),
+                location: PathBuf::from("./first.md"),
+                number: Some(SectionNumber(vec![1])),
+                nested_items: vec![
+                    SummaryItem::Link(Link {
+                        name: String::from("Nested"),
+                        location: PathBuf::from("./nested.md"),
+                        number: Some(SectionNumber(vec![1, 1])),
+                        nested_items: Vec::new(),
+                    }),
+                ],
+            }),
+            SummaryItem::Link(Link {
+                name: String::from("Second"),
+                location: PathBuf::from("./second.md"),
+                number: Some(SectionNumber(vec![2])),
+                nested_items: Vec::new(),
+            }),
+        ];
+
+        let mut parser = SummaryParser::new(src);
+        let _ = parser.stream.next();
+
+        let got = parser.parse_numbered().unwrap();
+
+        assert_eq!(got, should_be);
    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -105,6 +105,10 @@ extern crate serde_json;
 extern crate tempdir;
 extern crate toml;

+#[cfg(test)]
+#[macro_use]
+extern crate pretty_assertions;
+
 mod preprocess;
 pub mod book;
 pub mod config;
@ -131,6 +135,11 @@ pub mod errors {
                description("A subprocess failed")
                display("{}: {}", message, String::from_utf8_lossy(&output.stdout))
            }
+
+            ParseError(line: usize, col: usize, message: String) {
+                description("A SUMMARY.md parsing error")
+                display("Error at line {}, column {}: {}", line, col, message)
+            }
        }
    }

--- a/tests/summary_md_files/the_book-2nd_edition.md
+++ b/tests/summary_md_files/the_book-2nd_edition.md
@ -0,0 +1,130 @@
+# The Rust Programming Language
+
+## Getting started
+
+- [Introduction](ch01-00-introduction.md)
+    - [Installation](ch01-01-installation.md)
+    - [Hello, World!](ch01-02-hello-world.md)
+
+- [Guessing Game Tutorial](ch02-00-guessing-game-tutorial.md)
+
+- [Common Programming Concepts](ch03-00-common-programming-concepts.md)
+    - [Variables and Mutability](ch03-01-variables-and-mutability.md)
+    - [Data Types](ch03-02-data-types.md)
+    - [How Functions Work](ch03-03-how-functions-work.md)
+    - [Comments](ch03-04-comments.md)
+    - [Control Flow](ch03-05-control-flow.md)
+
+- [Understanding Ownership](ch04-00-understanding-ownership.md)
+    - [What is Ownership?](ch04-01-what-is-ownership.md)
+    - [References & Borrowing](ch04-02-references-and-borrowing.md)
+    - [Slices](ch04-03-slices.md)
+
+- [Using Structs to Structure Related Data](ch05-00-structs.md)
+    - [Defining and Instantiating Structs](ch05-01-defining-structs.md)
+    - [An Example Program Using Structs](ch05-02-example-structs.md)
+    - [Method Syntax](ch05-03-method-syntax.md)
+
+- [Enums and Pattern Matching](ch06-00-enums.md)
+    - [Defining an Enum](ch06-01-defining-an-enum.md)
+    - [The `match` Control Flow Operator](ch06-02-match.md)
+    - [Concise Control Flow with `if let`](ch06-03-if-let.md)
+
+## Basic Rust Literacy
+
+- [Modules](ch07-00-modules.md)
+    - [`mod` and the Filesystem](ch07-01-mod-and-the-filesystem.md)
+    - [Controlling Visibility with `pub`](ch07-02-controlling-visibility-with-pub.md)
+    - [Referring to Names in Different Modules](ch07-03-importing-names-with-use.md)
+
+- [Common Collections](ch08-00-common-collections.md)
+    - [Vectors](ch08-01-vectors.md)
+    - [Strings](ch08-02-strings.md)
+    - [Hash Maps](ch08-03-hash-maps.md)
+
+- [Error Handling](ch09-00-error-handling.md)
+    - [Unrecoverable Errors with `panic!`](ch09-01-unrecoverable-errors-with-panic.md)
+    - [Recoverable Errors with `Result`](ch09-02-recoverable-errors-with-result.md)
+    - [To `panic!` or Not To `panic!`](ch09-03-to-panic-or-not-to-panic.md)
+
+- [Generic Types, Traits, and Lifetimes](ch10-00-generics.md)
+    - [Generic Data Types](ch10-01-syntax.md)
+    - [Traits: Defining Shared Behavior](ch10-02-traits.md)
+    - [Validating References with Lifetimes](ch10-03-lifetime-syntax.md)
+
+- [Testing](ch11-00-testing.md)
+    - [Writing tests](ch11-01-writing-tests.md)
+    - [Running tests](ch11-02-running-tests.md)
+    - [Test Organization](ch11-03-test-organization.md)
+
+- [An I/O Project: Building a Command Line Program](ch12-00-an-io-project.md)
+    - [Accepting Command Line Arguments](ch12-01-accepting-command-line-arguments.md)
+    - [Reading a File](ch12-02-reading-a-file.md)
+    - [Refactoring to Improve Modularity and Error Handling](ch12-03-improving-error-handling-and-modularity.md)
+    - [Developing the Library’s Functionality with Test Driven Development](ch12-04-testing-the-librarys-functionality.md)
+    - [Working with Environment Variables](ch12-05-working-with-environment-variables.md)
+    - [Writing Error Messages to Standard Error Instead of Standard Output](ch12-06-writing-to-stderr-instead-of-stdout.md)
+
+## Thinking in Rust
+
+- [Functional Language Features: Iterators and Closures](ch13-00-functional-features.md)
+    - [Closures: Anonymous Functions that Can Capture Their Environment](ch13-01-closures.md)
+    - [Processing a Series of Items with Iterators](ch13-02-iterators.md)
+    - [Improving Our I/O Project](ch13-03-improving-our-io-project.md)
+    - [Comparing Performance: Loops vs. Iterators](ch13-04-performance.md)
+
+- [More about Cargo and Crates.io](ch14-00-more-about-cargo.md)
+    - [Customizing Builds with Release Profiles](ch14-01-release-profiles.md)
+    - [Publishing a Crate to Crates.io](ch14-02-publishing-to-crates-io.md)
+    - [Cargo Workspaces](ch14-03-cargo-workspaces.md)
+    - [Installing Binaries from Crates.io with `cargo install`](ch14-04-installing-binaries.md)
+    - [Extending Cargo with Custom Commands](ch14-05-extending-cargo.md)
+
+- [Smart Pointers](ch15-00-smart-pointers.md)
+    - [`Box<T>` Points to Data on the Heap and Has a Known Size](ch15-01-box.md)
+    - [The `Deref` Trait Allows Access to the Data Through a Reference](ch15-02-deref.md)
+    - [The `Drop` Trait Runs Code on Cleanup](ch15-03-drop.md)
+    - [`Rc<T>`, the Reference Counted Smart Pointer](ch15-04-rc.md)
+    - [`RefCell<T>` and the Interior Mutability Pattern](ch15-05-interior-mutability.md)
+    - [Creating Reference Cycles and Leaking Memory is Safe](ch15-06-reference-cycles.md)
+
+- [Fearless Concurrency](ch16-00-concurrency.md)
+    - [Threads](ch16-01-threads.md)
+    - [Message Passing](ch16-02-message-passing.md)
+    - [Shared State](ch16-03-shared-state.md)
+    - [Extensible Concurrency: `Sync` and `Send`](ch16-04-extensible-concurrency-sync-and-send.md)
+
+- [Is Rust an Object-Oriented Programming Language?](ch17-00-oop.md)
+    - [What Does Object-Oriented Mean?](ch17-01-what-is-oo.md)
+    - [Trait Objects for Using Values of Different Types](ch17-02-trait-objects.md)
+    - [Object-Oriented Design Pattern Implementations](ch17-03-oo-design-patterns.md)
+
+## Advanced Topics
+
+- [Patterns Match the Structure of Values](ch18-00-patterns.md)
+    - [All the Places Patterns May be Used](ch18-01-all-the-places-for-patterns.md)
+    - [Refutability: Whether a Pattern Might Fail to Match](ch18-02-refutability.md)
+    - [All the Pattern Syntax](ch18-03-pattern-syntax.md)
+
+- [Advanced Features](ch19-00-advanced-features.md)
+    - [Unsafe Rust](ch19-01-unsafe-rust.md)
+    - [Advanced Lifetimes](ch19-02-advanced-lifetimes.md)
+    - [Advanced Traits](ch19-03-advanced-traits.md)
+    - [Advanced Types](ch19-04-advanced-types.md)
+    - [Advanced Functions & Closures](ch19-05-advanced-functions-and-closures.md)
+
+- [Final Project: Building a Multithreaded Web Server](ch20-00-final-project-a-web-server.md)
+    - [A Single Threaded Web Server](ch20-01-single-threaded.md)
+    - [How Slow Requests Affect Throughput](ch20-02-slow-requests.md)
+    - [Designing the Thread Pool Interface](ch20-03-designing-the-interface.md)
+    - [Creating the Thread Pool and Storing Threads](ch20-04-storing-threads.md)
+    - [Sending Requests to Threads Via Channels](ch20-05-sending-requests-via-channels.md)
+    - [Graceful Shutdown and Cleanup](ch20-06-graceful-shutdown-and-cleanup.md)
+
+- [Appendix](appendix-00.md)
+    - [A - Keywords](appendix-01-keywords.md)
+    - [B - Operators and Symbols](appendix-02-operators.md)
+    - [C - Derivable Traits](appendix-03-derivable-traits.md)
+    - [D - Macros](appendix-04-macros.md)
+    - [E - Translations](appendix-05-translation.md)
+    - [F - Newest Features](appendix-06-newest-features.md)
--- a/tests/summary_parser.rs
+++ b/tests/summary_parser.rs
@ -1,8 +1,8 @@
 //! Some integration tests to make sure the `SUMMARY.md` parser can deal with
 //! some real-life examples.

-extern crate error_chain;
 extern crate env_logger;
+extern crate error_chain;
 extern crate mdbook;

 use std::fs::File;
@ -46,3 +46,4 @@ macro_rules! summary_md_test {
 summary_md_test!(rust_by_example, "rust_by_example.md");
 summary_md_test!(rust_ffi_guide, "rust_ffi_guide.md");
 summary_md_test!(example_book, "example_book.md");
+summary_md_test!(the_book_2nd_edition, "the_book-2nd_edition.md");