search: fix anchor ids for duplicate headers

2024-12-13 14:22:35 +00:00 · 2022-02-18 15:27:24 +00:00 · 2022-02-18 15:27:24 +00:00 · 972c61fa76
commit 972c61fa76
parent 2213312938
7 changed files with 554 additions and 137 deletions
--- a/src/renderer/html_handlebars/hbs_renderer.rs
+++ b/src/renderer/html_handlebars/hbs_renderer.rs
@ -768,16 +768,7 @@ fn insert_link_into_header(
    content: &str,
    id_counter: &mut HashMap<String, usize>,
 ) -> String {
-    let raw_id = utils::id_from_content(content);
-
-    let id_count = id_counter.entry(raw_id.clone()).or_insert(0);
-
-    let id = match *id_count {
-        0 => raw_id,
-        other => format!("{}-{}", raw_id, other),
-    };
-
-    *id_count += 1;
+    let id = utils::unique_id_from_content(content, id_counter);

    format!(
        r##"<h{level} id="{id}"><a class="header" href="#{id}">{text}</a></h{level}>"##,
--- a/src/renderer/html_handlebars/search.rs
+++ b/src/renderer/html_handlebars/search.rs
@ -97,6 +97,7 @@ fn render_item(

    breadcrumbs.push(chapter.name.clone());

+    let mut id_counter = HashMap::new();
    while let Some(event) = p.next() {
        match event {
            Event::Start(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
@ -120,7 +121,7 @@ fn render_item(
            }
            Event::End(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
                in_heading = false;
-                section_id = Some(utils::id_from_content(&heading));
+                section_id = Some(utils::unique_id_from_content(&heading, &mut id_counter));
                breadcrumbs.push(heading.clone());
            }
            Event::Start(Tag::FootnoteDefinition(name)) => {
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@ -9,6 +9,7 @@ use regex::Regex;
 use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};

 use std::borrow::Cow;
+use std::collections::HashMap;
 use std::fmt::Write;
 use std::path::Path;

@ -44,6 +45,8 @@ pub fn normalize_id(content: &str) -> String {

 /// Generate an ID for use with anchors which is derived from a "normalised"
 /// string.
+// This function should be made private when the deprecation expires.
+#[deprecated(since = "0.4.16", note = "use unique_id_from_content instead")]
 pub fn id_from_content(content: &str) -> String {
    let mut content = content.to_string();

@ -59,10 +62,30 @@ pub fn id_from_content(content: &str) -> String {

    // Remove spaces and hashes indicating a header
    let trimmed = content.trim().trim_start_matches('#').trim();
-
    normalize_id(trimmed)
 }

+/// Generate an ID for use with anchors which is derived from a "normalised"
+/// string.
+///
+/// Each ID returned will be unique, if the same `id_counter` is provided on
+/// each call. This also holds when a header's own normalised id happens to
+/// equal an id that was (or would be) produced by suffixing another header,
+/// e.g. headers normalising to `x`, `x`, `x-1` yield `x`, `x-1`, `x-1-1`.
+///
+/// `id_counter` maps a base id to the next numeric suffix to try for it.
+/// Every id handed out is also recorded as a key, so that it is never
+/// returned a second time.
+pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
+    let base = {
+        #[allow(deprecated)]
+        id_from_content(content)
+    };
+
+    // Resume from the suffix recorded for this base id (0 means "never seen").
+    let mut next = id_counter.get(&base).copied().unwrap_or(0);
+    let unique_id = loop {
+        let candidate = match next {
+            0 => base.clone(),
+            n => format!("{}-{}", base, n),
+        };
+        next += 1;
+
+        // The unsuffixed candidate is only tried when the counter for `base`
+        // is zero, which preserves the original behaviour for first
+        // occurrences. Suffixed candidates are skipped while they collide
+        // with an id that has already been handed out.
+        if next == 1 || !id_counter.contains_key(&candidate) {
+            break candidate;
+        }
+    };
+
+    // Remember where to continue for this base id, and mark the returned id
+    // as taken so a later header normalising to it gets a suffix of its own.
+    id_counter.insert(base, next);
+    id_counter.entry(unique_id.clone()).or_insert(1);
+    unique_id
+}
+
 /// Fix links to the correct location.
 ///
 /// This adjusts links, such as turning `.md` extensions to `.html`.
@ -332,8 +355,9 @@ more text with spaces
        }
    }

-    mod html_munging {
-        use super::super::{id_from_content, normalize_id};
+    #[allow(deprecated)]
+    mod id_from_content {
+        use super::super::id_from_content;

        #[test]
        fn it_generates_anchors() {
@ -361,6 +385,10 @@ more text with spaces
            );
            assert_eq!(id_from_content("## Über"), "Über");
        }
+    }
+
+    mod html_munging {
+        use super::super::{normalize_id, unique_id_from_content};

        #[test]
        fn it_normalizes_ids() {
@ -379,5 +407,28 @@ more text with spaces
            assert_eq!(normalize_id("한국어"), "한국어");
            assert_eq!(normalize_id(""), "");
        }
+
+        #[test]
+        fn it_generates_unique_ids_from_content() {
+            // With a fresh counter on every call, identical content keeps
+            // producing the identical id.
+            for _ in 0..2 {
+                let mut fresh = std::collections::HashMap::new();
+                assert_eq!(
+                    unique_id_from_content("## 中文標題 CJK title", &mut fresh),
+                    "中文標題-cjk-title"
+                );
+            }
+
+            // With one counter shared across calls, repeated content gets an
+            // incrementing numeric suffix while other content is unaffected.
+            let mut shared = std::collections::HashMap::new();
+            let cases = [
+                ("## Über", "Über"),
+                ("## 中文標題 CJK title", "中文標題-cjk-title"),
+                ("## Über", "Über-1"),
+                ("## Über", "Über-2"),
+            ];
+            for &(content, expected) in cases.iter() {
+                assert_eq!(unique_id_from_content(content, &mut shared), expected);
+            }
+        }
    }
 }
--- a/tests/dummy_book/src/SUMMARY.md
+++ b/tests/dummy_book/src/SUMMARY.md
@ -13,6 +13,7 @@
    - [Markdown](first/markdown.md)
    - [Unicode](first/unicode.md)
    - [No Headers](first/no-headers.md)
+    - [Duplicate Headers](first/duplicate-headers.md)
 - [Second Chapter](second.md)
    - [Nested Chapter](second/nested.md)

--- a/tests/dummy_book/src/first/duplicate-headers.md
+++ b/tests/dummy_book/src/first/duplicate-headers.md
@ -0,0 +1,9 @@
+# Duplicate headers
+
+This page validates behaviour of duplicate headers.
+
+# Header Text
+
+# Header Text
+
+# header-text
--- a/tests/rendered_output.rs
+++ b/tests/rendered_output.rs
@ -35,6 +35,7 @@ const TOC_SECOND_LEVEL: &[&str] = &[
    "1.4. Markdown",
    "1.5. Unicode",
    "1.6. No Headers",
+    "1.7. Duplicate Headers",
    "2.1. Nested Chapter",
 ];

@ -633,11 +634,12 @@ mod search {
        let some_section = get_doc_ref("first/index.html#some-section");
        let summary = get_doc_ref("first/includes.html#summary");
        let no_headers = get_doc_ref("first/no-headers.html");
+        let duplicate_headers_1 = get_doc_ref("first/duplicate-headers.html#header-text-1");
        let conclusion = get_doc_ref("conclusion.html#conclusion");

        let bodyidx = &index["index"]["index"]["body"]["root"];
        let textidx = &bodyidx["t"]["e"]["x"]["t"];
-        assert_eq!(textidx["df"], 2);
+        assert_eq!(textidx["df"], 5);
        assert_eq!(textidx["docs"][&first_chapter]["tf"], 1.0);
        assert_eq!(textidx["docs"][&introduction]["tf"], 1.0);

@ -646,7 +648,7 @@ mod search {
        assert_eq!(docs[&some_section]["body"], "");
        assert_eq!(
            docs[&summary]["body"],
-            "Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Second Chapter Nested Chapter Conclusion"
+            "Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Duplicate Headers Second Chapter Nested Chapter Conclusion"
        );
        assert_eq!(
            docs[&summary]["breadcrumbs"],
@ -657,6 +659,10 @@ mod search {
            docs[&no_headers]["breadcrumbs"],
            "First Chapter » No Headers"
        );
+        assert_eq!(
+            docs[&duplicate_headers_1]["breadcrumbs"],
+            "First Chapter » Duplicate Headers » Header Text"
+        );
        assert_eq!(
            docs[&no_headers]["body"],
            "Capybara capybara capybara. Capybara capybara capybara."
--- a/tests/searchindex_fixture.json
+++ b/tests/searchindex_fixture.json