search: fix anchor ids for duplicate headers

This commit is contained in:
Tom Milligan 2022-02-18 15:27:24 +00:00
parent 2213312938
commit 972c61fa76
No known key found for this signature in database
GPG key ID: BFF10895C45328D2
7 changed files with 554 additions and 137 deletions

View file

@ -768,16 +768,7 @@ fn insert_link_into_header(
content: &str,
id_counter: &mut HashMap<String, usize>,
) -> String {
let raw_id = utils::id_from_content(content);
let id_count = id_counter.entry(raw_id.clone()).or_insert(0);
let id = match *id_count {
0 => raw_id,
other => format!("{}-{}", raw_id, other),
};
*id_count += 1;
let id = utils::unique_id_from_content(content, id_counter);
format!(
r##"<h{level} id="{id}"><a class="header" href="#{id}">{text}</a></h{level}>"##,

View file

@ -97,6 +97,7 @@ fn render_item(
breadcrumbs.push(chapter.name.clone());
let mut id_counter = HashMap::new();
while let Some(event) = p.next() {
match event {
Event::Start(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
@ -120,7 +121,7 @@ fn render_item(
}
Event::End(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
in_heading = false;
section_id = Some(utils::id_from_content(&heading));
section_id = Some(utils::unique_id_from_content(&heading, &mut id_counter));
breadcrumbs.push(heading.clone());
}
Event::Start(Tag::FootnoteDefinition(name)) => {

View file

@ -9,6 +9,7 @@ use regex::Regex;
use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Write;
use std::path::Path;
@ -44,6 +45,8 @@ pub fn normalize_id(content: &str) -> String {
/// Generate an ID for use with anchors which is derived from a "normalised"
/// string.
// This function should be made private when the deprecation expires.
#[deprecated(since = "0.4.16", note = "use unique_id_from_content instead")]
pub fn id_from_content(content: &str) -> String {
let mut content = content.to_string();
@ -59,10 +62,30 @@ pub fn id_from_content(content: &str) -> String {
// Remove spaces and hashes indicating a header
let trimmed = content.trim().trim_start_matches('#').trim();
normalize_id(trimmed)
}
/// Generate an ID for use with anchors which is derived from a "normalised"
/// string.
///
/// Each ID returned will be unique, if the same `id_counter` is provided on
/// each call.
pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
let id = {
#[allow(deprecated)]
id_from_content(content)
};
// If we have headers with the same normalized id, append an incrementing counter
let id_count = id_counter.entry(id.clone()).or_insert(0);
let unique_id = match *id_count {
0 => id,
id_count => format!("{}-{}", id, id_count),
};
*id_count += 1;
unique_id
}
/// Fix links to the correct location.
///
/// This adjusts links, such as turning `.md` extensions to `.html`.
@ -332,8 +355,9 @@ more text with spaces
}
}
mod html_munging {
use super::super::{id_from_content, normalize_id};
#[allow(deprecated)]
mod id_from_content {
use super::super::id_from_content;
#[test]
fn it_generates_anchors() {
@ -361,6 +385,10 @@ more text with spaces
);
assert_eq!(id_from_content("## Über"), "Über");
}
}
mod html_munging {
use super::super::{normalize_id, unique_id_from_content};
#[test]
fn it_normalizes_ids() {
@ -379,5 +407,28 @@ more text with spaces
assert_eq!(normalize_id("한국어"), "한국어");
assert_eq!(normalize_id(""), "");
}
#[test]
fn it_generates_unique_ids_from_content() {
// Same id if not given shared state
assert_eq!(
unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
"中文標題-cjk-title"
);
assert_eq!(
unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
"中文標題-cjk-title"
);
// Different id if given shared state
let mut id_counter = Default::default();
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über");
assert_eq!(
unique_id_from_content("## 中文標題 CJK title", &mut id_counter),
"中文標題-cjk-title"
);
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1");
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2");
}
}
}

View file

@ -13,6 +13,7 @@
- [Markdown](first/markdown.md)
- [Unicode](first/unicode.md)
- [No Headers](first/no-headers.md)
- [Duplicate Headers](first/duplicate-headers.md)
- [Second Chapter](second.md)
- [Nested Chapter](second/nested.md)

View file

@ -0,0 +1,9 @@
# Duplicate headers
This page validates behaviour of duplicate headers.
# Header Text
# Header Text
# header-text

View file

@ -35,6 +35,7 @@ const TOC_SECOND_LEVEL: &[&str] = &[
"1.4. Markdown",
"1.5. Unicode",
"1.6. No Headers",
"1.7. Duplicate Headers",
"2.1. Nested Chapter",
];
@ -633,11 +634,12 @@ mod search {
let some_section = get_doc_ref("first/index.html#some-section");
let summary = get_doc_ref("first/includes.html#summary");
let no_headers = get_doc_ref("first/no-headers.html");
let duplicate_headers_1 = get_doc_ref("first/duplicate-headers.html#header-text-1");
let conclusion = get_doc_ref("conclusion.html#conclusion");
let bodyidx = &index["index"]["index"]["body"]["root"];
let textidx = &bodyidx["t"]["e"]["x"]["t"];
assert_eq!(textidx["df"], 2);
assert_eq!(textidx["df"], 5);
assert_eq!(textidx["docs"][&first_chapter]["tf"], 1.0);
assert_eq!(textidx["docs"][&introduction]["tf"], 1.0);
@ -646,7 +648,7 @@ mod search {
assert_eq!(docs[&some_section]["body"], "");
assert_eq!(
docs[&summary]["body"],
"Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Second Chapter Nested Chapter Conclusion"
"Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Duplicate Headers Second Chapter Nested Chapter Conclusion"
);
assert_eq!(
docs[&summary]["breadcrumbs"],
@ -657,6 +659,10 @@ mod search {
docs[&no_headers]["breadcrumbs"],
"First Chapter » No Headers"
);
assert_eq!(
docs[&duplicate_headers_1]["breadcrumbs"],
"First Chapter » Duplicate Headers » Header Text"
);
assert_eq!(
docs[&no_headers]["body"],
"Capybara capybara capybara. Capybara capybara capybara."

File diff suppressed because it is too large Load diff