Fix shortcode/continue-reading parsing with inline HTML (smaller version) (#2606)

* Fix shortcode/continue-reading parsing with inline HTML (smaller version)

* Remove inline <!-- more -->
This commit is contained in:
Clar Fon 2024-08-15 08:19:28 -04:00 committed by Vincent Prouillet
parent 286b88e268
commit 59d70749af
8 changed files with 105 additions and 31 deletions

View file

@ -31,10 +31,6 @@ static RFC3339_DATE: Lazy<Regex> = Lazy::new(|| {
).unwrap()
});
static FOOTNOTES_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"<sup class="footnote-reference"><a href=\s*.*?>\s*.*?</a></sup>"#).unwrap()
});
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Page {
/// All info about the actual file
@ -232,10 +228,7 @@ impl Page {
let res = render_content(&self.raw_content, &context)
.with_context(|| format!("Failed to render content of {}", self.file.path.display()))?;
self.summary = res
.summary_len
.map(|l| &res.body[0..l])
.map(|s| FOOTNOTES_RE.replace_all(s, "").into_owned());
self.summary = res.summary;
self.content = res.body;
self.toc = res.toc;
self.external_links = res.external_links;

View file

@ -36,6 +36,10 @@ static MORE_DIVIDER_RE: Lazy<Regex> = Lazy::new(|| {
.unwrap()
});
/// Matches the HTML of a rendered footnote reference: a `<sup class="footnote-reference">`
/// element wrapping a single `<a href=...>` link. It is used to strip footnote markers out of
/// the summary HTML. Both `.*?` groups are lazy so each match ends at the nearest closing tag.
static FOOTNOTES_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"<sup class="footnote-reference"><a href=\s*.*?>\s*.*?</a></sup>"#).unwrap()
});
/// Although there exists [a list of registered URI schemes][uri-schemes], a link may use arbitrary,
/// private schemes. This regex checks if the given string starts with something that just looks
/// like a scheme, i.e., a case-insensitive identifier followed by a colon.
@ -78,7 +82,7 @@ fn is_colocated_asset_link(link: &str) -> bool {
#[derive(Debug)]
pub struct Rendered {
pub body: String,
pub summary_len: Option<usize>,
pub summary: Option<String>,
pub toc: Vec<Heading>,
/// Links to site-local pages: relative path plus optional anchor target.
pub internal_links: Vec<(String, Option<String>)>,
@ -405,6 +409,7 @@ pub fn markdown_to_html(
.map(|x| x.as_object().unwrap().get("relative_path").unwrap().as_str().unwrap());
// the rendered html
let mut html = String::with_capacity(content.len());
let mut summary = None;
// Set while parsing
let mut error = None;
@ -679,17 +684,13 @@ pub fn markdown_to_html(
event
});
}
Event::Html(text) => {
if !has_summary && MORE_DIVIDER_RE.is_match(&text) {
Event::Html(text) if !has_summary && MORE_DIVIDER_RE.is_match(text.as_ref()) => {
has_summary = true;
events.push(Event::Html(CONTINUE_READING.into()));
continue;
}
if !contains_shortcode(text.as_ref()) {
events.push(Event::Html(text));
continue;
}
Event::Html(text) | Event::InlineHtml(text)
if contains_shortcode(text.as_ref()) =>
{
render_shortcodes!(false, text, range);
}
_ => events.push(event),
@ -781,14 +782,31 @@ pub fn markdown_to_html(
convert_footnotes_to_github_style(&mut events);
}
cmark::html::push_html(&mut html, events.into_iter());
let continue_reading = events
.iter()
.position(|e| matches!(e, Event::Html(CowStr::Borrowed(CONTINUE_READING))))
.unwrap_or(events.len());
let mut events = events.into_iter();
// emit everything up to summary
cmark::html::push_html(&mut html, events.by_ref().take(continue_reading));
if has_summary {
// remove footnotes
let summary_html = FOOTNOTES_RE.replace_all(&html, "").into_owned();
summary = Some(summary_html)
}
// emit everything after summary
cmark::html::push_html(&mut html, events);
}
if let Some(e) = error {
Err(e)
} else {
Ok(Rendered {
summary_len: if has_summary { html.find(CONTINUE_READING) } else { None },
summary,
body: html,
toc: make_table_of_contents(headings),
internal_links,
@ -861,10 +879,10 @@ mod tests {
for more in mores {
let content = format!("{top}\n\n{more}\n\n{bottom}");
let rendered = markdown_to_html(&content, &context, vec![]).unwrap();
assert!(rendered.summary_len.is_some(), "no summary when splitting on {more}");
let summary_len = rendered.summary_len.unwrap();
let summary = &rendered.body[..summary_len].trim();
let body = &rendered.body[summary_len..].trim();
assert!(rendered.summary.is_some(), "no summary when splitting on {more}");
let summary = rendered.summary.unwrap();
let summary = summary.trim();
let body = rendered.body[summary.len()..].trim();
let continue_reading = &body[..CONTINUE_READING.len()];
let body = &body[CONTINUE_READING.len()..].trim();
assert_eq!(summary, &top_rendered);

View file

@ -311,3 +311,15 @@ fn can_use_shortcodes_in_quotes() {
.body;
insta::assert_snapshot!(body);
}
/// Regression test: a shortcode call wrapped in inline HTML (here a `<span>`) must still be
/// expanded rather than passed through verbatim. The result is checked against an insta
/// snapshot, so the input string below must not be reformatted.
#[test]
fn can_render_with_inline_html() {
let body = common::render(
r#"
Here is <span>{{ ex1(page="") }}</span> example.
"#,
)
.unwrap()
.body;
insta::assert_snapshot!(body);
}

View file

@ -0,0 +1,5 @@
---
source: components/markdown/tests/shortcodes.rs
expression: body
---
<p>Here is <span>1</span> example.</p>

View file

@ -0,0 +1,5 @@
---
source: components/markdown/tests/summary.rs
expression: body
---
<p>Hello world.</p>

View file

@ -0,0 +1,10 @@
---
source: components/markdown/tests/summary.rs
expression: rendered.body
---
<p>Things to do:</p>
<ul>
<li>Program <!-- more --> something</li>
<li>Eat</li>
<li>Sleep</li>
</ul>

View file

@ -1,10 +1,11 @@
mod common;
fn get_summary(content: &str) -> String {
let rendered = common::render(content).unwrap();
assert!(rendered.summary_len.is_some());
let summary_len = rendered.summary_len.unwrap();
rendered.body[..summary_len].to_owned()
get_rendered(content).summary.expect("had no summary")
}
/// Renders `content` through the shared test helper and returns the full `Rendered` result,
/// panicking with "couldn't render" if rendering fails.
fn get_rendered(content: &str) -> markdown::Rendered {
common::render(content).expect("couldn't render")
}
#[test]
@ -45,3 +46,33 @@ And some content after
);
insta::assert_snapshot!(body);
}
/// A `<!-- more -->` marker that appears inline (inside a list item) must not split the page:
/// no summary is produced, and the body is compared against an insta snapshot.
#[test]
fn no_truncated_summary() {
let rendered = get_rendered(
r#"
Things to do:
* Program <!-- more --> something
* Eat
* Sleep
"#,
);
// An inline marker is not a summary divider, so there must be no summary at all.
assert!(rendered.summary.is_none());
insta::assert_snapshot!(rendered.body);
}
/// The summary of a page whose first paragraph contains a footnote reference is checked
/// against an insta snapshot; the input places the `[^1]` reference before the
/// `<!-- more -->` divider and its definition after it.
#[test]
fn footnotes_summary() {
let body = get_summary(
r#"
Hello world[^1].
<!-- more -->
Good bye.
[^1]: "World" is a placeholder.
"#,
);
insta::assert_snapshot!(body);
}

View file

@ -155,7 +155,7 @@ template = "page.html"
You can ask Zola to create a summary if, for example, you only want to show the first
paragraph of the page content in a list.
To do so, add <code>&lt;!-- more --&gt;</code> in your content at the point
To do so, add `<!-- more -->` in your content at the point
where you want the summary to end. The content up to that point will be
available separately in the
[template](@/documentation/templates/pages-sections.md#page-variables) via `page.summary`.