Fix shortcode/continue-reading parsing with inline HTML (smaller version) (#2606)

* Fix shortcode/continue-reading parsing with inline HTML (smaller version)

* Remove inline <!-- more -->
This commit is contained in:
Clar Fon 2024-08-15 08:19:28 -04:00 committed by Vincent Prouillet
parent 286b88e268
commit 59d70749af
8 changed files with 105 additions and 31 deletions

View file

@ -31,10 +31,6 @@ static RFC3339_DATE: Lazy<Regex> = Lazy::new(|| {
).unwrap() ).unwrap()
}); });
static FOOTNOTES_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"<sup class="footnote-reference"><a href=\s*.*?>\s*.*?</a></sup>"#).unwrap()
});
#[derive(Clone, Debug, Default, PartialEq, Eq)] #[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Page { pub struct Page {
/// All info about the actual file /// All info about the actual file
@ -232,10 +228,7 @@ impl Page {
let res = render_content(&self.raw_content, &context) let res = render_content(&self.raw_content, &context)
.with_context(|| format!("Failed to render content of {}", self.file.path.display()))?; .with_context(|| format!("Failed to render content of {}", self.file.path.display()))?;
self.summary = res self.summary = res.summary;
.summary_len
.map(|l| &res.body[0..l])
.map(|s| FOOTNOTES_RE.replace_all(s, "").into_owned());
self.content = res.body; self.content = res.body;
self.toc = res.toc; self.toc = res.toc;
self.external_links = res.external_links; self.external_links = res.external_links;

View file

@ -36,6 +36,10 @@ static MORE_DIVIDER_RE: Lazy<Regex> = Lazy::new(|| {
.unwrap() .unwrap()
}); });
/// Matches the HTML of a rendered footnote reference: a
/// `<sup class="footnote-reference">` element wrapping a single `<a href=...>` link.
/// Used to strip footnote markers out of the summary HTML.
static FOOTNOTES_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"<sup class="footnote-reference"><a href=\s*.*?>\s*.*?</a></sup>"#).unwrap()
});
/// Although there exists [a list of registered URI schemes][uri-schemes], a link may use arbitrary, /// Although there exists [a list of registered URI schemes][uri-schemes], a link may use arbitrary,
/// private schemes. This regex checks if the given string starts with something that just looks /// private schemes. This regex checks if the given string starts with something that just looks
/// like a scheme, i.e., a case-insensitive identifier followed by a colon. /// like a scheme, i.e., a case-insensitive identifier followed by a colon.
@ -78,7 +82,7 @@ fn is_colocated_asset_link(link: &str) -> bool {
#[derive(Debug)] #[derive(Debug)]
pub struct Rendered { pub struct Rendered {
pub body: String, pub body: String,
pub summary_len: Option<usize>, pub summary: Option<String>,
pub toc: Vec<Heading>, pub toc: Vec<Heading>,
/// Links to site-local pages: relative path plus optional anchor target. /// Links to site-local pages: relative path plus optional anchor target.
pub internal_links: Vec<(String, Option<String>)>, pub internal_links: Vec<(String, Option<String>)>,
@ -405,6 +409,7 @@ pub fn markdown_to_html(
.map(|x| x.as_object().unwrap().get("relative_path").unwrap().as_str().unwrap()); .map(|x| x.as_object().unwrap().get("relative_path").unwrap().as_str().unwrap());
// the rendered html // the rendered html
let mut html = String::with_capacity(content.len()); let mut html = String::with_capacity(content.len());
let mut summary = None;
// Set while parsing // Set while parsing
let mut error = None; let mut error = None;
@ -679,17 +684,13 @@ pub fn markdown_to_html(
event event
}); });
} }
Event::Html(text) => { Event::Html(text) if !has_summary && MORE_DIVIDER_RE.is_match(text.as_ref()) => {
if !has_summary && MORE_DIVIDER_RE.is_match(&text) { has_summary = true;
has_summary = true; events.push(Event::Html(CONTINUE_READING.into()));
events.push(Event::Html(CONTINUE_READING.into())); }
continue; Event::Html(text) | Event::InlineHtml(text)
} if contains_shortcode(text.as_ref()) =>
if !contains_shortcode(text.as_ref()) { {
events.push(Event::Html(text));
continue;
}
render_shortcodes!(false, text, range); render_shortcodes!(false, text, range);
} }
_ => events.push(event), _ => events.push(event),
@ -781,14 +782,31 @@ pub fn markdown_to_html(
convert_footnotes_to_github_style(&mut events); convert_footnotes_to_github_style(&mut events);
} }
cmark::html::push_html(&mut html, events.into_iter()); let continue_reading = events
.iter()
.position(|e| matches!(e, Event::Html(CowStr::Borrowed(CONTINUE_READING))))
.unwrap_or(events.len());
let mut events = events.into_iter();
// emit everything up to summary
cmark::html::push_html(&mut html, events.by_ref().take(continue_reading));
if has_summary {
// remove footnotes
let summary_html = FOOTNOTES_RE.replace_all(&html, "").into_owned();
summary = Some(summary_html)
}
// emit everything after summary
cmark::html::push_html(&mut html, events);
} }
if let Some(e) = error { if let Some(e) = error {
Err(e) Err(e)
} else { } else {
Ok(Rendered { Ok(Rendered {
summary_len: if has_summary { html.find(CONTINUE_READING) } else { None }, summary,
body: html, body: html,
toc: make_table_of_contents(headings), toc: make_table_of_contents(headings),
internal_links, internal_links,
@ -861,10 +879,10 @@ mod tests {
for more in mores { for more in mores {
let content = format!("{top}\n\n{more}\n\n{bottom}"); let content = format!("{top}\n\n{more}\n\n{bottom}");
let rendered = markdown_to_html(&content, &context, vec![]).unwrap(); let rendered = markdown_to_html(&content, &context, vec![]).unwrap();
assert!(rendered.summary_len.is_some(), "no summary when splitting on {more}"); assert!(rendered.summary.is_some(), "no summary when splitting on {more}");
let summary_len = rendered.summary_len.unwrap(); let summary = rendered.summary.unwrap();
let summary = &rendered.body[..summary_len].trim(); let summary = summary.trim();
let body = &rendered.body[summary_len..].trim(); let body = rendered.body[summary.len()..].trim();
let continue_reading = &body[..CONTINUE_READING.len()]; let continue_reading = &body[..CONTINUE_READING.len()];
let body = &body[CONTINUE_READING.len()..].trim(); let body = &body[CONTINUE_READING.len()..].trim();
assert_eq!(summary, &top_rendered); assert_eq!(summary, &top_rendered);

View file

@ -311,3 +311,15 @@ fn can_use_shortcodes_in_quotes() {
.body; .body;
insta::assert_snapshot!(body); insta::assert_snapshot!(body);
} }
/// Renders a paragraph in which a shortcode call (`ex1`) sits inside an inline
/// `<span>` element, then snapshots the resulting body. Covers shortcodes that
/// arrive wrapped in inline HTML rather than as an HTML block.
#[test]
fn can_render_with_inline_html() {
let body = common::render(
r#"
Here is <span>{{ ex1(page="") }}</span> example.
"#,
)
.unwrap()
.body;
// The body is compared against the stored insta snapshot.
insta::assert_snapshot!(body);
}

View file

@ -0,0 +1,5 @@
---
source: components/markdown/tests/shortcodes.rs
expression: body
---
<p>Here is <span>1</span> example.</p>

View file

@ -0,0 +1,5 @@
---
source: components/markdown/tests/summary.rs
expression: body
---
<p>Hello world.</p>

View file

@ -0,0 +1,10 @@
---
source: components/markdown/tests/summary.rs
expression: rendered.body
---
<p>Things to do:</p>
<ul>
<li>Program <!-- more --> something</li>
<li>Eat</li>
<li>Sleep</li>
</ul>

View file

@ -1,10 +1,11 @@
mod common; mod common;
fn get_summary(content: &str) -> String { fn get_summary(content: &str) -> String {
let rendered = common::render(content).unwrap(); get_rendered(content).summary.expect("had no summary")
assert!(rendered.summary_len.is_some()); }
let summary_len = rendered.summary_len.unwrap();
rendered.body[..summary_len].to_owned() fn get_rendered(content: &str) -> markdown::Rendered {
common::render(content).expect("couldn't render")
} }
#[test] #[test]
@ -45,3 +46,33 @@ And some content after
); );
insta::assert_snapshot!(body); insta::assert_snapshot!(body);
} }
/// A `<!-- more -->` marker placed inline (here in the middle of a list item)
/// must not be treated as a summary divider: the render is expected to yield
/// no summary at all, and the body is snapshotted as rendered.
#[test]
fn no_truncated_summary() {
let rendered = get_rendered(
r#"
Things to do:
* Program <!-- more --> something
* Eat
* Sleep
"#,
);
// No summary should be produced for an inline marker.
assert!(rendered.summary.is_none());
insta::assert_snapshot!(rendered.body);
}
/// Snapshots the summary of a page whose text before the `<!-- more -->`
/// divider contains a footnote reference (`[^1]`). `get_summary` panics if the
/// render produced no summary.
#[test]
fn footnotes_summary() {
// Despite the local name, this holds the summary string, not the full body.
let body = get_summary(
r#"
Hello world[^1].
<!-- more -->
Good bye.
[^1]: "World" is a placeholder.
"#,
);
insta::assert_snapshot!(body);
}

View file

@ -155,7 +155,7 @@ template = "page.html"
You can ask Zola to create a summary if, for example, you only want to show the first You can ask Zola to create a summary if, for example, you only want to show the first
paragraph of the page content in a list. paragraph of the page content in a list.
To do so, add <code>&lt;!-- more --&gt;</code> in your content at the point To do so, add `<!-- more -->` in your content at the point
where you want the summary to end. The content up to that point will be where you want the summary to end. The content up to that point will be
available separately in the available separately in the
[template](@/documentation/templates/pages-sections.md#page-variables) via `page.summary`. [template](@/documentation/templates/pages-sections.md#page-variables) via `page.summary`.