Pass extra for page in sitemap entries

This commit is contained in:
Vincent Prouillet 2019-03-19 20:42:16 +01:00
parent c63b7fde44
commit 2a8d0de532
4 changed files with 141 additions and 106 deletions

View file

@ -18,11 +18,12 @@ rendering it and not anymore on the `page`/`section` variable
- Add Dracula syntax highlighting theme
- Fix using inline styles in headers
- Fix sections with render=false being shown in sitemap
- Sitemap is now split when there are more than 30 000 links in it
- Sitemap is now split when there are more than 30 000 links in it
- Add link to sitemap in robots.txt
- Markdown rendering is now fully CommonMark compliant
- `load_data` now defaults to loading file as plain text, unless `format` is passed
or the extension matches csv/toml/json
- Sitemap entries get an additional `extra` field for pages only
## 0.5.1 (2018-12-14)

View file

@ -19,7 +19,10 @@ extern crate utils;
#[cfg(test)]
extern crate tempfile;
use std::collections::{HashMap, HashSet};
mod sitemap;
use std::collections::{HashMap};
use std::fs::{copy, create_dir_all, remove_dir_all};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, RwLock};
@ -40,20 +43,6 @@ use utils::fs::{copy_directory, create_directory, create_file, ensure_directory_
use utils::net::get_available_port;
use utils::templates::{render_template, rewrite_theme_paths};
/// The sitemap only needs links and potentially date so we trim down
/// all pages to only that
#[derive(Debug, Serialize, Eq, PartialEq, Hash)]
struct SitemapEntry {
permalink: String,
date: Option<String>,
}
impl SitemapEntry {
pub fn new(permalink: String, date: Option<String>) -> SitemapEntry {
SitemapEntry { permalink, date }
}
}
#[derive(Debug)]
pub struct Site {
/// The base path of the zola site
@ -787,98 +776,15 @@ impl Site {
pub fn render_sitemap(&self) -> Result<()> {
ensure_directory_exists(&self.output_path)?;
let pages = self
.library
.read()
.unwrap()
.pages_values()
.iter()
.filter(|p| !p.is_draft())
.map(|p| {
let date = match p.meta.date {
Some(ref d) => Some(d.to_string()),
None => None,
};
SitemapEntry::new(p.permalink.clone(), date)
})
.collect::<Vec<_>>();
let mut sections = self
.library
.read()
.unwrap()
.sections_values()
.iter()
.filter(|s| s.meta.render)
.map(|s| SitemapEntry::new(s.permalink.clone(), None))
.collect::<Vec<_>>();
for section in self
.library
.read()
.unwrap()
.sections_values()
.iter()
.filter(|s| s.meta.paginate_by.is_some())
{
let number_pagers = (section.pages.len() as f64
/ section.meta.paginate_by.unwrap() as f64)
.ceil() as isize;
for i in 1..=number_pagers {
let permalink =
format!("{}{}/{}/", section.permalink, section.meta.paginate_path, i);
sections.push(SitemapEntry::new(permalink, None))
}
}
let mut taxonomies = vec![];
for taxonomy in &self.taxonomies {
let name = &taxonomy.kind.name;
let mut terms = vec![];
terms.push(SitemapEntry::new(self.config.make_permalink(name), None));
for item in &taxonomy.items {
terms.push(SitemapEntry::new(
self.config.make_permalink(&format!("{}/{}", &name, item.slug)),
None,
));
if taxonomy.kind.is_paginated() {
let number_pagers = (item.pages.len() as f64
/ taxonomy.kind.paginate_by.unwrap() as f64)
.ceil() as isize;
for i in 1..=number_pagers {
let permalink = self.config.make_permalink(&format!(
"{}/{}/{}/{}",
name,
item.slug,
taxonomy.kind.paginate_path(),
i
));
terms.push(SitemapEntry::new(permalink, None))
}
}
}
taxonomies.push(terms);
}
let mut all_sitemap_entries = HashSet::new();
for p in pages {
all_sitemap_entries.insert(p);
}
for s in sections {
all_sitemap_entries.insert(s);
}
for terms in taxonomies {
for term in terms {
all_sitemap_entries.insert(term);
}
}
// Count total number of sitemap entries to include in sitemap
let total_number = all_sitemap_entries.len();
let library = self.library.read().unwrap();
let all_sitemap_entries = sitemap::find_entries(
&library,
&self.taxonomies[..],
&self.config,
);
let sitemap_limit = 30000;
if total_number < sitemap_limit {
if all_sitemap_entries.len() < sitemap_limit {
// Create single sitemap
let mut context = Context::new();
context.insert("entries", &all_sitemap_entries);

View file

@ -0,0 +1,127 @@
use std::borrow::Cow;
use std::hash::{Hash, Hasher};
use std::collections::{HashSet};
use tera::{Map, Value};
use config::{Config};
use library::{Library, Taxonomy};
/// The sitemap only needs links, potentially date and extra for pages in case of updates
/// for examples so we trim down all entries to only that
#[derive(Debug, Serialize)]
pub struct SitemapEntry<'a> {
permalink: Cow<'a, str>,
date: Option<String>,
extra: Option<&'a Map<String, Value>>,
}
// Hash/Eq is not implemented for tera::Map but in our case we only care about the permalink
// when comparing/hashing so we implement it manually
impl<'a> Hash for SitemapEntry<'a> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.permalink.hash(state);
}
}
impl<'a> PartialEq for SitemapEntry<'a> {
fn eq(&self, other: &SitemapEntry) -> bool {
self.permalink == other.permalink
}
}
impl<'a> Eq for SitemapEntry<'a> {}
impl<'a> SitemapEntry<'a> {
pub fn new(permalink: Cow<'a, str>, date: Option<String>) -> Self {
SitemapEntry { permalink, date, extra: None }
}
pub fn add_extra(&mut self, extra: &'a Map<String, Value>) {
self.extra = Some(extra);
}
}
/// Finds out all the links to put in a sitemap from the pages/sections/taxonomies
/// There are no duplicate permalinks in the output vec
pub fn find_entries<'a>(library: &'a Library, taxonomies: &'a [Taxonomy], config: &'a Config) -> Vec<SitemapEntry<'a>> {
let pages = library
.pages_values()
.iter()
.filter(|p| !p.is_draft())
.map(|p| {
let date = match p.meta.date {
Some(ref d) => Some(d.to_string()),
None => None,
};
let mut entry = SitemapEntry::new(Cow::Borrowed(&p.permalink), date);
entry.add_extra(&p.meta.extra);
entry
})
.collect::<Vec<_>>();
let mut sections = library
.sections_values()
.iter()
.filter(|s| s.meta.render)
.map(|s| SitemapEntry::new(Cow::Borrowed(&s.permalink), None))
.collect::<Vec<_>>();
for section in library
.sections_values()
.iter()
.filter(|s| s.meta.paginate_by.is_some())
{
let number_pagers = (section.pages.len() as f64
/ section.meta.paginate_by.unwrap() as f64)
.ceil() as isize;
for i in 1..=number_pagers {
let permalink =
format!("{}{}/{}/", section.permalink, section.meta.paginate_path, i);
sections.push(SitemapEntry::new(Cow::Owned(permalink), None))
}
}
let mut taxonomies_entries = vec![];
for taxonomy in taxonomies {
let name = &taxonomy.kind.name;
let mut terms = vec![];
terms.push(SitemapEntry::new(Cow::Owned(config.make_permalink(name)), None));
for item in &taxonomy.items {
terms.push(SitemapEntry::new(
Cow::Owned(config.make_permalink(&format!("{}/{}", name, item.slug))),
None,
));
if taxonomy.kind.is_paginated() {
let number_pagers = (item.pages.len() as f64
/ taxonomy.kind.paginate_by.unwrap() as f64)
.ceil() as isize;
for i in 1..=number_pagers {
let permalink = config.make_permalink(&format!(
"{}/{}/{}/{}",
name,
item.slug,
taxonomy.kind.paginate_path(),
i
));
terms.push(SitemapEntry::new(Cow::Owned(permalink), None))
}
}
}
taxonomies_entries.push(terms);
}
let mut all_sitemap_entries = HashSet::new();
for p in pages {
all_sitemap_entries.insert(p);
}
for s in sections {
all_sitemap_entries.insert(s);
}
for terms in taxonomies_entries {
for term in terms {
all_sitemap_entries.insert(term);
}
}
all_sitemap_entries.into_iter().collect::<Vec<_>>()
}

View file

@ -25,6 +25,7 @@ A `SitemapEntry` has the following fields:
```ts
permalink: String;
date: String?;
extra: Hashmap<String, Any>?;
```
The `split_sitemap_index.xml` also gets a single variable: