Improve sorting speed

Vincent Prouillet 2017-07-01 19:13:21 +09:00
parent c3b525745e
commit 37af36186a
8 changed files with 214 additions and 78 deletions

View file

@@ -6,6 +6,7 @@
 - Add weight sorting
 - Remove `section` from the `page` rendering context: this is too expensive. Use
   the global function `get_section` if you need to get it
+- Fix next/previous in pagination being incorrect

 ## 0.0.7 (2017-06-19)

View file

@@ -35,7 +35,7 @@ install:
 test_script:
   # we don't run the "test phase" when doing deploys
   - if [%APPVEYOR_REPO_TAG%]==[false] (
-      cargo test --target %TARGET%
+      cargo test --all --target %TARGET%
     )

 before_deploy:

View file

@@ -11,8 +11,8 @@ main() {
         return
     fi

-    cross test --target $TARGET
-    cross test --target $TARGET --release
+    cross test --all --target $TARGET
+    cross test --all --target $TARGET --release
 }

 # we don't run the "test phase" when doing deploys

View file

@@ -0,0 +1,135 @@
#![feature(test)]
extern crate test;
extern crate tera;

extern crate content;
extern crate front_matter;
extern crate config;

use std::collections::HashMap;

use config::Config;
use tera::Tera;
use front_matter::{SortBy, InsertAnchor};
use content::{Page, sort_pages, populate_previous_and_next_pages};

fn create_pages(number: usize, sort_by: SortBy) -> Vec<Page> {
    let mut pages = vec![];
    let config = Config::default();
    let tera = Tera::default();
    let permalinks = HashMap::new();

    for i in 0..number {
        let mut page = Page::default();
        match sort_by {
            SortBy::Weight => { page.meta.weight = Some(i); },
            SortBy::Order => { page.meta.order = Some(i); },
            _ => (),
        };
        page.raw_content = r#"
# Modus cognitius profanam ne duae virtutis mundi

## Ut vita

Lorem markdownum litora, care ponto nomina, et ut aspicit gelidas sui et
purpureo genuit. Tamen colla venientis [delphina](http://nil-sol.com/ecquis)
Tusci et temptata citaeque curam isto ubi vult vulnere reppulit.

- Seque vidit flendoque de quodam
- Dabit minimos deiecto caputque noctis pluma
- Leti coniunx est Helicen
- Illius pulvereumque Icare inpositos
- Vivunt pereo pluvio tot ramos Olenios gelidis
- Quater teretes natura inde

### A subsection

Protinus dicunt, breve per, et vivacis genus Orphei munere. Me terram [dimittere
casside](http://corpus.org/) pervenit saxo primoque frequentat genuum sorori
praeferre causas Libys. Illud in serpit adsuetam utrimque nunc haberent,
**terrae si** veni! Hectoreis potes sumite [Mavortis retusa](http://tua.org/)
granum captantur potuisse Minervae, frugum.

> Clivo sub inprovisoque nostrum minus fama est, discordia patrem petebat precatur
absumitur, poena per sit. Foramina *tamen cupidine* memor supplex tollentes
dictum unam orbem, Anubis caecae. Viderat formosior tegebat satis, Aethiopasque
sit submisso coniuge tristis ubi!

## Praeceps Corinthus totidem quem crus vultum cape

```rs
#[derive(Debug)]
pub struct Site {
    /// The base path of the gutenberg site
    pub base_path: PathBuf,
    /// The parsed config for the site
    pub config: Config,
    pub pages: HashMap<PathBuf, Page>,
    pub sections: HashMap<PathBuf, Section>,
    pub tera: Tera,
    live_reload: bool,
    output_path: PathBuf,
    static_path: PathBuf,
    pub tags: Option<Taxonomy>,
    pub categories: Option<Taxonomy>,
    /// A map of all .md files (section and pages) and their permalink
    /// We need that if there are relative links in the content that need to be resolved
    pub permalinks: HashMap<String, String>,
}
```

## More stuff
And a shortcode:

{{ youtube(id="my_youtube_id") }}

### Another subsection
Gotta make the toc do a little bit of work

# A big title

- hello
- world
- !

```py
if __name__ == "__main__":
    gen_site("basic-blog", [""], 250, paginate=True)
```
"#.to_string();
        page.render_markdown(&permalinks, &tera, &config, InsertAnchor::None).unwrap();
        pages.push(page);
    }

    pages
}

// Most of the time spent in those benches are due to the .clone()...
// but i don't know how to remove them so there are some baseline bench with
// just the cloning and with a bit of math we can figure it out
#[bench]
fn bench_baseline_cloning(b: &mut test::Bencher) {
    let pages = create_pages(250, SortBy::Order);
    b.iter(|| pages.clone());
}

#[bench]
fn bench_sorting_none(b: &mut test::Bencher) {
    let pages = create_pages(250, SortBy::Order);
    b.iter(|| sort_pages(pages.clone(), SortBy::None));
}

#[bench]
fn bench_sorting_order(b: &mut test::Bencher) {
    let pages = create_pages(250, SortBy::Order);
    b.iter(|| sort_pages(pages.clone(), SortBy::Order));
}

#[bench]
fn bench_populate_previous_and_next_pages(b: &mut test::Bencher) {
    let pages = create_pages(250, SortBy::Order);
    let (sorted_pages, _) = sort_pages(pages, SortBy::Order);
    b.iter(|| populate_previous_and_next_pages(sorted_pages.clone()));
}
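Reading these benches: each `b.iter` closure pays the same per-iteration clone cost that `bench_baseline_cloning` measures on its own, so the "bit of math" the comment refers to is just a subtraction; roughly,

    t(sorting only) ≈ t(bench_sorting_order) - t(bench_baseline_cloning)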

View file

@@ -3,76 +3,60 @@ use rayon::prelude::*;
 use page::Page;
 use front_matter::SortBy;

-/// Sort pages using the method for the given section
+/// Sort pages by the given criteria
 ///
-/// Any pages that doesn't have a date when the sorting method is date or order
-/// when the sorting method is order will be ignored.
+/// Any pages that doesn't have a the required field when the sorting method is other than none
+/// will be ignored.
 pub fn sort_pages(pages: Vec<Page>, sort_by: SortBy) -> (Vec<Page>, Vec<Page>) {
-    match sort_by {
-        SortBy::Date => {
-            let mut can_be_sorted = vec![];
-            let mut cannot_be_sorted = vec![];
-            for page in pages {
-                if page.meta.date.is_some() {
-                    can_be_sorted.push(page);
-                } else {
-                    cannot_be_sorted.push(page);
-                }
-            }
-            can_be_sorted.par_sort_unstable_by(|a, b| b.meta.date().unwrap().cmp(&a.meta.date().unwrap()));
-            (can_be_sorted, cannot_be_sorted)
-        },
-        SortBy::Order | SortBy::Weight => {
-            let mut can_be_sorted = vec![];
-            let mut cannot_be_sorted = vec![];
-            for page in pages {
-                if page.meta.order.is_some() {
-                    can_be_sorted.push(page);
-                } else {
-                    cannot_be_sorted.push(page);
-                }
-            }
-            if sort_by == SortBy::Order {
-                can_be_sorted.par_sort_unstable_by(|a, b| b.meta.order().cmp(&a.meta.order()));
-            } else {
-                can_be_sorted.par_sort_unstable_by(|a, b| a.meta.order().cmp(&b.meta.order()));
-            }
-            (can_be_sorted, cannot_be_sorted)
-        },
-        SortBy::None => (pages, vec![])
+    if sort_by == SortBy::None {
+        return (pages, vec![]);
     }
+
+    let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) = pages
+        .into_par_iter()
+        .partition(|ref page| {
+            match sort_by {
+                SortBy::Date => page.meta.date.is_some(),
+                SortBy::Order => page.meta.order.is_some(),
+                SortBy::Weight => page.meta.weight.is_some(),
+                _ => unreachable!()
+            }
+        });
+
+    match sort_by {
+        SortBy::Date => can_be_sorted.par_sort_unstable_by(|a, b| b.meta.date().unwrap().cmp(&a.meta.date().unwrap())),
+        SortBy::Order => can_be_sorted.par_sort_unstable_by(|a, b| b.meta.order().cmp(&a.meta.order())),
+        SortBy::Weight => can_be_sorted.par_sort_unstable_by(|a, b| a.meta.weight().cmp(&b.meta.weight())),
+        _ => unreachable!()
+    };
+
+    (can_be_sorted, cannot_be_sorted)
 }

 /// Horribly inefficient way to set previous and next on each pages
 /// So many clones
-pub fn populate_previous_and_next_pages(input: &[Page]) -> Vec<Page> {
-    let pages = input.to_vec();
-    let mut res = Vec::new();
-    // the input is already sorted
-    // We might put prev/next randomly if a page is missing date/order, probably fine
-    for (i, page) in input.iter().enumerate() {
-        let mut new_page = page.clone();
+pub fn populate_previous_and_next_pages(input: Vec<Page>) -> Vec<Page> {
+    let mut res = Vec::with_capacity(input.len());
+    // The input is already sorted
+    for (i, _) in input.iter().enumerate() {
+        let mut new_page = input[i].clone();
         if i > 0 {
-            let next = &pages[i - 1];
-            let mut next_page = next.clone();
+            let mut previous_page = input[i - 1].clone();
+            // Remove prev/next otherwise we serialise the whole thing...
+            previous_page.previous = None;
+            previous_page.next = None;
+            new_page.previous = Some(Box::new(previous_page));
+        }
+        if i < input.len() - 1 {
+            let mut next_page = input[i + 1].clone();
             // Remove prev/next otherwise we serialise the whole thing...
             next_page.previous = None;
             next_page.next = None;
             new_page.next = Some(Box::new(next_page));
         }
-        if i < input.len() - 1 {
-            let previous = &pages[i + 1];
-            // Remove prev/next otherwise we serialise the whole thing...
-            let mut previous_page = previous.clone();
-            previous_page.previous = None;
-            previous_page.next = None;
-            new_page.previous = Some(Box::new(previous_page));
-        }
         res.push(new_page);
     }
@@ -97,6 +81,12 @@ mod tests {
         Page::new("content/hello.md", front_matter)
     }

+    fn create_page_with_weight(weight: usize) -> Page {
+        let mut front_matter = PageFrontMatter::default();
+        front_matter.weight = Some(weight);
+        Page::new("content/hello.md", front_matter)
+    }
+
     #[test]
     fn can_sort_by_dates() {
         let input = vec![
@@ -128,15 +118,15 @@ mod tests {
     #[test]
     fn can_sort_by_weight() {
         let input = vec![
-            create_page_with_order(2),
-            create_page_with_order(3),
-            create_page_with_order(1),
+            create_page_with_weight(2),
+            create_page_with_weight(3),
+            create_page_with_weight(1),
         ];
         let (pages, _) = sort_pages(input, SortBy::Weight);
         // Should be sorted by date
-        assert_eq!(pages[0].clone().meta.order.unwrap(), 1);
-        assert_eq!(pages[1].clone().meta.order.unwrap(), 2);
-        assert_eq!(pages[2].clone().meta.order.unwrap(), 3);
+        assert_eq!(pages[0].clone().meta.weight.unwrap(), 1);
+        assert_eq!(pages[1].clone().meta.weight.unwrap(), 2);
+        assert_eq!(pages[2].clone().meta.weight.unwrap(), 3);
     }

     #[test]
@@ -168,23 +158,23 @@ mod tests {
     #[test]
     fn can_populate_previous_and_next_pages() {
         let input = vec![
-            create_page_with_order(3),
-            create_page_with_order(2),
             create_page_with_order(1),
+            create_page_with_order(2),
+            create_page_with_order(3),
         ];
-        let pages = populate_previous_and_next_pages(input.as_slice());
+        let pages = populate_previous_and_next_pages(input);

-        assert!(pages[0].clone().next.is_none());
-        assert!(pages[0].clone().previous.is_some());
-        assert_eq!(pages[0].clone().previous.unwrap().meta.order.unwrap(), 2);
+        assert!(pages[0].clone().previous.is_none());
+        assert!(pages[0].clone().next.is_some());
+        assert_eq!(pages[0].clone().next.unwrap().meta.order.unwrap(), 2);

         assert!(pages[1].clone().next.is_some());
         assert!(pages[1].clone().previous.is_some());
-        assert_eq!(pages[1].clone().next.unwrap().meta.order.unwrap(), 3);
         assert_eq!(pages[1].clone().previous.unwrap().meta.order.unwrap(), 1);
+        assert_eq!(pages[1].clone().next.unwrap().meta.order.unwrap(), 3);

-        assert!(pages[2].clone().next.is_some());
-        assert!(pages[2].clone().previous.is_none());
-        assert_eq!(pages[2].clone().next.unwrap().meta.order.unwrap(), 2);
+        assert!(pages[2].clone().next.is_none());
+        assert!(pages[2].clone().previous.is_some());
+        assert_eq!(pages[2].clone().previous.unwrap().meta.order.unwrap(), 2);
     }
 }
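For reference, the new `sort_pages` is built from two rayon operations: `ParallelIterator::partition`, which splits the pages into sortable and unsortable halves in parallel, and `par_sort_unstable_by`, which sorts the first half. A minimal standalone sketch of the same pattern, using a toy `Item` type in place of `Page` (the names below are illustrative, not from the codebase):

```rust
extern crate rayon;

use rayon::prelude::*;

#[derive(Debug)]
struct Item {
    order: Option<usize>,
}

fn sort_items(items: Vec<Item>) -> (Vec<Item>, Vec<Item>) {
    // Split into "has an order" / "has no order" in parallel.
    let (mut sortable, unsortable): (Vec<_>, Vec<_>) = items
        .into_par_iter()
        .partition(|item| item.order.is_some());

    // Parallel unstable sort on the sortable half only.
    sortable.par_sort_unstable_by(|a, b| a.order.cmp(&b.order));

    (sortable, unsortable)
}

fn main() {
    let items = vec![
        Item { order: Some(2) },
        Item { order: None },
        Item { order: Some(1) },
    ];
    let (sorted, ignored) = sort_items(items);
    println!("sorted: {:?}, ignored: {:?}", sorted, ignored);
}
```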

View file

@@ -28,6 +28,8 @@ pub struct PageFrontMatter {
     pub category: Option<String>,
     /// Integer to use to order content. Lowest is at the bottom, highest first
     pub order: Option<usize>,
+    /// Integer to use to order content. Highest is at the bottom, lowest first
+    pub weight: Option<usize>,
     /// All aliases for that page. Gutenberg will create HTML templates that will
     #[serde(skip_serializing)]
     pub aliases: Option<Vec<String>>,

@@ -84,6 +86,10 @@ impl PageFrontMatter {
         self.order.unwrap()
     }

+    pub fn weight(&self) -> usize {
+        self.weight.unwrap()
+    }
+
     pub fn has_tags(&self) -> bool {
         match self.tags {
             Some(ref t) => !t.is_empty(),

@@ -103,6 +109,7 @@ impl Default for PageFrontMatter {
             tags: None,
             category: None,
             order: None,
+            weight: None,
             aliases: None,
             template: None,
             extra: None,

View file

@@ -17,6 +17,7 @@ extern crate tempdir;

 use std::collections::HashMap;
 use std::fs::{remove_dir_all, copy, create_dir_all};
+use std::mem;
 use std::path::{Path, PathBuf};

 use glob::glob;

@@ -289,8 +290,9 @@ impl Site {
                     continue;
                 }
             }
-            let (sorted_pages, cannot_be_sorted_pages) = sort_pages(section.pages.clone(), section.meta.sort_by());
-            section.pages = populate_previous_and_next_pages(&sorted_pages);
+            let pages = mem::replace(&mut section.pages, vec![]);
+            let (sorted_pages, cannot_be_sorted_pages) = sort_pages(pages, section.meta.sort_by());
+            section.pages = populate_previous_and_next_pages(sorted_pages);
             section.ignored_pages = cannot_be_sorted_pages;
         }
     }

@@ -305,7 +307,7 @@ impl Site {
         // TODO: can we pass a reference?
         let (tags, categories) = Taxonomy::find_tags_and_categories(
-            self.pages.values().cloned().collect::<Vec<_>>()
+            self.pages.values().cloned().collect::<Vec<_>>().as_slice()
         );

         if generate_tags_pages {
             self.tags = Some(tags);
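The `mem::replace` call above is what removes the old `section.pages.clone()`: it moves the `Vec<Page>` out of the section, leaving an empty vector in its place, so `sort_pages` can take ownership without copying every page. A tiny sketch of the idiom with a placeholder type (not the real `Section`):

```rust
use std::mem;

struct Section {
    pages: Vec<String>, // stand-in for Vec<Page>
}

fn take_pages(section: &mut Section) -> Vec<String> {
    // Move the pages out without cloning; the section keeps an empty Vec.
    mem::replace(&mut section.pages, vec![])
}

fn main() {
    let mut section = Section { pages: vec!["a".into(), "b".into()] };
    let pages = take_pages(&mut section);
    assert_eq!(pages.len(), 2);
    assert!(section.pages.is_empty());
}
```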

View file

@@ -37,6 +37,7 @@ impl TaxonomyItem {
     pub fn new(name: &str, pages: Vec<Page>) -> TaxonomyItem {
         // We shouldn't have any pages without dates there
         let (sorted_pages, _) = sort_pages(pages, SortBy::Date);
+
         TaxonomyItem {
             name: name.to_string(),
             slug: slugify(name),

@@ -55,7 +56,7 @@ pub struct Taxonomy {
 impl Taxonomy {
     // TODO: take a Vec<&'a Page> if it makes a difference in terms of perf for actual sites
-    pub fn find_tags_and_categories(all_pages: Vec<Page>) -> (Taxonomy, Taxonomy) {
+    pub fn find_tags_and_categories(all_pages: &[Page]) -> (Taxonomy, Taxonomy) {
         let mut tags = HashMap::new();
         let mut categories = HashMap::new();