Add title_bytes sorting method

This commit is contained in:
Vincent Prouillet 2022-06-13 23:37:39 +02:00
parent c948f8c889
commit 5873e0319c
5 changed files with 59 additions and 8 deletions

View file

@ -28,6 +28,7 @@ any pages related to that taxonomy
- Add `rel="alternate"` to Atom post links
- Fix taxonomy `current_path`
- Fix feed location for taxonomies not in the default language
- Add `title_bytes` sorting method
## 0.15.3 (2022-01-23)

View file

@ -97,7 +97,8 @@ impl Markdown {
pub fn export_theme_css(&self, theme_name: &str) -> Result<String> {
if let Some(theme) = self.get_highlight_theme_by_name(theme_name) {
Ok(css_for_theme_with_class_style(theme, CLASS_STYLE).expect("the function can't even error?"))
Ok(css_for_theme_with_class_style(theme, CLASS_STYLE)
.expect("the function can't even error?"))
} else {
bail!("Theme {} not found", theme_name)
}

View file

@ -14,7 +14,7 @@ pub fn sort_pages(pages: &[&Page], sort_by: SortBy) -> (Vec<PathBuf>, Vec<PathBu
SortBy::UpdateDate => {
page.meta.datetime.is_some() || page.meta.updated_datetime.is_some()
}
SortBy::Title => page.meta.title.is_some(),
SortBy::Title | SortBy::TitleBytes => page.meta.title.is_some(),
SortBy::Weight => page.meta.weight.is_some(),
SortBy::None => unreachable!(),
});
@ -28,6 +28,9 @@ pub fn sort_pages(pages: &[&Page], sort_by: SortBy) -> (Vec<PathBuf>, Vec<PathBu
SortBy::Title => {
natural_lexical_cmp(a.meta.title.as_ref().unwrap(), b.meta.title.as_ref().unwrap())
}
SortBy::TitleBytes => {
a.meta.title.as_ref().unwrap().cmp(b.meta.title.as_ref().unwrap())
}
SortBy::Weight => a.meta.weight.unwrap().cmp(&b.meta.weight.unwrap()),
SortBy::None => unreachable!(),
};
@ -110,9 +113,11 @@ mod tests {
#[test]
fn can_sort_by_title() {
let titles = vec![
"åland",
"bagel",
"track_3",
"microkernel",
"Österrike",
"métro",
"BART",
"Underground",
@ -135,16 +140,47 @@ mod tests {
assert_eq!(
sorted_titles,
vec![
"åland",
"bagel",
"BART",
"μ-kernel",
"meter",
"métro",
"microkernel",
"Österrike",
"track_1",
"track_3",
"track_13",
"Underground"
]
);
let (sorted_pages, ignored_pages) =
sort_pages(&pages.iter().collect::<Vec<_>>(), SortBy::TitleBytes);
// Should be sorted by title in bytes order
let sorted_titles: Vec<_> = sorted_pages
.iter()
.map(|key| {
pages.iter().find(|p| &p.file.path == key).unwrap().meta.title.as_ref().unwrap()
})
.collect();
assert_eq!(ignored_pages.len(), 0);
assert_eq!(
sorted_titles,
vec![
"BART",
"Underground",
"bagel",
"meter",
"microkernel",
"métro",
"track_1",
"track_13",
"track_3",
// Non ASCII letters are not merged with the ASCII equivalent (o/a/m here)
"Österrike",
"åland",
"μ-kernel"
]
);
}

View file

@ -6,9 +6,13 @@ pub enum SortBy {
/// Most recent to oldest
Date,
/// Most recent to oldest
#[serde(rename = "update_date")]
UpdateDate,
/// Sort by title lexicographically
Title,
/// Sort by titles using the bytes directly
#[serde(rename = "title_bytes")]
TitleBytes,
/// Lower weight comes first
Weight,
/// No sorting

View file

@ -48,7 +48,7 @@ description = ""
# A draft section is only loaded if the `--drafts` flag is passed to `zola build`, `zola serve` or `zola check`.
draft = false
# Used to sort pages by "date", "title", "weight", or "none". See below for more information.
# Used to sort pages by "date", "update_date", "title", "title_bytes", "weight", or "none". See below for more information.
sort_by = "none"
# Used by the parent section to order its subsections.
@ -142,8 +142,8 @@ create a list of links to the posts, a simple template might look like this:
This would iterate over the posts in the order specified
by the `sort_by` variable set in the `_index.md` page for the corresponding
section. The `sort_by` variable can be given one of three values: `date`,
`title`, `weight` or `none`. If `sort_by` is not set, the pages will be
section. The `sort_by` variable can be given a few values: `date`, `update_date`,
`title`, `title_bytes`, `weight` or `none`. If `sort_by` is not set, the pages will be
sorted in the `none` order, which is not intended for sorted content.
Any page that is missing the data it needs to be sorted will be ignored and
@ -163,6 +163,9 @@ top of the list) to the oldest (at the bottom of the list). Each page will
get `page.lower` and `page.higher` variables that contain the pages with
earlier and later dates, respectively.
### `update_date`
Same as `date` except it will take into account any `updated` date for the pages.
### `title`
This will sort all pages by their `title` field in natural lexical order, as
defined by `natural_lexical_cmp` in the [lexical-sort] crate. Each page will
@ -171,12 +174,18 @@ with previous and next titles, respectively.
For example, here is a natural lexical ordering: "bachata, BART, bolero,
μ-kernel, meter, Métro, Track-2, Track-3, Track-13, underground". Notice how
special characters and numbers are sorted reasonably. This is better than
the standard sorting: "BART, Métro, Track-13, Track-2, Track-3, bachata,
bolero, meter, underground, μ-kernel".
special characters and numbers are sorted reasonably.
[lexical-sort]: https://docs.rs/lexical-sort
### `title_bytes`
Same as `title` except it uses the bytes directly to sort.
Natural sorting treats non-ASCII
characters like their closest ASCII character. This can lead to unexpected
results for languages with different character sets. The last three characters
of the Swedish alphabet, åäö, for example, would be treated by the natural
sort as aao. In that case the standard byte-order sort may be more suitable.
### `weight`
This will sort all pages by their `weight` field, from lightest weight
(at the top of the list) to heaviest (at the bottom of the list). Each