From 5873e0319c2eabc5c64fcbb560fe072baec9e53c Mon Sep 17 00:00:00 2001 From: Vincent Prouillet Date: Mon, 13 Jun 2022 23:37:39 +0200 Subject: [PATCH] Add title_bytes sorting method --- CHANGELOG.md | 1 + components/config/src/config/markup.rs | 3 +- components/content/src/sorting.rs | 38 ++++++++++++++++++- components/content/src/types.rs | 4 ++ docs/content/documentation/content/section.md | 21 +++++++--- 5 files changed, 59 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed159f26..34b54f61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ any pages related to that taxonomy - Add `rel="alternate"` to Atom post links - Fix taxonomy `current_path` - Fix feed location for taxonomies not in the default language +- Add `title_bytes` sorting method ## 0.15.3 (2022-01-23) diff --git a/components/config/src/config/markup.rs b/components/config/src/config/markup.rs index fed7cf2d..9b80ed89 100644 --- a/components/config/src/config/markup.rs +++ b/components/config/src/config/markup.rs @@ -97,7 +97,8 @@ impl Markdown { pub fn export_theme_css(&self, theme_name: &str) -> Result { if let Some(theme) = self.get_highlight_theme_by_name(theme_name) { - Ok(css_for_theme_with_class_style(theme, CLASS_STYLE).expect("the function can't even error?")) + Ok(css_for_theme_with_class_style(theme, CLASS_STYLE) + .expect("the function can't even error?")) } else { bail!("Theme {} not found", theme_name) } diff --git a/components/content/src/sorting.rs b/components/content/src/sorting.rs index 92b37ba5..313df367 100644 --- a/components/content/src/sorting.rs +++ b/components/content/src/sorting.rs @@ -14,7 +14,7 @@ pub fn sort_pages(pages: &[&Page], sort_by: SortBy) -> (Vec, Vec { page.meta.datetime.is_some() || page.meta.updated_datetime.is_some() } - SortBy::Title => page.meta.title.is_some(), + SortBy::Title | SortBy::TitleBytes => page.meta.title.is_some(), SortBy::Weight => page.meta.weight.is_some(), SortBy::None => unreachable!(), }); @@ 
-28,6 +28,9 @@ pub fn sort_pages(pages: &[&Page], sort_by: SortBy) -> (Vec, Vec { natural_lexical_cmp(a.meta.title.as_ref().unwrap(), b.meta.title.as_ref().unwrap()) } + SortBy::TitleBytes => { + a.meta.title.as_ref().unwrap().cmp(b.meta.title.as_ref().unwrap()) + } SortBy::Weight => a.meta.weight.unwrap().cmp(&b.meta.weight.unwrap()), SortBy::None => unreachable!(), }; @@ -110,9 +113,11 @@ mod tests { #[test] fn can_sort_by_title() { let titles = vec![ + "åland", "bagel", "track_3", "microkernel", + "Österrike", "métro", "BART", "Underground", @@ -135,16 +140,47 @@ mod tests { assert_eq!( sorted_titles, vec![ + "åland", "bagel", "BART", "μ-kernel", "meter", "métro", "microkernel", + "Österrike", "track_1", "track_3", "track_13", + "Underground" + ] + ); + + let (sorted_pages, ignored_pages) = + sort_pages(&pages.iter().collect::>(), SortBy::TitleBytes); + // Should be sorted by title in bytes order + let sorted_titles: Vec<_> = sorted_pages + .iter() + .map(|key| { + pages.iter().find(|p| &p.file.path == key).unwrap().meta.title.as_ref().unwrap() + }) + .collect(); + assert_eq!(ignored_pages.len(), 0); + assert_eq!( + sorted_titles, + vec![ + "BART", "Underground", + "bagel", + "meter", + "microkernel", + "métro", + "track_1", + "track_13", + "track_3", + // Non ASCII letters are not merged with the ASCII equivalent (o/a/m here) + "Österrike", + "åland", + "μ-kernel" ] ); } diff --git a/components/content/src/types.rs b/components/content/src/types.rs index 39174e96..e058dec9 100644 --- a/components/content/src/types.rs +++ b/components/content/src/types.rs @@ -6,9 +6,13 @@ pub enum SortBy { /// Most recent to oldest Date, /// Most recent to oldest + #[serde(rename = "update_date")] UpdateDate, /// Sort by title lexicographically Title, + /// Sort by titles using the bytes directly + #[serde(rename = "title_bytes")] + TitleBytes, /// Lower weight comes first Weight, /// No sorting diff --git a/docs/content/documentation/content/section.md 
b/docs/content/documentation/content/section.md index e0669ff4..c569205f 100644 --- a/docs/content/documentation/content/section.md +++ b/docs/content/documentation/content/section.md @@ -48,7 +48,7 @@ description = "" # A draft section is only loaded if the `--drafts` flag is passed to `zola build`, `zola serve` or `zola check`. draft = false -# Used to sort pages by "date", "title", "weight", or "none". See below for more information. +# Used to sort pages by "date", "update_date", "title", "title_bytes", "weight", or "none". See below for more information. sort_by = "none" # Used by the parent section to order its subsections. @@ -142,8 +142,8 @@ create a list of links to the posts, a simple template might look like this: This would iterate over the posts in the order specified by the `sort_by` variable set in the `_index.md` page for the corresponding -section. The `sort_by` variable can be given one of three values: `date`, -`title`, `weight` or `none`. If `sort_by` is not set, the pages will be +section. The `sort_by` variable can be given a few values: `date`, `update_date`, +`title`, `title_bytes`, `weight` or `none`. If `sort_by` is not set, the pages will be sorted in the `none` order, which is not intended for sorted content. Any page that is missing the data it needs to be sorted will be ignored and @@ -163,6 +163,9 @@ top of the list) to the oldest (at the bottom of the list). Each page will get `page.lower` and `page.higher` variables that contain the pages with earlier and later dates, respectively. +### `update_date` +Same as `date` except it will take into account any `updated` date for the pages. + ### `title` This will sort all pages by their `title` field in natural lexical order, as defined by `natural_lexical_cmp` in the [lexical-sort] crate. Each page will @@ -171,12 +174,18 @@ with previous and next titles, respectively. 
For example, here is a natural lexical ordering: "bachata, BART, bolero, μ-kernel, meter, Métro, Track-2, Track-3, Track-13, underground". Notice how -special characters and numbers are sorted reasonably. This is better than -the standard sorting: "BART, Métro, Track-13, Track-2, Track-3, bachata, -bolero, meter, underground, μ-kernel". +special characters and numbers are sorted reasonably. [lexical-sort]: https://docs.rs/lexical-sort +### `title_bytes` +Same as `title` except it uses the bytes directly to sort. +Natural sorting treats non-ASCII +characters like their closest ASCII character. This can lead to unexpected +results for languages with different character sets. The last three characters +of the Swedish alphabet, åäö, for example would be considered by the natural +sort as aao. In that case the standard byte-order sort may be more suitable. + ### `weight` This will be sort all pages by their `weight` field, from lightest weight (at the top of the list) to heaviest (at the bottom of the list). Each