Add title_bytes sorting method

2024-11-10 06:14:19 +00:00 · 2022-06-13 23:37:39 +02:00 · 2022-06-13 23:37:39 +02:00 · 5873e0319c
commit 5873e0319c
parent c948f8c889
5 changed files with 59 additions and 8 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -28,6 +28,7 @@ any pages related to that taxonomy
 - Add `rel="alternate"` to Atom post links
 - Fix taxonomy `current_path`
 - Fix feed location for taxonomies not in the default language
+- Add `title_bytes` sorting method

 ## 0.15.3 (2022-01-23)

--- a/components/config/src/config/markup.rs
+++ b/components/config/src/config/markup.rs
@ -97,7 +97,8 @@ impl Markdown {

    pub fn export_theme_css(&self, theme_name: &str) -> Result<String> {
        if let Some(theme) = self.get_highlight_theme_by_name(theme_name) {
-            Ok(css_for_theme_with_class_style(theme, CLASS_STYLE).expect("the function can't even error?"))
+            Ok(css_for_theme_with_class_style(theme, CLASS_STYLE)
+                .expect("the function can't even error?"))
        } else {
            bail!("Theme {} not found", theme_name)
        }
--- a/components/content/src/sorting.rs
+++ b/components/content/src/sorting.rs
@ -14,7 +14,7 @@ pub fn sort_pages(pages: &[&Page], sort_by: SortBy) -> (Vec<PathBuf>, Vec<PathBu
            SortBy::UpdateDate => {
                page.meta.datetime.is_some() || page.meta.updated_datetime.is_some()
            }
-            SortBy::Title => page.meta.title.is_some(),
+            SortBy::Title | SortBy::TitleBytes => page.meta.title.is_some(),
            SortBy::Weight => page.meta.weight.is_some(),
            SortBy::None => unreachable!(),
        });
@ -28,6 +28,9 @@ pub fn sort_pages(pages: &[&Page], sort_by: SortBy) -> (Vec<PathBuf>, Vec<PathBu
            SortBy::Title => {
                natural_lexical_cmp(a.meta.title.as_ref().unwrap(), b.meta.title.as_ref().unwrap())
            }
+            SortBy::TitleBytes => {
+                a.meta.title.as_ref().unwrap().cmp(b.meta.title.as_ref().unwrap())
+            }
            SortBy::Weight => a.meta.weight.unwrap().cmp(&b.meta.weight.unwrap()),
            SortBy::None => unreachable!(),
        };
@ -110,9 +113,11 @@ mod tests {
    #[test]
    fn can_sort_by_title() {
        let titles = vec![
+            "åland",
            "bagel",
            "track_3",
            "microkernel",
+            "Österrike",
            "métro",
            "BART",
            "Underground",
@ -135,16 +140,47 @@ mod tests {
        assert_eq!(
            sorted_titles,
            vec![
+                "åland",
                "bagel",
                "BART",
                "μ-kernel",
                "meter",
                "métro",
                "microkernel",
+                "Österrike",
                "track_1",
                "track_3",
                "track_13",
+                "Underground"
+            ]
+        );
+
+        let (sorted_pages, ignored_pages) =
+            sort_pages(&pages.iter().collect::<Vec<_>>(), SortBy::TitleBytes);
+        // Should be sorted by title in bytes order
+        let sorted_titles: Vec<_> = sorted_pages
+            .iter()
+            .map(|key| {
+                pages.iter().find(|p| &p.file.path == key).unwrap().meta.title.as_ref().unwrap()
+            })
+            .collect();
+        assert_eq!(ignored_pages.len(), 0);
+        assert_eq!(
+            sorted_titles,
+            vec![
+                "BART",
                "Underground",
+                "bagel",
+                "meter",
+                "microkernel",
+                "métro",
+                "track_1",
+                "track_13",
+                "track_3",
+                // Non ASCII letters are not merged with the ASCII equivalent (o/a/m here)
+                "Österrike",
+                "åland",
+                "μ-kernel"
            ]
        );
    }
--- a/components/content/src/types.rs
+++ b/components/content/src/types.rs
@ -6,9 +6,13 @@ pub enum SortBy {
    /// Most recent to oldest
    Date,
    /// Most recent to oldest
+    #[serde(rename = "update_date")]
    UpdateDate,
    /// Sort by title lexicographically
    Title,
+    /// Sort by titles using the bytes directly
+    #[serde(rename = "title_bytes")]
+    TitleBytes,
    /// Lower weight comes first
    Weight,
    /// No sorting
--- a/docs/content/documentation/content/section.md
+++ b/docs/content/documentation/content/section.md
@ -48,7 +48,7 @@ description = ""
 # A draft section is only loaded if the `--drafts` flag is passed to `zola build`, `zola serve` or `zola check`.
 draft = false

-# Used to sort pages by "date", "title", "weight", or "none". See below for more information.
+# Used to sort pages by "date", "update_date", "title", "title_bytes", "weight", or "none". See below for more information.
 sort_by = "none"

 # Used by the parent section to order its subsections.
@ -142,8 +142,8 @@ create a list of links to the posts, a simple template might look like this:

 This would iterate over the posts in the order specified
 by the `sort_by` variable set in the `_index.md` page for the corresponding
-section.  The `sort_by` variable can be given one of three values: `date`,
-`title`, `weight` or `none`.  If `sort_by` is not set, the pages will be
+section.  The `sort_by` variable can be given a few values: `date`, `update_date`,
+`title`, `title_bytes`, `weight` or `none`.  If `sort_by` is not set, the pages will be
 sorted in the `none` order, which is not intended for sorted content.

 Any page that is missing the data it needs to be sorted will be ignored and
@ -163,6 +163,9 @@ top of the list) to the oldest (at the bottom of the list). Each page will
 get `page.lower` and `page.higher` variables that contain the pages with
 earlier and later dates, respectively.

+### `update_date`
+Same as `date` except it will take into account any `updated` date for the pages.
+
 ### `title`
 This will sort all pages by their `title` field in natural lexical order, as
 defined  by `natural_lexical_cmp` in the [lexical-sort] crate. Each page will
@ -171,12 +174,18 @@ with  previous and next titles, respectively.

 For example, here is a natural lexical ordering: "bachata, BART, bolero,
 μ-kernel, meter, Métro, Track-2, Track-3, Track-13, underground". Notice how
-special characters and numbers are sorted reasonably. This is better than
-the standard sorting: "BART, Métro, Track-13, Track-2, Track-3, bachata,
-bolero, meter, underground, μ-kernel".
+special characters and numbers are sorted reasonably.

 [lexical-sort]: https://docs.rs/lexical-sort

+### `title_bytes`
+Same as `title` except it uses the bytes directly to sort.
+Natural sorting treats non-ASCII
+characters like their closest ASCII character. This can lead to unexpected
+results for languages with different character sets. The last three characters
+of the Swedish alphabet, åäö, for example, would be considered by the natural
+sort as aao. In that case the standard byte-order sort may be more suitable.
+
 ### `weight`
 This will sort all pages by their `weight` field, from lightest weight
 (at the top of the list) to heaviest (at the bottom of the list).  Each