Group ideographic characters in tag cloud (#613)

* Fix #588: ideographic characters should be grouped together. Following the suggestion in this SO answer (https://stackoverflow.com/a/2718203/554903) to use a regex to find the ideographic range, we group the ideographic characters together, while keeping other Chinese, Japanese and Korean characters apart.
* cleanup
---------
Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
2024-11-22 03:13:02 +00:00 · 2024-03-16 07:09:37 +01:00 · 2024-03-16 07:09:37 +01:00 · 683cf529d7
commit 683cf529d7
parent 38204c87cf
2 changed files with 74 additions and 10 deletions
--- a/bookmarks/tests/test_tag_cloud_template.py
+++ b/bookmarks/tests/test_tag_cloud_template.py
@ -39,7 +39,7 @@ class TagCloudTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
            group_element = group_elements[group_index]
            link_elements = group_element.select("a")

-            self.assertEqual(len(link_elements), len(tags))
+            self.assertEqual(len(link_elements), len(tags), tags)

            for tag_index, tag in enumerate(tags, start=0):
                link_element = link_elements[tag_index]
@ -50,6 +50,59 @@ class TagCloudTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
        link_elements = soup.select("p.selected-tags a")
        self.assertEqual(len(link_elements), count)

+    def test_cjk_using_single_group(self):
+        """
+        Ideographic characters will be using the same group
+        While other japanese and korean characters will have separate groups.
+        """
+        tags = [
+            self.setup_tag(name="Aardvark"),
+            self.setup_tag(name="Armadillo"),
+            self.setup_tag(name="あひる"),
+            self.setup_tag(name="あきらか"),
+            self.setup_tag(name="アヒル"),
+            self.setup_tag(name="アキラカ"),
+            self.setup_tag(name="ひる"),
+            self.setup_tag(name="アヒル"),
+            self.setup_tag(name="오리"),
+            self.setup_tag(name="물"),
+            self.setup_tag(name="家鴨"),
+            self.setup_tag(name="感じ"),
+        ]
+        self.setup_bookmark(tags=tags)
+        rendered_template = self.render_template()
+
+        self.assertTagGroups(
+            rendered_template,
+            [
+                [
+                    "Aardvark",
+                    "Armadillo",
+                ],
+                [
+                    "あきらか",
+                    "あひる",
+                ],
+                [
+                    "ひる",
+                ],
+                [
+                    "アキラカ",
+                    "アヒル",
+                ],
+                [
+                    "물",
+                ],
+                [
+                    "오리",
+                ],
+                [
+                    "家鴨",
+                    "感じ",
+                ],
+            ],
+        )
+
    def test_group_alphabetically(self):
        tags = [
            self.setup_tag(name="Cockatoo"),
--- a/bookmarks/views/partials/contexts.py
+++ b/bookmarks/views/partials/contexts.py
@ -1,5 +1,6 @@
 import urllib.parse
 from typing import Set, List
+import re

 from django.core.handlers.wsgi import WSGIRequest
 from django.core.paginator import Paginator
@ -11,13 +12,13 @@ from bookmarks import utils
 from bookmarks.models import (
    Bookmark,
    BookmarkSearch,
-    BookmarkSearchForm,
    User,
    UserProfile,
    Tag,
 )

 DEFAULT_PAGE_SIZE = 30
+CJK_RE = re.compile(r"[\u4e00-\u9fff]+")


 class BookmarkItem:
@ -123,13 +124,13 @@ class BookmarkListContext:
        )

    def get_base_url(self):
-        raise Exception(f"Must be implemented by subclass")
+        raise Exception("Must be implemented by subclass")

    def get_base_action_url(self):
-        raise Exception(f"Must be implemented by subclass")
+        raise Exception("Must be implemented by subclass")

    def get_bookmark_query_set(self):
-        raise Exception(f"Must be implemented by subclass")
+        raise Exception("Must be implemented by subclass")


 class ActiveBookmarkListContext(BookmarkListContext):
@ -178,23 +179,33 @@ class TagGroup:
        self.tags = []
        self.char = char

+    def __repr__(self):
+        return f"<{self.char} TagGroup>"
+
    @staticmethod
    def create_tag_groups(tags: Set[Tag]):
        # Ensure groups, as well as tags within groups, are ordered alphabetically
        sorted_tags = sorted(tags, key=lambda x: str.lower(x.name))
        group = None
        groups = []
+        cjk_used = False
+        cjk_group = TagGroup("Ideographic")

        # Group tags that start with a different character than the previous one
        for tag in sorted_tags:
            tag_char = tag.name[0].lower()
-
-            if not group or group.char != tag_char:
+            if CJK_RE.match(tag_char):
+                cjk_used = True
+                cjk_group.tags.append(tag)
+            elif not group or group.char != tag_char:
                group = TagGroup(tag_char)
                groups.append(group)
+                group.tags.append(tag)
+            else:
+                group.tags.append(tag)

-            group.tags.append(tag)
-
+        if cjk_used:
+            groups.append(cjk_group)
        return groups


@ -224,7 +235,7 @@ class TagCloudContext:
        self.has_selected_tags = has_selected_tags

    def get_tag_query_set(self):
-        raise Exception(f"Must be implemented by subclass")
+        raise Exception("Must be implemented by subclass")

    def get_selected_tags(self, tags: List[Tag]):
        parsed_query = queries.parse_query_string(self.search.q)