From fa5f78cf71db68c0bef8447b6e6a6176fcbf3465 Mon Sep 17 00:00:00 2001 From: Viacheslav Slinko Date: Fri, 17 May 2024 10:39:46 +0300 Subject: [PATCH] Automatically add tags to bookmarks based on URL pattern (#736) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [WIP] DSL * upd * upd * upd * upd * upd * upd * upd * upd * upd * upd * upd * dsl2 * full feature * upd * upd * upd * upd * rename to auto_tagging_rules * update migration after rebase * add REST API tests * improve settings view --------- Co-authored-by: Sascha Ißbrücker --- .../0036_userprofile_auto_tagging_rules.py | 18 ++ bookmarks/models.py | 2 + bookmarks/services/auto_tagging.py | 70 +++++++ bookmarks/services/bookmarks.py | 10 + bookmarks/templates/settings/general.html | 15 ++ bookmarks/tests/test_auto_tagging.py | 179 ++++++++++++++++++ bookmarks/tests/test_bookmarks_api.py | 46 +++++ bookmarks/tests/test_bookmarks_service.py | 24 +++ bookmarks/tests/test_settings_general_view.py | 5 + 9 files changed, 369 insertions(+) create mode 100644 bookmarks/migrations/0036_userprofile_auto_tagging_rules.py create mode 100644 bookmarks/services/auto_tagging.py create mode 100644 bookmarks/tests/test_auto_tagging.py diff --git a/bookmarks/migrations/0036_userprofile_auto_tagging_rules.py b/bookmarks/migrations/0036_userprofile_auto_tagging_rules.py new file mode 100644 index 0000000..4454739 --- /dev/null +++ b/bookmarks/migrations/0036_userprofile_auto_tagging_rules.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.3 on 2024-05-17 07:09 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookmarks", "0035_userprofile_tag_grouping"), + ] + + operations = [ + migrations.AddField( + model_name="userprofile", + name="auto_tagging_rules", + field=models.TextField(blank=True), + ), + ] diff --git a/bookmarks/models.py b/bookmarks/models.py index 7b0a75c..a8120d5 100644 --- a/bookmarks/models.py +++ b/bookmarks/models.py @@ -415,6 +415,7 @@ class UserProfile(models.Model): display_remove_bookmark_action = models.BooleanField(default=True, null=False) permanent_notes = models.BooleanField(default=False, null=False) custom_css = models.TextField(blank=True, null=False) + auto_tagging_rules = models.TextField(blank=True, null=False) search_preferences = models.JSONField(default=dict, null=False) enable_automatic_html_snapshots = models.BooleanField(default=True, null=False) default_mark_unread = models.BooleanField(default=False, null=False) @@ -445,6 +446,7 @@ class UserProfileForm(forms.ModelForm): "permanent_notes", "default_mark_unread", "custom_css", + "auto_tagging_rules", ] diff --git a/bookmarks/services/auto_tagging.py b/bookmarks/services/auto_tagging.py new file mode 100644 index 0000000..d8bebf0 --- /dev/null +++ b/bookmarks/services/auto_tagging.py @@ -0,0 +1,70 @@ +from urllib.parse import urlparse, parse_qs +import re +import idna + + +def get_tags(script: str, url: str): + parsed_url = urlparse(url.lower()) + result = set() + + for line in script.lower().split("\n"): + if "#" in line: + i = line.index("#") + line = line[:i] + + parts = line.split() + if len(parts) < 2: + continue + + domain_pattern = re.sub("^https?://", "", parts[0]) + path_pattern = None + qs_pattern = None + + if "/" in domain_pattern: + i = domain_pattern.index("/") + path_pattern = domain_pattern[i:] + domain_pattern = domain_pattern[:i] + + if path_pattern and "?" in path_pattern: + i = path_pattern.index("?") + qs_pattern = path_pattern[i + 1 :] + path_pattern = path_pattern[:i] + + if not _domains_matches(domain_pattern, parsed_url.netloc): + continue + + if path_pattern and not _path_matches(path_pattern, parsed_url.path): + continue + + if qs_pattern and not _qs_matches(qs_pattern, parsed_url.query): + continue + + for tag in parts[1:]: + result.add(tag) + + return result + + +def _path_matches(expected_path: str, actual_path: str) -> bool: + return actual_path.startswith(expected_path) + + +def _domains_matches(expected_domain: str, actual_domain: str) -> bool: + expected_domain = idna.encode(expected_domain) + actual_domain = idna.encode(actual_domain) + + return actual_domain.endswith(expected_domain) + + +def _qs_matches(expected_qs: str, actual_qs: str) -> bool: + expected_qs = parse_qs(expected_qs, keep_blank_values=True) + actual_qs = parse_qs(actual_qs, keep_blank_values=True) + + for key in expected_qs: + if key not in actual_qs: + return False + for value in expected_qs[key]: + if value != "" and value not in actual_qs[key]: + return False + + return True diff --git a/bookmarks/services/bookmarks.py b/bookmarks/services/bookmarks.py index 5e96dbb..c7282cf 100644 --- a/bookmarks/services/bookmarks.py +++ b/bookmarks/services/bookmarks.py @@ -10,6 +10,7 @@ from django.utils import timezone from bookmarks.models import Bookmark, BookmarkAsset, parse_tag_string from bookmarks.services import tasks from bookmarks.services import website_loader +from bookmarks.services import auto_tagging from bookmarks.services.tags import get_or_create_tags logger = logging.getLogger(__name__) @@ -242,6 +243,15 @@ def _update_website_metadata(bookmark: Bookmark): def _update_bookmark_tags(bookmark: Bookmark, tag_string: str, user: User): tag_names = parse_tag_string(tag_string) + + if user.profile.auto_tagging_rules: + auto_tag_names = auto_tagging.get_tags( + user.profile.auto_tagging_rules, bookmark.url + ) + for auto_tag_name in auto_tag_names: + if auto_tag_name not in tag_names: + tag_names.append(auto_tag_name) + tags = get_or_create_tags(tag_names, user) bookmark.tags.set(tags) diff --git a/bookmarks/templates/settings/general.html b/bookmarks/templates/settings/general.html index f04edab..15ba2f4 100644 --- a/bookmarks/templates/settings/general.html +++ b/bookmarks/templates/settings/general.html @@ -118,6 +118,21 @@ If disabled, tags will not be grouped. +
+
+ Auto Tagging + +
+ {{ form.auto_tagging_rules|add_class:"form-input custom-css"|attr:"rows:6" }} +
+
+
+ Automatically adds tags to bookmarks based on predefined rules. + Each line is a single rule that maps a URL to one or more tags. For example: +
youtube.com video
+reddit.com/r/Music music reddit
+
+