Automatically add tags to bookmarks based on URL pattern (#736)

* [WIP] DSL

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* dsl2

* full feature

* upd

* upd

* upd

* upd

* rename to auto_tagging_rules

* update migration after rebase

* add REST API tests

* improve settings view

---------

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
Viacheslav Slinko 2024-05-17 10:39:46 +03:00 committed by GitHub
parent e03f536925
commit fa5f78cf71
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 369 additions and 0 deletions

View file

@ -0,0 +1,18 @@
# Generated by Django 5.0.3 on 2024-05-17 07:09
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``auto_tagging_rules`` text field to the ``userprofile`` model."""

    # Applied on top of migration 0035 of the ``bookmarks`` app.
    dependencies = [
        ("bookmarks", "0035_userprofile_tag_grouping"),
    ]

    operations = [
        migrations.AddField(
            model_name="userprofile",
            name="auto_tagging_rules",
            # blank=True: an empty value is accepted for this field.
            field=models.TextField(blank=True),
        ),
    ]

View file

@ -415,6 +415,7 @@ class UserProfile(models.Model):
display_remove_bookmark_action = models.BooleanField(default=True, null=False) display_remove_bookmark_action = models.BooleanField(default=True, null=False)
permanent_notes = models.BooleanField(default=False, null=False) permanent_notes = models.BooleanField(default=False, null=False)
custom_css = models.TextField(blank=True, null=False) custom_css = models.TextField(blank=True, null=False)
auto_tagging_rules = models.TextField(blank=True, null=False)
search_preferences = models.JSONField(default=dict, null=False) search_preferences = models.JSONField(default=dict, null=False)
enable_automatic_html_snapshots = models.BooleanField(default=True, null=False) enable_automatic_html_snapshots = models.BooleanField(default=True, null=False)
default_mark_unread = models.BooleanField(default=False, null=False) default_mark_unread = models.BooleanField(default=False, null=False)
@ -445,6 +446,7 @@ class UserProfileForm(forms.ModelForm):
"permanent_notes", "permanent_notes",
"default_mark_unread", "default_mark_unread",
"custom_css", "custom_css",
"auto_tagging_rules",
] ]

View file

@ -0,0 +1,70 @@
from urllib.parse import urlparse, parse_qs
import re
import idna
def get_tags(script: str, url: str):
    """Return the set of tags that the auto-tagging rules assign to a URL.

    Each line of ``script`` is one rule: a URL pattern followed by one or
    more whitespace-separated tags. Everything after a ``#`` is a comment,
    and lines with fewer than two fields are ignored. A pattern is a domain,
    optionally followed by a path and a query string; a leading ``http://``
    or ``https://`` is dropped. Both the script and the URL are lower-cased
    before matching, so matching is case-insensitive and the returned tags
    are lower-case.

    Args:
        script: The rules, one per line.
        url: The URL to match the rules against.

    Returns:
        A set holding the tags of every rule whose pattern matches ``url``.
    """
    parsed_url = urlparse(url.lower())
    result = set()

    # Match against the bare host name: ``netloc`` may additionally carry
    # user info and a port ("user@example.com:8080"), which are not part of
    # the domain and would break the domain comparison.
    hostname = parsed_url.hostname
    if not hostname:
        # Every rule requires a domain, so a URL without a host matches none.
        return result

    for line in script.lower().split("\n"):
        # Strip the trailing comment, if any.
        line = line.partition("#")[0]
        parts = line.split()

        if len(parts) < 2:
            continue

        pattern, *tags = parts
        pattern = re.sub(r"^https?://", "", pattern)

        # Split "domain/path?query" into its three components; the path keeps
        # its leading slash so it can be compared with the URL path directly.
        domain_pattern, slash, rest = pattern.partition("/")
        path_pattern = slash + rest if slash else None
        qs_pattern = None

        if path_pattern and "?" in path_pattern:
            path_pattern, _, qs_pattern = path_pattern.partition("?")

        if not _domains_matches(domain_pattern, hostname):
            continue

        if path_pattern and not _path_matches(path_pattern, parsed_url.path):
            continue

        if qs_pattern and not _qs_matches(qs_pattern, parsed_url.query):
            continue

        result.update(tags)

    return result
def _path_matches(expected_path: str, actual_path: str) -> bool:
return actual_path.startswith(expected_path)
def _domains_matches(expected_domain: str, actual_domain: str) -> bool:
    """Return True when ``actual_domain`` is ``expected_domain`` or a subdomain of it.

    Both names are converted to their IDNA (ASCII) form first, so a Unicode
    name and its punycode spelling compare equal.

    Args:
        expected_domain: The domain taken from a rule pattern.
        actual_domain: The domain taken from the URL being tagged.

    Returns:
        True on an exact match or when ``actual_domain`` ends with
        ``"." + expected_domain``; False otherwise, including when either
        name cannot be IDNA-encoded.
    """
    try:
        expected = idna.encode(expected_domain)
        actual = idna.encode(actual_domain)
    except idna.IDNAError:
        # A name the encoder rejects cannot be compared; treat it as no match
        # rather than failing the whole tagging run.
        return False

    # Require a label boundary: a bare suffix test would let the rule
    # "example.com" match "notexample.com".
    return actual == expected or actual.endswith(b"." + expected)
def _qs_matches(expected_qs: str, actual_qs: str) -> bool:
expected_qs = parse_qs(expected_qs, keep_blank_values=True)
actual_qs = parse_qs(actual_qs, keep_blank_values=True)
for key in expected_qs:
if key not in actual_qs:
return False
for value in expected_qs[key]:
if value != "" and value not in actual_qs[key]:
return False
return True

View file

@ -10,6 +10,7 @@ from django.utils import timezone
from bookmarks.models import Bookmark, BookmarkAsset, parse_tag_string from bookmarks.models import Bookmark, BookmarkAsset, parse_tag_string
from bookmarks.services import tasks from bookmarks.services import tasks
from bookmarks.services import website_loader from bookmarks.services import website_loader
from bookmarks.services import auto_tagging
from bookmarks.services.tags import get_or_create_tags from bookmarks.services.tags import get_or_create_tags
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -242,6 +243,15 @@ def _update_website_metadata(bookmark: Bookmark):
def _update_bookmark_tags(bookmark: Bookmark, tag_string: str, user: User): def _update_bookmark_tags(bookmark: Bookmark, tag_string: str, user: User):
tag_names = parse_tag_string(tag_string) tag_names = parse_tag_string(tag_string)
if user.profile.auto_tagging_rules:
auto_tag_names = auto_tagging.get_tags(
user.profile.auto_tagging_rules, bookmark.url
)
for auto_tag_name in auto_tag_names:
if auto_tag_name not in tag_names:
tag_names.append(auto_tag_name)
tags = get_or_create_tags(tag_names, user) tags = get_or_create_tags(tag_names, user)
bookmark.tags.set(tags) bookmark.tags.set(tags)

View file

@ -118,6 +118,21 @@
If disabled, tags will not be grouped. If disabled, tags will not be grouped.
</div> </div>
</div> </div>
<div class="form-group">
<details {% if form.auto_tagging_rules.value %}open{% endif %}>
<summary>Auto Tagging</summary>
<label for="{{ form.auto_tagging_rules.id_for_label }}" class="text-assistive">Auto Tagging</label>
<div class="mt-2">
{{ form.auto_tagging_rules|add_class:"form-input custom-css"|attr:"rows:6" }}
</div>
</details>
<div class="form-input-hint">
Automatically adds tags to bookmarks based on predefined rules.
Each line is a single rule that maps a URL to one or more tags. For example:
<pre>youtube.com video
reddit.com/r/Music music reddit</pre>
</div>
</div>
<div class="form-group"> <div class="form-group">
<label for="{{ form.enable_favicons.id_for_label }}" class="form-checkbox"> <label for="{{ form.enable_favicons.id_for_label }}" class="form-checkbox">
{{ form.enable_favicons }} {{ form.enable_favicons }}

View file

@ -0,0 +1,179 @@
from bookmarks.services import auto_tagging
from django.test import TestCase
class AutoTaggingTestCase(TestCase):
    """Tests for ``auto_tagging.get_tags``: domain, path and query-string rules."""

    def test_auto_tag_by_domain(self):
        script = """
            example.com example
            test.com test
        """
        url = "https://example.com/"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"example"})

    def test_auto_tag_by_domain_ignores_case(self):
        script = """
            EXAMPLE.com example
        """
        url = "https://example.com/"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"example"})

    def test_auto_tag_by_domain_should_add_all_tags(self):
        script = """
            example.com one two three
        """
        url = "https://example.com/"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"one", "two", "three"})

    def test_auto_tag_by_domain_work_with_idn_domains(self):
        # Unicode rule against a punycode URL. The rule must spell the same
        # domain that the punycode below encodes.
        script = """
            रजिस्ट्री.भारत tag1
        """
        url = "https://www.xn--81bg3cc2b2bk5hb.xn--h2brj9c/"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"tag1"})

        # Punycode rule against a Unicode URL.
        script = """
            xn--81bg3cc2b2bk5hb.xn--h2brj9c tag1
        """
        url = "https://www.रजिस्ट्री.भारत/"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"tag1"})

    def test_auto_tag_by_domain_and_path(self):
        script = """
            example.com/one one
            example.com/two two
            test.com test
        """
        url = "https://example.com/one/"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"one"})

    def test_auto_tag_by_domain_and_path_ignores_case(self):
        script = """
            example.com/One one
        """
        url = "https://example.com/one/"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"one"})

    def test_auto_tag_by_domain_and_path_matches_path_ltr(self):
        script = """
            example.com/one one
            example.com/two two
            test.com test
        """
        url = "https://example.com/one/two"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"one"})

    def test_auto_tag_by_domain_ignores_domain_in_path(self):
        script = """
            example.com example
        """
        url = "https://test.com/example.com"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, set())

    def test_auto_tag_by_domain_includes_subdomains(self):
        script = """
            example.com example
            test.example.com test
            some.example.com some
        """
        url = "https://test.example.com/"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"example", "test"})

    def test_auto_tag_by_domain_matches_domain_rtl(self):
        script = """
            example.com example
        """
        url = "https://example.com.bad-website.com/"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, set())

    def test_auto_tag_by_domain_ignores_schema(self):
        script = """
            https://example.com/ https
            http://example.com/ http
        """
        url = "http://example.com/"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"https", "http"})

    def test_auto_tag_by_domain_ignores_lines_with_no_tags(self):
        script = """
            example.com
        """
        url = "https://example.com/"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, set())

    def test_auto_tag_by_domain_path_and_qs(self):
        script = """
            example.com/page?a=b tag1 # true, matches a=b
            example.com/page?a=c&c=d tag2 # true, matches both a=c and c=d
            example.com/page?c=d&l=p tag3 # false, l=p doesn't exists
            example.com/page?a=bb tag4 # false bb != b
            example.com/page?a=b&a=c tag5 # true, matches both a=b and a=c
            example.com/page?a=B tag6 # true, matches a=b because case insensitive
            example.com/page?A=b tag7 # true, matches a=b because case insensitive
        """
        url = "https://example.com/page/some?z=x&a=b&v=b&c=d&o=p&a=c"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"tag1", "tag2", "tag5", "tag6", "tag7"})

    def test_auto_tag_by_domain_path_and_qs_with_empty_value(self):
        script = """
            example.com/page?a= tag1
            example.com/page?b= tag2
        """
        url = "https://example.com/page/some?a=value"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"tag1"})

    def test_auto_tag_by_domain_path_and_qs_works_with_encoded_url(self):
        script = """
            example.com/page?a=йцу tag1
            example.com/page?a=%D0%B9%D1%86%D1%83 tag2
        """
        url = "https://example.com/page?a=%D0%B9%D1%86%D1%83"

        tags = auto_tagging.get_tags(script, url)

        self.assertEqual(tags, {"tag1", "tag2"})

View file

@ -440,6 +440,20 @@ class BookmarksApiTestCase(LinkdingApiTestCase, BookmarkFactoryMixin):
bookmark = Bookmark.objects.get(url=data["url"]) bookmark = Bookmark.objects.get(url=data["url"])
self.assertFalse(bookmark.shared) self.assertFalse(bookmark.shared)
def test_create_bookmark_should_add_tags_from_auto_tagging(self):
    """Creating a bookmark via the API yields the submitted tag plus the rule's tag."""
    tag1, tag2 = self.setup_tag(), self.setup_tag()
    self.authenticate()

    # Configure a rule that maps example.com to the second tag.
    user_profile = self.get_or_create_test_user().profile
    user_profile.auto_tagging_rules = f"example.com {tag2.name}"
    user_profile.save()

    payload = {"url": "https://example.com/", "tag_names": [tag1.name]}
    self.post(reverse("bookmarks:bookmark-list"), payload, status.HTTP_201_CREATED)

    created = Bookmark.objects.get(url=payload["url"])
    self.assertCountEqual(created.tags.all(), [tag1, tag2])
def test_get_bookmark(self): def test_get_bookmark(self):
self.authenticate() self.authenticate()
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
@ -512,6 +526,22 @@ class BookmarksApiTestCase(LinkdingApiTestCase, BookmarkFactoryMixin):
updated_bookmark = Bookmark.objects.get(id=bookmark.id) updated_bookmark = Bookmark.objects.get(id=bookmark.id)
self.assertEqual(updated_bookmark.shared, True) self.assertEqual(updated_bookmark.shared, True)
def test_update_bookmark_adds_tags_from_auto_tagging(self):
    """Updating a bookmark via PUT yields the submitted tag plus the rule's tag."""
    bookmark = self.setup_bookmark()
    tag1, tag2 = self.setup_tag(), self.setup_tag()
    self.authenticate()

    # Configure a rule that maps example.com to the second tag.
    user_profile = self.get_or_create_test_user().profile
    user_profile.auto_tagging_rules = f"example.com {tag2.name}"
    user_profile.save()

    payload = {"url": "https://example.com/", "tag_names": [tag1.name]}
    detail_url = reverse("bookmarks:bookmark-detail", args=[bookmark.id])
    self.put(detail_url, payload, expected_status_code=status.HTTP_200_OK)

    refreshed = Bookmark.objects.get(id=bookmark.id)
    self.assertCountEqual(refreshed.tags.all(), [tag1, tag2])
def test_patch_bookmark(self): def test_patch_bookmark(self):
self.authenticate() self.authenticate()
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
@ -583,6 +613,22 @@ class BookmarksApiTestCase(LinkdingApiTestCase, BookmarkFactoryMixin):
self.assertEqual(updated_bookmark.description, bookmark.description) self.assertEqual(updated_bookmark.description, bookmark.description)
self.assertListEqual(updated_bookmark.tag_names, bookmark.tag_names) self.assertListEqual(updated_bookmark.tag_names, bookmark.tag_names)
def test_patch_bookmark_adds_tags_from_auto_tagging(self):
    """Patching only the tags of a bookmark still adds the rule's tag."""
    bookmark = self.setup_bookmark()
    tag1, tag2 = self.setup_tag(), self.setup_tag()
    self.authenticate()

    # Configure a rule that maps example.com to the second tag.
    user_profile = self.get_or_create_test_user().profile
    user_profile.auto_tagging_rules = f"example.com {tag2.name}"
    user_profile.save()

    payload = {"tag_names": [tag1.name]}
    detail_url = reverse("bookmarks:bookmark-detail", args=[bookmark.id])
    self.patch(detail_url, payload, expected_status_code=status.HTTP_200_OK)

    refreshed = Bookmark.objects.get(id=bookmark.id)
    self.assertCountEqual(refreshed.tags.all(), [tag1, tag2])
def test_delete_bookmark(self): def test_delete_bookmark(self):
self.authenticate() self.authenticate()
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()

View file

@ -130,6 +130,18 @@ class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin):
mock_create_html_snapshot.assert_not_called() mock_create_html_snapshot.assert_not_called()
def test_create_should_add_tags_from_auto_tagging(self):
    """``create_bookmark`` assigns the given tag plus the tag produced by the rule."""
    tag1, tag2 = self.setup_tag(), self.setup_tag()

    # Configure a rule that maps example.com to the second tag.
    user_profile = self.get_or_create_test_user().profile
    user_profile.auto_tagging_rules = f"example.com {tag2.name}"
    user_profile.save()

    created = create_bookmark(
        Bookmark(url="https://example.com"), tag1.name, self.user
    )

    self.assertCountEqual(created.tags.all(), [tag1, tag2])
def test_update_should_create_web_archive_snapshot_if_url_did_change(self): def test_update_should_create_web_archive_snapshot_if_url_did_change(self):
with patch.object( with patch.object(
tasks, "create_web_archive_snapshot" tasks, "create_web_archive_snapshot"
@ -201,6 +213,18 @@ class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin):
mock_create_html_snapshot.assert_not_called() mock_create_html_snapshot.assert_not_called()
def test_update_should_add_tags_from_auto_tagging(self):
    """``update_bookmark`` assigns the given tag plus the tag produced by the rule."""
    tag1, tag2 = self.setup_tag(), self.setup_tag()

    # Configure a rule that maps example.com to the second tag.
    user_profile = self.get_or_create_test_user().profile
    user_profile.auto_tagging_rules = f"example.com {tag2.name}"
    user_profile.save()

    existing = self.setup_bookmark(url="https://example.com")
    update_bookmark(existing, tag1.name, self.user)

    self.assertCountEqual(existing.tags.all(), [tag1, tag2])
def test_archive_bookmark(self): def test_archive_bookmark(self):
bookmark = Bookmark( bookmark = Bookmark(
url="https://example.com", url="https://example.com",

View file

@ -42,6 +42,7 @@ class SettingsGeneralViewTestCase(TestCase, BookmarkFactoryMixin):
"display_remove_bookmark_action": True, "display_remove_bookmark_action": True,
"permanent_notes": False, "permanent_notes": False,
"custom_css": "", "custom_css": "",
"auto_tagging_rules": "",
} }
return {**form_data, **overrides} return {**form_data, **overrides}
@ -102,6 +103,7 @@ class SettingsGeneralViewTestCase(TestCase, BookmarkFactoryMixin):
"permanent_notes": True, "permanent_notes": True,
"default_mark_unread": True, "default_mark_unread": True,
"custom_css": "body { background-color: #000; }", "custom_css": "body { background-color: #000; }",
"auto_tagging_rules": "example.com tag",
} }
response = self.client.post(reverse("bookmarks:settings.general"), form_data) response = self.client.post(reverse("bookmarks:settings.general"), form_data)
html = response.content.decode() html = response.content.decode()
@ -168,6 +170,9 @@ class SettingsGeneralViewTestCase(TestCase, BookmarkFactoryMixin):
self.user.profile.default_mark_unread, form_data["default_mark_unread"] self.user.profile.default_mark_unread, form_data["default_mark_unread"]
) )
self.assertEqual(self.user.profile.custom_css, form_data["custom_css"]) self.assertEqual(self.user.profile.custom_css, form_data["custom_css"])
self.assertEqual(
self.user.profile.auto_tagging_rules, form_data["auto_tagging_rules"]
)
self.assertSuccessMessage(html, "Profile updated") self.assertSuccessMessage(html, "Profile updated")
def test_update_profile_should_not_be_called_without_respective_form_action(self): def test_update_profile_should_not_be_called_without_respective_form_action(self):