Fix auto-tagging when URL includes port (#820)

This commit is contained in:
Sascha Ißbrücker 2024-09-10 21:19:20 +02:00 committed by GitHub
parent cb0301fd9e
commit 7572aa5bc9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 57 additions and 36 deletions

View file

@ -1,3 +1,5 @@
import logging
from rest_framework import viewsets, mixins, status from rest_framework import viewsets, mixins, status
from rest_framework.decorators import action from rest_framework.decorators import action
from rest_framework.permissions import AllowAny from rest_framework.permissions import AllowAny
@ -19,6 +21,8 @@ from bookmarks.services.bookmarks import (
) )
from bookmarks.services.website_loader import WebsiteMetadata from bookmarks.services.website_loader import WebsiteMetadata
logger = logging.getLogger(__name__)
class BookmarkViewSet( class BookmarkViewSet(
viewsets.GenericViewSet, viewsets.GenericViewSet,
@ -112,7 +116,13 @@ class BookmarkViewSet(
profile = request.user.profile profile = request.user.profile
auto_tags = [] auto_tags = []
if profile.auto_tagging_rules: if profile.auto_tagging_rules:
auto_tags = auto_tagging.get_tags(profile.auto_tagging_rules, url) try:
auto_tags = auto_tagging.get_tags(profile.auto_tagging_rules, url)
except Exception as e:
logger.error(
f"Failed to auto-tag bookmark. url={bookmark.url}",
exc_info=e,
)
return Response( return Response(
{ {

View file

@ -16,27 +16,21 @@ def get_tags(script: str, url: str):
if len(parts) < 2: if len(parts) < 2:
continue continue
domain_pattern = re.sub("^https?://", "", parts[0]) # urlparse only recognizes a host name when the netloc is introduced by "//", so strip any http(s) scheme from the pattern and prepend "//"
path_pattern = None pattern_url = "//" + re.sub("^https?://", "", parts[0])
qs_pattern = None parsed_pattern = urlparse(pattern_url)
if "/" in domain_pattern: if not _domains_matches(parsed_pattern.hostname, parsed_url.hostname):
i = domain_pattern.index("/")
path_pattern = domain_pattern[i:]
domain_pattern = domain_pattern[:i]
if path_pattern and "?" in path_pattern:
i = path_pattern.index("?")
qs_pattern = path_pattern[i + 1 :]
path_pattern = path_pattern[:i]
if not _domains_matches(domain_pattern, parsed_url.netloc):
continue continue
if path_pattern and not _path_matches(path_pattern, parsed_url.path): if parsed_pattern.path and not _path_matches(
parsed_pattern.path, parsed_url.path
):
continue continue
if qs_pattern and not _qs_matches(qs_pattern, parsed_url.query): if parsed_pattern.query and not _qs_matches(
parsed_pattern.query, parsed_url.query
):
continue continue
for tag in parts[1:]: for tag in parts[1:]:

View file

@ -245,12 +245,18 @@ def _update_bookmark_tags(bookmark: Bookmark, tag_string: str, user: User):
tag_names = parse_tag_string(tag_string) tag_names = parse_tag_string(tag_string)
if user.profile.auto_tagging_rules: if user.profile.auto_tagging_rules:
auto_tag_names = auto_tagging.get_tags( try:
user.profile.auto_tagging_rules, bookmark.url auto_tag_names = auto_tagging.get_tags(
) user.profile.auto_tagging_rules, bookmark.url
for auto_tag_name in auto_tag_names: )
if auto_tag_name not in tag_names: for auto_tag_name in auto_tag_names:
tag_names.append(auto_tag_name) if auto_tag_name not in tag_names:
tag_names.append(auto_tag_name)
except Exception as e:
logger.error(
f"Failed to auto-tag bookmark. url={bookmark.url}",
exc_info=e,
)
tags = get_or_create_tags(tag_names, user) tags = get_or_create_tags(tag_names, user)
bookmark.tags.set(tags) bookmark.tags.set(tags)

View file

@ -12,7 +12,18 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["example"])) self.assertEqual(tags, {"example"})
def test_auto_tag_by_domain_works_with_port(self):
script = """
example.com example
test.com test
"""
url = "https://example.com:8080/"
tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, {"example"})
def test_auto_tag_by_domain_ignores_case(self): def test_auto_tag_by_domain_ignores_case(self):
script = """ script = """
@ -22,7 +33,7 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["example"])) self.assertEqual(tags, {"example"})
def test_auto_tag_by_domain_should_add_all_tags(self): def test_auto_tag_by_domain_should_add_all_tags(self):
script = """ script = """
@ -32,7 +43,7 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["one", "two", "three"])) self.assertEqual(tags, {"one", "two", "three"})
def test_auto_tag_by_domain_work_with_idn_domains(self): def test_auto_tag_by_domain_work_with_idn_domains(self):
script = """ script = """
@ -42,7 +53,7 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["tag1"])) self.assertEqual(tags, {"tag1"})
script = """ script = """
xn--81bg3cc2b2bk5hb.xn--h2brj9c tag1 xn--81bg3cc2b2bk5hb.xn--h2brj9c tag1
@ -51,7 +62,7 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["tag1"])) self.assertEqual(tags, {"tag1"})
def test_auto_tag_by_domain_and_path(self): def test_auto_tag_by_domain_and_path(self):
script = """ script = """
@ -63,7 +74,7 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["one"])) self.assertEqual(tags, {"one"})
def test_auto_tag_by_domain_and_path_ignores_case(self): def test_auto_tag_by_domain_and_path_ignores_case(self):
script = """ script = """
@ -73,7 +84,7 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["one"])) self.assertEqual(tags, {"one"})
def test_auto_tag_by_domain_and_path_matches_path_ltr(self): def test_auto_tag_by_domain_and_path_matches_path_ltr(self):
script = """ script = """
@ -85,7 +96,7 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["one"])) self.assertEqual(tags, {"one"})
def test_auto_tag_by_domain_ignores_domain_in_path(self): def test_auto_tag_by_domain_ignores_domain_in_path(self):
script = """ script = """
@ -107,7 +118,7 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["example", "test"])) self.assertEqual(tags, {"example", "test"})
def test_auto_tag_by_domain_matches_domain_rtl(self): def test_auto_tag_by_domain_matches_domain_rtl(self):
script = """ script = """
@ -128,7 +139,7 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["https", "http"])) self.assertEqual(tags, {"https", "http"})
def test_auto_tag_by_domain_ignores_lines_with_no_tags(self): def test_auto_tag_by_domain_ignores_lines_with_no_tags(self):
script = """ script = """
@ -154,7 +165,7 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["tag1", "tag2", "tag5", "tag6", "tag7"])) self.assertEqual(tags, {"tag1", "tag2", "tag5", "tag6", "tag7"})
def test_auto_tag_by_domain_path_and_qs_with_empty_value(self): def test_auto_tag_by_domain_path_and_qs_with_empty_value(self):
script = """ script = """
@ -165,7 +176,7 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["tag1"])) self.assertEqual(tags, {"tag1"})
def test_auto_tag_by_domain_path_and_qs_works_with_encoded_url(self): def test_auto_tag_by_domain_path_and_qs_works_with_encoded_url(self):
script = """ script = """
@ -176,4 +187,4 @@ class AutoTaggingTestCase(TestCase):
tags = auto_tagging.get_tags(script, url) tags = auto_tagging.get_tags(script, url)
self.assertEqual(tags, set(["tag1", "tag2"])) self.assertEqual(tags, {"tag1", "tag2"})