Improve bookmark query performance (#334)

* Remove tag projection from bookmark queries

* add feeds performance test
This commit is contained in:
Sascha Ißbrücker 2022-09-09 19:46:55 +02:00 committed by GitHub
parent a30571ac99
commit 6420ec173a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 151 additions and 50 deletions

View file

@ -1,4 +1,6 @@
from django.db.models import prefetch_related_objects
from rest_framework import serializers
from rest_framework.serializers import ListSerializer
from bookmarks.models import Bookmark, Tag, build_tag_string
from bookmarks.services.bookmarks import create_bookmark, update_bookmark
@ -9,6 +11,14 @@ class TagListField(serializers.ListField):
child = serializers.CharField()
class BookmarkListSerializer(ListSerializer):
def to_representation(self, data):
# Prefetch nested relations to avoid n+1 queries
prefetch_related_objects(data, 'tags')
return super().to_representation(data)
class BookmarkSerializer(serializers.ModelSerializer):
class Meta:
model = Bookmark
@ -32,6 +42,7 @@ class BookmarkSerializer(serializers.ModelSerializer):
'date_added',
'date_modified'
]
list_serializer_class = BookmarkListSerializer
# Override optional char fields to provide default value
title = serializers.CharField(required=False, allow_blank=True, default='')

View file

@ -62,11 +62,6 @@ class Bookmark(models.Model):
owner = models.ForeignKey(get_user_model(), on_delete=models.CASCADE)
tags = models.ManyToManyField(Tag)
# Attributes might be calculated in query
tag_count = 0 # Projection for number of associated tags
tag_string = '' # Projection for list of tag names, comma-separated
tag_projection = False # Tracks if the above projections were loaded
@property
def resolved_title(self):
if self.title:
@ -82,11 +77,7 @@ class Bookmark(models.Model):
@property
def tag_names(self):
# If tag projections were loaded then avoid querying all tags (=executing further selects)
if self.tag_projection:
return parse_tag_string(self.tag_string)
else:
return [tag.name for tag in self.tags.all()]
return [tag.name for tag in self.tags.all()]
def __str__(self):
return self.resolved_title + ' (' + self.url[:30] + '...)'

View file

@ -1,24 +1,12 @@
from typing import Optional
from django.contrib.auth.models import User
from django.db.models import Q, Count, Aggregate, CharField, Value, BooleanField, QuerySet
from django.db.models import Q, QuerySet
from bookmarks.models import Bookmark, Tag
from bookmarks.utils import unique
class Concat(Aggregate):
function = 'GROUP_CONCAT'
template = '%(function)s(%(distinct)s%(expressions)s)'
def __init__(self, expression, distinct=False, **extra):
super(Concat, self).__init__(
expression,
distinct='DISTINCT ' if distinct else '',
output_field=CharField(),
**extra)
def query_bookmarks(user: User, query_string: str) -> QuerySet:
return _base_bookmarks_query(user, query_string) \
.filter(is_archived=False)
@ -36,11 +24,7 @@ def query_shared_bookmarks(user: Optional[User], query_string: str) -> QuerySet:
def _base_bookmarks_query(user: Optional[User], query_string: str) -> QuerySet:
# Add aggregated tag info to bookmark instances
query_set = Bookmark.objects \
.annotate(tag_count=Count('tags'),
tag_string=Concat('tags__name'),
tag_projection=Value(True, BooleanField()))
query_set = Bookmark.objects
# Filter for user
if user:

View file

@ -0,0 +1,64 @@
from django.db import connections
from django.db.utils import DEFAULT_DB_ALIAS
from django.test.utils import CaptureQueriesContext
from django.urls import reverse
from rest_framework import status
from rest_framework.authtoken.models import Token
from bookmarks.tests.helpers import LinkdingApiTestCase, BookmarkFactoryMixin
class BookmarksApiPerformanceTestCase(LinkdingApiTestCase, BookmarkFactoryMixin):
def setUp(self) -> None:
self.api_token = Token.objects.get_or_create(user=self.get_or_create_test_user())[0]
self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.api_token.key)
def get_connection(self):
return connections[DEFAULT_DB_ALIAS]
def test_list_bookmarks_max_queries(self):
# set up some bookmarks with associated tags
num_initial_bookmarks = 10
for index in range(num_initial_bookmarks):
self.setup_bookmark(tags=[self.setup_tag()])
# capture number of queries
context = CaptureQueriesContext(self.get_connection())
with context:
self.get(reverse('bookmarks:bookmark-list'), expected_status_code=status.HTTP_200_OK)
number_of_queries = context.final_queries
self.assertLess(number_of_queries, num_initial_bookmarks)
def test_list_archived_bookmarks_max_queries(self):
# set up some bookmarks with associated tags
num_initial_bookmarks = 10
for index in range(num_initial_bookmarks):
self.setup_bookmark(is_archived=True, tags=[self.setup_tag()])
# capture number of queries
context = CaptureQueriesContext(self.get_connection())
with context:
self.get(reverse('bookmarks:bookmark-archived'), expected_status_code=status.HTTP_200_OK)
number_of_queries = context.final_queries
self.assertLess(number_of_queries, num_initial_bookmarks)
def test_list_shared_bookmarks_max_queries(self):
# set up some bookmarks with associated tags
share_user = self.setup_user(enable_sharing=True)
num_initial_bookmarks = 10
for index in range(num_initial_bookmarks):
self.setup_bookmark(user=share_user, shared=True, tags=[self.setup_tag()])
# capture number of queries
context = CaptureQueriesContext(self.get_connection())
with context:
self.get(reverse('bookmarks:bookmark-shared'), expected_status_code=status.HTTP_200_OK)
number_of_queries = context.final_queries
self.assertLess(number_of_queries, num_initial_bookmarks)

View file

@ -0,0 +1,32 @@
from django.db import connections
from django.db.utils import DEFAULT_DB_ALIAS
from django.test import TestCase
from django.test.utils import CaptureQueriesContext
from django.urls import reverse
from bookmarks.tests.helpers import BookmarkFactoryMixin
class ExporterPerformanceTestCase(TestCase, BookmarkFactoryMixin):
def setUp(self) -> None:
user = self.get_or_create_test_user()
self.client.force_login(user)
def get_connection(self):
return connections[DEFAULT_DB_ALIAS]
def test_export_max_queries(self):
# set up some bookmarks with associated tags
num_initial_bookmarks = 10
for index in range(num_initial_bookmarks):
self.setup_bookmark(tags=[self.setup_tag()])
# capture number of queries
context = CaptureQueriesContext(self.get_connection())
with context:
self.client.get(reverse('bookmarks:settings.export'),follow=True)
number_of_queries = context.final_queries
self.assertLess(number_of_queries, num_initial_bookmarks)

View file

@ -0,0 +1,35 @@
from django.db import connections
from django.db.utils import DEFAULT_DB_ALIAS
from django.test import TestCase
from django.test.utils import CaptureQueriesContext
from django.urls import reverse
from bookmarks.models import FeedToken
from bookmarks.tests.helpers import BookmarkFactoryMixin
class FeedsPerformanceTestCase(TestCase, BookmarkFactoryMixin):
def setUp(self) -> None:
user = self.get_or_create_test_user()
self.client.force_login(user)
self.token = FeedToken.objects.get_or_create(user=user)[0]
def get_connection(self):
return connections[DEFAULT_DB_ALIAS]
def test_all_max_queries(self):
# set up some bookmarks with associated tags
num_initial_bookmarks = 10
for index in range(num_initial_bookmarks):
self.setup_bookmark(tags=[self.setup_tag()])
# capture number of queries
context = CaptureQueriesContext(self.get_connection())
with context:
feed_url = reverse('bookmarks:feeds.all', args=[self.token.key])
self.client.get(feed_url)
number_of_queries = context.final_queries
self.assertLess(number_of_queries, num_initial_bookmarks)

View file

@ -270,25 +270,6 @@ class QueriesTestCase(TestCase, BookmarkFactoryMixin):
self.assertQueryResult(query, [owned_bookmarks])
def test_query_bookmarks_should_use_tag_projection(self):
self.setup_bookmark_search_data()
# Test projection on bookmarks with tags
query = queries.query_bookmarks(self.user, '#tag1 #tag2')
for bookmark in query:
self.assertEqual(bookmark.tag_count, 2)
self.assertEqual(bookmark.tag_string, 'tag1,tag2')
self.assertTrue(bookmark.tag_projection)
# Test projection on bookmarks without tags
query = queries.query_bookmarks(self.user, 'term2')
for bookmark in query:
self.assertEqual(bookmark.tag_count, 0)
self.assertEqual(bookmark.tag_string, None)
self.assertTrue(bookmark.tag_projection)
def test_query_bookmarks_untagged_should_return_untagged_bookmarks_only(self):
tag = self.setup_tag()
untagged_bookmark = self.setup_bookmark()

View file

@ -73,8 +73,8 @@ def get_bookmark_view_context(request: WSGIRequest,
paginator = Paginator(query_set, _default_page_size)
bookmarks = paginator.get_page(page)
selected_tags = _get_selected_tags(tags, filters.query)
# Prefetch owner relation, this avoids n+1 queries when using the owner in templates
prefetch_related_objects(bookmarks.object_list, 'owner')
# Prefetch related objects, this avoids n+1 queries when accessing fields in templates
prefetch_related_objects(bookmarks.object_list, 'owner', 'tags')
return_url = generate_return_url(base_url, page, filters)
link_target = request.user.profile.bookmark_link_target

View file

@ -5,6 +5,7 @@ from functools import lru_cache
import requests
from django.contrib import messages
from django.contrib.auth.decorators import login_required
from django.db.models import prefetch_related_objects
from django.http import HttpResponseRedirect, HttpResponse
from django.shortcuts import render
from django.urls import reverse
@ -114,7 +115,9 @@ def bookmark_import(request):
def bookmark_export(request):
# noinspection PyBroadException
try:
bookmarks = query_bookmarks(request.user, '')
bookmarks = list(query_bookmarks(request.user, ''))
# Prefetch tags to prevent n+1 queries
prefetch_related_objects(bookmarks, 'tags')
file_content = exporter.export_netscape_html(bookmarks)
response = HttpResponse(content_type='text/plain; charset=UTF-8')