Replace django-background-tasks with huey (#657)

* Replace django-background-tasks with huey

* Add command for migrating tasks

* Add custom admin view

* fix dockerfile

* fix tests

* fix tests in CI

* fix task tests

* implement retries

* improve config

* workaround to avoid running singlefile tasks in parallel

* properly kill single-file sub-processes

* use period task for HTML snapshots

* clear task lock when starting task consumer

* remove obsolete cleanup task command
This commit is contained in:
Sascha Ißbrücker 2024-04-07 11:11:14 +02:00 committed by GitHub
parent 68c163d943
commit a6f35119cd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 566 additions and 403 deletions

View file

@ -5,7 +5,6 @@
!/bookmarks !/bookmarks
!/siteroot !/siteroot
!/background-tasks-wrapper.sh
!/bootstrap.sh !/bootstrap.sh
!/LICENSE.txt !/LICENSE.txt
!/manage.py !/manage.py

View file

@ -24,7 +24,9 @@ jobs:
- name: Install Node dependencies - name: Install Node dependencies
run: npm ci run: npm ci
- name: Setup Python environment - name: Setup Python environment
run: pip install -r requirements.txt -r requirements.dev.txt run: |
pip install -r requirements.txt -r requirements.dev.txt
mkdir data
- name: Run tests - name: Run tests
run: python manage.py test bookmarks.tests run: python manage.py test bookmarks.tests
e2e_tests: e2e_tests:
@ -47,6 +49,7 @@ jobs:
run: | run: |
pip install -r requirements.txt -r requirements.dev.txt pip install -r requirements.txt -r requirements.dev.txt
playwright install chromium playwright install chromium
mkdir data
- name: Run build - name: Run build
run: | run: |
npm run build npm run build

View file

@ -1,5 +0,0 @@
#!/usr/bin/env bash
# Wrapper script used by supervisord to first clear task locks before starting the background task processor
python manage.py clean_tasks
exec python manage.py process_tasks

View file

@ -1,22 +1,96 @@
from background_task.admin import TaskAdmin, CompletedTaskAdmin
from background_task.models import Task, CompletedTask
from django.contrib import admin, messages from django.contrib import admin, messages
from django.contrib.admin import AdminSite from django.contrib.admin import AdminSite
from django.contrib.auth.admin import UserAdmin from django.contrib.auth.admin import UserAdmin
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.core.paginator import Paginator
from django.db.models import Count, QuerySet from django.db.models import Count, QuerySet
from django.shortcuts import render
from django.urls import path
from django.utils.translation import ngettext, gettext from django.utils.translation import ngettext, gettext
from huey.contrib.djhuey import HUEY as huey
from rest_framework.authtoken.admin import TokenAdmin from rest_framework.authtoken.admin import TokenAdmin
from rest_framework.authtoken.models import TokenProxy from rest_framework.authtoken.models import TokenProxy
from bookmarks.models import Bookmark, Tag, UserProfile, Toast, FeedToken from bookmarks.models import Bookmark, BookmarkAsset, Tag, UserProfile, Toast, FeedToken
from bookmarks.services.bookmarks import archive_bookmark, unarchive_bookmark from bookmarks.services.bookmarks import archive_bookmark, unarchive_bookmark
# Custom paginator to paginate through Huey tasks
class TaskPaginator(Paginator):
def __init__(self):
super().__init__(self, 100)
self.task_count = huey.storage.queue_size()
@property
def count(self):
return self.task_count
def page(self, number):
limit = self.per_page
offset = (number - 1) * self.per_page
return self._get_page(
self.enqueued_items(limit, offset),
number,
self,
)
# Copied from Huey's SqliteStorage with some modifications to allow pagination
def enqueued_items(self, limit, offset):
to_bytes = lambda b: bytes(b) if not isinstance(b, bytes) else b
sql = "select data from task where queue=? order by priority desc, id limit ? offset ?"
params = (huey.storage.name, limit, offset)
serialized_tasks = [
to_bytes(i) for i, in huey.storage.sql(sql, params, results=True)
]
return [huey.deserialize_task(task) for task in serialized_tasks]
# Custom view to display Huey tasks in the admin
def background_task_view(request):
page_number = int(request.GET.get("p", 1))
paginator = TaskPaginator()
page = paginator.get_page(page_number)
page_range = paginator.get_elided_page_range(page_number, on_each_side=2, on_ends=2)
context = {
**linkding_admin_site.each_context(request),
"title": "Background tasks",
"page": page,
"page_range": page_range,
"tasks": page.object_list,
}
return render(request, "admin/background_tasks.html", context)
class LinkdingAdminSite(AdminSite): class LinkdingAdminSite(AdminSite):
site_header = "linkding administration" site_header = "linkding administration"
site_title = "linkding Admin" site_title = "linkding Admin"
def get_urls(self):
urls = super().get_urls()
custom_urls = [
path("tasks/", background_task_view, name="background_tasks"),
]
return custom_urls + urls
def get_app_list(self, request, app_label=None):
app_list = super().get_app_list(request, app_label)
app_list += [
{
"name": "Huey",
"app_label": "huey_app",
"models": [
{
"name": "Queued tasks",
"object_name": "background_tasks",
"admin_url": "/admin/tasks/",
"view_only": True,
}
],
}
]
return app_list
class AdminBookmark(admin.ModelAdmin): class AdminBookmark(admin.ModelAdmin):
list_display = ("resolved_title", "url", "is_archived", "owner", "date_added") list_display = ("resolved_title", "url", "is_archived", "owner", "date_added")
@ -125,6 +199,15 @@ class AdminBookmark(admin.ModelAdmin):
) )
class AdminBookmarkAsset(admin.ModelAdmin):
list_display = ("display_name", "date_created", "status")
search_fields = (
"display_name",
"file",
)
list_filter = ("status",)
class AdminTag(admin.ModelAdmin): class AdminTag(admin.ModelAdmin):
list_display = ("name", "bookmarks_count", "owner", "date_added") list_display = ("name", "bookmarks_count", "owner", "date_added")
search_fields = ("name", "owner__username") search_fields = ("name", "owner__username")
@ -200,10 +283,9 @@ class AdminFeedToken(admin.ModelAdmin):
linkding_admin_site = LinkdingAdminSite() linkding_admin_site = LinkdingAdminSite()
linkding_admin_site.register(Bookmark, AdminBookmark) linkding_admin_site.register(Bookmark, AdminBookmark)
linkding_admin_site.register(BookmarkAsset, AdminBookmarkAsset)
linkding_admin_site.register(Tag, AdminTag) linkding_admin_site.register(Tag, AdminTag)
linkding_admin_site.register(User, AdminCustomUser) linkding_admin_site.register(User, AdminCustomUser)
linkding_admin_site.register(TokenProxy, TokenAdmin) linkding_admin_site.register(TokenProxy, TokenAdmin)
linkding_admin_site.register(Toast, AdminToast) linkding_admin_site.register(Toast, AdminToast)
linkding_admin_site.register(FeedToken, AdminFeedToken) linkding_admin_site.register(FeedToken, AdminFeedToken)
linkding_admin_site.register(Task, TaskAdmin)
linkding_admin_site.register(CompletedTask, CompletedTaskAdmin)

View file

@ -1,15 +0,0 @@
from background_task.models import Task, CompletedTask
from django.core.management.base import BaseCommand
class Command(BaseCommand):
help = "Remove task locks and clear completed task history"
def handle(self, *args, **options):
# Remove task locks
# If the background task processor exited while executing tasks, these tasks would still be marked as locked,
# even though no process is working on them, and would prevent the task processor from picking the next task in
# the queue
Task.objects.all().update(locked_by=None, locked_at=None)
# Clear task history to prevent them from bloating the DB
CompletedTask.objects.all().delete()

View file

@ -0,0 +1,75 @@
import json
import os
import sqlite3
import importlib
from django.core.management.base import BaseCommand
class Command(BaseCommand):
help = "Migrate tasks from django-background-tasks to Huey"
def handle(self, *args, **options):
db = sqlite3.connect(os.path.join("data", "db.sqlite3"))
# Check if background_task table exists
cursor = db.cursor()
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='background_task'"
)
row = cursor.fetchone()
if not row:
self.stdout.write(
"Legacy task table does not exist. Skipping task migration"
)
return
# Load legacy tasks
cursor.execute("SELECT id, task_name, task_params FROM background_task")
legacy_tasks = cursor.fetchall()
if len(legacy_tasks) == 0:
self.stdout.write("No legacy tasks found. Skipping task migration")
return
self.stdout.write(
f"Found {len(legacy_tasks)} legacy tasks. Migrating to Huey..."
)
# Migrate tasks to Huey
succeeded_tasks = []
for task in legacy_tasks:
task_id = task[0]
task_name = task[1]
task_params_json = task[2]
try:
task_params = json.loads(task_params_json)
function_params = task_params[0]
# Resolve task function
module_name, func_name = task_name.rsplit(".", 1)
module = importlib.import_module(module_name)
func = getattr(module, func_name)
# Call task function
func(*function_params)
succeeded_tasks.append(task_id)
except Exception:
self.stderr.write(f"Error migrating task [{task_id}] {task_name}")
self.stdout.write(f"Migrated {len(succeeded_tasks)} tasks successfully")
# Clean up
try:
placeholders = ", ".join("?" for _ in succeeded_tasks)
sql = f"DELETE FROM background_task WHERE id IN ({placeholders})"
cursor.execute(sql, succeeded_tasks)
db.commit()
self.stdout.write(
f"Deleted {len(succeeded_tasks)} migrated tasks from legacy table"
)
except Exception:
self.stderr.write("Error cleaning up legacy tasks")
cursor.close()
db.close()

View file

@ -1,6 +1,8 @@
import gzip import gzip
import logging
import os import os
import shutil import shutil
import signal
import subprocess import subprocess
from django.conf import settings from django.conf import settings
@ -10,16 +12,21 @@ class SingeFileError(Exception):
pass pass
logger = logging.getLogger(__name__)
def create_snapshot(url: str, filepath: str): def create_snapshot(url: str, filepath: str):
singlefile_path = settings.LD_SINGLEFILE_PATH singlefile_path = settings.LD_SINGLEFILE_PATH
singlefile_options = settings.LD_SINGLEFILE_OPTIONS # singlefile_options = settings.LD_SINGLEFILE_OPTIONS
temp_filepath = filepath + ".tmp" temp_filepath = filepath + ".tmp"
args = [singlefile_path, url, temp_filepath]
try: try:
command = f"{singlefile_path} '{url}' {singlefile_options} {temp_filepath}" # Use start_new_session=True to create a new process group
subprocess.run(command, check=True, shell=True) process = subprocess.Popen(args, start_new_session=True)
process.wait(timeout=60)
# single-file doesn't return exit codes apparently, so check if the file was created # check if the file was created
if not os.path.exists(temp_filepath): if not os.path.exists(temp_filepath):
raise SingeFileError("Failed to create snapshot") raise SingeFileError("Failed to create snapshot")
@ -29,5 +36,20 @@ def create_snapshot(url: str, filepath: str):
shutil.copyfileobj(raw_file, gz_file) shutil.copyfileobj(raw_file, gz_file)
os.remove(temp_filepath) os.remove(temp_filepath)
except subprocess.TimeoutExpired:
# First try to terminate properly
try:
logger.error(
"Timeout expired while creating snapshot. Terminating process..."
)
process.terminate()
process.wait(timeout=20)
raise SingeFileError("Timeout expired while creating snapshot")
except subprocess.TimeoutExpired:
# Kill the whole process group, which should also clean up any chromium
# processes spawned by single-file
logger.error("Timeout expired while terminating. Killing process...")
os.killpg(os.getpgid(process.pid), signal.SIGTERM)
raise SingeFileError("Timeout expired while creating snapshot")
except subprocess.CalledProcessError as error: except subprocess.CalledProcessError as error:
raise SingeFileError(f"Failed to create snapshot: {error.stderr}") raise SingeFileError(f"Failed to create snapshot: {error.stderr}")

View file

@ -1,14 +1,16 @@
import functools
import logging import logging
import os import os
import waybackpy import waybackpy
from background_task import background
from background_task.models import Task
from django.conf import settings from django.conf import settings
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
from django.contrib.auth.models import User from django.contrib.auth.models import User
from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound
from django.utils import timezone, formats from django.utils import timezone, formats
from huey import crontab
from huey.contrib.djhuey import HUEY as huey
from huey.exceptions import TaskLockedException
from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound
import bookmarks.services.wayback import bookmarks.services.wayback
from bookmarks.models import Bookmark, BookmarkAsset, UserProfile from bookmarks.models import Bookmark, BookmarkAsset, UserProfile
@ -18,6 +20,35 @@ from bookmarks.services.website_loader import DEFAULT_USER_AGENT
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Create custom decorator for Huey tasks that implements exponential backoff
# Taken from: https://huey.readthedocs.io/en/latest/guide.html#tips-and-tricks
# Retry 1: 60
# Retry 2: 240
# Retry 3: 960
# Retry 4: 3840
# Retry 5: 15360
def task(retries=5, retry_delay=15, retry_backoff=4):
def deco(fn):
@functools.wraps(fn)
def inner(*args, **kwargs):
task = kwargs.pop("task")
try:
return fn(*args, **kwargs)
except TaskLockedException as exc:
# Task locks are currently only used as workaround to enforce
# running specific types of tasks (e.g. singlefile snapshots)
# sequentially. In that case don't reduce the number of retries.
task.retries = retries
raise exc
except Exception as exc:
task.retry_delay *= retry_backoff
raise exc
return huey.task(retries=retries, retry_delay=retry_delay, context=True)(inner)
return deco
def is_web_archive_integration_active(user: User) -> bool: def is_web_archive_integration_active(user: User) -> bool:
background_tasks_enabled = not settings.LD_DISABLE_BACKGROUND_TASKS background_tasks_enabled = not settings.LD_DISABLE_BACKGROUND_TASKS
web_archive_integration_enabled = ( web_archive_integration_enabled = (
@ -67,7 +98,7 @@ def _create_snapshot(bookmark: Bookmark):
logger.info(f"Successfully created new snapshot for bookmark:. url={bookmark.url}") logger.info(f"Successfully created new snapshot for bookmark:. url={bookmark.url}")
@background() @task()
def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool): def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
try: try:
bookmark = Bookmark.objects.get(id=bookmark_id) bookmark = Bookmark.objects.get(id=bookmark_id)
@ -96,7 +127,7 @@ def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
_load_newest_snapshot(bookmark) _load_newest_snapshot(bookmark)
@background() @task()
def _load_web_archive_snapshot_task(bookmark_id: int): def _load_web_archive_snapshot_task(bookmark_id: int):
try: try:
bookmark = Bookmark.objects.get(id=bookmark_id) bookmark = Bookmark.objects.get(id=bookmark_id)
@ -114,13 +145,14 @@ def schedule_bookmarks_without_snapshots(user: User):
_schedule_bookmarks_without_snapshots_task(user.id) _schedule_bookmarks_without_snapshots_task(user.id)
@background() @task()
def _schedule_bookmarks_without_snapshots_task(user_id: int): def _schedule_bookmarks_without_snapshots_task(user_id: int):
user = get_user_model().objects.get(id=user_id) user = get_user_model().objects.get(id=user_id)
bookmarks_without_snapshots = Bookmark.objects.filter( bookmarks_without_snapshots = Bookmark.objects.filter(
web_archive_snapshot_url__exact="", owner=user web_archive_snapshot_url__exact="", owner=user
) )
# TODO: Implement bulk task creation
for bookmark in bookmarks_without_snapshots: for bookmark in bookmarks_without_snapshots:
# To prevent rate limit errors from the Wayback API only try to load the latest snapshots instead of creating # To prevent rate limit errors from the Wayback API only try to load the latest snapshots instead of creating
# new ones when processing bookmarks in bulk # new ones when processing bookmarks in bulk
@ -138,7 +170,7 @@ def load_favicon(user: User, bookmark: Bookmark):
_load_favicon_task(bookmark.id) _load_favicon_task(bookmark.id)
@background() @task()
def _load_favicon_task(bookmark_id: int): def _load_favicon_task(bookmark_id: int):
try: try:
bookmark = Bookmark.objects.get(id=bookmark_id) bookmark = Bookmark.objects.get(id=bookmark_id)
@ -162,19 +194,15 @@ def schedule_bookmarks_without_favicons(user: User):
_schedule_bookmarks_without_favicons_task(user.id) _schedule_bookmarks_without_favicons_task(user.id)
@background() @task()
def _schedule_bookmarks_without_favicons_task(user_id: int): def _schedule_bookmarks_without_favicons_task(user_id: int):
user = get_user_model().objects.get(id=user_id) user = get_user_model().objects.get(id=user_id)
bookmarks = Bookmark.objects.filter(favicon_file__exact="", owner=user) bookmarks = Bookmark.objects.filter(favicon_file__exact="", owner=user)
tasks = []
# TODO: Implement bulk task creation
for bookmark in bookmarks: for bookmark in bookmarks:
task = Task.objects.new_task( _load_favicon_task(bookmark.id)
task_name="bookmarks.services.tasks._load_favicon_task", args=(bookmark.id,) pass
)
tasks.append(task)
Task.objects.bulk_create(tasks)
def schedule_refresh_favicons(user: User): def schedule_refresh_favicons(user: User):
@ -182,19 +210,14 @@ def schedule_refresh_favicons(user: User):
_schedule_refresh_favicons_task(user.id) _schedule_refresh_favicons_task(user.id)
@background() @task()
def _schedule_refresh_favicons_task(user_id: int): def _schedule_refresh_favicons_task(user_id: int):
user = get_user_model().objects.get(id=user_id) user = get_user_model().objects.get(id=user_id)
bookmarks = Bookmark.objects.filter(owner=user) bookmarks = Bookmark.objects.filter(owner=user)
tasks = []
# TODO: Implement bulk task creation
for bookmark in bookmarks: for bookmark in bookmarks:
task = Task.objects.new_task( _load_favicon_task(bookmark.id)
task_name="bookmarks.services.tasks._load_favicon_task", args=(bookmark.id,)
)
tasks.append(task)
Task.objects.bulk_create(tasks)
def is_html_snapshot_feature_active() -> bool: def is_html_snapshot_feature_active() -> bool:
@ -214,7 +237,6 @@ def create_html_snapshot(bookmark: Bookmark):
status=BookmarkAsset.STATUS_PENDING, status=BookmarkAsset.STATUS_PENDING,
) )
asset.save() asset.save()
_create_html_snapshot_task(asset.id)
def _generate_snapshot_filename(asset: BookmarkAsset) -> str: def _generate_snapshot_filename(asset: BookmarkAsset) -> str:
@ -230,7 +252,23 @@ def _generate_snapshot_filename(asset: BookmarkAsset) -> str:
return f"{asset.asset_type}_{formatted_datetime}_{sanitized_url}.html.gz" return f"{asset.asset_type}_{formatted_datetime}_{sanitized_url}.html.gz"
@background() # singe-file does not support running multiple instances in parallel, so we can
# not queue up multiple snapshot tasks at once. Instead, schedule a periodic
# task that grabs a number of pending assets and creates snapshots for them in
# sequence. The task uses a lock to ensure that a new task isn't scheduled
# before the previous one has finished.
@huey.periodic_task(crontab(minute="*"))
@huey.lock_task("schedule-html-snapshots-lock")
def _schedule_html_snapshots_task():
# Get five pending assets
assets = BookmarkAsset.objects.filter(status=BookmarkAsset.STATUS_PENDING).order_by(
"date_created"
)[:5]
for asset in assets:
_create_html_snapshot_task(asset.id)
def _create_html_snapshot_task(asset_id: int): def _create_html_snapshot_task(asset_id: int):
try: try:
asset = BookmarkAsset.objects.get(id=asset_id) asset = BookmarkAsset.objects.get(id=asset_id)
@ -246,13 +284,14 @@ def _create_html_snapshot_task(asset_id: int):
asset.status = BookmarkAsset.STATUS_COMPLETE asset.status = BookmarkAsset.STATUS_COMPLETE
asset.file = filename asset.file = filename
asset.gzip = True asset.gzip = True
asset.save()
logger.info( logger.info(
f"Successfully created HTML snapshot for bookmark. url={asset.bookmark.url}" f"Successfully created HTML snapshot for bookmark. url={asset.bookmark.url}"
) )
except singlefile.SingeFileError as error: except Exception as error:
asset.status = BookmarkAsset.STATUS_FAILURE
logger.error( logger.error(
f"Failed to create HTML snapshot for bookmark. url={asset.bookmark.url}", f"Failed to HTML snapshot for bookmark. url={asset.bookmark.url}",
exc_info=error, exc_info=error,
) )
asset.status = BookmarkAsset.STATUS_FAILURE
asset.save() asset.save()

2
bookmarks/tasks.py Normal file
View file

@ -0,0 +1,2 @@
# Expose task modules to Huey Django extension
import bookmarks.services.tasks

View file

@ -0,0 +1,39 @@
{% extends "admin/base_site.html" %}
{% block content %}
<table style="width: 100%">
<thead>
<tr>
<th>ID</th>
<th>Name</th>
<th>Args</th>
<th>Retries</th>
</tr>
</thead>
<tbody>
{% for task in tasks %}
<tr>
<td>{{ task.id }}</td>
<td>{{ task.name }}</td>
<td>{{ task.args }}</td>
<td>{{ task.retries }}</td>
</tr>
{% endfor %}
</tbody>
</table>
<p class="paginator">
{% if page.paginator.num_pages > 1 %}
{% for page_number in page_range %}
{% if page_number == page.number %}
<span class="this-page">{{ page_number }}</span>
{% elif page_number == '…' %}
<span></span>
{% else %}
<a href="?p={{ page_number }}">{{ page_number }}</a>
{% endif %}
{% endfor %}
&nbsp;
{% endif %}
{{ page.paginator.count }} tasks
</p>
{% endblock %}

View file

@ -776,6 +776,4 @@ class BookmarkDetailsModalTestCase(TestCase, BookmarkFactoryMixin, HtmlTestMixin
) )
self.assertEqual(response.status_code, 302) self.assertEqual(response.status_code, 302)
mock_create_html_snapshot_task.assert_called_with(bookmark.id)
self.assertEqual(bookmark.bookmarkasset_set.count(), 1) self.assertEqual(bookmark.bookmarkasset_set.count(), 1)

View file

@ -1,21 +1,20 @@
import datetime import datetime
import os.path import os.path
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any
from unittest import mock from unittest import mock
import waybackpy import waybackpy
from background_task.models import Task
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.test import TestCase, override_settings from django.test import TestCase, override_settings
from huey.contrib.djhuey import HUEY as huey
from waybackpy.exceptions import WaybackError from waybackpy.exceptions import WaybackError
import bookmarks.services.favicon_loader import bookmarks.services.favicon_loader
import bookmarks.services.wayback import bookmarks.services.wayback
from bookmarks.models import BookmarkAsset, UserProfile from bookmarks.models import BookmarkAsset, UserProfile
from bookmarks.services import tasks, singlefile from bookmarks.services import tasks, singlefile
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging from bookmarks.tests.helpers import BookmarkFactoryMixin
def create_wayback_machine_save_api_mock( def create_wayback_machine_save_api_mock(
@ -36,27 +35,45 @@ class MockCdxSnapshot:
datetime_timestamp: datetime.datetime datetime_timestamp: datetime.datetime
def create_cdx_server_api_mock(
archive_url: str | None = "https://example.com/newest_snapshot",
fail_loading_snapshot=False,
):
mock_api = mock.Mock()
if fail_loading_snapshot:
mock_api.newest.side_effect = WaybackError
elif archive_url:
mock_api.newest.return_value = MockCdxSnapshot(
archive_url, datetime.datetime.now()
)
else:
mock_api.newest.return_value = None
return mock_api
class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
def setUp(self): def setUp(self):
huey.immediate = True
huey.results = True
huey.store_none = True
self.mock_save_api = mock.Mock(
archive_url="https://example.com/created_snapshot"
)
self.mock_save_api_patcher = mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=self.mock_save_api
)
self.mock_save_api_patcher.start()
self.mock_cdx_api = mock.Mock()
self.mock_cdx_api.newest.return_value = MockCdxSnapshot(
"https://example.com/newest_snapshot", datetime.datetime.now()
)
self.mock_cdx_api_patcher = mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=self.mock_cdx_api,
)
self.mock_cdx_api_patcher.start()
self.mock_load_favicon_patcher = mock.patch(
"bookmarks.services.favicon_loader.load_favicon"
)
self.mock_load_favicon = self.mock_load_favicon_patcher.start()
self.mock_load_favicon.return_value = "https_example_com.png"
self.mock_singlefile_create_snapshot_patcher = mock.patch(
"bookmarks.services.singlefile.create_snapshot",
)
self.mock_singlefile_create_snapshot = (
self.mock_singlefile_create_snapshot_patcher.start()
)
user = self.get_or_create_test_user() user = self.get_or_create_test_user()
user.profile.web_archive_integration = ( user.profile.web_archive_integration = (
UserProfile.WEB_ARCHIVE_INTEGRATION_ENABLED UserProfile.WEB_ARCHIVE_INTEGRATION_ENABLED
@ -64,107 +81,69 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
user.profile.enable_favicons = True user.profile.enable_favicons = True
user.profile.save() user.profile.save()
@disable_logging def tearDown(self):
def run_pending_task(self, task_function: Any): self.mock_save_api_patcher.stop()
func = getattr(task_function, "task_function", None) self.mock_cdx_api_patcher.stop()
task = Task.objects.all()[0] self.mock_load_favicon_patcher.stop()
self.assertEqual(task_function.name, task.task_name) self.mock_singlefile_create_snapshot_patcher.stop()
args, kwargs = task.params() huey.storage.flush_results()
func(*args, **kwargs) huey.immediate = False
task.delete()
@disable_logging def executed_count(self):
def run_all_pending_tasks(self, task_function: Any): return len(huey.all_results())
func = getattr(task_function, "task_function", None)
tasks = Task.objects.all()
for task in tasks:
self.assertEqual(task_function.name, task.task_name)
args, kwargs = task.params()
func(*args, **kwargs)
task.delete()
def test_create_web_archive_snapshot_should_update_snapshot_url(self): def test_create_web_archive_snapshot_should_update_snapshot_url(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
mock_save_api = create_wayback_machine_save_api_mock()
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
tasks.create_web_archive_snapshot( tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False self.get_or_create_test_user(), bookmark, False
) )
self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db() bookmark.refresh_from_db()
mock_save_api.save.assert_called_once() self.mock_save_api.save.assert_called_once()
self.assertEqual(self.executed_count(), 1)
self.assertEqual( self.assertEqual(
bookmark.web_archive_snapshot_url, bookmark.web_archive_snapshot_url,
"https://example.com/created_snapshot", "https://example.com/created_snapshot",
) )
def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self): def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self):
mock_save_api = create_wayback_machine_save_api_mock()
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
tasks._create_web_archive_snapshot_task(123, False) tasks._create_web_archive_snapshot_task(123, False)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
mock_save_api.save.assert_not_called() self.assertEqual(self.executed_count(), 1)
self.mock_save_api.save.assert_not_called()
def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self): def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self):
bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com") bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com")
mock_save_api = create_wayback_machine_save_api_mock()
with mock.patch.object( self.mock_save_api.create_web_archive_snapshot(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False self.get_or_create_test_user(), bookmark, False
) )
self.run_pending_task(tasks._create_web_archive_snapshot_task)
mock_save_api.assert_not_called() self.assertEqual(self.executed_count(), 0)
self.mock_save_api.assert_not_called()
def test_create_web_archive_snapshot_should_force_update_snapshot(self): def test_create_web_archive_snapshot_should_force_update_snapshot(self):
bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com") bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com")
mock_save_api = create_wayback_machine_save_api_mock( self.mock_save_api.archive_url = "https://other.com"
archive_url="https://other.com"
)
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
tasks.create_web_archive_snapshot( tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, True self.get_or_create_test_user(), bookmark, True
) )
self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db() bookmark.refresh_from_db()
self.assertEqual(bookmark.web_archive_snapshot_url, "https://other.com") self.assertEqual(bookmark.web_archive_snapshot_url, "https://other.com")
def test_create_web_archive_snapshot_should_use_newest_snapshot_as_fallback(self): def test_create_web_archive_snapshot_should_use_newest_snapshot_as_fallback(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
mock_save_api = create_wayback_machine_save_api_mock(fail_on_save=True) self.mock_save_api.save.side_effect = WaybackError
mock_cdx_api = create_cdx_server_api_mock()
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks.create_web_archive_snapshot( tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False self.get_or_create_test_user(), bookmark, False
) )
self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db() bookmark.refresh_from_db()
mock_cdx_api.newest.assert_called_once() self.mock_cdx_api.newest.assert_called_once()
self.assertEqual( self.assertEqual(
"https://example.com/newest_snapshot", "https://example.com/newest_snapshot",
bookmark.web_archive_snapshot_url, bookmark.web_archive_snapshot_url,
@ -172,49 +151,30 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
def test_create_web_archive_snapshot_should_ignore_missing_newest_snapshot(self): def test_create_web_archive_snapshot_should_ignore_missing_newest_snapshot(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
mock_save_api = create_wayback_machine_save_api_mock(fail_on_save=True) self.mock_save_api.save.side_effect = WaybackError
mock_cdx_api = create_cdx_server_api_mock(archive_url=None) self.mock_cdx_api.newest.return_value = None
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks.create_web_archive_snapshot( tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False self.get_or_create_test_user(), bookmark, False
) )
self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db() bookmark.refresh_from_db()
self.assertEqual("", bookmark.web_archive_snapshot_url) self.assertEqual("", bookmark.web_archive_snapshot_url)
def test_create_web_archive_snapshot_should_ignore_newest_snapshot_errors(self): def test_create_web_archive_snapshot_should_ignore_newest_snapshot_errors(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
mock_save_api = create_wayback_machine_save_api_mock(fail_on_save=True) self.mock_save_api.save.side_effect = WaybackError
mock_cdx_api = create_cdx_server_api_mock(fail_loading_snapshot=True) self.mock_cdx_api.newest.side_effect = WaybackError
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks.create_web_archive_snapshot( tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False self.get_or_create_test_user(), bookmark, False
) )
self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db() bookmark.refresh_from_db()
self.assertEqual("", bookmark.web_archive_snapshot_url) self.assertEqual("", bookmark.web_archive_snapshot_url)
def test_create_web_archive_snapshot_should_not_save_stale_bookmark_data(self): def test_create_web_archive_snapshot_should_not_save_stale_bookmark_data(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
mock_save_api = create_wayback_machine_save_api_mock()
# update bookmark during API call to check that saving # update bookmark during API call to check that saving
# the snapshot does not overwrite updated bookmark data # the snapshot does not overwrite updated bookmark data
@ -222,15 +182,11 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
bookmark.title = "Updated title" bookmark.title = "Updated title"
bookmark.save() bookmark.save()
mock_save_api.save.side_effect = mock_save_impl self.mock_save_api.save.side_effect = mock_save_impl
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
tasks.create_web_archive_snapshot( tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False self.get_or_create_test_user(), bookmark, False
) )
self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db() bookmark.refresh_from_db()
self.assertEqual(bookmark.title, "Updated title") self.assertEqual(bookmark.title, "Updated title")
@ -241,80 +197,49 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
def test_load_web_archive_snapshot_should_update_snapshot_url(self): def test_load_web_archive_snapshot_should_update_snapshot_url(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
mock_cdx_api = create_cdx_server_api_mock()
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks._load_web_archive_snapshot_task(bookmark.id) tasks._load_web_archive_snapshot_task(bookmark.id)
self.run_pending_task(tasks._load_web_archive_snapshot_task)
bookmark.refresh_from_db() bookmark.refresh_from_db()
mock_cdx_api.newest.assert_called_once()
self.assertEqual(self.executed_count(), 1)
self.mock_cdx_api.newest.assert_called_once()
self.assertEqual( self.assertEqual(
"https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url "https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url
) )
def test_load_web_archive_snapshot_should_handle_missing_bookmark_id(self): def test_load_web_archive_snapshot_should_handle_missing_bookmark_id(self):
mock_cdx_api = create_cdx_server_api_mock()
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks._load_web_archive_snapshot_task(123) tasks._load_web_archive_snapshot_task(123)
self.run_pending_task(tasks._load_web_archive_snapshot_task)
mock_cdx_api.newest.assert_not_called() self.assertEqual(self.executed_count(), 1)
self.mock_cdx_api.newest.assert_not_called()
def test_load_web_archive_snapshot_should_skip_if_snapshot_exists(self): def test_load_web_archive_snapshot_should_skip_if_snapshot_exists(self):
bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com") bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com")
mock_cdx_api = create_cdx_server_api_mock()
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks._load_web_archive_snapshot_task(bookmark.id) tasks._load_web_archive_snapshot_task(bookmark.id)
self.run_pending_task(tasks._load_web_archive_snapshot_task)
mock_cdx_api.newest.assert_not_called() self.assertEqual(self.executed_count(), 1)
self.mock_cdx_api.newest.assert_not_called()
def test_load_web_archive_snapshot_should_handle_missing_snapshot(self): def test_load_web_archive_snapshot_should_handle_missing_snapshot(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
mock_cdx_api = create_cdx_server_api_mock(archive_url=None) self.mock_cdx_api.newest.return_value = None
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks._load_web_archive_snapshot_task(bookmark.id) tasks._load_web_archive_snapshot_task(bookmark.id)
self.run_pending_task(tasks._load_web_archive_snapshot_task)
self.assertEqual("", bookmark.web_archive_snapshot_url) self.assertEqual("", bookmark.web_archive_snapshot_url)
def test_load_web_archive_snapshot_should_handle_wayback_errors(self): def test_load_web_archive_snapshot_should_handle_wayback_errors(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
mock_cdx_api = create_cdx_server_api_mock(fail_loading_snapshot=True) self.mock_cdx_api.newest.side_effect = WaybackError
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks._load_web_archive_snapshot_task(bookmark.id) tasks._load_web_archive_snapshot_task(bookmark.id)
self.run_pending_task(tasks._load_web_archive_snapshot_task)
self.assertEqual("", bookmark.web_archive_snapshot_url) self.assertEqual("", bookmark.web_archive_snapshot_url)
def test_load_web_archive_snapshot_should_not_save_stale_bookmark_data(self): def test_load_web_archive_snapshot_should_not_save_stale_bookmark_data(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
mock_cdx_api = create_cdx_server_api_mock()
# update bookmark during API call to check that saving # update bookmark during API call to check that saving
# the snapshot does not overwrite updated bookmark data # the snapshot does not overwrite updated bookmark data
@ -323,15 +248,9 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
bookmark.save() bookmark.save()
return mock.DEFAULT return mock.DEFAULT
mock_cdx_api.newest.side_effect = mock_newest_impl self.mock_cdx_api.newest.side_effect = mock_newest_impl
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks._load_web_archive_snapshot_task(bookmark.id) tasks._load_web_archive_snapshot_task(bookmark.id)
self.run_pending_task(tasks._load_web_archive_snapshot_task)
bookmark.refresh_from_db() bookmark.refresh_from_db()
self.assertEqual("Updated title", bookmark.title) self.assertEqual("Updated title", bookmark.title)
@ -344,11 +263,11 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self, self,
): ):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
tasks.create_web_archive_snapshot( tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False self.get_or_create_test_user(), bookmark, False
) )
self.assertEqual(self.executed_count(), 0)
self.assertEqual(Task.objects.count(), 0)
def test_create_web_archive_snapshot_should_not_run_when_web_archive_integration_is_disabled( def test_create_web_archive_snapshot_should_not_run_when_web_archive_integration_is_disabled(
self, self,
@ -363,7 +282,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.get_or_create_test_user(), bookmark, False self.get_or_create_test_user(), bookmark, False
) )
self.assertEqual(Task.objects.count(), 0) self.assertEqual(self.executed_count(), 0)
def test_schedule_bookmarks_without_snapshots_should_load_snapshot_for_all_bookmarks_without_snapshot( def test_schedule_bookmarks_without_snapshots_should_load_snapshot_for_all_bookmarks_without_snapshot(
self, self,
@ -377,16 +296,9 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(web_archive_snapshot_url="https://example.com") self.setup_bookmark(web_archive_snapshot_url="https://example.com")
tasks.schedule_bookmarks_without_snapshots(user) tasks.schedule_bookmarks_without_snapshots(user)
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
task_list = Task.objects.all() self.assertEqual(self.executed_count(), 4)
self.assertEqual(task_list.count(), 3) self.assertEqual(self.mock_cdx_api.newest.call_count, 3)
for task in task_list:
self.assertEqual(
task.task_name,
"bookmarks.services.tasks._load_web_archive_snapshot_task",
)
def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks( def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(
self, self,
@ -403,10 +315,8 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(user=other_user) self.setup_bookmark(user=other_user)
tasks.schedule_bookmarks_without_snapshots(user) tasks.schedule_bookmarks_without_snapshots(user)
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
task_list = Task.objects.all() self.assertEqual(self.mock_cdx_api.newest.call_count, 3)
self.assertEqual(task_list.count(), 3)
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True) @override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled( def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(
@ -414,7 +324,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
): ):
tasks.schedule_bookmarks_without_snapshots(self.user) tasks.schedule_bookmarks_without_snapshots(self.user)
self.assertEqual(Task.objects.count(), 0) self.assertEqual(self.executed_count(), 0)
def test_schedule_bookmarks_without_snapshots_should_not_run_when_web_archive_integration_is_disabled( def test_schedule_bookmarks_without_snapshots_should_not_run_when_web_archive_integration_is_disabled(
self, self,
@ -425,44 +335,32 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.user.profile.save() self.user.profile.save()
tasks.schedule_bookmarks_without_snapshots(self.user) tasks.schedule_bookmarks_without_snapshots(self.user)
self.assertEqual(Task.objects.count(), 0) self.assertEqual(self.executed_count(), 0)
def test_load_favicon_should_create_favicon_file(self): def test_load_favicon_should_create_favicon_file(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
with mock.patch(
"bookmarks.services.favicon_loader.load_favicon"
) as mock_load_favicon:
mock_load_favicon.return_value = "https_example_com.png"
tasks.load_favicon(self.get_or_create_test_user(), bookmark) tasks.load_favicon(self.get_or_create_test_user(), bookmark)
self.run_pending_task(tasks._load_favicon_task)
bookmark.refresh_from_db() bookmark.refresh_from_db()
self.assertEqual(self.executed_count(), 1)
self.assertEqual(bookmark.favicon_file, "https_example_com.png") self.assertEqual(bookmark.favicon_file, "https_example_com.png")
def test_load_favicon_should_update_favicon_file(self): def test_load_favicon_should_update_favicon_file(self):
bookmark = self.setup_bookmark(favicon_file="https_example_com.png") bookmark = self.setup_bookmark(favicon_file="https_example_com.png")
with mock.patch( self.mock_load_favicon.return_value = "https_example_updated_com.png"
"bookmarks.services.favicon_loader.load_favicon"
) as mock_load_favicon: tasks.load_favicon(self.get_or_create_test_user(), bookmark)
mock_load_favicon.return_value = "https_example_updated_com.png"
tasks.load_favicon(self.get_or_create_test_user(), bookmark)
self.run_pending_task(tasks._load_favicon_task)
mock_load_favicon.assert_called()
bookmark.refresh_from_db() bookmark.refresh_from_db()
self.mock_load_favicon.assert_called_once()
self.assertEqual(bookmark.favicon_file, "https_example_updated_com.png") self.assertEqual(bookmark.favicon_file, "https_example_updated_com.png")
def test_load_favicon_should_handle_missing_bookmark(self): def test_load_favicon_should_handle_missing_bookmark(self):
with mock.patch(
"bookmarks.services.favicon_loader.load_favicon"
) as mock_load_favicon:
tasks._load_favicon_task(123) tasks._load_favicon_task(123)
self.run_pending_task(tasks._load_favicon_task)
mock_load_favicon.assert_not_called() self.mock_load_favicon.assert_not_called()
def test_load_favicon_should_not_save_stale_bookmark_data(self): def test_load_favicon_should_not_save_stale_bookmark_data(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
@ -474,13 +372,9 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
bookmark.save() bookmark.save()
return "https_example_com.png" return "https_example_com.png"
with mock.patch( self.mock_load_favicon.side_effect = mock_load_favicon_impl
"bookmarks.services.favicon_loader.load_favicon"
) as mock_load_favicon:
mock_load_favicon.side_effect = mock_load_favicon_impl
tasks.load_favicon(self.get_or_create_test_user(), bookmark) tasks.load_favicon(self.get_or_create_test_user(), bookmark)
self.run_pending_task(tasks._load_favicon_task)
bookmark.refresh_from_db() bookmark.refresh_from_db()
self.assertEqual(bookmark.title, "Updated title") self.assertEqual(bookmark.title, "Updated title")
@ -491,7 +385,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
tasks.load_favicon(self.get_or_create_test_user(), bookmark) tasks.load_favicon(self.get_or_create_test_user(), bookmark)
self.assertEqual(Task.objects.count(), 0) self.assertEqual(self.executed_count(), 0)
def test_load_favicon_should_not_run_when_favicon_feature_is_disabled(self): def test_load_favicon_should_not_run_when_favicon_feature_is_disabled(self):
self.user.profile.enable_favicons = False self.user.profile.enable_favicons = False
@ -500,7 +394,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
tasks.load_favicon(self.get_or_create_test_user(), bookmark) tasks.load_favicon(self.get_or_create_test_user(), bookmark)
self.assertEqual(Task.objects.count(), 0) self.assertEqual(self.executed_count(), 0)
def test_schedule_bookmarks_without_favicons_should_load_favicon_for_all_bookmarks_without_favicon( def test_schedule_bookmarks_without_favicons_should_load_favicon_for_all_bookmarks_without_favicon(
self, self,
@ -514,15 +408,9 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(favicon_file="https_example_com.png") self.setup_bookmark(favicon_file="https_example_com.png")
tasks.schedule_bookmarks_without_favicons(user) tasks.schedule_bookmarks_without_favicons(user)
self.run_pending_task(tasks._schedule_bookmarks_without_favicons_task)
task_list = Task.objects.all() self.assertEqual(self.executed_count(), 4)
self.assertEqual(task_list.count(), 3) self.assertEqual(self.mock_load_favicon.call_count, 3)
for task in task_list:
self.assertEqual(
task.task_name, "bookmarks.services.tasks._load_favicon_task"
)
def test_schedule_bookmarks_without_favicons_should_only_update_user_owned_bookmarks( def test_schedule_bookmarks_without_favicons_should_only_update_user_owned_bookmarks(
self, self,
@ -539,19 +427,17 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(user=other_user) self.setup_bookmark(user=other_user)
tasks.schedule_bookmarks_without_favicons(user) tasks.schedule_bookmarks_without_favicons(user)
self.run_pending_task(tasks._schedule_bookmarks_without_favicons_task)
task_list = Task.objects.all() self.assertEqual(self.mock_load_favicon.call_count, 3)
self.assertEqual(task_list.count(), 3)
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True) @override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_schedule_bookmarks_without_favicons_should_not_run_when_background_tasks_are_disabled( def test_schedule_bookmarks_without_favicons_should_not_run_when_background_tasks_are_disabled(
self, self,
): ):
bookmark = self.setup_bookmark() self.setup_bookmark()
tasks.schedule_bookmarks_without_favicons(self.get_or_create_test_user()) tasks.schedule_bookmarks_without_favicons(self.get_or_create_test_user())
self.assertEqual(Task.objects.count(), 0) self.assertEqual(self.executed_count(), 0)
def test_schedule_bookmarks_without_favicons_should_not_run_when_favicon_feature_is_disabled( def test_schedule_bookmarks_without_favicons_should_not_run_when_favicon_feature_is_disabled(
self, self,
@ -559,10 +445,10 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.user.profile.enable_favicons = False self.user.profile.enable_favicons = False
self.user.profile.save() self.user.profile.save()
bookmark = self.setup_bookmark() self.setup_bookmark()
tasks.schedule_bookmarks_without_favicons(self.get_or_create_test_user()) tasks.schedule_bookmarks_without_favicons(self.get_or_create_test_user())
self.assertEqual(Task.objects.count(), 0) self.assertEqual(self.executed_count(), 0)
def test_schedule_refresh_favicons_should_update_favicon_for_all_bookmarks(self): def test_schedule_refresh_favicons_should_update_favicon_for_all_bookmarks(self):
user = self.get_or_create_test_user() user = self.get_or_create_test_user()
@ -574,15 +460,9 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(favicon_file="https_example_com.png") self.setup_bookmark(favicon_file="https_example_com.png")
tasks.schedule_refresh_favicons(user) tasks.schedule_refresh_favicons(user)
self.run_pending_task(tasks._schedule_refresh_favicons_task)
task_list = Task.objects.all() self.assertEqual(self.executed_count(), 7)
self.assertEqual(task_list.count(), 6) self.assertEqual(self.mock_load_favicon.call_count, 6)
for task in task_list:
self.assertEqual(
task.task_name, "bookmarks.services.tasks._load_favicon_task"
)
def test_schedule_refresh_favicons_should_only_update_user_owned_bookmarks(self): def test_schedule_refresh_favicons_should_only_update_user_owned_bookmarks(self):
user = self.get_or_create_test_user() user = self.get_or_create_test_user()
@ -597,10 +477,8 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(user=other_user) self.setup_bookmark(user=other_user)
tasks.schedule_refresh_favicons(user) tasks.schedule_refresh_favicons(user)
self.run_pending_task(tasks._schedule_refresh_favicons_task)
task_list = Task.objects.all() self.assertEqual(self.mock_load_favicon.call_count, 3)
self.assertEqual(task_list.count(), 3)
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True) @override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_schedule_refresh_favicons_should_not_run_when_background_tasks_are_disabled( def test_schedule_refresh_favicons_should_not_run_when_background_tasks_are_disabled(
@ -609,14 +487,14 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark() self.setup_bookmark()
tasks.schedule_refresh_favicons(self.get_or_create_test_user()) tasks.schedule_refresh_favicons(self.get_or_create_test_user())
self.assertEqual(Task.objects.count(), 0) self.assertEqual(self.executed_count(), 0)
@override_settings(LD_ENABLE_REFRESH_FAVICONS=False) @override_settings(LD_ENABLE_REFRESH_FAVICONS=False)
def test_schedule_refresh_favicons_should_not_run_when_refresh_is_disabled(self): def test_schedule_refresh_favicons_should_not_run_when_refresh_is_disabled(self):
self.setup_bookmark() self.setup_bookmark()
tasks.schedule_refresh_favicons(self.get_or_create_test_user()) tasks.schedule_refresh_favicons(self.get_or_create_test_user())
self.assertEqual(Task.objects.count(), 0) self.assertEqual(self.executed_count(), 0)
def test_schedule_refresh_favicons_should_not_run_when_favicon_feature_is_disabled( def test_schedule_refresh_favicons_should_not_run_when_favicon_feature_is_disabled(
self, self,
@ -627,13 +505,14 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark() self.setup_bookmark()
tasks.schedule_refresh_favicons(self.get_or_create_test_user()) tasks.schedule_refresh_favicons(self.get_or_create_test_user())
self.assertEqual(Task.objects.count(), 0) self.assertEqual(self.executed_count(), 0)
@override_settings(LD_ENABLE_SNAPSHOTS=True) @override_settings(LD_ENABLE_SNAPSHOTS=True)
def test_create_html_snapshot_should_create_pending_asset(self): def test_create_html_snapshot_should_create_pending_asset(self):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
with mock.patch("bookmarks.services.monolith.create_snapshot"): # Mock the task function to avoid running it immediately
with mock.patch("bookmarks.services.tasks._create_html_snapshot_task"):
tasks.create_html_snapshot(bookmark) tasks.create_html_snapshot(bookmark)
self.assertEqual(BookmarkAsset.objects.count(), 1) self.assertEqual(BookmarkAsset.objects.count(), 1)
@ -652,18 +531,24 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
def test_create_html_snapshot_should_update_file_info(self): def test_create_html_snapshot_should_update_file_info(self):
bookmark = self.setup_bookmark(url="https://example.com") bookmark = self.setup_bookmark(url="https://example.com")
with mock.patch("bookmarks.services.singlefile.create_snapshot") as mock_create: with mock.patch(
tasks.create_html_snapshot(bookmark) "bookmarks.services.tasks._generate_snapshot_filename"
asset = BookmarkAsset.objects.get(bookmark=bookmark) ) as mock_generate:
asset.date_created = datetime.datetime(2021, 1, 2, 3, 44, 55)
asset.save()
expected_filename = "snapshot_2021-01-02_034455_https___example.com.html.gz" expected_filename = "snapshot_2021-01-02_034455_https___example.com.html.gz"
mock_generate.return_value = expected_filename
self.run_pending_task(tasks._create_html_snapshot_task) tasks.create_html_snapshot(bookmark)
BookmarkAsset.objects.get(bookmark=bookmark)
mock_create.assert_called_once_with( # Run periodic task to process the snapshot
tasks._schedule_html_snapshots_task()
self.mock_singlefile_create_snapshot.assert_called_once_with(
"https://example.com", "https://example.com",
os.path.join(settings.LD_ASSET_FOLDER, expected_filename), os.path.join(
settings.LD_ASSET_FOLDER,
expected_filename,
),
) )
asset = BookmarkAsset.objects.get(bookmark=bookmark) asset = BookmarkAsset.objects.get(bookmark=bookmark)
@ -675,11 +560,13 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
def test_create_html_snapshot_should_handle_error(self): def test_create_html_snapshot_should_handle_error(self):
bookmark = self.setup_bookmark(url="https://example.com") bookmark = self.setup_bookmark(url="https://example.com")
with mock.patch("bookmarks.services.singlefile.create_snapshot") as mock_create: self.mock_singlefile_create_snapshot.side_effect = singlefile.SingeFileError(
mock_create.side_effect = singlefile.SingeFileError("Error") "Error"
)
tasks.create_html_snapshot(bookmark) tasks.create_html_snapshot(bookmark)
self.run_pending_task(tasks._create_html_snapshot_task)
# Run periodic task to process the snapshot
tasks._schedule_html_snapshots_task()
asset = BookmarkAsset.objects.get(bookmark=bookmark) asset = BookmarkAsset.objects.get(bookmark=bookmark)
self.assertEqual(asset.status, BookmarkAsset.STATUS_FAILURE) self.assertEqual(asset.status, BookmarkAsset.STATUS_FAILURE)
@ -688,26 +575,24 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
@override_settings(LD_ENABLE_SNAPSHOTS=True) @override_settings(LD_ENABLE_SNAPSHOTS=True)
def test_create_html_snapshot_should_handle_missing_bookmark(self): def test_create_html_snapshot_should_handle_missing_bookmark(self):
with mock.patch("bookmarks.services.singlefile.create_snapshot") as mock_create:
tasks._create_html_snapshot_task(123) tasks._create_html_snapshot_task(123)
self.run_pending_task(tasks._create_html_snapshot_task)
mock_create.assert_not_called() self.mock_singlefile_create_snapshot.assert_not_called()
@override_settings(LD_ENABLE_SNAPSHOTS=False) @override_settings(LD_ENABLE_SNAPSHOTS=False)
def test_create_html_snapshot_should_not_run_when_single_file_is_disabled( def test_create_html_snapshot_should_not_create_asset_when_single_file_is_disabled(
self, self,
): ):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
tasks.create_html_snapshot(bookmark) tasks.create_html_snapshot(bookmark)
self.assertEqual(Task.objects.count(), 0) self.assertEqual(BookmarkAsset.objects.count(), 0)
@override_settings(LD_ENABLE_SNAPSHOTS=True, LD_DISABLE_BACKGROUND_TASKS=True) @override_settings(LD_ENABLE_SNAPSHOTS=True, LD_DISABLE_BACKGROUND_TASKS=True)
def test_create_html_snapshot_should_not_run_when_background_tasks_are_disabled( def test_create_html_snapshot_should_not_create_asset_when_background_tasks_are_disabled(
self, self,
): ):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
tasks.create_html_snapshot(bookmark) tasks.create_html_snapshot(bookmark)
self.assertEqual(Task.objects.count(), 0) self.assertEqual(BookmarkAsset.objects.count(), 0)

View file

@ -1,7 +1,7 @@
import gzip import gzip
import os import os
from unittest import mock
import subprocess import subprocess
from unittest import mock
from django.test import TestCase from django.test import TestCase
@ -24,9 +24,11 @@ class SingleFileServiceTestCase(TestCase):
file.write(self.html_content) file.write(self.html_content)
def test_create_snapshot(self): def test_create_snapshot(self):
with mock.patch("subprocess.run") as mock_run: mock_process = mock.Mock()
mock_run.side_effect = self.create_test_file mock_process.wait.return_value = 0
self.create_test_file()
with mock.patch("subprocess.Popen", return_value=mock_process):
singlefile.create_snapshot("http://example.com", self.html_filepath) singlefile.create_snapshot("http://example.com", self.html_filepath)
self.assertTrue(os.path.exists(self.html_filepath)) self.assertTrue(os.path.exists(self.html_filepath))
@ -38,13 +40,13 @@ class SingleFileServiceTestCase(TestCase):
def test_create_snapshot_failure(self): def test_create_snapshot_failure(self):
# subprocess fails - which it probably doesn't as single-file doesn't return exit codes # subprocess fails - which it probably doesn't as single-file doesn't return exit codes
with mock.patch("subprocess.run") as mock_run: with mock.patch("subprocess.Popen") as mock_popen:
mock_run.side_effect = subprocess.CalledProcessError(1, "command") mock_popen.side_effect = subprocess.CalledProcessError(1, "command")
with self.assertRaises(singlefile.SingeFileError): with self.assertRaises(singlefile.SingeFileError):
singlefile.create_snapshot("http://example.com", self.html_filepath) singlefile.create_snapshot("http://example.com", self.html_filepath)
# so also check that it raises error if output file isn't created # so also check that it raises error if output file isn't created
with mock.patch("subprocess.run") as mock_run: with mock.patch("subprocess.Popen"):
with self.assertRaises(singlefile.SingeFileError): with self.assertRaises(singlefile.SingeFileError):
singlefile.create_snapshot("http://example.com", self.html_filepath) singlefile.create_snapshot("http://example.com", self.html_filepath)

View file

@ -18,6 +18,8 @@ python manage.py migrate
python manage.py enable_wal python manage.py enable_wal
# Create initial superuser if defined in options / environment variables # Create initial superuser if defined in options / environment variables
python manage.py create_initial_superuser python manage.py create_initial_superuser
# Migrate legacy background tasks to Huey
python manage.py migrate_tasks
# Ensure the DB folder is owned by the right user # Ensure the DB folder is owned by the right user
chown -R www-data: /etc/linkding/data chown -R www-data: /etc/linkding/data

View file

@ -31,7 +31,8 @@ RUN pip install -U pip && pip install -r requirements.txt -r requirements.dev.tx
COPY . . COPY . .
COPY --from=node-build /etc/linkding . COPY --from=node-build /etc/linkding .
# run Django part of the build # run Django part of the build
RUN python manage.py compilescss && \ RUN mkdir data && \
python manage.py compilescss && \
python manage.py collectstatic --ignore=*.scss && \ python manage.py collectstatic --ignore=*.scss && \
python manage.py compilescss --delete-files python manage.py compilescss --delete-files

View file

@ -33,7 +33,8 @@ RUN pip install -U pip && pip install -r requirements.txt -r requirements.dev.tx
COPY . . COPY . .
COPY --from=node-build /etc/linkding . COPY --from=node-build /etc/linkding .
# run Django part of the build # run Django part of the build
RUN python manage.py compilescss && \ RUN mkdir data && \
python manage.py compilescss && \
python manage.py collectstatic --ignore=*.scss && \ python manage.py collectstatic --ignore=*.scss && \
python manage.py compilescss --delete-files python manage.py compilescss --delete-files

View file

@ -5,8 +5,8 @@ Django
django-registration django-registration
django-sass-processor django-sass-processor
django-widget-tweaks django-widget-tweaks
django4-background-tasks
djangorestframework djangorestframework
huey
Markdown Markdown
mozilla-django-oidc mozilla-django-oidc
psycopg2-binary psycopg2-binary

View file

@ -39,10 +39,10 @@ django-sass-processor==1.4
# via -r requirements.in # via -r requirements.in
django-widget-tweaks==1.5.0 django-widget-tweaks==1.5.0
# via -r requirements.in # via -r requirements.in
django4-background-tasks==1.2.9
# via -r requirements.in
djangorestframework==3.14.0 djangorestframework==3.14.0
# via -r requirements.in # via -r requirements.in
huey==2.5.0
# via -r requirements.in
idna==3.6 idna==3.6
# via requests # via requests
josepy==1.14.0 josepy==1.14.0
@ -69,7 +69,6 @@ requests==2.31.0
six==1.16.0 six==1.16.0
# via # via
# bleach # bleach
# django4-background-tasks
# python-dateutil # python-dateutil
soupsieve==2.5 soupsieve==2.5
# via beautifulsoup4 # via beautifulsoup4

View file

@ -42,7 +42,7 @@ INSTALLED_APPS = [
"widget_tweaks", "widget_tweaks",
"rest_framework", "rest_framework",
"rest_framework.authtoken", "rest_framework.authtoken",
"background_task", "huey.contrib.djhuey",
"mozilla_django_oidc", "mozilla_django_oidc",
] ]
@ -173,13 +173,27 @@ LD_DISABLE_BACKGROUND_TASKS = os.getenv("LD_DISABLE_BACKGROUND_TASKS", False) in
"1", "1",
) )
# django-background-tasks # Huey task queue
MAX_ATTEMPTS = 5 HUEY = {
# How many tasks will run in parallel "huey_class": "huey.SqliteHuey",
# We want to keep this low to prevent SQLite lock errors and in general not to consume too much resources on smaller "filename": os.path.join(BASE_DIR, "data", "tasks.sqlite3"),
# specced systems like Raspberries. Should be OK as tasks are not time critical. "immediate": False,
BACKGROUND_TASK_RUN_ASYNC = True "results": False,
BACKGROUND_TASK_ASYNC_THREADS = 2 "store_none": False,
"utc": True,
"consumer": {
"workers": 2,
"worker_type": "thread",
"initial_delay": 5,
"backoff": 1.15,
"max_delay": 10,
"scheduler_interval": 10,
"periodic": True,
"check_worker_health": True,
"health_check_interval": 10,
},
}
# Enable OICD support if configured # Enable OICD support if configured
LD_ENABLE_OIDC = os.getenv("LD_ENABLE_OIDC", False) in (True, "True", "1") LD_ENABLE_OIDC = os.getenv("LD_ENABLE_OIDC", False) in (True, "True", "1")

View file

@ -47,6 +47,11 @@ LOGGING = {
"handlers": ["console"], "handlers": ["console"],
"propagate": False, "propagate": False,
}, },
"huey": { # Huey
"level": "INFO",
"handlers": ["console"],
"propagate": False,
},
}, },
} }

View file

@ -49,7 +49,12 @@ LOGGING = {
"level": "INFO", "level": "INFO",
"handlers": ["console"], "handlers": ["console"],
"propagate": False, "propagate": False,
} },
"huey": {
"level": "INFO",
"handlers": ["console"],
"propagate": False,
},
}, },
} }

View file

@ -4,8 +4,18 @@ loglevel=info
[program:jobs] [program:jobs]
user=www-data user=www-data
command=sh background-tasks-wrapper.sh command=python manage.py run_huey -f
stdout_logfile=background_tasks.log stdout_logfile=background_tasks.log
stdout_logfile_maxbytes=10MB stdout_logfile_maxbytes=10MB
stdout_logfile_backups=5 stdout_logfile_backups=5
redirect_stderr=true redirect_stderr=true
[unix_http_server]
file=/var/run/supervisor.sock
chmod=0700
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
serverurl=unix:///var/run/supervisor.sock