From a6f35119cd2e862d8c2d7a30c830376bfd9c7366 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sascha=20I=C3=9Fbr=C3=BCcker?= Date: Sun, 7 Apr 2024 11:11:14 +0200 Subject: [PATCH] Replace django-background-tasks with huey (#657) * Replace django-background-tasks with huey * Add command for migrating tasks * Add custom admin view * fix dockerfile * fix tests * fix tests in CI * fix task tests * implement retries * improve config * workaround to avoid running singlefile tasks in parallel * properly kill single-file sub-processes * use period task for HTML snapshots * clear task lock when starting task consumer * remove obsolete cleanup task command --- .dockerignore | 1 - .github/workflows/main.yaml | 5 +- background-tasks-wrapper.sh | 5 - bookmarks/admin.py | 92 ++- bookmarks/management/commands/clean_tasks.py | 15 - .../management/commands/migrate_tasks.py | 75 +++ bookmarks/services/singlefile.py | 30 +- bookmarks/services/tasks.py | 97 +++- bookmarks/tasks.py | 2 + .../templates/admin/background_tasks.html | 39 ++ .../tests/test_bookmark_details_modal.py | 2 - bookmarks/tests/test_bookmarks_tasks.py | 523 +++++++----------- bookmarks/tests/test_singlefile_service.py | 14 +- bootstrap.sh | 2 + docker/alpine.Dockerfile | 3 +- docker/default.Dockerfile | 3 +- requirements.in | 2 +- requirements.txt | 5 +- siteroot/settings/base.py | 30 +- siteroot/settings/dev.py | 5 + siteroot/settings/prod.py | 7 +- supervisord.conf | 12 +- 22 files changed, 566 insertions(+), 403 deletions(-) delete mode 100755 background-tasks-wrapper.sh delete mode 100644 bookmarks/management/commands/clean_tasks.py create mode 100644 bookmarks/management/commands/migrate_tasks.py create mode 100644 bookmarks/tasks.py create mode 100644 bookmarks/templates/admin/background_tasks.html diff --git a/.dockerignore b/.dockerignore index 049e352..c5f38ed 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,7 +5,6 @@ !/bookmarks !/siteroot -!/background-tasks-wrapper.sh !/bootstrap.sh !/LICENSE.txt !/manage.py diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 9521808..4e6e520 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -24,7 +24,9 @@ jobs: - name: Install Node dependencies run: npm ci - name: Setup Python environment - run: pip install -r requirements.txt -r requirements.dev.txt + run: | + pip install -r requirements.txt -r requirements.dev.txt + mkdir data - name: Run tests run: python manage.py test bookmarks.tests e2e_tests: @@ -47,6 +49,7 @@ jobs: run: | pip install -r requirements.txt -r requirements.dev.txt playwright install chromium + mkdir data - name: Run build run: | npm run build diff --git a/background-tasks-wrapper.sh b/background-tasks-wrapper.sh deleted file mode 100755 index 9429544..0000000 --- a/background-tasks-wrapper.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash -# Wrapper script used by supervisord to first clear task locks before starting the background task processor - -python manage.py clean_tasks -exec python manage.py process_tasks diff --git a/bookmarks/admin.py b/bookmarks/admin.py index b736b8b..532c1a9 100644 --- a/bookmarks/admin.py +++ b/bookmarks/admin.py @@ -1,22 +1,96 @@ -from background_task.admin import TaskAdmin, CompletedTaskAdmin -from background_task.models import Task, CompletedTask from django.contrib import admin, messages from django.contrib.admin import AdminSite from django.contrib.auth.admin import UserAdmin from django.contrib.auth.models import User +from django.core.paginator import Paginator from django.db.models import Count, QuerySet +from django.shortcuts import render +from django.urls import path from django.utils.translation import ngettext, gettext +from huey.contrib.djhuey import HUEY as huey from rest_framework.authtoken.admin import TokenAdmin from rest_framework.authtoken.models import TokenProxy -from bookmarks.models import Bookmark, Tag, UserProfile, Toast, FeedToken +from bookmarks.models import Bookmark, BookmarkAsset, Tag, UserProfile, Toast, FeedToken from bookmarks.services.bookmarks import archive_bookmark, unarchive_bookmark +# Custom paginator to paginate through Huey tasks +class TaskPaginator(Paginator): + def __init__(self): + super().__init__(self, 100) + self.task_count = huey.storage.queue_size() + + @property + def count(self): + return self.task_count + + def page(self, number): + limit = self.per_page + offset = (number - 1) * self.per_page + return self._get_page( + self.enqueued_items(limit, offset), + number, + self, + ) + + # Copied from Huey's SqliteStorage with some modifications to allow pagination + def enqueued_items(self, limit, offset): + to_bytes = lambda b: bytes(b) if not isinstance(b, bytes) else b + sql = "select data from task where queue=? order by priority desc, id limit ? offset ?" + params = (huey.storage.name, limit, offset) + + serialized_tasks = [ + to_bytes(i) for i, in huey.storage.sql(sql, params, results=True) + ] + return [huey.deserialize_task(task) for task in serialized_tasks] + + +# Custom view to display Huey tasks in the admin +def background_task_view(request): + page_number = int(request.GET.get("p", 1)) + paginator = TaskPaginator() + page = paginator.get_page(page_number) + page_range = paginator.get_elided_page_range(page_number, on_each_side=2, on_ends=2) + context = { + **linkding_admin_site.each_context(request), + "title": "Background tasks", + "page": page, + "page_range": page_range, + "tasks": page.object_list, + } + return render(request, "admin/background_tasks.html", context) + + class LinkdingAdminSite(AdminSite): site_header = "linkding administration" site_title = "linkding Admin" + def get_urls(self): + urls = super().get_urls() + custom_urls = [ + path("tasks/", background_task_view, name="background_tasks"), + ] + return custom_urls + urls + + def get_app_list(self, request, app_label=None): + app_list = super().get_app_list(request, app_label) + app_list += [ + { + "name": "Huey", + "app_label": "huey_app", + "models": [ + { + "name": "Queued tasks", + "object_name": "background_tasks", + "admin_url": "/admin/tasks/", + "view_only": True, + } + ], + } + ] + return app_list + class AdminBookmark(admin.ModelAdmin): list_display = ("resolved_title", "url", "is_archived", "owner", "date_added") @@ -125,6 +199,15 @@ class AdminBookmark(admin.ModelAdmin): ) +class AdminBookmarkAsset(admin.ModelAdmin): + list_display = ("display_name", "date_created", "status") + search_fields = ( + "display_name", + "file", + ) + list_filter = ("status",) + + class AdminTag(admin.ModelAdmin): list_display = ("name", "bookmarks_count", "owner", "date_added") search_fields = ("name", "owner__username") @@ -200,10 +283,9 @@ class AdminFeedToken(admin.ModelAdmin): linkding_admin_site = LinkdingAdminSite() linkding_admin_site.register(Bookmark, AdminBookmark) +linkding_admin_site.register(BookmarkAsset, AdminBookmarkAsset) linkding_admin_site.register(Tag, AdminTag) linkding_admin_site.register(User, AdminCustomUser) linkding_admin_site.register(TokenProxy, TokenAdmin) linkding_admin_site.register(Toast, AdminToast) linkding_admin_site.register(FeedToken, AdminFeedToken) -linkding_admin_site.register(Task, TaskAdmin) -linkding_admin_site.register(CompletedTask, CompletedTaskAdmin) diff --git a/bookmarks/management/commands/clean_tasks.py b/bookmarks/management/commands/clean_tasks.py deleted file mode 100644 index 90b7710..0000000 --- a/bookmarks/management/commands/clean_tasks.py +++ /dev/null @@ -1,15 +0,0 @@ -from background_task.models import Task, CompletedTask -from django.core.management.base import BaseCommand - - -class Command(BaseCommand): - help = "Remove task locks and clear completed task history" - - def handle(self, *args, **options): - # Remove task locks - # If the background task processor exited while executing tasks, these tasks would still be marked as locked, - # even though no process is working on them, and would prevent the task processor from picking the next task in - # the queue - Task.objects.all().update(locked_by=None, locked_at=None) - # Clear task history to prevent them from bloating the DB - CompletedTask.objects.all().delete() diff --git a/bookmarks/management/commands/migrate_tasks.py b/bookmarks/management/commands/migrate_tasks.py new file mode 100644 index 0000000..06b0db2 --- /dev/null +++ b/bookmarks/management/commands/migrate_tasks.py @@ -0,0 +1,75 @@ +import json +import os +import sqlite3 +import importlib + +from django.core.management.base import BaseCommand + + +class Command(BaseCommand): + help = "Migrate tasks from django-background-tasks to Huey" + + def handle(self, *args, **options): + db = sqlite3.connect(os.path.join("data", "db.sqlite3")) + + # Check if background_task table exists + cursor = db.cursor() + cursor.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='background_task'" + ) + row = cursor.fetchone() + if not row: + self.stdout.write( + "Legacy task table does not exist. Skipping task migration" + ) + return + + # Load legacy tasks + cursor.execute("SELECT id, task_name, task_params FROM background_task") + legacy_tasks = cursor.fetchall() + + if len(legacy_tasks) == 0: + self.stdout.write("No legacy tasks found. Skipping task migration") + return + + self.stdout.write( + f"Found {len(legacy_tasks)} legacy tasks. Migrating to Huey..." + ) + + # Migrate tasks to Huey + succeeded_tasks = [] + for task in legacy_tasks: + task_id = task[0] + task_name = task[1] + task_params_json = task[2] + try: + task_params = json.loads(task_params_json) + function_params = task_params[0] + + # Resolve task function + module_name, func_name = task_name.rsplit(".", 1) + module = importlib.import_module(module_name) + func = getattr(module, func_name) + + # Call task function + func(*function_params) + succeeded_tasks.append(task_id) + except Exception: + self.stderr.write(f"Error migrating task [{task_id}] {task_name}") + + self.stdout.write(f"Migrated {len(succeeded_tasks)} tasks successfully") + + # Clean up + try: + placeholders = ", ".join("?" for _ in succeeded_tasks) + sql = f"DELETE FROM background_task WHERE id IN ({placeholders})" + cursor.execute(sql, succeeded_tasks) + db.commit() + self.stdout.write( + f"Deleted {len(succeeded_tasks)} migrated tasks from legacy table" + ) + except Exception: + self.stderr.write("Error cleaning up legacy tasks") + + cursor.close() + db.close() diff --git a/bookmarks/services/singlefile.py b/bookmarks/services/singlefile.py index 2fbaecc..7ccc3a8 100644 --- a/bookmarks/services/singlefile.py +++ b/bookmarks/services/singlefile.py @@ -1,6 +1,8 @@ import gzip +import logging import os import shutil +import signal import subprocess from django.conf import settings @@ -10,16 +12,21 @@ class SingeFileError(Exception): pass +logger = logging.getLogger(__name__) + + def create_snapshot(url: str, filepath: str): singlefile_path = settings.LD_SINGLEFILE_PATH - singlefile_options = settings.LD_SINGLEFILE_OPTIONS + # singlefile_options = settings.LD_SINGLEFILE_OPTIONS temp_filepath = filepath + ".tmp" + args = [singlefile_path, url, temp_filepath] try: - command = f"{singlefile_path} '{url}' {singlefile_options} {temp_filepath}" - subprocess.run(command, check=True, shell=True) + # Use start_new_session=True to create a new process group + process = subprocess.Popen(args, start_new_session=True) + process.wait(timeout=60) - # single-file doesn't return exit codes apparently, so check if the file was created + # check if the file was created if not os.path.exists(temp_filepath): raise SingeFileError("Failed to create snapshot") @@ -29,5 +36,20 @@ def create_snapshot(url: str, filepath: str): shutil.copyfileobj(raw_file, gz_file) os.remove(temp_filepath) + except subprocess.TimeoutExpired: + # First try to terminate properly + try: + logger.error( + "Timeout expired while creating snapshot. Terminating process..." + ) + process.terminate() + process.wait(timeout=20) + raise SingeFileError("Timeout expired while creating snapshot") + except subprocess.TimeoutExpired: + # Kill the whole process group, which should also clean up any chromium + # processes spawned by single-file + logger.error("Timeout expired while terminating. Killing process...") + os.killpg(os.getpgid(process.pid), signal.SIGTERM) + raise SingeFileError("Timeout expired while creating snapshot") except subprocess.CalledProcessError as error: raise SingeFileError(f"Failed to create snapshot: {error.stderr}") diff --git a/bookmarks/services/tasks.py b/bookmarks/services/tasks.py index e9f1357..2f1caad 100644 --- a/bookmarks/services/tasks.py +++ b/bookmarks/services/tasks.py @@ -1,14 +1,16 @@ +import functools import logging import os import waybackpy -from background_task import background -from background_task.models import Task from django.conf import settings from django.contrib.auth import get_user_model from django.contrib.auth.models import User -from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound from django.utils import timezone, formats +from huey import crontab +from huey.contrib.djhuey import HUEY as huey +from huey.exceptions import TaskLockedException +from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound import bookmarks.services.wayback from bookmarks.models import Bookmark, BookmarkAsset, UserProfile @@ -18,6 +20,35 @@ from bookmarks.services.website_loader import DEFAULT_USER_AGENT logger = logging.getLogger(__name__) +# Create custom decorator for Huey tasks that implements exponential backoff +# Taken from: https://huey.readthedocs.io/en/latest/guide.html#tips-and-tricks +# Retry 1: 60 +# Retry 2: 240 +# Retry 3: 960 +# Retry 4: 3840 +# Retry 5: 15360 +def task(retries=5, retry_delay=15, retry_backoff=4): + def deco(fn): + @functools.wraps(fn) + def inner(*args, **kwargs): + task = kwargs.pop("task") + try: + return fn(*args, **kwargs) + except TaskLockedException as exc: + # Task locks are currently only used as workaround to enforce + # running specific types of tasks (e.g. singlefile snapshots) + # sequentially. In that case don't reduce the number of retries. + task.retries = retries + raise exc + except Exception as exc: + task.retry_delay *= retry_backoff + raise exc + + return huey.task(retries=retries, retry_delay=retry_delay, context=True)(inner) + + return deco + + def is_web_archive_integration_active(user: User) -> bool: background_tasks_enabled = not settings.LD_DISABLE_BACKGROUND_TASKS web_archive_integration_enabled = ( @@ -67,7 +98,7 @@ def _create_snapshot(bookmark: Bookmark): logger.info(f"Successfully created new snapshot for bookmark:. url={bookmark.url}") -@background() +@task() def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool): try: bookmark = Bookmark.objects.get(id=bookmark_id) @@ -96,7 +127,7 @@ def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool): _load_newest_snapshot(bookmark) -@background() +@task() def _load_web_archive_snapshot_task(bookmark_id: int): try: bookmark = Bookmark.objects.get(id=bookmark_id) @@ -114,13 +145,14 @@ def schedule_bookmarks_without_snapshots(user: User): _schedule_bookmarks_without_snapshots_task(user.id) -@background() +@task() def _schedule_bookmarks_without_snapshots_task(user_id: int): user = get_user_model().objects.get(id=user_id) bookmarks_without_snapshots = Bookmark.objects.filter( web_archive_snapshot_url__exact="", owner=user ) + # TODO: Implement bulk task creation for bookmark in bookmarks_without_snapshots: # To prevent rate limit errors from the Wayback API only try to load the latest snapshots instead of creating # new ones when processing bookmarks in bulk @@ -138,7 +170,7 @@ def load_favicon(user: User, bookmark: Bookmark): _load_favicon_task(bookmark.id) -@background() +@task() def _load_favicon_task(bookmark_id: int): try: bookmark = Bookmark.objects.get(id=bookmark_id) @@ -162,19 +194,15 @@ def schedule_bookmarks_without_favicons(user: User): _schedule_bookmarks_without_favicons_task(user.id) -@background() +@task() def _schedule_bookmarks_without_favicons_task(user_id: int): user = get_user_model().objects.get(id=user_id) bookmarks = Bookmark.objects.filter(favicon_file__exact="", owner=user) - tasks = [] + # TODO: Implement bulk task creation for bookmark in bookmarks: - task = Task.objects.new_task( - task_name="bookmarks.services.tasks._load_favicon_task", args=(bookmark.id,) - ) - tasks.append(task) - - Task.objects.bulk_create(tasks) + _load_favicon_task(bookmark.id) + pass def schedule_refresh_favicons(user: User): @@ -182,19 +210,14 @@ def schedule_refresh_favicons(user: User): _schedule_refresh_favicons_task(user.id) -@background() +@task() def _schedule_refresh_favicons_task(user_id: int): user = get_user_model().objects.get(id=user_id) bookmarks = Bookmark.objects.filter(owner=user) - tasks = [] + # TODO: Implement bulk task creation for bookmark in bookmarks: - task = Task.objects.new_task( - task_name="bookmarks.services.tasks._load_favicon_task", args=(bookmark.id,) - ) - tasks.append(task) - - Task.objects.bulk_create(tasks) + _load_favicon_task(bookmark.id) def is_html_snapshot_feature_active() -> bool: @@ -214,7 +237,6 @@ def create_html_snapshot(bookmark: Bookmark): status=BookmarkAsset.STATUS_PENDING, ) asset.save() - _create_html_snapshot_task(asset.id) def _generate_snapshot_filename(asset: BookmarkAsset) -> str: @@ -230,7 +252,23 @@ def _generate_snapshot_filename(asset: BookmarkAsset) -> str: return f"{asset.asset_type}_{formatted_datetime}_{sanitized_url}.html.gz" -@background() +# singe-file does not support running multiple instances in parallel, so we can +# not queue up multiple snapshot tasks at once. Instead, schedule a periodic +# task that grabs a number of pending assets and creates snapshots for them in +# sequence. The task uses a lock to ensure that a new task isn't scheduled +# before the previous one has finished. +@huey.periodic_task(crontab(minute="*")) +@huey.lock_task("schedule-html-snapshots-lock") +def _schedule_html_snapshots_task(): + # Get five pending assets + assets = BookmarkAsset.objects.filter(status=BookmarkAsset.STATUS_PENDING).order_by( + "date_created" + )[:5] + + for asset in assets: + _create_html_snapshot_task(asset.id) + + def _create_html_snapshot_task(asset_id: int): try: asset = BookmarkAsset.objects.get(id=asset_id) @@ -246,13 +284,14 @@ def _create_html_snapshot_task(asset_id: int): asset.status = BookmarkAsset.STATUS_COMPLETE asset.file = filename asset.gzip = True + asset.save() logger.info( f"Successfully created HTML snapshot for bookmark. url={asset.bookmark.url}" ) - except singlefile.SingeFileError as error: - asset.status = BookmarkAsset.STATUS_FAILURE + except Exception as error: logger.error( - f"Failed to create HTML snapshot for bookmark. url={asset.bookmark.url}", + f"Failed to HTML snapshot for bookmark. url={asset.bookmark.url}", exc_info=error, ) - asset.save() + asset.status = BookmarkAsset.STATUS_FAILURE + asset.save() diff --git a/bookmarks/tasks.py b/bookmarks/tasks.py new file mode 100644 index 0000000..8fcc442 --- /dev/null +++ b/bookmarks/tasks.py @@ -0,0 +1,2 @@ +# Expose task modules to Huey Django extension +import bookmarks.services.tasks diff --git a/bookmarks/templates/admin/background_tasks.html b/bookmarks/templates/admin/background_tasks.html new file mode 100644 index 0000000..a0f341a --- /dev/null +++ b/bookmarks/templates/admin/background_tasks.html @@ -0,0 +1,39 @@ +{% extends "admin/base_site.html" %} + +{% block content %} + + + + + + + + + + + {% for task in tasks %} + + + + + + + {% endfor %} + +
IDNameArgsRetries
{{ task.id }}{{ task.name }}{{ task.args }}{{ task.retries }}
+

+ {% if page.paginator.num_pages > 1 %} + {% for page_number in page_range %} + {% if page_number == page.number %} + {{ page_number }} + {% elif page_number == '…' %} + + {% else %} + {{ page_number }} + {% endif %} + {% endfor %} +   + {% endif %} + {{ page.paginator.count }} tasks +

+{% endblock %} diff --git a/bookmarks/tests/test_bookmark_details_modal.py b/bookmarks/tests/test_bookmark_details_modal.py index 9704591..2b18435 100644 --- a/bookmarks/tests/test_bookmark_details_modal.py +++ b/bookmarks/tests/test_bookmark_details_modal.py @@ -776,6 +776,4 @@ class BookmarkDetailsModalTestCase(TestCase, BookmarkFactoryMixin, HtmlTestMixin ) self.assertEqual(response.status_code, 302) - mock_create_html_snapshot_task.assert_called_with(bookmark.id) - self.assertEqual(bookmark.bookmarkasset_set.count(), 1) diff --git a/bookmarks/tests/test_bookmarks_tasks.py b/bookmarks/tests/test_bookmarks_tasks.py index b1ace6b..e36183e 100644 --- a/bookmarks/tests/test_bookmarks_tasks.py +++ b/bookmarks/tests/test_bookmarks_tasks.py @@ -1,21 +1,20 @@ import datetime import os.path from dataclasses import dataclass -from typing import Any from unittest import mock import waybackpy -from background_task.models import Task from django.conf import settings from django.contrib.auth.models import User from django.test import TestCase, override_settings +from huey.contrib.djhuey import HUEY as huey from waybackpy.exceptions import WaybackError import bookmarks.services.favicon_loader import bookmarks.services.wayback from bookmarks.models import BookmarkAsset, UserProfile from bookmarks.services import tasks, singlefile -from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging +from bookmarks.tests.helpers import BookmarkFactoryMixin def create_wayback_machine_save_api_mock( @@ -36,27 +35,45 @@ class MockCdxSnapshot: datetime_timestamp: datetime.datetime -def create_cdx_server_api_mock( - archive_url: str | None = "https://example.com/newest_snapshot", - fail_loading_snapshot=False, -): - mock_api = mock.Mock() - - if fail_loading_snapshot: - mock_api.newest.side_effect = WaybackError - elif archive_url: - mock_api.newest.return_value = MockCdxSnapshot( - archive_url, datetime.datetime.now() - ) - else: - mock_api.newest.return_value = None - - return mock_api - - class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): def setUp(self): + huey.immediate = True + huey.results = True + huey.store_none = True + + self.mock_save_api = mock.Mock( + archive_url="https://example.com/created_snapshot" + ) + self.mock_save_api_patcher = mock.patch.object( + waybackpy, "WaybackMachineSaveAPI", return_value=self.mock_save_api + ) + self.mock_save_api_patcher.start() + + self.mock_cdx_api = mock.Mock() + self.mock_cdx_api.newest.return_value = MockCdxSnapshot( + "https://example.com/newest_snapshot", datetime.datetime.now() + ) + self.mock_cdx_api_patcher = mock.patch.object( + bookmarks.services.wayback, + "CustomWaybackMachineCDXServerAPI", + return_value=self.mock_cdx_api, + ) + self.mock_cdx_api_patcher.start() + + self.mock_load_favicon_patcher = mock.patch( + "bookmarks.services.favicon_loader.load_favicon" + ) + self.mock_load_favicon = self.mock_load_favicon_patcher.start() + self.mock_load_favicon.return_value = "https_example_com.png" + + self.mock_singlefile_create_snapshot_patcher = mock.patch( + "bookmarks.services.singlefile.create_snapshot", + ) + self.mock_singlefile_create_snapshot = ( + self.mock_singlefile_create_snapshot_patcher.start() + ) + user = self.get_or_create_test_user() user.profile.web_archive_integration = ( UserProfile.WEB_ARCHIVE_INTEGRATION_ENABLED @@ -64,157 +81,100 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): user.profile.enable_favicons = True user.profile.save() - @disable_logging - def run_pending_task(self, task_function: Any): - func = getattr(task_function, "task_function", None) - task = Task.objects.all()[0] - self.assertEqual(task_function.name, task.task_name) - args, kwargs = task.params() - func(*args, **kwargs) - task.delete() + def tearDown(self): + self.mock_save_api_patcher.stop() + self.mock_cdx_api_patcher.stop() + self.mock_load_favicon_patcher.stop() + self.mock_singlefile_create_snapshot_patcher.stop() + huey.storage.flush_results() + huey.immediate = False - @disable_logging - def run_all_pending_tasks(self, task_function: Any): - func = getattr(task_function, "task_function", None) - tasks = Task.objects.all() - - for task in tasks: - self.assertEqual(task_function.name, task.task_name) - args, kwargs = task.params() - func(*args, **kwargs) - task.delete() + def executed_count(self): + return len(huey.all_results()) def test_create_web_archive_snapshot_should_update_snapshot_url(self): bookmark = self.setup_bookmark() - mock_save_api = create_wayback_machine_save_api_mock() - with mock.patch.object( - waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api - ): - tasks.create_web_archive_snapshot( - self.get_or_create_test_user(), bookmark, False - ) - self.run_pending_task(tasks._create_web_archive_snapshot_task) - bookmark.refresh_from_db() + tasks.create_web_archive_snapshot( + self.get_or_create_test_user(), bookmark, False + ) + bookmark.refresh_from_db() - mock_save_api.save.assert_called_once() - self.assertEqual( - bookmark.web_archive_snapshot_url, - "https://example.com/created_snapshot", - ) + self.mock_save_api.save.assert_called_once() + self.assertEqual(self.executed_count(), 1) + self.assertEqual( + bookmark.web_archive_snapshot_url, + "https://example.com/created_snapshot", + ) def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self): - mock_save_api = create_wayback_machine_save_api_mock() + tasks._create_web_archive_snapshot_task(123, False) - with mock.patch.object( - waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api - ): - tasks._create_web_archive_snapshot_task(123, False) - self.run_pending_task(tasks._create_web_archive_snapshot_task) - - mock_save_api.save.assert_not_called() + self.assertEqual(self.executed_count(), 1) + self.mock_save_api.save.assert_not_called() def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self): bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com") - mock_save_api = create_wayback_machine_save_api_mock() - with mock.patch.object( - waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api - ): - tasks.create_web_archive_snapshot( - self.get_or_create_test_user(), bookmark, False - ) - self.run_pending_task(tasks._create_web_archive_snapshot_task) + self.mock_save_api.create_web_archive_snapshot( + self.get_or_create_test_user(), bookmark, False + ) - mock_save_api.assert_not_called() + self.assertEqual(self.executed_count(), 0) + self.mock_save_api.assert_not_called() def test_create_web_archive_snapshot_should_force_update_snapshot(self): bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com") - mock_save_api = create_wayback_machine_save_api_mock( - archive_url="https://other.com" + self.mock_save_api.archive_url = "https://other.com" + + tasks.create_web_archive_snapshot( + self.get_or_create_test_user(), bookmark, True ) + bookmark.refresh_from_db() - with mock.patch.object( - waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api - ): - tasks.create_web_archive_snapshot( - self.get_or_create_test_user(), bookmark, True - ) - self.run_pending_task(tasks._create_web_archive_snapshot_task) - bookmark.refresh_from_db() - - self.assertEqual(bookmark.web_archive_snapshot_url, "https://other.com") + self.assertEqual(bookmark.web_archive_snapshot_url, "https://other.com") def test_create_web_archive_snapshot_should_use_newest_snapshot_as_fallback(self): bookmark = self.setup_bookmark() - mock_save_api = create_wayback_machine_save_api_mock(fail_on_save=True) - mock_cdx_api = create_cdx_server_api_mock() + self.mock_save_api.save.side_effect = WaybackError - with mock.patch.object( - waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api - ): - with mock.patch.object( - bookmarks.services.wayback, - "CustomWaybackMachineCDXServerAPI", - return_value=mock_cdx_api, - ): - tasks.create_web_archive_snapshot( - self.get_or_create_test_user(), bookmark, False - ) - self.run_pending_task(tasks._create_web_archive_snapshot_task) + tasks.create_web_archive_snapshot( + self.get_or_create_test_user(), bookmark, False + ) - bookmark.refresh_from_db() - mock_cdx_api.newest.assert_called_once() - self.assertEqual( - "https://example.com/newest_snapshot", - bookmark.web_archive_snapshot_url, - ) + bookmark.refresh_from_db() + self.mock_cdx_api.newest.assert_called_once() + self.assertEqual( + "https://example.com/newest_snapshot", + bookmark.web_archive_snapshot_url, + ) def test_create_web_archive_snapshot_should_ignore_missing_newest_snapshot(self): bookmark = self.setup_bookmark() - mock_save_api = create_wayback_machine_save_api_mock(fail_on_save=True) - mock_cdx_api = create_cdx_server_api_mock(archive_url=None) + self.mock_save_api.save.side_effect = WaybackError + self.mock_cdx_api.newest.return_value = None - with mock.patch.object( - waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api - ): - with mock.patch.object( - bookmarks.services.wayback, - "CustomWaybackMachineCDXServerAPI", - return_value=mock_cdx_api, - ): - tasks.create_web_archive_snapshot( - self.get_or_create_test_user(), bookmark, False - ) - self.run_pending_task(tasks._create_web_archive_snapshot_task) + tasks.create_web_archive_snapshot( + self.get_or_create_test_user(), bookmark, False + ) - bookmark.refresh_from_db() - self.assertEqual("", bookmark.web_archive_snapshot_url) + bookmark.refresh_from_db() + self.assertEqual("", bookmark.web_archive_snapshot_url) def test_create_web_archive_snapshot_should_ignore_newest_snapshot_errors(self): bookmark = self.setup_bookmark() - mock_save_api = create_wayback_machine_save_api_mock(fail_on_save=True) - mock_cdx_api = create_cdx_server_api_mock(fail_loading_snapshot=True) + self.mock_save_api.save.side_effect = WaybackError + self.mock_cdx_api.newest.side_effect = WaybackError - with mock.patch.object( - waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api - ): - with mock.patch.object( - bookmarks.services.wayback, - "CustomWaybackMachineCDXServerAPI", - return_value=mock_cdx_api, - ): - tasks.create_web_archive_snapshot( - self.get_or_create_test_user(), bookmark, False - ) - self.run_pending_task(tasks._create_web_archive_snapshot_task) + tasks.create_web_archive_snapshot( + self.get_or_create_test_user(), bookmark, False + ) - bookmark.refresh_from_db() - self.assertEqual("", bookmark.web_archive_snapshot_url) + bookmark.refresh_from_db() + self.assertEqual("", bookmark.web_archive_snapshot_url) def test_create_web_archive_snapshot_should_not_save_stale_bookmark_data(self): bookmark = self.setup_bookmark() - mock_save_api = create_wayback_machine_save_api_mock() # update bookmark during API call to check that saving # the snapshot does not overwrite updated bookmark data @@ -222,99 +182,64 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): bookmark.title = "Updated title" bookmark.save() - mock_save_api.save.side_effect = mock_save_impl + self.mock_save_api.save.side_effect = mock_save_impl - with mock.patch.object( - waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api - ): - tasks.create_web_archive_snapshot( - self.get_or_create_test_user(), bookmark, False - ) - self.run_pending_task(tasks._create_web_archive_snapshot_task) - bookmark.refresh_from_db() + tasks.create_web_archive_snapshot( + self.get_or_create_test_user(), bookmark, False + ) + bookmark.refresh_from_db() - self.assertEqual(bookmark.title, "Updated title") - self.assertEqual( - "https://example.com/created_snapshot", - bookmark.web_archive_snapshot_url, - ) + self.assertEqual(bookmark.title, "Updated title") + self.assertEqual( + "https://example.com/created_snapshot", + bookmark.web_archive_snapshot_url, + ) def test_load_web_archive_snapshot_should_update_snapshot_url(self): bookmark = self.setup_bookmark() - mock_cdx_api = create_cdx_server_api_mock() - with mock.patch.object( - bookmarks.services.wayback, - "CustomWaybackMachineCDXServerAPI", - return_value=mock_cdx_api, - ): - tasks._load_web_archive_snapshot_task(bookmark.id) - self.run_pending_task(tasks._load_web_archive_snapshot_task) + tasks._load_web_archive_snapshot_task(bookmark.id) - bookmark.refresh_from_db() - mock_cdx_api.newest.assert_called_once() - self.assertEqual( - "https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url - ) + bookmark.refresh_from_db() + + self.assertEqual(self.executed_count(), 1) + self.mock_cdx_api.newest.assert_called_once() + self.assertEqual( + "https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url + ) def test_load_web_archive_snapshot_should_handle_missing_bookmark_id(self): - mock_cdx_api = create_cdx_server_api_mock() + tasks._load_web_archive_snapshot_task(123) - with mock.patch.object( - bookmarks.services.wayback, - "CustomWaybackMachineCDXServerAPI", - return_value=mock_cdx_api, - ): - tasks._load_web_archive_snapshot_task(123) - self.run_pending_task(tasks._load_web_archive_snapshot_task) - - mock_cdx_api.newest.assert_not_called() + self.assertEqual(self.executed_count(), 1) + self.mock_cdx_api.newest.assert_not_called() def test_load_web_archive_snapshot_should_skip_if_snapshot_exists(self): bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com") - mock_cdx_api = create_cdx_server_api_mock() - with mock.patch.object( - bookmarks.services.wayback, - "CustomWaybackMachineCDXServerAPI", - return_value=mock_cdx_api, - ): - tasks._load_web_archive_snapshot_task(bookmark.id) - self.run_pending_task(tasks._load_web_archive_snapshot_task) + tasks._load_web_archive_snapshot_task(bookmark.id) - mock_cdx_api.newest.assert_not_called() + self.assertEqual(self.executed_count(), 1) + self.mock_cdx_api.newest.assert_not_called() def test_load_web_archive_snapshot_should_handle_missing_snapshot(self): bookmark = self.setup_bookmark() - mock_cdx_api = create_cdx_server_api_mock(archive_url=None) + self.mock_cdx_api.newest.return_value = None - with mock.patch.object( - bookmarks.services.wayback, - "CustomWaybackMachineCDXServerAPI", - return_value=mock_cdx_api, - ): - tasks._load_web_archive_snapshot_task(bookmark.id) - self.run_pending_task(tasks._load_web_archive_snapshot_task) + tasks._load_web_archive_snapshot_task(bookmark.id) - self.assertEqual("", bookmark.web_archive_snapshot_url) + self.assertEqual("", bookmark.web_archive_snapshot_url) def test_load_web_archive_snapshot_should_handle_wayback_errors(self): bookmark = self.setup_bookmark() - mock_cdx_api = create_cdx_server_api_mock(fail_loading_snapshot=True) + self.mock_cdx_api.newest.side_effect = WaybackError - with mock.patch.object( - bookmarks.services.wayback, - "CustomWaybackMachineCDXServerAPI", - return_value=mock_cdx_api, - ): - tasks._load_web_archive_snapshot_task(bookmark.id) - self.run_pending_task(tasks._load_web_archive_snapshot_task) + tasks._load_web_archive_snapshot_task(bookmark.id) - self.assertEqual("", bookmark.web_archive_snapshot_url) + self.assertEqual("", bookmark.web_archive_snapshot_url) def test_load_web_archive_snapshot_should_not_save_stale_bookmark_data(self): bookmark = self.setup_bookmark() - mock_cdx_api = create_cdx_server_api_mock() # update bookmark during API call to check that saving # the snapshot does not overwrite updated bookmark data @@ -323,32 +248,26 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): bookmark.save() return mock.DEFAULT - mock_cdx_api.newest.side_effect = mock_newest_impl + self.mock_cdx_api.newest.side_effect = mock_newest_impl - with mock.patch.object( - bookmarks.services.wayback, - "CustomWaybackMachineCDXServerAPI", - return_value=mock_cdx_api, - ): - tasks._load_web_archive_snapshot_task(bookmark.id) - self.run_pending_task(tasks._load_web_archive_snapshot_task) - bookmark.refresh_from_db() + tasks._load_web_archive_snapshot_task(bookmark.id) + bookmark.refresh_from_db() - self.assertEqual("Updated title", bookmark.title) - self.assertEqual( - "https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url - ) + self.assertEqual("Updated title", bookmark.title) + self.assertEqual( + "https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url + ) @override_settings(LD_DISABLE_BACKGROUND_TASKS=True) def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled( self, ): bookmark = self.setup_bookmark() + tasks.create_web_archive_snapshot( self.get_or_create_test_user(), bookmark, False ) - - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(self.executed_count(), 0) def test_create_web_archive_snapshot_should_not_run_when_web_archive_integration_is_disabled( self, @@ -363,7 +282,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.get_or_create_test_user(), bookmark, False ) - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(self.executed_count(), 0) def test_schedule_bookmarks_without_snapshots_should_load_snapshot_for_all_bookmarks_without_snapshot( self, @@ -377,16 +296,9 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.setup_bookmark(web_archive_snapshot_url="https://example.com") tasks.schedule_bookmarks_without_snapshots(user) - self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task) - task_list = Task.objects.all() - self.assertEqual(task_list.count(), 3) - - for task in task_list: - self.assertEqual( - task.task_name, - "bookmarks.services.tasks._load_web_archive_snapshot_task", - ) + self.assertEqual(self.executed_count(), 4) + self.assertEqual(self.mock_cdx_api.newest.call_count, 3) def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks( self, @@ -403,10 +315,8 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.setup_bookmark(user=other_user) tasks.schedule_bookmarks_without_snapshots(user) - self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task) - task_list = Task.objects.all() - self.assertEqual(task_list.count(), 3) + self.assertEqual(self.mock_cdx_api.newest.call_count, 3) @override_settings(LD_DISABLE_BACKGROUND_TASKS=True) def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled( @@ -414,7 +324,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): ): tasks.schedule_bookmarks_without_snapshots(self.user) - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(self.executed_count(), 0) def test_schedule_bookmarks_without_snapshots_should_not_run_when_web_archive_integration_is_disabled( self, @@ -425,44 +335,32 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.user.profile.save() tasks.schedule_bookmarks_without_snapshots(self.user) - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(self.executed_count(), 0) def test_load_favicon_should_create_favicon_file(self): bookmark = self.setup_bookmark() - with mock.patch( - "bookmarks.services.favicon_loader.load_favicon" - ) as mock_load_favicon: - mock_load_favicon.return_value = "https_example_com.png" + tasks.load_favicon(self.get_or_create_test_user(), bookmark) + bookmark.refresh_from_db() - tasks.load_favicon(self.get_or_create_test_user(), bookmark) - self.run_pending_task(tasks._load_favicon_task) - bookmark.refresh_from_db() - - self.assertEqual(bookmark.favicon_file, "https_example_com.png") + self.assertEqual(self.executed_count(), 1) + self.assertEqual(bookmark.favicon_file, "https_example_com.png") def test_load_favicon_should_update_favicon_file(self): bookmark = self.setup_bookmark(favicon_file="https_example_com.png") - with mock.patch( - "bookmarks.services.favicon_loader.load_favicon" - ) as mock_load_favicon: - mock_load_favicon.return_value = "https_example_updated_com.png" - tasks.load_favicon(self.get_or_create_test_user(), bookmark) - self.run_pending_task(tasks._load_favicon_task) + self.mock_load_favicon.return_value = "https_example_updated_com.png" - mock_load_favicon.assert_called() - bookmark.refresh_from_db() - self.assertEqual(bookmark.favicon_file, "https_example_updated_com.png") + tasks.load_favicon(self.get_or_create_test_user(), bookmark) + + bookmark.refresh_from_db() + self.mock_load_favicon.assert_called_once() + self.assertEqual(bookmark.favicon_file, "https_example_updated_com.png") def test_load_favicon_should_handle_missing_bookmark(self): - with mock.patch( - "bookmarks.services.favicon_loader.load_favicon" - ) as mock_load_favicon: - tasks._load_favicon_task(123) - self.run_pending_task(tasks._load_favicon_task) + tasks._load_favicon_task(123) - mock_load_favicon.assert_not_called() + self.mock_load_favicon.assert_not_called() def test_load_favicon_should_not_save_stale_bookmark_data(self): bookmark = self.setup_bookmark() @@ -474,24 +372,20 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): bookmark.save() return "https_example_com.png" - with mock.patch( - "bookmarks.services.favicon_loader.load_favicon" - ) as mock_load_favicon: - mock_load_favicon.side_effect = mock_load_favicon_impl + self.mock_load_favicon.side_effect = mock_load_favicon_impl - tasks.load_favicon(self.get_or_create_test_user(), bookmark) - self.run_pending_task(tasks._load_favicon_task) - bookmark.refresh_from_db() + tasks.load_favicon(self.get_or_create_test_user(), bookmark) + bookmark.refresh_from_db() - self.assertEqual(bookmark.title, "Updated title") - self.assertEqual(bookmark.favicon_file, "https_example_com.png") + self.assertEqual(bookmark.title, "Updated title") + self.assertEqual(bookmark.favicon_file, "https_example_com.png") @override_settings(LD_DISABLE_BACKGROUND_TASKS=True) def test_load_favicon_should_not_run_when_background_tasks_are_disabled(self): bookmark = self.setup_bookmark() tasks.load_favicon(self.get_or_create_test_user(), bookmark) - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(self.executed_count(), 0) def test_load_favicon_should_not_run_when_favicon_feature_is_disabled(self): self.user.profile.enable_favicons = False @@ -500,7 +394,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): bookmark = self.setup_bookmark() tasks.load_favicon(self.get_or_create_test_user(), bookmark) - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(self.executed_count(), 0) def test_schedule_bookmarks_without_favicons_should_load_favicon_for_all_bookmarks_without_favicon( self, @@ -514,15 +408,9 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.setup_bookmark(favicon_file="https_example_com.png") tasks.schedule_bookmarks_without_favicons(user) - self.run_pending_task(tasks._schedule_bookmarks_without_favicons_task) - task_list = Task.objects.all() - self.assertEqual(task_list.count(), 3) - - for task in task_list: - self.assertEqual( - task.task_name, "bookmarks.services.tasks._load_favicon_task" - ) + self.assertEqual(self.executed_count(), 4) + self.assertEqual(self.mock_load_favicon.call_count, 3) def test_schedule_bookmarks_without_favicons_should_only_update_user_owned_bookmarks( self, @@ -539,19 +427,17 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.setup_bookmark(user=other_user) tasks.schedule_bookmarks_without_favicons(user) - self.run_pending_task(tasks._schedule_bookmarks_without_favicons_task) - task_list = Task.objects.all() - self.assertEqual(task_list.count(), 3) + self.assertEqual(self.mock_load_favicon.call_count, 3) @override_settings(LD_DISABLE_BACKGROUND_TASKS=True) def test_schedule_bookmarks_without_favicons_should_not_run_when_background_tasks_are_disabled( self, ): - bookmark = self.setup_bookmark() + self.setup_bookmark() tasks.schedule_bookmarks_without_favicons(self.get_or_create_test_user()) - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(self.executed_count(), 0) def test_schedule_bookmarks_without_favicons_should_not_run_when_favicon_feature_is_disabled( self, @@ -559,10 +445,10 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.user.profile.enable_favicons = False self.user.profile.save() - bookmark = self.setup_bookmark() + self.setup_bookmark() tasks.schedule_bookmarks_without_favicons(self.get_or_create_test_user()) - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(self.executed_count(), 0) def test_schedule_refresh_favicons_should_update_favicon_for_all_bookmarks(self): user = self.get_or_create_test_user() @@ -574,15 +460,9 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.setup_bookmark(favicon_file="https_example_com.png") tasks.schedule_refresh_favicons(user) - self.run_pending_task(tasks._schedule_refresh_favicons_task) - task_list = Task.objects.all() - self.assertEqual(task_list.count(), 6) - - for task in task_list: - self.assertEqual( - task.task_name, "bookmarks.services.tasks._load_favicon_task" - ) + self.assertEqual(self.executed_count(), 7) + self.assertEqual(self.mock_load_favicon.call_count, 6) def test_schedule_refresh_favicons_should_only_update_user_owned_bookmarks(self): user = self.get_or_create_test_user() @@ -597,10 +477,8 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.setup_bookmark(user=other_user) tasks.schedule_refresh_favicons(user) - self.run_pending_task(tasks._schedule_refresh_favicons_task) - task_list = Task.objects.all() - self.assertEqual(task_list.count(), 3) + self.assertEqual(self.mock_load_favicon.call_count, 3) @override_settings(LD_DISABLE_BACKGROUND_TASKS=True) def test_schedule_refresh_favicons_should_not_run_when_background_tasks_are_disabled( @@ -609,14 +487,14 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.setup_bookmark() tasks.schedule_refresh_favicons(self.get_or_create_test_user()) - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(self.executed_count(), 0) @override_settings(LD_ENABLE_REFRESH_FAVICONS=False) def test_schedule_refresh_favicons_should_not_run_when_refresh_is_disabled(self): self.setup_bookmark() tasks.schedule_refresh_favicons(self.get_or_create_test_user()) - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(self.executed_count(), 0) def test_schedule_refresh_favicons_should_not_run_when_favicon_feature_is_disabled( self, @@ -627,13 +505,14 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.setup_bookmark() tasks.schedule_refresh_favicons(self.get_or_create_test_user()) - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(self.executed_count(), 0) @override_settings(LD_ENABLE_SNAPSHOTS=True) def test_create_html_snapshot_should_create_pending_asset(self): bookmark = self.setup_bookmark() - with mock.patch("bookmarks.services.monolith.create_snapshot"): + # Mock the task function to avoid running it immediately + with mock.patch("bookmarks.services.tasks._create_html_snapshot_task"): tasks.create_html_snapshot(bookmark) self.assertEqual(BookmarkAsset.objects.count(), 1) @@ -652,18 +531,24 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): def test_create_html_snapshot_should_update_file_info(self): bookmark = self.setup_bookmark(url="https://example.com") - with mock.patch("bookmarks.services.singlefile.create_snapshot") as mock_create: - tasks.create_html_snapshot(bookmark) - asset = BookmarkAsset.objects.get(bookmark=bookmark) - asset.date_created = datetime.datetime(2021, 1, 2, 3, 44, 55) - asset.save() + with mock.patch( + "bookmarks.services.tasks._generate_snapshot_filename" + ) as mock_generate: expected_filename = "snapshot_2021-01-02_034455_https___example.com.html.gz" + mock_generate.return_value = expected_filename - self.run_pending_task(tasks._create_html_snapshot_task) + tasks.create_html_snapshot(bookmark) + BookmarkAsset.objects.get(bookmark=bookmark) - mock_create.assert_called_once_with( + # Run periodic task to process the snapshot + tasks._schedule_html_snapshots_task() + + self.mock_singlefile_create_snapshot.assert_called_once_with( "https://example.com", - os.path.join(settings.LD_ASSET_FOLDER, expected_filename), + os.path.join( + settings.LD_ASSET_FOLDER, + expected_filename, + ), ) asset = BookmarkAsset.objects.get(bookmark=bookmark) @@ -675,39 +560,39 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): def test_create_html_snapshot_should_handle_error(self): bookmark = self.setup_bookmark(url="https://example.com") - with mock.patch("bookmarks.services.singlefile.create_snapshot") as mock_create: - mock_create.side_effect = singlefile.SingeFileError("Error") + self.mock_singlefile_create_snapshot.side_effect = singlefile.SingeFileError( + "Error" + ) + tasks.create_html_snapshot(bookmark) - tasks.create_html_snapshot(bookmark) - self.run_pending_task(tasks._create_html_snapshot_task) + # Run periodic task to process the snapshot + tasks._schedule_html_snapshots_task() - asset = BookmarkAsset.objects.get(bookmark=bookmark) - self.assertEqual(asset.status, BookmarkAsset.STATUS_FAILURE) - self.assertEqual(asset.file, "") - self.assertFalse(asset.gzip) + asset = BookmarkAsset.objects.get(bookmark=bookmark) + self.assertEqual(asset.status, BookmarkAsset.STATUS_FAILURE) + self.assertEqual(asset.file, "") + self.assertFalse(asset.gzip) @override_settings(LD_ENABLE_SNAPSHOTS=True) def test_create_html_snapshot_should_handle_missing_bookmark(self): - with mock.patch("bookmarks.services.singlefile.create_snapshot") as mock_create: - tasks._create_html_snapshot_task(123) - self.run_pending_task(tasks._create_html_snapshot_task) + tasks._create_html_snapshot_task(123) - mock_create.assert_not_called() + self.mock_singlefile_create_snapshot.assert_not_called() @override_settings(LD_ENABLE_SNAPSHOTS=False) - def test_create_html_snapshot_should_not_run_when_single_file_is_disabled( + def test_create_html_snapshot_should_not_create_asset_when_single_file_is_disabled( self, ): bookmark = self.setup_bookmark() tasks.create_html_snapshot(bookmark) - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(BookmarkAsset.objects.count(), 0) @override_settings(LD_ENABLE_SNAPSHOTS=True, LD_DISABLE_BACKGROUND_TASKS=True) - def test_create_html_snapshot_should_not_run_when_background_tasks_are_disabled( + def test_create_html_snapshot_should_not_create_asset_when_background_tasks_are_disabled( self, ): bookmark = self.setup_bookmark() tasks.create_html_snapshot(bookmark) - self.assertEqual(Task.objects.count(), 0) + self.assertEqual(BookmarkAsset.objects.count(), 0) diff --git a/bookmarks/tests/test_singlefile_service.py b/bookmarks/tests/test_singlefile_service.py index e6a3f2e..c592f06 100644 --- a/bookmarks/tests/test_singlefile_service.py +++ b/bookmarks/tests/test_singlefile_service.py @@ -1,7 +1,7 @@ import gzip import os -from unittest import mock import subprocess +from unittest import mock from django.test import TestCase @@ -24,9 +24,11 @@ class SingleFileServiceTestCase(TestCase): file.write(self.html_content) def test_create_snapshot(self): - with mock.patch("subprocess.run") as mock_run: - mock_run.side_effect = self.create_test_file + mock_process = mock.Mock() + mock_process.wait.return_value = 0 + self.create_test_file() + with mock.patch("subprocess.Popen", return_value=mock_process): singlefile.create_snapshot("http://example.com", self.html_filepath) self.assertTrue(os.path.exists(self.html_filepath)) @@ -38,13 +40,13 @@ class SingleFileServiceTestCase(TestCase): def test_create_snapshot_failure(self): # subprocess fails - which it probably doesn't as single-file doesn't return exit codes - with mock.patch("subprocess.run") as mock_run: - mock_run.side_effect = subprocess.CalledProcessError(1, "command") + with mock.patch("subprocess.Popen") as mock_popen: + mock_popen.side_effect = subprocess.CalledProcessError(1, "command") with self.assertRaises(singlefile.SingeFileError): singlefile.create_snapshot("http://example.com", self.html_filepath) # so also check that it raises error if output file isn't created - with mock.patch("subprocess.run") as mock_run: + with mock.patch("subprocess.Popen"): with self.assertRaises(singlefile.SingeFileError): singlefile.create_snapshot("http://example.com", self.html_filepath) diff --git a/bootstrap.sh b/bootstrap.sh index e52eca3..30ad83d 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -18,6 +18,8 @@ python manage.py migrate python manage.py enable_wal # Create initial superuser if defined in options / environment variables python manage.py create_initial_superuser +# Migrate legacy background tasks to Huey +python manage.py migrate_tasks # Ensure the DB folder is owned by the right user chown -R www-data: /etc/linkding/data diff --git a/docker/alpine.Dockerfile b/docker/alpine.Dockerfile index d28b02a..71b152c 100644 --- a/docker/alpine.Dockerfile +++ b/docker/alpine.Dockerfile @@ -31,7 +31,8 @@ RUN pip install -U pip && pip install -r requirements.txt -r requirements.dev.tx COPY . . COPY --from=node-build /etc/linkding . # run Django part of the build -RUN python manage.py compilescss && \ +RUN mkdir data && \ + python manage.py compilescss && \ python manage.py collectstatic --ignore=*.scss && \ python manage.py compilescss --delete-files diff --git a/docker/default.Dockerfile b/docker/default.Dockerfile index 778448f..8673293 100644 --- a/docker/default.Dockerfile +++ b/docker/default.Dockerfile @@ -33,7 +33,8 @@ RUN pip install -U pip && pip install -r requirements.txt -r requirements.dev.tx COPY . . COPY --from=node-build /etc/linkding . # run Django part of the build -RUN python manage.py compilescss && \ +RUN mkdir data && \ + python manage.py compilescss && \ python manage.py collectstatic --ignore=*.scss && \ python manage.py compilescss --delete-files diff --git a/requirements.in b/requirements.in index e67449c..20ac8b3 100644 --- a/requirements.in +++ b/requirements.in @@ -5,8 +5,8 @@ Django django-registration django-sass-processor django-widget-tweaks -django4-background-tasks djangorestframework +huey Markdown mozilla-django-oidc psycopg2-binary diff --git a/requirements.txt b/requirements.txt index 71cb706..5643fd6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,10 +39,10 @@ django-sass-processor==1.4 # via -r requirements.in django-widget-tweaks==1.5.0 # via -r requirements.in -django4-background-tasks==1.2.9 - # via -r requirements.in djangorestframework==3.14.0 # via -r requirements.in +huey==2.5.0 + # via -r requirements.in idna==3.6 # via requests josepy==1.14.0 @@ -69,7 +69,6 @@ requests==2.31.0 six==1.16.0 # via # bleach - # django4-background-tasks # python-dateutil soupsieve==2.5 # via beautifulsoup4 diff --git a/siteroot/settings/base.py b/siteroot/settings/base.py index 537eefa..b9f2d7f 100644 --- a/siteroot/settings/base.py +++ b/siteroot/settings/base.py @@ -42,7 +42,7 @@ INSTALLED_APPS = [ "widget_tweaks", "rest_framework", "rest_framework.authtoken", - "background_task", + "huey.contrib.djhuey", "mozilla_django_oidc", ] @@ -173,13 +173,27 @@ LD_DISABLE_BACKGROUND_TASKS = os.getenv("LD_DISABLE_BACKGROUND_TASKS", False) in "1", ) -# django-background-tasks -MAX_ATTEMPTS = 5 -# How many tasks will run in parallel -# We want to keep this low to prevent SQLite lock errors and in general not to consume too much resources on smaller -# specced systems like Raspberries. Should be OK as tasks are not time critical. -BACKGROUND_TASK_RUN_ASYNC = True -BACKGROUND_TASK_ASYNC_THREADS = 2 +# Huey task queue +HUEY = { + "huey_class": "huey.SqliteHuey", + "filename": os.path.join(BASE_DIR, "data", "tasks.sqlite3"), + "immediate": False, + "results": False, + "store_none": False, + "utc": True, + "consumer": { + "workers": 2, + "worker_type": "thread", + "initial_delay": 5, + "backoff": 1.15, + "max_delay": 10, + "scheduler_interval": 10, + "periodic": True, + "check_worker_health": True, + "health_check_interval": 10, + }, +} + # Enable OICD support if configured LD_ENABLE_OIDC = os.getenv("LD_ENABLE_OIDC", False) in (True, "True", "1") diff --git a/siteroot/settings/dev.py b/siteroot/settings/dev.py index 3264158..277fd5e 100644 --- a/siteroot/settings/dev.py +++ b/siteroot/settings/dev.py @@ -47,6 +47,11 @@ LOGGING = { "handlers": ["console"], "propagate": False, }, + "huey": { # Huey + "level": "INFO", + "handlers": ["console"], + "propagate": False, + }, }, } diff --git a/siteroot/settings/prod.py b/siteroot/settings/prod.py index 5da0a24..8ce17fc 100644 --- a/siteroot/settings/prod.py +++ b/siteroot/settings/prod.py @@ -49,7 +49,12 @@ LOGGING = { "level": "INFO", "handlers": ["console"], "propagate": False, - } + }, + "huey": { + "level": "INFO", + "handlers": ["console"], + "propagate": False, + }, }, } diff --git a/supervisord.conf b/supervisord.conf index df825a2..15d7f5e 100644 --- a/supervisord.conf +++ b/supervisord.conf @@ -4,8 +4,18 @@ loglevel=info [program:jobs] user=www-data -command=sh background-tasks-wrapper.sh +command=python manage.py run_huey -f stdout_logfile=background_tasks.log stdout_logfile_maxbytes=10MB stdout_logfile_backups=5 redirect_stderr=true + +[unix_http_server] +file=/var/run/supervisor.sock +chmod=0700 + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[supervisorctl] +serverurl=unix:///var/run/supervisor.sock