Replace django-background-tasks with huey (#657)

* Replace django-background-tasks with huey

* Add command for migrating tasks

* Add custom admin view

* fix dockerfile

* fix tests

* fix tests in CI

* fix task tests

* implement retries

* improve config

* workaround to avoid running singlefile tasks in parallel

* properly kill single-file sub-processes

* use period task for HTML snapshots

* clear task lock when starting task consumer

* remove obsolete cleanup task command
This commit is contained in:
Sascha Ißbrücker 2024-04-07 11:11:14 +02:00 committed by GitHub
parent 68c163d943
commit a6f35119cd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 566 additions and 403 deletions

View file

@ -5,7 +5,6 @@
!/bookmarks
!/siteroot
!/background-tasks-wrapper.sh
!/bootstrap.sh
!/LICENSE.txt
!/manage.py

View file

@ -24,7 +24,9 @@ jobs:
- name: Install Node dependencies
run: npm ci
- name: Setup Python environment
run: pip install -r requirements.txt -r requirements.dev.txt
run: |
pip install -r requirements.txt -r requirements.dev.txt
mkdir data
- name: Run tests
run: python manage.py test bookmarks.tests
e2e_tests:
@ -47,6 +49,7 @@ jobs:
run: |
pip install -r requirements.txt -r requirements.dev.txt
playwright install chromium
mkdir data
- name: Run build
run: |
npm run build

View file

@ -1,5 +0,0 @@
#!/usr/bin/env bash
# Wrapper script used by supervisord to first clear task locks before starting the background task processor
python manage.py clean_tasks
exec python manage.py process_tasks

View file

@ -1,22 +1,96 @@
from background_task.admin import TaskAdmin, CompletedTaskAdmin
from background_task.models import Task, CompletedTask
from django.contrib import admin, messages
from django.contrib.admin import AdminSite
from django.contrib.auth.admin import UserAdmin
from django.contrib.auth.models import User
from django.core.paginator import Paginator
from django.db.models import Count, QuerySet
from django.shortcuts import render
from django.urls import path
from django.utils.translation import ngettext, gettext
from huey.contrib.djhuey import HUEY as huey
from rest_framework.authtoken.admin import TokenAdmin
from rest_framework.authtoken.models import TokenProxy
from bookmarks.models import Bookmark, Tag, UserProfile, Toast, FeedToken
from bookmarks.models import Bookmark, BookmarkAsset, Tag, UserProfile, Toast, FeedToken
from bookmarks.services.bookmarks import archive_bookmark, unarchive_bookmark
# Custom paginator to paginate through Huey tasks
class TaskPaginator(Paginator):
def __init__(self):
super().__init__(self, 100)
self.task_count = huey.storage.queue_size()
@property
def count(self):
return self.task_count
def page(self, number):
limit = self.per_page
offset = (number - 1) * self.per_page
return self._get_page(
self.enqueued_items(limit, offset),
number,
self,
)
# Copied from Huey's SqliteStorage with some modifications to allow pagination
def enqueued_items(self, limit, offset):
to_bytes = lambda b: bytes(b) if not isinstance(b, bytes) else b
sql = "select data from task where queue=? order by priority desc, id limit ? offset ?"
params = (huey.storage.name, limit, offset)
serialized_tasks = [
to_bytes(i) for i, in huey.storage.sql(sql, params, results=True)
]
return [huey.deserialize_task(task) for task in serialized_tasks]
# Custom view to display Huey tasks in the admin
def background_task_view(request):
page_number = int(request.GET.get("p", 1))
paginator = TaskPaginator()
page = paginator.get_page(page_number)
page_range = paginator.get_elided_page_range(page_number, on_each_side=2, on_ends=2)
context = {
**linkding_admin_site.each_context(request),
"title": "Background tasks",
"page": page,
"page_range": page_range,
"tasks": page.object_list,
}
return render(request, "admin/background_tasks.html", context)
class LinkdingAdminSite(AdminSite):
site_header = "linkding administration"
site_title = "linkding Admin"
def get_urls(self):
urls = super().get_urls()
custom_urls = [
path("tasks/", background_task_view, name="background_tasks"),
]
return custom_urls + urls
def get_app_list(self, request, app_label=None):
app_list = super().get_app_list(request, app_label)
app_list += [
{
"name": "Huey",
"app_label": "huey_app",
"models": [
{
"name": "Queued tasks",
"object_name": "background_tasks",
"admin_url": "/admin/tasks/",
"view_only": True,
}
],
}
]
return app_list
class AdminBookmark(admin.ModelAdmin):
list_display = ("resolved_title", "url", "is_archived", "owner", "date_added")
@ -125,6 +199,15 @@ class AdminBookmark(admin.ModelAdmin):
)
class AdminBookmarkAsset(admin.ModelAdmin):
list_display = ("display_name", "date_created", "status")
search_fields = (
"display_name",
"file",
)
list_filter = ("status",)
class AdminTag(admin.ModelAdmin):
list_display = ("name", "bookmarks_count", "owner", "date_added")
search_fields = ("name", "owner__username")
@ -200,10 +283,9 @@ class AdminFeedToken(admin.ModelAdmin):
linkding_admin_site = LinkdingAdminSite()
linkding_admin_site.register(Bookmark, AdminBookmark)
linkding_admin_site.register(BookmarkAsset, AdminBookmarkAsset)
linkding_admin_site.register(Tag, AdminTag)
linkding_admin_site.register(User, AdminCustomUser)
linkding_admin_site.register(TokenProxy, TokenAdmin)
linkding_admin_site.register(Toast, AdminToast)
linkding_admin_site.register(FeedToken, AdminFeedToken)
linkding_admin_site.register(Task, TaskAdmin)
linkding_admin_site.register(CompletedTask, CompletedTaskAdmin)

View file

@ -1,15 +0,0 @@
from background_task.models import Task, CompletedTask
from django.core.management.base import BaseCommand
class Command(BaseCommand):
help = "Remove task locks and clear completed task history"
def handle(self, *args, **options):
# Remove task locks
# If the background task processor exited while executing tasks, these tasks would still be marked as locked,
# even though no process is working on them, and would prevent the task processor from picking the next task in
# the queue
Task.objects.all().update(locked_by=None, locked_at=None)
# Clear task history to prevent them from bloating the DB
CompletedTask.objects.all().delete()

View file

@ -0,0 +1,75 @@
import json
import os
import sqlite3
import importlib
from django.core.management.base import BaseCommand
class Command(BaseCommand):
help = "Migrate tasks from django-background-tasks to Huey"
def handle(self, *args, **options):
db = sqlite3.connect(os.path.join("data", "db.sqlite3"))
# Check if background_task table exists
cursor = db.cursor()
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='background_task'"
)
row = cursor.fetchone()
if not row:
self.stdout.write(
"Legacy task table does not exist. Skipping task migration"
)
return
# Load legacy tasks
cursor.execute("SELECT id, task_name, task_params FROM background_task")
legacy_tasks = cursor.fetchall()
if len(legacy_tasks) == 0:
self.stdout.write("No legacy tasks found. Skipping task migration")
return
self.stdout.write(
f"Found {len(legacy_tasks)} legacy tasks. Migrating to Huey..."
)
# Migrate tasks to Huey
succeeded_tasks = []
for task in legacy_tasks:
task_id = task[0]
task_name = task[1]
task_params_json = task[2]
try:
task_params = json.loads(task_params_json)
function_params = task_params[0]
# Resolve task function
module_name, func_name = task_name.rsplit(".", 1)
module = importlib.import_module(module_name)
func = getattr(module, func_name)
# Call task function
func(*function_params)
succeeded_tasks.append(task_id)
except Exception:
self.stderr.write(f"Error migrating task [{task_id}] {task_name}")
self.stdout.write(f"Migrated {len(succeeded_tasks)} tasks successfully")
# Clean up
try:
placeholders = ", ".join("?" for _ in succeeded_tasks)
sql = f"DELETE FROM background_task WHERE id IN ({placeholders})"
cursor.execute(sql, succeeded_tasks)
db.commit()
self.stdout.write(
f"Deleted {len(succeeded_tasks)} migrated tasks from legacy table"
)
except Exception:
self.stderr.write("Error cleaning up legacy tasks")
cursor.close()
db.close()

View file

@ -1,6 +1,8 @@
import gzip
import logging
import os
import shutil
import signal
import subprocess
from django.conf import settings
@ -10,16 +12,21 @@ class SingeFileError(Exception):
pass
logger = logging.getLogger(__name__)
def create_snapshot(url: str, filepath: str):
singlefile_path = settings.LD_SINGLEFILE_PATH
singlefile_options = settings.LD_SINGLEFILE_OPTIONS
# singlefile_options = settings.LD_SINGLEFILE_OPTIONS
temp_filepath = filepath + ".tmp"
args = [singlefile_path, url, temp_filepath]
try:
command = f"{singlefile_path} '{url}' {singlefile_options} {temp_filepath}"
subprocess.run(command, check=True, shell=True)
# Use start_new_session=True to create a new process group
process = subprocess.Popen(args, start_new_session=True)
process.wait(timeout=60)
# single-file doesn't return exit codes apparently, so check if the file was created
# check if the file was created
if not os.path.exists(temp_filepath):
raise SingeFileError("Failed to create snapshot")
@ -29,5 +36,20 @@ def create_snapshot(url: str, filepath: str):
shutil.copyfileobj(raw_file, gz_file)
os.remove(temp_filepath)
except subprocess.TimeoutExpired:
# First try to terminate properly
try:
logger.error(
"Timeout expired while creating snapshot. Terminating process..."
)
process.terminate()
process.wait(timeout=20)
raise SingeFileError("Timeout expired while creating snapshot")
except subprocess.TimeoutExpired:
# Kill the whole process group, which should also clean up any chromium
# processes spawned by single-file
logger.error("Timeout expired while terminating. Killing process...")
os.killpg(os.getpgid(process.pid), signal.SIGTERM)
raise SingeFileError("Timeout expired while creating snapshot")
except subprocess.CalledProcessError as error:
raise SingeFileError(f"Failed to create snapshot: {error.stderr}")

View file

@ -1,14 +1,16 @@
import functools
import logging
import os
import waybackpy
from background_task import background
from background_task.models import Task
from django.conf import settings
from django.contrib.auth import get_user_model
from django.contrib.auth.models import User
from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound
from django.utils import timezone, formats
from huey import crontab
from huey.contrib.djhuey import HUEY as huey
from huey.exceptions import TaskLockedException
from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound
import bookmarks.services.wayback
from bookmarks.models import Bookmark, BookmarkAsset, UserProfile
@ -18,6 +20,35 @@ from bookmarks.services.website_loader import DEFAULT_USER_AGENT
logger = logging.getLogger(__name__)
# Create custom decorator for Huey tasks that implements exponential backoff
# Taken from: https://huey.readthedocs.io/en/latest/guide.html#tips-and-tricks
# Retry 1: 60
# Retry 2: 240
# Retry 3: 960
# Retry 4: 3840
# Retry 5: 15360
def task(retries=5, retry_delay=15, retry_backoff=4):
def deco(fn):
@functools.wraps(fn)
def inner(*args, **kwargs):
task = kwargs.pop("task")
try:
return fn(*args, **kwargs)
except TaskLockedException as exc:
# Task locks are currently only used as workaround to enforce
# running specific types of tasks (e.g. singlefile snapshots)
# sequentially. In that case don't reduce the number of retries.
task.retries = retries
raise exc
except Exception as exc:
task.retry_delay *= retry_backoff
raise exc
return huey.task(retries=retries, retry_delay=retry_delay, context=True)(inner)
return deco
def is_web_archive_integration_active(user: User) -> bool:
background_tasks_enabled = not settings.LD_DISABLE_BACKGROUND_TASKS
web_archive_integration_enabled = (
@ -67,7 +98,7 @@ def _create_snapshot(bookmark: Bookmark):
logger.info(f"Successfully created new snapshot for bookmark:. url={bookmark.url}")
@background()
@task()
def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
try:
bookmark = Bookmark.objects.get(id=bookmark_id)
@ -96,7 +127,7 @@ def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
_load_newest_snapshot(bookmark)
@background()
@task()
def _load_web_archive_snapshot_task(bookmark_id: int):
try:
bookmark = Bookmark.objects.get(id=bookmark_id)
@ -114,13 +145,14 @@ def schedule_bookmarks_without_snapshots(user: User):
_schedule_bookmarks_without_snapshots_task(user.id)
@background()
@task()
def _schedule_bookmarks_without_snapshots_task(user_id: int):
user = get_user_model().objects.get(id=user_id)
bookmarks_without_snapshots = Bookmark.objects.filter(
web_archive_snapshot_url__exact="", owner=user
)
# TODO: Implement bulk task creation
for bookmark in bookmarks_without_snapshots:
# To prevent rate limit errors from the Wayback API only try to load the latest snapshots instead of creating
# new ones when processing bookmarks in bulk
@ -138,7 +170,7 @@ def load_favicon(user: User, bookmark: Bookmark):
_load_favicon_task(bookmark.id)
@background()
@task()
def _load_favicon_task(bookmark_id: int):
try:
bookmark = Bookmark.objects.get(id=bookmark_id)
@ -162,19 +194,15 @@ def schedule_bookmarks_without_favicons(user: User):
_schedule_bookmarks_without_favicons_task(user.id)
@background()
@task()
def _schedule_bookmarks_without_favicons_task(user_id: int):
user = get_user_model().objects.get(id=user_id)
bookmarks = Bookmark.objects.filter(favicon_file__exact="", owner=user)
tasks = []
# TODO: Implement bulk task creation
for bookmark in bookmarks:
task = Task.objects.new_task(
task_name="bookmarks.services.tasks._load_favicon_task", args=(bookmark.id,)
)
tasks.append(task)
Task.objects.bulk_create(tasks)
_load_favicon_task(bookmark.id)
pass
def schedule_refresh_favicons(user: User):
@ -182,19 +210,14 @@ def schedule_refresh_favicons(user: User):
_schedule_refresh_favicons_task(user.id)
@background()
@task()
def _schedule_refresh_favicons_task(user_id: int):
user = get_user_model().objects.get(id=user_id)
bookmarks = Bookmark.objects.filter(owner=user)
tasks = []
# TODO: Implement bulk task creation
for bookmark in bookmarks:
task = Task.objects.new_task(
task_name="bookmarks.services.tasks._load_favicon_task", args=(bookmark.id,)
)
tasks.append(task)
Task.objects.bulk_create(tasks)
_load_favicon_task(bookmark.id)
def is_html_snapshot_feature_active() -> bool:
@ -214,7 +237,6 @@ def create_html_snapshot(bookmark: Bookmark):
status=BookmarkAsset.STATUS_PENDING,
)
asset.save()
_create_html_snapshot_task(asset.id)
def _generate_snapshot_filename(asset: BookmarkAsset) -> str:
@ -230,7 +252,23 @@ def _generate_snapshot_filename(asset: BookmarkAsset) -> str:
return f"{asset.asset_type}_{formatted_datetime}_{sanitized_url}.html.gz"
@background()
# singe-file does not support running multiple instances in parallel, so we can
# not queue up multiple snapshot tasks at once. Instead, schedule a periodic
# task that grabs a number of pending assets and creates snapshots for them in
# sequence. The task uses a lock to ensure that a new task isn't scheduled
# before the previous one has finished.
@huey.periodic_task(crontab(minute="*"))
@huey.lock_task("schedule-html-snapshots-lock")
def _schedule_html_snapshots_task():
# Get five pending assets
assets = BookmarkAsset.objects.filter(status=BookmarkAsset.STATUS_PENDING).order_by(
"date_created"
)[:5]
for asset in assets:
_create_html_snapshot_task(asset.id)
def _create_html_snapshot_task(asset_id: int):
try:
asset = BookmarkAsset.objects.get(id=asset_id)
@ -246,13 +284,14 @@ def _create_html_snapshot_task(asset_id: int):
asset.status = BookmarkAsset.STATUS_COMPLETE
asset.file = filename
asset.gzip = True
asset.save()
logger.info(
f"Successfully created HTML snapshot for bookmark. url={asset.bookmark.url}"
)
except singlefile.SingeFileError as error:
asset.status = BookmarkAsset.STATUS_FAILURE
except Exception as error:
logger.error(
f"Failed to create HTML snapshot for bookmark. url={asset.bookmark.url}",
f"Failed to HTML snapshot for bookmark. url={asset.bookmark.url}",
exc_info=error,
)
asset.save()
asset.status = BookmarkAsset.STATUS_FAILURE
asset.save()

2
bookmarks/tasks.py Normal file
View file

@ -0,0 +1,2 @@
# Expose task modules to Huey Django extension
import bookmarks.services.tasks

View file

@ -0,0 +1,39 @@
{% extends "admin/base_site.html" %}
{% block content %}
<table style="width: 100%">
<thead>
<tr>
<th>ID</th>
<th>Name</th>
<th>Args</th>
<th>Retries</th>
</tr>
</thead>
<tbody>
{% for task in tasks %}
<tr>
<td>{{ task.id }}</td>
<td>{{ task.name }}</td>
<td>{{ task.args }}</td>
<td>{{ task.retries }}</td>
</tr>
{% endfor %}
</tbody>
</table>
<p class="paginator">
{% if page.paginator.num_pages > 1 %}
{% for page_number in page_range %}
{% if page_number == page.number %}
<span class="this-page">{{ page_number }}</span>
{% elif page_number == '…' %}
<span></span>
{% else %}
<a href="?p={{ page_number }}">{{ page_number }}</a>
{% endif %}
{% endfor %}
&nbsp;
{% endif %}
{{ page.paginator.count }} tasks
</p>
{% endblock %}

View file

@ -776,6 +776,4 @@ class BookmarkDetailsModalTestCase(TestCase, BookmarkFactoryMixin, HtmlTestMixin
)
self.assertEqual(response.status_code, 302)
mock_create_html_snapshot_task.assert_called_with(bookmark.id)
self.assertEqual(bookmark.bookmarkasset_set.count(), 1)

View file

@ -1,21 +1,20 @@
import datetime
import os.path
from dataclasses import dataclass
from typing import Any
from unittest import mock
import waybackpy
from background_task.models import Task
from django.conf import settings
from django.contrib.auth.models import User
from django.test import TestCase, override_settings
from huey.contrib.djhuey import HUEY as huey
from waybackpy.exceptions import WaybackError
import bookmarks.services.favicon_loader
import bookmarks.services.wayback
from bookmarks.models import BookmarkAsset, UserProfile
from bookmarks.services import tasks, singlefile
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
from bookmarks.tests.helpers import BookmarkFactoryMixin
def create_wayback_machine_save_api_mock(
@ -36,27 +35,45 @@ class MockCdxSnapshot:
datetime_timestamp: datetime.datetime
def create_cdx_server_api_mock(
archive_url: str | None = "https://example.com/newest_snapshot",
fail_loading_snapshot=False,
):
mock_api = mock.Mock()
if fail_loading_snapshot:
mock_api.newest.side_effect = WaybackError
elif archive_url:
mock_api.newest.return_value = MockCdxSnapshot(
archive_url, datetime.datetime.now()
)
else:
mock_api.newest.return_value = None
return mock_api
class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
def setUp(self):
huey.immediate = True
huey.results = True
huey.store_none = True
self.mock_save_api = mock.Mock(
archive_url="https://example.com/created_snapshot"
)
self.mock_save_api_patcher = mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=self.mock_save_api
)
self.mock_save_api_patcher.start()
self.mock_cdx_api = mock.Mock()
self.mock_cdx_api.newest.return_value = MockCdxSnapshot(
"https://example.com/newest_snapshot", datetime.datetime.now()
)
self.mock_cdx_api_patcher = mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=self.mock_cdx_api,
)
self.mock_cdx_api_patcher.start()
self.mock_load_favicon_patcher = mock.patch(
"bookmarks.services.favicon_loader.load_favicon"
)
self.mock_load_favicon = self.mock_load_favicon_patcher.start()
self.mock_load_favicon.return_value = "https_example_com.png"
self.mock_singlefile_create_snapshot_patcher = mock.patch(
"bookmarks.services.singlefile.create_snapshot",
)
self.mock_singlefile_create_snapshot = (
self.mock_singlefile_create_snapshot_patcher.start()
)
user = self.get_or_create_test_user()
user.profile.web_archive_integration = (
UserProfile.WEB_ARCHIVE_INTEGRATION_ENABLED
@ -64,157 +81,100 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
user.profile.enable_favicons = True
user.profile.save()
@disable_logging
def run_pending_task(self, task_function: Any):
func = getattr(task_function, "task_function", None)
task = Task.objects.all()[0]
self.assertEqual(task_function.name, task.task_name)
args, kwargs = task.params()
func(*args, **kwargs)
task.delete()
def tearDown(self):
self.mock_save_api_patcher.stop()
self.mock_cdx_api_patcher.stop()
self.mock_load_favicon_patcher.stop()
self.mock_singlefile_create_snapshot_patcher.stop()
huey.storage.flush_results()
huey.immediate = False
@disable_logging
def run_all_pending_tasks(self, task_function: Any):
func = getattr(task_function, "task_function", None)
tasks = Task.objects.all()
for task in tasks:
self.assertEqual(task_function.name, task.task_name)
args, kwargs = task.params()
func(*args, **kwargs)
task.delete()
def executed_count(self):
return len(huey.all_results())
def test_create_web_archive_snapshot_should_update_snapshot_url(self):
bookmark = self.setup_bookmark()
mock_save_api = create_wayback_machine_save_api_mock()
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db()
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
bookmark.refresh_from_db()
mock_save_api.save.assert_called_once()
self.assertEqual(
bookmark.web_archive_snapshot_url,
"https://example.com/created_snapshot",
)
self.mock_save_api.save.assert_called_once()
self.assertEqual(self.executed_count(), 1)
self.assertEqual(
bookmark.web_archive_snapshot_url,
"https://example.com/created_snapshot",
)
def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self):
mock_save_api = create_wayback_machine_save_api_mock()
tasks._create_web_archive_snapshot_task(123, False)
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
tasks._create_web_archive_snapshot_task(123, False)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
mock_save_api.save.assert_not_called()
self.assertEqual(self.executed_count(), 1)
self.mock_save_api.save.assert_not_called()
def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self):
bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com")
mock_save_api = create_wayback_machine_save_api_mock()
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
self.mock_save_api.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
mock_save_api.assert_not_called()
self.assertEqual(self.executed_count(), 0)
self.mock_save_api.assert_not_called()
def test_create_web_archive_snapshot_should_force_update_snapshot(self):
bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com")
mock_save_api = create_wayback_machine_save_api_mock(
archive_url="https://other.com"
self.mock_save_api.archive_url = "https://other.com"
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, True
)
bookmark.refresh_from_db()
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, True
)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db()
self.assertEqual(bookmark.web_archive_snapshot_url, "https://other.com")
self.assertEqual(bookmark.web_archive_snapshot_url, "https://other.com")
def test_create_web_archive_snapshot_should_use_newest_snapshot_as_fallback(self):
bookmark = self.setup_bookmark()
mock_save_api = create_wayback_machine_save_api_mock(fail_on_save=True)
mock_cdx_api = create_cdx_server_api_mock()
self.mock_save_api.save.side_effect = WaybackError
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
bookmark.refresh_from_db()
mock_cdx_api.newest.assert_called_once()
self.assertEqual(
"https://example.com/newest_snapshot",
bookmark.web_archive_snapshot_url,
)
bookmark.refresh_from_db()
self.mock_cdx_api.newest.assert_called_once()
self.assertEqual(
"https://example.com/newest_snapshot",
bookmark.web_archive_snapshot_url,
)
def test_create_web_archive_snapshot_should_ignore_missing_newest_snapshot(self):
bookmark = self.setup_bookmark()
mock_save_api = create_wayback_machine_save_api_mock(fail_on_save=True)
mock_cdx_api = create_cdx_server_api_mock(archive_url=None)
self.mock_save_api.save.side_effect = WaybackError
self.mock_cdx_api.newest.return_value = None
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
bookmark.refresh_from_db()
self.assertEqual("", bookmark.web_archive_snapshot_url)
bookmark.refresh_from_db()
self.assertEqual("", bookmark.web_archive_snapshot_url)
def test_create_web_archive_snapshot_should_ignore_newest_snapshot_errors(self):
bookmark = self.setup_bookmark()
mock_save_api = create_wayback_machine_save_api_mock(fail_on_save=True)
mock_cdx_api = create_cdx_server_api_mock(fail_loading_snapshot=True)
self.mock_save_api.save.side_effect = WaybackError
self.mock_cdx_api.newest.side_effect = WaybackError
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
bookmark.refresh_from_db()
self.assertEqual("", bookmark.web_archive_snapshot_url)
bookmark.refresh_from_db()
self.assertEqual("", bookmark.web_archive_snapshot_url)
def test_create_web_archive_snapshot_should_not_save_stale_bookmark_data(self):
bookmark = self.setup_bookmark()
mock_save_api = create_wayback_machine_save_api_mock()
# update bookmark during API call to check that saving
# the snapshot does not overwrite updated bookmark data
@ -222,99 +182,64 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
bookmark.title = "Updated title"
bookmark.save()
mock_save_api.save.side_effect = mock_save_impl
self.mock_save_api.save.side_effect = mock_save_impl
with mock.patch.object(
waybackpy, "WaybackMachineSaveAPI", return_value=mock_save_api
):
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db()
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
bookmark.refresh_from_db()
self.assertEqual(bookmark.title, "Updated title")
self.assertEqual(
"https://example.com/created_snapshot",
bookmark.web_archive_snapshot_url,
)
self.assertEqual(bookmark.title, "Updated title")
self.assertEqual(
"https://example.com/created_snapshot",
bookmark.web_archive_snapshot_url,
)
def test_load_web_archive_snapshot_should_update_snapshot_url(self):
bookmark = self.setup_bookmark()
mock_cdx_api = create_cdx_server_api_mock()
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks._load_web_archive_snapshot_task(bookmark.id)
self.run_pending_task(tasks._load_web_archive_snapshot_task)
tasks._load_web_archive_snapshot_task(bookmark.id)
bookmark.refresh_from_db()
mock_cdx_api.newest.assert_called_once()
self.assertEqual(
"https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url
)
bookmark.refresh_from_db()
self.assertEqual(self.executed_count(), 1)
self.mock_cdx_api.newest.assert_called_once()
self.assertEqual(
"https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url
)
def test_load_web_archive_snapshot_should_handle_missing_bookmark_id(self):
mock_cdx_api = create_cdx_server_api_mock()
tasks._load_web_archive_snapshot_task(123)
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks._load_web_archive_snapshot_task(123)
self.run_pending_task(tasks._load_web_archive_snapshot_task)
mock_cdx_api.newest.assert_not_called()
self.assertEqual(self.executed_count(), 1)
self.mock_cdx_api.newest.assert_not_called()
def test_load_web_archive_snapshot_should_skip_if_snapshot_exists(self):
bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com")
mock_cdx_api = create_cdx_server_api_mock()
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks._load_web_archive_snapshot_task(bookmark.id)
self.run_pending_task(tasks._load_web_archive_snapshot_task)
tasks._load_web_archive_snapshot_task(bookmark.id)
mock_cdx_api.newest.assert_not_called()
self.assertEqual(self.executed_count(), 1)
self.mock_cdx_api.newest.assert_not_called()
def test_load_web_archive_snapshot_should_handle_missing_snapshot(self):
bookmark = self.setup_bookmark()
mock_cdx_api = create_cdx_server_api_mock(archive_url=None)
self.mock_cdx_api.newest.return_value = None
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks._load_web_archive_snapshot_task(bookmark.id)
self.run_pending_task(tasks._load_web_archive_snapshot_task)
tasks._load_web_archive_snapshot_task(bookmark.id)
self.assertEqual("", bookmark.web_archive_snapshot_url)
self.assertEqual("", bookmark.web_archive_snapshot_url)
def test_load_web_archive_snapshot_should_handle_wayback_errors(self):
bookmark = self.setup_bookmark()
mock_cdx_api = create_cdx_server_api_mock(fail_loading_snapshot=True)
self.mock_cdx_api.newest.side_effect = WaybackError
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks._load_web_archive_snapshot_task(bookmark.id)
self.run_pending_task(tasks._load_web_archive_snapshot_task)
tasks._load_web_archive_snapshot_task(bookmark.id)
self.assertEqual("", bookmark.web_archive_snapshot_url)
self.assertEqual("", bookmark.web_archive_snapshot_url)
def test_load_web_archive_snapshot_should_not_save_stale_bookmark_data(self):
bookmark = self.setup_bookmark()
mock_cdx_api = create_cdx_server_api_mock()
# update bookmark during API call to check that saving
# the snapshot does not overwrite updated bookmark data
@ -323,32 +248,26 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
bookmark.save()
return mock.DEFAULT
mock_cdx_api.newest.side_effect = mock_newest_impl
self.mock_cdx_api.newest.side_effect = mock_newest_impl
with mock.patch.object(
bookmarks.services.wayback,
"CustomWaybackMachineCDXServerAPI",
return_value=mock_cdx_api,
):
tasks._load_web_archive_snapshot_task(bookmark.id)
self.run_pending_task(tasks._load_web_archive_snapshot_task)
bookmark.refresh_from_db()
tasks._load_web_archive_snapshot_task(bookmark.id)
bookmark.refresh_from_db()
self.assertEqual("Updated title", bookmark.title)
self.assertEqual(
"https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url
)
self.assertEqual("Updated title", bookmark.title)
self.assertEqual(
"https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url
)
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(
self,
):
bookmark = self.setup_bookmark()
tasks.create_web_archive_snapshot(
self.get_or_create_test_user(), bookmark, False
)
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(self.executed_count(), 0)
def test_create_web_archive_snapshot_should_not_run_when_web_archive_integration_is_disabled(
self,
@ -363,7 +282,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.get_or_create_test_user(), bookmark, False
)
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(self.executed_count(), 0)
def test_schedule_bookmarks_without_snapshots_should_load_snapshot_for_all_bookmarks_without_snapshot(
self,
@ -377,16 +296,9 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(web_archive_snapshot_url="https://example.com")
tasks.schedule_bookmarks_without_snapshots(user)
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
task_list = Task.objects.all()
self.assertEqual(task_list.count(), 3)
for task in task_list:
self.assertEqual(
task.task_name,
"bookmarks.services.tasks._load_web_archive_snapshot_task",
)
self.assertEqual(self.executed_count(), 4)
self.assertEqual(self.mock_cdx_api.newest.call_count, 3)
def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(
self,
@ -403,10 +315,8 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(user=other_user)
tasks.schedule_bookmarks_without_snapshots(user)
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
task_list = Task.objects.all()
self.assertEqual(task_list.count(), 3)
self.assertEqual(self.mock_cdx_api.newest.call_count, 3)
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(
@ -414,7 +324,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
):
tasks.schedule_bookmarks_without_snapshots(self.user)
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(self.executed_count(), 0)
def test_schedule_bookmarks_without_snapshots_should_not_run_when_web_archive_integration_is_disabled(
self,
@ -425,44 +335,32 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.user.profile.save()
tasks.schedule_bookmarks_without_snapshots(self.user)
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(self.executed_count(), 0)
def test_load_favicon_should_create_favicon_file(self):
bookmark = self.setup_bookmark()
with mock.patch(
"bookmarks.services.favicon_loader.load_favicon"
) as mock_load_favicon:
mock_load_favicon.return_value = "https_example_com.png"
tasks.load_favicon(self.get_or_create_test_user(), bookmark)
bookmark.refresh_from_db()
tasks.load_favicon(self.get_or_create_test_user(), bookmark)
self.run_pending_task(tasks._load_favicon_task)
bookmark.refresh_from_db()
self.assertEqual(bookmark.favicon_file, "https_example_com.png")
self.assertEqual(self.executed_count(), 1)
self.assertEqual(bookmark.favicon_file, "https_example_com.png")
def test_load_favicon_should_update_favicon_file(self):
bookmark = self.setup_bookmark(favicon_file="https_example_com.png")
with mock.patch(
"bookmarks.services.favicon_loader.load_favicon"
) as mock_load_favicon:
mock_load_favicon.return_value = "https_example_updated_com.png"
tasks.load_favicon(self.get_or_create_test_user(), bookmark)
self.run_pending_task(tasks._load_favicon_task)
self.mock_load_favicon.return_value = "https_example_updated_com.png"
mock_load_favicon.assert_called()
bookmark.refresh_from_db()
self.assertEqual(bookmark.favicon_file, "https_example_updated_com.png")
tasks.load_favicon(self.get_or_create_test_user(), bookmark)
bookmark.refresh_from_db()
self.mock_load_favicon.assert_called_once()
self.assertEqual(bookmark.favicon_file, "https_example_updated_com.png")
def test_load_favicon_should_handle_missing_bookmark(self):
with mock.patch(
"bookmarks.services.favicon_loader.load_favicon"
) as mock_load_favicon:
tasks._load_favicon_task(123)
self.run_pending_task(tasks._load_favicon_task)
tasks._load_favicon_task(123)
mock_load_favicon.assert_not_called()
self.mock_load_favicon.assert_not_called()
def test_load_favicon_should_not_save_stale_bookmark_data(self):
bookmark = self.setup_bookmark()
@ -474,24 +372,20 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
bookmark.save()
return "https_example_com.png"
with mock.patch(
"bookmarks.services.favicon_loader.load_favicon"
) as mock_load_favicon:
mock_load_favicon.side_effect = mock_load_favicon_impl
self.mock_load_favicon.side_effect = mock_load_favicon_impl
tasks.load_favicon(self.get_or_create_test_user(), bookmark)
self.run_pending_task(tasks._load_favicon_task)
bookmark.refresh_from_db()
tasks.load_favicon(self.get_or_create_test_user(), bookmark)
bookmark.refresh_from_db()
self.assertEqual(bookmark.title, "Updated title")
self.assertEqual(bookmark.favicon_file, "https_example_com.png")
self.assertEqual(bookmark.title, "Updated title")
self.assertEqual(bookmark.favicon_file, "https_example_com.png")
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_load_favicon_should_not_run_when_background_tasks_are_disabled(self):
bookmark = self.setup_bookmark()
tasks.load_favicon(self.get_or_create_test_user(), bookmark)
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(self.executed_count(), 0)
def test_load_favicon_should_not_run_when_favicon_feature_is_disabled(self):
self.user.profile.enable_favicons = False
@ -500,7 +394,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
bookmark = self.setup_bookmark()
tasks.load_favicon(self.get_or_create_test_user(), bookmark)
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(self.executed_count(), 0)
def test_schedule_bookmarks_without_favicons_should_load_favicon_for_all_bookmarks_without_favicon(
self,
@ -514,15 +408,9 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(favicon_file="https_example_com.png")
tasks.schedule_bookmarks_without_favicons(user)
self.run_pending_task(tasks._schedule_bookmarks_without_favicons_task)
task_list = Task.objects.all()
self.assertEqual(task_list.count(), 3)
for task in task_list:
self.assertEqual(
task.task_name, "bookmarks.services.tasks._load_favicon_task"
)
self.assertEqual(self.executed_count(), 4)
self.assertEqual(self.mock_load_favicon.call_count, 3)
def test_schedule_bookmarks_without_favicons_should_only_update_user_owned_bookmarks(
self,
@ -539,19 +427,17 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(user=other_user)
tasks.schedule_bookmarks_without_favicons(user)
self.run_pending_task(tasks._schedule_bookmarks_without_favicons_task)
task_list = Task.objects.all()
self.assertEqual(task_list.count(), 3)
self.assertEqual(self.mock_load_favicon.call_count, 3)
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_schedule_bookmarks_without_favicons_should_not_run_when_background_tasks_are_disabled(
self,
):
bookmark = self.setup_bookmark()
self.setup_bookmark()
tasks.schedule_bookmarks_without_favicons(self.get_or_create_test_user())
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(self.executed_count(), 0)
def test_schedule_bookmarks_without_favicons_should_not_run_when_favicon_feature_is_disabled(
self,
@ -559,10 +445,10 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.user.profile.enable_favicons = False
self.user.profile.save()
bookmark = self.setup_bookmark()
self.setup_bookmark()
tasks.schedule_bookmarks_without_favicons(self.get_or_create_test_user())
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(self.executed_count(), 0)
def test_schedule_refresh_favicons_should_update_favicon_for_all_bookmarks(self):
user = self.get_or_create_test_user()
@ -574,15 +460,9 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(favicon_file="https_example_com.png")
tasks.schedule_refresh_favicons(user)
self.run_pending_task(tasks._schedule_refresh_favicons_task)
task_list = Task.objects.all()
self.assertEqual(task_list.count(), 6)
for task in task_list:
self.assertEqual(
task.task_name, "bookmarks.services.tasks._load_favicon_task"
)
self.assertEqual(self.executed_count(), 7)
self.assertEqual(self.mock_load_favicon.call_count, 6)
def test_schedule_refresh_favicons_should_only_update_user_owned_bookmarks(self):
user = self.get_or_create_test_user()
@ -597,10 +477,8 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(user=other_user)
tasks.schedule_refresh_favicons(user)
self.run_pending_task(tasks._schedule_refresh_favicons_task)
task_list = Task.objects.all()
self.assertEqual(task_list.count(), 3)
self.assertEqual(self.mock_load_favicon.call_count, 3)
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_schedule_refresh_favicons_should_not_run_when_background_tasks_are_disabled(
@ -609,14 +487,14 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark()
tasks.schedule_refresh_favicons(self.get_or_create_test_user())
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(self.executed_count(), 0)
@override_settings(LD_ENABLE_REFRESH_FAVICONS=False)
def test_schedule_refresh_favicons_should_not_run_when_refresh_is_disabled(self):
self.setup_bookmark()
tasks.schedule_refresh_favicons(self.get_or_create_test_user())
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(self.executed_count(), 0)
def test_schedule_refresh_favicons_should_not_run_when_favicon_feature_is_disabled(
self,
@ -627,13 +505,14 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark()
tasks.schedule_refresh_favicons(self.get_or_create_test_user())
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(self.executed_count(), 0)
@override_settings(LD_ENABLE_SNAPSHOTS=True)
def test_create_html_snapshot_should_create_pending_asset(self):
bookmark = self.setup_bookmark()
with mock.patch("bookmarks.services.monolith.create_snapshot"):
# Mock the task function to avoid running it immediately
with mock.patch("bookmarks.services.tasks._create_html_snapshot_task"):
tasks.create_html_snapshot(bookmark)
self.assertEqual(BookmarkAsset.objects.count(), 1)
@ -652,18 +531,24 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
def test_create_html_snapshot_should_update_file_info(self):
bookmark = self.setup_bookmark(url="https://example.com")
with mock.patch("bookmarks.services.singlefile.create_snapshot") as mock_create:
tasks.create_html_snapshot(bookmark)
asset = BookmarkAsset.objects.get(bookmark=bookmark)
asset.date_created = datetime.datetime(2021, 1, 2, 3, 44, 55)
asset.save()
with mock.patch(
"bookmarks.services.tasks._generate_snapshot_filename"
) as mock_generate:
expected_filename = "snapshot_2021-01-02_034455_https___example.com.html.gz"
mock_generate.return_value = expected_filename
self.run_pending_task(tasks._create_html_snapshot_task)
tasks.create_html_snapshot(bookmark)
BookmarkAsset.objects.get(bookmark=bookmark)
mock_create.assert_called_once_with(
# Run periodic task to process the snapshot
tasks._schedule_html_snapshots_task()
self.mock_singlefile_create_snapshot.assert_called_once_with(
"https://example.com",
os.path.join(settings.LD_ASSET_FOLDER, expected_filename),
os.path.join(
settings.LD_ASSET_FOLDER,
expected_filename,
),
)
asset = BookmarkAsset.objects.get(bookmark=bookmark)
@ -675,39 +560,39 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
def test_create_html_snapshot_should_handle_error(self):
bookmark = self.setup_bookmark(url="https://example.com")
with mock.patch("bookmarks.services.singlefile.create_snapshot") as mock_create:
mock_create.side_effect = singlefile.SingeFileError("Error")
self.mock_singlefile_create_snapshot.side_effect = singlefile.SingeFileError(
"Error"
)
tasks.create_html_snapshot(bookmark)
tasks.create_html_snapshot(bookmark)
self.run_pending_task(tasks._create_html_snapshot_task)
# Run periodic task to process the snapshot
tasks._schedule_html_snapshots_task()
asset = BookmarkAsset.objects.get(bookmark=bookmark)
self.assertEqual(asset.status, BookmarkAsset.STATUS_FAILURE)
self.assertEqual(asset.file, "")
self.assertFalse(asset.gzip)
asset = BookmarkAsset.objects.get(bookmark=bookmark)
self.assertEqual(asset.status, BookmarkAsset.STATUS_FAILURE)
self.assertEqual(asset.file, "")
self.assertFalse(asset.gzip)
@override_settings(LD_ENABLE_SNAPSHOTS=True)
def test_create_html_snapshot_should_handle_missing_bookmark(self):
with mock.patch("bookmarks.services.singlefile.create_snapshot") as mock_create:
tasks._create_html_snapshot_task(123)
self.run_pending_task(tasks._create_html_snapshot_task)
tasks._create_html_snapshot_task(123)
mock_create.assert_not_called()
self.mock_singlefile_create_snapshot.assert_not_called()
@override_settings(LD_ENABLE_SNAPSHOTS=False)
def test_create_html_snapshot_should_not_run_when_single_file_is_disabled(
def test_create_html_snapshot_should_not_create_asset_when_single_file_is_disabled(
self,
):
bookmark = self.setup_bookmark()
tasks.create_html_snapshot(bookmark)
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(BookmarkAsset.objects.count(), 0)
@override_settings(LD_ENABLE_SNAPSHOTS=True, LD_DISABLE_BACKGROUND_TASKS=True)
def test_create_html_snapshot_should_not_run_when_background_tasks_are_disabled(
def test_create_html_snapshot_should_not_create_asset_when_background_tasks_are_disabled(
self,
):
bookmark = self.setup_bookmark()
tasks.create_html_snapshot(bookmark)
self.assertEqual(Task.objects.count(), 0)
self.assertEqual(BookmarkAsset.objects.count(), 0)

View file

@ -1,7 +1,7 @@
import gzip
import os
from unittest import mock
import subprocess
from unittest import mock
from django.test import TestCase
@ -24,9 +24,11 @@ class SingleFileServiceTestCase(TestCase):
file.write(self.html_content)
def test_create_snapshot(self):
with mock.patch("subprocess.run") as mock_run:
mock_run.side_effect = self.create_test_file
mock_process = mock.Mock()
mock_process.wait.return_value = 0
self.create_test_file()
with mock.patch("subprocess.Popen", return_value=mock_process):
singlefile.create_snapshot("http://example.com", self.html_filepath)
self.assertTrue(os.path.exists(self.html_filepath))
@ -38,13 +40,13 @@ class SingleFileServiceTestCase(TestCase):
def test_create_snapshot_failure(self):
# subprocess fails - which it probably doesn't as single-file doesn't return exit codes
with mock.patch("subprocess.run") as mock_run:
mock_run.side_effect = subprocess.CalledProcessError(1, "command")
with mock.patch("subprocess.Popen") as mock_popen:
mock_popen.side_effect = subprocess.CalledProcessError(1, "command")
with self.assertRaises(singlefile.SingeFileError):
singlefile.create_snapshot("http://example.com", self.html_filepath)
# so also check that it raises error if output file isn't created
with mock.patch("subprocess.run") as mock_run:
with mock.patch("subprocess.Popen"):
with self.assertRaises(singlefile.SingeFileError):
singlefile.create_snapshot("http://example.com", self.html_filepath)

View file

@ -18,6 +18,8 @@ python manage.py migrate
python manage.py enable_wal
# Create initial superuser if defined in options / environment variables
python manage.py create_initial_superuser
# Migrate legacy background tasks to Huey
python manage.py migrate_tasks
# Ensure the DB folder is owned by the right user
chown -R www-data: /etc/linkding/data

View file

@ -31,7 +31,8 @@ RUN pip install -U pip && pip install -r requirements.txt -r requirements.dev.tx
COPY . .
COPY --from=node-build /etc/linkding .
# run Django part of the build
RUN python manage.py compilescss && \
RUN mkdir data && \
python manage.py compilescss && \
python manage.py collectstatic --ignore=*.scss && \
python manage.py compilescss --delete-files

View file

@ -33,7 +33,8 @@ RUN pip install -U pip && pip install -r requirements.txt -r requirements.dev.tx
COPY . .
COPY --from=node-build /etc/linkding .
# run Django part of the build
RUN python manage.py compilescss && \
RUN mkdir data && \
python manage.py compilescss && \
python manage.py collectstatic --ignore=*.scss && \
python manage.py compilescss --delete-files

View file

@ -5,8 +5,8 @@ Django
django-registration
django-sass-processor
django-widget-tweaks
django4-background-tasks
djangorestframework
huey
Markdown
mozilla-django-oidc
psycopg2-binary

View file

@ -39,10 +39,10 @@ django-sass-processor==1.4
# via -r requirements.in
django-widget-tweaks==1.5.0
# via -r requirements.in
django4-background-tasks==1.2.9
# via -r requirements.in
djangorestframework==3.14.0
# via -r requirements.in
huey==2.5.0
# via -r requirements.in
idna==3.6
# via requests
josepy==1.14.0
@ -69,7 +69,6 @@ requests==2.31.0
six==1.16.0
# via
# bleach
# django4-background-tasks
# python-dateutil
soupsieve==2.5
# via beautifulsoup4

View file

@ -42,7 +42,7 @@ INSTALLED_APPS = [
"widget_tweaks",
"rest_framework",
"rest_framework.authtoken",
"background_task",
"huey.contrib.djhuey",
"mozilla_django_oidc",
]
@ -173,13 +173,27 @@ LD_DISABLE_BACKGROUND_TASKS = os.getenv("LD_DISABLE_BACKGROUND_TASKS", False) in
"1",
)
# django-background-tasks
MAX_ATTEMPTS = 5
# How many tasks will run in parallel
# We want to keep this low to prevent SQLite lock errors and in general not to consume too much resources on smaller
# specced systems like Raspberries. Should be OK as tasks are not time critical.
BACKGROUND_TASK_RUN_ASYNC = True
BACKGROUND_TASK_ASYNC_THREADS = 2
# Huey task queue
HUEY = {
"huey_class": "huey.SqliteHuey",
"filename": os.path.join(BASE_DIR, "data", "tasks.sqlite3"),
"immediate": False,
"results": False,
"store_none": False,
"utc": True,
"consumer": {
"workers": 2,
"worker_type": "thread",
"initial_delay": 5,
"backoff": 1.15,
"max_delay": 10,
"scheduler_interval": 10,
"periodic": True,
"check_worker_health": True,
"health_check_interval": 10,
},
}
# Enable OICD support if configured
LD_ENABLE_OIDC = os.getenv("LD_ENABLE_OIDC", False) in (True, "True", "1")

View file

@ -47,6 +47,11 @@ LOGGING = {
"handlers": ["console"],
"propagate": False,
},
"huey": { # Huey
"level": "INFO",
"handlers": ["console"],
"propagate": False,
},
},
}

View file

@ -49,7 +49,12 @@ LOGGING = {
"level": "INFO",
"handlers": ["console"],
"propagate": False,
}
},
"huey": {
"level": "INFO",
"handlers": ["console"],
"propagate": False,
},
},
}

View file

@ -4,8 +4,18 @@ loglevel=info
[program:jobs]
user=www-data
command=sh background-tasks-wrapper.sh
command=python manage.py run_huey -f
stdout_logfile=background_tasks.log
stdout_logfile_maxbytes=10MB
stdout_logfile_backups=5
redirect_stderr=true
[unix_http_server]
file=/var/run/supervisor.sock
chmod=0700
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
serverurl=unix:///var/run/supervisor.sock