From d87dde6baea13dc5d024c3738fae9306923a1f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sascha=20I=C3=9Fbr=C3=BCcker?= Date: Sat, 4 Sep 2021 22:31:04 +0200 Subject: [PATCH] Create snapshots on web.archive.org for bookmarks (#150) * Implement initial background tasks concept * fix property reference * update requirements.txt * simplify bookmark null check * improve web archive url display * add background tasks test * add basic supervisor setup * schedule missing snapshot creation on login * remove task locks and clear task history before starting background task processor * batch create snapshots after import * fix script reference in supervisord.conf * add option to disable background tasks * restructure feature overview --- .dockerignore | 4 + .env.sample | 4 +- README.md | 13 +- background-tasks-wrapper.sh | 5 + bookmarks/apps.py | 4 + bookmarks/management/commands/clean_tasks.py | 15 ++ .../0009_bookmark_web_archive_snapshot_url.py | 18 ++ bookmarks/models.py | 1 + bookmarks/services/bookmarks.py | 11 ++ bookmarks/services/importer.py | 4 + bookmarks/services/tasks.py | 62 +++++++ bookmarks/signals.py | 8 + bookmarks/styles/bookmarks.scss | 4 + .../templates/bookmarks/bookmark_list.html | 24 ++- bookmarks/tests/helpers.py | 4 +- bookmarks/tests/test_bookmarks_list_tag.py | 45 ++++- bookmarks/tests/test_bookmarks_service.py | 32 +++- bookmarks/tests/test_bookmarks_tasks.py | 154 ++++++++++++++++++ bookmarks/tests/test_importer.py | 12 ++ bookmarks/tests/test_signals.py | 15 ++ bootstrap.sh | 5 + docs/Options.md | 11 +- requirements.prod.txt | 4 + requirements.txt | 3 + siteroot/settings/base.py | 12 ++ siteroot/settings/dev.py | 5 + supervisord.conf | 10 ++ 27 files changed, 470 insertions(+), 19 deletions(-) create mode 100755 background-tasks-wrapper.sh create mode 100644 bookmarks/management/commands/clean_tasks.py create mode 100644 bookmarks/migrations/0009_bookmark_web_archive_snapshot_url.py create mode 100644 bookmarks/services/tasks.py create mode 100644 bookmarks/signals.py create mode 100644 bookmarks/tests/test_bookmarks_tasks.py create mode 100644 bookmarks/tests/test_signals.py create mode 100644 supervisord.conf diff --git a/.dockerignore b/.dockerignore index 445b537..9abd49b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -7,6 +7,7 @@ /docs /static /build +/out /.dockerignore /.gitignore @@ -17,10 +18,13 @@ /*.patch /*.md /*.js +/*.log +/*.pid # Whitelist files needed in build or prod image !/rollup.config.js !/bootstrap.sh +!/background-tasks-wrapper.sh # Remove development settings /siteroot/settings/dev.py diff --git a/.env.sample b/.env.sample index 0bd4924..32143e6 100644 --- a/.env.sample +++ b/.env.sample @@ -5,5 +5,7 @@ LD_HOST_PORT=9090 # Directory on the host system that should be mounted as data dir into the Docker container LD_HOST_DATA_DIR=./data +# Option to disable background tasks +LD_DISABLE_BACKGROUND_TASKS=False # Option to disable URL validation for bookmarks completely -LD_DISABLE_URL_VALIDATION=False \ No newline at end of file +LD_DISABLE_URL_VALIDATION=False diff --git a/README.md b/README.md index 4c7b3fe..effd415 100644 --- a/README.md +++ b/README.md @@ -13,17 +13,14 @@ The name comes from: - Search by text or tags - Bulk editing - Bookmark archive -- Automatically provides titles and descriptions from linked websites -- Import and export bookmarks in Netscape HTML format -- Extensions for [Firefox](https://addons.mozilla.org/de/firefox/addon/linkding-extension/) and [Chrome](https://chrome.google.com/webstore/detail/linkding-extension/beakmhbijpdhipnjhnclmhgjlddhidpe) -- Bookmarklet that should work in most browsers - Dark mode -- Easy to set up using Docker -- Uses SQLite as database -- Works without Javascript -- ...but has several UI enhancements when Javascript is enabled +- Automatically creates snapshots of bookmarked websites on [web archive](https://archive.org/web/) +- Automatically provides titles and descriptions of bookmarked websites +- Import and export bookmarks in Netscape HTML format +- Extensions for [Firefox](https://addons.mozilla.org/de/firefox/addon/linkding-extension/) and [Chrome](https://chrome.google.com/webstore/detail/linkding-extension/beakmhbijpdhipnjhnclmhgjlddhidpe), and a bookmarklet that should work in most browsers - REST API for developing 3rd party apps - Admin panel for user self-service and raw data access +- Easy to set up using Docker, uses SQLite as database **Demo:** https://demo.linkding.link/ (configured with open registration) diff --git a/background-tasks-wrapper.sh b/background-tasks-wrapper.sh new file mode 100755 index 0000000..9429544 --- /dev/null +++ b/background-tasks-wrapper.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +# Wrapper script used by supervisord to first clear task locks before starting the background task processor + +python manage.py clean_tasks +exec python manage.py process_tasks diff --git a/bookmarks/apps.py b/bookmarks/apps.py index db791ce..89f1c7c 100644 --- a/bookmarks/apps.py +++ b/bookmarks/apps.py @@ -3,3 +3,7 @@ from django.apps import AppConfig class BookmarksConfig(AppConfig): name = 'bookmarks' + + def ready(self): + # Register signal handlers + import bookmarks.signals diff --git a/bookmarks/management/commands/clean_tasks.py b/bookmarks/management/commands/clean_tasks.py new file mode 100644 index 0000000..90b7710 --- /dev/null +++ b/bookmarks/management/commands/clean_tasks.py @@ -0,0 +1,15 @@ +from background_task.models import Task, CompletedTask +from django.core.management.base import BaseCommand + + +class Command(BaseCommand): + help = "Remove task locks and clear completed task history" + + def handle(self, *args, **options): + # Remove task locks + # If the background task processor exited while executing tasks, these tasks would still be marked as locked, + # even though no process is working on them, and would prevent the task processor from picking the next task in + # the queue + Task.objects.all().update(locked_by=None, locked_at=None) + # Clear task history to prevent them from bloating the DB + CompletedTask.objects.all().delete() diff --git a/bookmarks/migrations/0009_bookmark_web_archive_snapshot_url.py b/bookmarks/migrations/0009_bookmark_web_archive_snapshot_url.py new file mode 100644 index 0000000..89483d1 --- /dev/null +++ b/bookmarks/migrations/0009_bookmark_web_archive_snapshot_url.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.20 on 2021-05-16 14:35 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('bookmarks', '0008_userprofile_bookmark_date_display'), + ] + + operations = [ + migrations.AddField( + model_name='bookmark', + name='web_archive_snapshot_url', + field=models.CharField(blank=True, max_length=2048), + ), + ] diff --git a/bookmarks/models.py b/bookmarks/models.py index 1fd079c..e772e80 100644 --- a/bookmarks/models.py +++ b/bookmarks/models.py @@ -41,6 +41,7 @@ class Bookmark(models.Model): description = models.TextField(blank=True) website_title = models.CharField(max_length=512, blank=True, null=True) website_description = models.TextField(blank=True, null=True) + web_archive_snapshot_url = models.CharField(max_length=2048, blank=True) unread = models.BooleanField(default=True) is_archived = models.BooleanField(default=False) date_added = models.DateTimeField() diff --git a/bookmarks/services/bookmarks.py b/bookmarks/services/bookmarks.py index 7223c30..5171016 100644 --- a/bookmarks/services/bookmarks.py +++ b/bookmarks/services/bookmarks.py @@ -6,6 +6,7 @@ from django.utils import timezone from bookmarks.models import Bookmark, parse_tag_string from bookmarks.services.tags import get_or_create_tags from bookmarks.services.website_loader import load_website_metadata +from bookmarks.services import tasks def create_bookmark(bookmark: Bookmark, tag_string: str, current_user: User): @@ -27,10 +28,16 @@ def create_bookmark(bookmark: Bookmark, tag_string: str, current_user: User): # Update tag list _update_bookmark_tags(bookmark, tag_string, current_user) bookmark.save() + # Create snapshot on web archive + tasks.create_web_archive_snapshot(bookmark.id, False) + return bookmark def update_bookmark(bookmark: Bookmark, tag_string, current_user: User): + # Detect URL change + original_bookmark = Bookmark.objects.get(id=bookmark.id) + has_url_changed = original_bookmark.url != bookmark.url # Update website info _update_website_metadata(bookmark) # Update tag list @@ -38,6 +45,10 @@ def update_bookmark(bookmark: Bookmark, tag_string, current_user: User): # Update dates bookmark.date_modified = timezone.now() bookmark.save() + # Update web archive snapshot, if URL changed + if has_url_changed: + tasks.create_web_archive_snapshot(bookmark.id, True) + return bookmark diff --git a/bookmarks/services/importer.py b/bookmarks/services/importer.py index ae75388..7fb0bd9 100644 --- a/bookmarks/services/importer.py +++ b/bookmarks/services/importer.py @@ -5,6 +5,7 @@ from django.contrib.auth.models import User from django.utils import timezone from bookmarks.models import Bookmark, parse_tag_string +from bookmarks.services import tasks from bookmarks.services.parser import parse, NetscapeBookmark from bookmarks.services.tags import get_or_create_tags from bookmarks.utils import parse_timestamp @@ -38,6 +39,9 @@ def import_netscape_html(html: str, user: User): logging.exception('Error importing bookmark: ' + shortened_bookmark_tag_str) result.failed = result.failed + 1 + # Create snapshots for newly imported bookmarks + tasks.schedule_bookmarks_without_snapshots(user.id) + return result diff --git a/bookmarks/services/tasks.py b/bookmarks/services/tasks.py new file mode 100644 index 0000000..eb7347d --- /dev/null +++ b/bookmarks/services/tasks.py @@ -0,0 +1,62 @@ +import logging + +import waybackpy +from background_task import background +from django.conf import settings +from django.contrib.auth import get_user_model +from waybackpy.exceptions import WaybackError + +from bookmarks.models import Bookmark + +logger = logging.getLogger(__name__) + + +def when_background_tasks_enabled(fn): + def wrapper(*args, **kwargs): + if settings.LD_DISABLE_BACKGROUND_TASKS: + return + return fn(*args, **kwargs) + + # Expose attributes from wrapped TaskProxy function + attrs = vars(fn) + for key, value in attrs.items(): + setattr(wrapper, key, value) + + return wrapper + + +@when_background_tasks_enabled +@background() +def create_web_archive_snapshot(bookmark_id: int, force_update: bool): + try: + bookmark = Bookmark.objects.get(id=bookmark_id) + except Bookmark.DoesNotExist: + return + + # Skip if snapshot exists and update is not explicitly requested + if bookmark.web_archive_snapshot_url and not force_update: + return + + logger.debug(f'Create web archive link for bookmark: {bookmark}...') + + wayback = waybackpy.Url(bookmark.url) + + try: + archive = wayback.save() + except WaybackError as error: + logger.exception(f'Error creating web archive link for bookmark: {bookmark}...', exc_info=error) + raise + + bookmark.web_archive_snapshot_url = archive.archive_url + bookmark.save() + logger.debug(f'Successfully created web archive link for bookmark: {bookmark}...') + + +@when_background_tasks_enabled +@background() +def schedule_bookmarks_without_snapshots(user_id: int): + user = get_user_model().objects.get(id=user_id) + bookmarks_without_snapshots = Bookmark.objects.filter(web_archive_snapshot_url__exact='', owner=user) + + for bookmark in bookmarks_without_snapshots: + create_web_archive_snapshot(bookmark.id, False) diff --git a/bookmarks/signals.py b/bookmarks/signals.py new file mode 100644 index 0000000..602ef4f --- /dev/null +++ b/bookmarks/signals.py @@ -0,0 +1,8 @@ +from django.contrib.auth import user_logged_in +from django.dispatch import receiver +from bookmarks.services import tasks + + +@receiver(user_logged_in) +def user_logged_in(sender, request, user, **kwargs): + tasks.schedule_bookmarks_without_snapshots(user.id) diff --git a/bookmarks/styles/bookmarks.scss b/bookmarks/styles/bookmarks.scss index 0ebc101..ae33592 100644 --- a/bookmarks/styles/bookmarks.scss +++ b/bookmarks/styles/bookmarks.scss @@ -54,6 +54,10 @@ ul.bookmark-list { margin-right: 0.1rem; } + .actions .date-label a { + color: $gray-color; + } + .actions .btn-link { color: $gray-color; padding: 0; diff --git a/bookmarks/templates/bookmarks/bookmark_list.html b/bookmarks/templates/bookmarks/bookmark_list.html index d2c2bcb..ff83f38 100644 --- a/bookmarks/templates/bookmarks/bookmark_list.html +++ b/bookmarks/templates/bookmarks/bookmark_list.html @@ -27,11 +27,31 @@
{% if request.user.profile.bookmark_date_display == 'relative' %} - {{ bookmark.date_added|humanize_relative_date }} + + {% if bookmark.web_archive_snapshot_url %} + + {% endif %} + {{ bookmark.date_added|humanize_relative_date }} + {% if bookmark.web_archive_snapshot_url %} + + + {% endif %} + | {% endif %} {% if request.user.profile.bookmark_date_display == 'absolute' %} - {{ bookmark.date_added|humanize_absolute_date }} + + {% if bookmark.web_archive_snapshot_url %} + + {% endif %} + {{ bookmark.date_added|humanize_absolute_date }} + {% if bookmark.web_archive_snapshot_url %} + + + {% endif %} + | {% endif %} {formatted_date} + + {formatted_date} + + | + ''', html) + + def test_should_render_web_archive_link_with_absolute_date_setting(self): + bookmark = self.setup_date_format_test(UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE, + 'https://web.archive.org/web/20210811214511/https://wanikani.com/') + html = self.render_template([bookmark]) + formatted_date = formats.date_format(bookmark.date_added, 'SHORT_DATE_FORMAT') + + self.assertInHTML(f''' + + + {formatted_date} + + + + | ''', html) def test_should_respect_relative_date_setting(self): @@ -47,5 +68,23 @@ class BookmarkListTagTest(TestCase, BookmarkFactoryMixin): html = self.render_template([bookmark]) self.assertInHTML(''' - 1 week ago + + 1 week ago + + | + ''', html) + + def test_should_render_web_archive_link_with_relative_date_setting(self): + bookmark = self.setup_date_format_test(UserProfile.BOOKMARK_DATE_DISPLAY_RELATIVE, + 'https://web.archive.org/web/20210811214511/https://wanikani.com/') + html = self.render_template([bookmark]) + self.assertInHTML(f''' + + + 1 week ago + + + + | ''', html) diff --git a/bookmarks/tests/test_bookmarks_service.py b/bookmarks/tests/test_bookmarks_service.py index 7d9beba..6219e39 100644 --- a/bookmarks/tests/test_bookmarks_service.py +++ b/bookmarks/tests/test_bookmarks_service.py @@ -1,11 +1,14 @@ +from unittest.mock import patch + from django.contrib.auth import get_user_model from django.test import TestCase from django.utils import timezone from bookmarks.models import Bookmark, Tag -from bookmarks.services.bookmarks import archive_bookmark, archive_bookmarks, unarchive_bookmark, unarchive_bookmarks, \ - delete_bookmarks, tag_bookmarks, untag_bookmarks +from bookmarks.services.bookmarks import create_bookmark, update_bookmark, archive_bookmark, archive_bookmarks, \ + unarchive_bookmark, unarchive_bookmarks, delete_bookmarks, tag_bookmarks, untag_bookmarks from bookmarks.tests.helpers import BookmarkFactoryMixin +from bookmarks.services import tasks User = get_user_model() @@ -13,7 +16,30 @@ User = get_user_model() class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin): def setUp(self) -> None: - self.user = User.objects.create_user('testuser', 'test@example.com', 'password123') + self.get_or_create_test_user() + + def test_create_should_create_web_archive_snapshot(self): + with patch.object(tasks, 'create_web_archive_snapshot') as mock_create_web_archive_snapshot: + bookmark_data = Bookmark(url='https://example.com') + bookmark = create_bookmark(bookmark_data, 'tag1 tag2', self.user) + + mock_create_web_archive_snapshot.assert_called_once_with(bookmark.id, False) + + def test_update_should_create_web_archive_snapshot_if_url_did_change(self): + with patch.object(tasks, 'create_web_archive_snapshot') as mock_create_web_archive_snapshot: + bookmark = self.setup_bookmark() + bookmark.url = 'https://example.com/updated' + update_bookmark(bookmark, 'tag1 tag2', self.user) + + mock_create_web_archive_snapshot.assert_called_once_with(bookmark.id, True) + + def test_update_should_not_create_web_archive_snapshot_if_url_did_not_change(self): + with patch.object(tasks, 'create_web_archive_snapshot') as mock_create_web_archive_snapshot: + bookmark = self.setup_bookmark() + bookmark.title = 'updated title' + update_bookmark(bookmark, 'tag1 tag2', self.user) + + mock_create_web_archive_snapshot.assert_not_called() def test_archive_bookmark(self): bookmark = Bookmark( diff --git a/bookmarks/tests/test_bookmarks_tasks.py b/bookmarks/tests/test_bookmarks_tasks.py new file mode 100644 index 0000000..af585a4 --- /dev/null +++ b/bookmarks/tests/test_bookmarks_tasks.py @@ -0,0 +1,154 @@ +from unittest.mock import patch + +import waybackpy +from background_task.models import Task +from django.contrib.auth.models import User +from django.test import TestCase, override_settings + +from bookmarks.models import Bookmark +from bookmarks.services.tasks import create_web_archive_snapshot, schedule_bookmarks_without_snapshots +from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging + + +class MockWaybackUrl: + + def __init__(self, archive_url: str): + self.archive_url = archive_url + + def save(self): + return self + + +class MockWaybackUrlWithSaveError: + def save(self): + raise NotImplementedError + + +class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): + + @disable_logging + def run_pending_task(self, task_function): + func = getattr(task_function, 'task_function', None) + task = Task.objects.all()[0] + args, kwargs = task.params() + func(*args, **kwargs) + task.delete() + + @disable_logging + def run_all_pending_tasks(self, task_function): + func = getattr(task_function, 'task_function', None) + tasks = Task.objects.all() + + for task in tasks: + args, kwargs = task.params() + func(*args, **kwargs) + task.delete() + + def test_create_web_archive_snapshot_should_update_snapshot_url(self): + bookmark = self.setup_bookmark() + + with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://example.com')): + create_web_archive_snapshot(bookmark.id, False) + self.run_pending_task(create_web_archive_snapshot) + bookmark.refresh_from_db() + + self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com') + + def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self): + with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://example.com')) as mock_wayback_url: + create_web_archive_snapshot(123, False) + self.run_pending_task(create_web_archive_snapshot) + + mock_wayback_url.assert_not_called() + + def test_create_web_archive_snapshot_should_handle_wayback_save_error(self): + bookmark = self.setup_bookmark() + + with patch.object(waybackpy, 'Url', + return_value=MockWaybackUrlWithSaveError()): + with self.assertRaises(NotImplementedError): + create_web_archive_snapshot(bookmark.id, False) + self.run_pending_task(create_web_archive_snapshot) + + def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self): + bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com') + + with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://other.com')): + create_web_archive_snapshot(bookmark.id, False) + self.run_pending_task(create_web_archive_snapshot) + bookmark.refresh_from_db() + + self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com') + + def test_create_web_archive_snapshot_should_force_update_snapshot(self): + bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com') + + with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://other.com')): + create_web_archive_snapshot(bookmark.id, True) + self.run_pending_task(create_web_archive_snapshot) + bookmark.refresh_from_db() + + self.assertEqual(bookmark.web_archive_snapshot_url, 'https://other.com') + + @override_settings(LD_DISABLE_BACKGROUND_TASKS=True) + def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(self): + bookmark = self.setup_bookmark() + create_web_archive_snapshot(bookmark.id, False) + + self.assertEqual(Task.objects.count(), 0) + + def test_schedule_bookmarks_without_snapshots_should_create_snapshot_task_for_all_bookmarks_without_snapshot(self): + user = self.get_or_create_test_user() + self.setup_bookmark() + self.setup_bookmark() + self.setup_bookmark() + + with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://example.com')): + schedule_bookmarks_without_snapshots(user.id) + self.run_pending_task(schedule_bookmarks_without_snapshots) + self.run_all_pending_tasks(create_web_archive_snapshot) + + for bookmark in Bookmark.objects.all(): + self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com') + + def test_schedule_bookmarks_without_snapshots_should_not_update_bookmarks_with_existing_snapshot(self): + user = self.get_or_create_test_user() + self.setup_bookmark(web_archive_snapshot_url='https://example.com') + self.setup_bookmark(web_archive_snapshot_url='https://example.com') + self.setup_bookmark(web_archive_snapshot_url='https://example.com') + + with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://other.com')): + schedule_bookmarks_without_snapshots(user.id) + self.run_pending_task(schedule_bookmarks_without_snapshots) + self.run_all_pending_tasks(create_web_archive_snapshot) + + for bookmark in Bookmark.objects.all(): + self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com') + + def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(self): + user = self.get_or_create_test_user() + other_user = User.objects.create_user('otheruser', 'otheruser@example.com', 'password123') + self.setup_bookmark() + self.setup_bookmark() + self.setup_bookmark() + self.setup_bookmark(user=other_user) + self.setup_bookmark(user=other_user) + self.setup_bookmark(user=other_user) + + with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://example.com')): + schedule_bookmarks_without_snapshots(user.id) + self.run_pending_task(schedule_bookmarks_without_snapshots) + self.run_all_pending_tasks(create_web_archive_snapshot) + + for bookmark in Bookmark.objects.all().filter(owner=user): + self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com') + + for bookmark in Bookmark.objects.all().filter(owner=other_user): + self.assertEqual(bookmark.web_archive_snapshot_url, '') + + @override_settings(LD_DISABLE_BACKGROUND_TASKS=True) + def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(self): + user = self.get_or_create_test_user() + schedule_bookmarks_without_snapshots(user.id) + + self.assertEqual(Task.objects.count(), 0) diff --git a/bookmarks/tests/test_importer.py b/bookmarks/tests/test_importer.py index 5cb2283..d67638e 100644 --- a/bookmarks/tests/test_importer.py +++ b/bookmarks/tests/test_importer.py @@ -1,5 +1,8 @@ +from unittest.mock import patch + from django.test import TestCase +from bookmarks.services import tasks from bookmarks.services.importer import import_netscape_html from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging @@ -31,3 +34,12 @@ class ImporterTestCase(TestCase, BookmarkFactoryMixin): import_result = import_netscape_html(test_html, self.get_or_create_test_user()) self.assertEqual(import_result.success, 0) + + def test_schedule_snapshot_creation(self): + user = self.get_or_create_test_user() + test_html = self.create_import_html('') + + with patch.object(tasks, 'schedule_bookmarks_without_snapshots') as mock_schedule_bookmarks_without_snapshots: + import_netscape_html(test_html, user) + + mock_schedule_bookmarks_without_snapshots.assert_called_once_with(user.id) diff --git a/bookmarks/tests/test_signals.py b/bookmarks/tests/test_signals.py new file mode 100644 index 0000000..fe2c5ac --- /dev/null +++ b/bookmarks/tests/test_signals.py @@ -0,0 +1,15 @@ +from unittest.mock import patch + +from django.test import TestCase + +from bookmarks.services import tasks +from bookmarks.tests.helpers import BookmarkFactoryMixin + + +class SignalsTestCase(TestCase, BookmarkFactoryMixin): + def test_login_should_schedule_snapshot_creation(self): + user = self.get_or_create_test_user() + + with patch.object(tasks, 'schedule_bookmarks_without_snapshots') as mock_schedule_bookmarks_without_snapshots: + self.client.force_login(user) + mock_schedule_bookmarks_without_snapshots.assert_called_once_with(user.id) diff --git a/bootstrap.sh b/bootstrap.sh index bbccee3..1ccf9a0 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -12,5 +12,10 @@ python manage.py generate_secret_key # Ensure the DB folder is owned by the right user chown -R www-data: /etc/linkding/data +# Start background task processor using supervisord, unless explicitly disabled +if [ "$LD_DISABLE_BACKGROUND_TASKS" != "True" ]; then + supervisord -c supervisord.conf +fi + # Start uwsgi server uwsgi uwsgi.ini diff --git a/docs/Options.md b/docs/Options.md index 0f39c8d..bb57b03 100644 --- a/docs/Options.md +++ b/docs/Options.md @@ -25,11 +25,20 @@ All options need to be defined as environment variables in the environment that ## List of options +### `LD_DISABLE_BACKGROUND_TASKS` + +Values: `True`, `False` | Default = `False` + +Disables background tasks, such as creating snapshots for bookmarks on the web archive. +Enabling this flag will prevent the background task processor from starting up, and prevents scheduling tasks. +This might be useful if you are experiencing performance issues or other problematic behaviour due to background task processing. + ### `LD_DISABLE_URL_VALIDATION` Values: `True`, `False` | Default = `False` -Completely disables URL validation for bookmarks. This can be useful if you intend to store non fully qualified domain name URLs, such as network paths, or you want to store URLs that use another protocol than `http` or `https`. +Completely disables URL validation for bookmarks. +This can be useful if you intend to store non fully qualified domain name URLs, such as network paths, or you want to store URLs that use another protocol than `http` or `https`. ### `LD_REQUEST_TIMEOUT` diff --git a/requirements.prod.txt b/requirements.prod.txt index 8ba016b..9d550f1 100644 --- a/requirements.prod.txt +++ b/requirements.prod.txt @@ -4,6 +4,8 @@ certifi==2019.6.16 charset-normalizer==2.0.4 confusable-homoglyphs==3.2.0 Django==3.2.6 +django-background-tasks==1.2.5 +django-compat==1.0.15 django-generate-secret-key==1.0.2 django-picklefield==3.0.1 django-registration==3.2 @@ -17,6 +19,8 @@ pytz==2021.1 requests==2.26.0 soupsieve==1.9.2 sqlparse==0.4.1 +supervisor==4.2.2 typing-extensions==3.10.0.0 urllib3==1.26.6 uWSGI==2.0.18 +waybackpy==2.4.3 diff --git a/requirements.txt b/requirements.txt index 947cb8b..e3c8ef8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,8 @@ confusable-homoglyphs==3.2.0 coverage==5.5 Django==3.2.6 django-appconf==1.0.4 +django-background-tasks==1.2.5 +django-compat==1.0.15 django-compressor==2.4.1 django-debug-toolbar==3.2.1 django-generate-secret-key==1.0.2 @@ -27,3 +29,4 @@ soupsieve==1.9.2 sqlparse==0.4.1 typing-extensions==3.10.0.0 urllib3==1.26.6 +waybackpy==2.4.3 diff --git a/siteroot/settings/base.py b/siteroot/settings/base.py index 8d141b8..ca522e8 100644 --- a/siteroot/settings/base.py +++ b/siteroot/settings/base.py @@ -42,6 +42,7 @@ INSTALLED_APPS = [ 'django_generate_secret_key', 'rest_framework', 'rest_framework.authtoken', + 'background_task', ] MIDDLEWARE = [ @@ -166,3 +167,14 @@ ALLOW_REGISTRATION = False # URL validation flag LD_DISABLE_URL_VALIDATION = os.getenv('LD_DISABLE_URL_VALIDATION', False) in (True, 'True', '1') + +# Background task enabled setting +LD_DISABLE_BACKGROUND_TASKS = os.getenv('LD_DISABLE_BACKGROUND_TASKS', False) in (True, 'True', '1') + +# django-background-tasks +MAX_ATTEMPTS = 5 +# How many tasks will run in parallel +# We want to keep this low to prevent SQLite lock errors and in general not to consume too much resources on smaller +# specced systems like Raspberries. Should be OK as tasks are not time critical. +BACKGROUND_TASK_RUN_ASYNC = True +BACKGROUND_TASK_ASYNC_THREADS = 2 diff --git a/siteroot/settings/dev.py b/siteroot/settings/dev.py index b5ceaac..cba5b7a 100644 --- a/siteroot/settings/dev.py +++ b/siteroot/settings/dev.py @@ -43,6 +43,11 @@ LOGGING = { 'django.db.backends': { 'level': 'ERROR', # Set to DEBUG to log all SQL calls 'handlers': ['console'], + }, + 'bookmarks.services.tasks': { # Log task output + 'level': 'DEBUG', + 'handlers': ['console'], + 'propagate': False, } } } diff --git a/supervisord.conf b/supervisord.conf new file mode 100644 index 0000000..bdbe724 --- /dev/null +++ b/supervisord.conf @@ -0,0 +1,10 @@ +[supervisord] +user=root +loglevel=info + +[program:jobs] +user=www-data +command=sh background-tasks-wrapper.sh +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +redirect_stderr=true