Create snapshots on web.archive.org for bookmarks (#150)

* Implement initial background tasks concept

* fix property reference

* update requirements.txt

* simplify bookmark null check

* improve web archive url display

* add background tasks test

* add basic supervisor setup

* schedule missing snapshot creation on login

* remove task locks and clear task history before starting background task processor

* batch create snapshots after import

* fix script reference in supervisord.conf

* add option to disable background tasks

* restructure feature overview
This commit is contained in:
Sascha Ißbrücker 2021-09-04 22:31:04 +02:00 committed by GitHub
parent 8d214649b7
commit d87dde6bae
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 470 additions and 19 deletions

View file

@ -7,6 +7,7 @@
/docs /docs
/static /static
/build /build
/out
/.dockerignore /.dockerignore
/.gitignore /.gitignore
@ -17,10 +18,13 @@
/*.patch /*.patch
/*.md /*.md
/*.js /*.js
/*.log
/*.pid
# Whitelist files needed in build or prod image # Whitelist files needed in build or prod image
!/rollup.config.js !/rollup.config.js
!/bootstrap.sh !/bootstrap.sh
!/background-tasks-wrapper.sh
# Remove development settings # Remove development settings
/siteroot/settings/dev.py /siteroot/settings/dev.py

View file

@ -5,5 +5,7 @@ LD_HOST_PORT=9090
# Directory on the host system that should be mounted as data dir into the Docker container # Directory on the host system that should be mounted as data dir into the Docker container
LD_HOST_DATA_DIR=./data LD_HOST_DATA_DIR=./data
# Option to disable background tasks
LD_DISABLE_BACKGROUND_TASKS=False
# Option to disable URL validation for bookmarks completely # Option to disable URL validation for bookmarks completely
LD_DISABLE_URL_VALIDATION=False LD_DISABLE_URL_VALIDATION=False

View file

@ -13,17 +13,14 @@ The name comes from:
- Search by text or tags - Search by text or tags
- Bulk editing - Bulk editing
- Bookmark archive - Bookmark archive
- Automatically provides titles and descriptions from linked websites
- Import and export bookmarks in Netscape HTML format
- Extensions for [Firefox](https://addons.mozilla.org/de/firefox/addon/linkding-extension/) and [Chrome](https://chrome.google.com/webstore/detail/linkding-extension/beakmhbijpdhipnjhnclmhgjlddhidpe)
- Bookmarklet that should work in most browsers
- Dark mode - Dark mode
- Easy to set up using Docker - Automatically creates snapshots of bookmarked websites on [web archive](https://archive.org/web/)
- Uses SQLite as database - Automatically provides titles and descriptions of bookmarked websites
- Works without Javascript - Import and export bookmarks in Netscape HTML format
- ...but has several UI enhancements when Javascript is enabled - Extensions for [Firefox](https://addons.mozilla.org/de/firefox/addon/linkding-extension/) and [Chrome](https://chrome.google.com/webstore/detail/linkding-extension/beakmhbijpdhipnjhnclmhgjlddhidpe), and a bookmarklet that should work in most browsers
- REST API for developing 3rd party apps - REST API for developing 3rd party apps
- Admin panel for user self-service and raw data access - Admin panel for user self-service and raw data access
- Easy to set up using Docker, uses SQLite as database
**Demo:** https://demo.linkding.link/ (configured with open registration) **Demo:** https://demo.linkding.link/ (configured with open registration)

5
background-tasks-wrapper.sh Executable file
View file

@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Wrapper script used by supervisord to first clear task locks before starting the background task processor

# Reset stale task locks and drop the completed-task history (clean_tasks management command)
python manage.py clean_tasks
# exec replaces this shell with the task processor instead of running it as a child process
exec python manage.py process_tasks

View file

@ -3,3 +3,7 @@ from django.apps import AppConfig
class BookmarksConfig(AppConfig): class BookmarksConfig(AppConfig):
name = 'bookmarks' name = 'bookmarks'
def ready(self):
# Register signal handlers
import bookmarks.signals

View file

@ -0,0 +1,15 @@
from background_task.models import Task, CompletedTask
from django.core.management.base import BaseCommand
class Command(BaseCommand):
    """Management command that resets the background task queue to a clean state."""

    help = "Remove task locks and clear completed task history"

    def handle(self, *args, **options):
        """Unlock every queued task, then delete the completed-task history."""
        # A task processor that exited while executing tasks leaves those tasks
        # marked as locked. No process is working on them any more, yet the stale
        # lock would keep the processor from picking the next task in the queue,
        # so every lock is dropped here.
        unlocked = {'locked_by': None, 'locked_at': None}
        Task.objects.all().update(**unlocked)

        # Discard the history of finished tasks so that it cannot bloat the DB
        finished_tasks = CompletedTask.objects.all()
        finished_tasks.delete()

View file

@ -0,0 +1,18 @@
# Generated by Django 2.2.20 on 2021-05-16 14:35
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``web_archive_snapshot_url`` field to the ``bookmark`` model."""

    # Must run after the migration that added the bookmark date display setting
    dependencies = [('bookmarks', '0008_userprofile_bookmark_date_display')]

    operations = [
        migrations.AddField(
            name='web_archive_snapshot_url',
            model_name='bookmark',
            # Blank is allowed: a bookmark has no snapshot URL until one is stored
            field=models.CharField(max_length=2048, blank=True),
        ),
    ]

View file

@ -41,6 +41,7 @@ class Bookmark(models.Model):
description = models.TextField(blank=True) description = models.TextField(blank=True)
website_title = models.CharField(max_length=512, blank=True, null=True) website_title = models.CharField(max_length=512, blank=True, null=True)
website_description = models.TextField(blank=True, null=True) website_description = models.TextField(blank=True, null=True)
web_archive_snapshot_url = models.CharField(max_length=2048, blank=True)
unread = models.BooleanField(default=True) unread = models.BooleanField(default=True)
is_archived = models.BooleanField(default=False) is_archived = models.BooleanField(default=False)
date_added = models.DateTimeField() date_added = models.DateTimeField()

View file

@ -6,6 +6,7 @@ from django.utils import timezone
from bookmarks.models import Bookmark, parse_tag_string from bookmarks.models import Bookmark, parse_tag_string
from bookmarks.services.tags import get_or_create_tags from bookmarks.services.tags import get_or_create_tags
from bookmarks.services.website_loader import load_website_metadata from bookmarks.services.website_loader import load_website_metadata
from bookmarks.services import tasks
def create_bookmark(bookmark: Bookmark, tag_string: str, current_user: User): def create_bookmark(bookmark: Bookmark, tag_string: str, current_user: User):
@ -27,10 +28,16 @@ def create_bookmark(bookmark: Bookmark, tag_string: str, current_user: User):
# Update tag list # Update tag list
_update_bookmark_tags(bookmark, tag_string, current_user) _update_bookmark_tags(bookmark, tag_string, current_user)
bookmark.save() bookmark.save()
# Create snapshot on web archive
tasks.create_web_archive_snapshot(bookmark.id, False)
return bookmark return bookmark
def update_bookmark(bookmark: Bookmark, tag_string, current_user: User): def update_bookmark(bookmark: Bookmark, tag_string, current_user: User):
# Detect URL change
original_bookmark = Bookmark.objects.get(id=bookmark.id)
has_url_changed = original_bookmark.url != bookmark.url
# Update website info # Update website info
_update_website_metadata(bookmark) _update_website_metadata(bookmark)
# Update tag list # Update tag list
@ -38,6 +45,10 @@ def update_bookmark(bookmark: Bookmark, tag_string, current_user: User):
# Update dates # Update dates
bookmark.date_modified = timezone.now() bookmark.date_modified = timezone.now()
bookmark.save() bookmark.save()
# Update web archive snapshot, if URL changed
if has_url_changed:
tasks.create_web_archive_snapshot(bookmark.id, True)
return bookmark return bookmark

View file

@ -5,6 +5,7 @@ from django.contrib.auth.models import User
from django.utils import timezone from django.utils import timezone
from bookmarks.models import Bookmark, parse_tag_string from bookmarks.models import Bookmark, parse_tag_string
from bookmarks.services import tasks
from bookmarks.services.parser import parse, NetscapeBookmark from bookmarks.services.parser import parse, NetscapeBookmark
from bookmarks.services.tags import get_or_create_tags from bookmarks.services.tags import get_or_create_tags
from bookmarks.utils import parse_timestamp from bookmarks.utils import parse_timestamp
@ -38,6 +39,9 @@ def import_netscape_html(html: str, user: User):
logging.exception('Error importing bookmark: ' + shortened_bookmark_tag_str) logging.exception('Error importing bookmark: ' + shortened_bookmark_tag_str)
result.failed = result.failed + 1 result.failed = result.failed + 1
# Create snapshots for newly imported bookmarks
tasks.schedule_bookmarks_without_snapshots(user.id)
return result return result

View file

@ -0,0 +1,62 @@
import logging
import waybackpy
from background_task import background
from django.conf import settings
from django.contrib.auth import get_user_model
from waybackpy.exceptions import WaybackError
from bookmarks.models import Bookmark
logger = logging.getLogger(__name__)
def when_background_tasks_enabled(fn):
    """Decorator that turns ``fn`` into a no-op while background tasks are disabled.

    ``settings.LD_DISABLE_BACKGROUND_TASKS`` is read on every call rather than once
    at decoration time.

    :param fn: the callable to guard; in this module it is the object returned by
        ``@background()``.
    :return: a wrapper that returns ``None`` without calling ``fn`` when background
        tasks are disabled, and otherwise returns ``fn(*args, **kwargs)``.
    """
    # Imported locally so that this helper does not depend on the module's import block
    from functools import wraps

    # ``wraps`` copies everything stored in ``vars(fn)`` onto the wrapper, which keeps
    # the attributes of the wrapped task object reachable through the decorated name.
    # It also records the original as ``wrapper.__wrapped__`` and, unlike a manual
    # ``vars(fn)`` loop, tolerates callables that have no ``__dict__``.
    @wraps(fn)
    def wrapper(*args, **kwargs):
        if settings.LD_DISABLE_BACKGROUND_TASKS:
            return None
        return fn(*args, **kwargs)

    return wrapper
@when_background_tasks_enabled
@background()
def create_web_archive_snapshot(bookmark_id: int, force_update: bool):
    """Create a snapshot of a bookmark's URL on the web archive and store its URL.

    Does nothing if no bookmark with the given ID exists, or if the bookmark already
    has a snapshot URL and ``force_update`` is false.

    :param bookmark_id: ID of the bookmark to create a snapshot for.
    :param force_update: create a new snapshot even if the bookmark already has one.
    :raises WaybackError: re-raised, after logging, when saving the snapshot fails.
    """
    try:
        bookmark = Bookmark.objects.get(id=bookmark_id)
    except Bookmark.DoesNotExist:
        # Nothing to archive for an unknown bookmark ID
        return

    # Skip if snapshot exists and update is not explicitly requested
    if bookmark.web_archive_snapshot_url and not force_update:
        return

    logger.debug(f'Create web archive link for bookmark: {bookmark}...')

    wayback = waybackpy.Url(bookmark.url)

    try:
        archive = wayback.save()
    except WaybackError as error:
        logger.exception(f'Error creating web archive link for bookmark: {bookmark}...', exc_info=error)
        raise

    bookmark.web_archive_snapshot_url = archive.archive_url
    # Persist only the snapshot URL: the archive request above can take a while, and a
    # full save would write this task's stale copy of every other field over any edits
    # made to the bookmark in the meantime.
    bookmark.save(update_fields=['web_archive_snapshot_url'])
    logger.debug(f'Successfully created web archive link for bookmark: {bookmark}...')
@when_background_tasks_enabled
@background()
def schedule_bookmarks_without_snapshots(user_id: int):
    """Schedule snapshot creation for each of a user's bookmarks that has no snapshot URL.

    :param user_id: ID of the user whose bookmarks should be checked. An ID that
        matches no bookmarks simply schedules nothing.
    """
    # Filter on the owner's ID directly instead of loading the user first: this saves
    # a query and keeps the task from failing when the user no longer exists.
    # Only the IDs are fetched, as that is all the snapshot task needs.
    bookmark_ids = (
        Bookmark.objects
        .filter(web_archive_snapshot_url__exact='', owner_id=user_id)
        .values_list('id', flat=True)
    )

    for bookmark_id in bookmark_ids:
        # No forced update: these bookmarks have no snapshot URL to replace
        create_web_archive_snapshot(bookmark_id, False)

8
bookmarks/signals.py Normal file
View file

@ -0,0 +1,8 @@
from django.contrib.auth import user_logged_in
from django.dispatch import receiver
from bookmarks.services import tasks
@receiver(user_logged_in)
def user_logged_in(sender, request, user, **kwargs):
    """Schedule snapshot creation for the bookmarks of the user who just logged in.

    NOTE: this handler has the same name as the imported ``user_logged_in`` signal, so
    after this definition the module-level name refers to the handler, not the signal.
    """
    logged_in_user_id = user.id
    tasks.schedule_bookmarks_without_snapshots(logged_in_user_id)

View file

@ -54,6 +54,10 @@ ul.bookmark-list {
margin-right: 0.1rem; margin-right: 0.1rem;
} }
.actions .date-label a {
color: $gray-color;
}
.actions .btn-link { .actions .btn-link {
color: $gray-color; color: $gray-color;
padding: 0; padding: 0;

View file

@ -27,11 +27,31 @@
</div> </div>
<div class="actions"> <div class="actions">
{% if request.user.profile.bookmark_date_display == 'relative' %} {% if request.user.profile.bookmark_date_display == 'relative' %}
<span class="text-gray text-sm">{{ bookmark.date_added|humanize_relative_date }}</span> <span class="date-label text-gray text-sm">
{% if bookmark.web_archive_snapshot_url %}
<a href="{{ bookmark.web_archive_snapshot_url }}"
title="Show snapshot on web archive" target="_blank" rel="noopener">
{% endif %}
<span>{{ bookmark.date_added|humanize_relative_date }}</span>
{% if bookmark.web_archive_snapshot_url %}
<span></span>
</a>
{% endif %}
</span>
<span class="text-gray text-sm">|</span> <span class="text-gray text-sm">|</span>
{% endif %} {% endif %}
{% if request.user.profile.bookmark_date_display == 'absolute' %} {% if request.user.profile.bookmark_date_display == 'absolute' %}
<span class="text-gray text-sm">{{ bookmark.date_added|humanize_absolute_date }}</span> <span class="date-label text-gray text-sm">
{% if bookmark.web_archive_snapshot_url %}
<a href="{{ bookmark.web_archive_snapshot_url }}"
title="Show snapshot on web archive" target="_blank" rel="noopener">
{% endif %}
<span>{{ bookmark.date_added|humanize_absolute_date }}</span>
{% if bookmark.web_archive_snapshot_url %}
<span></span>
</a>
{% endif %}
</span>
<span class="text-gray text-sm">|</span> <span class="text-gray text-sm">|</span>
{% endif %} {% endif %}
<a href="{% url 'bookmarks:edit' bookmark.id %}?return_url={{ return_url }}" <a href="{% url 'bookmarks:edit' bookmark.id %}?return_url={{ return_url }}"

View file

@ -28,6 +28,7 @@ class BookmarkFactoryMixin:
description: str = '', description: str = '',
website_title: str = '', website_title: str = '',
website_description: str = '', website_description: str = '',
web_archive_snapshot_url: str = '',
): ):
if tags is None: if tags is None:
tags = [] tags = []
@ -45,7 +46,8 @@ class BookmarkFactoryMixin:
date_added=timezone.now(), date_added=timezone.now(),
date_modified=timezone.now(), date_modified=timezone.now(),
owner=user, owner=user,
is_archived=is_archived is_archived=is_archived,
web_archive_snapshot_url=web_archive_snapshot_url,
) )
bookmark.save() bookmark.save()
for tag in tags: for tag in tags:

View file

@ -24,9 +24,10 @@ class BookmarkListTagTest(TestCase, BookmarkFactoryMixin):
) )
return template_to_render.render(context) return template_to_render.render(context)
def setup_date_format_test(self, date_display_setting): def setup_date_format_test(self, date_display_setting: str, web_archive_url: str = ''):
bookmark = self.setup_bookmark() bookmark = self.setup_bookmark()
bookmark.date_added = timezone.now() - relativedelta(days=8) bookmark.date_added = timezone.now() - relativedelta(days=8)
bookmark.web_archive_snapshot_url = web_archive_url
bookmark.save() bookmark.save()
user = self.get_or_create_test_user() user = self.get_or_create_test_user()
user.profile.bookmark_date_display = date_display_setting user.profile.bookmark_date_display = date_display_setting
@ -39,7 +40,27 @@ class BookmarkListTagTest(TestCase, BookmarkFactoryMixin):
formatted_date = formats.date_format(bookmark.date_added, 'SHORT_DATE_FORMAT') formatted_date = formats.date_format(bookmark.date_added, 'SHORT_DATE_FORMAT')
self.assertInHTML(f''' self.assertInHTML(f'''
<span class="text-gray text-sm">{formatted_date}</span> <span class="date-label text-gray text-sm">
<span>{formatted_date}</span>
</span>
<span class="text-gray text-sm">|</span>
''', html)
def test_should_render_web_archive_link_with_absolute_date_setting(self):
bookmark = self.setup_date_format_test(UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE,
'https://web.archive.org/web/20210811214511/https://wanikani.com/')
html = self.render_template([bookmark])
formatted_date = formats.date_format(bookmark.date_added, 'SHORT_DATE_FORMAT')
self.assertInHTML(f'''
<span class="date-label text-gray text-sm">
<a href="{bookmark.web_archive_snapshot_url}"
title="Show snapshot on web archive" target="_blank" rel="noopener">
<span>{formatted_date}</span>
<span></span>
</a>
</span>
<span class="text-gray text-sm">|</span>
''', html) ''', html)
def test_should_respect_relative_date_setting(self): def test_should_respect_relative_date_setting(self):
@ -47,5 +68,23 @@ class BookmarkListTagTest(TestCase, BookmarkFactoryMixin):
html = self.render_template([bookmark]) html = self.render_template([bookmark])
self.assertInHTML(''' self.assertInHTML('''
<span class="text-gray text-sm">1 week ago</span> <span class="date-label text-gray text-sm">
<span>1 week ago</span>
</span>
<span class="text-gray text-sm">|</span>
''', html)
def test_should_render_web_archive_link_with_relative_date_setting(self):
bookmark = self.setup_date_format_test(UserProfile.BOOKMARK_DATE_DISPLAY_RELATIVE,
'https://web.archive.org/web/20210811214511/https://wanikani.com/')
html = self.render_template([bookmark])
self.assertInHTML(f'''
<span class="date-label text-gray text-sm">
<a href="{bookmark.web_archive_snapshot_url}"
title="Show snapshot on web archive" target="_blank" rel="noopener">
<span>1 week ago</span>
<span></span>
</a>
</span>
<span class="text-gray text-sm">|</span>
''', html) ''', html)

View file

@ -1,11 +1,14 @@
from unittest.mock import patch
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
from django.test import TestCase from django.test import TestCase
from django.utils import timezone from django.utils import timezone
from bookmarks.models import Bookmark, Tag from bookmarks.models import Bookmark, Tag
from bookmarks.services.bookmarks import archive_bookmark, archive_bookmarks, unarchive_bookmark, unarchive_bookmarks, \ from bookmarks.services.bookmarks import create_bookmark, update_bookmark, archive_bookmark, archive_bookmarks, \
delete_bookmarks, tag_bookmarks, untag_bookmarks unarchive_bookmark, unarchive_bookmarks, delete_bookmarks, tag_bookmarks, untag_bookmarks
from bookmarks.tests.helpers import BookmarkFactoryMixin from bookmarks.tests.helpers import BookmarkFactoryMixin
from bookmarks.services import tasks
User = get_user_model() User = get_user_model()
@ -13,7 +16,30 @@ User = get_user_model()
class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin): class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin):
def setUp(self) -> None: def setUp(self) -> None:
self.user = User.objects.create_user('testuser', 'test@example.com', 'password123') self.get_or_create_test_user()
def test_create_should_create_web_archive_snapshot(self):
with patch.object(tasks, 'create_web_archive_snapshot') as mock_create_web_archive_snapshot:
bookmark_data = Bookmark(url='https://example.com')
bookmark = create_bookmark(bookmark_data, 'tag1 tag2', self.user)
mock_create_web_archive_snapshot.assert_called_once_with(bookmark.id, False)
def test_update_should_create_web_archive_snapshot_if_url_did_change(self):
with patch.object(tasks, 'create_web_archive_snapshot') as mock_create_web_archive_snapshot:
bookmark = self.setup_bookmark()
bookmark.url = 'https://example.com/updated'
update_bookmark(bookmark, 'tag1 tag2', self.user)
mock_create_web_archive_snapshot.assert_called_once_with(bookmark.id, True)
def test_update_should_not_create_web_archive_snapshot_if_url_did_not_change(self):
with patch.object(tasks, 'create_web_archive_snapshot') as mock_create_web_archive_snapshot:
bookmark = self.setup_bookmark()
bookmark.title = 'updated title'
update_bookmark(bookmark, 'tag1 tag2', self.user)
mock_create_web_archive_snapshot.assert_not_called()
def test_archive_bookmark(self): def test_archive_bookmark(self):
bookmark = Bookmark( bookmark = Bookmark(

View file

@ -0,0 +1,154 @@
from unittest.mock import patch
import waybackpy
from background_task.models import Task
from django.contrib.auth.models import User
from django.test import TestCase, override_settings
from bookmarks.models import Bookmark
from bookmarks.services.tasks import create_web_archive_snapshot, schedule_bookmarks_without_snapshots
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
class MockWaybackUrl:
    """Stand-in for ``waybackpy.Url`` whose ``save()`` always succeeds."""

    def __init__(self, archive_url: str):
        # Exposed as the ``archive_url`` of the object returned by ``save()``
        self.archive_url = archive_url

    def save(self) -> 'MockWaybackUrl':
        """Pretend to create a snapshot; the mock itself serves as the result."""
        saved_archive = self
        return saved_archive
class MockWaybackUrlWithSaveError:
    """Stand-in for ``waybackpy.Url`` whose ``save()`` always fails."""

    def save(self):
        """Raise ``NotImplementedError`` instead of creating a snapshot."""
        raise NotImplementedError()
class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
    """Tests for the background tasks that create web archive snapshots.

    The tests schedule a task by calling it and then execute the stored ``Task``
    rows themselves through the helpers below. ``waybackpy.Url`` is patched with
    mock objects for the duration of each run.
    """

    def _patch_wayback(self, wayback_url):
        """Return a patcher that makes ``waybackpy.Url(...)`` return ``wayback_url``."""
        return patch.object(waybackpy, 'Url', return_value=wayback_url)

    @disable_logging
    def run_pending_task(self, task_function):
        """Run the first stored task through ``task_function.task_function``, then delete it."""
        target = getattr(task_function, 'task_function', None)
        pending = Task.objects.all()[0]
        positional, named = pending.params()
        target(*positional, **named)
        pending.delete()

    @disable_logging
    def run_all_pending_tasks(self, task_function):
        """Run every stored task through ``task_function.task_function``, deleting each one."""
        target = getattr(task_function, 'task_function', None)
        for pending in Task.objects.all():
            positional, named = pending.params()
            target(*positional, **named)
            pending.delete()

    def test_create_web_archive_snapshot_should_update_snapshot_url(self):
        bookmark = self.setup_bookmark()

        with self._patch_wayback(MockWaybackUrl('https://example.com')):
            create_web_archive_snapshot(bookmark.id, False)
            self.run_pending_task(create_web_archive_snapshot)
            bookmark.refresh_from_db()

            self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')

    def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self):
        with self._patch_wayback(MockWaybackUrl('https://example.com')) as mock_wayback_url:
            create_web_archive_snapshot(123, False)
            self.run_pending_task(create_web_archive_snapshot)

            mock_wayback_url.assert_not_called()

    def test_create_web_archive_snapshot_should_handle_wayback_save_error(self):
        bookmark = self.setup_bookmark()

        with self._patch_wayback(MockWaybackUrlWithSaveError()), \
                self.assertRaises(NotImplementedError):
            create_web_archive_snapshot(bookmark.id, False)
            self.run_pending_task(create_web_archive_snapshot)

    def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self):
        bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')

        with self._patch_wayback(MockWaybackUrl('https://other.com')):
            create_web_archive_snapshot(bookmark.id, False)
            self.run_pending_task(create_web_archive_snapshot)
            bookmark.refresh_from_db()

            self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')

    def test_create_web_archive_snapshot_should_force_update_snapshot(self):
        bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')

        with self._patch_wayback(MockWaybackUrl('https://other.com')):
            create_web_archive_snapshot(bookmark.id, True)
            self.run_pending_task(create_web_archive_snapshot)
            bookmark.refresh_from_db()

            self.assertEqual(bookmark.web_archive_snapshot_url, 'https://other.com')

    @override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
    def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(self):
        bookmark = self.setup_bookmark()
        create_web_archive_snapshot(bookmark.id, False)

        # With tasks disabled, calling the task must not store anything
        self.assertEqual(Task.objects.count(), 0)

    def test_schedule_bookmarks_without_snapshots_should_create_snapshot_task_for_all_bookmarks_without_snapshot(self):
        user = self.get_or_create_test_user()
        for _ in range(3):
            self.setup_bookmark()

        with self._patch_wayback(MockWaybackUrl('https://example.com')):
            schedule_bookmarks_without_snapshots(user.id)
            self.run_pending_task(schedule_bookmarks_without_snapshots)
            self.run_all_pending_tasks(create_web_archive_snapshot)

            for bookmark in Bookmark.objects.all():
                self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')

    def test_schedule_bookmarks_without_snapshots_should_not_update_bookmarks_with_existing_snapshot(self):
        user = self.get_or_create_test_user()
        for _ in range(3):
            self.setup_bookmark(web_archive_snapshot_url='https://example.com')

        with self._patch_wayback(MockWaybackUrl('https://other.com')):
            schedule_bookmarks_without_snapshots(user.id)
            self.run_pending_task(schedule_bookmarks_without_snapshots)
            self.run_all_pending_tasks(create_web_archive_snapshot)

            for bookmark in Bookmark.objects.all():
                self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')

    def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(self):
        user = self.get_or_create_test_user()
        other_user = User.objects.create_user('otheruser', 'otheruser@example.com', 'password123')
        for _ in range(3):
            self.setup_bookmark()
        for _ in range(3):
            self.setup_bookmark(user=other_user)

        with self._patch_wayback(MockWaybackUrl('https://example.com')):
            schedule_bookmarks_without_snapshots(user.id)
            self.run_pending_task(schedule_bookmarks_without_snapshots)
            self.run_all_pending_tasks(create_web_archive_snapshot)

            # Bookmarks of the scheduled user got a snapshot URL ...
            for bookmark in Bookmark.objects.filter(owner=user):
                self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')

            # ... while those of the other user were left untouched
            for bookmark in Bookmark.objects.filter(owner=other_user):
                self.assertEqual(bookmark.web_archive_snapshot_url, '')

    @override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
    def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(self):
        user = self.get_or_create_test_user()
        schedule_bookmarks_without_snapshots(user.id)

        # With tasks disabled, calling the task must not store anything
        self.assertEqual(Task.objects.count(), 0)

View file

@ -1,5 +1,8 @@
from unittest.mock import patch
from django.test import TestCase from django.test import TestCase
from bookmarks.services import tasks
from bookmarks.services.importer import import_netscape_html from bookmarks.services.importer import import_netscape_html
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
@ -31,3 +34,12 @@ class ImporterTestCase(TestCase, BookmarkFactoryMixin):
import_result = import_netscape_html(test_html, self.get_or_create_test_user()) import_result = import_netscape_html(test_html, self.get_or_create_test_user())
self.assertEqual(import_result.success, 0) self.assertEqual(import_result.success, 0)
def test_schedule_snapshot_creation(self):
user = self.get_or_create_test_user()
test_html = self.create_import_html('')
with patch.object(tasks, 'schedule_bookmarks_without_snapshots') as mock_schedule_bookmarks_without_snapshots:
import_netscape_html(test_html, user)
mock_schedule_bookmarks_without_snapshots.assert_called_once_with(user.id)

View file

@ -0,0 +1,15 @@
from unittest.mock import patch
from django.test import TestCase
from bookmarks.services import tasks
from bookmarks.tests.helpers import BookmarkFactoryMixin
class SignalsTestCase(TestCase, BookmarkFactoryMixin):
def test_login_should_schedule_snapshot_creation(self):
user = self.get_or_create_test_user()
with patch.object(tasks, 'schedule_bookmarks_without_snapshots') as mock_schedule_bookmarks_without_snapshots:
self.client.force_login(user)
mock_schedule_bookmarks_without_snapshots.assert_called_once_with(user.id)

View file

@ -12,5 +12,10 @@ python manage.py generate_secret_key
# Ensure the DB folder is owned by the right user # Ensure the DB folder is owned by the right user
chown -R www-data: /etc/linkding/data chown -R www-data: /etc/linkding/data
# Start background task processor using supervisord, unless explicitly disabled
if [ "$LD_DISABLE_BACKGROUND_TASKS" != "True" ]; then
supervisord -c supervisord.conf
fi
# Start uwsgi server # Start uwsgi server
uwsgi uwsgi.ini uwsgi uwsgi.ini

View file

@ -25,11 +25,20 @@ All options need to be defined as environment variables in the environment that
## List of options ## List of options
### `LD_DISABLE_BACKGROUND_TASKS`
Values: `True`, `False` | Default = `False`
Disables background tasks, such as creating snapshots for bookmarks on the web archive.
Enabling this flag prevents the background task processor from starting up and prevents tasks from being scheduled.
This might be useful if you are experiencing performance issues or other problematic behaviour due to background task processing.
### `LD_DISABLE_URL_VALIDATION` ### `LD_DISABLE_URL_VALIDATION`
Values: `True`, `False` | Default = `False` Values: `True`, `False` | Default = `False`
Completely disables URL validation for bookmarks. This can be useful if you intend to store non fully qualified domain name URLs, such as network paths, or you want to store URLs that use another protocol than `http` or `https`. Completely disables URL validation for bookmarks.
This can be useful if you intend to store non fully qualified domain name URLs, such as network paths, or you want to store URLs that use another protocol than `http` or `https`.
### `LD_REQUEST_TIMEOUT` ### `LD_REQUEST_TIMEOUT`

View file

@ -4,6 +4,8 @@ certifi==2019.6.16
charset-normalizer==2.0.4 charset-normalizer==2.0.4
confusable-homoglyphs==3.2.0 confusable-homoglyphs==3.2.0
Django==3.2.6 Django==3.2.6
django-background-tasks==1.2.5
django-compat==1.0.15
django-generate-secret-key==1.0.2 django-generate-secret-key==1.0.2
django-picklefield==3.0.1 django-picklefield==3.0.1
django-registration==3.2 django-registration==3.2
@ -17,6 +19,8 @@ pytz==2021.1
requests==2.26.0 requests==2.26.0
soupsieve==1.9.2 soupsieve==1.9.2
sqlparse==0.4.1 sqlparse==0.4.1
supervisor==4.2.2
typing-extensions==3.10.0.0 typing-extensions==3.10.0.0
urllib3==1.26.6 urllib3==1.26.6
uWSGI==2.0.18 uWSGI==2.0.18
waybackpy==2.4.3

View file

@ -6,6 +6,8 @@ confusable-homoglyphs==3.2.0
coverage==5.5 coverage==5.5
Django==3.2.6 Django==3.2.6
django-appconf==1.0.4 django-appconf==1.0.4
django-background-tasks==1.2.5
django-compat==1.0.15
django-compressor==2.4.1 django-compressor==2.4.1
django-debug-toolbar==3.2.1 django-debug-toolbar==3.2.1
django-generate-secret-key==1.0.2 django-generate-secret-key==1.0.2
@ -27,3 +29,4 @@ soupsieve==1.9.2
sqlparse==0.4.1 sqlparse==0.4.1
typing-extensions==3.10.0.0 typing-extensions==3.10.0.0
urllib3==1.26.6 urllib3==1.26.6
waybackpy==2.4.3

View file

@ -42,6 +42,7 @@ INSTALLED_APPS = [
'django_generate_secret_key', 'django_generate_secret_key',
'rest_framework', 'rest_framework',
'rest_framework.authtoken', 'rest_framework.authtoken',
'background_task',
] ]
MIDDLEWARE = [ MIDDLEWARE = [
@ -166,3 +167,14 @@ ALLOW_REGISTRATION = False
# URL validation flag # URL validation flag
LD_DISABLE_URL_VALIDATION = os.getenv('LD_DISABLE_URL_VALIDATION', False) in (True, 'True', '1') LD_DISABLE_URL_VALIDATION = os.getenv('LD_DISABLE_URL_VALIDATION', False) in (True, 'True', '1')
# Background task enabled setting
LD_DISABLE_BACKGROUND_TASKS = os.getenv('LD_DISABLE_BACKGROUND_TASKS', False) in (True, 'True', '1')
# django-background-tasks
MAX_ATTEMPTS = 5
# How many tasks will run in parallel
# We want to keep this low to prevent SQLite lock errors and in general not to consume too much resources on smaller
# specced systems like Raspberries. Should be OK as tasks are not time critical.
BACKGROUND_TASK_RUN_ASYNC = True
BACKGROUND_TASK_ASYNC_THREADS = 2

View file

@ -43,6 +43,11 @@ LOGGING = {
'django.db.backends': { 'django.db.backends': {
'level': 'ERROR', # Set to DEBUG to log all SQL calls 'level': 'ERROR', # Set to DEBUG to log all SQL calls
'handlers': ['console'], 'handlers': ['console'],
},
'bookmarks.services.tasks': { # Log task output
'level': 'DEBUG',
'handlers': ['console'],
'propagate': False,
} }
} }
} }

10
supervisord.conf Normal file
View file

@ -0,0 +1,10 @@
; Settings for the supervisord process itself
[supervisord]
user=root
loglevel=info

; Background task processor, run as www-data through the wrapper script
[program:jobs]
user=www-data
command=sh background-tasks-wrapper.sh
; Write the program's output to /dev/stdout; a max size of 0 means unlimited, i.e. no log rotation
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
; Send the program's stderr to the same destination as its stdout
redirect_stderr=true