Prevent rate limit errors in wayback machine API (#339)

The Wayback Machine Save API only allows a limited number of requests within a given timespan. This commit introduces several changes to avoid rate limit errors:
- At most one attempt is made to create a new snapshot
- If a new snapshot cannot be created, the latest existing snapshot is used instead
- Bulk snapshot updates (bookmark import, loading missing snapshots after login) only attempt to load the latest snapshot instead of creating new ones
This commit is contained in:
Sascha Ißbrücker 2022-09-10 20:43:15 +02:00 committed by GitHub
parent 6420ec173a
commit 1b35d5b5ef
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 364 additions and 188 deletions

View file

@ -5,8 +5,9 @@ from background_task import background
from django.conf import settings
from django.contrib.auth import get_user_model
from django.contrib.auth.models import User
from waybackpy.exceptions import WaybackError
from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound
import bookmarks.services.wayback
from bookmarks.models import Bookmark, UserProfile
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
@ -26,6 +27,32 @@ def create_web_archive_snapshot(user: User, bookmark: Bookmark, force_update: bo
_create_web_archive_snapshot_task(bookmark.id, force_update)
def _load_newest_snapshot(bookmark: Bookmark):
    """
    Look up the newest existing Wayback Machine snapshot for a bookmark and store its URL.

    When the CDX API returns a snapshot, its archive URL is written to
    ``bookmark.web_archive_snapshot_url`` and the bookmark is saved.
    ``NoCDXRecordFound`` and any other ``WaybackError`` are logged and not re-raised.

    :param bookmark: the bookmark whose snapshot URL should be populated
    """
    try:
        logger.debug(f'Load existing snapshot for bookmark. url={bookmark.url}')
        newest = bookmarks.services.wayback.CustomWaybackMachineCDXServerAPI(bookmark.url).newest()

        # Nothing to persist when the API hands back no usable snapshot
        if not newest:
            return

        bookmark.web_archive_snapshot_url = newest.archive_url
        bookmark.save()
        logger.debug(f'Using newest snapshot. url={bookmark.url} from={newest.datetime_timestamp}')
    except NoCDXRecordFound:
        # Must be handled before WaybackError so the more specific message is logged
        logger.error(f'Could not find any snapshots for bookmark. url={bookmark.url}')
    except WaybackError as error:
        logger.error(f'Failed to load existing snapshot. url={bookmark.url}', exc_info=error)
def _create_snapshot(bookmark: Bookmark):
    """
    Request a new Wayback Machine snapshot for a bookmark and store the resulting archive URL.

    The save API is configured with ``max_tries=1``, so only a single save attempt is made.
    Errors raised by the save call are not handled here and propagate to the caller.

    :param bookmark: the bookmark to archive; it is saved after the snapshot URL is set
    """
    logger.debug(f'Create new snapshot for bookmark. url={bookmark.url}...')

    save_api = waybackpy.WaybackMachineSaveAPI(bookmark.url, DEFAULT_USER_AGENT, max_tries=1)
    save_api.save()

    # archive_url is read from the API object only after save() has returned
    bookmark.web_archive_snapshot_url = save_api.archive_url
    bookmark.save()

    logger.debug(f'Successfully created new snapshot for bookmark:. url={bookmark.url}')
@background()
def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
try:
@ -37,19 +64,31 @@ def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
if bookmark.web_archive_snapshot_url and not force_update:
return
logger.debug(f'Create web archive link for bookmark: {bookmark}...')
archive = waybackpy.WaybackMachineSaveAPI(bookmark.url, DEFAULT_USER_AGENT)
# Create new snapshot
try:
archive.save()
except WaybackError as error:
logger.exception(f'Error creating web archive link for bookmark: {bookmark}...', exc_info=error)
raise
_create_snapshot(bookmark)
return
except TooManyRequestsError:
logger.error(
f'Failed to create snapshot due to rate limiting, trying to load newest snapshot as fallback. url={bookmark.url}')
except WaybackError:
logger.error(f'Failed to create snapshot, trying to load newest snapshot as fallback. url={bookmark.url}')
bookmark.web_archive_snapshot_url = archive.archive_url
bookmark.save()
logger.debug(f'Successfully created web archive link for bookmark: {bookmark}...')
# Load the newest snapshot as fallback
_load_newest_snapshot(bookmark)
@background()
def _load_web_archive_snapshot_task(bookmark_id: int):
    """
    Background task that fills in a bookmark's snapshot URL from the newest existing snapshot.

    Does nothing when the bookmark no longer exists or already has a snapshot URL.
    This task never requests the creation of a new snapshot.

    :param bookmark_id: primary key of the bookmark to process
    """
    try:
        bookmark = Bookmark.objects.get(id=bookmark_id)
    except Bookmark.DoesNotExist:
        # The bookmark may have been deleted since the task was scheduled
        return

    # Only look up a snapshot for bookmarks that do not have one yet
    if not bookmark.web_archive_snapshot_url:
        _load_newest_snapshot(bookmark)
def schedule_bookmarks_without_snapshots(user: User):
@ -63,4 +102,6 @@ def _schedule_bookmarks_without_snapshots_task(user_id: int):
bookmarks_without_snapshots = Bookmark.objects.filter(web_archive_snapshot_url__exact='', owner=user)
for bookmark in bookmarks_without_snapshots:
_create_web_archive_snapshot_task(bookmark.id, False)
# To prevent rate limit errors from the Wayback API only try to load the latest snapshots instead of creating
# new ones when processing bookmarks in bulk
_load_web_archive_snapshot_task(bookmark.id)

View file

@ -0,0 +1,40 @@
import time
from typing import Dict
import waybackpy
import waybackpy.utils
from waybackpy.exceptions import NoCDXRecordFound
class CustomWaybackMachineCDXServerAPI(waybackpy.WaybackMachineCDXServerAPI):
    """
    Customized WaybackMachineCDXServerAPI to work around some issues with retrieving the newest snapshot.
    See https://github.com/akamhy/waybackpy/issues/176
    """

    def newest(self):
        """
        Return the first snapshot yielded when querying for the snapshot closest to the current time.

        :raises NoCDXRecordFound: if the query does not yield any snapshot
        """
        now = int(time.time())
        self.closest = waybackpy.utils.unix_timestamp_to_wayback_timestamp(now)
        self.sort = 'closest'
        # NOTE(review): the limit of -5 is presumably part of the workaround for the issue linked
        # in the class docstring - confirm before changing
        self.limit = -5

        # Only the first yielded record is of interest
        first_snapshot = next(iter(self.snapshots()), None)
        if first_snapshot:
            return first_snapshot

        raise NoCDXRecordFound(
            "Wayback Machine's CDX server did not return any records "
            "for the query. The URL may not have any archives "
            " on the Wayback Machine or the URL may have been recently "
            "archived and is still not available on the CDX server."
        )

    def add_payload(self, payload: Dict[str, str]) -> None:
        """
        Extend the payload built by the base class with the ``fastLatest`` query param.

        :param payload: query parameters dict, modified in place
        """
        super().add_payload(payload)
        # Set fastLatest query param, as we are only using this API to get the latest snapshot and using fastLatest
        # makes searching for latest snapshots faster
        payload.update({'fastLatest': 'true'})

View file

@ -2,136 +2,139 @@
{% load widget_tweaks %}
{% block content %}
<div class="settings-page">
<div class="settings-page">
{% include 'settings/nav.html' %}
{% include 'settings/nav.html' %}
{# Profile section #}
<section class="content-area">
<h2>Profile</h2>
<p>
<a href="{% url 'change_password' %}">Change password</a>
</p>
<form action="{% url 'bookmarks:settings.general' %}" method="post" novalidate>
{% csrf_token %}
<div class="form-group">
<label for="{{ form.theme.id_for_label }}" class="form-label">Theme</label>
{{ form.theme|add_class:"form-select col-2 col-sm-12" }}
<div class="form-input-hint">
Whether to use a light or dark theme, or automatically adjust the theme based on your system's settings.
</div>
</div>
<div class="form-group">
<label for="{{ form.bookmark_date_display.id_for_label }}" class="form-label">Bookmark date format</label>
{{ form.bookmark_date_display|add_class:"form-select col-2 col-sm-12" }}
<div class="form-input-hint">
Whether to show bookmark dates as relative (how long ago), or as absolute dates. Alternatively the date can be hidden.
</div>
</div>
<div class="form-group">
<label for="{{ form.bookmark_link_target.id_for_label }}" class="form-label">Open bookmarks in</label>
{{ form.bookmark_link_target|add_class:"form-select col-2 col-sm-12" }}
<div class="form-input-hint">
Whether to open bookmarks a new page or in the same page.
</div>
</div>
<div class="form-group">
<label for="{{ form.web_archive_integration.id_for_label }}" class="form-label">Internet Archive
integration</label>
{{ form.web_archive_integration|add_class:"form-select col-2 col-sm-12" }}
<div class="form-input-hint">
Enabling this feature will automatically create snapshots of bookmarked websites on the <a
href="https://web.archive.org/" target="_blank" rel="noopener">Internet Archive Wayback
Machine</a>. This allows
to preserve, and later access, the website as it was at the point in time it was bookmarked, in
case it goes offline or its content is modified.
</div>
</div>
<div class="form-group">
<label for="{{ form.enable_sharing.id_for_label }}" class="form-checkbox">
{{ form.enable_sharing }}
<i class="form-icon"></i> Enable bookmark sharing
</label>
<div class="form-input-hint">
Allows to share bookmarks with other users, and to view shared bookmarks.
Disabling this feature will hide all previously shared bookmarks from other users.
</div>
</div>
<div class="form-group">
<input type="submit" value="Save" class="btn btn-primary mt-2">
</div>
</form>
</section>
{# Profile section #}
<section class="content-area">
<h2>Profile</h2>
<p>
<a href="{% url 'change_password' %}">Change password</a>
</p>
<form action="{% url 'bookmarks:settings.general' %}" method="post" novalidate>
{% csrf_token %}
<div class="form-group">
<label for="{{ form.theme.id_for_label }}" class="form-label">Theme</label>
{{ form.theme|add_class:"form-select col-2 col-sm-12" }}
<div class="form-input-hint">
Whether to use a light or dark theme, or automatically adjust the theme based on your system's settings.
</div>
</div>
<div class="form-group">
<label for="{{ form.bookmark_date_display.id_for_label }}" class="form-label">Bookmark date format</label>
{{ form.bookmark_date_display|add_class:"form-select col-2 col-sm-12" }}
<div class="form-input-hint">
Whether to show bookmark dates as relative (how long ago), or as absolute dates. Alternatively the date can
be hidden.
</div>
</div>
<div class="form-group">
<label for="{{ form.bookmark_link_target.id_for_label }}" class="form-label">Open bookmarks in</label>
{{ form.bookmark_link_target|add_class:"form-select col-2 col-sm-12" }}
<div class="form-input-hint">
Whether to open bookmarks a new page or in the same page.
</div>
</div>
<div class="form-group">
<label for="{{ form.web_archive_integration.id_for_label }}" class="form-label">Internet Archive
integration</label>
{{ form.web_archive_integration|add_class:"form-select col-2 col-sm-12" }}
<div class="form-input-hint">
Enabling this feature will automatically create snapshots of bookmarked websites on the <a
href="https://web.archive.org/" target="_blank" rel="noopener">Internet Archive Wayback
Machine</a>.
This allows to preserve, and later access the website as it was at the point in time it was bookmarked, in
case it goes offline or its content is modified.
Please consider donating to the <a href="https://archive.org/donate/index.php" target="_blank"
rel="noopener">Internet Archive</a> if you make use of this feature.
</div>
</div>
<div class="form-group">
<label for="{{ form.enable_sharing.id_for_label }}" class="form-checkbox">
{{ form.enable_sharing }}
<i class="form-icon"></i> Enable bookmark sharing
</label>
<div class="form-input-hint">
Allows to share bookmarks with other users, and to view shared bookmarks.
Disabling this feature will hide all previously shared bookmarks from other users.
</div>
</div>
<div class="form-group">
<input type="submit" value="Save" class="btn btn-primary mt-2">
</div>
</form>
</section>
{# Import section #}
<section class="content-area">
<h2>Import</h2>
<p>Import bookmarks and tags in the Netscape HTML format. This will execute a sync where new bookmarks are
added and existing ones are updated.</p>
<form method="post" enctype="multipart/form-data" action="{% url 'bookmarks:settings.import' %}">
{% csrf_token %}
<div class="form-group">
<div class="input-group col-8 col-md-12">
<input class="form-input" type="file" name="import_file">
<input type="submit" class="input-group-btn col-2 btn btn-primary" value="Upload">
</div>
{% if import_success_message %}
<div class="has-success">
<p class="form-input-hint">
{{ import_success_message }}
</p>
</div>
{% endif %}
{% if import_errors_message %}
<div class="has-error">
<p class="form-input-hint">
{{ import_errors_message }}
</p>
</div>
{% endif %}
</div>
</form>
</section>
{# Import section #}
<section class="content-area">
<h2>Import</h2>
<p>Import bookmarks and tags in the Netscape HTML format. This will execute a sync where new bookmarks are
added and existing ones are updated.</p>
<form method="post" enctype="multipart/form-data" action="{% url 'bookmarks:settings.import' %}">
{% csrf_token %}
<div class="form-group">
<div class="input-group col-8 col-md-12">
<input class="form-input" type="file" name="import_file">
<input type="submit" class="input-group-btn col-2 btn btn-primary" value="Upload">
</div>
{% if import_success_message %}
<div class="has-success">
<p class="form-input-hint">
{{ import_success_message }}
</p>
</div>
{% endif %}
{% if import_errors_message %}
<div class="has-error">
<p class="form-input-hint">
{{ import_errors_message }}
</p>
</div>
{% endif %}
</div>
</form>
</section>
{# Export section #}
<section class="content-area">
<h2>Export</h2>
<p>Export all bookmarks in Netscape HTML format.</p>
<a class="btn btn-primary" href="{% url 'bookmarks:settings.export' %}">Download (.html)</a>
{% if export_error %}
<div class="has-error">
<p class="form-input-hint">
{{ export_error }}
</p>
</div>
{% endif %}
</section>
{# Export section #}
<section class="content-area">
<h2>Export</h2>
<p>Export all bookmarks in Netscape HTML format.</p>
<a class="btn btn-primary" href="{% url 'bookmarks:settings.export' %}">Download (.html)</a>
{% if export_error %}
<div class="has-error">
<p class="form-input-hint">
{{ export_error }}
</p>
</div>
{% endif %}
</section>
{# About section #}
<section class="content-area about">
<h2>About</h2>
<table class="table">
<tbody>
<tr>
<td>Version</td>
<td>{{ version_info }}</td>
</tr>
<tr>
<td rowspan="3" style="vertical-align: top">Links</td>
<td><a href="https://github.com/sissbruecker/linkding/"
target="_blank">GitHub</a></td>
</tr>
<tr>
<td><a href="https://github.com/sissbruecker/linkding#documentation"
target="_blank">Documentation</a></td>
</tr>
<tr>
<td><a href="https://github.com/sissbruecker/linkding/blob/master/CHANGELOG.md"
target="_blank">Changelog</a></td>
</tr>
</tbody>
</table>
</section>
</div>
{# About section #}
<section class="content-area about">
<h2>About</h2>
<table class="table">
<tbody>
<tr>
<td>Version</td>
<td>{{ version_info }}</td>
</tr>
<tr>
<td rowspan="3" style="vertical-align: top">Links</td>
<td><a href="https://github.com/sissbruecker/linkding/"
target="_blank">GitHub</a></td>
</tr>
<tr>
<td><a href="https://github.com/sissbruecker/linkding#documentation"
target="_blank">Documentation</a></td>
</tr>
<tr>
<td><a href="https://github.com/sissbruecker/linkding/blob/master/CHANGELOG.md"
target="_blank">Changelog</a></td>
</tr>
</tbody>
</table>
</section>
</div>
{% endblock %}

View file

@ -1,25 +1,51 @@
import datetime
from dataclasses import dataclass
from unittest.mock import patch
import waybackpy
from background_task.models import Task
from django.contrib.auth.models import User
from django.test import TestCase, override_settings
from waybackpy.exceptions import WaybackError
from bookmarks.models import Bookmark, UserProfile
import bookmarks.services.wayback
from bookmarks.models import UserProfile
from bookmarks.services import tasks
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
class MockWaybackMachineSaveAPI:
def __init__(self, archive_url: str):
def __init__(self, archive_url: str = 'https://example.com/created_snapshot', fail_on_save: bool = False):
self.archive_url = archive_url
self.fail_on_save = fail_on_save
def save(self):
if self.fail_on_save:
raise WaybackError
return self
class MockWaybackUrlWithSaveError:
def save(self):
raise NotImplementedError
@dataclass
class MockCdxSnapshot:
    """Minimal stand-in for a CDX snapshot, carrying only the two attributes the tasks module reads."""

    # URL of the archived page
    archive_url: str
    # Timestamp of the snapshot; only used for log output by the code under test
    datetime_timestamp: datetime.datetime
class MockWaybackMachineCDXServerAPI:
    """
    Test double for the CDX server API that can simulate a found snapshot, no snapshot, or an error.
    """

    def __init__(self,
                 archive_url: str = 'https://example.com/newest_snapshot',
                 has_no_snapshot=False,
                 fail_loading_snapshot=False):
        """
        :param archive_url: archive URL reported by the snapshot returned from ``newest``
        :param has_no_snapshot: if truthy, ``newest`` returns ``None``
        :param fail_loading_snapshot: if truthy (and a snapshot exists), ``newest`` raises ``WaybackError``
        """
        self.archive_url = archive_url
        self.has_no_snapshot = has_no_snapshot
        self.fail_loading_snapshot = fail_loading_snapshot

    def newest(self):
        """Return a mock snapshot, ``None``, or raise ``WaybackError`` depending on the configured flags."""
        if not self.has_no_snapshot:
            # The failure flag is only consulted when a snapshot is supposed to exist
            if self.fail_loading_snapshot:
                raise WaybackError
            return MockCdxSnapshot(archive_url=self.archive_url, datetime_timestamp=datetime.datetime.now())

        return None
class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
@ -50,49 +76,130 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
def test_create_web_archive_snapshot_should_update_snapshot_url(self):
bookmark = self.setup_bookmark()
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')):
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI()):
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db()
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com/created_snapshot')
def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self):
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')) as mock_wayback_url:
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
return_value=MockWaybackMachineSaveAPI()) as mock_save_api:
tasks._create_web_archive_snapshot_task(123, False)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
mock_wayback_url.assert_not_called()
def test_create_web_archive_snapshot_should_handle_wayback_save_error(self):
bookmark = self.setup_bookmark()
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
return_value=MockWaybackUrlWithSaveError()):
with self.assertRaises(NotImplementedError):
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
mock_save_api.assert_not_called()
def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self):
bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')):
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
return_value=MockWaybackMachineSaveAPI()) as mock_save_api:
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db()
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
mock_save_api.assert_not_called()
def test_create_web_archive_snapshot_should_force_update_snapshot(self):
bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')):
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
return_value=MockWaybackMachineSaveAPI('https://other.com')):
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, True)
self.run_pending_task(tasks._create_web_archive_snapshot_task)
bookmark.refresh_from_db()
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://other.com')
def test_create_web_archive_snapshot_should_use_newest_snapshot_as_fallback(self):
    """When creating a snapshot fails, the newest existing snapshot URL is stored instead."""
    bookmark = self.setup_bookmark()

    # Save API fails, CDX API returns a snapshot
    with patch.object(waybackpy, 'WaybackMachineSaveAPI',
                      return_value=MockWaybackMachineSaveAPI(fail_on_save=True)), \
            patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                         return_value=MockWaybackMachineCDXServerAPI()):
        tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
        self.run_pending_task(tasks._create_web_archive_snapshot_task)

        bookmark.refresh_from_db()
        self.assertEqual('https://example.com/newest_snapshot', bookmark.web_archive_snapshot_url)
def test_create_web_archive_snapshot_should_ignore_missing_newest_snapshot(self):
    """When creating a snapshot fails and no existing snapshot is found, the URL stays empty."""
    bookmark = self.setup_bookmark()

    # Save API fails, CDX API reports no snapshot
    with patch.object(waybackpy, 'WaybackMachineSaveAPI',
                      return_value=MockWaybackMachineSaveAPI(fail_on_save=True)), \
            patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                         return_value=MockWaybackMachineCDXServerAPI(has_no_snapshot=True)):
        tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
        self.run_pending_task(tasks._create_web_archive_snapshot_task)

        bookmark.refresh_from_db()
        self.assertEqual('', bookmark.web_archive_snapshot_url)
def test_create_web_archive_snapshot_should_ignore_newest_snapshot_errors(self):
    """When both creating and loading a snapshot fail, the URL stays empty and no error escapes."""
    bookmark = self.setup_bookmark()

    # Save API fails, CDX API raises as well
    with patch.object(waybackpy, 'WaybackMachineSaveAPI',
                      return_value=MockWaybackMachineSaveAPI(fail_on_save=True)), \
            patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                         return_value=MockWaybackMachineCDXServerAPI(fail_loading_snapshot=True)):
        tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
        self.run_pending_task(tasks._create_web_archive_snapshot_task)

        bookmark.refresh_from_db()
        self.assertEqual('', bookmark.web_archive_snapshot_url)
def test_load_web_archive_snapshot_should_update_snapshot_url(self):
    """The load task stores the archive URL of the newest snapshot on the bookmark."""
    bookmark = self.setup_bookmark()
    cdx_api_patcher = patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                                   return_value=MockWaybackMachineCDXServerAPI())

    with cdx_api_patcher:
        tasks._load_web_archive_snapshot_task(bookmark.id)
        self.run_pending_task(tasks._load_web_archive_snapshot_task)

        bookmark.refresh_from_db()
        self.assertEqual('https://example.com/newest_snapshot', bookmark.web_archive_snapshot_url)
def test_load_web_archive_snapshot_should_handle_missing_bookmark_id(self):
    """The load task must not touch the CDX API when the bookmark id does not exist."""
    cdx_api_patcher = patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                                   return_value=MockWaybackMachineCDXServerAPI())

    with cdx_api_patcher as mock_cdx_api:
        # 123 is an id for which no bookmark has been set up
        tasks._load_web_archive_snapshot_task(123)
        self.run_pending_task(tasks._load_web_archive_snapshot_task)

        mock_cdx_api.assert_not_called()
def test_load_web_archive_snapshot_should_skip_if_snapshot_exists(self):
    """The load task must not touch the CDX API when the bookmark already has a snapshot URL."""
    bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
    cdx_api_patcher = patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                                   return_value=MockWaybackMachineCDXServerAPI())

    with cdx_api_patcher as mock_cdx_api:
        tasks._load_web_archive_snapshot_task(bookmark.id)
        self.run_pending_task(tasks._load_web_archive_snapshot_task)

        mock_cdx_api.assert_not_called()
def test_load_web_archive_snapshot_should_handle_missing_snapshot(self):
    """The load task leaves the snapshot URL empty when no existing snapshot is found."""
    bookmark = self.setup_bookmark()

    with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                      return_value=MockWaybackMachineCDXServerAPI(has_no_snapshot=True)):
        tasks._load_web_archive_snapshot_task(bookmark.id)
        self.run_pending_task(tasks._load_web_archive_snapshot_task)

        # The task works on its own instance fetched by id, so reload the persisted state;
        # asserting on the local object alone would pass regardless of what the task did
        bookmark.refresh_from_db()
        self.assertEqual('', bookmark.web_archive_snapshot_url)
def test_load_web_archive_snapshot_should_handle_wayback_errors(self):
    """The load task leaves the snapshot URL empty, and does not raise, when the CDX API fails."""
    bookmark = self.setup_bookmark()

    with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
                      return_value=MockWaybackMachineCDXServerAPI(fail_loading_snapshot=True)):
        tasks._load_web_archive_snapshot_task(bookmark.id)
        self.run_pending_task(tasks._load_web_archive_snapshot_task)

        # The task works on its own instance fetched by id, so reload the persisted state;
        # asserting on the local object alone would pass regardless of what the task did
        bookmark.refresh_from_db()
        self.assertEqual('', bookmark.web_archive_snapshot_url)
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(self):
bookmark = self.setup_bookmark()
@ -109,33 +216,23 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual(Task.objects.count(), 0)
def test_schedule_bookmarks_without_snapshots_should_create_snapshot_task_for_all_bookmarks_without_snapshot(self):
def test_schedule_bookmarks_without_snapshots_should_load_snapshot_for_all_bookmarks_without_snapshot(self):
user = self.get_or_create_test_user()
self.setup_bookmark()
self.setup_bookmark()
self.setup_bookmark()
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')):
tasks.schedule_bookmarks_without_snapshots(user)
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task)
for bookmark in Bookmark.objects.all():
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
def test_schedule_bookmarks_without_snapshots_should_not_update_bookmarks_with_existing_snapshot(self):
user = self.get_or_create_test_user()
self.setup_bookmark(web_archive_snapshot_url='https://example.com')
self.setup_bookmark(web_archive_snapshot_url='https://example.com')
self.setup_bookmark(web_archive_snapshot_url='https://example.com')
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')):
tasks.schedule_bookmarks_without_snapshots(user)
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task)
tasks.schedule_bookmarks_without_snapshots(user)
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
for bookmark in Bookmark.objects.all():
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
task_list = Task.objects.all()
self.assertEqual(task_list.count(), 3)
for task in task_list:
self.assertEqual(task.task_name, 'bookmarks.services.tasks._load_web_archive_snapshot_task')
def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(self):
user = self.get_or_create_test_user()
@ -147,16 +244,11 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.setup_bookmark(user=other_user)
self.setup_bookmark(user=other_user)
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')):
tasks.schedule_bookmarks_without_snapshots(user)
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task)
tasks.schedule_bookmarks_without_snapshots(user)
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
for bookmark in Bookmark.objects.all().filter(owner=user):
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
for bookmark in Bookmark.objects.all().filter(owner=other_user):
self.assertEqual(bookmark.web_archive_snapshot_url, '')
task_list = Task.objects.all()
self.assertEqual(task_list.count(), 3)
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(self):