mirror of
https://github.com/sissbruecker/linkding
synced 2024-11-22 03:13:02 +00:00
Prevent rate limit errors in wayback machine API (#339)
The Wayback Machine Save API only allows a limited number of requests within a timespan. This introduces several changes to avoid rate limit errors: - There will be max. 1 attempt to create a new snapshot - If a new snapshot could not be created, then attempt to use the latest existing snapshot - Bulk snapshot updates (bookmark import, load missing snapshots after login) will only attempt to load the latest snapshot instead of creating new ones
This commit is contained in:
parent
6420ec173a
commit
1b35d5b5ef
4 changed files with 364 additions and 188 deletions
|
@ -5,8 +5,9 @@ from background_task import background
|
|||
from django.conf import settings
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.contrib.auth.models import User
|
||||
from waybackpy.exceptions import WaybackError
|
||||
from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound
|
||||
|
||||
import bookmarks.services.wayback
|
||||
from bookmarks.models import Bookmark, UserProfile
|
||||
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
|
||||
|
||||
|
@ -26,6 +27,32 @@ def create_web_archive_snapshot(user: User, bookmark: Bookmark, force_update: bo
|
|||
_create_web_archive_snapshot_task(bookmark.id, force_update)
|
||||
|
||||
|
||||
def _load_newest_snapshot(bookmark: Bookmark):
|
||||
try:
|
||||
logger.debug(f'Load existing snapshot for bookmark. url={bookmark.url}')
|
||||
cdx_api = bookmarks.services.wayback.CustomWaybackMachineCDXServerAPI(bookmark.url)
|
||||
existing_snapshot = cdx_api.newest()
|
||||
|
||||
if existing_snapshot:
|
||||
bookmark.web_archive_snapshot_url = existing_snapshot.archive_url
|
||||
bookmark.save()
|
||||
logger.debug(f'Using newest snapshot. url={bookmark.url} from={existing_snapshot.datetime_timestamp}')
|
||||
|
||||
except NoCDXRecordFound:
|
||||
logger.error(f'Could not find any snapshots for bookmark. url={bookmark.url}')
|
||||
except WaybackError as error:
|
||||
logger.error(f'Failed to load existing snapshot. url={bookmark.url}', exc_info=error)
|
||||
|
||||
|
||||
def _create_snapshot(bookmark: Bookmark):
|
||||
logger.debug(f'Create new snapshot for bookmark. url={bookmark.url}...')
|
||||
archive = waybackpy.WaybackMachineSaveAPI(bookmark.url, DEFAULT_USER_AGENT, max_tries=1)
|
||||
archive.save()
|
||||
bookmark.web_archive_snapshot_url = archive.archive_url
|
||||
bookmark.save()
|
||||
logger.debug(f'Successfully created new snapshot for bookmark:. url={bookmark.url}')
|
||||
|
||||
|
||||
@background()
|
||||
def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
|
||||
try:
|
||||
|
@ -37,19 +64,31 @@ def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
|
|||
if bookmark.web_archive_snapshot_url and not force_update:
|
||||
return
|
||||
|
||||
logger.debug(f'Create web archive link for bookmark: {bookmark}...')
|
||||
|
||||
archive = waybackpy.WaybackMachineSaveAPI(bookmark.url, DEFAULT_USER_AGENT)
|
||||
|
||||
# Create new snapshot
|
||||
try:
|
||||
archive.save()
|
||||
except WaybackError as error:
|
||||
logger.exception(f'Error creating web archive link for bookmark: {bookmark}...', exc_info=error)
|
||||
raise
|
||||
_create_snapshot(bookmark)
|
||||
return
|
||||
except TooManyRequestsError:
|
||||
logger.error(
|
||||
f'Failed to create snapshot due to rate limiting, trying to load newest snapshot as fallback. url={bookmark.url}')
|
||||
except WaybackError:
|
||||
logger.error(f'Failed to create snapshot, trying to load newest snapshot as fallback. url={bookmark.url}')
|
||||
|
||||
bookmark.web_archive_snapshot_url = archive.archive_url
|
||||
bookmark.save()
|
||||
logger.debug(f'Successfully created web archive link for bookmark: {bookmark}...')
|
||||
# Load the newest snapshot as fallback
|
||||
_load_newest_snapshot(bookmark)
|
||||
|
||||
|
||||
@background()
|
||||
def _load_web_archive_snapshot_task(bookmark_id: int):
|
||||
try:
|
||||
bookmark = Bookmark.objects.get(id=bookmark_id)
|
||||
except Bookmark.DoesNotExist:
|
||||
return
|
||||
# Skip if snapshot exists
|
||||
if bookmark.web_archive_snapshot_url:
|
||||
return
|
||||
# Load the newest snapshot
|
||||
_load_newest_snapshot(bookmark)
|
||||
|
||||
|
||||
def schedule_bookmarks_without_snapshots(user: User):
|
||||
|
@ -63,4 +102,6 @@ def _schedule_bookmarks_without_snapshots_task(user_id: int):
|
|||
bookmarks_without_snapshots = Bookmark.objects.filter(web_archive_snapshot_url__exact='', owner=user)
|
||||
|
||||
for bookmark in bookmarks_without_snapshots:
|
||||
_create_web_archive_snapshot_task(bookmark.id, False)
|
||||
# To prevent rate limit errors from the Wayback API only try to load the latest snapshots instead of creating
|
||||
# new ones when processing bookmarks in bulk
|
||||
_load_web_archive_snapshot_task(bookmark.id)
|
||||
|
|
40
bookmarks/services/wayback.py
Normal file
40
bookmarks/services/wayback.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
import time
|
||||
from typing import Dict
|
||||
|
||||
import waybackpy
|
||||
import waybackpy.utils
|
||||
from waybackpy.exceptions import NoCDXRecordFound
|
||||
|
||||
|
||||
class CustomWaybackMachineCDXServerAPI(waybackpy.WaybackMachineCDXServerAPI):
|
||||
"""
|
||||
Customized WaybackMachineCDXServerAPI to work around some issues with retrieving the newest snapshot.
|
||||
See https://github.com/akamhy/waybackpy/issues/176
|
||||
"""
|
||||
|
||||
def newest(self):
|
||||
unix_timestamp = int(time.time())
|
||||
self.closest = waybackpy.utils.unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
||||
self.sort = 'closest'
|
||||
self.limit = -5
|
||||
|
||||
newest_snapshot = None
|
||||
for snapshot in self.snapshots():
|
||||
newest_snapshot = snapshot
|
||||
break
|
||||
|
||||
if not newest_snapshot:
|
||||
raise NoCDXRecordFound(
|
||||
"Wayback Machine's CDX server did not return any records "
|
||||
+ "for the query. The URL may not have any archives "
|
||||
+ " on the Wayback Machine or the URL may have been recently "
|
||||
+ "archived and is still not available on the CDX server."
|
||||
)
|
||||
|
||||
return newest_snapshot
|
||||
|
||||
def add_payload(self, payload: Dict[str, str]) -> None:
|
||||
super().add_payload(payload)
|
||||
# Set fastLatest query param, as we are only using this API to get the latest snapshot and using fastLatest
|
||||
# makes searching for latest snapshots faster
|
||||
payload['fastLatest'] = 'true'
|
|
@ -2,136 +2,139 @@
|
|||
{% load widget_tweaks %}
|
||||
|
||||
{% block content %}
|
||||
<div class="settings-page">
|
||||
<div class="settings-page">
|
||||
|
||||
{% include 'settings/nav.html' %}
|
||||
{% include 'settings/nav.html' %}
|
||||
|
||||
{# Profile section #}
|
||||
<section class="content-area">
|
||||
<h2>Profile</h2>
|
||||
<p>
|
||||
<a href="{% url 'change_password' %}">Change password</a>
|
||||
</p>
|
||||
<form action="{% url 'bookmarks:settings.general' %}" method="post" novalidate>
|
||||
{% csrf_token %}
|
||||
<div class="form-group">
|
||||
<label for="{{ form.theme.id_for_label }}" class="form-label">Theme</label>
|
||||
{{ form.theme|add_class:"form-select col-2 col-sm-12" }}
|
||||
<div class="form-input-hint">
|
||||
Whether to use a light or dark theme, or automatically adjust the theme based on your system's settings.
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="{{ form.bookmark_date_display.id_for_label }}" class="form-label">Bookmark date format</label>
|
||||
{{ form.bookmark_date_display|add_class:"form-select col-2 col-sm-12" }}
|
||||
<div class="form-input-hint">
|
||||
Whether to show bookmark dates as relative (how long ago), or as absolute dates. Alternatively the date can be hidden.
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="{{ form.bookmark_link_target.id_for_label }}" class="form-label">Open bookmarks in</label>
|
||||
{{ form.bookmark_link_target|add_class:"form-select col-2 col-sm-12" }}
|
||||
<div class="form-input-hint">
|
||||
Whether to open bookmarks a new page or in the same page.
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="{{ form.web_archive_integration.id_for_label }}" class="form-label">Internet Archive
|
||||
integration</label>
|
||||
{{ form.web_archive_integration|add_class:"form-select col-2 col-sm-12" }}
|
||||
<div class="form-input-hint">
|
||||
Enabling this feature will automatically create snapshots of bookmarked websites on the <a
|
||||
href="https://web.archive.org/" target="_blank" rel="noopener">Internet Archive Wayback
|
||||
Machine</a>. This allows
|
||||
to preserve, and later access, the website as it was at the point in time it was bookmarked, in
|
||||
case it goes offline or its content is modified.
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="{{ form.enable_sharing.id_for_label }}" class="form-checkbox">
|
||||
{{ form.enable_sharing }}
|
||||
<i class="form-icon"></i> Enable bookmark sharing
|
||||
</label>
|
||||
<div class="form-input-hint">
|
||||
Allows to share bookmarks with other users, and to view shared bookmarks.
|
||||
Disabling this feature will hide all previously shared bookmarks from other users.
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<input type="submit" value="Save" class="btn btn-primary mt-2">
|
||||
</div>
|
||||
</form>
|
||||
</section>
|
||||
{# Profile section #}
|
||||
<section class="content-area">
|
||||
<h2>Profile</h2>
|
||||
<p>
|
||||
<a href="{% url 'change_password' %}">Change password</a>
|
||||
</p>
|
||||
<form action="{% url 'bookmarks:settings.general' %}" method="post" novalidate>
|
||||
{% csrf_token %}
|
||||
<div class="form-group">
|
||||
<label for="{{ form.theme.id_for_label }}" class="form-label">Theme</label>
|
||||
{{ form.theme|add_class:"form-select col-2 col-sm-12" }}
|
||||
<div class="form-input-hint">
|
||||
Whether to use a light or dark theme, or automatically adjust the theme based on your system's settings.
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="{{ form.bookmark_date_display.id_for_label }}" class="form-label">Bookmark date format</label>
|
||||
{{ form.bookmark_date_display|add_class:"form-select col-2 col-sm-12" }}
|
||||
<div class="form-input-hint">
|
||||
Whether to show bookmark dates as relative (how long ago), or as absolute dates. Alternatively the date can
|
||||
be hidden.
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="{{ form.bookmark_link_target.id_for_label }}" class="form-label">Open bookmarks in</label>
|
||||
{{ form.bookmark_link_target|add_class:"form-select col-2 col-sm-12" }}
|
||||
<div class="form-input-hint">
|
||||
Whether to open bookmarks a new page or in the same page.
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="{{ form.web_archive_integration.id_for_label }}" class="form-label">Internet Archive
|
||||
integration</label>
|
||||
{{ form.web_archive_integration|add_class:"form-select col-2 col-sm-12" }}
|
||||
<div class="form-input-hint">
|
||||
Enabling this feature will automatically create snapshots of bookmarked websites on the <a
|
||||
href="https://web.archive.org/" target="_blank" rel="noopener">Internet Archive Wayback
|
||||
Machine</a>.
|
||||
This allows to preserve, and later access the website as it was at the point in time it was bookmarked, in
|
||||
case it goes offline or its content is modified.
|
||||
Please consider donating to the <a href="https://archive.org/donate/index.php" target="_blank"
|
||||
rel="noopener">Internet Archive</a> if you make use of this feature.
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="{{ form.enable_sharing.id_for_label }}" class="form-checkbox">
|
||||
{{ form.enable_sharing }}
|
||||
<i class="form-icon"></i> Enable bookmark sharing
|
||||
</label>
|
||||
<div class="form-input-hint">
|
||||
Allows to share bookmarks with other users, and to view shared bookmarks.
|
||||
Disabling this feature will hide all previously shared bookmarks from other users.
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<input type="submit" value="Save" class="btn btn-primary mt-2">
|
||||
</div>
|
||||
</form>
|
||||
</section>
|
||||
|
||||
{# Import section #}
|
||||
<section class="content-area">
|
||||
<h2>Import</h2>
|
||||
<p>Import bookmarks and tags in the Netscape HTML format. This will execute a sync where new bookmarks are
|
||||
added and existing ones are updated.</p>
|
||||
<form method="post" enctype="multipart/form-data" action="{% url 'bookmarks:settings.import' %}">
|
||||
{% csrf_token %}
|
||||
<div class="form-group">
|
||||
<div class="input-group col-8 col-md-12">
|
||||
<input class="form-input" type="file" name="import_file">
|
||||
<input type="submit" class="input-group-btn col-2 btn btn-primary" value="Upload">
|
||||
</div>
|
||||
{% if import_success_message %}
|
||||
<div class="has-success">
|
||||
<p class="form-input-hint">
|
||||
{{ import_success_message }}
|
||||
</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if import_errors_message %}
|
||||
<div class="has-error">
|
||||
<p class="form-input-hint">
|
||||
{{ import_errors_message }}
|
||||
</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</form>
|
||||
</section>
|
||||
{# Import section #}
|
||||
<section class="content-area">
|
||||
<h2>Import</h2>
|
||||
<p>Import bookmarks and tags in the Netscape HTML format. This will execute a sync where new bookmarks are
|
||||
added and existing ones are updated.</p>
|
||||
<form method="post" enctype="multipart/form-data" action="{% url 'bookmarks:settings.import' %}">
|
||||
{% csrf_token %}
|
||||
<div class="form-group">
|
||||
<div class="input-group col-8 col-md-12">
|
||||
<input class="form-input" type="file" name="import_file">
|
||||
<input type="submit" class="input-group-btn col-2 btn btn-primary" value="Upload">
|
||||
</div>
|
||||
{% if import_success_message %}
|
||||
<div class="has-success">
|
||||
<p class="form-input-hint">
|
||||
{{ import_success_message }}
|
||||
</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if import_errors_message %}
|
||||
<div class="has-error">
|
||||
<p class="form-input-hint">
|
||||
{{ import_errors_message }}
|
||||
</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</form>
|
||||
</section>
|
||||
|
||||
{# Export section #}
|
||||
<section class="content-area">
|
||||
<h2>Export</h2>
|
||||
<p>Export all bookmarks in Netscape HTML format.</p>
|
||||
<a class="btn btn-primary" href="{% url 'bookmarks:settings.export' %}">Download (.html)</a>
|
||||
{% if export_error %}
|
||||
<div class="has-error">
|
||||
<p class="form-input-hint">
|
||||
{{ export_error }}
|
||||
</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
</section>
|
||||
{# Export section #}
|
||||
<section class="content-area">
|
||||
<h2>Export</h2>
|
||||
<p>Export all bookmarks in Netscape HTML format.</p>
|
||||
<a class="btn btn-primary" href="{% url 'bookmarks:settings.export' %}">Download (.html)</a>
|
||||
{% if export_error %}
|
||||
<div class="has-error">
|
||||
<p class="form-input-hint">
|
||||
{{ export_error }}
|
||||
</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
</section>
|
||||
|
||||
{# About section #}
|
||||
<section class="content-area about">
|
||||
<h2>About</h2>
|
||||
<table class="table">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>Version</td>
|
||||
<td>{{ version_info }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td rowspan="3" style="vertical-align: top">Links</td>
|
||||
<td><a href="https://github.com/sissbruecker/linkding/"
|
||||
target="_blank">GitHub</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://github.com/sissbruecker/linkding#documentation"
|
||||
target="_blank">Documentation</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://github.com/sissbruecker/linkding/blob/master/CHANGELOG.md"
|
||||
target="_blank">Changelog</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</section>
|
||||
</div>
|
||||
{# About section #}
|
||||
<section class="content-area about">
|
||||
<h2>About</h2>
|
||||
<table class="table">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>Version</td>
|
||||
<td>{{ version_info }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td rowspan="3" style="vertical-align: top">Links</td>
|
||||
<td><a href="https://github.com/sissbruecker/linkding/"
|
||||
target="_blank">GitHub</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://github.com/sissbruecker/linkding#documentation"
|
||||
target="_blank">Documentation</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://github.com/sissbruecker/linkding/blob/master/CHANGELOG.md"
|
||||
target="_blank">Changelog</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</section>
|
||||
</div>
|
||||
|
||||
{% endblock %}
|
||||
|
|
|
@ -1,25 +1,51 @@
|
|||
import datetime
|
||||
from dataclasses import dataclass
|
||||
from unittest.mock import patch
|
||||
|
||||
import waybackpy
|
||||
from background_task.models import Task
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import TestCase, override_settings
|
||||
from waybackpy.exceptions import WaybackError
|
||||
|
||||
from bookmarks.models import Bookmark, UserProfile
|
||||
import bookmarks.services.wayback
|
||||
from bookmarks.models import UserProfile
|
||||
from bookmarks.services import tasks
|
||||
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
|
||||
|
||||
|
||||
class MockWaybackMachineSaveAPI:
|
||||
def __init__(self, archive_url: str):
|
||||
def __init__(self, archive_url: str = 'https://example.com/created_snapshot', fail_on_save: bool = False):
|
||||
self.archive_url = archive_url
|
||||
self.fail_on_save = fail_on_save
|
||||
|
||||
def save(self):
|
||||
if self.fail_on_save:
|
||||
raise WaybackError
|
||||
return self
|
||||
|
||||
class MockWaybackUrlWithSaveError:
|
||||
def save(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@dataclass
|
||||
class MockCdxSnapshot:
|
||||
archive_url: str
|
||||
datetime_timestamp: datetime.datetime
|
||||
|
||||
|
||||
class MockWaybackMachineCDXServerAPI:
|
||||
def __init__(self,
|
||||
archive_url: str = 'https://example.com/newest_snapshot',
|
||||
has_no_snapshot=False,
|
||||
fail_loading_snapshot=False):
|
||||
self.archive_url = archive_url
|
||||
self.has_no_snapshot = has_no_snapshot
|
||||
self.fail_loading_snapshot = fail_loading_snapshot
|
||||
|
||||
def newest(self):
|
||||
if self.has_no_snapshot:
|
||||
return None
|
||||
if self.fail_loading_snapshot:
|
||||
raise WaybackError
|
||||
return MockCdxSnapshot(self.archive_url, datetime.datetime.now())
|
||||
|
||||
|
||||
class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
||||
|
@ -50,49 +76,130 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
|||
def test_create_web_archive_snapshot_should_update_snapshot_url(self):
|
||||
bookmark = self.setup_bookmark()
|
||||
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')):
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI()):
|
||||
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||
bookmark.refresh_from_db()
|
||||
|
||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
|
||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com/created_snapshot')
|
||||
|
||||
def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self):
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')) as mock_wayback_url:
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||
return_value=MockWaybackMachineSaveAPI()) as mock_save_api:
|
||||
tasks._create_web_archive_snapshot_task(123, False)
|
||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||
|
||||
mock_wayback_url.assert_not_called()
|
||||
|
||||
def test_create_web_archive_snapshot_should_handle_wayback_save_error(self):
|
||||
bookmark = self.setup_bookmark()
|
||||
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||
return_value=MockWaybackUrlWithSaveError()):
|
||||
with self.assertRaises(NotImplementedError):
|
||||
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||
mock_save_api.assert_not_called()
|
||||
|
||||
def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self):
|
||||
bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
||||
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')):
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||
return_value=MockWaybackMachineSaveAPI()) as mock_save_api:
|
||||
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||
bookmark.refresh_from_db()
|
||||
|
||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
|
||||
mock_save_api.assert_not_called()
|
||||
|
||||
def test_create_web_archive_snapshot_should_force_update_snapshot(self):
|
||||
bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
||||
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')):
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||
return_value=MockWaybackMachineSaveAPI('https://other.com')):
|
||||
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, True)
|
||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||
bookmark.refresh_from_db()
|
||||
|
||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://other.com')
|
||||
|
||||
def test_create_web_archive_snapshot_should_use_newest_snapshot_as_fallback(self):
|
||||
bookmark = self.setup_bookmark()
|
||||
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||
return_value=MockWaybackMachineSaveAPI(fail_on_save=True)):
|
||||
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||
return_value=MockWaybackMachineCDXServerAPI()):
|
||||
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||
|
||||
bookmark.refresh_from_db()
|
||||
self.assertEqual('https://example.com/newest_snapshot', bookmark.web_archive_snapshot_url)
|
||||
|
||||
def test_create_web_archive_snapshot_should_ignore_missing_newest_snapshot(self):
|
||||
bookmark = self.setup_bookmark()
|
||||
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||
return_value=MockWaybackMachineSaveAPI(fail_on_save=True)):
|
||||
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||
return_value=MockWaybackMachineCDXServerAPI(has_no_snapshot=True)):
|
||||
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||
|
||||
bookmark.refresh_from_db()
|
||||
self.assertEqual('', bookmark.web_archive_snapshot_url)
|
||||
|
||||
def test_create_web_archive_snapshot_should_ignore_newest_snapshot_errors(self):
|
||||
bookmark = self.setup_bookmark()
|
||||
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI',
|
||||
return_value=MockWaybackMachineSaveAPI(fail_on_save=True)):
|
||||
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||
return_value=MockWaybackMachineCDXServerAPI(fail_loading_snapshot=True)):
|
||||
tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False)
|
||||
self.run_pending_task(tasks._create_web_archive_snapshot_task)
|
||||
|
||||
bookmark.refresh_from_db()
|
||||
self.assertEqual('', bookmark.web_archive_snapshot_url)
|
||||
|
||||
def test_load_web_archive_snapshot_should_update_snapshot_url(self):
|
||||
bookmark = self.setup_bookmark()
|
||||
|
||||
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||
return_value=MockWaybackMachineCDXServerAPI()):
|
||||
tasks._load_web_archive_snapshot_task(bookmark.id)
|
||||
self.run_pending_task(tasks._load_web_archive_snapshot_task)
|
||||
|
||||
bookmark.refresh_from_db()
|
||||
self.assertEqual('https://example.com/newest_snapshot', bookmark.web_archive_snapshot_url)
|
||||
|
||||
def test_load_web_archive_snapshot_should_handle_missing_bookmark_id(self):
|
||||
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||
return_value=MockWaybackMachineCDXServerAPI()) as mock_cdx_api:
|
||||
tasks._load_web_archive_snapshot_task(123)
|
||||
self.run_pending_task(tasks._load_web_archive_snapshot_task)
|
||||
|
||||
mock_cdx_api.assert_not_called()
|
||||
|
||||
def test_load_web_archive_snapshot_should_skip_if_snapshot_exists(self):
|
||||
bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
||||
|
||||
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||
return_value=MockWaybackMachineCDXServerAPI()) as mock_cdx_api:
|
||||
tasks._load_web_archive_snapshot_task(bookmark.id)
|
||||
self.run_pending_task(tasks._load_web_archive_snapshot_task)
|
||||
|
||||
mock_cdx_api.assert_not_called()
|
||||
|
||||
def test_load_web_archive_snapshot_should_handle_missing_snapshot(self):
|
||||
bookmark = self.setup_bookmark()
|
||||
|
||||
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||
return_value=MockWaybackMachineCDXServerAPI(has_no_snapshot=True)):
|
||||
tasks._load_web_archive_snapshot_task(bookmark.id)
|
||||
self.run_pending_task(tasks._load_web_archive_snapshot_task)
|
||||
|
||||
self.assertEqual('', bookmark.web_archive_snapshot_url)
|
||||
|
||||
def test_load_web_archive_snapshot_should_handle_wayback_errors(self):
|
||||
bookmark = self.setup_bookmark()
|
||||
|
||||
with patch.object(bookmarks.services.wayback, 'CustomWaybackMachineCDXServerAPI',
|
||||
return_value=MockWaybackMachineCDXServerAPI(fail_loading_snapshot=True)):
|
||||
tasks._load_web_archive_snapshot_task(bookmark.id)
|
||||
self.run_pending_task(tasks._load_web_archive_snapshot_task)
|
||||
|
||||
self.assertEqual('', bookmark.web_archive_snapshot_url)
|
||||
|
||||
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
|
||||
def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(self):
|
||||
bookmark = self.setup_bookmark()
|
||||
|
@ -109,33 +216,23 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
|||
|
||||
self.assertEqual(Task.objects.count(), 0)
|
||||
|
||||
def test_schedule_bookmarks_without_snapshots_should_create_snapshot_task_for_all_bookmarks_without_snapshot(self):
|
||||
def test_schedule_bookmarks_without_snapshots_should_load_snapshot_for_all_bookmarks_without_snapshot(self):
|
||||
user = self.get_or_create_test_user()
|
||||
self.setup_bookmark()
|
||||
self.setup_bookmark()
|
||||
self.setup_bookmark()
|
||||
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')):
|
||||
tasks.schedule_bookmarks_without_snapshots(user)
|
||||
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
|
||||
self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task)
|
||||
|
||||
for bookmark in Bookmark.objects.all():
|
||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
|
||||
|
||||
def test_schedule_bookmarks_without_snapshots_should_not_update_bookmarks_with_existing_snapshot(self):
|
||||
user = self.get_or_create_test_user()
|
||||
self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
||||
self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
||||
self.setup_bookmark(web_archive_snapshot_url='https://example.com')
|
||||
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')):
|
||||
tasks.schedule_bookmarks_without_snapshots(user)
|
||||
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
|
||||
self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task)
|
||||
tasks.schedule_bookmarks_without_snapshots(user)
|
||||
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
|
||||
|
||||
for bookmark in Bookmark.objects.all():
|
||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
|
||||
task_list = Task.objects.all()
|
||||
self.assertEqual(task_list.count(), 3)
|
||||
|
||||
for task in task_list:
|
||||
self.assertEqual(task.task_name, 'bookmarks.services.tasks._load_web_archive_snapshot_task')
|
||||
|
||||
def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(self):
|
||||
user = self.get_or_create_test_user()
|
||||
|
@ -147,16 +244,11 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
|||
self.setup_bookmark(user=other_user)
|
||||
self.setup_bookmark(user=other_user)
|
||||
|
||||
with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')):
|
||||
tasks.schedule_bookmarks_without_snapshots(user)
|
||||
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
|
||||
self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task)
|
||||
tasks.schedule_bookmarks_without_snapshots(user)
|
||||
self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task)
|
||||
|
||||
for bookmark in Bookmark.objects.all().filter(owner=user):
|
||||
self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com')
|
||||
|
||||
for bookmark in Bookmark.objects.all().filter(owner=other_user):
|
||||
self.assertEqual(bookmark.web_archive_snapshot_url, '')
|
||||
task_list = Task.objects.all()
|
||||
self.assertEqual(task_list.count(), 3)
|
||||
|
||||
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
|
||||
def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(self):
|
||||
|
|
Loading…
Reference in a new issue