mirror of
https://github.com/sissbruecker/linkding
synced 2024-11-13 23:27:16 +00:00
Generate fallback URLs for web archive links (#804)
* generate fallback web archive URL if none exists * remove fallback web archive snapshot creation * fix test
This commit is contained in:
parent
36a84276a2
commit
749bc1ef63
10 changed files with 53 additions and 336 deletions
|
@ -79,8 +79,6 @@ def import_netscape_html(
|
||||||
for batch in batches:
|
for batch in batches:
|
||||||
_import_batch(batch, user, options, tag_cache, result)
|
_import_batch(batch, user, options, tag_cache, result)
|
||||||
|
|
||||||
# Create snapshots for newly imported bookmarks
|
|
||||||
tasks.schedule_bookmarks_without_snapshots(user)
|
|
||||||
# Load favicons for newly imported bookmarks
|
# Load favicons for newly imported bookmarks
|
||||||
tasks.schedule_bookmarks_without_favicons(user)
|
tasks.schedule_bookmarks_without_favicons(user)
|
||||||
# Load previews for newly imported bookmarks
|
# Load previews for newly imported bookmarks
|
||||||
|
|
|
@ -12,9 +12,8 @@ from django.utils import timezone, formats
|
||||||
from huey import crontab
|
from huey import crontab
|
||||||
from huey.contrib.djhuey import HUEY as huey
|
from huey.contrib.djhuey import HUEY as huey
|
||||||
from huey.exceptions import TaskLockedException
|
from huey.exceptions import TaskLockedException
|
||||||
from waybackpy.exceptions import WaybackError, TooManyRequestsError, NoCDXRecordFound
|
from waybackpy.exceptions import WaybackError, TooManyRequestsError
|
||||||
|
|
||||||
import bookmarks.services.wayback
|
|
||||||
from bookmarks.models import Bookmark, BookmarkAsset, UserProfile
|
from bookmarks.models import Bookmark, BookmarkAsset, UserProfile
|
||||||
from bookmarks.services import favicon_loader, singlefile, preview_image_loader
|
from bookmarks.services import favicon_loader, singlefile, preview_image_loader
|
||||||
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
|
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
|
||||||
|
@ -66,29 +65,6 @@ def create_web_archive_snapshot(user: User, bookmark: Bookmark, force_update: bo
|
||||||
_create_web_archive_snapshot_task(bookmark.id, force_update)
|
_create_web_archive_snapshot_task(bookmark.id, force_update)
|
||||||
|
|
||||||
|
|
||||||
def _load_newest_snapshot(bookmark: Bookmark):
|
|
||||||
try:
|
|
||||||
logger.info(f"Load existing snapshot for bookmark. url={bookmark.url}")
|
|
||||||
cdx_api = bookmarks.services.wayback.CustomWaybackMachineCDXServerAPI(
|
|
||||||
bookmark.url
|
|
||||||
)
|
|
||||||
existing_snapshot = cdx_api.newest()
|
|
||||||
|
|
||||||
if existing_snapshot:
|
|
||||||
bookmark.web_archive_snapshot_url = existing_snapshot.archive_url
|
|
||||||
bookmark.save(update_fields=["web_archive_snapshot_url"])
|
|
||||||
logger.info(
|
|
||||||
f"Using newest snapshot. url={bookmark.url} from={existing_snapshot.datetime_timestamp}"
|
|
||||||
)
|
|
||||||
|
|
||||||
except NoCDXRecordFound:
|
|
||||||
logger.info(f"Could not find any snapshots for bookmark. url={bookmark.url}")
|
|
||||||
except WaybackError as error:
|
|
||||||
logger.error(
|
|
||||||
f"Failed to load existing snapshot. url={bookmark.url}", exc_info=error
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _create_snapshot(bookmark: Bookmark):
|
def _create_snapshot(bookmark: Bookmark):
|
||||||
logger.info(f"Create new snapshot for bookmark. url={bookmark.url}...")
|
logger.info(f"Create new snapshot for bookmark. url={bookmark.url}...")
|
||||||
archive = waybackpy.WaybackMachineSaveAPI(
|
archive = waybackpy.WaybackMachineSaveAPI(
|
||||||
|
@ -117,48 +93,27 @@ def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
|
||||||
return
|
return
|
||||||
except TooManyRequestsError:
|
except TooManyRequestsError:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"Failed to create snapshot due to rate limiting, trying to load newest snapshot as fallback. url={bookmark.url}"
|
f"Failed to create snapshot due to rate limiting. url={bookmark.url}"
|
||||||
)
|
)
|
||||||
except WaybackError as error:
|
except WaybackError as error:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"Failed to create snapshot, trying to load newest snapshot as fallback. url={bookmark.url}",
|
f"Failed to create snapshot. url={bookmark.url}",
|
||||||
exc_info=error,
|
exc_info=error,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Load the newest snapshot as fallback
|
|
||||||
_load_newest_snapshot(bookmark)
|
|
||||||
|
|
||||||
|
|
||||||
@task()
|
@task()
|
||||||
def _load_web_archive_snapshot_task(bookmark_id: int):
|
def _load_web_archive_snapshot_task(bookmark_id: int):
|
||||||
try:
|
# Loading snapshots from CDX API has been removed, keeping the task function
|
||||||
bookmark = Bookmark.objects.get(id=bookmark_id)
|
# for now to prevent errors when huey tries to run the task
|
||||||
except Bookmark.DoesNotExist:
|
pass
|
||||||
return
|
|
||||||
# Skip if snapshot exists
|
|
||||||
if bookmark.web_archive_snapshot_url:
|
|
||||||
return
|
|
||||||
# Load the newest snapshot
|
|
||||||
_load_newest_snapshot(bookmark)
|
|
||||||
|
|
||||||
|
|
||||||
def schedule_bookmarks_without_snapshots(user: User):
|
|
||||||
if is_web_archive_integration_active(user):
|
|
||||||
_schedule_bookmarks_without_snapshots_task(user.id)
|
|
||||||
|
|
||||||
|
|
||||||
@task()
|
@task()
|
||||||
def _schedule_bookmarks_without_snapshots_task(user_id: int):
|
def _schedule_bookmarks_without_snapshots_task(user_id: int):
|
||||||
user = get_user_model().objects.get(id=user_id)
|
# Loading snapshots from CDX API has been removed, keeping the task function
|
||||||
bookmarks_without_snapshots = Bookmark.objects.filter(
|
# for now to prevent errors when huey tries to run the task
|
||||||
web_archive_snapshot_url__exact="", owner=user
|
pass
|
||||||
)
|
|
||||||
|
|
||||||
# TODO: Implement bulk task creation
|
|
||||||
for bookmark in bookmarks_without_snapshots:
|
|
||||||
# To prevent rate limit errors from the Wayback API only try to load the latest snapshots instead of creating
|
|
||||||
# new ones when processing bookmarks in bulk
|
|
||||||
_load_web_archive_snapshot_task(bookmark.id)
|
|
||||||
|
|
||||||
|
|
||||||
def is_favicon_feature_active(user: User) -> bool:
|
def is_favicon_feature_active(user: User) -> bool:
|
||||||
|
|
|
@ -1,42 +1,20 @@
|
||||||
import time
|
import datetime
|
||||||
from typing import Dict
|
|
||||||
|
|
||||||
import waybackpy
|
from django.utils import timezone
|
||||||
import waybackpy.utils
|
|
||||||
from waybackpy.exceptions import NoCDXRecordFound
|
|
||||||
|
|
||||||
|
|
||||||
class CustomWaybackMachineCDXServerAPI(waybackpy.WaybackMachineCDXServerAPI):
|
def generate_fallback_webarchive_url(
|
||||||
|
url: str, timestamp: datetime.datetime
|
||||||
|
) -> str | None:
|
||||||
"""
|
"""
|
||||||
Customized WaybackMachineCDXServerAPI to work around some issues with retrieving the newest snapshot.
|
Generate a URL to the web archive for the given URL and timestamp.
|
||||||
See https://github.com/akamhy/waybackpy/issues/176
|
A snapshot for the specific timestamp might not exist, in which case the
|
||||||
|
web archive will show the closest snapshot to the given timestamp.
|
||||||
|
If there is no snapshot at all the URL will be invalid.
|
||||||
"""
|
"""
|
||||||
|
if not url:
|
||||||
|
return None
|
||||||
|
if not timestamp:
|
||||||
|
timestamp = timezone.now()
|
||||||
|
|
||||||
def newest(self):
|
return f"https://web.archive.org/web/{timestamp.strftime('%Y%m%d%H%M%S')}/{url}"
|
||||||
unix_timestamp = int(time.time())
|
|
||||||
self.closest = waybackpy.utils.unix_timestamp_to_wayback_timestamp(
|
|
||||||
unix_timestamp
|
|
||||||
)
|
|
||||||
self.sort = "closest"
|
|
||||||
self.limit = -5
|
|
||||||
|
|
||||||
newest_snapshot = None
|
|
||||||
for snapshot in self.snapshots():
|
|
||||||
newest_snapshot = snapshot
|
|
||||||
break
|
|
||||||
|
|
||||||
if not newest_snapshot:
|
|
||||||
raise NoCDXRecordFound(
|
|
||||||
"Wayback Machine's CDX server did not return any records "
|
|
||||||
+ "for the query. The URL may not have any archives "
|
|
||||||
+ " on the Wayback Machine or the URL may have been recently "
|
|
||||||
+ "archived and is still not available on the CDX server."
|
|
||||||
)
|
|
||||||
|
|
||||||
return newest_snapshot
|
|
||||||
|
|
||||||
def add_payload(self, payload: Dict[str, str]) -> None:
|
|
||||||
super().add_payload(payload)
|
|
||||||
# Set fastLatest query param, as we are only using this API to get the latest snapshot and using fastLatest
|
|
||||||
# makes searching for latest snapshots faster
|
|
||||||
payload["fastLatest"] = "true"
|
|
||||||
|
|
|
@ -1,15 +1,7 @@
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.contrib.auth import user_logged_in
|
|
||||||
from django.db.backends.signals import connection_created
|
from django.db.backends.signals import connection_created
|
||||||
from django.dispatch import receiver
|
from django.dispatch import receiver
|
||||||
|
|
||||||
from bookmarks.services import tasks
|
|
||||||
|
|
||||||
|
|
||||||
@receiver(user_logged_in)
|
|
||||||
def user_logged_in(sender, request, user, **kwargs):
|
|
||||||
tasks.schedule_bookmarks_without_snapshots(user)
|
|
||||||
|
|
||||||
|
|
||||||
@receiver(connection_created)
|
@receiver(connection_created)
|
||||||
def extend_sqlite(connection=None, **kwargs):
|
def extend_sqlite(connection=None, **kwargs):
|
||||||
|
|
|
@ -6,11 +6,7 @@ from django.test import TestCase
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
|
|
||||||
from bookmarks.models import Bookmark, BookmarkSearch, Tag, UserProfile
|
from bookmarks.models import Bookmark, BookmarkSearch, Tag, UserProfile
|
||||||
from bookmarks.tests.helpers import (
|
from bookmarks.tests.helpers import BookmarkFactoryMixin, HtmlTestMixin
|
||||||
BookmarkFactoryMixin,
|
|
||||||
HtmlTestMixin,
|
|
||||||
collapse_whitespace,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BookmarkIndexViewTestCase(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
|
class BookmarkIndexViewTestCase(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import datetime
|
||||||
from typing import Type
|
from typing import Type
|
||||||
|
|
||||||
from dateutil.relativedelta import relativedelta
|
from dateutil.relativedelta import relativedelta
|
||||||
|
@ -36,15 +37,6 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
|
||||||
html,
|
html,
|
||||||
)
|
)
|
||||||
|
|
||||||
def assertDateLabel(self, html: str, label_content: str):
|
|
||||||
self.assertInHTML(
|
|
||||||
f"""
|
|
||||||
<span>{label_content}</span>
|
|
||||||
<span>|</span>
|
|
||||||
""",
|
|
||||||
html,
|
|
||||||
)
|
|
||||||
|
|
||||||
def assertWebArchiveLink(
|
def assertWebArchiveLink(
|
||||||
self, html: str, label_content: str, url: str, link_target: str = "_blank"
|
self, html: str, label_content: str, url: str, link_target: str = "_blank"
|
||||||
):
|
):
|
||||||
|
@ -465,15 +457,6 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
|
||||||
style = bookmark_list["style"]
|
style = bookmark_list["style"]
|
||||||
self.assertIn("--ld-bookmark-description-max-lines:3;", style)
|
self.assertIn("--ld-bookmark-description-max-lines:3;", style)
|
||||||
|
|
||||||
def test_should_respect_absolute_date_setting(self):
|
|
||||||
bookmark = self.setup_date_format_test(
|
|
||||||
UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE
|
|
||||||
)
|
|
||||||
html = self.render_template()
|
|
||||||
formatted_date = formats.date_format(bookmark.date_added, "SHORT_DATE_FORMAT")
|
|
||||||
|
|
||||||
self.assertDateLabel(html, formatted_date)
|
|
||||||
|
|
||||||
def test_should_render_web_archive_link_with_absolute_date_setting(self):
|
def test_should_render_web_archive_link_with_absolute_date_setting(self):
|
||||||
bookmark = self.setup_date_format_test(
|
bookmark = self.setup_date_format_test(
|
||||||
UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE,
|
UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE,
|
||||||
|
@ -486,12 +469,6 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
|
||||||
html, formatted_date, bookmark.web_archive_snapshot_url
|
html, formatted_date, bookmark.web_archive_snapshot_url
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_should_respect_relative_date_setting(self):
|
|
||||||
self.setup_date_format_test(UserProfile.BOOKMARK_DATE_DISPLAY_RELATIVE)
|
|
||||||
html = self.render_template()
|
|
||||||
|
|
||||||
self.assertDateLabel(html, "1 week ago")
|
|
||||||
|
|
||||||
def test_should_render_web_archive_link_with_relative_date_setting(self):
|
def test_should_render_web_archive_link_with_relative_date_setting(self):
|
||||||
bookmark = self.setup_date_format_test(
|
bookmark = self.setup_date_format_test(
|
||||||
UserProfile.BOOKMARK_DATE_DISPLAY_RELATIVE,
|
UserProfile.BOOKMARK_DATE_DISPLAY_RELATIVE,
|
||||||
|
@ -501,6 +478,27 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
|
||||||
|
|
||||||
self.assertWebArchiveLink(html, "1 week ago", bookmark.web_archive_snapshot_url)
|
self.assertWebArchiveLink(html, "1 week ago", bookmark.web_archive_snapshot_url)
|
||||||
|
|
||||||
|
def test_should_render_generated_web_archive_link_without_saved_snapshot_url(self):
|
||||||
|
user = self.get_or_create_test_user()
|
||||||
|
user.profile.bookmark_date_display = UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE
|
||||||
|
user.profile.save()
|
||||||
|
|
||||||
|
date_added = timezone.datetime(
|
||||||
|
2023, 8, 11, 21, 45, 11, tzinfo=datetime.timezone.utc
|
||||||
|
)
|
||||||
|
bookmark = self.setup_bookmark(
|
||||||
|
url="https://example.com/article", added=date_added
|
||||||
|
)
|
||||||
|
|
||||||
|
html = self.render_template()
|
||||||
|
formatted_date = formats.date_format(bookmark.date_added, "SHORT_DATE_FORMAT")
|
||||||
|
|
||||||
|
self.assertWebArchiveLink(
|
||||||
|
html,
|
||||||
|
formatted_date,
|
||||||
|
"https://web.archive.org/web/20230811214511/https://example.com/article",
|
||||||
|
)
|
||||||
|
|
||||||
def test_bookmark_link_target_should_be_blank_by_default(self):
|
def test_bookmark_link_target_should_be_blank_by_default(self):
|
||||||
bookmark = self.setup_bookmark()
|
bookmark = self.setup_bookmark()
|
||||||
html = self.render_template()
|
html = self.render_template()
|
||||||
|
|
|
@ -1,6 +1,4 @@
|
||||||
import datetime
|
|
||||||
import os.path
|
import os.path
|
||||||
from dataclasses import dataclass
|
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
import waybackpy
|
import waybackpy
|
||||||
|
@ -10,8 +8,6 @@ from django.test import TestCase, override_settings
|
||||||
from huey.contrib.djhuey import HUEY as huey
|
from huey.contrib.djhuey import HUEY as huey
|
||||||
from waybackpy.exceptions import WaybackError
|
from waybackpy.exceptions import WaybackError
|
||||||
|
|
||||||
import bookmarks.services.favicon_loader
|
|
||||||
import bookmarks.services.wayback
|
|
||||||
from bookmarks.models import BookmarkAsset, UserProfile
|
from bookmarks.models import BookmarkAsset, UserProfile
|
||||||
from bookmarks.services import tasks, singlefile
|
from bookmarks.services import tasks, singlefile
|
||||||
from bookmarks.tests.helpers import BookmarkFactoryMixin
|
from bookmarks.tests.helpers import BookmarkFactoryMixin
|
||||||
|
@ -29,12 +25,6 @@ def create_wayback_machine_save_api_mock(
|
||||||
return mock_api
|
return mock_api
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class MockCdxSnapshot:
|
|
||||||
archive_url: str
|
|
||||||
datetime_timestamp: datetime.datetime
|
|
||||||
|
|
||||||
|
|
||||||
class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
@ -50,17 +40,6 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
||||||
)
|
)
|
||||||
self.mock_save_api_patcher.start()
|
self.mock_save_api_patcher.start()
|
||||||
|
|
||||||
self.mock_cdx_api = mock.Mock()
|
|
||||||
self.mock_cdx_api.newest.return_value = MockCdxSnapshot(
|
|
||||||
"https://example.com/newest_snapshot", datetime.datetime.now()
|
|
||||||
)
|
|
||||||
self.mock_cdx_api_patcher = mock.patch.object(
|
|
||||||
bookmarks.services.wayback,
|
|
||||||
"CustomWaybackMachineCDXServerAPI",
|
|
||||||
return_value=self.mock_cdx_api,
|
|
||||||
)
|
|
||||||
self.mock_cdx_api_patcher.start()
|
|
||||||
|
|
||||||
self.mock_load_favicon_patcher = mock.patch(
|
self.mock_load_favicon_patcher = mock.patch(
|
||||||
"bookmarks.services.favicon_loader.load_favicon"
|
"bookmarks.services.favicon_loader.load_favicon"
|
||||||
)
|
)
|
||||||
|
@ -90,7 +69,6 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
self.mock_save_api_patcher.stop()
|
self.mock_save_api_patcher.stop()
|
||||||
self.mock_cdx_api_patcher.stop()
|
|
||||||
self.mock_load_favicon_patcher.stop()
|
self.mock_load_favicon_patcher.stop()
|
||||||
self.mock_singlefile_create_snapshot_patcher.stop()
|
self.mock_singlefile_create_snapshot_patcher.stop()
|
||||||
self.mock_load_preview_image_patcher.stop()
|
self.mock_load_preview_image_patcher.stop()
|
||||||
|
@ -142,45 +120,6 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
||||||
|
|
||||||
self.assertEqual(bookmark.web_archive_snapshot_url, "https://other.com")
|
self.assertEqual(bookmark.web_archive_snapshot_url, "https://other.com")
|
||||||
|
|
||||||
def test_create_web_archive_snapshot_should_use_newest_snapshot_as_fallback(self):
|
|
||||||
bookmark = self.setup_bookmark()
|
|
||||||
self.mock_save_api.save.side_effect = WaybackError
|
|
||||||
|
|
||||||
tasks.create_web_archive_snapshot(
|
|
||||||
self.get_or_create_test_user(), bookmark, False
|
|
||||||
)
|
|
||||||
|
|
||||||
bookmark.refresh_from_db()
|
|
||||||
self.mock_cdx_api.newest.assert_called_once()
|
|
||||||
self.assertEqual(
|
|
||||||
"https://example.com/newest_snapshot",
|
|
||||||
bookmark.web_archive_snapshot_url,
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_create_web_archive_snapshot_should_ignore_missing_newest_snapshot(self):
|
|
||||||
bookmark = self.setup_bookmark()
|
|
||||||
self.mock_save_api.save.side_effect = WaybackError
|
|
||||||
self.mock_cdx_api.newest.return_value = None
|
|
||||||
|
|
||||||
tasks.create_web_archive_snapshot(
|
|
||||||
self.get_or_create_test_user(), bookmark, False
|
|
||||||
)
|
|
||||||
|
|
||||||
bookmark.refresh_from_db()
|
|
||||||
self.assertEqual("", bookmark.web_archive_snapshot_url)
|
|
||||||
|
|
||||||
def test_create_web_archive_snapshot_should_ignore_newest_snapshot_errors(self):
|
|
||||||
bookmark = self.setup_bookmark()
|
|
||||||
self.mock_save_api.save.side_effect = WaybackError
|
|
||||||
self.mock_cdx_api.newest.side_effect = WaybackError
|
|
||||||
|
|
||||||
tasks.create_web_archive_snapshot(
|
|
||||||
self.get_or_create_test_user(), bookmark, False
|
|
||||||
)
|
|
||||||
|
|
||||||
bookmark.refresh_from_db()
|
|
||||||
self.assertEqual("", bookmark.web_archive_snapshot_url)
|
|
||||||
|
|
||||||
def test_create_web_archive_snapshot_should_not_save_stale_bookmark_data(self):
|
def test_create_web_archive_snapshot_should_not_save_stale_bookmark_data(self):
|
||||||
bookmark = self.setup_bookmark()
|
bookmark = self.setup_bookmark()
|
||||||
|
|
||||||
|
@ -203,69 +142,6 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
||||||
bookmark.web_archive_snapshot_url,
|
bookmark.web_archive_snapshot_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_load_web_archive_snapshot_should_update_snapshot_url(self):
|
|
||||||
bookmark = self.setup_bookmark()
|
|
||||||
|
|
||||||
tasks._load_web_archive_snapshot_task(bookmark.id)
|
|
||||||
|
|
||||||
bookmark.refresh_from_db()
|
|
||||||
|
|
||||||
self.assertEqual(self.executed_count(), 1)
|
|
||||||
self.mock_cdx_api.newest.assert_called_once()
|
|
||||||
self.assertEqual(
|
|
||||||
"https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_load_web_archive_snapshot_should_handle_missing_bookmark_id(self):
|
|
||||||
tasks._load_web_archive_snapshot_task(123)
|
|
||||||
|
|
||||||
self.assertEqual(self.executed_count(), 1)
|
|
||||||
self.mock_cdx_api.newest.assert_not_called()
|
|
||||||
|
|
||||||
def test_load_web_archive_snapshot_should_skip_if_snapshot_exists(self):
|
|
||||||
bookmark = self.setup_bookmark(web_archive_snapshot_url="https://example.com")
|
|
||||||
|
|
||||||
tasks._load_web_archive_snapshot_task(bookmark.id)
|
|
||||||
|
|
||||||
self.assertEqual(self.executed_count(), 1)
|
|
||||||
self.mock_cdx_api.newest.assert_not_called()
|
|
||||||
|
|
||||||
def test_load_web_archive_snapshot_should_handle_missing_snapshot(self):
|
|
||||||
bookmark = self.setup_bookmark()
|
|
||||||
self.mock_cdx_api.newest.return_value = None
|
|
||||||
|
|
||||||
tasks._load_web_archive_snapshot_task(bookmark.id)
|
|
||||||
|
|
||||||
self.assertEqual("", bookmark.web_archive_snapshot_url)
|
|
||||||
|
|
||||||
def test_load_web_archive_snapshot_should_handle_wayback_errors(self):
|
|
||||||
bookmark = self.setup_bookmark()
|
|
||||||
self.mock_cdx_api.newest.side_effect = WaybackError
|
|
||||||
|
|
||||||
tasks._load_web_archive_snapshot_task(bookmark.id)
|
|
||||||
|
|
||||||
self.assertEqual("", bookmark.web_archive_snapshot_url)
|
|
||||||
|
|
||||||
def test_load_web_archive_snapshot_should_not_save_stale_bookmark_data(self):
|
|
||||||
bookmark = self.setup_bookmark()
|
|
||||||
|
|
||||||
# update bookmark during API call to check that saving
|
|
||||||
# the snapshot does not overwrite updated bookmark data
|
|
||||||
def mock_newest_impl():
|
|
||||||
bookmark.title = "Updated title"
|
|
||||||
bookmark.save()
|
|
||||||
return mock.DEFAULT
|
|
||||||
|
|
||||||
self.mock_cdx_api.newest.side_effect = mock_newest_impl
|
|
||||||
|
|
||||||
tasks._load_web_archive_snapshot_task(bookmark.id)
|
|
||||||
bookmark.refresh_from_db()
|
|
||||||
|
|
||||||
self.assertEqual("Updated title", bookmark.title)
|
|
||||||
self.assertEqual(
|
|
||||||
"https://example.com/newest_snapshot", bookmark.web_archive_snapshot_url
|
|
||||||
)
|
|
||||||
|
|
||||||
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
|
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
|
||||||
def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(
|
def test_create_web_archive_snapshot_should_not_run_when_background_tasks_are_disabled(
|
||||||
self,
|
self,
|
||||||
|
@ -292,59 +168,6 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
||||||
|
|
||||||
self.assertEqual(self.executed_count(), 0)
|
self.assertEqual(self.executed_count(), 0)
|
||||||
|
|
||||||
def test_schedule_bookmarks_without_snapshots_should_load_snapshot_for_all_bookmarks_without_snapshot(
|
|
||||||
self,
|
|
||||||
):
|
|
||||||
user = self.get_or_create_test_user()
|
|
||||||
self.setup_bookmark()
|
|
||||||
self.setup_bookmark()
|
|
||||||
self.setup_bookmark()
|
|
||||||
self.setup_bookmark(web_archive_snapshot_url="https://example.com")
|
|
||||||
self.setup_bookmark(web_archive_snapshot_url="https://example.com")
|
|
||||||
self.setup_bookmark(web_archive_snapshot_url="https://example.com")
|
|
||||||
|
|
||||||
tasks.schedule_bookmarks_without_snapshots(user)
|
|
||||||
|
|
||||||
self.assertEqual(self.executed_count(), 4)
|
|
||||||
self.assertEqual(self.mock_cdx_api.newest.call_count, 3)
|
|
||||||
|
|
||||||
def test_schedule_bookmarks_without_snapshots_should_only_update_user_owned_bookmarks(
|
|
||||||
self,
|
|
||||||
):
|
|
||||||
user = self.get_or_create_test_user()
|
|
||||||
other_user = User.objects.create_user(
|
|
||||||
"otheruser", "otheruser@example.com", "password123"
|
|
||||||
)
|
|
||||||
self.setup_bookmark()
|
|
||||||
self.setup_bookmark()
|
|
||||||
self.setup_bookmark()
|
|
||||||
self.setup_bookmark(user=other_user)
|
|
||||||
self.setup_bookmark(user=other_user)
|
|
||||||
self.setup_bookmark(user=other_user)
|
|
||||||
|
|
||||||
tasks.schedule_bookmarks_without_snapshots(user)
|
|
||||||
|
|
||||||
self.assertEqual(self.mock_cdx_api.newest.call_count, 3)
|
|
||||||
|
|
||||||
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
|
|
||||||
def test_schedule_bookmarks_without_snapshots_should_not_run_when_background_tasks_are_disabled(
|
|
||||||
self,
|
|
||||||
):
|
|
||||||
tasks.schedule_bookmarks_without_snapshots(self.user)
|
|
||||||
|
|
||||||
self.assertEqual(self.executed_count(), 0)
|
|
||||||
|
|
||||||
def test_schedule_bookmarks_without_snapshots_should_not_run_when_web_archive_integration_is_disabled(
|
|
||||||
self,
|
|
||||||
):
|
|
||||||
self.user.profile.web_archive_integration = (
|
|
||||||
UserProfile.WEB_ARCHIVE_INTEGRATION_DISABLED
|
|
||||||
)
|
|
||||||
self.user.profile.save()
|
|
||||||
tasks.schedule_bookmarks_without_snapshots(self.user)
|
|
||||||
|
|
||||||
self.assertEqual(self.executed_count(), 0)
|
|
||||||
|
|
||||||
def test_load_favicon_should_create_favicon_file(self):
|
def test_load_favicon_should_create_favicon_file(self):
|
||||||
bookmark = self.setup_bookmark()
|
bookmark = self.setup_bookmark()
|
||||||
|
|
||||||
|
|
|
@ -444,17 +444,6 @@ class ImporterTestCase(TestCase, BookmarkFactoryMixin, ImportTestMixin):
|
||||||
self.assertEqual(Bookmark.objects.all()[0].description, "Example description")
|
self.assertEqual(Bookmark.objects.all()[0].description, "Example description")
|
||||||
self.assertEqual(Bookmark.objects.all()[0].notes, "Updated notes")
|
self.assertEqual(Bookmark.objects.all()[0].notes, "Updated notes")
|
||||||
|
|
||||||
def test_schedule_snapshot_creation(self):
|
|
||||||
user = self.get_or_create_test_user()
|
|
||||||
test_html = self.render_html(tags_html="")
|
|
||||||
|
|
||||||
with patch.object(
|
|
||||||
tasks, "schedule_bookmarks_without_snapshots"
|
|
||||||
) as mock_schedule_bookmarks_without_snapshots:
|
|
||||||
import_netscape_html(test_html, user)
|
|
||||||
|
|
||||||
mock_schedule_bookmarks_without_snapshots.assert_called_once_with(user)
|
|
||||||
|
|
||||||
def test_schedule_favicon_loading(self):
|
def test_schedule_favicon_loading(self):
|
||||||
user = self.get_or_create_test_user()
|
user = self.get_or_create_test_user()
|
||||||
test_html = self.render_html(tags_html="")
|
test_html = self.render_html(tags_html="")
|
||||||
|
|
|
@ -1,17 +0,0 @@
|
||||||
from unittest.mock import patch
|
|
||||||
|
|
||||||
from django.test import TestCase
|
|
||||||
|
|
||||||
from bookmarks.services import tasks
|
|
||||||
from bookmarks.tests.helpers import BookmarkFactoryMixin
|
|
||||||
|
|
||||||
|
|
||||||
class SignalsTestCase(TestCase, BookmarkFactoryMixin):
|
|
||||||
def test_login_should_schedule_snapshot_creation(self):
|
|
||||||
user = self.get_or_create_test_user()
|
|
||||||
|
|
||||||
with patch.object(
|
|
||||||
tasks, "schedule_bookmarks_without_snapshots"
|
|
||||||
) as mock_schedule_bookmarks_without_snapshots:
|
|
||||||
self.client.force_login(user)
|
|
||||||
mock_schedule_bookmarks_without_snapshots.assert_called_once_with(user)
|
|
|
@ -1,12 +1,12 @@
|
||||||
|
import re
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from typing import Set, List
|
from typing import Set, List
|
||||||
import re
|
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
from django.core.handlers.wsgi import WSGIRequest
|
from django.core.handlers.wsgi import WSGIRequest
|
||||||
from django.core.paginator import Paginator
|
from django.core.paginator import Paginator
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
from django.conf import settings
|
|
||||||
|
|
||||||
from bookmarks import queries
|
from bookmarks import queries
|
||||||
from bookmarks import utils
|
from bookmarks import utils
|
||||||
|
@ -18,6 +18,7 @@ from bookmarks.models import (
|
||||||
UserProfile,
|
UserProfile,
|
||||||
Tag,
|
Tag,
|
||||||
)
|
)
|
||||||
|
from bookmarks.services.wayback import generate_fallback_webarchive_url
|
||||||
|
|
||||||
DEFAULT_PAGE_SIZE = 30
|
DEFAULT_PAGE_SIZE = 30
|
||||||
CJK_RE = re.compile(r"[\u4e00-\u9fff]+")
|
CJK_RE = re.compile(r"[\u4e00-\u9fff]+")
|
||||||
|
@ -144,6 +145,10 @@ class BookmarkItem:
|
||||||
self.notes = bookmark.notes
|
self.notes = bookmark.notes
|
||||||
self.tag_names = bookmark.tag_names
|
self.tag_names = bookmark.tag_names
|
||||||
self.web_archive_snapshot_url = bookmark.web_archive_snapshot_url
|
self.web_archive_snapshot_url = bookmark.web_archive_snapshot_url
|
||||||
|
if not self.web_archive_snapshot_url:
|
||||||
|
self.web_archive_snapshot_url = generate_fallback_webarchive_url(
|
||||||
|
bookmark.url, bookmark.date_added
|
||||||
|
)
|
||||||
self.favicon_file = bookmark.favicon_file
|
self.favicon_file = bookmark.favicon_file
|
||||||
self.preview_image_file = bookmark.preview_image_file
|
self.preview_image_file = bookmark.preview_image_file
|
||||||
self.is_archived = bookmark.is_archived
|
self.is_archived = bookmark.is_archived
|
||||||
|
|
Loading…
Reference in a new issue