mirror of
https://github.com/sissbruecker/linkding
synced 2024-11-10 06:04:15 +00:00
Load bookmark thumbnails after import (#724)
* Update thumbnails after import * Safer way to download thumbnails * small test improvements * add missing tests --------- Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
parent
87cd4061cb
commit
b4376a9ff1
9 changed files with 263 additions and 20 deletions
|
@ -1,4 +1,4 @@
|
|||
# Generated by Django 5.0.3 on 2024-05-07 07:27
|
||||
# Generated by Django 5.0.3 on 2024-05-10 07:01
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
@ -13,7 +13,7 @@ class Migration(migrations.Migration):
|
|||
migrations.AddField(
|
||||
model_name="bookmark",
|
||||
name="preview_image_file",
|
||||
field=models.CharField(blank=True, max_length=512, null=True),
|
||||
field=models.CharField(blank=True, max_length=512),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="userprofile",
|
||||
|
|
|
@ -59,7 +59,7 @@ class Bookmark(models.Model):
|
|||
website_description = models.TextField(blank=True, null=True)
|
||||
web_archive_snapshot_url = models.CharField(max_length=2048, blank=True)
|
||||
favicon_file = models.CharField(max_length=512, blank=True)
|
||||
preview_image_file = models.CharField(max_length=512, blank=True, null=True)
|
||||
preview_image_file = models.CharField(max_length=512, blank=True)
|
||||
unread = models.BooleanField(default=False)
|
||||
is_archived = models.BooleanField(default=False)
|
||||
shared = models.BooleanField(default=False)
|
||||
|
|
|
@ -83,6 +83,8 @@ def import_netscape_html(
|
|||
tasks.schedule_bookmarks_without_snapshots(user)
|
||||
# Load favicons for newly imported bookmarks
|
||||
tasks.schedule_bookmarks_without_favicons(user)
|
||||
# Load previews for newly imported bookmarks
|
||||
tasks.schedule_bookmarks_without_previews(user)
|
||||
|
||||
end = timezone.now()
|
||||
logger.debug(f"Import duration: {end - import_start}")
|
||||
|
|
|
@ -31,16 +31,58 @@ def load_preview_image(url: str) -> str | None:
|
|||
logger.debug(f"Could not find preview image in metadata: {url}")
|
||||
return None
|
||||
|
||||
logger.debug(f"Loading preview image: {metadata.preview_image}")
|
||||
with requests.get(metadata.preview_image, stream=True) as response:
|
||||
content_type = response.headers["Content-Type"]
|
||||
preview_image_hash = _url_to_filename(url)
|
||||
image_url = metadata.preview_image
|
||||
|
||||
logger.debug(f"Loading preview image: {image_url}")
|
||||
with requests.get(image_url, stream=True) as response:
|
||||
if response.status_code < 200 or response.status_code >= 300:
|
||||
logger.debug(
|
||||
f"Bad response status code for preview image: {image_url} status_code={response.status_code}"
|
||||
)
|
||||
return None
|
||||
|
||||
if "Content-Length" not in response.headers:
|
||||
logger.debug(f"Empty Content-Length for preview image: {image_url}")
|
||||
return None
|
||||
|
||||
content_length = int(response.headers["Content-Length"])
|
||||
if content_length > settings.LD_PREVIEW_MAX_SIZE:
|
||||
logger.debug(
|
||||
f"Content-Length exceeds LD_PREVIEW_MAX_SIZE: {image_url} length={content_length}"
|
||||
)
|
||||
return None
|
||||
|
||||
if "Content-Type" not in response.headers:
|
||||
logger.debug(f"Empty Content-Type for preview image: {image_url}")
|
||||
return None
|
||||
|
||||
content_type = response.headers["Content-Type"].split(";", 1)[0]
|
||||
file_extension = mimetypes.guess_extension(content_type)
|
||||
|
||||
if file_extension not in settings.LD_PREVIEW_ALLOWED_EXTENSIONS:
|
||||
logger.debug(
|
||||
f"Unsupported Content-Type for preview image: {image_url} content_type={content_type}"
|
||||
)
|
||||
return None
|
||||
|
||||
preview_image_hash = _url_to_filename(url)
|
||||
preview_image_file = f"{preview_image_hash}{file_extension}"
|
||||
preview_image_path = _get_image_path(preview_image_file)
|
||||
|
||||
with open(preview_image_path, "wb") as file:
|
||||
downloaded = 0
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
downloaded += len(chunk)
|
||||
if downloaded > content_length:
|
||||
logger.debug(
|
||||
f"Content-Length mismatch for preview image: {image_url} length={content_length} downloaded={downloaded}"
|
||||
)
|
||||
file.close()
|
||||
preview_image_path.unlink()
|
||||
return None
|
||||
|
||||
file.write(chunk)
|
||||
|
||||
logger.debug(f"Saved preview image as: {preview_image_path}")
|
||||
|
||||
return preview_image_file
|
||||
|
|
|
@ -7,6 +7,7 @@ import waybackpy
|
|||
from django.conf import settings
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.contrib.auth.models import User
|
||||
from django.db.models import Q
|
||||
from django.utils import timezone, formats
|
||||
from huey import crontab
|
||||
from huey.contrib.djhuey import HUEY as huey
|
||||
|
@ -166,6 +167,12 @@ def is_favicon_feature_active(user: User) -> bool:
|
|||
return background_tasks_enabled and user.profile.enable_favicons
|
||||
|
||||
|
||||
def is_preview_feature_active(user: User) -> bool:
|
||||
return (
|
||||
user.profile.enable_preview_images and not settings.LD_DISABLE_BACKGROUND_TASKS
|
||||
)
|
||||
|
||||
|
||||
def load_favicon(user: User, bookmark: Bookmark):
|
||||
if is_favicon_feature_active(user):
|
||||
_load_favicon_task(bookmark.id)
|
||||
|
@ -222,7 +229,7 @@ def _schedule_refresh_favicons_task(user_id: int):
|
|||
|
||||
|
||||
def load_preview_image(user: User, bookmark: Bookmark):
|
||||
if user.profile.enable_preview_images and not settings.LD_DISABLE_BACKGROUND_TASKS:
|
||||
if is_preview_feature_active(user):
|
||||
_load_preview_image_task(bookmark.id)
|
||||
|
||||
|
||||
|
@ -245,6 +252,27 @@ def _load_preview_image_task(bookmark_id: int):
|
|||
)
|
||||
|
||||
|
||||
def schedule_bookmarks_without_previews(user: User):
|
||||
if is_preview_feature_active(user):
|
||||
_schedule_bookmarks_without_previews_task(user.id)
|
||||
|
||||
|
||||
@task()
|
||||
def _schedule_bookmarks_without_previews_task(user_id: int):
|
||||
user = get_user_model().objects.get(id=user_id)
|
||||
bookmarks = Bookmark.objects.filter(
|
||||
Q(preview_image_file__exact=""),
|
||||
owner=user,
|
||||
)
|
||||
|
||||
# TODO: Implement bulk task creation
|
||||
for bookmark in bookmarks:
|
||||
try:
|
||||
_load_preview_image_task(bookmark.id)
|
||||
except Exception as exc:
|
||||
logging.exception(exc)
|
||||
|
||||
|
||||
def is_html_snapshot_feature_active() -> bool:
|
||||
return settings.LD_ENABLE_SNAPSHOTS and not settings.LD_DISABLE_BACKGROUND_TASKS
|
||||
|
||||
|
|
|
@ -578,6 +578,60 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
|
|||
|
||||
self.assertEqual(self.executed_count(), 0)
|
||||
|
||||
def test_schedule_bookmarks_without_previews_should_load_preview_for_all_bookmarks_without_preview(
|
||||
self,
|
||||
):
|
||||
user = self.get_or_create_test_user()
|
||||
self.setup_bookmark()
|
||||
self.setup_bookmark()
|
||||
self.setup_bookmark()
|
||||
self.setup_bookmark(preview_image_file="test.png")
|
||||
self.setup_bookmark(preview_image_file="test.png")
|
||||
self.setup_bookmark(preview_image_file="test.png")
|
||||
|
||||
tasks.schedule_bookmarks_without_previews(user)
|
||||
|
||||
self.assertEqual(self.executed_count(), 4)
|
||||
self.assertEqual(self.mock_load_preview_image.call_count, 3)
|
||||
|
||||
def test_schedule_bookmarks_without_previews_should_only_update_user_owned_bookmarks(
|
||||
self,
|
||||
):
|
||||
user = self.get_or_create_test_user()
|
||||
other_user = User.objects.create_user(
|
||||
"otheruser", "otheruser@example.com", "password123"
|
||||
)
|
||||
self.setup_bookmark()
|
||||
self.setup_bookmark()
|
||||
self.setup_bookmark()
|
||||
self.setup_bookmark(user=other_user)
|
||||
self.setup_bookmark(user=other_user)
|
||||
self.setup_bookmark(user=other_user)
|
||||
|
||||
tasks.schedule_bookmarks_without_previews(user)
|
||||
|
||||
self.assertEqual(self.mock_load_preview_image.call_count, 3)
|
||||
|
||||
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
|
||||
def test_schedule_bookmarks_without_previews_should_not_run_when_background_tasks_are_disabled(
|
||||
self,
|
||||
):
|
||||
self.setup_bookmark()
|
||||
tasks.schedule_bookmarks_without_previews(self.get_or_create_test_user())
|
||||
|
||||
self.assertEqual(self.executed_count(), 0)
|
||||
|
||||
def test_schedule_bookmarks_without_previews_should_not_run_when_preview_feature_is_disabled(
|
||||
self,
|
||||
):
|
||||
self.user.profile.enable_preview_images = False
|
||||
self.user.profile.save()
|
||||
|
||||
self.setup_bookmark()
|
||||
tasks.schedule_bookmarks_without_previews(self.get_or_create_test_user())
|
||||
|
||||
self.assertEqual(self.executed_count(), 0)
|
||||
|
||||
@override_settings(LD_ENABLE_SNAPSHOTS=True)
|
||||
def test_create_html_snapshot_should_create_pending_asset(self):
|
||||
bookmark = self.setup_bookmark()
|
||||
|
|
|
@ -465,3 +465,14 @@ class ImporterTestCase(TestCase, BookmarkFactoryMixin, ImportTestMixin):
|
|||
import_netscape_html(test_html, user)
|
||||
|
||||
mock_schedule_bookmarks_without_favicons.assert_called_once_with(user)
|
||||
|
||||
def test_schedule_preview_loading(self):
|
||||
user = self.get_or_create_test_user()
|
||||
test_html = self.render_html(tags_html="")
|
||||
|
||||
with patch.object(
|
||||
tasks, "schedule_bookmarks_without_previews"
|
||||
) as mock_schedule_bookmarks_without_previews:
|
||||
import_netscape_html(test_html, user)
|
||||
|
||||
mock_schedule_bookmarks_without_previews.assert_called_once_with(user)
|
||||
|
|
|
@ -13,9 +13,20 @@ mock_image_data = b"mock_image"
|
|||
|
||||
|
||||
class MockStreamingResponse:
|
||||
def __init__(self, data=mock_image_data, content_type="image/png"):
|
||||
def __init__(
|
||||
self,
|
||||
url,
|
||||
data=mock_image_data,
|
||||
content_type="image/png",
|
||||
content_length=None,
|
||||
status_code=200,
|
||||
):
|
||||
self.url = url
|
||||
self.chunks = [data]
|
||||
self.headers = {"Content-Type": content_type}
|
||||
self.status_code = status_code
|
||||
if not content_length:
|
||||
content_length = len(data)
|
||||
self.headers = {"Content-Type": content_type, "Content-Length": content_length}
|
||||
|
||||
def iter_content(self, **kwargs):
|
||||
return self.chunks
|
||||
|
@ -47,19 +58,29 @@ class PreviewImageLoaderTestCase(TestCase):
|
|||
self.settings_override.disable()
|
||||
self.mock_load_website_metadata_patcher.stop()
|
||||
|
||||
def create_mock_response(self, icon_data=mock_image_data, content_type="image/png"):
|
||||
def create_mock_response(
|
||||
self,
|
||||
url="https://example.com/image.png",
|
||||
icon_data=mock_image_data,
|
||||
content_type="image/png",
|
||||
content_length=len(mock_image_data),
|
||||
status_code=200,
|
||||
):
|
||||
mock_response = mock.Mock()
|
||||
mock_response.raw = io.BytesIO(icon_data)
|
||||
return MockStreamingResponse(icon_data, content_type)
|
||||
return MockStreamingResponse(
|
||||
url, icon_data, content_type, content_length, status_code
|
||||
)
|
||||
|
||||
def get_image_path(self, filename):
|
||||
return Path(os.path.join(settings.LD_PREVIEW_FOLDER, filename))
|
||||
|
||||
def image_exists(self, filename):
|
||||
return self.get_image_path(filename).exists()
|
||||
def assertImageExists(self, filename, data):
|
||||
self.assertTrue(self.get_image_path(filename).exists())
|
||||
self.assertEqual(self.get_image_path(filename).read_bytes(), data)
|
||||
|
||||
def get_image_data(self, filename):
|
||||
return self.get_image_path(filename).read_bytes()
|
||||
def assertNoImageExists(self):
|
||||
self.assertFalse(os.listdir(settings.LD_PREVIEW_FOLDER))
|
||||
|
||||
def test_load_preview_image(self):
|
||||
with mock.patch("requests.get") as mock_get:
|
||||
|
@ -67,8 +88,8 @@ class PreviewImageLoaderTestCase(TestCase):
|
|||
|
||||
file = preview_image_loader.load_preview_image("https://example.com")
|
||||
|
||||
self.assertTrue(self.image_exists(file))
|
||||
self.assertEqual(mock_image_data, self.get_image_data(file))
|
||||
self.assertIsNotNone(file)
|
||||
self.assertImageExists(file, mock_image_data)
|
||||
|
||||
def test_load_preview_image_returns_none_if_no_preview_image_detected(self):
|
||||
with mock.patch("requests.get") as mock_get:
|
||||
|
@ -78,6 +99,80 @@ class PreviewImageLoaderTestCase(TestCase):
|
|||
file = preview_image_loader.load_preview_image("https://example.com")
|
||||
|
||||
self.assertIsNone(file)
|
||||
self.assertNoImageExists()
|
||||
|
||||
def test_load_preview_image_returns_none_for_invalid_status_code(self):
|
||||
invalid_status_codes = [199, 300, 400, 500]
|
||||
|
||||
for status_code in invalid_status_codes:
|
||||
with mock.patch("requests.get") as mock_get:
|
||||
mock_get.return_value = self.create_mock_response(
|
||||
status_code=status_code
|
||||
)
|
||||
|
||||
file = preview_image_loader.load_preview_image("https://example.com")
|
||||
|
||||
self.assertIsNone(file)
|
||||
self.assertNoImageExists()
|
||||
|
||||
def test_load_preview_image_returns_none_if_content_length_exceeds_limit(self):
|
||||
# exceeds max size
|
||||
with mock.patch("requests.get") as mock_get:
|
||||
mock_get.return_value = self.create_mock_response(
|
||||
content_length=settings.LD_PREVIEW_MAX_SIZE + 1
|
||||
)
|
||||
|
||||
file = preview_image_loader.load_preview_image("https://example.com")
|
||||
|
||||
self.assertIsNone(file)
|
||||
self.assertNoImageExists()
|
||||
|
||||
# equals max size
|
||||
with mock.patch("requests.get") as mock_get:
|
||||
mock_get.return_value = self.create_mock_response(
|
||||
content_length=settings.LD_PREVIEW_MAX_SIZE
|
||||
)
|
||||
|
||||
file = preview_image_loader.load_preview_image("https://example.com")
|
||||
|
||||
self.assertIsNotNone(file)
|
||||
self.assertImageExists(file, mock_image_data)
|
||||
|
||||
def test_load_preview_image_returns_none_for_invalid_content_type(self):
|
||||
invalid_content_types = ["text/html", "application/json"]
|
||||
|
||||
for content_type in invalid_content_types:
|
||||
with mock.patch("requests.get") as mock_get:
|
||||
mock_get.return_value = self.create_mock_response(
|
||||
content_type=content_type
|
||||
)
|
||||
|
||||
file = preview_image_loader.load_preview_image("https://example.com")
|
||||
|
||||
self.assertIsNone(file)
|
||||
self.assertNoImageExists()
|
||||
|
||||
valid_content_types = ["image/png", "image/jpeg", "image/gif"]
|
||||
|
||||
for content_type in valid_content_types:
|
||||
with mock.patch("requests.get") as mock_get:
|
||||
mock_get.return_value = self.create_mock_response(
|
||||
content_type=content_type
|
||||
)
|
||||
|
||||
file = preview_image_loader.load_preview_image("https://example.com")
|
||||
|
||||
self.assertIsNotNone(file)
|
||||
self.assertImageExists(file, mock_image_data)
|
||||
|
||||
def test_load_preview_image_returns_none_if_download_exceeds_content_length(self):
|
||||
with mock.patch("requests.get") as mock_get:
|
||||
mock_get.return_value = self.create_mock_response(content_length=1)
|
||||
|
||||
file = preview_image_loader.load_preview_image("https://example.com")
|
||||
|
||||
self.assertIsNone(file)
|
||||
self.assertNoImageExists()
|
||||
|
||||
def test_load_preview_image_creates_folder_if_not_exists(self):
|
||||
with mock.patch("requests.get") as mock_get:
|
||||
|
@ -95,14 +190,16 @@ class PreviewImageLoaderTestCase(TestCase):
|
|||
def test_guess_file_extension(self):
|
||||
with mock.patch("requests.get") as mock_get:
|
||||
mock_get.return_value = self.create_mock_response(content_type="image/png")
|
||||
|
||||
file = preview_image_loader.load_preview_image("https://example.com")
|
||||
|
||||
self.assertTrue(self.image_exists(file))
|
||||
self.assertImageExists(file, mock_image_data)
|
||||
self.assertEqual("png", file.split(".")[-1])
|
||||
|
||||
with mock.patch("requests.get") as mock_get:
|
||||
mock_get.return_value = self.create_mock_response(content_type="image/jpeg")
|
||||
|
||||
file = preview_image_loader.load_preview_image("https://example.com")
|
||||
|
||||
self.assertTrue(self.image_exists(file))
|
||||
self.assertImageExists(file, mock_image_data)
|
||||
self.assertEqual("jpg", file.split(".")[-1])
|
||||
|
|
|
@ -289,6 +289,15 @@ LD_ENABLE_REFRESH_FAVICONS = os.getenv("LD_ENABLE_REFRESH_FAVICONS", True) in (
|
|||
|
||||
# Previews settings
|
||||
LD_PREVIEW_FOLDER = os.path.join(BASE_DIR, "data", "previews")
|
||||
LD_PREVIEW_MAX_SIZE = int(os.getenv("LD_PREVIEW_MAX_SIZE", 5242880))
|
||||
LD_PREVIEW_ALLOWED_EXTENSIONS = [
|
||||
".jpg",
|
||||
".jpeg",
|
||||
".png",
|
||||
".gif",
|
||||
".svg",
|
||||
".webp",
|
||||
]
|
||||
|
||||
# Asset / snapshot settings
|
||||
LD_ASSET_FOLDER = os.path.join(BASE_DIR, "data", "assets")
|
||||
|
|
Loading…
Reference in a new issue