linkding/bookmarks/services/preview_image_loader.py
Viacheslav Slinko b4376a9ff1
Load bookmark thumbnails after import (#724)
* Update thumbnails after import

* Safer way to download thumbnails

* small test improvements

* add missing tests

---------

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
2024-05-10 09:19:00 +02:00

88 lines
3 KiB
Python

import logging
import mimetypes
import os.path
import hashlib
from pathlib import Path
import requests
from django.conf import settings
from bookmarks.services import website_loader
logger = logging.getLogger(__name__)
def _ensure_preview_folder():
Path(settings.LD_PREVIEW_FOLDER).mkdir(parents=True, exist_ok=True)
def _url_to_filename(preview_image: str) -> str:
return hashlib.md5(preview_image.encode()).hexdigest()
def _get_image_path(preview_image_file: str) -> Path:
return Path(os.path.join(settings.LD_PREVIEW_FOLDER, preview_image_file))
def load_preview_image(url: str) -> str | None:
_ensure_preview_folder()
metadata = website_loader.load_website_metadata(url)
if not metadata.preview_image:
logger.debug(f"Could not find preview image in metadata: {url}")
return None
image_url = metadata.preview_image
logger.debug(f"Loading preview image: {image_url}")
with requests.get(image_url, stream=True) as response:
if response.status_code < 200 or response.status_code >= 300:
logger.debug(
f"Bad response status code for preview image: {image_url} status_code={response.status_code}"
)
return None
if "Content-Length" not in response.headers:
logger.debug(f"Empty Content-Length for preview image: {image_url}")
return None
content_length = int(response.headers["Content-Length"])
if content_length > settings.LD_PREVIEW_MAX_SIZE:
logger.debug(
f"Content-Length exceeds LD_PREVIEW_MAX_SIZE: {image_url} length={content_length}"
)
return None
if "Content-Type" not in response.headers:
logger.debug(f"Empty Content-Type for preview image: {image_url}")
return None
content_type = response.headers["Content-Type"].split(";", 1)[0]
file_extension = mimetypes.guess_extension(content_type)
if file_extension not in settings.LD_PREVIEW_ALLOWED_EXTENSIONS:
logger.debug(
f"Unsupported Content-Type for preview image: {image_url} content_type={content_type}"
)
return None
preview_image_hash = _url_to_filename(url)
preview_image_file = f"{preview_image_hash}{file_extension}"
preview_image_path = _get_image_path(preview_image_file)
with open(preview_image_path, "wb") as file:
downloaded = 0
for chunk in response.iter_content(chunk_size=8192):
downloaded += len(chunk)
if downloaded > content_length:
logger.debug(
f"Content-Length mismatch for preview image: {image_url} length={content_length} downloaded={downloaded}"
)
file.close()
preview_image_path.unlink()
return None
file.write(chunk)
logger.debug(f"Saved preview image as: {preview_image_path}")
return preview_image_file