linkding/bookmarks/services/favicon_loader.py
2024-01-27 11:29:16 +01:00

83 lines
2.7 KiB
Python

import logging
import mimetypes
import os.path
import re
import time
from pathlib import Path
from urllib.parse import urlparse
import requests
from django.conf import settings
max_file_age = 60 * 60 * 24 # 1 day
logger = logging.getLogger(__name__)
# register mime type for .ico files, which is not included in the default
# mimetypes of the Docker image
mimetypes.add_type("image/x-icon", ".ico")
def _ensure_favicon_folder():
Path(settings.LD_FAVICON_FOLDER).mkdir(parents=True, exist_ok=True)
def _url_to_filename(url: str) -> str:
return re.sub(r"\W+", "_", url)
def _get_url_parameters(url: str) -> dict:
parsed_uri = urlparse(url)
return {
# https://example.com/foo?bar -> https://example.com
"url": f"{parsed_uri.scheme}://{parsed_uri.hostname}",
# https://example.com/foo?bar -> example.com
"domain": parsed_uri.hostname,
}
def _get_favicon_path(favicon_file: str) -> Path:
return Path(os.path.join(settings.LD_FAVICON_FOLDER, favicon_file))
def _check_existing_favicon(favicon_name: str):
# return existing file if a file with the same name, ignoring extension,
# exists and is not stale
for filename in os.listdir(settings.LD_FAVICON_FOLDER):
file_base_name, _ = os.path.splitext(filename)
if file_base_name == favicon_name:
favicon_path = _get_favicon_path(filename)
return filename if not _is_stale(favicon_path) else None
return None
def _is_stale(path: Path) -> bool:
stat = path.stat()
file_age = time.time() - stat.st_mtime
return file_age >= max_file_age
def load_favicon(url: str) -> str:
url_parameters = _get_url_parameters(url)
# Create favicon folder if not exists
_ensure_favicon_folder()
# Use scheme+hostname as favicon filename to reuse icon for all pages on the same domain
favicon_name = _url_to_filename(url_parameters["url"])
favicon_file = _check_existing_favicon(favicon_name)
if not favicon_file:
# Load favicon from provider, save to file
favicon_url = settings.LD_FAVICON_PROVIDER.format(**url_parameters)
logger.debug(f"Loading favicon from: {favicon_url}")
with requests.get(favicon_url, stream=True) as response:
content_type = response.headers["Content-Type"]
file_extension = mimetypes.guess_extension(content_type)
favicon_file = f"{favicon_name}{file_extension}"
favicon_path = _get_favicon_path(favicon_file)
with open(favicon_path, "wb") as file:
for chunk in response.iter_content(chunk_size=8192):
file.write(chunk)
logger.debug(f"Saved favicon as: {favicon_path}")
return favicon_file