Cache website metadata to avoid duplicate scraping (#401)

* Cache website metadata to avoid duplicate scraping

* fix test setup
Sascha Ißbrücker 2023-01-20 22:28:44 +01:00 committed by GitHub
parent da99b8b034
commit 30da1880a5
2 changed files with 8 additions and 0 deletions

bookmarks/services/website_loader.py

@@ -1,5 +1,6 @@
 import logging
 from dataclasses import dataclass
+from functools import lru_cache
 
 import requests
 from bs4 import BeautifulSoup
@@ -23,6 +24,9 @@ class WebsiteMetadata:
         }
 
 
+# Caching metadata avoids scraping again when saving bookmarks, in case the
+# metadata was already scraped to show preview values in the bookmark form
+@lru_cache(maxsize=10)
 def load_website_metadata(url: str):
     title = None
     description = None

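With the decorator in place, a second call with the same URL is served from memory instead of triggering another scrape. A minimal sketch of the effect, with a counter standing in for the real scraping work (the counter and example URLs are illustrative, not part of the change):

from functools import lru_cache

scrape_count = 0

@lru_cache(maxsize=10)
def load_website_metadata(url: str):
    global scrape_count
    scrape_count += 1  # stands in for the expensive requests/BeautifulSoup work
    return {'url': url, 'title': 'Example'}

load_website_metadata('https://example.com')  # miss: scrape_count == 1
load_website_metadata('https://example.com')  # hit: scrape_count still 1
load_website_metadata('https://example.org')  # new key: scrape_count == 2

Since lru_cache keys on the exact argument value, 'https://example.com' and 'https://example.com/' are cached independently, and maxsize=10 evicts the least recently used entry once more than ten distinct URLs have been seen.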
bookmarks/tests/test_website_loader.py

@@ -25,6 +25,10 @@ class MockStreamingResponse:
 
 
 class WebsiteLoaderTestCase(TestCase):
+    def setUp(self):
+        # clear cached metadata before test run
+        website_loader.load_website_metadata.cache_clear()
+
     def render_html_document(self, title, description):
         return f'''
             <!DOCTYPE html>
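Because the lru_cache lives at module level, cached entries would otherwise survive from one test to the next, so a test could receive metadata produced under a previous test's mocked response. Clearing the cache in setUp restores isolation. A self-contained sketch of the behavior this guards against (the stand-in loader and test name are hypothetical, not the project's actual tests):

from functools import lru_cache
import unittest


@lru_cache(maxsize=10)
def load_website_metadata(url: str):
    # stand-in for the real scraper; returns a fresh object per actual call
    return object()


class CacheIsolationExample(unittest.TestCase):
    def setUp(self):
        # mirrors the change above: every test starts with an empty cache
        load_website_metadata.cache_clear()

    def test_clearing_forces_a_fresh_scrape(self):
        first = load_website_metadata('https://example.com')
        cached = load_website_metadata('https://example.com')
        self.assertIs(first, cached)  # same cached object while the entry lives
        load_website_metadata.cache_clear()
        fresh = load_website_metadata('https://example.com')
        self.assertIsNot(first, fresh)  # after clearing, the loader runs again


if __name__ == '__main__':
    unittest.main()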