Cache website metadata to avoid duplicate scraping (#401)

* Cache website metadata to avoid duplicate scraping

* fix test setup
Sascha Ißbrücker 2023-01-20 22:28:44 +01:00 committed by GitHub
parent da99b8b034
commit 30da1880a5
2 changed files with 8 additions and 0 deletions

bookmarks/services/website_loader.py

@@ -1,5 +1,6 @@
 import logging
 from dataclasses import dataclass
+from functools import lru_cache
 
 import requests
 from bs4 import BeautifulSoup
@@ -23,6 +24,9 @@ class WebsiteMetadata:
         }
 
 
+# Caching metadata avoids scraping again when saving bookmarks, in case the
+# metadata was already scraped to show preview values in the bookmark form
+@lru_cache(maxsize=10)
 def load_website_metadata(url: str):
     title = None
     description = None

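With the decorator in place, a second call with the same URL is served from memory instead of triggering another scrape. A minimal sketch of the effect, with a counter standing in for the real scraping work (the counter and example URLs are illustrative, not part of the change):

from functools import lru_cache

scrape_count = 0

@lru_cache(maxsize=10)
def load_website_metadata(url: str):
    global scrape_count
    scrape_count += 1  # stands in for the expensive requests/BeautifulSoup work
    return {'url': url, 'title': 'Example'}

load_website_metadata('https://example.com')  # miss: scrape_count == 1
load_website_metadata('https://example.com')  # hit: scrape_count still 1
load_website_metadata('https://example.org')  # new key: scrape_count == 2

Since lru_cache keys on the exact argument value, 'https://example.com' and 'https://example.com/' are cached independently, and maxsize=10 evicts the least recently used entry once more than ten distinct URLs have been seen.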
bookmarks/tests/test_website_loader.py

@@ -25,6 +25,10 @@ class MockStreamingResponse:
 
 
 class WebsiteLoaderTestCase(TestCase):
+    def setUp(self):
+        # clear cached metadata before test run
+        website_loader.load_website_metadata.cache_clear()
+
     def render_html_document(self, title, description):
         return f'''
             <!DOCTYPE html>
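Because the lru_cache lives at module level, cached entries would otherwise survive from one test to the next, so a test could receive metadata produced under a previous test's mocked response. Clearing the cache in setUp restores isolation. A self-contained sketch of the behavior this guards against (the stand-in loader and test name are hypothetical, not the project's actual tests):

from functools import lru_cache
import unittest


@lru_cache(maxsize=10)
def load_website_metadata(url: str):
    # stand-in for the real scraper; returns a fresh object per actual call
    return object()


class CacheIsolationExample(unittest.TestCase):
    def setUp(self):
        # mirrors the change above: every test starts with an empty cache
        load_website_metadata.cache_clear()

    def test_clearing_forces_a_fresh_scrape(self):
        first = load_website_metadata('https://example.com')
        cached = load_website_metadata('https://example.com')
        self.assertIs(first, cached)  # same cached object while the entry lives
        load_website_metadata.cache_clear()
        fresh = load_website_metadata('https://example.com')
        self.assertIsNot(first, fresh)  # after clearing, the loader runs again


if __name__ == '__main__':
    unittest.main()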