mirror of
https://github.com/sissbruecker/linkding
synced 2024-11-10 06:04:15 +00:00
Trim website metadata title and description (#383)
* feat: trim fetched metadata placeholders
* feat: implement trimming serverside
* Add website loader tests
* Address review comments

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
parent
13e0516961
commit
c2d8cde86b
2 changed files with 29 additions and 2 deletions
|
@@ -29,9 +29,9 @@ def load_website_metadata(url: str):
|
|||
page_text = load_page(url)
|
||||
soup = BeautifulSoup(page_text, 'html.parser')
|
||||
|
||||
title = soup.title.string if soup.title is not None else None
|
||||
title = soup.title.string.strip() if soup.title is not None else None
|
||||
description_tag = soup.find('meta', attrs={'name': 'description'})
|
||||
description = description_tag['content'] if description_tag is not None else None
|
||||
description = description = description_tag['content'].strip() if description_tag and description_tag['content'] else None
|
||||
finally:
|
||||
return WebsiteMetadata(url=url, title=title, description=description)
|
||||
|
||||
|
|
|
@@ -25,6 +25,19 @@ class MockStreamingResponse:
|
|||
|
||||
|
||||
class WebsiteLoaderTestCase(TestCase):
|
||||
def render_html_document(self, title, description):
|
||||
return f'''
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>{title}</title>
|
||||
<meta name="description" content="{description}">
|
||||
</head>
|
||||
<body></body>
|
||||
</html>
|
||||
'''
|
||||
|
||||
def test_load_page_returns_content(self):
|
||||
with mock.patch('requests.get') as mock_get:
|
||||
mock_get.return_value = MockStreamingResponse(num_chunks=10, chunk_size=1024)
|
||||
|
@@ -51,3 +64,17 @@ class WebsiteLoaderTestCase(TestCase):
|
|||
# Should have read first chunk, and second chunk containing closing head tag
|
||||
expected_content_size = 1 * 1024 * 1000 + len('</head>')
|
||||
self.assertEqual(expected_content_size, len(content))
|
||||
|
||||
def test_load_website_metadata(self):
|
||||
with mock.patch('bookmarks.services.website_loader.load_page') as mock_load_page:
|
||||
mock_load_page.return_value = self.render_html_document('test title', 'test description')
|
||||
metadata = website_loader.load_website_metadata('https://example.com')
|
||||
self.assertEqual('test title', metadata.title)
|
||||
self.assertEqual('test description', metadata.description)
|
||||
|
||||
def test_load_website_metadata_trims_title_and_description(self):
|
||||
with mock.patch('bookmarks.services.website_loader.load_page') as mock_load_page:
|
||||
mock_load_page.return_value = self.render_html_document(' test title ', ' test description ')
|
||||
metadata = website_loader.load_website_metadata('https://example.com')
|
||||
self.assertEqual('test title', metadata.title)
|
||||
self.assertEqual('test description', metadata.description)
|
||||
|
|
Loading…
Reference in a new issue