Improve import performance (#261)

* Run import in batches, cache tags

* Use bulk operations for bookmarks and assigning tags

* Improve naming

* Restore bookmark validation

* Add logging

* Bulk create tags

* Use HTMLParser for parsing bookmarks

* Add parser tests

* Add more importer tests

* Add more importer tests

* Remove pyparsing dependency

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
Sascha Ißbrücker 2022-05-21 09:27:30 +02:00 committed by GitHub
parent 117160ea87
commit f4e3d724f0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 571 additions and 103 deletions

View file

@ -1,13 +1,13 @@
import logging
from dataclasses import dataclass
from typing import List
from django.contrib.auth.models import User
from django.utils import timezone
from bookmarks.models import Bookmark, parse_tag_string
from bookmarks.models import Bookmark, Tag, parse_tag_string
from bookmarks.services import tasks
from bookmarks.services.parser import parse, NetscapeBookmark
from bookmarks.services.tags import get_or_create_tags
from bookmarks.utils import parse_timestamp
logger = logging.getLogger(__name__)
@ -20,8 +20,39 @@ class ImportResult:
failed: int = 0
class TagCache:
    """Case-insensitive, in-memory lookup of a user's tags.

    The cache is keyed by the lower-cased tag name and is filled once, on
    construction, with all tags owned by the given user. This allows tag
    lookups during an import without querying the database per tag name.
    """

    def __init__(self, user: 'User'):
        self.user = user
        # Maps lower-cased tag name -> tag instance
        self.cache = dict()

        # Init cache with all existing tags for that user
        tags = Tag.objects.filter(owner=user)
        for tag in tags:
            self.put(tag)

    def get(self, tag_name: str):
        """Return the cached tag for ``tag_name`` (ignoring case), or ``None`` if there is none."""
        return self.cache.get(tag_name.lower())

    def get_all(self, tag_names: List[str]):
        """Return the cached tags for ``tag_names``, in order, without duplicates.

        Names that have no cached tag are skipped, so the result never contains ``None``.
        """
        result = []
        for tag_name in tag_names:
            tag = self.get(tag_name)
            # Skip unknown names, otherwise callers would try to work with a None tag.
            # Also prevent returning duplicates.
            if tag is not None and tag not in result:
                result.append(tag)
        return result

    def put(self, tag: 'Tag'):
        """Add ``tag`` to the cache, replacing any entry with the same lower-cased name."""
        self.cache[tag.name.lower()] = tag
def import_netscape_html(html: str, user: User):
result = ImportResult()
import_start = timezone.now()
try:
netscape_bookmarks = parse(html)
@ -29,26 +60,130 @@ def import_netscape_html(html: str, user: User):
logging.exception('Could not read bookmarks file.')
raise
parse_end = timezone.now()
logger.debug(f'Parse duration: {parse_end - import_start}')
# Create and cache all tags beforehand
_create_missing_tags(netscape_bookmarks, user)
tag_cache = TagCache(user)
# Split bookmarks to import into batches, to keep memory usage for bulk operations manageable
batches = _get_batches(netscape_bookmarks, 200)
for batch in batches:
_import_batch(batch, user, tag_cache, result)
# Create snapshots for newly imported bookmarks
tasks.schedule_bookmarks_without_snapshots(user)
end = timezone.now()
logger.debug(f'Import duration: {end - import_start}')
return result
def _create_missing_tags(netscape_bookmarks: List[NetscapeBookmark], user: User):
    """Bulk-create all tags used by the parsed bookmarks that the user does not have yet.

    Tag names are taken from each bookmark's tag string and compared
    case-insensitively against the user's existing tags. Every missing tag is
    created exactly once, with a single bulk insert.
    """
    tag_cache = TagCache(user)
    tags_to_create = []

    for netscape_bookmark in netscape_bookmarks:
        tag_names = parse_tag_string(netscape_bookmark.tag_string)
        for tag_name in tag_names:
            tag = tag_cache.get(tag_name)
            if not tag:
                tag = Tag(name=tag_name, owner=user)
                tag.date_added = timezone.now()
                tags_to_create.append(tag)
                # Remember the pending tag, so that the same name used by another
                # bookmark (in any letter case) is not scheduled for creation again
                tag_cache.put(tag)

    Tag.objects.bulk_create(tags_to_create)
def _get_batches(items: List, batch_size: int):
batches = []
offset = 0
num_items = len(items)
while offset < num_items:
batch = items[offset:min(offset + batch_size, num_items)]
if len(batch) > 0:
batches.append(batch)
offset = offset + batch_size
return batches
def _import_batch(netscape_bookmarks: List[NetscapeBookmark], user: User, tag_cache: TagCache, result: ImportResult):
# Query existing bookmarks
batch_urls = [bookmark.href for bookmark in netscape_bookmarks]
existing_bookmarks = Bookmark.objects.filter(owner=user, url__in=batch_urls)
# Create or update bookmarks from parsed Netscape bookmarks
bookmarks_to_create = []
bookmarks_to_update = []
for netscape_bookmark in netscape_bookmarks:
result.total = result.total + 1
try:
_import_bookmark_tag(netscape_bookmark, user)
# Lookup existing bookmark by URL, or create new bookmark if there is no bookmark for that URL yet
bookmark = next(
(bookmark for bookmark in existing_bookmarks if bookmark.url == netscape_bookmark.href), None)
if not bookmark:
bookmark = Bookmark(owner=user)
is_update = False
else:
is_update = True
# Copy data from parsed bookmark
_copy_bookmark_data(netscape_bookmark, bookmark)
# Validate bookmark fields, exclude owner to prevent n+1 database query,
# also there is no specific validation on owner
bookmark.clean_fields(exclude=['owner'])
# Schedule for update or insert
if is_update:
bookmarks_to_update.append(bookmark)
else:
bookmarks_to_create.append(bookmark)
result.success = result.success + 1
except:
shortened_bookmark_tag_str = str(netscape_bookmark)[:100] + '...'
logging.exception('Error importing bookmark: ' + shortened_bookmark_tag_str)
result.failed = result.failed + 1
# Create snapshots for newly imported bookmarks
tasks.schedule_bookmarks_without_snapshots(user)
# Bulk update bookmarks in DB
Bookmark.objects.bulk_update(bookmarks_to_update,
['url', 'date_added', 'date_modified', 'unread', 'title', 'description', 'owner'])
# Bulk insert new bookmarks into DB
Bookmark.objects.bulk_create(bookmarks_to_create)
return result
# Bulk assign tags
# In Django 3, bulk_create does not return the auto-generated IDs when bulk inserting,
# so we have to reload the inserted bookmarks, and match them to the parsed bookmarks by URL
existing_bookmarks = Bookmark.objects.filter(owner=user, url__in=batch_urls)
BookmarkToTagRelationShip = Bookmark.tags.through
relationships = []
for netscape_bookmark in netscape_bookmarks:
# Lookup bookmark by URL again
bookmark = next(
(bookmark for bookmark in existing_bookmarks if bookmark.url == netscape_bookmark.href), None)
if not bookmark:
# Something is wrong, we should have just created this bookmark
shortened_bookmark_tag_str = str(netscape_bookmark)[:100] + '...'
logging.warning(
f'Failed to assign tags to the bookmark: {shortened_bookmark_tag_str}. Could not find bookmark by URL.')
# Get tag models by string, schedule inserts for bookmark -> tag associations
tag_names = parse_tag_string(netscape_bookmark.tag_string)
tags = tag_cache.get_all(tag_names)
for tag in tags:
relationships.append(BookmarkToTagRelationShip(bookmark=bookmark, tag=tag))
# Insert all bookmark -> tag associations at once, should ignore errors if association already exists
BookmarkToTagRelationShip.objects.bulk_create(relationships, ignore_conflicts=True)
def _import_bookmark_tag(netscape_bookmark: NetscapeBookmark, user: User):
# Either modify existing bookmark for the URL or create new one
bookmark = _get_or_create_bookmark(netscape_bookmark.href, user)
def _copy_bookmark_data(netscape_bookmark: NetscapeBookmark, bookmark: Bookmark):
bookmark.url = netscape_bookmark.href
if netscape_bookmark.date_added:
bookmark.date_added = parse_timestamp(netscape_bookmark.date_added)
@ -56,24 +191,7 @@ def _import_bookmark_tag(netscape_bookmark: NetscapeBookmark, user: User):
bookmark.date_added = timezone.now()
bookmark.date_modified = bookmark.date_added
bookmark.unread = False
bookmark.title = netscape_bookmark.title
if netscape_bookmark.title:
bookmark.title = netscape_bookmark.title
if netscape_bookmark.description:
bookmark.description = netscape_bookmark.description
bookmark.owner = user
bookmark.full_clean()
bookmark.save()
# Set tags
tag_names = parse_tag_string(netscape_bookmark.tag_string)
tags = get_or_create_tags(tag_names, user)
bookmark.tags.set(tags)
bookmark.save()
def _get_or_create_bookmark(url: str, user: User):
try:
return Bookmark.objects.get(url=url, owner=user)
except Bookmark.DoesNotExist:
return Bookmark()

View file

@ -1,6 +1,6 @@
from dataclasses import dataclass
import pyparsing as pp
from html.parser import HTMLParser
from typing import Dict, List
@dataclass
@ -12,60 +12,72 @@ class NetscapeBookmark:
tag_string: str
def extract_bookmark_link(tag):
href = tag[0].href
title = tag[0].text
tag_string = tag[0].tags
date_added = tag[0].add_date
class BookmarkParser(HTMLParser):
def __init__(self):
super().__init__()
self.bookmarks = []
return {
'href': href,
'title': title,
'tag_string': tag_string,
'date_added': date_added
}
self.current_tag = None
self.bookmark = None
self.href = ''
self.add_date = ''
self.tags = ''
self.title = ''
self.description = ''
def handle_starttag(self, tag: str, attrs: list):
    """Dispatch an opening tag to ``handle_start_<tag>``, if this parser defines such a method.

    The handler receives the attributes as a dict with lower-cased keys.
    Regardless of whether a handler exists, the tag is recorded as the current
    tag, which is what ``handle_data`` uses to route text.
    """
    # Single attribute lookup instead of building the full dir() listing for every tag
    handler = getattr(self, 'handle_start_' + tag.lower(), None)
    if handler is not None:
        handler({k.lower(): v for k, v in attrs})
    self.current_tag = tag
def extract_bookmark(tag):
link = tag[0].link
description = tag[0].description
description = description[0] if description else ''
def handle_endtag(self, tag: str):
    """Dispatch a closing tag to ``handle_end_<tag>``, if this parser defines such a method.

    Regardless of whether a handler exists, the current tag is cleared, so that
    text following the closing tag is no longer attributed to it.
    """
    # Single attribute lookup instead of building the full dir() listing for every tag
    handler = getattr(self, 'handle_end_' + tag.lower(), None)
    if handler is not None:
        handler()
    self.current_tag = None
return {
'link': link,
'description': description,
}
def handle_data(self, data):
    """Route text to ``handle_<current tag>_data``, if this parser defines such a method.

    Text that appears while no tag is current, or inside a tag without a data
    handler, is ignored.
    """
    # Single attribute lookup instead of building the full dir() listing for every text node
    handler = getattr(self, f'handle_{self.current_tag}_data', None)
    if handler is not None:
        handler(data)
def handle_end_dl(self):
    """Finalize the pending bookmark, if any, when a ``</DL>`` closes the list."""
    self.add_bookmark()
def extract_description(tag):
return tag[0].strip()
def handle_start_dt(self, attrs: Dict[str, str]):
    """Finalize the pending bookmark, if any, when a ``<DT>`` starts the next entry."""
    self.add_bookmark()
# define grammar
dt_start, _ = pp.makeHTMLTags("DT")
dd_start, _ = pp.makeHTMLTags("DD")
a_start, a_end = pp.makeHTMLTags("A")
bookmark_link_tag = pp.Group(a_start + a_start.tag_body("text") + a_end.suppress())
bookmark_link_tag.addParseAction(extract_bookmark_link)
bookmark_description_tag = dd_start.suppress() + pp.SkipTo(pp.anyOpenTag | pp.anyCloseTag)("description")
bookmark_description_tag.addParseAction(extract_description)
bookmark_tag = pp.Group(dt_start + bookmark_link_tag("link") + pp.ZeroOrMore(bookmark_description_tag)("description"))
bookmark_tag.addParseAction(extract_bookmark)
def parse(html: str) -> [NetscapeBookmark]:
matches = bookmark_tag.searchString(html)
bookmarks = []
for match in matches:
bookmark_match = match[0]
bookmark = NetscapeBookmark(
href=bookmark_match['link']['href'],
title=bookmark_match['link']['title'],
description=bookmark_match['description'],
tag_string=bookmark_match['link']['tag_string'],
date_added=bookmark_match['link']['date_added'],
def handle_start_a(self, attrs: Dict[str, str]):
    """Start a new pending bookmark from the attributes of an ``<A>`` tag.

    Only ``href``, ``add_date`` and ``tags`` are read. Title and description
    start out empty and are filled in when the bookmark is added.
    """
    # Copy just the known attributes. Merging the whole dict into the instance
    # would let arbitrary attributes of the imported file overwrite parser
    # state such as ``bookmarks``.
    # A missing or valueless attribute (e.g. ``<A HREF>``) results in ''.
    self.href = attrs.get('href') or ''
    self.add_date = attrs.get('add_date') or ''
    self.tags = attrs.get('tags') or ''
    self.bookmark = NetscapeBookmark(
        href=self.href,
        title='',
        description='',
        date_added=self.add_date,
        tag_string=self.tags,
    )
bookmarks.append(bookmark)
return bookmarks
def handle_a_data(self, data):
    """Remember the text inside an ``<A>`` tag, without surrounding whitespace, as the title."""
    trimmed = data.strip()
    self.title = trimmed
def handle_dd_data(self, data):
    """Remember the text inside a ``<DD>`` tag, without surrounding whitespace, as the description."""
    trimmed = data.strip()
    self.description = trimmed
def add_bookmark(self):
    """Finalize the pending bookmark, if there is one, and reset the per-bookmark state."""
    if self.bookmark:
        # Title and description are collected separately from the <A> attributes,
        # so they are only applied once the bookmark is complete
        self.bookmark.title = self.title
        self.bookmark.description = self.description
        self.bookmarks.append(self.bookmark)
    # NOTE(review): the reset below is assumed to run even when no bookmark was pending;
    # confirm against the original indentation
    self.bookmark = None
    self.href = ''
    self.add_date = ''
    self.tags = ''
    self.title = ''
    self.description = ''
def parse(html: str) -> List[NetscapeBookmark]:
    """Parse Netscape bookmark HTML and return the bookmarks in document order.

    Input that contains no bookmarks results in an empty list.
    """
    parser = BookmarkParser()
    parser.feed(html)
    # Force processing of any input the parser is still buffering
    parser.close()
    # A bookmark is normally added by the next <DT> or the closing </DL>. Add a
    # bookmark that is still pending, so that a file lacking the closing tag
    # does not lose its last entry. This is a no-op if nothing is pending.
    parser.add_bookmark()
    return parser.bookmarks

View file

@ -1,5 +1,7 @@
import random
import logging
from dataclasses import dataclass
from typing import Optional, List
from django.contrib.auth.models import User
from django.utils import timezone
@ -87,6 +89,42 @@ class LinkdingApiTestCase(APITestCase):
return response
@dataclass
class BookmarkHtmlTag:
    """Describes one bookmark entry to be rendered into test HTML.

    All fields are optional and default to an empty string. Being a dataclass,
    instances compare by value and have a readable repr in assertion failures.
    """
    href: str = ''
    title: str = ''
    description: str = ''
    add_date: str = ''
    tags: str = ''
class ImportTestMixin:
    """Test mixin that renders Netscape bookmark HTML from ``BookmarkHtmlTag`` descriptions."""

    def render_tag(self, tag: BookmarkHtmlTag) -> str:
        """Render one bookmark as a ``<DT><A>`` entry.

        ``HREF``, ``ADD_DATE`` and ``TAGS`` are only emitted for non-empty
        values, and a ``<DD>`` is only appended if there is a description.
        """
        return f'''
<DT>
<A {f'HREF="{tag.href}"' if tag.href else ''}
{f'ADD_DATE="{tag.add_date}"' if tag.add_date else ''}
{f'TAGS="{tag.tags}"' if tag.tags else ''}>
{tag.title if tag.title else ''}
</A>
{f'<DD>{tag.description}' if tag.description else ''}
'''

    def render_html(self, tags: Optional[List[BookmarkHtmlTag]] = None, tags_html: str = '') -> str:
        """Render a complete bookmark file around the given entries.

        Entries are either given as ``tags``, which are rendered with
        ``render_tag``, or as raw markup in ``tags_html``. A non-empty ``tags``
        list takes precedence over ``tags_html``.
        """
        if tags:
            rendered_tags = [self.render_tag(tag) for tag in tags]
            tags_html = '\n'.join(rendered_tags)
        return f'''
<!DOCTYPE NETSCAPE-Bookmark-file-1>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
<TITLE>Bookmarks</TITLE>
<H1>Bookmarks</H1>
<DL><p>
{tags_html}
</DL><p>
'''
_words = [
'quasi',
'consequatur',

View file

@ -1,29 +1,204 @@
from typing import List
from unittest.mock import patch
from django.test import TestCase
from django.test import TestCase, override_settings
from django.utils import timezone
from bookmarks.models import Tag
from bookmarks.models import Bookmark, Tag, parse_tag_string
from bookmarks.services import tasks
from bookmarks.services.importer import import_netscape_html
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
from bookmarks.tests.helpers import BookmarkFactoryMixin, ImportTestMixin, BookmarkHtmlTag, disable_logging
from bookmarks.utils import parse_timestamp
class ImporterTestCase(TestCase, BookmarkFactoryMixin):
class ImporterTestCase(TestCase, BookmarkFactoryMixin, ImportTestMixin):
def create_import_html(self, bookmark_tags_string: str):
return f'''
<!DOCTYPE NETSCAPE-Bookmark-file-1>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
<TITLE>Bookmarks</TITLE>
<H1>Bookmarks</H1>
<DL><p>
{bookmark_tags_string}
</DL><p>
'''
def assertBookmarksImported(self, html_tags: List[BookmarkHtmlTag]):
for html_tag in html_tags:
bookmark = Bookmark.objects.get(url=html_tag.href)
self.assertIsNotNone(bookmark)
self.assertEqual(bookmark.title, html_tag.title)
self.assertEqual(bookmark.description, html_tag.description)
self.assertEqual(bookmark.date_added, parse_timestamp(html_tag.add_date))
tag_names = parse_tag_string(html_tag.tags)
# Check assigned tags
for tag_name in tag_names:
tag = next(
(tag for tag in bookmark.tags.all() if tag.name == tag_name), None)
self.assertIsNotNone(tag)
def test_import(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
add_date='1', tags='example-tag'),
BookmarkHtmlTag(href='https://foo.com', title='Foo title', description='',
add_date='2', tags=''),
BookmarkHtmlTag(href='https://bar.com', title='Bar title', description='Bar description',
add_date='3', tags='bar-tag, other-tag'),
]
import_html = self.render_html(tags=html_tags)
result = import_netscape_html(import_html, self.get_or_create_test_user())
# Check result
self.assertEqual(result.total, 3)
self.assertEqual(result.success, 3)
self.assertEqual(result.failed, 0)
# Check bookmarks
bookmarks = Bookmark.objects.all()
self.assertEqual(len(bookmarks), 3)
self.assertBookmarksImported(html_tags)
def test_synchronize(self):
# Initial import
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
add_date='1', tags='example-tag'),
BookmarkHtmlTag(href='https://foo.com', title='Foo title', description='',
add_date='2', tags=''),
BookmarkHtmlTag(href='https://bar.com', title='Bar title', description='Bar description',
add_date='3', tags='bar-tag, other-tag'),
]
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
# Change data, add some new data
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='Updated Example title',
description='Updated Example description', add_date='111', tags='updated-example-tag'),
BookmarkHtmlTag(href='https://foo.com', title='Updated Foo title', description='Updated Foo description',
add_date='222', tags='new-tag'),
BookmarkHtmlTag(href='https://bar.com', title='Updated Bar title', description='Updated Bar description',
add_date='333', tags='updated-bar-tag, updated-other-tag'),
BookmarkHtmlTag(href='https://baz.com', add_date='444', tags='baz-tag')
]
# Import updated data
import_html = self.render_html(tags=html_tags)
result = import_netscape_html(import_html, self.get_or_create_test_user())
# Check result
self.assertEqual(result.total, 4)
self.assertEqual(result.success, 4)
self.assertEqual(result.failed, 0)
# Check bookmarks
bookmarks = Bookmark.objects.all()
self.assertEqual(len(bookmarks), 4)
self.assertBookmarksImported(html_tags)
def test_import_with_some_invalid_bookmarks(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com'),
# Invalid URL
BookmarkHtmlTag(href='foo.com'),
# No URL
BookmarkHtmlTag(),
]
import_html = self.render_html(tags=html_tags)
result = import_netscape_html(import_html, self.get_or_create_test_user())
# Check result
self.assertEqual(result.total, 3)
self.assertEqual(result.success, 1)
self.assertEqual(result.failed, 2)
# Check bookmarks
bookmarks = Bookmark.objects.all()
self.assertEqual(len(bookmarks), 1)
self.assertBookmarksImported(html_tags[1:1])
def test_import_tags(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', tags='tag1'),
BookmarkHtmlTag(href='https://foo.com', tags='tag2'),
BookmarkHtmlTag(href='https://bar.com', tags='tag3'),
]
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
self.assertEqual(Tag.objects.count(), 3)
def test_create_missing_tags(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', tags='tag1'),
BookmarkHtmlTag(href='https://foo.com', tags='tag2'),
BookmarkHtmlTag(href='https://bar.com', tags='tag3'),
]
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
html_tags.append(
BookmarkHtmlTag(href='https://baz.com', tags='tag4')
)
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
self.assertEqual(Tag.objects.count(), 4)
def test_should_append_tags_to_bookmark_when_reimporting_with_different_tags(self):
html_tags = [
BookmarkHtmlTag(href='https://example.com', tags='tag1'),
]
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
html_tags.append(
BookmarkHtmlTag(href='https://example.com', tags='tag2, tag3')
)
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
self.assertEqual(Bookmark.objects.count(), 1)
self.assertEqual(Bookmark.objects.all()[0].tags.all().count(), 3)
@override_settings(USE_TZ=False)
def test_use_current_date_when_no_add_date(self):
test_html = self.render_html(tags_html=f'''
<DT><A HREF="https://example.com">Example.com</A>
<DD>Example.com
''')
with patch.object(timezone, 'now', return_value=timezone.datetime(2021, 1, 1)):
import_netscape_html(test_html, self.get_or_create_test_user())
self.assertEqual(Bookmark.objects.count(), 1)
self.assertEqual(Bookmark.objects.all()[0].date_added, timezone.datetime(2021, 1, 1))
def test_keep_title_if_imported_bookmark_has_empty_title(self):
test_html = self.render_html(tags=[
BookmarkHtmlTag(href='https://example.com', title='Example.com')
])
import_netscape_html(test_html, self.get_or_create_test_user())
test_html = self.render_html(tags=[
BookmarkHtmlTag(href='https://example.com')
])
import_netscape_html(test_html, self.get_or_create_test_user())
self.assertEqual(Bookmark.objects.count(), 1)
self.assertEqual(Bookmark.objects.all()[0].title, 'Example.com')
def test_keep_description_if_imported_bookmark_has_empty_description(self):
test_html = self.render_html(tags=[
BookmarkHtmlTag(href='https://example.com', description='Example.com')
])
import_netscape_html(test_html, self.get_or_create_test_user())
test_html = self.render_html(tags=[
BookmarkHtmlTag(href='https://example.com')
])
import_netscape_html(test_html, self.get_or_create_test_user())
self.assertEqual(Bookmark.objects.count(), 1)
self.assertEqual(Bookmark.objects.all()[0].description, 'Example.com')
def test_replace_whitespace_in_tag_names(self):
test_html = self.create_import_html(f'''
<DT><A HREF="https://example.com" ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag 1, tag 2, tag 3">Example.com</A>
test_html = self.render_html(tags_html=f'''
<DT><A HREF="https://example.com" TAGS="tag 1, tag 2, tag 3">Example.com</A>
<DD>Example.com
''')
import_netscape_html(test_html, self.get_or_create_test_user())
@ -35,22 +210,22 @@ class ImporterTestCase(TestCase, BookmarkFactoryMixin):
@disable_logging
def test_validate_empty_or_missing_bookmark_url(self):
test_html = self.create_import_html(f'''
<!-- Empty URL -->
<DT><A HREF="" ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag3">Empty URL</A>
test_html = self.render_html(tags_html=f'''
<DT><A HREF="">Empty URL</A>
<DD>Empty URL
<!-- Missing URL -->
<DT><A ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag3">Missing URL</A>
<DT><A>Missing URL</A>
<DD>Missing URL
''')
import_result = import_netscape_html(test_html, self.get_or_create_test_user())
self.assertEqual(Bookmark.objects.count(), 0)
self.assertEqual(import_result.success, 0)
self.assertEqual(import_result.failed, 2)
def test_schedule_snapshot_creation(self):
user = self.get_or_create_test_user()
test_html = self.create_import_html('')
test_html = self.render_html(tags_html='')
with patch.object(tasks, 'schedule_bookmarks_without_snapshots') as mock_schedule_bookmarks_without_snapshots:
import_netscape_html(test_html, user)

View file

@ -0,0 +1,122 @@
from typing import List
from django.test import TestCase
from bookmarks.services.parser import NetscapeBookmark
from bookmarks.services.parser import parse
from bookmarks.tests.helpers import ImportTestMixin, BookmarkHtmlTag
class ParserTestCase(TestCase, ImportTestMixin):
    """Tests for ``parse``, using bookmark HTML rendered through ``ImportTestMixin``."""

    def assertTagsEqual(self, bookmarks: List[NetscapeBookmark], html_tags: List[BookmarkHtmlTag]):
        """Assert that the parsed bookmarks match the expected tags, position by position."""
        self.assertEqual(len(bookmarks), len(html_tags))
        # Pair items by position. Looking up the position with list.index() would
        # resolve to the first *equal* bookmark, and so compare against the wrong
        # tag whenever two parsed bookmarks are equal.
        for bookmark, html_tag in zip(bookmarks, html_tags):
            self.assertEqual(bookmark.href, html_tag.href)
            self.assertEqual(bookmark.title, html_tag.title)
            self.assertEqual(bookmark.date_added, html_tag.add_date)
            self.assertEqual(bookmark.description, html_tag.description)
            self.assertEqual(bookmark.tag_string, html_tag.tags)

    def test_parse_bookmarks(self):
        html_tags = [
            BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
                            add_date='1', tags='example-tag'),
            BookmarkHtmlTag(href='https://foo.com', title='Foo title', description='',
                            add_date='2', tags=''),
            BookmarkHtmlTag(href='https://bar.com', title='Bar title', description='Bar description',
                            add_date='3', tags='bar-tag, other-tag'),
        ]
        html = self.render_html(html_tags)
        bookmarks = parse(html)

        self.assertTagsEqual(bookmarks, html_tags)

    def test_no_bookmarks(self):
        html = self.render_html()
        bookmarks = parse(html)

        self.assertEqual(bookmarks, [])

    def test_reset_properties_after_adding_bookmark(self):
        # The second, empty entry must not inherit any value from the first one
        html_tags = [
            BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
                            add_date='1', tags='example-tag'),
            BookmarkHtmlTag(href='', title='', description='',
                            add_date='', tags='')
        ]
        html = self.render_html(html_tags)
        bookmarks = parse(html)

        self.assertTagsEqual(bookmarks, html_tags)

    def test_empty_title(self):
        html_tags = [
            BookmarkHtmlTag(href='https://example.com', title='', description='Example description',
                            add_date='1', tags='example-tag'),
        ]
        html = self.render_html(tags_html='''
<DT><A HREF="https://example.com" ADD_DATE="1" TAGS="example-tag"></A>
<DD>Example description
''')
        bookmarks = parse(html)

        self.assertTagsEqual(bookmarks, html_tags)

    def test_with_closing_description_tag(self):
        html_tags = [
            BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
                            add_date='1', tags='example-tag'),
            BookmarkHtmlTag(href='https://foo.com', title='Foo title', description='',
                            add_date='2', tags=''),
        ]
        html = self.render_html(tags_html='''
<DT><A HREF="https://example.com" ADD_DATE="1" TAGS="example-tag">Example title</A>
<DD>Example description</DD>
<DT><A HREF="https://foo.com" ADD_DATE="2">Foo title</A>
<DD></DD>
''')
        bookmarks = parse(html)

        self.assertTagsEqual(bookmarks, html_tags)

    def test_description_tag_before_anchor_tag(self):
        html_tags = [
            BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
                            add_date='1', tags='example-tag'),
            BookmarkHtmlTag(href='https://foo.com', title='Foo title', description='',
                            add_date='2', tags=''),
        ]
        html = self.render_html(tags_html='''
<DT><DD>Example description</DD>
<A HREF="https://example.com" ADD_DATE="1" TAGS="example-tag">Example title</A>
<DT><DD></DD>
<A HREF="https://foo.com" ADD_DATE="2">Foo title</A>
''')
        bookmarks = parse(html)

        self.assertTagsEqual(bookmarks, html_tags)

    def test_with_folders(self):
        # Bookmarks nested in folders are expected as one flat list
        html_tags = [
            BookmarkHtmlTag(href='https://example.com', title='Example title', description='Example description',
                            add_date='1', tags='example-tag'),
            BookmarkHtmlTag(href='https://foo.com', title='Foo title', description='',
                            add_date='2', tags=''),
        ]
        html = self.render_html(tags_html='''
<DL><p>
<DT><H3>Folder 1</H3>
<DL><p>
<DT><A HREF="https://example.com" ADD_DATE="1" TAGS="example-tag">Example title</A>
<DD>Example description
</DL><p>
<DT><H3>Folder 2</H3>
<DL><p>
<DT><A HREF="https://foo.com" ADD_DATE="2">Foo title</A>
</DL><p>
</DL><p>
''')
        bookmarks = parse(html)

        self.assertTagsEqual(bookmarks, html_tags)

View file

@ -13,7 +13,6 @@ django-sass-processor==1.0.1
django-widget-tweaks==1.4.8
djangorestframework==3.12.4
idna==2.8
pyparsing==2.4.7
python-dateutil==2.8.1
pytz==2021.1
requests==2.26.0

View file

@ -18,7 +18,6 @@ django-widget-tweaks==1.4.8
djangorestframework==3.12.4
idna==2.8
libsass==0.21.0
pyparsing==2.4.7
python-dateutil==2.8.1
pytz==2021.1
rcssmin==1.0.6

View file

@ -48,6 +48,11 @@ LOGGING = {
'level': 'DEBUG',
'handlers': ['console'],
'propagate': False,
},
'bookmarks.services.importer': { # Log importer debug output
'level': 'DEBUG',
'handlers': ['console'],
'propagate': False,
}
}
}