Add support for PRIVATE flag in import and export (#505)

* Add support for PRIVATE attribute in import

* Add support for PRIVATE attribute in export

* Update import sync tests
This commit is contained in:
Sascha Ißbrücker 2023-08-20 11:44:53 +02:00 committed by GitHub
parent 5d9e487ec1
commit 8206705876
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 184 additions and 20 deletions

View file

@ -33,9 +33,10 @@ def append_bookmark(doc: BookmarkDocument, bookmark: Bookmark):
desc = html.escape(bookmark.resolved_description or '')
tags = ','.join(bookmark.tag_names)
toread = '1' if bookmark.unread else '0'
private = '0' if bookmark.shared else '1'
added = int(bookmark.date_added.timestamp())
doc.append(f'<DT><A HREF="{url}" ADD_DATE="{added}" PRIVATE="0" TOREAD="{toread}" TAGS="{tags}">{title}</A>')
doc.append(f'<DT><A HREF="{url}" ADD_DATE="{added}" PRIVATE="{private}" TOREAD="{toread}" TAGS="{tags}">{title}</A>')
if desc:
doc.append(f'<DD>{desc}')

View file

@ -20,6 +20,11 @@ class ImportResult:
failed: int = 0
@dataclass(frozen=True)
class ImportOptions:
    """Settings that control how an import run treats the parsed bookmarks.

    Instances are immutable so that a single instance can safely be shared
    between calls, for example when used as a default argument value.
    """
    # When True, bookmarks that are not marked as private in the import file
    # are imported as shared. When False, the private flag is not mapped.
    map_private_flag: bool = False
class TagCache:
def __init__(self, user: User):
self.user = user
@ -50,7 +55,7 @@ class TagCache:
self.cache[tag.name.lower()] = tag
def import_netscape_html(html: str, user: User):
def import_netscape_html(html: str, user: User, options: ImportOptions = ImportOptions()) -> ImportResult:
result = ImportResult()
import_start = timezone.now()
@ -70,7 +75,7 @@ def import_netscape_html(html: str, user: User):
# Split bookmarks to import into batches, to keep memory usage for bulk operations manageable
batches = _get_batches(netscape_bookmarks, 200)
for batch in batches:
_import_batch(batch, user, tag_cache, result)
_import_batch(batch, user, options, tag_cache, result)
# Create snapshots for newly imported bookmarks
tasks.schedule_bookmarks_without_snapshots(user)
@ -114,7 +119,11 @@ def _get_batches(items: List, batch_size: int):
return batches
def _import_batch(netscape_bookmarks: List[NetscapeBookmark], user: User, tag_cache: TagCache, result: ImportResult):
def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
user: User,
options: ImportOptions,
tag_cache: TagCache,
result: ImportResult):
# Query existing bookmarks
batch_urls = [bookmark.href for bookmark in netscape_bookmarks]
existing_bookmarks = Bookmark.objects.filter(owner=user, url__in=batch_urls)
@ -135,7 +144,7 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark], user: User, tag_ca
else:
is_update = True
# Copy data from parsed bookmark
_copy_bookmark_data(netscape_bookmark, bookmark)
_copy_bookmark_data(netscape_bookmark, bookmark, options)
# Validate bookmark fields, exclude owner to prevent n+1 database query,
# also there is no specific validation on owner
bookmark.clean_fields(exclude=['owner'])
@ -152,8 +161,14 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark], user: User, tag_ca
result.failed = result.failed + 1
# Bulk update bookmarks in DB
Bookmark.objects.bulk_update(bookmarks_to_update,
['url', 'date_added', 'date_modified', 'unread', 'title', 'description', 'owner'])
Bookmark.objects.bulk_update(bookmarks_to_update, ['url',
'date_added',
'date_modified',
'unread',
'shared',
'title',
'description',
'owner'])
# Bulk insert new bookmarks into DB
Bookmark.objects.bulk_create(bookmarks_to_create)
@ -187,7 +202,7 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark], user: User, tag_ca
BookmarkToTagRelationShip.objects.bulk_create(relationships, ignore_conflicts=True)
def _copy_bookmark_data(netscape_bookmark: NetscapeBookmark, bookmark: Bookmark):
def _copy_bookmark_data(netscape_bookmark: NetscapeBookmark, bookmark: Bookmark, options: ImportOptions):
bookmark.url = netscape_bookmark.href
if netscape_bookmark.date_added:
bookmark.date_added = parse_timestamp(netscape_bookmark.date_added)
@ -199,3 +214,5 @@ def _copy_bookmark_data(netscape_bookmark: NetscapeBookmark, bookmark: Bookmark)
bookmark.title = netscape_bookmark.title
if netscape_bookmark.description:
bookmark.description = netscape_bookmark.description
if options.map_private_flag and not netscape_bookmark.private:
bookmark.shared = True

View file

@ -11,6 +11,7 @@ class NetscapeBookmark:
date_added: str
tag_string: str
to_read: bool
private: bool
class BookmarkParser(HTMLParser):
@ -26,6 +27,7 @@ class BookmarkParser(HTMLParser):
self.title = ''
self.description = ''
self.toread = ''
self.private = ''
def handle_starttag(self, tag: str, attrs: list):
name = 'handle_start_' + tag.lower()
@ -58,7 +60,9 @@ class BookmarkParser(HTMLParser):
description='',
date_added=self.add_date,
tag_string=self.tags,
to_read=self.toread == '1'
to_read=self.toread == '1',
# Mark as private by default, also when attribute is not specified
private=self.private != '0',
)
def handle_a_data(self, data):
@ -79,6 +83,7 @@ class BookmarkParser(HTMLParser):
self.title = ''
self.description = ''
self.toread = ''
self.private = ''
def parse(html: str) -> List[NetscapeBookmark]:

View file

@ -144,6 +144,16 @@
added and existing ones are updated.</p>
<form method="post" enctype="multipart/form-data" action="{% url 'bookmarks:settings.import' %}">
{% csrf_token %}
<div class="form-group">
<label for="import_map_private_flag" class="form-checkbox">
<input type="checkbox" id="import_map_private_flag" name="map_private_flag">
<i class="form-icon"></i> Import public bookmarks as shared
</label>
<div class="form-input-hint">
When importing bookmarks from a service that supports marking bookmarks as public or private (using the <code>PRIVATE</code> attribute), enabling this option will import all bookmarks that are marked as not private as shared bookmarks.
Otherwise, all bookmarks will be imported as private bookmarks.
</div>
</div>
<div class="form-group">
<div class="input-group col-8 col-md-12">
<input class="form-input" type="file" name="import_file">
@ -171,6 +181,10 @@
<section class="content-area">
<h2>Export</h2>
<p>Export all bookmarks in Netscape HTML format.</p>
<p>
Note that exporting bookmark notes is currently not supported due to limitations of the format.
For proper backups please use a database backup as described in the documentation.
</p>
<a class="btn btn-primary" href="{% url 'bookmarks:settings.export' %}">Download (.html)</a>
{% if export_error %}
<div class="has-error">

View file

@ -1,5 +1,6 @@
import random
import logging
import datetime
from typing import List
from bs4 import BeautifulSoup
@ -35,6 +36,7 @@ class BookmarkFactoryMixin:
website_description: str = '',
web_archive_snapshot_url: str = '',
favicon_file: str = '',
added: datetime = None,
):
if not title:
title = get_random_string(length=32)
@ -45,6 +47,8 @@ class BookmarkFactoryMixin:
if not url:
unique_id = get_random_string(length=32)
url = 'https://example.com/' + unique_id
if added is None:
added = timezone.now()
bookmark = Bookmark(
url=url,
title=title,
@ -52,7 +56,7 @@ class BookmarkFactoryMixin:
notes=notes,
website_title=website_title,
website_description=website_description,
date_added=timezone.now(),
date_added=added,
date_modified=timezone.now(),
owner=user,
is_archived=is_archived,
@ -125,13 +129,15 @@ class BookmarkHtmlTag:
description: str = '',
add_date: str = '',
tags: str = '',
to_read: bool = False):
to_read: bool = False,
private: bool = True):
self.href = href
self.title = title
self.description = description
self.add_date = add_date
self.tags = tags
self.to_read = to_read
self.private = private
class ImportTestMixin:
@ -141,7 +147,8 @@ class ImportTestMixin:
<A {f'HREF="{tag.href}"' if tag.href else ''}
{f'ADD_DATE="{tag.add_date}"' if tag.add_date else ''}
{f'TAGS="{tag.tags}"' if tag.tags else ''}
TOREAD="{1 if tag.to_read else 0}">
TOREAD="{1 if tag.to_read else 0}"
PRIVATE="{1 if tag.private else 0}">
{tag.title if tag.title else ''}
</A>
{f'<DD>{tag.description}' if tag.description else ''}

View file

@ -1,10 +1,36 @@
from django.test import TestCase
from django.utils import timezone
from bookmarks.services import exporter
from bookmarks.tests.helpers import BookmarkFactoryMixin
class ExporterTestCase(TestCase, BookmarkFactoryMixin):
def test_export_bookmarks(self):
    """Exported HTML contains one entry per bookmark with the expected attributes."""
    date_added = timezone.now()
    timestamp = int(date_added.timestamp())

    # Fixtures are created one after another, in the order they are expected in the export
    exported_bookmarks = []
    exported_bookmarks.append(
        self.setup_bookmark(url='https://example.com/1', title='Title 1', added=date_added,
                            description='Example description'))
    tagged_with = [self.setup_tag(name='tag1'), self.setup_tag(name='tag2'), self.setup_tag(name='tag3')]
    exported_bookmarks.append(
        self.setup_bookmark(url='https://example.com/2', title='Title 2', added=date_added, tags=tagged_with))
    exported_bookmarks.append(
        self.setup_bookmark(url='https://example.com/3', title='Title 3', added=date_added, unread=True))
    exported_bookmarks.append(
        self.setup_bookmark(url='https://example.com/4', title='Title 4', added=date_added, shared=True))

    exported_html = exporter.export_netscape_html(exported_bookmarks)

    # Only the shared bookmark (4) is expected to carry PRIVATE="0"
    expected_lines = [
        f'<DT><A HREF="https://example.com/1" ADD_DATE="{timestamp}" PRIVATE="1" TOREAD="0" TAGS="">Title 1</A>',
        '<DD>Example description',
        f'<DT><A HREF="https://example.com/2" ADD_DATE="{timestamp}" PRIVATE="1" TOREAD="0" TAGS="tag1,tag2,tag3">Title 2</A>',
        f'<DT><A HREF="https://example.com/3" ADD_DATE="{timestamp}" PRIVATE="1" TOREAD="1" TAGS="">Title 3</A>',
        f'<DT><A HREF="https://example.com/4" ADD_DATE="{timestamp}" PRIVATE="0" TOREAD="0" TAGS="">Title 4</A>',
    ]
    expected_fragment = '\n\r'.join(expected_lines)
    self.assertIn(expected_fragment, exported_html)
def test_escape_html_in_title_and_description(self):
bookmark = self.setup_bookmark(
title='<style>: The Style Information element',

View file

@ -6,7 +6,7 @@ from django.utils import timezone
from bookmarks.models import Bookmark, Tag, parse_tag_string
from bookmarks.services import tasks
from bookmarks.services.importer import import_netscape_html
from bookmarks.services.importer import import_netscape_html, ImportOptions
from bookmarks.tests.helpers import BookmarkFactoryMixin, ImportTestMixin, BookmarkHtmlTag, disable_logging
from bookmarks.utils import parse_timestamp
@ -22,6 +22,7 @@ class ImporterTestCase(TestCase, BookmarkFactoryMixin, ImportTestMixin):
self.assertEqual(bookmark.description, html_tag.description)
self.assertEqual(bookmark.date_added, parse_timestamp(html_tag.add_date))
self.assertEqual(bookmark.unread, html_tag.to_read)
self.assertEqual(bookmark.shared, not html_tag.private)
tag_names = parse_tag_string(html_tag.tags)
@ -66,35 +67,46 @@ class ImporterTestCase(TestCase, BookmarkFactoryMixin, ImportTestMixin):
add_date='3', tags='bar-tag, other-tag'),
BookmarkHtmlTag(href='https://example.com/unread', title='Unread title', description='Unread description',
add_date='3', to_read=True),
BookmarkHtmlTag(href='https://example.com/private', title='Private title', description='Private description',
add_date='4', private=True),
]
import_html = self.render_html(tags=html_tags)
import_netscape_html(import_html, self.get_or_create_test_user())
# Check bookmarks
bookmarks = Bookmark.objects.all()
self.assertEqual(len(bookmarks), 5)
self.assertBookmarksImported(html_tags)
# Change data, add some new data
html_tags = [
BookmarkHtmlTag(href='https://example.com', title='Updated Example title',
description='Updated Example description', add_date='111', tags='updated-example-tag'),
BookmarkHtmlTag(href='https://example.com/foo', title='Updated Foo title', description='Updated Foo description',
BookmarkHtmlTag(href='https://example.com/foo', title='Updated Foo title',
description='Updated Foo description',
add_date='222', tags='new-tag'),
BookmarkHtmlTag(href='https://example.com/bar', title='Updated Bar title', description='Updated Bar description',
BookmarkHtmlTag(href='https://example.com/bar', title='Updated Bar title',
description='Updated Bar description',
add_date='333', tags='updated-bar-tag, updated-other-tag'),
BookmarkHtmlTag(href='https://example.com/unread', title='Unread title', description='Unread description',
add_date='3', to_read=False),
BookmarkHtmlTag(href='https://example.com/private', title='Private title', description='Private description',
add_date='4', private=False),
BookmarkHtmlTag(href='https://baz.com', add_date='444', tags='baz-tag')
]
# Import updated data
import_html = self.render_html(tags=html_tags)
result = import_netscape_html(import_html, self.get_or_create_test_user())
result = import_netscape_html(import_html, self.get_or_create_test_user(), ImportOptions(map_private_flag=True))
# Check result
self.assertEqual(result.total, 5)
self.assertEqual(result.success, 5)
self.assertEqual(result.total, 6)
self.assertEqual(result.success, 6)
self.assertEqual(result.failed, 0)
# Check bookmarks
bookmarks = Bookmark.objects.all()
self.assertEqual(len(bookmarks), 5)
self.assertEqual(len(bookmarks), 6)
self.assertBookmarksImported(html_tags)
def test_import_with_some_invalid_bookmarks(self):
@ -254,6 +266,33 @@ class ImporterTestCase(TestCase, BookmarkFactoryMixin, ImportTestMixin):
self.assertEqual(import_result.success, 0)
self.assertEqual(import_result.failed, 2)
def test_private_flag(self):
    """The PRIVATE attribute is only mapped to ``shared`` when the import option is enabled."""
    import_html = self.render_html(tags_html='''
<DT><A HREF="https://example.com/1" ADD_DATE="1">Example title 1</A>
<DD>Example description 1</DD>
<DT><A HREF="https://example.com/2" ADD_DATE="1" PRIVATE="1">Example title 2</A>
<DD>Example description 2</DD>
<DT><A HREF="https://example.com/3" ADD_DATE="1" PRIVATE="0">Example title 3</A>
<DD>Example description 3</DD>
''')

    # Default options: the private flag is not mapped, so nothing is shared
    import_netscape_html(import_html, self.get_or_create_test_user(), ImportOptions())

    self.assertEqual(Bookmark.objects.count(), 3)
    for imported in Bookmark.objects.all():
        self.assertEqual(imported.shared, False)

    # Option enabled: only the bookmark explicitly marked PRIVATE="0" becomes shared
    Bookmark.objects.all().delete()
    import_netscape_html(import_html, self.get_or_create_test_user(), ImportOptions(map_private_flag=True))

    expected_shared = {
        'https://example.com/1': False,
        'https://example.com/2': False,
        'https://example.com/3': True,
    }
    # Look up every bookmark first so a missing one surfaces before any assertion runs
    imported_by_url = {url: Bookmark.objects.get(url=url) for url in expected_shared}
    for url, shared in expected_shared.items():
        self.assertEqual(imported_by_url[url].shared, shared)
def test_schedule_snapshot_creation(self):
user = self.get_or_create_test_user()
test_html = self.render_html(tags_html='')

View file

@ -18,6 +18,7 @@ class ParserTestCase(TestCase, ImportTestMixin):
self.assertEqual(bookmark.description, html_tag.description)
self.assertEqual(bookmark.tag_string, html_tag.tags)
self.assertEqual(bookmark.to_read, html_tag.to_read)
self.assertEqual(bookmark.private, html_tag.private)
def test_parse_bookmarks(self):
html_tags = [
@ -123,3 +124,28 @@ class ParserTestCase(TestCase, ImportTestMixin):
bookmarks = parse(html)
self.assertTagsEqual(bookmarks, html_tags)
def test_private_flag(self):
    """Parsed bookmarks are private unless the anchor carries PRIVATE="0"."""
    cases = [
        # is private by default
        ('''
<DT><A HREF="https://example.com" ADD_DATE="1">Example title</A>
<DD>Example description</DD>
''', True),
        # explicitly marked as private
        ('''
<DT><A HREF="https://example.com" ADD_DATE="1" PRIVATE="1">Example title</A>
<DD>Example description</DD>
''', True),
        # explicitly marked as public
        ('''
<DT><A HREF="https://example.com" ADD_DATE="1" PRIVATE="0">Example title</A>
<DD>Example description</DD>
''', False),
    ]

    for tags_html, expected_private in cases:
        document = self.render_html(tags_html=tags_html)
        parsed = parse(document)
        self.assertEqual(parsed[0].private, expected_private)

View file

@ -1,6 +1,7 @@
from django.test import TestCase
from django.urls import reverse
from bookmarks.models import Bookmark
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
@ -77,3 +78,30 @@ class SettingsImportViewTestCase(TestCase, BookmarkFactoryMixin):
self.assertRedirects(response, reverse('bookmarks:settings.general'))
self.assertFormSuccessHint(response, '2 bookmarks were successfully imported')
self.assertFormErrorHint(response, '1 bookmarks could not be imported')
def test_should_respect_map_private_flag_option(self):
    """The import view only imports bookmarks as shared when ``map_private_flag`` is posted."""

    def upload(extra_fields):
        # Posts the sample import file together with any additional form fields
        with open('bookmarks/tests/resources/simple_valid_import_file.html') as import_file:
            self.client.post(
                reverse('bookmarks:settings.import'),
                {'import_file': import_file, **extra_fields},
                follow=True
            )

    # Without the checkbox, every bookmark is imported as not shared
    upload({})

    self.assertEqual(Bookmark.objects.count(), 3)
    for imported in Bookmark.objects.all():
        self.assertEqual(imported.shared, False)

    Bookmark.objects.all().delete()

    # With the checkbox, every bookmark from the sample file is imported as shared
    upload({'map_private_flag': 'on'})

    self.assertEqual(Bookmark.objects.count(), 3)
    for imported in Bookmark.objects.all():
        self.assertEqual(imported.shared, True)

View file

@ -116,6 +116,7 @@ def integrations(request):
@login_required
def bookmark_import(request):
import_file = request.FILES.get('import_file')
import_options = importer.ImportOptions(map_private_flag=request.POST.get('map_private_flag') == 'on')
if import_file is None:
messages.error(request, 'Please select a file to import.', 'bookmark_import_errors')
@ -123,7 +124,7 @@ def bookmark_import(request):
try:
content = import_file.read().decode()
result = importer.import_netscape_html(content, request.user)
result = importer.import_netscape_html(content, request.user, import_options)
success_msg = str(result.success) + ' bookmarks were successfully imported.'
messages.success(request, success_msg, 'bookmark_import_success')
if result.failed > 0: