2020-12-31 06:02:28 +00:00
|
|
|
from dataclasses import dataclass
|
2022-05-21 07:27:30 +00:00
|
|
|
from html.parser import HTMLParser
|
|
|
|
from typing import Dict, List
|
2020-12-31 06:02:28 +00:00
|
|
|
|
2023-11-24 08:21:23 +00:00
|
|
|
from bookmarks.models import parse_tag_string
|
|
|
|
|
2020-12-31 06:02:28 +00:00
|
|
|
|
|
|
|
@dataclass
class NetscapeBookmark:
    """One bookmark entry read from a Netscape bookmark HTML file.

    Instances are plain value holders; the field order below is also the
    positional order of the generated constructor.
    """

    # Target URL, taken from the HREF attribute of the <A> element.
    href: str
    # Text content of the <A> element, whitespace-stripped.
    title: str
    # Text of the <DD> element, up to an optional [linkding-notes] marker.
    description: str
    # Text enclosed by [linkding-notes]...[/linkding-notes] in the <DD> element.
    notes: str
    # Raw ADD_DATE attribute value, kept as the string found in the file.
    date_added: str
    # Tag names parsed from the TAGS attribute, without the archived marker tag.
    tag_names: List[str]
    # True when the TOREAD attribute equals '1'.
    to_read: bool
    # True unless the PRIVATE attribute equals '0'.
    private: bool
    # True when the TAGS attribute contains 'linkding:archived'.
    archived: bool
|
2020-12-31 06:02:28 +00:00
|
|
|
|
|
|
|
|
2022-05-21 07:27:30 +00:00
|
|
|
class BookmarkParser(HTMLParser):
    """Collect bookmarks from Netscape bookmark HTML.

    The parser reacts to three elements:

    * ``<DT>`` starts a new entry and commits the previous one.
    * ``<A>`` carries the bookmark attributes and, as text, the title.
    * ``<DD>`` carries the description and optional embedded notes.

    A closing ``</DL>`` commits the entry that is still pending. Finished
    entries are appended to ``self.bookmarks``.
    """

    # The only <A> attributes that are copied onto the parser. Anything else
    # in the (untrusted) input is ignored so that it cannot overwrite parser
    # state such as ``bookmarks`` or HTMLParser internals.
    _ANCHOR_ATTRIBUTES = ('href', 'add_date', 'tags', 'toread', 'private')

    def __init__(self):
        super().__init__()
        # Completed bookmarks, in document order.
        self.bookmarks = []

        # Name of the most recently opened tag; None right after an end tag.
        self.current_tag = None
        # Bookmark currently being assembled, or None.
        self.bookmark = None
        # Raw per-entry values gathered while the current entry is parsed.
        self.href = ''
        self.add_date = ''
        self.tags = ''
        self.title = ''
        self.description = ''
        self.notes = ''
        self.toread = ''
        self.private = ''

    def handle_starttag(self, tag: str, attrs: list):
        """Dispatch to ``handle_start_<tag>`` if defined, then record the tag."""
        handler = getattr(self, 'handle_start_' + tag.lower(), None)
        if callable(handler):
            # Attribute names are lower-cased; for duplicates the last one wins.
            handler({k.lower(): v for k, v in attrs})
        self.current_tag = tag

    def handle_endtag(self, tag: str):
        """Dispatch to ``handle_end_<tag>`` if defined, then clear the tag."""
        handler = getattr(self, 'handle_end_' + tag.lower(), None)
        if callable(handler):
            handler()
        self.current_tag = None

    def handle_data(self, data):
        """Dispatch text to ``handle_<current_tag>_data`` if defined."""
        handler = getattr(self, f'handle_{self.current_tag}_data', None)
        if callable(handler):
            handler(data)

    def handle_end_dl(self):
        """Commit the pending bookmark when a list is closed."""
        self.add_bookmark()

    def handle_start_dt(self, attrs: Dict[str, str]):
        """Commit the pending bookmark before a new entry begins."""
        self.add_bookmark()

    def handle_start_a(self, attrs: Dict[str, str]):
        """Create the pending bookmark from the attributes of an ``<A>`` tag.

        Title, description and notes are left empty here; they are filled in
        by ``add_bookmark`` once the element text has been seen.
        """
        self._apply_anchor_attrs(attrs)

        tag_names = parse_tag_string(self.tags)
        archived = 'linkding:archived' in self.tags
        try:
            # The marker only flags the archived state; it is not a real tag.
            tag_names.remove('linkding:archived')
        except ValueError:
            pass

        self.bookmark = NetscapeBookmark(
            href=self.href,
            title='',
            description='',
            notes='',
            date_added=self.add_date,
            tag_names=tag_names,
            to_read=self.toread == '1',
            # Mark as private by default, also when attribute is not specified
            private=self.private != '0',
            archived=archived,
        )

    def _apply_anchor_attrs(self, attrs):
        """Copy the known ``<A>`` attributes from ``attrs`` onto the parser.

        Only keys listed in ``_ANCHOR_ATTRIBUTES`` are considered, and only
        when present in ``attrs``. HTMLParser reports an attribute without a
        value (e.g. ``<A TAGS>``) as None; that is stored as an empty string
        so later string operations do not fail.
        """
        for key in self._ANCHOR_ATTRIBUTES:
            if key in attrs:
                setattr(self, key, attrs[key] or '')

    def handle_a_data(self, data):
        """Store the stripped text of the ``<A>`` element as the title."""
        self.title = data.strip()

    def handle_dd_data(self, data):
        """Split ``<DD>`` text into description and embedded notes.

        Notes are the text between ``[linkding-notes]`` and
        ``[/linkding-notes]``; the description is everything before the
        opening marker, or the whole stripped text if there is no marker.
        """
        desc = data.strip()
        if '[linkding-notes]' in desc:
            self.notes = desc.split('[linkding-notes]')[1].split('[/linkding-notes]')[0]
        self.description = desc.split('[linkding-notes]')[0]

    def add_bookmark(self):
        """Commit the pending bookmark, if any, and reset per-entry state."""
        if self.bookmark:
            self.bookmark.title = self.title
            self.bookmark.description = self.description
            self.bookmark.notes = self.notes
            self.bookmarks.append(self.bookmark)
        # Reset unconditionally so text seen without an <A> cannot leak into
        # the next entry.
        self.bookmark = None
        self.href = ''
        self.add_date = ''
        self.tags = ''
        self.title = ''
        self.description = ''
        self.notes = ''
        self.toread = ''
        self.private = ''
|
2022-05-21 07:27:30 +00:00
|
|
|
|
|
|
|
|
|
|
|
def parse(html: str) -> List[NetscapeBookmark]:
    """Parse Netscape bookmark HTML and return the bookmarks it contains.

    :param html: complete content of the bookmark file
    :return: bookmarks in the order they appear in ``html``
    """
    parser = BookmarkParser()
    parser.feed(html)
    # feed() may keep trailing text buffered while waiting for more input;
    # close() forces it through the handlers as if end-of-file was reached.
    parser.close()
    # An entry is normally committed by the next <DT> or by </DL>. Commit
    # explicitly so a file that ends without a closing </DL> does not lose
    # its last bookmark; this is a no-op when nothing is pending.
    parser.add_bookmark()
    return parser.bookmarks
|