linkding/bookmarks/services/parser.py
Sascha Ißbrücker 13ff9ac4f8
Add read it later functionality (#304)
* Allow marking bookmarks as unread

* Restructure navigation to include preset filters

* Add mark as read action

* Improve description

* Highlight unread bookmarks visually

* Mark bookmarks as read by default

* Add tests

* Implement toread flag in importer

* Implement admin actions

* Add query tests

* Remove untagged link

* Update api docs

* Reduce height of description textarea

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
2022-07-23 22:17:20 +02:00

87 lines
2.2 KiB
Python

from dataclasses import dataclass
from html.parser import HTMLParser
from typing import Dict, List
@dataclass
class NetscapeBookmark:
    """A single bookmark entry extracted from Netscape bookmark HTML.

    All values are kept as the raw strings found in the file; only the
    ``to_read`` flag is converted to a boolean by the parser.
    """

    # Target URL, taken from the anchor's ``href`` attribute.
    href: str
    # Text content of the anchor element.
    title: str
    # Text content of the ``<DD>`` element following the anchor, if any.
    description: str
    # Raw value of the anchor's ``add_date`` attribute (not parsed here).
    date_added: str
    # Raw value of the anchor's ``tags`` attribute (not split here).
    tag_string: str
    # True when the anchor's ``toread`` attribute equals ``'1'``.
    to_read: bool
class BookmarkParser(HTMLParser):
    """Event-driven parser that collects bookmarks from Netscape bookmark HTML.

    Incoming tag and text events are dispatched by name to optional
    ``handle_start_<tag>``, ``handle_end_<tag>`` and ``handle_<tag>_data``
    methods. An ``<A>`` element opens a pending bookmark; the pending
    bookmark is committed to ``self.bookmarks`` when the next ``<DT>``
    starts or a ``</DL>`` is reached.
    """

    # The only anchor attributes that are copied onto the parser. Everything
    # else in the (untrusted) input is ignored so that it cannot overwrite
    # parser state such as ``bookmarks`` or HTMLParser internals.
    _ANCHOR_ATTRS = ('href', 'add_date', 'tags', 'toread')

    def __init__(self):
        super().__init__()
        # Completed bookmarks, in document order.
        self.bookmarks = []
        # Name of the most recently opened tag; None right after an end tag.
        self.current_tag = None
        # Bookmark opened by the latest <A> that has not been committed yet.
        self.bookmark = None
        # Scratch values for the bookmark currently being assembled.
        self.href = ''
        self.add_date = ''
        self.tags = ''
        self.title = ''
        self.description = ''
        self.toread = ''

    def handle_starttag(self, tag: str, attrs: list):
        """Dispatch to ``handle_start_<tag>`` if defined, then track the tag.

        The handler receives the attributes as a dict with lower-cased keys.
        """
        handler = getattr(self, 'handle_start_' + tag.lower(), None)
        if callable(handler):
            handler({k.lower(): v for k, v in attrs})
        self.current_tag = tag

    def handle_endtag(self, tag: str):
        """Dispatch to ``handle_end_<tag>`` if defined, then clear the tag."""
        handler = getattr(self, 'handle_end_' + tag.lower(), None)
        if callable(handler):
            handler()
        self.current_tag = None

    def handle_data(self, data):
        """Dispatch text to ``handle_<current_tag>_data`` if defined."""
        handler = getattr(self, f'handle_{self.current_tag}_data', None)
        if callable(handler):
            handler(data)

    def handle_end_dl(self):
        """Commit the pending bookmark when a list is closed."""
        self.add_bookmark()

    def handle_start_dt(self, attrs: Dict[str, str]):
        """Commit the pending bookmark when the next list item starts."""
        self.add_bookmark()

    def handle_start_a(self, attrs: Dict[str, str]):
        """Open a new pending bookmark from the attributes of an ``<A>`` tag.

        Title and description are filled in later by ``add_bookmark`` from
        the text collected in the meantime.
        """
        self._apply_anchor_attrs(attrs)
        self.bookmark = NetscapeBookmark(
            href=self.href,
            title='',
            description='',
            date_added=self.add_date,
            tag_string=self.tags,
            to_read=self.toread == '1',
        )

    def handle_a_data(self, data):
        """Record the anchor text as the title of the pending bookmark."""
        self.title = data.strip()

    def handle_dd_data(self, data):
        """Record ``<DD>`` text as the description of the pending bookmark."""
        self.description = data.strip()

    def add_bookmark(self):
        """Commit the pending bookmark, if any, and reset the scratch state."""
        if self.bookmark:
            self.bookmark.title = self.title
            self.bookmark.description = self.description
            self.bookmarks.append(self.bookmark)
        # Always reset, even without a pending bookmark, so that text seen
        # outside of a bookmark does not leak into the next one.
        self.bookmark = None
        self.href = ''
        self.add_date = ''
        self.tags = ''
        self.title = ''
        self.description = ''
        self.toread = ''

    def _apply_anchor_attrs(self, attrs: Dict[str, str]):
        """Copy the whitelisted anchor attributes from *attrs* onto the parser.

        Attributes missing from *attrs* keep their current value; attributes
        not listed in ``_ANCHOR_ATTRS`` are ignored.
        """
        for key in self._ANCHOR_ATTRS:
            if key in attrs:
                # HTMLParser reports valueless attributes (``<a href>``) as
                # None; normalise to '' so the fields are always strings.
                setattr(self, key, attrs[key] or '')
def parse(html: str) -> List[NetscapeBookmark]:
    """Parse Netscape bookmark HTML and return the bookmarks it contains.

    :param html: complete contents of a Netscape bookmark file.
    :return: the parsed bookmarks, in document order.
    """
    parser = BookmarkParser()
    parser.feed(html)
    # Force HTMLParser to process any text it is still buffering at the end
    # of the input.
    parser.close()
    # A bookmark is normally committed by the next <DT> or the closing </DL>.
    # Flush explicitly so the last entry is not lost when the input is
    # truncated or lacks the closing tag; this is a no-op otherwise.
    parser.add_bookmark()
    return parser.bookmarks