mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
Merge pull request #1168 from mAAdhaTTah/add-readwise-reader
This commit is contained in:
commit
16d278fbdb
3 changed files with 127 additions and 0 deletions
|
@ -223,6 +223,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
|||
|
||||
'POCKET_CONSUMER_KEY': {'type': str, 'default': None},
|
||||
'POCKET_ACCESS_TOKENS': {'type': dict, 'default': {}},
|
||||
|
||||
'READWISE_READER_TOKENS': {'type': dict, 'default': {}},
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ from ..index.schema import Link
|
|||
from ..logging_util import TimedProgress, log_source_saved
|
||||
|
||||
from . import pocket_api
|
||||
from . import readwise_reader_api
|
||||
from . import wallabag_atom
|
||||
from . import pocket_html
|
||||
from . import pinboard_rss
|
||||
|
@ -51,6 +52,7 @@ from . import url_list
|
|||
PARSERS = {
|
||||
# Specialized parsers
|
||||
pocket_api.KEY: (pocket_api.NAME, pocket_api.PARSER),
|
||||
readwise_reader_api.KEY: (readwise_reader_api.NAME, readwise_reader_api.PARSER),
|
||||
wallabag_atom.KEY: (wallabag_atom.NAME, wallabag_atom.PARSER),
|
||||
pocket_html.KEY: (pocket_html.NAME, pocket_html.PARSER),
|
||||
pinboard_rss.KEY: (pinboard_rss.NAME, pinboard_rss.PARSER),
|
||||
|
|
123
archivebox/parsers/readwise_reader_api.py
Normal file
123
archivebox/parsers/readwise_reader_api.py
Normal file
|
@ -0,0 +1,123 @@
|
|||
__package__ = "archivebox.parsers"
|
||||
|
||||
|
||||
import re
|
||||
import requests
|
||||
from datetime import datetime
|
||||
|
||||
from typing import IO, Iterable, Optional
|
||||
from configparser import ConfigParser
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from ..index.schema import Link
|
||||
from ..util import enforce_types
|
||||
from ..system import atomic_write
|
||||
from ..config import (
|
||||
SOURCES_DIR,
|
||||
READWISE_READER_TOKENS,
|
||||
)
|
||||
|
||||
|
||||
# INI-style file (read and written with ConfigParser below) holding one
# section per Readwise username, whose "since" option is the last saved
# pagination cursor.
API_DB_PATH = Path(SOURCES_DIR) / "readwise_reader_api.db"
|
||||
|
||||
|
||||
class ReadwiseReaderAPI:
    """Minimal client for the Readwise Reader document-list endpoint.

    Attributes:
        api_token: Access token sent in the ``Authorization`` header.
        cursor: Value sent as the ``pageCursor`` query parameter; ``None``
            asks for the first page.
    """

    cursor: Optional[str]

    def __init__(self, api_token, cursor=None) -> None:
        self.api_token = api_token
        self.cursor = cursor

    def _auth_headers(self) -> dict:
        """Return the HTTP headers that authenticate this client."""
        # The credential must always come from the instance (and therefore
        # from user configuration). Never embed a token literal in source:
        # it leaks the secret and makes every user share one account.
        return {"Authorization": f"Token {self.api_token}"}

    def get_archive(self):
        """Request one page of documents whose location is "archive".

        Returns:
            The ``requests`` response for the page selected by ``self.cursor``.

        Raises:
            Whatever ``response.raise_for_status()`` raises for an error
            status code.
        """
        response = requests.get(
            url="https://readwise.io/api/v3/list/",
            headers=self._auth_headers(),
            params={
                "location": "archive",
                "pageCursor": self.cursor,
            },
        )
        response.raise_for_status()
        return response
|
||||
|
||||
def get_readwise_reader_articles(api: "ReadwiseReaderAPI"):
    """Yield every article from the API, following pagination to the end.

    Args:
        api: Client whose ``get_archive()`` returns a response exposing
            ``.json()``. Its ``cursor`` attribute is advanced to each
            non-empty ``nextPageCursor`` as pages are consumed, so after
            exhaustion it holds the last cursor that was followed.

    Yields:
        Each item of the ``"results"`` list of every page, in order.
    """
    # Iterate instead of recursing: a recursive ``yield from`` adds one
    # nested generator frame per page and hits RecursionError on large
    # archives.
    while True:
        body = api.get_archive().json()
        yield from body["results"]

        # A missing or empty cursor both mean "no further pages".
        next_cursor = body.get("nextPageCursor")
        if not next_cursor:
            return
        api.cursor = next_cursor
|
||||
|
||||
|
||||
def link_from_article(article: dict, sources: list):
    """Build a ``Link`` from one article dict returned by the API.

    Reads the ``source_url``, ``title`` and ``updated_at`` keys of
    *article*. When the title is falsy the URL is used as the title. The
    link timestamp is the stringified POSIX timestamp parsed from
    ``updated_at`` with ``datetime.fromisoformat``. Tags are left empty and
    *sources* is passed through unchanged.
    """
    source_url: str = article['source_url']
    display_title = article["title"] or source_url
    updated_at = datetime.fromisoformat(article['updated_at'])

    link_fields = {
        "url": source_url,
        "timestamp": str(updated_at.timestamp()),
        "title": display_title,
        "tags": "",
        "sources": sources,
    }
    return Link(**link_fields)
|
||||
|
||||
|
||||
def write_cursor(username: str, since: str):
    """Store *since* as the saved cursor for *username* in ``API_DB_PATH``.

    The file is created empty first if it does not exist. The section named
    after *username* is replaced with a single ``since`` option, and the
    whole file is then rewritten.
    """
    db_missing = not API_DB_PATH.exists()
    if db_missing:
        atomic_write(API_DB_PATH, "")

    store = ConfigParser()
    # Use ``str`` as the option-name transform so names keep their case.
    store.optionxform = str
    store.read(API_DB_PATH)

    store[username] = {"since": since}

    with open(API_DB_PATH, "w+") as db_file:
        store.write(db_file)
|
||||
|
||||
|
||||
def read_cursor(username: str) -> Optional[str]:
    """Return the cursor saved for *username*, or ``None`` if there is none.

    The file at ``API_DB_PATH`` is created empty first if it does not exist,
    then parsed with ``ConfigParser``; the ``since`` option of the section
    named after *username* is looked up with a ``None`` fallback.
    """
    db_missing = not API_DB_PATH.exists()
    if db_missing:
        atomic_write(API_DB_PATH, "")

    store = ConfigParser()
    # Use ``str`` as the option-name transform so names keep their case.
    store.optionxform = str
    store.read(API_DB_PATH)

    saved_cursor = store.get(username, "since", fallback=None)
    return saved_cursor
|
||||
|
||||
|
||||
|
||||
|
||||
@enforce_types
def should_parse_as_readwise_reader_api(text: str) -> bool:
    """Tell whether *text* begins with the ``readwise-reader://`` scheme."""
    scheme_prefix = "readwise-reader://"
    return text.startswith(scheme_prefix)
|
||||
|
||||
|
||||
@enforce_types
def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
    """Parse bookmarks from the Readwise Reader API

    Each line of *input_buffer* of the form ``readwise-reader://<username>``
    triggers a fetch of that user's articles, starting from the cursor
    previously saved for the username. Other lines are ignored, as are lines
    that carry the scheme but no username.

    Because this is a generator, the updated cursor for a username is saved
    only after the caller has consumed all of that user's articles.

    Raises:
        KeyError: if the username has no entry in ``READWISE_READER_TOKENS``.
    """

    input_buffer.seek(0)
    pattern = re.compile(r"^readwise-reader:\/\/(\w+)")
    for line in input_buffer:
        if not should_parse_as_readwise_reader_api(line):
            continue

        match = pattern.search(line)
        if match is None:
            # The scheme prefix is present but no username follows it
            # (e.g. "readwise-reader://"). Skip the line rather than crash
            # with AttributeError on ``None.group``.
            continue

        username = match.group(1)
        api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username))

        for article in get_readwise_reader_articles(api):
            yield link_from_article(article, sources=[line])

        # Remember where pagination ended so the next import can resume.
        if api.cursor:
            write_cursor(username, api.cursor)
|
||||
|
||||
|
||||
# Module-level registration values: the parsers package builds its PARSERS
# table from each parser module's KEY, NAME and PARSER attributes.
KEY = "readwise_reader_api"  # identifier used as the PARSERS dict key
NAME = "Readwise Reader API"  # display name paired with the parser
PARSER = parse_readwise_reader_api_export  # callable that yields Link objects
|
Loading…
Reference in a new issue