2023-07-02 15:18:41 +00:00
|
|
|
__package__ = "archivebox.parsers"
|
|
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
import requests
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
from typing import IO, Iterable, Optional
|
|
|
|
from configparser import ConfigParser
|
|
|
|
|
2024-09-30 22:59:05 +00:00
|
|
|
from archivebox.config import CONSTANTS
|
|
|
|
|
2023-07-02 15:18:41 +00:00
|
|
|
from ..index.schema import Link
|
2024-10-01 00:25:15 +00:00
|
|
|
from archivebox.misc.util import enforce_types
|
2024-10-01 00:13:55 +00:00
|
|
|
from archivebox.misc.system import atomic_write
|
2024-09-30 22:59:05 +00:00
|
|
|
from ..config.legacy import READWISE_READER_TOKENS
|
2023-07-02 15:18:41 +00:00
|
|
|
|
|
|
|
|
2024-09-30 22:59:05 +00:00
|
|
|
API_DB_PATH = CONSTANTS.SOURCES_DIR / "readwise_reader_api.db"
|
2023-07-02 15:18:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
class ReadwiseReaderAPI:
|
|
|
|
cursor: Optional[str]
|
|
|
|
|
|
|
|
def __init__(self, api_token, cursor=None) -> None:
|
|
|
|
self.api_token = api_token
|
|
|
|
self.cursor = cursor
|
|
|
|
|
|
|
|
def get_archive(self):
|
|
|
|
response = requests.get(
|
|
|
|
url="https://readwise.io/api/v3/list/",
|
2023-10-29 21:27:04 +00:00
|
|
|
headers={"Authorization": f"Token {self.api_token}"},
|
2023-07-02 15:18:41 +00:00
|
|
|
params={
|
|
|
|
"location": "archive",
|
|
|
|
"pageCursor": self.cursor,
|
|
|
|
}
|
|
|
|
)
|
|
|
|
response.raise_for_status()
|
|
|
|
return response
|
|
|
|
|
|
|
|
def get_readwise_reader_articles(api: ReadwiseReaderAPI):
|
|
|
|
response = api.get_archive()
|
|
|
|
body = response.json()
|
|
|
|
articles = body["results"]
|
|
|
|
|
|
|
|
yield from articles
|
|
|
|
|
|
|
|
|
|
|
|
if body['nextPageCursor']:
|
|
|
|
api.cursor = body["nextPageCursor"]
|
|
|
|
yield from get_readwise_reader_articles(api)
|
|
|
|
|
|
|
|
|
|
|
|
def link_from_article(article: dict, sources: list):
|
|
|
|
url: str = article['source_url']
|
|
|
|
title = article["title"] or url
|
|
|
|
timestamp = datetime.fromisoformat(article['updated_at']).timestamp()
|
|
|
|
|
|
|
|
return Link(
|
|
|
|
url=url,
|
|
|
|
timestamp=str(timestamp),
|
|
|
|
title=title,
|
|
|
|
tags="",
|
|
|
|
sources=sources,
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
def write_cursor(username: str, since: str):
|
|
|
|
if not API_DB_PATH.exists():
|
|
|
|
atomic_write(API_DB_PATH, "")
|
|
|
|
|
|
|
|
since_file = ConfigParser()
|
|
|
|
since_file.optionxform = str
|
|
|
|
since_file.read(API_DB_PATH)
|
|
|
|
|
|
|
|
since_file[username] = {"since": since}
|
|
|
|
|
|
|
|
with open(API_DB_PATH, "w+") as new:
|
|
|
|
since_file.write(new)
|
|
|
|
|
|
|
|
|
|
|
|
def read_cursor(username: str) -> Optional[str]:
|
|
|
|
if not API_DB_PATH.exists():
|
|
|
|
atomic_write(API_DB_PATH, "")
|
|
|
|
|
|
|
|
config_file = ConfigParser()
|
|
|
|
config_file.optionxform = str
|
|
|
|
config_file.read(API_DB_PATH)
|
|
|
|
|
|
|
|
return config_file.get(username, "since", fallback=None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@enforce_types
|
|
|
|
def should_parse_as_readwise_reader_api(text: str) -> bool:
|
|
|
|
return text.startswith("readwise-reader://")
|
|
|
|
|
|
|
|
|
|
|
|
@enforce_types
|
|
|
|
def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
|
|
|
|
"""Parse bookmarks from the Readwise Reader API"""
|
|
|
|
|
|
|
|
input_buffer.seek(0)
|
|
|
|
pattern = re.compile(r"^readwise-reader:\/\/(\w+)")
|
|
|
|
for line in input_buffer:
|
|
|
|
if should_parse_as_readwise_reader_api(line):
|
|
|
|
username = pattern.search(line).group(1)
|
|
|
|
api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username))
|
|
|
|
|
|
|
|
for article in get_readwise_reader_articles(api):
|
|
|
|
yield link_from_article(article, sources=[line])
|
|
|
|
|
|
|
|
if api.cursor:
|
|
|
|
write_cursor(username, api.cursor)
|
|
|
|
|
|
|
|
|
|
|
|
KEY = "readwise_reader_api"
|
|
|
|
NAME = "Readwise Reader API"
|
|
|
|
PARSER = parse_readwise_reader_api_export
|