linkding/bookmarks/services/wayback.py
Sascha Ißbrücker 1b35d5b5ef
Prevent rate limit errors in wayback machine API (#339)
The Wayback Machine Save API only allows a limited number of requests within a timespan. This introduces several changes to avoid rate limit errors:
- There will be max. 1 attempt to create a new snapshot
- If a new snapshot could not be created, then attempt to use the latest existing snapshot
- Bulk snapshot updates (bookmark import, load missing snapshots after login) will only attempt to load the latest snapshot instead of creating new ones
2022-09-10 20:43:15 +02:00

40 lines
1.4 KiB
Python

import time
from typing import Dict
import waybackpy
import waybackpy.utils
from waybackpy.exceptions import NoCDXRecordFound
class CustomWaybackMachineCDXServerAPI(waybackpy.WaybackMachineCDXServerAPI):
"""
Customized WaybackMachineCDXServerAPI to work around some issues with retrieving the newest snapshot.
See https://github.com/akamhy/waybackpy/issues/176
"""
def newest(self):
unix_timestamp = int(time.time())
self.closest = waybackpy.utils.unix_timestamp_to_wayback_timestamp(unix_timestamp)
self.sort = 'closest'
self.limit = -5
newest_snapshot = None
for snapshot in self.snapshots():
newest_snapshot = snapshot
break
if not newest_snapshot:
raise NoCDXRecordFound(
"Wayback Machine's CDX server did not return any records "
+ "for the query. The URL may not have any archives "
+ " on the Wayback Machine or the URL may have been recently "
+ "archived and is still not available on the CDX server."
)
return newest_snapshot
def add_payload(self, payload: Dict[str, str]) -> None:
super().add_payload(payload)
# Set fastLatest query param, as we are only using this API to get the latest snapshot and using fastLatest
# makes searching for latest snapshots faster
payload['fastLatest'] = 'true'