mirror of
https://github.com/sissbruecker/linkding
synced 2024-11-10 14:14:18 +00:00
1b35d5b5ef
The Wayback Machine Save API only allows a limited number of requests within a given timespan. This introduces several changes to avoid rate limit errors:
- There will be at most one attempt to create a new snapshot
- If a new snapshot could not be created, then attempt to use the latest existing snapshot
- Bulk snapshot updates (bookmark import, loading missing snapshots after login) will only attempt to load the latest snapshot instead of creating new ones
40 lines
1.4 KiB
Python
40 lines
1.4 KiB
Python
import time
|
|
from typing import Dict
|
|
|
|
import waybackpy
|
|
import waybackpy.utils
|
|
from waybackpy.exceptions import NoCDXRecordFound
|
|
|
|
|
|
class CustomWaybackMachineCDXServerAPI(waybackpy.WaybackMachineCDXServerAPI):
    """
    CDX server API client tailored to looking up the most recent snapshot.

    Exists to work around issues with retrieving the newest snapshot through
    the stock implementation.
    See https://github.com/akamhy/waybackpy/issues/176
    """

    def newest(self):
        """
        Return the first snapshot yielded by a "closest to now" query.

        Overwrites ``closest``, ``sort`` and ``limit`` on this instance before
        iterating ``self.snapshots()``.

        Raises:
            NoCDXRecordFound: if the query yields nothing (or a falsy record).
        """
        now = int(time.time())
        self.closest = waybackpy.utils.unix_timestamp_to_wayback_timestamp(now)
        self.sort = 'closest'
        self.limit = -5

        # Only the first record matters; the rest of the iterator is never consumed.
        first_match = next(iter(self.snapshots()), None)
        if first_match:
            return first_match

        raise NoCDXRecordFound(
            "Wayback Machine's CDX server did not return any records "
            "for the query. The URL may not have any archives "
            " on the Wayback Machine or the URL may have been recently "
            "archived and is still not available on the CDX server."
        )

    def add_payload(self, payload: Dict[str, str]) -> None:
        """
        Populate ``payload`` via the parent class, then force ``fastLatest`` on.

        ``payload`` is modified in place; nothing is returned.
        """
        super().add_payload(payload)
        # This API is only used to fetch the latest snapshot, and the fastLatest
        # query param makes searching for latest snapshots faster.
        payload['fastLatest'] = 'true'
|