From a1afd0211f85467527c1342061b62c5d5c0d4952 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Fri, 17 May 2024 20:11:00 -0700 Subject: [PATCH] fix abid calculation --- archivebox/abid_utils/abid.py | 9 ++++++--- archivebox/abid_utils/models.py | 10 +++++----- archivebox/api/models.py | 2 +- archivebox/config.py | 2 +- archivebox/core/settings.py | 1 + 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/archivebox/abid_utils/abid.py b/archivebox/abid_utils/abid.py index 1d3d28f9..48597813 100644 --- a/archivebox/abid_utils/abid.py +++ b/archivebox/abid_utils/abid.py @@ -108,9 +108,12 @@ def uri_hash(uri: Union[str, bytes]) -> str: # only hash the domain part of URLs if '://' in uri_str: - domain = urlparse(uri_str).host - if domain: - url_str = domain + try: + domain = urlparse(uri_str).netloc + if domain: + uri_str = domain + except AttributeError: + pass uri_bytes = uri_str.encode('utf-8') diff --git a/archivebox/abid_utils/models.py b/archivebox/abid_utils/models.py index 06fb6c79..de8b3c87 100644 --- a/archivebox/abid_utils/models.py +++ b/archivebox/abid_utils/models.py @@ -82,11 +82,11 @@ class ABIDModel(models.Model): def save(self, *args: Any, **kwargs: Any) -> None: if hasattr(self, 'abid'): - # self.abid = ABID.parse(self.abid) if self.abid else self.calculate_abid() - self.abid = self.calculate_abid() + # self.abid = ABID.parse(self.abid) if self.abid else self.get_abid() + self.abid = self.get_abid() else: print(f'[!] WARNING: {self.__class__.__name__}.abid is not a DB field so ABID will not be persisted!') - self.abid = self.calculate_abid() + self.abid = self.get_abid() super().save(*args, **kwargs) @@ -141,7 +141,7 @@ class ABIDModel(models.Model): """ ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE') """ - return ABID.parse(self.abid) if getattr(self, 'abid', None) else self.calculate_abid() + return ABID.parse(self.abid) if getattr(self, 'abid', None) else self.get_abid() @property def ULID(self) -> ULID: @@ -274,7 +274,7 @@ def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDMode ) for obj in qs: - if obj.calculate_abid() == abid: + if obj.get_abid() == abid: # found exact match, no need to keep iterating return [obj] partial_matches.append(obj) diff --git a/archivebox/api/models.py b/archivebox/api/models.py index 8d286a8b..177b275f 100644 --- a/archivebox/api/models.py +++ b/archivebox/api/models.py @@ -56,7 +56,7 @@ class APIToken(ABIDModel): return { "TYPE": "APIToken", "uuid": str(self.id), - "abid": str(self.calculate_abid()), + "abid": str(self.get_abid()), "user_id": str(self.user.id), "user_username": self.user.username, "token": self.token, diff --git a/archivebox/config.py b/archivebox/config.py index da2d1b04..1637023b 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -37,7 +37,7 @@ from sqlite3 import dbapi2 as sqlite3 from hashlib import md5 from pathlib import Path from datetime import datetime, timezone -from typing import Optional, Type, Tuple, Dict, Union, List +from typing import Optional, Type, Tuple, Dict, Union, List, Any from subprocess import run, PIPE, DEVNULL from configparser import ConfigParser from collections import defaultdict diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index d540d2be..ab197eea 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -10,6 +10,7 @@ from pathlib import Path from django.utils.crypto import get_random_string from ..config import ( + CONFIG, DEBUG, SECRET_KEY, ALLOWED_HOSTS,