mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-22 20:23:12 +00:00
Refactor Snapshot and ArchiveResult to use ulid
and typeid
instead of uuidv4
(#1430)
Fixes: https://github.com/ArchiveBox/ArchiveBox/issues/74
This commit is contained in:
commit
3114980eeb
34 changed files with 1349 additions and 180 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -29,6 +29,7 @@ dist/
|
|||
data/
|
||||
data*/
|
||||
output/
|
||||
index.sqlite3
|
||||
|
||||
# vim
|
||||
*.sw?
|
||||
|
|
|
@ -1,7 +1,4 @@
|
|||
__package__ = 'archivebox'

# monkey patch django timezone to add back utc (it was removed in Django 5.0)
import datetime
from django.utils import timezone
timezone.utc = datetime.timezone.utc

# apply the rest of the project-wide monkey patches as a side effect of importing the package
from .monkey_patches import *
|
||||
|
|
1
archivebox/abid_utils/__init__.py
Normal file
1
archivebox/abid_utils/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
__package__ = 'abid_utils'
|
191
archivebox/abid_utils/abid.py
Normal file
191
archivebox/abid_utils/abid.py
Normal file
|
@ -0,0 +1,191 @@
|
|||
from typing import NamedTuple, Any, Union, Optional
|
||||
|
||||
import ulid
|
||||
import uuid6
|
||||
import hashlib
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from uuid import UUID
|
||||
from typeid import TypeID # type: ignore[import-untyped]
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
|
||||
# Layout of an ABID string, e.g. 'obj_01HX9FPYTRE4A5CCD901ZYEBQE':
#   <prefix 4 chars incl. '_'><ts 10><uri 8><subtype 2><rand 6>
ABID_PREFIX_LEN = 4     # type prefix incl. trailing underscore, e.g. 'snp_'
ABID_SUFFIX_LEN = 26    # ts + uri + subtype + rand = 10 + 8 + 2 + 6
ABID_LEN = 30           # full length: prefix + suffix
ABID_TS_LEN = 10        # ULID-style timestamp section
ABID_URI_LEN = 8        # first 8 chars of sha256(uri)
ABID_SUBTYPE_LEN = 2    # 2-char subtype code
ABID_RAND_LEN = 6       # trailing randomness section

# fallback prefix used when a model doesn't declare its own (e.g. 'snp_', 'tag_')
DEFAULT_ABID_PREFIX = 'obj_'
||||
class ABID(NamedTuple):
    """
    e.g. ABID('obj_01HX9FPYTRE4A5CCD901ZYEBQE')

    Compound ArchiveBox ID: a short type prefix (e.g. 'obj_') followed by a
    26-char ULID-compatible suffix assembled from ts + uri + subtype + rand.
    """
    prefix: str           # e.g. obj_
    ts: str               # e.g. 01HX9FPYTR
    uri: str              # e.g. E4A5CCD9
    subtype: str          # e.g. 01
    rand: str             # e.g. ZYEBQE

    def __getattr__(self, attr: str) -> Any:
        # delegate unknown attribute lookups to the underlying ULID representation
        return getattr(self.ulid, attr)

    def __eq__(self, other: Any) -> bool:
        try:
            return self.ulid == other.ulid
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Fix: defining __eq__ above implicitly sets __hash__ = None, which made
        # ABIDs unhashable (unusable in sets or as dict keys). Hash by the same
        # value __eq__ compares on so hash/eq stay consistent.
        return hash(self.ulid)

    def __str__(self) -> str:
        return self.prefix + self.suffix

    def __len__(self) -> int:
        return len(self.prefix + self.suffix)

    @classmethod
    def parse(cls, buffer: Union[str, UUID, ulid.ULID, TypeID, 'ABID'], prefix=DEFAULT_ABID_PREFIX) -> 'ABID':
        """
        Parse any supported identifier (ABID str, UUID, ULID, TypeID) into an ABID.

        Raises AssertionError when the prefix or suffix has an invalid length.
        """
        assert buffer, f'Attempted to create ABID from null value {buffer}'

        buffer = str(buffer)
        if '_' in buffer:
            # Fix: split only on the FIRST underscore so a malformed suffix containing
            # extra underscores fails the length assertion below (with a clear message)
            # instead of raising an unhelpful ValueError from tuple unpacking.
            prefix, suffix = buffer.split('_', 1)
        else:
            prefix, suffix = prefix.strip('_'), buffer

        assert len(prefix) == ABID_PREFIX_LEN - 1    # length without trailing _
        assert len(suffix) == ABID_SUFFIX_LEN, f'Suffix {suffix} from {buffer} was not {ABID_SUFFIX_LEN} chars long'

        return cls(
            prefix=abid_part_from_prefix(prefix),
            ts=suffix[0:10].upper(),
            uri=suffix[10:18].upper(),
            subtype=suffix[18:20].upper(),
            rand=suffix[20:26].upper(),
        )

    @property
    def suffix(self):
        # the 26-char portion after the prefix: ts + uri + subtype + rand
        return ''.join((self.ts, self.uri, self.subtype, self.rand))

    @property
    def ulid(self) -> ulid.ULID:
        return ulid.parse(self.suffix)

    @property
    def uuid(self) -> UUID:
        return self.ulid.uuid

    @property
    def uuid6(self) -> uuid6.UUID:
        # UUIDv7-compatible representation (same 128 bits as .uuid)
        return uuid6.UUID(hex=self.uuid.hex)

    @property
    def typeid(self) -> TypeID:
        # stripe-style typeid built from the prefix + uuid6, e.g. TypeID('obj_...')
        return TypeID.from_uuid(prefix=self.prefix.strip('_'), suffix=self.uuid6)

    @property
    def datetime(self) -> datetime:
        # creation timestamp decoded from the ULID ts section
        return self.ulid.timestamp().datetime
||||
####################################################
|
||||
|
||||
|
||||
def uri_hash(uri: Union[str, bytes]) -> str:
    """
    Return the uppercase hex SHA-256 digest used as the URI component of an ABID.

    e.g. 'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25'

    For URLs (anything containing '://') only the domain (netloc) portion is
    hashed, so all snapshots of the same site share a URI component.
    """
    if isinstance(uri, bytes):
        # bytes input is assumed to be utf-8 encoded text (made explicit here)
        uri_str: str = uri.decode('utf-8')
    else:
        uri_str = uri

    # only hash the domain part of URLs
    # (fix: removed dead `except AttributeError` — urlparse on a str never raises it)
    if '://' in uri_str:
        domain = urlparse(uri_str).netloc
        if domain:
            uri_str = domain

    uri_bytes = uri_str.encode('utf-8')

    return hashlib.sha256(uri_bytes).hexdigest().upper()
||||
def abid_part_from_prefix(prefix: Optional[str]) -> str:
    """
    Normalize a type prefix into its 4-char ABID form, e.g. 'snp_'.
    Falls back to the generic 'obj_' prefix when given None.
    """
    if prefix is None:
        return 'obj_'

    cleaned = prefix.strip('_').lower()
    assert len(cleaned) == 3
    return f'{cleaned}_'
||||
def abid_part_from_uri(uri: str) -> str:
    """
    'E4A5CCD9'  # takes first 8 characters of sha256(url)
    """
    return uri_hash(str(uri))[:ABID_URI_LEN]
||||
def abid_part_from_ts(ts: Optional[datetime]) -> str:
    """
    '01HX9FPYTR'  # 10-character ULID timestamp section derived from the added date
    """
    # no timestamp given -> stamp with the current time via a fresh ULID
    new_ulid = ulid.from_timestamp(ts) if ts else ulid.new()
    return str(new_ulid)[:ABID_TS_LEN]
||||
def abid_part_from_subtype(subtype: str) -> str:
    """
    Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
    Also allows us to change the ulid spec later by putting special sigil values here.
    """
    subtype_str = str(subtype)
    if len(subtype_str) != ABID_SUBTYPE_LEN:
        # anything that isn't already exactly 2 chars gets hashed down to a stable 2-char code
        digest = hashlib.sha256(subtype_str.encode('utf-8')).hexdigest()
        subtype_str = digest[:ABID_SUBTYPE_LEN].upper()
    return subtype_str
||||
def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
    """
    'ZYEBQE'  # takes last 6 characters of randomness from existing legacy uuid db field
    """
    if rand is None:
        # no existing randomness provided: take fresh randomness from a new ULID
        return str(ulid.new())[-ABID_RAND_LEN:]

    if isinstance(rand, UUID):
        # take the last 6 characters of the ULID representation of the uuid
        return str(ulid.from_uuid(rand))[-ABID_RAND_LEN:]

    if isinstance(rand, int):
        # BigAutoInteger field: convert the int to a 0-padded 6-char string
        return str(rand)[-ABID_RAND_LEN:].rjust(ABID_RAND_LEN, '0')

    # otherwise treat it as a string, take the last 6 characters of it verbatim
    return str(rand)[-ABID_RAND_LEN:].upper()
|
||||
def abid_from_values(prefix, ts, uri, subtype, rand) -> ABID:
    """
    Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
    """

    derived = ABID(
        prefix=abid_part_from_prefix(prefix),
        ts=abid_part_from_ts(ts),
        uri=abid_part_from_uri(uri),
        subtype=abid_part_from_subtype(subtype),
        rand=abid_part_from_rand(rand),
    )
    # sanity check: every derived representation must be truthy before returning
    assert derived.ulid and derived.uuid and derived.typeid, f'Failed to calculate {prefix}_ABID for ts={ts} uri={uri} subtyp={subtype} rand={rand}'
    return derived
|
7
archivebox/abid_utils/apps.py
Normal file
7
archivebox/abid_utils/apps.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class AbidUtilsConfig(AppConfig):
    """Django app config for the abid_utils app (ABID/ULID identifier helpers)."""
    default_auto_field = 'django.db.models.BigAutoField'

    name = 'abid_utils'
|
314
archivebox/abid_utils/models.py
Normal file
314
archivebox/abid_utils/models.py
Normal file
|
@ -0,0 +1,314 @@
|
|||
"""
|
||||
This file provides the Django ABIDField and ABIDModel base model to inherit from.
|
||||
|
||||
It implements the ArchiveBox ID (ABID) interfaces including abid_values, get_abid, .abid, .uuid, .id.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, Union, List, Set, NamedTuple, cast
|
||||
|
||||
from ulid import ULID
|
||||
from uuid import uuid4, UUID
|
||||
from typeid import TypeID # type: ignore[import-untyped]
|
||||
from datetime import datetime
|
||||
from functools import partial
|
||||
from charidfield import CharIDField # type: ignore[import-untyped]
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import models
|
||||
from django.db.utils import OperationalError
|
||||
from django.contrib.auth import get_user_model
|
||||
|
||||
from django_stubs_ext.db.models import TypedModelMeta
|
||||
|
||||
from .abid import (
|
||||
ABID,
|
||||
ABID_LEN,
|
||||
ABID_RAND_LEN,
|
||||
ABID_SUFFIX_LEN,
|
||||
DEFAULT_ABID_PREFIX,
|
||||
abid_part_from_prefix,
|
||||
abid_from_values
|
||||
)
|
||||
|
||||
####################################################
|
||||
|
||||
|
||||
# Database Field for typeid/ulid style IDs with a prefix, e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ
# (a partial of CharIDField so models only need to supply their own prefix=)
ABIDField = partial(
    CharIDField,
    max_length=ABID_LEN,
    help_text="ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)",
    default=None,       # null until first save() derives the ABID
    null=True,
    blank=True,
    db_index=True,
    unique=True,
)
||||
|
||||
def get_or_create_system_user_pk(username='system'):
    """Get or create a system user with is_superuser=True to be the default owner for new DB rows"""

    User = get_user_model()

    # if there is exactly one superuser, just use that user as the owner
    superusers = User.objects.filter(is_superuser=True)
    if superusers.count() == 1:
        return superusers.values_list('pk', flat=True)[0]

    # otherwise, create a dedicated "system" user
    user, _created = User.objects.get_or_create(
        username=username,
        is_staff=True,
        is_superuser=True,
        defaults={'email': '', 'password': ''},
    )
    return user.pk
||||
|
||||
class ABIDModel(models.Model):
    """
    Abstract Base Model for other models to depend on. Provides ArchiveBox ID (ABID) interface.

    Subclasses declare abid_prefix plus the abid_*_src attribute-path strings, and this
    base class derives a fresh ABID from them on every save().
    """
    abid_prefix: str = DEFAULT_ABID_PREFIX  # e.g. 'tag_'
    abid_ts_src = 'None'                    # e.g. 'self.created'
    abid_uri_src = 'None'                   # e.g. 'self.uri'
    abid_subtype_src = 'None'               # e.g. 'self.extractor'
    abid_rand_src = 'None'                  # e.g. 'self.uuid' or 'self.id'

    # legacy uuid4 primary key + optional secondary uuid, kept editable during the ULID migration
    id = models.UUIDField(primary_key=True, default=uuid4, editable=True)
    uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
    abid = ABIDField(prefix=abid_prefix)

    created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)

    class Meta(TypedModelMeta):
        abstract = True

    def save(self, *args: Any, **kwargs: Any) -> None:
        # re-derive the ABID from the abid_*_src attrs on every save
        if hasattr(self, 'abid'):
            # self.abid = ABID.parse(self.abid) if self.abid else self.get_abid()
            self.abid = self.get_abid()
        else:
            # still compute (and warn) even when there's no DB column to persist it into
            print(f'[!] WARNING: {self.__class__.__name__}.abid is not a DB field so ABID will not be persisted!')
            self.abid = self.get_abid()

        super().save(*args, **kwargs)

    @property
    def abid_values(self) -> Dict[str, Any]:
        """Resolve the abid_*_src attribute-path strings into their current values."""
        # NOTE(review): eval() here is a security smell — the abid_*_src strings must only
        # ever be trusted class-level constants like 'self.created', never user input;
        # consider operator.attrgetter-style resolution instead
        return {
            'prefix': self.abid_prefix,
            'ts': eval(self.abid_ts_src),
            'uri': eval(self.abid_uri_src),
            'subtype': eval(self.abid_subtype_src),
            'rand': eval(self.abid_rand_src),
        }

    def get_abid(self) -> ABID:
        """
        Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).

        Raises Exception when abid_prefix is unset; warns (but falls back to
        placeholders) when any of the other abid_*_src values resolve falsy.
        """
        prefix, ts, uri, subtype, rand = self.abid_values.values()

        if (not prefix) or prefix == DEFAULT_ABID_PREFIX:
            suggested_abid = self.__class__.__name__[:3].lower()
            raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')

        if not ts:
            # NOTE(review): datetime.utcfromtimestamp() is deprecated in Python 3.12 —
            # consider datetime.fromtimestamp(0, timezone.utc); confirm downstream handles aware datetimes
            ts = datetime.utcfromtimestamp(0)
            print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())

        if not uri:
            uri = str(self)
            print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)

        if not subtype:
            subtype = self.__class__.__name__
            print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)

        if not rand:
            # fall back to whichever identifier field exists: uuid, id, then pk
            rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
            print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)

        abid = abid_from_values(
            prefix=prefix,
            ts=ts,
            uri=uri,
            subtype=subtype,
            rand=rand,
        )
        assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
        return abid

    @property
    def ABID(self) -> ABID:
        """
        Parsed ABID for this object, derived fresh when the abid column is empty.
        e.g. ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
        """
        return ABID.parse(self.abid) if getattr(self, 'abid', None) else self.get_abid()

    @property
    def ULID(self) -> ULID:
        """
        Get a ulid.ULID representation of the object's ABID.
        """
        return self.ABID.ulid

    @property
    def UUID(self) -> UUID:
        """
        Get a uuid.UUID (v4) representation of the object's ABID.
        """
        return self.ABID.uuid

    @property
    def TypeID(self) -> TypeID:
        """
        Get a typeid.TypeID (stripe-style) representation of the object's ABID.
        """
        return self.ABID.typeid
||||
|
||||
|
||||
|
||||
####################################################
|
||||
|
||||
# Django helpers
|
||||
def find_all_abid_prefixes() -> Dict[str, type[models.Model]]:
    """
    Return the mapping of all ABID prefixes to their models.
    e.g. {'tag_': core.models.Tag, 'snp_': core.models.Snapshot, ...}
    """
    import django.apps

    # collect every installed model that declares a truthy abid_prefix
    return {
        installed_model.abid_prefix: installed_model
        for installed_model in django.apps.apps.get_models()
        if getattr(installed_model, 'abid_prefix', None)
    }
||||
|
||||
def find_prefix_for_abid(abid: ABID) -> str:
    """
    Find the correct prefix for a given ABID that may have be missing a prefix (slow).
    e.g. ABID('obj_01BJQMF54D093DXEAWZ6JYRPAQ') -> 'snp_'
    """
    # fast path: the ABID's existing prefix already maps to a known model
    matched_model = find_model_from_abid(abid)
    if matched_model:
        assert issubclass(matched_model, ABIDModel)
        return matched_model.abid_prefix

    # slow path: prefix might be obj_ or missing, fuzzy-search every model by randomness
    return find_obj_from_abid_rand(abid)[0].abid_prefix
||||
|
||||
def find_model_from_abid_prefix(prefix: str) -> type[ABIDModel] | None:
    """
    Return the Django Model that corresponds to a given ABID prefix.
    e.g. 'tag_' -> core.models.Tag
    """
    # normalize to the canonical 'xxx_' form before comparing
    normalized_prefix = abid_part_from_prefix(prefix)

    import django.apps

    for candidate in django.apps.apps.get_models():
        if not issubclass(candidate, ABIDModel):
            continue    # skip non-ABID-enabled models
        if not hasattr(candidate, 'objects'):
            continue    # skip abstract models
        if candidate.abid_prefix == normalized_prefix:
            return candidate

    return None
||||
|
||||
def find_model_from_abid(abid: ABID) -> type[models.Model] | None:
    """
    Shortcut for find_model_from_abid_prefix(abid.prefix)
    """
    return find_model_from_abid_prefix(abid.prefix)
||||
|
||||
def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDModel]:
    """
    Find an object corresponding to an ABID by exhaustively searching using its random suffix (slow).
    e.g. 'obj_....................JYRPAQ' -> Snapshot('snp_01BJQMF54D093DXEAWZ6JYRPAQ')

    Returns a single-element list on an exact get_abid() match, otherwise all
    partial matches found across the candidate models.
    """

    # convert str to ABID if necessary
    if isinstance(rand, ABID):
        abid: ABID = rand
    else:
        # left-pad a bare/partial suffix with zeros up to the full 26-char suffix length
        rand = str(rand)
        if len(rand) < ABID_SUFFIX_LEN:
            padding_needed = ABID_SUFFIX_LEN - len(rand)
            rand = ('0'*padding_needed) + rand
        abid = ABID.parse(rand)

    import django.apps

    partial_matches: List[ABIDModel] = []

    # candidate models: the explicit model arg (if any), then the model implied by
    # the ABID's prefix, then every other installed model as a last resort
    # NOTE(review): the loop variable below shadows the `model` parameter
    models_to_try = cast(Set[type[models.Model]], set(filter(bool, (
        model,
        find_model_from_abid(abid),
        *django.apps.apps.get_models(),
    ))))
    # print(abid, abid.rand, abid.uuid, models_to_try)

    for model in models_to_try:
        if not issubclass(model, ABIDModel): continue  # skip Models that arent ABID-enabled
        if not hasattr(model, 'objects'): continue     # skip abstract Models
        assert hasattr(model, 'objects')  # force-fix for type hint nit about missing manager https://github.com/typeddjango/django-stubs/issues/1684

        # continue on to try fuzzy searching by randomness portion derived from uuid field
        try:
            qs = []
            if hasattr(model, 'abid'):
                # preferred: match the trailing rand section of the abid column directly
                qs = model.objects.filter(abid__endswith=abid.rand)
            elif hasattr(model, 'uuid'):
                # fall back to matching the tail of the uuid column
                qs = model.objects.filter(uuid__endswith=str(abid.uuid)[-ABID_RAND_LEN:])
            elif hasattr(model, 'id'):
                # NOTE: this only works on SQLite where every column is a string
                # other DB backends like postgres dont let you do __endswith if this is a BigAutoInteger field

                # try to search for uuid=...-2354352
                # try to search for id=...2354352
                # try to search for id=2354352
                qs = model.objects.filter(
                    models.Q(id__endswith=str(abid.uuid)[-ABID_RAND_LEN:])
                    | models.Q(id__endswith=abid.rand)
                    | models.Q(id__startswith=str(int(abid.rand)) if abid.rand.isdigit() else abid.rand)
                )

            for obj in qs:
                if obj.get_abid() == abid:
                    # found exact match, no need to keep iterating
                    return [obj]
                partial_matches.append(obj)
        except OperationalError as err:
            # best-effort: a backend that can't run one of the fuzzy filters just skips that model
            print(f'[!] WARNING: Got error while trying to iterate through QuerySet for {model}:', err, '\n')

    return partial_matches
||||
|
||||
def find_obj_from_abid(abid: ABID, model=None, fuzzy=False) -> Any:
    """
    Find an object with a given ABID by filtering possible models for a matching abid/uuid/id (fast).
    e.g. 'snp_01BJQMF54D093DXEAWZ6JYRPAQ' -> Snapshot('snp_01BJQMF54D093DXEAWZ6JYRPAQ')

    Raises model.DoesNotExist when nothing matches (unless fuzzy=True finds a partial match).
    NOTE(review): the exact path returns a single object while the fuzzy path returns a List.
    """

    model = model or find_model_from_abid(abid)
    assert model, f'Could not find model that could match this ABID type: {abid}'

    try:
        # exact lookups, in order of preference: abid column, uuid column, id column
        if hasattr(model, 'abid'):
            return model.objects.get(abid__endswith=abid.suffix)
        if hasattr(model, 'uuid'):
            return model.objects.get(uuid=abid.uuid)
        return model.objects.get(id=abid.uuid)
    except model.DoesNotExist:
        # if the model has an abid field then it shouldve matched, pointless to fuzzy search in that case
        if hasattr(model, 'abid') or (not fuzzy):
            raise

        # continue on to try fuzzy searching by randomness portion derived from uuid field
        match_by_rand = find_obj_from_abid_rand(abid, model=model)
        if match_by_rand:
            if match_by_rand[0].abid_prefix != abid.prefix:
                print(f'[!] WARNING: fetched object {match_by_rand} even though prefix {abid.prefix} doesnt match!', abid, '\n')
            return match_by_rand

        raise model.DoesNotExist
||||
|
3
archivebox/abid_utils/tests.py
Normal file
3
archivebox/abid_utils/tests.py
Normal file
|
@ -0,0 +1,3 @@
|
|||
from django.test import TestCase
|
||||
|
||||
# Create your tests here.
|
|
@ -3,5 +3,9 @@ __package__ = 'archivebox.api'
|
|||
from django.apps import AppConfig
|
||||
|
||||
|
||||
|
||||
class APIConfig(AppConfig):
    """Django app config for the api app."""
    name = 'api'

    def ready(self):
        # no startup hooks needed yet; placeholder for future signal registration
        pass
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-13 10:58
|
||||
|
||||
import charidfield.fields
|
||||
import signal_webhooks.fields
|
||||
import signal_webhooks.utils
|
||||
import uuid
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Auto-generated Django migration (do not hand-edit): adds the OutboundWebhook model and ABID/uuid columns to APIToken."""

    dependencies = [
        ('api', '0002_alter_apitoken_options'),
    ]

    operations = [
        # new OutboundWebhook model mirroring signal_webhooks' WebhookBase fields plus ABID identifiers
        migrations.CreateModel(
            name='OutboundWebhook',
            fields=[
                ('name', models.CharField(db_index=True, help_text='Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).', max_length=255, unique=True, verbose_name='name')),
                ('signal', models.CharField(choices=[('CREATE', 'Create'), ('UPDATE', 'Update'), ('DELETE', 'Delete'), ('M2M', 'M2M changed'), ('CREATE_OR_UPDATE', 'Create or Update'), ('CREATE_OR_DELETE', 'Create or Delete'), ('CREATE_OR_M2M', 'Create or M2M changed'), ('UPDATE_OR_DELETE', 'Update or Delete'), ('UPDATE_OR_M2M', 'Update or M2M changed'), ('DELETE_OR_M2M', 'Delete or M2M changed'), ('CREATE_UPDATE_OR_DELETE', 'Create, Update or Delete'), ('CREATE_UPDATE_OR_M2M', 'Create, Update or M2M changed'), ('CREATE_DELETE_OR_M2M', 'Create, Delete or M2M changed'), ('UPDATE_DELETE_OR_M2M', 'Update, Delete or M2M changed'), ('CREATE_UPDATE_DELETE_OR_M2M', 'Create, Update or Delete, or M2M changed')], help_text='The type of event the webhook should fire for (e.g. Create, Update, Delete).', max_length=255, verbose_name='signal')),
                ('ref', models.CharField(db_index=True, help_text='Dot import notation of the model the webhook should fire for (e.g. core.models.Snapshot or core.models.ArchiveResult).', max_length=1023, validators=[signal_webhooks.utils.model_from_reference], verbose_name='referenced model')),
                ('endpoint', models.URLField(help_text='External URL to POST the webhook notification to (e.g. https://someapp.example.com/webhook/some-webhook-receiver).', max_length=2047, verbose_name='endpoint')),
                ('headers', models.JSONField(blank=True, default=dict, help_text='Headers to send with the webhook request.', validators=[signal_webhooks.utils.is_dict], verbose_name='headers')),
                ('auth_token', signal_webhooks.fields.TokenField(blank=True, default='', help_text='Authentication token to use in an Authorization header.', max_length=8000, validators=[signal_webhooks.utils.decode_cipher_key], verbose_name='authentication token')),
                ('enabled', models.BooleanField(default=True, help_text='Is this webhook enabled?', verbose_name='enabled')),
                ('keep_last_response', models.BooleanField(default=False, help_text='Should the webhook keep a log of the latest response it got?', verbose_name='keep last response')),
                ('created', models.DateTimeField(auto_now_add=True, help_text='When the webhook was created.', verbose_name='created')),
                ('updated', models.DateTimeField(auto_now=True, help_text='When the webhook was last updated.', verbose_name='updated')),
                ('last_response', models.CharField(blank=True, default='', help_text='Latest response to this webhook.', max_length=8000, verbose_name='last response')),
                ('last_success', models.DateTimeField(default=None, help_text='When the webhook last succeeded.', null=True, verbose_name='last success')),
                ('last_failure', models.DateTimeField(default=None, help_text='When the webhook last failed.', null=True, verbose_name='last failure')),
                ('uuid', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
                ('abid', charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='whk', unique=True)),
            ],
            options={
                'verbose_name': 'API Outbound Webhook',
                'abstract': False,
            },
        ),
        # add ABID + uuid columns to the existing APIToken model
        migrations.AddField(
            model_name='apitoken',
            name='abid',
            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='apt', unique=True),
        ),
        migrations.AddField(
            model_name='apitoken',
            name='uuid',
            field=models.UUIDField(blank=True, null=True, unique=True),
        ),
        migrations.AlterField(
            model_name='apitoken',
            name='id',
            field=models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False),
        ),
        migrations.AddConstraint(
            model_name='outboundwebhook',
            constraint=models.UniqueConstraint(fields=('ref', 'endpoint'), name='prevent_duplicate_hooks_api_outboundwebhook'),
        ),
    ]
|
|
@ -0,0 +1,58 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-13 14:36
|
||||
|
||||
import abid_utils.models
|
||||
import charidfield.fields
|
||||
import django.db.models.deletion
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Auto-generated Django migration (do not hand-edit): renames APIToken.user -> created_by and fills in ABIDModel columns."""

    dependencies = [
        ('api', '0003_outboundwebhook_apitoken_abid_apitoken_uuid_and_more'),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.RenameField(
            model_name='apitoken',
            old_name='user',
            new_name='created_by',
        ),
        migrations.AddField(
            model_name='apitoken',
            name='modified',
            field=models.DateTimeField(auto_now=True),
        ),
        migrations.AddField(
            model_name='outboundwebhook',
            name='created_by',
            field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
        ),
        migrations.AddField(
            model_name='outboundwebhook',
            name='id',
            field=models.UUIDField(blank=True, null=True, unique=True),
        ),
        migrations.AddField(
            model_name='outboundwebhook',
            name='modified',
            field=models.DateTimeField(auto_now=True),
        ),
        # normalize ABID prefixes to include the trailing underscore ('apt_', 'whk_')
        migrations.AlterField(
            model_name='apitoken',
            name='abid',
            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='apt_', unique=True),
        ),
        migrations.AlterField(
            model_name='outboundwebhook',
            name='abid',
            field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='whk_', unique=True),
        ),
        migrations.AlterField(
            model_name='outboundwebhook',
            name='created',
            field=models.DateTimeField(auto_now_add=True),
        ),
    ]
|
|
@ -8,22 +8,39 @@ from django.conf import settings
|
|||
from django.db import models
|
||||
from django.utils import timezone
|
||||
|
||||
from signal_webhooks.models import WebhookBase
|
||||
|
||||
from django_stubs_ext.db.models import TypedModelMeta
|
||||
|
||||
from abid_utils.models import ABIDModel, ABIDField
|
||||
|
||||
|
||||
def generate_secret_token() -> str:
|
||||
# returns cryptographically secure string with len() == 32
|
||||
return secrets.token_hex(16)
|
||||
|
||||
|
||||
class APIToken(models.Model):
|
||||
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
|
||||
class APIToken(ABIDModel):
|
||||
"""
|
||||
A secret key generated by a User that's used to authenticate REST API requests to ArchiveBox.
|
||||
"""
|
||||
# ABID: apt_<created_ts>_<token_hash>_<user_id_hash>_<uuid_rand>
|
||||
abid_prefix = 'apt_'
|
||||
abid_ts_src = 'self.created'
|
||||
abid_uri_src = 'self.token'
|
||||
abid_subtype_src = 'self.user_id'
|
||||
abid_rand_src = 'self.id'
|
||||
|
||||
user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
|
||||
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
|
||||
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||
abid = ABIDField(prefix=abid_prefix)
|
||||
|
||||
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
|
||||
token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
|
||||
|
||||
created = models.DateTimeField(auto_now_add=True)
|
||||
expires = models.DateTimeField(null=True, blank=True)
|
||||
|
||||
|
||||
class Meta(TypedModelMeta):
|
||||
verbose_name = "API Key"
|
||||
|
@ -38,7 +55,8 @@ class APIToken(models.Model):
|
|||
def __json__(self) -> dict:
|
||||
return {
|
||||
"TYPE": "APIToken",
|
||||
"id": str(self.id),
|
||||
"uuid": str(self.id),
|
||||
"abid": str(self.get_abid()),
|
||||
"user_id": str(self.user.id),
|
||||
"user_username": self.user.username,
|
||||
"token": self.token,
|
||||
|
@ -61,3 +79,37 @@ class APIToken(models.Model):
|
|||
|
||||
return True
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# monkey patch django-signals-webhooks to change how it shows up in Admin UI
|
||||
|
||||
class OutboundWebhook(ABIDModel, WebhookBase):
    """
    Model used in place of (extending) signals_webhooks.models.WebhookModel. Swapped using:
    settings.SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
    """
    abid_prefix = 'whk_'
    abid_ts_src = 'self.created'     # ABID timestamp section comes from the creation date
    abid_uri_src = 'self.endpoint'   # ABID uri-hash section comes from the webhook endpoint URL
    abid_subtype_src = 'self.ref'    # ABID subtype section comes from the referenced model path
    abid_rand_src = 'self.id'        # ABID randomness section comes from the legacy id column

    id = models.UUIDField(blank=True, null=True, unique=True, editable=True)
    uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
    abid = ABIDField(prefix=abid_prefix)

    # override the inherited WebhookBase field help_texts with ArchiveBox-specific wording
    # NOTE(review): this mutates WebhookBase._meta at class-definition time, which would
    # affect every subclass of WebhookBase — confirm that side effect is intended
    WebhookBase._meta.get_field('name').help_text = (
        'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).')
    WebhookBase._meta.get_field('signal').help_text = (
        'The type of event the webhook should fire for (e.g. Create, Update, Delete).')
    WebhookBase._meta.get_field('ref').help_text = (
        'Dot import notation of the model the webhook should fire for (e.g. core.models.Snapshot or core.models.ArchiveResult).')
    WebhookBase._meta.get_field('endpoint').help_text = (
        'External URL to POST the webhook notification to (e.g. https://someapp.example.com/webhook/some-webhook-receiver).')

    class Meta(WebhookBase.Meta):
        verbose_name = 'API Outbound Webhook'
||||
|
|
|
@ -47,6 +47,6 @@ def check_api_token(request, token_data: TokenAuthSchema):
|
|||
request=request,
|
||||
)
|
||||
if user:
|
||||
return {"success": True, "user_id": str(user.id)}
|
||||
return {"success": True, "user_id": str(user.pk)}
|
||||
|
||||
return {"success": False, "user_id": None}
|
||||
|
|
|
@ -4,13 +4,14 @@ from uuid import UUID
|
|||
from typing import List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from django.db.models import Q
|
||||
from django.shortcuts import get_object_or_404
|
||||
|
||||
from ninja import Router, Schema, FilterSchema, Field, Query
|
||||
from ninja.pagination import paginate
|
||||
|
||||
from core.models import Snapshot, ArchiveResult, Tag
|
||||
|
||||
from abid_utils.abid import ABID
|
||||
|
||||
router = Router(tags=['Core Models'])
|
||||
|
||||
|
@ -20,24 +21,39 @@ router = Router(tags=['Core Models'])
|
|||
### ArchiveResult #########################################################################
|
||||
|
||||
class ArchiveResultSchema(Schema):
|
||||
id: UUID
|
||||
abid: str
|
||||
uuid: UUID
|
||||
pk: str
|
||||
modified: datetime
|
||||
created: datetime
|
||||
created_by_id: str
|
||||
|
||||
snapshot_id: UUID
|
||||
snapshot_abid: str
|
||||
snapshot_url: str
|
||||
snapshot_tags: str
|
||||
|
||||
extractor: str
|
||||
cmd_version: str
|
||||
cmd: List[str]
|
||||
pwd: str
|
||||
cmd_version: str
|
||||
output: str
|
||||
status: str
|
||||
|
||||
created: datetime
|
||||
output: str
|
||||
|
||||
@staticmethod
|
||||
def resolve_id(obj):
|
||||
return obj.uuid
|
||||
def resolve_created_by_id(obj):
|
||||
return str(obj.created_by_id)
|
||||
|
||||
@staticmethod
|
||||
def resolve_pk(obj):
|
||||
return str(obj.pk)
|
||||
|
||||
@staticmethod
|
||||
def resolve_uuid(obj):
|
||||
return str(obj.uuid)
|
||||
|
||||
@staticmethod
|
||||
def resolve_abid(obj):
|
||||
return str(obj.ABID)
|
||||
|
||||
@staticmethod
|
||||
def resolve_created(obj):
|
||||
|
@ -47,18 +63,23 @@ class ArchiveResultSchema(Schema):
|
|||
def resolve_snapshot_url(obj):
|
||||
return obj.snapshot.url
|
||||
|
||||
@staticmethod
|
||||
def resolve_snapshot_abid(obj):
|
||||
return str(obj.snapshot.ABID)
|
||||
|
||||
@staticmethod
|
||||
def resolve_snapshot_tags(obj):
|
||||
return obj.snapshot.tags_str()
|
||||
|
||||
|
||||
class ArchiveResultFilterSchema(FilterSchema):
|
||||
id: Optional[UUID] = Field(None, q='uuid')
|
||||
uuid: Optional[UUID] = Field(None, q='uuid')
|
||||
# abid: Optional[str] = Field(None, q='abid')
|
||||
|
||||
search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains'])
|
||||
snapshot_id: Optional[UUID] = Field(None, q='snapshot_id')
|
||||
snapshot_url: Optional[str] = Field(None, q='snapshot__url')
|
||||
snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name')
|
||||
snapshot_uuid: Optional[UUID] = Field(None, q='snapshot_uuid__icontains')
|
||||
snapshot_url: Optional[str] = Field(None, q='snapshot__url__icontains')
|
||||
snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name__icontains')
|
||||
|
||||
status: Optional[str] = Field(None, q='status')
|
||||
output: Optional[str] = Field(None, q='output__icontains')
|
||||
|
@ -75,6 +96,7 @@ class ArchiveResultFilterSchema(FilterSchema):
|
|||
@router.get("/archiveresults", response=List[ArchiveResultSchema])
|
||||
@paginate
|
||||
def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
|
||||
"""List all ArchiveResult entries matching these filters."""
|
||||
qs = ArchiveResult.objects.all()
|
||||
results = filters.filter(qs)
|
||||
return results
|
||||
|
@ -82,8 +104,8 @@ def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)
|
|||
|
||||
@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
|
||||
def get_archiveresult(request, archiveresult_id: str):
|
||||
archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
|
||||
return archiveresult
|
||||
"""Get a specific ArchiveResult by abid, uuid, or pk."""
|
||||
return ArchiveResult.objects.get(Q(pk__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id) | Q(uuid__icontains=archiveresult_id))
|
||||
|
||||
|
||||
# @router.post("/archiveresult", response=ArchiveResultSchema)
|
||||
|
@ -115,27 +137,50 @@ def get_archiveresult(request, archiveresult_id: str):
|
|||
|
||||
|
||||
class SnapshotSchema(Schema):
|
||||
id: UUID
|
||||
abid: str
|
||||
uuid: UUID
|
||||
pk: str
|
||||
modified: datetime
|
||||
created: datetime
|
||||
created_by_id: str
|
||||
|
||||
url: str
|
||||
tags: str
|
||||
title: Optional[str]
|
||||
timestamp: str
|
||||
bookmarked: datetime
|
||||
added: datetime
|
||||
updated: datetime
|
||||
archive_path: str
|
||||
|
||||
bookmarked: datetime
|
||||
added: datetime
|
||||
updated: Optional[datetime]
|
||||
|
||||
num_archiveresults: int
|
||||
archiveresults: List[ArchiveResultSchema]
|
||||
|
||||
# @staticmethod
|
||||
# def resolve_id(obj):
|
||||
# return str(obj.id)
|
||||
@staticmethod
|
||||
def resolve_created_by_id(obj):
|
||||
return str(obj.created_by_id)
|
||||
|
||||
@staticmethod
|
||||
def resolve_pk(obj):
|
||||
return str(obj.pk)
|
||||
|
||||
@staticmethod
|
||||
def resolve_uuid(obj):
|
||||
return str(obj.uuid)
|
||||
|
||||
@staticmethod
|
||||
def resolve_abid(obj):
|
||||
return str(obj.ABID)
|
||||
|
||||
@staticmethod
|
||||
def resolve_tags(obj):
|
||||
return obj.tags_str()
|
||||
|
||||
@staticmethod
|
||||
def resolve_num_archiveresults(obj, context):
|
||||
return obj.archiveresult_set.all().distinct().count()
|
||||
|
||||
@staticmethod
|
||||
def resolve_archiveresults(obj, context):
|
||||
if context['request'].with_archiveresults:
|
||||
|
@ -144,23 +189,32 @@ class SnapshotSchema(Schema):
|
|||
|
||||
|
||||
class SnapshotFilterSchema(FilterSchema):
|
||||
id: Optional[UUID] = Field(None, q='id')
|
||||
abid: Optional[str] = Field(None, q='abid__icontains')
|
||||
uuid: Optional[str] = Field(None, q='uuid__icontains')
|
||||
pk: Optional[str] = Field(None, q='pk__icontains')
|
||||
created_by_id: str = Field(None, q='created_by_id__icontains')
|
||||
created__gte: datetime = Field(None, q='created__gte')
|
||||
created__lt: datetime = Field(None, q='created__lt')
|
||||
created: datetime = Field(None, q='created')
|
||||
modified: datetime = Field(None, q='modified')
|
||||
modified__gte: datetime = Field(None, q='modified__gte')
|
||||
modified__lt: datetime = Field(None, q='modified__lt')
|
||||
|
||||
search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains'])
|
||||
search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'abid__icontains', 'uuid__icontains'])
|
||||
url: Optional[str] = Field(None, q='url')
|
||||
tag: Optional[str] = Field(None, q='tags__name')
|
||||
title: Optional[str] = Field(None, q='title__icontains')
|
||||
|
||||
timestamp: Optional[str] = Field(None, q='timestamp__startswith')
|
||||
|
||||
added: Optional[datetime] = Field(None, q='added')
|
||||
added__gte: Optional[datetime] = Field(None, q='added__gte')
|
||||
added__lt: Optional[datetime] = Field(None, q='added__lt')
|
||||
|
||||
|
||||
|
||||
@router.get("/snapshots", response=List[SnapshotSchema])
|
||||
@paginate
|
||||
def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=True):
|
||||
"""List all Snapshot entries matching these filters."""
|
||||
request.with_archiveresults = with_archiveresults
|
||||
|
||||
qs = Snapshot.objects.all()
|
||||
|
@ -169,8 +223,24 @@ def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_arc
|
|||
|
||||
@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema)
|
||||
def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
|
||||
"""Get a specific Snapshot by abid, uuid, or pk."""
|
||||
request.with_archiveresults = with_archiveresults
|
||||
snapshot = get_object_or_404(Snapshot, id=snapshot_id)
|
||||
snapshot = None
|
||||
try:
|
||||
snapshot = Snapshot.objects.get(Q(uuid__startswith=snapshot_id) | Q(abid__startswith=snapshot_id)| Q(pk__startswith=snapshot_id))
|
||||
except Snapshot.DoesNotExist:
|
||||
pass
|
||||
|
||||
try:
|
||||
snapshot = snapshot or Snapshot.objects.get()
|
||||
except Snapshot.DoesNotExist:
|
||||
pass
|
||||
|
||||
try:
|
||||
snapshot = snapshot or Snapshot.objects.get(Q(uuid__icontains=snapshot_id) | Q(abid__icontains=snapshot_id))
|
||||
except Snapshot.DoesNotExist:
|
||||
pass
|
||||
|
||||
return snapshot
|
||||
|
||||
|
||||
|
@ -179,9 +249,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
|
|||
# snapshot = Snapshot.objects.create(**payload.dict())
|
||||
# return snapshot
|
||||
#
|
||||
# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
|
||||
# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
|
||||
# snapshot = get_object_or_404(Snapshot, id=snapshot_id)
|
||||
# @router.put("/snapshot/{snapshot_uuid}", response=SnapshotSchema)
|
||||
# def update_snapshot(request, snapshot_uuid: str, payload: SnapshotSchema):
|
||||
# snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
|
||||
#
|
||||
# for attr, value in payload.dict().items():
|
||||
# setattr(snapshot, attr, value)
|
||||
|
@ -189,9 +259,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
|
|||
#
|
||||
# return snapshot
|
||||
#
|
||||
# @router.delete("/snapshot/{snapshot_id}")
|
||||
# def delete_snapshot(request, snapshot_id: str):
|
||||
# snapshot = get_object_or_404(Snapshot, id=snapshot_id)
|
||||
# @router.delete("/snapshot/{snapshot_uuid}")
|
||||
# def delete_snapshot(request, snapshot_uuid: str):
|
||||
# snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
|
||||
# snapshot.delete()
|
||||
# return {"success": True}
|
||||
|
||||
|
@ -201,10 +271,21 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
|
|||
|
||||
|
||||
class TagSchema(Schema):
|
||||
abid: Optional[UUID] = Field(None, q='abid')
|
||||
uuid: Optional[UUID] = Field(None, q='uuid')
|
||||
pk: Optional[UUID] = Field(None, q='pk')
|
||||
modified: datetime
|
||||
created: datetime
|
||||
created_by_id: str
|
||||
|
||||
name: str
|
||||
slug: str
|
||||
|
||||
|
||||
@staticmethod
|
||||
def resolve_created_by_id(obj):
|
||||
return str(obj.created_by_id)
|
||||
|
||||
@router.get("/tags", response=List[TagSchema])
|
||||
def list_tags(request):
|
||||
return Tag.objects.all()
|
||||
|
|
|
@ -37,7 +37,10 @@ is_valid_cli_module = lambda module, subcommand: (
|
|||
)
|
||||
|
||||
|
||||
def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: Iterable[str]=('MainThread', 'ThreadPoolExecutor'), timeout: int=60) -> int:
|
||||
IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread') # threads we dont have to wait for before exiting
|
||||
|
||||
|
||||
def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: Iterable[str]=IGNORED_BG_THREADS, timeout: int=60) -> int:
|
||||
"""
|
||||
Block until the specified threads exit. e.g. pass thread_names=('default_hook_handler',) to wait for webhooks.
|
||||
Useful for waiting for signal handlers, webhooks, etc. to finish running after a mgmt command completes.
|
||||
|
|
|
@ -37,7 +37,7 @@ from sqlite3 import dbapi2 as sqlite3
|
|||
from hashlib import md5
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, Type, Tuple, Dict, Union, List
|
||||
from typing import Optional, Type, Tuple, Dict, Union, List, Any
|
||||
from subprocess import run, PIPE, DEVNULL
|
||||
from configparser import ConfigParser
|
||||
from collections import defaultdict
|
||||
|
|
|
@ -15,8 +15,7 @@ from django.contrib.auth import get_user_model
|
|||
from django import forms
|
||||
|
||||
|
||||
from signal_webhooks.apps import DjangoSignalWebhooksConfig
|
||||
from signal_webhooks.admin import WebhookAdmin, WebhookModel
|
||||
from signal_webhooks.admin import WebhookAdmin, get_webhook_model
|
||||
|
||||
from ..util import htmldecode, urldecode, ansi_to_html
|
||||
|
||||
|
@ -104,23 +103,14 @@ class ArchiveBoxAdmin(admin.AdminSite):
|
|||
return render(template_name='add.html', request=request, context=context)
|
||||
|
||||
|
||||
# monkey patch django-signals-webhooks to change how it shows up in Admin UI
|
||||
DjangoSignalWebhooksConfig.verbose_name = 'API'
|
||||
WebhookModel._meta.get_field('name').help_text = 'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).'
|
||||
WebhookModel._meta.get_field('signal').help_text = 'The type of event the webhook should fire for (e.g. Create, Update, Delete).'
|
||||
WebhookModel._meta.get_field('ref').help_text = 'Dot import notation of the model the webhook should fire for (e.g. core.models.Snapshot or core.models.ArchiveResult).'
|
||||
WebhookModel._meta.get_field('endpoint').help_text = 'External URL to POST the webhook notification to (e.g. https://someapp.example.com/webhook/some-webhook-receiver).'
|
||||
WebhookModel._meta.app_label = 'api'
|
||||
|
||||
|
||||
archivebox_admin = ArchiveBoxAdmin()
|
||||
archivebox_admin.register(get_user_model())
|
||||
archivebox_admin.register(APIToken)
|
||||
archivebox_admin.register(WebhookModel, WebhookAdmin)
|
||||
archivebox_admin.register(get_webhook_model(), WebhookAdmin)
|
||||
archivebox_admin.disable_action('delete_selected')
|
||||
|
||||
|
||||
# patch admin with methods to add data views
|
||||
# patch admin with methods to add data views (implemented by admin_data_views package)
|
||||
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
|
||||
|
||||
archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
|
||||
|
@ -170,14 +160,41 @@ class SnapshotActionForm(ActionForm):
|
|||
# )
|
||||
|
||||
|
||||
def get_abid_info(self, obj):
|
||||
return format_html(
|
||||
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
||||
'''
|
||||
ABID: <code style="font-size: 16px; user-select: all"><b>{}</b></code><br/>
|
||||
TS: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/>
|
||||
URI: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/>
|
||||
SUBTYPE: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/>
|
||||
RAND: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/><br/>
|
||||
ABID AS UUID: <code style="font-size: 10px; user-select: all">{}</code> <br/><br/>
|
||||
|
||||
.uuid: <code style="font-size: 10px; user-select: all">{}</code> <br/>
|
||||
.id: <code style="font-size: 10px; user-select: all">{}</code> <br/>
|
||||
.pk: <code style="font-size: 10px; user-select: all">{}</code> <br/><br/>
|
||||
''',
|
||||
obj.abid,
|
||||
obj.ABID.ts, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
|
||||
obj.ABID.uri, str(obj.abid_values['uri']),
|
||||
obj.ABID.subtype, str(obj.abid_values['subtype']),
|
||||
obj.ABID.rand, str(obj.abid_values['rand'])[-7:],
|
||||
obj.ABID.uuid,
|
||||
obj.uuid,
|
||||
obj.id,
|
||||
obj.pk,
|
||||
)
|
||||
|
||||
|
||||
@admin.register(Snapshot, site=archivebox_admin)
|
||||
class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
||||
list_display = ('added', 'title_str', 'files', 'size', 'url_str')
|
||||
sort_fields = ('title_str', 'url_str', 'added', 'files')
|
||||
readonly_fields = ('info', 'bookmarked', 'added', 'updated')
|
||||
search_fields = ('id', 'url', 'timestamp', 'title', 'tags__name')
|
||||
fields = ('timestamp', 'url', 'title', 'tags', *readonly_fields)
|
||||
list_filter = ('added', 'updated', 'tags', 'archiveresult__status')
|
||||
readonly_fields = ('admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'identifiers')
|
||||
search_fields = ('id', 'url', 'abid', 'uuid', 'timestamp', 'title', 'tags__name')
|
||||
fields = ('url', 'timestamp', 'created_by', 'tags', 'title', *readonly_fields)
|
||||
list_filter = ('added', 'updated', 'tags', 'archiveresult__status', 'created_by')
|
||||
ordering = ['-added']
|
||||
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
|
||||
autocomplete_fields = ['tags']
|
||||
|
@ -223,40 +240,46 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
|||
# </form>
|
||||
# ''',
|
||||
# csrf.get_token(self.request),
|
||||
# obj.id,
|
||||
# obj.pk,
|
||||
# )
|
||||
|
||||
def info(self, obj):
|
||||
def admin_actions(self, obj):
|
||||
return format_html(
|
||||
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
||||
'''
|
||||
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}">Summary page ➡️</a>
|
||||
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}/index.html#all">Result files 📑</a>
|
||||
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/admin/core/snapshot/?id__exact={}">Admin actions ⚙️</a>
|
||||
''',
|
||||
obj.timestamp,
|
||||
obj.timestamp,
|
||||
obj.pk,
|
||||
)
|
||||
|
||||
def status_info(self, obj):
|
||||
return format_html(
|
||||
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
||||
'''
|
||||
UUID: <code style="font-size: 10px; user-select: all">{}</code>
|
||||
Timestamp: <code style="font-size: 10px; user-select: all">{}</code>
|
||||
URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
|
||||
Archived: {} ({} files {})
|
||||
Favicon: <img src="{}" style="height: 20px"/>
|
||||
Status code: {}
|
||||
Status code: {} <br/>
|
||||
Server: {}
|
||||
Content type: {}
|
||||
Extension: {}
|
||||
<br/><br/>
|
||||
<a href="/archive/{}">View Snapshot index ➡️</a>
|
||||
<a href="/admin/core/snapshot/?id__exact={}">View actions ⚙️</a>
|
||||
''',
|
||||
obj.id,
|
||||
obj.timestamp,
|
||||
obj.url_hash,
|
||||
'✅' if obj.is_archived else '❌',
|
||||
obj.num_outputs,
|
||||
self.size(obj),
|
||||
self.size(obj) or '0kb',
|
||||
f'/archive/{obj.timestamp}/favicon.ico',
|
||||
obj.status_code or '?',
|
||||
obj.headers and obj.headers.get('Server') or '?',
|
||||
obj.headers and obj.headers.get('Content-Type') or '?',
|
||||
obj.extension or '?',
|
||||
obj.timestamp,
|
||||
obj.id,
|
||||
obj.status_code or '-',
|
||||
obj.headers and obj.headers.get('Server') or '-',
|
||||
obj.headers and obj.headers.get('Content-Type') or '-',
|
||||
obj.extension or '-',
|
||||
)
|
||||
|
||||
def identifiers(self, obj):
|
||||
return get_abid_info(self, obj)
|
||||
|
||||
@admin.display(
|
||||
description='Title',
|
||||
ordering='title',
|
||||
|
@ -316,7 +339,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
|||
return format_html(
|
||||
'<a href="{}"><code style="user-select: all;">{}</code></a>',
|
||||
obj.url,
|
||||
obj.url,
|
||||
obj.url[:128],
|
||||
)
|
||||
|
||||
def grid_view(self, request, extra_context=None):
|
||||
|
@ -419,42 +442,45 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
|
|||
|
||||
@admin.register(Tag, site=archivebox_admin)
|
||||
class TagAdmin(admin.ModelAdmin):
|
||||
list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'id')
|
||||
sort_fields = ('id', 'name', 'slug')
|
||||
readonly_fields = ('id', 'num_snapshots', 'snapshots')
|
||||
search_fields = ('id', 'name', 'slug')
|
||||
fields = (*readonly_fields, 'name', 'slug')
|
||||
list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'abid')
|
||||
sort_fields = ('id', 'name', 'slug', 'abid')
|
||||
readonly_fields = ('created', 'modified', 'identifiers', 'num_snapshots', 'snapshots')
|
||||
search_fields = ('id', 'abid', 'uuid', 'name', 'slug')
|
||||
fields = ('name', 'slug', 'created_by', *readonly_fields, )
|
||||
actions = ['delete_selected']
|
||||
ordering = ['-id']
|
||||
|
||||
def num_snapshots(self, obj):
|
||||
def identifiers(self, obj):
|
||||
return get_abid_info(self, obj)
|
||||
|
||||
def num_snapshots(self, tag):
|
||||
return format_html(
|
||||
'<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
|
||||
obj.id,
|
||||
obj.snapshot_set.count(),
|
||||
tag.id,
|
||||
tag.snapshot_set.count(),
|
||||
)
|
||||
|
||||
def snapshots(self, obj):
|
||||
total_count = obj.snapshot_set.count()
|
||||
def snapshots(self, tag):
|
||||
total_count = tag.snapshot_set.count()
|
||||
return mark_safe('<br/>'.join(
|
||||
format_html(
|
||||
'{} <code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a> {}</code>',
|
||||
snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
|
||||
snap.id,
|
||||
snap.timestamp,
|
||||
snap.pk,
|
||||
snap.abid,
|
||||
snap.url,
|
||||
)
|
||||
for snap in obj.snapshot_set.order_by('-updated')[:10]
|
||||
) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={obj.id}">and {total_count-10} more...<a>' if obj.snapshot_set.count() > 10 else ''))
|
||||
for snap in tag.snapshot_set.order_by('-updated')[:10]
|
||||
) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">and {total_count-10} more...<a>' if tag.snapshot_set.count() > 10 else ''))
|
||||
|
||||
|
||||
@admin.register(ArchiveResult, site=archivebox_admin)
|
||||
class ArchiveResultAdmin(admin.ModelAdmin):
|
||||
list_display = ('id', 'start_ts', 'extractor', 'snapshot_str', 'tags_str', 'cmd_str', 'status', 'output_str')
|
||||
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
|
||||
sort_fields = ('start_ts', 'extractor', 'status')
|
||||
readonly_fields = ('id', 'uuid', 'snapshot_str', 'tags_str')
|
||||
search_fields = ('id', 'uuid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
||||
fields = (*readonly_fields, 'snapshot', 'extractor', 'status', 'start_ts', 'end_ts', 'output', 'pwd', 'cmd', 'cmd_version')
|
||||
readonly_fields = ('snapshot_info', 'tags_str', 'created_by', 'created', 'modified', 'identifiers')
|
||||
search_fields = ('id', 'uuid', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
||||
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'cmd', 'start_ts', 'end_ts', 'cmd_version', *readonly_fields)
|
||||
autocomplete_fields = ['snapshot']
|
||||
|
||||
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
|
||||
|
@ -462,33 +488,36 @@ class ArchiveResultAdmin(admin.ModelAdmin):
|
|||
list_per_page = SNAPSHOTS_PER_PAGE
|
||||
|
||||
@admin.display(
|
||||
description='snapshot'
|
||||
description='Snapshot Info'
|
||||
)
|
||||
def snapshot_str(self, obj):
|
||||
def snapshot_info(self, result):
|
||||
return format_html(
|
||||
'<a href="/archive/{}/index.html"><b><code>[{}]</code></b></a><br/>'
|
||||
'<small>{}</small>',
|
||||
obj.snapshot.timestamp,
|
||||
obj.snapshot.timestamp,
|
||||
obj.snapshot.url[:128],
|
||||
'<a href="/archive/{}/index.html"><b><code>[{}]</code></b> {} {}</a><br/>',
|
||||
result.snapshot.timestamp,
|
||||
result.snapshot.abid,
|
||||
result.snapshot.added.strftime('%Y-%m-%d %H:%M'),
|
||||
result.snapshot.url[:128],
|
||||
)
|
||||
|
||||
def identifiers(self, obj):
|
||||
return get_abid_info(self, obj)
|
||||
|
||||
@admin.display(
|
||||
description='tags'
|
||||
description='Snapshot Tags'
|
||||
)
|
||||
def tags_str(self, obj):
|
||||
return obj.snapshot.tags_str()
|
||||
def tags_str(self, result):
|
||||
return result.snapshot.tags_str()
|
||||
|
||||
def cmd_str(self, obj):
|
||||
def cmd_str(self, result):
|
||||
return format_html(
|
||||
'<pre>{}</pre>',
|
||||
' '.join(obj.cmd) if isinstance(obj.cmd, list) else str(obj.cmd),
|
||||
' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
|
||||
)
|
||||
|
||||
def output_str(self, obj):
|
||||
def output_str(self, result):
|
||||
return format_html(
|
||||
'<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
|
||||
obj.snapshot.timestamp,
|
||||
obj.output if (obj.status == 'succeeded') and obj.extractor not in ('title', 'archive_org') else 'index.html',
|
||||
obj.output,
|
||||
result.snapshot.timestamp,
|
||||
result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
|
||||
result.output,
|
||||
)
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-13 10:56
|
||||
|
||||
import charidfield.fields
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0022_auto_20231023_2008'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='archiveresult',
|
||||
options={'verbose_name': 'Result'},
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='abid',
|
||||
field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='res_', unique=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='abid',
|
||||
field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='snp_', unique=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='uuid',
|
||||
field=models.UUIDField(blank=True, null=True, unique=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='abid',
|
||||
field=charidfield.fields.CharIDField(blank=True, db_index=True, default=None, help_text='ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)', max_length=30, null=True, prefix='tag_', unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='extractor',
|
||||
field=models.CharField(choices=[('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'), ('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'), ('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'), ('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('title', 'title'), ('wget', 'wget')], max_length=32),
|
||||
),
|
||||
]
|
95
archivebox/core/migrations/0024_auto_20240513_1143.py
Normal file
95
archivebox/core/migrations/0024_auto_20240513_1143.py
Normal file
|
@ -0,0 +1,95 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-13 11:43
|
||||
|
||||
from django.db import migrations
|
||||
from datetime import datetime
|
||||
from abid_utils.abid import abid_from_values
|
||||
|
||||
|
||||
def calculate_abid(self):
|
||||
"""
|
||||
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
|
||||
"""
|
||||
prefix = self.abid_prefix
|
||||
ts = eval(self.abid_ts_src)
|
||||
uri = eval(self.abid_uri_src)
|
||||
subtype = eval(self.abid_subtype_src)
|
||||
rand = eval(self.abid_rand_src)
|
||||
|
||||
if (not prefix) or prefix == 'obj_':
|
||||
suggested_abid = self.__class__.__name__[:3].lower()
|
||||
raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')
|
||||
|
||||
if not ts:
|
||||
ts = datetime.utcfromtimestamp(0)
|
||||
print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())
|
||||
|
||||
if not uri:
|
||||
uri = str(self)
|
||||
print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)
|
||||
|
||||
if not subtype:
|
||||
subtype = self.__class__.__name__
|
||||
print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)
|
||||
|
||||
if not rand:
|
||||
rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
|
||||
print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)
|
||||
|
||||
abid = abid_from_values(
|
||||
prefix=prefix,
|
||||
ts=ts,
|
||||
uri=uri,
|
||||
subtype=subtype,
|
||||
rand=rand,
|
||||
)
|
||||
assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
|
||||
return abid
|
||||
|
||||
|
||||
def copy_snapshot_uuids(apps, schema_editor):
|
||||
Snapshot = apps.get_model("core", "Snapshot")
|
||||
for snapshot in Snapshot.objects.all():
|
||||
snapshot.uuid = snapshot.id
|
||||
snapshot.save(update_fields=["uuid"])
|
||||
|
||||
def generate_snapshot_abids(apps, schema_editor):
|
||||
Snapshot = apps.get_model("core", "Snapshot")
|
||||
for snapshot in Snapshot.objects.all():
|
||||
snapshot.abid_prefix = 'snp_'
|
||||
snapshot.abid_ts_src = 'self.added'
|
||||
snapshot.abid_uri_src = 'self.url'
|
||||
snapshot.abid_subtype_src = '"01"'
|
||||
snapshot.abid_rand_src = 'self.uuid'
|
||||
|
||||
snapshot.abid = calculate_abid(snapshot)
|
||||
snapshot.save(update_fields=["abid"])
|
||||
|
||||
def generate_archiveresult_abids(apps, schema_editor):
|
||||
ArchiveResult = apps.get_model("core", "ArchiveResult")
|
||||
Snapshot = apps.get_model("core", "Snapshot")
|
||||
for result in ArchiveResult.objects.all():
|
||||
result.abid_prefix = 'res_'
|
||||
result.snapshot = Snapshot.objects.get(pk=result.snapshot_id)
|
||||
result.snapshot_added = result.snapshot.added
|
||||
result.snapshot_url = result.snapshot.url
|
||||
result.abid_ts_src = 'self.snapshot_added'
|
||||
result.abid_uri_src = 'self.snapshot_url'
|
||||
result.abid_subtype_src = 'self.extractor'
|
||||
result.abid_rand_src = 'self.id'
|
||||
|
||||
result.abid = calculate_abid(result)
|
||||
result.uuid = result.abid.uuid
|
||||
result.save(update_fields=["abid", "uuid"])
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0023_alter_archiveresult_options_archiveresult_abid_and_more'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RunPython(copy_snapshot_uuids, reverse_code=migrations.RunPython.noop),
|
||||
migrations.RunPython(generate_snapshot_abids, reverse_code=migrations.RunPython.noop),
|
||||
migrations.RunPython(generate_archiveresult_abids, reverse_code=migrations.RunPython.noop),
|
||||
]
|
19
archivebox/core/migrations/0025_alter_archiveresult_uuid.py
Normal file
19
archivebox/core/migrations/0025_alter_archiveresult_uuid.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-13 12:08
|
||||
|
||||
import uuid
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0024_auto_20240513_1143'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='uuid',
|
||||
field=models.UUIDField(default=uuid.uuid4, editable=False, unique=True),
|
||||
),
|
||||
]
|
|
@ -0,0 +1,76 @@
|
|||
# Generated by Django 5.0.6 on 2024-05-13 13:01
|
||||
|
||||
import abid_utils.models
|
||||
import django.db.models.deletion
|
||||
import django.utils.timezone
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('core', '0025_alter_archiveresult_uuid'),
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='created',
|
||||
field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
|
||||
preserve_default=False,
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='archiveresult',
|
||||
name='modified',
|
||||
field=models.DateTimeField(auto_now=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='created',
|
||||
field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
|
||||
preserve_default=False,
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='snapshot',
|
||||
name='modified',
|
||||
field=models.DateTimeField(auto_now=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='created',
|
||||
field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
|
||||
preserve_default=False,
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='created_by',
|
||||
field=models.ForeignKey(default=abid_utils.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='modified',
|
||||
field=models.DateTimeField(auto_now=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='uuid',
|
||||
field=models.UUIDField(blank=True, null=True, unique=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='archiveresult',
|
||||
name='uuid',
|
||||
field=models.UUIDField(blank=True, null=True, unique=True),
|
||||
),
|
||||
]
|
|
@ -1,12 +1,14 @@
|
|||
__package__ = 'archivebox.core'
|
||||
|
||||
|
||||
import uuid
|
||||
from typing import Optional, List, Dict
|
||||
from django_stubs_ext.db.models import TypedModelMeta
|
||||
|
||||
import json
|
||||
|
||||
import uuid
|
||||
from uuid import uuid4
|
||||
from pathlib import Path
|
||||
from typing import Optional, List
|
||||
from importlib import import_module
|
||||
|
||||
from django.db import models
|
||||
from django.utils.functional import cached_property
|
||||
|
@ -16,12 +18,15 @@ from django.urls import reverse
|
|||
from django.db.models import Case, When, Value, IntegerField
|
||||
from django.contrib.auth.models import User # noqa
|
||||
|
||||
from abid_utils.models import ABIDModel, ABIDField
|
||||
|
||||
from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME
|
||||
from ..system import get_dir_size
|
||||
from ..util import parse_date, base_url, hashurl
|
||||
from ..util import parse_date, base_url
|
||||
from ..index.schema import Link
|
||||
from ..index.html import snapshot_icons
|
||||
from ..extractors import get_default_archive_methods, ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
|
||||
from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
|
||||
|
||||
|
||||
EXTRACTOR_CHOICES = [(extractor_name, extractor_name) for extractor_name in EXTRACTORS.keys()]
|
||||
STATUS_CHOICES = [
|
||||
|
@ -30,26 +35,41 @@ STATUS_CHOICES = [
|
|||
("skipped", "skipped")
|
||||
]
|
||||
|
||||
try:
|
||||
JSONField = models.JSONField
|
||||
except AttributeError:
|
||||
import jsonfield
|
||||
JSONField = jsonfield.JSONField
|
||||
|
||||
|
||||
class Tag(models.Model):
|
||||
# class BaseModel(models.Model):
|
||||
# # TODO: migrate all models to a shared base class with all our standard fields and helpers:
|
||||
# # ulid/created/modified/owner/is_deleted/as_json/from_json/etc.
|
||||
# #
|
||||
# # id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
||||
# # ulid = models.CharField(max_length=26, null=True, blank=True, db_index=True, unique=True)
|
||||
|
||||
# class Meta(TypedModelMeta):
|
||||
# abstract = True
|
||||
|
||||
|
||||
class Tag(ABIDModel):
|
||||
"""
|
||||
Based on django-taggit model
|
||||
Based on django-taggit model + ABID base.
|
||||
"""
|
||||
abid_prefix = 'tag_'
|
||||
abid_ts_src = 'self.created' # TODO: add created/modified time
|
||||
abid_uri_src = 'self.name'
|
||||
abid_subtype_src = '"03"'
|
||||
abid_rand_src = 'self.id'
|
||||
|
||||
# id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
|
||||
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
||||
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||
abid = ABIDField(prefix=abid_prefix)
|
||||
|
||||
|
||||
name = models.CharField(unique=True, blank=False, max_length=100)
|
||||
|
||||
# slug is autoset on save from name, never set it manually
|
||||
slug = models.SlugField(unique=True, blank=True, max_length=100)
|
||||
# slug is autoset on save from name, never set it manually
|
||||
|
||||
|
||||
class Meta:
|
||||
class Meta(TypedModelMeta):
|
||||
verbose_name = "Tag"
|
||||
verbose_name_plural = "Tags"
|
||||
|
||||
|
@ -85,8 +105,16 @@ class Tag(models.Model):
|
|||
return super().save(*args, **kwargs)
|
||||
|
||||
|
||||
class Snapshot(models.Model):
|
||||
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
|
||||
class Snapshot(ABIDModel):
|
||||
abid_prefix = 'snp_'
|
||||
abid_ts_src = 'self.added'
|
||||
abid_uri_src = 'self.url'
|
||||
abid_subtype_src = '"01"'
|
||||
abid_rand_src = 'self.id'
|
||||
|
||||
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) # legacy pk
|
||||
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||
abid = ABIDField(prefix=abid_prefix)
|
||||
|
||||
url = models.URLField(unique=True, db_index=True)
|
||||
timestamp = models.CharField(max_length=32, unique=True, db_index=True)
|
||||
|
@ -99,6 +127,7 @@ class Snapshot(models.Model):
|
|||
|
||||
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
|
||||
|
||||
|
||||
def __repr__(self) -> str:
|
||||
title = self.title or '-'
|
||||
return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})'
|
||||
|
@ -127,8 +156,8 @@ class Snapshot(models.Model):
|
|||
from ..index import load_link_details
|
||||
return load_link_details(self.as_link())
|
||||
|
||||
def tags_str(self, nocache=True) -> str:
|
||||
cache_key = f'{self.id}-{(self.updated or self.added).timestamp()}-tags'
|
||||
def tags_str(self, nocache=True) -> str | None:
|
||||
cache_key = f'{self.pk}-{(self.updated or self.added).timestamp()}-tags'
|
||||
calc_tags_str = lambda: ','.join(self.tags.order_by('name').values_list('name', flat=True))
|
||||
if nocache:
|
||||
tags_str = calc_tags_str()
|
||||
|
@ -158,13 +187,9 @@ class Snapshot(models.Model):
|
|||
return self.as_link().is_archived
|
||||
|
||||
@cached_property
|
||||
def num_outputs(self):
|
||||
def num_outputs(self) -> int:
|
||||
return self.archiveresult_set.filter(status='succeeded').count()
|
||||
|
||||
@cached_property
|
||||
def url_hash(self):
|
||||
return hashurl(self.url)
|
||||
|
||||
@cached_property
|
||||
def base_url(self):
|
||||
return base_url(self.url)
|
||||
|
@ -179,7 +204,7 @@ class Snapshot(models.Model):
|
|||
|
||||
@cached_property
|
||||
def archive_size(self):
|
||||
cache_key = f'{str(self.id)[:12]}-{(self.updated or self.added).timestamp()}-size'
|
||||
cache_key = f'{str(self.pk)[:12]}-{(self.updated or self.added).timestamp()}-size'
|
||||
|
||||
def calc_dir_size():
|
||||
try:
|
||||
|
@ -200,7 +225,7 @@ class Snapshot(models.Model):
|
|||
return None
|
||||
|
||||
@cached_property
|
||||
def headers(self) -> Optional[dict]:
|
||||
def headers(self) -> Optional[Dict[str, str]]:
|
||||
try:
|
||||
return json.loads((Path(self.link_dir) / 'headers.json').read_text(encoding='utf-8').strip())
|
||||
except Exception:
|
||||
|
@ -251,11 +276,37 @@ class Snapshot(models.Model):
|
|||
tags_id = []
|
||||
for tag in tags:
|
||||
if tag.strip():
|
||||
tags_id.append(Tag.objects.get_or_create(name=tag)[0].id)
|
||||
tags_id.append(Tag.objects.get_or_create(name=tag)[0].pk)
|
||||
self.tags.clear()
|
||||
self.tags.add(*tags_id)
|
||||
|
||||
|
||||
# def get_storage_dir(self, create=True, symlink=True) -> Path:
|
||||
# date_str = self.added.strftime('%Y%m%d')
|
||||
# domain_str = domain(self.url)
|
||||
# abs_storage_dir = Path(ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
|
||||
|
||||
# if create and not abs_storage_dir.is_dir():
|
||||
# abs_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# if symlink:
|
||||
# LINK_PATHS = [
|
||||
# Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
|
||||
# # Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
|
||||
# Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
|
||||
# Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
|
||||
# ]
|
||||
# for link_path in LINK_PATHS:
|
||||
# link_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# try:
|
||||
# link_path.symlink_to(abs_storage_dir)
|
||||
# except FileExistsError:
|
||||
# link_path.unlink()
|
||||
# link_path.symlink_to(abs_storage_dir)
|
||||
|
||||
# return abs_storage_dir
|
||||
|
||||
|
||||
class ArchiveResultManager(models.Manager):
|
||||
def indexable(self, sorted: bool = True):
|
||||
INDEXABLE_METHODS = [ r[0] for r in ARCHIVE_METHODS_INDEXING_PRECEDENCE ]
|
||||
|
@ -267,15 +318,22 @@ class ArchiveResultManager(models.Manager):
|
|||
return qs
|
||||
|
||||
|
||||
class ArchiveResult(models.Model):
|
||||
class ArchiveResult(ABIDModel):
|
||||
abid_prefix = 'res_'
|
||||
abid_ts_src = 'self.snapshot.added'
|
||||
abid_uri_src = 'self.snapshot.url'
|
||||
abid_subtype_src = 'self.extractor'
|
||||
abid_rand_src = 'self.uuid'
|
||||
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
|
||||
|
||||
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
|
||||
uuid = models.UUIDField(default=uuid.uuid4, editable=False)
|
||||
# id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
|
||||
id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID') # legacy pk
|
||||
uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
|
||||
abid = ABIDField(prefix=abid_prefix)
|
||||
|
||||
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
|
||||
extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)
|
||||
cmd = JSONField()
|
||||
cmd = models.JSONField()
|
||||
pwd = models.CharField(max_length=256)
|
||||
cmd_version = models.CharField(max_length=128, default=None, null=True, blank=True)
|
||||
output = models.CharField(max_length=1024)
|
||||
|
@ -285,6 +343,9 @@ class ArchiveResult(models.Model):
|
|||
|
||||
objects = ArchiveResultManager()
|
||||
|
||||
class Meta(TypedModelMeta):
|
||||
verbose_name = 'Result'
|
||||
|
||||
def __str__(self):
|
||||
return self.extractor
|
||||
|
||||
|
@ -318,3 +379,33 @@ class ArchiveResult(models.Model):
|
|||
|
||||
def output_exists(self) -> bool:
|
||||
return Path(self.output_path()).exists()
|
||||
|
||||
|
||||
# def get_storage_dir(self, create=True, symlink=True):
|
||||
# date_str = self.snapshot.added.strftime('%Y%m%d')
|
||||
# domain_str = domain(self.snapshot.url)
|
||||
# abs_storage_dir = Path(ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
|
||||
|
||||
# if create and not abs_storage_dir.is_dir():
|
||||
# abs_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# if symlink:
|
||||
# LINK_PATHS = [
|
||||
# Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
|
||||
# # Path(ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
|
||||
# # Path(ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
|
||||
# Path(ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
|
||||
# Path(ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
|
||||
# ]
|
||||
# for link_path in LINK_PATHS:
|
||||
# link_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# try:
|
||||
# link_path.symlink_to(abs_storage_dir)
|
||||
# except FileExistsError:
|
||||
# link_path.unlink()
|
||||
# link_path.symlink_to(abs_storage_dir)
|
||||
|
||||
# return abs_storage_dir
|
||||
|
||||
# def symlink_index(self, create=True):
|
||||
# abs_result_dir = self.get_storage_dir(create=create)
|
||||
|
|
|
@ -10,6 +10,7 @@ from pathlib import Path
|
|||
from django.utils.crypto import get_random_string
|
||||
|
||||
from ..config import (
|
||||
CONFIG,
|
||||
DEBUG,
|
||||
SECRET_KEY,
|
||||
ALLOWED_HOSTS,
|
||||
|
@ -62,12 +63,13 @@ INSTALLED_APPS = [
|
|||
'django.contrib.staticfiles',
|
||||
'django.contrib.admin',
|
||||
|
||||
'signal_webhooks',
|
||||
'abid_utils',
|
||||
'core',
|
||||
'api',
|
||||
|
||||
'admin_data_views',
|
||||
|
||||
'signal_webhooks',
|
||||
'django_extensions',
|
||||
]
|
||||
|
||||
|
@ -247,22 +249,26 @@ DATABASES = {
|
|||
'TIME_ZONE': TIMEZONE,
|
||||
# DB setup is sometimes modified at runtime by setup_django() in config.py
|
||||
},
|
||||
'cache': {
|
||||
'ENGINE': 'django.db.backends.sqlite3',
|
||||
'NAME': CACHE_DB_PATH,
|
||||
'OPTIONS': {
|
||||
'timeout': 60,
|
||||
'check_same_thread': False,
|
||||
},
|
||||
'TIME_ZONE': TIMEZONE,
|
||||
},
|
||||
# 'cache': {
|
||||
# 'ENGINE': 'django.db.backends.sqlite3',
|
||||
# 'NAME': CACHE_DB_PATH,
|
||||
# 'OPTIONS': {
|
||||
# 'timeout': 60,
|
||||
# 'check_same_thread': False,
|
||||
# },
|
||||
# 'TIME_ZONE': TIMEZONE,
|
||||
# },
|
||||
}
|
||||
MIGRATION_MODULES = {'signal_webhooks': None}
|
||||
|
||||
# as much as I'd love this to be a UUID or ULID field, it's not supported yet as of Django 5.0
|
||||
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
|
||||
|
||||
|
||||
CACHES = {
|
||||
'default': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
|
||||
'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
|
||||
'locmem': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
|
||||
'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
|
||||
# 'sqlite': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
|
||||
# 'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
|
||||
# 'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
|
||||
}
|
||||
|
||||
|
@ -421,9 +427,11 @@ LOGGING = {
|
|||
|
||||
|
||||
# Add default webhook configuration to the User model
|
||||
SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
|
||||
SIGNAL_WEBHOOKS = {
|
||||
"HOOKS": {
|
||||
"django.contrib.auth.models.User": ..., # ... is a special value that means "use the default autogenerated hooks"
|
||||
# ... is a special sigil value that means "use the default autogenerated hooks"
|
||||
"django.contrib.auth.models.User": ...,
|
||||
"core.models.Snapshot": ...,
|
||||
"core.models.ArchiveResult": ...,
|
||||
"core.models.Tag": ...,
|
||||
|
|
|
@ -226,8 +226,8 @@ class SnapshotView(View):
|
|||
'<i><b>Next steps:</i></b><br/>'
|
||||
f'- list all the <a href="/archive/{snapshot.timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>'
|
||||
f'- view the <a href="/archive/{snapshot.timestamp}/index.html" target="_top">Snapshot <code>./index.html</code></a><br/>'
|
||||
f'- go to the <a href="/admin/core/snapshot/{snapshot.id}/change/" target="_top">Snapshot admin</a> to edit<br/>'
|
||||
f'- go to the <a href="/admin/core/snapshot/?id__startswith={snapshot.id}" target="_top">Snapshot actions</a> to re-archive<br/>'
|
||||
f'- go to the <a href="/admin/core/snapshot/{snapshot.pk}/change/" target="_top">Snapshot admin</a> to edit<br/>'
|
||||
f'- go to the <a href="/admin/core/snapshot/?uuid__startswith={snapshot.uuid}" target="_top">Snapshot actions</a> to re-archive<br/>'
|
||||
'- or return to <a href="/" target="_top">the main index...</a></div>'
|
||||
'</center>'
|
||||
),
|
||||
|
@ -455,7 +455,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
|
|||
|
||||
for section in CONFIG_SCHEMA.keys():
|
||||
for key in CONFIG_SCHEMA[section].keys():
|
||||
rows['Section'].append(section.replace('_', ' ').title().replace(' Config', ''))
|
||||
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
|
||||
rows['Key'].append(ItemLink(key, key=key))
|
||||
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
|
||||
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
|
||||
|
@ -465,7 +465,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
|
|||
|
||||
section = 'DYNAMIC'
|
||||
for key in DYNAMIC_CONFIG_SCHEMA.keys():
|
||||
rows['Section'].append(section.replace('_', ' ').title().replace(' Config', ''))
|
||||
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
|
||||
rows['Key'].append(ItemLink(key, key=key))
|
||||
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
|
||||
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
|
||||
|
|
|
@ -160,7 +160,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
|||
# bump the updated time on the main Snapshot here, this is critical
|
||||
# to be able to cache summaries of the ArchiveResults for a given
|
||||
# snapshot without having to load all the results from the DB each time.
|
||||
# (we use {Snapshot.id}-{Snapshot.updated} as the cache key and assume
|
||||
# (we use {Snapshot.pk}-{Snapshot.updated} as the cache key and assume
|
||||
# ArchiveResults are unchanged as long as the updated timestamp is unchanged)
|
||||
snapshot.save()
|
||||
else:
|
||||
|
|
|
@ -94,7 +94,8 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
|
|||
status = 'failed'
|
||||
# TODO: Make this prettier. This is necessary to run the command (escape JSON internal quotes).
|
||||
cmd[2] = browser_args.replace('"', "\\\"")
|
||||
err.hints = (result.stdout + result.stderr).decode().split('\n')
|
||||
if result:
|
||||
err.hints = (result.stdout + result.stderr).decode().split('\n')
|
||||
output = err
|
||||
finally:
|
||||
timer.end()
|
||||
|
|
|
@ -118,7 +118,7 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
|
|||
|
||||
|
||||
def snapshot_icons(snapshot) -> str:
|
||||
cache_key = f'{snapshot.id}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
|
||||
cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
|
||||
|
||||
def calc_snapshot_icons():
|
||||
from core.models import EXTRACTOR_CHOICES
|
||||
|
|
|
@ -192,6 +192,9 @@ class Link:
|
|||
if extended:
|
||||
info.update({
|
||||
'snapshot_id': self.snapshot_id,
|
||||
'snapshot_uuid': self.snapshot_uuid,
|
||||
'snapshot_abid': self.snapshot_abid,
|
||||
|
||||
'link_dir': self.link_dir,
|
||||
'archive_path': self.archive_path,
|
||||
|
||||
|
@ -261,9 +264,21 @@ class Link:
|
|||
return to_csv(self, cols=cols or self.field_names(), separator=separator, ljust=ljust)
|
||||
|
||||
@cached_property
|
||||
def snapshot_id(self):
|
||||
def snapshot(self):
|
||||
from core.models import Snapshot
|
||||
return str(Snapshot.objects.only('id').get(url=self.url).id)
|
||||
return Snapshot.objects.only('uuid').get(url=self.url)
|
||||
|
||||
@cached_property
|
||||
def snapshot_id(self):
|
||||
return str(self.snapshot.pk)
|
||||
|
||||
@cached_property
|
||||
def snapshot_uuid(self):
|
||||
return str(self.snapshot.uuid)
|
||||
|
||||
@cached_property
|
||||
def snapshot_abid(self):
|
||||
return str(self.snapshot.ABID)
|
||||
|
||||
@classmethod
|
||||
def field_names(cls):
|
||||
|
|
|
@ -45,7 +45,8 @@ def write_link_to_sql_index(link: Link):
|
|||
info.pop('tags')
|
||||
|
||||
try:
|
||||
info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp
|
||||
snapshot = Snapshot.objects.get(url=link.url)
|
||||
info["timestamp"] = snapshot.timestamp
|
||||
except Snapshot.DoesNotExist:
|
||||
while Snapshot.objects.filter(timestamp=info["timestamp"]).exists():
|
||||
info["timestamp"] = str(float(info["timestamp"]) + 1.0)
|
||||
|
@ -57,7 +58,7 @@ def write_link_to_sql_index(link: Link):
|
|||
for entry in entries:
|
||||
if isinstance(entry, dict):
|
||||
result, _ = ArchiveResult.objects.get_or_create(
|
||||
snapshot_id=snapshot.id,
|
||||
snapshot_id=snapshot.pk,
|
||||
extractor=extractor,
|
||||
start_ts=parse_date(entry['start_ts']),
|
||||
defaults={
|
||||
|
@ -71,7 +72,7 @@ def write_link_to_sql_index(link: Link):
|
|||
)
|
||||
else:
|
||||
result, _ = ArchiveResult.objects.update_or_create(
|
||||
snapshot_id=snapshot.id,
|
||||
snapshot_id=snapshot.pk,
|
||||
extractor=extractor,
|
||||
start_ts=parse_date(entry.start_ts),
|
||||
defaults={
|
||||
|
|
16
archivebox/monkey_patches.py
Normal file
16
archivebox/monkey_patches.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
__package__ = 'archivebox'
|
||||
|
||||
import django_stubs_ext
|
||||
|
||||
django_stubs_ext.monkeypatch()
|
||||
|
||||
|
||||
# monkey patch django timezone to add back utc (it was removed in Django 5.0)
|
||||
import datetime
|
||||
from django.utils import timezone
|
||||
timezone.utc = datetime.timezone.utc
|
||||
|
||||
|
||||
# monkey patch django-signals-webhooks to change how it shows up in Admin UI
|
||||
# from signal_webhooks.apps import DjangoSignalWebhooksConfig
|
||||
# DjangoSignalWebhooksConfig.verbose_name = 'API'
|
|
@ -39,7 +39,7 @@ def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir:
|
|||
backend = import_backend()
|
||||
if snap:
|
||||
try:
|
||||
backend.index(snapshot_id=str(snap.id), texts=texts)
|
||||
backend.index(snapshot_id=str(snap.pk), texts=texts)
|
||||
except Exception as err:
|
||||
stderr()
|
||||
stderr(
|
||||
|
@ -54,7 +54,7 @@ def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
|
|||
if search_backend_enabled():
|
||||
backend = import_backend()
|
||||
try:
|
||||
snapshot_ids = backend.search(query)
|
||||
snapshot_pks = backend.search(query)
|
||||
except Exception as err:
|
||||
stderr()
|
||||
stderr(
|
||||
|
@ -64,7 +64,7 @@ def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
|
|||
raise
|
||||
else:
|
||||
# TODO preserve ordering from backend
|
||||
qsearch = Snapshot.objects.filter(pk__in=snapshot_ids)
|
||||
qsearch = Snapshot.objects.filter(pk__in=snapshot_pks)
|
||||
return qsearch
|
||||
|
||||
return Snapshot.objects.none()
|
||||
|
@ -74,9 +74,9 @@ def flush_search_index(snapshots: QuerySet):
|
|||
if not indexing_enabled() or not snapshots:
|
||||
return
|
||||
backend = import_backend()
|
||||
snapshot_ids=(str(pk) for pk in snapshots.values_list('pk',flat=True))
|
||||
snapshot_pks = (str(pk) for pk in snapshots.values_list('pk', flat=True))
|
||||
try:
|
||||
backend.flush(snapshot_ids)
|
||||
backend.flush(snapshot_pks)
|
||||
except Exception as err:
|
||||
stderr()
|
||||
stderr(
|
||||
|
|
|
@ -147,7 +147,7 @@
|
|||
{% for obj in results %}
|
||||
<div class="card">
|
||||
<div class="card-info">
|
||||
<a href="{% url 'admin:core_snapshot_change' obj.id %}">
|
||||
<a href="{% url 'admin:core_snapshot_change' obj.pk %}">
|
||||
<span class="timestamp">{{obj.added}}</span>
|
||||
</a>
|
||||
<label>
|
||||
|
|
|
@ -405,7 +405,7 @@
|
|||
</a>
|
||||
<!--<a href="{{result.path}}" target="preview"><h4 class="card-title">{{result.name}}</h4></a>-->
|
||||
</div>
|
||||
<iframe class="card-img-top" src="{{result.path}}" sandbox="allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>
|
||||
<iframe class="card-img-top" src="{{result.path}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
|
@ -463,7 +463,7 @@
|
|||
if (target.endsWith('.pdf')) {
|
||||
jQuery('#main-frame')[0].removeAttribute('sandbox')
|
||||
} else {
|
||||
jQuery('#main-frame')[0].sandbox = "allow-scripts allow-forms allow-top-navigation-by-user-activation"
|
||||
jQuery('#main-frame')[0].sandbox = "allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms"
|
||||
}
|
||||
window.location.hash = getPreviewTypeFromPath(event.currentTarget.querySelector('a'))
|
||||
|
||||
|
|
|
@ -37,6 +37,9 @@ dependencies = [
|
|||
# - See Github issues for more...
|
||||
"django-signal-webhooks>=0.3.0",
|
||||
"django-admin-data-views>=0.3.1",
|
||||
"ulid-py>=1.1.0",
|
||||
"typeid-python>=0.3.0",
|
||||
"django-charid-field>=0.4",
|
||||
]
|
||||
|
||||
homepage = "https://github.com/ArchiveBox/ArchiveBox"
|
||||
|
|
Loading…
Reference in a new issue