abid gradual improvements, some regrets

This commit is contained in:
Nick Sweeting 2024-09-04 00:08:14 -07:00
parent dd97f01bfc
commit d060eaa499
No known key found for this signature in database
4 changed files with 51 additions and 31 deletions

View file

@ -1,4 +1,6 @@
from typing import NamedTuple, Any, Union, Optional
__package__ = 'archivebox.abid_utils'
from typing import NamedTuple, Any, Union, Optional, Dict
import ulid
import uuid6
@ -9,6 +11,7 @@ from uuid import UUID
from typeid import TypeID # type: ignore[import-untyped]
from datetime import datetime
from ..util import enforce_types
ABID_PREFIX_LEN = 4
@ -108,6 +111,7 @@ class ABID(NamedTuple):
####################################################
@enforce_types
def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
"""
'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25'
@ -130,17 +134,19 @@ def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
return hashlib.sha256(uri_bytes).hexdigest().upper()
@enforce_types
def abid_part_from_prefix(prefix: str) -> str:
    """
    Normalize a type prefix into the prefix section of an ABID.

    Leading/trailing underscores are stripped and the text is lowercased, then a
    single trailing underscore is appended, so a 'snp' / 'snp_' / 'SNP_' prefix
    all produce:

    'snp_'

    A string prefix is required: the former fallback that mapped a missing (None)
    prefix to 'obj_' is disabled.

    Raises AssertionError if the normalized prefix is not exactly 3 characters long.
    """
    prefix = prefix.strip('_').lower()
    # 3 characters + the trailing underscore make up the full prefix section
    assert len(prefix) == 3, f'ABID prefix must be exactly 3 characters long, got {prefix!r}'
    return prefix + '_'
@enforce_types
def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
"""
'E4A5CCD9' # takes first 8 characters of sha256(url)
@ -148,12 +154,14 @@ def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
uri = str(uri)
return uri_hash(uri, salt=salt)[:ABID_URI_LEN]
@enforce_types
def abid_part_from_ts(ts: datetime) -> str:
    """
    Derive the timestamp section of an ABID from a datetime.

    The datetime is converted to a ULID and the first ABID_TS_LEN characters of
    its string form are returned, e.g.:

    '01HX9FPYTR'   # produces 10 character Timestamp section of ulid based on added date

    A datetime is required: there is no longer a fallback that generates a brand
    new ULID when ts is missing.
    """
    return str(ulid.from_timestamp(ts))[:ABID_TS_LEN]
@enforce_types
def abid_part_from_subtype(subtype: str) -> str:
"""
Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
@ -165,6 +173,7 @@ def abid_part_from_subtype(subtype: str) -> str:
return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN].upper()
@enforce_types
def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
"""
'ZYEBQE' # takes last 6 characters of randomness from existing legacy uuid db field
@ -186,17 +195,22 @@ def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
return str(rand)[-ABID_RAND_LEN:].upper()
@enforce_types
def abid_hashes_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> Dict[str, str]:
    """
    Compute every section of an ABID from its source values, without assembling them.

    Each source value is passed through its matching abid_part_from_* helper.
    The result is keyed by section name ('prefix', 'ts', 'uri', 'subtype', 'rand'),
    so it can be splatted straight into ABID(**hashes).
    Only the uri section takes the salt.
    """
    return {
        'prefix': abid_part_from_prefix(prefix),
        'ts': abid_part_from_ts(ts),
        'uri': abid_part_from_uri(uri, salt=salt),
        'subtype': abid_part_from_subtype(subtype),
        'rand': abid_part_from_rand(rand),
    }


@enforce_types
def abid_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> ABID:
    """
    Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).

    The individual sections are computed by abid_hashes_from_values() and then
    assembled into an ABID.

    Raises AssertionError if any of the assembled ABID's ulid / uuid / typeid
    representations comes back empty.
    """
    abid = ABID(**abid_hashes_from_values(prefix, ts, uri, subtype, rand, salt=salt))
    # sanity check: every alternate representation must be derivable from the result
    assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for ts={ts} uri={uri} subtyp={subtype} rand={rand}'
    return abid

View file

@ -16,21 +16,20 @@ def highlight_diff(display_val, compare_val):
display_val = str(display_val)
compare_val = str(compare_val)
diff_chars = mark_safe('').join(
return mark_safe(''.join(
format_html('<span style="color: red;">{}</span>', display_val[i])
if display_val[i] != compare_val[i] else
format_html('<span display="color: black">{}</span>', display_val[i])
for i in range(len(display_val))
)
return diff_chars
))
def get_abid_info(self, obj, request=None):
try:
abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)}' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅'
fresh_abid = obj.generate_abid()
fresh_abid_diff = f' != &nbsp; .fresh_abid: {highlight_diff(obj.ABID, fresh_abid)}' if str(fresh_abid) != str(obj.ABID) else ''
fresh_uuid_diff = f' != &nbsp; .fresh_uuid: {highlight_diff(obj.ABID.uuid, fresh_abid.uuid)}' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else ''
fresh_abid = obj.ABID_FRESH
fresh_abid_diff = f' != &nbsp; .fresh_abid: {highlight_diff(fresh_abid, obj.ABID)}' if str(fresh_abid) != str(obj.ABID) else ''
fresh_uuid_diff = f' != &nbsp; .fresh_uuid: {highlight_diff(fresh_abid.uuid, obj.ABID.uuid)}' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else ''
id_fresh_abid_diff = f' != .fresh_abid ❌' if str(fresh_abid.uuid) != str(obj.id) else ' == .fresh_abid ✅'
id_abid_diff = f' != .abid.uuid: {highlight_diff(obj.ABID.uuid, obj.id)}' if str(obj.id) != str(obj.ABID.uuid) else ' == .abid ✅'
@ -74,16 +73,16 @@ def get_abid_info(self, obj, request=None):
</div>
''',
obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url,
str(obj.abid), mark_safe(fresh_abid_diff),
str(obj.ABID.uuid), mark_safe(fresh_uuid_diff),
highlight_diff(obj.abid, fresh_abid), mark_safe(fresh_abid_diff),
highlight_diff(obj.ABID.uuid, fresh_abid.uuid), mark_safe(fresh_uuid_diff),
str(obj.id), mark_safe(id_pk_diff + id_abid_diff + id_fresh_abid_diff),
# str(fresh_abid.uuid), mark_safe(fresh_uuid_diff),
# str(fresh_abid), mark_safe(fresh_abid_diff),
obj.ABID.ts, str(obj.ABID.uuid)[0:14], mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(),
obj.ABID.uri, str(obj.ABID.uuid)[14:26], mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']),
obj.ABID.subtype, str(obj.ABID.uuid)[26:28], mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']),
obj.ABID.rand, str(obj.ABID.uuid)[28:36], mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
str(getattr(obj, 'old_id', '')),
highlight_diff(obj.ABID.ts, derived_ts), highlight_diff(str(obj.ABID.uuid)[0:14], str(fresh_abid.uuid)[0:14]), mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(),
highlight_diff(obj.ABID.uri, derived_uri), highlight_diff(str(obj.ABID.uuid)[14:26], str(fresh_abid.uuid)[14:26]), mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']),
highlight_diff(obj.ABID.subtype, derived_subtype), highlight_diff(str(obj.ABID.uuid)[26:28], str(fresh_abid.uuid)[26:28]), mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']),
highlight_diff(obj.ABID.rand, derived_rand), highlight_diff(str(obj.ABID.uuid)[28:36], str(fresh_abid.uuid)[28:36]), mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
highlight_diff(getattr(obj, 'old_id', ''), obj.pk),
)
except Exception as e:
return str(e)

View file

@ -352,7 +352,7 @@ class SnapshotActionForm(ActionForm):
@admin.register(Snapshot, site=archivebox_admin)
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
list_display = ('added', 'title_str', 'files', 'size', 'url_str')
sort_fields = ('title_str', 'url_str', 'added', 'files')
sort_fields = ('title_str', 'url_str', 'added')
readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name')
list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags__name')
@ -510,6 +510,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
# ordering='archiveresult_count',
)
def files(self, obj):
# return '-'
return snapshot_icons(obj)

View file

@ -118,7 +118,7 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
def snapshot_icons(snapshot) -> str:
cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
cache_key = f'result_icons:{snapshot.pk}:{(snapshot.modified or snapshot.created or snapshot.added).timestamp()}'
def calc_snapshot_icons():
from core.models import ArchiveResult
@ -133,6 +133,7 @@ def snapshot_icons(snapshot) -> str:
else:
archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
# import ipdb; ipdb.set_trace()
link = snapshot.as_link()
path = link.archive_path
canon = link.canonical_outputs()
@ -197,7 +198,12 @@ def snapshot_icons(snapshot) -> str:
# print(((end - start).total_seconds()*1000) // 1, 'ms')
return result
return cache.get_or_set(cache_key, calc_snapshot_icons)
# return calc_snapshot_icons()
cache_result = cache.get(cache_key)
if cache_result:
return cache_result
fresh_result = calc_snapshot_icons()
cache.set(cache_key, fresh_result, timeout=60 * 60 * 24)
return fresh_result