mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
abid gradual improvements, some regrets
This commit is contained in:
parent
dd97f01bfc
commit
d060eaa499
4 changed files with 51 additions and 31 deletions
|
@ -1,4 +1,6 @@
|
|||
from typing import NamedTuple, Any, Union, Optional
|
||||
__package__ = 'archivebox.abid_utils'
|
||||
|
||||
from typing import NamedTuple, Any, Union, Optional, Dict
|
||||
|
||||
import ulid
|
||||
import uuid6
|
||||
|
@ -9,6 +11,7 @@ from uuid import UUID
|
|||
from typeid import TypeID # type: ignore[import-untyped]
|
||||
from datetime import datetime
|
||||
|
||||
from ..util import enforce_types
|
||||
|
||||
|
||||
ABID_PREFIX_LEN = 4
|
||||
|
@ -108,6 +111,7 @@ class ABID(NamedTuple):
|
|||
####################################################
|
||||
|
||||
|
||||
@enforce_types
|
||||
def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
|
||||
"""
|
||||
'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25'
|
||||
|
@ -130,17 +134,19 @@ def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
|
|||
|
||||
return hashlib.sha256(uri_bytes).hexdigest().upper()
|
||||
|
||||
def abid_part_from_prefix(prefix: Optional[str]) -> str:
|
||||
@enforce_types
|
||||
def abid_part_from_prefix(prefix: str) -> str:
|
||||
"""
|
||||
'snp_'
|
||||
"""
|
||||
if prefix is None:
|
||||
return 'obj_'
|
||||
# if prefix is None:
|
||||
# return 'obj_'
|
||||
|
||||
prefix = prefix.strip('_').lower()
|
||||
assert len(prefix) == 3
|
||||
return prefix + '_'
|
||||
|
||||
@enforce_types
|
||||
def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
|
||||
"""
|
||||
'E4A5CCD9' # takes first 8 characters of sha256(url)
|
||||
|
@ -148,12 +154,14 @@ def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str:
|
|||
uri = str(uri)
|
||||
return uri_hash(uri, salt=salt)[:ABID_URI_LEN]
|
||||
|
||||
def abid_part_from_ts(ts: Optional[datetime]) -> str:
|
||||
@enforce_types
|
||||
def abid_part_from_ts(ts: datetime) -> str:
|
||||
"""
|
||||
'01HX9FPYTR' # produces 10 character Timestamp section of ulid based on added date
|
||||
"""
|
||||
return str(ulid.from_timestamp(ts) if ts else ulid.new())[:ABID_TS_LEN]
|
||||
return str(ulid.from_timestamp(ts))[:ABID_TS_LEN]
|
||||
|
||||
@enforce_types
|
||||
def abid_part_from_subtype(subtype: str) -> str:
|
||||
"""
|
||||
Snapshots have 01 type, other objects have other subtypes like wget/media/etc.
|
||||
|
@ -165,6 +173,7 @@ def abid_part_from_subtype(subtype: str) -> str:
|
|||
|
||||
return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN].upper()
|
||||
|
||||
@enforce_types
|
||||
def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
|
||||
"""
|
||||
'ZYEBQE' # takes last 6 characters of randomness from existing legacy uuid db field
|
||||
|
@ -186,17 +195,22 @@ def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str:
|
|||
return str(rand)[-ABID_RAND_LEN:].upper()
|
||||
|
||||
|
||||
def abid_from_values(prefix, ts, uri, subtype, rand, salt=DEFAULT_ABID_URI_SALT) -> ABID:
|
||||
@enforce_types
|
||||
def abid_hashes_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> Dict[str, str]:
|
||||
return {
|
||||
'prefix': abid_part_from_prefix(prefix),
|
||||
'ts': abid_part_from_ts(ts),
|
||||
'uri': abid_part_from_uri(uri, salt=salt),
|
||||
'subtype': abid_part_from_subtype(subtype),
|
||||
'rand': abid_part_from_rand(rand),
|
||||
}
|
||||
|
||||
@enforce_types
|
||||
def abid_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> ABID:
|
||||
"""
|
||||
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
|
||||
"""
|
||||
|
||||
abid = ABID(
|
||||
prefix=abid_part_from_prefix(prefix),
|
||||
ts=abid_part_from_ts(ts),
|
||||
uri=abid_part_from_uri(uri, salt=salt),
|
||||
subtype=abid_part_from_subtype(subtype),
|
||||
rand=abid_part_from_rand(rand),
|
||||
)
|
||||
abid = ABID(**abid_hashes_from_values(prefix, ts, uri, subtype, rand, salt=salt))
|
||||
assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for ts={ts} uri={uri} subtyp={subtype} rand={rand}'
|
||||
return abid
|
||||
|
|
|
@ -16,21 +16,20 @@ def highlight_diff(display_val, compare_val):
|
|||
display_val = str(display_val)
|
||||
compare_val = str(compare_val)
|
||||
|
||||
diff_chars = mark_safe('').join(
|
||||
return mark_safe(''.join(
|
||||
format_html('<span style="color: red;">{}</span>', display_val[i])
|
||||
if display_val[i] != compare_val[i] else
|
||||
format_html('<span display="color: black">{}</span>', display_val[i])
|
||||
for i in range(len(display_val))
|
||||
)
|
||||
return diff_chars
|
||||
))
|
||||
|
||||
def get_abid_info(self, obj, request=None):
|
||||
try:
|
||||
abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)} ❌' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅'
|
||||
|
||||
fresh_abid = obj.generate_abid()
|
||||
fresh_abid_diff = f' != .fresh_abid: {highlight_diff(obj.ABID, fresh_abid)} ❌' if str(fresh_abid) != str(obj.ABID) else '✅'
|
||||
fresh_uuid_diff = f' != .fresh_uuid: {highlight_diff(obj.ABID.uuid, fresh_abid.uuid)} ❌' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅'
|
||||
fresh_abid = obj.ABID_FRESH
|
||||
fresh_abid_diff = f' != .fresh_abid: {highlight_diff(fresh_abid, obj.ABID)} ❌' if str(fresh_abid) != str(obj.ABID) else '✅'
|
||||
fresh_uuid_diff = f' != .fresh_uuid: {highlight_diff(fresh_abid.uuid, obj.ABID.uuid)} ❌' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅'
|
||||
|
||||
id_fresh_abid_diff = f' != .fresh_abid ❌' if str(fresh_abid.uuid) != str(obj.id) else ' == .fresh_abid ✅'
|
||||
id_abid_diff = f' != .abid.uuid: {highlight_diff(obj.ABID.uuid, obj.id)} ❌' if str(obj.id) != str(obj.ABID.uuid) else ' == .abid ✅'
|
||||
|
@ -74,16 +73,16 @@ def get_abid_info(self, obj, request=None):
|
|||
</div>
|
||||
''',
|
||||
obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url,
|
||||
str(obj.abid), mark_safe(fresh_abid_diff),
|
||||
str(obj.ABID.uuid), mark_safe(fresh_uuid_diff),
|
||||
highlight_diff(obj.abid, fresh_abid), mark_safe(fresh_abid_diff),
|
||||
highlight_diff(obj.ABID.uuid, fresh_abid.uuid), mark_safe(fresh_uuid_diff),
|
||||
str(obj.id), mark_safe(id_pk_diff + id_abid_diff + id_fresh_abid_diff),
|
||||
# str(fresh_abid.uuid), mark_safe(fresh_uuid_diff),
|
||||
# str(fresh_abid), mark_safe(fresh_abid_diff),
|
||||
obj.ABID.ts, str(obj.ABID.uuid)[0:14], mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(),
|
||||
obj.ABID.uri, str(obj.ABID.uuid)[14:26], mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']),
|
||||
obj.ABID.subtype, str(obj.ABID.uuid)[26:28], mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']),
|
||||
obj.ABID.rand, str(obj.ABID.uuid)[28:36], mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
|
||||
str(getattr(obj, 'old_id', '')),
|
||||
highlight_diff(obj.ABID.ts, derived_ts), highlight_diff(str(obj.ABID.uuid)[0:14], str(fresh_abid.uuid)[0:14]), mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(),
|
||||
highlight_diff(obj.ABID.uri, derived_uri), highlight_diff(str(obj.ABID.uuid)[14:26], str(fresh_abid.uuid)[14:26]), mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']),
|
||||
highlight_diff(obj.ABID.subtype, derived_subtype), highlight_diff(str(obj.ABID.uuid)[26:28], str(fresh_abid.uuid)[26:28]), mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']),
|
||||
highlight_diff(obj.ABID.rand, derived_rand), highlight_diff(str(obj.ABID.uuid)[28:36], str(fresh_abid.uuid)[28:36]), mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
|
||||
highlight_diff(getattr(obj, 'old_id', ''), obj.pk),
|
||||
)
|
||||
except Exception as e:
|
||||
return str(e)
|
||||
|
|
|
@ -352,7 +352,7 @@ class SnapshotActionForm(ActionForm):
|
|||
@admin.register(Snapshot, site=archivebox_admin)
|
||||
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
|
||||
list_display = ('added', 'title_str', 'files', 'size', 'url_str')
|
||||
sort_fields = ('title_str', 'url_str', 'added', 'files')
|
||||
sort_fields = ('title_str', 'url_str', 'added')
|
||||
readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir')
|
||||
search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name')
|
||||
list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags__name')
|
||||
|
@ -510,6 +510,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
|
|||
# ordering='archiveresult_count',
|
||||
)
|
||||
def files(self, obj):
|
||||
# return '-'
|
||||
return snapshot_icons(obj)
|
||||
|
||||
|
||||
|
|
|
@ -118,7 +118,7 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
|
|||
|
||||
|
||||
def snapshot_icons(snapshot) -> str:
|
||||
cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
|
||||
cache_key = f'result_icons:{snapshot.pk}:{(snapshot.modified or snapshot.created or snapshot.added).timestamp()}'
|
||||
|
||||
def calc_snapshot_icons():
|
||||
from core.models import ArchiveResult
|
||||
|
@ -133,6 +133,7 @@ def snapshot_icons(snapshot) -> str:
|
|||
else:
|
||||
archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
|
||||
|
||||
# import ipdb; ipdb.set_trace()
|
||||
link = snapshot.as_link()
|
||||
path = link.archive_path
|
||||
canon = link.canonical_outputs()
|
||||
|
@ -197,7 +198,12 @@ def snapshot_icons(snapshot) -> str:
|
|||
# print(((end - start).total_seconds()*1000) // 1, 'ms')
|
||||
return result
|
||||
|
||||
return cache.get_or_set(cache_key, calc_snapshot_icons)
|
||||
# return calc_snapshot_icons()
|
||||
cache_result = cache.get(cache_key)
|
||||
if cache_result:
|
||||
return cache_result
|
||||
|
||||
fresh_result = calc_snapshot_icons()
|
||||
cache.set(cache_key, fresh_result, timeout=60 * 60 * 24)
|
||||
return fresh_result
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue