fix ABID generation consistency when self._state.adding is True

This commit is contained in:
Nick Sweeting 2024-08-20 05:56:19 -07:00
parent 9d2116ad9a
commit 4ae186dfca
No known key found for this signature in database
4 changed files with 91 additions and 33 deletions

View file

@ -1,7 +1,7 @@
"""
This file provides the Django ABIDField and ABIDModel base model to inherit from.
It implements the ArchiveBox ID (ABID) interfaces including abid_values, get_abid, .abid, .uuid, .id.
It implements the ArchiveBox ID (ABID) interfaces including abid_values, generate_abid, .abid, .uuid, .id.
"""
from typing import Any, Dict, Union, List, Set, NamedTuple, cast
@ -82,14 +82,17 @@ class ABIDModel(models.Model):
abstract = True
def save(self, *args: Any, **kwargs: Any) -> None:
    """
    Save the row, first syncing ``self.id`` and ``self.abid`` with ``self.ABID``.

    While the row is still being added (``self._state.adding``), or whenever
    ``self.id`` / ``self.abid`` is empty, the value derived via ``self.ABID``
    overwrites whatever is currently stored on the instance. After the parent
    ``save()`` returns, both fields are checked against ``self.ABID`` again.

    Args:
        *args: positional arguments forwarded unchanged to the parent ``save()``.
        **kwargs: keyword arguments forwarded unchanged to the parent ``save()``.

    Raises:
        AssertionError: if ``self.id`` or ``self.abid`` disagrees with
            ``self.ABID`` after the row has been written.
    """
    # when first creating a row, self.ABID is the source of truth
    # overwrite default prefilled self.id & self.abid with generated self.ABID value
    if self._state.adding or not self.id:
        self.id = self.ABID.uuid
    if self._state.adding or not self.abid:
        self.abid = str(self.ABID)

    super().save(*args, **kwargs)

    # Post-save sanity checks: each message reports the two values actually
    # being compared, so a mismatch can be diagnosed from the message alone.
    assert str(self.id) == str(self.ABID.uuid), f'self.id {self.id} does not match self.ABID {self.ABID.uuid}'
    assert str(self.abid) == str(self.ABID), f'self.abid {self.abid} does not match self.ABID {self.ABID}'
@property
def abid_values(self) -> Dict[str, Any]:
@ -101,7 +104,7 @@ class ABIDModel(models.Model):
'rand': eval(self.abid_rand_src),
}
def get_abid(self) -> ABID:
def generate_abid(self) -> ABID:
"""
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
"""
@ -143,7 +146,30 @@ class ABIDModel(models.Model):
"""
ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
"""
return ABID.parse(self.abid) if getattr(self, 'abid', None) else self.get_abid()
abid = None
try:
abid = abid or ABID.parse(self.pk)
except Exception:
pass
try:
abid = abid or ABID.parse(self.id)
except Exception:
pass
try:
abid = abid or ABID.parse(self.uuid)
except Exception:
pass
try:
abid = abid or ABID.parse(self.abid)
except Exception:
pass
abid = abid or self.generate_abid()
return abid
@property
def ULID(self) -> ULID:
@ -276,7 +302,7 @@ def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDMode
)
for obj in qs:
if obj.get_abid() == abid:
if obj.generate_abid() == abid:
# found exact match, no need to keep iterating
return [obj]
partial_matches.append(obj)

View file

@ -55,11 +55,9 @@ class APIToken(ABIDModel):
def __json__(self) -> dict:
return {
"TYPE": "APIToken",
"uuid": str(self.id),
"ulid": str(self.ulid),
"abid": str(self.get_abid()),
"user_id": str(self.user.id),
"user_username": self.user.username,
"id": str(self.pk),
"abid": str(self.ABID),
"created_by_id": str(self.created_by_id),
"token": self.token,
"created": self.created.isoformat(),
"expires": self.expires_as_iso8601,

View file

@ -1,6 +1,8 @@
__package__ = 'archivebox.core'
import os
import json
from io import StringIO
from pathlib import Path
from contextlib import redirect_stdout
@ -197,28 +199,29 @@ def get_abid_info(self, obj):
<a href="{}" style="font-size: 16px; font-family: monospace; user-select: all; border-radius: 8px; background-color: #ddf; padding: 3px 5px; border: 1px solid #aaa; margin-bottom: 8px; display: inline-block; vertical-align: top;">{}</a> &nbsp; &nbsp; <a href="{}" style="color: limegreen; font-size: 0.9em; vertical-align: 1px; font-family: monospace;">📖 API DOCS</a>
<br/><hr/>
<div style="opacity: 0.8">
&nbsp; &nbsp; TS: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px; user-select: all"><b>{}</b></code> &nbsp; &nbsp; &nbsp;&nbsp; ({})<br/>
&nbsp; &nbsp; URI: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all"><b>{}</b></code> &nbsp;&nbsp; &nbsp; &nbsp; &nbsp;&nbsp; (<span style="display:inline-block; vertical-align: -4px; user-select: all; width: 230px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</span>)<br/>
&nbsp; &nbsp; SUBTYPE: &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({}) &nbsp; &nbsp;
&nbsp; RAND: &nbsp; <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({}) &nbsp; &nbsp;
&nbsp; SALT: &nbsp; <code style="font-size: 10px; user-select: all"><b style="display:inline-block; user-select: all; width: 50px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</b></code>
<br/><hr/>
&nbsp; &nbsp; <small style="opacity: 0.8">.abid: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all">{}</code></small><br/>
&nbsp; &nbsp; <small style="opacity: 0.8">.abid.uuid: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; user-select: all">{}</code></small><br/>
&nbsp; &nbsp; <small style="opacity: 0.8">.id: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px; user-select: all">{}</code></small><br/>
<hr/>
&nbsp; &nbsp; TS: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; {}</code> &nbsp; &nbsp; &nbsp;&nbsp; {}: <code style="user-select: all">{}</code><br/>
&nbsp; &nbsp; URI: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px; "><b style="user-select: all">{}</b> &nbsp; &nbsp; {}</code> &nbsp;&nbsp; &nbsp; &nbsp; &nbsp;&nbsp; <span style="display:inline-block; vertical-align: -4px; width: 290px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}: <code style="user-select: all">{}</code></span>
&nbsp; SALT: &nbsp; <code style="font-size: 10px;"><b style="display:inline-block; user-select: all; width: 50px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</b></code><br/>
&nbsp; &nbsp; SUBTYPE: &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}: <code style="user-select: all">{}</code><br/>
&nbsp; &nbsp; RAND: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; &nbsp; {}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}: <code style="user-select: all">{}</code>
<br/><hr/>
&nbsp; &nbsp; <small style="opacity: 0.5">.old_id: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;<code style="font-size: 10px; user-select: all">{}</code></small><br/>
</div>
''',
obj.api_url, obj.api_url, obj.api_docs_url,
obj.ABID.ts, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
obj.ABID.uri, str(obj.abid_values['uri']),
obj.ABID.subtype, str(obj.abid_values['subtype']),
obj.ABID.rand, str(obj.abid_values['rand'])[-7:],
obj.ABID.uri_salt,
str(obj.abid),
str(obj.ABID.uuid),
obj.id,
getattr(obj, 'old_id', ''),
str(obj.id),
obj.ABID.ts, str(obj.ABID.uuid)[0:14], obj.abid_ts_src, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
obj.ABID.uri, str(obj.ABID.uuid)[14:26], obj.abid_uri_src, str(obj.abid_values['uri']),
obj.ABID.uri_salt,
obj.ABID.subtype, str(obj.ABID.uuid)[26:28], obj.abid_subtype_src, str(obj.abid_values['subtype']),
obj.ABID.rand, str(obj.ABID.uuid)[28:36], obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
str(getattr(obj, 'old_id', '')),
)
@ -568,9 +571,9 @@ class TagAdmin(admin.ModelAdmin):
class ArchiveResultAdmin(admin.ModelAdmin):
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
sort_fields = ('start_ts', 'extractor', 'status')
readonly_fields = ('snapshot_info', 'tags_str', 'created', 'modified', 'API')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created', 'modified', 'API', 'output_summary')
search_fields = ('id', 'old_id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'cmd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', *readonly_fields)
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
autocomplete_fields = ['snapshot']
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
@ -593,6 +596,7 @@ class ArchiveResultAdmin(admin.ModelAdmin):
try:
return get_abid_info(self, obj)
except Exception as e:
raise e
return str(e)
@admin.display(
@ -606,7 +610,7 @@ class ArchiveResultAdmin(admin.ModelAdmin):
'<pre>{}</pre>',
' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
)
def output_str(self, result):
return format_html(
'<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
@ -614,3 +618,33 @@ class ArchiveResultAdmin(admin.ModelAdmin):
result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
result.output,
)
def output_summary(self, result):
    """
    Render an HTML summary of an ArchiveResult's output for the admin page.

    The summary contains the raw ``result.output`` value, a link to the
    snapshot's ``index.html#all`` listing, and a two-level file tree rooted at
    the output path (when that path exists on disk) or at the snapshot
    directory otherwise.

    Args:
        result: the object being displayed; this method reads its ``pwd``,
            ``output`` and ``snapshot.timestamp`` attributes.

    Returns:
        The safe HTML string assembled with ``format_html``.
    """
    max_depth = 2   # the tree root plus its immediate sub-directories

    # result.pwd is reduced to the part after 'data/' and re-rooted under OUTPUT_DIR
    snapshot_dir = Path(OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]

    output_str = format_html(
        '<pre style="display: inline-block">{}</pre><br/>',
        result.output,
    )
    output_str += format_html(
        '<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>',
        str(result.snapshot.timestamp),
    )

    # NOTE(review): guards against an unset output value, which would make the
    # path join raise TypeError; an empty string resolves to snapshot_dir itself.
    path_from_output_str = snapshot_dir / str(result.output or '')
    output_str += format_html(
        '<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>',
        str(snapshot_dir),
        str(result.output),
    )

    if path_from_output_str.exists():
        root_dir = str(path_from_output_str)
    else:
        root_dir = str(snapshot_dir)

    for root, dirs, files in os.walk(root_dir):
        # Every root yielded by os.walk starts with root_dir, so strip only that
        # prefix (str.replace would also remove later repeats of the same text).
        depth = root[len(root_dir):].count(os.sep) + 1

        if depth >= max_depth:
            # Prune in place so os.walk never descends into levels that
            # would not be rendered anyway.
            del dirs[:]
        else:
            # Visit sub-directories in a stable order, matching the sorted files.
            dirs.sort()

        indent = ' ' * 4 * depth
        output_str += format_html('<b style="padding: 1px">{}{}/</b><br/>', indent, os.path.basename(root))

        indentation_str = ' ' * 4 * (depth + 1)
        for filename in sorted(files):
            is_hidden = filename.startswith('.')
            # The template appends ".2" to the digit: hidden files get
            # opacity 0.2, everything else gets 1.2.
            output_str += format_html(
                '<span style="opacity: {}.2">{}{}</span><br/>',
                int(not is_hidden),
                indentation_str,
                filename.strip(),
            )

    # Pass the accumulated safe string as an argument instead of calling
    # format_html() with a bare template and no arguments.
    return format_html('{}</code></pre>', output_str)

View file

@ -372,7 +372,7 @@ class ArchiveResult(ABIDModel):
abid_ts_src = 'self.snapshot.added'
abid_uri_src = 'self.snapshot.url'
abid_subtype_src = 'self.extractor'
abid_rand_src = 'self.id'
abid_rand_src = 'self.old_id'
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID')