mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
fix ABID generation consistency when self._state.adding is True
This commit is contained in:
parent
9d2116ad9a
commit
4ae186dfca
4 changed files with 91 additions and 33 deletions
|
@ -1,7 +1,7 @@
|
|||
"""
|
||||
This file provides the Django ABIDField and ABIDModel base model to inherit from.
|
||||
|
||||
It implements the ArchiveBox ID (ABID) interfaces including abid_values, get_abid, .abid, .uuid, .id.
|
||||
It implements the ArchiveBox ID (ABID) interfaces including abid_values, generate_abid, .abid, .uuid, .id.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, Union, List, Set, NamedTuple, cast
|
||||
|
@ -82,14 +82,17 @@ class ABIDModel(models.Model):
|
|||
abstract = True
|
||||
|
||||
def save(self, *args: Any, **kwargs: Any) -> None:
    """
    Save the row, first syncing self.id and self.abid with self.ABID.

    When the row is being created (self._state.adding) or the field is empty,
    self.id is set to self.ABID.uuid and self.abid is set to str(self.ABID)
    before delegating to the parent save().

    Raises AssertionError after saving if self.id or self.abid do not match
    the value derived from self.ABID.
    """
    # when first creating a row, self.ABID is the source of truth
    # overwrite default prefilled self.id & self.abid with generated self.ABID value
    if self._state.adding or not self.id:
        self.id = self.ABID.uuid
    if self._state.adding or not self.abid:
        self.abid = str(self.ABID)

    super().save(*args, **kwargs)

    # post-save sanity checks: the stored identifiers must agree with self.ABID
    assert str(self.id) == str(self.ABID.uuid), f'self.id {self.id} does not match self.ABID {self.ABID.uuid}'
    # report the values that were actually compared (self.abid vs self.ABID)
    assert str(self.abid) == str(self.ABID), f'self.abid {self.abid} does not match self.ABID {self.ABID}'
|
||||
|
||||
|
||||
@property
|
||||
def abid_values(self) -> Dict[str, Any]:
|
||||
|
@ -101,7 +104,7 @@ class ABIDModel(models.Model):
|
|||
'rand': eval(self.abid_rand_src),
|
||||
}
|
||||
|
||||
def get_abid(self) -> ABID:
|
||||
def generate_abid(self) -> ABID:
|
||||
"""
|
||||
Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).
|
||||
"""
|
||||
|
@ -143,7 +146,30 @@ class ABIDModel(models.Model):
|
|||
"""
|
||||
ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
|
||||
"""
|
||||
return ABID.parse(self.abid) if getattr(self, 'abid', None) else self.get_abid()
|
||||
abid = None
|
||||
try:
|
||||
abid = abid or ABID.parse(self.pk)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
abid = abid or ABID.parse(self.id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
abid = abid or ABID.parse(self.uuid)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
abid = abid or ABID.parse(self.abid)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
abid = abid or self.generate_abid()
|
||||
|
||||
return abid
|
||||
|
||||
@property
|
||||
def ULID(self) -> ULID:
|
||||
|
@ -276,7 +302,7 @@ def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDMode
|
|||
)
|
||||
|
||||
for obj in qs:
|
||||
if obj.get_abid() == abid:
|
||||
if obj.generate_abid() == abid:
|
||||
# found exact match, no need to keep iterating
|
||||
return [obj]
|
||||
partial_matches.append(obj)
|
||||
|
|
|
@ -55,11 +55,9 @@ class APIToken(ABIDModel):
|
|||
def __json__(self) -> dict:
|
||||
return {
|
||||
"TYPE": "APIToken",
|
||||
"uuid": str(self.id),
|
||||
"ulid": str(self.ulid),
|
||||
"abid": str(self.get_abid()),
|
||||
"user_id": str(self.user.id),
|
||||
"user_username": self.user.username,
|
||||
"id": str(self.pk),
|
||||
"abid": str(self.ABID),
|
||||
"created_by_id": str(self.created_by_id),
|
||||
"token": self.token,
|
||||
"created": self.created.isoformat(),
|
||||
"expires": self.expires_as_iso8601,
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
__package__ = 'archivebox.core'
|
||||
|
||||
import os
|
||||
import json
|
||||
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from contextlib import redirect_stdout
|
||||
|
@ -197,28 +199,29 @@ def get_abid_info(self, obj):
|
|||
<a href="{}" style="font-size: 16px; font-family: monospace; user-select: all; border-radius: 8px; background-color: #ddf; padding: 3px 5px; border: 1px solid #aaa; margin-bottom: 8px; display: inline-block; vertical-align: top;">{}</a> <a href="{}" style="color: limegreen; font-size: 0.9em; vertical-align: 1px; font-family: monospace;">📖 API DOCS</a>
|
||||
<br/><hr/>
|
||||
<div style="opacity: 0.8">
|
||||
TS: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})<br/>
|
||||
URI: <code style="font-size: 10px; user-select: all"><b>{}</b></code> (<span style="display:inline-block; vertical-align: -4px; user-select: all; width: 230px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</span>)<br/>
|
||||
SUBTYPE: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})
|
||||
RAND: <code style="font-size: 10px; user-select: all"><b>{}</b></code> ({})
|
||||
SALT: <code style="font-size: 10px; user-select: all"><b style="display:inline-block; user-select: all; width: 50px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</b></code>
|
||||
<br/><hr/>
|
||||
<small style="opacity: 0.8">.abid: <code style="font-size: 10px; user-select: all">{}</code></small><br/>
|
||||
<small style="opacity: 0.8">.abid.uuid: <code style="font-size: 10px; user-select: all">{}</code></small><br/>
|
||||
<small style="opacity: 0.8">.id: <code style="font-size: 10px; user-select: all">{}</code></small><br/>
|
||||
<hr/>
|
||||
TS: <code style="font-size: 10px;"><b style="user-select: all">{}</b> {}</code> {}: <code style="user-select: all">{}</code><br/>
|
||||
URI: <code style="font-size: 10px; "><b style="user-select: all">{}</b> {}</code> <span style="display:inline-block; vertical-align: -4px; width: 290px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}: <code style="user-select: all">{}</code></span>
|
||||
SALT: <code style="font-size: 10px;"><b style="display:inline-block; user-select: all; width: 50px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{}</b></code><br/>
|
||||
SUBTYPE: <code style="font-size: 10px;"><b style="user-select: all">{}</b> {}</code> {}: <code style="user-select: all">{}</code><br/>
|
||||
RAND: <code style="font-size: 10px;"><b style="user-select: all">{}</b> {}</code> {}: <code style="user-select: all">{}</code>
|
||||
<br/><hr/>
|
||||
<small style="opacity: 0.5">.old_id: <code style="font-size: 10px; user-select: all">{}</code></small><br/>
|
||||
</div>
|
||||
''',
|
||||
obj.api_url, obj.api_url, obj.api_docs_url,
|
||||
obj.ABID.ts, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
|
||||
obj.ABID.uri, str(obj.abid_values['uri']),
|
||||
obj.ABID.subtype, str(obj.abid_values['subtype']),
|
||||
obj.ABID.rand, str(obj.abid_values['rand'])[-7:],
|
||||
obj.ABID.uri_salt,
|
||||
str(obj.abid),
|
||||
str(obj.ABID.uuid),
|
||||
obj.id,
|
||||
getattr(obj, 'old_id', ''),
|
||||
str(obj.id),
|
||||
obj.ABID.ts, str(obj.ABID.uuid)[0:14], obj.abid_ts_src, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
|
||||
obj.ABID.uri, str(obj.ABID.uuid)[14:26], obj.abid_uri_src, str(obj.abid_values['uri']),
|
||||
obj.ABID.uri_salt,
|
||||
obj.ABID.subtype, str(obj.ABID.uuid)[26:28], obj.abid_subtype_src, str(obj.abid_values['subtype']),
|
||||
obj.ABID.rand, str(obj.ABID.uuid)[28:36], obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
|
||||
str(getattr(obj, 'old_id', '')),
|
||||
)
|
||||
|
||||
|
||||
|
@ -568,9 +571,9 @@ class TagAdmin(admin.ModelAdmin):
|
|||
class ArchiveResultAdmin(admin.ModelAdmin):
|
||||
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
|
||||
sort_fields = ('start_ts', 'extractor', 'status')
|
||||
readonly_fields = ('snapshot_info', 'tags_str', 'created', 'modified', 'API')
|
||||
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created', 'modified', 'API', 'output_summary')
|
||||
search_fields = ('id', 'old_id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
|
||||
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'cmd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', *readonly_fields)
|
||||
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
|
||||
autocomplete_fields = ['snapshot']
|
||||
|
||||
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
|
||||
|
@ -593,6 +596,7 @@ class ArchiveResultAdmin(admin.ModelAdmin):
|
|||
try:
|
||||
return get_abid_info(self, obj)
|
||||
except Exception as e:
|
||||
raise e
|
||||
return str(e)
|
||||
|
||||
@admin.display(
|
||||
|
@ -606,7 +610,7 @@ class ArchiveResultAdmin(admin.ModelAdmin):
|
|||
'<pre>{}</pre>',
|
||||
' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
|
||||
)
|
||||
|
||||
|
||||
def output_str(self, result):
|
||||
return format_html(
|
||||
'<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
|
||||
|
@ -614,3 +618,33 @@ class ArchiveResultAdmin(admin.ModelAdmin):
|
|||
result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
|
||||
result.output,
|
||||
)
|
||||
|
||||
def output_summary(self, result):
    """
    Render an HTML summary of an ArchiveResult's output for the admin page.

    The summary contains, in order: the raw result.output value, a link to the
    snapshot's index of result files, the snapshot dir / output path pair, and
    a listing (at most two directory levels deep) of the files found by walking
    the output path. If the output path does not exist on disk, the snapshot
    directory is walked instead.

    Returns a safe HTML string; every interpolated value is escaped by format_html.
    """
    # local import so this method does not depend on the module's import block
    from django.utils.safestring import mark_safe

    snapshot_dir = Path(OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
    # guard against an empty/None output: joining '' leaves snapshot_dir unchanged
    path_from_output_str = snapshot_dir / str(result.output or '')

    parts = [
        format_html(
            '<pre style="display: inline-block">{}</pre><br/>',
            result.output,
        ),
        format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp)),
        format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output)),
    ]

    if path_from_output_str.exists():
        root_dir = str(path_from_output_str)
    else:
        root_dir = str(snapshot_dir)

    # NOTE: os.walk yields nothing when root_dir is a regular file, so an output
    # that points at a single file produces no listing below the header.
    for root, dirs, files in os.walk(root_dir):
        # every root yielded by os.walk starts with root_dir, so strip only that
        # prefix (str.replace would also remove later occurrences of the same text)
        depth = root[len(root_dir):].count(os.sep) + 1
        if depth >= 2:
            # only two levels are displayed: prune in place so os.walk
            # does not descend into (and stat) deeper directories
            dirs[:] = []
        indent = ' ' * 4 * (depth)
        parts.append(format_html('<b style="padding: 1px">{}{}/</b><br/>', indent, os.path.basename(root)))
        indentation_str = ' ' * 4 * (depth + 1)
        for filename in sorted(files):
            is_hidden = filename.startswith('.')
            # opacity renders as "1.2" (fully visible) or "0.2" (dimmed dotfiles)
            parts.append(format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip()))

    # every part was escaped by format_html and the closing tags are a constant,
    # so the joined string is safe to mark as HTML
    return mark_safe(''.join(parts) + '</code></pre>')
|
||||
|
|
|
@ -372,7 +372,7 @@ class ArchiveResult(ABIDModel):
|
|||
abid_ts_src = 'self.snapshot.added'
|
||||
abid_uri_src = 'self.snapshot.url'
|
||||
abid_subtype_src = 'self.extractor'
|
||||
abid_rand_src = 'self.id'
|
||||
abid_rand_src = 'self.old_id'
|
||||
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
|
||||
|
||||
old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID')
|
||||
|
|
Loading…
Reference in a new issue