diff --git a/archivebox/core/utils.py b/archivebox/core/utils.py
index 6266024b..14c40eaf 100644
--- a/archivebox/core/utils.py
+++ b/archivebox/core/utils.py
@@ -1,15 +1,16 @@
from django.utils.html import format_html
from core.models import Snapshot, EXTRACTORS
+from core.settings import DEBUG
from pathlib import Path
def get_icons(snapshot: Snapshot) -> str:
- archive_results = snapshot.archiveresult_set
+ archive_results = list(snapshot.archiveresult_set.all())
link = snapshot.as_link()
canon = link.canonical_outputs()
output = ""
- output_template = '{} '
+ output_template = '{} '
icons = {
"singlefile": "❶",
"wget": "🆆",
@@ -27,62 +28,30 @@ def get_icons(snapshot: Snapshot) -> str:
# Missing specific entry for WARC
for extractor, _ in EXTRACTORS:
- result = archive_results.filter(extractor=extractor, status="succeeded")
- path, exists = link.archive_path, result.exists()
- try:
- if extractor not in exclude:
- output += output_template.format(path, canon[f"{extractor}_path"],
- exists, extractor, icons.get(extractor, "?"))
- if extractor == "wget":
- # warc isn't technically it's own extractor, so we have to add it after wget
- exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
- if exists:
- output += output_template.format(exists[0], "",
- True, "warc", icons.get("warc", "?"))
+ for result in archive_results:
+ if result.extractor != extractor or result.status != "succeeded":
+ continue
+ path = link.archive_path
+ try:
+ if extractor not in exclude:
+ output += output_template.format(path, canon[f"{extractor}_path"],
+ extractor, icons.get(extractor, "?"))
+ if extractor == "wget":
+ # warc isn't technically its own extractor, so we have to add it after wget
+ exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
+ if exists:
+ output += output_template.format(exists[0], "",
+ "warc", icons.get("warc", "?"))
- if extractor == "archive_org" and exists:
- # The check for archive_org is different, so it has to be handled separately
- target_path = Path(path) / "archive.org.txt"
- exists = target_path.exists()
- if exists:
- output += '{} '.format(canon["archive_org_path"],
- True, "archive_org", icons.get("archive_org", "?"))
+ if extractor == "archive_org":
+ # The check for archive_org is different, so it has to be handled separately
+ target_path = Path(path) / "archive.org.txt"
+ exists = target_path.exists()
+ if exists:
+ output += '{} '.format(canon["archive_org_path"],
+ "archive_org", icons.get("archive_org", "?"))
- except Exception as e:
- print(e)
+ except Exception as e:
+ print(e)
return format_html(f'{output}')
-
-#def get_icons(snapshot: Snapshot) -> str:
-# link = snapshot.as_link()
-# canon = link.canonical_outputs()
-# out_dir = Path(link.link_dir)
-#
-# # slow version: highlights icons based on whether files exist or not for that output
-# # link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
-# # fast version: all icons are highlighted without checking for outputs in filesystem
-# link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
-#
-# return format_html(
-# ''
-# '❶ '
-# '🆆 '
-# '🅷 '
-# '📄 '
-# '💻 '
-# '📦 '
-# '📼 '
-# '🅶 '
-# '🏛 '
-# '',
-# *link_tuple(link, 'singlefile_path'),
-# *link_tuple(link, 'wget_path')[:2], any((out_dir / link.domain).glob('*')),
-# *link_tuple(link, 'pdf_path'),
-# *link_tuple(link, 'screenshot_path'),
-# *link_tuple(link, 'dom_path'),
-# *link_tuple(link, 'warc_path')[:2], any((out_dir / canon['warc_path']).glob('*.warc.gz')),
-# *link_tuple(link, 'media_path')[:2], any((out_dir / canon['media_path']).glob('*')),
-# *link_tuple(link, 'git_path')[:2], any((out_dir / canon['git_path']).glob('*')),
-# canon['archive_org_path'], (out_dir / 'archive.org.txt').exists(),
-# )
-#
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index 7cd8b104..ee540821 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -98,6 +98,7 @@ class PublicArchiveView(ListView):
query = self.request.GET.get('q')
if query:
qs = Snapshot.objects.filter(title__icontains=query)
+ qs = qs.prefetch_related("archiveresult_set").all()
for snapshot in qs:
snapshot.icons = get_icons(snapshot)
return qs