diff --git a/archivebox/core/utils.py b/archivebox/core/utils.py
index 3c310525..9804d6ee 100644
--- a/archivebox/core/utils.py
+++ b/archivebox/core/utils.py
@@ -1,4 +1,5 @@
from django.utils.html import format_html
+from collections import defaultdict
from core.models import Snapshot, EXTRACTORS
from pathlib import Path
@@ -7,9 +8,10 @@ from pathlib import Path
def get_icons(snapshot: Snapshot) -> str:
archive_results = snapshot.archiveresult_set.filter(status="succeeded")
link = snapshot.as_link()
+ path = link.archive_path
canon = link.canonical_outputs()
output = ""
- output_template = '{} '
+ output_template = '{} '
icons = {
"singlefile": "❶",
"wget": "🆆",
@@ -23,34 +25,31 @@ def get_icons(snapshot: Snapshot) -> str:
"mercury": "🅼",
"warc": "📦"
}
- exclude = ["favicon", "archive_org"]
+ exclude = ["favicon", "title", "headers", "archive_org"]
# Missing specific entry for WARC
+ extractor_items = defaultdict(lambda: None)
for extractor, _ in EXTRACTORS:
for result in archive_results:
- if result.extractor != extractor:
- continue
- path = link.archive_path
- try:
- if extractor not in exclude:
- output += output_template.format(path, canon[f"{extractor}_path"],
- extractor, icons.get(extractor, "?"))
- if extractor == "wget":
- # warc isn't technically it's own extractor, so we have to add it after wget
- exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
- if exists:
- output += output_template.format(exists[0], "",
- "warc", icons.get("warc", "?"))
+ if result.extractor == extractor:
+ extractor_items[extractor] = result
- if extractor == "archive_org":
- # The check for archive_org is different, so it has to be handled separately
- target_path = Path(path) / "archive.org.txt"
- exists = target_path.exists()
- if exists:
- output += '{} '.format(canon["archive_org_path"],
- "archive_org", icons.get("archive_org", "?"))
+ for extractor, _ in EXTRACTORS:
+ if extractor not in exclude:
+ exists = extractor_items[extractor] is not None
+ output += output_template.format(path, canon[f"{extractor}_path"], str(exists),
+ extractor, icons.get(extractor, "?"))
+ if extractor == "wget":
+ # warc isn't technically its own extractor, so we have to add it after wget
+ exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
+ if exists:
+ output += output_template.format(exists[0], "", str(bool(exists)), "warc", icons.get("warc", "?"))
- except Exception as e:
- print(e)
+ if extractor == "archive_org":
+ # The check for archive_org is different, so it has to be handled separately
+ target_path = Path(path) / "archive.org.txt"
+ exists = target_path.exists()
+ output += '{} '.format(canon["archive_org_path"], str(exists),
+ "archive_org", icons.get("archive_org", "?"))
return format_html(f'{output}')
diff --git a/archivebox/themes/default/base.html b/archivebox/themes/default/base.html
index f778da16..77d912d5 100644
--- a/archivebox/themes/default/base.html
+++ b/archivebox/themes/default/base.html
@@ -226,6 +226,7 @@
.exists-False {
opacity: 0.1;
+ pointer-events: none;
}