mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-26 14:10:20 +00:00
feat: Use prefetch_related to reduce the number of database queries on the public index view
This commit is contained in:
parent
8cfad64271
commit
c565fad75c
2 changed files with 27 additions and 57 deletions
|
@ -1,15 +1,16 @@
|
||||||
from django.utils.html import format_html
|
from django.utils.html import format_html
|
||||||
|
|
||||||
from core.models import Snapshot, EXTRACTORS
|
from core.models import Snapshot, EXTRACTORS
|
||||||
|
from core.settings import DEBUG
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
def get_icons(snapshot: Snapshot) -> str:
|
def get_icons(snapshot: Snapshot) -> str:
|
||||||
archive_results = snapshot.archiveresult_set
|
archive_results = list(snapshot.archiveresult_set.all())
|
||||||
link = snapshot.as_link()
|
link = snapshot.as_link()
|
||||||
canon = link.canonical_outputs()
|
canon = link.canonical_outputs()
|
||||||
output = ""
|
output = ""
|
||||||
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
|
output_template = '<a href="/{}/{}" class="exists-True" title="{}">{} </a>'
|
||||||
icons = {
|
icons = {
|
||||||
"singlefile": "❶",
|
"singlefile": "❶",
|
||||||
"wget": "🆆",
|
"wget": "🆆",
|
||||||
|
@ -27,62 +28,30 @@ def get_icons(snapshot: Snapshot) -> str:
|
||||||
# Missing specific entry for WARC
|
# Missing specific entry for WARC
|
||||||
|
|
||||||
for extractor, _ in EXTRACTORS:
|
for extractor, _ in EXTRACTORS:
|
||||||
result = archive_results.filter(extractor=extractor, status="succeeded")
|
for result in archive_results:
|
||||||
path, exists = link.archive_path, result.exists()
|
if result.extractor != extractor or result.status != "succeeded":
|
||||||
try:
|
continue
|
||||||
if extractor not in exclude:
|
path = link.archive_path
|
||||||
output += output_template.format(path, canon[f"{extractor}_path"],
|
try:
|
||||||
exists, extractor, icons.get(extractor, "?"))
|
if extractor not in exclude:
|
||||||
if extractor == "wget":
|
output += output_template.format(path, canon[f"{extractor}_path"],
|
||||||
# warc isn't technically its own extractor, so we have to add it after wget
|
extractor, icons.get(extractor, "?"))
|
||||||
exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
|
if extractor == "wget":
|
||||||
if exists:
|
# warc isn't technically its own extractor, so we have to add it after wget
|
||||||
output += output_template.format(exists[0], "",
|
exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
|
||||||
True, "warc", icons.get("warc", "?"))
|
if exists:
|
||||||
|
output += output_template.format(exists[0], "",
|
||||||
|
"warc", icons.get("warc", "?"))
|
||||||
|
|
||||||
if extractor == "archive_org" and exists:
|
if extractor == "archive_org":
|
||||||
# The check for archive_org is different, so it has to be handled separately
|
# The check for archive_org is different, so it has to be handled separately
|
||||||
target_path = Path(path) / "archive.org.txt"
|
target_path = Path(path) / "archive.org.txt"
|
||||||
exists = target_path.exists()
|
exists = target_path.exists()
|
||||||
if exists:
|
if exists:
|
||||||
output += '<a href="{}" class="exists-{}" title="{}">{} </a>'.format(canon["archive_org_path"],
|
output += '<a href="{}" class="exists-True" title="{}">{} </a>'.format(canon["archive_org_path"],
|
||||||
True, "archive_org", icons.get("archive_org", "?"))
|
"archive_org", icons.get("archive_org", "?"))
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
|
||||||
return format_html(f'<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">{output}<span>')
|
return format_html(f'<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">{output}<span>')
|
||||||
|
|
||||||
#def get_icons(snapshot: Snapshot) -> str:
|
|
||||||
# link = snapshot.as_link()
|
|
||||||
# canon = link.canonical_outputs()
|
|
||||||
# out_dir = Path(link.link_dir)
|
|
||||||
#
|
|
||||||
# # slow version: highlights icons based on whether files exist or not for that output
|
|
||||||
# # link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
|
|
||||||
# # fast version: all icons are highlighted without checking for outputs in filesystem
|
|
||||||
# link_tuple = lambda link, method: (link.archive_path, canon[method] or '', canon[method] and (out_dir / (canon[method] or 'notdone')).exists())
|
|
||||||
#
|
|
||||||
# return format_html(
|
|
||||||
# '<span class="files-icons" style="font-size: 1.2em; opacity: 0.8">'
|
|
||||||
# '<a href="/{}/{}" class="exists-{}" title="SingleFile">❶ </a>'
|
|
||||||
# '<a href="/{}/{}" class="exists-{}" title="Wget clone">🆆 </a> '
|
|
||||||
# '<a href="/{}/{}" class="exists-{}" title="HTML dump">🅷 </a> '
|
|
||||||
# '<a href="/{}/{}" class="exists-{}" title="PDF">📄 </a> '
|
|
||||||
# '<a href="/{}/{}" class="exists-{}" title="Screenshot">💻 </a> '
|
|
||||||
# '<a href="/{}/{}" class="exists-{}" title="WARC">📦 </a> '
|
|
||||||
# '<a href="/{}/{}/" class="exists-{}" title="Media files">📼 </a> '
|
|
||||||
# '<a href="/{}/{}/" class="exists-{}" title="Git repos">🅶 </a> '
|
|
||||||
# '<a href="{}" class="exists-{}" title="Archive.org snapshot">🏛 </a> '
|
|
||||||
# '</span>',
|
|
||||||
# *link_tuple(link, 'singlefile_path'),
|
|
||||||
# *link_tuple(link, 'wget_path')[:2], any((out_dir / link.domain).glob('*')),
|
|
||||||
# *link_tuple(link, 'pdf_path'),
|
|
||||||
# *link_tuple(link, 'screenshot_path'),
|
|
||||||
# *link_tuple(link, 'dom_path'),
|
|
||||||
# *link_tuple(link, 'warc_path')[:2], any((out_dir / canon['warc_path']).glob('*.warc.gz')),
|
|
||||||
# *link_tuple(link, 'media_path')[:2], any((out_dir / canon['media_path']).glob('*')),
|
|
||||||
# *link_tuple(link, 'git_path')[:2], any((out_dir / canon['git_path']).glob('*')),
|
|
||||||
# canon['archive_org_path'], (out_dir / 'archive.org.txt').exists(),
|
|
||||||
# )
|
|
||||||
#
|
|
||||||
|
|
|
@ -98,6 +98,7 @@ class PublicArchiveView(ListView):
|
||||||
query = self.request.GET.get('q')
|
query = self.request.GET.get('q')
|
||||||
if query:
|
if query:
|
||||||
qs = Snapshot.objects.filter(title__icontains=query)
|
qs = Snapshot.objects.filter(title__icontains=query)
|
||||||
|
qs = qs.prefetch_related("archiveresult_set").all()
|
||||||
for snapshot in qs:
|
for snapshot in qs:
|
||||||
snapshot.icons = get_icons(snapshot)
|
snapshot.icons = get_icons(snapshot)
|
||||||
return qs
|
return qs
|
||||||
|
|
Loading…
Reference in a new issue