mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-23 04:33:11 +00:00
use the db exclusively for icons instead of hammering filesystem
This commit is contained in:
parent
923f517a8f
commit
534ead2440
1 changed files with 21 additions and 12 deletions
|
@ -150,24 +150,33 @@ def snapshot_icons(snapshot) -> str:
|
||||||
|
|
||||||
for extractor, _ in EXTRACTORS:
|
for extractor, _ in EXTRACTORS:
|
||||||
if extractor not in exclude:
|
if extractor not in exclude:
|
||||||
outpath = extractor_outputs[extractor] and extractor_outputs[extractor].output
|
existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
||||||
if outpath:
|
# Check filesystem to see if anything is actually present (too slow, needs optimization/caching)
|
||||||
outpath = (Path(path) / outpath)
|
# if existing:
|
||||||
if outpath.is_file():
|
# existing = (Path(path) / existing)
|
||||||
exists = True
|
# if existing.is_file():
|
||||||
elif outpath.is_dir():
|
# existing = True
|
||||||
exists = any(outpath.glob('*.*'))
|
# elif existing.is_dir():
|
||||||
output += format_html(output_template, path, canon[f"{extractor}_path"], str(bool(outpath)),
|
# existing = any(existing.glob('*.*'))
|
||||||
|
output += format_html(output_template, path, canon[f"{extractor}_path"], str(bool(existing)),
|
||||||
extractor, icons.get(extractor, "?"))
|
extractor, icons.get(extractor, "?"))
|
||||||
if extractor == "wget":
|
if extractor == "wget":
|
||||||
# warc isn't technically its own extractor, so we have to add it after wget
|
# warc isn't technically its own extractor, so we have to add it after wget
|
||||||
exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
|
|
||||||
output += format_html(output_template, exists[0] if exists else '#', canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
|
# get from db (faster but less truthful)
|
||||||
|
exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
||||||
|
# get from filesystem (slower but more accurate)
|
||||||
|
# exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
|
||||||
|
output += format_html(output_template, 'warc/', canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
|
||||||
|
|
||||||
if extractor == "archive_org":
|
if extractor == "archive_org":
|
||||||
# The check for archive_org is different, so it has to be handled separately
|
# The check for archive_org is different, so it has to be handled separately
|
||||||
target_path = Path(path) / "archive.org.txt"
|
|
||||||
exists = target_path.exists()
|
# get from db (faster)
|
||||||
|
exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
||||||
|
# get from filesystem (slower)
|
||||||
|
# target_path = Path(path) / "archive.org.txt"
|
||||||
|
# exists = target_path.exists()
|
||||||
output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon["archive_org_path"], str(exists),
|
output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon["archive_org_path"], str(exists),
|
||||||
"archive_org", icons.get("archive_org", "?"))
|
"archive_org", icons.get("archive_org", "?"))
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue