catch json parse errors in link archiving

This commit is contained in:
Nick Sweeting 2019-02-04 08:00:08 -08:00
parent c37941efd1
commit ad7038e031

View file

@ -80,43 +80,46 @@ def archive_links(archive_path, links, source=None, resume=None):
def archive_link(link_dir, link, overwrite=True):
    """download the DOM, PDF, and a screenshot into a folder named after the link's timestamp

    Best-effort by design: ANY exception raised while archiving (including a
    malformed JSON index in an existing link_dir) is caught, reported to
    stdout, and the link dict is returned as-is so one bad link cannot abort
    the whole archiving run.
    """
    try:
        update_existing = os.path.exists(link_dir)
        if update_existing:
            # Merge the previously saved index under the incoming link fields;
            # incoming values win on key collisions.
            link = {
                **parse_json_link_index(link_dir),
                **link,
            }
        else:
            os.makedirs(link_dir)

        log_link_archive(link_dir, link, update_existing)

        # Each fetcher returns an updated link dict; which ones run is
        # controlled by the module-level FETCH_* / SUBMIT_* config flags.
        if FETCH_FAVICON:
            link = fetch_favicon(link_dir, link, overwrite=overwrite)

        if FETCH_WGET:
            link = fetch_wget(link_dir, link, overwrite=overwrite)

        if FETCH_PDF:
            link = fetch_pdf(link_dir, link, overwrite=overwrite)

        if FETCH_SCREENSHOT:
            link = fetch_screenshot(link_dir, link, overwrite=overwrite)

        if FETCH_DOM:
            link = fetch_dom(link_dir, link, overwrite=overwrite)

        if SUBMIT_ARCHIVE_DOT_ORG:
            link = archive_dot_org(link_dir, link, overwrite=overwrite)

        if FETCH_GIT:
            link = fetch_git(link_dir, link, overwrite=overwrite)

        if FETCH_MEDIA:
            link = fetch_media(link_dir, link, overwrite=overwrite)

        write_link_index(link_dir, link)
    except Exception as err:
        # BUG FIX: the original string was missing the f-prefix, so the
        # placeholders were printed literally instead of the actual
        # exception class name and message.
        print(f' ! Failed to archive link: {err.__class__.__name__}: {err}')

    return link