mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 14:44:18 +00:00
more granular errors
This commit is contained in:
parent
263f71862a
commit
fa8a7bde80
1 changed file with 16 additions and 8 deletions
24
archive.py
24
archive.py
|
@@ -157,12 +157,12 @@ def fetch_wget(out_dir, link, overwrite=False):
|
|||
if not os.path.exists('{}/{}'.format(out_dir, domain)) or overwrite:
|
||||
print(' - Downloading Full Site')
|
||||
CMD = [
|
||||
*'wget --no-clobber --page-requisites --adjust-extension --convert-links --no-parent'.split(' '),
|
||||
*'wget --mirror --page-requisites --adjust-extension --convert-links --no-parent'.split(' '),
|
||||
]
|
||||
try:
|
||||
output = run(CMD, stdout=DEVNULL, stderr=PIPE, cwd=out_dir, timeout=20) # dom.html
|
||||
if output.returncode:
|
||||
print(output.stderr.read())
|
||||
result = run(CMD, stdout=DEVNULL, stderr=PIPE, cwd=out_dir, timeout=20) # dom.html
|
||||
if result.returncode:
|
||||
print(' ', result.stderr)
|
||||
raise Exception('Failed to wget download')
|
||||
except Exception as e:
|
||||
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
||||
|
@@ -177,9 +177,13 @@ def fetch_pdf(out_dir, link, overwrite=False):
|
|||
chrome_args = '--headless --disable-gpu --print-to-pdf'.split(' ')
|
||||
try:
|
||||
result = run([CHROME_BINARY, *chrome_args, link['url']], stdout=DEVNULL, stderr=PIPE, cwd=out_dir, timeout=20) # output.pdf
|
||||
if run(['chmod', ARCHIVE_PERMISSIONS, 'output.pdf'], stdout=DEVNULL, stderr=DEVNULL, timeout=5).returncode:
|
||||
print(result.stderr.read())
|
||||
if result.returncode:
|
||||
print(' ', result.stderr)
|
||||
raise Exception('Failed to print PDF')
|
||||
chmod_result = run(['chmod', ARCHIVE_PERMISSIONS, 'output.pdf'], stdout=DEVNULL, stderr=DEVNULL, timeout=5)
|
||||
if chmod_result.returncode:
|
||||
print(' ', chmod_result.stderr)
|
||||
raise Exception('Failed to chmod PDF')
|
||||
except Exception as e:
|
||||
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
||||
else:
|
||||
|
@@ -193,9 +197,13 @@ def fetch_screenshot(out_dir, link, overwrite=False):
|
|||
chrome_args = '--headless --disable-gpu --screenshot'.split(' ')
|
||||
try:
|
||||
result = run([CHROME_BINARY, *chrome_args, '--window-size={}'.format(RESOLUTION), link['url']], stdout=DEVNULL, stderr=DEVNULL, cwd=out_dir, timeout=20) # sreenshot.png
|
||||
if run(['chmod', ARCHIVE_PERMISSIONS, 'screenshot.png'], stdout=DEVNULL, stderr=DEVNULL, timeout=5).returncode:
|
||||
print(result.stderr.read())
|
||||
if result.returncode:
|
||||
print(result.stderr)
|
||||
raise Exception('Failed to take screenshot')
|
||||
chmod_result = run(['chmod', ARCHIVE_PERMISSIONS, 'screenshot.png'], stdout=DEVNULL, stderr=DEVNULL, timeout=5)
|
||||
if chmod_result.returncode:
|
||||
print(result.stderr)
|
||||
raise Exception('Failed to chmod screenshot')
|
||||
except Exception as e:
|
||||
print(' Exception: {} {}'.format(e.__class__.__name__, e))
|
||||
else:
|
||||
|
|
Loading…
Reference in a new issue