mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
After a timeout, Chrome leaves behind a SingletonLock file, which prevents future instances of Chrome from starting. When an extractor fails due to a timeout, remove this file.
This commit is contained in:
parent
00ecf57b0f
commit
603ce7ec10
4 changed files with 18 additions and 0 deletions
|
@ -9,6 +9,7 @@ from ..util import (
|
|||
enforce_types,
|
||||
is_static_file,
|
||||
chrome_args,
|
||||
chrome_cleanup,
|
||||
)
|
||||
from ..config import (
|
||||
TIMEOUT,
|
||||
|
@ -57,6 +58,7 @@ def save_dom(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||
except Exception as err:
|
||||
status = 'failed'
|
||||
output = err
|
||||
chrome_cleanup()
|
||||
finally:
|
||||
timer.end()
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ from ..util import (
|
|||
enforce_types,
|
||||
is_static_file,
|
||||
chrome_args,
|
||||
chrome_cleanup,
|
||||
)
|
||||
from ..config import (
|
||||
TIMEOUT,
|
||||
|
@ -54,6 +55,7 @@ def save_pdf(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
|
|||
except Exception as err:
|
||||
status = 'failed'
|
||||
output = err
|
||||
chrome_cleanup()
|
||||
finally:
|
||||
timer.end()
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ from ..util import (
|
|||
enforce_types,
|
||||
is_static_file,
|
||||
chrome_args,
|
||||
chrome_cleanup,
|
||||
)
|
||||
from ..config import (
|
||||
TIMEOUT,
|
||||
|
@ -54,6 +55,7 @@ def save_screenshot(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
|
|||
except Exception as err:
|
||||
status = 'failed'
|
||||
output = err
|
||||
chrome_cleanup()
|
||||
finally:
|
||||
timer.end()
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@ from requests.exceptions import RequestException, ReadTimeout
|
|||
|
||||
from .vendor.base32_crockford import encode as base32_encode # type: ignore
|
||||
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
|
||||
from os.path import lexists
|
||||
from os import remove as remove_file
|
||||
|
||||
try:
|
||||
import chardet
|
||||
|
@ -272,6 +274,16 @@ def chrome_args(**options) -> List[str]:
|
|||
|
||||
return cmd_args
|
||||
|
||||
def chrome_cleanup():
    """
    Cleans up any state or runtime files that chrome leaves behind when killed by
    a timeout or other error.

    Currently this only removes Chromium's ``SingletonLock`` file from the
    hard-coded archivebox user profile directory, and only when ``IN_DOCKER``
    is truthy. It is a no-op otherwise, and also when the lock file is absent.
    """

    # Imported at call time rather than at module level, as in the original
    # (presumably to avoid a circular import between util and config -- confirm).
    from .config import IN_DOCKER

    if not IN_DOCKER:
        return

    singleton_lock = "/home/archivebox/.config/chromium/SingletonLock"

    # Attempt the removal directly instead of checking for existence first:
    # a separate existence check can race with another process deleting the
    # lock, and an exception raised here would escape the callers' error
    # handlers. A missing lock file simply means there is nothing to clean up.
    try:
        remove_file(singleton_lock)
    except FileNotFoundError:
        pass
|
||||
|
||||
def ansi_to_html(text):
|
||||
"""
|
||||
|
|
Loading…
Reference in a new issue