mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
add chrome headless option and improve default data dir finding
This commit is contained in:
parent
8630c0fdaa
commit
1c1bc76ac1
2 changed files with 39 additions and 9 deletions
|
@ -31,6 +31,7 @@ from config import (
|
|||
COOKIES_FILE,
|
||||
WGET_USER_AGENT,
|
||||
CHROME_USER_DATA_DIR,
|
||||
CHROME_HEADLESS,
|
||||
CHROME_SANDBOX,
|
||||
TIMEOUT,
|
||||
MEDIA_TIMEOUT,
|
||||
|
@ -613,14 +614,42 @@ def fetch_git(link_dir, link, timeout=TIMEOUT):
|
|||
'output': output,
|
||||
}
|
||||
|
||||
def chrome_headless(binary=CHROME_BINARY, user_data_dir=CHROME_USER_DATA_DIR):
|
||||
args = [binary, '--headless']
|
||||
if not CHROME_SANDBOX:
|
||||
def chrome_headless(binary=CHROME_BINARY, user_data_dir=CHROME_USER_DATA_DIR, headless=CHROME_HEADLESS, sandbox=CHROME_SANDBOX):
|
||||
global USER_DATA_DIR
|
||||
user_data_dir = user_data_dir or USER_DATA_DIR
|
||||
cmd_args = [binary]
|
||||
|
||||
if headless:
|
||||
cmd_args += ('--headless',)
|
||||
|
||||
if not sandbox:
|
||||
# don't use GPU or sandbox when running inside a Docker container
|
||||
args += ['--no-sandbox', '--disable-gpu']
|
||||
default_profile = os.path.expanduser('~/Library/Application Support/Google/Chrome')
|
||||
cmd_args += ('--no-sandbox', '--disable-gpu')
|
||||
|
||||
|
||||
# Find chrome user data directory
|
||||
default_profile_paths = (
|
||||
'~/.config/chromium',
|
||||
'~/.config/google-chrome',
|
||||
'~/.config/google-chrome-beta',
|
||||
'~/.config/google-chrome-unstable',
|
||||
'~/Library/Application Support/Chromium',
|
||||
'~/Library/Application Support/Google/Chrome',
|
||||
'~/Library/Application Support/Google/Chrome Canary',
|
||||
'~/AppData/Local/Chromium/User Data',
|
||||
'~/AppData/Local/Google/Chrome/User Data',
|
||||
'~/AppData/Local/Google/Chrome SxS/User Data',
|
||||
)
|
||||
if user_data_dir:
|
||||
args.append('--user-data-dir={}'.format(user_data_dir))
|
||||
elif os.path.exists(default_profile):
|
||||
args.append('--user-data-dir={}'.format(default_profile))
|
||||
return args
|
||||
cmd_args.append('--user-data-dir={}'.format(user_data_dir))
|
||||
else:
|
||||
for path in default_profile_paths:
|
||||
full_path = os.path.expanduser(path)
|
||||
if os.path.exists(full_path):
|
||||
USER_DATA_DIR = full_path
|
||||
cmd_args.append('--user-data-dir={}'.format(full_path))
|
||||
break
|
||||
return cmd_args
|
||||
|
||||
|
||||
USER_DATA_DIR = CHROME_USER_DATA_DIR
|
||||
|
|
|
@ -37,6 +37,7 @@ GIT_DOMAINS = os.getenv('GIT_DOMAINS', 'github.com,bitbuck
|
|||
WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', 'ArchiveBox/{GIT_SHA} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}')
|
||||
COOKIES_FILE = os.getenv('COOKIES_FILE', None)
|
||||
CHROME_USER_DATA_DIR = os.getenv('CHROME_USER_DATA_DIR', None)
|
||||
CHROME_HEADLESS = os.getenv('CHROME_HEADLESS', 'True' ).lower() == 'true'
|
||||
|
||||
CURL_BINARY = os.getenv('CURL_BINARY', 'curl')
|
||||
GIT_BINARY = os.getenv('GIT_BINARY', 'git')
|
||||
|
|
Loading…
Reference in a new issue