Adds support for HEADLESS_USER_AGENT for Chrome

This commit is contained in:
noncetonic 2019-03-19 05:32:48 -07:00 committed by GitHub
parent 127c72bd79
commit a13f22d15a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -30,6 +30,7 @@ from config import (
SUBMIT_ARCHIVE_DOT_ORG, SUBMIT_ARCHIVE_DOT_ORG,
COOKIES_FILE, COOKIES_FILE,
WGET_USER_AGENT, WGET_USER_AGENT,
HEADLESS_USER_AGENT,
CHROME_USER_DATA_DIR, CHROME_USER_DATA_DIR,
CHROME_HEADLESS, CHROME_HEADLESS,
CHROME_SANDBOX, CHROME_SANDBOX,
@@ -266,6 +267,7 @@ def fetch_pdf(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_DATA_DI
'--hide-scrollbars', '--hide-scrollbars',
'--timeout={}'.format((timeout) * 1000), '--timeout={}'.format((timeout) * 1000),
*(() if CHECK_SSL_VALIDITY else ('--disable-web-security', '--ignore-certificate-errors')), *(() if CHECK_SSL_VALIDITY else ('--disable-web-security', '--ignore-certificate-errors')),
*(('--user-agent={}'.format(HEADLESS_USER_AGENT),) if HEADLESS_USER_AGENT else ()),
link['url'] link['url']
] ]
end = progress(timeout, prefix=' ') end = progress(timeout, prefix=' ')
@@ -304,6 +306,7 @@ def fetch_screenshot(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_
'--hide-scrollbars', '--hide-scrollbars',
'--timeout={}'.format((timeout) * 1000), '--timeout={}'.format((timeout) * 1000),
*(() if CHECK_SSL_VALIDITY else ('--disable-web-security', '--ignore-certificate-errors')), *(() if CHECK_SSL_VALIDITY else ('--disable-web-security', '--ignore-certificate-errors')),
*(('--user-agent={}'.format(HEADLESS_USER_AGENT),) if HEADLESS_USER_AGENT else ()),
# '--full-page', # TODO: make this actually work using ./bin/screenshot fullPage: true # '--full-page', # TODO: make this actually work using ./bin/screenshot fullPage: true
link['url'], link['url'],
] ]
@@ -342,6 +345,7 @@ def fetch_dom(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_DATA_DI
*chrome_headless(user_data_dir=user_data_dir), *chrome_headless(user_data_dir=user_data_dir),
'--dump-dom', '--dump-dom',
'--timeout={}'.format((timeout) * 1000), '--timeout={}'.format((timeout) * 1000),
*(('--user-agent={}'.format(HEADLESS_USER_AGENT),) if HEADLESS_USER_AGENT else ()),
link['url'] link['url']
] ]
end = progress(timeout, prefix=' ') end = progress(timeout, prefix=' ')