ArchiveBox/config.py

186 lines
8.9 KiB
Python
Raw Normal View History

import os
2017-07-04 10:48:12 +00:00
import sys
2017-07-05 21:33:51 +00:00
import time
import shutil
2017-07-04 10:48:12 +00:00
2017-07-05 21:33:51 +00:00
from subprocess import run, PIPE, DEVNULL
from multiprocessing import Process
# os.getenv('VARIABLE', 'DEFAULT') gets the value of environment
# variable "VARIABLE" and if it is not set, sets it to 'DEFAULT'
# for boolean values, check to see if the string is 'true', and
# if so, the python variable will be True
2017-07-05 21:33:51 +00:00
IS_TTY = sys.stdout.isatty()
USE_COLOR = os.getenv('USE_COLOR', str(IS_TTY) ).lower() == 'true'
SHOW_PROGRESS = os.getenv('SHOW_PROGRESS', str(IS_TTY) ).lower() == 'true'
FETCH_WGET = os.getenv('FETCH_WGET', 'True' ).lower() == 'true'
FETCH_WGET_REQUISITES = os.getenv('FETCH_WGET_REQUISITES', 'True' ).lower() == 'true'
2017-07-05 21:33:51 +00:00
FETCH_AUDIO = os.getenv('FETCH_AUDIO', 'False' ).lower() == 'true'
FETCH_VIDEO = os.getenv('FETCH_VIDEO', 'False' ).lower() == 'true'
FETCH_PDF = os.getenv('FETCH_PDF', 'True' ).lower() == 'true'
FETCH_SCREENSHOT = os.getenv('FETCH_SCREENSHOT', 'True' ).lower() == 'true'
FETCH_FAVICON = os.getenv('FETCH_FAVICON', 'True' ).lower() == 'true'
SUBMIT_ARCHIVE_DOT_ORG = os.getenv('SUBMIT_ARCHIVE_DOT_ORG', 'True' ).lower() == 'true'
RESOLUTION = os.getenv('RESOLUTION', '1440,900' )
ARCHIVE_PERMISSIONS = os.getenv('ARCHIVE_PERMISSIONS', '755' )
CHROME_BINARY = os.getenv('CHROME_BINARY', 'chromium-browser' ) # change to google-chrome browser if using google-chrome
WGET_BINARY = os.getenv('WGET_BINARY', 'wget' )
WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', None)
TIMEOUT = int(os.getenv('TIMEOUT', '60'))
INDEX_TEMPLATE = os.getenv('INDEX_TEMPLATE', 'templates/index.html')
INDEX_ROW_TEMPLATE = os.getenv('INDEX_ROW_TEMPLATE', 'templates/index_row.html')
2017-07-05 21:33:51 +00:00
TERM_WIDTH = shutil.get_terminal_size((100, 10)).columns
ANSI = {
'reset': '\033[00;00m',
'lightblue': '\033[01;30m',
'lightyellow': '\033[01;33m',
'lightred': '\033[01;35m',
'red': '\033[01;31m',
'green': '\033[01;32m',
'blue': '\033[01;34m',
'white': '\033[01;37m',
'black': '\033[01;30m',
}
if not USE_COLOR:
# dont show colors if USE_COLOR is False
ANSI = {k: '' for k in ANSI.keys()}
2017-07-06 21:44:04 +00:00
if sys.stdout.encoding != 'UTF-8':
print('[X] Your system is running python3 scripts with a bad locale setting: {} (it should be UTF-8).'.format(sys.stdout.encoding))
print(' To fix it, add the line "export PYTHONIOENCODING=utf8" to your ~/.bashrc file (without quotes)')
print('')
print(' Confirm that it\'s fixed by opening a new shell and running:')
print(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8')
print('')
print(' Alternatively, run this script with:')
print(' env PYTHONIOENCODING=utf8 ./archive.py export.html')
2017-07-05 21:33:51 +00:00
### Util Functions
def check_dependencies():
2017-07-05 21:33:51 +00:00
"""Check that all necessary dependencies are installed, and have valid versions"""
print('[*] Checking Dependencies:')
2017-07-04 10:48:12 +00:00
python_vers = float('{}.{}'.format(sys.version_info.major, sys.version_info.minor))
if python_vers < 3.5:
2017-07-05 21:33:51 +00:00
print('{}[X] Python version is not new enough: {} (>3.5 is required){}'.format(ANSI['red'], python_vers, ANSI['reset']))
2017-07-04 10:48:12 +00:00
print(' See https://github.com/pirate/bookmark-archiver#troubleshooting for help upgrading your Python installation.')
raise SystemExit(1)
if FETCH_PDF or FETCH_SCREENSHOT:
if run(['which', CHROME_BINARY]).returncode:
2017-07-05 21:33:51 +00:00
print('{}[X] Missing dependency: {}{}'.format(ANSI['red'], CHROME_BINARY, ANSI['reset']))
print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format(CHROME_BINARY))
print(' See https://github.com/pirate/bookmark-archiver for help.')
raise SystemExit(1)
# parse chrome --version e.g. Google Chrome 61.0.3114.0 canary / Chromium 59.0.3029.110 built on Ubuntu, running on Ubuntu 16.04
2017-07-05 21:33:51 +00:00
try:
result = run([CHROME_BINARY, '--version'], stdout=PIPE)
version = result.stdout.decode('utf-8').replace('Google Chrome ', '').replace('Chromium ', '').split(' ', 1)[0].split('.', 1)[0] # TODO: regex might be better
if int(version) < 59:
print('{red}[X] Chrome version must be 59 or greater for headless PDF and screenshot saving{reset}'.format(**ANSI))
print(' See https://github.com/pirate/bookmark-archiver for help.')
raise SystemExit(1)
except (TypeError, OSError):
print('{red}[X] Failed to parse Chrome version, is it installed properly?{reset}'.format(**ANSI))
print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format(CHROME_BINARY))
print(' See https://github.com/pirate/bookmark-archiver for help.')
raise SystemExit(1)
if FETCH_WGET:
2017-07-05 21:33:51 +00:00
if run(['which', 'wget']).returncode or run(['wget', '--version'], stdout=DEVNULL).returncode:
print('{red}[X] Missing dependency: wget{reset}'.format(**ANSI))
print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format('wget'))
print(' See https://github.com/pirate/bookmark-archiver for help.')
raise SystemExit(1)
if FETCH_FAVICON or SUBMIT_ARCHIVE_DOT_ORG:
2017-07-05 21:33:51 +00:00
if run(['which', 'curl']).returncode or run(['curl', '--version'], stdout=DEVNULL).returncode:
print('{red}[X] Missing dependency: curl{reset}'.format(**ANSI))
print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format('curl'))
print(' See https://github.com/pirate/bookmark-archiver for help.')
raise SystemExit(1)
if FETCH_AUDIO or FETCH_VIDEO:
2017-07-05 21:33:51 +00:00
if run(['which', 'youtube-dl']).returncode or run(['youtube-dl', '--version'], stdout=DEVNULL).returncode:
print('{red}[X] Missing dependency: youtube-dl{reset}'.format(**ANSI))
print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format('youtube-dl'))
print(' See https://github.com/pirate/bookmark-archiver for help.')
raise SystemExit(1)
2017-07-05 21:33:51 +00:00
def chmod_file(path, cwd='.', permissions=ARCHIVE_PERMISSIONS, timeout=30):
"""chmod -R <permissions> <cwd>/<path>"""
if not os.path.exists(os.path.join(cwd, path)):
raise Exception('Failed to chmod: {} does not exist (did the previous step fail?)'.format(path))
chmod_result = run(['chmod', '-R', permissions, path], cwd=cwd, stdout=DEVNULL, stderr=PIPE, timeout=timeout)
if chmod_result.returncode == 1:
print(' ', chmod_result.stderr.decode())
raise Exception('Failed to chmod {}/{}'.format(cwd, path))
def progress(seconds=TIMEOUT, prefix=''):
"""Show a (subprocess-controlled) progress bar with a <seconds> timeout,
returns end() function to instantly finish the progress
"""
if not SHOW_PROGRESS:
return lambda: None
2017-07-06 21:44:04 +00:00
chunk = '' if sys.stdout.encoding == 'UTF-8' else '#'
2017-07-05 21:33:51 +00:00
chunks = TERM_WIDTH - len(prefix) - 20 # number of progress chunks to show (aka max bar width)
def progress_bar(seconds=seconds, prefix=prefix):
"""show timer in the form of progress bar, with percentage and seconds remaining"""
try:
for s in range(seconds * chunks):
progress = s / chunks / seconds * 100
bar_width = round(progress/(100/chunks))
# ████████████████████ 0.9% (1/60sec)
sys.stdout.write('\r{0}{1}{2}{3} {4}% ({5}/{6}sec)'.format(
prefix,
ANSI['green'],
2017-07-06 21:44:04 +00:00
(chunk * bar_width).ljust(chunks),
2017-07-05 21:33:51 +00:00
ANSI['reset'],
round(progress, 1),
round(s/chunks),
seconds,
))
sys.stdout.flush()
time.sleep(1 / chunks)
# ██████████████████████████████████ 100.0% (60/60sec)
sys.stdout.write('\r{0}{1}{2}{3} {4}% ({5}/{6}sec)\n'.format(
prefix,
ANSI['red'],
2017-07-06 21:44:04 +00:00
chunk * chunks,
2017-07-05 21:33:51 +00:00
ANSI['reset'],
100.0,
seconds,
seconds,
))
sys.stdout.flush()
except KeyboardInterrupt:
print()
pass
p = Process(target=progress_bar)
p.start()
def end():
"""immediately finish progress and clear the progressbar line"""
p.terminate()
2017-07-05 22:15:39 +00:00
sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH), ANSI['reset'])) # clear whole terminal line
2017-07-05 21:33:51 +00:00
sys.stdout.flush()
return end