mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-26 22:20:21 +00:00
185 lines
8.9 KiB
Python
185 lines
8.9 KiB
Python
import os
|
|
import sys
|
|
import time
|
|
import shutil
|
|
|
|
from subprocess import run, PIPE, DEVNULL
|
|
from multiprocessing import Process
|
|
|
|
# Every setting below can be overridden with an environment variable of the
# same name: the value of that variable is used when it is set, otherwise the
# default given here is used.
#
# Boolean settings are True only when the value is the string 'true'
# (compared case-insensitively); any other value makes them False.


def _env_flag(name, default):
    """Return True when env variable <name> (or <default> if unset) is 'true', ignoring case."""
    return os.environ.get(name, default).lower() == 'true'


IS_TTY = sys.stdout.isatty()

# Output behaviour: both default to on only when stdout is a terminal.
USE_COLOR = _env_flag('USE_COLOR', str(IS_TTY))
SHOW_PROGRESS = _env_flag('SHOW_PROGRESS', str(IS_TTY))

# Switches for the individual archive methods.
FETCH_WGET = _env_flag('FETCH_WGET', 'True')
FETCH_WGET_REQUISITES = _env_flag('FETCH_WGET_REQUISITES', 'True')
FETCH_AUDIO = _env_flag('FETCH_AUDIO', 'False')
FETCH_VIDEO = _env_flag('FETCH_VIDEO', 'False')
FETCH_PDF = _env_flag('FETCH_PDF', 'True')
FETCH_SCREENSHOT = _env_flag('FETCH_SCREENSHOT', 'True')
FETCH_FAVICON = _env_flag('FETCH_FAVICON', 'True')
SUBMIT_ARCHIVE_DOT_ORG = _env_flag('SUBMIT_ARCHIVE_DOT_ORG', 'True')

# Plain string / integer settings.
RESOLUTION = os.environ.get('RESOLUTION', '1440,900')
ARCHIVE_PERMISSIONS = os.environ.get('ARCHIVE_PERMISSIONS', '755')
CHROME_BINARY = os.environ.get('CHROME_BINARY', 'chromium-browser')  # change to google-chrome browser if using google-chrome
WGET_BINARY = os.environ.get('WGET_BINARY', 'wget')
WGET_USER_AGENT = os.environ.get('WGET_USER_AGENT')  # None when not set
TIMEOUT = int(os.environ.get('TIMEOUT', '60'))
INDEX_TEMPLATE = os.environ.get('INDEX_TEMPLATE', 'templates/index.html')
INDEX_ROW_TEMPLATE = os.environ.get('INDEX_ROW_TEMPLATE', 'templates/index_row.html')
|
|
|
|
# Terminal width in columns; shutil falls back to 100x10 when the size
# cannot be determined.
TERM_WIDTH = shutil.get_terminal_size(fallback=(100, 10)).columns

# ANSI escape sequences used to colour terminal output, keyed by colour name.
# NOTE(review): 'lightblue' and 'black' map to the same sequence (01;30).
ANSI = dict([
    ('reset', '\033[00;00m'),
    ('lightblue', '\033[01;30m'),
    ('lightyellow', '\033[01;33m'),
    ('lightred', '\033[01;35m'),
    ('red', '\033[01;31m'),
    ('green', '\033[01;32m'),
    ('blue', '\033[01;34m'),
    ('white', '\033[01;37m'),
    ('black', '\033[01;30m'),
])

if not USE_COLOR:
    # dont show colors if USE_COLOR is False: keep every key, blank every value
    ANSI = dict.fromkeys(ANSI, '')
|
|
|
|
# Warn (without aborting) when stdout is not UTF-8.
# The encoding name is normalised before comparing because it is not always
# spelled 'UTF-8': it may be reported as 'utf-8' or 'utf8' (the spelling the
# PYTHONIOENCODING=utf8 fix suggested below uses), and it can be None when
# stdout has been replaced by an object without a real encoding.
if (sys.stdout.encoding or '').upper().replace('-', '').replace('_', '') != 'UTF8':
    print('[X] Your system is running python3 scripts with a bad locale setting: {} (it should be UTF-8).'.format(sys.stdout.encoding))
    print(' To fix it, add the line "export PYTHONIOENCODING=utf8" to your ~/.bashrc file (without quotes)')
    print('')
    print(' Confirm that it\'s fixed by opening a new shell and running:')
    print(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8')
    print('')
    print(' Alternatively, run this script with:')
    print(' env PYTHONIOENCODING=utf8 ./archive.py export.html')
|
|
|
|
### Util Functions
|
|
|
|
def _exit_missing_dependency(binary):
    """Report that <binary> is missing and abort by raising SystemExit(1)."""
    print('{}[X] Missing dependency: {}{}'.format(ANSI['red'], binary, ANSI['reset']))
    print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format(binary))
    print(' See https://github.com/pirate/bookmark-archiver for help.')
    raise SystemExit(1)


def check_dependencies():
    """Check that all necessary dependencies are installed, and have valid versions

    Only the tools needed by the enabled FETCH_*/SUBMIT_* settings are checked.
    Prints an explanation and raises SystemExit(1) on the first problem found;
    returns None when everything required is available.
    """

    print('[*] Checking Dependencies:')

    # Compare version tuples rather than floats: float('3.10') is 3.1, which
    # would wrongly reject Python 3.10 and newer as older than 3.5.
    if sys.version_info < (3, 5):
        python_vers = '{}.{}'.format(sys.version_info.major, sys.version_info.minor)
        print('{}[X] Python version is not new enough: {} (>=3.5 is required){}'.format(ANSI['red'], python_vers, ANSI['reset']))
        print(' See https://github.com/pirate/bookmark-archiver#troubleshooting for help upgrading your Python installation.')
        raise SystemExit(1)

    if FETCH_PDF or FETCH_SCREENSHOT:
        if run(['which', CHROME_BINARY]).returncode:
            _exit_missing_dependency(CHROME_BINARY)

        # parse chrome --version e.g. Google Chrome 61.0.3114.0 canary / Chromium 59.0.3029.110 built on Ubuntu, running on Ubuntu 16.04
        try:
            result = run([CHROME_BINARY, '--version'], stdout=PIPE)
            version_output = result.stdout.decode('utf-8')
            for vendor_prefix in ('Google Chrome ', 'Chromium '):
                version_output = version_output.replace(vendor_prefix, '')
            # first whitespace-separated token, then its leading dotted component
            major_version = int(version_output.split(' ', 1)[0].split('.', 1)[0])  # TODO: regex might be better
        except (TypeError, ValueError, OSError):
            # ValueError covers output that does not start with a number
            # (including undecodable bytes), which int() cannot parse.
            print('{red}[X] Failed to parse Chrome version, is it installed properly?{reset}'.format(**ANSI))
            print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format(CHROME_BINARY))
            print(' See https://github.com/pirate/bookmark-archiver for help.')
            raise SystemExit(1)

        if major_version < 59:
            print('{red}[X] Chrome version must be 59 or greater for headless PDF and screenshot saving{reset}'.format(**ANSI))
            print(' See https://github.com/pirate/bookmark-archiver for help.')
            raise SystemExit(1)

    # (is the tool needed?, binary to look for) -- checked in this order.
    # wget is looked up through WGET_BINARY so a custom binary is honoured.
    required_tools = (
        (FETCH_WGET, WGET_BINARY),
        (FETCH_FAVICON or SUBMIT_ARCHIVE_DOT_ORG, 'curl'),
        (FETCH_AUDIO or FETCH_VIDEO, 'youtube-dl'),
    )
    for needed, binary in required_tools:
        if not needed:
            continue
        # the binary must be on the PATH and able to report its version
        if run(['which', binary]).returncode or run([binary, '--version'], stdout=DEVNULL).returncode:
            _exit_missing_dependency(binary)
|
|
|
|
|
|
def chmod_file(path, cwd='.', permissions=ARCHIVE_PERMISSIONS, timeout=30):
    """chmod -R <permissions> <cwd>/<path>

    Runs `chmod -R <permissions> <path>` with <cwd> as the working directory.

    Raises Exception when <cwd>/<path> does not exist or when chmod exits with
    a non-zero status. subprocess.TimeoutExpired propagates if chmod runs for
    longer than <timeout> seconds.
    """

    if not os.path.exists(os.path.join(cwd, path)):
        raise Exception('Failed to chmod: {} does not exist (did the previous step fail?)'.format(path))

    chmod_result = run(['chmod', '-R', permissions, path], cwd=cwd, stdout=DEVNULL, stderr=PIPE, timeout=timeout)
    # Any non-zero exit status means failure, not only the value 1.
    if chmod_result.returncode != 0:
        # errors='replace' so undecodable bytes in chmod's message cannot
        # raise and hide the actual chmod failure
        print(' ', chmod_result.stderr.decode(errors='replace'))
        raise Exception('Failed to chmod {}/{}'.format(cwd, path))
|
|
|
|
|
|
def _draw_progress_bar(seconds, prefix, chunk, chunks):
    """show timer in the form of progress bar, with percentage and seconds remaining

    Runs in the child process started by progress(). It lives at module level
    (not nested inside progress) so it can be used as a Process target under
    the "spawn" and "forkserver" start methods, which must pickle the target.

    seconds: total duration the bar represents
    prefix:  text written before the bar on the same line
    chunk:   the character the bar is drawn with
    chunks:  maximum bar width in characters (must be >= 1)
    """
    try:
        for s in range(seconds * chunks):
            percent = s / chunks / seconds * 100
            bar_width = round(percent / (100 / chunks))

            # ████████████████████ 0.9% (1/60sec)
            sys.stdout.write('\r{0}{1}{2}{3} {4}% ({5}/{6}sec)'.format(
                prefix,
                ANSI['green'],
                (chunk * bar_width).ljust(chunks),
                ANSI['reset'],
                round(percent, 1),
                round(s/chunks),
                seconds,
            ))
            sys.stdout.flush()
            time.sleep(1 / chunks)

        # ██████████████████████████████████ 100.0% (60/60sec)
        # reached only when the full duration elapsed; drawn in red
        sys.stdout.write('\r{0}{1}{2}{3} {4}% ({5}/{6}sec)\n'.format(
            prefix,
            ANSI['red'],
            chunk * chunks,
            ANSI['reset'],
            100.0,
            seconds,
            seconds,
        ))
        sys.stdout.flush()
    except KeyboardInterrupt:
        # finish the partially drawn line, then stop quietly
        print()


def progress(seconds=TIMEOUT, prefix=''):
    """Show a (subprocess-controlled) progress bar with a <seconds> timeout,
    returns end() function to instantly finish the progress

    When SHOW_PROGRESS is off nothing is drawn and the returned function is a
    no-op, so callers can always call the result unconditionally.
    """

    if not SHOW_PROGRESS:
        return lambda: None

    # Normalise the encoding name: it may be spelled 'UTF-8', 'utf-8' or
    # 'utf8', and can be None when stdout has been replaced.
    encoding = (sys.stdout.encoding or '').upper().replace('-', '').replace('_', '')
    chunk = '█' if encoding == 'UTF8' else '#'

    # number of progress chunks to show (aka max bar width); at least 1 so a
    # narrow terminal or long prefix cannot produce a zero/negative width
    chunks = max(1, TERM_WIDTH - len(prefix) - 20)

    p = Process(target=_draw_progress_bar, args=(seconds, prefix, chunk, chunks))
    p.start()

    def end():
        """immediately finish progress and clear the progressbar line"""
        p.terminate()
        # wait for the child to exit so it is reaped and cannot draw over
        # the line that is cleared below
        p.join()
        sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH), ANSI['reset']))  # clear whole terminal line
        sys.stdout.flush()

    return end
|