__package__ = 'archivebox.workers'

import sys
import time
import signal
import psutil
import shutil
import subprocess

from typing import Dict, cast, Iterator
from pathlib import Path
from functools import cache

from rich import print

from supervisor.xmlrpc import SupervisorTransport
from xmlrpc.client import ServerProxy

from archivebox.config import CONSTANTS
from archivebox.config.paths import get_or_create_working_tmp_dir
from archivebox.config.permissions import ARCHIVEBOX_USER
from archivebox.misc.logging import STDERR
from archivebox.misc.logging_util import pretty_path


LOG_FILE_NAME = "supervisord.log"
CONFIG_FILE_NAME = "supervisord.conf"
PID_FILE_NAME = "supervisord.pid"
WORKERS_DIR_NAME = "workers"


SCHEDULER_WORKER = {
    "name": "worker_scheduler",
    "command": "archivebox manage djangohuey --queue system_tasks -w 4 -k thread --disable-health-check --flush-locks",
    "autostart": "true",
    "autorestart": "true",
    "stdout_logfile": "logs/worker_scheduler.log",
    "redirect_stderr": "true",
}
COMMAND_WORKER = {
    "name": "worker_commands",
    "command": "archivebox manage djangohuey --queue commands -w 4 -k thread --no-periodic --disable-health-check",
    "autostart": "true",
    "autorestart": "true",
    "stdout_logfile": "logs/worker_commands.log",
    "redirect_stderr": "true",
}
ORCHESTRATOR_WORKER = {
    "name": "worker_orchestrator",
    "command": "archivebox manage orchestrator",
    "autostart": "true",
    "autorestart": "true",
    "stdout_logfile": "logs/worker_orchestrator.log",
    "redirect_stderr": "true",
}
SERVER_WORKER = lambda host, port: {
    "name": "worker_daphne",
    "command": f"daphne --bind={host} --port={port} --application-close-timeout=600 archivebox.core.asgi:application",
    "autostart": "false",
    "autorestart": "true",
    "stdout_logfile": "logs/worker_daphne.log",
    "redirect_stderr": "true",
}
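
# Illustrative example only: create_worker_config() below renders each worker dict above into a
# supervisord [program:...] section in the workers/ dir, which looks roughly like this:
#
#   [program:worker_orchestrator]
#   command=archivebox manage orchestrator
#   autostart=true
#   autorestart=true
#   stdout_logfile=logs/worker_orchestrator.log
#   redirect_stderr=true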


@cache
def get_sock_file():
    """Get the path to the supervisord socket file, symlinking to a shorter path if needed due to unix path length limits"""
    TMP_DIR = get_or_create_working_tmp_dir(autofix=True, quiet=False)
    assert TMP_DIR, "Failed to find or create a writable TMP_DIR!"
    socket_file = TMP_DIR / "supervisord.sock"
    return socket_file


def follow(file, sleep_sec=0.1) -> Iterator[str]:
    """Yield each line from a file as it is written. `sleep_sec` is the time to sleep after empty reads."""
    line = ''
    while True:
        tmp = file.readline()
        if tmp is not None and tmp != "":
            line += tmp
            if line.endswith("\n"):
                yield line
                line = ''
        elif sleep_sec:
            time.sleep(sleep_sec)
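
# Illustrative usage sketch only (tail_worker_logs() below wraps this; the log path is just an example):
#
#   with open('logs/worker_orchestrator.log', 'r') as f:
#       for line in follow(f):
#           print(line.strip())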


def create_supervisord_config():
    SOCK_FILE = get_sock_file()
    WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME
    CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME
    PID_FILE = SOCK_FILE.parent / PID_FILE_NAME
    LOG_FILE = CONSTANTS.LOGS_DIR / LOG_FILE_NAME

    config_content = f"""
[supervisord]
nodaemon = true
environment = IS_SUPERVISORD_PARENT="true"
pidfile = {PID_FILE}
logfile = {LOG_FILE}
childlogdir = {CONSTANTS.LOGS_DIR}
directory = {CONSTANTS.DATA_DIR}
strip_ansi = true
nocleanup = true
user = {ARCHIVEBOX_USER}

[unix_http_server]
file = {SOCK_FILE}
chmod = 0700

[supervisorctl]
serverurl = unix://{SOCK_FILE}

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

[include]
files = {WORKERS_DIR}/*.conf

"""
    CONFIG_FILE.write_text(config_content)
    Path.mkdir(WORKERS_DIR, exist_ok=True, parents=True)
    (WORKERS_DIR / 'initial_startup.conf').write_text('')   # hides error about "no files found to include" when supervisord starts


def create_worker_config(daemon):
    """Create a supervisord worker config file for a given daemon"""
    SOCK_FILE = get_sock_file()
    WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME
    Path.mkdir(WORKERS_DIR, exist_ok=True, parents=True)

    name = daemon['name']
    worker_conf = WORKERS_DIR / f"{name}.conf"

    worker_str = f"[program:{name}]\n"
    for key, value in daemon.items():
        if key == 'name':
            continue
        worker_str += f"{key}={value}\n"
    worker_str += "\n"

    worker_conf.write_text(worker_str)


def get_existing_supervisord_process():
    SOCK_FILE = get_sock_file()
    try:
        transport = SupervisorTransport(None, None, f"unix://{SOCK_FILE}")
        server = ServerProxy("http://localhost", transport=transport)   # user:pass@localhost doesn't work for some reason with unix://.sock, can't seem to silence CRIT no-auth warning
        current_state = cast(Dict[str, int | str], server.supervisor.getState())
        if current_state["statename"] == "RUNNING":
            pid = server.supervisor.getPID()
            print(f"[🦸♂️] Supervisord connected (pid={pid}) via unix://{pretty_path(SOCK_FILE)}.")
            return server.supervisor
    except FileNotFoundError:
        return None
    except Exception as e:
        print(f"Error connecting to existing supervisord: {str(e)}")
        return None


def stop_existing_supervisord_process():
    SOCK_FILE = get_sock_file()
    PID_FILE = SOCK_FILE.parent / PID_FILE_NAME

    try:
        # if pid file exists, load PID int
        try:
            pid = int(PID_FILE.read_text())
        except (FileNotFoundError, ValueError):
            return

        try:
            print(f"[🦸♂️] Stopping supervisord process (pid={pid})...")
            proc = psutil.Process(pid)
            proc.terminate()
            proc.wait(timeout=5)
        except (BaseException, BrokenPipeError, IOError, KeyboardInterrupt):
            pass
    finally:
        try:
            # clear PID file and socket file
            PID_FILE.unlink(missing_ok=True)
            get_sock_file().unlink(missing_ok=True)
        except BaseException:
            pass


def start_new_supervisord_process(daemonize=False):
    SOCK_FILE = get_sock_file()
    WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME
    LOG_FILE = CONSTANTS.LOGS_DIR / LOG_FILE_NAME
    CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME
    PID_FILE = SOCK_FILE.parent / PID_FILE_NAME

    print(f"[🦸♂️] Supervisord starting{' in background' if daemonize else ''}...")
    pretty_log_path = pretty_path(LOG_FILE)
    print(f"    > Writing supervisord logs to: {pretty_log_path}")
    print(f"    > Writing task worker logs to: {pretty_log_path.replace('supervisord.log', 'worker_*.log')}")
    print(f"    > Using supervisord config file: {pretty_path(CONFIG_FILE)}")
    print(f"    > Using supervisord UNIX socket: {pretty_path(SOCK_FILE)}")
    print()

    # clear out existing stale state files
    shutil.rmtree(WORKERS_DIR, ignore_errors=True)
    PID_FILE.unlink(missing_ok=True)
    get_sock_file().unlink(missing_ok=True)
    CONFIG_FILE.unlink(missing_ok=True)

    # create the supervisord config file
    create_supervisord_config()

    # Start supervisord
    # panel = Panel(f"Starting supervisord with config: {SUPERVISORD_CONFIG_FILE}")
    # with Live(panel, refresh_per_second=1) as live:
    subprocess.Popen(
        f"supervisord --configuration={CONFIG_FILE}",
        stdin=None,
        shell=True,
        start_new_session=daemonize,
    )

    def exit_signal_handler(signum, frame):
        if signum == 2:       # SIGINT / Ctrl+C
            STDERR.print("\n[🛑] Got Ctrl+C. Terminating child processes...")
        elif signum != 13:    # ignore SIGPIPE
            STDERR.print(f"\n[🦸♂️] Supervisord got stop signal ({signal.strsignal(signum)}). Terminating child processes...")
        stop_existing_supervisord_process()
        raise SystemExit(0)

    # Monitor for termination signals and clean up child processes
    if not daemonize:
        try:
            signal.signal(signal.SIGINT, exit_signal_handler)
            signal.signal(signal.SIGHUP, exit_signal_handler)
            signal.signal(signal.SIGPIPE, exit_signal_handler)
            signal.signal(signal.SIGTERM, exit_signal_handler)
        except Exception:
            # signal handlers only work in the main thread
            pass
    # otherwise supervisord will continue in the background even if the parent process ends (aka daemon mode)

    time.sleep(2)
    return get_existing_supervisord_process()


def get_or_create_supervisord_process(daemonize=False):
    SOCK_FILE = get_sock_file()
    WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME

    supervisor = get_existing_supervisord_process()
    if supervisor is None:
        stop_existing_supervisord_process()
        supervisor = start_new_supervisord_process(daemonize=daemonize)
        time.sleep(0.5)

    # wait up to 5s in case supervisord is slow to start
    if not supervisor:
        for _ in range(10):
            if supervisor is not None:
                print()
                break
            sys.stdout.write('.')
            sys.stdout.flush()
            time.sleep(0.5)
            supervisor = get_existing_supervisord_process()
        else:
            print()

    assert supervisor, "Failed to start supervisord or connect to it!"
    supervisor.getPID()   # make sure it doesn't throw an exception

    (WORKERS_DIR / 'initial_startup.conf').unlink(missing_ok=True)

    return supervisor
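
# Illustrative usage sketch only (assumed calling pattern, mirroring start_cli_workers()/start_server_workers() below):
#
#   supervisor = get_or_create_supervisord_process(daemonize=False)
#   start_worker(supervisor, ORCHESTRATOR_WORKER)
#   try:
#       watch_worker(supervisor, ORCHESTRATOR_WORKER['name'])
#   finally:
#       stop_worker(supervisor, ORCHESTRATOR_WORKER['name'])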


def start_worker(supervisor, daemon, lazy=False):
    assert supervisor.getPID()

    print(f"[🦸♂️] Supervisord starting new subprocess worker: {daemon['name']}...")
    create_worker_config(daemon)

    # reloadConfig() returns [[added, changed, removed]] process group names
    result = supervisor.reloadConfig()
    added, changed, removed = result[0]
    # print(f"Added: {added}, Changed: {changed}, Removed: {removed}")
    for group in removed:
        supervisor.stopProcessGroup(group)
        supervisor.removeProcessGroup(group)
    for group in changed:
        supervisor.stopProcessGroup(group)
        supervisor.removeProcessGroup(group)
        supervisor.addProcessGroup(group)
    for group in added:
        supervisor.addProcessGroup(group)

    time.sleep(1)

    for _ in range(10):
        procs = supervisor.getAllProcessInfo()
        for proc in procs:
            if proc['name'] == daemon["name"]:
                # See process state diagram here: http://supervisord.org/subprocess.html
                if proc['statename'] == 'RUNNING':
                    print(f"    - Worker {daemon['name']}: already {proc['statename']} ({proc['description']})")
                    return proc
                else:
                    if not lazy:
                        supervisor.startProcessGroup(daemon["name"], True)
                    proc = supervisor.getProcessInfo(daemon["name"])
                    print(f"    - Worker {daemon['name']}: started {proc['statename']} ({proc['description']})")
                    return proc

        # wait and retry in case it's slow to launch
        time.sleep(0.5)

    raise Exception(f"Failed to start worker {daemon['name']}! Only found: {procs}")


def get_worker(supervisor, daemon_name):
    try:
        return supervisor.getProcessInfo(daemon_name)
    except Exception:
        pass
    return None


def stop_worker(supervisor, daemon_name):
    proc = get_worker(supervisor, daemon_name)

    for _ in range(10):
        if not proc:
            # worker does not exist (was never running or configured in the first place)
            return True

        # See process state diagram here: http://supervisord.org/subprocess.html
        if proc['statename'] == 'STOPPED':
            # worker was configured but has already stopped for some reason
            supervisor.removeProcessGroup(daemon_name)
            return True
        else:
            # worker was configured and is running, stop it now
            supervisor.stopProcessGroup(daemon_name)

        # wait 500ms and then re-check to make sure it's really stopped
        time.sleep(0.5)
        proc = get_worker(supervisor, daemon_name)

    raise Exception(f"Failed to stop worker {daemon_name}!")


def tail_worker_logs(log_path: str):
    get_or_create_supervisord_process(daemonize=False)

    from rich.live import Live
    from rich.table import Table

    table = Table()
    table.add_column("TS")
    table.add_column("URL")

    try:
        with Live(table, refresh_per_second=1) as live:   # refresh the live table once per second
            with open(log_path, 'r') as f:
                for line in follow(f):
                    if '://' in line:
                        live.console.print(f"Working on: {line.strip()}")
                        # table.add_row("123124234", line.strip())
    except (KeyboardInterrupt, BrokenPipeError, IOError):
        STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
    except SystemExit:
        pass


def watch_worker(supervisor, daemon_name, interval=5):
    """Loop continuously and monitor the worker's health"""
    while True:
        proc = get_worker(supervisor, daemon_name)
        if not proc:
            raise Exception("Worker disappeared while running! " + daemon_name)

        # See process state diagram here: http://supervisord.org/subprocess.html
        if proc['statename'] == 'STOPPED':
            return proc

        if proc['statename'] == 'RUNNING':
            time.sleep(1)
            continue

        if proc['statename'] in ('STARTING', 'BACKOFF', 'FATAL', 'EXITED', 'STOPPING'):
            print(f'[🦸♂️] WARNING: Worker {daemon_name} {proc["statename"]} {proc["description"]}')
            time.sleep(interval)
            continue


def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
    supervisor = get_or_create_supervisord_process(daemonize=daemonize)

    bg_workers = [
        SCHEDULER_WORKER,
        COMMAND_WORKER,
        ORCHESTRATOR_WORKER,
    ]

    print()
    start_worker(supervisor, SERVER_WORKER(host=host, port=port))
    print()
    for worker in bg_workers:
        start_worker(supervisor, worker)
    print()

    if not daemonize:
        try:
            watch_worker(supervisor, "worker_daphne")
        except (KeyboardInterrupt, BrokenPipeError, IOError):
            STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
        except SystemExit:
            pass
        except BaseException as e:
            STDERR.print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping web server gracefully...")
            raise
        finally:
            stop_worker(supervisor, "worker_daphne")
            time.sleep(0.5)


def start_cli_workers(watch=False):
    supervisor = get_or_create_supervisord_process(daemonize=False)

    start_worker(supervisor, COMMAND_WORKER)
    start_worker(supervisor, ORCHESTRATOR_WORKER)

    if watch:
        try:
            watch_worker(supervisor, ORCHESTRATOR_WORKER['name'])
        except (KeyboardInterrupt, BrokenPipeError, IOError):
            STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
        except SystemExit:
            pass
        except BaseException as e:
            STDERR.print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping CLI workers gracefully...")
            raise
        finally:
            stop_worker(supervisor, COMMAND_WORKER['name'])
            stop_worker(supervisor, ORCHESTRATOR_WORKER['name'])
            time.sleep(0.5)

    return [COMMAND_WORKER, ORCHESTRATOR_WORKER]


# def main(daemons):
#     supervisor = get_or_create_supervisord_process(daemonize=False)
#
#     worker = start_worker(supervisor, daemons["webworker"])
#     pprint(worker)
#
#     print("All processes started in background.")
#
#     # Optionally you can block the main thread until an exit signal is received:
#     # try:
#     #     signal.pause()
#     # except KeyboardInterrupt:
#     #     pass
#     # finally:
#     #     stop_existing_supervisord_process()


# if __name__ == "__main__":
#     DAEMONS = {
#         "webworker": {
#             "name": "webworker",
#             "command": "python3 -m http.server 9000",
#             "directory": str(cwd),
#             "autostart": "true",
#             "autorestart": "true",
#             "stdout_logfile": cwd / "webworker.log",
#             "stderr_logfile": cwd / "webworker_error.log",
#         },
#     }
#     main(DAEMONS, cwd)