2019-04-03 04:27:37 +00:00
|
|
|
# Dotted package path of this module; passed to import_module() further down
# as the anchor for the relative `.archivebox_<subcommand>` imports.
__package__ = 'archivebox.cli'
# Program name; used as `prog=` for the top-level argparse parser in main().
__command__ = 'archivebox'
|
2019-04-03 04:27:37 +00:00
|
|
|
|
2020-07-02 08:00:51 +00:00
|
|
|
import sys
|
2020-07-02 07:53:39 +00:00
|
|
|
import argparse
|
2024-05-12 08:42:20 +00:00
|
|
|
import threading
|
2024-09-25 02:04:38 +00:00
|
|
|
|
2024-05-12 08:42:20 +00:00
|
|
|
from time import sleep
|
2024-09-25 02:04:38 +00:00
|
|
|
from collections.abc import Mapping
|
2020-07-02 07:53:39 +00:00
|
|
|
|
2024-09-25 02:04:38 +00:00
|
|
|
from typing import Optional, List, IO, Union, Iterable
|
2020-09-30 19:43:14 +00:00
|
|
|
from pathlib import Path
|
2020-07-02 07:53:39 +00:00
|
|
|
|
2024-09-30 22:59:05 +00:00
|
|
|
from archivebox.config import DATA_DIR
|
2024-10-01 06:19:11 +00:00
|
|
|
from archivebox.misc.logging import stderr
|
2019-04-19 01:09:54 +00:00
|
|
|
|
2019-04-03 04:27:37 +00:00
|
|
|
from importlib import import_module
|
|
|
|
|
2024-05-12 08:42:20 +00:00
|
|
|
# Alias for the builtin `list` type.
# NOTE(review): not referenced in the visible part of this file; presumably kept
# so the builtin stays reachable where the name `list` is shadowed by the
# `list` subcommand -- confirm before removing.
BUILTIN_LIST = list

# Absolute, symlink-resolved path of the directory that contains this file.
CLI_DIR = Path(__file__).resolve().parent
|
2019-04-03 04:27:37 +00:00
|
|
|
|
2024-10-01 06:19:11 +00:00
|
|
|
# Rewrite `archivebox setup ...` -> `archivebox install ...` for backwards compatibility.
# The length guard matters: when no subcommand is given (bare `archivebox`, or
# this package being imported by a process with a one-element argv) there is no
# sys.argv[1], and indexing it unguarded raises IndexError at import time.
if len(sys.argv) > 1 and sys.argv[1] == 'setup':
    sys.argv[1] = 'install'
|
|
|
|
|
2020-12-08 23:05:37 +00:00
|
|
|
|
2024-09-25 02:04:38 +00:00
|
|
|
# def list_subcommands() -> Dict[str, str]:
|
|
|
|
# """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
|
|
|
|
# COMMANDS = []
|
|
|
|
# for filename in os.listdir(CLI_DIR):
|
|
|
|
# if is_cli_module(filename):
|
|
|
|
# subcommand = filename.replace('archivebox_', '').replace('.py', '')
|
|
|
|
# module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
|
|
|
# assert is_valid_cli_module(module, subcommand)
|
|
|
|
# COMMANDS.append((subcommand, module.main.__doc__))
|
|
|
|
# globals()[subcommand] = module.main
|
|
|
|
# display_order = lambda cmd: (
|
|
|
|
# display_first.index(cmd[0])
|
|
|
|
# if cmd[0] in display_first else
|
|
|
|
# 100 + len(cmd[0])
|
|
|
|
# )
|
|
|
|
# return dict(sorted(COMMANDS, key=display_order))
|
|
|
|
|
|
|
|
# Static registry of subcommand name -> name of the module implementing it.
# Listed by hand rather than discovered by scanning the package directory,
# because a static definition is much faster. Every module follows the
# `archivebox_<subcommand>` naming rule, so only the names are spelled out.
SUBCOMMAND_MODULES = {
    subcommand: f'archivebox_{subcommand}'
    for subcommand in (
        'help', 'version',
        'init', 'config', 'install',
        'add', 'remove', 'update', 'list', 'status',
        'schedule', 'server', 'shell', 'manage',
        'oneshot',
    )
}
|
2019-04-03 04:27:37 +00:00
|
|
|
|
2019-04-19 01:09:54 +00:00
|
|
|
# Every imported command module must expose these attributes to be valid.
required_attrs = ('__package__', '__command__', 'main')


# Basic checks to make sure imported files are valid subcommands.

def is_cli_module(fname: str) -> bool:
    """Return True if *fname* is named like a subcommand file (``archivebox_<name>.py``)."""
    return fname.startswith('archivebox_') and fname.endswith('.py')


def is_valid_cli_module(module, subcommand: str) -> bool:
    """Return True if *module* is a well-formed implementation of *subcommand*.

    A module qualifies when it has every attribute listed in ``required_attrs``
    and the last space-separated word of its ``__command__`` equals *subcommand*
    (e.g. ``'archivebox add'`` for ``'add'``).
    """
    # checked first so that __command__ is only read once it is known to exist
    has_required_attrs = all(hasattr(module, attr) for attr in required_attrs)
    return has_required_attrs and module.__command__.split(' ')[-1] == subcommand
|
2019-04-11 10:59:14 +00:00
|
|
|
|
2024-09-25 02:04:38 +00:00
|
|
|
class LazySubcommands(Mapping):
    """Read-only mapping of subcommand name -> that subcommand's ``main`` callable.

    The names come from ``SUBCOMMAND_MODULES``. The module implementing a
    subcommand is imported only when its entry is looked up, so iterating,
    counting, or membership-testing the available subcommands imports nothing.
    """

    def keys(self):
        """Return the registered subcommand names (no modules are imported)."""
        return SUBCOMMAND_MODULES.keys()

    def values(self):
        """Return a list of every subcommand's ``main`` callable (imports every module)."""
        return [self[key] for key in self.keys()]

    def items(self):
        """Return a list of ``(name, main)`` pairs (imports every module)."""
        return [(key, self[key]) for key in self.keys()]

    def __contains__(self, key) -> bool:
        # Mapping's inherited __contains__ is implemented via self[key], which
        # would import the subcommand's module just to answer a membership test.
        return key in SUBCOMMAND_MODULES

    def __getitem__(self, key):
        """Import the module registered for *key* and return its ``main`` callable.

        Raises:
            KeyError: if *key* is not a registered subcommand.
            AssertionError: if the imported module fails ``is_valid_cli_module``.
        """
        module = import_module(f'.{SUBCOMMAND_MODULES[key]}', __package__)
        # explicit raise instead of a bare `assert`, so the check is not
        # stripped when Python runs with -O
        if not is_valid_cli_module(module, key):
            raise AssertionError(
                f'{SUBCOMMAND_MODULES[key]} is not a valid implementation of the {key!r} subcommand'
            )
        return module.main

    def __iter__(self):
        return iter(SUBCOMMAND_MODULES)

    def __len__(self):
        return len(SUBCOMMAND_MODULES)


# Shared instance used to look up subcommand entrypoints by name.
CLI_SUBCOMMANDS = LazySubcommands()
|
|
|
|
|
|
|
|
|
|
|
|
# Command groups, split by how much on-disk state each command needs.
meta_cmds = ('help', 'version')                                # don't require a valid data folder at all
main_cmds = ('init', 'config', 'setup', 'install')             # don't require an existing db to be present
archive_cmds = ('add', 'remove', 'update', 'list', 'status')   # require an existing db to be present
fake_db = ("oneshot",)                                         # use a fake in-memory db

# These common commands appear sorted before any others for ease-of-use.
display_first = meta_cmds + main_cmds + archive_cmds
|
|
|
|
|
2020-07-02 07:53:39 +00:00
|
|
|
|
2024-09-10 07:04:39 +00:00
|
|
|
# Threads we don't have to wait for before exiting (each entry is matched as a substring of repr(thread)).
IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler')


def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: Iterable[str]=IGNORED_BG_THREADS, timeout: int=60) -> int:
    """
    Block until the specified threads exit. e.g. pass thread_names=('default_hook_handler',) to wait for webhooks.
    Useful for waiting for signal handlers, webhooks, etc. to finish running after a mgmt command completes.

    Threads are polled once per second. A thread is waited on when its repr()
    contains none of `ignore_names` and either `thread_names` is empty (wait
    for every thread) or its repr() contains one of `thread_names`.

    Args:
        thread_names: substrings identifying the threads to wait for; empty means all threads.
        ignore_names: substrings identifying threads that are never waited on.
        timeout: maximum number of seconds to wait.

    Returns:
        The number of seconds waited before no matching thread was left.

    Raises:
        TimeoutError: if matching threads are still alive after `timeout` seconds.
    """

    # Materialize both once: they are re-scanned for every thread on every poll,
    # so a one-shot iterator would be exhausted after the first check.
    wait_for_names = tuple(thread_names)
    ignored_names = tuple(ignore_names)
    # any empty iterable (not just the literal `()`) means "wait for everything"
    wait_for_all = not wait_for_names

    def matches(thread, patterns) -> bool:
        description = repr(thread)
        return any(pattern in description for pattern in patterns)

    def find_blocking_threads():
        return [
            thread
            for thread in threading.enumerate()
            if not matches(thread, ignored_names)
            and (wait_for_all or matches(thread, wait_for_names))
        ]

    for tries in range(timeout):
        blocking_threads = find_blocking_threads()
        if not blocking_threads:
            return tries

        if tries == 5:                              # only show stderr message if we need to wait more than 5s
            stderr(
                f'[…] Waiting up to {timeout}s for background jobs (e.g. webhooks) to finish...',
                ', '.join(repr(t) for t in blocking_threads),
            )
        sleep(1)

    # Final check after the last sleep. This also covers timeout <= 0, where the
    # loop body never runs and the threads have not been inspected at all yet.
    seconds_waited = max(timeout, 0)
    blocking_threads = find_blocking_threads()
    if not blocking_threads:
        return seconds_waited

    threads_summary = ', '.join(repr(t) for t in blocking_threads)
    raise TimeoutError(f'Background threads failed to exit after {seconds_waited}s: {threads_summary}')
|
2024-05-12 08:42:20 +00:00
|
|
|
|
|
|
|
|
2019-04-03 04:27:37 +00:00
|
|
|
|
2019-04-27 21:26:24 +00:00
|
|
|
def run_subcommand(subcommand: str,
                   subcommand_args: List[str] | None = None,
                   stdin: Optional[IO]=None,
                   pwd: Union[Path, str, None]=None) -> None:
    """Run a given ArchiveBox subcommand with the given list of args.

    For every command outside ``meta_cmds`` Django is set up first; commands in
    ``archive_cmds`` additionally get the data-folder check before and the
    migrations check after that setup. The subcommand's module is then imported,
    its ``main()`` is called, and background threads are given up to 60s to exit.
    """

    if not subcommand_args:
        subcommand_args = []

    if subcommand not in meta_cmds:
        # imported here so it only happens for non-meta commands
        from ..config.legacy import setup_django, CONFIG

        needs_db = subcommand in archive_cmds
        init_requested = any(flag in subcommand_args for flag in ('--init', '--quick-init'))

        # NOTE(review): check_data_folder / check_migrations are not imported in
        # the visible part of this file -- confirm they are in scope here.
        if needs_db:
            check_data_folder(CONFIG)

        # skip the db check when the command is about to initialize the db itself
        setup_django(in_memory_db=subcommand in fake_db, check_db=needs_db and not init_requested)

        if needs_db:
            check_migrations(CONFIG)

    command_module = import_module(f'.archivebox_{subcommand}', __package__)
    command_module.main(args=subcommand_args, stdin=stdin, pwd=pwd)    # type: ignore

    # wait for webhooks, signals, and other background jobs to finish before exit
    wait_for_bg_threads_to_exit(timeout=60)
|
|
|
|
|
2019-04-19 01:09:54 +00:00
|
|
|
|
2024-09-25 02:04:38 +00:00
|
|
|
|
|
|
|
|
2019-04-19 01:09:54 +00:00
|
|
|
|
2020-07-02 17:31:05 +00:00
|
|
|
class NotProvided:
    """Falsy, zero-length placeholder type meaning "no value was passed"."""

    def __repr__(self):
        return '<not provided>'

    def __bool__(self):
        return False

    def __len__(self):
        return 0


# Annotation for a value that may be absent: either None or a NotProvided instance.
Omitted = Optional[NotProvided]

# Shared sentinel instance used as the "argument omitted" default.
OMITTED = NotProvided()
|
2020-07-02 17:31:05 +00:00
|
|
|
|
2020-07-02 07:53:39 +00:00
|
|
|
|
2024-09-25 02:04:38 +00:00
|
|
|
def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: str | None=None) -> None:
    """Parse the top-level command line and dispatch to the chosen subcommand.

    `args` and `stdin` fall back to `sys.argv[1:]` and `sys.stdin` when left as
    OMITTED. `--version` selects the `version` subcommand; `--help` or no
    subcommand at all selects `help`. Everything after the subcommand name is
    passed through to it untouched.
    """
    if args is OMITTED:
        args = sys.argv[1:]
    if stdin is OMITTED:
        stdin = sys.stdin

    parser = argparse.ArgumentParser(
        prog=__command__,
        description='ArchiveBox: The self-hosted internet archive',
        add_help=False,
    )

    # --help, --version and the positional subcommand are mutually exclusive
    exclusive = parser.add_mutually_exclusive_group()
    exclusive.add_argument('--help', '-h', action='store_true', help=CLI_SUBCOMMANDS['help'].__doc__)
    exclusive.add_argument('--version', action='store_true', help=CLI_SUBCOMMANDS['version'].__doc__)
    exclusive.add_argument(
        "subcommand",
        type=str,
        help= "The name of the subcommand to run",
        nargs='?',
        choices=CLI_SUBCOMMANDS.keys(),
        default=None,
    )
    parser.add_argument("subcommand_args", help="Arguments for the subcommand", nargs=argparse.REMAINDER)

    command = parser.parse_args(args or ())

    if command.version:
        command.subcommand = 'version'
    elif command.help or command.subcommand is None:
        command.subcommand = 'help'

    # the same four values are handed to both the logger and the runner
    invocation = dict(
        subcommand=command.subcommand,
        subcommand_args=command.subcommand_args,
        stdin=stdin or None,
        pwd=pwd or DATA_DIR,
    )

    # help / version / status invocations are not logged
    if command.subcommand not in ('help', 'version', 'status'):
        from ..logging_util import log_cli_command

        log_cli_command(**invocation)

    run_subcommand(**invocation)
|