move main funcs into cli files and switch to using click for CLI

Nick Sweeting 2024-11-19 00:18:51 -08:00
parent 569081a9eb
commit 328eb98a38
35 changed files with 1885 additions and 2296 deletions

View file

@@ -51,6 +51,7 @@ from .pkgs import load_vendored_pkgs # noqa
load_vendored_pkgs()
# print('DONE LOADING VENDORED LIBRARIES')
# print('LOADING ABX PLUGIN SPECIFICATIONS')
# Load ABX Plugin Specifications + Default Implementations
import abx # noqa
import abx_spec_archivebox # noqa
@@ -74,7 +75,7 @@ abx.pm.register(abx_spec_searchbackend.PLUGIN_SPEC())
# Cast to ArchiveBoxPluginSpec to enable static type checking of pm.hook.call() methods
abx.pm = cast(abx.ABXPluginManager[abx_spec_archivebox.ArchiveBoxPluginSpec], abx.pm)
pm = abx.pm
# print('DONE LOADING ABX PLUGIN SPECIFICATIONS')
# Load all pip-installed ABX-compatible plugins
ABX_ECOSYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
@@ -94,7 +95,9 @@ USER_PLUGINS = abx.find_plugins_in_dir(Path(os.getcwd()) / 'user_plugins')
# Import all plugins and register them with ABX Plugin Manager
ALL_PLUGINS = {**ABX_ECOSYSTEM_PLUGINS, **ARCHIVEBOX_BUILTIN_PLUGINS, **USER_PLUGINS}
# print('LOADING ALL PLUGINS')
LOADED_PLUGINS = abx.load_plugins(ALL_PLUGINS)
# print('DONE LOADING ALL PLUGINS')
# Setup basic config, constants, paths, and version
from .config.constants import CONSTANTS # noqa
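
Note the merge order of ALL_PLUGINS above: with dict unpacking, later sources win on key collisions, so a user plugin can shadow a builtin of the same name. A minimal illustration of that precedence (names here are made up):

# later dicts win in {**a, **b, **c} merges, so USER_PLUGINS overrides builtins
merged = {**{'pkg': 'ecosystem'}, **{'pkg': 'builtin'}, **{'pkg': 'user'}}
assert merged['pkg'] == 'user'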

View file

@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""This is the main entry point for the ArchiveBox CLI."""
"""This is the entrypoint for python -m archivebox ..."""
__package__ = 'archivebox'
import archivebox # noqa # make sure monkey patches are applied before anything else
@@ -15,5 +15,4 @@ ASCII_LOGO_MINI = r"""
/_/ \_\_| \___|_| |_|_| \_/ \___|____/ \___/_/\_\
"""
if __name__ == '__main__':
main(args=sys.argv[1:], stdin=sys.stdin)

View file

@@ -6,13 +6,6 @@ from enum import Enum
from ninja import Router, Schema
from archivebox.main import (
add,
remove,
update,
list_all,
schedule,
)
from archivebox.misc.util import ansi_to_html
from archivebox.config.common import ARCHIVING_CONFIG
@@ -60,13 +53,11 @@ class AddCommandSchema(Schema):
urls: List[str]
tag: str = ""
depth: int = 0
update: bool = not ARCHIVING_CONFIG.ONLY_NEW # Default to the opposite of ARCHIVING_CONFIG.ONLY_NEW
update_all: bool = False
index_only: bool = False
overwrite: bool = False
init: bool = False
extractors: str = ""
parser: str = "auto"
extract: str = ""
update: bool = not ARCHIVING_CONFIG.ONLY_NEW # Default to the opposite of ARCHIVING_CONFIG.ONLY_NEW
overwrite: bool = False
index_only: bool = False
class UpdateCommandSchema(Schema):
resume: Optional[float] = 0
@@ -93,7 +84,7 @@ class ScheduleCommandSchema(Schema):
class ListCommandSchema(Schema):
filter_patterns: Optional[List[str]] = ['https://example.com']
filter_type: str = FilterTypeChoices.substring
status: Optional[StatusChoices] = StatusChoices.indexed
status: StatusChoices = StatusChoices.indexed
after: Optional[float] = 0
before: Optional[float] = 999999999999999
sort: str = 'bookmarked_at'
@@ -115,16 +106,16 @@ class RemoveCommandSchema(Schema):
@router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
def cli_add(request, args: AddCommandSchema):
from archivebox.cli.archivebox_add import add
result = add(
urls=args.urls,
tag=args.tag,
depth=args.depth,
update=args.update,
update_all=args.update_all,
index_only=args.index_only,
overwrite=args.overwrite,
init=args.init,
extractors=args.extractors,
extract=args.extract,
parser=args.parser,
)
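
Since the route above maps AddCommandSchema straight onto the CLI's add(), a client can drive it over HTTP. A hypothetical call (the mount prefix and auth are assumptions, not shown in this diff):

# hypothetical client call against the /add route; the URL prefix below is assumed
import requests
resp = requests.post(
    'http://127.0.0.1:8000/api/v1/cli/add',
    json={'urls': ['https://example.com'], 'tag': 'demo', 'depth': 0},
)
print(resp.json())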
@@ -139,6 +130,8 @@ def cli_add(request, args: AddCommandSchema):
@router.post("/update", response=CLICommandResponseSchema, summary='archivebox update [args] [filter_patterns]')
def cli_update(request, args: UpdateCommandSchema):
from archivebox.cli.archivebox_update import update
result = update(
resume=args.resume,
only_new=args.only_new,
@@ -162,6 +155,8 @@ def cli_update(request, args: UpdateCommandSchema):
@router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
def cli_schedule(request, args: ScheduleCommandSchema):
from archivebox.cli.archivebox_schedule import schedule
result = schedule(
import_path=args.import_path,
add=args.add,
@@ -184,9 +179,11 @@ def cli_schedule(request, args: ScheduleCommandSchema):
@router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns] (use this endpoint with ?filter_type=search to search for snapshots)')
def cli_list(request, args: ListCommandSchema):
result = list_all(
@router.post("/search", response=CLICommandResponseSchema, summary='archivebox search [args] [filter_patterns]')
def cli_search(request, args: ListCommandSchema):
from archivebox.cli.archivebox_search import search
result = search(
filter_patterns=args.filter_patterns,
filter_type=args.filter_type,
status=args.status,
@@ -221,6 +218,8 @@ def cli_list(request, args: ListCommandSchema):
@router.post("/remove", response=CLICommandResponseSchema, summary='archivebox remove [args] [filter_patterns]')
def cli_remove(request, args: RemoveCommandSchema):
from archivebox.cli.archivebox_remove import remove
result = remove(
yes=True, # no way to interactively ask for confirmation via API, so we force yes
delete=args.delete,

View file

@@ -1,264 +1,117 @@
__package__ = 'archivebox.cli'
__command__ = 'archivebox'
import os
import sys
import argparse
import threading
from time import sleep
from collections.abc import Mapping
from rich import print
from typing import Optional, List, IO, Union, Iterable
from pathlib import Path
from importlib import import_module
BUILTIN_LIST = list
CLI_DIR = Path(__file__).resolve().parent
# rewrite setup -> install for backwards compatibility
if len(sys.argv) > 1 and sys.argv[1] == 'setup':
import rich_click as click
from rich import print
print(':warning: [bold red]DEPRECATED[/bold red] `archivebox setup` is deprecated, use `archivebox install` instead')
sys.argv[1] = 'install'
from archivebox.config.version import VERSION
if '--debug' in sys.argv:
os.environ['DEBUG'] = 'True'
sys.argv.remove('--debug')
# def list_subcommands() -> Dict[str, str]:
# """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
# COMMANDS = []
# for filename in os.listdir(CLI_DIR):
# if is_cli_module(filename):
# subcommand = filename.replace('archivebox_', '').replace('.py', '')
# module = import_module('.archivebox_{}'.format(subcommand), __package__)
# assert is_valid_cli_module(module, subcommand)
# COMMANDS.append((subcommand, module.main.__doc__))
# globals()[subcommand] = module.main
# display_order = lambda cmd: (
# display_first.index(cmd[0])
# if cmd[0] in display_first else
# 100 + len(cmd[0])
# )
# return dict(sorted(COMMANDS, key=display_order))
# just define it statically, it's much faster:
SUBCOMMAND_MODULES = {
'help': 'archivebox_help',
'version': 'archivebox_version',
'init': 'archivebox_init',
'install': 'archivebox_install',
##############################################
'config': 'archivebox_config',
'add': 'archivebox_add',
'remove': 'archivebox_remove',
'update': 'archivebox_update',
'list': 'archivebox_list',
'status': 'archivebox_status',
'schedule': 'archivebox_schedule',
'server': 'archivebox_server',
'shell': 'archivebox_shell',
'manage': 'archivebox_manage',
# 'oneshot': 'archivebox_oneshot',
class ArchiveBoxGroup(click.Group):
"""lazy loading click group for archivebox commands"""
meta_commands = {
'help': 'archivebox.cli.archivebox_help.main',
'version': 'archivebox.cli.archivebox_version.main',
}
setup_commands = {
'init': 'archivebox.cli.archivebox_init.main',
'install': 'archivebox.cli.archivebox_install.main',
}
archive_commands = {
'add': 'archivebox.cli.archivebox_add.main',
'remove': 'archivebox.cli.archivebox_remove.main',
'update': 'archivebox.cli.archivebox_update.main',
'search': 'archivebox.cli.archivebox_search.main',
'status': 'archivebox.cli.archivebox_status.main',
'config': 'archivebox.cli.archivebox_config.main',
'schedule': 'archivebox.cli.archivebox_schedule.main',
'server': 'archivebox.cli.archivebox_server.main',
'shell': 'archivebox.cli.archivebox_shell.main',
'manage': 'archivebox.cli.archivebox_manage.main',
}
all_subcommands = {
**meta_commands,
**setup_commands,
**archive_commands,
}
renamed_commands = {
'setup': 'install',
'list': 'search',
'import': 'add',
'archive': 'add',
'export': 'search',
}
# every imported command module must have these properties in order to be valid
required_attrs = ('__package__', '__command__', 'main')
# basic checks to make sure imported files are valid subcommands
is_cli_module = lambda fname: fname.startswith('archivebox_') and fname.endswith('.py')
is_valid_cli_module = lambda module, subcommand: (
all(hasattr(module, attr) for attr in required_attrs)
and module.__command__.split(' ')[-1] == subcommand
)
def get_command(self, ctx, cmd_name):
# handle renamed commands
if cmd_name in self.renamed_commands:
new_name = self.renamed_commands[cmd_name]
print(f' [violet]Hint:[/violet] `archivebox {cmd_name}` has been renamed to `archivebox {new_name}`')
cmd_name = new_name
ctx.invoked_subcommand = cmd_name
class LazySubcommands(Mapping):
def keys(self):
return SUBCOMMAND_MODULES.keys()
# handle lazy loading of commands
if cmd_name in self.all_subcommands:
return self._lazy_load(cmd_name)
def values(self):
return [self[key] for key in self.keys()]
# fall-back to using click's default command lookup
return super().get_command(ctx, cmd_name)
def items(self):
return [(key, self[key]) for key in self.keys()]
@classmethod
def _lazy_load(cls, cmd_name):
import_path = cls.all_subcommands[cmd_name]
modname, funcname = import_path.rsplit('.', 1)
def __getitem__(self, key):
module = import_module(f'.{SUBCOMMAND_MODULES[key]}', __package__)
assert is_valid_cli_module(module, key)
return module.main
# print(f'LAZY LOADING {import_path}')
mod = import_module(modname)
func = getattr(mod, funcname)
def __iter__(self):
return iter(SUBCOMMAND_MODULES.keys())
if not getattr(func, '__doc__', None):
raise ValueError(f'lazy loading of {import_path} failed - no docstring found on method')
def __len__(self):
return len(SUBCOMMAND_MODULES)
# if not isinstance(cmd, click.BaseCommand):
# raise ValueError(f'lazy loading of {import_path} failed - not a click command')
CLI_SUBCOMMANDS = LazySubcommands()
return func
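
A minimal self-contained sketch of this lazy-loading pattern (the module path here is hypothetical; each target must itself be a click command with a docstring):

import rich_click as click
from importlib import import_module

class LazyGroup(click.Group):
    # subcommand name -> dotted path to a click command (hypothetical example)
    lazy_commands = {'hello': 'mypkg.cli_hello.main'}

    def get_command(self, ctx, cmd_name):
        if cmd_name in self.lazy_commands:
            modname, funcname = self.lazy_commands[cmd_name].rsplit('.', 1)
            return getattr(import_module(modname), funcname)  # imported on first use
        return super().get_command(ctx, cmd_name)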
# these common commands will appear sorted before any others for ease-of-use
meta_cmds = ('help', 'version') # don't require a valid data folder at all
setup_cmds = ('init', 'setup', 'install') # require a valid data folder, but don't require a DB to be present in it yet
archive_cmds = ('add', 'remove', 'update', 'list', 'status', 'schedule', 'server', 'shell', 'manage') # require valid data folder + existing db present
fake_db = ("oneshot",) # use fake in-memory db
@click.group(cls=ArchiveBoxGroup, invoke_without_command=True)
@click.option('--help', '-h', is_flag=True, help='Show help')
@click.version_option(version=VERSION, package_name='archivebox', message='%(version)s')
@click.pass_context
def cli(ctx, help=False):
"""ArchiveBox: The self-hosted internet archive"""
display_first = (*meta_cmds, *setup_cmds, *archive_cmds)
if help or ctx.invoked_subcommand is None:
ctx.invoke(ctx.command.get_command(ctx, 'help'))
IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler') # threads we don't have to wait for before exiting
def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: Iterable[str]=IGNORED_BG_THREADS, timeout: int=60) -> int:
"""
Block until the specified threads exit. e.g. pass thread_names=('default_hook_handler',) to wait for webhooks.
Useful for waiting for signal handlers, webhooks, etc. to finish running after a mgmt command completes.
"""
wait_for_all: bool = thread_names == ()
thread_matches = lambda thread, ptns: any(ptn in repr(thread) for ptn in ptns)
should_wait = lambda thread: (
not thread_matches(thread, ignore_names)
and (wait_for_all or thread_matches(thread, thread_names)))
for tries in range(timeout):
all_threads = [*threading.enumerate()]
blocking_threads = [*filter(should_wait, all_threads)]
threads_summary = ', '.join(repr(t) for t in blocking_threads)
if blocking_threads:
sleep(1)
if tries == 5: # only show stderr message if we need to wait more than 5s
print(
f'[…] Waiting up to {timeout}s for background jobs (e.g. webhooks) to finish...',
threads_summary,
file=sys.stderr,
)
else:
return tries
raise Exception(f'Background threads failed to exit after {tries}s: {threads_summary}')
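
As the docstring suggests, callers can block on specific background threads before exiting, e.g.:

# e.g. wait up to 30s for any webhook handler threads to finish
wait_for_bg_threads_to_exit(thread_names=('default_hook_handler',), timeout=30)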
def run_subcommand(subcommand: str,
subcommand_args: List[str] | None = None,
stdin: Optional[IO]=None,
pwd: Union[Path, str, None]=None) -> None:
"""Run a given ArchiveBox subcommand with the given list of args"""
subcommand_args = subcommand_args or []
from archivebox.misc.checks import check_migrations
if ctx.invoked_subcommand in ArchiveBoxGroup.archive_commands:
# print('SETUP DJANGO AND CHECK DATA FOLDER')
from archivebox.config.django import setup_django
from archivebox.misc.checks import check_data_folder
setup_django()
check_data_folder()
# print('DATA_DIR is', DATA_DIR)
# print('pwd is', os.getcwd())
cmd_requires_db = (subcommand in archive_cmds)
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
check_db = cmd_requires_db and not init_pending
setup_django(in_memory_db=subcommand in fake_db, check_db=check_db)
for ignore_pattern in ('help', '-h', '--help', 'version', '--version'):
if ignore_pattern in sys.argv[:4]:
cmd_requires_db = False
break
if subcommand in archive_cmds:
if cmd_requires_db:
check_migrations()
module = import_module('.archivebox_{}'.format(subcommand), __package__)
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
# wait for webhooks, signals, and other background jobs to finish before exit
wait_for_bg_threads_to_exit(timeout=60)
class NotProvided:
def __len__(self):
return 0
def __bool__(self):
return False
def __repr__(self):
return '<not provided>'
Omitted = Union[None, NotProvided]
OMITTED = NotProvided()
def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: str | None=None) -> None:
# print('STARTING CLI MAIN ENTRYPOINT')
args = sys.argv[1:] if args is OMITTED else args
stdin = sys.stdin if stdin is OMITTED else stdin
parser = argparse.ArgumentParser(
prog=__command__,
description='ArchiveBox: The self-hosted internet archive',
add_help=False,
)
group = parser.add_mutually_exclusive_group()
group.add_argument(
'--help', '-h',
action='store_true',
help=CLI_SUBCOMMANDS['help'].__doc__,
)
group.add_argument(
'--version',
action='store_true',
help=CLI_SUBCOMMANDS['version'].__doc__,
)
group.add_argument(
"subcommand",
type=str,
help= "The name of the subcommand to run",
nargs='?',
choices=CLI_SUBCOMMANDS.keys(),
default=None,
)
parser.add_argument(
"subcommand_args",
help="Arguments for the subcommand",
nargs=argparse.REMAINDER,
)
command = parser.parse_args(args or ())
if command.version:
command.subcommand = 'version'
elif command.help or command.subcommand is None:
command.subcommand = 'help'
if command.subcommand not in ('version',):
from archivebox.misc.logging_util import log_cli_command
log_cli_command(
subcommand=command.subcommand,
subcommand_args=command.subcommand_args,
stdin=stdin or None,
)
def main(args=None, prog_name=None):
# show `docker run archivebox xyz` in help messages if running in docker
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
prog_name = prog_name or ('docker compose run archivebox' if IN_DOCKER else 'archivebox')
try:
run_subcommand(
subcommand=command.subcommand,
subcommand_args=command.subcommand_args,
stdin=stdin or None,
)
cli(args=args, prog_name=prog_name)
except KeyboardInterrupt:
print('\n\n[red][X] Got CTRL+C. Exiting...[/red]')
if __name__ == '__main__':
main()
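
Because main() just forwards to the click group, it can also be driven programmatically, e.g.:

# e.g. equivalent to running `archivebox version` in a shell
# (note: click runs in standalone mode and exits the process when done)
main(args=['version'])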

View file

@@ -4,10 +4,10 @@ __package__ = 'archivebox.cli'
__command__ = 'archivebox add'
import sys
import argparse
from typing import IO, TYPE_CHECKING
from typing import TYPE_CHECKING
import rich_click as click
from django.utils import timezone
from django.db.models import QuerySet
@@ -18,7 +18,6 @@ from archivebox.config.common import ARCHIVING_CONFIG
from archivebox.config.django import setup_django
from archivebox.config.permissions import USER, HOSTNAME
from archivebox.misc.checks import check_data_folder
from archivebox.misc.logging_util import SmartFormatter, accept_stdin, stderr
from archivebox.parsers import PARSERS
@@ -29,22 +28,142 @@ if TYPE_CHECKING:
ORCHESTRATOR = None
# OLD VERSION:
# def add(urls: Union[str, List[str]],
# tag: str='',
# depth: int=0,
# update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
# update_all: bool=False,
# index_only: bool=False,
# overwrite: bool=False,
# # duplicate: bool=False, # TODO: reuse the logic from admin.py resnapshot to allow adding multiple snapshots by appending timestamp automatically
# init: bool=False,
# extractors: str="",
# parser: str="auto",
# created_by_id: int | None=None,
# out_dir: Path=DATA_DIR) -> List[Link]:
# """Add a new URL or list of URLs to your archive"""
# from core.models import Snapshot, Tag
# # from workers.supervisord_util import start_cli_workers, tail_worker_logs
# # from workers.tasks import bg_archive_link
# assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
# extractors = extractors.split(",") if extractors else []
# if init:
# run_subcommand('init', stdin=None, pwd=out_dir)
# # Load list of links from the existing index
# check_data_folder()
# # worker = start_cli_workers()
# new_links: List[Link] = []
# all_links = load_main_index(out_dir=out_dir)
# log_importing_started(urls=urls, depth=depth, index_only=index_only)
# if isinstance(urls, str):
# # save verbatim stdin to sources
# write_ahead_log = save_text_as_source(urls, filename='{ts}-import.txt', out_dir=out_dir)
# elif isinstance(urls, list):
# # save verbatim args to sources
# write_ahead_log = save_text_as_source('\n'.join(urls), filename='{ts}-import.txt', out_dir=out_dir)
# new_links += parse_links_from_source(write_ahead_log, root_url=None, parser=parser)
# # If we're going one level deeper, download each link and look for more links
# new_links_depth = []
# if new_links and depth == 1:
# log_crawl_started(new_links)
# for new_link in new_links:
# try:
# downloaded_file = save_file_as_source(new_link.url, filename=f'{new_link.timestamp}-crawl-{new_link.domain}.txt', out_dir=out_dir)
# new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
# except Exception as err:
# stderr('[!] Failed to get contents of URL {new_link.url}', err, color='red')
# imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
# new_links = dedupe_links(all_links, imported_links)
# write_main_index(links=new_links, out_dir=out_dir, created_by_id=created_by_id)
# all_links = load_main_index(out_dir=out_dir)
# tags = [
# Tag.objects.get_or_create(name=name.strip(), defaults={'created_by_id': created_by_id})[0]
# for name in tag.split(',')
# if name.strip()
# ]
# if tags:
# for link in imported_links:
# snapshot = Snapshot.objects.get(url=link.url)
# snapshot.tags.add(*tags)
# snapshot.tags_str(nocache=True)
# snapshot.save()
# # print(f' √ Tagged {len(imported_links)} Snapshots with {len(tags)} tags {tags_str}')
# if index_only:
# # mock archive all the links using the fake index_only extractor method in order to update their state
# if overwrite:
# archive_links(imported_links, overwrite=overwrite, methods=['index_only'], out_dir=out_dir, created_by_id=created_by_id)
# else:
# archive_links(new_links, overwrite=False, methods=['index_only'], out_dir=out_dir, created_by_id=created_by_id)
# else:
# # fully run the archive extractor methods for each link
# archive_kwargs = {
# "out_dir": out_dir,
# "created_by_id": created_by_id,
# }
# if extractors:
# archive_kwargs["methods"] = extractors
# stderr()
# ts = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
# if update:
# stderr(f'[*] [{ts}] Archiving + updating {len(imported_links)}/{len(all_links)}', len(imported_links), 'URLs from added set...', color='green')
# archive_links(imported_links, overwrite=overwrite, **archive_kwargs)
# elif update_all:
# stderr(f'[*] [{ts}] Archiving + updating {len(all_links)}/{len(all_links)}', len(all_links), 'URLs from entire library...', color='green')
# archive_links(all_links, overwrite=overwrite, **archive_kwargs)
# elif overwrite:
# stderr(f'[*] [{ts}] Archiving + overwriting {len(imported_links)}/{len(all_links)}', len(imported_links), 'URLs from added set...', color='green')
# archive_links(imported_links, overwrite=True, **archive_kwargs)
# elif new_links:
# stderr(f'[*] [{ts}] Archiving {len(new_links)}/{len(all_links)} URLs from added set...', color='green')
# archive_links(new_links, overwrite=False, **archive_kwargs)
# # tail_worker_logs(worker['stdout_logfile'])
# # if CAN_UPGRADE:
# # hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
# return new_links
def add(urls: str | list[str],
tag: str='',
depth: int=0,
update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
update_all: bool=False,
index_only: bool=False,
overwrite: bool=False,
extractors: str="",
tag: str='',
parser: str="auto",
extract: str="",
persona: str='Default',
overwrite: bool=False,
update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
index_only: bool=False,
bg: bool=False,
created_by_id: int | None=None) -> QuerySet['Snapshot']:
"""Add a new URL or list of URLs to your archive"""
global ORCHESTRATOR
depth = int(depth)
assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
# 0. setup abx, django, check_data_folder
@@ -56,7 +175,6 @@ def add(urls: str | list[str],
from archivebox.base_models.models import get_or_create_system_user_pk
created_by_id = created_by_id or get_or_create_system_user_pk()
# 1. save the provided urls to sources/2024-11-05__23-59-59__cli_add.txt
@@ -72,7 +190,7 @@ def add(urls: str | list[str],
'ONLY_NEW': not update,
'INDEX_ONLY': index_only,
'OVERWRITE': overwrite,
'EXTRACTORS': extractors,
'EXTRACTORS': extract,
'DEFAULT_PERSONA': persona or 'Default',
})
# 3. create a new Crawl pointing to the Seed
@@ -91,118 +209,23 @@ def add(urls: str | list[str],
return crawl.snapshot_set.all()
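
Given the signature above, a hypothetical programmatic call looks like this (argument values are illustrative):

# returns a QuerySet of the Snapshots created for this crawl
snapshots = add(
    urls=['https://example.com'],
    tag='demo,example',
    depth=0,
    extract='title,favicon',
)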
def main(args: list[str] | None=None, stdin: IO | None=None, pwd: str | None=None) -> None:
@click.command()
@click.option('--depth', '-d', type=click.Choice(('0', '1')), default='0', help='Recursively archive linked pages up to N hops away')
@click.option('--tag', '-t', default='', help='Comma-separated list of tags to add to each snapshot e.g. tag1,tag2,tag3')
@click.option('--parser', type=click.Choice(['auto', *PARSERS.keys()]), default='auto', help='Parser for reading input URLs')
@click.option('--extract', '-e', default='', help='Comma-separated list of extractors to use e.g. title,favicon,screenshot,singlefile,...')
@click.option('--persona', default='Default', help='Authentication profile to use when archiving')
@click.option('--overwrite', '-F', is_flag=True, help='Overwrite existing data if URLs have been archived previously')
@click.option('--update', is_flag=True, default=not ARCHIVING_CONFIG.ONLY_NEW, help='Retry any previously skipped/failed URLs when re-adding them')
@click.option('--index-only', is_flag=True, help='Just add the URLs to the index without archiving them now')
# @click.option('--update-all', is_flag=True, help='Update ALL links in index when finished adding new ones')
@click.option('--bg', is_flag=True, help='Run crawl in background worker instead of immediately')
@click.argument('urls', nargs=-1, type=click.Path())
def main(**kwargs):
"""Add a new URL or list of URLs to your archive"""
parser = argparse.ArgumentParser(
prog=__command__,
description=add.__doc__,
add_help=True,
formatter_class=SmartFormatter,
)
parser.add_argument(
'--tag', '-t',
type=str,
default='',
help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
)
parser.add_argument(
'--update', #'-u',
action='store_true',
default=not ARCHIVING_CONFIG.ONLY_NEW, # when ONLY_NEW=True we skip updating old links
help="Also retry previously skipped/failed links when adding new links",
)
parser.add_argument(
'--update-all', #'-n',
action='store_true',
default=False,
help="Also update ALL links in index when finished adding new links",
)
parser.add_argument(
'--index-only', #'-o',
action='store_true',
help="Add the links to the main index without archiving them",
)
parser.add_argument(
'urls',
nargs='*',
type=str,
default=None,
help=(
'URLs or paths to archive e.g.:\n'
' https://getpocket.com/users/USERNAME/feed/all\n'
' https://example.com/some/rss/feed.xml\n'
' https://example.com\n'
' ~/Downloads/firefox_bookmarks_export.html\n'
' ~/Desktop/sites_list.csv\n'
)
)
parser.add_argument(
"--depth",
action="store",
default=0,
choices=[0, 1],
type=int,
help="Recursively archive all linked pages up to this many hops away"
)
parser.add_argument(
"--overwrite",
default=False,
action="store_true",
help="Re-archive URLs from scratch, overwriting any existing files"
)
parser.add_argument(
"--extract", '-e',
type=str,
help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
This does not take precedence over the configuration",
default=""
)
parser.add_argument(
"--parser",
type=str,
help="Parser used to read inputted URLs.",
default="auto",
choices=["auto", *PARSERS.keys()],
)
parser.add_argument(
"--persona",
type=str,
help="Name of accounts persona to use when archiving.",
default="Default",
)
parser.add_argument(
"--bg",
default=False,
action="store_true",
help="Enqueue a background worker to complete the crawl instead of running it immediately",
)
command = parser.parse_args(args or ())
urls = command.urls
stdin_urls = ''
if not urls:
stdin_urls = accept_stdin(stdin)
if (stdin_urls and urls) or (not stdin and not urls):
stderr(
'[X] You must pass URLs/paths to add via stdin or CLI arguments.\n',
color='red',
)
raise SystemExit(2)
add(
urls=stdin_urls or urls,
depth=command.depth,
tag=command.tag,
update=command.update,
update_all=command.update_all,
index_only=command.index_only,
overwrite=command.overwrite,
extractors=command.extract,
parser=command.parser,
persona=command.persona,
bg=command.bg,
)
add(**kwargs)
if __name__ == '__main__':
main(args=sys.argv[1:], stdin=sys.stdin)
main()

View file

@@ -12,7 +12,130 @@ from typing import Optional, List, IO
from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR
from archivebox.misc.logging_util import SmartFormatter, accept_stdin
from ..main import config
# @enforce_types
def config(config_options_str: Optional[str]=None,
config_options: Optional[List[str]]=None,
get: bool=False,
set: bool=False,
search: bool=False,
reset: bool=False,
out_dir: Path=DATA_DIR) -> None:
"""Get and set your ArchiveBox project configuration values"""
from rich import print
check_data_folder()
if config_options and config_options_str:
stderr(
'[X] You should either pass config values as arguments '
'or via stdin, but not both.\n',
color='red',
)
raise SystemExit(2)
elif config_options_str:
config_options = config_options_str.split('\n')
FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
CONFIGS = archivebox.pm.hook.get_CONFIGS()
config_options = config_options or []
no_args = not (get or set or reset or config_options)
matching_config = {}
if search:
if config_options:
config_options = [get_real_name(key) for key in config_options]
matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
for config_section in CONFIGS.values():
aliases = config_section.aliases
for search_key in config_options:
# search all aliases in the section
for alias_key, key in aliases.items():
if search_key.lower() in alias_key.lower():
matching_config[key] = config_section.model_dump()[key]
# search all keys and values in the section
for existing_key, value in config_section.model_dump().items():
if search_key.lower() in existing_key.lower() or search_key.lower() in str(value).lower():
matching_config[existing_key] = value
print(printable_config(matching_config))
raise SystemExit(not matching_config)
elif get or no_args:
if config_options:
config_options = [get_real_name(key) for key in config_options]
matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
failed_config = [key for key in config_options if key not in FLAT_CONFIG]
if failed_config:
stderr()
stderr('[X] These options failed to get', color='red')
stderr(' {}'.format('\n '.join(config_options)))
raise SystemExit(1)
else:
matching_config = FLAT_CONFIG
print(printable_config(matching_config))
raise SystemExit(not matching_config)
elif set:
new_config = {}
failed_options = []
for line in config_options:
if line.startswith('#') or not line.strip():
continue
if '=' not in line:
stderr('[X] Config KEY=VALUE must have an = sign in it', color='red')
stderr(f' {line}')
raise SystemExit(2)
raw_key, val = line.split('=', 1)
raw_key = raw_key.upper().strip()
key = get_real_name(raw_key)
if key != raw_key:
stderr(f'[i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.', color='lightyellow')
if key in FLAT_CONFIG:
new_config[key] = val.strip()
else:
failed_options.append(line)
if new_config:
before = FLAT_CONFIG
matching_config = write_config_file(new_config)
after = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()}
print(printable_config(matching_config))
side_effect_changes = {}
for key, val in after.items():
if key in FLAT_CONFIG and (str(before[key]) != str(after[key])) and (key not in matching_config):
side_effect_changes[key] = after[key]
# import ipdb; ipdb.set_trace()
if side_effect_changes:
stderr()
stderr('[i] Note: This change also affected these other options that depended on it:', color='lightyellow')
print(' {}'.format(printable_config(side_effect_changes, prefix=' ')))
if failed_options:
stderr()
stderr('[X] These options failed to set (check for typos):', color='red')
stderr(' {}'.format('\n '.join(failed_options)))
raise SystemExit(1)
elif reset:
stderr('[X] This command is not implemented yet.', color='red')
stderr(' Please manually remove the relevant lines from your config file:')
raise SystemExit(2)
else:
stderr('[X] You must pass either --get or --set, or no arguments to get the whole config.', color='red')
stderr(' archivebox config')
stderr(' archivebox config --get SOME_KEY')
stderr(' archivebox config --set SOME_KEY=SOME_VALUE')
raise SystemExit(2)
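
Note that every branch above exits via SystemExit, which direct callers need to catch. A sketch (the config key is an assumed example):

try:
    config(get=True, config_options=['TIMEOUT'])  # print one key's value
except SystemExit as e:
    ok = not e.code  # falsy exit code means the key was found and printed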
@docstring(config.__doc__)

View file

@@ -1,32 +1,105 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox help'
import sys
import argparse
import os
from pathlib import Path
from typing import Optional, List, IO
from archivebox.misc.util import docstring
from archivebox.misc.logging_util import SmartFormatter, reject_stdin
from archivebox.config import DATA_DIR
from ..main import help
import click
from rich import print
from rich.panel import Panel
@docstring(help.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
parser = argparse.ArgumentParser(
prog=__command__,
description=help.__doc__,
add_help=True,
formatter_class=SmartFormatter,
def help() -> None:
"""Print the ArchiveBox help message and usage"""
from archivebox.cli import ArchiveBoxGroup
from archivebox.config import CONSTANTS
from archivebox.config.permissions import IN_DOCKER
from archivebox.misc.logging_util import log_cli_command
log_cli_command('help', [], None, '.')
COMMANDS_HELP_TEXT = '\n '.join(
f'[green]{cmd.ljust(20)}[/green] {ArchiveBoxGroup._lazy_load(cmd).__doc__}'
for cmd in ArchiveBoxGroup.meta_commands.keys()
) + '\n\n ' + '\n '.join(
f'[green]{cmd.ljust(20)}[/green] {ArchiveBoxGroup._lazy_load(cmd).__doc__}'
for cmd in ArchiveBoxGroup.setup_commands.keys()
) + '\n\n ' + '\n '.join(
f'[green]{cmd.ljust(20)}[/green] {ArchiveBoxGroup._lazy_load(cmd).__doc__}'
for cmd in ArchiveBoxGroup.archive_commands.keys()
)
parser.parse_args(args or ())
reject_stdin(__command__, stdin)
help(out_dir=Path(pwd) if pwd else DATA_DIR)
DOCKER_USAGE = '''
[dodger_blue3]Docker Usage:[/dodger_blue3]
[grey53]# using Docker Compose:[/grey53]
[blue]docker compose run[/blue] [dark_green]archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
[grey53]# using Docker:[/grey53]
[blue]docker run[/blue] -v [light_slate_blue]$PWD:/data[/light_slate_blue] [grey53]-p 8000:8000[/grey53] -it [dark_green]archivebox/archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
''' if IN_DOCKER else ''
DOCKER_DOCS = '\n [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Docker[/link]' if IN_DOCKER else ''
DOCKER_OUTSIDE_HINT = "\n [grey53]# outside of Docker:[/grey53]" if IN_DOCKER else ''
DOCKER_CMD_PREFIX = "[blue]docker ... [/blue]" if IN_DOCKER else ''
print(f'''{DOCKER_USAGE}
[deep_sky_blue4]Usage:[/deep_sky_blue4]{DOCKER_OUTSIDE_HINT}
[dark_green]archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
[deep_sky_blue4]Commands:[/deep_sky_blue4]
{COMMANDS_HELP_TEXT}
[deep_sky_blue4]Documentation:[/deep_sky_blue4]
[link=https://github.com/ArchiveBox/ArchiveBox/wiki]https://github.com/ArchiveBox/ArchiveBox/wiki[/link]{DOCKER_DOCS}
[link=https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#cli-usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Usage[/link]
[link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration[/link]
''')
if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) and CONSTANTS.ARCHIVE_DIR.is_dir():
pretty_out_dir = str(CONSTANTS.DATA_DIR).replace(str(Path('~').expanduser()), '~')
EXAMPLE_USAGE = f'''
[light_slate_blue]DATA DIR[/light_slate_blue]: [yellow]{pretty_out_dir}[/yellow]
[violet]Hint:[/violet] [i]Common maintenance tasks:[/i]
[dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# make sure database is up-to-date (safe to run multiple times)[/grey53]
[dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# make sure plugins are up-to-date (wget, chrome, singlefile, etc.)[/grey53]
[dark_green]archivebox[/dark_green] [green]status[/green] [grey53]# get a health checkup report on your collection[/grey53]
[dark_green]archivebox[/dark_green] [green]update[/green] [grey53]# retry any previously failed or interrupted archiving tasks[/grey53]
[violet]Hint:[/violet] [i]More example usage:[/i]
[dark_green]archivebox[/dark_green] [green]add[/green] --depth=1 "https://example.com/some/page"
[dark_green]archivebox[/dark_green] [green]list[/green] --sort=timestamp --csv=timestamp,downloaded_at,url,title
[dark_green]archivebox[/dark_green] [green]schedule[/green] --every=day --depth=1 "https://example.com/some/feed.rss"
[dark_green]archivebox[/dark_green] [green]server[/green] [blue]0.0.0.0:8000[/blue] [grey53]# Start the Web UI / API server[/grey53]
'''
print(Panel(EXAMPLE_USAGE, expand=False, border_style='grey53', title='[green3]:white_check_mark: A collection [light_slate_blue]DATA DIR[/light_slate_blue] is currently active[/green3]', subtitle='Commands run inside this dir will only apply to this collection.'))
else:
DATA_SETUP_HELP = '\n'
if IN_DOCKER:
DATA_SETUP_HELP += '[violet]Hint:[/violet] When using Docker, you need to mount a volume to use as your data dir:\n'
DATA_SETUP_HELP += ' docker run [violet]-v /some/path/data:/data[/violet] archivebox/archivebox ...\n\n'
DATA_SETUP_HELP += 'To load an [dark_blue]existing[/dark_blue] collection:\n'
DATA_SETUP_HELP += ' 1. [green]cd[/green] ~/archivebox/data [grey53]# go into existing [light_slate_blue]DATA DIR[/light_slate_blue] (can be anywhere)[/grey53]\n'
DATA_SETUP_HELP += f' 2. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# migrate to latest version (safe to run multiple times)[/grey53]\n'
DATA_SETUP_HELP += f' 3. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# auto-update all plugins (wget, chrome, singlefile, etc.)[/grey53]\n'
DATA_SETUP_HELP += f' 4. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]help[/green] [grey53]# ...get help with next steps... [/grey53]\n\n'
DATA_SETUP_HELP += 'To start a [sea_green1]new[/sea_green1] collection:\n'
DATA_SETUP_HELP += ' 1. [green]mkdir[/green] ~/archivebox/data [grey53]# create a new, empty [light_slate_blue]DATA DIR[/light_slate_blue] (can be anywhere)[/grey53]\n'
DATA_SETUP_HELP += ' 2. [green]cd[/green] ~/archivebox/data [grey53]# cd into the new directory[/grey53]\n'
DATA_SETUP_HELP += f' 3. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# initialize ArchiveBox in the new data dir[/grey53]\n'
DATA_SETUP_HELP += f' 4. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# auto-install all plugins (wget, chrome, singlefile, etc.)[/grey53]\n'
DATA_SETUP_HELP += f' 5. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]help[/green] [grey53]# ... get help with next steps... [/grey53]\n'
print(Panel(DATA_SETUP_HELP, expand=False, border_style='grey53', title='[red]:cross_mark: No collection is currently active[/red]', subtitle='All archivebox [green]commands[/green] should be run from inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]'))
@click.command()
@click.option('--help', '-h', is_flag=True, help='Show help')
def main(**kwargs):
"""Print the ArchiveBox help message and usage"""
return help()
if __name__ == '__main__':
main(args=sys.argv[1:], stdin=sys.stdin)
main()

View file

@@ -5,13 +5,193 @@ __command__ = 'archivebox init'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR
from archivebox.misc.logging_util import SmartFormatter, reject_stdin
from ..main import init
def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Path=DATA_DIR) -> None:
"""Initialize a new ArchiveBox collection in the current directory"""
from core.models import Snapshot
from rich import print
# if os.access(out_dir / CONSTANTS.JSON_INDEX_FILENAME, os.F_OK):
# print("[red]:warning: This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.[/red]", file=sys.stderr)
# print("[red] You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.[/red]", file=sys.stderr)
is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_DATA_DIR)
existing_index = os.path.isfile(CONSTANTS.DATABASE_FILE)
if is_empty and not existing_index:
print(f'[turquoise4][+] Initializing a new ArchiveBox v{VERSION} collection...[/turquoise4]')
print('[green]----------------------------------------------------------------------[/green]')
elif existing_index:
# TODO: properly detect and print the existing version in current index as well
print(f'[green][*] Verifying and updating existing ArchiveBox collection to v{VERSION}...[/green]')
print('[green]----------------------------------------------------------------------[/green]')
else:
if force:
print('[red][!] This folder appears to already have files in it, but no index.sqlite3 is present.[/red]')
print('[red] Because --force was passed, ArchiveBox will initialize anyway (which may overwrite existing files).[/red]')
else:
print(
("[red][X] This folder appears to already have files in it, but no index.sqlite3 present.[/red]\n\n"
" You must run init in a completely empty directory, or an existing data folder.\n\n"
" [violet]Hint:[/violet] To import an existing data folder make sure to cd into the folder first, \n"
" then run and run 'archivebox init' to pick up where you left off.\n\n"
" (Always make sure your data folder is backed up first before updating ArchiveBox)"
)
)
raise SystemExit(2)
if existing_index:
print('\n[green][*] Verifying archive folder structure...[/green]')
else:
print('\n[green][+] Building archive folder structure...[/green]')
print(f' + ./{CONSTANTS.ARCHIVE_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.SOURCES_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.LOGS_DIR.relative_to(DATA_DIR)}...')
Path(CONSTANTS.SOURCES_DIR).mkdir(exist_ok=True)
Path(CONSTANTS.ARCHIVE_DIR).mkdir(exist_ok=True)
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
print(f' + ./{CONSTANTS.CONFIG_FILE.relative_to(DATA_DIR)}...')
# create the .archivebox_id file with a unique ID for this collection
from archivebox.config.paths import _get_collection_id
_get_collection_id(CONSTANTS.DATA_DIR, force_create=True)
# create the ArchiveBox.conf file
write_config_file({'SECRET_KEY': SERVER_CONFIG.SECRET_KEY})
if os.access(CONSTANTS.DATABASE_FILE, os.F_OK):
print('\n[green][*] Verifying main SQL index and running any migrations needed...[/green]')
else:
print('\n[green][+] Building main SQL index and running initial migrations...[/green]')
for migration_line in apply_migrations(out_dir):
sys.stdout.write(f' {migration_line}\n')
assert os.path.isfile(CONSTANTS.DATABASE_FILE) and os.access(CONSTANTS.DATABASE_FILE, os.R_OK)
print()
print(f' √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}')
# from django.contrib.auth.models import User
# if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exclude(username='system').exists():
# print('{green}[+] Creating admin user account...{reset}'.format(**SHELL_CONFIG.ANSI))
# call_command("createsuperuser", interactive=True)
print()
print('[dodger_blue3][*] Checking links from indexes and archive folders (safe to Ctrl+C)...[/dodger_blue3]')
all_links = Snapshot.objects.none()
pending_links: Dict[str, Link] = {}
if existing_index:
all_links = load_main_index(out_dir=out_dir, warn=False)
print(f' √ Loaded {all_links.count()} links from existing main index.')
if quick:
print(' > Skipping full snapshot directory check (quick mode)')
else:
try:
# Links in data folders that don't match their timestamp
fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir)
if fixed:
print(f' [yellow]√ Fixed {len(fixed)} data directory locations that didn\'t match their link timestamps.[/yellow]')
if cant_fix:
print(f' [red]! Could not fix {len(cant_fix)} data directory locations due to conflicts with existing folders.[/red]')
# Links in JSON index but not in main index
orphaned_json_links = {
link.url: link
for link in parse_json_main_index(out_dir)
if not all_links.filter(url=link.url).exists()
}
if orphaned_json_links:
pending_links.update(orphaned_json_links)
print(f' [yellow]√ Added {len(orphaned_json_links)} orphaned links from existing JSON index...[/yellow]')
# Links in data dir indexes but not in main index
orphaned_data_dir_links = {
link.url: link
for link in parse_json_links_details(out_dir)
if not all_links.filter(url=link.url).exists()
}
if orphaned_data_dir_links:
pending_links.update(orphaned_data_dir_links)
print(f' [yellow]√ Added {len(orphaned_data_dir_links)} orphaned links from existing archive directories.[/yellow]')
# Links in invalid/duplicate data dirs
invalid_folders = {
folder: link
for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items()
}
if invalid_folders:
print(f' [red]! Skipped adding {len(invalid_folders)} invalid link data directories.[/red]')
print(' X ' + '\n X '.join(f'./{Path(folder).relative_to(DATA_DIR)} {link}' for folder, link in invalid_folders.items()))
print()
print(' [violet]Hint:[/violet] For more information about the link data directories that were skipped, run:')
print(' archivebox status')
print(' archivebox list --status=invalid')
except (KeyboardInterrupt, SystemExit):
print(file=sys.stderr)
print('[yellow]:stop_sign: Stopped checking archive directories due to Ctrl-C/SIGTERM[/yellow]', file=sys.stderr)
print(' Your archive data is safe, but you should re-run `archivebox init` to finish the process later.', file=sys.stderr)
print(file=sys.stderr)
print(' [violet]Hint:[/violet] In the future you can run a quick init without checking dirs like so:', file=sys.stderr)
print(' archivebox init --quick', file=sys.stderr)
raise SystemExit(1)
write_main_index(list(pending_links.values()), out_dir=out_dir)
print('\n[green]----------------------------------------------------------------------[/green]')
from django.contrib.auth.models import User
if (SERVER_CONFIG.ADMIN_USERNAME and SERVER_CONFIG.ADMIN_PASSWORD) and not User.objects.filter(username=SERVER_CONFIG.ADMIN_USERNAME).exists():
print('[green][+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.[/green]')
User.objects.create_superuser(username=SERVER_CONFIG.ADMIN_USERNAME, password=SERVER_CONFIG.ADMIN_PASSWORD)
if existing_index:
print('[green][√] Done. Verified and updated the existing ArchiveBox collection.[/green]')
else:
print(f'[green][√] Done. A new ArchiveBox collection was initialized ({len(all_links) + len(pending_links)} links).[/green]')
json_index = out_dir / CONSTANTS.JSON_INDEX_FILENAME
html_index = out_dir / CONSTANTS.HTML_INDEX_FILENAME
index_name = f"{date.today()}_index_old"
if os.access(json_index, os.F_OK):
json_index.rename(f"{index_name}.json")
if os.access(html_index, os.F_OK):
html_index.rename(f"{index_name}.html")
CONSTANTS.PERSONAS_DIR.mkdir(parents=True, exist_ok=True)
CONSTANTS.DEFAULT_TMP_DIR.mkdir(parents=True, exist_ok=True)
CONSTANTS.DEFAULT_LIB_DIR.mkdir(parents=True, exist_ok=True)
from archivebox.config.common import STORAGE_CONFIG
STORAGE_CONFIG.TMP_DIR.mkdir(parents=True, exist_ok=True)
STORAGE_CONFIG.LIB_DIR.mkdir(parents=True, exist_ok=True)
if install:
run_subcommand('install', pwd=out_dir)
if Snapshot.objects.count() < 25: # hide the hints for experienced users
print()
print(' [violet]Hint:[/violet] To view your archive index, run:')
print(' archivebox server # then visit [deep_sky_blue4][link=http://127.0.0.1:8000]http://127.0.0.1:8000[/link][/deep_sky_blue4]')
print()
print(' To add new links, you can run:')
print(" archivebox add < ~/some/path/to/list_of_links.txt")
print()
print(' For more usage and examples, run:')
print(' archivebox help')
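
A hypothetical programmatic call, equivalent to running `archivebox init --quick` inside an existing data dir (the path is a placeholder):

# skips the slow per-snapshot directory check; still verifies index + migrations
init(quick=True, out_dir=Path('/path/to/data'))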
@docstring(init.__doc__)

View file

@@ -3,6 +3,7 @@
__package__ = 'archivebox.cli'
__command__ = 'archivebox install'
import os
import sys
import argparse
from pathlib import Path
@@ -11,11 +12,145 @@ from typing import Optional, List, IO
from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR
from archivebox.misc.logging_util import SmartFormatter, reject_stdin
from ..main import install
def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, binaries: Optional[List[str]]=None, dry_run: bool=False) -> None:
"""Automatically install all ArchiveBox dependencies and extras"""
# if running as root:
# - run init to create index + lib dir
# - chown -R 911 DATA_DIR
# - install all binaries as root
# - chown -R 911 LIB_DIR
# else:
# - run init to create index + lib dir as current user
# - install all binaries as current user
# - recommend user re-run with sudo if any deps need to be installed as root
from rich import print
from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
from archivebox.config.paths import get_or_create_working_lib_dir
if not (os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()):
run_subcommand('init', stdin=None, pwd=out_dir) # must init full index because we need a db to store InstalledBinary entries in
print('\n[green][+] Installing ArchiveBox dependencies automatically...[/green]')
# we never want the data dir to be owned by root; detect the existing owner of DATA_DIR to guess the desired non-root UID
if IS_ROOT:
EUID = os.geteuid()
# if we have sudo/root permissions, take advantage of them just while installing dependencies
print()
print(f'[yellow]:warning: Running as UID=[blue]{EUID}[/blue] with [red]sudo[/red] only for dependencies that need it.[/yellow]')
print(f' DATA_DIR, LIB_DIR, and TMP_DIR will be owned by [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue].')
print()
LIB_DIR = get_or_create_working_lib_dir()
package_manager_names = ', '.join(
f'[yellow]{binprovider.name}[/yellow]'
for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values()))
if not binproviders or (binproviders and binprovider.name in binproviders)
)
print(f'[+] Setting up package managers {package_manager_names}...')
for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values())):
if binproviders and binprovider.name not in binproviders:
continue
try:
binprovider.setup()
except Exception:
# it's ok, installing binaries below will automatically set up package managers as needed
# e.g. if user does not have npm available we cannot set it up here yet, but once npm Binary is installed
# the next package that depends on npm will automatically call binprovider.setup() during its own install
pass
print()
for binary in reversed(list(abx.as_dict(abx.pm.hook.get_BINARIES()).values())):
if binary.name in ('archivebox', 'django', 'sqlite', 'python'):
# obviously must already be installed if we are running
continue
if binaries and binary.name not in binaries:
continue
providers = ' [grey53]or[/grey53] '.join(
provider.name for provider in binary.binproviders_supported
if not binproviders or (binproviders and provider.name in binproviders)
)
if not providers:
continue
print(f'[+] Detecting / Installing [yellow]{binary.name.ljust(22)}[/yellow] using [red]{providers}[/red]...')
try:
with SudoPermission(uid=0, fallback=True):
# print(binary.load_or_install(fresh=True).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}))
if binproviders:
providers_supported_by_binary = [provider.name for provider in binary.binproviders_supported]
for binprovider_name in binproviders:
if binprovider_name not in providers_supported_by_binary:
continue
try:
if dry_run:
# always show install commands when doing a dry run
sys.stderr.write("\033[2;49;90m") # grey53
result = binary.install(binproviders=[binprovider_name], dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
sys.stderr.write("\033[00m\n") # reset
else:
loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False)
result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
if result and result['loaded_version']:
break
except Exception as e:
print(f'[red]:cross_mark: Failed to install {binary.name} using {binprovider_name} as user {ARCHIVEBOX_USER}: {e}[/red]')
else:
if dry_run:
sys.stderr.write("\033[2;49;90m") # grey53
binary.install(dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
sys.stderr.write("\033[00m\n") # reset
else:
loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, fresh=True, dry_run=dry_run)
result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
if IS_ROOT and LIB_DIR:
with SudoPermission(uid=0):
if ARCHIVEBOX_USER == 0:
os.system(f'chmod -R 777 "{LIB_DIR.resolve()}"')
else:
os.system(f'chown -R {ARCHIVEBOX_USER} "{LIB_DIR.resolve()}"')
except Exception as e:
print(f'[red]:cross_mark: Failed to install {binary.name} as user {ARCHIVEBOX_USER}: {e}[/red]')
if binaries and len(binaries) == 1:
# if we are only installing a single binary, raise the exception so the user can see what went wrong
raise
from django.contrib.auth import get_user_model
User = get_user_model()
if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
stderr('\n[+] Don\'t forget to create a new admin user for the Web UI...', color='green')
stderr(' archivebox manage createsuperuser')
# run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
print('\n[green][√] Set up ArchiveBox and its dependencies successfully.[/green]\n', file=sys.stderr)
from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
extra_args = []
if binproviders:
extra_args.append(f'--binproviders={",".join(binproviders)}')
if binaries:
extra_args.append(f'--binaries={",".join(binaries)}')
proc = run_shell([ARCHIVEBOX_BINARY.load().abspath, 'version', *extra_args], capture_output=False, cwd=out_dir)
raise SystemExit(proc.returncode)
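
For example, a targeted dry run (the binary and provider names are assumed, not taken from this diff):

# print the install commands for one binary via one provider, without running them
install(binaries=['wget'], binproviders=['apt'], dry_run=True)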
@docstring(install.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
parser = argparse.ArgumentParser(
prog=__command__,
description=install.__doc__,

View file

@@ -1,139 +0,0 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox list'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from archivebox.config import DATA_DIR
from archivebox.misc.util import docstring
from archivebox.misc.logging_util import SmartFormatter, reject_stdin, stderr
from ..main import list_all
from ..index import (
LINK_FILTERS,
get_indexed_folders,
get_archived_folders,
get_unarchived_folders,
get_present_folders,
get_valid_folders,
get_invalid_folders,
get_duplicate_folders,
get_orphaned_folders,
get_corrupted_folders,
get_unrecognized_folders,
)
@docstring(list_all.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
parser = argparse.ArgumentParser(
prog=__command__,
description=list_all.__doc__,
add_help=True,
formatter_class=SmartFormatter,
)
group = parser.add_mutually_exclusive_group()
group.add_argument(
'--csv', #'-c',
type=str,
help="Print the output in CSV format with the given columns, e.g.: timestamp,url,extension",
default=None,
)
group.add_argument(
'--json', #'-j',
action='store_true',
help="Print the output in JSON format with all columns included",
)
group.add_argument(
'--html',
action='store_true',
help="Print the output in HTML format"
)
parser.add_argument(
'--with-headers',
action='store_true',
help='Include the headers in the output document'
)
parser.add_argument(
'--sort', #'-s',
type=str,
help="List the links sorted using the given key, e.g. timestamp or updated",
default=None,
)
parser.add_argument(
'--before', #'-b',
type=float,
help="List only links bookmarked before (less than) the given timestamp",
default=None,
)
parser.add_argument(
'--after', #'-a',
type=float,
help="List only links bookmarked after (greater than or equal to) the given timestamp",
default=None,
)
parser.add_argument(
'--status',
type=str,
choices=('indexed', 'archived', 'unarchived', 'present', 'valid', 'invalid', 'duplicate', 'orphaned', 'corrupted', 'unrecognized'),
default='indexed',
help=(
'List only links or data directories that have the given status\n'
f' indexed {get_indexed_folders.__doc__} (the default)\n'
f' archived {get_archived_folders.__doc__}\n'
f' unarchived {get_unarchived_folders.__doc__}\n'
'\n'
f' present {get_present_folders.__doc__}\n'
f' valid {get_valid_folders.__doc__}\n'
f' invalid {get_invalid_folders.__doc__}\n'
'\n'
f' duplicate {get_duplicate_folders.__doc__}\n'
f' orphaned {get_orphaned_folders.__doc__}\n'
f' corrupted {get_corrupted_folders.__doc__}\n'
f' unrecognized {get_unrecognized_folders.__doc__}\n'
)
)
parser.add_argument(
'--filter-type', '-t',
type=str,
choices=(*LINK_FILTERS.keys(), 'search'),
default='exact',
help='Type of pattern matching to use when filtering URLs',
)
parser.add_argument(
'filter_patterns',
nargs='*',
type=str,
default=None,
help='List only URLs matching these filter patterns'
)
command = parser.parse_args(args or ())
reject_stdin(stdin)
if command.with_headers and not (command.json or command.html or command.csv):
stderr(
'[X] --with-headers can only be used with --json, --html or --csv options\n',
color='red',
)
raise SystemExit(2)
matching_folders = list_all(
filter_patterns=command.filter_patterns,
filter_type=command.filter_type,
status=command.status,
after=command.after,
before=command.before,
sort=command.sort,
csv=command.csv,
json=command.json,
html=command.html,
with_headers=command.with_headers,
out_dir=Path(pwd) if pwd else DATA_DIR,
)
raise SystemExit(not matching_folders)
if __name__ == '__main__':
main(args=sys.argv[1:], stdin=sys.stdin)

View file

@@ -9,7 +9,27 @@ from typing import Optional, List, IO
from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR
from ..main import manage
# @enforce_types
def manage(args: Optional[List[str]]=None, out_dir: Path=DATA_DIR) -> None:
"""Run an ArchiveBox Django management command"""
check_data_folder()
from django.core.management import execute_from_command_line
if (args and "createsuperuser" in args) and (IN_DOCKER and not SHELL_CONFIG.IS_TTY):
stderr('[!] Warning: you need to pass -it to use interactive commands in docker', color='lightyellow')
stderr(' docker run -it archivebox manage {}'.format(' '.join(args or ['...'])), color='lightyellow')
stderr('')
# import ipdb; ipdb.set_trace()
execute_from_command_line(['manage.py', *(args or ['help'])])
@docstring(manage.__doc__)
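A usage sketch for the manage passthrough above; any Django management command should work, since the args are handed straight to execute_from_command_line (the -it caveat applies to interactive commands like createsuperuser when running in Docker):

    archivebox manage createsuperuser
    archivebox manage migrate --check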

View file

@@ -1,73 +1,98 @@
#!/usr/bin/env python3
# #!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox oneshot'
################## DEPRECATED IN FAVOR OF abx-dl #####################
# https://github.com/ArchiveBox/abx-dl
import sys
import argparse
# __package__ = 'archivebox.cli'
# __command__ = 'archivebox oneshot'
from pathlib import Path
from typing import List, Optional, IO
# import sys
# import argparse
from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR
from archivebox.misc.logging_util import SmartFormatter, accept_stdin, stderr
from ..main import oneshot
# from pathlib import Path
# from typing import List, Optional, IO
# from archivebox.misc.util import docstring
# from archivebox.config import DATA_DIR
# from archivebox.misc.logging_util import SmartFormatter, accept_stdin, stderr
@docstring(oneshot.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
parser = argparse.ArgumentParser(
prog=__command__,
description=oneshot.__doc__,
add_help=True,
formatter_class=SmartFormatter,
)
parser.add_argument(
'url',
type=str,
default=None,
help=(
'URLs or paths to archive e.g.:\n'
' https://getpocket.com/users/USERNAME/feed/all\n'
' https://example.com/some/rss/feed.xml\n'
' https://example.com\n'
' ~/Downloads/firefox_bookmarks_export.html\n'
' ~/Desktop/sites_list.csv\n'
)
)
parser.add_argument(
"--extract",
type=str,
help="Comma-separated list of extractors to use. Unrecognized extractor names are ignored. \
This does not take precedence over the configuration",
default=""
)
parser.add_argument(
'--out-dir',
type=str,
default=DATA_DIR,
help= "Path to save the single archive folder to, e.g. ./example.com_archive"
)
command = parser.parse_args(args or ())
stdin_url = None
url = command.url
if not url:
stdin_url = accept_stdin(stdin)
# @enforce_types
# def oneshot(url: str, extractors: str="", out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> List[Link]:
# """
# Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
# You can run this to archive single pages without needing to create a whole collection with archivebox init.
# """
# oneshot_link, _ = parse_links_memory([url])
# if len(oneshot_link) > 1:
# stderr(
# '[X] You should pass a single url to the oneshot command',
# color='red'
# )
# raise SystemExit(2)
if (stdin_url and url) or (not stdin and not url):
stderr(
'[X] You must pass a URL/path to add via stdin or CLI arguments.\n',
color='red',
)
raise SystemExit(2)
oneshot(
url=stdin_url or url,
out_dir=Path(command.out_dir).resolve(),
extractors=command.extract,
)
# methods = extractors.split(",") if extractors else ignore_methods(['title'])
# archive_link(oneshot_link[0], out_dir=out_dir, methods=methods, created_by_id=created_by_id)
# return oneshot_link
if __name__ == '__main__':
main(args=sys.argv[1:], stdin=sys.stdin)
# @docstring(oneshot.__doc__)
# def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
# parser = argparse.ArgumentParser(
# prog=__command__,
# description=oneshot.__doc__,
# add_help=True,
# formatter_class=SmartFormatter,
# )
# parser.add_argument(
# 'url',
# type=str,
# default=None,
# help=(
# 'URLs or paths to archive e.g.:\n'
# ' https://getpocket.com/users/USERNAME/feed/all\n'
# ' https://example.com/some/rss/feed.xml\n'
# ' https://example.com\n'
# ' ~/Downloads/firefox_bookmarks_export.html\n'
# ' ~/Desktop/sites_list.csv\n'
# )
# )
# parser.add_argument(
# "--extract",
# type=str,
# help="Comma-separated list of extractors to use. Unrecognized extractor names are ignored. \
# This does not take precedence over the configuration",
# default=""
# )
# parser.add_argument(
# '--out-dir',
# type=str,
# default=DATA_DIR,
# help= "Path to save the single archive folder to, e.g. ./example.com_archive"
# )
# command = parser.parse_args(args or ())
# stdin_url = None
# url = command.url
# if not url:
# stdin_url = accept_stdin(stdin)
# if (stdin_url and url) or (not stdin and not url):
# stderr(
# '[X] You must pass a URL/path to add via stdin or CLI arguments.\n',
# color='red',
# )
# raise SystemExit(2)
# oneshot(
# url=stdin_url or url,
# out_dir=Path(command.out_dir).resolve(),
# extractors=command.extract,
# )
# if __name__ == '__main__':
# main(args=sys.argv[1:], stdin=sys.stdin)
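With oneshot deprecated in favor of abx-dl (linked above), the rough single-URL equivalent would be something like the following (the exact abx-dl invocation is an assumption, check its README):

    pip install abx-dl
    abx-dl 'https://example.com'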

View file

@@ -8,10 +8,93 @@ import argparse
from pathlib import Path
from typing import Optional, List, IO
from django.db.models import QuerySet
from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR
from archivebox.misc.logging_util import SmartFormatter, accept_stdin
from ..main import remove
from archivebox.index.schema import Link
def remove(filter_str: Optional[str]=None,
filter_patterns: Optional[list[str]]=None,
filter_type: str='exact',
snapshots: Optional[QuerySet]=None,
after: Optional[float]=None,
before: Optional[float]=None,
yes: bool=False,
delete: bool=False,
out_dir: Path=DATA_DIR) -> list[Link]:
"""Remove the specified URLs from the archive"""
check_data_folder()
if snapshots is None:
if filter_str and filter_patterns:
stderr(
'[X] You should pass either a pattern as an argument, '
'or pass a list of patterns via stdin, but not both.\n',
color='red',
)
raise SystemExit(2)
elif not (filter_str or filter_patterns):
stderr(
'[X] You should pass either a pattern as an argument, '
'or pass a list of patterns via stdin.',
color='red',
)
stderr()
hint(('To remove all urls you can run:',
'archivebox remove --filter-type=regex ".*"'))
stderr()
raise SystemExit(2)
elif filter_str:
filter_patterns = [ptn.strip() for ptn in filter_str.split('\n')]
list_kwargs = {
"filter_patterns": filter_patterns,
"filter_type": filter_type,
"after": after,
"before": before,
}
if snapshots:
list_kwargs["snapshots"] = snapshots
log_list_started(filter_patterns, filter_type)
timer = TimedProgress(360, prefix=' ')
try:
snapshots = list_links(**list_kwargs)
finally:
timer.end()
if not snapshots.exists():
log_removal_finished(0, 0)
raise SystemExit(1)
log_links = [link.as_link() for link in snapshots]
log_list_finished(log_links)
log_removal_started(log_links, yes=yes, delete=delete)
timer = TimedProgress(360, prefix=' ')
try:
for snapshot in snapshots:
if delete:
shutil.rmtree(snapshot.as_link().link_dir, ignore_errors=True)
finally:
timer.end()
to_remove = snapshots.count()
from .search import flush_search_index
flush_search_index(snapshots=snapshots)
remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
all_snapshots = load_main_index(out_dir=out_dir)
log_removal_finished(all_snapshots.count(), to_remove)
return all_snapshots
@docstring(remove.__doc__)
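A minimal programmatic sketch of calling remove() directly, using only keyword arguments from the signature above (the import path matches the one used by the admin action later in this commit):

    from archivebox.cli.archivebox_remove import remove

    # delete all snapshots whose URL contains 'example.com', skipping the confirmation prompt
    remove(filter_patterns=['example.com'], filter_type='substring', yes=True, delete=True)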

View file

@@ -11,7 +11,139 @@ from typing import Optional, List, IO
from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR
from archivebox.misc.logging_util import SmartFormatter, reject_stdin
from ..main import schedule
from archivebox.config.common import ARCHIVING_CONFIG
# @enforce_types
def schedule(add: bool=False,
show: bool=False,
clear: bool=False,
foreground: bool=False,
run_all: bool=False,
quiet: bool=False,
every: Optional[str]=None,
tag: str='',
depth: int=0,
overwrite: bool=False,
update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
import_path: Optional[str]=None,
out_dir: Path=DATA_DIR):
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
check_data_folder()
from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
from archivebox.config.permissions import USER
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
cron = CronTab(user=True)
cron = dedupe_cron_jobs(cron)
if clear:
print(cron.remove_all(comment=CRON_COMMENT))
cron.write()
raise SystemExit(0)
existing_jobs = list(cron.find_comment(CRON_COMMENT))
if every or add:
every = every or 'day'
quoted = lambda s: f'"{s}"' if (s and ' ' in str(s)) else str(s)
cmd = [
'cd',
quoted(out_dir),
'&&',
quoted(ARCHIVEBOX_BINARY.load().abspath),
*([
'add',
*(['--overwrite'] if overwrite else []),
*(['--update'] if update else []),
*([f'--tag={tag}'] if tag else []),
f'--depth={depth}',
f'"{import_path}"',
] if import_path else ['update']),
'>>',
quoted(Path(CONSTANTS.LOGS_DIR) / 'schedule.log'),
'2>&1',
]
new_job = cron.new(command=' '.join(cmd), comment=CRON_COMMENT)
if every in ('minute', 'hour', 'day', 'month', 'year'):
set_every = getattr(new_job.every(), every)
set_every()
elif CronSlices.is_valid(every):
new_job.setall(every)
else:
stderr('{red}[X] Got invalid timeperiod for cron task.{reset}'.format(**SHELL_CONFIG.ANSI))
stderr(' It must be one of minute/hour/day/month/year')
stderr(' or a quoted cron-format schedule like:')
stderr(' archivebox schedule --every=day --depth=1 https://example.com/some/rss/feed.xml')
stderr(' archivebox schedule --every="0/5 * * * *" --depth=1 https://example.com/some/rss/feed.xml')
raise SystemExit(1)
cron = dedupe_cron_jobs(cron)
cron.write()
total_runs = sum(j.frequency_per_year() for j in cron)
existing_jobs = list(cron.find_comment(CRON_COMMENT))
print()
print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **SHELL_CONFIG.ANSI))
print('\n'.join(f' > {cmd}' if str(cmd) == str(new_job) else f' {cmd}' for cmd in existing_jobs))
if total_runs > 60 and not quiet:
stderr()
stderr('{lightyellow}[!] With the current cron config, ArchiveBox is estimated to run >{} times per year.{reset}'.format(total_runs, **SHELL_CONFIG.ANSI))
stderr(' Congrats on being an enthusiastic internet archiver! 👌')
stderr()
stderr(' Make sure you have enough storage space available to hold all the data.')
stderr(' Using a compressed/deduped filesystem like ZFS is recommended if you plan on archiving a lot.')
stderr('')
elif show:
if existing_jobs:
print('\n'.join(str(cmd) for cmd in existing_jobs))
else:
stderr('{red}[X] There are no ArchiveBox cron jobs scheduled for your user ({}).{reset}'.format(USER, **SHELL_CONFIG.ANSI))
stderr(' To schedule a new job, run:')
stderr(' archivebox schedule --every=[timeperiod] --depth=1 https://example.com/some/rss/feed.xml')
raise SystemExit(0)
cron = CronTab(user=True)
cron = dedupe_cron_jobs(cron)
existing_jobs = list(cron.find_comment(CRON_COMMENT))
if foreground or run_all:
if not existing_jobs:
stderr('{red}[X] You must schedule some jobs first before running in foreground mode.{reset}'.format(**SHELL_CONFIG.ANSI))
stderr(' archivebox schedule --every=hour --depth=1 https://example.com/some/rss/feed.xml')
raise SystemExit(1)
print('{green}[*] Running {} ArchiveBox jobs in foreground task scheduler...{reset}'.format(len(existing_jobs), **SHELL_CONFIG.ANSI))
if run_all:
try:
for job in existing_jobs:
sys.stdout.write(f' > {job.command.split("/archivebox ")[0].split(" && ")[0]}\n')
sys.stdout.write(f' > {job.command.split("/archivebox ")[-1].split(" >> ")[0]}')
sys.stdout.flush()
job.run()
sys.stdout.write(f'\r{job.command.split("/archivebox ")[-1]}\n')
except KeyboardInterrupt:
print('\n{green}[√] Stopped.{reset}'.format(**SHELL_CONFIG.ANSI))
raise SystemExit(1)
if foreground:
try:
for job in existing_jobs:
print(f' > {job.command.split("/archivebox ")[-1].split(" >> ")[0]}')
for result in cron.run_scheduler():
print(result)
except KeyboardInterrupt:
print('\n{green}[√] Stopped.{reset}'.format(**SHELL_CONFIG.ANSI))
raise SystemExit(1)
# if CAN_UPGRADE:
# hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
@docstring(schedule.__doc__)
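For reference, the cron entry assembled by the cmd list above ends up looking roughly like this (paths vary per install):

    cd "/data" && "/usr/local/bin/archivebox" add --depth=1 "https://example.com/rss.xml" >> "/data/logs/schedule.log" 2>&1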

View file

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox search'
from pathlib import Path
from typing import Optional, List, Iterable
import rich_click as click
from rich import print
from django.db.models import QuerySet
from archivebox.config import DATA_DIR
from archivebox.index import LINK_FILTERS
from archivebox.index.schema import Link
from archivebox.misc.logging import stderr
from archivebox.misc.util import enforce_types, docstring
STATUS_CHOICES = [
'indexed', 'archived', 'unarchived', 'present', 'valid', 'invalid',
'duplicate', 'orphaned', 'corrupted', 'unrecognized'
]
def list_links(snapshots: Optional[QuerySet]=None,
filter_patterns: Optional[List[str]]=None,
filter_type: str='substring',
after: Optional[float]=None,
before: Optional[float]=None,
out_dir: Path=DATA_DIR) -> Iterable[Link]:
from archivebox.index import load_main_index
from archivebox.index import snapshot_filter
if snapshots:
all_snapshots = snapshots
else:
all_snapshots = load_main_index(out_dir=out_dir)
if after is not None:
all_snapshots = all_snapshots.filter(timestamp__gte=after)
if before is not None:
all_snapshots = all_snapshots.filter(timestamp__lt=before)
if filter_patterns:
all_snapshots = snapshot_filter(all_snapshots, filter_patterns, filter_type)
if not all_snapshots:
stderr('[!] No Snapshots matched your filters:', filter_patterns, f'({filter_type})', color='lightyellow')
return all_snapshots
def list_folders(links: list[Link], status: str, out_dir: Path=DATA_DIR) -> dict[str, Link | None]:
from archivebox.misc.checks import check_data_folder
from archivebox.index import (
get_indexed_folders,
get_archived_folders,
get_unarchived_folders,
get_present_folders,
get_valid_folders,
get_invalid_folders,
get_duplicate_folders,
get_orphaned_folders,
get_corrupted_folders,
get_unrecognized_folders,
)
check_data_folder()
STATUS_FUNCTIONS = {
"indexed": get_indexed_folders,
"archived": get_archived_folders,
"unarchived": get_unarchived_folders,
"present": get_present_folders,
"valid": get_valid_folders,
"invalid": get_invalid_folders,
"duplicate": get_duplicate_folders,
"orphaned": get_orphaned_folders,
"corrupted": get_corrupted_folders,
"unrecognized": get_unrecognized_folders,
}
try:
return STATUS_FUNCTIONS[status](links, out_dir=out_dir)
except KeyError:
raise ValueError('Status not recognized.')
@enforce_types
def search(filter_patterns: list[str] | None=None,
filter_type: str='substring',
status: str='indexed',
before: float | None=None,
after: float | None=None,
sort: str | None=None,
json: bool=False,
html: bool=False,
csv: str | None=None,
with_headers: bool=False):
"""List, filter, and export information about archive entries"""
if with_headers and not (json or html or csv):
stderr('[X] --with-headers requires --json, --html or --csv\n', color='red')
raise SystemExit(2)
snapshots = list_links(
filter_patterns=list(filter_patterns) if filter_patterns else None,
filter_type=filter_type,
before=before,
after=after,
)
if sort:
snapshots = snapshots.order_by(sort)
folders = list_folders(
links=snapshots,
status=status,
out_dir=DATA_DIR,
)
if json:
from archivebox.index.json import generate_json_index_from_links
output = generate_json_index_from_links(folders.values(), with_headers)
elif html:
from archivebox.index.html import generate_index_from_links
output = generate_index_from_links(folders.values(), with_headers)
elif csv:
from archivebox.index.csv import links_to_csv
output = links_to_csv(folders.values(), csv.split(','), with_headers)
else:
from archivebox.misc.logging_util import printable_folders
output = printable_folders(folders, with_headers)
print(output)
return output
@click.command()
@click.option('--filter-type', '-f', type=click.Choice(['search', *LINK_FILTERS.keys()]), default='substring', help='Pattern matching type for filtering URLs')
@click.option('--status', '-s', type=click.Choice(STATUS_CHOICES), default='indexed', help='List snapshots with the given status')
@click.option('--before', '-b', type=float, help='List snapshots bookmarked before the given UNIX timestamp')
@click.option('--after', '-a', type=float, help='List snapshots bookmarked after the given UNIX timestamp')
@click.option('--sort', '-o', type=str, help='Field to sort by, e.g. url, created_at, bookmarked_at, downloaded_at')
@click.option('--json', '-J', is_flag=True, help='Print output in JSON format')
@click.option('--html', '-M', is_flag=True, help='Print output in HTML format (suitable for viewing statically without a server)')
@click.option('--csv', '-C', type=str, help='Print output as CSV with the provided fields, e.g.: created_at,url,title')
@click.option('--with-headers', '-H', is_flag=True, help='Include extra CSV/HTML headers in the output')
@click.help_option('--help', '-h')
@click.argument('filter_patterns', nargs=-1)
@docstring(search.__doc__)
def main(**kwargs):
return search(**kwargs)
if __name__ == '__main__':
main()
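Example invocations of the new click-based command, using only the options declared above:

    # substring filter, printed as a human-readable table
    archivebox search example.com

    # regex filter over archived snapshots, exported as CSV with headers
    archivebox search --filter-type=regex --status=archived --csv=timestamp,url --with-headers '.*\.pdf$'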

View file

@@ -12,7 +12,81 @@ from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR
from archivebox.config.common import SERVER_CONFIG
from archivebox.misc.logging_util import SmartFormatter, reject_stdin
from ..main import server
# @enforce_types
def server(runserver_args: Optional[List[str]]=None,
reload: bool=False,
debug: bool=False,
init: bool=False,
quick_init: bool=False,
createsuperuser: bool=False,
daemonize: bool=False,
out_dir: Path=DATA_DIR) -> None:
"""Run the ArchiveBox HTTP server"""
from rich import print
runserver_args = runserver_args or []
if init:
run_subcommand('init', stdin=None, pwd=out_dir)
print()
elif quick_init:
run_subcommand('init', subcommand_args=['--quick'], stdin=None, pwd=out_dir)
print()
if createsuperuser:
run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
print()
check_data_folder()
from django.core.management import call_command
from django.contrib.auth.models import User
if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
print()
# print('[yellow][!] No admin accounts exist, you must create one to be able to log in to the Admin UI![/yellow]')
print('[violet]Hint:[/violet] To create an [bold]admin username & password[/bold] for the [deep_sky_blue3][underline][link=http://{host}:{port}/admin]Admin UI[/link][/underline][/deep_sky_blue3], run:')
print(' [green]archivebox manage createsuperuser[/green]')
print()
host = '127.0.0.1'
port = '8000'
try:
host_and_port = [arg for arg in runserver_args if arg.replace('.', '').replace(':', '').isdigit()][0]
if ':' in host_and_port:
host, port = host_and_port.split(':')
else:
if '.' in host_and_port:
host = host_and_port
else:
port = host_and_port
except IndexError:
pass
print('[green][+] Starting ArchiveBox webserver...[/green]')
print(f' [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
print(f' [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
print(' > Writing ArchiveBox error log to ./logs/errors.log')
if SHELL_CONFIG.DEBUG:
if not reload:
runserver_args.append('--noreload') # '--insecure'
call_command("runserver", *runserver_args)
else:
from workers.supervisord_util import start_server_workers
print()
start_server_workers(host=host, port=port, daemonize=False)
print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
@docstring(server.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
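A usage sketch for the server entrypoint above; the lone positional runserver arg is parsed as host, port, or host:port by the try/except block (assuming the CLI wrapper forwards --init and the positional args unchanged):

    archivebox server --init 0.0.0.0:8000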

View file

@@ -11,7 +11,19 @@ from typing import Optional, List, IO
from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR
from archivebox.misc.logging_util import SmartFormatter, reject_stdin
from ..main import shell
#@enforce_types
def shell(out_dir: Path=DATA_DIR) -> None:
"""Enter an interactive ArchiveBox Django shell"""
check_data_folder()
from django.core.management import call_command
call_command("shell_plus")
@docstring(shell.__doc__)
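Inside the shell_plus session this opens, the Django models are importable as usual, e.g.:

    $ archivebox shell
    >>> from core.models import Snapshot
    >>> Snapshot.objects.count()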

View file

@@ -8,10 +8,114 @@ import argparse
from pathlib import Path
from typing import Optional, List, IO
from rich import print
from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR
from archivebox.misc.logging_util import SmartFormatter, reject_stdin
from ..main import status
# @enforce_types
def status(out_dir: Path=DATA_DIR) -> None:
"""Print out some info and statistics about the archive collection"""
check_data_folder()
from core.models import Snapshot
from django.contrib.auth import get_user_model
User = get_user_model()
print('{green}[*] Scanning archive main index...{reset}'.format(**SHELL_CONFIG.ANSI))
print(SHELL_CONFIG.ANSI['lightyellow'], f' {out_dir}/*', SHELL_CONFIG.ANSI['reset'])
num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.')
size = printable_filesize(num_bytes)
print(f' Index size: {size} across {num_files} files')
print()
links = load_main_index(out_dir=out_dir)
num_sql_links = links.count()
num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir))
print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {CONSTANTS.SQL_INDEX_FILENAME})')
print(f' > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR.name}/*/index.json)')
print()
print('{green}[*] Scanning archive data directories...{reset}'.format(**SHELL_CONFIG.ANSI))
print(SHELL_CONFIG.ANSI['lightyellow'], f' {ARCHIVE_DIR}/*', SHELL_CONFIG.ANSI['reset'])
num_bytes, num_dirs, num_files = get_dir_size(ARCHIVE_DIR)
size = printable_filesize(num_bytes)
print(f' Size: {size} across {num_files} files in {num_dirs} directories')
print(SHELL_CONFIG.ANSI['black'])
num_indexed = len(get_indexed_folders(links, out_dir=out_dir))
num_archived = len(get_archived_folders(links, out_dir=out_dir))
num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir))
print(f' > indexed: {num_indexed}'.ljust(36), f'({get_indexed_folders.__doc__})')
print(f' > archived: {num_archived}'.ljust(36), f'({get_archived_folders.__doc__})')
print(f' > unarchived: {num_unarchived}'.ljust(36), f'({get_unarchived_folders.__doc__})')
num_present = len(get_present_folders(links, out_dir=out_dir))
num_valid = len(get_valid_folders(links, out_dir=out_dir))
print()
print(f' > present: {num_present}'.ljust(36), f'({get_present_folders.__doc__})')
print(f' > valid: {num_valid}'.ljust(36), f'({get_valid_folders.__doc__})')
duplicate = get_duplicate_folders(links, out_dir=out_dir)
orphaned = get_orphaned_folders(links, out_dir=out_dir)
corrupted = get_corrupted_folders(links, out_dir=out_dir)
unrecognized = get_unrecognized_folders(links, out_dir=out_dir)
num_invalid = len({**duplicate, **orphaned, **corrupted, **unrecognized})
print(f' > invalid: {num_invalid}'.ljust(36), f'({get_invalid_folders.__doc__})')
print(f' > duplicate: {len(duplicate)}'.ljust(36), f'({get_duplicate_folders.__doc__})')
print(f' > orphaned: {len(orphaned)}'.ljust(36), f'({get_orphaned_folders.__doc__})')
print(f' > corrupted: {len(corrupted)}'.ljust(36), f'({get_corrupted_folders.__doc__})')
print(f' > unrecognized: {len(unrecognized)}'.ljust(36), f'({get_unrecognized_folders.__doc__})')
print(SHELL_CONFIG.ANSI['reset'])
if num_indexed:
print(' {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox list --status=<status> (e.g. indexed, corrupted, archived, etc.)')
if orphaned:
print(' {lightred}Hint:{reset} To automatically import orphaned data directories into the main index, run:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox init')
if num_invalid:
print(' {lightred}Hint:{reset} You may need to manually remove or fix some invalid data directories, afterwards make sure to run:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox init')
print()
print('{green}[*] Scanning recent archive changes and user logins:{reset}'.format(**SHELL_CONFIG.ANSI))
print(SHELL_CONFIG.ANSI['lightyellow'], f' {CONSTANTS.LOGS_DIR}/*', SHELL_CONFIG.ANSI['reset'])
users = get_admins().values_list('username', flat=True)
print(f' UI users {len(users)}: {", ".join(users)}')
last_login = User.objects.order_by('last_login').last()
if last_login:
print(f' Last UI login: {last_login.username} @ {str(last_login.last_login)[:16]}')
last_downloaded = Snapshot.objects.order_by('downloaded_at').last()
if last_downloaded:
print(f' Last changes: {str(last_downloaded.downloaded_at)[:16]}')
if not users:
print()
print(' {lightred}Hint:{reset} You can create an admin user by running:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox manage createsuperuser')
print()
for snapshot in links.order_by('-downloaded_at')[:10]:
if not snapshot.downloaded_at:
continue
print(
SHELL_CONFIG.ANSI['black'],
(
f' > {str(snapshot.downloaded_at)[:16]} '
f'[{snapshot.num_outputs} {("X", "")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] '
f'"{snapshot.title}": {snapshot.url}'
)[:SHELL_CONFIG.TERM_WIDTH],
SHELL_CONFIG.ANSI['reset'],
)
print(SHELL_CONFIG.ANSI['black'], ' ...', SHELL_CONFIG.ANSI['reset'])
@docstring(status.__doc__)

View file

@@ -24,7 +24,92 @@ from archivebox.index import (
from archivebox.misc.logging_util import SmartFormatter, accept_stdin
# from ..main import update
# LEGACY VERSION:
# @enforce_types
# def update(resume: Optional[float]=None,
# only_new: bool=ARCHIVING_CONFIG.ONLY_NEW,
# index_only: bool=False,
# overwrite: bool=False,
# filter_patterns_str: Optional[str]=None,
# filter_patterns: Optional[List[str]]=None,
# filter_type: Optional[str]=None,
# status: Optional[str]=None,
# after: Optional[str]=None,
# before: Optional[str]=None,
# extractors: str="",
# out_dir: Path=DATA_DIR) -> List[Link]:
# """Import any new links from subscriptions and retry any previously failed/skipped links"""
# from core.models import ArchiveResult
# from .search import index_links
# # from workers.supervisord_util import start_cli_workers
# check_data_folder()
# # start_cli_workers()
# new_links: List[Link] = [] # TODO: Remove input argument: only_new
# extractors = extractors.split(",") if extractors else []
# # Step 1: Filter for selected_links
# print('[*] Finding matching Snapshots to update...')
# print(f' - Filtering by {" ".join(filter_patterns)} ({filter_type}) {before=} {after=} {status=}...')
# matching_snapshots = list_links(
# filter_patterns=filter_patterns,
# filter_type=filter_type,
# before=before,
# after=after,
# )
# print(f' - Checking {matching_snapshots.count()} snapshot folders for existing data with {status=}...')
# matching_folders = list_folders(
# links=matching_snapshots,
# status=status,
# out_dir=out_dir,
# )
# all_links = (link for link in matching_folders.values() if link)
# print(' - Sorting by most unfinished -> least unfinished + date archived...')
# all_links = sorted(all_links, key=lambda link: (ArchiveResult.objects.filter(snapshot__url=link.url).count(), link.timestamp))
# if index_only:
# for link in all_links:
# write_link_details(link, out_dir=out_dir, skip_sql_index=True)
# index_links(all_links, out_dir=out_dir)
# return all_links
# # Step 2: Run the archive methods for each link
# to_archive = new_links if only_new else all_links
# if resume:
# to_archive = [
# link for link in to_archive
# if link.timestamp >= str(resume)
# ]
# if not to_archive:
# stderr('')
# stderr(f'[√] Nothing found to resume after {resume}', color='green')
# return all_links
# archive_kwargs = {
# "out_dir": out_dir,
# }
# if extractors:
# archive_kwargs["methods"] = extractors
# archive_links(to_archive, overwrite=overwrite, **archive_kwargs)
# # Step 4: Re-write links index with updated titles, icons, and resources
# all_links = load_main_index(out_dir=out_dir)
# return all_links
def update():
"""Import any new links from subscriptions and retry any previously failed/skipped links"""
from archivebox.config.django import setup_django
setup_django()

View file

@@ -1,61 +1,207 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox version'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from typing import Iterable
# from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR, VERSION
from archivebox.misc.logging_util import SmartFormatter, reject_stdin
import rich_click as click
from archivebox.misc.util import docstring, enforce_types
# @docstring(version.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
"""Print the ArchiveBox version and dependency information"""
parser = argparse.ArgumentParser(
prog=__command__,
description="Print the ArchiveBox version and dependency information", # version.__doc__,
add_help=True,
formatter_class=SmartFormatter,
)
parser.add_argument(
'--quiet', '-q',
action='store_true',
help='Only print ArchiveBox version number and nothing else.',
)
parser.add_argument(
'--binproviders', '-p',
type=str,
help='Select binproviders to detect DEFAULT=env,apt,brew,sys_pip,venv_pip,lib_pip,pipx,sys_npm,lib_npm,puppeteer,playwright (all)',
default=None,
)
parser.add_argument(
'--binaries', '-b',
type=str,
help='Select binaries to detect DEFAULT=curl,wget,git,yt-dlp,chrome,single-file,readability-extractor,postlight-parser,... (all)',
default=None,
)
command = parser.parse_args(args or ())
reject_stdin(__command__, stdin)
@enforce_types
def version(quiet: bool=False,
binproviders: Iterable[str]=(),
binaries: Iterable[str]=()) -> list[str]:
"""Print the ArchiveBox version, debug metadata, and installed dependency versions"""
# for speed reasons, check if quiet flag was set and just return simple version immediately if so
if command.quiet:
# fast path for just getting the version and exiting, dont do any slower imports
from archivebox.config.version import VERSION
print(VERSION)
return
if quiet or '--version' in sys.argv:
return []
# otherwise do big expensive import to get the full version
from ..main import version
version(
quiet=command.quiet,
out_dir=Path(pwd) if pwd else DATA_DIR,
binproviders=command.binproviders.split(',') if command.binproviders else None,
binaries=command.binaries.split(',') if command.binaries else None,
# Only do slower imports when getting full version info
import os
import platform
from pathlib import Path
from rich.panel import Panel
from rich.console import Console
from abx_pkg import Binary
import abx
import archivebox
from archivebox.config import CONSTANTS, DATA_DIR
from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID, IN_DOCKER
from archivebox.config.paths import get_data_locations, get_code_locations
from archivebox.config.common import SHELL_CONFIG, STORAGE_CONFIG, SEARCH_BACKEND_CONFIG
from archivebox.misc.logging_util import printable_folder_status
from abx_plugin_default_binproviders import apt, brew, env
console = Console()
prnt = console.print
LDAP_ENABLED = archivebox.pm.hook.get_SCOPE_CONFIG().LDAP_ENABLED
# 0.7.1
# ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365
# IN_DOCKER=False IN_QEMU=False ARCH=arm64 OS=Darwin PLATFORM=macOS-14.2-arm64-arm-64bit PYTHON=Cpython
# FS_ATOMIC=True FS_REMOTE=False FS_USER=501:20 FS_PERMS=644
# DEBUG=False IS_TTY=True TZ=UTC SEARCH_BACKEND=ripgrep LDAP=False
p = platform.uname()
COMMIT_HASH = get_COMMIT_HASH()
prnt(
'[dark_green]ArchiveBox[/dark_green] [dark_goldenrod]v{}[/dark_goldenrod]'.format(CONSTANTS.VERSION),
f'COMMIT_HASH={COMMIT_HASH[:7] if COMMIT_HASH else "unknown"}',
f'BUILD_TIME={get_BUILD_TIME()}',
)
prnt(
f'IN_DOCKER={IN_DOCKER}',
f'IN_QEMU={SHELL_CONFIG.IN_QEMU}',
f'ARCH={p.machine}',
f'OS={p.system}',
f'PLATFORM={platform.platform()}',
f'PYTHON={sys.implementation.name.title()}' + (' (venv)' if CONSTANTS.IS_INSIDE_VENV else ''),
)
OUTPUT_IS_REMOTE_FS = get_data_locations().DATA_DIR.is_mount or get_data_locations().ARCHIVE_DIR.is_mount
DATA_DIR_STAT = CONSTANTS.DATA_DIR.stat()
prnt(
f'EUID={os.geteuid()}:{os.getegid()} UID={RUNNING_AS_UID}:{RUNNING_AS_GID} PUID={ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}',
f'FS_UID={DATA_DIR_STAT.st_uid}:{DATA_DIR_STAT.st_gid}',
f'FS_PERMS={STORAGE_CONFIG.OUTPUT_PERMISSIONS}',
f'FS_ATOMIC={STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES}',
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
)
prnt(
f'DEBUG={SHELL_CONFIG.DEBUG}',
f'IS_TTY={SHELL_CONFIG.IS_TTY}',
f'SUDO={CONSTANTS.IS_ROOT}',
f'ID={CONSTANTS.MACHINE_ID}:{CONSTANTS.COLLECTION_ID}',
f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}',
f'LDAP={LDAP_ENABLED}',
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
)
prnt()
if not (os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) and os.access(CONSTANTS.CONFIG_FILE, os.R_OK)):
PANEL_TEXT = '\n'.join((
# '',
# f'[yellow]CURRENT DIR =[/yellow] [red]{os.getcwd()}[/red]',
'',
'[violet]Hint:[/violet] [green]cd[/green] into a collection [blue]DATA_DIR[/blue] and run [green]archivebox version[/green] again...',
' [grey53]OR[/grey53] run [green]archivebox init[/green] to create a new collection in the current dir.',
'',
' [i][grey53](this is [red]REQUIRED[/red] if you are opening a Github Issue to get help)[/grey53][/i]',
'',
))
prnt(Panel(PANEL_TEXT, expand=False, border_style='grey53', title='[red]:exclamation: No collection [blue]DATA_DIR[/blue] is currently active[/red]', subtitle='Full version info is only available when inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]'))
prnt()
return []
prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]')
failures = []
BINARIES = abx.as_dict(archivebox.pm.hook.get_BINARIES())
for name, binary in list(BINARIES.items()):
if binary.name == 'archivebox':
continue
# skip if the binary is not in the requested list of binaries
if binaries and binary.name not in binaries:
continue
# skip if the binary is not supported by any of the requested binproviders
if binproviders and binary.binproviders_supported and not any(provider.name in binproviders for provider in binary.binproviders_supported):
continue
err = None
try:
loaded_bin = binary.load()
except Exception as e:
err = e
loaded_bin = binary
provider_summary = f'[dark_sea_green3]{loaded_bin.binprovider.name.ljust(10)}[/dark_sea_green3]' if loaded_bin.binprovider else '[grey23]not found[/grey23] '
if loaded_bin.abspath:
abspath = str(loaded_bin.abspath).replace(str(DATA_DIR), '[light_slate_blue].[/light_slate_blue]').replace(str(Path('~').expanduser()), '~')
if ' ' in abspath:
abspath = abspath.replace(' ', r'\ ')
else:
abspath = f'[red]{err}[/red]'
prnt('', '[green]√[/green]' if loaded_bin.is_valid else '[red]X[/red]', '', loaded_bin.name.ljust(21), str(loaded_bin.version).ljust(12), provider_summary, abspath, overflow='ignore', crop=False)
if not loaded_bin.is_valid:
failures.append(loaded_bin.name)
prnt()
prnt('[gold3][i] Package Managers:[/gold3]')
BINPROVIDERS = abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS())
for name, binprovider in list(BINPROVIDERS.items()):
err = None
if binproviders and binprovider.name not in binproviders:
continue
# TODO: implement a BinProvider.BINARY() method that gets the loaded binary for a binprovider's INSTALLER_BIN
loaded_bin = binprovider.INSTALLER_BINARY or Binary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew])
abspath = str(loaded_bin.abspath).replace(str(DATA_DIR), '[light_slate_blue].[/light_slate_blue]').replace(str(Path('~').expanduser()), '~')
abspath = None
if loaded_bin.abspath:
abspath = str(loaded_bin.abspath).replace(str(DATA_DIR), '.').replace(str(Path('~').expanduser()), '~')
if ' ' in abspath:
abspath = abspath.replace(' ', r'\ ')
PATH = str(binprovider.PATH).replace(str(DATA_DIR), '[light_slate_blue].[/light_slate_blue]').replace(str(Path('~').expanduser()), '~')
ownership_summary = f'UID=[blue]{str(binprovider.EUID).ljust(4)}[/blue]'
provider_summary = f'[dark_sea_green3]{str(abspath).ljust(52)}[/dark_sea_green3]' if abspath else f'[grey23]{"not available".ljust(52)}[/grey23]'
prnt('', '[green]√[/green]' if binprovider.is_valid else '[grey53]-[/grey53]', '', binprovider.name.ljust(11), provider_summary, ownership_summary, f'PATH={PATH}', overflow='ellipsis', soft_wrap=True)
if not (binaries or binproviders):
# dont show source code / data dir info if we just want to get version info for a binary or binprovider
prnt()
prnt('[deep_sky_blue3][i] Code locations:[/deep_sky_blue3]')
for name, path in get_code_locations().items():
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
prnt()
if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) or os.access(CONSTANTS.CONFIG_FILE, os.R_OK):
prnt('[bright_yellow][i] Data locations:[/bright_yellow]')
for name, path in get_data_locations().items():
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
from archivebox.misc.checks import check_data_dir_permissions
check_data_dir_permissions()
else:
prnt()
prnt('[red][i] Data locations:[/red] (not in a data directory)')
prnt()
if failures:
prnt('[red]Error:[/red] [yellow]Failed to detect the following binaries:[/yellow]')
prnt(f' [red]{", ".join(failures)}[/red]')
prnt()
prnt('[violet]Hint:[/violet] To install missing binaries automatically, run:')
prnt(' [green]archivebox install[/green]')
prnt()
return failures
@click.command()
@click.option('--quiet', '-q', is_flag=True, help='Only print ArchiveBox version number and nothing else. (equivalent to archivebox --version)')
@click.option('--binproviders', '-p', help='Select binproviders to detect DEFAULT=env,apt,brew,sys_pip,venv_pip,lib_pip,pipx,sys_npm,lib_npm,puppeteer,playwright (all)')
@click.option('--binaries', '-b', help='Select binaries to detect DEFAULT=curl,wget,git,yt-dlp,chrome,single-file,readability-extractor,postlight-parser,... (all)')
@docstring(version.__doc__)
def main(**kwargs):
failures = version(**kwargs)
if failures:
raise SystemExit(1)
if __name__ == '__main__':
main(args=sys.argv[1:], stdin=sys.stdin)
main()
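Example invocations, matching the click options defined above:

    archivebox version --quiet                  # fast path: version string only
    archivebox version --binaries=curl,wget     # only check the given binaries
    archivebox version --binproviders=env,apt   # only check the given package managers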

View file

@@ -60,7 +60,7 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
return
with Progress(transient=True, expand=True, console=STDERR) as INITIAL_STARTUP_PROGRESS:
INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25, visible=False)
INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25, visible=True)
from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission

View file

@@ -142,7 +142,7 @@ def create_and_chown_dir(dir_path: Path) -> None:
os.system(f'chown {ARCHIVEBOX_USER} "{dir_path}"/* 2>/dev/null &')
@cache
def get_or_create_working_tmp_dir(autofix=True, quiet=False):
def get_or_create_working_tmp_dir(autofix=True, quiet=True):
from archivebox import CONSTANTS
from archivebox.config.common import STORAGE_CONFIG
from archivebox.misc.checks import check_tmp_dir
@@ -165,7 +165,7 @@ def get_or_create_working_tmp_dir(autofix=True, quiet=False):
pass
if check_tmp_dir(candidate, throw=False, quiet=True, must_exist=True):
if autofix and STORAGE_CONFIG.TMP_DIR != candidate:
STORAGE_CONFIG.update_in_place(TMP_DIR=candidate, warn=not quiet)
STORAGE_CONFIG.update_in_place(TMP_DIR=candidate)
return candidate
if not quiet:
@@ -193,7 +193,7 @@ def get_or_create_working_lib_dir(autofix=True, quiet=False):
pass
if check_lib_dir(candidate, throw=False, quiet=True, must_exist=True):
if autofix and STORAGE_CONFIG.LIB_DIR != candidate:
STORAGE_CONFIG.update_in_place(LIB_DIR=candidate, warn=not quiet)
STORAGE_CONFIG.update_in_place(LIB_DIR=candidate)
return candidate
if not quiet:

View file

@@ -36,6 +36,8 @@ HOSTNAME: str = max([socket.gethostname(), platform.node()], key=len)
IS_ROOT = RUNNING_AS_UID == 0
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
# IN_DOCKER_COMPOSE = # TODO: figure out a way to detect if running in docker compose
FALLBACK_UID = RUNNING_AS_UID or SUDO_UID
FALLBACK_GID = RUNNING_AS_GID or SUDO_GID

View file

@@ -303,7 +303,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
"Exit Status": [],
}
from workers.supervisor_util import get_existing_supervisord_process
from workers.supervisord_util import get_existing_supervisord_process
supervisor = get_existing_supervisord_process()
if supervisor is None:
@@ -373,7 +373,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert request.user.is_superuser, "Must be a superuser to view configuration settings."
from workers.supervisor_util import get_existing_supervisord_process, get_worker, get_sock_file, CONFIG_FILE_NAME
from workers.supervisord_util import get_existing_supervisord_process, get_worker, get_sock_file, CONFIG_FILE_NAME
SOCK_FILE = get_sock_file()
CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME

View file

@@ -21,7 +21,6 @@ from archivebox.misc.logging_util import printable_filesize
from archivebox.search.admin import SearchResultsAdminMixin
from archivebox.index.html import snapshot_icons
from archivebox.extractors import archive_links
from archivebox.main import remove
from archivebox.base_models.admin import ABIDModelAdmin
from archivebox.workers.tasks import bg_archive_links, bg_add
@@ -321,7 +320,9 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
description="☠️ Delete"
)
def delete_snapshots(self, request, queryset):
from archivebox.cli.archivebox_remove import remove
remove(snapshots=queryset, yes=True, delete=True, out_dir=DATA_DIR)
messages.success(
request,
mark_safe(f"Successfully deleted {queryset.count()} Snapshots. Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed."),

File diff suppressed because it is too large

View file

@@ -24,7 +24,7 @@ def check_data_folder() -> None:
from archivebox.config import CONSTANTS
from archivebox.config.paths import create_and_chown_dir, get_or_create_working_tmp_dir, get_or_create_working_lib_dir
archive_dir_exists = os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()
archive_dir_exists = os.path.isdir(ARCHIVE_DIR)
if not archive_dir_exists:
print('[red][X] No archivebox index found in the current directory.[/red]', file=sys.stderr)
print(f' {DATA_DIR}', file=sys.stderr)

View file

@@ -12,7 +12,7 @@ from pathlib import Path
from datetime import datetime, timezone
from dataclasses import dataclass
from typing import Any, Optional, List, Dict, Union, IO, TYPE_CHECKING
from typing import Any, Optional, List, Dict, Union, Iterable, IO, TYPE_CHECKING
if TYPE_CHECKING:
from ..index.schema import Link, ArchiveResult
@@ -228,7 +228,7 @@ def progress_bar(seconds: int, prefix: str='', ANSI: Dict[str, str]=ANSI) -> None:
print()
def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str | IO], pwd: str='.'):
def log_cli_command(subcommand: str, subcommand_args: Iterable[str]=(), stdin: str | IO | None=None, pwd: str='.'):
args = ' '.join(subcommand_args)
version_msg = '[dark_magenta]\\[{now}][/dark_magenta] [dark_red]ArchiveBox[/dark_red] [dark_goldenrod]v{VERSION}[/dark_goldenrod]: [green4]archivebox [green3]{subcommand}[green2] {args}[/green2]'.format(
now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),

View file

@@ -20,11 +20,9 @@ from datetime import datetime, timedelta  # noqa
from django.conf import settings # noqa
from archivebox import CONSTANTS # noqa
from ..main import * # noqa
from ..cli import CLI_SUBCOMMANDS
from archivebox.cli import * # noqa
CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
CLI_COMMAND_NAMES = ", ".join(CLI_SUBCOMMANDS.keys())
if __name__ == '__main__':
# load the rich extension for ipython for pretty printing
@@ -40,7 +38,7 @@ if __name__ == '__main__':
prnt('[green]import re, os, sys, psutil, subprocess, requests, json, pydantic, benedict, django, abx[/]')
prnt('[yellow4]# ArchiveBox Imports[/]')
prnt('[yellow4]import archivebox[/]')
prnt('[yellow4]from archivebox.main import {}[/]'.format(CLI_COMMAND_NAMES))
prnt('[yellow4]from archivebox.cli import *[/]')
prnt()
if console.width >= 80:

View file

@@ -459,8 +459,8 @@ def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId
PLUGINS_TO_LOAD = sorted(PLUGINS_TO_LOAD, key=lambda x: x['order'])
for plugin_info in PLUGINS_TO_LOAD:
if '--version' not in sys.argv and '--help' not in sys.argv:
print(f'🧩 Loading plugin: {plugin_info["id"]}...', end='\r', flush=True, file=sys.stderr)
# if '--version' not in sys.argv and '--help' not in sys.argv:
# print(f'🧩 Loading plugin: {plugin_info["id"]}...', end='\r', flush=True, file=sys.stderr)
pm.register(plugin_info['module'])
LOADED_PLUGINS[plugin_info['id']] = plugin_info
# print('\x1b[2K', end='\r', flush=True, file=sys.stderr)

View file

@@ -1,103 +1,103 @@
import uuid
from functools import wraps
from django.db import connection, transaction
from django.utils import timezone
from huey.exceptions import TaskLockedException
# import uuid
# from functools import wraps
# from django.db import connection, transaction
# from django.utils import timezone
# from huey.exceptions import TaskLockedException
from archivebox.config import CONSTANTS
# from archivebox.config import CONSTANTS
class SqliteSemaphore:
def __init__(self, db_path, table_name, name, value=1, timeout=None):
self.db_path = db_path
self.table_name = table_name
self.name = name
self.value = value
self.timeout = timeout or 86400 # Set a max age for lock holders
# class SqliteSemaphore:
# def __init__(self, db_path, table_name, name, value=1, timeout=None):
# self.db_path = db_path
# self.table_name = table_name
# self.name = name
# self.value = value
# self.timeout = timeout or 86400 # Set a max age for lock holders
# Ensure the table exists
with connection.cursor() as cursor:
cursor.execute(f"""
CREATE TABLE IF NOT EXISTS {self.table_name} (
id TEXT PRIMARY KEY,
name TEXT,
timestamp DATETIME
)
""")
# # Ensure the table exists
# with connection.cursor() as cursor:
# cursor.execute(f"""
# CREATE TABLE IF NOT EXISTS {self.table_name} (
# id TEXT PRIMARY KEY,
# name TEXT,
# timestamp DATETIME
# )
# """)
def acquire(self, name=None):
name = name or str(uuid.uuid4())
now = timezone.now()
expiration = now - timezone.timedelta(seconds=self.timeout)
# def acquire(self, name=None):
# name = name or str(uuid.uuid4())
# now = timezone.now()
# expiration = now - timezone.timedelta(seconds=self.timeout)
with transaction.atomic():
# Remove expired locks
with connection.cursor() as cursor:
cursor.execute(f"""
DELETE FROM {self.table_name}
WHERE name = %s AND timestamp < %s
""", [self.name, expiration])
# with transaction.atomic():
# # Remove expired locks
# with connection.cursor() as cursor:
# cursor.execute(f"""
# DELETE FROM {self.table_name}
# WHERE name = %s AND timestamp < %s
# """, [self.name, expiration])
# Try to acquire the lock
with connection.cursor() as cursor:
cursor.execute(f"""
INSERT INTO {self.table_name} (id, name, timestamp)
SELECT %s, %s, %s
WHERE (
SELECT COUNT(*) FROM {self.table_name}
WHERE name = %s
) < %s
""", [name, self.name, now, self.name, self.value])
# # Try to acquire the lock
# with connection.cursor() as cursor:
# cursor.execute(f"""
# INSERT INTO {self.table_name} (id, name, timestamp)
# SELECT %s, %s, %s
# WHERE (
# SELECT COUNT(*) FROM {self.table_name}
# WHERE name = %s
# ) < %s
# """, [name, self.name, now, self.name, self.value])
if cursor.rowcount > 0:
return name
# if cursor.rowcount > 0:
# return name
# If we couldn't acquire the lock, remove our attempted entry
with connection.cursor() as cursor:
cursor.execute(f"""
DELETE FROM {self.table_name}
WHERE id = %s AND name = %s
""", [name, self.name])
# # If we couldn't acquire the lock, remove our attempted entry
# with connection.cursor() as cursor:
# cursor.execute(f"""
# DELETE FROM {self.table_name}
# WHERE id = %s AND name = %s
# """, [name, self.name])
return None
# return None
def release(self, name):
with connection.cursor() as cursor:
cursor.execute(f"""
DELETE FROM {self.table_name}
WHERE id = %s AND name = %s
""", [name, self.name])
return cursor.rowcount > 0
# def release(self, name):
# with connection.cursor() as cursor:
# cursor.execute(f"""
# DELETE FROM {self.table_name}
# WHERE id = %s AND name = %s
# """, [name, self.name])
# return cursor.rowcount > 0
LOCKS_DB_PATH = CONSTANTS.DATABASE_FILE.parent / 'locks.sqlite3'
# LOCKS_DB_PATH = CONSTANTS.DATABASE_FILE.parent / 'locks.sqlite3'
def lock_task_semaphore(db_path, table_name, lock_name, value=1, timeout=None):
"""
Lock which can be acquired multiple times (default = 1).
# def lock_task_semaphore(db_path, table_name, lock_name, value=1, timeout=None):
# """
# Lock which can be acquired multiple times (default = 1).
NOTE: no provisions are made for blocking, waiting, or notifying. This is
just a lock which can be acquired a configurable number of times.
# NOTE: no provisions are made for blocking, waiting, or notifying. This is
# just a lock which can be acquired a configurable number of times.
Example:
# Example:
# Allow up to 3 workers to run this task concurrently. If the task is
# locked, retry up to 2 times with a delay of 60s.
@huey.task(retries=2, retry_delay=60)
@lock_task_semaphore('path/to/db.sqlite3', 'semaphore_locks', 'my-lock', 3)
def my_task():
...
"""
sem = SqliteSemaphore(db_path, table_name, lock_name, value, timeout)
def decorator(fn):
@wraps(fn)
def inner(*args, **kwargs):
tid = sem.acquire()
if tid is None:
raise TaskLockedException(f'unable to acquire lock {lock_name}')
try:
return fn(*args, **kwargs)
finally:
sem.release(tid)
return inner
return decorator
# # Allow up to 3 workers to run this task concurrently. If the task is
# # locked, retry up to 2 times with a delay of 60s.
# @huey.task(retries=2, retry_delay=60)
# @lock_task_semaphore('path/to/db.sqlite3', 'semaphore_locks', 'my-lock', 3)
# def my_task():
# ...
# """
# sem = SqliteSemaphore(db_path, table_name, lock_name, value, timeout)
# def decorator(fn):
# @wraps(fn)
# def inner(*args, **kwargs):
# tid = sem.acquire()
# if tid is None:
# raise TaskLockedException(f'unable to acquire lock {lock_name}')
# try:
# return fn(*args, **kwargs)
# finally:
# sem.release(tid)
# return inner
# return decorator

View file

@@ -8,7 +8,7 @@ from django_huey import db_task, task
from huey_monitor.models import TaskModel
from huey_monitor.tqdm import ProcessInfo
from .supervisor_util import get_or_create_supervisord_process
from .supervisord_util import get_or_create_supervisord_process
# @db_task(queue="commands", context=True, schedule=1)
# def scheduler_tick():

View file

@@ -115,6 +115,8 @@ dependencies = [
"abx-plugin-mercury>=2024.10.28",
"abx-plugin-htmltotext>=2024.10.28",
"python-statemachine>=2.3.6",
"click>=8.1.7",
"rich-click>=1.8.4",
]
[project.optional-dependencies]
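The click and rich-click dependencies added here back the decorator-based CLI pattern used throughout this commit (see archivebox_search.py and archivebox_version.py above); a minimal standalone sketch of the pattern, with illustrative names:

    import rich_click as click

    @click.command()
    @click.option('--quiet', '-q', is_flag=True, help='Suppress non-essential output')
    @click.argument('urls', nargs=-1)
    def main(quiet: bool, urls: tuple) -> None:
        """Example subcommand docstring, shown in --help output."""
        if not quiet:
            print(f'got {len(urls)} urls')

    if __name__ == '__main__':
        main()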

uv.lock
View file

@@ -658,6 +658,7 @@ dependencies = [
{ name = "atomicwrites", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "base32-crockford", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "channels", extra = ["daphne"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "croniter", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "dateparser", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -688,6 +689,7 @@ dependencies = [
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "rich-argparse", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "rich-click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "sonic-client", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "supervisor", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -784,6 +786,7 @@ requires-dist = [
{ name = "atomicwrites", specifier = "==1.4.1" },
{ name = "base32-crockford", specifier = "==0.3.0" },
{ name = "channels", extras = ["daphne"], specifier = ">=4.1.0" },
{ name = "click", specifier = ">=8.1.7" },
{ name = "croniter", specifier = ">=3.0.3" },
{ name = "dateparser", specifier = ">=1.2.0" },
{ name = "django", specifier = ">=5.1.1,<6.0" },
@@ -821,6 +824,7 @@ requires-dist = [
{ name = "requests-tracker", marker = "extra == 'debug'", specifier = ">=0.3.3" },
{ name = "rich", specifier = ">=13.8.0" },
{ name = "rich-argparse", specifier = ">=1.5.2" },
{ name = "rich-click", specifier = ">=1.8.4" },
{ name = "setuptools", specifier = ">=74.1.0" },
{ name = "sonic-client", specifier = ">=1.0.0" },
{ name = "supervisor", specifier = ">=4.2.5" },
@@ -2806,6 +2810,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/25/45/54b95bb72bb17c27a7252bee5034955020b5869a33918b660ffc29cbf608/rich_argparse-1.6.0-py3-none-any.whl", hash = "sha256:fbe70a1d821b3f2fa8958cddf0cae131870a6e9faa04ab52b409cb1eda809bd7", size = 20072 },
]
[[package]]
name = "rich-click"
version = "1.8.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/fc/f4/e48dc2850662526a26fb0961aacb0162c6feab934312b109b748ae4efee2/rich_click-1.8.4.tar.gz", hash = "sha256:0f49471f04439269d0e66a6f43120f52d11d594869a2a0be600cfb12eb0616b9", size = 38247 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/84/f3/72f93d8494ee641bde76bfe1208cf4abc44c6f9448673762f6077bc162d6/rich_click-1.8.4-py3-none-any.whl", hash = "sha256:2d2841b3cebe610d5682baa1194beaf78ab00c4fa31931533261b5eba2ee80b7", size = 35071 },
]
[[package]]
name = "ruff"
version = "0.7.4"