move main funcs into cli files and switch to using click for CLI

2024-11-24 21:23:22 +00:00 · 2024-11-19 00:18:51 -08:00 · 2024-11-19 00:18:51 -08:00 · 328eb98a38
commit 328eb98a38
parent 569081a9eb
35 changed files with 1885 additions and 2296 deletions
--- a/archivebox/init.py
+++ b/archivebox/init.py
@ -51,6 +51,7 @@ from .pkgs import load_vendored_pkgs             # noqa
 load_vendored_pkgs()
 # print('DONE LOADING VENDORED LIBRARIES')
 # print('LOADING ABX PLUGIN SPECIFICATIONS')
 # Load ABX Plugin Specifications + Default Implementations
 import abx                                       # noqa
 import abx_spec_archivebox                       # noqa
@ -74,7 +75,7 @@ abx.pm.register(abx_spec_searchbackend.PLUGIN_SPEC())
 # Cast to ArchiveBoxPluginSpec to enable static type checking of pm.hook.call() methods
 abx.pm = cast(abx.ABXPluginManager[abx_spec_archivebox.ArchiveBoxPluginSpec], abx.pm)
 pm = abx.pm
-
+# print('DONE LOADING ABX PLUGIN SPECIFICATIONS')
 # Load all pip-installed ABX-compatible plugins
 ABX_ECOSYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
@ -94,7 +95,9 @@ USER_PLUGINS = abx.find_plugins_in_dir(Path(os.getcwd()) / 'user_plugins')
 # Import all plugins and register them with ABX Plugin Manager
 ALL_PLUGINS = {**ABX_ECOSYSTEM_PLUGINS, **ARCHIVEBOX_BUILTIN_PLUGINS, **USER_PLUGINS}
 # print('LOADING ALL PLUGINS')
 LOADED_PLUGINS = abx.load_plugins(ALL_PLUGINS)
 # print('DONE LOADING ALL PLUGINS')
 # Setup basic config, constants, paths, and version
 from .config.constants import CONSTANTS                         # noqa
--- a/archivebox/main.py
+++ b/archivebox/main.py
@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""This is the main entry point for the ArchiveBox CLI."""
+"""This is the entrypoint for python -m archivebox ..."""
 __package__ = 'archivebox'
 import archivebox      # noqa # make sure monkey patches are applied before anything else
@ -15,5 +15,4 @@ ASCII_LOGO_MINI = r"""
 /_/   \_\_|  \___|_| |_|_| \_/ \___|____/ \___/_/\_\
 """
-if __name__ == '__main__':
+main(args=sys.argv[1:], stdin=sys.stdin)
    main(args=sys.argv[1:], stdin=sys.stdin)
--- a/archivebox/api/v1_cli.py
+++ b/archivebox/api/v1_cli.py
@ -6,13 +6,6 @@ from enum import Enum
 from ninja import Router, Schema
 from archivebox.main import (
    add,
    remove,
    update,
    list_all,
    schedule,
 )
 from archivebox.misc.util import ansi_to_html
 from archivebox.config.common import ARCHIVING_CONFIG
@ -60,13 +53,11 @@ class AddCommandSchema(Schema):
    urls: List[str]
    tag: str = ""
    depth: int = 0
    update: bool = not ARCHIVING_CONFIG.ONLY_NEW  # Default to the opposite of ARCHIVING_CONFIG.ONLY_NEW
    update_all: bool = False
    index_only: bool = False
    overwrite: bool = False
    init: bool = False
    extractors: str = ""
    parser: str = "auto"
    extract: str = ""
    update: bool = not ARCHIVING_CONFIG.ONLY_NEW  # Default to the opposite of ARCHIVING_CONFIG.ONLY_NEW
    overwrite: bool = False
    index_only: bool = False
 class UpdateCommandSchema(Schema):
    resume: Optional[float] = 0
@ -93,7 +84,7 @@ class ScheduleCommandSchema(Schema):
 class ListCommandSchema(Schema):
    filter_patterns: Optional[List[str]] = ['https://example.com']
    filter_type: str = FilterTypeChoices.substring
-    status: Optional[StatusChoices] = StatusChoices.indexed
+    status: StatusChoices = StatusChoices.indexed
    after: Optional[float] = 0
    before: Optional[float] = 999999999999999
    sort: str = 'bookmarked_at'
@ -115,16 +106,16 @@ class RemoveCommandSchema(Schema):
@router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
 def cli_add(request, args: AddCommandSchema):
    from archivebox.cli.archivebox_add import add
    result = add(
        urls=args.urls,
        tag=args.tag,
        depth=args.depth,
        update=args.update,
        update_all=args.update_all,
        index_only=args.index_only,
        overwrite=args.overwrite,
-        init=args.init,
+        extract=args.extract,
        extractors=args.extractors,
        parser=args.parser,
    )
@ -139,6 +130,8 @@ def cli_add(request, args: AddCommandSchema):
@router.post("/update", response=CLICommandResponseSchema, summary='archivebox update [args] [filter_patterns]')
 def cli_update(request, args: UpdateCommandSchema):
    from archivebox.cli.archivebox_update import update
    result = update(
        resume=args.resume,
        only_new=args.only_new,
@ -162,6 +155,8 @@ def cli_update(request, args: UpdateCommandSchema):
@router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
 def cli_schedule(request, args: ScheduleCommandSchema):
    from archivebox.cli.archivebox_schedule import schedule
    result = schedule(
        import_path=args.import_path,
        add=args.add,
@ -184,9 +179,11 @@ def cli_schedule(request, args: ScheduleCommandSchema):
-@router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns] (use this endpoint with ?filter_type=search to search for snapshots)')
+@router.post("/search", response=CLICommandResponseSchema, summary='archivebox search [args] [filter_patterns]')
-def cli_list(request, args: ListCommandSchema):
+def cli_search(request, args: ListCommandSchema):
-    result = list_all(
+    from archivebox.cli.archivebox_search import search
    result = search(
        filter_patterns=args.filter_patterns,
        filter_type=args.filter_type,
        status=args.status,
@ -221,6 +218,8 @@ def cli_list(request, args: ListCommandSchema):
@router.post("/remove", response=CLICommandResponseSchema, summary='archivebox remove [args] [filter_patterns]')
 def cli_remove(request, args: RemoveCommandSchema):
    from archivebox.cli.archivebox_remove import remove
    result = remove(
        yes=True,            # no way to interactively ask for confirmation via API, so we force yes
        delete=args.delete,
--- a/archivebox/cli/init.py
+++ b/archivebox/cli/init.py
@ -1,264 +1,117 @@
 __package__ = 'archivebox.cli'
 __command__ = 'archivebox'
 import os
 import sys
 import argparse
 import threading
 from time import sleep
 from collections.abc import Mapping
 from rich import print
 from typing import Optional, List, IO, Union, Iterable
 from pathlib import Path
 from importlib import import_module
-BUILTIN_LIST = list
+import rich_click as click
 from rich import print
 from archivebox.config.version import VERSION
 CLI_DIR = Path(__file__).resolve().parent
 # rewrite setup -> install for backwards compatibility
 if len(sys.argv) > 1 and sys.argv[1] == 'setup':
    from rich import print
    print(':warning: [bold red]DEPRECATED[/bold red] `archivebox setup` is deprecated, use `archivebox install` instead')
    sys.argv[1] = 'install'
 if '--debug' in sys.argv:
    os.environ['DEBUG'] = 'True'
    sys.argv.remove('--debug')
-# def list_subcommands() -> Dict[str, str]:
+class ArchiveBoxGroup(click.Group):
-#     """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
+    """lazy loading click group for archivebox commands"""
-#     COMMANDS = []
+    meta_commands = {
-#     for filename in os.listdir(CLI_DIR):
+        'help': 'archivebox.cli.archivebox_help.main',
-#         if is_cli_module(filename):
+        'version': 'archivebox.cli.archivebox_version.main',
-#             subcommand = filename.replace('archivebox_', '').replace('.py', '')
+    }
-#             module = import_module('.archivebox_{}'.format(subcommand), __package__)
+    setup_commands = {
-#             assert is_valid_cli_module(module, subcommand)
+        'init': 'archivebox.cli.archivebox_init.main',
-#             COMMANDS.append((subcommand, module.main.__doc__))
+        'install': 'archivebox.cli.archivebox_install.main',
-#             globals()[subcommand] = module.main
+    }
-#     display_order = lambda cmd: (
+    archive_commands = {
-#         display_first.index(cmd[0])
+        'add': 'archivebox.cli.archivebox_add.main',
-#         if cmd[0] in display_first else
+        'remove': 'archivebox.cli.archivebox_remove.main',
-#         100 + len(cmd[0])
+        'update': 'archivebox.cli.archivebox_update.main',
-#     )
+        'search': 'archivebox.cli.archivebox_search.main',
-#     return dict(sorted(COMMANDS, key=display_order))
+        'status': 'archivebox.cli.archivebox_status.main',
-
+        'config': 'archivebox.cli.archivebox_config.main',
-# just define it statically, it's much faster:
+        'schedule': 'archivebox.cli.archivebox_schedule.main',
-SUBCOMMAND_MODULES = {
+        'server': 'archivebox.cli.archivebox_server.main',
-    'help': 'archivebox_help',
+        'shell': 'archivebox.cli.archivebox_shell.main',
-    'version': 'archivebox_version' ,
+        'manage': 'archivebox.cli.archivebox_manage.main',
-    
+    }
-    'init': 'archivebox_init',
+    all_subcommands = {
-    'install': 'archivebox_install',
+        **meta_commands,
-    ##############################################
+        **setup_commands,
-    'config': 'archivebox_config',
+        **archive_commands,
-    'add': 'archivebox_add',
+    }
-    'remove': 'archivebox_remove',
+    renamed_commands = {
-    'update': 'archivebox_update',
+        'setup': 'install',
-    'list': 'archivebox_list',
+        'list': 'search',
-    'status': 'archivebox_status',
+        'import': 'add',
-    
+        'archive': 'add',
-    'schedule': 'archivebox_schedule',
+        'export': 'search',
-    'server': 'archivebox_server',
+    }
    'shell': 'archivebox_shell',
    'manage': 'archivebox_manage',
    # 'oneshot': 'archivebox_oneshot',
 }
 # every imported command module must have these properties in order to be valid
 required_attrs = ('__package__', '__command__', 'main')
 # basic checks to make sure imported files are valid subcommands
 is_cli_module = lambda fname: fname.startswith('archivebox_') and fname.endswith('.py')
 is_valid_cli_module = lambda module, subcommand: (
    all(hasattr(module, attr) for attr in required_attrs)
    and module.__command__.split(' ')[-1] == subcommand
 )
 class LazySubcommands(Mapping):
    """Read-only mapping of subcommand name -> that subcommand's main callable.

    The key set is taken from SUBCOMMAND_MODULES. The module behind a key is
    imported only when that key is actually looked up.
    """

    def __len__(self):
        return len(SUBCOMMAND_MODULES)

    def __iter__(self):
        return iter(SUBCOMMAND_MODULES.keys())

    def __getitem__(self, key):
        # import the module named by SUBCOMMAND_MODULES[key], relative to this package
        module_name = SUBCOMMAND_MODULES[key]
        cli_module = import_module(f'.{module_name}', __package__)
        assert is_valid_cli_module(cli_module, key)
        return cli_module.main

    def keys(self):
        return SUBCOMMAND_MODULES.keys()

    def values(self):
        # built eagerly: each lookup below imports the corresponding module
        return [self[name] for name in self.keys()]

    def items(self):
        return [(name, self[name]) for name in self.keys()]
 CLI_SUBCOMMANDS = LazySubcommands()
-# these common commands will appear sorted before any others for ease-of-use
+    def get_command(self, ctx, cmd_name):
-meta_cmds = ('help', 'version')                               # dont require valid data folder at all
+        # handle renamed commands
-setup_cmds = ('init', 'setup', 'install')                      # require valid data folder, but dont require DB present in it yet
+        if cmd_name in self.renamed_commands:
-archive_cmds = ('add', 'remove', 'update', 'list', 'status', 'schedule', 'server', 'shell', 'manage')  # require valid data folder + existing db present
+            new_name = self.renamed_commands[cmd_name]
-fake_db = ("oneshot",)                                        # use fake in-memory db
+            print(f' [violet]Hint:[/violet] `archivebox {cmd_name}` has been renamed to `archivebox {new_name}`')
            cmd_name = new_name
            ctx.invoked_subcommand = cmd_name
-display_first = (*meta_cmds, *setup_cmds, *archive_cmds)
+        # handle lazy loading of commands
        if cmd_name in self.all_subcommands:
            return self._lazy_load(cmd_name)
        # fall-back to using click's default command lookup
        return super().get_command(ctx, cmd_name)
    @classmethod
    def _lazy_load(cls, cmd_name):
        import_path = cls.all_subcommands[cmd_name]
        modname, funcname = import_path.rsplit('.', 1)
        # print(f'LAZY LOADING {import_path}')
        mod = import_module(modname)
        func = getattr(mod, funcname)
        if not hasattr(func, '__doc__'):
            raise ValueError(f'lazy loading of {import_path} failed - no docstring found on method')
        # if not isinstance(cmd, click.BaseCommand):
            # raise ValueError(f'lazy loading of {import_path} failed - not a click command')
        return func
-IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler')  # threads we dont have to wait for before exiting
+@click.group(cls=ArchiveBoxGroup, invoke_without_command=True)
@click.option('--help', '-h', is_flag=True, help='Show help')
@click.version_option(version=VERSION, package_name='archivebox', message='%(version)s')
@click.pass_context
 def cli(ctx, help=False):
    """ArchiveBox: The self-hosted internet archive"""
    if help or ctx.invoked_subcommand is None:
        ctx.invoke(ctx.command.get_command(ctx, 'help'))
-def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: Iterable[str]=IGNORED_BG_THREADS, timeout: int=60) -> int:
+    if ctx.invoked_subcommand in ArchiveBoxGroup.archive_commands:
-    """
+        # print('SETUP DJANGO AND CHECK DATA FOLDER')
-    Block until the specified threads exit. e.g. pass thread_names=('default_hook_handler',) to wait for webhooks.
+        from archivebox.config.django import setup_django
-    Useful for waiting for signal handlers, webhooks, etc. to finish running after a mgmt command completes.
+        from archivebox.misc.checks import check_data_folder
-    """
+        setup_django()
        check_data_folder()
-    wait_for_all: bool = thread_names == ()
+def main(args=None, prog_name=None):
-
+    # show `docker run archivebox xyz` in help messages if running in docker
-    thread_matches = lambda thread, ptns: any(ptn in repr(thread) for ptn in ptns)
+    IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
-
+    prog_name = prog_name or ('docker compose run archivebox' if IN_DOCKER else 'archivebox')
    should_wait = lambda thread: (
        not thread_matches(thread, ignore_names)
        and (wait_for_all or thread_matches(thread, thread_names)))
    for tries in range(timeout):
        all_threads = [*threading.enumerate()]
        blocking_threads = [*filter(should_wait, all_threads)]
        threads_summary = ', '.join(repr(t) for t in blocking_threads)
        if blocking_threads:
            sleep(1)
            if tries == 5:                            # only show stderr message if we need to wait more than 5s
                print(
                    f'[…] Waiting up to {timeout}s for background jobs (e.g. webhooks) to finish...',
                    threads_summary,
                    file=sys.stderr,
                )
        else:
            return tries
    raise Exception(f'Background threads failed to exit after {tries}s: {threads_summary}')
 def run_subcommand(subcommand: str,
                   subcommand_args: List[str] | None = None,
                   stdin: Optional[IO]=None,
                   pwd: Union[Path, str, None]=None) -> None:
    """Set up Django, then dispatch to the main() of the named ArchiveBox subcommand.

    The subcommand module is imported as ``.archivebox_<subcommand>`` relative to
    this package and its ``main`` is called with the given args, stdin and pwd.
    Afterwards this blocks (up to 60s) until background threads have exited.
    """
    from archivebox.misc.checks import check_migrations
    from archivebox.config.django import setup_django

    subcommand_args = subcommand_args or []

    needs_db = subcommand in archive_cmds
    init_requested = '--init' in subcommand_args or '--quick-init' in subcommand_args

    # the DB is only checked during setup when the command needs one and no init is pending
    setup_django(
        in_memory_db=subcommand in fake_db,
        check_db=needs_db and not init_requested,
    )

    # skip the migrations check when a help/version flag appears among the first argv entries
    help_or_version_flags = ('help', '-h', '--help', 'version', '--version')
    if any(flag in sys.argv[:4] for flag in help_or_version_flags):
        needs_db = False

    if subcommand in archive_cmds and needs_db:
        check_migrations()

    cli_module = import_module('.archivebox_{}'.format(subcommand), __package__)
    cli_module.main(args=subcommand_args, stdin=stdin, pwd=pwd)    # type: ignore

    # give webhooks, signals, and other background jobs a chance to finish before exit
    wait_for_bg_threads_to_exit(timeout=60)
 class NotProvided:
    """Placeholder type whose instances are falsy, have length 0, and print as '<not provided>'."""

    def __repr__(self):
        return '<not provided>'

    def __bool__(self):
        # always falsy, so `value or fallback` picks the fallback
        return False

    def __len__(self):
        return 0
 Omitted = Union[None, NotProvided]
 OMITTED = NotProvided()
 def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: str | None=None) -> None:
    # print('STARTING CLI MAIN ENTRYPOINT')
    args = sys.argv[1:] if args is OMITTED else args
    stdin = sys.stdin if stdin is OMITTED else stdin
    parser = argparse.ArgumentParser(
        prog=__command__,
        description='ArchiveBox: The self-hosted internet archive',
        add_help=False,
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '--help', '-h',
        action='store_true',
        help=CLI_SUBCOMMANDS['help'].__doc__,
    )
    group.add_argument(
        '--version',
        action='store_true',
        help=CLI_SUBCOMMANDS['version'].__doc__,
    )
    group.add_argument(
        "subcommand",
        type=str,
        help= "The name of the subcommand to run",
        nargs='?',
        choices=CLI_SUBCOMMANDS.keys(),
        default=None,
    )
    parser.add_argument(
        "subcommand_args",
        help="Arguments for the subcommand",
        nargs=argparse.REMAINDER,
    )
    command = parser.parse_args(args or ())
    if command.version:
        command.subcommand = 'version'
    elif command.help or command.subcommand is None:
        command.subcommand = 'help'
    if command.subcommand not in ('version',):
        from archivebox.misc.logging_util import log_cli_command
        log_cli_command(
            subcommand=command.subcommand,
            subcommand_args=command.subcommand_args,
            stdin=stdin or None,
        )
    try:
-        run_subcommand(
+        cli(args=args, prog_name=prog_name)
            subcommand=command.subcommand,
            subcommand_args=command.subcommand_args,
            stdin=stdin or None,
        )
    except KeyboardInterrupt:
        print('\n\n[red][X] Got CTRL+C. Exiting...[/red]')
 if __name__ == '__main__':
    main()
--- a/archivebox/cli/archivebox_add.py
+++ b/archivebox/cli/archivebox_add.py
@ -4,10 +4,10 @@ __package__ = 'archivebox.cli'
 __command__ = 'archivebox add'
 import sys
 import argparse
-from typing import IO, TYPE_CHECKING
+from typing import TYPE_CHECKING
 import rich_click as click
 from django.utils import timezone
 from django.db.models import QuerySet
@ -18,7 +18,6 @@ from archivebox.config.common import ARCHIVING_CONFIG
 from archivebox.config.django import setup_django
 from archivebox.config.permissions import USER, HOSTNAME
 from archivebox.misc.checks import check_data_folder
 from archivebox.misc.logging_util import SmartFormatter, accept_stdin, stderr
 from archivebox.parsers import PARSERS
@ -29,22 +28,142 @@ if TYPE_CHECKING:
 ORCHESTRATOR = None
 # OLD VERSION:
 # def add(urls: Union[str, List[str]],
 #         tag: str='',
 #         depth: int=0,
 #         update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
 #         update_all: bool=False,
 #         index_only: bool=False,
 #         overwrite: bool=False,
 #         # duplicate: bool=False,  # TODO: reuse the logic from admin.py resnapshot to allow adding multiple snapshots by appending timestamp automatically
 #         init: bool=False,
 #         extractors: str="",
 #         parser: str="auto",
 #         created_by_id: int | None=None,
 #         out_dir: Path=DATA_DIR) -> List[Link]:
 #     """Add a new URL or list of URLs to your archive"""
 #     from core.models import Snapshot, Tag
 #     # from workers.supervisord_util import start_cli_workers, tail_worker_logs
 #     # from workers.tasks import bg_archive_link
 #     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
 #     extractors = extractors.split(",") if extractors else []
 #     if init:
 #         run_subcommand('init', stdin=None, pwd=out_dir)
 #     # Load list of links from the existing index
 #     check_data_folder()
 #     # worker = start_cli_workers()
 #     new_links: List[Link] = []
 #     all_links = load_main_index(out_dir=out_dir)
 #     log_importing_started(urls=urls, depth=depth, index_only=index_only)
 #     if isinstance(urls, str):
 #         # save verbatim stdin to sources
 #         write_ahead_log = save_text_as_source(urls, filename='{ts}-import.txt', out_dir=out_dir)
 #     elif isinstance(urls, list):
 #         # save verbatim args to sources
 #         write_ahead_log = save_text_as_source('\n'.join(urls), filename='{ts}-import.txt', out_dir=out_dir)
 #     new_links += parse_links_from_source(write_ahead_log, root_url=None, parser=parser)
 #     # If we're going one level deeper, download each link and look for more links
 #     new_links_depth = []
 #     if new_links and depth == 1:
 #         log_crawl_started(new_links)
 #         for new_link in new_links:
 #             try:
 #                 downloaded_file = save_file_as_source(new_link.url, filename=f'{new_link.timestamp}-crawl-{new_link.domain}.txt', out_dir=out_dir)
 #                 new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
 #             except Exception as err:
 #                 stderr('[!] Failed to get contents of URL {new_link.url}', err, color='red')
 #     imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
 #     new_links = dedupe_links(all_links, imported_links)
 #     write_main_index(links=new_links, out_dir=out_dir, created_by_id=created_by_id)
 #     all_links = load_main_index(out_dir=out_dir)
 #     tags = [
 #         Tag.objects.get_or_create(name=name.strip(), defaults={'created_by_id': created_by_id})[0]
 #         for name in tag.split(',')
 #         if name.strip()
 #     ]
 #     if tags:
 #         for link in imported_links:
 #             snapshot = Snapshot.objects.get(url=link.url)
 #             snapshot.tags.add(*tags)
 #             snapshot.tags_str(nocache=True)
 #             snapshot.save()
 #         # print(f'    √ Tagged {len(imported_links)} Snapshots with {len(tags)} tags {tags_str}')
 #     if index_only:
 #         # mock archive all the links using the fake index_only extractor method in order to update their state
 #         if overwrite:
 #             archive_links(imported_links, overwrite=overwrite, methods=['index_only'], out_dir=out_dir, created_by_id=created_by_id)
 #         else:
 #             archive_links(new_links, overwrite=False, methods=['index_only'], out_dir=out_dir, created_by_id=created_by_id)
 #     else:
 #         # fully run the archive extractor methods for each link
 #         archive_kwargs = {
 #             "out_dir": out_dir,
 #             "created_by_id": created_by_id,
 #         }
 #         if extractors:
 #             archive_kwargs["methods"] = extractors
 #         stderr()
 #         ts = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
 #         if update:
 #             stderr(f'[*] [{ts}] Archiving + updating {len(imported_links)}/{len(all_links)}', len(imported_links), 'URLs from added set...', color='green')
 #             archive_links(imported_links, overwrite=overwrite, **archive_kwargs)
 #         elif update_all:
 #             stderr(f'[*] [{ts}] Archiving + updating {len(all_links)}/{len(all_links)}', len(all_links), 'URLs from entire library...', color='green')
 #             archive_links(all_links, overwrite=overwrite, **archive_kwargs)
 #         elif overwrite:
 #             stderr(f'[*] [{ts}] Archiving + overwriting {len(imported_links)}/{len(all_links)}', len(imported_links), 'URLs from added set...', color='green')
 #             archive_links(imported_links, overwrite=True, **archive_kwargs)
 #         elif new_links:
 #             stderr(f'[*] [{ts}] Archiving {len(new_links)}/{len(all_links)} URLs from added set...', color='green')
 #             archive_links(new_links, overwrite=False, **archive_kwargs)
 #     # tail_worker_logs(worker['stdout_logfile'])
 #     # if CAN_UPGRADE:
 #     #     hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
 #     return new_links
 def add(urls: str | list[str],
        tag: str='',
        depth: int=0,
-        update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
+        tag: str='',
        update_all: bool=False,
        index_only: bool=False,
        overwrite: bool=False,
        extractors: str="",
        parser: str="auto",
        extract: str="",
        persona: str='Default',
        overwrite: bool=False,
        update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
        index_only: bool=False,
        bg: bool=False,
        created_by_id: int | None=None) -> QuerySet['Snapshot']:
    """Add a new URL or list of URLs to your archive"""
    global ORCHESTRATOR
    depth = int(depth)
    assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
    # 0. setup abx, django, check_data_folder
@ -56,7 +175,6 @@ def add(urls: str | list[str],
    from archivebox.base_models.models import get_or_create_system_user_pk
    created_by_id = created_by_id or get_or_create_system_user_pk()
    # 1. save the provided urls to sources/2024-11-05__23-59-59__cli_add.txt
@ -72,7 +190,7 @@ def add(urls: str | list[str],
        'ONLY_NEW': not update,
        'INDEX_ONLY': index_only,
        'OVERWRITE': overwrite,
-        'EXTRACTORS': extractors,
+        'EXTRACTORS': extract,
        'DEFAULT_PERSONA': persona or 'Default',
    })
    # 3. create a new Crawl pointing to the Seed
@ -91,118 +209,23 @@ def add(urls: str | list[str],
    return crawl.snapshot_set.all()
-def main(args: list[str] | None=None, stdin: IO | None=None, pwd: str | None=None) -> None:
+@click.command()
@click.option('--depth', '-d', type=click.Choice(('0', '1')), default='0', help='Recursively archive linked pages up to N hops away')
@click.option('--tag', '-t', default='', help='Comma-separated list of tags to add to each snapshot e.g. tag1,tag2,tag3')
@click.option('--parser', type=click.Choice(['auto', *PARSERS.keys()]), default='auto', help='Parser for reading input URLs')
@click.option('--extract', '-e', default='', help='Comma-separated list of extractors to use e.g. title,favicon,screenshot,singlefile,...')
@click.option('--persona', default='Default', help='Authentication profile to use when archiving')
@click.option('--overwrite', '-F', is_flag=True, help='Overwrite existing data if URLs have been archived previously')
@click.option('--update', is_flag=True, default=ARCHIVING_CONFIG.ONLY_NEW, help='Retry any previously skipped/failed URLs when re-adding them')
@click.option('--index-only', is_flag=True, help='Just add the URLs to the index without archiving them now')
 # @click.option('--update-all', is_flag=True, help='Update ALL links in index when finished adding new ones')
@click.option('--bg', is_flag=True, help='Run crawl in background worker instead of immediately')
@click.argument('urls', nargs=-1, type=click.Path())
 def main(**kwargs):
    """Add a new URL or list of URLs to your archive"""
    parser = argparse.ArgumentParser(
        prog=__command__,
        description=add.__doc__,
        add_help=True,
        formatter_class=SmartFormatter,
    )
    parser.add_argument(
        '--tag', '-t',
        type=str,
        default='',
        help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
    )
    parser.add_argument(
        '--update', #'-u',
        action='store_true',
        default=not ARCHIVING_CONFIG.ONLY_NEW,  # when ONLY_NEW=True we skip updating old links
        help="Also retry previously skipped/failed links when adding new links",
    )
    parser.add_argument(
        '--update-all', #'-n',
        action='store_true',
        default=False, 
        help="Also update ALL links in index when finished adding new links",
    )
    parser.add_argument(
        '--index-only', #'-o',
        action='store_true',
        help="Add the links to the main index without archiving them",
    )
    parser.add_argument(
        'urls',
        nargs='*',
        type=str,
        default=None,
        help=(
            'URLs or paths to archive e.g.:\n'
            '    https://getpocket.com/users/USERNAME/feed/all\n'
            '    https://example.com/some/rss/feed.xml\n'
            '    https://example.com\n'
            '    ~/Downloads/firefox_bookmarks_export.html\n'
            '    ~/Desktop/sites_list.csv\n'
        )
    )
    parser.add_argument(
        "--depth",
        action="store",
        default=0,
        choices=[0, 1],
        type=int,
        help="Recursively archive all linked pages up to this many hops away"
    )
    parser.add_argument(
        "--overwrite",
        default=False,
        action="store_true",
        help="Re-archive URLs from scratch, overwriting any existing files"
    )
    parser.add_argument(
        "--extract", '-e',
        type=str,
        help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
              This does not take precedence over the configuration",
        default=""
    )
    parser.add_argument(
        "--parser",
        type=str,
        help="Parser used to read inputted URLs.",
        default="auto",
        choices=["auto", *PARSERS.keys()],
    )
    parser.add_argument(
        "--persona",
        type=str,
        help="Name of accounts persona to use when archiving.",
        default="Default",
    )
    parser.add_argument(
        "--bg",
        default=False,
        action="store_true",
        help="Enqueue a background worker to complete the crawl instead of running it immediately",
    )
    command = parser.parse_args(args or ())
    urls = command.urls
-    stdin_urls = ''
+    add(**kwargs)
    if not urls:
        stdin_urls = accept_stdin(stdin)
    if (stdin_urls and urls) or (not stdin and not urls):
        stderr(
            '[X] You must pass URLs/paths to add via stdin or CLI arguments.\n',
            color='red',
        )
        raise SystemExit(2)
    add(
        urls=stdin_urls or urls,
        depth=command.depth,
        tag=command.tag,
        update=command.update,
        update_all=command.update_all,
        index_only=command.index_only,
        overwrite=command.overwrite,
        extractors=command.extract,
        parser=command.parser,
        persona=command.persona,
        bg=command.bg,
    )
 if __name__ == '__main__':
-    main(args=sys.argv[1:], stdin=sys.stdin)
+    main()
--- a/archivebox/cli/archivebox_config.py
+++ b/archivebox/cli/archivebox_config.py
@ -12,7 +12,130 @@ from typing import Optional, List, IO
 from archivebox.misc.util import docstring
 from archivebox.config import DATA_DIR
 from archivebox.misc.logging_util import SmartFormatter, accept_stdin
-from ..main import config
+
 # @enforce_types
 def config(config_options_str: Optional[str]=None,
           config_options: Optional[List[str]]=None,
           get: bool=False,
           set: bool=False,
           search: bool=False,
           reset: bool=False,
           out_dir: Path=DATA_DIR) -> None:
    """Get and set your ArchiveBox project configuration values"""
    # NOTE: the one-line docstring above is passed to @docstring(config.__doc__)
    # further down in this file, so the detailed contract lives in comments here.
    #
    # Parameters:
    #   config_options_str: newline-separated options (split on '\n' below);
    #                       mutually exclusive with config_options.
    #   config_options:     list of KEY (for get/search) or KEY=VALUE (for set) strings.
    #   get / set / search / reset: select the mode; with no mode and no options
    #                       the whole flat config is printed.
    #   out_dir:            accepted for interface compatibility; not read by this body.
    #
    # Raises:
    #   SystemExit(2) on invalid usage (both option sources given, a set line with
    #                 no '=', --reset, or no usable mode).
    #   SystemExit(1) when some keys could not be fetched or set.
    #   SystemExit(not matching_config) after a search/get, i.e. 0 if anything
    #                 was printed and 1 otherwise.
    from rich import print
    check_data_folder()
    if config_options and config_options_str:
        stderr(
            '[X] You should either pass config values as an arguments '
            'or via stdin, but not both.\n',
            color='red',
        )
        raise SystemExit(2)
    elif config_options_str:
        config_options = config_options_str.split('\n')
    FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
    CONFIGS = archivebox.pm.hook.get_CONFIGS()
    config_options = config_options or []
    no_args = not (get or set or reset or config_options)
    matching_config = {}
    if search:
        if config_options:
            config_options = [get_real_name(key) for key in config_options]
            # exact key matches first
            matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
            for config_section in CONFIGS.values():
                aliases = config_section.aliases
                # dump the section once instead of once per key/alias comparison
                section_values = config_section.model_dump()
                for search_key in config_options:
                    needle = search_key.lower()
                    # search all aliases in the section
                    for alias_key, key in aliases.items():
                        if needle in alias_key.lower():
                            matching_config[key] = section_values[key]
                    # search all keys and values in the section
                    for existing_key, value in section_values.items():
                        if needle in existing_key.lower() or needle in str(value).lower():
                            matching_config[existing_key] = value
        print(printable_config(matching_config))
        raise SystemExit(not matching_config)
    elif get or no_args:
        if config_options:
            config_options = [get_real_name(key) for key in config_options]
            matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
            failed_config = [key for key in config_options if key not in FLAT_CONFIG]
            if failed_config:
                stderr()
                stderr('[X] These options failed to get', color='red')
                # list only the keys that were not found, not every requested key
                stderr('    {}'.format('\n    '.join(failed_config)))
                raise SystemExit(1)
        else:
            matching_config = FLAT_CONFIG
        print(printable_config(matching_config))
        raise SystemExit(not matching_config)
    elif set:
        new_config = {}
        failed_options = []
        for line in config_options:
            # skip comment lines and blank lines
            if line.startswith('#') or not line.strip():
                continue
            if '=' not in line:
                stderr('[X] Config KEY=VALUE must have an = sign in it', color='red')
                stderr(f'    {line}')
                raise SystemExit(2)
            raw_key, val = line.split('=', 1)
            raw_key = raw_key.upper().strip()
            key = get_real_name(raw_key)
            if key != raw_key:
                stderr(f'[i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.', color='lightyellow')
            if key in FLAT_CONFIG:
                new_config[key] = val.strip()
            else:
                failed_options.append(line)
        if new_config:
            before = FLAT_CONFIG
            matching_config = write_config_file(new_config)
            after = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()}
            print(printable_config(matching_config))
            # report options whose value changed even though they were not set directly
            side_effect_changes = {}
            for key, val in after.items():
                if key in FLAT_CONFIG and (str(before[key]) != str(val)) and (key not in matching_config):
                    side_effect_changes[key] = val
            if side_effect_changes:
                stderr()
                stderr('[i] Note: This change also affected these other options that depended on it:', color='lightyellow')
                print('    {}'.format(printable_config(side_effect_changes, prefix='    ')))
        if failed_options:
            stderr()
            stderr('[X] These options failed to set (check for typos):', color='red')
            stderr('    {}'.format('\n    '.join(failed_options)))
            raise SystemExit(1)
    elif reset:
        stderr('[X] This command is not implemented yet.', color='red')
        stderr('    Please manually remove the relevant lines from your config file:')
        raise SystemExit(2)
    else:
        stderr('[X] You must pass either --get or --set, or no arguments to get the whole config.', color='red')
        stderr('    archivebox config')
        stderr('    archivebox config --get SOME_KEY')
        stderr('    archivebox config --set SOME_KEY=SOME_VALUE')
        raise SystemExit(2)
@docstring(config.__doc__)
--- a/archivebox/cli/archivebox_help.py
+++ b/archivebox/cli/archivebox_help.py
@ -1,32 +1,105 @@
 #!/usr/bin/env python3
 __package__ = 'archivebox.cli'
 __command__ = 'archivebox help'
-import sys
+import os    
 import argparse
 from pathlib import Path
 from typing import Optional, List, IO
-from archivebox.misc.util import docstring
+import click
-from archivebox.misc.logging_util import SmartFormatter, reject_stdin
+from rich import print
-from archivebox.config import DATA_DIR
+from rich.panel import Panel
 from ..main import help
-@docstring(help.__doc__)
+def help() -> None:
-def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
+    """Print the ArchiveBox help message and usage"""
-    parser = argparse.ArgumentParser(
+
-        prog=__command__,
+    from archivebox.cli import ArchiveBoxGroup
-        description=help.__doc__,
+    from archivebox.config import CONSTANTS
-        add_help=True,
+    from archivebox.config.permissions import IN_DOCKER
-        formatter_class=SmartFormatter,
+    from archivebox.misc.logging_util import log_cli_command
    log_cli_command('help', [], None, '.')
    COMMANDS_HELP_TEXT = '\n    '.join(
        f'[green]{cmd.ljust(20)}[/green] {ArchiveBoxGroup._lazy_load(cmd).__doc__}'
        for cmd in ArchiveBoxGroup.meta_commands.keys()
    ) + '\n\n    ' + '\n    '.join(
        f'[green]{cmd.ljust(20)}[/green] {ArchiveBoxGroup._lazy_load(cmd).__doc__}'
        for cmd in ArchiveBoxGroup.setup_commands.keys()
    ) + '\n\n    ' + '\n    '.join(
        f'[green]{cmd.ljust(20)}[/green] {ArchiveBoxGroup._lazy_load(cmd).__doc__}'
        for cmd in ArchiveBoxGroup.archive_commands.keys()
    )
    parser.parse_args(args or ())
    reject_stdin(__command__, stdin)
-    help(out_dir=Path(pwd) if pwd else DATA_DIR)
+    DOCKER_USAGE = '''
 [dodger_blue3]Docker Usage:[/dodger_blue3]
    [grey53]# using Docker Compose:[/grey53]
    [blue]docker compose run[/blue] [dark_green]archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
    [grey53]# using Docker:[/grey53]
    [blue]docker run[/blue] -v [light_slate_blue]$PWD:/data[/light_slate_blue] [grey53]-p 8000:8000[/grey53] -it [dark_green]archivebox/archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
 ''' if IN_DOCKER else ''
    DOCKER_DOCS = '\n    [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Docker[/link]' if IN_DOCKER else ''
    DOCKER_OUTSIDE_HINT = "\n    [grey53]# outside of Docker:[/grey53]" if IN_DOCKER else ''
    DOCKER_CMD_PREFIX = "[blue]docker ... [/blue]" if IN_DOCKER else ''
    print(f'''{DOCKER_USAGE}
 [deep_sky_blue4]Usage:[/deep_sky_blue4]{DOCKER_OUTSIDE_HINT}
    [dark_green]archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
 [deep_sky_blue4]Commands:[/deep_sky_blue4]
    {COMMANDS_HELP_TEXT}
 [deep_sky_blue4]Documentation:[/deep_sky_blue4]
    [link=https://github.com/ArchiveBox/ArchiveBox/wiki]https://github.com/ArchiveBox/ArchiveBox/wiki[/link]{DOCKER_DOCS}
    [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#cli-usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Usage[/link]
    [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration[/link]
 ''')
    if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) and CONSTANTS.ARCHIVE_DIR.is_dir():
        pretty_out_dir = str(CONSTANTS.DATA_DIR).replace(str(Path('~').expanduser()), '~')
        EXAMPLE_USAGE = f'''
 [light_slate_blue]DATA DIR[/light_slate_blue]: [yellow]{pretty_out_dir}[/yellow]
 [violet]Hint:[/violet] [i]Common maintenance tasks:[/i]
    [dark_green]archivebox[/dark_green] [green]init[/green]      [grey53]# make sure database is up-to-date (safe to run multiple times)[/grey53]
    [dark_green]archivebox[/dark_green] [green]install[/green]   [grey53]# make sure plugins are up-to-date (wget, chrome, singlefile, etc.)[/grey53]
    [dark_green]archivebox[/dark_green] [green]status[/green]    [grey53]# get a health checkup report on your collection[/grey53]
    [dark_green]archivebox[/dark_green] [green]update[/green]    [grey53]# retry any previously failed or interrupted archiving tasks[/grey53]
 [violet]Hint:[/violet] [i]More example usage:[/i]
    [dark_green]archivebox[/dark_green] [green]add[/green] --depth=1 "https://example.com/some/page"
    [dark_green]archivebox[/dark_green] [green]list[/green] --sort=timestamp --csv=timestamp,downloaded_at,url,title
    [dark_green]archivebox[/dark_green] [green]schedule[/green] --every=day --depth=1 "https://example.com/some/feed.rss"
    [dark_green]archivebox[/dark_green] [green]server[/green] [blue]0.0.0.0:8000[/blue]                [grey53]# Start the Web UI / API server[/grey53]
 '''
        print(Panel(EXAMPLE_USAGE, expand=False, border_style='grey53', title='[green3]:white_check_mark: A collection [light_slate_blue]DATA DIR[/light_slate_blue] is currently active[/green3]', subtitle='Commands run inside this dir will only apply to this collection.'))
    else:
        DATA_SETUP_HELP = '\n'
        if IN_DOCKER:
            DATA_SETUP_HELP += '[violet]Hint:[/violet] When using Docker, you need to mount a volume to use as your data dir:\n'
            DATA_SETUP_HELP += '    docker run [violet]-v /some/path/data:/data[/violet] archivebox/archivebox ...\n\n'
        DATA_SETUP_HELP += 'To load an [dark_blue]existing[/dark_blue] collection:\n'
        DATA_SETUP_HELP += '    1. [green]cd[/green] ~/archivebox/data     [grey53]# go into existing [light_slate_blue]DATA DIR[/light_slate_blue] (can be anywhere)[/grey53]\n'
        DATA_SETUP_HELP += f'    2. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]init[/green]          [grey53]# migrate to latest version (safe to run multiple times)[/grey53]\n'
        DATA_SETUP_HELP += f'    3. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]install[/green]       [grey53]# auto-update all plugins (wget, chrome, singlefile, etc.)[/grey53]\n'
        DATA_SETUP_HELP += f'    4. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]help[/green]          [grey53]# ...get help with next steps... [/grey53]\n\n'
        DATA_SETUP_HELP += 'To start a [sea_green1]new[/sea_green1] collection:\n'
        DATA_SETUP_HELP += '    1. [green]mkdir[/green] ~/archivebox/data  [grey53]# create a new, empty [light_slate_blue]DATA DIR[/light_slate_blue] (can be anywhere)[/grey53]\n'
        DATA_SETUP_HELP += '    2. [green]cd[/green] ~/archivebox/data     [grey53]# cd into the new directory[/grey53]\n'
        DATA_SETUP_HELP += f'    3. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]init[/green]          [grey53]# initialize ArchiveBox in the new data dir[/grey53]\n'
        DATA_SETUP_HELP += f'    4. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]install[/green]       [grey53]# auto-install all plugins (wget, chrome, singlefile, etc.)[/grey53]\n'
        DATA_SETUP_HELP += f'    5. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]help[/green]          [grey53]# ... get help with next steps... [/grey53]\n'
        print(Panel(DATA_SETUP_HELP, expand=False, border_style='grey53', title='[red]:cross_mark: No collection is currently active[/red]', subtitle='All archivebox [green]commands[/green] should be run from inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]'))
@click.command()
@click.option('--help', '-h', is_flag=True, help='Show help')
 def main(**kwargs):
    """Print the ArchiveBox help message and usage"""
    # --help/-h is declared as a plain flag and swallowed via **kwargs: its value
    # is never read, so the full help() output is printed whether or not it is passed.
    return help()
 if __name__ == '__main__':
-    main(args=sys.argv[1:], stdin=sys.stdin)
+    main()
--- a/archivebox/cli/archivebox_init.py
+++ b/archivebox/cli/archivebox_init.py
@ -5,13 +5,193 @@ __command__ = 'archivebox init'
 import sys
 import argparse
-
+from pathlib import Path
 from typing import Optional, List, IO
 from archivebox.misc.util import docstring
 from archivebox.config import DATA_DIR
 from archivebox.misc.logging_util import SmartFormatter, reject_stdin
-from ..main import init
+
 def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Path=DATA_DIR) -> None:
    """Initialize a new ArchiveBox collection in the current directory"""
    # NOTE: the one-line docstring above is passed to @docstring(init.__doc__)
    # further down in this file, so the detailed contract lives in comments here.
    #
    # Parameters:
    #   force:   proceed even if the folder already contains files but no SQL index.
    #   quick:   skip the full snapshot directory check (orphan/invalid folder scan);
    #            the write_main_index() call for pending links is skipped with it.
    #   install: run the `install` subcommand once initialization has finished.
    #   out_dir: directory that is listed, migrated and indexed. Several paths below
    #            are taken from CONSTANTS / DATA_DIR rather than from out_dir.
    #
    # Raises:
    #   SystemExit(2) if the folder has files but no index and force is False.
    #   SystemExit(1) if the directory check is interrupted (Ctrl-C / SystemExit).
    from core.models import Snapshot
    from rich import print
    # if os.access(out_dir / CONSTANTS.JSON_INDEX_FILENAME, os.F_OK):
    #     print("[red]:warning: This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.[/red]", file=sys.stderr)
    #     print("[red]    You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.[/red]", file=sys.stderr)
    # "empty" means nothing is present besides the entries allowed in a data dir
    is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_DATA_DIR)
    existing_index = os.path.isfile(CONSTANTS.DATABASE_FILE)
    if is_empty and not existing_index:
        print(f'[turquoise4][+] Initializing a new ArchiveBox v{VERSION} collection...[/turquoise4]')
        print('[green]----------------------------------------------------------------------[/green]')
    elif existing_index:
        # TODO: properly detect and print the existing version in current index as well
        print(f'[green][*] Verifying and updating existing ArchiveBox collection to v{VERSION}...[/green]')
        print('[green]----------------------------------------------------------------------[/green]')
    else:
        if force:
            print('[red][!] This folder appears to already have files in it, but no index.sqlite3 is present.[/red]')
            print('[red]    Because --force was passed, ArchiveBox will initialize anyway (which may overwrite existing files).[/red]')
        else:
            print(
                ("[red][X] This folder appears to already have files in it, but no index.sqlite3 present.[/red]\n\n"
                "    You must run init in a completely empty directory, or an existing data folder.\n\n"
                "    [violet]Hint:[/violet] To import an existing data folder make sure to cd into the folder first, \n"
                "    then run and run 'archivebox init' to pick up where you left off.\n\n"
                "    (Always make sure your data folder is backed up first before updating ArchiveBox)"
                )
            )
            raise SystemExit(2)
    if existing_index:
        print('\n[green][*] Verifying archive folder structure...[/green]')
    else:
        print('\n[green][+] Building archive folder structure...[/green]')
    print(f'    + ./{CONSTANTS.ARCHIVE_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.SOURCES_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.LOGS_DIR.relative_to(DATA_DIR)}...')
    Path(CONSTANTS.SOURCES_DIR).mkdir(exist_ok=True)
    Path(CONSTANTS.ARCHIVE_DIR).mkdir(exist_ok=True)
    Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
    print(f'    + ./{CONSTANTS.CONFIG_FILE.relative_to(DATA_DIR)}...')
    # create the .archivebox_id file with a unique ID for this collection
    from archivebox.config.paths import _get_collection_id
    _get_collection_id(CONSTANTS.DATA_DIR, force_create=True)
    # create the ArchiveBox.conf file
    write_config_file({'SECRET_KEY': SERVER_CONFIG.SECRET_KEY})
    if os.access(CONSTANTS.DATABASE_FILE, os.F_OK):
        print('\n[green][*] Verifying main SQL index and running any migrations needed...[/green]')
    else:
        print('\n[green][+] Building main SQL index and running initial migrations...[/green]')
    for migration_line in apply_migrations(out_dir):
        sys.stdout.write(f'    {migration_line}\n')
    assert os.path.isfile(CONSTANTS.DATABASE_FILE) and os.access(CONSTANTS.DATABASE_FILE, os.R_OK)
    print()
    print(f'    √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}')
    # from django.contrib.auth.models import User
    # if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exclude(username='system').exists():
    #     print('{green}[+] Creating admin user account...{reset}'.format(**SHELL_CONFIG.ANSI))
    #     call_command("createsuperuser", interactive=True)
    print()
    print('[dodger_blue3][*] Checking links from indexes and archive folders (safe to Ctrl+C)...[/dodger_blue3]')
    all_links = Snapshot.objects.none()
    pending_links: Dict[str, Link] = {}
    if existing_index:
        all_links = load_main_index(out_dir=out_dir, warn=False)
        print(f'    √ Loaded {all_links.count()} links from existing main index.')
    if quick:
        print('    > Skipping full snapshot directory check (quick mode)')
    else:
        try:
            # Links in data folders that dont match their timestamp
            fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir)
            if fixed:
                print(f'    [yellow]√ Fixed {len(fixed)} data directory locations that didn\'t match their link timestamps.[/yellow]')
            if cant_fix:
                print(f'    [red]! Could not fix {len(cant_fix)} data directory locations due to conflicts with existing folders.[/red]')
            # Links in JSON index but not in main index
            orphaned_json_links = {
                link.url: link
                for link in parse_json_main_index(out_dir)
                if not all_links.filter(url=link.url).exists()
            }
            if orphaned_json_links:
                pending_links.update(orphaned_json_links)
                print(f'    [yellow]√ Added {len(orphaned_json_links)} orphaned links from existing JSON index...[/yellow]')
            # Links in data dir indexes but not in main index
            orphaned_data_dir_links = {
                link.url: link
                for link in parse_json_links_details(out_dir)
                if not all_links.filter(url=link.url).exists()
            }
            if orphaned_data_dir_links:
                pending_links.update(orphaned_data_dir_links)
                print(f'    [yellow]√ Added {len(orphaned_data_dir_links)} orphaned links from existing archive directories.[/yellow]')
            # Links in invalid/duplicate data dirs
            invalid_folders = {
                folder: link
                for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items()
            }
            if invalid_folders:
                print(f'    [red]! Skipped adding {len(invalid_folders)} invalid link data directories.[/red]')
                print('        X ' + '\n        X '.join(f'./{Path(folder).relative_to(DATA_DIR)} {link}' for folder, link in invalid_folders.items()))
                print()
                print('    [violet]Hint:[/violet] For more information about the link data directories that were skipped, run:')
                print('        archivebox status')
                print('        archivebox list --status=invalid')
        except (KeyboardInterrupt, SystemExit):
            print(file=sys.stderr)
            print('[yellow]:stop_sign: Stopped checking archive directories due to Ctrl-C/SIGTERM[/yellow]', file=sys.stderr)
            print('    Your archive data is safe, but you should re-run `archivebox init` to finish the process later.', file=sys.stderr)
            print(file=sys.stderr)
            print('    [violet]Hint:[/violet] In the future you can run a quick init without checking dirs like so:', file=sys.stderr)
            print('        archivebox init --quick', file=sys.stderr)
            raise SystemExit(1)
        # persist any orphaned links discovered above into the main index
        write_main_index(list(pending_links.values()), out_dir=out_dir)
    print('\n[green]----------------------------------------------------------------------[/green]')
    from django.contrib.auth.models import User
    if (SERVER_CONFIG.ADMIN_USERNAME and SERVER_CONFIG.ADMIN_PASSWORD) and not User.objects.filter(username=SERVER_CONFIG.ADMIN_USERNAME).exists():
        print('[green][+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.[/green]')
        User.objects.create_superuser(username=SERVER_CONFIG.ADMIN_USERNAME, password=SERVER_CONFIG.ADMIN_PASSWORD)
    if existing_index:
        print('[green][√] Done. Verified and updated the existing ArchiveBox collection.[/green]')
    else:
        print(f'[green][√] Done. A new ArchiveBox collection was initialized ({len(all_links) + len(pending_links)} links).[/green]')
    json_index = out_dir / CONSTANTS.JSON_INDEX_FILENAME
    html_index = out_dir / CONSTANTS.HTML_INDEX_FILENAME
    index_name = f"{date.today()}_index_old"
    # Rename the legacy index files in place. Path.rename() resolves a relative
    # target against the current working directory, so build the target next to
    # the source with with_name() to keep the file inside out_dir.
    if os.access(json_index, os.F_OK):
        json_index.rename(json_index.with_name(f"{index_name}.json"))
    if os.access(html_index, os.F_OK):
        html_index.rename(html_index.with_name(f"{index_name}.html"))
    CONSTANTS.PERSONAS_DIR.mkdir(parents=True, exist_ok=True)
    CONSTANTS.DEFAULT_TMP_DIR.mkdir(parents=True, exist_ok=True)
    CONSTANTS.DEFAULT_LIB_DIR.mkdir(parents=True, exist_ok=True)
    from archivebox.config.common import STORAGE_CONFIG
    STORAGE_CONFIG.TMP_DIR.mkdir(parents=True, exist_ok=True)
    STORAGE_CONFIG.LIB_DIR.mkdir(parents=True, exist_ok=True)
    if install:
        run_subcommand('install', pwd=out_dir)
    if Snapshot.objects.count() < 25:     # hide the hints for experienced users
        print()
        print('    [violet]Hint:[/violet] To view your archive index, run:')
        print('        archivebox server  # then visit [deep_sky_blue4][link=http://127.0.0.1:8000]http://127.0.0.1:8000[/link][/deep_sky_blue4]')
        print()
        print('    To add new links, you can run:')
        print("        archivebox add < ~/some/path/to/list_of_links.txt")
        print()
        print('    For more usage and examples, run:')
        print('        archivebox help')
@docstring(init.__doc__)
--- a/archivebox/cli/archivebox_install.py
+++ b/archivebox/cli/archivebox_install.py
@ -3,6 +3,7 @@
 __package__ = 'archivebox.cli'
 __command__ = 'archivebox install'
 import os
 import sys
 import argparse
 from pathlib import Path
@ -11,11 +12,145 @@ from typing import Optional, List, IO
 from archivebox.misc.util import docstring
 from archivebox.config import DATA_DIR
 from archivebox.misc.logging_util import SmartFormatter, reject_stdin
-from ..main import install
+
 def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, binaries: Optional[List[str]]=None, dry_run: bool=False) -> None:
    """Automatically install all ArchiveBox dependencies and extras"""
    # Parameters:
    #   out_dir:      working directory passed to the `init` subcommand and to the
    #                 final `archivebox version` call.
    #   binproviders: optional list of provider names; when given, only those
    #                 package managers are set up and used.
    #   binaries:     optional list of binary names; when given, only those are installed.
    #   dry_run:      forwarded to the install calls below.
    #
    # This function does not return normally: it ends by raising SystemExit with
    # the return code of the `archivebox version` subprocess.
    #
    # if running as root:
    #    - run init to create index + lib dir
    #    - chown -R 911 DATA_DIR
    #    - install all binaries as root
    #    - chown -R 911 LIB_DIR
    # else:
    #    - run init to create index + lib dir as current user
    #    - install all binaries as current user
    #    - recommend user re-run with sudo if any deps need to be installed as root
    from rich import print
    from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
    from archivebox.config.paths import get_or_create_working_lib_dir
    if not (os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()):
        run_subcommand('init', stdin=None, pwd=out_dir)  # must init full index because we need a db to store InstalledBinary entries in
    print('\n[green][+] Installing ArchiveBox dependencies automatically...[/green]')
    # we never want the data dir to be owned by root, detect owner of existing owner of DATA_DIR to try and guess desired non-root UID
    if IS_ROOT:
        EUID = os.geteuid()
        # if we have sudo/root permissions, take advantage of them just while installing dependencies
        print()
        print(f'[yellow]:warning:  Running as UID=[blue]{EUID}[/blue] with [red]sudo[/red] only for dependencies that need it.[/yellow]')
        print(f'    DATA_DIR, LIB_DIR, and TMP_DIR will be owned by [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue].')
        print()
    LIB_DIR = get_or_create_working_lib_dir()
    # build the display list of package managers, honoring the binproviders filter
    package_manager_names = ', '.join(
        f'[yellow]{binprovider.name}[/yellow]'
        for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values()))
        if not binproviders or (binproviders and binprovider.name in binproviders)
    )
    print(f'[+] Setting up package managers {package_manager_names}...')
    for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values())):
        if binproviders and binprovider.name not in binproviders:
            continue
        try:
            binprovider.setup()
        except Exception:
            # it's ok, installing binaries below will automatically set up package managers as needed
            # e.g. if user does not have npm available we cannot set it up here yet, but once npm Binary is installed
            # the next package that depends on npm will automatically call binprovider.setup() during its own install
            pass
    print()
    for binary in reversed(list(abx.as_dict(abx.pm.hook.get_BINARIES()).values())):
        if binary.name in ('archivebox', 'django', 'sqlite', 'python'):
            # obviously must already be installed if we are running
            continue
        if binaries and binary.name not in binaries:
            continue
        providers = ' [grey53]or[/grey53] '.join(
            provider.name for provider in binary.binproviders_supported
            if not binproviders or (binproviders and provider.name in binproviders)
        )
        if not providers:
            # none of the requested providers supports this binary
            continue
        print(f'[+] Detecting / Installing [yellow]{binary.name.ljust(22)}[/yellow] using [red]{providers}[/red]...')
        try:
            with SudoPermission(uid=0, fallback=True):
                # print(binary.load_or_install(fresh=True).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}))
                if binproviders:
                    # try the requested providers in the order given, stopping at the first that yields a loaded version
                    providers_supported_by_binary = [provider.name for provider in binary.binproviders_supported]
                    for binprovider_name in binproviders:
                        if binprovider_name not in providers_supported_by_binary:
                            continue
                        try:
                            if dry_run:
                                # always show install commands when doing a dry run
                                sys.stderr.write("\033[2;49;90m")  # grey53
                                result = binary.install(binproviders=[binprovider_name], dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
                                sys.stderr.write("\033[00m\n")     # reset
                            else:
                                loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False)
                                result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
                            if result and result['loaded_version']:
                                break
                        except Exception as e:
                            # a failure with one provider is reported, then the next provider is tried
                            print(f'[red]:cross_mark: Failed to install {binary.name} as using {binprovider_name} as user {ARCHIVEBOX_USER}: {e}[/red]')
                else:
                    # no provider filter: the provider to use is not chosen here
                    if dry_run:
                        sys.stderr.write("\033[2;49;90m")  # grey53
                        binary.install(dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
                        sys.stderr.write("\033[00m\n")  # reset
                    else:
                        loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, fresh=True, dry_run=dry_run)
                        result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
            # after each binary, reset LIB_DIR permissions/ownership when running as root
            if IS_ROOT and LIB_DIR:
                with SudoPermission(uid=0):
                    if ARCHIVEBOX_USER == 0:
                        os.system(f'chmod -R 777 "{LIB_DIR.resolve()}"')
                    else:    
                        os.system(f'chown -R {ARCHIVEBOX_USER} "{LIB_DIR.resolve()}"')
        except Exception as e:
            print(f'[red]:cross_mark: Failed to install {binary.name} as user {ARCHIVEBOX_USER}: {e}[/red]')
            if binaries and len(binaries) == 1:
                # if we are only installing a single binary, raise the exception so the user can see what went wrong
                raise
    from django.contrib.auth import get_user_model
    User = get_user_model()
    # remind the user to create an admin account if no superuser other than 'system' exists
    if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
        stderr('\n[+] Don\'t forget to create a new admin user for the Web UI...', color='green')
        stderr('    archivebox manage createsuperuser')
        # run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
    print('\n[green][√] Set up ArchiveBox and its dependencies successfully.[/green]\n', file=sys.stderr)
    from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
    # finish by running `archivebox version` with the same filters and exiting with its return code
    extra_args = []
    if binproviders:
        extra_args.append(f'--binproviders={",".join(binproviders)}')
    if binaries:
        extra_args.append(f'--binaries={",".join(binaries)}')
    proc = run_shell([ARCHIVEBOX_BINARY.load().abspath, 'version', *extra_args], capture_output=False, cwd=out_dir)
    raise SystemExit(proc.returncode)
@docstring(install.__doc__)
 def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
    parser = argparse.ArgumentParser(
        prog=__command__,
        description=install.__doc__,
--- a/archivebox/cli/archivebox_list.py
+++ b/archivebox/cli/archivebox_list.py
@ -1,139 +0,0 @@
 #!/usr/bin/env python3
 __package__ = 'archivebox.cli'
 __command__ = 'archivebox list'
 import sys
 import argparse
 from pathlib import Path
 from typing import Optional, List, IO
 from archivebox.config import DATA_DIR
 from archivebox.misc.util import docstring
 from archivebox.misc.logging_util import SmartFormatter, reject_stdin, stderr
 from ..main import list_all
 from ..index import (
    LINK_FILTERS,
    get_indexed_folders,
    get_archived_folders,
    get_unarchived_folders,
    get_present_folders,
    get_valid_folders,
    get_invalid_folders,
    get_duplicate_folders,
    get_orphaned_folders,
    get_corrupted_folders,
    get_unrecognized_folders,
 )
@docstring(list_all.__doc__)
 def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
    # CLI entry point for `archivebox list`.
    #   args:  argv-style list of CLI arguments (an empty tuple is parsed when None/empty)
    #   stdin: stream handed to reject_stdin() -- presumably refused if data is piped in (helper not visible here)
    #   pwd:   optional data directory path; DATA_DIR is used when it is not given
    # Always exits by raising SystemExit: 2 for an invalid flag combination,
    # otherwise 0 when list_all() returned something truthy and 1 when it did not.
    # The --help description is list_all's docstring.
    parser = argparse.ArgumentParser(
        prog=__command__,
        description=list_all.__doc__,
        add_help=True,
        formatter_class=SmartFormatter,
    )
    # Output format flags: --csv, --json and --html cannot be combined with each other
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '--csv', #'-c',
        type=str,
        help="Print the output in CSV format with the given columns, e.g.: timestamp,url,extension",
        default=None,
    )
    group.add_argument(
        '--json', #'-j',
        action='store_true',
        help="Print the output in JSON format with all columns included",
    )
    group.add_argument(
        '--html',
        action='store_true',
        help="Print the output in HTML format"
    )
    parser.add_argument(
        '--with-headers',
        action='store_true',
        help='Include the headers in the output document' 
    )
    # Sorting and timestamp-range filters (both bounds are parsed as floats)
    parser.add_argument(
        '--sort', #'-s',
        type=str,
        help="List the links sorted using the given key, e.g. timestamp or updated",
        default=None,
    )
    parser.add_argument(
        '--before', #'-b',
        type=float,
        help="List only links bookmarked before (less than) the given timestamp",
        default=None,
    )
    parser.add_argument(
        '--after', #'-a',
        type=float,
        help="List only links bookmarked after (greater than or equal to) the given timestamp",
        default=None,
    )
    # The help text for --status is assembled from the docstrings of the get_*_folders helpers
    parser.add_argument(
        '--status',
        type=str,
        choices=('indexed', 'archived', 'unarchived', 'present', 'valid', 'invalid', 'duplicate', 'orphaned', 'corrupted', 'unrecognized'),
        default='indexed',
        help=(
            'List only links or data directories that have the given status\n'
            f'    indexed       {get_indexed_folders.__doc__} (the default)\n'
            f'    archived      {get_archived_folders.__doc__}\n'
            f'    unarchived    {get_unarchived_folders.__doc__}\n'
            '\n'
            f'    present       {get_present_folders.__doc__}\n'
            f'    valid         {get_valid_folders.__doc__}\n'
            f'    invalid       {get_invalid_folders.__doc__}\n'
            '\n'
            f'    duplicate     {get_duplicate_folders.__doc__}\n'
            f'    orphaned      {get_orphaned_folders.__doc__}\n'
            f'    corrupted     {get_corrupted_folders.__doc__}\n'
            f'    unrecognized  {get_unrecognized_folders.__doc__}\n'
        )
    )
    # Accepted filter types are the LINK_FILTERS keys plus the extra 'search' choice
    parser.add_argument(
        '--filter-type', '-t',
        type=str,
        choices=(*LINK_FILTERS.keys(), 'search'),
        default='exact',
        help='Type of pattern matching to use when filtering URLs',
    )
    parser.add_argument(
        'filter_patterns',
        nargs='*',
        type=str,
        default=None,
        help='List only URLs matching these filter patterns'
    )
    command = parser.parse_args(args or ())
    reject_stdin(stdin)
    # --with-headers is only meaningful together with one of the export formats
    if command.with_headers and not (command.json or command.html or command.csv):
        stderr(
            '[X] --with-headers can only be used with --json, --html or --csv options\n',
            color='red',
        )
        raise SystemExit(2)
    matching_folders = list_all(
        filter_patterns=command.filter_patterns,
        filter_type=command.filter_type,
        status=command.status,
        after=command.after,
        before=command.before,
        sort=command.sort,
        csv=command.csv,
        json=command.json,
        html=command.html,
        with_headers=command.with_headers,
        out_dir=Path(pwd) if pwd else DATA_DIR,
    )
    # Exit status is the negated truthiness of the result: 0 if anything matched, 1 if nothing did
    raise SystemExit(not matching_folders)
 # Allow running this module directly as a script
 if __name__ == '__main__':
    main(args=sys.argv[1:], stdin=sys.stdin)
--- a/archivebox/cli/archivebox_manage.py
+++ b/archivebox/cli/archivebox_manage.py
@ -9,7 +9,27 @@ from typing import Optional, List, IO
 from archivebox.misc.util import docstring
 from archivebox.config import DATA_DIR
-from ..main import manage
+
 # @enforce_types
 def manage(args: Optional[List[str]]=None, out_dir: Path=DATA_DIR) -> None:
     """Run an ArchiveBox Django management command"""
     # NOTE: the docstring above is reused verbatim as the CLI help text via
     # @docstring(manage.__doc__), so it is intentionally left untouched.
     #   args:    arguments forwarded to Django's manage.py ('help' is run when empty/None)
     #   out_dir: accepted for signature parity; not read inside this function
     check_data_folder()
     from django.core.management import execute_from_command_line
     # createsuperuser prompts interactively, so warn when running in docker without a TTY
     if args and "createsuperuser" in args:
         if IN_DOCKER and not SHELL_CONFIG.IS_TTY:
             example_cmd = '    docker run -it archivebox manage {}'.format(' '.join(args or ['...']))
             stderr('[!] Warning: you need to pass -it to use interactive commands in docker', color='lightyellow')
             stderr(example_cmd, color='lightyellow')
             stderr('')
     # Django expects an argv-style list whose first item is the program name
     argv = ['manage.py']
     argv.extend(args or ['help'])
     execute_from_command_line(argv)
@docstring(manage.__doc__)
--- a/archivebox/cli/archivebox_oneshot.py
+++ b/archivebox/cli/archivebox_oneshot.py
@ -1,73 +1,98 @@
-#!/usr/bin/env python3
+# #!/usr/bin/env python3
-__package__ = 'archivebox.cli'
+################## DEPRECATED IN FAVOR OF abx-dl #####################
-__command__ = 'archivebox oneshot'
+# https://github.com/ArchiveBox/abx-dl
-import sys
+# __package__ = 'archivebox.cli'
-import argparse
+# __command__ = 'archivebox oneshot'
-from pathlib import Path
+# import sys
-from typing import List, Optional, IO
+# import argparse
-from archivebox.misc.util import docstring
+# from pathlib import Path
-from archivebox.config import DATA_DIR
+# from typing import List, Optional, IO
-from archivebox.misc.logging_util import SmartFormatter, accept_stdin, stderr
+
-from ..main import oneshot
+# from archivebox.misc.util import docstring
 # from archivebox.config import DATA_DIR
 # from archivebox.misc.logging_util import SmartFormatter, accept_stdin, stderr
-@docstring(oneshot.__doc__)
+# @enforce_types
-def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
+# def oneshot(url: str, extractors: str="", out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> List[Link]:
-    parser = argparse.ArgumentParser(
+#     """
-        prog=__command__,
+#     Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
-        description=oneshot.__doc__,
+#     You can run this to archive single pages without needing to create a whole collection with archivebox init.
-        add_help=True,
+#     """
-        formatter_class=SmartFormatter,
+#     oneshot_link, _ = parse_links_memory([url])
-    )
+#     if len(oneshot_link) > 1:
-    parser.add_argument(
+#         stderr(
-        'url',
+#                 '[X] You should pass a single url to the oneshot command',
-        type=str,
+#                 color='red'
-        default=None,
+#             )
-        help=(
+#         raise SystemExit(2)
            'URLs or paths to archive e.g.:\n'
            '    https://getpocket.com/users/USERNAME/feed/all\n'
            '    https://example.com/some/rss/feed.xml\n'
            '    https://example.com\n'
            '    ~/Downloads/firefox_bookmarks_export.html\n'
            '    ~/Desktop/sites_list.csv\n'
        )
    )
    parser.add_argument(
        "--extract",
        type=str,
        help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
              This does not take precedence over the configuration",
        default=""
    )
    parser.add_argument(
        '--out-dir',
        type=str,
        default=DATA_DIR,
        help= "Path to save the single archive folder to, e.g. ./example.com_archive"
    )
    command = parser.parse_args(args or ())
    stdin_url = None
    url = command.url
    if not url:
        stdin_url = accept_stdin(stdin)
-    if (stdin_url and url) or (not stdin and not url):
+#     methods = extractors.split(",") if extractors else ignore_methods(['title'])
-        stderr(
+#     archive_link(oneshot_link[0], out_dir=out_dir, methods=methods, created_by_id=created_by_id)
-            '[X] You must pass a URL/path to add via stdin or CLI arguments.\n',
+#     return oneshot_link
            color='red',
        )
        raise SystemExit(2)
    oneshot(
        url=stdin_url or url,
        out_dir=Path(command.out_dir).resolve(),
        extractors=command.extract,
    )
-if __name__ == '__main__':
+
-    main(args=sys.argv[1:], stdin=sys.stdin)
+
 # @docstring(oneshot.__doc__)
 # def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
 #     parser = argparse.ArgumentParser(
 #         prog=__command__,
 #         description=oneshot.__doc__,
 #         add_help=True,
 #         formatter_class=SmartFormatter,
 #     )
 #     parser.add_argument(
 #         'url',
 #         type=str,
 #         default=None,
 #         help=(
 #             'URLs or paths to archive e.g.:\n'
 #             '    https://getpocket.com/users/USERNAME/feed/all\n'
 #             '    https://example.com/some/rss/feed.xml\n'
 #             '    https://example.com\n'
 #             '    ~/Downloads/firefox_bookmarks_export.html\n'
 #             '    ~/Desktop/sites_list.csv\n'
 #         )
 #     )
 #     parser.add_argument(
 #         "--extract",
 #         type=str,
 #         help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
 #               This does not take precedence over the configuration",
 #         default=""
 #     )
 #     parser.add_argument(
 #         '--out-dir',
 #         type=str,
 #         default=DATA_DIR,
 #         help= "Path to save the single archive folder to, e.g. ./example.com_archive"
 #     )
 #     command = parser.parse_args(args or ())
 #     stdin_url = None
 #     url = command.url
 #     if not url:
 #         stdin_url = accept_stdin(stdin)
 #     if (stdin_url and url) or (not stdin and not url):
 #         stderr(
 #             '[X] You must pass a URL/path to add via stdin or CLI arguments.\n',
 #             color='red',
 #         )
 #         raise SystemExit(2)
 #     oneshot(
 #         url=stdin_url or url,
 #         out_dir=Path(command.out_dir).resolve(),
 #         extractors=command.extract,
 #     )
 # if __name__ == '__main__':
 #     main(args=sys.argv[1:], stdin=sys.stdin)
--- a/archivebox/cli/archivebox_remove.py
+++ b/archivebox/cli/archivebox_remove.py
@ -8,10 +8,93 @@ import argparse
 from pathlib import Path
 from typing import Optional, List, IO
 from django.db.models import QuerySet
 from archivebox.misc.util import docstring
 from archivebox.config import DATA_DIR
 from archivebox.misc.logging_util import SmartFormatter, accept_stdin
-from ..main import remove
+from archivebox.index.schema import Link
 def remove(filter_str: Optional[str]=None,
            filter_patterns: Optional[list[str]]=None,
            filter_type: str='exact',
            snapshots: Optional[QuerySet]=None,
            after: Optional[float]=None,
            before: Optional[float]=None,
            yes: bool=False,
            delete: bool=False,
            out_dir: Path=DATA_DIR) -> list[Link]:
     """Remove the specified URLs from the archive"""
     # NOTE: the docstring above is reused as CLI help text via @docstring(remove.__doc__).
     #   filter_str:      newline-separated patterns (e.g. read from stdin); exclusive with filter_patterns
     #   filter_patterns: patterns given as a list
     #   filter_type:     how patterns are matched; forwarded to list_links()
     #   snapshots:       optional pre-selected QuerySet to remove from (skips the pattern validation)
     #   after / before:  timestamp bounds forwarded to list_links()
     #   yes / delete:    forwarded to log_removal_started(); delete=True also removes each snapshot dir from disk
     #   out_dir:         data directory whose main index is updated
     # Returns the main index as reloaded after the removal.
     # Raises SystemExit(2) on invalid/missing patterns and SystemExit(1) when nothing matched.
     check_data_folder()
     if snapshots is None:
         if filter_str and filter_patterns:
             stderr(
                 '[X] You should pass either a pattern as an argument, '
                 'or pass a list of patterns via stdin, but not both.\n',
                 color='red',
             )
             raise SystemExit(2)
         if filter_str:
             # Drop blank lines (e.g. the trailing newline of piped stdin): an empty
             # pattern would match every URL with substring/regex style filters.
             filter_patterns = [ptn.strip() for ptn in filter_str.split('\n') if ptn.strip()]
         if not filter_patterns:
             stderr(
                 '[X] You should pass either a pattern as an argument, '
                 'or pass a list of patterns via stdin.',
                 color='red',
             )
             stderr()
             hint(('To remove all urls you can run:',
                 'archivebox remove --filter-type=regex ".*"'))
             stderr()
             raise SystemExit(2)
     elif not snapshots.exists():
         # An explicitly passed but empty QuerySet means "nothing to remove".
         # It must never fall through to an unfiltered listing of the whole index.
         log_removal_finished(0, 0)
         raise SystemExit(1)
     list_kwargs = {
         "filter_patterns": filter_patterns,
         "filter_type": filter_type,
         "after": after,
         "before": before,
     }
     # Compare against None: QuerySet truthiness would run a query and treat empty as "not given"
     if snapshots is not None:
         list_kwargs["snapshots"] = snapshots
     log_list_started(filter_patterns, filter_type)
     timer = TimedProgress(360, prefix='      ')
     try:
         snapshots = list_links(**list_kwargs)
     finally:
         timer.end()
     if not snapshots.exists():
         log_removal_finished(0, 0)
         raise SystemExit(1)
     log_links = [link.as_link() for link in snapshots]
     log_list_finished(log_links)
     log_removal_started(log_links, yes=yes, delete=delete)
     timer = TimedProgress(360, prefix='      ')
     try:
         if delete:
             for snapshot in snapshots:
                 # best-effort removal of the on-disk data, errors are ignored
                 shutil.rmtree(snapshot.as_link().link_dir, ignore_errors=True)
     finally:
         timer.end()
     # Count before the rows are deleted from the SQL index below
     to_remove = snapshots.count()
     # Absolute import: this module lives in archivebox.cli, so a relative `.search`
     # would resolve to archivebox.cli.search instead of the archivebox.search package.
     from archivebox.search import flush_search_index
     flush_search_index(snapshots=snapshots)
     remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
     all_snapshots = load_main_index(out_dir=out_dir)
     log_removal_finished(all_snapshots.count(), to_remove)
     return all_snapshots
@docstring(remove.__doc__)
--- a/archivebox/cli/archivebox_schedule.py
+++ b/archivebox/cli/archivebox_schedule.py
@ -11,7 +11,139 @@ from typing import Optional, List, IO
 from archivebox.misc.util import docstring
 from archivebox.config import DATA_DIR
 from archivebox.misc.logging_util import SmartFormatter, reject_stdin
-from ..main import schedule
+from archivebox.config.common import ARCHIVING_CONFIG
 # @enforce_types
 def schedule(add: bool=False,
              show: bool=False,
              clear: bool=False,
              foreground: bool=False,
              run_all: bool=False,
              quiet: bool=False,
              every: Optional[str]=None,
              tag: str='',
              depth: int=0,
              overwrite: bool=False,
              update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
              import_path: Optional[str]=None,
              out_dir: Path=DATA_DIR):
     """Set ArchiveBox to regularly import URLs at specific times using cron"""
     # NOTE: the docstring above is reused as CLI help text via @docstring(schedule.__doc__).
     #   add / every:   create a new cron job (every defaults to 'day'); every may be one of
     #                  minute/hour/day/month/year or a cron-format schedule string
     #   show:          print the existing ArchiveBox cron jobs and exit
     #   clear:         remove all ArchiveBox cron jobs and exit
     #   foreground:    run the cron scheduler loop in this process
     #   run_all:       run every scheduled job once, right now
     #   quiet:         suppress the "runs many times per year" storage warning
     #   tag / depth / overwrite / update / import_path: options baked into the scheduled `archivebox add` command;
     #                  without import_path the job runs `archivebox update` instead
     #   out_dir:       data directory the scheduled command cd's into
     # Raises SystemExit(0) after clear/show, SystemExit(1) on an invalid timeperiod,
     # when no jobs exist for foreground/run_all, or on Ctrl+C.
     check_data_folder()
     from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
     from archivebox.config.permissions import USER
     Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
     cron = CronTab(user=True)
     cron = dedupe_cron_jobs(cron)
     if clear:
         print(cron.remove_all(comment=CRON_COMMENT))
         cron.write()
         raise SystemExit(0)
     existing_jobs = list(cron.find_comment(CRON_COMMENT))
     if every or add:
         every = every or 'day'
         # NOTE(review): only wraps values containing a space in double quotes; other shell
         # metacharacters in paths are not escaped -- confirm inputs are trusted.
         quoted = lambda s: f'"{s}"' if (s and ' ' in str(s)) else str(s)
         cmd = [
             'cd',
             quoted(out_dir),
             '&&',
             quoted(ARCHIVEBOX_BINARY.load().abspath),
             *([
                 'add',
                 *(['--overwrite'] if overwrite else []),
                 *(['--update'] if update else []),
                 *([f'--tag={tag}'] if tag else []),
                 f'--depth={depth}',
                 f'"{import_path}"',
             ] if import_path else ['update']),
             '>>',
             quoted(Path(CONSTANTS.LOGS_DIR) / 'schedule.log'),
             '2>&1',
         ]
         new_job = cron.new(command=' '.join(cmd), comment=CRON_COMMENT)
         if every in ('minute', 'hour', 'day', 'month', 'year'):
             # e.g. new_job.every().day()
             set_every = getattr(new_job.every(), every)
             set_every()
         elif CronSlices.is_valid(every):
             new_job.setall(every)
         else:
             # The job is never written to the crontab on this path (cron.write() is not reached)
             stderr('{red}[X] Got invalid timeperiod for cron task.{reset}'.format(**SHELL_CONFIG.ANSI))
             stderr('    It must be one of minute/hour/day/month/year')
             stderr('    or a quoted cron-format schedule like:')
             stderr('        archivebox schedule --every=day --depth=1 https://example.com/some/rss/feed.xml')
             stderr('        archivebox schedule --every="0/5 * * * *" --depth=1 https://example.com/some/rss/feed.xml')
             raise SystemExit(1)
         cron = dedupe_cron_jobs(cron)
         cron.write()
         total_runs = sum(j.frequency_per_year() for j in cron)
         existing_jobs = list(cron.find_comment(CRON_COMMENT))
         print()
         print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **SHELL_CONFIG.ANSI))
         # Mark the freshly added job with '>' in the listing
         print('\n'.join(f'  > {cmd}' if str(cmd) == str(new_job) else f'    {cmd}' for cmd in existing_jobs))
         if total_runs > 60 and not quiet:
             stderr()
             stderr('{lightyellow}[!] With the current cron config, ArchiveBox is estimated to run >{} times per year.{reset}'.format(total_runs, **SHELL_CONFIG.ANSI))
             stderr('    Congrats on being an enthusiastic internet archiver! 👌')
             stderr()
             stderr('    Make sure you have enough storage space available to hold all the data.')
             stderr('    Using a compressed/deduped filesystem like ZFS is recommended if you plan on archiving a lot.')
             stderr('')
     elif show:
         if existing_jobs:
             print('\n'.join(str(cmd) for cmd in existing_jobs))
         else:
             stderr('{red}[X] There are no ArchiveBox cron jobs scheduled for your user ({}).{reset}'.format(USER, **SHELL_CONFIG.ANSI))
             stderr('    To schedule a new job, run:')
             stderr('        archivebox schedule --every=[timeperiod] --depth=1 https://example.com/some/rss/feed.xml')
         raise SystemExit(0)
     # Re-read the crontab so jobs written above are included when running in the foreground
     cron = CronTab(user=True)
     cron = dedupe_cron_jobs(cron)
     existing_jobs = list(cron.find_comment(CRON_COMMENT))
     if foreground or run_all:
         if not existing_jobs:
             stderr('{red}[X] You must schedule some jobs first before running in foreground mode.{reset}'.format(**SHELL_CONFIG.ANSI))
             stderr('    archivebox schedule --every=hour --depth=1 https://example.com/some/rss/feed.xml')
             raise SystemExit(1)
         print('{green}[*] Running {} ArchiveBox jobs in foreground task scheduler...{reset}'.format(len(existing_jobs), **SHELL_CONFIG.ANSI))
         if run_all:
             try:
                 for job in existing_jobs:
                     # First line: the `cd <dir>` part; second line: the archivebox subcommand without the log redirect
                     sys.stdout.write(f'  > {job.command.split("/archivebox ")[0].split(" && ")[0]}\n')
                     sys.stdout.write(f'    > {job.command.split("/archivebox ")[-1].split(" >> ")[0]}')
                     sys.stdout.flush()
                     job.run()
                     # \r rewinds to overwrite the pending '>' line with a '√' once the job finished
                     sys.stdout.write(f'\r    √ {job.command.split("/archivebox ")[-1]}\n')
             except KeyboardInterrupt:
                 print('\n{green}[√] Stopped.{reset}'.format(**SHELL_CONFIG.ANSI))
                 raise SystemExit(1)
         if foreground:
             try:
                 for job in existing_jobs:
                     print(f'  > {job.command.split("/archivebox ")[-1].split(" >> ")[0]}')
                 for result in cron.run_scheduler():
                     print(result)
             except KeyboardInterrupt:
                 print('\n{green}[√] Stopped.{reset}'.format(**SHELL_CONFIG.ANSI))
                 raise SystemExit(1)
    # if CAN_UPGRADE:
    #     hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
@docstring(schedule.__doc__)
--- a/archivebox/cli/archivebox_search.py
+++ b/archivebox/cli/archivebox_search.py
@ -0,0 +1,164 @@
 #!/usr/bin/env python3
 __package__ = 'archivebox.cli'
 __command__ = 'archivebox search'
 from pathlib import Path
 from typing import Optional, List, Iterable
 import rich_click as click
 from rich import print
 from django.db.models import QuerySet
 from archivebox.config import DATA_DIR
 from archivebox.index import LINK_FILTERS
 from archivebox.index.schema import Link
 from archivebox.misc.logging import stderr
 from archivebox.misc.util import enforce_types, docstring
 STATUS_CHOICES = [
    'indexed', 'archived', 'unarchived', 'present', 'valid', 'invalid',
    'duplicate', 'orphaned', 'corrupted', 'unrecognized'
 ]
 def list_links(snapshots: Optional[QuerySet]=None,
                filter_patterns: Optional[List[str]]=None,
                filter_type: str='substring',
                after: Optional[float]=None,
                before: Optional[float]=None,
                out_dir: Path=DATA_DIR) -> Iterable[Link]:
     """Return the snapshots that match the given time range and URL filters.

     Args:
         snapshots: QuerySet to filter; the main index of ``out_dir`` is loaded
             when this is None. An explicitly passed empty QuerySet is kept as-is.
         filter_patterns: patterns handed to ``snapshot_filter``; skipped when empty.
         filter_type: matching mode handed to ``snapshot_filter``.
         after: keep snapshots with ``timestamp >= after``.
         before: keep snapshots with ``timestamp < before``.
         out_dir: data directory whose main index is loaded when needed.

     Returns:
         The filtered QuerySet. A warning is written to stderr when it is empty.
     """
     from archivebox.index import load_main_index
     from archivebox.index import snapshot_filter
     # Compare against None instead of relying on truthiness: an empty QuerySet is
     # falsy, which would silently swap the caller's (empty) selection for the whole index.
     if snapshots is not None:
         all_snapshots = snapshots
     else:
         all_snapshots = load_main_index(out_dir=out_dir)
     if after is not None:
         all_snapshots = all_snapshots.filter(timestamp__gte=after)
     if before is not None:
         all_snapshots = all_snapshots.filter(timestamp__lt=before)
     if filter_patterns:
         all_snapshots = snapshot_filter(all_snapshots, filter_patterns, filter_type)
     if not all_snapshots:
         stderr('[!] No Snapshots matched your filters:', filter_patterns, f'({filter_type})', color='lightyellow')
     return all_snapshots
 def list_folders(links: list[Link], status: str, out_dir: Path=DATA_DIR) -> dict[str, Link | None]:
     """Return the folders for ``links`` that have the given ``status``.

     Args:
         links: links/snapshots passed through to the matching ``get_*_folders`` helper.
         status: one of the keys of the status table below (e.g. 'indexed', 'archived').
         out_dir: data directory passed through to the helper.

     Returns:
         Whatever the selected ``get_<status>_folders`` helper returns.

     Raises:
         ValueError: if ``status`` is not a recognized status name.
     """
     from archivebox.misc.checks import check_data_folder
     from archivebox.index import (
         get_indexed_folders,
         get_archived_folders,
         get_unarchived_folders,
         get_present_folders,
         get_valid_folders,
         get_invalid_folders,
         get_duplicate_folders,
         get_orphaned_folders,
         get_corrupted_folders,
         get_unrecognized_folders,
     )
     check_data_folder()
     STATUS_FUNCTIONS = {
         "indexed": get_indexed_folders,
         "archived": get_archived_folders,
         "unarchived": get_unarchived_folders,
         "present": get_present_folders,
         "valid": get_valid_folders,
         "invalid": get_invalid_folders,
         "duplicate": get_duplicate_folders,
         "orphaned": get_orphaned_folders,
         "corrupted": get_corrupted_folders,
         "unrecognized": get_unrecognized_folders,
     }
     # Resolve the helper first and call it outside of any KeyError handling, so a
     # KeyError raised *inside* the helper is not misreported as an unknown status.
     get_folders = STATUS_FUNCTIONS.get(status)
     if get_folders is None:
         raise ValueError('Status not recognized.')
     return get_folders(links, out_dir=out_dir)
@enforce_types
def search(filter_patterns: list[str] | None=None,
           filter_type: str='substring',
           status: str='indexed',
           before: float | None=None,
           after: float | None=None,
           sort: str | None=None,
           json: bool=False,
           html: bool=False,
           csv: str | None=None,
           with_headers: bool=False):
    """List, filter, and export information about archive entries"""
    # NOTE: the docstring above is reused as CLI help text via @docstring(search.__doc__).
    #   filter_patterns / filter_type: URL patterns and matching mode, forwarded to list_links()
    #   status:         folder status to list, forwarded to list_folders()
    #   before / after: timestamp bounds, forwarded to list_links()
    #   sort:           field name passed to QuerySet.order_by()
    #   json / html:    export format switches
    #   csv:            comma-separated column names; selects CSV export when set
    #   with_headers:   only valid together with json, html or csv
    # Prints the rendered output and also returns it.
    # Raises SystemExit(2) when with_headers is used without an export format.
    if with_headers and not (json or html or csv):
        stderr('[X] --with-headers requires --json, --html or --csv\n', color='red')
        raise SystemExit(2)
    snapshots = list_links(
        filter_patterns=list(filter_patterns) if filter_patterns else None,
        filter_type=filter_type,
        before=before,
        after=after,
    )
    if sort:
        snapshots = snapshots.order_by(sort)
    folders = list_folders(
        links=snapshots,
        status=status,
        out_dir=DATA_DIR,
    )
    machine_readable = bool(json or html or csv)
    if json:
        from archivebox.index.json import generate_json_index_from_links
        output = generate_json_index_from_links(folders.values(), with_headers)
    elif html:
        from archivebox.index.html import generate_index_from_links
        output = generate_index_from_links(folders.values(), with_headers)
    elif csv:
        from archivebox.index.csv import links_to_csv
        output = links_to_csv(folders.values(), csv.split(','), with_headers)
    else:
        from archivebox.misc.logging_util import printable_folders
        output = printable_folders(folders, with_headers)
    if machine_readable:
        # The module-level `print` is rich's: it interprets [bracketed] text as markup and
        # wraps lines to the console width, which can corrupt JSON/HTML/CSV exports.
        # Emit those byte-for-byte with the builtin print instead.
        import builtins
        builtins.print(output)
    else:
        print(output)
    return output
@click.command()
@click.option(
    '--filter-type', '-f',
    type=click.Choice(['search', *LINK_FILTERS.keys()]),
    default='substring',
    help='Pattern matching type for filtering URLs',
)
@click.option(
    '--status', '-s',
    type=click.Choice(STATUS_CHOICES),
    default='indexed',
    help='List snapshots with the given status',
)
@click.option(
    '--before', '-b',
    type=float,
    help='List snapshots bookmarked before the given UNIX timestamp',
)
@click.option(
    '--after', '-a',
    type=float,
    help='List snapshots bookmarked after the given UNIX timestamp',
)
@click.option(
    '--sort', '-o',
    type=str,
    help='Field to sort by, e.g. url, created_at, bookmarked_at, downloaded_at',
)
@click.option(
    '--json', '-J',
    is_flag=True,
    help='Print output in JSON format',
)
@click.option(
    '--html', '-M',
    is_flag=True,
    help='Print output in HTML format (suitable for viewing statically without a server)',
)
@click.option(
    '--csv', '-C',
    type=str,
    help='Print output as CSV with the provided fields, e.g.: created_at,url,title',
)
@click.option(
    '--with-headers', '-H',
    is_flag=True,
    help='Include extra CSV/HTML headers in the output',
)
@click.help_option('--help', '-h')
@click.argument('filter_patterns', nargs=-1)
@docstring(search.__doc__)
def main(**cli_options):
    # Thin click wrapper: every parsed option/argument is forwarded unchanged to search().
    # The command's help text is search()'s docstring, attached by @docstring above.
    result = search(**cli_options)
    return result
# Allow running this module directly as a script
if __name__ == '__main__':
    main()
--- a/archivebox/cli/archivebox_server.py
+++ b/archivebox/cli/archivebox_server.py
@ -12,7 +12,81 @@ from archivebox.misc.util import docstring
 from archivebox.config import DATA_DIR
 from archivebox.config.common import SERVER_CONFIG
 from archivebox.misc.logging_util import SmartFormatter, reject_stdin
-from ..main import server
+
 # @enforce_types
 def server(runserver_args: Optional[List[str]]=None,
            reload: bool=False,
            debug: bool=False,
            init: bool=False,
            quick_init: bool=False,
            createsuperuser: bool=False,
            daemonize: bool=False,
            out_dir: Path=DATA_DIR) -> None:
     """Run the ArchiveBox HTTP server"""
     # NOTE: the docstring above is reused as CLI help text via @docstring(server.__doc__).
     #   runserver_args:  extra args for Django's runserver; a bare host, port or host:port
     #                    made only of digits/dots/colons is also used for the printed URLs
     #   reload:          keep Django's autoreloader enabled (debug mode only)
     #   init/quick_init: run `archivebox init` (or `init --quick`) first
     #   createsuperuser: run `archivebox manage createsuperuser` first
     #   out_dir:         data directory passed to the subcommands above
     # NOTE(review): `debug` and `daemonize` are accepted but not read below -- debug mode is
     # taken from SHELL_CONFIG.DEBUG and workers are always started with daemonize=False.
     from rich import print
     # Copy so appending '--noreload' below never mutates the caller's list
     runserver_args = list(runserver_args or [])
     if init:
         run_subcommand('init', stdin=None, pwd=out_dir)
         print()
     elif quick_init:
         run_subcommand('init', subcommand_args=['--quick'], stdin=None, pwd=out_dir)
         print()
     if createsuperuser:
         run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
         print()
     check_data_folder()
     from django.core.management import call_command
     from django.contrib.auth.models import User
     # Work out host/port before printing anything, since the hints below link to them
     host = '127.0.0.1'
     port = '8000'
     host_and_port = next(
         (arg for arg in runserver_args if arg.replace('.', '').replace(':', '').isdigit()),
         None,
     )
     if host_and_port is not None:
         if ':' in host_and_port:
             host, port = host_and_port.split(':')
         elif '.' in host_and_port:
             host = host_and_port
         else:
             port = host_and_port
     if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
         print()
         # print('[yellow][!] No admin accounts exist, you must create one to be able to log in to the Admin UI![/yellow]')
         # f-string so the link points at the real host/port instead of a literal "{host}:{port}"
         print(f'[violet]Hint:[/violet] To create an [bold]admin username & password[/bold] for the [deep_sky_blue3][underline][link=http://{host}:{port}/admin]Admin UI[/link][/underline][/deep_sky_blue3], run:')
         print('      [green]archivebox manage createsuperuser[/green]')
         print()
     print('[green][+] Starting ArchiveBox webserver...[/green]')
     print(f'    [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
     print(f'    [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
     print('    > Writing ArchiveBox error log to ./logs/errors.log')
     if SHELL_CONFIG.DEBUG:
         # Debug mode: run Django's development server directly in this process
         if not reload:
             runserver_args.append('--noreload')  # '--insecure'
         call_command("runserver", *runserver_args)
     else:
         # Otherwise hand off to the supervisord-managed server workers
         from workers.supervisord_util import start_server_workers
         print()
         start_server_workers(host=host, port=port, daemonize=False)
         print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
@docstring(server.__doc__)
 def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
--- a/archivebox/cli/archivebox_shell.py
+++ b/archivebox/cli/archivebox_shell.py
@ -11,7 +11,19 @@ from typing import Optional, List, IO
 from archivebox.misc.util import docstring
 from archivebox.config import DATA_DIR
 from archivebox.misc.logging_util import SmartFormatter, reject_stdin
-from ..main import shell
+
 #@enforce_types
 def shell(out_dir: Path=DATA_DIR) -> None:
     """Enter an interactive ArchiveBox Django shell"""
     # NOTE: the docstring above is reused as CLI help text via @docstring(shell.__doc__).
     # out_dir is accepted for signature parity; it is not read inside this function.
     check_data_folder()
     # Imported lazily, after the data folder check has run
     from django.core import management
     management.call_command("shell_plus")
@docstring(shell.__doc__)
--- a/archivebox/cli/archivebox_status.py
+++ b/archivebox/cli/archivebox_status.py
@ -8,10 +8,114 @@ import argparse
 from pathlib import Path
 from typing import Optional, List, IO
 from rich import print
 from archivebox.misc.util import docstring
 from archivebox.config import DATA_DIR
 from archivebox.misc.logging_util import SmartFormatter, reject_stdin
-from ..main import status
+
 # @enforce_types
 def status(out_dir: Path=DATA_DIR) -> None:
    """Print out some info and statistics about the archive collection"""
    # Prints, in order: index size, SQL vs JSON index counts, archive directory size,
    # per-status folder counts, hints, admin users / last login, and up to the
    # 10 most recently downloaded snapshots. Returns nothing.
    check_data_folder()
    from core.models import Snapshot
    from django.contrib.auth import get_user_model
    User = get_user_model()
    # --- Section 1: main index files in out_dir ---
    print('{green}[*] Scanning archive main index...{reset}'.format(**SHELL_CONFIG.ANSI))
    print(SHELL_CONFIG.ANSI['lightyellow'], f'   {out_dir}/*', SHELL_CONFIG.ANSI['reset'])
    # non-recursive scan; presumably pattern='index.' limits it to index.* files -- confirm in get_dir_size
    num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.')
    size = printable_filesize(num_bytes)
    print(f'    Index size: {size} across {num_files} files')
    print()
    links = load_main_index(out_dir=out_dir)
    num_sql_links = links.count()
    # count the parsed JSON link details without building a list
    num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir))
    print(f'    > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {CONSTANTS.SQL_INDEX_FILENAME})')
    print(f'    > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR.name}/*/index.json)')
    print()
    # --- Section 2: archive data directories ---
    print('{green}[*] Scanning archive data directories...{reset}'.format(**SHELL_CONFIG.ANSI))
    print(SHELL_CONFIG.ANSI['lightyellow'], f'   {ARCHIVE_DIR}/*', SHELL_CONFIG.ANSI['reset'])
    num_bytes, num_dirs, num_files = get_dir_size(ARCHIVE_DIR)
    size = printable_filesize(num_bytes)
    print(f'    Size: {size} across {num_files} files in {num_dirs} directories')
    print(SHELL_CONFIG.ANSI['black'])
    # Each count is paired with the docstring of the helper that produced it as its description
    num_indexed = len(get_indexed_folders(links, out_dir=out_dir))
    num_archived = len(get_archived_folders(links, out_dir=out_dir))
    num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir))
    print(f'    > indexed: {num_indexed}'.ljust(36), f'({get_indexed_folders.__doc__})')
    print(f'      > archived: {num_archived}'.ljust(36), f'({get_archived_folders.__doc__})')
    print(f'      > unarchived: {num_unarchived}'.ljust(36), f'({get_unarchived_folders.__doc__})')
    num_present = len(get_present_folders(links, out_dir=out_dir))
    num_valid = len(get_valid_folders(links, out_dir=out_dir))
    print()
    print(f'    > present: {num_present}'.ljust(36), f'({get_present_folders.__doc__})')
    print(f'      > valid: {num_valid}'.ljust(36), f'({get_valid_folders.__doc__})')
    duplicate = get_duplicate_folders(links, out_dir=out_dir)
    orphaned = get_orphaned_folders(links, out_dir=out_dir)
    corrupted = get_corrupted_folders(links, out_dir=out_dir)
    unrecognized = get_unrecognized_folders(links, out_dir=out_dir)
    # Merging the mappings de-duplicates keys, so a folder in several categories is counted once
    num_invalid = len({**duplicate, **orphaned, **corrupted, **unrecognized})
    print(f'      > invalid: {num_invalid}'.ljust(36), f'({get_invalid_folders.__doc__})')
    print(f'        > duplicate: {len(duplicate)}'.ljust(36), f'({get_duplicate_folders.__doc__})')
    print(f'        > orphaned: {len(orphaned)}'.ljust(36), f'({get_orphaned_folders.__doc__})')
    print(f'        > corrupted: {len(corrupted)}'.ljust(36), f'({get_corrupted_folders.__doc__})')
    print(f'        > unrecognized: {len(unrecognized)}'.ljust(36), f'({get_unrecognized_folders.__doc__})')
    print(SHELL_CONFIG.ANSI['reset'])
    # Hints are only shown when the corresponding counts are non-zero
    if num_indexed:
        print('    {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**SHELL_CONFIG.ANSI))
        print('        archivebox list --status=<status>  (e.g. indexed, corrupted, archived, etc.)')
    if orphaned:
        print('    {lightred}Hint:{reset} To automatically import orphaned data directories into the main index, run:'.format(**SHELL_CONFIG.ANSI))
        print('        archivebox init')
    if num_invalid:
        print('    {lightred}Hint:{reset} You may need to manually remove or fix some invalid data directories, afterwards make sure to run:'.format(**SHELL_CONFIG.ANSI))
        print('        archivebox init')
    print()
    # --- Section 3: users and recent activity ---
    print('{green}[*] Scanning recent archive changes and user logins:{reset}'.format(**SHELL_CONFIG.ANSI))
    print(SHELL_CONFIG.ANSI['lightyellow'], f'   {CONSTANTS.LOGS_DIR}/*', SHELL_CONFIG.ANSI['reset'])
    users = get_admins().values_list('username', flat=True)
    print(f'    UI users {len(users)}: {", ".join(users)}')
    # Last row when ordered by last_login ascending
    # NOTE(review): where users with a NULL last_login sort is database-dependent -- confirm
    last_login = User.objects.order_by('last_login').last()
    if last_login:
        # [:16] keeps the first 16 characters of the stringified value (presumably 'YYYY-MM-DD HH:MM' for a datetime)
        print(f'    Last UI login: {last_login.username} @ {str(last_login.last_login)[:16]}')
    last_downloaded = Snapshot.objects.order_by('downloaded_at').last()
    if last_downloaded:
        print(f'    Last changes: {str(last_downloaded.downloaded_at)[:16]}')
    if not users:
        print()
        print('    {lightred}Hint:{reset} You can create an admin user by running:'.format(**SHELL_CONFIG.ANSI))
        print('        archivebox manage createsuperuser')
    print()
    # First 10 rows by downloaded_at descending; rows without a downloaded_at value are skipped
    for snapshot in links.order_by('-downloaded_at')[:10]:
        if not snapshot.downloaded_at:
            continue
        print(
            SHELL_CONFIG.ANSI['black'],
            (
                f'   > {str(snapshot.downloaded_at)[:16]} '
                # is_archived indexes the ("X", "√") tuple: falsy -> "X", truthy -> "√"
                f'[{snapshot.num_outputs} {("X", "√")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] '
                f'"{snapshot.title}": {snapshot.url}'
            )[:SHELL_CONFIG.TERM_WIDTH],  # truncate each row to SHELL_CONFIG.TERM_WIDTH characters
            SHELL_CONFIG.ANSI['reset'],
        )
    print(SHELL_CONFIG.ANSI['black'], '   ...', SHELL_CONFIG.ANSI['reset'])
@docstring(status.__doc__)
--- a/archivebox/cli/archivebox_update.py
+++ b/archivebox/cli/archivebox_update.py
@ -24,7 +24,92 @@ from archivebox.index import (
 from archivebox.misc.logging_util import SmartFormatter, accept_stdin
 # from ..main import update
 # LEGACY VERSION:
 # @enforce_types
 # def update(resume: Optional[float]=None,
 #            only_new: bool=ARCHIVING_CONFIG.ONLY_NEW,
 #            index_only: bool=False,
 #            overwrite: bool=False,
 #            filter_patterns_str: Optional[str]=None,
 #            filter_patterns: Optional[List[str]]=None,
 #            filter_type: Optional[str]=None,
 #            status: Optional[str]=None,
 #            after: Optional[str]=None,
 #            before: Optional[str]=None,
 #            extractors: str="",
 #            out_dir: Path=DATA_DIR) -> List[Link]:
 #     """Import any new links from subscriptions and retry any previously failed/skipped links"""
 #     from core.models import ArchiveResult
 #     from .search import index_links
 #     # from workers.supervisord_util import start_cli_workers
 #     check_data_folder()
 #     # start_cli_workers()
 #     new_links: List[Link] = [] # TODO: Remove input argument: only_new
 #     extractors = extractors.split(",") if extractors else []
 #     # Step 1: Filter for selected_links
 #     print('[*] Finding matching Snapshots to update...')
 #     print(f'    - Filtering by {" ".join(filter_patterns)} ({filter_type}) {before=} {after=} {status=}...')
 #     matching_snapshots = list_links(
 #         filter_patterns=filter_patterns,
 #         filter_type=filter_type,
 #         before=before,
 #         after=after,
 #     )
 #     print(f'    - Checking {matching_snapshots.count()} snapshot folders for existing data with {status=}...')
 #     matching_folders = list_folders(
 #         links=matching_snapshots,
 #         status=status,
 #         out_dir=out_dir,
 #     )
 #     all_links = (link for link in matching_folders.values() if link)
 #     print('    - Sorting by most unfinished -> least unfinished + date archived...')
 #     all_links = sorted(all_links, key=lambda link: (ArchiveResult.objects.filter(snapshot__url=link.url).count(), link.timestamp))
 #     if index_only:
 #         for link in all_links:
 #             write_link_details(link, out_dir=out_dir, skip_sql_index=True)
 #         index_links(all_links, out_dir=out_dir)
 #         return all_links
 #     # Step 2: Run the archive methods for each link
 #     to_archive = new_links if only_new else all_links
 #     if resume:
 #         to_archive = [
 #             link for link in to_archive
 #             if link.timestamp >= str(resume)
 #         ]
 #         if not to_archive:
 #             stderr('')
 #             stderr(f'[√] Nothing found to resume after {resume}', color='green')
 #             return all_links
 #     archive_kwargs = {
 #         "out_dir": out_dir,
 #     }
 #     if extractors:
 #         archive_kwargs["methods"] = extractors
 #     archive_links(to_archive, overwrite=overwrite, **archive_kwargs)
 #     # Step 4: Re-write links index with updated titles, icons, and resources
 #     all_links = load_main_index(out_dir=out_dir)
 #     return all_links
 def update():
    """Import any new links from subscriptions and retry any previously failed/skipped links"""
    from archivebox.config.django import setup_django
    setup_django()
--- a/archivebox/cli/archivebox_version.py
+++ b/archivebox/cli/archivebox_version.py
@ -1,61 +1,207 @@
 #!/usr/bin/env python3
 __package__ = 'archivebox.cli'
 __command__ = 'archivebox version'
 import sys
-import argparse
+from typing import Iterable
 from pathlib import Path
 from typing import Optional, List, IO
-# from archivebox.misc.util import docstring
+import rich_click as click
-from archivebox.config import DATA_DIR, VERSION
+
-from archivebox.misc.logging_util import SmartFormatter, reject_stdin
+from archivebox.misc.util import docstring, enforce_types
-# @docstring(version.__doc__)
def _parse_name_filter(names: Optional[Iterable[str]]) -> frozenset[str]:
    """Normalize a binary/binprovider name filter into a set of exact names.

    Accepts None / an empty value (meaning "no filter"), a single comma-separated
    string such as 'wget,curl', or any iterable of names. Materializing into a
    frozenset makes repeated membership tests exact (a raw str would do substring
    matching) and safe for one-shot iterables such as generators.
    """
    if not names:
        return frozenset()
    if isinstance(names, str):
        names = names.split(',')
    return frozenset(name.strip() for name in names if name.strip())


@enforce_types
def version(quiet: bool=False,
            binproviders: Iterable[str]=(),
            binaries: Iterable[str]=()) -> list[str]:
    """Print the ArchiveBox version, debug metadata, and installed dependency versions"""
    # NOTE: the docstring above is reused verbatim as the CLI help text (see the
    # @docstring(version.__doc__) decorator on main), so argument docs live here:
    #   quiet:        only print the bare version number, then return []
    #   binproviders: names of binproviders to report on (empty = all)
    #   binaries:     names of binaries to report on (empty = all)
    # Returns the names of the binaries that could not be detected as valid
    # (an empty list when quiet, or when no collection DATA_DIR is active).

    # fast path for just getting the version and exiting, don't do any slower imports
    from archivebox.config.version import VERSION

    print(VERSION)
    if quiet or '--version' in sys.argv:
        return []

    # Only do slower imports when getting full version info
    import os
    import platform
    from pathlib import Path

    from rich.panel import Panel
    from rich.console import Console
    from abx_pkg import Binary

    import abx
    import archivebox
    from archivebox.config import CONSTANTS, DATA_DIR
    from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
    from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID, IN_DOCKER
    from archivebox.config.paths import get_data_locations, get_code_locations
    from archivebox.config.common import SHELL_CONFIG, STORAGE_CONFIG, SEARCH_BACKEND_CONFIG
    from archivebox.misc.logging_util import printable_folder_status

    from abx_plugin_default_binproviders import apt, brew, env

    # Exact-match name filters; empty means "show everything"
    binary_filter = _parse_name_filter(binaries)
    provider_filter = _parse_name_filter(binproviders)

    # Loop-invariant strings used to shorten printed paths
    data_dir_str = str(DATA_DIR)
    home_dir_str = str(Path('~').expanduser())
    data_dir_markup = '[light_slate_blue].[/light_slate_blue]'

    console = Console()
    prnt = console.print

    LDAP_ENABLED = archivebox.pm.hook.get_SCOPE_CONFIG().LDAP_ENABLED

    # 0.7.1
    # ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365
    # IN_DOCKER=False IN_QEMU=False ARCH=arm64 OS=Darwin PLATFORM=macOS-14.2-arm64-arm-64bit PYTHON=Cpython
    # FS_ATOMIC=True FS_REMOTE=False FS_USER=501:20 FS_PERMS=644
    # DEBUG=False IS_TTY=True TZ=UTC SEARCH_BACKEND=ripgrep LDAP=False

    p = platform.uname()
    COMMIT_HASH = get_COMMIT_HASH()
    prnt(
        '[dark_green]ArchiveBox[/dark_green] [dark_goldenrod]v{}[/dark_goldenrod]'.format(CONSTANTS.VERSION),
        f'COMMIT_HASH={COMMIT_HASH[:7] if COMMIT_HASH else "unknown"}',
        f'BUILD_TIME={get_BUILD_TIME()}',
    )
    prnt(
        f'IN_DOCKER={IN_DOCKER}',
        f'IN_QEMU={SHELL_CONFIG.IN_QEMU}',
        f'ARCH={p.machine}',
        f'OS={p.system}',
        f'PLATFORM={platform.platform()}',
        f'PYTHON={sys.implementation.name.title()}' + (' (venv)' if CONSTANTS.IS_INSIDE_VENV else ''),
    )
    data_locations = get_data_locations()
    OUTPUT_IS_REMOTE_FS = data_locations.DATA_DIR.is_mount or data_locations.ARCHIVE_DIR.is_mount
    DATA_DIR_STAT = CONSTANTS.DATA_DIR.stat()
    prnt(
        f'EUID={os.geteuid()}:{os.getegid()} UID={RUNNING_AS_UID}:{RUNNING_AS_GID} PUID={ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}',
        f'FS_UID={DATA_DIR_STAT.st_uid}:{DATA_DIR_STAT.st_gid}',
        f'FS_PERMS={STORAGE_CONFIG.OUTPUT_PERMISSIONS}',
        f'FS_ATOMIC={STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES}',
        f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
    )
    prnt(
        f'DEBUG={SHELL_CONFIG.DEBUG}',
        f'IS_TTY={SHELL_CONFIG.IS_TTY}',
        f'SUDO={CONSTANTS.IS_ROOT}',
        f'ID={CONSTANTS.MACHINE_ID}:{CONSTANTS.COLLECTION_ID}',
        f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}',
        f'LDAP={LDAP_ENABLED}',
        #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})',  # add this if we have more useful info to show eventually
    )
    prnt()

    # Without a readable archive dir AND config file there is no active collection:
    # show a hint panel and stop before the (collection-dependent) dependency report.
    if not (os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) and os.access(CONSTANTS.CONFIG_FILE, os.R_OK)):
        PANEL_TEXT = '\n'.join((
            # '',
            # f'[yellow]CURRENT DIR =[/yellow] [red]{os.getcwd()}[/red]',
            '',
            '[violet]Hint:[/violet] [green]cd[/green] into a collection [blue]DATA_DIR[/blue] and run [green]archivebox version[/green] again...',
            '      [grey53]OR[/grey53] run [green]archivebox init[/green] to create a new collection in the current dir.',
            '',
            '      [i][grey53](this is [red]REQUIRED[/red] if you are opening a Github Issue to get help)[/grey53][/i]',
            '',
        ))
        prnt(Panel(
            PANEL_TEXT,
            expand=False,
            border_style='grey53',
            title='[red]:exclamation: No collection [blue]DATA_DIR[/blue] is currently active[/red]',
            subtitle='Full version info is only available when inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]',
        ))
        prnt()
        return []

    prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]')
    failures = []
    BINARIES = abx.as_dict(archivebox.pm.hook.get_BINARIES())
    for binary in list(BINARIES.values()):
        if binary.name == 'archivebox':
            continue

        # skip if the binary is not in the requested list of binaries
        if binary_filter and binary.name not in binary_filter:
            continue

        # skip if the binary is not supported by any of the requested binproviders
        if provider_filter and binary.binproviders_supported and not any(provider.name in provider_filter for provider in binary.binproviders_supported):
            continue

        # Loading is best-effort: on failure, report the unloaded binary along with the error
        err = None
        try:
            loaded_bin = binary.load()
        except Exception as e:
            err = e
            loaded_bin = binary

        provider_summary = f'[dark_sea_green3]{loaded_bin.binprovider.name.ljust(10)}[/dark_sea_green3]' if loaded_bin.binprovider else '[grey23]not found[/grey23] '
        if loaded_bin.abspath:
            # shorten the path (DATA_DIR -> '.', home -> '~') and shell-escape spaces
            abspath = str(loaded_bin.abspath).replace(data_dir_str, data_dir_markup).replace(home_dir_str, '~')
            abspath = abspath.replace(' ', r'\ ')
        else:
            abspath = f'[red]{err}[/red]'
        prnt('', '[green]√[/green]' if loaded_bin.is_valid else '[red]X[/red]', '', loaded_bin.name.ljust(21), str(loaded_bin.version).ljust(12), provider_summary, abspath, overflow='ignore', crop=False)
        if not loaded_bin.is_valid:
            failures.append(loaded_bin.name)

    prnt()
    prnt('[gold3][i] Package Managers:[/gold3]')
    BINPROVIDERS = abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS())
    for binprovider in list(BINPROVIDERS.values()):
        if provider_filter and binprovider.name not in provider_filter:
            continue

        # TODO: implement a BinProvider.BINARY() method that gets the loaded binary for a binprovider's INSTALLER_BIN
        loaded_bin = binprovider.INSTALLER_BINARY or Binary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew])

        # abspath stays None when the installer binary has no known location
        abspath = None
        if loaded_bin.abspath:
            abspath = str(loaded_bin.abspath).replace(data_dir_str, '.').replace(home_dir_str, '~')
            abspath = abspath.replace(' ', r'\ ')

        PATH = str(binprovider.PATH).replace(data_dir_str, data_dir_markup).replace(home_dir_str, '~')
        ownership_summary = f'UID=[blue]{str(binprovider.EUID).ljust(4)}[/blue]'
        provider_summary = f'[dark_sea_green3]{str(abspath).ljust(52)}[/dark_sea_green3]' if abspath else f'[grey23]{"not available".ljust(52)}[/grey23]'
        prnt('', '[green]√[/green]' if binprovider.is_valid else '[grey53]-[/grey53]', '', binprovider.name.ljust(11), provider_summary, ownership_summary, f'PATH={PATH}', overflow='ellipsis', soft_wrap=True)

    if not (binary_filter or provider_filter):
        # don't show source code / data dir info if we just want to get version info for a binary or binprovider
        prnt()
        prnt('[deep_sky_blue3][i] Code locations:[/deep_sky_blue3]')
        for name, path in get_code_locations().items():
            prnt(printable_folder_status(name, path), overflow='ignore', crop=False)

        prnt()
        # NOTE(review): the earlier guard already returned unless BOTH paths are readable,
        # so this condition should always hold here unless the filesystem changed meanwhile.
        if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) or os.access(CONSTANTS.CONFIG_FILE, os.R_OK):
            prnt('[bright_yellow][i] Data locations:[/bright_yellow]')
            for name, path in get_data_locations().items():
                prnt(printable_folder_status(name, path), overflow='ignore', crop=False)

            from archivebox.misc.checks import check_data_dir_permissions

            check_data_dir_permissions()
        else:
            prnt()
            prnt('[red][i] Data locations:[/red] (not in a data directory)')

    prnt()

    if failures:
        prnt('[red]Error:[/red] [yellow]Failed to detect the following binaries:[/yellow]')
        prnt(f'      [red]{", ".join(failures)}[/red]')
        prnt()
        prnt('[violet]Hint:[/violet] To install missing binaries automatically, run:')
        prnt('      [green]archivebox install[/green]')
        prnt()
    return failures
@click.command()
@click.option('--quiet', '-q', is_flag=True, help='Only print ArchiveBox version number and nothing else. (equivalent to archivebox --version)')
@click.option('--binproviders', '-p', help='Select binproviders to detect DEFAULT=env,apt,brew,sys_pip,venv_pip,lib_pip,pipx,sys_npm,lib_npm,puppeteer,playwright (all)')
@click.option('--binaries', '-b', help='Select binaries to detect DEFAULT=curl,wget,git,yt-dlp,chrome,single-file,readability-extractor,postlight-parser,... (all)')
@docstring(version.__doc__)
def main(**kwargs):
    # CLI entrypoint for `archivebox version`; exits with status 1 if any binary failed detection.
    # (The help text comes from version.__doc__ via the @docstring decorator above.)

    # click hands over --binproviders/--binaries as one raw comma-separated string
    # (or None when omitted), but version() declares them as iterables of names.
    # Split them here so filtering compares whole names instead of substrings.
    for option in ('binproviders', 'binaries'):
        raw = kwargs.get(option)
        if isinstance(raw, str):
            kwargs[option] = [part.strip() for part in raw.split(',') if part.strip()]
        elif raw is None:
            kwargs[option] = ()

    failures = version(**kwargs)
    if failures:
        raise SystemExit(1)
 if __name__ == '__main__':
-    main(args=sys.argv[1:], stdin=sys.stdin)
+    main()
--- a/archivebox/config/django.py
+++ b/archivebox/config/django.py
@ -60,7 +60,7 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
        return
    with Progress(transient=True, expand=True, console=STDERR) as INITIAL_STARTUP_PROGRESS:
-        INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25, visible=False)
+        INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25, visible=True)
        from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission
--- a/archivebox/config/paths.py
+++ b/archivebox/config/paths.py
@ -142,7 +142,7 @@ def create_and_chown_dir(dir_path: Path) -> None:
        os.system(f'chown {ARCHIVEBOX_USER} "{dir_path}"/* 2>/dev/null &')
@cache
-def get_or_create_working_tmp_dir(autofix=True, quiet=False):
+def get_or_create_working_tmp_dir(autofix=True, quiet=True):
    from archivebox import CONSTANTS
    from archivebox.config.common import STORAGE_CONFIG
    from archivebox.misc.checks import check_tmp_dir
@ -165,7 +165,7 @@ def get_or_create_working_tmp_dir(autofix=True, quiet=False):
            pass
        if check_tmp_dir(candidate, throw=False, quiet=True, must_exist=True):
            if autofix and STORAGE_CONFIG.TMP_DIR != candidate:
-                STORAGE_CONFIG.update_in_place(TMP_DIR=candidate, warn=not quiet)
+                STORAGE_CONFIG.update_in_place(TMP_DIR=candidate)
            return candidate
    if not quiet:
@ -193,7 +193,7 @@ def get_or_create_working_lib_dir(autofix=True, quiet=False):
            pass
        if check_lib_dir(candidate, throw=False, quiet=True, must_exist=True):
            if autofix and STORAGE_CONFIG.LIB_DIR != candidate:
-                STORAGE_CONFIG.update_in_place(LIB_DIR=candidate, warn=not quiet)
+                STORAGE_CONFIG.update_in_place(LIB_DIR=candidate)
            return candidate
    if not quiet:
--- a/archivebox/config/permissions.py
+++ b/archivebox/config/permissions.py
@ -36,6 +36,8 @@ HOSTNAME: str           = max([socket.gethostname(), platform.node()], key=len)
 IS_ROOT = RUNNING_AS_UID == 0
 IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
 # IN_DOCKER_COMPOSE =  # TODO: figure out a way to detect if running in docker compose
 FALLBACK_UID = RUNNING_AS_UID or SUDO_UID
 FALLBACK_GID = RUNNING_AS_GID or SUDO_GID
--- a/archivebox/config/views.py
+++ b/archivebox/config/views.py
@ -303,7 +303,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
        "Exit Status": [],
    }
-    from workers.supervisor_util import get_existing_supervisord_process
+    from workers.supervisord_util import get_existing_supervisord_process
    supervisor = get_existing_supervisord_process()
    if supervisor is None:
@ -373,7 +373,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
 def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
    assert request.user.is_superuser, "Must be a superuser to view configuration settings."
-    from workers.supervisor_util import get_existing_supervisord_process, get_worker, get_sock_file, CONFIG_FILE_NAME
+    from workers.supervisord_util import get_existing_supervisord_process, get_worker, get_sock_file, CONFIG_FILE_NAME
    SOCK_FILE = get_sock_file()
    CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME
--- a/archivebox/core/admin_snapshots.py
+++ b/archivebox/core/admin_snapshots.py
@ -21,7 +21,6 @@ from archivebox.misc.logging_util import printable_filesize
 from archivebox.search.admin import SearchResultsAdminMixin
 from archivebox.index.html import snapshot_icons
 from archivebox.extractors import archive_links
 from archivebox.main import remove
 from archivebox.base_models.admin import ABIDModelAdmin
 from archivebox.workers.tasks import bg_archive_links, bg_add
@ -321,7 +320,9 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
        description="☠️ Delete"
    )
    def delete_snapshots(self, request, queryset):
        from archivebox.cli.archivebox_remove import remove
        remove(snapshots=queryset, yes=True, delete=True, out_dir=DATA_DIR)
        messages.success(
            request,
            mark_safe(f"Succesfully deleted {queryset.count()} Snapshots. Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed."),
--- a/archivebox/main.py
+++ b/archivebox/main.py
--- a/archivebox/misc/checks.py
+++ b/archivebox/misc/checks.py
@ -24,7 +24,7 @@ def check_data_folder() -> None:
    from archivebox.config import CONSTANTS
    from archivebox.config.paths import create_and_chown_dir, get_or_create_working_tmp_dir, get_or_create_working_lib_dir
-    archive_dir_exists = os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()
+    archive_dir_exists = os.path.isdir(ARCHIVE_DIR)
    if not archive_dir_exists:
        print('[red][X] No archivebox index found in the current directory.[/red]', file=sys.stderr)
        print(f'    {DATA_DIR}', file=sys.stderr)
--- a/archivebox/misc/logging_util.py
+++ b/archivebox/misc/logging_util.py
@ -12,7 +12,7 @@ from pathlib import Path
 from datetime import datetime, timezone
 from dataclasses import dataclass
-from typing import Any, Optional, List, Dict, Union, IO, TYPE_CHECKING
+from typing import Any, Optional, List, Dict, Union, Iterable, IO, TYPE_CHECKING
 if TYPE_CHECKING:
    from ..index.schema import Link, ArchiveResult
@ -228,7 +228,7 @@ def progress_bar(seconds: int, prefix: str='', ANSI: Dict[str, str]=ANSI) -> Non
        print()
-def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str | IO], pwd: str='.'):
+def log_cli_command(subcommand: str, subcommand_args: Iterable[str]=(), stdin: str | IO | None=None, pwd: str='.'):
    args = ' '.join(subcommand_args)
    version_msg = '[dark_magenta]\\[{now}][/dark_magenta] [dark_red]ArchiveBox[/dark_red] [dark_goldenrod]v{VERSION}[/dark_goldenrod]: [green4]archivebox [green3]{subcommand}[green2] {args}[/green2]'.format(
        now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
--- a/archivebox/misc/shell_welcome_message.py
+++ b/archivebox/misc/shell_welcome_message.py
@ -20,11 +20,9 @@ from datetime import datetime, timedelta   # noqa
 from django.conf import settings           # noqa
 from archivebox import CONSTANTS           # noqa
-from ..main import *                       # noqa
+from archivebox.cli import *               # noqa
 from ..cli import CLI_SUBCOMMANDS
 CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
 CLI_COMMAND_NAMES = ", ".join(CLI_SUBCOMMANDS.keys())
 if __name__ == '__main__':
    # load the rich extension for ipython for pretty printing
@ -40,7 +38,7 @@ if __name__ == '__main__':
    prnt('[green]import re, os, sys, psutil, subprocess, reqiests, json, pydantic, benedict, django, abx[/]')
    prnt('[yellow4]# ArchiveBox Imports[/]')
    prnt('[yellow4]import archivebox[/]')
-    prnt('[yellow4]from archivebox.main import {}[/]'.format(CLI_COMMAND_NAMES))
+    prnt('[yellow4]from archivebox.cli import *[/]')
    prnt()
    if console.width >= 80:
--- a/archivebox/pkgs/abx/abx.py
+++ b/archivebox/pkgs/abx/abx.py
@ -459,8 +459,8 @@ def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId
    PLUGINS_TO_LOAD = sorted(PLUGINS_TO_LOAD, key=lambda x: x['order'])
    for plugin_info in PLUGINS_TO_LOAD:
-        if '--version' not in sys.argv and '--help' not in sys.argv:
+        # if '--version' not in sys.argv and '--help' not in sys.argv:
-            print(f'🧩 Loading plugin: {plugin_info["id"]}...', end='\r', flush=True, file=sys.stderr)
+        #     print(f'🧩 Loading plugin: {plugin_info["id"]}...', end='\r', flush=True, file=sys.stderr)
        pm.register(plugin_info['module'])
        LOADED_PLUGINS[plugin_info['id']] = plugin_info
    # print('\x1b[2K', end='\r', flush=True, file=sys.stderr)
--- a/archivebox/workers/semaphores.py
+++ b/archivebox/workers/semaphores.py
@ -1,103 +1,103 @@
-import uuid
+# import uuid
-from functools import wraps
+# from functools import wraps
-from django.db import connection, transaction
+# from django.db import connection, transaction
-from django.utils import timezone
+# from django.utils import timezone
-from huey.exceptions import TaskLockedException
+# from huey.exceptions import TaskLockedException
-from archivebox.config import CONSTANTS
+# from archivebox.config import CONSTANTS
-class SqliteSemaphore:
+# class SqliteSemaphore:
-    def __init__(self, db_path, table_name, name, value=1, timeout=None):
+#     def __init__(self, db_path, table_name, name, value=1, timeout=None):
-        self.db_path = db_path
+#         self.db_path = db_path
-        self.table_name = table_name
+#         self.table_name = table_name
-        self.name = name
+#         self.name = name
-        self.value = value
+#         self.value = value
-        self.timeout = timeout or 86400  # Set a max age for lock holders
+#         self.timeout = timeout or 86400  # Set a max age for lock holders
-        # Ensure the table exists
+#         # Ensure the table exists
-        with connection.cursor() as cursor:
+#         with connection.cursor() as cursor:
-            cursor.execute(f"""
+#             cursor.execute(f"""
-                CREATE TABLE IF NOT EXISTS {self.table_name} (
+#                 CREATE TABLE IF NOT EXISTS {self.table_name} (
-                    id TEXT PRIMARY KEY,
+#                     id TEXT PRIMARY KEY,
-                    name TEXT,
+#                     name TEXT,
-                    timestamp DATETIME
+#                     timestamp DATETIME
-                )
+#                 )
-            """)
+#             """)
-    def acquire(self, name=None):
+#     def acquire(self, name=None):
-        name = name or str(uuid.uuid4())
+#         name = name or str(uuid.uuid4())
-        now = timezone.now()
+#         now = timezone.now()
-        expiration = now - timezone.timedelta(seconds=self.timeout)
+#         expiration = now - timezone.timedelta(seconds=self.timeout)
-        with transaction.atomic():
+#         with transaction.atomic():
-            # Remove expired locks
+#             # Remove expired locks
-            with connection.cursor() as cursor:
+#             with connection.cursor() as cursor:
-                cursor.execute(f"""
+#                 cursor.execute(f"""
-                    DELETE FROM {self.table_name}
+#                     DELETE FROM {self.table_name}
-                    WHERE name = %s AND timestamp < %s
+#                     WHERE name = %s AND timestamp < %s
-                """, [self.name, expiration])
+#                 """, [self.name, expiration])
-            # Try to acquire the lock
+#             # Try to acquire the lock
-            with connection.cursor() as cursor:
+#             with connection.cursor() as cursor:
-                cursor.execute(f"""
+#                 cursor.execute(f"""
-                    INSERT INTO {self.table_name} (id, name, timestamp)
+#                     INSERT INTO {self.table_name} (id, name, timestamp)
-                    SELECT %s, %s, %s
+#                     SELECT %s, %s, %s
-                    WHERE (
+#                     WHERE (
-                        SELECT COUNT(*) FROM {self.table_name}
+#                         SELECT COUNT(*) FROM {self.table_name}
-                        WHERE name = %s
+#                         WHERE name = %s
-                    ) < %s
+#                     ) < %s
-                """, [name, self.name, now, self.name, self.value])
+#                 """, [name, self.name, now, self.name, self.value])
-                if cursor.rowcount > 0:
+#                 if cursor.rowcount > 0:
-                    return name
+#                     return name
-        # If we couldn't acquire the lock, remove our attempted entry
+#         # If we couldn't acquire the lock, remove our attempted entry
-        with connection.cursor() as cursor:
+#         with connection.cursor() as cursor:
-            cursor.execute(f"""
+#             cursor.execute(f"""
-                DELETE FROM {self.table_name}
+#                 DELETE FROM {self.table_name}
-                WHERE id = %s AND name = %s
+#                 WHERE id = %s AND name = %s
-            """, [name, self.name])
+#             """, [name, self.name])
-        return None
+#         return None
-    def release(self, name):
+#     def release(self, name):
-        with connection.cursor() as cursor:
+#         with connection.cursor() as cursor:
-            cursor.execute(f"""
+#             cursor.execute(f"""
-                DELETE FROM {self.table_name}
+#                 DELETE FROM {self.table_name}
-                WHERE id = %s AND name = %s
+#                 WHERE id = %s AND name = %s
-            """, [name, self.name])
+#             """, [name, self.name])
-        return cursor.rowcount > 0
+#         return cursor.rowcount > 0
-LOCKS_DB_PATH = CONSTANTS.DATABASE_FILE.parent / 'locks.sqlite3'
+# LOCKS_DB_PATH = CONSTANTS.DATABASE_FILE.parent / 'locks.sqlite3'
-def lock_task_semaphore(db_path, table_name, lock_name, value=1, timeout=None):
+# def lock_task_semaphore(db_path, table_name, lock_name, value=1, timeout=None):
-    """
+#     """
-    Lock which can be acquired multiple times (default = 1).
+#     Lock which can be acquired multiple times (default = 1).
-    NOTE: no provisions are made for blocking, waiting, or notifying. This is
+#     NOTE: no provisions are made for blocking, waiting, or notifying. This is
-    just a lock which can be acquired a configurable number of times.
+#     just a lock which can be acquired a configurable number of times.
-    Example:
+#     Example:
-    # Allow up to 3 workers to run this task concurrently. If the task is
+#     # Allow up to 3 workers to run this task concurrently. If the task is
-    # locked, retry up to 2 times with a delay of 60s.
+#     # locked, retry up to 2 times with a delay of 60s.
-    @huey.task(retries=2, retry_delay=60)
+#     @huey.task(retries=2, retry_delay=60)
-    @lock_task_semaphore('path/to/db.sqlite3', 'semaphore_locks', 'my-lock', 3)
+#     @lock_task_semaphore('path/to/db.sqlite3', 'semaphore_locks', 'my-lock', 3)
-    def my_task():
+#     def my_task():
-        ...
+#         ...
-    """
+#     """
-    sem = SqliteSemaphore(db_path, table_name, lock_name, value, timeout)
+#     sem = SqliteSemaphore(db_path, table_name, lock_name, value, timeout)
-    def decorator(fn):
+#     def decorator(fn):
-        @wraps(fn)
+#         @wraps(fn)
-        def inner(*args, **kwargs):
+#         def inner(*args, **kwargs):
-            tid = sem.acquire()
+#             tid = sem.acquire()
-            if tid is None:
+#             if tid is None:
-                raise TaskLockedException(f'unable to acquire lock {lock_name}')
+#                 raise TaskLockedException(f'unable to acquire lock {lock_name}')
-            try:
+#             try:
-                return fn(*args, **kwargs)
+#                 return fn(*args, **kwargs)
-            finally:
+#             finally:
-                sem.release(tid)
+#                 sem.release(tid)
-        return inner
+#         return inner
-    return decorator
+#     return decorator
--- a/archivebox/workers/supervisord_util.py
+++ b/archivebox/workers/supervisord_util.py
--- a/archivebox/workers/tasks.py
+++ b/archivebox/workers/tasks.py
@ -8,7 +8,7 @@ from django_huey import db_task, task
 from huey_monitor.models import TaskModel
 from huey_monitor.tqdm import ProcessInfo
-from .supervisor_util import get_or_create_supervisord_process
+from .supervisord_util import get_or_create_supervisord_process
 # @db_task(queue="commands", context=True, schedule=1)
 # def scheduler_tick():
--- a/pyproject.toml
+++ b/pyproject.toml
@ -115,6 +115,8 @@ dependencies = [
    "abx-plugin-mercury>=2024.10.28",
    "abx-plugin-htmltotext>=2024.10.28",
    "python-statemachine>=2.3.6",
    "click>=8.1.7",
    "rich-click>=1.8.4",
 ]
 [project.optional-dependencies]
--- a/uv.lock
+++ b/uv.lock
@ -658,6 +658,7 @@ dependencies = [
    { name = "atomicwrites", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "base32-crockford", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "channels", extra = ["daphne"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "croniter", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "dateparser", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "django", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@ -688,6 +689,7 @@ dependencies = [
    { name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "rich-argparse", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "rich-click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "setuptools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "sonic-client", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "supervisor", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@ -784,6 +786,7 @@ requires-dist = [
    { name = "atomicwrites", specifier = "==1.4.1" },
    { name = "base32-crockford", specifier = "==0.3.0" },
    { name = "channels", extras = ["daphne"], specifier = ">=4.1.0" },
    { name = "click", specifier = ">=8.1.7" },
    { name = "croniter", specifier = ">=3.0.3" },
    { name = "dateparser", specifier = ">=1.2.0" },
    { name = "django", specifier = ">=5.1.1,<6.0" },
@ -821,6 +824,7 @@ requires-dist = [
    { name = "requests-tracker", marker = "extra == 'debug'", specifier = ">=0.3.3" },
    { name = "rich", specifier = ">=13.8.0" },
    { name = "rich-argparse", specifier = ">=1.5.2" },
    { name = "rich-click", specifier = ">=1.8.4" },
    { name = "setuptools", specifier = ">=74.1.0" },
    { name = "sonic-client", specifier = ">=1.0.0" },
    { name = "supervisor", specifier = ">=4.2.5" },
@ -2806,6 +2810,20 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/25/45/54b95bb72bb17c27a7252bee5034955020b5869a33918b660ffc29cbf608/rich_argparse-1.6.0-py3-none-any.whl", hash = "sha256:fbe70a1d821b3f2fa8958cddf0cae131870a6e9faa04ab52b409cb1eda809bd7", size = 20072 },
 ]
 [[package]]
 name = "rich-click"
 version = "1.8.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "click", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
    { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/fc/f4/e48dc2850662526a26fb0961aacb0162c6feab934312b109b748ae4efee2/rich_click-1.8.4.tar.gz", hash = "sha256:0f49471f04439269d0e66a6f43120f52d11d594869a2a0be600cfb12eb0616b9", size = 38247 }
 wheels = [
    { url = "https://files.pythonhosted.org/packages/84/f3/72f93d8494ee641bde76bfe1208cf4abc44c6f9448673762f6077bc162d6/rich_click-1.8.4-py3-none-any.whl", hash = "sha256:2d2841b3cebe610d5682baa1194beaf78ab00c4fa31931533261b5eba2ee80b7", size = 35071 },
 ]
 [[package]]
 name = "ruff"
 version = "0.7.4"