Merge branch 'newmodels' into dev

Nick Sweeting 2024-10-21 00:38:56 -07:00
commit 354c1ede35
185 changed files with 5524 additions and 3819 deletions

View file

@ -300,10 +300,15 @@ RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH
# Setup ArchiveBox runtime config
WORKDIR "$DATA_DIR"
RUN openssl rand -hex 16 > /etc/machine-id \
&& chown -R "$DEFAULT_PUID:$DEFAULT_PGID" "/tmp"
&& mkdir -p "/tmp/archivebox" \
&& chown -R "$DEFAULT_PUID:$DEFAULT_PGID" "/tmp/archivebox" \
&& mkdir -p "/usr/share/archivebox/lib" \
&& chown -R "$DEFAULT_PUID:$DEFAULT_PGID" "/usr/share/archivebox/lib" \
ENV GOOGLE_API_KEY=no \
GOOGLE_DEFAULT_CLIENT_ID=no \
GOOGLE_DEFAULT_CLIENT_SECRET=no \
TMP_DIR=/tmp/archivebox \
LIB_DIR=/usr/share/archivebox/lib \
ALLOWED_HOSTS=*
# Print version for nice docker finish summary

View file

@ -130,7 +130,7 @@ curl -fsSL 'https://get.archivebox.io' | sh
- [**Extracts a wide variety of content out-of-the-box**](https://github.com/ArchiveBox/ArchiveBox/issues/51): [media (yt-dlp), articles (readability), code (git), etc.](#output-formats)
- [**Supports scheduled/realtime importing**](https://github.com/ArchiveBox/ArchiveBox/wiki/Scheduled-Archiving) from [many types of sources](#input-formats)
- [**Uses standard, durable, long-term formats**](#output-formats) like HTML, JSON, PDF, PNG, MP4, TXT, and WARC
- [**Usable as a oneshot CLI**](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage), [**self-hosted web UI**](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#UI-Usage), [Python API](https://docs.archivebox.io/en/latest/modules.html) (BETA), [REST API](https://github.com/ArchiveBox/ArchiveBox/issues/496) (ALPHA), or [desktop app](https://github.com/ArchiveBox/electron-archivebox)
- [**Usable as a oneshot CLI**](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#CLI-Usage), [**self-hosted web UI**](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#UI-Usage), [Python API](https://docs.archivebox.io/en/dev/apidocs/archivebox/archivebox.html) (BETA), [REST API](https://github.com/ArchiveBox/ArchiveBox/issues/496) (ALPHA), or [desktop app](https://github.com/ArchiveBox/electron-archivebox)
- [**Saves all pages to archive.org as well**](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_archive_dot_org) by default for redundancy (can be [disabled](https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#stealth-mode) for local-only mode)
- Advanced users: support for archiving [content requiring login/paywall/cookies](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#chrome_user_data_dir) (see wiki security caveats!)
- Planned: support for running [JS during archiving](https://github.com/ArchiveBox/ArchiveBox/issues/51) to adblock, [autoscroll](https://github.com/ArchiveBox/ArchiveBox/issues/80), [modal-hide](https://github.com/ArchiveBox/ArchiveBox/issues/175), [thread-expand](https://github.com/ArchiveBox/ArchiveBox/issues/345)

archivebox/Architecture.md (new file, 172 lines)
View file

@ -0,0 +1,172 @@
# ArchiveBox UI
## Page: Getting Started
### What do you want to capture?
- Save some URLs now -> [Add page]
- Paste some URLs to archive now
- Upload a file containing URLs (bookmarks.html export, RSS.xml feed, markdown file, word doc, PDF, etc.)
- Pull in URLs to archive from a remote location (e.g. RSS feed URL, remote TXT file, JSON file, etc.)
- Import URLs from a browser -> [Import page]
- Desktop: Get the ArchiveBox Chrome/Firefox extension
- Mobile: Get the ArchiveBox iOS App / Android App
- Upload a bookmarks.html export file
- Upload a browser_history.sqlite3 export file
- Import URLs from a 3rd party bookmarking service -> [Sync page]
- Pocket
- Pinboard
- Instapaper
- Wallabag
- Zapier, N8N, IFTTT, etc.
- Upload a bookmarks.html export, bookmarks.json, RSS, etc. file
- Archive URLs on a schedule -> [Schedule page]
- Archive an entire website -> [Crawl page]
- What starting URL/domain?
- How deep?
- Follow links to external domains?
- Follow links to parent URLs?
- Maximum number of pages to save?
- Maximum number of requests/minute?
- Crawl for URLs with a search engine and save automatically
- Some URLs on a schedule
- Save an entire website (e.g. `https://example.com`)
- Save results matching a search query (e.g. "site:example.com")
- Save a social media feed (e.g. `https://x.com/user/1234567890`)
--------------------------------------------------------------------------------
### Crawls App
- Archive an entire website -> [Crawl page]
- What are the seed URLs?
- How many hops to follow?
- Follow links to external domains?
- Follow links to parent URLs?
- Maximum number of pages to save?
- Maximum number of requests/minute?
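These parameters map naturally onto a Django model. A minimal sketch, with hypothetical field names (not the final schema):

```python
from django.db import models

class Crawl(models.Model):
    seed_urls = models.TextField()                                    # what are the seed URLs?
    max_depth = models.PositiveSmallIntegerField(default=0)           # how many hops to follow?
    follow_external = models.BooleanField(default=False)              # follow links to external domains?
    follow_parents = models.BooleanField(default=False)               # follow links to parent URLs?
    max_pages = models.PositiveIntegerField(null=True, blank=True)    # maximum number of pages to save
    max_requests_per_minute = models.PositiveIntegerField(null=True, blank=True)  # rate limit
```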
--------------------------------------------------------------------------------
### Scheduler App
- Archive URLs on a schedule -> [Schedule page]
- What URL(s)?
- How often?
- Do you want to discard old snapshots after x amount of time?
- Any filter rules?
- Want to be notified when changes are detected -> redirect[Alerts app/create new alert(crawl=self)]
* Choose Schedule to check for new URLs: Schedule.objects.get(pk=xyz)
- 1 minute
- 5 minutes
- 1 hour
- 1 day
* Choose destination Crawl to archive URLs with: Crawl.objects.get(pk=xyz)
- Tags
- Persona
- Created By ID
- Config
- Filters
- URL patterns to include
- URL patterns to exclude
- ONLY_NEW= Ignore URLs if already saved once / save URL each time it appears / only save if last save > x time ago
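A rough sketch of the schedule record implied by the questions above (all names are hypothetical):

```python
from django.db import models

class CrawlSchedule(models.Model):
    CHECK_INTERVALS = [('1m', '1 minute'), ('5m', '5 minutes'), ('1h', '1 hour'), ('1d', '1 day')]

    interval = models.CharField(max_length=8, choices=CHECK_INTERVALS)  # how often to check for new URLs
    crawl = models.ForeignKey('Crawl', on_delete=models.CASCADE)        # destination Crawl template
    retention = models.DurationField(null=True, blank=True)             # discard old snapshots after this long
    filter_rules = models.JSONField(default=dict)                       # include/exclude patterns, ONLY_NEW policy
```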
--------------------------------------------------------------------------------
### Sources App (For managing sources that ArchiveBox pulls URLs in from)
- Add a new source to pull URLs in from (WIZARD)
- Choose URI:
- [x] Web UI
- [x] CLI
- Local filesystem path (directory to monitor for new files containing URLs)
- Remote URL (RSS/JSON/XML feed)
- Chrome browser profile sync (login using gmail to pull bookmarks/history)
- Pocket, Pinboard, Instapaper, Wallabag, etc.
- Zapier, N8N, IFTTT, etc.
- Local server filesystem path (directory to monitor for new files containing URLs)
- Google drive (directory to monitor for new files containing URLs)
- Remote server FTP/SFTP/SCP path (directory to monitor for new files containing URLs)
- AWS/S3/B2/GCP bucket (directory to monitor for new files containing URLs)
- XBrowserSync (login to pull bookmarks)
- Choose extractor
- auto
- RSS
- Pocket
- etc.
- Specify extra Config, e.g.
- credentials
- extractor tuning options (e.g. verify_ssl, cookies, etc.)
- Provide credentials for the source
- API Key
- Username / Password
- OAuth
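The same source options could be sketched as a model (field names and types are assumptions):

```python
from django.db import models

class Source(models.Model):
    uri = models.CharField(max_length=255)                       # file:// path, https:// feed, s3:// bucket, pocket://, etc.
    extractor = models.CharField(max_length=32, default='auto')  # auto / RSS / Pocket / ...
    config = models.JSONField(default=dict)                      # extractor tuning options, e.g. verify_ssl, cookies
    credentials = models.JSONField(default=dict)                 # API key / username+password / OAuth tokens
```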
--------------------------------------------------------------------------------
### Alerts App
- Create a new alert, choose condition
- Get notified when a site goes down (<x% success ratio for Snapshots)
- Get notified when a site changes visually more than x% (screenshot diff)
- Get notified when a site's text content changes more than x% (text diff)
- Get notified when a keyword appears
- Get notified when a keyword disappears
- When an AI prompt returns some result
- Choose alert threshold:
- any condition is met
- all conditions are met
- condition is met for x% of URLs
- condition is met for x% of time
- Choose how to notify: (List[AlertDestination])
- maximum alert frequency
- destination type: email / Slack / Webhook / Google Sheet / logfile
- destination info:
- email address(es)
- Slack channel
- Webhook URL
- Choose scope:
- Choose ArchiveResult scope (extractors): (a query that returns ArchiveResult.objects QuerySet)
- All extractors
- Only screenshots
- Only readability / mercury text
- Only video
- Only html
- Only headers
- Choose Snapshot scope (URL): (a query that returns Snapshot.objects QuerySet)
- All domains
- Specific domain
- All domains in a tag
- All domains in a tag category
- All URLs matching a certain regex pattern
- Choose crawl scope: (a query that returns Crawl.objects QuerySet)
- All crawls
- Specific crawls
- crawls by a certain user
- crawls using a certain persona
class AlertDestination(models.Model):
destination_type: [email, slack, webhook, google_sheet, local logfile, b2/s3/gcp bucket, etc.]
maximum_frequency
filter_rules
credentials
alert_template: JINJA2 json/text template that gets populated with alert contents
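Fleshed out as a runnable Django model, the AlertDestination sketch above might look like this (field types are assumptions):

```python
from django.db import models

class AlertDestination(models.Model):
    DESTINATION_TYPES = [
        ('email', 'email'), ('slack', 'slack'), ('webhook', 'webhook'),
        ('google_sheet', 'google_sheet'), ('logfile', 'logfile'), ('bucket', 'bucket'),
    ]

    destination_type = models.CharField(max_length=32, choices=DESTINATION_TYPES)
    maximum_frequency = models.DurationField(null=True, blank=True)  # rate-limit how often alerts can fire
    filter_rules = models.JSONField(default=dict)
    credentials = models.JSONField(default=dict)
    alert_template = models.TextField(blank=True)  # Jinja2 json/text template populated with alert contents
```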

View file

@ -56,7 +56,7 @@ from .config.paths import PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
from .config.version import VERSION # noqa
__version__ = VERSION
__author__ = 'Nick Sweeting'
__author__ = 'ArchiveBox'
__license__ = 'MIT'
ASCII_ICON = """

View file

@ -10,12 +10,11 @@ from django.shortcuts import redirect
from django_object_actions import DjangoObjectActions, action
from api.auth import get_or_create_api_token
from archivebox.misc.util import parse_date
from .abid import ABID
def highlight_diff(display_val: Any, compare_val: Any, invert: bool=False, color_same: str | None=None, color_diff: str | None=None):
"""highlight each character in red that differs with the char at the same index in compare_val"""
@ -37,6 +36,8 @@ def highlight_diff(display_val: Any, compare_val: Any, invert: bool=False, color
))
def get_abid_info(self, obj, request=None):
from archivebox.api.auth import get_or_create_api_token
try:
#abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)} ❌' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅'

View file

@ -321,6 +321,44 @@ class ABIDModel(models.Model):
def get_absolute_url(self):
return self.api_docs_url
class ModelWithHealthStats(models.Model):
num_uses_failed = models.PositiveIntegerField(default=0)
num_uses_succeeded = models.PositiveIntegerField(default=0)
class Meta:
abstract = True
def record_health_failure(self) -> None:
self.num_uses_failed += 1
self.save()
def record_health_success(self) -> None:
self.num_uses_succeeded += 1
self.save()
def reset_health(self) -> None:
# move all the failures to successes when resetting so we don't lose track of the total count
self.num_uses_succeeded = self.num_uses_failed + self.num_uses_succeeded
self.num_uses_failed = 0
self.save()
@property
def health(self) -> int:
total_uses = max((self.num_uses_failed + self.num_uses_succeeded, 1))
success_pct = (self.num_uses_succeeded / total_uses) * 100
return round(success_pct)
####################################################
# Django helpers
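A minimal usage sketch of the new mixin above, assuming a hypothetical InstalledBinary model:

```python
import subprocess
from django.db import models

class InstalledBinary(ModelWithHealthStats):   # inherits num_uses_failed/succeeded + health helpers
    name = models.CharField(max_length=63)

def use_binary(binary: InstalledBinary) -> None:
    try:
        subprocess.run([binary.name, '--version'], check=True, capture_output=True)
        binary.record_health_success()
    except Exception:
        binary.record_health_failure()
    # binary.health is a 0-100 success percentage
```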

View file

@ -2,11 +2,11 @@ __package__ = 'abx'
import importlib
from pathlib import Path
from typing import Dict
from typing import Dict, Callable, List
from . import hookspec as base_spec
from .hookspec import hookimpl, hookspec # noqa
from .manager import pm, PluginManager # noqa
from abx.hookspec import hookimpl, hookspec # noqa
from abx.manager import pm, PluginManager # noqa
pm.add_hookspecs(base_spec)
@ -23,21 +23,28 @@ def get_plugin_order(plugin_entrypoint: Path):
pass
return (order, plugin_entrypoint)
def register_hookspecs(hookspecs):
def register_hookspecs(hookspecs: List[str]):
"""
Register all the hookspecs from a list of module names.
"""
for hookspec_import_path in hookspecs:
hookspec_module = importlib.import_module(hookspec_import_path)
pm.add_hookspecs(hookspec_module)
def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]:
"""
Find all the plugins in a given directory. Just looks for an __init__.py file.
"""
return {
f"{prefix}.{plugin_entrypoint.parent.name}": plugin_entrypoint.parent
for plugin_entrypoint in sorted(plugins_dir.glob("*/apps.py"), key=get_plugin_order)
for plugin_entrypoint in sorted(plugins_dir.glob("*/__init__.py"), key=get_plugin_order)
if plugin_entrypoint.parent.name != 'abx'
} # "plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip"
def get_pip_installed_plugins(group='abx'):
"""replaces pm.load_setuptools_entrypoints("abx")"""
"""replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip"""
import importlib.metadata
DETECTED_PLUGINS = {} # module_name: module_dir_path
@ -52,6 +59,9 @@ def get_pip_installed_plugins(group='abx'):
def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]):
"""
Get the mapping of dir_name: {plugin_id: plugin_dir} for all plugins in the given directories.
"""
DETECTED_PLUGINS = {}
for plugin_prefix, plugin_dir in plugin_dirs.items():
DETECTED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix))
@ -61,6 +71,9 @@ def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]):
# Load all plugins from pip packages, archivebox built-ins, and user plugins
def load_plugins(plugins_dict: Dict[str, Path]):
"""
Load all the plugins from a dictionary of module names and directory paths.
"""
LOADED_PLUGINS = {}
for plugin_module, plugin_dir in plugins_dict.items():
# print(f'Loading plugin: {plugin_module} from {plugin_dir}')
@ -71,6 +84,9 @@ def load_plugins(plugins_dict: Dict[str, Path]):
return LOADED_PLUGINS
def get_registered_plugins():
"""
Get all the plugins registered with Pluggy.
"""
plugins = {}
plugin_to_distinfo = dict(pm.list_plugin_distinfo())
for plugin in pm.get_plugins():
@ -88,3 +104,28 @@ def get_registered_plugins():
return plugins
def get_plugin_hooks(plugin_pkg: str | None) -> Dict[str, Callable]:
"""
Get all the functions marked with @hookimpl on a module.
"""
if not plugin_pkg:
return {}
hooks = {}
plugin_module = importlib.import_module(plugin_pkg)
for attr_name in dir(plugin_module):
if attr_name.startswith('_'):
continue
try:
attr = getattr(plugin_module, attr_name)
if isinstance(attr, Callable):
hooks[attr_name] = None
pm.parse_hookimpl_opts(plugin_module, attr_name)
hooks[attr_name] = attr
except Exception as e:
print(f'Error getting hookimpls for {plugin_pkg}: {e}')
return hooks
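Putting the helpers above together, a typical startup sequence might look like this (the directory layout and plugin id are hypothetical):

```python
from pathlib import Path
import abx

# 1. register hookspec modules so Pluggy knows the valid hook signatures
abx.register_hookspecs(['abx.hookspec'])

# 2. discover plugins installed via pip and plugins in local dirs (looks for __init__.py)
plugins = {
    **abx.get_pip_installed_plugins(group='abx'),
    **abx.get_plugins_in_dirs({'user_plugins': Path('./user_plugins')}),
}

# 3. import + register each plugin module with the plugin manager
abx.load_plugins(plugins)

# 4. inspect what got registered, or list the @hookimpl functions on one plugin
print(abx.get_registered_plugins().keys())
print(abx.get_plugin_hooks('user_plugins.my_plugin').keys())  # hypothetical plugin id
```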

View file

@ -10,31 +10,21 @@ from pathlib import Path
def load_archivebox_plugins(pm, plugins_dict: Dict[str, Path]):
"""Load archivebox plugins, very similar to abx.load_plugins but it looks for a pydantic PLUGIN model + hooks in apps.py"""
LOADED_PLUGINS = {}
for plugin_module, plugin_dir in plugins_dict.items():
for plugin_module, plugin_dir in reversed(plugins_dict.items()):
# print(f'Loading plugin: {plugin_module} from {plugin_dir}')
archivebox_plugins_found = []
# 1. register the plugin module directly in case it contains any loose hookimpls (e.g. in __init__.py)
try:
plugin_module_loaded = importlib.import_module(plugin_module)
pm.register(plugin_module_loaded)
if hasattr(plugin_module_loaded, 'PLUGIN'):
archivebox_plugins_found.append(plugin_module_loaded.PLUGIN)
except Exception as e:
print(f'Error registering plugin: {plugin_module} - {e}')
# 2. then try to import plugin_module.apps as well
if os.access(plugin_dir / 'apps.py', os.R_OK):
plugin_apps = importlib.import_module(plugin_module + '.apps')
pm.register(plugin_apps) # register the whole .apps in case it contains loose hookimpls (not in a class)
if hasattr(plugin_apps, 'PLUGIN'):
archivebox_plugins_found.append(plugin_apps.PLUGIN)
# 3. then try to look for plugin_module.PLUGIN and register it + all its hooks
for ab_plugin in archivebox_plugins_found:
pm.register(ab_plugin)
for hook in ab_plugin.hooks:
hook.__signature__ = hook.__class__.__signature__ # fix to make pydantic model usable as Pluggy plugin
pm.register(hook)
LOADED_PLUGINS[plugin_module] = ab_plugin
# print(f' √ Loaded plugin: {LOADED_PLUGINS}')
# print(f' √ Loaded plugin: {plugin_module} {len(archivebox_plugins_found) * "🧩"}')
return LOADED_PLUGINS

View file

@ -1,38 +0,0 @@
__package__ = 'abx.archivebox'
from typing import Dict
import abx
from .base_hook import BaseHook, HookType
class BaseAdminDataView(BaseHook):
hook_type: HookType = "ADMINDATAVIEW"
name: str = 'example_admin_data_view_list'
verbose_name: str = 'Data View'
route: str = '/__OVERRIDE_THIS__/'
view: str = 'plugins_example.example.views.example_view_list'
items: Dict[str, str] = {
'route': '<str:key>/',
"name": 'example_admin_data_view_item',
'view': 'plugins_example.example.views.example_view_item',
}
@abx.hookimpl
def get_ADMINDATAVIEWS(self):
return [self]
@abx.hookimpl
def get_ADMIN_DATA_VIEWS_URLS(self):
"""routes to be added to django.conf.settings.ADMIN_DATA_VIEWS['urls']"""
route = {
"route": self.route,
"view": self.view,
"name": self.verbose_name,
"items": self.items,
}
return [route]

View file

@ -18,12 +18,9 @@ from archivebox.config import CONSTANTS
from archivebox.config.permissions import ARCHIVEBOX_USER
import abx
from .base_hook import BaseHook, HookType
class BaseBinProvider(BaseHook, BinProvider):
hook_type: HookType = "BINPROVIDER"
class BaseBinProvider(BinProvider):
# TODO: add install/load/load_or_install methods as abx.hookimpl methods
@ -36,12 +33,12 @@ class BaseBinProvider(BaseHook, BinProvider):
def get_BINPROVIDERS(self):
return [self]
class BaseBinary(BaseHook, Binary):
hook_type: HookType = "BINARY"
class BaseBinary(Binary):
@staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None:
bin_dir = bin_dir or CONSTANTS.LIB_BIN_DIR
from archivebox.config.common import STORAGE_CONFIG
bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
if not (binary.abspath and os.access(binary.abspath, os.R_OK)):
return
@ -59,9 +56,10 @@ class BaseBinary(BaseHook, Binary):
@validate_call
def load(self, fresh=False, **kwargs) -> Self:
from archivebox.config.common import STORAGE_CONFIG
if fresh:
binary = super().load(**kwargs)
self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
else:
# get cached binary from db
try:
@ -76,16 +74,18 @@ class BaseBinary(BaseHook, Binary):
@validate_call
def install(self, **kwargs) -> Self:
from archivebox.config.common import STORAGE_CONFIG
binary = super().install(**kwargs)
self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
return binary
@validate_call
def load_or_install(self, fresh=False, **kwargs) -> Self:
from archivebox.config.common import STORAGE_CONFIG
try:
binary = self.load(fresh=fresh)
if binary and binary.version:
self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
return binary
except Exception:
pass
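A usage sketch of the new STORAGE_CONFIG.LIB_DIR flow, assuming a hypothetical WgetBinary (the binproviders_supported field follows the pydantic_pkgr convention):

```python
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName

from abx.archivebox.base_binary import BaseBinary, apt, brew, env

class WgetBinary(BaseBinary):
    name: BinName = 'wget'
    binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]

# finds wget on $PATH (or installs it via apt/brew), then symlinks it into
# STORAGE_CONFIG.LIB_DIR / 'bin' as a side effect of load_or_install()
binary = WgetBinary().load_or_install()
print(binary.abspath, binary.version)
```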

View file

@ -1,8 +1,13 @@
__package__ = 'abx.archivebox'
import os
import sys
import re
from pathlib import Path
from typing import Type, Tuple, Callable, ClassVar
from typing import Type, Tuple, Callable, ClassVar, Dict, Any
import toml
from rich import print
from benedict import benedict
from pydantic import model_validator, TypeAdapter
@ -11,15 +16,18 @@ from pydantic_settings.sources import TomlConfigSettingsSource
from pydantic_pkgr import func_takes_args_or_kwargs
import abx
from .base_hook import BaseHook, HookType
from . import toml_util
PACKAGE_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = Path(os.getcwd()).resolve()
ARCHIVEBOX_CONFIG_FILE = DATA_DIR / "ArchiveBox.conf"
ARCHIVEBOX_CONFIG_FILE_BAK = ARCHIVEBOX_CONFIG_FILE.parent / ".ArchiveBox.conf.bak"
AUTOFIXES_HEADER = "[AUTOFIXES]"
AUTOFIXES_SUBHEADER = "# The following config was added automatically to fix problems detected at startup:"
class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
@ -55,7 +63,7 @@ class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
super(TomlConfigSettingsSource, self).__init__(settings_cls, self.toml_data)
class ArchiveBoxBaseConfig(BaseSettings):
class BaseConfigSet(BaseSettings):
"""
This is the base class for an ArchiveBox ConfigSet.
It handles loading values from schema defaults, ArchiveBox.conf TOML config, and environment variables.
@ -85,7 +93,7 @@ class ArchiveBoxBaseConfig(BaseSettings):
loc_by_alias=False,
validate_assignment=True,
validate_return=True,
revalidate_instances="always",
revalidate_instances="subclass-instances",
)
load_from_defaults: ClassVar[bool] = True
@ -103,9 +111,6 @@ class ArchiveBoxBaseConfig(BaseSettings):
) -> Tuple[PydanticBaseSettingsSource, ...]:
"""Defines the config precedence order: Schema defaults -> ArchiveBox.conf (TOML) -> Environment variables"""
ARCHIVEBOX_CONFIG_FILE = DATA_DIR / "ArchiveBox.conf"
ARCHIVEBOX_CONFIG_FILE_BAK = ARCHIVEBOX_CONFIG_FILE.parent / ".ArchiveBox.conf.bak"
# import ipdb; ipdb.set_trace()
precedence_order = {}
@ -154,7 +159,17 @@ class ArchiveBoxBaseConfig(BaseSettings):
def fill_defaults(self):
"""Populate any unset values using function provided as their default"""
for key, field in self.model_fields.items():
for key in self.model_fields.keys():
if isinstance(getattr(self, key), Callable):
if self.load_from_defaults:
computed_default = self.get_default_value(key)
# set generated default value as final validated value
setattr(self, key, computed_default)
return self
def get_default_value(self, key):
"""Get the default value for a given config key"""
field = self.model_fields[key]
value = getattr(self, key)
if isinstance(value, Callable):
@ -170,11 +185,10 @@ class ArchiveBoxBaseConfig(BaseSettings):
# coerce/check to make sure default factory return value matches type annotation
TypeAdapter(field.annotation).validate_python(computed_default)
# set generated default value as final validated value
setattr(self, key, computed_default)
return self
return computed_default
return value
def update_in_place(self, warn=True, **kwargs):
def update_in_place(self, warn=True, persist=False, hint='', **kwargs):
"""
Update the config with new values. Use this sparingly! We should almost never be updating config at runtime.
Sets them in the environment so they propagate to spawned subprocesses / across future re-__init__()s and reload from environment
@ -182,48 +196,106 @@ class ArchiveBoxBaseConfig(BaseSettings):
Example acceptable use case: user config says SEARCH_BACKEND_ENGINE=sonic but sonic_client pip library is not installed so we cannot use it.
SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep') can be used to reset it back to ripgrep so we can continue.
"""
from archivebox.misc.toml_util import CustomTOMLEncoder
if warn:
print('[!] WARNING: Some of the provided user config values cannot be used, temporarily ignoring them:')
fix_scope = 'in ArchiveBox.conf' if persist else 'just for current run'
print(f'[yellow]:warning: WARNING: Some config cannot be used as-is, fixing automatically {fix_scope}:[/yellow] {hint}', file=sys.stderr)
# set the new values in the environment
for key, value in kwargs.items():
os.environ[key] = str(value)
original_value = getattr(self, key)
if warn:
print(f' {key}={original_value} -> {value}')
# if persist=True, write config changes to data/ArchiveBox.conf [AUTOFIXES] section
try:
if persist and ARCHIVEBOX_CONFIG_FILE.is_file():
autofixes_to_add = benedict(kwargs).to_toml(encoder=CustomTOMLEncoder())
existing_config = ARCHIVEBOX_CONFIG_FILE.read_text().split(AUTOFIXES_HEADER, 1)[0].strip()
if AUTOFIXES_HEADER in existing_config:
existing_autofixes = existing_config.split(AUTOFIXES_HEADER, 1)[-1].strip().replace(AUTOFIXES_SUBHEADER, '').replace(AUTOFIXES_HEADER, '').strip()
else:
existing_autofixes = ''
new_config = '\n'.join(line for line in [
existing_config,
'\n' + AUTOFIXES_HEADER,
AUTOFIXES_SUBHEADER,
existing_autofixes,
autofixes_to_add,
] if line.strip()).strip() + '\n'
ARCHIVEBOX_CONFIG_FILE.write_text(new_config)
except Exception:
pass
self.__init__()
if warn:
print(file=sys.stderr)
return self
def as_legacy_config_schema(self):
@property
def toml_section_header(self):
"""Convert the class name to a TOML section header e.g. ShellConfig -> SHELL_CONFIG"""
class_name = self.__class__.__name__
return re.sub('([A-Z]+)', r'_\1', class_name).upper().strip('_')
def from_defaults(self) -> Dict[str, Any]:
"""Get the dictionary of {key: value} config loaded from the default values"""
class OnlyDefaultsConfig(self.__class__):
load_from_defaults = True
load_from_configfile = False
load_from_environment = False
return benedict(OnlyDefaultsConfig().model_dump(exclude_unset=False, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
def from_configfile(self) -> Dict[str, Any]:
"""Get the dictionary of {key: value} config loaded from the configfile ArchiveBox.conf"""
class OnlyConfigFileConfig(self.__class__):
load_from_defaults = False
load_from_configfile = True
load_from_environment = False
return benedict(OnlyConfigFileConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
def from_environment(self) -> Dict[str, Any]:
"""Get the dictionary of {key: value} config loaded from the environment variables"""
class OnlyEnvironmentConfig(self.__class__):
load_from_defaults = False
load_from_configfile = False
load_from_environment = True
return benedict(OnlyEnvironmentConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
def from_computed(self) -> Dict[str, Any]:
"""Get the dictionary of {key: value} config loaded from the computed fields"""
return benedict(self.model_dump(include=set(self.model_computed_fields.keys())))
def to_toml_dict(self, defaults=False) -> Dict[str, Any]:
"""Get the current config as a TOML-ready dict"""
config_dict = {}
for key, value in benedict(self).items():
if defaults or value != self.get_default_value(key):
config_dict[key] = value
return benedict({self.toml_section_header: config_dict})
def to_toml_str(self, defaults=False) -> str:
"""Get the current config as a TOML string"""
from archivebox.misc.toml_util import CustomTOMLEncoder
toml_dict = self.to_toml_dict(defaults=defaults)
if not toml_dict[self.toml_section_header]:
# if the section is empty, don't write it
toml_dict.pop(self.toml_section_header)
return toml.dumps(toml_dict, encoder=CustomTOMLEncoder())
def as_legacy_config_schema(self) -> Dict[str, Any]:
# shim for backwards compatibility with old config schema style
model_values = self.model_dump()
return benedict({
key: {'type': field.annotation, 'default': model_values[key]}
for key, field in self.model_fields.items()
})
class BaseConfigSet(ArchiveBoxBaseConfig, BaseHook): # type: ignore[type-arg]
hook_type: ClassVar[HookType] = 'CONFIG'
# @abx.hookimpl
# def ready(self, settings):
# # reload config from environment, in case it's been changed by any other plugins
# self.__init__()
@abx.hookimpl
def get_CONFIGS(self):
try:
return {self.id: self}
except Exception as e:
# raise Exception(f'Error computing CONFIGS for {type(self)}: {e.__class__.__name__}: {e}')
print(f'Error computing CONFIGS for {type(self)}: {e.__class__.__name__}: {e}')
return {}
@abx.hookimpl
def get_FLAT_CONFIG(self):
try:
return self.model_dump()
except Exception as e:
# raise Exception(f'Error computing FLAT_CONFIG for {type(self)}: {e.__class__.__name__}: {e}')
print(f'Error computing FLAT_CONFIG for {type(self)}: {e.__class__.__name__}: {e}')
return {}
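A sketch of how a subclass interacts with the new persist/TOML helpers (the SearchBackendConfig class and its keys are illustrative):

```python
from pydantic import Field
from abx.archivebox.base_configset import BaseConfigSet

class SearchBackendConfig(BaseConfigSet):        # section header becomes [SEARCH_BACKEND_CONFIG]
    SEARCH_BACKEND_ENGINE: str = Field(default='ripgrep')
    SEARCH_BACKEND_TIMEOUT: int = Field(default=90)

config = SearchBackendConfig()

# runtime autofix: falls back to ripgrep and (optionally) persists the fix
# into the [AUTOFIXES] section of data/ArchiveBox.conf
config.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep', persist=True, hint='sonic client not installed')

print(config.toml_section_header)         # SEARCH_BACKEND_CONFIG
print(config.to_toml_str(defaults=True))  # current values serialized as TOML
```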

View file

@ -14,7 +14,6 @@ from django.utils import timezone
import abx
from .base_hook import BaseHook, HookType
from .base_binary import BaseBinary
@ -28,8 +27,7 @@ HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))]
CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(no_empty_args)]
class BaseExtractor(BaseHook):
hook_type: HookType = 'EXTRACTOR'
class BaseExtractor:
name: ExtractorName
binary: BinName
@ -51,9 +49,9 @@ class BaseExtractor(BaseHook):
def get_output_path(self, snapshot) -> Path:
return Path(self.id.lower())
return Path(self.__class__.__name__.lower())
def should_extract(self, snapshot) -> bool:
def should_extract(self, uri: str, config: dict | None=None) -> bool:
try:
assert self.detect_installed_binary().version
except Exception:
@ -197,8 +195,8 @@ class BaseExtractor(BaseHook):
@cached_property
def BINARY(self) -> BaseBinary:
import abx.archivebox.use
for binary in abx.archivebox.use.get_BINARIES().values():
import abx.archivebox.reads
for binary in abx.archivebox.reads.get_BINARIES().values():
if binary.name == self.binary:
return binary
raise ValueError(f'Binary {self.binary} not found')

View file

@ -1,80 +0,0 @@
__package__ = 'abx.archivebox'
import inspect
from huey.api import TaskWrapper
from pathlib import Path
from typing import Tuple, Literal, ClassVar, get_args
from pydantic import BaseModel, ConfigDict
from django.utils.functional import cached_property
import abx
HookType = Literal['CONFIG', 'BINPROVIDER', 'BINARY', 'EXTRACTOR', 'REPLAYER', 'CHECK', 'ADMINDATAVIEW', 'QUEUE', 'SEARCHBACKEND']
hook_type_names: Tuple[HookType] = get_args(HookType)
class BaseHook(BaseModel):
model_config = ConfigDict(
extra="allow",
arbitrary_types_allowed=True,
from_attributes=True,
populate_by_name=True,
validate_defaults=True,
validate_assignment=False,
revalidate_instances="subclass-instances",
ignored_types=(TaskWrapper, cached_property),
)
hook_type: ClassVar[HookType] # e.g. = 'CONFIG'
# verbose_name: str = Field()
_is_registered: bool = False
_is_ready: bool = False
@property
def id(self) -> str:
return self.__class__.__name__
@property
def hook_module(self) -> str:
"""e.g. plugins_extractor.singlefile.apps.SinglefileConfigSet"""
return f'{self.__module__}.{self.__class__.__name__}'
@property
def hook_file(self) -> Path:
"""e.g. plugins_extractor.singlefile.apps.SinglefileConfigSet"""
return Path(inspect.getfile(self.__class__))
@property
def plugin_module(self) -> str:
"""e.g. plugins_extractor.singlefile"""
return f"{self.__module__}.{self.__class__.__name__}".split("archivebox.", 1)[-1].rsplit(".apps.", 1)[0]
@property
def plugin_dir(self) -> Path:
return Path(inspect.getfile(self.__class__)).parent.resolve()
@property
def admin_url(self) -> str:
# e.g. /admin/environment/config/LdapConfig/
return f"/admin/environment/{self.hook_type.lower()}/{self.id}/"
@abx.hookimpl
def register(self, settings):
"""Called when django.apps.AppConfig.ready() is called"""
# print("REGISTERED HOOK:", self.hook_module)
self._is_registered = True
@abx.hookimpl
def ready(self):
"""Called when django.apps.AppConfig.ready() is called"""
assert self._is_registered, f"Tried to run {self.hook_module}.ready() but it was never registered!"
# print("READY HOOK:", self.hook_module)
self._is_ready = True

View file

@ -1,154 +0,0 @@
__package__ = 'abx.archivebox'
import abx
import inspect
from pathlib import Path
from django.apps import AppConfig
from typing import List, Type, Dict
from typing_extensions import Self
from pydantic import (
BaseModel,
ConfigDict,
Field,
model_validator,
InstanceOf,
computed_field,
)
from benedict import benedict
from .base_hook import BaseHook, HookType
class BasePlugin(BaseModel):
model_config = ConfigDict(
extra='forbid',
arbitrary_types_allowed=True,
populate_by_name=True,
from_attributes=True,
validate_defaults=False,
validate_assignment=False,
revalidate_instances="always",
# frozen=True,
)
# Required by AppConfig:
app_label: str = Field() # e.g. 'singlefile' (one-word machine-readable representation, to use as url-safe id/db-table prefix_/attr name)
verbose_name: str = Field() # e.g. 'SingleFile' (human-readable *short* label, for use in column names, form labels, etc.)
docs_url: str = Field(default=None) # e.g. 'https://github.com/...'
# All the hooks the plugin will install:
hooks: List[InstanceOf[BaseHook]] = Field(default=[])
_is_registered: bool = False
_is_ready: bool = False
@computed_field
@property
def id(self) -> str:
return self.__class__.__name__
@property
def name(self) -> str:
return self.app_label
# @computed_field
@property
def plugin_module(self) -> str: # DottedImportPath
""" "
Dotted import path of the plugin's module (after its loaded via settings.INSTALLED_APPS).
e.g. 'archivebox.plugins_pkg.npm.apps.NpmPlugin' -> 'plugins_pkg.npm'
"""
return f"{self.__module__}.{self.__class__.__name__}".split("archivebox.", 1)[-1].rsplit('.apps.', 1)[0]
@property
def plugin_module_full(self) -> str: # DottedImportPath
"""e.g. 'archivebox.plugins_pkg.npm.apps.NpmPlugin'"""
return f"{self.__module__}.{self.__class__.__name__}"
# @computed_field
@property
def plugin_dir(self) -> Path:
return Path(inspect.getfile(self.__class__)).parent.resolve()
@model_validator(mode='after')
def validate(self) -> Self:
"""Validate the plugin's build-time configuration here before it's registered in Django at runtime."""
# VERY IMPORTANT:
# preserve references to original default objects,
# pydantic deepcopies them by default which breaks mutability
# see https://github.com/pydantic/pydantic/issues/7608
# if we dont do this, then plugins_extractor.SINGLEFILE_CONFIG != settings.CONFIGS.SingleFileConfig for example
# and calling .__init__() on one of them will not update the other
self.hooks = self.model_fields['hooks'].default
assert self.name and self.app_label and self.verbose_name, f'{self.__class__.__name__} is missing .name or .app_label or .verbose_name'
# assert json.dumps(self.model_json_schema(), indent=4), f"Plugin {self.plugin_module} has invalid JSON schema."
return self
@property
def AppConfig(plugin_self) -> Type[AppConfig]:
"""Generate a Django AppConfig class for this plugin."""
class PluginAppConfig(AppConfig):
"""Django AppConfig for plugin, allows it to be loaded as a Django app listed in settings.INSTALLED_APPS."""
name = plugin_self.plugin_module
app_label = plugin_self.app_label
verbose_name = plugin_self.verbose_name
default_auto_field = 'django.db.models.AutoField'
# handled by abx.hookimpl ready()
# def ready(self):
# from django.conf import settings
# plugin_self.ready(settings)
return PluginAppConfig
@property
def HOOKS_BY_ID(self) -> Dict[str, InstanceOf[BaseHook]]:
return benedict({hook.id: hook for hook in self.hooks})
@property
def HOOKS_BY_TYPE(self) -> Dict[HookType, Dict[str, InstanceOf[BaseHook]]]:
hooks = benedict({})
for hook in self.hooks:
hooks[hook.hook_type] = hooks.get(hook.hook_type) or benedict({})
hooks[hook.hook_type][hook.id] = hook
return hooks
@abx.hookimpl
def register(self, settings):
from archivebox.config.legacy import bump_startup_progress_bar
self._is_registered = True
bump_startup_progress_bar()
# print('◣----------------- REGISTERED PLUGIN:', self.plugin_module, '-----------------◢')
# print()
@abx.hookimpl
def ready(self, settings=None):
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
from archivebox.config.legacy import bump_startup_progress_bar
assert self._is_registered, f"Tried to run {self.plugin_module}.ready() but it was never registered!"
self._is_ready = True
# settings.PLUGINS[self.id]._is_ready = True
bump_startup_progress_bar()
@abx.hookimpl
def get_INSTALLED_APPS(self):
return [self.plugin_module]

View file

@ -1,106 +0,0 @@
__package__ = 'abx.archivebox'
import importlib
from typing import Dict, List, TYPE_CHECKING
from pydantic import Field, InstanceOf
from benedict import benedict
if TYPE_CHECKING:
from huey.api import TaskWrapper
import abx
from .base_hook import BaseHook, HookType
from .base_binary import BaseBinary
class BaseQueue(BaseHook):
hook_type: HookType = 'QUEUE'
name: str = Field() # e.g. 'singlefile'
binaries: List[InstanceOf[BaseBinary]] = Field()
@property
def tasks(self) -> Dict[str, 'TaskWrapper']:
"""Return an dict of all the background worker tasks defined in the plugin's tasks.py file."""
tasks = importlib.import_module(f"{self.plugin_module}.tasks")
all_tasks = {}
for task_name, task in tasks.__dict__.items():
# if attr is a Huey task and its queue_name matches our hook's queue name
if hasattr(task, "task_class") and task.huey.name == self.name:
all_tasks[task_name] = task
return benedict(all_tasks)
def get_django_huey_config(self, QUEUE_DATABASE_NAME) -> dict:
"""Get the config dict to insert into django.conf.settings.DJANGO_HUEY['queues']."""
return {
"huey_class": "huey.SqliteHuey",
"filename": QUEUE_DATABASE_NAME,
"name": self.name,
"results": True,
"store_none": True,
"immediate": False,
"utc": True,
"consumer": {
"workers": 1,
"worker_type": "thread",
"initial_delay": 0.1, # Smallest polling interval, same as -d.
"backoff": 1.15, # Exponential backoff using this rate, -b.
"max_delay": 10.0, # Max possible polling interval, -m.
"scheduler_interval": 1, # Check schedule every second, -s.
"periodic": True, # Enable crontab feature.
"check_worker_health": True, # Enable worker health checks.
"health_check_interval": 1, # Check worker health every second.
},
}
def get_supervisord_config(self, settings) -> dict:
"""Ge the config dict used to tell sueprvisord to start a huey consumer for this queue."""
return {
"name": f"worker_{self.name}",
"command": f"archivebox manage djangohuey --queue {self.name}",
"stdout_logfile": f"logs/worker_{self.name}.log",
"redirect_stderr": "true",
"autorestart": "true",
"autostart": "false",
}
def start_supervisord_worker(self, settings, lazy=True):
from queues.supervisor_util import get_or_create_supervisord_process, start_worker
print()
try:
supervisor = get_or_create_supervisord_process(daemonize=False)
except Exception as e:
print(f"Error starting worker for queue {self.name}: {e}")
return None
print()
worker = start_worker(supervisor, self.get_supervisord_config(settings), lazy=lazy)
# Update settings.WORKERS to include this worker
settings.WORKERS = getattr(settings, "WORKERS", None) or benedict({})
settings.WORKERS[self.id] = worker
return worker
@abx.hookimpl
def get_QUEUES(self):
return [self]
@abx.hookimpl
def get_DJANGO_HUEY_QUEUES(self, QUEUE_DATABASE_NAME):
"""queue configs to be added to django.conf.settings.DJANGO_HUEY['queues']"""
return {
self.name: self.get_django_huey_config(QUEUE_DATABASE_NAME)
}
# @abx.hookimpl
# def ready(self, settings):
# self.start_supervisord_worker(settings, lazy=True)
# super().ready(settings)

View file

@ -2,14 +2,10 @@ __package__ = 'abx.archivebox'
import abx
from .base_hook import BaseHook, HookType
class BaseReplayer(BaseHook):
class BaseReplayer:
"""Describes how to render an ArchiveResult in several contexts"""
hook_type: HookType = 'REPLAYER'
url_pattern: str = '*'
row_template: str = 'plugins/generic_replayer/templates/row.html'

View file

@ -1,33 +1,25 @@
__package__ = 'abx.archivebox'
from typing import Iterable, List
from pydantic import Field
import abx
from .base_hook import BaseHook, HookType
import abc
class BaseSearchBackend(BaseHook):
hook_type: HookType = 'SEARCHBACKEND'
name: str = Field() # e.g. 'singlefile'
# TODO: move these to a hookimpl
class BaseSearchBackend(abc.ABC):
name: str
@staticmethod
@abc.abstractmethod
def index(snapshot_id: str, texts: List[str]):
return
@staticmethod
@abc.abstractmethod
def flush(snapshot_ids: Iterable[str]):
return
@staticmethod
@abc.abstractmethod
def search(text: str) -> List[str]:
raise NotImplementedError("search method must be implemented by subclass")
@abx.hookimpl
def get_SEARCHBACKENDS(self):
return [self]

View file

@ -0,0 +1,20 @@
"""
Hookspec for side-effects that ArchiveBox plugins can trigger.
(e.g. network requests, binary execution, remote API calls, external library calls, etc.)
"""
__package__ = 'abx.archivebox'
import abx
@abx.hookspec
def check_remote_seed_connection(urls, extractor, credentials, created_by):
pass
@abx.hookspec
def exec_extractor(url, extractor, credentials, config):
pass
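A plugin would provide a matching @abx.hookimpl with the same name as the spec; a minimal sketch (the requests-based connectivity check is illustrative only):

```python
import abx

@abx.hookimpl
def check_remote_seed_connection(urls, extractor, credentials, created_by):
    # illustrative: verify the remote feed URL is reachable before creating a Seed
    import requests
    requests.head(urls, timeout=10).raise_for_status()
    return urls, extractor, credentials
```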

View file

@ -0,0 +1,45 @@
"""
Hookspec for ArchiveBox system events that plugins can hook into.
Loosely modeled after Django's signals architecture.
https://docs.djangoproject.com/en/5.1/ref/signals/
"""
__package__ = 'abx.archivebox'
import abx
@abx.hookspec
def on_crawl_schedule_tick(crawl_schedule):
pass
@abx.hookspec
def on_seed_post_save(seed, created=False):
...
@abx.hookspec
def on_crawl_post_save(crawl, created=False):
...
@abx.hookspec
def on_snapshot_post_save(snapshot, created=False):
...
# @abx.hookspec
# def on_snapshot_post_delete(snapshot):
# ...
@abx.hookspec
def on_archiveresult_post_save(archiveresult, created=False):
...
# @abx.hookspec
# def on_archiveresult_post_delete(archiveresult):
# ...

View file

@ -4,32 +4,49 @@ from typing import Dict, Any
from .. import hookspec
@hookspec
def get_CONFIGS():
return {}
@hookspec
def get_EXTRACTORS():
return {}
@hookspec
def get_REPLAYERS():
return {}
@hookspec
def get_ADMINDATAVIEWS():
return {}
@hookspec
def get_QUEUES():
return {}
@hookspec
def get_SEARCHBACKENDS():
return {}
from .base_binary import BaseBinary, BaseBinProvider
from .base_configset import BaseConfigSet
from .base_extractor import BaseExtractor
from .base_searchbackend import BaseSearchBackend
@hookspec
def extract(snapshot_id) -> Dict[str, Any]:
def get_PLUGIN() -> Dict[str, Dict[str, Any]]:
return {}
@hookspec
def get_CONFIG() -> Dict[str, BaseConfigSet]:
return {}
@hookspec
def get_EXTRACTORS() -> Dict[str, BaseExtractor]:
return {}
@hookspec
def get_SEARCHBACKENDS() -> Dict[str, BaseSearchBackend]:
return {}
# @hookspec
# def get_REPLAYERS() -> Dict[str, BaseReplayer]:
# return {}
# @hookspec
# def get_ADMINDATAVIEWS():
# return {}
# @hookspec
# def get_QUEUES():
# return {}
##############################################################
# provided by abx.pydantic_pkgr.hookspec:
# @hookspec
# def get_BINARIES() -> Dict[str, BaseBinary]:
# return {}
# @hookspec
# def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
# return {}

View file

@ -0,0 +1,160 @@
__package__ = 'abx.archivebox'
import importlib
from typing import Dict, Set, Any, TYPE_CHECKING
from benedict import benedict
import abx
from .. import pm
if TYPE_CHECKING:
from .base_configset import BaseConfigSet
from .base_binary import BaseBinary, BaseBinProvider
from .base_extractor import BaseExtractor
from .base_searchbackend import BaseSearchBackend
# from .base_replayer import BaseReplayer
# from .base_queue import BaseQueue
# from .base_admindataview import BaseAdminDataView
# API exposed to ArchiveBox code
def get_PLUGINS() -> Dict[str, Dict[str, Any]]:
return benedict({
plugin_id: plugin
for plugin_dict in pm.hook.get_PLUGIN()
for plugin_id, plugin in plugin_dict.items()
})
def get_PLUGIN(plugin_id: str) -> Dict[str, Any]:
plugin_info = get_PLUGINS().get(plugin_id, {})
package = plugin_info.get('package', plugin_info.get('PACKAGE', None))
if not package:
return {'id': plugin_id, 'hooks': {}}
module = importlib.import_module(package)
hooks = abx.get_plugin_hooks(module.__package__)
assert plugin_info and (plugin_info.get('id') or plugin_info.get('ID') or hooks)
return benedict({
'id': plugin_id,
'label': getattr(module, '__label__', plugin_id),
'module': module,
'package': module.__package__,
'hooks': hooks,
'version': getattr(module, '__version__', '999.999.999'),
'author': getattr(module, '__author__', 'Unknown'),
'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox/ArchiveBox'),
'dependencies': getattr(module, '__dependencies__', []),
'source_code': module.__file__,
**plugin_info,
})
def get_HOOKS() -> Set[str]:
return {
hook_name
for plugin_id in get_PLUGINS().keys()
for hook_name in get_PLUGIN(plugin_id).hooks
}
def get_CONFIGS() -> Dict[str, 'BaseConfigSet']:
return benedict({
config_id: configset
for plugin_configs in pm.hook.get_CONFIG()
for config_id, configset in plugin_configs.items()
})
def get_FLAT_CONFIG() -> Dict[str, Any]:
return benedict({
key: value
for configset in get_CONFIGS().values()
for key, value in configset.model_dump().items()
})
def get_BINPROVIDERS() -> Dict[str, 'BaseBinProvider']:
# TODO: move these to plugins
from abx.archivebox.base_binary import apt, brew, env
builtin_binproviders = {
'apt': apt,
'brew': brew,
'env': env,
}
return benedict({
binprovider_id: binprovider
for plugin_binproviders in [builtin_binproviders, *pm.hook.get_BINPROVIDERS()]
for binprovider_id, binprovider in plugin_binproviders.items()
})
def get_BINARIES() -> Dict[str, 'BaseBinary']:
return benedict({
binary_id: binary
for plugin_binaries in pm.hook.get_BINARIES()
for binary_id, binary in plugin_binaries.items()
})
def get_EXTRACTORS() -> Dict[str, 'BaseExtractor']:
return benedict({
extractor_id: extractor
for plugin_extractors in pm.hook.get_EXTRACTORS()
for extractor_id, extractor in plugin_extractors.items()
})
# def get_REPLAYERS() -> Dict[str, 'BaseReplayer']:
# return benedict({
# replayer.id: replayer
# for plugin_replayers in pm.hook.get_REPLAYERS()
# for replayer in plugin_replayers
# })
# def get_ADMINDATAVIEWS() -> Dict[str, 'BaseAdminDataView']:
# return benedict({
# admin_dataview.id: admin_dataview
# for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS()
# for admin_dataview in plugin_admin_dataviews
# })
# def get_QUEUES() -> Dict[str, 'BaseQueue']:
# return benedict({
# queue.id: queue
# for plugin_queues in pm.hook.get_QUEUES()
# for queue in plugin_queues
# })
def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']:
return benedict({
searchbackend_id: searchbackend
for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS()
for searchbackend_id,searchbackend in plugin_searchbackends.items()
})
def get_scope_config(defaults: benedict | None = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
"""Get all the relevant config for the given scope, in correct precedence order"""
from django.conf import settings
default_config: benedict = defaults or settings.CONFIG
snapshot = snapshot or (archiveresult and archiveresult.snapshot)
crawl = crawl or (snapshot and snapshot.crawl)
seed = seed or (crawl and crawl.seed)
persona = persona or (crawl and crawl.persona)
persona_config = persona.config if persona else {}
seed_config = seed.config if seed else {}
crawl_config = crawl.config if crawl else {}
snapshot_config = snapshot.config if snapshot else {}
archiveresult_config = archiveresult.config if archiveresult else {}
extra_config = extra_config or {}
return {
**default_config, # defaults / config file / environment variables
**persona_config, # lowest precedence
**seed_config,
**crawl_config,
**snapshot_config,
**archiveresult_config,
**extra_config, # highest precedence
}
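A usage sketch of the precedence merge (the query and the TIMEOUT key are hypothetical):

```python
import abx.archivebox.reads as reads
from core.models import ArchiveResult  # real model; the query below is illustrative

archiveresult = ArchiveResult.objects.latest('created_at')

# walks archiveresult -> snapshot -> crawl -> seed/persona and merges their
# .config dicts over the global defaults, lowest to highest precedence:
config = reads.get_scope_config(
    archiveresult=archiveresult,
    extra_config={'TIMEOUT': 120},   # hypothetical key; highest precedence
)
timeout = config['TIMEOUT']
```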

View file

@ -1,130 +0,0 @@
__package__ = 'abx.archivebox'
from typing import Dict, Any, TYPE_CHECKING
from django.utils import timezone
from benedict import benedict
from .. import pm
if TYPE_CHECKING:
from .base_hook import BaseHook
from .base_configset import BaseConfigSet
from .base_binary import BaseBinary, BaseBinProvider
from .base_extractor import BaseExtractor
from .base_replayer import BaseReplayer
from .base_queue import BaseQueue
from .base_admindataview import BaseAdminDataView
from .base_searchbackend import BaseSearchBackend
# API exposed to ArchiveBox code
def get_PLUGINS():
return benedict({
plugin.PLUGIN.id: plugin.PLUGIN
for plugin in pm.get_plugins()
})
def get_HOOKS(PLUGINS) -> Dict[str, 'BaseHook']:
return benedict({
hook.id: hook
for plugin in PLUGINS.values()
for hook in plugin.hooks
})
def get_CONFIGS() -> Dict[str, 'BaseConfigSet']:
return benedict({
config_id: config
for plugin_configs in pm.hook.get_CONFIGS()
for config_id, config in plugin_configs.items()
})
def get_FLAT_CONFIG() -> Dict[str, Any]:
return benedict({
key: value
for plugin_config_dict in pm.hook.get_FLAT_CONFIG()
for key, value in plugin_config_dict.items()
})
def get_BINPROVIDERS() -> Dict[str, 'BaseBinProvider']:
# TODO: move these to plugins
from abx.archivebox.base_binary import apt, brew, env
builtin_binproviders = [apt, brew, env]
return benedict({
binprovider.id: binprovider
for plugin_binproviders in [builtin_binproviders, *pm.hook.get_BINPROVIDERS()]
for binprovider in plugin_binproviders
})
def get_BINARIES() -> Dict[str, 'BaseBinary']:
return benedict({
binary.id: binary
for plugin_binaries in pm.hook.get_BINARIES()
for binary in plugin_binaries
})
def get_EXTRACTORS() -> Dict[str, 'BaseExtractor']:
return benedict({
extractor.id: extractor
for plugin_extractors in pm.hook.get_EXTRACTORS()
for extractor in plugin_extractors
})
def get_REPLAYERS() -> Dict[str, 'BaseReplayer']:
return benedict({
replayer.id: replayer
for plugin_replayers in pm.hook.get_REPLAYERS()
for replayer in plugin_replayers
})
def get_ADMINDATAVIEWS() -> Dict[str, 'BaseAdminDataView']:
return benedict({
admin_dataview.id: admin_dataview
for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS()
for admin_dataview in plugin_admin_dataviews
})
def get_QUEUES() -> Dict[str, 'BaseQueue']:
return benedict({
queue.id: queue
for plugin_queues in pm.hook.get_QUEUES()
for queue in plugin_queues
})
def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']:
return benedict({
searchbackend.id: searchbackend
for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS()
for searchbackend in plugin_searchbackends
})
###########################
def register_all_hooks(settings):
pm.hook.register(settings=settings)
def extract(url_or_snapshot_id):
from core.models import Snapshot
url, snapshot_abid, snapshot_id = None, None, None
snapshot = None
if '://' in url_or_snapshot_id:
url = url_or_snapshot_id
try:
snapshot = Snapshot.objects.get(url=url)
except Snapshot.DoesNotExist:
snapshot = Snapshot(url=url_or_snapshot_id, timestamp=str(timezone.now().timestamp()), bookmarked_at=timezone.now())
snapshot.save()
elif '-' in url_or_snapshot_id:
snapshot_id = url_or_snapshot_id
snapshot = Snapshot.objects.get(id=snapshot_id)
else:
snapshot_abid = url_or_snapshot_id
snapshot = Snapshot.objects.get(abid=snapshot_abid)
return pm.hook.extract(snapshot_id=snapshot.id)

View file

@ -0,0 +1,133 @@
__package__ = 'abx.archivebox'
import importlib
from typing import Dict, Set, Any, TYPE_CHECKING
from benedict import benedict
from django.conf import settings
from django.utils import timezone
from .reads import get_scope_config
import abx
from .. import pm
@abx.hookimpl
def get_or_create_snapshot(crawl, url, config):
pass
@abx.hookimpl
def update_crawl_schedule_next_run_at(crawl_schedule, next_run_at):
pass
@abx.hookimpl
def create_crawl_copy(crawl_to_copy, schedule):
pass
@abx.hookimpl
def create_crawl(seed, depth, tags_str, persona, created_by, config, schedule):
pass
def create_crawl_from_ui_action(urls, extractor, credentials, depth, tags_str, persona, created_by, crawl_config):
if seed_is_remote(urls, extractor, credentials):
# user's seed is a remote source that will provide the urls (e.g. RSS feed URL, Pocket API, etc.)
uri, extractor, credentials = abx.archivebox.effects.check_remote_seed_connection(urls, extractor, credentials, created_by)
else:
# user's seed is some raw text they provided to parse for urls, save it to a file then load the file as a Seed
uri = abx.archivebox.writes.write_raw_urls_to_local_file(urls, extractor, tags_str, created_by) # file:///data/sources/some_import.txt
seed = abx.archivebox.writes.get_or_create_seed(uri=uri, extractor=extractor, credentials=credentials, created_by=created_by)
# abx.archivebox.events.on_seed_created(seed)
crawl = abx.archivebox.writes.create_crawl(seed=seed, depth=depth, tags_str=tags_str, persona=persona, created_by=created_by, config=crawl_config, schedule=None)
abx.archivebox.events.on_crawl_created(crawl)
@abx.hookimpl(specname='on_crawl_schedule_tick')
def create_crawl_from_crawlschedule_if_due(crawl_schedule):
# make sure it's not too early to run this scheduled import (makes this function idempotent / safe to call multiple times / every second)
if timezone.now() < crawl_schedule.next_run_at:
# it's not time to run it yet, wait for the next tick
return
else:
# we're going to run it now, bump the next run time so that no one else runs it at the same time as us
abx.archivebox.writes.update_crawl_schedule_next_run_at(crawl_schedule, next_run_at=crawl_schedule.next_run_at + crawl_schedule.interval)
crawl_to_copy = None
try:
crawl_to_copy = crawl_schedule.crawl_set.first() # alternatively use .last() to copy most recent crawl instead of very first crawl
except Crawl.DoesNotExist:
# there is no template crawl to base the next one off of
# user must add at least one crawl to a schedule that serves as the template for all future repeated crawls
return
new_crawl = abx.archivebox.writes.create_crawl_copy(crawl_to_copy=crawl_to_copy, schedule=crawl_schedule)
abx.archivebox.events.on_crawl_created(new_crawl)
@abx.hookimpl(specname='on_crawl_post_save')
def create_root_snapshot_from_seed(crawl):
# create a snapshot for the seed URI which kicks off the crawl
# only a single extractor will run on it, which will produce outlinks which get added back to the crawl
root_snapshot, created = abx.archivebox.writes.get_or_create_snapshot(crawl=crawl, url=crawl.seed.uri, config={
'extractors': (
abx.archivebox.reads.get_extractors_that_produce_outlinks()
if crawl.seed.extractor == 'auto' else
[crawl.seed.extractor]
),
**crawl.seed.config,
})
if created:
abx.archivebox.events.on_snapshot_created(root_snapshot)
abx.archivebox.writes.update_crawl_stats(started_at=timezone.now())
@abx.hookimpl(specname='on_snapshot_created')
def create_archiveresults_pending_from_snapshot(snapshot, config):
config = get_scope_config(
# defaults=settings.CONFIG_FROM_DEFAULTS,
# configfile=settings.CONFIG_FROM_FILE,
# environment=settings.CONFIG_FROM_ENVIRONMENT,
persona=snapshot.crawl.persona,
seed=snapshot.crawl.seed,
crawl=snapshot.crawl,
snapshot=snapshot,
# extra_config=extra_config,
)
extractors = abx.archivebox.reads.get_extractors_for_snapshot(snapshot, config)
for extractor in extractors:
archiveresult, created = abx.archivebox.writes.get_or_create_archiveresult_pending(
snapshot=snapshot,
extractor=extractor,
status='pending'
)
if created:
abx.archivebox.events.on_archiveresult_created(archiveresult)
@abx.hookimpl(specname='on_archiveresult_updated')
def create_snapshots_pending_from_archiveresult_outlinks(archiveresult):
config = get_scope_config(...)
# check if the extractor finished successfully; if not, don't bother checking for outlinks
if archiveresult.status != 'succeeded':
return
# check if we have already reached the maximum recursion depth
hops_to_here = abx.archivebox.reads.get_outlink_parents(crawl_pk=archiveresult.snapshot.crawl_id, url=archiveresult.url, config=config)
if len(hops_to_here) >= archiveresult.snapshot.crawl.max_depth + 1:
return
# parse the output to get outlink url_entries
discovered_urls = abx.archivebox.reads.get_archiveresult_discovered_url_entries(archiveresult, config=config)
for url_entry in discovered_urls:
abx.archivebox.writes.create_outlink_record(src=archiveresult.snapshot.url, dst=url_entry.url, via=archiveresult)
abx.archivebox.writes.create_snapshot(crawl=archiveresult.snapshot.crawl, url_entry=url_entry)
# abx.archivebox.events.on_crawl_updated(archiveresult.snapshot.crawl)

View file

@ -110,6 +110,11 @@ def register_checks():
"""Register django checks with django system checks system"""
pass
@hookspec
def register_admin(admin_site):
"""Register django admin views/models with the main django admin site instance"""
pass
###########################################################################################

View file

@ -96,3 +96,6 @@ def register_checks():
"""register any django system checks"""
pm.hook.register_checks()
def register_admin(admin_site):
"""register any django admin models/views with the main django admin site instance"""
pm.hook.register_admin(admin_site=admin_site)

archivebox/api/admin.py (new file, 31 lines)
View file

@ -0,0 +1,31 @@
__package__ = 'archivebox.api'
from signal_webhooks.admin import WebhookAdmin
from signal_webhooks.utils import get_webhook_model
from abid_utils.admin import ABIDModelAdmin
from api.models import APIToken
class APITokenAdmin(ABIDModelAdmin):
list_display = ('created_at', 'abid', 'created_by', 'token_redacted', 'expires')
sort_fields = ('abid', 'created_at', 'created_by', 'expires')
readonly_fields = ('created_at', 'modified_at', 'abid_info')
search_fields = ('id', 'abid', 'created_by__username', 'token')
fields = ('created_by', 'token', 'expires', *readonly_fields)
list_filter = ('created_by',)
ordering = ['-created_at']
list_per_page = 100
class CustomWebhookAdmin(WebhookAdmin, ABIDModelAdmin):
list_display = ('created_at', 'created_by', 'abid', *WebhookAdmin.list_display)
sort_fields = ('created_at', 'created_by', 'abid', 'referenced_model', 'endpoint', 'last_success', 'last_error')
readonly_fields = ('created_at', 'modified_at', 'abid_info', *WebhookAdmin.readonly_fields)
def register_admin(admin_site):
admin_site.register(APIToken, APITokenAdmin)
admin_site.register(get_webhook_model(), CustomWebhookAdmin)

View file

@ -2,10 +2,14 @@ __package__ = 'archivebox.api'
from django.apps import AppConfig
import abx
class APIConfig(AppConfig):
name = 'api'
def ready(self):
pass
@abx.hookimpl
def register_admin(admin_site):
from api.admin import register_admin
register_admin(admin_site)

View file

@ -6,7 +6,6 @@ from typing import List, Optional, Union, Any
from datetime import datetime
from django.db.models import Q
from django.shortcuts import get_object_or_404
from django.core.exceptions import ValidationError
from django.contrib.auth import get_user_model
@ -16,7 +15,6 @@ from ninja.errors import HttpError
from core.models import Snapshot, ArchiveResult, Tag
from api.models import APIToken, OutboundWebhook
from abid_utils.abid import ABID
from .auth import API_AUTH_METHODS
@ -397,11 +395,70 @@ def get_tag(request, tag_id: str, with_snapshots: bool=True):
# class CrawlSchema(Schema):
# TYPE: str = 'core.models.Crawl'
# id: UUID
# abid: str
# modified_at: datetime
# created_at: datetime
# created_by_id: str
# created_by_username: str
# urls: str
# depth: int
# parser: str
# # snapshots: List[SnapshotSchema]
# @staticmethod
# def resolve_created_by_id(obj):
# return str(obj.created_by_id)
# @staticmethod
# def resolve_created_by_username(obj):
# User = get_user_model()
# return User.objects.get(id=obj.created_by_id).username
# @staticmethod
# def resolve_snapshots(obj, context):
# if context['request'].with_snapshots:
# return obj.snapshot_set.all().distinct()
# return Snapshot.objects.none()
# @router.get("/crawl/{crawl_id}", response=CrawlSchema, url_name="get_crawl")
# def get_crawl(request, crawl_id: str, with_snapshots: bool=False, with_archiveresults: bool=False):
# """Get a specific Crawl by id or abid."""
# crawl = None
# request.with_snapshots = with_snapshots
# request.with_archiveresults = with_archiveresults
# try:
# crawl = Crawl.objects.get(abid__icontains=crawl_id)
# except Exception:
# pass
# try:
# crawl = crawl or Crawl.objects.get(id__icontains=crawl_id)
# except Exception:
# pass
# return crawl
# [..., CrawlSchema]
@router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema], url_name="get_any")
def get_any(request, abid: str):
request.with_snapshots = False
request.with_archiveresults = False
if abid.startswith(APIToken.abid_prefix):
raise HttpError(403, 'APIToken objects are not accessible via REST API')
if abid.startswith(OutboundWebhook.abid_prefix):
raise HttpError(403, 'OutboundWebhook objects are not accessible via REST API')
response = None
try:
response = response or get_snapshot(request, abid)
@@ -418,10 +475,12 @@ def get_any(request, abid: str):
except Exception:
pass
if abid.startswith(APIToken.abid_prefix):
raise HttpError(403, 'APIToken objects are not accessible via REST API')
if abid.startswith(OutboundWebhook.abid_prefix):
raise HttpError(403, 'OutboundWebhook objects are not accessible via REST API')
# try:
# response = response or get_crawl(request, abid)
# except Exception:
# pass
if not response:
raise HttpError(404, 'Object with given ABID not found')
return response

View file

@@ -164,13 +164,18 @@ def run_subcommand(subcommand: str,
# print('DATA_DIR is', DATA_DIR)
# print('pwd is', os.getcwd())
cmd_requires_db = subcommand in archive_cmds
cmd_requires_db = (subcommand in archive_cmds)
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
check_db = cmd_requires_db and not init_pending
setup_django(in_memory_db=subcommand in fake_db, check_db=check_db)
for ignore_pattern in ('help', '-h', '--help', 'version', '--version'):
if ignore_pattern in sys.argv[:4]:
cmd_requires_db = False
break
if subcommand in archive_cmds:
if cmd_requires_db:
check_migrations()

View file

@@ -5,5 +5,34 @@ from .paths import (
DATA_DIR, # noqa
ARCHIVE_DIR, # noqa
)
from .constants import CONSTANTS, CONSTANTS_CONFIG # noqa
from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
from .version import VERSION # noqa
import abx
# @abx.hookimpl
# def get_INSTALLED_APPS():
# return ['config']
@abx.hookimpl
def get_CONFIG():
from .common import (
SHELL_CONFIG,
STORAGE_CONFIG,
GENERAL_CONFIG,
SERVER_CONFIG,
ARCHIVING_CONFIG,
SEARCH_BACKEND_CONFIG,
)
return {
'SHELL_CONFIG': SHELL_CONFIG,
'STORAGE_CONFIG': STORAGE_CONFIG,
'GENERAL_CONFIG': GENERAL_CONFIG,
'SERVER_CONFIG': SERVER_CONFIG,
'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
}
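For context, a hedged sketch of how these per-plugin get_CONFIG() hookimpls are typically merged into one flat mapping (illustrative only; the real abx read helpers may differ):
def collect_config(pm) -> dict:
    # pm.hook.get_CONFIG() returns one dict per registered plugin (pluggy semantics);
    # in this naive merge, later plugins win on key collisions
    merged = {}
    for plugin_config in pm.hook.get_CONFIG():
        merged.update(plugin_config)
    return merged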

View file

@@ -1,57 +0,0 @@
__package__ = 'archivebox.config'
from typing import List
from pydantic import InstanceOf
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_hook import BaseHook
from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
from .common import (
ShellConfig, # noqa: F401
StorageConfig, # noqa: F401
GeneralConfig, # noqa: F401
ServerConfig, # noqa: F401
ArchivingConfig, # noqa: F401
SearchBackendConfig, # noqa: F401
SHELL_CONFIG,
STORAGE_CONFIG,
GENERAL_CONFIG,
SERVER_CONFIG,
ARCHIVING_CONFIG,
SEARCH_BACKEND_CONFIG,
)
###################### Config ##########################
class ConfigPlugin(BasePlugin):
app_label: str = 'CONFIG'
verbose_name: str = 'Configuration'
hooks: List[InstanceOf[BaseHook]] = [
SHELL_CONFIG,
GENERAL_CONFIG,
STORAGE_CONFIG,
SERVER_CONFIG,
ARCHIVING_CONFIG,
SEARCH_BACKEND_CONFIG,
]
PLUGIN = ConfigPlugin()
DJANGO_APP = PLUGIN.AppConfig
# # register django apps
# @abx.hookimpl
# def get_INSTALLED_APPS():
# return [DJANGO_APP.name]
# # register configs
# @abx.hookimpl
# def register_CONFIG():
# return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values()

View file

@@ -1,18 +1,18 @@
__package__ = 'archivebox.config'
import os
import sys
import shutil
import tempfile
from typing import Dict, Optional
from pathlib import Path
from rich import print
from pydantic import Field, field_validator, computed_field
from pydantic import Field, field_validator, computed_field, model_validator
from django.utils.crypto import get_random_string
from abx.archivebox.base_configset import BaseConfigSet
from .constants import CONSTANTS
from .version import get_COMMIT_HASH, get_BUILD_TIME
from .permissions import IN_DOCKER
@@ -35,7 +35,6 @@ class ShellConfig(BaseConfigSet):
VERSIONS_AVAILABLE: bool = False # .check_for_update.get_versions_available_on_github(c)},
CAN_UPGRADE: bool = False # .check_for_update.can_upgrade(c)},
@computed_field
@property
def TERM_WIDTH(self) -> int:
@@ -57,6 +56,16 @@ SHELL_CONFIG = ShellConfig()
class StorageConfig(BaseConfigSet):
# TMP_DIR must be a local, fast dir that is readable/writable by the archivebox user,
# must be a short path due to unix path length restrictions for socket files (<100 chars)
# must be a local SSD/tmpfs for speed, and because bind mounts/network mounts/FUSE don't support unix sockets
TMP_DIR: Path = Field(default=CONSTANTS.DEFAULT_TMP_DIR)
# LIB_DIR must be a local, fast dir that is readable/writable by the archivebox user,
# must be able to contain executable binaries (up to 5GB in size)
# should not be a remote/network/FUSE mount for speed reasons, otherwise extractors will be slow
LIB_DIR: Path = Field(default=CONSTANTS.DEFAULT_LIB_DIR)
OUTPUT_PERMISSIONS: str = Field(default='644')
RESTRICT_FILE_NAMES: str = Field(default='windows')
ENFORCE_ATOMIC_WRITES: bool = Field(default=True)
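The "<100 chars" comment stems from the AF_UNIX limit: sockaddr_un.sun_path is only ~104-108 bytes on common platforms, so socket files under TMP_DIR must stay well below that. A hedged sanity check (illustrative, not the actual validator used in this codebase):
from pathlib import Path

def tmp_dir_fits_unix_sockets(tmp_dir: Path, socket_name: str = 'supervisord.sock') -> bool:
    # leave headroom under the ~104-108 byte sockaddr_un.sun_path limit
    return len(str(tmp_dir / socket_name).encode()) < 100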

View file

@@ -1,6 +1,5 @@
__package__ = 'archivebox.config'
import os
import re
import sys
@@ -97,14 +96,10 @@ class ConstantsDict(Mapping):
# Runtime dirs
TMP_DIR_NAME: str = 'tmp'
TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME / MACHINE_ID
DEFAULT_TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME / MACHINE_ID # ./data/tmp/abc3244323
LIB_DIR_NAME: str = 'lib'
LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / MACHINE_TYPE
LIB_PIP_DIR: Path = LIB_DIR / 'pip'
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'
LIB_BIN_DIR: Path = LIB_DIR / 'bin'
BIN_DIR: Path = LIB_BIN_DIR
DEFAULT_LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / MACHINE_TYPE # ./data/lib/arm64-linux-docker
# Config constants
TIMEZONE: str = 'UTC'
@@ -199,90 +194,6 @@ class ConstantsDict(Mapping):
"Dockerfile",
))
CODE_LOCATIONS = benedict({
'PACKAGE_DIR': {
'path': (PACKAGE_DIR).resolve(),
'enabled': True,
'is_valid': os.access(PACKAGE_DIR / '__main__.py', os.X_OK), # executable
},
'TEMPLATES_DIR': {
'path': TEMPLATES_DIR.resolve(),
'enabled': True,
'is_valid': os.access(STATIC_DIR, os.R_OK) and os.access(STATIC_DIR, os.X_OK), # read + list
},
'CUSTOM_TEMPLATES_DIR': {
'path': CUSTOM_TEMPLATES_DIR.resolve(),
'enabled': os.path.isdir(CUSTOM_TEMPLATES_DIR),
'is_valid': os.path.isdir(CUSTOM_TEMPLATES_DIR) and os.access(CUSTOM_TEMPLATES_DIR, os.R_OK), # read
},
'USER_PLUGINS_DIR': {
'path': USER_PLUGINS_DIR.resolve(),
'enabled': os.path.isdir(USER_PLUGINS_DIR),
'is_valid': os.path.isdir(USER_PLUGINS_DIR) and os.access(USER_PLUGINS_DIR, os.R_OK), # read
},
'LIB_DIR': {
'path': LIB_DIR.resolve(),
'enabled': True,
'is_valid': os.path.isdir(LIB_DIR) and os.access(LIB_DIR, os.R_OK) and os.access(LIB_DIR, os.W_OK), # read + write
},
})
DATA_LOCATIONS = benedict({
"DATA_DIR": {
"path": DATA_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(DATA_DIR) and os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK),
"is_mount": os.path.ismount(DATA_DIR.resolve()),
},
"CONFIG_FILE": {
"path": CONFIG_FILE.resolve(),
"enabled": True,
"is_valid": os.path.isfile(CONFIG_FILE) and os.access(CONFIG_FILE, os.R_OK) and os.access(CONFIG_FILE, os.W_OK),
},
"SQL_INDEX": {
"path": DATABASE_FILE.resolve(),
"enabled": True,
"is_valid": os.path.isfile(DATABASE_FILE) and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
"is_mount": os.path.ismount(DATABASE_FILE.resolve()),
},
"QUEUE_DATABASE": {
"path": QUEUE_DATABASE_FILE.resolve(),
"enabled": True,
"is_valid": os.path.isfile(QUEUE_DATABASE_FILE) and os.access(QUEUE_DATABASE_FILE, os.R_OK) and os.access(QUEUE_DATABASE_FILE, os.W_OK),
"is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()),
},
"ARCHIVE_DIR": {
"path": ARCHIVE_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(ARCHIVE_DIR) and os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK),
"is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
},
"SOURCES_DIR": {
"path": SOURCES_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(SOURCES_DIR) and os.access(SOURCES_DIR, os.R_OK) and os.access(SOURCES_DIR, os.W_OK),
},
"PERSONAS_DIR": {
"path": PERSONAS_DIR.resolve(),
"enabled": os.path.isdir(PERSONAS_DIR),
"is_valid": os.path.isdir(PERSONAS_DIR) and os.access(PERSONAS_DIR, os.R_OK) and os.access(PERSONAS_DIR, os.W_OK), # read + write
},
"LOGS_DIR": {
"path": LOGS_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(LOGS_DIR) and os.access(LOGS_DIR, os.R_OK) and os.access(LOGS_DIR, os.W_OK), # read + write
},
'TMP_DIR': {
'path': TMP_DIR.resolve(),
'enabled': True,
'is_valid': os.path.isdir(TMP_DIR) and os.access(TMP_DIR, os.R_OK) and os.access(TMP_DIR, os.W_OK), # read + write
},
# "CACHE_DIR": {
# "path": CACHE_DIR.resolve(),
# "enabled": True,
# "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK), # read + write
# },
})
@classmethod
def __getitem__(cls, key: str):

View file

@@ -50,13 +50,11 @@ from ..misc.logging import (
)
from .common import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
from archivebox.plugins_extractor.wget.apps import WGET_CONFIG
from archivebox.plugins_extractor.curl.apps import CURL_CONFIG
from archivebox.plugins_extractor.favicon.config import FAVICON_CONFIG
from archivebox.plugins_extractor.wget.config import WGET_CONFIG
from archivebox.plugins_extractor.curl.config import CURL_CONFIG
ANSI = SHELL_CONFIG.ANSI
LDAP = LDAP_CONFIG.LDAP_ENABLED
############################### Config Schema ##################################
@@ -73,8 +71,6 @@ CONFIG_SCHEMA: Dict[str, Dict[str, Any]] = {
'STORAGE_CONFIG': STORAGE_CONFIG.as_legacy_config_schema(),
'LDAP_CONFIG': LDAP_CONFIG.as_legacy_config_schema(),
# 'FAVICON_CONFIG': FAVICON_CONFIG.as_legacy_config_schema(),
# 'WGET_CONFIG': WGET_CONFIG.as_legacy_config_schema(),
@@ -263,6 +259,9 @@ def load_config_val(key: str,
elif type is list or type is dict:
return json.loads(val)
elif type is Path:
return Path(val)
raise Exception('Config values can only be str, bool, int, Path, or json')
@@ -578,7 +577,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
with SudoPermission(uid=0):
# running as root is a special case where it's ok to be a bit slower
# make sure data dir is always owned by the correct user
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"')
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}" 2>/dev/null')
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"/* 2>/dev/null')
bump_startup_progress_bar()

View file

@@ -1,12 +1,16 @@
__package__ = 'archivebox.config'
import os
import socket
import hashlib
import tempfile
import platform
from pathlib import Path
from functools import cache
from datetime import datetime
from benedict import benedict
from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER
#############################################################################################
@@ -41,7 +45,8 @@ def _get_collection_id(DATA_DIR=DATA_DIR, force_create=False) -> str:
try:
# only persist collection_id file if we already have an index.sqlite3 file present
# otherwise we might be running in a directory that is not a collection, no point creating cruft files
if os.path.isfile(DATABASE_FILE) and os.access(DATA_DIR, os.W_OK) or force_create:
collection_is_active = os.path.isfile(DATABASE_FILE) and os.path.isdir(ARCHIVE_DIR) and os.access(DATA_DIR, os.W_OK)
if collection_is_active or force_create:
collection_id_file.write_text(collection_id)
# if we're running as root right now, make sure the collection_id file is owned by the archivebox user
@@ -87,7 +92,7 @@ def get_machine_type() -> str:
return LIB_DIR_SCOPE
def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True) -> bool:
def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True, chown=True) -> bool:
"""Check if a given directory is writable by a specific user and group (fallback=try as current user is unable to check with provided uid)"""
current_uid, current_gid = os.geteuid(), os.getegid()
uid, gid = uid or current_uid, gid or current_gid
@@ -100,10 +105,197 @@ def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = No
test_file.unlink()
return True
except (IOError, OSError, PermissionError):
pass
if chown:
# try fixing it using sudo permissions
with SudoPermission(uid=uid, fallback=fallback):
os.system(f'chown {uid}:{gid} "{dir_path}" 2>/dev/null')
return dir_is_writable(dir_path, uid=uid, gid=gid, fallback=fallback, chown=False)
return False
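Note the chown=False on the recursive retry above: the sudo chown fix-up runs at most once before giving up. The same retry-once-after-fixup guard in isolation (an illustrative sketch, not ArchiveBox code):
def retry_once_after_fixup(action, fixup, allow_fixup=True):
    try:
        return action()
    except PermissionError:
        if allow_fixup:
            fixup()  # e.g. chown the directory, then retry exactly once
            return retry_once_after_fixup(action, fixup, allow_fixup=False)
        raise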
def assert_dir_can_contain_unix_sockets(dir_path: Path) -> bool:
"""Check if a given directory can contain unix sockets (e.g. /tmp/supervisord.sock)"""
from archivebox.logging_util import pretty_path
try:
socket_path = str(dir_path / '.test_socket.sock')
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
try:
os.remove(socket_path)
except OSError:
pass
s.bind(socket_path)
s.close()
try:
os.remove(socket_path)
except OSError:
pass
except Exception as e:
raise Exception(f'ArchiveBox failed to create a test UNIX socket file in {pretty_path(dir_path, color=False)}') from e
return True
def create_and_chown_dir(dir_path: Path) -> None:
with SudoPermission(uid=0, fallback=True):
dir_path.mkdir(parents=True, exist_ok=True)
os.system(f'chown {ARCHIVEBOX_USER} "{dir_path}" 2>/dev/null')
os.system(f'chown {ARCHIVEBOX_USER} "{dir_path}"/* 2>/dev/null &')
@cache
def get_or_create_working_tmp_dir(autofix=True, quiet=False):
from archivebox import CONSTANTS
from archivebox.config.common import STORAGE_CONFIG
from archivebox.misc.checks import check_tmp_dir
# try a few potential directories in order of preference
CANDIDATES = [
STORAGE_CONFIG.TMP_DIR, # <user-specified>
CONSTANTS.DEFAULT_TMP_DIR, # ./data/tmp/<machine_id>
Path('/var/run/archivebox') / get_collection_id(), # /var/run/archivebox/abc5d8512
Path('/tmp') / 'archivebox' / get_collection_id(), # /tmp/archivebox/abc5d8512
Path('~/.tmp/archivebox').expanduser() / get_collection_id(), # ~/.tmp/archivebox/abc5d8512
Path(tempfile.gettempdir()) / 'archivebox' / get_collection_id(), # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/abc5d8512
Path(tempfile.gettempdir()) / 'archivebox' / get_collection_id()[:4], # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/abc5d
Path(tempfile.gettempdir()) / 'abx' / get_collection_id()[:4], # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/abx/abc5
]
for candidate in CANDIDATES:
try:
create_and_chown_dir(candidate)
except Exception:
pass
if check_tmp_dir(candidate, throw=False, quiet=True, must_exist=True):
if autofix and STORAGE_CONFIG.TMP_DIR != candidate:
STORAGE_CONFIG.update_in_place(TMP_DIR=candidate, warn=not quiet)
return candidate
if not quiet:
raise OSError(f'ArchiveBox is unable to find a writable TMP_DIR, tried {CANDIDATES}!')
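A hedged usage sketch (assuming the module-level names above): callers resolve the runtime dir once at startup and reuse it for sockets and pid files.
tmp_dir = get_or_create_working_tmp_dir(autofix=True, quiet=False)  # first writable CANDIDATE wins
supervisord_sock = tmp_dir / 'supervisord.sock'  # hypothetical consumer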
@cache
def get_or_create_working_lib_dir(autofix=True, quiet=False):
from archivebox import CONSTANTS
from archivebox.config.common import STORAGE_CONFIG
from archivebox.misc.checks import check_lib_dir
# try a few potential directories in order of preference
CANDIDATES = [
STORAGE_CONFIG.LIB_DIR, # <user-specified>
CONSTANTS.DEFAULT_LIB_DIR, # ./data/lib/arm64-linux-docker
Path('/usr/local/share/archivebox') / get_collection_id(), # /usr/local/share/archivebox/abc5
*([Path('/opt/homebrew/share/archivebox') / get_collection_id()] if os.path.isfile('/opt/homebrew/bin/archivebox') else []), # /opt/homebrew/share/archivebox/abc5
Path('~/.local/share/archivebox').expanduser() / get_collection_id(), # ~/.local/share/archivebox/abc5
]
for candidate in CANDIDATES:
try:
create_and_chown_dir(candidate)
except Exception:
pass
if check_lib_dir(candidate, throw=False, quiet=True, must_exist=True):
if autofix and STORAGE_CONFIG.LIB_DIR != candidate:
STORAGE_CONFIG.update_in_place(LIB_DIR=candidate, warn=not quiet)
return candidate
if not quiet:
raise OSError(f'ArchiveBox is unable to find a writable LIB_DIR, tried {CANDIDATES}!')
@cache
def get_data_locations():
from archivebox.config import CONSTANTS
from archivebox.config.common import STORAGE_CONFIG
return benedict({
"DATA_DIR": {
"path": DATA_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(DATA_DIR) and os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK),
"is_mount": os.path.ismount(DATA_DIR.resolve()),
},
"CONFIG_FILE": {
"path": CONSTANTS.CONFIG_FILE.resolve(),
"enabled": True,
"is_valid": os.path.isfile(CONSTANTS.CONFIG_FILE) and os.access(CONSTANTS.CONFIG_FILE, os.R_OK) and os.access(CONSTANTS.CONFIG_FILE, os.W_OK),
},
"SQL_INDEX": {
"path": DATABASE_FILE.resolve(),
"enabled": True,
"is_valid": os.path.isfile(DATABASE_FILE) and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
"is_mount": os.path.ismount(DATABASE_FILE.resolve()),
},
"QUEUE_DATABASE": {
"path": CONSTANTS.QUEUE_DATABASE_FILE,
"enabled": True,
"is_valid": os.path.isfile(CONSTANTS.QUEUE_DATABASE_FILE) and os.access(CONSTANTS.QUEUE_DATABASE_FILE, os.R_OK) and os.access(CONSTANTS.QUEUE_DATABASE_FILE, os.W_OK),
"is_mount": os.path.ismount(CONSTANTS.QUEUE_DATABASE_FILE),
},
"ARCHIVE_DIR": {
"path": ARCHIVE_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(ARCHIVE_DIR) and os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK),
"is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
},
"SOURCES_DIR": {
"path": CONSTANTS.SOURCES_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(CONSTANTS.SOURCES_DIR) and os.access(CONSTANTS.SOURCES_DIR, os.R_OK) and os.access(CONSTANTS.SOURCES_DIR, os.W_OK),
},
"PERSONAS_DIR": {
"path": CONSTANTS.PERSONAS_DIR.resolve(),
"enabled": os.path.isdir(CONSTANTS.PERSONAS_DIR),
"is_valid": os.path.isdir(CONSTANTS.PERSONAS_DIR) and os.access(CONSTANTS.PERSONAS_DIR, os.R_OK) and os.access(CONSTANTS.PERSONAS_DIR, os.W_OK), # read + write
},
"LOGS_DIR": {
"path": CONSTANTS.LOGS_DIR.resolve(),
"enabled": True,
"is_valid": os.path.isdir(CONSTANTS.LOGS_DIR) and os.access(CONSTANTS.LOGS_DIR, os.R_OK) and os.access(CONSTANTS.LOGS_DIR, os.W_OK), # read + write
},
'TMP_DIR': {
'path': STORAGE_CONFIG.TMP_DIR.resolve(),
'enabled': True,
'is_valid': os.path.isdir(STORAGE_CONFIG.TMP_DIR) and os.access(STORAGE_CONFIG.TMP_DIR, os.R_OK) and os.access(STORAGE_CONFIG.TMP_DIR, os.W_OK), # read + write
},
# "CACHE_DIR": {
# "path": CACHE_DIR.resolve(),
# "enabled": True,
# "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK), # read + write
# },
})
@cache
def get_code_locations():
from archivebox.config import CONSTANTS
from archivebox.config.common import STORAGE_CONFIG
return benedict({
'PACKAGE_DIR': {
'path': (PACKAGE_DIR).resolve(),
'enabled': True,
'is_valid': os.access(PACKAGE_DIR / '__main__.py', os.X_OK), # executable
},
'TEMPLATES_DIR': {
'path': CONSTANTS.TEMPLATES_DIR.resolve(),
'enabled': True,
'is_valid': os.access(CONSTANTS.STATIC_DIR, os.R_OK) and os.access(CONSTANTS.STATIC_DIR, os.X_OK), # read + list
},
'CUSTOM_TEMPLATES_DIR': {
'path': CONSTANTS.CUSTOM_TEMPLATES_DIR.resolve(),
'enabled': os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR),
'is_valid': os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR) and os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK), # read
},
'USER_PLUGINS_DIR': {
'path': CONSTANTS.USER_PLUGINS_DIR.resolve(),
'enabled': os.path.isdir(CONSTANTS.USER_PLUGINS_DIR),
'is_valid': os.path.isdir(CONSTANTS.USER_PLUGINS_DIR) and os.access(CONSTANTS.USER_PLUGINS_DIR, os.R_OK), # read
},
'LIB_DIR': {
'path': STORAGE_CONFIG.LIB_DIR.resolve(),
'enabled': True,
'is_valid': os.path.isdir(STORAGE_CONFIG.LIB_DIR) and os.access(STORAGE_CONFIG.LIB_DIR, os.R_OK) and os.access(STORAGE_CONFIG.LIB_DIR, os.W_OK), # read + write
},
})
# @cache

View file

@@ -2,6 +2,7 @@ __package__ = 'abx.archivebox'
import os
import inspect
from pathlib import Path
from typing import Any, List, Dict, cast
from benedict import benedict
@@ -13,6 +14,8 @@ from django.utils.html import format_html, mark_safe
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
import abx.archivebox.reads
from archivebox.config import CONSTANTS
from archivebox.misc.util import parse_date
@@ -82,8 +85,12 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
if '_BINARY' in key or '_VERSION' in key
}
for plugin in settings.PLUGINS.values():
for binary in plugin.HOOKS_BY_TYPE.get('BINARY', {}).values():
for plugin_id, plugin in abx.archivebox.reads.get_PLUGINS().items():
plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
if not plugin.hooks.get('get_BINARIES'):
continue
for binary in plugin.hooks.get_BINARIES().values():
try:
installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary)
binary = installed_binary.load_from_db()
@@ -92,7 +99,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
rows['Binary Name'].append(ItemLink(binary.name, key=binary.name))
rows['Found Version'].append(f'{binary.loaded_version}' if binary.loaded_version else '❌ missing')
rows['From Plugin'].append(plugin.plugin_module)
rows['From Plugin'].append(plugin.package)
rows['Provided By'].append(
', '.join(
f'[{binprovider.name}]' if binprovider.name == getattr(binary.loaded_binprovider, 'name', None) else binprovider.name
@@ -128,11 +135,16 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
binary = None
plugin = None
for loaded_plugin in settings.PLUGINS.values():
for loaded_binary in loaded_plugin.HOOKS_BY_TYPE.get('BINARY', {}).values():
for plugin_id in abx.archivebox.reads.get_PLUGINS().keys():
loaded_plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
try:
for loaded_binary in loaded_plugin.hooks.get_BINARIES().values():
if loaded_binary.name == key:
binary = loaded_binary
plugin = loaded_plugin
# break # last write wins
except Exception as e:
print(e)
assert plugin and binary, f'Could not find a binary matching the specified name: {key}'
@@ -149,7 +161,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
"name": binary.name,
"description": binary.abspath,
"fields": {
'plugin': plugin.name,
'plugin': plugin.package,
'binprovider': binary.loaded_binprovider,
'abspath': binary.loaded_abspath,
'version': binary.loaded_version,
@@ -170,28 +182,68 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
rows = {
"Name": [],
"verbose_name": [],
"module": [],
"source_code": [],
"hooks": [],
"Label": [],
"Version": [],
"Author": [],
"Package": [],
"Source Code": [],
"Config": [],
"Binaries": [],
"Package Managers": [],
# "Search Backends": [],
}
config_colors = {
'_BINARY': '#339',
'USE_': 'green',
'SAVE_': 'green',
'_ARGS': '#33e',
'KEY': 'red',
'COOKIES': 'red',
'AUTH': 'red',
'SECRET': 'red',
'TOKEN': 'red',
'PASSWORD': 'red',
'TIMEOUT': '#533',
'RETRIES': '#533',
'MAX': '#533',
'MIN': '#533',
}
def get_color(key):
for pattern, color in config_colors.items():
if pattern in key:
return color
return 'black'
for plugin in settings.PLUGINS.values():
# try:
# plugin.load_binaries()
# except Exception as e:
# print(e)
for plugin_id in settings.PLUGINS.keys():
rows['Name'].append(ItemLink(plugin.id, key=plugin.id))
rows['verbose_name'].append(mark_safe(f'<a href="{plugin.docs_url}" target="_blank">{plugin.verbose_name}</a>'))
rows['module'].append(str(plugin.plugin_module))
rows['source_code'].append(str(plugin.plugin_dir))
rows['hooks'].append(mark_safe(', '.join(
f'<a href="{hook.admin_url}">{hook.id}</a>'
for hook in plugin.hooks
plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {})
plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {})
plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {})
rows['Label'].append(ItemLink(plugin.label, key=plugin.package))
rows['Version'].append(str(plugin.version))
rows['Author'].append(mark_safe(f'<a href="{plugin.homepage}" target="_blank">{plugin.author}</a>'))
rows['Package'].append(ItemLink(plugin.package, key=plugin.package))
rows['Source Code'].append(format_html('<code>{}</code>', str(plugin.source_code).replace(str(Path('~').expanduser()), '~')))
rows['Config'].append(mark_safe(''.join(
f'<a href="/admin/environment/config/{key}/"><b><code style="color: {get_color(key)};">{key}</code></b>=<code>{value}</code></a><br/>'
for configdict in plugin.hooks.get_CONFIG().values()
for key, value in benedict(configdict).items()
)))
rows['Binaries'].append(mark_safe(', '.join(
f'<a href="/admin/environment/binaries/{binary.name}/"><code>{binary.name}</code></a>'
for binary in plugin.hooks.get_BINARIES().values()
)))
rows['Package Managers'].append(mark_safe(', '.join(
f'<a href="/admin/environment/binproviders/{binprovider.name}/"><code>{binprovider.name}</code></a>'
for binprovider in plugin.hooks.get_BINPROVIDERS().values()
)))
# rows['Search Backends'].append(mark_safe(', '.join(
# f'<a href="/admin/environment/searchbackends/{searchbackend.name}/"><code>{searchbackend.name}</code></a>'
# for searchbackend in plugin.SEARCHBACKENDS.values()
# )))
return TableContext(
title="Installed plugins",
@@ -203,28 +255,33 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
plugin = None
for loaded_plugin in settings.PLUGINS.values():
if loaded_plugin.id == key:
plugin = loaded_plugin
plugin_id = None
for check_plugin_id, loaded_plugin in settings.PLUGINS.items():
if check_plugin_id.split('.')[-1] == key.split('.')[-1]:
plugin_id = check_plugin_id
break
assert plugin, f'Could not find a plugin matching the specified name: {key}'
assert plugin_id, f'Could not find a plugin matching the specified name: {key}'
try:
plugin = plugin.load_binaries()
except Exception as e:
print(e)
plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
return ItemContext(
slug=key,
title=key,
data=[
{
"name": plugin.id,
"description": plugin.verbose_name,
"name": plugin.package,
"description": plugin.label,
"fields": {
"id": plugin.id,
"package": plugin.package,
"label": plugin.label,
"version": plugin.version,
"author": plugin.author,
"homepage": plugin.homepage,
"dependencies": getattr(plugin, 'DEPENDENCIES', []),
"source_code": plugin.source_code,
"hooks": plugin.hooks,
"schema": obj_to_yaml(plugin.model_dump(include=("name", "verbose_name", "app_label", "hooks"))),
},
"help_texts": {
# TODO

View file

@@ -1,859 +1,20 @@
__package__ = 'archivebox.core'
import os
from django.contrib.auth import get_user_model
from pathlib import Path
from django.contrib import admin, messages
from django.urls import path, reverse, resolve
from django.utils import timezone
from django.utils.functional import cached_property
from django.utils.html import format_html
from django.utils.safestring import mark_safe
from django.contrib.auth import get_user_model, get_permission_codename
from django.contrib.auth.admin import UserAdmin
from django.core.paginator import Paginator
from django.core.exceptions import ValidationError
from django.template import Template, RequestContext
from django.conf import settings
from django import forms
from signal_webhooks.admin import WebhookAdmin
from signal_webhooks.utils import get_webhook_model
from archivebox.config import VERSION, DATA_DIR
from archivebox.misc.util import htmldecode, urldecode
from core.models import Snapshot, ArchiveResult, Tag
from core.mixins import SearchResultsAdminMixin
from api.models import APIToken
from abid_utils.admin import ABIDModelAdmin
from queues.tasks import bg_archive_links, bg_add
from machine.models import Machine, NetworkInterface, InstalledBinary
from core.admin_tags import TagAdmin
from core.admin_snapshots import SnapshotAdmin
from core.admin_archiveresults import ArchiveResultAdmin
from core.admin_users import UserAdmin
from index.html import snapshot_icons
from logging_util import printable_filesize
from main import remove
from extractors import archive_links
import abx
CONFIG = settings.FLAT_CONFIG
GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False}
# Admin URLs
# /admin/
# /admin/login/
# /admin/core/
# /admin/core/snapshot/
# /admin/core/snapshot/:uuid/
# /admin/core/tag/
# /admin/core/tag/:uuid/
# TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel
class ArchiveBoxAdmin(admin.AdminSite):
site_header = 'ArchiveBox'
index_title = 'Links'
site_title = 'Index'
namespace = 'admin'
class CustomUserAdmin(UserAdmin):
sort_fields = ['id', 'email', 'username', 'is_superuser', 'last_login', 'date_joined']
list_display = ['username', 'id', 'email', 'is_superuser', 'last_login', 'date_joined']
readonly_fields = ('snapshot_set', 'archiveresult_set', 'tag_set', 'apitoken_set', 'outboundwebhook_set')
fieldsets = [*UserAdmin.fieldsets, ('Data', {'fields': readonly_fields})]
@admin.display(description='Snapshots')
def snapshot_set(self, obj):
total_count = obj.snapshot_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
snap.pk,
snap.abid,
snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
snap.url[:64],
)
for snap in obj.snapshot_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/core/snapshot/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
@admin.display(description='Archive Result Logs')
def archiveresult_set(self, obj):
total_count = obj.archiveresult_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
result.pk,
result.abid,
result.snapshot.downloaded_at.strftime('%Y-%m-%d %H:%M') if result.snapshot.downloaded_at else 'pending...',
result.extractor,
result.snapshot.url[:64],
)
for result in obj.archiveresult_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/core/archiveresult/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
@admin.display(description='Tags')
def tag_set(self, obj):
total_count = obj.tag_set.count()
return mark_safe(', '.join(
format_html(
'<code><a href="/admin/core/tag/{}/change"><b>{}</b></a></code>',
tag.pk,
tag.name,
)
for tag in obj.tag_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/core/tag/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
@admin.display(description='API Tokens')
def apitoken_set(self, obj):
total_count = obj.apitoken_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/api/apitoken/{}/change"><b>[{}]</b></a></code> {} (expires {})',
apitoken.pk,
apitoken.abid,
apitoken.token_redacted[:64],
apitoken.expires,
)
for apitoken in obj.apitoken_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/api/apitoken/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
@admin.display(description='API Outbound Webhooks')
def outboundwebhook_set(self, obj):
total_count = obj.outboundwebhook_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/api/outboundwebhook/{}/change"><b>[{}]</b></a></code> {} -> {}',
outboundwebhook.pk,
outboundwebhook.abid,
outboundwebhook.referenced_model,
outboundwebhook.endpoint,
)
for outboundwebhook in obj.outboundwebhook_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/api/outboundwebhook/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
archivebox_admin = ArchiveBoxAdmin()
archivebox_admin.register(get_user_model(), CustomUserAdmin)
archivebox_admin.disable_action('delete_selected')
# archivebox_admin.register(CustomPlugin)
# patch admin with methods to add data views (implemented by admin_data_views package)
# https://github.com/MrThearMan/django-admin-data-views
# https://mrthearman.github.io/django-admin-data-views/setup/
############### Additional sections are defined in settings.ADMIN_DATA_VIEWS #########
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin)
from huey_monitor.apps import HueyMonitorConfig
HueyMonitorConfig.verbose_name = 'Background Workers'
from huey_monitor.admin import TaskModel, TaskModelAdmin, SignalInfoModel, SignalInfoModelAdmin
archivebox_admin.register(SignalInfoModel, SignalInfoModelAdmin)
class CustomTaskModelAdmin(TaskModelAdmin):
actions = ["delete_selected"]
def has_delete_permission(self, request, obj=None):
codename = get_permission_codename("delete", self.opts)
return request.user.has_perm("%s.%s" % (self.opts.app_label, codename))
archivebox_admin.register(TaskModel, CustomTaskModelAdmin)
def result_url(result: TaskModel) -> str:
url = reverse("admin:huey_monitor_taskmodel_change", args=[str(result.id)])
return format_html('<a href="{url}" class="fade-in-progress-url">See progress...</a>'.format(url=url))
class AccelleratedPaginator(Paginator):
"""
Accelerated Paginator ignores DISTINCT when counting the total number of rows.
Speeds up SELECT Count(*) on Admin views by >20x.
https://hakibenita.com/optimizing-the-django-admin-paginator
"""
@cached_property
def count(self):
if self.object_list._has_filters(): # type: ignore
# fallback to normal count method on filtered queryset
return super().count
else:
# otherwise count total rows in a separate fast query
return self.object_list.model.objects.count()
# Alternative approach for PostgreSQL: fallback count takes > 200ms
# from django.db import connection, transaction, OperationalError
# with transaction.atomic(), connection.cursor() as cursor:
# cursor.execute('SET LOCAL statement_timeout TO 200;')
# try:
# return super().count
# except OperationalError:
# return 9999999999999
class ArchiveResultInline(admin.TabularInline):
name = 'Archive Results Log'
model = ArchiveResult
parent_model = Snapshot
# fk_name = 'snapshot'
extra = 0
sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output')
# exclude = ('id',)
ordering = ('end_ts',)
show_change_link = True
# # classes = ['collapse']
# # list_display_links = ['abid']
def get_parent_object_from_request(self, request):
resolved = resolve(request.path_info)
try:
return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
except (self.parent_model.DoesNotExist, ValidationError):
return self.parent_model.objects.get(pk=self.parent_model.id_from_abid(resolved.kwargs['object_id']))
@admin.display(
description='Completed',
ordering='end_ts',
)
def completed(self, obj):
return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
def result_id(self, obj):
return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
def command(self, obj):
return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
def version(self, obj):
return format_html('<small><code>{}</code></small>', obj.cmd_version or '-')
def get_formset(self, request, obj=None, **kwargs):
formset = super().get_formset(request, obj, **kwargs)
snapshot = self.get_parent_object_from_request(request)
# import ipdb; ipdb.set_trace()
# formset.form.base_fields['id'].widget = formset.form.base_fields['id'].hidden_widget()
# default values for new entries
formset.form.base_fields['status'].initial = 'succeeded'
formset.form.base_fields['start_ts'].initial = timezone.now()
formset.form.base_fields['end_ts'].initial = timezone.now()
formset.form.base_fields['cmd_version'].initial = '-'
formset.form.base_fields['pwd'].initial = str(snapshot.link_dir)
formset.form.base_fields['created_by'].initial = request.user
formset.form.base_fields['cmd'] = forms.JSONField(initial=['-'])
formset.form.base_fields['output'].initial = 'Manually recorded cmd output...'
if obj is not None:
# hidden values for existing entries and new entries
formset.form.base_fields['start_ts'].widget = formset.form.base_fields['start_ts'].hidden_widget()
formset.form.base_fields['end_ts'].widget = formset.form.base_fields['end_ts'].hidden_widget()
formset.form.base_fields['cmd'].widget = formset.form.base_fields['cmd'].hidden_widget()
formset.form.base_fields['pwd'].widget = formset.form.base_fields['pwd'].hidden_widget()
formset.form.base_fields['created_by'].widget = formset.form.base_fields['created_by'].hidden_widget()
formset.form.base_fields['cmd_version'].widget = formset.form.base_fields['cmd_version'].hidden_widget()
return formset
def get_readonly_fields(self, request, obj=None):
if obj is not None:
return self.readonly_fields
else:
return []
class TagInline(admin.TabularInline):
model = Tag.snapshot_set.through # type: ignore
# fk_name = 'snapshot'
fields = ('id', 'tag')
extra = 1
# min_num = 1
max_num = 1000
autocomplete_fields = (
'tag',
)
from django.contrib.admin.helpers import ActionForm
from django.contrib.admin.widgets import FilteredSelectMultiple
# class AutocompleteTags:
# model = Tag
# search_fields = ['name']
# name = 'name'
# # source_field = 'name'
# remote_field = Tag._meta.get_field('name')
# class AutocompleteTagsAdminStub:
# name = 'admin'
class SnapshotActionForm(ActionForm):
tags = forms.ModelMultipleChoiceField(
label='Edit tags',
queryset=Tag.objects.all(),
required=False,
widget=FilteredSelectMultiple(
'core_tag__name',
False,
),
)
# TODO: allow selecting actions for specific extractors? is this useful?
# extractor = forms.ChoiceField(
# choices=ArchiveResult.EXTRACTOR_CHOICES,
# required=False,
# widget=forms.MultileChoiceField(attrs={'class': "form-control"})
# )
@admin.register(Snapshot, site=archivebox_admin)
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
list_display = ('created_at', 'title_str', 'files', 'size', 'url_str')
sort_fields = ('title_str', 'url_str', 'created_at')
readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
fields = ('url', 'title', 'created_by', 'bookmarked_at', *readonly_fields)
ordering = ['-created_at']
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
inlines = [TagInline, ArchiveResultInline]
list_per_page = min(max(5, CONFIG.SNAPSHOTS_PER_PAGE), 5000)
action_form = SnapshotActionForm
paginator = AccelleratedPaginator
save_on_top = True
show_full_result_count = False
def changelist_view(self, request, extra_context=None):
self.request = request
extra_context = extra_context or {}
try:
return super().changelist_view(request, extra_context | GLOBAL_CONTEXT)
except Exception as e:
self.message_user(request, f'Error occurred while loading the page: {str(e)} {request.GET} {request.POST}')
return super().changelist_view(request, GLOBAL_CONTEXT)
def get_urls(self):
urls = super().get_urls()
custom_urls = [
path('grid/', self.admin_site.admin_view(self.grid_view), name='grid')
]
return custom_urls + urls
# def get_queryset(self, request):
# # tags_qs = SnapshotTag.objects.all().select_related('tag')
# # prefetch = Prefetch('snapshottag_set', queryset=tags_qs)
# self.request = request
# return super().get_queryset(request).prefetch_related('archiveresult_set').distinct() # .annotate(archiveresult_count=Count('archiveresult'))
@admin.action(
description="Imported Timestamp"
)
def imported_timestamp(self, obj):
context = RequestContext(self.request, {
'bookmarked_date': obj.bookmarked,
'timestamp': obj.timestamp,
})
html = Template("""{{bookmarked_date}} (<code>{{timestamp}}</code>)""")
return mark_safe(html.render(context))
# pretty_time = obj.bookmarked.strftime('%Y-%m-%d %H:%M:%S')
# return f'{pretty_time} ({obj.timestamp})'
# TODO: figure out a different way to do this, you can't nest forms so this doesn't work
# def action(self, obj):
# # csrfmiddlewaretoken: Wa8UcQ4fD3FJibzxqHN3IYrrjLo4VguWynmbzzcPYoebfVUnDovon7GEMYFRgsh0
# # action: update_snapshots
# # select_across: 0
# # _selected_action: 76d29b26-2a88-439e-877c-a7cca1b72bb3
# return format_html(
# '''
# <form action="/admin/core/snapshot/" method="post" onsubmit="e => e.stopPropagation()">
# <input type="hidden" name="csrfmiddlewaretoken" value="{}">
# <input type="hidden" name="_selected_action" value="{}">
# <button name="update_snapshots">Check</button>
# <button name="update_titles">Pull title + favicon</button>
# <button name="update_snapshots">Update</button>
# <button name="overwrite_snapshots">Re-Archive (overwrite)</button>
# <button name="delete_snapshots">Permanently delete</button>
# </form>
# ''',
# csrf.get_token(self.request),
# obj.pk,
# )
def admin_actions(self, obj):
return format_html(
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
'''
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}">Summary page </a> &nbsp; &nbsp;
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}/index.html#all">Result files 📑</a> &nbsp; &nbsp;
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/admin/core/snapshot/?id__exact={}">Admin actions </a>
''',
obj.timestamp,
obj.timestamp,
obj.pk,
)
def status_info(self, obj):
return format_html(
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
'''
Archived: {} ({} files {}) &nbsp; &nbsp;
Favicon: <img src="{}" style="height: 20px"/> &nbsp; &nbsp;
Status code: {} &nbsp; &nbsp;<br/>
Server: {} &nbsp; &nbsp;
Content type: {} &nbsp; &nbsp;
Extension: {} &nbsp; &nbsp;
''',
'✅' if obj.is_archived else '❌',
obj.num_outputs,
self.size(obj) or '0kb',
f'/archive/{obj.timestamp}/favicon.ico',
obj.status_code or '-',
obj.headers and obj.headers.get('Server') or '-',
obj.headers and obj.headers.get('Content-Type') or '-',
obj.extension or '-',
)
@admin.display(
description='Title',
ordering='title',
)
def title_str(self, obj):
tags = ''.join(
format_html('<a href="/admin/core/snapshot/?tags__id__exact={}"><span class="tag">{}</span></a> ', tag.pk, tag.name)
for tag in obj.tags.all()
if str(tag.name).strip()
)
return format_html(
'<a href="/{}">'
'<img src="/{}/favicon.ico" class="favicon" onerror="this.remove()">'
'</a>'
'<a href="/{}/index.html">'
'<b class="status-{}">{}</b>'
'</a>',
obj.archive_path,
obj.archive_path,
obj.archive_path,
'fetched' if obj.latest_title or obj.title else 'pending',
urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...'
) + mark_safe(f' <span class="tags">{tags}</span>')
@admin.display(
description='Files Saved',
# ordering='archiveresult_count',
)
def files(self, obj):
# return '-'
return snapshot_icons(obj)
@admin.display(
# ordering='archiveresult_count'
)
def size(self, obj):
archive_size = os.access(Path(obj.link_dir) / 'index.html', os.F_OK) and obj.archive_size
if archive_size:
size_txt = printable_filesize(archive_size)
if archive_size > 52428800:
size_txt = mark_safe(f'<b>{size_txt}</b>')
else:
size_txt = mark_safe('<span style="opacity: 0.3">...</span>')
return format_html(
'<a href="/{}" title="View all files">{}</a>',
obj.archive_path,
size_txt,
)
@admin.display(
description='Original URL',
ordering='url',
)
def url_str(self, obj):
return format_html(
'<a href="{}"><code style="user-select: all;">{}</code></a>',
obj.url,
obj.url[:128],
)
def grid_view(self, request, extra_context=None):
# cl = self.get_changelist_instance(request)
# Save before monkey patching to restore for changelist list view
saved_change_list_template = self.change_list_template
saved_list_per_page = self.list_per_page
saved_list_max_show_all = self.list_max_show_all
# Monkey patch here plus core_tags.py
self.change_list_template = 'private_index_grid.html'
self.list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
self.list_max_show_all = self.list_per_page
# Call monkey patched view
rendered_response = self.changelist_view(request, extra_context=extra_context)
# Restore values
self.change_list_template = saved_change_list_template
self.list_per_page = saved_list_per_page
self.list_max_show_all = saved_list_max_show_all
return rendered_response
# for debugging, uncomment this to print all requests:
# def changelist_view(self, request, extra_context=None):
# print('[*] Got request', request.method, request.POST)
# return super().changelist_view(request, extra_context=None)
@admin.action(
description=" Get Title"
)
def update_titles(self, request, queryset):
links = [snapshot.as_link() for snapshot in queryset]
if len(links) < 3:
# run synchronously if there are only 1 or 2 links
archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=DATA_DIR)
messages.success(request, f"Title and favicon have been fetched and saved for {len(links)} URLs.")
else:
# otherwise run in a background worker
result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": DATA_DIR})
messages.success(
request,
mark_safe(f"Title and favicon are updating in the background for {len(links)} URLs. {result_url(result)}"),
)
@admin.action(
description="⬇️ Get Missing"
)
def update_snapshots(self, request, queryset):
links = [snapshot.as_link() for snapshot in queryset]
result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": DATA_DIR})
messages.success(
request,
mark_safe(f"Re-trying any previously failed methods for {len(links)} URLs in the background. {result_url(result)}"),
)
@admin.action(
description="🆕 Archive Again"
)
def resnapshot_snapshot(self, request, queryset):
for snapshot in queryset:
timestamp = timezone.now().isoformat('T', 'seconds')
new_url = snapshot.url.split('#')[0] + f'#{timestamp}'
result = bg_add({'urls': new_url, 'tag': snapshot.tags_str()})
messages.success(
request,
mark_safe(f"Creating new fresh snapshots for {queryset.count()} URLs in the background. {result_url(result)}"),
)
@admin.action(
description="🔄 Redo"
)
def overwrite_snapshots(self, request, queryset):
links = [snapshot.as_link() for snapshot in queryset]
result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": DATA_DIR})
messages.success(
request,
mark_safe(f"Clearing all previous results and re-downloading {len(links)} URLs in the background. {result_url(result)}"),
)
@admin.action(
description="☠️ Delete"
)
def delete_snapshots(self, request, queryset):
remove(snapshots=queryset, yes=True, delete=True, out_dir=DATA_DIR)
messages.success(
request,
mark_safe(f"Succesfully deleted {queryset.count()} Snapshots. Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed."),
)
@admin.action(
description="+"
)
def add_tags(self, request, queryset):
tags = request.POST.getlist('tags')
print('[+] Adding tags', tags, 'to Snapshots', queryset)
for obj in queryset:
obj.tags.add(*tags)
messages.success(
request,
f"Added {len(tags)} tags to {queryset.count()} Snapshots.",
)
@admin.action(
description=""
)
def remove_tags(self, request, queryset):
tags = request.POST.getlist('tags')
print('[-] Removing tags', tags, 'from Snapshots', queryset)
for obj in queryset:
obj.tags.remove(*tags)
messages.success(
request,
f"Removed {len(tags)} tags from {queryset.count()} Snapshots.",
)
# @admin.register(SnapshotTag, site=archivebox_admin)
# class SnapshotTagAdmin(ABIDModelAdmin):
# list_display = ('id', 'snapshot', 'tag')
# sort_fields = ('id', 'snapshot', 'tag')
# search_fields = ('id', 'snapshot_id', 'tag_id')
# fields = ('snapshot', 'id')
# actions = ['delete_selected']
# ordering = ['-id']
@admin.register(Tag, site=archivebox_admin)
class TagAdmin(ABIDModelAdmin):
list_display = ('created_at', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots')
list_filter = ('created_at', 'created_by')
sort_fields = ('name', 'slug', 'abid', 'created_by', 'created_at')
readonly_fields = ('slug', 'abid', 'created_at', 'modified_at', 'abid_info', 'snapshots')
search_fields = ('abid', 'name', 'slug')
fields = ('name', 'created_by', *readonly_fields)
actions = ['delete_selected']
ordering = ['-created_at']
paginator = AccelleratedPaginator
def num_snapshots(self, tag):
return format_html(
'<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
tag.id,
tag.snapshot_set.count(),
)
def snapshots(self, tag):
total_count = tag.snapshot_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> {}',
snap.pk,
snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
snap.url[:64],
)
for snap in tag.snapshot_set.order_by('-downloaded_at')[:10]
) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">{total_count} total snapshots...</a>'))
@admin.register(ArchiveResult, site=archivebox_admin)
class ArchiveResultAdmin(ABIDModelAdmin):
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
sort_fields = ('start_ts', 'extractor', 'status')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
autocomplete_fields = ['snapshot']
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
ordering = ['-start_ts']
list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
paginator = AccelleratedPaginator
save_on_top = True
actions = ['delete_selected']
class Meta:
verbose_name = 'Archive Result'
verbose_name_plural = 'Archive Results'
def change_view(self, request, object_id, form_url="", extra_context=None):
self.request = request
return super().change_view(request, object_id, form_url, extra_context)
@admin.display(
description='Snapshot Info'
)
def snapshot_info(self, result):
return format_html(
'<a href="/archive/{}/index.html"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>',
result.snapshot.timestamp,
result.snapshot.abid,
result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
result.snapshot.url[:128],
)
@admin.display(
description='Snapshot Tags'
)
def tags_str(self, result):
return result.snapshot.tags_str()
def cmd_str(self, result):
return format_html(
'<pre>{}</pre>',
' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
)
def output_str(self, result):
return format_html(
'<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
result.snapshot.timestamp,
result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
result.output,
)
def output_summary(self, result):
snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
output_str = format_html(
'<pre style="display: inline-block">{}</pre><br/>',
result.output,
)
output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp))
path_from_output_str = (snapshot_dir / result.output)
output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
if os.access(path_from_output_str, os.R_OK):
root_dir = str(path_from_output_str)
else:
root_dir = str(snapshot_dir)
# print(root_dir, str(list(os.walk(root_dir))))
for root, dirs, files in os.walk(root_dir):
depth = root.replace(root_dir, '').count(os.sep) + 1
if depth > 2:
continue
indent = ' ' * 4 * (depth)
output_str += format_html('<b style="padding: 1px">{}{}/</b><br/>', indent, os.path.basename(root))
indentation_str = ' ' * 4 * (depth + 1)
for filename in sorted(files):
is_hidden = filename.startswith('.')
output_str += format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip())
return output_str + format_html('</code></pre>')
@admin.register(APIToken, site=archivebox_admin)
class APITokenAdmin(ABIDModelAdmin):
list_display = ('created_at', 'abid', 'created_by', 'token_redacted', 'expires')
sort_fields = ('abid', 'created_at', 'created_by', 'expires')
readonly_fields = ('created_at', 'modified_at', 'abid_info')
search_fields = ('id', 'abid', 'created_by__username', 'token')
fields = ('created_by', 'token', 'expires', *readonly_fields)
list_filter = ('created_by',)
ordering = ['-created_at']
list_per_page = 100
@admin.register(get_webhook_model(), site=archivebox_admin)
class CustomWebhookAdmin(WebhookAdmin, ABIDModelAdmin):
list_display = ('created_at', 'created_by', 'abid', *WebhookAdmin.list_display)
sort_fields = ('created_at', 'created_by', 'abid', 'referenced_model', 'endpoint', 'last_success', 'last_error')
readonly_fields = ('created_at', 'modified_at', 'abid_info', *WebhookAdmin.readonly_fields)
@admin.register(Machine, site=archivebox_admin)
class MachineAdmin(ABIDModelAdmin):
list_display = ('abid', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid', 'health')
sort_fields = ('abid', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid')
# search_fields = ('id', 'abid', 'guid', 'hostname', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family', 'os_platform', 'os_kernel', 'os_release')
readonly_fields = ('guid', 'created_at', 'modified_at', 'abid_info', 'ips')
fields = (*readonly_fields, 'hostname', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family', 'os_platform', 'os_kernel', 'os_release', 'stats', 'num_uses_succeeded', 'num_uses_failed')
list_filter = ('hw_in_docker', 'hw_in_vm', 'os_arch', 'os_family', 'os_platform')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
@admin.display(
description='Public IP',
ordering='networkinterface__ip_public',
)
def ips(self, machine):
return format_html(
'<a href="/admin/machine/networkinterface/?q={}"><b><code>{}</code></b></a>',
machine.abid,
', '.join(machine.networkinterface_set.values_list('ip_public', flat=True)),
)
@admin.register(NetworkInterface, site=archivebox_admin)
class NetworkInterfaceAdmin(ABIDModelAdmin):
list_display = ('abid', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address', 'health')
sort_fields = ('abid', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address')
search_fields = ('abid', 'machine__abid', 'iface', 'ip_public', 'ip_local', 'mac_address', 'dns_server', 'hostname', 'isp', 'city', 'region', 'country')
readonly_fields = ('machine', 'created_at', 'modified_at', 'abid_info', 'mac_address', 'ip_public', 'ip_local', 'dns_server')
fields = (*readonly_fields, 'iface', 'hostname', 'isp', 'city', 'region', 'country', 'num_uses_succeeded', 'num_uses_failed')
list_filter = ('isp', 'country', 'region')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
@admin.display(
description='Machine',
ordering='machine__abid',
)
def machine_info(self, iface):
return format_html(
'<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> &nbsp; {}</a>',
iface.machine.id,
iface.machine.abid,
iface.machine.hostname,
)
@admin.register(InstalledBinary, site=archivebox_admin)
class InstalledBinaryAdmin(ABIDModelAdmin):
list_display = ('abid', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'health')
sort_fields = ('abid', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256')
search_fields = ('abid', 'machine__abid', 'name', 'binprovider', 'version', 'abspath', 'sha256')
readonly_fields = ('created_at', 'modified_at', 'abid_info')
fields = ('machine', 'name', 'binprovider', 'abspath', 'version', 'sha256', *readonly_fields, 'num_uses_succeeded', 'num_uses_failed')
list_filter = ('name', 'binprovider', 'machine_id')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
@admin.display(
description='Machine',
ordering='machine__abid',
)
def machine_info(self, installed_binary):
return format_html(
'<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> &nbsp; {}</a>',
installed_binary.machine.id,
installed_binary.machine.abid,
installed_binary.machine.hostname,
)
@abx.hookimpl
def register_admin(admin_site):
admin_site.register(get_user_model(), UserAdmin)
admin_site.register(ArchiveResult, ArchiveResultAdmin)
admin_site.register(Snapshot, SnapshotAdmin)
admin_site.register(Tag, TagAdmin)

View file

@ -0,0 +1,199 @@
__package__ = 'archivebox.core'
import os
from pathlib import Path
from django.contrib import admin
from django.utils.html import format_html, mark_safe
from django.core.exceptions import ValidationError
from django.urls import reverse, resolve
from django.utils import timezone
from django import forms
from huey_monitor.admin import TaskModel
import abx
from archivebox.config import DATA_DIR
from archivebox.config.common import SERVER_CONFIG
from archivebox.misc.paginators import AccelleratedPaginator
from abid_utils.admin import ABIDModelAdmin
from core.models import ArchiveResult, Snapshot
def result_url(result: TaskModel) -> str:
url = reverse("admin:huey_monitor_taskmodel_change", args=[str(result.id)])
return format_html('<a href="{url}" class="fade-in-progress-url">See progress...</a>'.format(url=url))
class ArchiveResultInline(admin.TabularInline):
name = 'Archive Results Log'
model = ArchiveResult
parent_model = Snapshot
# fk_name = 'snapshot'
extra = 0
sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output')
# exclude = ('id',)
ordering = ('end_ts',)
show_change_link = True
# # classes = ['collapse']
# # list_display_links = ['abid']
def get_parent_object_from_request(self, request):
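# the URL's object_id may be either a raw primary key or an ABID:
# try the pk lookup first, then fall back to decoding it as an ABID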
resolved = resolve(request.path_info)
try:
return self.parent_model.objects.get(pk=resolved.kwargs['object_id'])
except (self.parent_model.DoesNotExist, ValidationError):
return self.parent_model.objects.get(pk=self.parent_model.id_from_abid(resolved.kwargs['object_id']))
@admin.display(
description='Completed',
ordering='end_ts',
)
def completed(self, obj):
return format_html('<p style="white-space: nowrap">{}</p>', obj.end_ts.strftime('%Y-%m-%d %H:%M:%S'))
def result_id(self, obj):
return format_html('<a href="{}"><code style="font-size: 10px">[{}]</code></a>', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid)
def command(self, obj):
return format_html('<small><code>{}</code></small>', " ".join(obj.cmd or []))
def version(self, obj):
return format_html('<small><code>{}</code></small>', obj.cmd_version or '-')
def get_formset(self, request, obj=None, **kwargs):
formset = super().get_formset(request, obj, **kwargs)
snapshot = self.get_parent_object_from_request(request)
# import ipdb; ipdb.set_trace()
# formset.form.base_fields['id'].widget = formset.form.base_fields['id'].hidden_widget()
# default values for new entries
formset.form.base_fields['status'].initial = 'succeeded'
formset.form.base_fields['start_ts'].initial = timezone.now()
formset.form.base_fields['end_ts'].initial = timezone.now()
formset.form.base_fields['cmd_version'].initial = '-'
formset.form.base_fields['pwd'].initial = str(snapshot.link_dir)
formset.form.base_fields['created_by'].initial = request.user
formset.form.base_fields['cmd'] = forms.JSONField(initial=['-'])
formset.form.base_fields['output'].initial = 'Manually recorded cmd output...'
if obj is not None:
# hidden values for existing entries and new entries
formset.form.base_fields['start_ts'].widget = formset.form.base_fields['start_ts'].hidden_widget()
formset.form.base_fields['end_ts'].widget = formset.form.base_fields['end_ts'].hidden_widget()
formset.form.base_fields['cmd'].widget = formset.form.base_fields['cmd'].hidden_widget()
formset.form.base_fields['pwd'].widget = formset.form.base_fields['pwd'].hidden_widget()
formset.form.base_fields['created_by'].widget = formset.form.base_fields['created_by'].hidden_widget()
formset.form.base_fields['cmd_version'].widget = formset.form.base_fields['cmd_version'].hidden_widget()
return formset
def get_readonly_fields(self, request, obj=None):
if obj is not None:
return self.readonly_fields
else:
return []
class ArchiveResultAdmin(ABIDModelAdmin):
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
sort_fields = ('start_ts', 'extractor', 'status')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
autocomplete_fields = ['snapshot']
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
ordering = ['-start_ts']
list_per_page = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
paginator = AccelleratedPaginator
save_on_top = True
actions = ['delete_selected']
class Meta:
verbose_name = 'Archive Result'
verbose_name_plural = 'Archive Results'
def change_view(self, request, object_id, form_url="", extra_context=None):
self.request = request
return super().change_view(request, object_id, form_url, extra_context)
@admin.display(
description='Snapshot Info'
)
def snapshot_info(self, result):
return format_html(
'<a href="/archive/{}/index.html"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>',
result.snapshot.timestamp,
result.snapshot.abid,
result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
result.snapshot.url[:128],
)
@admin.display(
description='Snapshot Tags'
)
def tags_str(self, result):
return result.snapshot.tags_str()
def cmd_str(self, result):
return format_html(
'<pre>{}</pre>',
' '.join(result.cmd) if isinstance(result.cmd, list) else str(result.cmd),
)
def output_str(self, result):
return format_html(
'<a href="/archive/{}/{}" class="output-link">↗️</a><pre>{}</pre>',
result.snapshot.timestamp,
result.output if (result.status == 'succeeded') and result.extractor not in ('title', 'archive_org') else 'index.html',
result.output,
)
def output_summary(self, result):
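# re-anchor the recorded pwd under the current DATA_DIR, in case the data dir moved since this result was saved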
snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
output_str = format_html(
'<pre style="display: inline-block">{}</pre><br/>',
result.output,
)
output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp))
path_from_output_str = (snapshot_dir / result.output)
output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
if os.access(path_from_output_str, os.R_OK):
root_dir = str(path_from_output_str)
else:
root_dir = str(snapshot_dir)
# print(root_dir, str(list(os.walk(root_dir))))
for root, dirs, files in os.walk(root_dir):
depth = root.replace(root_dir, '').count(os.sep) + 1
if depth > 2:
continue
indent = ' ' * 4 * (depth)
output_str += format_html('<b style="padding: 1px">{}{}/</b><br/>', indent, os.path.basename(root))
indentation_str = ' ' * 4 * (depth + 1)
for filename in sorted(files):
is_hidden = filename.startswith('.')
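# dim hidden files: int(not is_hidden) yields 'opacity: 0.2' for dotfiles and 'opacity: 1.2' (clamped to 1 by the browser) otherwise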
output_str += format_html('<span style="opacity: {}.2">{}{}</span><br/>', int(not is_hidden), indentation_str, filename.strip())
return output_str + format_html('</code></pre>')
@abx.hookimpl
def register_admin(admin_site):
admin_site.register(ArchiveResult, ArchiveResultAdmin)

View file

@ -0,0 +1,42 @@
__package__ = 'archivebox.core'
from django.contrib import admin
import abx.django.use
class ArchiveBoxAdmin(admin.AdminSite):
site_header = 'ArchiveBox'
index_title = 'Admin Views'
site_title = 'Admin'
namespace = 'admin'
archivebox_admin = ArchiveBoxAdmin()
archivebox_admin.disable_action('delete_selected')
# TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel
# patch admin with methods to add data views (implemented by admin_data_views package)
# https://github.com/MrThearMan/django-admin-data-views
# https://mrthearman.github.io/django-admin-data-views/setup/
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin)
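# Note: `fn.__get__(instance, cls)` binds a plain function as a method of an existing
# instance, which is how the admin_data_views helpers above are grafted onto the site.
# A minimal sketch of the same binding technique (hypothetical names):
#
#   def example_view(self):
#       return self.site_header
#
#   archivebox_admin.example_view = example_view.__get__(archivebox_admin, ArchiveBoxAdmin)
#   assert archivebox_admin.example_view() == 'ArchiveBox'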
############### Admin Data View sections are defined in settings.ADMIN_DATA_VIEWS #########
def register_admin_site():
"""Replace the default admin site with our custom ArchiveBox admin site."""
from django.contrib import admin
from django.contrib.admin import sites
admin.site = archivebox_admin
sites.site = archivebox_admin
# register all plugins admin classes
abx.django.use.register_admin(archivebox_admin)
return archivebox_admin

View file

@ -0,0 +1,357 @@
__package__ = 'archivebox.core'
import os
from pathlib import Path
from django.contrib import admin, messages
from django.urls import path
from django.utils.html import format_html, mark_safe
from django.utils import timezone
from django import forms
from django.template import Template, RequestContext
from django.contrib.admin.helpers import ActionForm
from django.contrib.admin.widgets import FilteredSelectMultiple
from archivebox.config import DATA_DIR, VERSION
from archivebox.config.common import SERVER_CONFIG
from archivebox.misc.util import htmldecode, urldecode
from archivebox.misc.paginators import AccelleratedPaginator
from archivebox.search.admin import SearchResultsAdminMixin
from archivebox.logging_util import printable_filesize
from archivebox.index.html import snapshot_icons
from archivebox.extractors import archive_links
from archivebox.main import remove
from archivebox.abid_utils.admin import ABIDModelAdmin
from archivebox.queues.tasks import bg_archive_links, bg_add
from core.models import Tag
from core.admin_tags import TagInline
from core.admin_archiveresults import ArchiveResultInline, result_url
GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False}
class SnapshotActionForm(ActionForm):
tags = forms.ModelMultipleChoiceField(
label='Edit tags',
queryset=Tag.objects.all(),
required=False,
widget=FilteredSelectMultiple(
'core_tag__name',
False,
),
)
# TODO: allow selecting actions for specific extractors? is this useful?
# extractor = forms.ChoiceField(
# choices=ArchiveResult.EXTRACTOR_CHOICES,
# required=False,
# widget=forms.MultipleChoiceField(attrs={'class': "form-control"})
# )
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
list_display = ('created_at', 'title_str', 'files', 'size', 'url_str')
sort_fields = ('title_str', 'url_str', 'created_at')
readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
fields = ('url', 'title', 'created_by', 'bookmarked_at', *readonly_fields)
ordering = ['-created_at']
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
inlines = [TagInline, ArchiveResultInline]
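# clamp the configured page size to a sane range (5..5000)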
list_per_page = min(max(5, SERVER_CONFIG.SNAPSHOTS_PER_PAGE), 5000)
action_form = SnapshotActionForm
paginator = AccelleratedPaginator
save_on_top = True
show_full_result_count = False
def changelist_view(self, request, extra_context=None):
self.request = request
extra_context = extra_context or {}
try:
return super().changelist_view(request, extra_context | GLOBAL_CONTEXT)
except Exception as e:
self.message_user(request, f'Error occurred while loading the page: {str(e)} {request.GET} {request.POST}')
return super().changelist_view(request, GLOBAL_CONTEXT)
def get_urls(self):
urls = super().get_urls()
custom_urls = [
path('grid/', self.admin_site.admin_view(self.grid_view), name='grid')
]
return custom_urls + urls
# def get_queryset(self, request):
# # tags_qs = SnapshotTag.objects.all().select_related('tag')
# # prefetch = Prefetch('snapshottag_set', queryset=tags_qs)
# self.request = request
# return super().get_queryset(request).prefetch_related('archiveresult_set').distinct() # .annotate(archiveresult_count=Count('archiveresult'))
@admin.display(
description="Imported Timestamp"
)
def imported_timestamp(self, obj):
context = RequestContext(self.request, {
'bookmarked_date': obj.bookmarked,
'timestamp': obj.timestamp,
})
html = Template("""{{bookmarked_date}} (<code>{{timestamp}}</code>)""")
return mark_safe(html.render(context))
# pretty_time = obj.bookmarked.strftime('%Y-%m-%d %H:%M:%S')
# return f'{pretty_time} ({obj.timestamp})'
# TODO: figure out a different way to do this, you can't nest forms so this doesn't work
# def action(self, obj):
# # csrfmiddlewaretoken: Wa8UcQ4fD3FJibzxqHN3IYrrjLo4VguWynmbzzcPYoebfVUnDovon7GEMYFRgsh0
# # action: update_snapshots
# # select_across: 0
# # _selected_action: 76d29b26-2a88-439e-877c-a7cca1b72bb3
# return format_html(
# '''
# <form action="/admin/core/snapshot/" method="post" onsubmit="e => e.stopPropagation()">
# <input type="hidden" name="csrfmiddlewaretoken" value="{}">
# <input type="hidden" name="_selected_action" value="{}">
# <button name="update_snapshots">Check</button>
# <button name="update_titles">Pull title + favicon</button>
# <button name="update_snapshots">Update</button>
# <button name="overwrite_snapshots">Re-Archive (overwrite)</button>
# <button name="delete_snapshots">Permanently delete</button>
# </form>
# ''',
# csrf.get_token(self.request),
# obj.pk,
# )
def admin_actions(self, obj):
return format_html(
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
'''
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}">Summary page </a> &nbsp; &nbsp;
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}/index.html#all">Result files 📑</a> &nbsp; &nbsp;
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/admin/core/snapshot/?id__exact={}">Admin actions </a>
''',
obj.timestamp,
obj.timestamp,
obj.pk,
)
def status_info(self, obj):
return format_html(
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
'''
Archived: {} ({} files {}) &nbsp; &nbsp;
Favicon: <img src="{}" style="height: 20px"/> &nbsp; &nbsp;
Status code: {} &nbsp; &nbsp;<br/>
Server: {} &nbsp; &nbsp;
Content type: {} &nbsp; &nbsp;
Extension: {} &nbsp; &nbsp;
''',
'✅' if obj.is_archived else '❌',
obj.num_outputs,
self.size(obj) or '0kb',
f'/archive/{obj.timestamp}/favicon.ico',
obj.status_code or '-',
obj.headers and obj.headers.get('Server') or '-',
obj.headers and obj.headers.get('Content-Type') or '-',
obj.extension or '-',
)
@admin.display(
description='Title',
ordering='title',
)
def title_str(self, obj):
tags = ''.join(
format_html('<a href="/admin/core/snapshot/?tags__id__exact={}"><span class="tag">{}</span></a> ', tag.pk, tag.name)
for tag in obj.tags.all()
if str(tag.name).strip()
)
return format_html(
'<a href="/{}">'
'<img src="/{}/favicon.ico" class="favicon" onerror="this.remove()">'
'</a>'
'<a href="/{}/index.html">'
'<b class="status-{}">{}</b>'
'</a>',
obj.archive_path,
obj.archive_path,
obj.archive_path,
'fetched' if obj.latest_title or obj.title else 'pending',
urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...'
) + mark_safe(f' <span class="tags">{tags}</span>')
@admin.display(
description='Files Saved',
# ordering='archiveresult_count',
)
def files(self, obj):
# return '-'
return snapshot_icons(obj)
@admin.display(
# ordering='archiveresult_count'
)
def size(self, obj):
archive_size = os.access(Path(obj.link_dir) / 'index.html', os.F_OK) and obj.archive_size
if archive_size:
size_txt = printable_filesize(archive_size)
if archive_size > 52428800:
size_txt = mark_safe(f'<b>{size_txt}</b>')
else:
size_txt = mark_safe('<span style="opacity: 0.3">...</span>')
return format_html(
'<a href="/{}" title="View all files">{}</a>',
obj.archive_path,
size_txt,
)
@admin.display(
description='Original URL',
ordering='url',
)
def url_str(self, obj):
return format_html(
'<a href="{}"><code style="user-select: all;">{}</code></a>',
obj.url,
obj.url[:128],
)
def grid_view(self, request, extra_context=None):
# cl = self.get_changelist_instance(request)
# Save before monkey patching to restore for changelist list view
saved_change_list_template = self.change_list_template
saved_list_per_page = self.list_per_page
saved_list_max_show_all = self.list_max_show_all
# Monkey patch here plus core_tags.py
self.change_list_template = 'private_index_grid.html'
self.list_per_page = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
self.list_max_show_all = self.list_per_page
# Call monkey patched view
rendered_response = self.changelist_view(request, extra_context=extra_context)
# Restore values
self.change_list_template = saved_change_list_template
self.list_per_page = saved_list_per_page
self.list_max_show_all = saved_list_max_show_all
return rendered_response
# for debugging, uncomment this to print all requests:
# def changelist_view(self, request, extra_context=None):
# print('[*] Got request', request.method, request.POST)
# return super().changelist_view(request, extra_context=None)
@admin.action(
description=" Get Title"
)
def update_titles(self, request, queryset):
links = [snapshot.as_link() for snapshot in queryset]
if len(links) < 3:
# run synchronously if there are only 1 or 2 links
archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=DATA_DIR)
messages.success(request, f"Title and favicon have been fetched and saved for {len(links)} URLs.")
else:
# otherwise run in a background worker
result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": DATA_DIR})
messages.success(
request,
mark_safe(f"Title and favicon are updating in the background for {len(links)} URLs. {result_url(result)}"),
)
@admin.action(
description="⬇️ Get Missing"
)
def update_snapshots(self, request, queryset):
links = [snapshot.as_link() for snapshot in queryset]
result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": DATA_DIR})
messages.success(
request,
mark_safe(f"Re-trying any previously failed methods for {len(links)} URLs in the background. {result_url(result)}"),
)
@admin.action(
description="🆕 Archive Again"
)
def resnapshot_snapshot(self, request, queryset):
for snapshot in queryset:
timestamp = timezone.now().isoformat('T', 'seconds')
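# append a unique #fragment so the re-added URL doesn't collide with Snapshot.url's unique constraint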
new_url = snapshot.url.split('#')[0] + f'#{timestamp}'
result = bg_add({'urls': new_url, 'tag': snapshot.tags_str()})
messages.success(
request,
mark_safe(f"Creating new fresh snapshots for {queryset.count()} URLs in the background. {result_url(result)}"),
)
@admin.action(
description="🔄 Redo"
)
def overwrite_snapshots(self, request, queryset):
links = [snapshot.as_link() for snapshot in queryset]
result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": DATA_DIR})
messages.success(
request,
mark_safe(f"Clearing all previous results and re-downloading {len(links)} URLs in the background. {result_url(result)}"),
)
@admin.action(
description="☠️ Delete"
)
def delete_snapshots(self, request, queryset):
remove(snapshots=queryset, yes=True, delete=True, out_dir=DATA_DIR)
messages.success(
request,
mark_safe(f"Succesfully deleted {queryset.count()} Snapshots. Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed."),
)
@admin.action(
description="+"
)
def add_tags(self, request, queryset):
tags = request.POST.getlist('tags')
print('[+] Adding tags', tags, 'to Snapshots', queryset)
for obj in queryset:
obj.tags.add(*tags)
messages.success(
request,
f"Added {len(tags)} tags to {queryset.count()} Snapshots.",
)
@admin.action(
description=""
)
def remove_tags(self, request, queryset):
tags = request.POST.getlist('tags')
print('[-] Removing tags', tags, 'from Snapshots', queryset)
for obj in queryset:
obj.tags.remove(*tags)
messages.success(
request,
f"Removed {len(tags)} tags from {queryset.count()} Snapshots.",
)

View file

@ -0,0 +1,165 @@
__package__ = 'archivebox.core'
from django.contrib import admin
from django.utils.html import format_html, mark_safe
import abx
from abid_utils.admin import ABIDModelAdmin
from archivebox.misc.paginators import AccelleratedPaginator
from core.models import Tag
class TagInline(admin.TabularInline):
model = Tag.snapshot_set.through # type: ignore
# fk_name = 'snapshot'
fields = ('id', 'tag')
extra = 1
# min_num = 1
max_num = 1000
autocomplete_fields = (
'tag',
)
# class AutocompleteTags:
# model = Tag
# search_fields = ['name']
# name = 'name'
# # source_field = 'name'
# remote_field = Tag._meta.get_field('name')
# class AutocompleteTagsAdminStub:
# name = 'admin'
# class TaggedItemInline(admin.TabularInline):
# readonly_fields = ('object_link',)
# fields = ('id', 'tag', 'content_type', 'object_id', *readonly_fields)
# model = TaggedItem
# extra = 1
# show_change_link = True
# @admin.display(description='object')
# def object_link(self, obj):
# obj = obj.content_type.get_object_for_this_type(pk=obj.object_id)
# return format_html('<a href="/admin/{}/{}/{}/change"><b>[{}]</b></a>', obj._meta.app_label, obj._meta.model_name, obj.pk, str(obj))
class TagAdmin(ABIDModelAdmin):
list_display = ('created_at', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots')
list_filter = ('created_at', 'created_by')
sort_fields = ('name', 'slug', 'abid', 'created_by', 'created_at')
readonly_fields = ('slug', 'abid', 'created_at', 'modified_at', 'abid_info', 'snapshots')
search_fields = ('abid', 'name', 'slug')
fields = ('name', 'created_by', *readonly_fields)
actions = ['delete_selected', 'merge_tags']
ordering = ['-created_at']
# inlines = [TaggedItemInline]
paginator = AccelleratedPaginator
def num_snapshots(self, tag):
return format_html(
'<a href="/admin/core/snapshot/?tags__id__exact={}">{} total</a>',
tag.id,
tag.snapshot_set.count(),
)
def snapshots(self, tag):
total_count = tag.snapshot_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> {}',
snap.pk,
snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
snap.url[:64],
)
for snap in tag.snapshot_set.order_by('-downloaded_at')[:10]
) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">{total_count} total snapshots...</a>'))
# def get_urls(self):
# urls = super().get_urls()
# custom_urls = [
# path(
# "merge-tags/",
# self.admin_site.admin_view(self.merge_tags_view),
# name="taggit_tag_merge_tags",
# ),
# ]
# return custom_urls + urls
# @admin.action(description="Merge selected tags")
# def merge_tags(self, request, queryset):
# selected = request.POST.getlist(admin.helpers.ACTION_CHECKBOX_NAME)
# if not selected:
# self.message_user(request, "Please select at least one tag.")
# return redirect(request.get_full_path())
# selected_tag_ids = ",".join(selected)
# redirect_url = f"{request.get_full_path()}merge-tags/"
# request.session["selected_tag_ids"] = selected_tag_ids
# return redirect(redirect_url)
# def merge_tags_view(self, request):
# selected_tag_ids = request.session.get("selected_tag_ids", "").split(",")
# if request.method == "POST":
# form = MergeTagsForm(request.POST)
# if form.is_valid():
# new_tag_name = form.cleaned_data["new_tag_name"]
# new_tag, created = Tag.objects.get_or_create(name=new_tag_name)
# with transaction.atomic():
# for tag_id in selected_tag_ids:
# tag = Tag.objects.get(id=tag_id)
# tagged_items = TaggedItem.objects.filter(tag=tag)
# for tagged_item in tagged_items:
# if TaggedItem.objects.filter(
# tag=new_tag,
# content_type=tagged_item.content_type,
# object_id=tagged_item.object_id,
# ).exists():
# # we have the new tag as well, so we can just
# # remove the tag association
# tagged_item.delete()
# else:
# # point this taggedItem to the new one
# tagged_item.tag = new_tag
# tagged_item.save()
# # delete the old tag
# if tag.id != new_tag.id:
# tag.delete()
# self.message_user(request, "Tags have been merged", level="success")
# # clear the selected_tag_ids from session after merge is complete
# request.session.pop("selected_tag_ids", None)
# return redirect("..")
# else:
# self.message_user(request, "Form is invalid.", level="error")
# context = {
# "form": MergeTagsForm(),
# "selected_tag_ids": selected_tag_ids,
# }
# return render(request, "admin/taggit/merge_tags_form.html", context)
# @admin.register(SnapshotTag, site=archivebox_admin)
# class SnapshotTagAdmin(ABIDModelAdmin):
# list_display = ('id', 'snapshot', 'tag')
# sort_fields = ('id', 'snapshot', 'tag')
# search_fields = ('id', 'snapshot_id', 'tag_id')
# fields = ('snapshot', 'id')
# actions = ['delete_selected']
# ordering = ['-id']
@abx.hookimpl
def register_admin(admin_site):
admin_site.register(Tag, TagAdmin)

View file

@ -0,0 +1,91 @@
__package__ = 'archivebox.core'
from django.contrib import admin
from django.contrib.auth.admin import UserAdmin
from django.utils.html import format_html, mark_safe
from django.contrib.auth import get_user_model
import abx
class CustomUserAdmin(UserAdmin):
sort_fields = ['id', 'email', 'username', 'is_superuser', 'last_login', 'date_joined']
list_display = ['username', 'id', 'email', 'is_superuser', 'last_login', 'date_joined']
readonly_fields = ('snapshot_set', 'archiveresult_set', 'tag_set', 'apitoken_set', 'outboundwebhook_set')
fieldsets = [*UserAdmin.fieldsets, ('Data', {'fields': readonly_fields})]
@admin.display(description='Snapshots')
def snapshot_set(self, obj):
total_count = obj.snapshot_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
snap.pk,
snap.abid,
snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
snap.url[:64],
)
for snap in obj.snapshot_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/core/snapshot/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
@admin.display(description='Archive Result Logs')
def archiveresult_set(self, obj):
total_count = obj.archiveresult_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
result.pk,
result.abid,
result.snapshot.downloaded_at.strftime('%Y-%m-%d %H:%M') if result.snapshot.downloaded_at else 'pending...',
result.extractor,
result.snapshot.url[:64],
)
for result in obj.archiveresult_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/core/archiveresult/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
@admin.display(description='Tags')
def tag_set(self, obj):
total_count = obj.tag_set.count()
return mark_safe(', '.join(
format_html(
'<code><a href="/admin/core/tag/{}/change"><b>{}</b></a></code>',
tag.pk,
tag.name,
)
for tag in obj.tag_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/core/tag/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
@admin.display(description='API Tokens')
def apitoken_set(self, obj):
total_count = obj.apitoken_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/api/apitoken/{}/change"><b>[{}]</b></a></code> {} (expires {})',
apitoken.pk,
apitoken.abid,
apitoken.token_redacted[:64],
apitoken.expires,
)
for apitoken in obj.apitoken_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/api/apitoken/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
@admin.display(description='API Outbound Webhooks')
def outboundwebhook_set(self, obj):
total_count = obj.outboundwebhook_set.count()
return mark_safe('<br/>'.join(
format_html(
'<code><a href="/admin/api/outboundwebhook/{}/change"><b>[{}]</b></a></code> {} -> {}',
outboundwebhook.pk,
outboundwebhook.abid,
outboundwebhook.referenced_model,
outboundwebhook.endpoint,
)
for outboundwebhook in obj.outboundwebhook_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/api/outboundwebhook/?created_by__id__exact={obj.pk}">{total_count} total records...</a>')
@abx.hookimpl
def register_admin(admin_site):
admin_site.register(get_user_model(), CustomUserAdmin)

View file

@ -2,27 +2,22 @@ __package__ = 'archivebox.core'
from django.apps import AppConfig
import abx
class CoreConfig(AppConfig):
name = 'core'
def ready(self):
# register our custom admin as the primary django admin
from django.contrib import admin
from django.contrib.admin import sites
from core.admin import archivebox_admin
admin.site = archivebox_admin
sites.site = archivebox_admin
# register signal handlers
from .auth import register_signals
register_signals()
"""Register the archivebox.core.admin_site as the main django admin site"""
from core.admin_site import register_admin_site
register_admin_site()
# from django.contrib.admin.apps import AdminConfig
# class CoreAdminConfig(AdminConfig):
# default_site = "core.admin.get_admin_site"
@abx.hookimpl
def register_admin(admin_site):
"""Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) with the admin site"""
from core.admin import register_admin
register_admin(admin_site)

View file

@ -1,12 +0,0 @@
__package__ = 'archivebox.core'
from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
def register_signals():
if LDAP_CONFIG.LDAP_ENABLED:
import django_auth_ldap.backend
from .auth_ldap import create_user
django_auth_ldap.backend.populate_user.connect(create_user)

View file

@ -1,8 +0,0 @@
from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
def create_user(sender, user=None, ldap_user=None, **kwargs):
if not user.id and LDAP_CONFIG.LDAP_CREATE_SUPERUSER:
user.is_superuser = True
user.is_staff = True
print(f'[!] WARNING: Creating new user {user} based on LDAP user {ldap_user} (is_staff={user.is_staff}, is_superuser={user.is_superuser})')

View file

@ -1,101 +0,0 @@
# Generated by Django 5.1.1 on 2024-10-01 02:10
import abid_utils.models
import charidfield.fields
import django.core.validators
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("core", "0074_alter_snapshot_downloaded_at"),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.CreateModel(
name="Crawl",
fields=[
(
"id",
models.UUIDField(
default=None,
editable=False,
primary_key=True,
serialize=False,
unique=True,
verbose_name="ID",
),
),
(
"abid",
charidfield.fields.CharIDField(
blank=True,
db_index=True,
default=None,
help_text="ABID-format identifier for this entity (e.g. snp_01BJQMF54D093DXEAWZ6JYRPAQ)",
max_length=30,
null=True,
prefix="crl_",
unique=True,
),
),
(
"created_at",
abid_utils.models.AutoDateTimeField(db_index=True, default=None),
),
("modified_at", models.DateTimeField(auto_now=True)),
("urls", models.TextField()),
(
"depth",
models.PositiveSmallIntegerField(
default=1,
validators=[
django.core.validators.MinValueValidator(0),
django.core.validators.MaxValueValidator(2),
],
),
),
(
"parser",
models.CharField(
choices=[
("auto", "auto"),
("pocket_api", "Pocket API"),
("readwise_reader_api", "Readwise Reader API"),
("wallabag_atom", "Wallabag Atom"),
("pocket_html", "Pocket HTML"),
("pinboard_rss", "Pinboard RSS"),
("shaarli_rss", "Shaarli RSS"),
("medium_rss", "Medium RSS"),
("netscape_html", "Netscape HTML"),
("rss", "Generic RSS"),
("json", "Generic JSON"),
("jsonl", "Generic JSONL"),
("html", "Generic HTML"),
("txt", "Generic TXT"),
("url_list", "URL List"),
],
default="auto",
max_length=32,
),
),
(
"created_by",
models.ForeignKey(
default=None,
on_delete=django.db.models.deletion.CASCADE,
related_name="crawl_set",
to=settings.AUTH_USER_MODEL,
),
),
],
options={
"verbose_name": "Crawl",
"verbose_name_plural": "Crawls",
},
),
]

View file

@ -15,7 +15,6 @@ from django.utils.text import slugify
from django.core.cache import cache
from django.urls import reverse, reverse_lazy
from django.db.models import Case, When, Value, IntegerField
from django.core.validators import MaxValueValidator, MinValueValidator
from django.contrib import admin
from django.conf import settings
@ -23,6 +22,7 @@ from archivebox.config import CONSTANTS
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
from queues.tasks import bg_archive_snapshot
# from crawls.models import Crawl
# from machine.models import Machine, NetworkInterface
from archivebox.misc.system import get_dir_size
@ -30,7 +30,6 @@ from archivebox.misc.util import parse_date, base_url
from ..index.schema import Link
from ..index.html import snapshot_icons
from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
from ..parsers import PARSERS
# class BaseModel(models.Model):
@ -45,9 +44,11 @@ from ..parsers import PARSERS
class Tag(ABIDModel):
"""
Based on django-taggit model + ABID base.
Loosely based on django-taggit model + ABID base.
"""
abid_prefix = 'tag_'
abid_ts_src = 'self.created_at'
@ -68,7 +69,7 @@ class Tag(ABIDModel):
# slug is autoset on save from name, never set it manually
snapshot_set: models.Manager['Snapshot']
crawl_set: models.Manager['Crawl']
# crawl_set: models.Manager['Crawl']
class Meta(TypedModelMeta):
verbose_name = "Tag"
@ -83,8 +84,12 @@ class Tag(ABIDModel):
slug += "_%d" % i
return slug
def clean(self, *args, **kwargs):
self.slug = self.slug or self.slugify(self.name)
super().clean(*args, **kwargs)
def save(self, *args, **kwargs):
if self._state.adding and not self.slug:
if self._state.adding:
self.slug = self.slugify(self.name)
# if name is different but slug conflicts with another tag's slug, append a counter
@ -114,6 +119,8 @@ class Tag(ABIDModel):
def api_docs_url(self) -> str:
return '/api/v1/docs#/Core%20Models/api_v1_core_get_tag'
class SnapshotTag(models.Model):
id = models.AutoField(primary_key=True)
@ -136,69 +143,6 @@ class SnapshotTag(models.Model):
# unique_together = [('crawl', 'tag')]
class Crawl(ABIDModel):
abid_prefix = 'crl_'
abid_ts_src = 'self.created_at'
abid_uri_src = 'self.urls'
abid_subtype_src = 'self.crawler'
abid_rand_src = 'self.id'
abid_drift_allowed = True
# CRAWLER_CHOICES = (
# ('breadth_first', 'Breadth-First'),
# ('depth_first', 'Depth-First'),
# )
PARSER_CHOICES = (
('auto', 'auto'),
*((parser_key, value[0]) for parser_key, value in PARSERS.items()),
)
id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='crawl_set')
created_at = AutoDateTimeField(default=None, null=False, db_index=True)
modified_at = models.DateTimeField(auto_now=True)
urls = models.TextField(blank=False, null=False)
depth = models.PositiveSmallIntegerField(default=1, validators=[MinValueValidator(0), MaxValueValidator(2)])
parser = models.CharField(choices=PARSER_CHOICES, default='auto', max_length=32)
# crawler = models.CharField(choices=CRAWLER_CHOICES, default='breadth_first', max_length=32)
# tags = models.ManyToManyField(Tag, blank=True, related_name='crawl_set', through='CrawlTag')
# schedule = models.JSONField()
# config = models.JSONField()
class Meta(TypedModelMeta):
verbose_name = 'Crawl'
verbose_name_plural = 'Crawls'
def __str__(self):
return self.parser
@cached_property
def crawl_dir(self):
return Path()
@property
def api_url(self) -> str:
# /api/v1/core/crawl/{abid}
return reverse_lazy('api-1:get_crawl', args=[self.abid]) # + f'?api_key={get_or_create_api_token(request.user)}'
@property
def api_docs_url(self) -> str:
return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl'
# def get_absolute_url(self):
# return f'/crawls/{self.abid}'
def crawl(self):
# write self.urls to sources/crawl__<user>__YYYYMMDDHHMMSS.txt
# run parse_links(sources/crawl__<user>__YYYYMMDDHHMMSS.txt, parser=self.parser) and for each resulting link:
# create a Snapshot
# enqueue task bg_archive_snapshot(snapshot)
pass
@ -227,6 +171,8 @@ class Snapshot(ABIDModel):
bookmarked_at = AutoDateTimeField(default=None, null=False, editable=True, db_index=True)
downloaded_at = models.DateTimeField(default=None, null=True, editable=False, db_index=True, blank=True)
# crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, default=None, null=True, blank=True, related_name='snapshot_set')
url = models.URLField(unique=True, db_index=True)
timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False)
tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag'))
@ -561,9 +507,10 @@ class ArchiveResult(ABIDModel):
# return f'[{self.abid}] 📅 {self.start_ts.strftime("%Y-%m-%d %H:%M")} 📄 {self.extractor} {self.snapshot.url}'
return self.extractor
@cached_property
def machine(self):
return self.iface.machine if self.iface else None
# TODO: finish connecting machine.models
# @cached_property
# def machine(self):
# return self.iface.machine if self.iface else None
@cached_property
def snapshot_dir(self):

View file

@ -10,7 +10,7 @@ from django.utils.crypto import get_random_string
import abx
import abx.archivebox
import abx.archivebox.use
import abx.archivebox.reads
import abx.django.use
from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS
@ -19,8 +19,7 @@ from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG # noqa
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]
IS_GETTING_VERSION_OR_HELP = 'version' in sys.argv or 'help' in sys.argv or '--version' in sys.argv or '--help' in sys.argv
################################################################################
### ArchiveBox Plugin Settings
@ -41,7 +40,7 @@ BUILTIN_PLUGIN_DIRS = {
'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
}
USER_PLUGIN_DIRS = {
'user_plugins': DATA_DIR / 'user_plugins',
# 'user_plugins': DATA_DIR / 'user_plugins',
}
# Discover ArchiveBox plugins
@ -52,19 +51,18 @@ ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
# Load ArchiveBox plugins
PLUGIN_MANAGER = abx.pm
PLUGINS = abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS)
HOOKS = abx.archivebox.use.get_HOOKS(PLUGINS)
abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS)
PLUGINS = abx.archivebox.reads.get_PLUGINS()
# Load ArchiveBox config from plugins
CONFIGS = abx.archivebox.use.get_CONFIGS()
FLAT_CONFIG = abx.archivebox.use.get_FLAT_CONFIG()
BINPROVIDERS = abx.archivebox.use.get_BINPROVIDERS()
BINARIES = abx.archivebox.use.get_BINARIES()
EXTRACTORS = abx.archivebox.use.get_EXTRACTORS()
REPLAYERS = abx.archivebox.use.get_REPLAYERS()
ADMINDATAVIEWS = abx.archivebox.use.get_ADMINDATAVIEWS()
QUEUES = abx.archivebox.use.get_QUEUES()
SEARCHBACKENDS = abx.archivebox.use.get_SEARCHBACKENDS()
CONFIGS = abx.archivebox.reads.get_CONFIGS()
CONFIG = FLAT_CONFIG = abx.archivebox.reads.get_FLAT_CONFIG()
BINPROVIDERS = abx.archivebox.reads.get_BINPROVIDERS()
BINARIES = abx.archivebox.reads.get_BINARIES()
EXTRACTORS = abx.archivebox.reads.get_EXTRACTORS()
SEARCHBACKENDS = abx.archivebox.reads.get_SEARCHBACKENDS()
# REPLAYERS = abx.archivebox.reads.get_REPLAYERS()
# ADMINDATAVIEWS = abx.archivebox.reads.get_ADMINDATAVIEWS()
################################################################################
@ -101,10 +99,13 @@ INSTALLED_APPS = [
'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions
# Our ArchiveBox-provided apps
#'config', # ArchiveBox config settings (loaded as a plugin, don't need to add it here)
# 'abid_utils', # handles ABID ID creation, handling, and models
'config', # ArchiveBox config settings (loaded as a plugin, don't need to add it here)
'machine', # handles collecting and storing information about the host machine, network interfaces, installed binaries, etc.
'queues', # handles starting and managing background workers and processes
'abid_utils', # handles ABID ID creation, handling, and models
'seeds', # handles Seed model and URL source management
'crawls', # handles Crawl and CrawlSchedule models and management
'personas', # handles Persona and session management
'core', # core django model with Snapshot, ArchiveResult, etc.
'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
@ -262,7 +263,8 @@ MIGRATION_MODULES = {'signal_webhooks': None}
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
HUEY = {
if not IS_GETTING_VERSION_OR_HELP: # don't create queue.sqlite3 file if we're just running to get --version or --help
HUEY = {
"huey_class": "huey.SqliteHuey",
"filename": CONSTANTS.QUEUE_DATABASE_FILENAME,
"name": "system_tasks",
@ -281,18 +283,18 @@ HUEY = {
"check_worker_health": True, # Enable worker health checks.
"health_check_interval": 1, # Check worker health every second.
},
}
}
# https://huey.readthedocs.io/en/latest/contrib.html#setting-things-up
# https://github.com/gaiacoop/django-huey
DJANGO_HUEY = {
# https://huey.readthedocs.io/en/latest/contrib.html#setting-things-up
# https://github.com/gaiacoop/django-huey
DJANGO_HUEY = {
"default": "system_tasks",
"queues": {
HUEY["name"]: HUEY.copy(),
# more registered here at plugin import-time by BaseQueue.register()
**abx.django.use.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME),
},
}
}
class HueyDBRouter:
"""
@ -410,7 +412,7 @@ SHELL_PLUS_PRINT_SQL = False
IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner']
IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell'
if IS_SHELL:
os.environ['PYTHONSTARTUP'] = str(PACKAGE_DIR / 'core' / 'shell_welcome_message.py')
os.environ['PYTHONSTARTUP'] = str(PACKAGE_DIR / 'misc' / 'shell_welcome_message.py')
################################################################################
@ -610,6 +612,6 @@ if DEBUG_REQUESTS_TRACKER:
abx.django.use.register_checks()
abx.archivebox.use.register_all_hooks(globals())
# abx.archivebox.reads.register_all_hooks(globals())
# import ipdb; ipdb.set_trace()

View file

@ -5,9 +5,10 @@ from django.views import static
from django.conf import settings
from django.views.generic.base import RedirectView
from .admin import archivebox_admin
from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
from .serve_static import serve_static
from archivebox.misc.serve_static import serve_static
from core.admin_site import archivebox_admin
from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
# from archivebox.config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE

View file

@ -24,16 +24,15 @@ from admin_data_views.utils import render_with_table_view, render_with_item_view
from core.models import Snapshot
from core.forms import AddLinkForm
from core.admin import result_url
from queues.tasks import bg_add
from archivebox.config import CONSTANTS_CONFIG, DATA_DIR, VERSION
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG
from archivebox.misc.util import base_url, htmlencode, ts_to_date_str
from archivebox.misc.serve_static import serve_static_with_byterange_support
from .serve_static import serve_static_with_byterange_support
from ..plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
from ..plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG
from ..logging_util import printable_filesize
from ..search import query_search_index
@ -452,6 +451,8 @@ class AddView(UserPassesTestMixin, FormView):
}
def form_valid(self, form):
from core.admin_archiveresults import result_url
url = form.cleaned_data["url"]
print(f'[+] Adding URL: {url}')
parser = form.cleaned_data["parser"]
@ -502,7 +503,7 @@ def find_config_section(key: str) -> str:
if key in CONSTANTS_CONFIG:
return 'CONSTANT'
matching_sections = [
section.id for section in settings.CONFIGS.values() if key in section.model_fields
section_id for section_id, section in settings.CONFIGS.items() if key in section.model_fields
]
section = matching_sections[0] if matching_sections else 'DYNAMIC'
return section
@ -559,9 +560,9 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
# "Aliases": [],
}
for section in reversed(list(settings.CONFIGS.values())):
for section_id, section in reversed(list(settings.CONFIGS.items())):
for key, field in section.model_fields.items():
rows['Section'].append(section.id) # section.replace('_', ' ').title().replace(' Config', '')
rows['Section'].append(section_id) # section.replace('_', ' ').title().replace(' Config', '')
rows['Key'].append(ItemLink(key, key=key))
rows['Type'].append(format_html('<code>{}</code>', find_config_type(key)))
rows['Value'].append(mark_safe(f'<code>{getattr(section, key)}</code>') if key_is_safe(key) else '******** (redacted)')
@ -612,7 +613,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
"fields": {
'Key': key,
'Type': find_config_type(key),
'Value': settings.FLAT_CONFIG[key] if key_is_safe(key) else '********',
'Value': settings.FLAT_CONFIG.get(key, settings.CONFIGS.get(key, None)) if key_is_safe(key) else '********',
},
"help_texts": {
'Key': mark_safe(f'''

View file

@ -0,0 +1,28 @@
__package__ = 'archivebox.crawls'
import abx
from abid_utils.admin import ABIDModelAdmin
from crawls.models import Crawl
class CrawlAdmin(ABIDModelAdmin):
list_display = ('abid', 'created_at', 'created_by', 'depth', 'parser', 'urls')
sort_fields = ('abid', 'created_at', 'created_by', 'depth', 'parser', 'urls')
search_fields = ('abid', 'created_by__username', 'depth', 'parser', 'urls')
readonly_fields = ('created_at', 'modified_at', 'abid_info')
fields = ('urls', 'depth', 'parser', 'created_by', *readonly_fields)
list_filter = ('depth', 'parser', 'created_by')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
@abx.hookimpl
def register_admin(admin_site):
admin_site.register(Crawl, CrawlAdmin)

View file

@ -0,0 +1,6 @@
from django.apps import AppConfig
class CrawlsConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField"
name = "crawls"

archivebox/crawls/models.py Normal file (164 lines added)
View file

@ -0,0 +1,164 @@
__package__ = 'archivebox.crawls'
from django_stubs_ext.db.models import TypedModelMeta
from django.db import models
from django.db.models import Q
from django.core.validators import MaxValueValidator, MinValueValidator
from django.conf import settings
from django.utils import timezone
from django.urls import reverse_lazy
from seeds.models import Seed
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
class CrawlSchedule(ABIDModel, ModelWithHealthStats):
"""
A record for a job that should run repeatedly on a given schedule.
It pulls from a given Seed and creates a new Crawl for each scheduled run.
The new Crawl will inherit all the properties of the template Crawl (see .template below).
"""
abid_prefix = 'sch_'
abid_ts_src = 'self.created_at'
abid_uri_src = 'self.created_by_id'
abid_subtype_src = 'self.schedule'
abid_rand_src = 'self.id'
schedule = models.CharField(max_length=64, blank=False, null=False)
is_enabled = models.BooleanField(default=True)
created_at = AutoDateTimeField(default=None, null=False, db_index=True)
modified_at = models.DateTimeField(auto_now=True)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False)
crawl_set: models.Manager['Crawl']
@property
def template(self):
"""The base crawl that each new scheduled job should copy as a template"""
return self.crawl_set.first()
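# A minimal sketch (hypothetical helper, not part of this commit) of how a scheduled
# run could copy the template into a fresh Crawl, using the fields defined below:
#
#   def create_crawl(self):
#       template = self.template
#       assert template is not None, 'a CrawlSchedule needs at least one Crawl to copy'
#       return Crawl.objects.create(
#           seed=template.seed,
#           max_depth=template.max_depth,
#           tags_str=template.tags_str,
#           persona=template.persona,
#           config=template.config,
#           schedule=self,
#           created_by_id=template.created_by_id,
#       )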
class Crawl(ABIDModel, ModelWithHealthStats):
"""
A single session of URLs to archive starting from a given Seed and expanding outwards. An "archiving session" so to speak.
A new Crawl should be created for each load from a Seed (because a Seed can produce a different set of URLs every time it's loaded).
E.g. every scheduled import from an RSS feed should create a new Crawl, and each subsequent load from the same Seed creates a new Crawl as well.
Every "Add" task triggered from the Web UI, CLI, or Scheduled Crawl should create a new Crawl with the seed set to a
file URI e.g. file:///sources/<date>_{ui,cli}_add.txt containing the user's input.
"""
abid_prefix = 'crl_'
abid_ts_src = 'self.created_at'
abid_uri_src = 'self.seed.uri'
abid_subtype_src = 'self.persona_id'
abid_rand_src = 'self.id'
abid_drift_allowed = True
id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='crawl_set')
created_at = AutoDateTimeField(default=None, null=False, db_index=True)
modified_at = models.DateTimeField(auto_now=True)
seed = models.ForeignKey(Seed, on_delete=models.PROTECT, related_name='crawl_set', null=False, blank=False)
max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)])
tags_str = models.CharField(max_length=1024, blank=True, null=False, default='')
persona = models.CharField(max_length=32, blank=True, null=False, default='auto')
config = models.JSONField(default=dict)
schedule = models.ForeignKey(CrawlSchedule, on_delete=models.SET_NULL, null=True, blank=True, editable=True)
# crawler = models.CharField(choices=CRAWLER_CHOICES, default='breadth_first', max_length=32)
# tags = models.ManyToManyField(Tag, blank=True, related_name='crawl_set', through='CrawlTag')
# schedule = models.JSONField()
# config = models.JSONField()
# snapshot_set: models.Manager['Snapshot']
class Meta(TypedModelMeta):
verbose_name = 'Crawl'
verbose_name_plural = 'Crawls'
@property
def template(self):
"""If this crawl was created under a ScheduledCrawl, returns the original template Crawl it was based off"""
if not self.schedule:
return None
return self.schedule.template
@property
def api_url(self) -> str:
# /api/v1/core/crawl/{abid}
# TODO: implement get_crawl
return reverse_lazy('api-1:get_crawl', args=[self.abid]) # + f'?api_key={get_or_create_api_token(request.user)}'
@property
def api_docs_url(self) -> str:
return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl'
class Outlink(models.Model):
"""A record of a link found on a page, pointing to another page."""
id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
src = models.URLField() # parent page where the outlink/href was found e.g. https://example.com/downloads
dst = models.URLField() # remote location the child outlink/href points to e.g. https://example.com/downloads/some_file.pdf
crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, null=False, blank=False, related_name='outlink_set')
via = models.ForeignKey('core.ArchiveResult', on_delete=models.SET_NULL, null=True, blank=True, related_name='outlink_set')
class Meta:
unique_together = (('src', 'dst', 'via'),)
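# A minimal sketch (hypothetical usage) of recording a discovered link edge without
# duplicating it; the explicit uuid is assumed necessary since this pk has no default:
#
#   import uuid
#   outlink, created = Outlink.objects.get_or_create(
#       src='https://example.com/downloads',
#       dst='https://example.com/downloads/some_file.pdf',
#       crawl=crawl,
#       via=archiveresult,
#       defaults={'id': uuid.uuid4()},
#   )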
# @abx.hookimpl.on_archiveresult_created
# def exec_archiveresult_extractor_effects(archiveresult):
# config = get_scope_config(...)
# # abx.archivebox.writes.update_archiveresult_started(archiveresult, start_ts=timezone.now())
# # abx.archivebox.events.on_archiveresult_updated(archiveresult)
# # check if it should be skipped
# if not abx.archivebox.reads.get_archiveresult_should_run(archiveresult, config):
# abx.archivebox.writes.update_archiveresult_skipped(archiveresult, status='skipped')
# abx.archivebox.events.on_archiveresult_skipped(archiveresult, config)
# return
# # run the extractor method and save the output back to the archiveresult
# try:
# output = abx.archivebox.effects.exec_archiveresult_extractor(archiveresult, config)
# abx.archivebox.writes.update_archiveresult_succeeded(archiveresult, output=output, error=None, end_ts=timezone.now())
# except Exception as e:
# abx.archivebox.writes.update_archiveresult_failed(archiveresult, error=e, end_ts=timezone.now())
# # bump the modified time on the archiveresult and Snapshot
# abx.archivebox.events.on_archiveresult_updated(archiveresult)
# abx.archivebox.events.on_snapshot_updated(archiveresult.snapshot)
# @abx.hookimpl.reads.get_outlink_parents
# def get_outlink_parents(url, crawl_pk=None, config=None):
# scope = Q(dst=url)
# if crawl_pk:
# scope = scope | Q(via__snapshot__crawl_id=crawl_pk)
# parent = list(Outlink.objects.filter(scope))
# if not parent:
# # base case: we reached the top of the chain, no more parents left
# return []
# # recursive case: there is another parent above us, get its parents
# yield parent[0]
# yield from get_outlink_parents(parent[0].src, crawl_pk=crawl_pk, config=config)

View file

@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

View file

@ -0,0 +1,3 @@
from django.shortcuts import render
# Create your views here.

View file

@ -8,8 +8,9 @@ from collections import defaultdict
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from archivebox.misc.system import run, chmod_file
from archivebox.misc.util import enforce_types, is_static_file, dedupe
from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
from archivebox.plugins_extractor.curl.apps import CURL_CONFIG, CURL_BINARY
from archivebox.plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG
from archivebox.plugins_extractor.curl.config import CURL_CONFIG
from archivebox.plugins_extractor.curl.binaries import CURL_BINARY
from ..logging_util import TimedProgress

View file

@ -11,6 +11,9 @@ from archivebox.misc.util import (
)
from ..logging_util import TimedProgress
from plugins_extractor.chrome.config import CHROME_CONFIG
from plugins_extractor.chrome.binaries import CHROME_BINARY
def get_output_path():
return 'output.html'
@ -18,7 +21,6 @@ def get_output_path():
@enforce_types
def should_save_dom(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
from plugins_extractor.chrome.apps import CHROME_CONFIG
if is_static_file(link.url):
return False
@ -34,8 +36,6 @@ def should_save_dom(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
def save_dom(link: Link, out_dir: Optional[Path]=None, timeout: int=60) -> ArchiveResult:
"""print HTML of site to file using chrome --dump-html"""
from plugins_extractor.chrome.apps import CHROME_CONFIG, CHROME_BINARY
CHROME_BIN = CHROME_BINARY.load()
assert CHROME_BIN.abspath and CHROME_BIN.version

View file

@ -4,8 +4,9 @@ from pathlib import Path
from archivebox.misc.system import chmod_file, run
from archivebox.misc.util import enforce_types, domain, dedupe
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
from archivebox.plugins_extractor.curl.apps import CURL_CONFIG, CURL_BINARY
from archivebox.plugins_extractor.favicon.config import FAVICON_CONFIG
from archivebox.plugins_extractor.curl.config import CURL_CONFIG
from archivebox.plugins_extractor.curl.binaries import CURL_BINARY
from ..index.schema import Link, ArchiveResult, ArchiveOutput
from ..logging_util import TimedProgress

View file

@ -13,10 +13,12 @@ from archivebox.misc.util import (
without_query,
without_fragment,
)
from archivebox.plugins_extractor.git.apps import GIT_CONFIG, GIT_BINARY
from ..logging_util import TimedProgress
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from archivebox.plugins_extractor.git.config import GIT_CONFIG
from archivebox.plugins_extractor.git.binaries import GIT_BINARY
def get_output_path():
return 'git/'

View file

@ -10,7 +10,8 @@ from archivebox.misc.util import (
get_headers,
dedupe,
)
from archivebox.plugins_extractor.curl.apps import CURL_CONFIG, CURL_BINARY
from archivebox.plugins_extractor.curl.config import CURL_CONFIG
from archivebox.plugins_extractor.curl.binaries import CURL_BINARY
from ..index.schema import Link, ArchiveResult, ArchiveOutput
from ..logging_util import TimedProgress

View file

@ -3,11 +3,13 @@ __package__ = 'archivebox.extractors'
from pathlib import Path
from typing import Optional
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from archivebox.misc.system import run, chmod_file
from archivebox.misc.util import enforce_types, is_static_file, dedupe
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from ..logging_util import TimedProgress
from plugins_extractor.ytdlp.config import YTDLP_CONFIG
from plugins_extractor.ytdlp.binaries import YTDLP_BINARY
def get_output_path():
return 'media/'
@ -25,7 +27,6 @@ def get_embed_path(archiveresult=None):
@enforce_types
def should_save_media(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
from plugins_extractor.ytdlp.apps import YTDLP_CONFIG
if is_static_file(link.url):
return False
@ -40,10 +41,6 @@ def should_save_media(link: Link, out_dir: Optional[Path]=None, overwrite: Optio
def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=0) -> ArchiveResult:
"""Download playlists or individual video, audio, and subtitles using youtube-dl or yt-dlp"""
# from plugins_extractor.chrome.apps import CHROME_CONFIG
from plugins_extractor.ytdlp.apps import YTDLP_BINARY, YTDLP_CONFIG
YTDLP_BIN = YTDLP_BINARY.load()
assert YTDLP_BIN.abspath and YTDLP_BIN.version

View file

@ -12,7 +12,8 @@ from archivebox.misc.util import (
enforce_types,
is_static_file,
)
from archivebox.plugins_extractor.mercury.apps import MERCURY_CONFIG, MERCURY_BINARY
from archivebox.plugins_extractor.mercury.config import MERCURY_CONFIG
from archivebox.plugins_extractor.mercury.binaries import MERCURY_BINARY
from ..logging_util import TimedProgress

View file

@ -3,14 +3,17 @@ __package__ = 'archivebox.extractors'
from pathlib import Path
from typing import Optional
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from archivebox.misc.system import run, chmod_file
from archivebox.misc.util import (
enforce_types,
is_static_file,
)
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from ..logging_util import TimedProgress
from plugins_extractor.chrome.config import CHROME_CONFIG
from plugins_extractor.chrome.binaries import CHROME_BINARY
def get_output_path():
return 'output.pdf'
@ -18,7 +21,6 @@ def get_output_path():
@enforce_types
def should_save_pdf(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
from plugins_extractor.chrome.apps import CHROME_CONFIG
if is_static_file(link.url):
return False
@ -34,8 +36,6 @@ def should_save_pdf(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
def save_pdf(link: Link, out_dir: Optional[Path]=None, timeout: int=60) -> ArchiveResult:
"""print PDF of site to file using chrome --headless"""
from plugins_extractor.chrome.apps import CHROME_CONFIG, CHROME_BINARY
CHROME_BIN = CHROME_BINARY.load()
assert CHROME_BIN.abspath and CHROME_BIN.version

View file

@ -6,12 +6,16 @@ from tempfile import NamedTemporaryFile
from typing import Optional
import json
from ..index.schema import Link, ArchiveResult, ArchiveError
from archivebox.misc.system import run, atomic_write
from archivebox.misc.util import enforce_types, is_static_file
from ..index.schema import Link, ArchiveResult, ArchiveError
from ..logging_util import TimedProgress
from .title import get_html
from plugins_extractor.readability.config import READABILITY_CONFIG
from plugins_extractor.readability.binaries import READABILITY_BINARY
def get_output_path():
return 'readability/'
@ -21,7 +25,6 @@ def get_embed_path(archiveresult=None):
@enforce_types
def should_save_readability(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
from plugins_extractor.readability.apps import READABILITY_CONFIG
if is_static_file(link.url):
return False
@ -37,8 +40,6 @@ def should_save_readability(link: Link, out_dir: Optional[str]=None, overwrite:
def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=0) -> ArchiveResult:
"""download reader friendly version using @mozilla/readability"""
from plugins_extractor.readability.apps import READABILITY_CONFIG, READABILITY_BINARY
READABILITY_BIN = READABILITY_BINARY.load()
assert READABILITY_BIN.abspath and READABILITY_BIN.version

View file

@ -3,11 +3,14 @@ __package__ = 'archivebox.extractors'
from pathlib import Path
from typing import Optional
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from archivebox.misc.system import run, chmod_file
from archivebox.misc.util import enforce_types, is_static_file
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from ..logging_util import TimedProgress
from plugins_extractor.chrome.config import CHROME_CONFIG
from plugins_extractor.chrome.binaries import CHROME_BINARY
def get_output_path():
return 'screenshot.png'
@ -15,7 +18,6 @@ def get_output_path():
@enforce_types
def should_save_screenshot(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
from plugins_extractor.chrome.apps import CHROME_CONFIG
if is_static_file(link.url):
return False
@ -30,7 +32,6 @@ def should_save_screenshot(link: Link, out_dir: Optional[Path]=None, overwrite:
def save_screenshot(link: Link, out_dir: Optional[Path]=None, timeout: int=60) -> ArchiveResult:
"""take screenshot of site using chrome --headless"""
from plugins_extractor.chrome.apps import CHROME_CONFIG, CHROME_BINARY
CHROME_BIN = CHROME_BINARY.load()
assert CHROME_BIN.abspath and CHROME_BIN.version

View file

@ -10,6 +10,11 @@ from archivebox.misc.system import run, chmod_file
from archivebox.misc.util import enforce_types, is_static_file, dedupe
from ..logging_util import TimedProgress
from plugins_extractor.chrome.config import CHROME_CONFIG
from plugins_extractor.chrome.binaries import CHROME_BINARY
from plugins_extractor.singlefile.config import SINGLEFILE_CONFIG
from plugins_extractor.singlefile.binaries import SINGLEFILE_BINARY
def get_output_path():
return 'singlefile.html'
@ -17,7 +22,6 @@ def get_output_path():
@enforce_types
def should_save_singlefile(link: Link, out_dir: Optional[Path]=None, overwrite: Optional[bool]=False) -> bool:
from plugins_extractor.singlefile.apps import SINGLEFILE_CONFIG
if is_static_file(link.url):
return False
@ -26,16 +30,13 @@ def should_save_singlefile(link: Link, out_dir: Optional[Path]=None, overwrite:
if not overwrite and (out_dir / get_output_path()).exists():
return False
return SINGLEFILE_CONFIG.SAVE_SINGLEFILE
return CHROME_CONFIG.USE_CHROME and SINGLEFILE_CONFIG.SAVE_SINGLEFILE
@enforce_types
def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=60) -> ArchiveResult:
"""download full site using single-file"""
from plugins_extractor.chrome.apps import CHROME_CONFIG, CHROME_BINARY
from plugins_extractor.singlefile.apps import SINGLEFILE_CONFIG, SINGLEFILE_BINARY
CHROME_BIN = CHROME_BINARY.load()
assert CHROME_BIN.abspath and CHROME_BIN.version
SINGLEFILE_BIN = SINGLEFILE_BINARY.load()

View file

@ -11,7 +11,9 @@ from archivebox.misc.util import (
htmldecode,
dedupe,
)
from archivebox.plugins_extractor.curl.apps import CURL_CONFIG, CURL_BINARY
from archivebox.plugins_extractor.curl.config import CURL_CONFIG
from archivebox.plugins_extractor.curl.binaries import CURL_BINARY
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from ..logging_util import TimedProgress

View file

@ -17,8 +17,8 @@ from archivebox.misc.util import (
urldecode,
dedupe,
)
from archivebox.plugins_extractor.wget.apps import WGET_BINARY, WGET_CONFIG
from archivebox.plugins_extractor.wget.config import WGET_CONFIG
from archivebox.plugins_extractor.wget.binaries import WGET_BINARY
from ..logging_util import TimedProgress
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError

View file

@ -19,7 +19,7 @@ from archivebox.misc.util import (
from archivebox.config import CONSTANTS, DATA_DIR, VERSION
from archivebox.config.common import SERVER_CONFIG
from archivebox.config.version import get_COMMIT_HASH
from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
from archivebox.plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG
from .schema import Link
from ..logging_util import printable_filesize

View file

@ -19,7 +19,7 @@ from django.utils.functional import cached_property
from archivebox.config import ARCHIVE_DIR, CONSTANTS
from plugins_extractor.favicon.apps import FAVICON_CONFIG
from plugins_extractor.favicon.config import FAVICON_CONFIG
from archivebox.misc.system import get_dir_size
from archivebox.misc.util import ts_to_date_str, parse_date

View file

@ -160,4 +160,4 @@ def apply_migrations(out_dir: Path=DATA_DIR) -> List[str]:
@enforce_types
def get_admins(out_dir: Path=DATA_DIR) -> List[str]:
from django.contrib.auth.models import User
return User.objects.filter(is_superuser=True)
return User.objects.filter(is_superuser=True).exclude(username='system')

View file

@ -510,7 +510,7 @@ def log_removal_finished(all_links: int, to_remove: int):
### Helpers
@enforce_types
def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=DATA_DIR) -> str:
def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=DATA_DIR, color: bool=True) -> str:
"""convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc"""
pwd = str(Path(pwd)) # .resolve()
path = str(path)
@ -520,7 +520,10 @@ def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=DATA_DIR) -> str:
# replace long absolute paths with ./ relative ones to save on terminal output width
if path.startswith(pwd) and (pwd != '/') and path != pwd:
if color:
path = path.replace(pwd, '[light_slate_blue].[/light_slate_blue]', 1)
else:
path = path.replace(pwd, '.', 1)
# quote paths containing spaces
if ' ' in path:
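With a hypothetical DATA_DIR of /data, the new color flag behaves roughly like this:

pretty_path('/data/archive/1729/index.json', pwd='/data', color=False)
# -> './archive/1729/index.json'
pretty_path('/data/archive/1729/index.json', pwd='/data')
# -> '[light_slate_blue].[/light_slate_blue]/archive/1729/index.json' (rich markup for the terminal)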

View file

@ -0,0 +1,94 @@
__package__ = 'archivebox.machine'
import abx
from django.contrib import admin
from django.utils.html import format_html
from abid_utils.admin import ABIDModelAdmin
from machine.models import Machine, NetworkInterface, InstalledBinary
class MachineAdmin(ABIDModelAdmin):
list_display = ('abid', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid', 'health')
sort_fields = ('abid', 'created_at', 'hostname', 'ips', 'os_platform', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'os_arch', 'os_family', 'os_release', 'hw_uuid')
# search_fields = ('id', 'abid', 'guid', 'hostname', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family', 'os_platform', 'os_kernel', 'os_release')
readonly_fields = ('guid', 'created_at', 'modified_at', 'abid_info', 'ips')
fields = (*readonly_fields, 'hostname', 'hw_in_docker', 'hw_in_vm', 'hw_manufacturer', 'hw_product', 'hw_uuid', 'os_arch', 'os_family', 'os_platform', 'os_kernel', 'os_release', 'stats', 'num_uses_succeeded', 'num_uses_failed')
list_filter = ('hw_in_docker', 'hw_in_vm', 'os_arch', 'os_family', 'os_platform')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
@admin.display(
description='Public IP',
ordering='networkinterface__ip_public',
)
def ips(self, machine):
return format_html(
'<a href="/admin/machine/networkinterface/?q={}"><b><code>{}</code></b></a>',
machine.abid,
', '.join(machine.networkinterface_set.values_list('ip_public', flat=True)),
)
class NetworkInterfaceAdmin(ABIDModelAdmin):
list_display = ('abid', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address', 'health')
sort_fields = ('abid', 'created_at', 'machine_info', 'ip_public', 'dns_server', 'isp', 'country', 'region', 'city', 'iface', 'ip_local', 'mac_address')
search_fields = ('abid', 'machine__abid', 'iface', 'ip_public', 'ip_local', 'mac_address', 'dns_server', 'hostname', 'isp', 'city', 'region', 'country')
readonly_fields = ('machine', 'created_at', 'modified_at', 'abid_info', 'mac_address', 'ip_public', 'ip_local', 'dns_server')
fields = (*readonly_fields, 'iface', 'hostname', 'isp', 'city', 'region', 'country', 'num_uses_succeeded', 'num_uses_failed')
list_filter = ('isp', 'country', 'region')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
@admin.display(
description='Machine',
ordering='machine__abid',
)
def machine_info(self, iface):
return format_html(
'<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> &nbsp; {}</a>',
iface.machine.id,
iface.machine.abid,
iface.machine.hostname,
)
class InstalledBinaryAdmin(ABIDModelAdmin):
list_display = ('abid', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256', 'health')
sort_fields = ('abid', 'created_at', 'machine_info', 'name', 'binprovider', 'version', 'abspath', 'sha256')
search_fields = ('abid', 'machine__abid', 'name', 'binprovider', 'version', 'abspath', 'sha256')
readonly_fields = ('created_at', 'modified_at', 'abid_info')
fields = ('machine', 'name', 'binprovider', 'abspath', 'version', 'sha256', *readonly_fields, 'num_uses_succeeded', 'num_uses_failed')
list_filter = ('name', 'binprovider', 'machine_id')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
@admin.display(
description='Machine',
ordering='machine__abid',
)
def machine_info(self, installed_binary):
return format_html(
'<a href="/admin/machine/machine/{}/change"><b><code>[{}]</code></b> &nbsp; {}</a>',
installed_binary.machine.id,
installed_binary.machine.abid,
installed_binary.machine.hostname,
)
@abx.hookimpl
def register_admin(admin_site):
admin_site.register(Machine, MachineAdmin)
admin_site.register(NetworkInterface, NetworkInterfaceAdmin)
admin_site.register(InstalledBinary, InstalledBinaryAdmin)

View file

@ -2,9 +2,17 @@ __package__ = 'archivebox.machine'
from django.apps import AppConfig
import abx
class MachineConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField'
name = 'machine'
verbose_name = 'Machine Info'
@abx.hookimpl
def register_admin(admin_site):
from machine.admin import register_admin
register_admin(admin_site)

View file

@ -8,66 +8,41 @@ from django.db import models
from django.utils import timezone
from django.utils.functional import cached_property
import abx.archivebox.reads
import abx.archivebox.use
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider
from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats
CURRENT_MACHINE = None # global cache for the current machine
CURRENT_INTERFACE = None # global cache for the current network interface
CURRENT_BINARIES = {} # global cache for the currently installed binaries
_CURRENT_MACHINE = None # global cache for the current machine
_CURRENT_INTERFACE = None # global cache for the current network interface
_CURRENT_BINARIES = {} # global cache for the currently installed binaries
MACHINE_RECHECK_INTERVAL = 7 * 24 * 60 * 60 # 1 week (how often should we check for OS/hardware changes?)
NETWORK_INTERFACE_RECHECK_INTERVAL = 1 * 60 * 60 # 1 hour (how often should we check for public IP/private IP/DNS changes?)
INSTALLED_BINARY_RECHECK_INTERVAL = 1 * 30 * 60 # 30min (how often should we check for changes to locally installed binaries?)
class ModelWithHealthStats(models.Model):
num_uses_failed = models.PositiveIntegerField(default=0)
num_uses_succeeded = models.PositiveIntegerField(default=0)
class Meta:
abstract = True
def record_health_failure(self) -> None:
self.num_uses_failed += 1
self.save()
def record_health_success(self) -> None:
self.num_uses_succeeded += 1
self.save()
def reset_health(self) -> None:
# move all the failures to successes when resetting so we dont lose track of the total count
self.num_uses_succeeded = self.num_uses_failed + self.num_uses_succeeded
self.num_uses_failed = 0
self.save()
@property
def health(self) -> int:
total_uses = max(self.num_uses_failed + self.num_uses_succeeded, 1)
success_pct = (self.num_uses_succeeded / total_uses) * 100
return round(success_pct)
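(This mixin moves into abid_utils.models, per the import change above.) The health score is simply the success percentage with a nonzero denominator; a worked example with hypothetical counters:

num_uses_failed, num_uses_succeeded = 3, 9
total_uses = max(num_uses_failed + num_uses_succeeded, 1)  # 12 (the max() avoids divide-by-zero)
health = round((num_uses_succeeded / total_uses) * 100)    # round(75.0) -> 75
# after reset_health(): num_uses_succeeded=12, num_uses_failed=0, health=100
# (failures are folded into successes so the lifetime use count is preserved)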
class MachineManager(models.Manager):
def current(self) -> 'Machine':
"""Get the current machine that ArchiveBox is running on."""
global CURRENT_MACHINE
if CURRENT_MACHINE:
expires_at = CURRENT_MACHINE.modified_at + timedelta(seconds=MACHINE_RECHECK_INTERVAL)
global _CURRENT_MACHINE
if _CURRENT_MACHINE:
expires_at = _CURRENT_MACHINE.modified_at + timedelta(seconds=MACHINE_RECHECK_INTERVAL)
if timezone.now() < expires_at:
# assume the current machine can't change *while archivebox is actively running on it*
# it's not strictly impossible to swap hardware while code is running,
# but it's rare and unusual so we check only once per week
# (e.g. VMware can live-migrate a VM to a new host while it's running)
return CURRENT_MACHINE
return _CURRENT_MACHINE
else:
CURRENT_MACHINE = None
_CURRENT_MACHINE = None
CURRENT_MACHINE, _created = self.update_or_create(
_CURRENT_MACHINE, _created = self.update_or_create(
guid=get_host_guid(),
defaults={
'hostname': socket.gethostname(),
@ -76,11 +51,14 @@ class MachineManager(models.Manager):
'stats': get_host_stats(),
},
)
CURRENT_MACHINE.save() # populate ABID
_CURRENT_MACHINE.save() # populate ABID
return _CURRENT_MACHINE
return CURRENT_MACHINE
class Machine(ABIDModel, ModelWithHealthStats):
"""Audit log entry for a physical machine that was used to do archiving."""
abid_prefix = 'mxn_'
abid_ts_src = 'self.created_at'
abid_uri_src = 'self.guid'
@ -113,6 +91,7 @@ class Machine(ABIDModel, ModelWithHealthStats):
# STATS COUNTERS
stats = models.JSONField(default=dict, null=False) # e.g. {"cpu_load": [1.25, 2.4, 1.4], "mem_swap_used_pct": 56, ...}
# num_uses_failed = models.PositiveIntegerField(default=0) # from ModelWithHealthStats
# num_uses_succeeded = models.PositiveIntegerField(default=0)
@ -127,18 +106,18 @@ class NetworkInterfaceManager(models.Manager):
def current(self) -> 'NetworkInterface':
"""Get the current network interface for the current machine."""
global CURRENT_INTERFACE
if CURRENT_INTERFACE:
global _CURRENT_INTERFACE
if _CURRENT_INTERFACE:
# assume the current network interface (public IP, DNS servers, etc.) wont change more than once per hour
expires_at = CURRENT_INTERFACE.modified_at + timedelta(seconds=NETWORK_INTERFACE_RECHECK_INTERVAL)
expires_at = _CURRENT_INTERFACE.modified_at + timedelta(seconds=NETWORK_INTERFACE_RECHECK_INTERVAL)
if timezone.now() < expires_at:
return CURRENT_INTERFACE
return _CURRENT_INTERFACE
else:
CURRENT_INTERFACE = None
_CURRENT_INTERFACE = None
machine = Machine.objects.current()
net_info = get_host_network()
CURRENT_INTERFACE, _created = self.update_or_create(
_CURRENT_INTERFACE, _created = self.update_or_create(
machine=machine,
ip_public=net_info.pop('ip_public'),
ip_local=net_info.pop('ip_local'),
@ -146,14 +125,16 @@ class NetworkInterfaceManager(models.Manager):
dns_server=net_info.pop('dns_server'),
defaults=net_info,
)
CURRENT_INTERFACE.save() # populate ABID
_CURRENT_INTERFACE.save() # populate ABID
return CURRENT_INTERFACE
return _CURRENT_INTERFACE
class NetworkInterface(ABIDModel, ModelWithHealthStats):
"""Audit log entry for a physical network interface / internet connection that was used to do archiving."""
abid_prefix = 'ixf_'
abid_ts_src = 'self.machine.created_at'
abid_uri_src = 'self.machine.guid'
@ -183,7 +164,7 @@ class NetworkInterface(ABIDModel, ModelWithHealthStats):
region = models.CharField(max_length=63, default=None, null=False) # e.g. California
country = models.CharField(max_length=63, default=None, null=False) # e.g. United States
# STATS COUNTERS (from ModelWithHealthStats)
# STATS COUNTERS (inherited from ModelWithHealthStats)
# num_uses_failed = models.PositiveIntegerField(default=0)
# num_uses_succeeded = models.PositiveIntegerField(default=0)
@ -202,8 +183,8 @@ class InstalledBinaryManager(models.Manager):
def get_from_db_or_cache(self, binary: BaseBinary) -> 'InstalledBinary':
"""Get or create an InstalledBinary record for a Binary on the local machine"""
global CURRENT_BINARIES
cached_binary = CURRENT_BINARIES.get(binary.id)
global _CURRENT_BINARIES
cached_binary = _CURRENT_BINARIES.get(binary.name)
if cached_binary:
expires_at = cached_binary.modified_at + timedelta(seconds=INSTALLED_BINARY_RECHECK_INTERVAL)
if timezone.now() < expires_at:
@ -218,7 +199,7 @@ class InstalledBinaryManager(models.Manager):
or binary.sha256 != cached_binary.sha256
)
if is_different_from_cache:
CURRENT_BINARIES.pop(binary.id)
_CURRENT_BINARIES.pop(binary.name)
else:
return cached_binary
else:
@ -229,7 +210,7 @@ class InstalledBinaryManager(models.Manager):
return cached_binary
else:
# cached binary is too old, reload it from scratch
CURRENT_BINARIES.pop(binary.id)
_CURRENT_BINARIES.pop(binary.name)
if not binary.abspath or not binary.version or not binary.sha256:
# if binary was not yet loaded from filesystem, do it now
@ -239,7 +220,7 @@ class InstalledBinaryManager(models.Manager):
assert binary.loaded_binprovider and binary.loaded_abspath and binary.loaded_version and binary.loaded_sha256, f'Failed to load binary {binary.name} abspath, version, and sha256'
CURRENT_BINARIES[binary.id], _created = self.update_or_create(
_CURRENT_BINARIES[binary.name], _created = self.update_or_create(
machine=Machine.objects.current(),
name=binary.name,
binprovider=binary.loaded_binprovider.name,
@ -247,7 +228,7 @@ class InstalledBinaryManager(models.Manager):
abspath=str(binary.loaded_abspath),
sha256=str(binary.loaded_sha256),
)
cached_binary = CURRENT_BINARIES[binary.id]
cached_binary = _CURRENT_BINARIES[binary.name]
cached_binary.save() # populate ABID
# if we get this far, make sure the DB record matches the in-memory cache
@ -282,11 +263,11 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
version = models.CharField(max_length=32, default=None, null=False, blank=True)
sha256 = models.CharField(max_length=64, default=None, null=False, blank=True)
# MUTABLE PROPERTIES
# MUTABLE PROPERTIES (TODO)
# is_pinned = models.BooleanField(default=False) # i.e. should this binary superceede other binaries with the same name on the host?
# is_valid = models.BooleanField(default=True) # i.e. is this binary still available on the host?
# STATS COUNTERS (from ModelWithHealthStats)
# STATS COUNTERS (inherited from ModelWithHealthStats)
# num_uses_failed = models.PositiveIntegerField(default=0)
# num_uses_succeeded = models.PositiveIntegerField(default=0)
@ -310,7 +291,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
if not hasattr(self, 'machine'):
self.machine = Machine.objects.current()
if not self.binprovider:
all_known_binproviders = list(abx.archivebox.use.get_BINPROVIDERS().values())
all_known_binproviders = list(abx.archivebox.reads.get_BINPROVIDERS().values())
binary = BaseBinary(name=self.name, binproviders=all_known_binproviders).load(fresh=True)
self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None
if not self.abspath:
@ -324,7 +305,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
@cached_property
def BINARY(self) -> BaseBinary:
for binary in abx.archivebox.use.get_BINARIES().values():
for binary in abx.archivebox.reads.get_BINARIES().values():
if binary.name == self.name:
return binary
raise Exception(f'Orphaned InstalledBinary {self.name} {self.binprovider} was found in DB, could not find any plugin that defines it')
@ -332,7 +313,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
@cached_property
def BINPROVIDER(self) -> BaseBinProvider:
for binprovider in abx.archivebox.use.get_BINPROVIDERS().values():
for binprovider in abx.archivebox.reads.get_BINPROVIDERS().values():
if binprovider.name == self.binprovider:
return binprovider
raise Exception(f'Orphaned InstalledBinary(name={self.name}) was found in DB, could not find any plugin that defines BinProvider(name={self.binprovider})')
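All three managers (Machine, NetworkInterface, InstalledBinary) share the same module-level TTL-cache shape; a condensed sketch of the pattern, with refresh_from_host() standing in for the real update_or_create logic:

from datetime import timedelta
from django.utils import timezone

_CACHE = None
RECHECK_INTERVAL = 60 * 60  # seconds before the cached row is considered stale

def current():
    global _CACHE
    if _CACHE and timezone.now() < _CACHE.modified_at + timedelta(seconds=RECHECK_INTERVAL):
        return _CACHE             # fresh enough: trust the in-memory row
    _CACHE = refresh_from_host()  # stale or empty: re-detect host state and update_or_create
    return _CACHE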

View file

@ -189,14 +189,16 @@ def version(quiet: bool=False,
if quiet or '--version' in sys.argv:
return
from rich.panel import Panel
from rich.console import Console
console = Console()
prnt = console.print
from plugins_auth.ldap.apps import LDAP_CONFIG
from plugins_auth.ldap.config import LDAP_CONFIG
from django.conf import settings
from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID
from archivebox.config.paths import get_data_locations, get_code_locations
from abx.archivebox.base_binary import BaseBinary, apt, brew, env
@ -221,7 +223,7 @@ def version(quiet: bool=False,
f'PLATFORM={platform.platform()}',
f'PYTHON={sys.implementation.name.title()}' + (' (venv)' if CONSTANTS.IS_INSIDE_VENV else ''),
)
OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS.DATA_DIR.is_mount or CONSTANTS.DATA_LOCATIONS.ARCHIVE_DIR.is_mount
OUTPUT_IS_REMOTE_FS = get_data_locations().DATA_DIR.is_mount or get_data_locations().ARCHIVE_DIR.is_mount
DATA_DIR_STAT = CONSTANTS.DATA_DIR.stat()
prnt(
f'EUID={os.geteuid()}:{os.getegid()} UID={RUNNING_AS_UID}:{RUNNING_AS_GID} PUID={ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}',
@ -241,6 +243,21 @@ def version(quiet: bool=False,
)
prnt()
if not (os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) and os.access(CONSTANTS.CONFIG_FILE, os.R_OK)):
PANEL_TEXT = '\n'.join((
# '',
# f'[yellow]CURRENT DIR =[/yellow] [red]{os.getcwd()}[/red]',
'',
'[violet]Hint:[/violet] [green]cd[/green] into a collection [blue]DATA_DIR[/blue] and run [green]archivebox version[/green] again...',
' [grey53]OR[/grey53] run [green]archivebox init[/green] to create a new collection in the current dir.',
'',
' [i][grey53](this is [red]REQUIRED[/red] if you are opening a Github Issue to get help)[/grey53][/i]',
'',
))
prnt(Panel(PANEL_TEXT, expand=False, border_style='grey53', title='[red]:exclamation: No collection [blue]DATA_DIR[/blue] is currently active[/red]', subtitle='Full version info is only available when inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]'))
prnt()
return
prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]')
failures = []
for name, binary in reversed(list(settings.BINARIES.items())):
@ -299,13 +316,13 @@ def version(quiet: bool=False,
prnt()
prnt('[deep_sky_blue3][i] Code locations:[/deep_sky_blue3]')
for name, path in CONSTANTS.CODE_LOCATIONS.items():
for name, path in get_code_locations().items():
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
prnt()
if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) or os.access(CONSTANTS.CONFIG_FILE, os.R_OK):
prnt('[bright_yellow][i] Data locations:[/bright_yellow]')
for name, path in CONSTANTS.DATA_LOCATIONS.items():
for name, path in get_data_locations().items():
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
from archivebox.misc.checks import check_data_dir_permissions
@ -395,7 +412,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
print(f' √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}')
# from django.contrib.auth.models import User
# if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exists():
# if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exclude(username='system').exists():
# print('{green}[+] Creating admin user account...{reset}'.format(**SHELL_CONFIG.ANSI))
# call_command("createsuperuser", interactive=True)
@ -486,8 +503,12 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
html_index.rename(f"{index_name}.html")
CONSTANTS.PERSONAS_DIR.mkdir(parents=True, exist_ok=True)
CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True)
CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True)
CONSTANTS.DEFAULT_TMP_DIR.mkdir(parents=True, exist_ok=True)
CONSTANTS.DEFAULT_LIB_DIR.mkdir(parents=True, exist_ok=True)
from archivebox.config.common import STORAGE_CONFIG
STORAGE_CONFIG.TMP_DIR.mkdir(parents=True, exist_ok=True)
STORAGE_CONFIG.LIB_DIR.mkdir(parents=True, exist_ok=True)
if install:
run_subcommand('install', pwd=out_dir)
@ -1115,14 +1136,14 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
from django.contrib.auth import get_user_model
User = get_user_model()
if not User.objects.filter(is_superuser=True).exists():
if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
stderr('\n[+] Don\'t forget to create a new admin user for the Web UI...', color='green')
stderr(' archivebox manage createsuperuser')
# run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
print('\n[green][√] Set up ArchiveBox and its dependencies successfully.[/green]\n', file=sys.stderr)
from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
from plugins_pkg.pip.binaries import ARCHIVEBOX_BINARY
extra_args = []
if binproviders:
@ -1253,7 +1274,7 @@ def schedule(add: bool=False,
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
check_data_folder()
from archivebox.plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
from archivebox.plugins_pkg.pip.binaries import ARCHIVEBOX_BINARY
from archivebox.config.permissions import USER
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
@ -1399,23 +1420,14 @@ def server(runserver_args: Optional[List[str]]=None,
from django.core.management import call_command
from django.contrib.auth.models import User
print('[green][+] Starting ArchiveBox webserver...[/green]')
print(' > Logging errors to ./logs/errors.log')
if not User.objects.filter(is_superuser=True).exists():
print('[yellow][!] No admin users exist yet, you will not be able to edit links in the UI.[/yellow]')
if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
print()
print(' [violet]Hint:[/violet] To create an admin user, run:')
print(' archivebox manage createsuperuser')
# print('[yellow][!] No admin accounts exist, you must create one to be able to log in to the Admin UI![/yellow]')
print('[violet]Hint:[/violet] To create an [bold]admin username & password[/bold] for the [deep_sky_blue3][underline][link=http://{host}:{port}/admin]Admin UI[/link][/underline][/deep_sky_blue3], run:')
print(' [green]archivebox manage createsuperuser[/green]')
print()
if SHELL_CONFIG.DEBUG:
if not reload:
runserver_args.append('--noreload') # '--insecure'
call_command("runserver", *runserver_args)
else:
host = '127.0.0.1'
port = '8000'
@ -1431,14 +1443,20 @@ def server(runserver_args: Optional[List[str]]=None,
except IndexError:
pass
print('[green][+] Starting ArchiveBox webserver...[/green]')
print(f' [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
print(f' [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
print(' > Writing ArchiveBox error log to ./logs/errors.log')
if SHELL_CONFIG.DEBUG:
if not reload:
runserver_args.append('--noreload') # '--insecure'
call_command("runserver", *runserver_args)
else:
from queues.supervisor_util import start_server_workers
print()
start_server_workers(host=host, port=port, daemonize=False)
print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")

View file

@ -5,16 +5,24 @@ import sys
from pathlib import Path
from rich import print
from rich.panel import Panel
# DO NOT ADD ANY TOP-LEVEL IMPORTS HERE
# DO NOT ADD ANY TOP-LEVEL IMPORTS HERE other than builtin python libraries
# this file is imported by archivebox/__init__.py
# and any imports here will be imported by EVERYTHING else
# so this file should only be used for pure python checks
# that don't need to import other parts of ArchiveBox
# if a check needs to import other parts of ArchiveBox,
# the imports should be done inside the check function,
# and if it needs to import any django stuff,
# make sure the check is only called after django.setup() has been called
def check_data_folder() -> None:
from archivebox import DATA_DIR, ARCHIVE_DIR
from archivebox.config import CONSTANTS
from archivebox.config.paths import create_and_chown_dir, get_or_create_working_tmp_dir, get_or_create_working_lib_dir
archive_dir_exists = os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()
if not archive_dir_exists:
@ -30,13 +38,27 @@ def check_data_folder() -> None:
raise SystemExit(2)
# Create data dir subdirs
create_and_chown_dir(CONSTANTS.SOURCES_DIR)
create_and_chown_dir(CONSTANTS.PERSONAS_DIR / 'Default')
create_and_chown_dir(CONSTANTS.LOGS_DIR)
# create_and_chown_dir(CONSTANTS.CACHE_DIR)
# Create /tmp and /lib dirs if they don't exist
get_or_create_working_tmp_dir(autofix=True, quiet=False)
get_or_create_working_lib_dir(autofix=True, quiet=False)
# Check data dir permissions, /tmp, and /lib permissions
check_data_dir_permissions()
def check_migrations():
from archivebox import DATA_DIR, CONSTANTS
from archivebox import DATA_DIR
from ..index.sql import list_migrations
pending_migrations = [name for status, name in list_migrations() if not status]
is_migrating = any(arg in sys.argv for arg in ['makemigrations', 'migrate', 'init'])
if pending_migrations:
if pending_migrations and not is_migrating:
print('[red][X] This collection was created with an older version of ArchiveBox and must be upgraded first.[/red]')
print(f' {DATA_DIR}', file=sys.stderr)
print(file=sys.stderr)
@ -44,13 +66,6 @@ def check_migrations():
print(' archivebox init', file=sys.stderr)
raise SystemExit(3)
CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True)
CONSTANTS.LOGS_DIR.mkdir(exist_ok=True)
# CONSTANTS.CACHE_DIR.mkdir(exist_ok=True)
(CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True)
(CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True)
def check_io_encoding():
PYTHON_ENCODING = (sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8')
@ -127,3 +142,98 @@ def check_data_dir_permissions():
STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions]https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions[/link]')
STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid[/link]')
STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts]https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts[/link]')
from archivebox.config.common import STORAGE_CONFIG
# Check /tmp dir permissions
check_tmp_dir(STORAGE_CONFIG.TMP_DIR, throw=False, must_exist=True)
# Check /lib dir permissions
check_lib_dir(STORAGE_CONFIG.LIB_DIR, throw=False, must_exist=True)
def check_tmp_dir(tmp_dir=None, throw=False, quiet=False, must_exist=True):
from archivebox.config.paths import assert_dir_can_contain_unix_sockets, dir_is_writable, get_or_create_working_tmp_dir
from archivebox.misc.logging import STDERR
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
from archivebox.config.common import STORAGE_CONFIG
from archivebox.logging_util import pretty_path
tmp_dir = tmp_dir or STORAGE_CONFIG.TMP_DIR
socket_file = tmp_dir.absolute().resolve() / "supervisord.sock"
if not must_exist and not os.path.isdir(tmp_dir):
# just check that it's viable based on its length (because the dir may not exist yet, we can't check if it's writable)
return len(f'file://{socket_file}') <= 96
tmp_is_valid = False
try:
tmp_is_valid = dir_is_writable(tmp_dir)
tmp_is_valid = tmp_is_valid and assert_dir_can_contain_unix_sockets(tmp_dir)
assert tmp_is_valid, f'ArchiveBox user PUID={ARCHIVEBOX_USER} PGID={ARCHIVEBOX_GROUP} is unable to write to TMP_DIR={tmp_dir}'
assert len(f'file://{socket_file}') <= 96, f'ArchiveBox TMP_DIR={tmp_dir} is too long, dir containing unix socket files must be <90 chars.'
return True
except Exception as e:
if not quiet:
STDERR.print()
ERROR_TEXT = '\n'.join((
'',
f'[red]:cross_mark: ArchiveBox is unable to use TMP_DIR={pretty_path(tmp_dir)}[/red]',
f' [yellow]{e}[/yellow]',
'',
'[blue]Info:[/blue] [grey53]The TMP_DIR is used for the supervisord unix socket file and other temporary files.',
' - It [red]must[/red] be on a local drive (not inside a docker volume, remote network drive, or FUSE mount).',
f' - It [red]must[/red] be readable and writable by the ArchiveBox user (PUID={ARCHIVEBOX_USER}, PGID={ARCHIVEBOX_GROUP}).',
' - It [red]must[/red] be a *short* path (less than 90 characters) due to UNIX path length restrictions for sockets.',
' - It [yellow]should[/yellow] be able to hold at least 200MB of data (in-progress downloads can be large).[/grey53]',
'',
'[violet]Hint:[/violet] Fix it by setting TMP_DIR to a path that meets these requirements, e.g.:',
f' [green]archivebox config --set TMP_DIR={get_or_create_working_tmp_dir(autofix=False, quiet=True) or "/tmp/archivebox"}[/green]',
'',
))
STDERR.print(Panel(ERROR_TEXT, expand=False, border_style='red', title='[red]:cross_mark: Error with configured TMP_DIR[/red]', subtitle='Background workers may fail to start until fixed.'))
STDERR.print()
if throw:
raise OSError(f'TMP_DIR={tmp_dir} is invalid, ArchiveBox is unable to use it and the server will fail to start!') from e
return False
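The 96-character budget exists because UNIX domain socket paths are capped by the kernel (sun_path is ~108 bytes on Linux, 104 on macOS), and the check leaves some headroom. A standalone pre-check mirroring the logic above (helper name hypothetical):

from pathlib import Path

def tmp_dir_socket_ok(tmp_dir: str, limit: int = 96) -> bool:
    # mirrors check_tmp_dir(): the supervisord socket URL must stay short
    socket_file = Path(tmp_dir).absolute().resolve() / 'supervisord.sock'
    return len(f'file://{socket_file}') <= limit

assert tmp_dir_socket_ok('/tmp/archivebox')
assert not tmp_dir_socket_ok('/data/' + 'x' * 100)  # nested too deep for a socket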
def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_exist=True):
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
from archivebox.misc.logging import STDERR
from archivebox.config.paths import dir_is_writable, get_or_create_working_lib_dir
from archivebox.config.common import STORAGE_CONFIG
from archivebox.logging_util import pretty_path
lib_dir = lib_dir or STORAGE_CONFIG.LIB_DIR
if not must_exist and not os.path.isdir(lib_dir):
return True
lib_is_valid = False
try:
lib_is_valid = dir_is_writable(lib_dir)
assert lib_is_valid, f'ArchiveBox user PUID={ARCHIVEBOX_USER} PGID={ARCHIVEBOX_GROUP} is unable to write to LIB_DIR={lib_dir}'
return True
except Exception as e:
if not quiet:
STDERR.print()
ERROR_TEXT = '\n'.join((
'',
f'[red]:cross_mark: ArchiveBox is unable to use LIB_DIR={pretty_path(lib_dir)}[/red]',
f' [yellow]{e}[/yellow]',
'',
'[blue]Info:[/blue] [grey53]The LIB_DIR is used to store ArchiveBox auto-installed plugin library and binary dependencies.',
f' - It [red]must[/red] be readable and writable by the ArchiveBox user (PUID={ARCHIVEBOX_USER}, PGID={ARCHIVEBOX_GROUP}).',
' - It [yellow]should[/yellow] be on a local (ideally fast) drive like an SSD or HDD (not on a network drive or external HDD).',
' - It [yellow]should[/yellow] be able to hold at least 1GB of data (some dependencies like Chrome can be large).[/grey53]',
'',
'[violet]Hint:[/violet] Fix it by setting LIB_DIR to a path that meets these requirements, e.g.:',
f' [green]archivebox config --set LIB_DIR={get_or_create_working_lib_dir(autofix=False, quiet=True) or "/usr/local/share/archivebox"}[/green]',
'',
))
STDERR.print(Panel(ERROR_TEXT, expand=False, border_style='red', title='[red]:cross_mark: Error with configured LIB_DIR[/red]', subtitle='[yellow]Dependencies may not auto-install properly until fixed.[/yellow]'))
STDERR.print()
if throw:
raise OSError(f'LIB_DIR={lib_dir} is invalid, ArchiveBox is unable to use it and dependencies will fail to install.') from e
return False

View file

@ -0,0 +1,30 @@
__package__ = 'archivebox.misc'
from django.core.paginator import Paginator
from django.utils.functional import cached_property
class AccelleratedPaginator(Paginator):
"""
Accelerated Paginator that ignores DISTINCT when counting the total number of rows.
Speeds up SELECT Count(*) on Admin views by >20x.
https://hakibenita.com/optimizing-the-django-admin-paginator
"""
@cached_property
def count(self):
if self.object_list._has_filters(): # type: ignore
# fallback to normal count method on filtered queryset
return super().count
else:
# otherwise count total rows in a separate fast query
return self.object_list.model.objects.count()
# Alternative approach for PostgreSQL: fallback count takes > 200ms
# from django.db import connection, transaction, OperationalError
# with transaction.atomic(), connection.cursor() as cursor:
# cursor.execute('SET LOCAL statement_timeout TO 200;')
# try:
# return super().count
# except OperationalError:
# return 9999999999999
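Hooking it into an admin class is one line via Django's ModelAdmin.paginator attribute; a sketch (the admin class is hypothetical and the paginators module path is assumed):

from django.contrib import admin
from core.models import Snapshot
from archivebox.misc.paginators import AccelleratedPaginator  # module path assumed

class SnapshotAdmin(admin.ModelAdmin):
    paginator = AccelleratedPaginator  # fast COUNT(*) for unfiltered changelists
    show_full_result_count = False     # skip the extra unfiltered total count when filters are active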

View file

@ -49,7 +49,7 @@ if __name__ == '__main__':
prnt('[i] :heavy_dollar_sign: Welcome to the ArchiveBox Shell!')
prnt(' [deep_sky_blue4]Docs:[/deep_sky_blue4] [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage[/link]')
prnt(' [link=https://docs.archivebox.io/en/latest/modules.html]https://docs.archivebox.io/en/latest/modules.html[/link]')
prnt(' [link=https://docs.archivebox.io/en/dev/apidocs/archivebox/archivebox.html]https://docs.archivebox.io/en/dev/apidocs/archivebox/archivebox.html[/link]')
prnt()
prnt(' :grey_question: [violet]Hint[/] [i]Here are some examples to get started:[/]')
prnt(' add[blink][deep_sky_blue4]?[/deep_sky_blue4][/blink] [grey53]# add ? after anything to get help[/]')

View file

View file

@ -0,0 +1,3 @@
from django.contrib import admin
# Register your models here.

View file

@ -0,0 +1,6 @@
from django.apps import AppConfig
class SessionsConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField"
name = "personas"

View file

@ -0,0 +1,67 @@
from django.db import models
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
from django.conf import settings
# class Persona(ABIDModel, ModelWithHealthStats):
# """Aka a "SessionType", its a template for a crawler browsing session containing some config."""
# abid_prefix = 'prs_'
# abid_ts_src = 'self.created_at'
# abid_uri_src = 'self.name'
# abid_subtype_src = 'self.created_by'
# abid_rand_src = 'self.id'
# id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
# abid = ABIDField(prefix=abid_prefix)
# created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False)
# created_at = AutoDateTimeField(default=None, null=False, db_index=True)
# modified_at = models.DateTimeField(auto_now=True)
# name = models.CharField(max_length=100, blank=False, null=False, editable=False)
# persona_dir = models.FilePathField(path=settings.PERSONAS_DIR, allow_files=False, allow_folders=True, blank=True, null=False, editable=False)
# config = models.JSONField(default=dict)
# # e.g. {
# # USER_AGENT: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
# # COOKIES_TXT_FILE: '/path/to/cookies.txt',
# # CHROME_USER_DATA_DIR: '/path/to/chrome/user/data/dir',
# # CHECK_SSL_VALIDITY: False,
# # SAVE_ARCHIVE_DOT_ORG: True,
# # CHROME_BINARY: 'chromium'
# # ...
# # }
# # domain_allowlist = models.CharField(max_length=1024, blank=True, null=False, default='')
# # domain_denylist = models.CharField(max_length=1024, blank=True, null=False, default='')
# class Meta:
# verbose_name = 'Session Type'
# verbose_name_plural = 'Session Types'
# unique_together = (('created_by', 'name'),)
# def clean(self):
# self.persona_dir = settings.PERSONAS_DIR / self.name
# assert self.persona_dir == settings.PERSONAS_DIR / self.name, f'Persona dir {self.persona_dir} must match settings.PERSONAS_DIR / self.name'
# # make sure config keys all exist in FLAT_CONFIG
# # make sure config values all match expected types
# pass
# def save(self, *args, **kwargs):
# self.full_clean()
# # make sure basic file structure is present in persona_dir:
# # - PERSONAS_DIR / self.name /
# # - chrome_profile/
# # - chrome_downloads/
# # - chrome_extensions/
# # - cookies.txt
# # - auth.json
# # - config.json # json dump of the model
# super().save(*args, **kwargs)

View file

@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

View file

@ -0,0 +1,3 @@
from django.shortcuts import render
# Create your views here.

View file

@ -0,0 +1,72 @@
__package__ = 'plugins_auth.ldap'
__id__ = 'ldap'
__label__ = 'LDAP'
__version__ = '2024.10.14'
__author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/django-auth-ldap/django-auth-ldap'
__dependencies__ = ['pip']
import abx
@abx.hookimpl
def get_PLUGIN():
return {
__id__: {
'id': __id__,
'package': __package__,
'label': __label__,
'version': __version__,
'author': __author__,
'homepage': __homepage__,
'dependencies': __dependencies__,
}
}
@abx.hookimpl
def get_CONFIG():
from .config import LDAP_CONFIG
return {
__id__: LDAP_CONFIG
}
@abx.hookimpl
def get_BINARIES():
from .binaries import LDAP_BINARY
return {
'ldap': LDAP_BINARY,
}
def create_superuser_from_ldap_user(sender, user=None, ldap_user=None, **kwargs):
"""
Invoked after LDAP authenticates a user, but before they have a local User account created.
ArchiveBox requires staff/superuser status to view the admin at all, so we must create a user
and set the staff and superuser flags when LDAP authenticates a new person.
"""
from django.conf import settings
if user is None:
return # not authenticated at all
if not user.id and settings.CONFIGS.ldap.LDAP_CREATE_SUPERUSER:
user.is_superuser = True # authenticated via LDAP, but user is not set up in DB yet
user.is_staff = True
print(f'[!] WARNING: Creating new user {user} based on LDAP user {ldap_user} (is_staff={user.is_staff}, is_superuser={user.is_superuser})')
@abx.hookimpl
def ready():
"""
Called at AppConfig.ready() time (settings + models are all loaded)
"""
from django.conf import settings
if settings.CONFIGS.ldap.LDAP_ENABLED:
# tell django-auth-ldap to call our function when a user is authenticated via LDAP
import django_auth_ldap.backend
django_auth_ldap.backend.populate_user.connect(create_superuser_from_ldap_user)

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.plugins_auth.ldap'
__package__ = 'plugins_auth.ldap'
import inspect
@ -9,15 +9,14 @@ from pydantic import InstanceOf
from pydantic_pkgr import BinaryOverrides, SemVer
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, apt
from plugins_pkg.pip.apps import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES
from .settings import LDAP_CONFIG, get_ldap_lib
from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES
from .config import get_ldap_lib
###################### Config ##########################
def get_LDAP_LIB_path(paths=()):
LDAP_LIB = get_ldap_lib()[0]
@ -34,10 +33,12 @@ def get_LDAP_LIB_path(paths=()):
return lib_path
return None
def get_LDAP_LIB_version():
LDAP_LIB = get_ldap_lib()[0]
return LDAP_LIB and SemVer(LDAP_LIB.__version__)
class LdapBinary(BaseBinary):
name: str = 'ldap'
description: str = 'LDAP Authentication'
@ -67,17 +68,3 @@ class LdapBinary(BaseBinary):
}
LDAP_BINARY = LdapBinary()
class LdapAuthPlugin(BasePlugin):
app_label: str = 'ldap'
verbose_name: str = 'LDAP Authentication'
hooks: List[InstanceOf[BaseHook]] = [
LDAP_CONFIG,
*([LDAP_BINARY] if LDAP_CONFIG.LDAP_ENABLED else []),
]
PLUGIN = LdapAuthPlugin()
DJANGO_APP = PLUGIN.AppConfig

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.plugins_auth.ldap'
__package__ = 'plugins_auth.ldap'
import sys

View file

@ -0,0 +1,39 @@
__package__ = 'plugins_extractor.archivedotorg'
__label__ = 'archivedotorg'
__version__ = '2024.10.14'
__author__ = 'ArchiveBox'
__homepage__ = 'https://archive.org'
__dependencies__ = []
import abx
@abx.hookimpl
def get_PLUGIN():
return {
'archivedotorg': {
'PACKAGE': __package__,
'LABEL': __label__,
'VERSION': __version__,
'AUTHOR': __author__,
'HOMEPAGE': __homepage__,
'DEPENDENCIES': __dependencies__,
}
}
@abx.hookimpl
def get_CONFIG():
from .config import ARCHIVEDOTORG_CONFIG
return {
'archivedotorg': ARCHIVEDOTORG_CONFIG
}
# @abx.hookimpl
# def get_EXTRACTORS():
# from .extractors import ARCHIVEDOTORG_EXTRACTOR
#
# return {
# 'archivedotorg': ARCHIVEDOTORG_EXTRACTOR,
# }
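Every plugin converted in this diff exposes the same hookimpl trio (get_PLUGIN, get_CONFIG, and optionally get_BINARIES/get_EXTRACTORS); a minimal sketch of a new plugin following the shape above (all names hypothetical):

__package__ = 'plugins_extractor.example'
__label__ = 'example'
__version__ = '2024.10.14'

import abx

@abx.hookimpl
def get_PLUGIN():
    return {'example': {'PACKAGE': __package__, 'LABEL': __label__, 'VERSION': __version__}}

@abx.hookimpl
def get_CONFIG():
    from .config import EXAMPLE_CONFIG  # a BaseConfigSet subclass, like ArchivedotorgConfig above
    return {'example': EXAMPLE_CONFIG}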

View file

@ -1,28 +0,0 @@
__package__ = 'archivebox.plugins_extractor.archivedotorg'
from typing import List
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_hook import BaseHook
###################### Config ##########################
class ArchivedotorgConfig(BaseConfigSet):
SAVE_ARCHIVE_DOT_ORG: bool = True
ARCHIVEDOTORG_CONFIG = ArchivedotorgConfig()
class ArchivedotorgPlugin(BasePlugin):
app_label: str = 'archivedotorg'
verbose_name: str = 'Archive.org'
hooks: List[BaseHook] = [
ARCHIVEDOTORG_CONFIG
]
PLUGIN = ArchivedotorgPlugin()
DJANGO_APP = PLUGIN.AppConfig

View file

@ -0,0 +1,11 @@
__package__ = 'plugins_extractor.archivedotorg'
from abx.archivebox.base_configset import BaseConfigSet
class ArchivedotorgConfig(BaseConfigSet):
SAVE_ARCHIVE_DOT_ORG: bool = True
ARCHIVEDOTORG_CONFIG = ArchivedotorgConfig()

View file

@ -0,0 +1,65 @@
__package__ = 'plugins_extractor.chrome'
__label__ = 'chrome'
__version__ = '2024.10.14'
__author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/main/archivebox/plugins_extractor/chrome'
__dependencies__ = []
import abx
@abx.hookimpl
def get_PLUGIN():
return {
'chrome': {
'PACKAGE': __package__,
'LABEL': __label__,
'VERSION': __version__,
'AUTHOR': __author__,
'HOMEPAGE': __homepage__,
'DEPENDENCIES': __dependencies__,
}
}
@abx.hookimpl
def get_CONFIG():
from .config import CHROME_CONFIG
return {
'chrome': CHROME_CONFIG
}
@abx.hookimpl
def get_BINARIES():
from .binaries import CHROME_BINARY
return {
'chrome': CHROME_BINARY,
}
# @abx.hookimpl
# def get_EXTRACTORS():
# return {
# 'pdf': PDF_EXTRACTOR,
# 'screenshot': SCREENSHOT_EXTRACTOR,
# 'dom': DOM_EXTRACTOR,
# }
# Hooks Available:
# Events:
# on_crawl_schedule_tick
# on_seed_post_save
# on_crawl_post_save
# on_snapshot_post_save
# on_archiveresult_post_save
# create_root_snapshot_from_seed
# create_archiveresults_pending_from_snapshot
# create_crawl_from_crawlschedule_if_due
# create_crawl_copy_from_template
#
# create_crawl_from_crawlschedule_if_due

View file

@ -0,0 +1,148 @@
__package__ = 'plugins_extractor.chrome'
import os
import platform
from pathlib import Path
from typing import List, Optional
from pydantic import InstanceOf
from pydantic_pkgr import (
BinProvider,
BinName,
BinaryOverrides,
bin_abspath,
)
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
# Depends on Other Plugins:
from archivebox.config import CONSTANTS
from archivebox.config.common import SHELL_CONFIG
from plugins_pkg.puppeteer.binproviders import PUPPETEER_BINPROVIDER
from plugins_pkg.playwright.binproviders import PLAYWRIGHT_BINPROVIDER
from .config import CHROME_CONFIG
CHROMIUM_BINARY_NAMES_LINUX = [
"chromium",
"chromium-browser",
"chromium-browser-beta",
"chromium-browser-unstable",
"chromium-browser-canary",
"chromium-browser-dev",
]
CHROMIUM_BINARY_NAMES_MACOS = ["/Applications/Chromium.app/Contents/MacOS/Chromium"]
CHROMIUM_BINARY_NAMES = CHROMIUM_BINARY_NAMES_LINUX + CHROMIUM_BINARY_NAMES_MACOS
CHROME_BINARY_NAMES_LINUX = [
"google-chrome",
"google-chrome-stable",
"google-chrome-beta",
"google-chrome-canary",
"google-chrome-unstable",
"google-chrome-dev",
"chrome"
]
CHROME_BINARY_NAMES_MACOS = [
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
"/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
]
CHROME_BINARY_NAMES = CHROME_BINARY_NAMES_LINUX + CHROME_BINARY_NAMES_MACOS
APT_DEPENDENCIES = [
'apt-transport-https', 'at-spi2-common', 'chromium-browser',
'fontconfig', 'fonts-freefont-ttf', 'fonts-ipafont-gothic', 'fonts-kacst', 'fonts-khmeros', 'fonts-liberation', 'fonts-noto', 'fonts-noto-color-emoji', 'fonts-symbola', 'fonts-thai-tlwg', 'fonts-tlwg-loma-otf', 'fonts-unifont', 'fonts-wqy-zenhei',
'libasound2', 'libatk-bridge2.0-0', 'libatk1.0-0', 'libatspi2.0-0', 'libavahi-client3', 'libavahi-common-data', 'libavahi-common3', 'libcairo2', 'libcups2',
'libdbus-1-3', 'libdrm2', 'libfontenc1', 'libgbm1', 'libglib2.0-0', 'libice6', 'libnspr4', 'libnss3', 'libsm6', 'libunwind8', 'libx11-6', 'libxaw7', 'libxcb1',
'libxcomposite1', 'libxdamage1', 'libxext6', 'libxfixes3', 'libxfont2', 'libxkbcommon0', 'libxkbfile1', 'libxmu6', 'libxpm4', 'libxrandr2', 'libxt6', 'x11-utils', 'x11-xkb-utils', 'xfonts-encodings',
]
def autodetect_system_chrome_install(PATH=None) -> Optional[Path]:
for bin_name in CHROME_BINARY_NAMES + CHROMIUM_BINARY_NAMES:
abspath = bin_abspath(bin_name, PATH=env.PATH)
if abspath:
return abspath
return None
def create_macos_app_symlink(target: Path, shortcut: Path):
"""
on macOS, some binaries live inside a .app bundle, so we need to
create a tiny bash script instead of a symlink
(so that ../ parent relationships resolve relative to the original .app instead of the callsite dir)
"""
# TODO: should we enforce this? is it useful in any other situation?
# if platform.system().lower() != 'darwin':
# raise Exception(...)
shortcut.unlink(missing_ok=True)
shortcut.write_text(f"""#!/usr/bin/env bash\nexec '{target}' "$@"\n""")
shortcut.chmod(0o777) # make sure it's executable by everyone
###################### Config ##########################
class ChromeBinary(BaseBinary):
name: BinName = CHROME_CONFIG.CHROME_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER, apt, brew]
overrides: BinaryOverrides = {
env.name: {
'abspath': lambda: autodetect_system_chrome_install(PATH=env.PATH), # /usr/bin/google-chrome-stable
},
PUPPETEER_BINPROVIDER.name: {
'packages': ['chrome@stable'], # npx @puppeteer/browsers install chrome@stable
},
PLAYWRIGHT_BINPROVIDER.name: {
'packages': ['chromium'], # playwright install chromium
},
apt.name: {
'packages': APT_DEPENDENCIES,
},
brew.name: {
'packages': ['--cask', 'chromium'],
},
}
@staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None:
from archivebox.config.common import STORAGE_CONFIG
bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
if not (binary.abspath and os.access(binary.abspath, os.F_OK)):
return
bin_dir.mkdir(parents=True, exist_ok=True)
symlink = bin_dir / binary.name
try:
if platform.system().lower() == 'darwin':
# if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink
create_macos_app_symlink(binary.abspath, symlink)
else:
# otherwise on linux we can symlink directly to binary executable
symlink.unlink(missing_ok=True)
symlink.symlink_to(binary.abspath)
except Exception as err:
# print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
# not actually needed, we can just run without it
pass
@staticmethod
def chrome_cleanup_lockfile():
"""
Cleans up any state or runtime files that chrome leaves behind when killed by
a timeout or other error
"""
lock_file = Path("~/.config/chromium/SingletonLock").expanduser()
if SHELL_CONFIG.IN_DOCKER and os.access(lock_file, os.F_OK):
lock_file.unlink()
        if CHROME_CONFIG.CHROME_USER_DATA_DIR:
            # check & remove the lockfile inside the configured user data dir, not the default path
            lock_file = CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock'
            if os.access(lock_file, os.F_OK):
                lock_file.unlink()
CHROME_BINARY = ChromeBinary()
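# A minimal usage sketch (assuming pydantic_pkgr's usual Binary API; load_or_install(),
# .abspath, and .version are not defined in this diff):
#
#   chrome = CHROME_BINARY.load_or_install()  # tries puppeteer, env, playwright, apt, brew in order
#   print(chrome.abspath, chrome.version)     # e.g. /usr/bin/chromium-browser
#   ChromeBinary.symlink_to_lib(chrome)       # expose it under LIB_DIR/bin for other plugins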


@@ -1,35 +1,18 @@
__package__ = 'archivebox.plugins_extractor.chrome'
__package__ = 'plugins_extractor.chrome'
import os
import sys
import platform
from pathlib import Path
from typing import List, Optional
# Depends on other PyPI/vendor packages:
from rich import print
from pydantic import InstanceOf, Field, model_validator
from pydantic_pkgr import (
BinProvider,
BinName,
BinaryOverrides,
bin_abspath,
)
from pydantic import Field, model_validator
from pydantic_pkgr import bin_abspath
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
# from abx.archivebox.base_extractor import BaseExtractor
# from abx.archivebox.base_queue import BaseQueue
from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_binary import env
# Depends on Other Plugins:
from archivebox.config import CONSTANTS
from archivebox.config.common import ARCHIVING_CONFIG, SHELL_CONFIG
from plugins_pkg.puppeteer.apps import PUPPETEER_BINPROVIDER
from plugins_pkg.playwright.apps import PLAYWRIGHT_BINPROVIDER
from archivebox.misc.logging import STDERR
from archivebox.misc.util import dedupe
@@ -129,33 +112,34 @@ class ChromeConfig(BaseConfigSet):
@model_validator(mode='after')
def validate_use_chrome(self):
if self.USE_CHROME and self.CHROME_TIMEOUT < 15:
print(f'[red][!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={self.CHROME_TIMEOUT} seconds)[/red]', file=sys.stderr)
print(' Chrome will fail to archive all sites if set to less than ~15 seconds.', file=sys.stderr)
print(' (Setting it to somewhere between 30 and 300 seconds is recommended)', file=sys.stderr)
print(file=sys.stderr)
print(' If you want to make ArchiveBox run faster, disable specific archive methods instead:', file=sys.stderr)
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles', file=sys.stderr)
print(file=sys.stderr)
STDERR.print(f'[red][!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={self.CHROME_TIMEOUT} seconds)[/red]')
STDERR.print(' Chrome will fail to archive all sites if set to less than ~15 seconds.')
STDERR.print(' (Setting it to somewhere between 30 and 300 seconds is recommended)')
STDERR.print()
STDERR.print(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
STDERR.print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
STDERR.print()
# if user has specified a user data dir, make sure its valid
if self.CHROME_USER_DATA_DIR and os.access(self.CHROME_USER_DATA_DIR, os.R_OK):
# check to make sure user_data_dir/<profile_name> exists
if not (self.CHROME_USER_DATA_DIR / self.CHROME_PROFILE_NAME).is_dir():
print(f'[red][X] Could not find profile "{self.CHROME_PROFILE_NAME}" in CHROME_USER_DATA_DIR.[/red]', file=sys.stderr)
print(f' {self.CHROME_USER_DATA_DIR}', file=sys.stderr)
print(' Make sure you set it to a Chrome user data directory containing a Default profile folder.', file=sys.stderr)
print(' For more info see:', file=sys.stderr)
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR', file=sys.stderr)
STDERR.print(f'[red][X] Could not find profile "{self.CHROME_PROFILE_NAME}" in CHROME_USER_DATA_DIR.[/red]')
STDERR.print(f' {self.CHROME_USER_DATA_DIR}')
STDERR.print(' Make sure you set it to a Chrome user data directory containing a Default profile folder.')
STDERR.print(' For more info see:')
STDERR.print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
if '/Default' in str(self.CHROME_USER_DATA_DIR):
print(file=sys.stderr)
print(' Try removing /Default from the end e.g.:', file=sys.stderr)
print(' CHROME_USER_DATA_DIR="{}"'.format(str(self.CHROME_USER_DATA_DIR).split('/Default')[0]), file=sys.stderr)
STDERR.print()
STDERR.print(' Try removing /Default from the end e.g.:')
STDERR.print(' CHROME_USER_DATA_DIR="{}"'.format(str(self.CHROME_USER_DATA_DIR).split('/Default')[0]))
# hard error is too annoying here, instead just set it to nothing
# raise SystemExit(2)
self.CHROME_USER_DATA_DIR = None
self.update_in_place(CHROME_USER_DATA_DIR=None)
else:
self.CHROME_USER_DATA_DIR = None
if self.CHROME_USER_DATA_DIR is not None:
self.update_in_place(CHROME_USER_DATA_DIR=None)
return self
@@ -206,81 +190,3 @@ class ChromeConfig(BaseConfigSet):
CHROME_CONFIG = ChromeConfig()
class ChromeBinary(BaseBinary):
name: BinName = CHROME_CONFIG.CHROME_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER, apt, brew]
overrides: BinaryOverrides = {
env.name: {
'abspath': lambda: autodetect_system_chrome_install(PATH=env.PATH), # /usr/bin/google-chrome-stable
},
PUPPETEER_BINPROVIDER.name: {
'packages': ['chrome@stable'], # npx @puppeteer/browsers install chrome@stable
},
PLAYWRIGHT_BINPROVIDER.name: {
'packages': ['chromium'], # playwright install chromium
},
apt.name: {
'packages': APT_DEPENDENCIES,
},
brew.name: {
'packages': ['--cask', 'chromium'],
},
}
@staticmethod
def symlink_to_lib(binary, bin_dir=CONSTANTS.LIB_BIN_DIR) -> None:
if not (binary.abspath and os.access(binary.abspath, os.F_OK)):
return
bin_dir.mkdir(parents=True, exist_ok=True)
symlink = bin_dir / binary.name
try:
if platform.system().lower() == 'darwin':
# if on macOS, browser binary is inside a .app, so we need to create a tiny bash script instead of a symlink
create_macos_app_symlink(binary.abspath, symlink)
else:
# otherwise on linux we can symlink directly to binary executable
symlink.unlink(missing_ok=True)
symlink.symlink_to(binary.abspath)
except Exception as err:
# print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
# not actually needed, we can just run without it
pass
@staticmethod
def chrome_cleanup_lockfile():
"""
Cleans up any state or runtime files that chrome leaves behind when killed by
a timeout or other error
"""
lock_file = Path("~/.config/chromium/SingletonLock").expanduser()
if SHELL_CONFIG.IN_DOCKER and os.access(lock_file, os.F_OK):
lock_file.unlink()
if CHROME_CONFIG.CHROME_USER_DATA_DIR:
if os.access(CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock', os.F_OK):
lock_file.unlink()
CHROME_BINARY = ChromeBinary()
class ChromePlugin(BasePlugin):
app_label: str = 'chrome'
verbose_name: str = 'Chrome Browser'
hooks: List[InstanceOf[BaseHook]] = [
CHROME_CONFIG,
CHROME_BINARY,
]
PLUGIN = ChromePlugin()
# PLUGIN.register(settings)
DJANGO_APP = PLUGIN.AppConfig


@@ -0,0 +1,38 @@
__package__ = 'plugins_extractor.curl'
__label__ = 'curl'
__version__ = '2024.10.14'
__author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/curl/curl'
__dependencies__ = []
import abx
@abx.hookimpl
def get_PLUGIN():
return {
'curl': {
'PACKAGE': __package__,
'LABEL': __label__,
'VERSION': __version__,
'AUTHOR': __author__,
'HOMEPAGE': __homepage__,
'DEPENDENCIES': __dependencies__,
}
}
@abx.hookimpl
def get_CONFIG():
from .config import CURL_CONFIG
return {
'curl': CURL_CONFIG
}
@abx.hookimpl
def get_BINARIES():
from .binaries import CURL_BINARY
return {
'curl': CURL_BINARY,
}
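# Hypothetical skeleton (not in this diff) showing how another extractor plugin would
# register itself using the same abx hook pattern:
#
#   __package__ = 'plugins_extractor.example'  # hypothetical plugin package
#
#   import abx
#
#   @abx.hookimpl
#   def get_PLUGIN():
#       return {'example': {'PACKAGE': __package__, 'LABEL': 'example'}}
#
#   @abx.hookimpl
#   def get_CONFIG():
#       from .config import EXAMPLE_CONFIG      # hypothetical config module
#       return {'example': EXAMPLE_CONFIG}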


@@ -1,79 +0,0 @@
__package__ = 'plugins_extractor.curl'
from typing import List, Optional
from pathlib import Path
from pydantic import InstanceOf, Field
from pydantic_pkgr import BinProvider, BinName
from abx.archivebox.base_plugin import BasePlugin, BaseHook
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
# from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
from archivebox.config.common import ARCHIVING_CONFIG
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
class CurlConfig(BaseConfigSet):
SAVE_TITLE: bool = Field(default=True)
SAVE_HEADERS: bool = Field(default=True)
USE_CURL: bool = Field(default=lambda c:
ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG
or FAVICON_CONFIG.SAVE_FAVICON
or c.SAVE_HEADERS
or c.SAVE_TITLE
)
CURL_BINARY: str = Field(default='curl')
CURL_ARGS: List[str] = [
'--silent',
'--location',
'--compressed',
]
CURL_EXTRA_ARGS: List[str] = []
CURL_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.TIMEOUT)
CURL_CHECK_SSL_VALIDITY: bool = Field(default=lambda: ARCHIVING_CONFIG.CHECK_SSL_VALIDITY)
CURL_USER_AGENT: str = Field(default=lambda: ARCHIVING_CONFIG.USER_AGENT)
CURL_COOKIES_FILE: Optional[Path] = Field(default=lambda: ARCHIVING_CONFIG.COOKIES_FILE)
CURL_CONFIG = CurlConfig()
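# Sketch of the computed-default behavior above (assuming BaseConfigSet evaluates callable
# Field defaults against the resolved config instance `c`):
#
#   cfg = CurlConfig(SAVE_TITLE=False, SAVE_HEADERS=False)
#   # cfg.USE_CURL still resolves True while SAVE_ARCHIVE_DOT_ORG or SAVE_FAVICON
#   # is enabled elsewhere, since any curl-backed method keeps curl enabled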
class CurlBinary(BaseBinary):
name: BinName = CURL_CONFIG.CURL_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
CURL_BINARY = CurlBinary()
# class CurlExtractor(BaseExtractor):
# name: ExtractorName = 'curl'
# binary: str = CURL_BINARY.name
# def get_output_path(self, snapshot) -> Path | None:
# curl_index_path = curl_output_path(snapshot.as_link())
# if curl_index_path:
# return Path(curl_index_path)
# return None
# CURL_EXTRACTOR = CurlExtractor()
class CurlPlugin(BasePlugin):
app_label: str = 'curl'
verbose_name: str = 'CURL'
hooks: List[InstanceOf[BaseHook]] = [
CURL_CONFIG,
CURL_BINARY,
# CURL_EXTRACTOR,
]
PLUGIN = CurlPlugin()
DJANGO_APP = PLUGIN.AppConfig
