finish migrating almost all config to new system

This commit is contained in:
Nick Sweeting 2024-09-30 23:21:34 -07:00
parent 4b6a2a3e50
commit d21bc86075
No known key found for this signature in database
25 changed files with 246 additions and 349 deletions

View file

@ -13,43 +13,6 @@ HookType = Literal['CONFIG', 'BINPROVIDER', 'BINARY', 'EXTRACTOR', 'REPLAYER', '
hook_type_names: Tuple[HookType] = get_args(HookType)
class BaseHook(BaseModel):
"""
A Plugin consists of a list of Hooks, applied to django.conf.settings when AppConfig.ready() -> Plugin.register() is called.
Plugin.register() then calls each Hook.register() on the provided settings.
Each Hook.register() function (ideally pure) either takes django.conf.settings as input and returns a new one back,
or modifies django.conf.settings in-place to add the changes corresponding to its HookType.
e.g. for a HookType.CONFIG, the Hook.register() function places the hook in settings.CONFIG (and settings.HOOKS)
An example of an impure Hook would be a CHECK that modifies settings but also calls django.core.checks.register(check).
In practice any object that subclasses BaseHook and provides a .register() function can behave as a Hook.
setup_django() -> imports all settings.INSTALLED_APPS...
# django imports AppConfig, models, migrations, admins, etc. for all installed apps
# django then calls AppConfig.ready() on each installed app...
plugins_pkg.npm.NpmPlugin().AppConfig.ready() # called by django
plugins_pkg.npm.NpmPlugin().register(settings) ->
plugins_pkg.npm.NpmConfigSet().register(settings)
abx.archivebox.base_configset.BaseConfigSet().register(settings)
abx.archivebox.base_hook.BaseHook().register(settings, parent_plugin=plugins_pkg.npm.NpmPlugin())
...
...
Both core ArchiveBox code and plugin code depend on python >= 3.10 and django >= 5.0 w/ sqlite and a filesystem.
Core ArchiveBox code can depend only on python and the pip libraries it ships with, and can never depend on plugin code / node / other binaries.
Plugin code can depend on archivebox core, other django apps, other pip libraries, and other plugins.
Plugins can provide BinProviders + Binaries which can depend on arbitrary other binaries / package managers like curl / wget / yt-dlp / etc.
The execution interface between plugins is simply calling builtinplugins.npm.... functions directly; Django handles
importing all plugin code. There is no need to manually register methods/classes; registration is only needed to call
impure setup functions or to provide runtime state.
settings.CONFIGS / settings.BINPROVIDERS / settings.BINARIES /... etc. are reserved for dynamic runtime state only.
This state is exposed to the broader system in a flat namespace, e.g. CONFIG.IS_DOCKER=True, or BINARIES = [
..., Binary('node', abspath='/usr/local/bin/node', version='22.2.0'), ...
]
"""
model_config = ConfigDict(
extra="allow",
arbitrary_types_allowed=True,
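
To make the register() contract in the docstring above concrete, here is a minimal, self-contained sketch. The SimpleNamespace stands in for django.conf.settings, and ExampleConfigHook is hypothetical, not a real ArchiveBox class:

    from types import SimpleNamespace

    class ExampleConfigHook:
        # a CONFIG-type hook, per the HookType literal above
        hook_type = 'CONFIG'
        id = 'EXAMPLE_CONFIG'

        def register(self, settings, parent_plugin=None):
            # impure variant: mutate settings in-place, as the docstring describes
            settings.HOOKS[self.id] = self
            settings.CONFIG[self.id] = self   # CONFIG hooks also land in settings.CONFIG

    settings = SimpleNamespace(HOOKS={}, CONFIG={})
    ExampleConfigHook().register(settings)
    assert 'EXAMPLE_CONFIG' in settings.CONFIG and 'EXAMPLE_CONFIG' in settings.HOOKS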

View file

@ -13,7 +13,7 @@ from ..main import (
schedule,
)
from archivebox.misc.util import ansi_to_html
from ..config.legacy import ONLY_NEW
from archivebox.config import ARCHIVING_CONFIG
from .auth import API_AUTH_METHODS
@ -58,7 +58,7 @@ class AddCommandSchema(Schema):
urls: List[str]
tag: str = ""
depth: int = 0
update: bool = not ONLY_NEW # Default to the opposite of ONLY_NEW
update: bool = not ARCHIVING_CONFIG.ONLY_NEW # Default to the opposite of ARCHIVING_CONFIG.ONLY_NEW
update_all: bool = False
index_only: bool = False
overwrite: bool = False
@ -68,7 +68,7 @@ class AddCommandSchema(Schema):
class UpdateCommandSchema(Schema):
resume: Optional[float] = 0
only_new: bool = ONLY_NEW
only_new: bool = ARCHIVING_CONFIG.ONLY_NEW
index_only: bool = False
overwrite: bool = False
after: Optional[float] = 0
@ -85,7 +85,7 @@ class ScheduleCommandSchema(Schema):
tag: str = ''
depth: int = 0
overwrite: bool = False
update: bool = not ONLY_NEW
update: bool = not ARCHIVING_CONFIG.ONLY_NEW
clear: bool = False
class ListCommandSchema(Schema):
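
Aside on the schema defaults above: `update: bool = not ARCHIVING_CONFIG.ONLY_NEW` is evaluated once, when the schema module is imported. A minimal sketch with plain pydantic (ninja's Schema is a pydantic model under the hood; ArchivingConfig here is a stand-in for the real ConfigSet):

    from typing import List
    from pydantic import BaseModel

    class ArchivingConfig(BaseModel):       # stand-in for ARCHIVING_CONFIG's ConfigSet
        ONLY_NEW: bool = True

    ARCHIVING_CONFIG = ArchivingConfig()

    class AddCommandSchema(BaseModel):
        urls: List[str]
        update: bool = not ARCHIVING_CONFIG.ONLY_NEW   # frozen at import time -> False

    print(AddCommandSchema(urls=['https://example.com']).update)  # False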

View file

@ -152,18 +152,15 @@ def run_subcommand(subcommand: str,
subcommand_args = subcommand_args or []
if subcommand not in meta_cmds:
from ..config.legacy import setup_django, CONFIG
from archivebox.config.legacy import setup_django
cmd_requires_db = subcommand in archive_cmds
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
if cmd_requires_db:
check_data_folder(CONFIG)
setup_django(in_memory_db=subcommand in fake_db, check_db=cmd_requires_db and not init_pending)
if cmd_requires_db:
check_migrations(CONFIG)
check_migrations()
module = import_module('.archivebox_{}'.format(subcommand), __package__)
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore

View file

@ -1,6 +1,6 @@
__package__ = 'archivebox.config'
from .constants import CONSTANTS, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR, VERSION
from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR, VERSION
from .defaults import (
SHELL_CONFIG,
STORAGE_CONFIG,
@ -23,4 +23,5 @@ __all__ = [
'SERVER_CONFIG',
'ARCHIVING_CONFIG',
'SEARCH_BACKEND_CONFIG',
'CONSTANTS_CONFIG',
]

View file

@ -60,6 +60,7 @@ from .defaults import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CON
from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
from archivebox.plugins_extractor.wget.apps import WGET_CONFIG
from archivebox.plugins_extractor.curl.apps import CURL_CONFIG
ANSI = SHELL_CONFIG.ANSI
LDAP = LDAP_CONFIG.LDAP_ENABLED
@ -81,9 +82,11 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'LDAP_CONFIG': LDAP_CONFIG.as_legacy_config_schema(),
'FAVICON_CONFIG': FAVICON_CONFIG.as_legacy_config_schema(),
# 'FAVICON_CONFIG': FAVICON_CONFIG.as_legacy_config_schema(),
'WGET_CONFIG': WGET_CONFIG.as_legacy_config_schema(),
# 'WGET_CONFIG': WGET_CONFIG.as_legacy_config_schema(),
# 'CURL_CONFIG': CURL_CONFIG.as_legacy_config_schema(),
'ARCHIVE_METHOD_TOGGLES': {
@ -109,7 +112,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'ARCHIVE_METHOD_OPTIONS': {
'RESOLUTION': {'type': str, 'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION','WINDOW_SIZE')},
'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht'},
# 'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht'},
'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},
'MEDIA_MAX_SIZE': {'type': str, 'default': '750m'},
@ -144,15 +147,6 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
]},
'YOUTUBEDL_EXTRA_ARGS': {'type': list, 'default': None},
'CURL_ARGS': {'type': list, 'default': ['--silent',
'--location',
'--compressed'
]},
'CURL_EXTRA_ARGS': {'type': list, 'default': None},
'GIT_ARGS': {'type': list, 'default': ['--recursive']},
'SINGLEFILE_ARGS': {'type': list, 'default': None},
'SINGLEFILE_EXTRA_ARGS': {'type': list, 'default': None},
},
'DEPENDENCY_CONFIG': {
@ -164,9 +158,9 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'USE_YOUTUBEDL': {'type': bool, 'default': True},
'USE_RIPGREP': {'type': bool, 'default': True},
'CURL_BINARY': {'type': str, 'default': 'curl'},
'GIT_BINARY': {'type': str, 'default': 'git'},
'NODE_BINARY': {'type': str, 'default': 'node'},
# 'GIT_BINARY': {'type': str, 'default': 'git'},
# 'CURL_BINARY': {'type': str, 'default': 'curl'},
# 'NODE_BINARY': {'type': str, 'default': 'node'},
# 'YOUTUBEDL_BINARY': {'type': str, 'default': 'yt-dlp'}, # also can accept youtube-dl
# 'SINGLEFILE_BINARY': {'type': str, 'default': lambda c: bin_path('single-file')},
# 'READABILITY_BINARY': {'type': str, 'default': lambda c: bin_path('readability-extractor')},
@ -209,21 +203,12 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])},
'CURL_VERSION': {'default': lambda c: bin_version(c['CURL_BINARY']) if c['USE_CURL'] else None},
# 'CURL_USER_AGENT': {'default': lambda c: c['CURL_USER_AGENT'].format(**c)},
'CURL_ARGS': {'default': lambda c: c['CURL_ARGS'] or []},
'CURL_EXTRA_ARGS': {'default': lambda c: c['CURL_EXTRA_ARGS'] or []},
'SAVE_FAVICON': {'default': lambda c: c['USE_CURL'] and c['SAVE_FAVICON']},
'SAVE_ARCHIVE_DOT_ORG': {'default': lambda c: c['USE_CURL'] and c['SAVE_ARCHIVE_DOT_ORG']},
'USE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},
'SAVE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
# 'USE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
# 'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},
# 'SAVE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
'DEPENDENCIES': {'default': lambda c: get_dependency_info(c)},
# 'DEPENDENCIES': {'default': lambda c: get_dependency_info(c)},
# 'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)},
# 'DATA_LOCATIONS': {'default': lambda c: get_data_locations(c)},
@ -613,13 +598,13 @@ def get_dependency_info(config: benedict) -> ConfigValue:
# 'is_valid': True,
# },
'CURL_BINARY': {
'path': bin_path(config['CURL_BINARY']),
'version': config['CURL_VERSION'],
'hash': bin_hash(config['CURL_BINARY']),
'enabled': config['USE_CURL'],
'is_valid': bool(config['CURL_VERSION']),
},
# 'CURL_BINARY': {
# 'path': bin_path(config['CURL_BINARY']),
# 'version': config['CURL_VERSION'],
# 'hash': bin_hash(config['CURL_BINARY']),
# 'enabled': config['USE_CURL'],
# 'is_valid': bool(config['CURL_VERSION']),
# },
# 'WGET_BINARY': {
# 'path': bin_path(config['WGET_BINARY']),
# 'version': config['WGET_VERSION'],
@ -641,13 +626,13 @@ def get_dependency_info(config: benedict) -> ConfigValue:
# 'enabled': config['USE_MERCURY'],
# 'is_valid': bool(config['MERCURY_VERSION']),
# },
'GIT_BINARY': {
'path': bin_path(config['GIT_BINARY']),
'version': config['GIT_VERSION'],
'hash': bin_hash(config['GIT_BINARY']),
'enabled': config['USE_GIT'],
'is_valid': bool(config['GIT_VERSION']),
},
# 'GIT_BINARY': {
# 'path': bin_path(config['GIT_BINARY']),
# 'version': config['GIT_VERSION'],
# 'hash': bin_hash(config['GIT_BINARY']),
# 'enabled': config['USE_GIT'],
# 'is_valid': bool(config['GIT_VERSION']),
# },
# 'SINGLEFILE_BINARY': {
# 'path': bin_path(config['SINGLEFILE_BINARY']),
# 'version': config['SINGLEFILE_VERSION'],

View file

@ -76,7 +76,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
relevant_configs = {
key: val
for key, val in settings.CONFIG.items()
for key, val in settings.FLAT_CONFIG.items()
if '_BINARY' in key or '_VERSION' in key
}
@ -105,6 +105,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
f'<a href="/admin/environment/config/{config_key}/">{config_key}</a>'
for config_key, config_value in relevant_configs.items()
if str(binary.name).lower().replace('-', '').replace('_', '').replace('ytdlp', 'youtubedl') in config_key.lower()
or config_value.lower().endswith(binary.name.lower())
# or binary.name.lower().replace('-', '').replace('_', '') in str(config_value).lower()
)))
# if not binary.provider_overrides:
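
The key-matching expression above normalizes the binary name before substring-matching it against config keys; a hedged restatement as a standalone helper (matches_config_key is illustrative, not a real helper in the codebase):

    def matches_config_key(binary_name: str, config_key: str) -> bool:
        # 'yt-dlp' -> 'ytdlp' -> 'youtubedl', so it matches YOUTUBEDL_BINARY
        normalized = (binary_name.lower()
                      .replace('-', '')
                      .replace('_', '')
                      .replace('ytdlp', 'youtubedl'))
        return normalized in config_key.lower()

    assert matches_config_key('yt-dlp', 'YOUTUBEDL_BINARY')
    assert matches_config_key('curl', 'CURL_VERSION')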

View file

@ -36,7 +36,7 @@ from main import remove
from extractors import archive_links
CONFIG = settings.CONFIG
CONFIG = settings.FLAT_CONFIG
GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False}

View file

@ -1,13 +1,11 @@
__package__ = 'archivebox.core'
from ..config.legacy import (
LDAP
)
from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
def register_signals():
if LDAP:
if LDAP_CONFIG.LDAP_ENABLED:
import django_auth_ldap.backend
from .auth_ldap import create_user

View file

@ -1,9 +1,7 @@
from ..config.legacy import (
LDAP_CREATE_SUPERUSER
)
from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
def create_user(sender, user=None, ldap_user=None, **kwargs):
if not user.id and LDAP_CREATE_SUPERUSER:
if not user.id and LDAP_CONFIG.LDAP_CREATE_SUPERUSER:
user.is_superuser = True
user.is_staff = True

View file

@ -5,7 +5,7 @@ from django.utils import timezone
from django.contrib.auth.middleware import RemoteUserMiddleware
from django.core.exceptions import ImproperlyConfigured
from ..config.legacy import PUBLIC_SNAPSHOTS, REVERSE_PROXY_USER_HEADER, REVERSE_PROXY_WHITELIST
from archivebox.config import SERVER_CONFIG
def detect_timezone(request, activate: bool=True):
@ -32,7 +32,7 @@ def CacheControlMiddleware(get_response):
response = get_response(request)
if '/archive/' in request.path or '/static/' in request.path:
policy = 'public' if PUBLIC_SNAPSHOTS else 'private'
policy = 'public' if SERVER_CONFIG.PUBLIC_SNAPSHOTS else 'private'
response['Cache-Control'] = f'{policy}, max-age=60, stale-while-revalidate=300'
# print('Set Cache-Control header to', response['Cache-Control'])
return response
@ -40,15 +40,15 @@ def CacheControlMiddleware(get_response):
return middleware
class ReverseProxyAuthMiddleware(RemoteUserMiddleware):
header = 'HTTP_{normalized}'.format(normalized=REVERSE_PROXY_USER_HEADER.replace('-', '_').upper())
header = 'HTTP_{normalized}'.format(normalized=SERVER_CONFIG.REVERSE_PROXY_USER_HEADER.replace('-', '_').upper())
def process_request(self, request):
if REVERSE_PROXY_WHITELIST == '':
if SERVER_CONFIG.REVERSE_PROXY_WHITELIST == '':
return
ip = request.META.get('REMOTE_ADDR')
for cidr in REVERSE_PROXY_WHITELIST.split(','):
for cidr in SERVER_CONFIG.REVERSE_PROXY_WHITELIST.split(','):
try:
network = ipaddress.ip_network(cidr)
except ValueError:
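
Two behaviors in the middleware above are easy to verify in isolation: Django exposes an incoming X-Remote-User header as request.META['HTTP_X_REMOTE_USER'], and REVERSE_PROXY_WHITELIST is a comma-separated list of CIDR networks. A minimal sketch (the function names are illustrative):

    import ipaddress

    def normalize_header(name: str) -> str:
        # 'X-Remote-User' -> 'HTTP_X_REMOTE_USER', matching the header= line above
        return 'HTTP_{}'.format(name.replace('-', '_').upper())

    def ip_in_whitelist(ip: str, whitelist: str) -> bool:
        for cidr in whitelist.split(','):
            try:
                network = ipaddress.ip_network(cidr)
            except ValueError:
                continue                  # skip malformed entries, as above
            if ipaddress.ip_address(ip) in network:
                return True
        return False

    assert normalize_header('X-Remote-User') == 'HTTP_X_REMOTE_USER'
    assert ip_in_whitelist('10.0.0.5', '10.0.0.0/8,192.168.0.0/16')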

View file

@ -13,9 +13,7 @@ import abx.archivebox
import abx.archivebox.use
import abx.django.use
from archivebox.config import VERSION, DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS # noqa
from ..config.legacy import CONFIG
from archivebox.config import VERSION, DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS, SHELL_CONFIG, SERVER_CONFIG # noqa
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
@ -80,7 +78,7 @@ LOGOUT_REDIRECT_URL = os.environ.get('LOGOUT_REDIRECT_URL', '/')
PASSWORD_RESET_URL = '/accounts/password_reset/'
APPEND_SLASH = True
DEBUG = CONFIG.DEBUG or ('--debug' in sys.argv)
DEBUG = SHELL_CONFIG.DEBUG or ('--debug' in sys.argv)
INSTALLED_APPS = [
@ -364,10 +362,10 @@ STORAGES = {
### Security Settings
################################################################################
SECRET_KEY = CONFIG.SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_')
SECRET_KEY = SERVER_CONFIG.SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_')
ALLOWED_HOSTS = CONFIG.ALLOWED_HOSTS.split(',')
CSRF_TRUSTED_ORIGINS = list(set(CONFIG.CSRF_TRUSTED_ORIGINS.split(',')))
ALLOWED_HOSTS = SERVER_CONFIG.ALLOWED_HOSTS.split(',')
CSRF_TRUSTED_ORIGINS = list(set(SERVER_CONFIG.CSRF_TRUSTED_ORIGINS.split(',')))
# automatically fix case when user sets ALLOWED_HOSTS (e.g. to archivebox.example.com)
# but forgets to add https://archivebox.example.com to CSRF_TRUSTED_ORIGINS
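
A sketch of the auto-fix the trailing comment describes, assuming the intent is to append an https:// origin for each ALLOWED_HOSTS entry that is missing from CSRF_TRUSTED_ORIGINS (the exact implementation isn't shown in this hunk):

    ALLOWED_HOSTS = ['archivebox.example.com', 'localhost']
    CSRF_TRUSTED_ORIGINS = ['http://localhost:8000']

    for host in ALLOWED_HOSTS:
        if host and host != '*':
            origin = f'https://{host}'
            if origin not in CSRF_TRUSTED_ORIGINS:
                CSRF_TRUSTED_ORIGINS.append(origin)

    # CSRF_TRUSTED_ORIGINS now also contains https://archivebox.example.com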

View file

@ -10,7 +10,7 @@ from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthC
from .serve_static import serve_static
# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
# from .config.legacy import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
# from archivebox.config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
# GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE}

View file

@ -1,7 +1,7 @@
__package__ = 'archivebox.core'
from typing import Callable
from benedict import benedict
import inspect
from typing import Callable, get_type_hints
from pathlib import Path
from django.shortcuts import render, redirect
@ -27,21 +27,13 @@ from core.admin import result_url
from queues.tasks import bg_add
from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG
from ..plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
from ..config.legacy import (
CONFIG_SCHEMA,
DYNAMIC_CONFIG_SCHEMA,
USER_CONFIG,
CONFIG,
)
from ..logging_util import printable_filesize
from archivebox.config import CONSTANTS_CONFIG, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG
from archivebox.misc.util import base_url, htmlencode, ts_to_date_str
from ..search import query_search_index
from .serve_static import serve_static_with_byterange_support
CONFIG = benedict({**CONSTANTS, **CONFIG, **settings.FLAT_CONFIG})
from .serve_static import serve_static_with_byterange_support
from ..plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
from ..logging_util import printable_filesize
from ..search import query_search_index
class HomepageView(View):
@ -502,27 +494,43 @@ class HealthCheckView(View):
def find_config_section(key: str) -> str:
if key in CONSTANTS:
if key in CONSTANTS_CONFIG:
return 'CONSTANT'
matching_sections = [
name for name, opts in CONFIG_SCHEMA.items() if key in opts
section.id for section in settings.CONFIGS.values() if key in section.model_fields
]
section = matching_sections[0] if matching_sections else 'DYNAMIC'
return section
def find_config_default(key: str) -> str:
default_val = USER_CONFIG.get(key, {}).get('default', lambda: None)
if key in CONSTANTS_CONFIG:
return str(CONSTANTS_CONFIG[key])
default_val = None
for config in settings.CONFIGS.values():
if key in config.model_fields:
default_val = config.model_fields[key].default
break
if isinstance(default_val, Callable):
return None
default_val = inspect.getsource(default_val).split('lambda', 1)[-1].split(':', 1)[-1].replace('\n', ' ').strip()
if default_val.count(')') > default_val.count('('):
default_val = default_val[:-1]
else:
default_val = repr(default_val)
default_val = str(default_val)
return default_val
def find_config_type(key: str) -> str:
if key in USER_CONFIG:
return str(USER_CONFIG[key]['type'])
elif key in DYNAMIC_CONFIG_SCHEMA:
return str(type(CONFIG[key]))
for config in settings.CONFIGS.values():
if hasattr(config, key):
type_hints = get_type_hints(config)
try:
return str(type_hints[key].__name__)
except AttributeError:
return str(type_hints[key])
return 'str'
def key_is_safe(key: str) -> bool:
@ -543,40 +551,29 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
"Value": [],
"Default": [],
# "Documentation": [],
"Aliases": [],
# "Aliases": [],
}
for section in CONFIG_SCHEMA.keys():
for key in CONFIG_SCHEMA[section].keys():
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
for section in reversed(list(settings.CONFIGS.values())):
for key, field in section.model_fields.items():
rows['Section'].append(section.id) # section.replace('_', ' ').title().replace(' Config', '')
rows['Key'].append(ItemLink(key, key=key))
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+{key}&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
rows['Type'].append(format_html('<code>{}</code>', find_config_type(key)))
rows['Value'].append(mark_safe(f'<code>{getattr(section, key)}</code>') if key_is_safe(key) else '******** (redacted)')
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
rows['Aliases'].append(', '.join(CONFIG_SCHEMA[section][key].get('aliases', [])))
section = 'DYNAMIC'
for key in DYNAMIC_CONFIG_SCHEMA.keys():
if key in CONSTANTS:
continue
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
rows['Key'].append(ItemLink(key, key=key))
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+{key}&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
rows['Aliases'].append(ItemLink(key, key=key) if key in USER_CONFIG else '')
# rows['Aliases'].append(', '.join(find_config_aliases(key)))
section = 'CONSTANT'
for key in CONSTANTS.keys():
for key in CONSTANTS_CONFIG.keys():
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
rows['Key'].append(ItemLink(key, key=key))
rows['Type'].append(mark_safe(f'<code>{find_config_type(key)}</code>'))
rows['Value'].append(mark_safe(f'<code>{CONFIG[key]}</code>') if key_is_safe(key) else '******** (redacted)')
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+{key}&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
rows['Type'].append(format_html('<code>{}</code>', getattr(type(CONSTANTS_CONFIG[key]), '__name__', repr(CONSTANTS_CONFIG[key]))))
rows['Value'].append(format_html('<code>{}</code>', CONSTANTS_CONFIG[key]) if key_is_safe(key) else '******** (redacted)')
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
rows['Aliases'].append(ItemLink(key, key=key) if key in USER_CONFIG else '')
# rows['Aliases'].append('')
return TableContext(
@ -589,11 +586,12 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
aliases = USER_CONFIG.get(key, {}).get("aliases", [])
# aliases = USER_CONFIG.get(key, {}).get("aliases", [])
aliases = []
if key in CONSTANTS:
if key in CONSTANTS_CONFIG:
section_header = mark_safe(f'[CONSTANTS] &nbsp; <b><code style="color: lightgray">{key}</code></b> &nbsp; <small>(read-only, hardcoded by ArchiveBox)</small>')
elif key in USER_CONFIG:
elif key in settings.FLAT_CONFIG:
section_header = mark_safe(f'data / ArchiveBox.conf &nbsp; [{find_config_section(key)}] &nbsp; <b><code style="color: lightgray">{key}</code></b>')
else:
section_header = mark_safe(f'[DYNAMIC CONFIG] &nbsp; <b><code style="color: lightgray">{key}</code></b> &nbsp; <small>(read-only, calculated at runtime)</small>')
@ -609,7 +607,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
"fields": {
'Key': key,
'Type': find_config_type(key),
'Value': CONFIG[key] if key_is_safe(key) else '********',
'Value': settings.FLAT_CONFIG[key] if key_is_safe(key) else '********',
},
"help_texts": {
'Key': mark_safe(f'''
@ -619,25 +617,25 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
</span>
'''),
'Type': mark_safe(f'''
<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+{key}&type=code">
See full definition in <code>archivebox/config.py</code>...
<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code">
See full definition in <code>archivebox/config</code>...
</a>
'''),
'Value': mark_safe(f'''
{'<b style="color: red">Value is redacted for your security. (Passwords, secrets, API tokens, etc. cannot be viewed in the Web UI)</b><br/><br/>' if not key_is_safe(key) else ''}
<br/><hr/><br/>
Default: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;
<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig.py+{key}&type=code">
<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code">
<code>{find_config_default(key) or '↗️ See in ArchiveBox source code...'}</code>
</a>
<br/><br/>
<p style="display: {"block" if key in USER_CONFIG else "none"}">
<p style="display: {"block" if key in settings.FLAT_CONFIG else "none"}">
<i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i>
<br/><br/>
<code>archivebox config --set {key}="{
val.strip("'")
if (val := find_config_default(key)) else
(repr(CONFIG[key] if key_is_safe(key) else '********')).strip("'")
(repr(settings.FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
}"</code>
</p>
'''),
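
The rewritten find_config_section/find_config_default above lean on pydantic's model_fields instead of the legacy CONFIG_SCHEMA dicts; a minimal sketch of that lookup (ServerConfigSketch is a stand-in, assuming each entry in settings.CONFIGS is a pydantic BaseModel-style instance):

    from pydantic import BaseModel, Field

    class ServerConfigSketch(BaseModel):
        PUBLIC_SNAPSHOTS: bool = Field(default=True)
        SECRET_KEY: str = Field(default='')

    config = ServerConfigSketch()

    # section membership check, as in find_config_section:
    assert 'PUBLIC_SNAPSHOTS' in config.model_fields

    # default lookup, as in find_config_default:
    assert config.model_fields['PUBLIC_SNAPSHOTS'].default is True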

View file

@ -7,21 +7,10 @@ from collections import defaultdict
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from archivebox.misc.system import run, chmod_file
from archivebox.misc.util import (
enforce_types,
is_static_file,
dedupe,
)
from ..config.legacy import (
TIMEOUT,
CURL_ARGS,
CURL_EXTRA_ARGS,
CHECK_SSL_VALIDITY,
SAVE_ARCHIVE_DOT_ORG,
CURL_BINARY,
CURL_VERSION,
CURL_USER_AGENT,
)
from archivebox.misc.util import enforce_types, is_static_file, dedupe
from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
from archivebox.plugins_extractor.curl.apps import CURL_CONFIG, CURL_BINARY
from ..logging_util import TimedProgress
@ -39,27 +28,30 @@ def should_save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, overwr
# if open(path, 'r', encoding='utf-8').read().strip() != 'None':
return False
return SAVE_ARCHIVE_DOT_ORG
return ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG
@enforce_types
def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int=CURL_CONFIG.CURL_TIMEOUT) -> ArchiveResult:
"""submit site to archive.org for archiving via their service, save returned archive url"""
curl_binary = CURL_BINARY.load()
assert curl_binary.abspath and curl_binary.version
out_dir = out_dir or Path(link.link_dir)
output: ArchiveOutput = get_output_path()
archive_org_url = None
submit_url = 'https://web.archive.org/save/{}'.format(link.url)
# later options take precedence
options = [
*CURL_ARGS,
*CURL_EXTRA_ARGS,
*CURL_CONFIG.CURL_ARGS,
*CURL_CONFIG.CURL_EXTRA_ARGS,
'--head',
'--max-time', str(timeout),
*(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
*([] if CHECK_SSL_VALIDITY else ['--insecure']),
*(['--user-agent', '{}'.format(CURL_CONFIG.CURL_USER_AGENT)] if CURL_CONFIG.CURL_USER_AGENT else []),
*([] if CURL_CONFIG.CURL_CHECK_SSL_VALIDITY else ['--insecure']),
]
cmd = [
CURL_BINARY,
str(curl_binary.abspath),
*dedupe(options),
submit_url,
]
@ -97,22 +89,22 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int=
return ArchiveResult(
cmd=cmd,
pwd=str(out_dir),
cmd_version=CURL_VERSION,
cmd_version=str(curl_binary.version),
output=output,
status=status,
**timer.stats,
)
@enforce_types
def parse_archive_dot_org_response(response: bytes) -> Tuple[List[str], List[str]]:
def parse_archive_dot_org_response(response: str) -> Tuple[List[str], List[str]]:
# Parse archive.org response headers
headers: Dict[str, List[str]] = defaultdict(list)
# lowercase all the header names and store in dict
for header in response.splitlines():
if b':' not in header or not header.strip():
if ':' not in header or not header.strip():
continue
name, val = header.decode().split(':', 1)
name, val = header.split(':', 1)
headers[name.lower().strip()].append(val.strip())
# Get successful archive url in "content-location" header or any errors
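
parse_archive_dot_org_response now takes str instead of bytes, so the b':' checks and .decode() calls go away; a quick standalone check of the str-based parsing above:

    from collections import defaultdict

    response = (
        'HTTP/2 200\n'
        'content-location: /web/20240101000000/https://example.com\n'
    )

    headers = defaultdict(list)
    for header in response.splitlines():
        if ':' not in header or not header.strip():
            continue
        name, val = header.split(':', 1)
        headers[name.lower().strip()].append(val.strip())

    assert headers['content-location'] == ['/web/20240101000000/https://example.com']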

View file

@ -2,16 +2,11 @@ __package__ = 'archivebox.extractors'
from pathlib import Path
from typing import Optional
from ..index.schema import Link, ArchiveResult, ArchiveOutput
from archivebox.misc.system import chmod_file, run
from archivebox.misc.util import (
enforce_types,
domain,
dedupe,
)
from ..config.legacy import CONFIG
from archivebox.misc.util import enforce_types, domain, dedupe
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
from archivebox.plugins_extractor.curl.apps import CURL_CONFIG, CURL_BINARY
from ..index.schema import Link, ArchiveResult, ArchiveOutput
from ..logging_util import TimedProgress
@ -22,7 +17,7 @@ def should_save_favicon(link: Link, out_dir: str | Path | None=None, overwrite:
if not overwrite and (out_dir / 'favicon.ico').exists():
return False
return CONFIG.SAVE_FAVICON
return FAVICON_CONFIG.SAVE_FAVICON
@enforce_types
def get_output_path():
@ -30,26 +25,29 @@ def get_output_path():
@enforce_types
def save_favicon(link: Link, out_dir: str | Path | None=None, timeout: int=CONFIG.TIMEOUT) -> ArchiveResult:
def save_favicon(link: Link, out_dir: str | Path | None=None, timeout: int=CURL_CONFIG.CURL_TIMEOUT) -> ArchiveResult:
"""download site favicon from google's favicon api"""
curl_binary = CURL_BINARY.load()
assert curl_binary.abspath and curl_binary.version
out_dir = Path(out_dir or link.link_dir)
assert out_dir.exists()
output: ArchiveOutput = 'favicon.ico'
# later options take precedence
options = [
*CONFIG.CURL_ARGS,
*CONFIG.CURL_EXTRA_ARGS,
*CURL_CONFIG.CURL_ARGS,
*CURL_CONFIG.CURL_EXTRA_ARGS,
'--max-time', str(timeout),
'--output', str(output),
*(['--user-agent', '{}'.format(CONFIG.CURL_USER_AGENT)] if CONFIG.CURL_USER_AGENT else []),
*([] if CONFIG.CHECK_SSL_VALIDITY else ['--insecure']),
*(['--user-agent', '{}'.format(CURL_CONFIG.CURL_USER_AGENT)] if CURL_CONFIG.CURL_USER_AGENT else []),
*([] if CURL_CONFIG.CURL_CHECK_SSL_VALIDITY else ['--insecure']),
]
cmd = [
CONFIG.CURL_BINARY,
str(curl_binary.abspath),
*dedupe(options),
CONFIG.FAVICON_PROVIDER.format(domain(link.url)),
FAVICON_CONFIG.FAVICON_PROVIDER.format(domain(link.url)),
]
status = 'failed'
timer = TimedProgress(timeout, prefix=' ')
@ -65,7 +63,7 @@ def save_favicon(link: Link, out_dir: str | Path | None=None, timeout: int=CONFI
return ArchiveResult(
cmd=cmd,
pwd=str(out_dir),
cmd_version=CONFIG.CURL_VERSION,
cmd_version=str(curl_binary.version),
output=output,
status=status,
**timer.stats,
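
All of the curl-based extractors build their argv with dedupe(options) under the comment "later options take precedence"; a hedged sketch of that behavior, assuming dedupe keys options on their leading flag and keeps the last occurrence (the real helper lives in archivebox.misc.util and the '=' form here is a simplification of the separate-token argv above):

    def dedupe_sketch(options: list) -> list:
        # keep only the last occurrence of each flag, so CURL_EXTRA_ARGS
        # and per-call flags override the CURL_ARGS defaults
        deduped = {}
        for option in options:
            deduped[option.split('=', 1)[0]] = option
        return list(deduped.values())

    options = ['--max-time=60', '--silent', '--max-time=10']
    assert dedupe_sketch(options) == ['--max-time=10', '--silent']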

View file

@ -4,7 +4,6 @@ __package__ = 'archivebox.extractors'
from pathlib import Path
from typing import Optional
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from archivebox.misc.system import run, chmod_file
from archivebox.misc.util import (
enforce_types,
@ -14,8 +13,9 @@ from archivebox.misc.util import (
without_query,
without_fragment,
)
from ..config.legacy import CONFIG
from archivebox.plugins_extractor.git.apps import GIT_CONFIG, GIT_BINARY
from ..logging_util import TimedProgress
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
def get_output_path():
@ -42,28 +42,31 @@ def should_save_git(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
return False
is_clonable_url = (
(domain(link.url) in CONFIG.GIT_DOMAINS)
(domain(link.url) in GIT_CONFIG.GIT_DOMAINS)
or (extension(link.url) == 'git')
)
if not is_clonable_url:
return False
return CONFIG.SAVE_GIT
return GIT_CONFIG.SAVE_GIT
@enforce_types
def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=CONFIG.TIMEOUT) -> ArchiveResult:
def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=GIT_CONFIG.GIT_TIMEOUT) -> ArchiveResult:
"""download full site using git"""
git_binary = GIT_BINARY.load()
assert git_binary.abspath and git_binary.version
out_dir = out_dir or Path(link.link_dir)
output: ArchiveOutput = get_output_path()
output_path = out_dir / output
output_path.mkdir(exist_ok=True)
cmd = [
CONFIG.GIT_BINARY,
str(git_binary.abspath),
'clone',
*CONFIG.GIT_ARGS,
*([] if CONFIG.CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
*GIT_CONFIG.GIT_ARGS,
*([] if GIT_CONFIG.GIT_CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
without_query(without_fragment(link.url)),
]
status = 'succeeded'
@ -88,7 +91,7 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=CONFIG.TIMEO
return ArchiveResult(
cmd=cmd,
pwd=str(out_dir),
cmd_version=CONFIG.GIT_VERSION,
cmd_version=str(git_binary.version),
output=output,
status=status,
**timer.stats,
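
The should_save_git gate above boils down to a domain allowlist plus a .git extension check; a minimal standalone version (is_clonable_url and the domain set are illustrative, with domains taken from the GIT_DOMAINS default earlier in this commit):

    from urllib.parse import urlparse

    GIT_DOMAINS = {'github.com', 'bitbucket.org', 'gitlab.com', 'gist.github.com'}

    def is_clonable_url(url: str) -> bool:
        parsed = urlparse(url)
        return (parsed.netloc in GIT_DOMAINS) or parsed.path.endswith('.git')

    assert is_clonable_url('https://github.com/ArchiveBox/ArchiveBox')
    assert is_clonable_url('https://example.com/some/repo.git')
    assert not is_clonable_url('https://example.com/article')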

View file

@ -4,23 +4,14 @@ from pathlib import Path
from typing import Optional
from ..index.schema import Link, ArchiveResult, ArchiveOutput
from archivebox.misc.system import atomic_write
from archivebox.misc.util import (
enforce_types,
get_headers,
dedupe,
)
from ..config.legacy import (
TIMEOUT,
CURL_BINARY,
CURL_ARGS,
CURL_EXTRA_ARGS,
CURL_USER_AGENT,
CURL_VERSION,
CHECK_SSL_VALIDITY,
SAVE_HEADERS
)
from archivebox.plugins_extractor.curl.apps import CURL_CONFIG, CURL_BINARY
from ..index.schema import Link, ArchiveResult, ArchiveOutput
from ..logging_util import TimedProgress
def get_output_path():
@ -29,34 +20,38 @@ def get_output_path():
@enforce_types
def should_save_headers(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
out_dir = out_dir or Path(link.link_dir)
if not overwrite and (out_dir / get_output_path()).exists():
out_dir_path = Path(out_dir or link.link_dir)
assert out_dir_path
if not overwrite and (out_dir_path / get_output_path()).exists():
return False
return SAVE_HEADERS
return CURL_CONFIG.SAVE_HEADERS
@enforce_types
def save_headers(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> ArchiveResult:
def save_headers(link: Link, out_dir: Optional[str]=None, timeout: int=CURL_CONFIG.CURL_TIMEOUT) -> ArchiveResult:
"""Download site headers"""
out_dir = Path(out_dir or link.link_dir)
output_folder = out_dir.absolute()
curl_binary = CURL_BINARY.load()
assert curl_binary.abspath and curl_binary.version
out_dir_path = Path(out_dir or link.link_dir)
output_folder = out_dir_path.absolute()
output: ArchiveOutput = get_output_path()
status = 'succeeded'
timer = TimedProgress(timeout, prefix=' ')
timer = TimedProgress(timeout + 1, prefix=' ')
# later options take precedence
options = [
*CURL_ARGS,
*CURL_EXTRA_ARGS,
*CURL_CONFIG.CURL_ARGS,
*CURL_CONFIG.CURL_EXTRA_ARGS,
'--head',
'--max-time', str(timeout),
*(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
*([] if CHECK_SSL_VALIDITY else ['--insecure']),
*(['--user-agent', '{}'.format(CURL_CONFIG.CURL_USER_AGENT)] if CURL_CONFIG.CURL_USER_AGENT else []),
*([] if CURL_CONFIG.CURL_CHECK_SSL_VALIDITY else ['--insecure']),
]
cmd = [
CURL_BINARY,
str(curl_binary.abspath),
*dedupe(options),
link.url,
]
@ -72,8 +67,8 @@ def save_headers(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT)
return ArchiveResult(
cmd=cmd,
pwd=str(out_dir),
cmd_version=CURL_VERSION,
pwd=str(out_dir_path),
cmd_version=str(curl_binary.version),
output=output,
status=status,
**timer.stats,

View file

@ -5,18 +5,13 @@ import io
from pathlib import Path
from typing import Optional
from archivebox.config import VERSION
from ..config.legacy import (
SAVE_HTMLTOTEXT,
TIMEOUT,
)
from ..index.schema import Link, ArchiveResult, ArchiveError
from ..logging_util import TimedProgress
from archivebox.config import VERSION, ARCHIVING_CONFIG
from archivebox.config.legacy import SAVE_HTMLTOTEXT
from archivebox.misc.system import atomic_write
from archivebox.misc.util import (
enforce_types,
is_static_file,
)
from archivebox.misc.util import enforce_types, is_static_file
from ..logging_util import TimedProgress
from ..index.schema import Link, ArchiveResult, ArchiveError
from .title import get_html
@ -122,7 +117,7 @@ def should_save_htmltotext(link: Link, out_dir: Optional[Path]=None, overwrite:
@enforce_types
def save_htmltotext(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
def save_htmltotext(link: Link, out_dir: Optional[Path]=None, timeout: int=ARCHIVING_CONFIG.TIMEOUT) -> ArchiveResult:
"""extract search-indexing-friendly text from an HTML document"""
out_dir = Path(out_dir or link.link_dir)

View file

@ -5,23 +5,14 @@ from html.parser import HTMLParser
from pathlib import Path
from typing import Optional
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from archivebox.misc.util import (
enforce_types,
download_url,
htmldecode,
dedupe,
)
from ..config.legacy import (
TIMEOUT,
CHECK_SSL_VALIDITY,
SAVE_TITLE,
CURL_BINARY,
CURL_ARGS,
CURL_EXTRA_ARGS,
CURL_VERSION,
CURL_USER_AGENT,
)
from archivebox.plugins_extractor.curl.apps import CURL_CONFIG, CURL_BINARY
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from ..logging_util import TimedProgress
@ -62,7 +53,7 @@ class TitleParser(HTMLParser):
@enforce_types
def get_html(link: Link, path: Path, timeout: int=TIMEOUT) -> str:
def get_html(link: Link, path: Path, timeout: int=CURL_CONFIG.CURL_TIMEOUT) -> str:
"""
Try to find wget, singlefile and then dom files.
If none is found, download the url again.
@ -98,7 +89,7 @@ def should_save_title(link: Link, out_dir: Optional[str]=None, overwrite: Option
if not overwrite and link.title and not link.title.lower().startswith('http'):
return False
return SAVE_TITLE
return CURL_CONFIG.SAVE_TITLE
def extract_title_with_regex(html):
match = re.search(HTML_TITLE_REGEX, html)
@ -106,22 +97,25 @@ def extract_title_with_regex(html):
return output
@enforce_types
def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=CURL_CONFIG.CURL_TIMEOUT) -> ArchiveResult:
"""try to guess the page's title from its content"""
from core.models import Snapshot
curl_binary = CURL_BINARY.load()
assert curl_binary.abspath and curl_binary.version
output: ArchiveOutput = None
# later options take precedence
options = [
*CURL_ARGS,
*CURL_EXTRA_ARGS,
*CURL_CONFIG.CURL_ARGS,
*CURL_CONFIG.CURL_EXTRA_ARGS,
'--max-time', str(timeout),
*(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
*([] if CHECK_SSL_VALIDITY else ['--insecure']),
*(['--user-agent', '{}'.format(CURL_CONFIG.CURL_USER_AGENT)] if CURL_CONFIG.CURL_USER_AGENT else []),
*([] if CURL_CONFIG.CURL_CHECK_SSL_VALIDITY else ['--insecure']),
]
cmd = [
CURL_BINARY,
str(curl_binary.abspath),
*dedupe(options),
link.url,
]
@ -161,7 +155,7 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -
return ArchiveResult(
cmd=cmd,
pwd=str(out_dir),
cmd_version=CURL_VERSION,
cmd_version=str(curl_binary.version),
output=output,
status=status,
**timer.stats,

View file

@ -430,7 +430,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
def status(out_dir: Path=DATA_DIR) -> None:
"""Print out some info and statistics about the archive collection"""
check_data_folder(CONFIG)
check_data_folder()
from core.models import Snapshot
from django.contrib.auth import get_user_model
@ -573,7 +573,7 @@ def add(urls: Union[str, List[str]],
run_subcommand('init', stdin=None, pwd=out_dir)
# Load list of links from the existing index
check_data_folder(CONFIG)
check_data_folder()
# worker = start_cli_workers()
@ -673,7 +673,7 @@ def remove(filter_str: Optional[str]=None,
out_dir: Path=DATA_DIR) -> List[Link]:
"""Remove the specified URLs from the archive"""
check_data_folder(CONFIG)
check_data_folder()
if snapshots is None:
if filter_str and filter_patterns:
@ -762,7 +762,7 @@ def update(resume: Optional[float]=None,
# from .queues.supervisor_util import start_cli_workers
check_data_folder(CONFIG)
check_data_folder()
# start_cli_workers()
new_links: List[Link] = [] # TODO: Remove input argument: only_new
@ -833,7 +833,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
out_dir: Path=DATA_DIR) -> Iterable[Link]:
"""List, filter, and export information about archive entries"""
check_data_folder(CONFIG)
check_data_folder()
if filter_patterns and filter_patterns_str:
stderr(
@ -881,7 +881,7 @@ def list_links(snapshots: Optional[QuerySet]=None,
before: Optional[float]=None,
out_dir: Path=DATA_DIR) -> Iterable[Link]:
check_data_folder(CONFIG)
check_data_folder()
if snapshots:
all_snapshots = snapshots
@ -905,7 +905,7 @@ def list_folders(links: List[Link],
status: str,
out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
check_data_folder(CONFIG)
check_data_folder()
STATUS_FUNCTIONS = {
"indexed": get_indexed_folders,
@ -926,7 +926,7 @@ def list_folders(links: List[Link],
raise ValueError('Status not recognized.')
@enforce_types
def setup(out_dir: Path=DATA_DIR) -> None:
def install(out_dir: Path=DATA_DIR) -> None:
"""Automatically install all ArchiveBox dependencies and extras"""
from rich import print
@ -937,40 +937,20 @@ def setup(out_dir: Path=DATA_DIR) -> None:
stderr('\n[+] Installing ArchiveBox dependencies automatically...', color='green')
for binary in settings.BINARIES.values():
for binary in reversed(list(settings.BINARIES.values())):
try:
print(binary.load_or_install().model_dump(exclude={'binproviders_supported', 'loaded_binprovider', 'provider_overrides', 'loaded_abspaths', 'bin_dir', 'loaded_respath'}))
except Exception as e:
print(f'[X] Failed to install {binary.name}: {e}')
# from plugins_extractor.curl.apps import CURL_BINARY
# print(CURL_BINARY.load_or_install().model_dump(exclude={'binproviders_supported', 'loaded_binprovider', 'provider_overrides', 'loaded_abspaths', 'bin_dir', 'loaded_respath'}))
# from plugins_extractor.wget.apps import WGET_BINARY
# print(WGET_BINARY.load_or_install().model_dump(exclude={'binproviders_supported', 'loaded_binprovider', 'provider_overrides', 'loaded_abspaths', 'bin_dir', 'loaded_respath'}))
# from plugins_extractor.ytdlp.apps import YTDLP_BINARY
# print(YTDLP_BINARY.load_or_install().model_dump(exclude={'binproviders_supported', 'loaded_binprovider', 'provider_overrides', 'loaded_abspaths', 'bin_dir', 'loaded_respath'}))
# from plugins_extractor.chrome.apps import CHROME_BINARY
# print(CHROME_BINARY.load_or_install().model_dump(exclude={'binproviders_supported', 'loaded_binprovider', 'provider_overrides', 'loaded_abspaths', 'bin_dir', 'loaded_respath'}))
# from plugins_extractor.singlefile.apps import SINGLEFILE_BINARY
# print(SINGLEFILE_BINARY.load_or_install().model_dump(exclude={'binproviders_supported', 'loaded_binprovider', 'provider_overrides', 'loaded_abspaths', 'bin_dir', 'loaded_respath'}))
# from plugins_extractor.readability.apps import READABILITY_BINARY
# print(READABILITY_BINARY.load_or_install().model_dump(exclude={'binproviders_supported', 'loaded_binprovider', 'provider_overrides', 'loaded_abspaths', 'bin_dir', 'loaded_respath'}))
# from plugins_extractor.mercury.apps import MERCURY_BINARY
# print(MERCURY_BINARY.load_or_install().model_dump(exclude={'binproviders_supported', 'loaded_binprovider', 'provider_overrides', 'loaded_abspaths', 'bin_dir', 'loaded_respath'}))
from django.contrib.auth import get_user_model
User = get_user_model()
if not User.objects.filter(is_superuser=True).exists():
stderr('\n[+] Creating new admin user for the Web UI...', color='green')
run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
stderr('\n[+] Don\'t forget to create a new admin user for the Web UI...', color='green')
stderr(' archivebox manage createsuperuser')
# run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
stderr('\n[√] Set up ArchiveBox and its dependencies successfully.', color='green')
@ -978,6 +958,10 @@ def setup(out_dir: Path=DATA_DIR) -> None:
run_shell([ARCHIVEBOX_BINARY.load().abspath, '--version'], capture_output=False, cwd=out_dir)
# backwards-compatibility:
setup = install
@enforce_types
def config(config_options_str: Optional[str]=None,
config_options: Optional[List[str]]=None,
@ -989,7 +973,7 @@ def config(config_options_str: Optional[str]=None,
from rich import print
check_data_folder(CONFIG)
check_data_folder()
if config_options and config_options_str:
stderr(
'[X] You should either pass config values as an arguments '
@ -1090,8 +1074,8 @@ def schedule(add: bool=False,
out_dir: Path=DATA_DIR):
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
check_data_folder(CONFIG)
from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
check_data_folder()
from archivebox.plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
@ -1228,7 +1212,7 @@ def server(runserver_args: Optional[List[str]]=None,
print()
check_data_folder(CONFIG)
check_data_folder()
from django.core.management import call_command
from django.contrib.auth.models import User
@ -1280,7 +1264,7 @@ def server(runserver_args: Optional[List[str]]=None,
def manage(args: Optional[List[str]]=None, out_dir: Path=DATA_DIR) -> None:
"""Run an ArchiveBox Django management command"""
check_data_folder(CONFIG)
check_data_folder()
from django.core.management import execute_from_command_line
if (args and "createsuperuser" in args) and (SHELL_CONFIG.IN_DOCKER and not SHELL_CONFIG.IS_TTY):
@ -1297,7 +1281,7 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=DATA_DIR) -> None:
def shell(out_dir: Path=DATA_DIR) -> None:
"""Enter an interactive ArchiveBox Django shell"""
check_data_folder(CONFIG)
check_data_folder()
from django.core.management import call_command
call_command("shell_plus")

View file

@ -1,13 +1,11 @@
__package__ = 'archivebox.misc'
from benedict import benedict
from archivebox.config import DATA_DIR, ARCHIVE_DIR, CONSTANTS, SHELL_CONFIG
from .logging import stderr
def check_data_folder(config: benedict) -> None:
def check_data_folder() -> None:
archive_dir_exists = ARCHIVE_DIR.exists()
if not archive_dir_exists:
@ -23,7 +21,7 @@ def check_data_folder(config: benedict) -> None:
raise SystemExit(2)
def check_migrations(config: benedict):
def check_migrations():
from ..index.sql import list_migrations
pending_migrations = [name for status, name in list_migrations() if not status]

View file

@ -1,10 +1,10 @@
__package__ = 'plugins_extractor.curl'
from typing import List, Optional, Dict
from typing import List, Optional
from pathlib import Path
from pydantic import InstanceOf, Field
from pydantic_pkgr import BinProvider, BinName, bin_abspath, BinProviderName, ProviderLookupDict
from pydantic_pkgr import BinProvider, BinName
from abx.archivebox.base_plugin import BasePlugin, BaseHook
from abx.archivebox.base_configset import BaseConfigSet
@ -12,15 +12,26 @@ from abx.archivebox.base_binary import BaseBinary, env, apt, brew
# from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
from archivebox.config import ARCHIVING_CONFIG
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
class CurlConfig(BaseConfigSet):
SAVE_CURL: bool = True
# USE_CURL: bool = Field(default=lambda c: c.SAVE_HEADERS or c.SAVE_FAVICON)
SAVE_TITLE: bool = Field(default=True)
SAVE_HEADERS: bool = Field(default=True)
USE_CURL: bool = Field(default=lambda c:
ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG
or FAVICON_CONFIG.SAVE_FAVICON
or c.SAVE_HEADERS
or c.SAVE_TITLE
)
CURL_BINARY: str = Field(default='curl')
CURL_ARGS: List[str] = [
'--silent',
'--location',
'--compressed',
]
CURL_EXTRA_ARGS: List[str] = []
CURL_TIMEOUT: int = Field(default=lambda: ARCHIVING_CONFIG.TIMEOUT)
@ -35,12 +46,6 @@ CURL_CONFIG = CurlConfig()
class CurlBinary(BaseBinary):
name: BinName = CURL_CONFIG.CURL_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
brew.name: {
'abspath': lambda: bin_abspath(CURL_CONFIG.CURL_BINARY, PATH=f'/opt/homebrew/opt/curl/bin:{brew.PATH}'),
},
}
CURL_BINARY = CurlBinary()
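
The USE_CURL field above uses the BaseConfigSet convention of Field(default=lambda c: ...), where the lambda receives the config instance. With plain pydantic the same derived default can be sketched via a model_validator (CurlConfigSketch is a stand-in, not the real class, and it omits the cross-plugin SAVE_ARCHIVE_DOT_ORG/SAVE_FAVICON terms):

    from typing import Optional
    from pydantic import BaseModel, model_validator

    class CurlConfigSketch(BaseModel):
        SAVE_TITLE: bool = True
        SAVE_HEADERS: bool = True
        USE_CURL: Optional[bool] = None     # None means "derive from the others"

        @model_validator(mode='after')
        def derive_use_curl(self):
            if self.USE_CURL is None:
                self.USE_CURL = self.SAVE_HEADERS or self.SAVE_TITLE
            return self

    assert CurlConfigSketch().USE_CURL is True
    assert CurlConfigSketch(SAVE_TITLE=False, SAVE_HEADERS=False).USE_CURL is False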

View file

@ -1,13 +1,13 @@
__package__ = 'plugins_extractor.wget'
import sys
from typing import List, Optional, Dict
from typing import List, Optional
from pathlib import Path
from subprocess import run, DEVNULL
from rich import print
from pydantic import InstanceOf, Field, model_validator
from pydantic_pkgr import BinProvider, BinName, bin_abspath, BinProviderName, ProviderLookupDict
from pydantic_pkgr import BinProvider, BinName
from abx.archivebox.base_plugin import BasePlugin, BaseHook
from abx.archivebox.base_configset import BaseConfigSet
@ -80,12 +80,6 @@ WGET_CONFIG = WgetConfig()
class WgetBinary(BaseBinary):
name: BinName = WGET_CONFIG.WGET_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
brew.name: {
'abspath': lambda: bin_abspath(WGET_CONFIG.WGET_BINARY, PATH=f'/opt/homebrew/opt/wget/bin:{brew.PATH}'),
},
}
WGET_BINARY = WgetBinary()

View file

@ -11,7 +11,7 @@ from archivebox.misc.util import enforce_types
from archivebox.misc.logging import stderr
from archivebox.config.legacy import ANSI
# from archivebox.archivebox.config import settings.CONFIGS.SearchBackendConfig
from archivebox.config import SEARCH_BACKEND_CONFIG
def log_index_started(url):
@ -58,13 +58,13 @@ def get_indexable_content(results: QuerySet):
def import_backend():
for backend in settings.SEARCH_BACKENDS.values():
if backend.name == settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE:
if backend.name == SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE:
return backend
raise Exception(f'Could not load {settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE} as search backend')
raise Exception(f'Could not load {SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE} as search backend')
@enforce_types
def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir: Path=settings.DATA_DIR, skip_text_index: bool=False) -> None:
if not settings.CONFIGS.SearchBackendConfig.USE_INDEXING_BACKEND:
if not SEARCH_BACKEND_CONFIG.USE_INDEXING_BACKEND:
return
if not skip_text_index and texts:
@ -86,7 +86,7 @@ def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir:
def query_search_index(query: str, out_dir: Path=settings.DATA_DIR) -> QuerySet:
from core.models import Snapshot
if settings.CONFIGS.SearchBackendConfig.USE_SEARCHING_BACKEND:
if SEARCH_BACKEND_CONFIG.USE_SEARCHING_BACKEND:
backend = import_backend()
try:
snapshot_pks = backend.search(query)
@ -106,7 +106,7 @@ def query_search_index(query: str, out_dir: Path=settings.DATA_DIR) -> QuerySet:
@enforce_types
def flush_search_index(snapshots: QuerySet):
if not settings.CONFIGS.SearchBackendConfig.USE_INDEXING_BACKEND or not snapshots:
if not SEARCH_BACKEND_CONFIG.USE_INDEXING_BACKEND or not snapshots:
return
backend = import_backend()
snapshot_pks = (str(pk) for pk in snapshots.values_list('pk', flat=True))
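
import_backend above now compares each backend's .name against the typed SEARCH_BACKEND_CONFIG instead of the settings.CONFIGS.SearchBackendConfig indirection; a minimal sketch of that selection loop (the Backend namedtuple and registry contents are illustrative):

    from collections import namedtuple

    Backend = namedtuple('Backend', 'name search')

    SEARCH_BACKENDS = {
        'ripgrep': Backend(name='ripgrep', search=lambda q: []),
        'sqlite':  Backend(name='sqlitefts', search=lambda q: []),
    }

    def import_backend(engine_name: str) -> Backend:
        for backend in SEARCH_BACKENDS.values():
            if backend.name == engine_name:
                return backend
        raise Exception(f'Could not load {engine_name} as search backend')

    assert import_backend('ripgrep').name == 'ripgrep'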

@ -1 +1 @@
Subproject commit 4f9486ab86a65f83ad1bfd94320795b8e09871aa
Subproject commit 4f31b355fbf319a54b38953795b17b1b04db4348