__package__ = 'archivebox.core'

import os
import sys
import re
import logging
import inspect
import tempfile

from typing import Any, Dict
from pathlib import Path
from django.utils.crypto import get_random_string

from ..config import CONFIG
from ..config_stubs import AttrDict
assert isinstance(CONFIG, AttrDict)

# Work out which management command is running by inspecting only the first few CLI args
IS_MIGRATING = any(command in sys.argv[:3] for command in ('makemigrations', 'migrate'))
IS_TESTING = 'PYTEST_CURRENT_TEST' in os.environ or 'test' in sys.argv[:3]
IS_SHELL = any(command in sys.argv[:3] for command in ('shell', 'shell_plus'))

################################################################################
### ArchiveBox Plugin Settings
################################################################################

BUILTIN_PLUGINS_DIR = CONFIG.PACKAGE_DIR / 'builtin_plugins'  # /app/archivebox/builtin_plugins
USERDATA_PLUGINS_DIR = CONFIG.OUTPUT_DIR / 'user_plugins'     # /data/user_plugins


def find_plugins_in_dir(plugins_dir, prefix: str) -> Dict[str, Path]:
    """Map dotted app names to the plugin directories found directly under plugins_dir.

    A plugin is any immediate subdirectory that contains an apps.py file.
    Keys have the form '<prefix>.<subdirectory name>', values are the
    subdirectory paths, inserted in sorted order of the apps.py paths.
    """
    discovered: Dict[str, Path] = {}
    for apps_file in sorted(plugins_dir.glob('*/apps.py')):
        plugin_dir = apps_file.parent
        discovered[f'{prefix}.{plugin_dir.name}'] = plugin_dir
    return discovered


# builtin plugins first, then user plugins (a later entry wins if the same key appears twice)
INSTALLED_PLUGINS = find_plugins_in_dir(BUILTIN_PLUGINS_DIR, prefix='builtin_plugins')
INSTALLED_PLUGINS.update(find_plugins_in_dir(USERDATA_PLUGINS_DIR, prefix='user_plugins'))

### Plugins Globals (filled by plugantic.apps.load_plugins() after Django startup)
PLUGINS = AttrDict({})
HOOKS = AttrDict({})

CONFIGS = AttrDict({})
BINPROVIDERS = AttrDict({})
BINARIES = AttrDict({})
EXTRACTORS = AttrDict({})
REPLAYERS = AttrDict({})
CHECKS = AttrDict({})
ADMINDATAVIEWS = AttrDict({})

# Lookup table from registry name to the (initially empty) registry object above
PLUGIN_KEYS = AttrDict(dict(
    CONFIGS=CONFIGS,
    BINPROVIDERS=BINPROVIDERS,
    BINARIES=BINARIES,
    EXTRACTORS=EXTRACTORS,
    REPLAYERS=REPLAYERS,
    CHECKS=CHECKS,
    ADMINDATAVIEWS=ADMINDATAVIEWS,
))

################################################################################
### Django Core Settings
################################################################################

WSGI_APPLICATION = 'core.wsgi.application'
ROOT_URLCONF = 'core.urls'

LOGIN_URL = '/accounts/login/'
LOGOUT_REDIRECT_URL = os.getenv('LOGOUT_REDIRECT_URL', '/')

PASSWORD_RESET_URL = '/accounts/password_reset/'
APPEND_SLASH = True

# debug mode can be switched on from the config or by passing --debug anywhere on the CLI
DEBUG = CONFIG.DEBUG or ('--debug' in sys.argv)


INSTALLED_APPS = [
    # Django default apps
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django.contrib.admin',

    # 3rd-party apps from PyPI
    'django_jsonform',           # handles rendering Pydantic models to Django HTML widgets/forms
    'signal_webhooks',           # handles REST API outbound webhooks

    # our own apps
    'abid_utils',                # handles ABID ID creation, handling, and models
    'plugantic',                 # ArchiveBox plugin API definition + finding/registering/calling interface
    'core',                      # core django model with Snapshot, ArchiveResult, etc.
    'api',                       # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
    'pkg',                       # ArchiveBox runtime package management interface for subdependencies

    # ArchiveBox plugins
    *INSTALLED_PLUGINS,          # all plugin django-apps found in archivebox/builtin_plugins and data/user_plugins

    # 3rd-party apps from PyPI that need to be loaded last
    'admin_data_views',          # handles rendering some convenient automatic read-only views of data in Django admin
    'django_extensions',         # provides extra management commands such as shell_plus
]


# NOTE: order matters here, Django applies middleware in the order listed
MIDDLEWARE = [
    'core.middleware.TimezoneMiddleware',
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'core.middleware.ReverseProxyAuthMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'core.middleware.CacheControlMiddleware',
]


################################################################################
### Authentication Settings
################################################################################

# AUTH_USER_MODEL = 'auth.User'   # cannot be easily changed unfortunately

AUTHENTICATION_BACKENDS = [
    'django.contrib.auth.backends.RemoteUserBackend',
    'django.contrib.auth.backends.ModelBackend',
]

if CONFIG.LDAP:
    try:
        import ldap
        from django_auth_ldap.config import LDAPSearch

        # These are plain module-level assignments: no `global` declarations are
        # needed because this code already runs at module scope.
        AUTH_LDAP_SERVER_URI = CONFIG.LDAP_SERVER_URI
        AUTH_LDAP_BIND_DN = CONFIG.LDAP_BIND_DN
        AUTH_LDAP_BIND_PASSWORD = CONFIG.LDAP_BIND_PASSWORD

        assert AUTH_LDAP_SERVER_URI and CONFIG.LDAP_USERNAME_ATTR and CONFIG.LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True'

        # search filter has the form (&(<username attr>=%(user)s)<user filter>)
        AUTH_LDAP_USER_SEARCH = LDAPSearch(
            CONFIG.LDAP_USER_BASE,
            ldap.SCOPE_SUBTREE,
            '(&(' + CONFIG.LDAP_USERNAME_ATTR + '=%(user)s)' + CONFIG.LDAP_USER_FILTER + ')',
        )

        AUTH_LDAP_USER_ATTR_MAP = {
            'username': CONFIG.LDAP_USERNAME_ATTR,
            'first_name': CONFIG.LDAP_FIRSTNAME_ATTR,
            'last_name': CONFIG.LDAP_LASTNAME_ATTR,
            'email': CONFIG.LDAP_EMAIL_ATTR,
        }

        # only switch the backends over once everything above succeeded
        AUTHENTICATION_BACKENDS = [
            'django.contrib.auth.backends.ModelBackend',
            'django_auth_ldap.backend.LDAPBackend',
        ]
    except ModuleNotFoundError:
        sys.stderr.write('[X] Error: Found LDAP=True config but LDAP packages not installed. You may need to run: pip install archivebox[ldap]\n\n')
        # don't hard exit here. in case the user is just running "archivebox version" or "archivebox help",
        # we still want those to work despite broken ldap
        # sys.exit(1)


################################################################################
### Staticfile and Template Settings
################################################################################

STATIC_URL = '/static/'

# user-provided custom templates dir (if set) takes precedence over the packaged one
STATICFILES_DIRS = [
    *([str(CONFIG.CUSTOM_TEMPLATES_DIR / 'static')] if CONFIG.CUSTOM_TEMPLATES_DIR else []),
    str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME / 'static'),
]

TEMPLATE_DIRS = [
    *([str(CONFIG.CUSTOM_TEMPLATES_DIR)] if CONFIG.CUSTOM_TEMPLATES_DIR else []),
    str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME / 'core'),
    str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME / 'admin'),
    str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME),
]

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': TEMPLATE_DIRS,
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]


################################################################################
### External Service Settings
################################################################################


CACHE_DB_FILENAME = 'cache.sqlite3'
CACHE_DB_PATH = CONFIG.CACHE_DIR / CACHE_DB_FILENAME
CACHE_DB_TABLE = 'django_cache'

DATABASE_FILE = Path(CONFIG.OUTPUT_DIR) / CONFIG.SQL_INDEX_FILENAME
# the ARCHIVEBOX_DATABASE_NAME env var overrides the default index db path
DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(DATABASE_FILE))

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': DATABASE_NAME,
        'OPTIONS': {
            'timeout': 60,
            'check_same_thread': False,
        },
        'TIME_ZONE': CONFIG.TIMEZONE,
        # DB setup is sometimes modified at runtime by setup_django() in config.py
    },
    # 'cache': {
    #     'ENGINE': 'django.db.backends.sqlite3',
    #     'NAME': CACHE_DB_PATH,
    #     'OPTIONS': {
    #         'timeout': 60,
    #         'check_same_thread': False,
    #     },
    #     'TIME_ZONE': CONFIG.TIMEZONE,
    # },
}
MIGRATION_MODULES = {'signal_webhooks': None}

# as much as I'd love this to be a UUID or ULID field, it's not supported yet as of Django 5.0
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'


CACHES = {
    'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
    # 'sqlite': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
    # 'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
    # 'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
}

EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'


STORAGES = {
    "default": {
        "BACKEND": "django.core.files.storage.FileSystemStorage",
    },
    "staticfiles": {
        "BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage",
    },
    "archive": {
        "BACKEND": "django.core.files.storage.FileSystemStorage",
        "OPTIONS": {
            "base_url": "/archive/",
            "location": CONFIG.ARCHIVE_DIR,
        },
    },
    # "personas": {
    #     "BACKEND": "django.core.files.storage.FileSystemStorage",
    #     "OPTIONS": {
    #         "base_url": "/personas/",
    #         "location": PERSONAS_DIR,
    #     },
    # },
}

################################################################################
### Security Settings
################################################################################

# fall back to a random per-process key when none is configured
SECRET_KEY = CONFIG.SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_')

ALLOWED_HOSTS = CONFIG.ALLOWED_HOSTS.split(',')
CSRF_TRUSTED_ORIGINS = list(set(CONFIG.CSRF_TRUSTED_ORIGINS.split(',')))

# automatically fix case when user sets ALLOWED_HOSTS (e.g. to archivebox.example.com)
# but forgets to add https://archivebox.example.com to CSRF_TRUSTED_ORIGINS
for hostname in ALLOWED_HOSTS:
    https_endpoint = f'https://{hostname}'
    if hostname != '*' and https_endpoint not in CSRF_TRUSTED_ORIGINS:
        print(f'[!] WARNING: {https_endpoint} from ALLOWED_HOSTS should be added to CSRF_TRUSTED_ORIGINS')
        CSRF_TRUSTED_ORIGINS.append(https_endpoint)

SECURE_BROWSER_XSS_FILTER = True
SECURE_CONTENT_TYPE_NOSNIFF = True
SECURE_REFERRER_POLICY = 'strict-origin-when-cross-origin'

CSRF_COOKIE_SECURE = False
SESSION_COOKIE_SECURE = False
SESSION_COOKIE_HTTPONLY = True
SESSION_COOKIE_DOMAIN = None
SESSION_COOKIE_AGE = 1209600  # 2 weeks
SESSION_EXPIRE_AT_BROWSER_CLOSE = False
SESSION_SAVE_EVERY_REQUEST = False

SESSION_ENGINE = "django.contrib.sessions.backends.db"

AUTH_PASSWORD_VALIDATORS = [
    {'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'},
    {'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'},
    {'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator'},
    {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
]

DATA_UPLOAD_MAX_NUMBER_FIELDS = None


################################################################################
### Shell Settings
################################################################################

SHELL_PLUS = 'ipython'
SHELL_PLUS_PRINT_SQL = False
IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner']
IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell'
if IS_SHELL:
    # point PYTHONSTARTUP at the packaged welcome script when running a shell command
    os.environ['PYTHONSTARTUP'] = str(Path(CONFIG.PACKAGE_DIR) / 'core' / 'welcome_message.py')


################################################################################
### Internationalization & Localization Settings
################################################################################

LANGUAGE_CODE = 'en-us'
USE_I18N = True
USE_TZ = True
DATETIME_FORMAT = 'Y-m-d h:i:s A'
SHORT_DATETIME_FORMAT = 'Y-m-d h:i:s A'
TIME_ZONE = CONFIG.TIMEZONE        # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent


from django.conf.locale.en import formats as en_formats    # type: ignore

# overwrite the formats on the 'en' locale module as well as setting them above
en_formats.DATETIME_FORMAT = DATETIME_FORMAT
en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT


################################################################################
### Logging Settings
################################################################################

# URL patterns whose requests are not worth logging (matched against the end of the URL)
IGNORABLE_404_URLS = [
    re.compile(r'apple-touch-icon.*\.png$'),
    re.compile(r'favicon\.ico$'),
    re.compile(r'robots\.txt$'),
    re.compile(r'.*\.(css|js)\.map$'),
]
# Full log-line patterns for successful requests that are not worth logging.
# The status alternation is grouped: an ungrouped `2|3.+` splits the WHOLE
# pattern in two, leaving `3.+` as a standalone alternative.
IGNORABLE_200_URLS = [
    re.compile(r'.*"GET /static/.* HTTP/.*" (2|3).+', re.I | re.M),
    re.compile(r'.*"GET /admin/jsi18n/ HTTP/1.1" 200 .+', re.I | re.M),
]


def _compile_ignored_log_patterns() -> tuple:
    """Expand IGNORABLE_404_URLS and IGNORABLE_200_URLS into compiled log-line patterns."""
    compiled = []
    for url_pattern in IGNORABLE_404_URLS:
        url_regex = url_pattern.pattern
        # drop the trailing '$' anchor so the HTTP version and status can follow the URL
        unanchored = url_regex[:-1] if url_regex.endswith('$') else url_regex
        # request log lines for the ignorable URL, whatever the listed status
        compiled.append(re.compile(f'"GET /.*/?{unanchored} HTTP/.*" (200|30.|404) .+$', re.I | re.M))
        # "Not Found: /path" warning lines for the same URL
        compiled.append(re.compile(f'Not Found: /.*/?{url_regex}', re.I | re.M))
    # the 200/30x patterns already describe whole log lines, use them as-is
    compiled.extend(IGNORABLE_200_URLS)
    return tuple(compiled)


class NoisyRequestsFilter(logging.Filter):
    """Logging filter that drops records for harmless, high-volume requests.

    A record is dropped when its formatted message matches (from the start of
    the message) any pattern derived from IGNORABLE_404_URLS or any pattern in
    IGNORABLE_200_URLS. The patterns are compiled once, when the class is
    defined, instead of being rebuilt for every log record.
    """

    _IGNORED_LOG_PATTERNS = _compile_ignored_log_patterns()

    def filter(self, record: logging.LogRecord) -> bool:
        """Return False to drop the record, True to let it through."""
        logline = record.getMessage()
        return not any(pattern.match(logline) for pattern in self._IGNORED_LOG_PATTERNS)


# Default to a temp-file path; only the generated *name* is kept here.
# NOTE(review): the NamedTemporaryFile object is not retained, so the file itself
# is presumably removed as soon as it is garbage-collected and later re-created
# by the log handler. Confirm this is acceptable.
ERROR_LOG = tempfile.NamedTemporaryFile().name

if CONFIG.LOGS_DIR.exists():
    ERROR_LOG = (CONFIG.LOGS_DIR / 'errors.log')
else:
    # historically too many edge cases here around creating log dir w/ correct permissions early on
    # if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr
    print(f'[!] WARNING: data/logs dir does not exist. Logging to temp file: {ERROR_LOG}')


LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'handlers': {
        "console": {
            "level": "DEBUG",
            'formatter': 'simple',
            "class": "logging.StreamHandler",
            'filters': ['noisyrequestsfilter'],
        },
        'logfile': {
            'level': 'ERROR',
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': ERROR_LOG,
            'maxBytes': 1024 * 1024 * 25,  # 25 MB
            'backupCount': 10,
            'formatter': 'verbose',
            'filters': ['noisyrequestsfilter'],
        },
        # "mail_admins": {
        #     "level": "ERROR",
        #     "filters": ["require_debug_false"],
        #     "class": "django.utils.log.AdminEmailHandler",
        # },
    },
    'filters': {
        'noisyrequestsfilter': {
            '()': NoisyRequestsFilter,
        },
        "require_debug_false": {
            "()": "django.utils.log.RequireDebugFalse",
        },
        "require_debug_true": {
            "()": "django.utils.log.RequireDebugTrue",
        },
    },
    'formatters': {
        'verbose': {
            'format': '{name} {levelname} {asctime} {module} {process:d} {thread:d} {message}',
            'style': '{',
        },
        'simple': {
            'format': '{name} {message}',
            'style': '{',
        },
        "django.server": {
            "()": "django.utils.log.ServerFormatter",
            "format": "[{server_time}] {message}",
            "style": "{",
        },
    },
    'loggers': {
        'api': {
            'handlers': ['console', 'logfile'],
            'level': 'DEBUG',
        },
        'checks': {
            'handlers': ['console', 'logfile'],
            'level': 'DEBUG',
        },
        'core': {
            'handlers': ['console', 'logfile'],
            'level': 'DEBUG',
        },
        'builtin_plugins': {
            'handlers': ['console', 'logfile'],
            'level': 'DEBUG',
        },
        'django': {
            'handlers': ['console', 'logfile'],
            'level': 'INFO',
            'filters': ['noisyrequestsfilter'],
        },
        # NOTE(review): the "formatter" entries on the two loggers below are kept
        # as-is; formatters are normally attached to handlers, so these keys may
        # have no effect. Confirm against the logging.config schema.
        'django.server': {
            'handlers': ['console', 'logfile'],
            'level': 'INFO',
            'filters': ['noisyrequestsfilter'],
            'propagate': False,
            "formatter": "django.server",
        },
        'django.request': {
            'handlers': ['console', 'logfile'],
            'level': 'INFO',
            'filters': ['noisyrequestsfilter'],
            'propagate': False,
            "formatter": "django.server",
        },
    },
}


################################################################################
### REST API Outbound Webhooks settings
################################################################################

# Add default webhook configuration to the User model
SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
SIGNAL_WEBHOOKS = {
    "HOOKS": dict.fromkeys(
        (
            "django.contrib.auth.models.User",
            "core.models.Snapshot",
            "core.models.ArchiveResult",
            "core.models.Tag",
            "api.models.APIToken",
        ),
        # ... is a special sigil value that means "use the default autogenerated hooks"
        ...,
    ),
}

################################################################################
### Admin Data View Settings
################################################################################

# Each entry pairs a list view ("route"/"view"/"name") with a detail view under "items"
ADMIN_DATA_VIEWS = {
    "NAME": "Environment",
    "URLS": [
        {
            "route": "config/",
            "view": "core.views.live_config_list_view",
            "name": "Configuration",
            "items": {
                "route": "/",
                "view": "core.views.live_config_value_view",
                "name": "config_val",
            },
        },
        {
            "route": "binaries/",
            "view": "plugantic.views.binaries_list_view",
            "name": "Binaries",
            "items": {
                "route": "/",
                "view": "plugantic.views.binary_detail_view",
                "name": "binary",
            },
        },
        {
            "route": "plugins/",
            "view": "plugantic.views.plugins_list_view",
            "name": "Plugins",
            "items": {
                "route": "/",
                "view": "plugantic.views.plugin_detail_view",
                "name": "plugin",
            },
        },
    ],
}


################################################################################
### Debug Settings
################################################################################

# only enable debug toolbar when in DEBUG mode with --nothreading (it doesn't work in multithreaded mode)
DEBUG_TOOLBAR = False   # manual switch: flip to True to opt in
DEBUG_TOOLBAR = DEBUG_TOOLBAR and DEBUG and ('--nothreading' in sys.argv) and ('--reload' not in sys.argv)
if DEBUG_TOOLBAR:
    # the toolbar is optional: quietly turn it back off if the package is missing
    try:
        import debug_toolbar   # noqa
    except ImportError:
        DEBUG_TOOLBAR = False
    else:
        DEBUG_TOOLBAR = True

if DEBUG_TOOLBAR:
    INSTALLED_APPS = INSTALLED_APPS + ['debug_toolbar']
    INTERNAL_IPS = ['0.0.0.0', '127.0.0.1', '*']
    DEBUG_TOOLBAR_CONFIG = {
        "SHOW_TOOLBAR_CALLBACK": lambda request: True,   # show the toolbar for every request
        "RENDER_PANELS": True,
    }
    DEBUG_TOOLBAR_PANELS = [
        'debug_toolbar.panels.history.HistoryPanel',
        'debug_toolbar.panels.versions.VersionsPanel',
        'debug_toolbar.panels.timer.TimerPanel',
        'debug_toolbar.panels.settings.SettingsPanel',
        'debug_toolbar.panels.headers.HeadersPanel',
        'debug_toolbar.panels.request.RequestPanel',
        'debug_toolbar.panels.sql.SQLPanel',
        'debug_toolbar.panels.staticfiles.StaticFilesPanel',
        # 'debug_toolbar.panels.templates.TemplatesPanel',
        'debug_toolbar.panels.cache.CachePanel',
        'debug_toolbar.panels.signals.SignalsPanel',
        'debug_toolbar.panels.logging.LoggingPanel',
        'debug_toolbar.panels.redirects.RedirectsPanel',
        'debug_toolbar.panels.profiling.ProfilingPanel',
        'djdt_flamegraph.FlamegraphPanel',
    ]
    MIDDLEWARE = MIDDLEWARE + ['debug_toolbar.middleware.DebugToolbarMiddleware']

if DEBUG:
    from django_autotyping.typing import AutotypingSettingsDict

    INSTALLED_APPS.append('django_autotyping')
    AUTOTYPING: AutotypingSettingsDict = {
        "STUBS_GENERATION": {
            "LOCAL_STUBS_DIR": Path(CONFIG.PACKAGE_DIR) / "typings",
        }
    }

# https://github.com/bensi94/Django-Requests-Tracker (improved version of django-debug-toolbar)
# Must delete archivebox/templates/admin to use because it relies on some things we override
# visit /__requests_tracker__/ to access
DEBUG_REQUESTS_TRACKER = True   # manual switch, only takes effect together with DEBUG
DEBUG_REQUESTS_TRACKER = DEBUG_REQUESTS_TRACKER and DEBUG
if DEBUG_REQUESTS_TRACKER:
    import requests_tracker

    INSTALLED_APPS.append("requests_tracker")
    MIDDLEWARE.append("requests_tracker.middleware.requests_tracker_middleware")
    INTERNAL_IPS = ["127.0.0.1", "10.0.2.2", "0.0.0.0", "*"]

    # put the package's own templates dir first in the template search path
    TEMPLATE_DIRS[:0] = [str(Path(inspect.getfile(requests_tracker)).parent / "templates")]

    REQUESTS_TRACKER_CONFIG = {
        "TRACK_SQL": True,
        "ENABLE_STACKTRACES": False,
        "IGNORE_PATHS_PATTERNS": (
            r".*/favicon\.ico",
            r".*\.png",
            r"/admin/jsi18n/",
        ),
        "IGNORE_SQL_PATTERNS": (
            r"^SELECT .* FROM django_migrations WHERE app = 'requests_tracker'",
            r"^SELECT .* FROM django_migrations WHERE app = 'auth'",
        ),
    }

# https://docs.pydantic.dev/logfire/integrations/django/ (similar to DataDog / NewRelic / etc.)
DEBUG_LOGFIRE = False   # manual switch, additionally requires a .logfire dir in the data dir
DEBUG_LOGFIRE = DEBUG_LOGFIRE and (Path(CONFIG.OUTPUT_DIR) / '.logfire').is_dir()


# For usage with https://www.jetadmin.io/integrations/django
# INSTALLED_APPS += ['jet_django']
# JET_PROJECT = 'archivebox'
# JET_TOKEN = 'some-api-token-here'