2019-04-11 07:40:37 +00:00
__package__ = ' archivebox.core '
2019-04-02 22:53:21 +00:00
2019-04-17 07:49:18 +00:00
import os
2019-04-22 23:07:39 +00:00
import sys
2021-02-16 01:51:23 +00:00
import re
import logging
2024-08-27 03:14:47 +00:00
import inspect
2021-03-27 06:17:12 +00:00
import tempfile
2020-10-31 07:08:03 +00:00
2024-09-06 06:19:21 +00:00
from typing import Dict
2020-09-30 19:54:32 +00:00
from pathlib import Path
2024-09-06 06:19:21 +00:00
import django
2020-04-23 01:14:43 +00:00
from django . utils . crypto import get_random_string
2019-04-02 22:53:21 +00:00
2024-08-21 01:31:21 +00:00
from . . config import CONFIG
from . . config_stubs import AttrDict
assert isinstance ( CONFIG , AttrDict )
2019-04-17 09:42:09 +00:00
2020-10-31 07:08:03 +00:00
# Flags describing how this process was launched. Only the first three CLI
# arguments are inspected when looking for a management command name.
IS_MIGRATING = any(command in sys.argv[:3] for command in ('makemigrations', 'migrate'))
IS_TESTING = ('PYTEST_CURRENT_TEST' in os.environ) or ('test' in sys.argv[:3])
IS_SHELL = any(command in sys.argv[:3] for command in ('shell', 'shell_plus'))
2019-04-02 22:53:21 +00:00
2024-09-23 02:26:26 +00:00
# The package dir is the parent of the directory that contains this settings file.
PACKAGE_DIR = Path(__file__).resolve().parents[1]
assert PACKAGE_DIR == CONFIG.PACKAGE_DIR

# The data dir is whatever directory the process was started from; the sanity
# checks make sure these locally-computed paths agree with the loaded CONFIG.
DATA_DIR = Path(os.curdir).resolve()
assert DATA_DIR == CONFIG.OUTPUT_DIR
ARCHIVE_DIR = DATA_DIR.joinpath('archive')
assert ARCHIVE_DIR == CONFIG.ARCHIVE_DIR
2024-09-03 07:58:50 +00:00
################################################################################
### ArchiveBox Plugin Settings
################################################################################
2024-09-13 10:25:46 +00:00
def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]:
    """
    Map "<prefix>.<plugin_dir_name>" to the plugin directory for every plugin in plugins_dir.

    A plugin is any immediate subdirectory of plugins_dir that contains an apps.py file, e.g.
    {"pkg_plugins.pip": "/app/archivebox/pkg_plugins/pip", "user_plugins.other": "/data/user_plugins/other", ...}
    """
    # Entrypoints are visited in sorted path order. Someday enforcing plugin import order
    # may be required (e.g. key=get_plugin_order), but right now it's not needed.
    found_plugins: Dict[str, Path] = {}
    for entrypoint in sorted(plugins_dir.glob("*/apps.py")):
        plugin_path = entrypoint.parent
        found_plugins[f"{prefix}.{plugin_path.name}"] = plugin_path
    return found_plugins
2024-09-24 08:25:55 +00:00
# Directories searched for plugins, keyed by the prefix given to each plugin's dotted name.
# The built-in plugin dirs live inside the package, user plugins live inside the data dir.
PLUGIN_DIRS = {
    **{
        dir_name: PACKAGE_DIR / dir_name
        for dir_name in ('sys_plugins', 'pkg_plugins', 'auth_plugins', 'extractor_plugins')
    },
    'user_plugins': DATA_DIR / 'user_plugins',
}

# {"<prefix>.<plugin_name>": <plugin_dir>} for every plugin found in PLUGIN_DIRS (in PLUGIN_DIRS order)
INSTALLED_PLUGINS = {
    plugin_name: plugin_path
    for plugin_prefix, plugin_dir in PLUGIN_DIRS.items()
    for plugin_name, plugin_path in find_plugins_in_dir(plugin_dir, prefix=plugin_prefix).items()
}

### Plugins Globals (filled by plugin_type.pluginname.apps.PluginName.register() after Django startup)
PLUGINS = AttrDict({})
HOOKS = AttrDict({})
2024-09-03 07:58:50 +00:00
2024-09-13 10:25:46 +00:00
# Created later by Hook.register(settings) when each Plugin.register(settings) is called
2024-09-06 08:48:18 +00:00
# CONFIGS = AttrDict({})
# BINPROVIDERS = AttrDict({})
# BINARIES = AttrDict({})
# EXTRACTORS = AttrDict({})
# REPLAYERS = AttrDict({})
# CHECKS = AttrDict({})
# ADMINDATAVIEWS = AttrDict({})
2024-09-03 07:58:50 +00:00
2020-10-31 07:08:03 +00:00
################################################################################
### Django Core Settings
################################################################################
# Server entrypoints and root URL configuration
WSGI_APPLICATION = 'core.wsgi.application'
ASGI_APPLICATION = "core.asgi.application"
ROOT_URLCONF = 'core.urls'

# Account-related URLs; the logout redirect can be overridden with an environment variable
LOGIN_URL = '/accounts/login/'
LOGOUT_REDIRECT_URL = os.getenv('LOGOUT_REDIRECT_URL', '/')
PASSWORD_RESET_URL = '/accounts/password_reset/'

APPEND_SLASH = True
2020-04-23 01:14:43 +00:00
2024-08-21 01:31:21 +00:00
# Debug mode is on when configured, or when --debug is passed anywhere on the command line
DEBUG = CONFIG.DEBUG or ('--debug' in sys.argv)


INSTALLED_APPS = [
    'daphne',

    # Django default apps
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django.contrib.admin',

    # 3rd-party apps from PyPI
    'django_jsonform',           # handles rendering Pydantic models to Django HTML widgets/forms  https://github.com/bhch/django-jsonform
    'signal_webhooks',           # handles REST API outbound webhooks                              https://github.com/MrThearMan/django-signal-webhooks
    'django_object_actions',     # provides easy Django Admin action buttons on change views       https://github.com/crccheck/django-object-actions

    # Our ArchiveBox-provided apps
    'queues',                    # handles starting and managing background workers and processes
    'abid_utils',                # handles ABID ID creation, handling, and models
    'plugantic',                 # ArchiveBox plugin API definition + finding/registering/calling interface
    'core',                      # core django model with Snapshot, ArchiveResult, etc.
    'api',                       # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.

    # ArchiveBox plugins
    *INSTALLED_PLUGINS,          # all plugin django-apps found in archivebox/*_plugins and data/user_plugins,
    # plugin.register(settings) is called at import of each plugin (in the order they are listed here), then plugin.ready() is called at AppConfig.ready() time

    # 3rd-party apps from PyPI that need to be loaded last
    'admin_data_views',          # handles rendering some convenient automatic read-only views of data in Django admin
    'django_extensions',         # provides Django Debug Toolbar (and other non-debug helpers)
    'django_huey',               # provides multi-queue support for django huey   https://github.com/gaiacoop/django-huey
    'bx_django_utils',           # needed for huey_monitor                        https://github.com/boxine/bx_django_utils
    'huey_monitor',              # adds an admin UI for monitoring background huey tasks  https://github.com/boxine/django-huey-monitor
]
2021-02-16 21:23:09 +00:00
2023-09-14 09:41:27 +00:00
2021-02-16 21:23:09 +00:00
# Middleware is applied in the order listed. The project's own middlewares
# (core.middleware.*) are interleaved with Django's built-in ones.
MIDDLEWARE = [
    'core.middleware.TimezoneMiddleware',
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'core.middleware.ReverseProxyAuthMiddleware',   # listed directly after Django's AuthenticationMiddleware
    'django.contrib.messages.middleware.MessageMiddleware',
    'core.middleware.CacheControlMiddleware',
]
2023-08-17 02:53:49 +00:00
################################################################################
### Authentication Settings
################################################################################
2024-08-20 08:58:36 +00:00
# AUTH_USER_MODEL = 'auth.User' # cannot be easily changed unfortunately
2021-02-16 21:23:09 +00:00
# Default authentication backends, used unless LDAP is enabled below
AUTHENTICATION_BACKENDS = [
    'django.contrib.auth.backends.RemoteUserBackend',
    'django.contrib.auth.backends.ModelBackend',
]

from ..auth_plugins.ldap.settings import LDAP_CONFIG

if LDAP_CONFIG.LDAP_ENABLED:
    # Copy the LDAP plugin's config onto the AUTH_LDAP_* settings names
    # and replace the default backends with the ones the plugin provides.
    AUTH_LDAP_BIND_DN = LDAP_CONFIG.LDAP_BIND_DN
    AUTH_LDAP_SERVER_URI = LDAP_CONFIG.LDAP_SERVER_URI
    AUTH_LDAP_BIND_PASSWORD = LDAP_CONFIG.LDAP_BIND_PASSWORD
    AUTH_LDAP_USER_ATTR_MAP = LDAP_CONFIG.LDAP_USER_ATTR_MAP
    AUTH_LDAP_USER_SEARCH = LDAP_CONFIG.AUTH_LDAP_USER_SEARCH

    AUTHENTICATION_BACKENDS = LDAP_CONFIG.AUTHENTICATION_BACKENDS
2024-04-26 00:59:54 +00:00
2020-10-31 07:08:03 +00:00
################################################################################
### Staticfile and Template Settings
################################################################################
STATIC_URL = '/static/'

# Built-in static dir, with the user's custom templates static dir (if any) taking precedence
STATICFILES_DIRS = [str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'static')]
if CONFIG.CUSTOM_TEMPLATES_DIR:
    STATICFILES_DIRS.insert(0, str(CONFIG.CUSTOM_TEMPLATES_DIR / 'static'))

# Built-in template dirs, with the user's custom templates dir (if any) taking precedence
TEMPLATE_DIRS = [
    str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'core'),
    str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'admin'),
    str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME),
]
if CONFIG.CUSTOM_TEMPLATES_DIR:
    TEMPLATE_DIRS.insert(0, str(CONFIG.CUSTOM_TEMPLATES_DIR))

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        # the same list object as TEMPLATE_DIRS, so later in-place changes to TEMPLATE_DIRS apply here too
        'DIRS': TEMPLATE_DIRS,
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]
2020-10-31 07:08:03 +00:00
################################################################################
### External Service Settings
################################################################################
2019-04-02 22:53:21 +00:00
2024-05-12 08:42:34 +00:00
CACHE_DB_FILENAME = 'cache.sqlite3'
CACHE_DB_PATH = CONFIG.CACHE_DIR / CACHE_DB_FILENAME
CACHE_DB_TABLE = 'django_cache'

# Main index database: lives in the data dir unless ARCHIVEBOX_DATABASE_NAME overrides it
DATABASE_FILE = DATA_DIR / CONFIG.SQL_INDEX_FILENAME
DATABASE_NAME = os.getenv("ARCHIVEBOX_DATABASE_NAME", str(DATABASE_FILE))

# Queue database name is derived from the main one by swapping the filename
QUEUE_DATABASE_NAME = DATABASE_NAME.replace('index.sqlite3', 'queue.sqlite3')

SQLITE_CONNECTION_OPTIONS = {
    "TIME_ZONE": CONFIG.TIMEZONE,
    "OPTIONS": {
        # https://gcollazo.com/optimal-sqlite-settings-for-django/
        "timeout": 5,
        "check_same_thread": False,
        "transaction_mode": "IMMEDIATE",
        "init_command": "".join([
            "PRAGMA foreign_keys=ON;",
            "PRAGMA journal_mode = WAL;",
            "PRAGMA synchronous = NORMAL;",
            "PRAGMA temp_store = MEMORY;",
            "PRAGMA mmap_size = 134217728;",
            "PRAGMA journal_size_limit = 67108864;",
            "PRAGMA cache_size = 2000;",
        ]),
    },
}

DATABASES = {
    # DB setup is sometimes modified at runtime by setup_django() in config.py
    "default": dict(
        ENGINE="django.db.backends.sqlite3",
        NAME=DATABASE_NAME,
    ),
    "queue": dict(
        ENGINE="django.db.backends.sqlite3",
        NAME=QUEUE_DATABASE_NAME,
        **SQLITE_CONNECTION_OPTIONS,
    ),
    # 'cache': {
    #     'ENGINE': 'django.db.backends.sqlite3',
    #     'NAME': CACHE_DB_PATH,
    #     **SQLITE_CONNECTION_OPTIONS,
    # },
}
MIGRATION_MODULES = {'signal_webhooks': None}
2019-04-02 22:53:21 +00:00
2024-05-13 09:37:48 +00:00
# as much as I'd love this to be a UUID or ULID field, it's not supported yet as of Django 5.0
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'


# Settings for the default "system_tasks" huey queue, stored in the queue sqlite database
HUEY = {
    "huey_class": "huey.SqliteHuey",
    "filename": QUEUE_DATABASE_NAME,
    "name": "system_tasks",
    "results": True,
    "store_none": True,
    "immediate": False,
    "utc": True,
    "consumer": {
        "workers": 1,
        "worker_type": "thread",
        "initial_delay": 0.1,           # Smallest polling interval, same as -d.
        "backoff": 1.15,                # Exponential backoff using this rate, -b.
        "max_delay": 10.0,              # Max possible polling interval, -m.
        "scheduler_interval": 1,        # Check schedule every second, -s.
        "periodic": True,               # Enable crontab feature.
        "check_worker_health": True,    # Enable worker health checks.
        "health_check_interval": 1,     # Check worker health every second.
    },
}

# https://huey.readthedocs.io/en/latest/contrib.html#setting-things-up
# https://github.com/gaiacoop/django-huey
DJANGO_HUEY = {
    "default": "system_tasks",
    "queues": {
        # registered under the queue's own name, using a shallow copy of the HUEY settings
        HUEY["name"]: dict(HUEY),
        # more registered here at plugin import-time by BaseQueue.register()
    },
}
class HueyDBRouter:
    """
    Database router that sends the Huey result k:v / Huey Monitor models to the "queue" database
    and everything else to the "default" database.

    The databases are kept separate because the queue database receives many more reads/writes per
    second and we want to avoid single-write lock contention with the main database. All the
    in-progress task data is also ephemeral / not important long-term, so keeping it separate makes
    it easier for the user to clear non-critical temp data by just deleting queue.sqlite3 and
    leaving index.sqlite3.
    """

    route_app_labels = {"huey_monitor", "django_huey", "djhuey"}

    def _db_for_app(self, app_label):
        """Return the database alias that the given app label belongs to."""
        return "queue" if app_label in self.route_app_labels else 'default'

    def db_for_read(self, model, **hints):
        return self._db_for_app(model._meta.app_label)

    def db_for_write(self, model, **hints):
        return self._db_for_app(model._meta.app_label)

    def allow_relation(self, obj1, obj2, **hints):
        first_label = obj1._meta.app_label
        second_label = obj2._meta.app_label
        involves_queue_app = first_label in self.route_app_labels or second_label in self.route_app_labels
        if not involves_queue_app:
            # no opinion about relations that don't touch a queue app
            return None
        # a queue app's models may only relate to models from the very same app
        return first_label == second_label

    def allow_migrate(self, db, app_label, model_name=None, **hints):
        # each app is only migrated on the database it is routed to
        return db == self._db_for_app(app_label)
DATABASE_ROUTERS = ['core.settings.HueyDBRouter']

# Only the in-memory cache is active, the alternatives are kept below for reference
CACHES = {
    'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'},
    # 'sqlite': {'BACKEND': 'django.core.cache.backends.db.DatabaseCache', 'LOCATION': 'cache'},
    # 'dummy': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'},
    # 'filebased': {"BACKEND": "django.core.cache.backends.filebased.FileBasedCache", "LOCATION": CACHE_DIR / 'cache_filebased'},
}

EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'


STORAGES = {
    "default": {
        "BACKEND": "django.core.files.storage.FileSystemStorage",
    },
    "staticfiles": {
        "BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage",
    },
    # filesystem storage rooted at the archive dir, with urls under /archive/
    "archive": {
        "BACKEND": "django.core.files.storage.FileSystemStorage",
        "OPTIONS": {
            "base_url": "/archive/",
            "location": CONFIG.ARCHIVE_DIR,
        },
    },
    # "personas": {
    #     "BACKEND": "django.core.files.storage.FileSystemStorage",
    #     "OPTIONS": {
    #         "base_url": "/personas/",
    #         "location": PERSONAS_DIR,
    #     },
    # },
}
2019-04-22 23:07:39 +00:00
################################################################################
### Security Settings
################################################################################
2020-10-31 07:08:03 +00:00
2024-08-21 01:31:21 +00:00
# Fall back to a random key when none is configured (the fallback differs on every startup)
SECRET_KEY = CONFIG.SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_')

# Both settings are configured as comma-separated strings. Surrounding whitespace is stripped and
# empty entries are dropped, so "a.com, b.com" or a trailing comma can't produce bogus hosts/origins.
ALLOWED_HOSTS = [
    hostname.strip()
    for hostname in CONFIG.ALLOWED_HOSTS.split(',')
    if hostname.strip()
]
# de-duplicated with dict.fromkeys() so the configured order is preserved
CSRF_TRUSTED_ORIGINS = list(dict.fromkeys(
    origin.strip()
    for origin in CONFIG.CSRF_TRUSTED_ORIGINS.split(',')
    if origin.strip()
))

# automatically fix case when user sets ALLOWED_HOSTS (e.g. to archivebox.example.com)
# but forgets to add https://archivebox.example.com to CSRF_TRUSTED_ORIGINS
for hostname in ALLOWED_HOSTS:
    https_endpoint = f'https://{hostname}'
    if hostname != '*' and https_endpoint not in CSRF_TRUSTED_ORIGINS:
        print(f'[!] WARNING: {https_endpoint} from ALLOWED_HOSTS should be added to CSRF_TRUSTED_ORIGINS')
        CSRF_TRUSTED_ORIGINS.append(https_endpoint)
2020-10-31 07:08:03 +00:00
2019-04-22 23:07:39 +00:00
# NOTE(review): SECURE_BROWSER_XSS_FILTER may no longer have any effect on recent Django versions — confirm
SECURE_BROWSER_XSS_FILTER = True
SECURE_CONTENT_TYPE_NOSNIFF = True
SECURE_REFERRER_POLICY = 'strict-origin-when-cross-origin'

# Cookies are not restricted to HTTPS connections
CSRF_COOKIE_SECURE = False
SESSION_COOKIE_SECURE = False
SESSION_COOKIE_HTTPONLY = True
SESSION_COOKIE_DOMAIN = None
SESSION_COOKIE_AGE = 14 * 24 * 60 * 60  # 2 weeks, in seconds
SESSION_EXPIRE_AT_BROWSER_CLOSE = False
SESSION_SAVE_EVERY_REQUEST = False

SESSION_ENGINE = "django.contrib.sessions.backends.db"

AUTH_PASSWORD_VALIDATORS = [
    {'NAME': f'django.contrib.auth.password_validation.{validator_name}'}
    for validator_name in (
        'UserAttributeSimilarityValidator',
        'MinimumLengthValidator',
        'CommonPasswordValidator',
        'NumericPasswordValidator',
    )
]

DATA_UPLOAD_MAX_NUMBER_FIELDS = None
DATA_UPLOAD_MAX_MEMORY_SIZE = 25 * 1024 * 1024  # 25MB
2024-08-27 03:14:47 +00:00
2020-10-31 07:08:03 +00:00
################################################################################
### Shell Settings
################################################################################
2019-04-22 23:07:39 +00:00
SHELL_PLUS = 'ipython'
SHELL_PLUS_PRINT_SQL = False
IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner']
IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell'

if IS_SHELL:
    # point PYTHONSTARTUP at the welcome message script when launching a shell command
    os.environ.update(PYTHONSTARTUP=str(PACKAGE_DIR.joinpath('core', 'welcome_message.py')))
2019-04-22 23:07:39 +00:00
2019-04-02 22:53:21 +00:00
2020-10-31 07:08:03 +00:00
################################################################################
### Internationalization & Localization Settings
################################################################################
2019-04-02 22:53:21 +00:00
LANGUAGE_CODE = 'en-us'
USE_I18N = True
USE_TZ = True

# the long and short datetime formats are deliberately the same
DATETIME_FORMAT = 'Y-m-d h:i:s A'
SHORT_DATETIME_FORMAT = DATETIME_FORMAT

TIME_ZONE = CONFIG.TIMEZONE        # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent


# overwrite the formats on Django's "en" locale module so it uses the formats defined above
from django.conf.locale.en import formats as en_formats    # type: ignore

en_formats.DATETIME_FORMAT = DATETIME_FORMAT
en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT
2021-02-16 01:51:23 +00:00
################################################################################
### Logging Settings
################################################################################
2024-09-06 04:45:43 +00:00
# URL path patterns whose request log lines are considered noise and get dropped by NoisyRequestsFilter
IGNORABLE_URL_PATTERNS = [
    re.compile(r"/.*/?apple-touch-icon.*\.png"),
    re.compile(r"/.*/?favicon\.ico"),
    re.compile(r"/.*/?robots\.txt"),
    re.compile(r"/.*/?.*\.(css|js)\.map"),
    re.compile(r"/static/.*"),
    re.compile(r"/admin/jsi18n/"),
]


class NoisyRequestsFilter(logging.Filter):
    """
    Logging filter that drops log lines for requests to any URL in IGNORABLE_URL_PATTERNS.

    A record is dropped when its message starts with either:
      - an access-log line for a GET request to an ignorable URL with a 2xx, 30x, or 404 status, e.g.
            '"GET /static/admin/js/SelectBox.js HTTP/1.1" 304 0'
            '"GET /admin/jsi18n/ HTTP/1.1" 200 3352'
      - a 'Not Found: <url>' line for an ignorable URL

    All other records are kept, e.g.
            '"GET /api/v1/docs HTTP/1.1" 200 1023'
            '"GET /admin/api/apitoken/0191bbf8-fd5e-0b8c-83a8-0f32f048a0af/change/ HTTP/1.1" 200 28778'
    """

    # url pattern string -> (access-log regex, "Not Found" regex), filled lazily so that the
    # regexes are built once per url pattern instead of once per pattern for every log record
    _compiled_matchers: dict = {}

    @classmethod
    def _matchers_for(cls, url_pattern):
        """Return the (GET access-log regex, 'Not Found' regex) pair for one ignorable url pattern."""
        key = url_pattern.pattern
        try:
            return cls._compiled_matchers[key]
        except KeyError:
            matchers = (
                re.compile(f'"GET {key} HTTP/.*" (2..|30.|404) .+$', re.I | re.M),
                re.compile(f'Not Found: {key}', re.I | re.M),
            )
            cls._compiled_matchers[key] = matchers
            return matchers

    def filter(self, record) -> bool:
        """Return False (drop the record) if its message matches an ignorable request, True otherwise."""
        logline = record.getMessage()

        # IGNORABLE_URL_PATTERNS is read on every call, so patterns added to it at runtime still apply
        for url_pattern in IGNORABLE_URL_PATTERNS:
            ignorable_GET_request, ignorable_404_pattern = self._matchers_for(url_pattern)
            if ignorable_GET_request.match(logline) or ignorable_404_pattern.match(logline):
                return False

        return True
2021-02-16 01:51:23 +00:00
2024-09-06 06:19:21 +00:00
class CustomOutboundWebhookLogFormatter(logging.Formatter):
    """Formatter that relabels the 'HTTP Request:' text in formatted log lines as 'OutboundWebhook:'."""

    def format(self, record):
        formatted_line = logging.Formatter.format(self, record)
        # splitting on the old label and joining with the new one replaces every occurrence
        return 'OutboundWebhook:'.join(formatted_line.split('HTTP Request:'))
2024-09-06 04:45:43 +00:00
2024-08-27 03:14:47 +00:00
if CONFIG.LOGS_DIR.exists():
    ERROR_LOG = (CONFIG.LOGS_DIR / 'errors.log')
else:
    # historically too many edge cases here around creating log dir w/ correct permissions early on
    # if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr

    # The temp file is only created when it's actually needed, and delete=False keeps it on disk
    # after it's closed so that ERROR_LOG never points at a path that has already been removed.
    with tempfile.NamedTemporaryFile(prefix='archivebox-errors-', suffix='.log', delete=False) as _error_log_file:
        ERROR_LOG = _error_log_file.name
    print(f'[!] WARNING: data/logs dir does not exist. Logging to temp file: {ERROR_LOG}')
2021-02-16 06:23:01 +00:00
2024-09-06 04:45:43 +00:00
LOG_LEVEL_DATABASE = 'DEBUG' if DEBUG else 'WARNING'
LOG_LEVEL_REQUEST = 'DEBUG' if DEBUG else 'WARNING'


import pydantic
import django.template

# Logging config in the logging.config.dictConfig() format.
#
# Every logger below that attaches its own handlers also sets "propagate": False, because the
# root logger uses the same handlers: a record handled by a named logger and then propagated
# up to the root would otherwise be emitted twice by each handler.
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "rich": {
            "datefmt": "[%Y-%m-%d %H:%M:%S]",
            # "format": "{asctime} {levelname} {module} {name} {message} {username}",
            "format": "%(name)s %(message)s",
        },
        "outbound_webhooks": {
            "()": CustomOutboundWebhookLogFormatter,
            "datefmt": "[%Y-%m-%d %H:%M:%S]",
        },
    },
    "filters": {
        "noisyrequestsfilter": {
            "()": NoisyRequestsFilter,
        },
        "require_debug_false": {
            "()": "django.utils.log.RequireDebugFalse",
        },
        "require_debug_true": {
            "()": "django.utils.log.RequireDebugTrue",
        },
    },
    "handlers": {
        # "console": {
        #     "level": "DEBUG",
        #     'formatter': 'simple',
        #     "class": "logging.StreamHandler",
        #     'filters': ['noisyrequestsfilter', 'add_extra_logging_attrs'],
        # },
        "default": {
            "class": "rich.logging.RichHandler",
            "formatter": "rich",
            "level": "DEBUG",
            "markup": False,
            "rich_tracebacks": True,
            "filters": ["noisyrequestsfilter"],
            "tracebacks_suppress": [
                django,
                pydantic,
            ],
        },
        "logfile": {
            "level": "INFO",
            "class": "logging.handlers.RotatingFileHandler",
            "filename": ERROR_LOG,
            "maxBytes": 1024 * 1024 * 25,  # 25 MB
            "backupCount": 10,
            "formatter": "rich",
            "filters": ["noisyrequestsfilter"],
        },
        "outbound_webhooks": {
            "class": "rich.logging.RichHandler",
            "markup": False,
            "rich_tracebacks": True,
            "formatter": "outbound_webhooks",
        },
        # "mail_admins": {
        #     "level": "ERROR",
        #     "filters": ["require_debug_false"],
        #     "class": "django.utils.log.AdminEmailHandler",
        # },
        "null": {
            "class": "logging.NullHandler",
        },
    },
    # formatters are attached to handlers, not loggers, so no "formatter" key is set on loggers
    "root": {
        "handlers": ["default", "logfile"],
        "level": "INFO",
    },
    "loggers": {
        "api": {
            "propagate": False,
            "handlers": ["default", "logfile"],
            "level": "DEBUG",
        },
        "checks": {
            "propagate": False,
            "handlers": ["default", "logfile"],
            "level": "DEBUG",
        },
        "core": {
            "propagate": False,
            "handlers": ["default", "logfile"],
            "level": "DEBUG",
        },
        "extractor_plugins": {
            "propagate": False,
            "handlers": ["default", "logfile"],
            "level": "DEBUG",
        },
        "httpx": {
            "handlers": ["outbound_webhooks"],
            "level": "INFO",
            "propagate": False,
        },
        "django": {
            "propagate": False,
            "handlers": ["default", "logfile"],
            "level": "INFO",
            "filters": ["noisyrequestsfilter"],
        },
        "django.utils.autoreload": {
            "propagate": False,
            "handlers": [],
            "level": "ERROR",
        },
        "django.channels.server": {
            # see archivebox.monkey_patches.ModifiedAccessLogGenerator for dedicated daphne server logging settings
            "propagate": False,
            "handlers": ["default", "logfile"],
            "level": "INFO",
            "filters": ["noisyrequestsfilter"],
        },
        "django.server": {  # logs all requests (2xx, 3xx, 4xx)
            "propagate": False,
            "handlers": ["default", "logfile"],
            "level": "INFO",
            "filters": ["noisyrequestsfilter"],
        },
        "django.request": {  # only logs 4xx and 5xx errors
            "propagate": False,
            "handlers": ["default", "logfile"],
            "level": "ERROR",
            "filters": ["noisyrequestsfilter"],
        },
        "django.db.backends": {
            "propagate": False,
            "handlers": ["default"],
            "level": LOG_LEVEL_DATABASE,
        },
    },
}
2024-05-06 13:58:03 +00:00
2024-08-27 03:14:47 +00:00
################################################################################
### REST API Outbound Webhooks settings
################################################################################
2024-05-06 13:58:03 +00:00
# Add default webhook configuration to the User model
2024-05-13 09:36:15 +00:00
SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
SIGNAL_WEBHOOKS = {
    # ... is a special sigil value that means "use the default autogenerated hooks"
    "HOOKS": dict.fromkeys(
        [
            "django.contrib.auth.models.User",
            "core.models.Snapshot",
            "core.models.ArchiveResult",
            "core.models.Tag",
            "api.models.APIToken",
        ],
        ...,
    ),
}
2024-05-06 18:06:42 +00:00
2024-08-27 03:14:47 +00:00
################################################################################
### Admin Data View Settings
################################################################################
2024-05-06 18:06:42 +00:00
# Each section is a list view plus a per-item detail view keyed by a "<str:key>/" sub-route.
ADMIN_DATA_VIEWS = {
    "NAME": "Environment",
    "URLS": [
        {
            "route": list_route,
            "view": list_view,
            "name": list_name,
            "items": {
                "route": "<str:key>/",
                "view": detail_view,
                "name": detail_name,
            },
        }
        for list_route, list_view, list_name, detail_view, detail_name in (
            ("config/", "core.views.live_config_list_view", "Configuration", "core.views.live_config_value_view", "config_val"),
            ("binaries/", "plugantic.views.binaries_list_view", "Binaries", "plugantic.views.binary_detail_view", "binary"),
            ("plugins/", "plugantic.views.plugins_list_view", "Plugins", "plugantic.views.plugin_detail_view", "plugin"),
            ("workers/", "plugantic.views.worker_list_view", "Workers", "plugantic.views.worker_detail_view", "worker"),
            ("logs/", "plugantic.views.log_list_view", "Logs", "plugantic.views.log_detail_view", "log"),
        )
    ],
}
2024-08-27 03:14:47 +00:00
################################################################################
### Debug Settings
################################################################################
# only enable debug toolbar when in DEBUG mode with --nothreading (it doesn't work in multithreaded mode)
2024-08-27 03:20:33 +00:00
# Manual on/off switch: the toolbar can only turn on when this first value is changed to True
DEBUG_TOOLBAR = False
DEBUG_TOOLBAR = DEBUG_TOOLBAR and DEBUG and ('--nothreading' in sys.argv) and ('--reload' not in sys.argv)

if DEBUG_TOOLBAR:
    try:
        import debug_toolbar   # noqa
    except ImportError:
        # toolbar package is not installed, leave it disabled
        DEBUG_TOOLBAR = False
    else:
        DEBUG_TOOLBAR = True

        INSTALLED_APPS = [*INSTALLED_APPS, 'debug_toolbar']
        INTERNAL_IPS = ['0.0.0.0', '127.0.0.1', '*']
        DEBUG_TOOLBAR_CONFIG = {
            "SHOW_TOOLBAR_CALLBACK": lambda request: True,
            "RENDER_PANELS": True,
        }
        DEBUG_TOOLBAR_PANELS = [
            'debug_toolbar.panels.history.HistoryPanel',
            'debug_toolbar.panels.versions.VersionsPanel',
            'debug_toolbar.panels.timer.TimerPanel',
            'debug_toolbar.panels.settings.SettingsPanel',
            'debug_toolbar.panels.headers.HeadersPanel',
            'debug_toolbar.panels.request.RequestPanel',
            'debug_toolbar.panels.sql.SQLPanel',
            'debug_toolbar.panels.staticfiles.StaticFilesPanel',
            # 'debug_toolbar.panels.templates.TemplatesPanel',
            'debug_toolbar.panels.cache.CachePanel',
            'debug_toolbar.panels.signals.SignalsPanel',
            'debug_toolbar.panels.logging.LoggingPanel',
            'debug_toolbar.panels.redirects.RedirectsPanel',
            'debug_toolbar.panels.profiling.ProfilingPanel',
            'djdt_flamegraph.FlamegraphPanel',
        ]
        MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware']
if DEBUG:
    # django_autotyping is an optional helper: when it isn't installed, skip it with a
    # warning instead of crashing the whole settings import (same approach as debug_toolbar)
    try:
        from django_autotyping.typing import AutotypingSettingsDict
    except ImportError:
        print('[!] WARNING: DEBUG is enabled but django_autotyping is not installed, skipping it')
    else:
        INSTALLED_APPS += ['django_autotyping']
        AUTOTYPING: AutotypingSettingsDict = {
            "STUBS_GENERATION": {
                "LOCAL_STUBS_DIR": PACKAGE_DIR / "typings",
            }
        }

# https://github.com/bensi94/Django-Requests-Tracker (improved version of django-debug-toolbar)
# Must delete archivebox/templates/admin to use because it relies on some things we override
# visit /__requests_tracker__/ to access
DEBUG_REQUESTS_TRACKER = True
DEBUG_REQUESTS_TRACKER = DEBUG_REQUESTS_TRACKER and DEBUG
if DEBUG_REQUESTS_TRACKER:
    # requests_tracker is optional too: disable the feature if the package is missing
    try:
        import requests_tracker
    except ImportError:
        print('[!] WARNING: DEBUG is enabled but requests_tracker is not installed, skipping it')
        DEBUG_REQUESTS_TRACKER = False

if DEBUG_REQUESTS_TRACKER:
    INSTALLED_APPS += ["requests_tracker"]
    MIDDLEWARE += ["requests_tracker.middleware.requests_tracker_middleware"]
    INTERNAL_IPS = ["127.0.0.1", "10.0.2.2", "0.0.0.0", "*"]

    # put the templates bundled with the requests_tracker package first in the template search path
    TEMPLATE_DIRS.insert(0, str(Path(inspect.getfile(requests_tracker)).parent / "templates"))

    REQUESTS_TRACKER_CONFIG = {
        "TRACK_SQL": True,
        "ENABLE_STACKTRACES": False,
        "IGNORE_PATHS_PATTERNS": (
            r".*/favicon\.ico",
            r".*\.png",
            r"/admin/jsi18n/",
        ),
        "IGNORE_SQL_PATTERNS": (
            r"^SELECT .* FROM django_migrations WHERE app = 'requests_tracker'",
            r"^SELECT .* FROM django_migrations WHERE app = 'auth'",
        ),
    }

# https://docs.pydantic.dev/logfire/integrations/django/ (similar to DataDog / NewRelic / etc.)
DEBUG_LOGFIRE = False
DEBUG_LOGFIRE = DEBUG_LOGFIRE and (DATA_DIR / '.logfire').is_dir()
2024-09-03 07:58:50 +00:00
# For usage with https://www.jetadmin.io/integrations/django
# INSTALLED_APPS += ['jet_django']
# JET_PROJECT = 'archivebox'
# JET_TOKEN = 'some-api-token-here'