From eae11cba1940f48e8525d4f9e24a99ae26940ad1 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 11 Sep 2024 16:50:44 -0700 Subject: [PATCH] add recommended SQLite db connection settings to avoid single-writer lock contention --- archivebox/core/settings.py | 45 +++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index ee7c99a0..7f36d738 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -222,21 +222,42 @@ DATABASES = { "default": { "ENGINE": "django.db.backends.sqlite3", "NAME": DATABASE_NAME, - "OPTIONS": { - "timeout": 60, - "check_same_thread": False, - }, "TIME_ZONE": CONFIG.TIMEZONE, + "OPTIONS": { + # https://gcollazo.com/optimal-sqlite-settings-for-django/ + "timeout": 5, + "check_same_thread": False, + "transaction_mode": "IMMEDIATE", + "init_command": ( + "PRAGMA foreign_keys=ON;" + "PRAGMA journal_mode = WAL;" + "PRAGMA synchronous = NORMAL;" + "PRAGMA temp_store = MEMORY;" + "PRAGMA mmap_size = 134217728;" + "PRAGMA journal_size_limit = 67108864;" + "PRAGMA cache_size = 2000;" + ), + }, # DB setup is sometimes modified at runtime by setup_django() in config.py }, "queue": { "ENGINE": "django.db.backends.sqlite3", "NAME": QUEUE_DATABASE_NAME, - "OPTIONS": { - "timeout": 60, - "check_same_thread": False, - }, "TIME_ZONE": CONFIG.TIMEZONE, + "OPTIONS": { + "timeout": 5, + "check_same_thread": False, + "transaction_mode": "IMMEDIATE", + "init_command": ( + "PRAGMA foreign_keys=ON;" + "PRAGMA journal_mode = WAL;" + "PRAGMA synchronous = NORMAL;" + "PRAGMA temp_store = MEMORY;" + "PRAGMA mmap_size = 134217728;" + "PRAGMA journal_size_limit = 67108864;" + "PRAGMA cache_size = 2000;" + ), + }, }, # 'cache': { # 'ENGINE': 'django.db.backends.sqlite3', @@ -286,7 +307,13 @@ DJANGO_HUEY = { } class HueyDBRouter: - """A router to store all the Huey Monitor models in the queue.sqlite3 database.""" + """ + A router to store all the 
Huey result k:v / Huey Monitor models in the queue.sqlite3 database. + We keep the databases separate because the queue database receives many more reads/writes per second + and we want to avoid single-writer lock contention with the main database. Also all the in-progress task + data is ephemeral/not-important-long-term. This makes it easier for the user to clear non-critical + temp data by just deleting queue.sqlite3 and leaving index.sqlite3. + """ route_app_labels = {"huey_monitor", "django_huey", "djhuey"}