clean up config loading in settings and config file layout

This commit is contained in:
Nick Sweeting 2020-10-31 03:08:03 -04:00
parent f82cc16667
commit 18355dc2c6
6 changed files with 94 additions and 76 deletions

View file

@ -1,4 +1,4 @@
__package__ = 'archivebox.config'
__package__ = 'archivebox'
import os
import io
@ -17,7 +17,7 @@ from subprocess import run, PIPE, DEVNULL
from configparser import ConfigParser
from collections import defaultdict
from .stubs import (
from .config_stubs import (
SimpleConfigValueDict,
ConfigValue,
ConfigDict,
@ -162,6 +162,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
},
}
# for backwards compatibility with old config files, check old/deprecated names for each key
CONFIG_ALIASES = {
alias: key
for section in CONFIG_DEFAULTS.values()
@ -169,6 +170,7 @@ CONFIG_ALIASES = {
for alias in default.get('aliases', ())
}
USER_CONFIG = {key for section in CONFIG_DEFAULTS.values() for key in section.keys()}
def get_real_name(key: str) -> str:
    """Return the canonical config key for `key`, resolving deprecated aliases.

    The key is upper-cased and stripped of surrounding whitespace before it is
    looked up in CONFIG_ALIASES; if it is not a known alias, the normalized key
    itself is returned.
    """
    normalized = key.upper().strip()
    return CONFIG_ALIASES.get(normalized, normalized)
@ -223,7 +225,7 @@ STATICFILE_EXTENSIONS = {
# html, htm, shtml, xhtml, xml, aspx, php, cgi
}
PYTHON_DIR_NAME = 'archivebox'
PACKAGE_DIR_NAME = 'archivebox'
TEMPLATES_DIR_NAME = 'themes'
ARCHIVE_DIR_NAME = 'archive'
@ -257,9 +259,8 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'USER': {'default': lambda c: getpass.getuser() or os.getlogin()},
'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
'REPO_DIR': {'default': lambda c: Path(__file__).resolve().parent.parent.parent},
'PYTHON_DIR': {'default': lambda c: c['REPO_DIR'] / PYTHON_DIR_NAME},
'TEMPLATES_DIR': {'default': lambda c: c['PYTHON_DIR'] / TEMPLATES_DIR_NAME / 'legacy'},
'PACKAGE_DIR': {'default': lambda c: Path(__file__).resolve().parent},
'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / TEMPLATES_DIR_NAME / 'legacy'},
'OUTPUT_DIR': {'default': lambda c: Path(c['OUTPUT_DIR']).resolve() if c['OUTPUT_DIR'] else Path(os.curdir).resolve()},
'ARCHIVE_DIR': {'default': lambda c: c['OUTPUT_DIR'] / ARCHIVE_DIR_NAME},
@ -271,7 +272,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]},
'VERSION': {'default': lambda c: json.loads((Path(c['PYTHON_DIR']) / 'package.json').read_text().strip())['version']},
'VERSION': {'default': lambda c: json.loads((Path(c['PACKAGE_DIR']) / 'package.json').read_text().strip())['version']},
'GIT_SHA': {'default': lambda c: c['VERSION'].split('+')[-1] or 'unknown'},
'PYTHON_BINARY': {'default': lambda c: sys.executable},
@ -412,7 +413,7 @@ def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]:
def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
"""write the given config to the ini-formatted config file at OUTPUT_DIR/CONFIG_FILENAME"""
from ..system import atomic_write
from .system import atomic_write
out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
config_path = Path(out_dir) / CONFIG_FILENAME
@ -652,15 +653,10 @@ def wget_supports_compression(config):
def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
return {
'REPO_DIR': {
'path': config['REPO_DIR'].resolve(),
'PACKAGE_DIR': {
'path': (config['PACKAGE_DIR']).resolve(),
'enabled': True,
'is_valid': (config['REPO_DIR'] / 'archivebox').exists(),
},
'PYTHON_DIR': {
'path': (config['PYTHON_DIR']).resolve(),
'enabled': True,
'is_valid': (config['PYTHON_DIR'] / '__main__.py').exists(),
'is_valid': (config['PACKAGE_DIR'] / '__main__.py').exists(),
},
'TEMPLATES_DIR': {
'path': (config['TEMPLATES_DIR']).resolve(),
@ -689,7 +685,7 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
'OUTPUT_DIR': {
'path': config['OUTPUT_DIR'].resolve(),
'enabled': True,
'is_valid': (config['OUTPUT_DIR'] / JSON_INDEX_FILENAME).exists(),
'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
},
'SOURCES_DIR': {
'path': config['SOURCES_DIR'].resolve(),
@ -716,16 +712,6 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
'enabled': True,
'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
},
'JSON_INDEX': {
'path': (config['OUTPUT_DIR'] / JSON_INDEX_FILENAME).resolve(),
'enabled': True,
'is_valid': (config['OUTPUT_DIR'] / JSON_INDEX_FILENAME).exists(),
},
'HTML_INDEX': {
'path': (config['OUTPUT_DIR'] / HTML_INDEX_FILENAME).resolve(),
'enabled': True,
'is_valid': (config['OUTPUT_DIR'] / HTML_INDEX_FILENAME).exists(),
},
}
def get_dependency_info(config: ConfigDict) -> ConfigValue:
@ -943,7 +929,7 @@ def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) ->
stderr(' archivebox init')
raise SystemExit(2)
from ..index.sql import list_migrations
from .index.sql import list_migrations
pending_migrations = [name for status, name in list_migrations() if not status]
@ -971,12 +957,13 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG)
output_dir = out_dir or Path(config['OUTPUT_DIR'])
assert isinstance(output_dir, Path) and isinstance(config['PYTHON_DIR'], Path)
assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)
try:
import django
sys.path.append(str(config['PYTHON_DIR']))
sys.path.append(str(config['PACKAGE_DIR']))
os.environ.setdefault('OUTPUT_DIR', str(output_dir))
assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py'
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
django.setup()

View file

@ -33,8 +33,9 @@ class ConfigDict(BaseConfig, total=False):
SHOW_PROGRESS: bool
IN_DOCKER: bool
OUTPUT_DIR: Union[str, Path, None]
CONFIG_FILE: Union[str, Path, None]
PACKAGE_DIR: Path
OUTPUT_DIR: Path
CONFIG_FILE: Path
ONLY_NEW: bool
TIMEOUT: int
MEDIA_TIMEOUT: int

View file

@ -2,24 +2,36 @@ __package__ = 'archivebox.core'
import os
import sys
from pathlib import Path
from django.utils.crypto import get_random_string
from ..config import ( # noqa: F401
DEBUG,
SECRET_KEY,
ALLOWED_HOSTS,
PYTHON_DIR,
PACKAGE_DIR,
ACTIVE_THEME,
SQL_INDEX_FILENAME,
OUTPUT_DIR,
)
ALLOWED_HOSTS = ALLOWED_HOSTS.split(',')
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]
SECRET_KEY = SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789-_+!.')
################################################################################
### Django Core Settings
################################################################################
WSGI_APPLICATION = 'core.wsgi.application'
ROOT_URLCONF = 'core.urls'
LOGIN_URL = '/accounts/login/'
LOGOUT_REDIRECT_URL = '/'
PASSWORD_RESET_URL = '/accounts/password_reset/'
APPEND_SLASH = True
INSTALLED_APPS = [
'django.contrib.auth',
@ -44,16 +56,32 @@ MIDDLEWARE = [
'django.contrib.messages.middleware.MessageMiddleware',
]
ROOT_URLCONF = 'core.urls'
APPEND_SLASH = True
AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.ModelBackend',
]
################################################################################
### Staticfile and Template Settings
################################################################################
STATIC_URL = '/static/'
STATICFILES_DIRS = [
str(Path(PACKAGE_DIR) / 'themes' / ACTIVE_THEME / 'static'),
str(Path(PACKAGE_DIR) / 'themes' / 'default' / 'static'),
]
TEMPLATE_DIRS = [
str(Path(PACKAGE_DIR) / 'themes' / ACTIVE_THEME),
str(Path(PACKAGE_DIR) / 'themes' / 'default'),
str(Path(PACKAGE_DIR) / 'themes'),
]
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [
str(Path(PYTHON_DIR) / 'themes' / ACTIVE_THEME),
str(Path(PYTHON_DIR) / 'themes' / 'default'),
str(Path(PYTHON_DIR) / 'themes'),
],
'DIRS': TEMPLATE_DIRS,
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
@ -66,7 +94,10 @@ TEMPLATES = [
},
]
WSGI_APPLICATION = 'core.wsgi.application'
################################################################################
### External Service Settings
################################################################################
DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME
DATABASES = {
@ -76,9 +107,27 @@ DATABASES = {
}
}
AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.ModelBackend',
]
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
################################################################################
### Security Settings
################################################################################
SECRET_KEY = SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789-_+!.')
ALLOWED_HOSTS = ALLOWED_HOSTS.split(',')
SECURE_BROWSER_XSS_FILTER = True
SECURE_CONTENT_TYPE_NOSNIFF = True
CSRF_COOKIE_SECURE = False
SESSION_COOKIE_SECURE = False
SESSION_COOKIE_DOMAIN = None
SESSION_COOKIE_AGE = 1209600 # 2 weeks
SESSION_EXPIRE_AT_BROWSER_CLOSE = False
SESSION_SAVE_EVERY_REQUEST = True
AUTH_PASSWORD_VALIDATORS = [
{'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'},
{'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'},
@ -86,30 +135,23 @@ AUTH_PASSWORD_VALIDATORS = [
{'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
]
################################################################################
### Security Settings
################################################################################
SECURE_BROWSER_XSS_FILTER = True
SECURE_CONTENT_TYPE_NOSNIFF = True
SESSION_COOKIE_SECURE = False
CSRF_COOKIE_SECURE = False
SESSION_COOKIE_DOMAIN = None
SESSION_EXPIRE_AT_BROWSER_CLOSE = False
SESSION_SAVE_EVERY_REQUEST = True
SESSION_COOKIE_AGE = 1209600 # 2 weeks
LOGIN_URL = '/accounts/login/'
LOGOUT_REDIRECT_URL = '/'
PASSWORD_RESET_URL = '/accounts/password_reset/'
################################################################################
### Shell Settings
################################################################################
SHELL_PLUS = 'ipython'
SHELL_PLUS_PRINT_SQL = False
IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner']
IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell'
if IS_SHELL:
os.environ['PYTHONSTARTUP'] = str(Path(PYTHON_DIR) / 'core' / 'welcome_message.py')
os.environ['PYTHONSTARTUP'] = str(Path(PACKAGE_DIR) / 'core' / 'welcome_message.py')
################################################################################
### Internationalization & Localization Settings
################################################################################
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = False
@ -118,12 +160,3 @@ USE_TZ = False
DATETIME_FORMAT = 'Y-m-d g:iA'
SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
STATIC_URL = '/static/'
STATICFILES_DIRS = [
str(Path(PYTHON_DIR) / 'themes' / ACTIVE_THEME / 'static'),
str(Path(PYTHON_DIR) / 'themes' / 'default' / 'static'),
]

View file

@ -61,8 +61,7 @@ def save_mercury(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT)
atomic_write(str(output_folder / "content.txt"), txtresult_json["content"])
atomic_write(str(output_folder / "article.json"), result_json)
# parse out number of files downloaded from last line of stderr:
# "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)"
# parse out the last 20 lines of combined stdout + stderr
output_tail = [
line.strip()
for line in (result.stdout + result.stderr).decode().rsplit('\n', 20)[-20:]

View file

@ -15,8 +15,6 @@ from typing import Optional, List, Dict, Union, IO, TYPE_CHECKING
if TYPE_CHECKING:
from .index.schema import Link, ArchiveResult
from .index.json import MAIN_INDEX_HEADER
from .util import enforce_types
from .config import (
ConfigDict,

View file

@ -216,7 +216,7 @@ def version(quiet: bool=False,
print(printable_dependency_version(name, dependency))
print()
print('{white}[i] Code locations:{reset}'.format(**ANSI))
print('{white}[i] Source-code locations:{reset}'.format(**ANSI))
for name, folder in CODE_LOCATIONS.items():
print(printable_folder_status(name, folder))