mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
clean up config loading in settings and config file layout
This commit is contained in:
parent
f82cc16667
commit
18355dc2c6
6 changed files with 94 additions and 76 deletions
|
@ -1,4 +1,4 @@
|
|||
__package__ = 'archivebox.config'
|
||||
__package__ = 'archivebox'
|
||||
|
||||
import os
|
||||
import io
|
||||
|
@ -17,7 +17,7 @@ from subprocess import run, PIPE, DEVNULL
|
|||
from configparser import ConfigParser
|
||||
from collections import defaultdict
|
||||
|
||||
from .stubs import (
|
||||
from .config_stubs import (
|
||||
SimpleConfigValueDict,
|
||||
ConfigValue,
|
||||
ConfigDict,
|
||||
|
@ -162,6 +162,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
|||
},
|
||||
}
|
||||
|
||||
# for backwards compatibility with old config files, check old/deprecated names for each key
|
||||
CONFIG_ALIASES = {
|
||||
alias: key
|
||||
for section in CONFIG_DEFAULTS.values()
|
||||
|
@ -169,6 +170,7 @@ CONFIG_ALIASES = {
|
|||
for alias in default.get('aliases', ())
|
||||
}
|
||||
USER_CONFIG = {key for section in CONFIG_DEFAULTS.values() for key in section.keys()}
|
||||
|
||||
def get_real_name(key: str) -> str:
|
||||
return CONFIG_ALIASES.get(key.upper().strip(), key.upper().strip())
|
||||
|
||||
|
@ -223,7 +225,7 @@ STATICFILE_EXTENSIONS = {
|
|||
# html, htm, shtml, xhtml, xml, aspx, php, cgi
|
||||
}
|
||||
|
||||
PYTHON_DIR_NAME = 'archivebox'
|
||||
PACKAGE_DIR_NAME = 'archivebox'
|
||||
TEMPLATES_DIR_NAME = 'themes'
|
||||
|
||||
ARCHIVE_DIR_NAME = 'archive'
|
||||
|
@ -257,9 +259,8 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
|||
'USER': {'default': lambda c: getpass.getuser() or os.getlogin()},
|
||||
'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
|
||||
|
||||
'REPO_DIR': {'default': lambda c: Path(__file__).resolve().parent.parent.parent},
|
||||
'PYTHON_DIR': {'default': lambda c: c['REPO_DIR'] / PYTHON_DIR_NAME},
|
||||
'TEMPLATES_DIR': {'default': lambda c: c['PYTHON_DIR'] / TEMPLATES_DIR_NAME / 'legacy'},
|
||||
'PACKAGE_DIR': {'default': lambda c: Path(__file__).resolve().parent},
|
||||
'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / TEMPLATES_DIR_NAME / 'legacy'},
|
||||
|
||||
'OUTPUT_DIR': {'default': lambda c: Path(c['OUTPUT_DIR']).resolve() if c['OUTPUT_DIR'] else Path(os.curdir).resolve()},
|
||||
'ARCHIVE_DIR': {'default': lambda c: c['OUTPUT_DIR'] / ARCHIVE_DIR_NAME},
|
||||
|
@ -271,7 +272,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
|||
'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
|
||||
|
||||
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]},
|
||||
'VERSION': {'default': lambda c: json.loads((Path(c['PYTHON_DIR']) / 'package.json').read_text().strip())['version']},
|
||||
'VERSION': {'default': lambda c: json.loads((Path(c['PACKAGE_DIR']) / 'package.json').read_text().strip())['version']},
|
||||
'GIT_SHA': {'default': lambda c: c['VERSION'].split('+')[-1] or 'unknown'},
|
||||
|
||||
'PYTHON_BINARY': {'default': lambda c: sys.executable},
|
||||
|
@ -412,7 +413,7 @@ def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]:
|
|||
def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
|
||||
"""write the ini-formatted config file to OUTPUT_DIR/Archivebox.conf"""
|
||||
|
||||
from ..system import atomic_write
|
||||
from .system import atomic_write
|
||||
|
||||
out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
|
||||
config_path = Path(out_dir) / CONFIG_FILENAME
|
||||
|
@ -652,15 +653,10 @@ def wget_supports_compression(config):
|
|||
|
||||
def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
|
||||
return {
|
||||
'REPO_DIR': {
|
||||
'path': config['REPO_DIR'].resolve(),
|
||||
'PACKAGE_DIR': {
|
||||
'path': (config['PACKAGE_DIR']).resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': (config['REPO_DIR'] / 'archivebox').exists(),
|
||||
},
|
||||
'PYTHON_DIR': {
|
||||
'path': (config['PYTHON_DIR']).resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': (config['PYTHON_DIR'] / '__main__.py').exists(),
|
||||
'is_valid': (config['PACKAGE_DIR'] / '__main__.py').exists(),
|
||||
},
|
||||
'TEMPLATES_DIR': {
|
||||
'path': (config['TEMPLATES_DIR']).resolve(),
|
||||
|
@ -689,7 +685,7 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
|
|||
'OUTPUT_DIR': {
|
||||
'path': config['OUTPUT_DIR'].resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': (config['OUTPUT_DIR'] / JSON_INDEX_FILENAME).exists(),
|
||||
'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
|
||||
},
|
||||
'SOURCES_DIR': {
|
||||
'path': config['SOURCES_DIR'].resolve(),
|
||||
|
@ -716,16 +712,6 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
|
|||
'enabled': True,
|
||||
'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
|
||||
},
|
||||
'JSON_INDEX': {
|
||||
'path': (config['OUTPUT_DIR'] / JSON_INDEX_FILENAME).resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': (config['OUTPUT_DIR'] / JSON_INDEX_FILENAME).exists(),
|
||||
},
|
||||
'HTML_INDEX': {
|
||||
'path': (config['OUTPUT_DIR'] / HTML_INDEX_FILENAME).resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': (config['OUTPUT_DIR'] / HTML_INDEX_FILENAME).exists(),
|
||||
},
|
||||
}
|
||||
|
||||
def get_dependency_info(config: ConfigDict) -> ConfigValue:
|
||||
|
@ -943,7 +929,7 @@ def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) ->
|
|||
stderr(' archivebox init')
|
||||
raise SystemExit(2)
|
||||
|
||||
from ..index.sql import list_migrations
|
||||
from .index.sql import list_migrations
|
||||
|
||||
pending_migrations = [name for status, name in list_migrations() if not status]
|
||||
|
||||
|
@ -971,12 +957,13 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG)
|
|||
|
||||
output_dir = out_dir or Path(config['OUTPUT_DIR'])
|
||||
|
||||
assert isinstance(output_dir, Path) and isinstance(config['PYTHON_DIR'], Path)
|
||||
assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)
|
||||
|
||||
try:
|
||||
import django
|
||||
sys.path.append(str(config['PYTHON_DIR']))
|
||||
sys.path.append(str(config['PACKAGE_DIR']))
|
||||
os.environ.setdefault('OUTPUT_DIR', str(output_dir))
|
||||
assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py'
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
|
||||
django.setup()
|
||||
|
|
@ -33,8 +33,9 @@ class ConfigDict(BaseConfig, total=False):
|
|||
SHOW_PROGRESS: bool
|
||||
IN_DOCKER: bool
|
||||
|
||||
OUTPUT_DIR: Union[str, Path, None]
|
||||
CONFIG_FILE: Union[str, Path, None]
|
||||
PACKAGE_DIR: Path
|
||||
OUTPUT_DIR: Path
|
||||
CONFIG_FILE: Path
|
||||
ONLY_NEW: bool
|
||||
TIMEOUT: int
|
||||
MEDIA_TIMEOUT: int
|
|
@ -2,24 +2,36 @@ __package__ = 'archivebox.core'
|
|||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pathlib import Path
|
||||
from django.utils.crypto import get_random_string
|
||||
|
||||
|
||||
from ..config import ( # noqa: F401
|
||||
DEBUG,
|
||||
SECRET_KEY,
|
||||
ALLOWED_HOSTS,
|
||||
PYTHON_DIR,
|
||||
PACKAGE_DIR,
|
||||
ACTIVE_THEME,
|
||||
SQL_INDEX_FILENAME,
|
||||
OUTPUT_DIR,
|
||||
)
|
||||
|
||||
ALLOWED_HOSTS = ALLOWED_HOSTS.split(',')
|
||||
|
||||
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
|
||||
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
|
||||
IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]
|
||||
|
||||
SECRET_KEY = SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789-_+!.')
|
||||
################################################################################
|
||||
### Django Core Settings
|
||||
################################################################################
|
||||
|
||||
WSGI_APPLICATION = 'core.wsgi.application'
|
||||
ROOT_URLCONF = 'core.urls'
|
||||
|
||||
LOGIN_URL = '/accounts/login/'
|
||||
LOGOUT_REDIRECT_URL = '/'
|
||||
PASSWORD_RESET_URL = '/accounts/password_reset/'
|
||||
APPEND_SLASH = True
|
||||
|
||||
INSTALLED_APPS = [
|
||||
'django.contrib.auth',
|
||||
|
@ -44,16 +56,32 @@ MIDDLEWARE = [
|
|||
'django.contrib.messages.middleware.MessageMiddleware',
|
||||
]
|
||||
|
||||
ROOT_URLCONF = 'core.urls'
|
||||
APPEND_SLASH = True
|
||||
AUTHENTICATION_BACKENDS = [
|
||||
'django.contrib.auth.backends.ModelBackend',
|
||||
]
|
||||
|
||||
|
||||
################################################################################
|
||||
### Staticfile and Template Settings
|
||||
################################################################################
|
||||
|
||||
STATIC_URL = '/static/'
|
||||
|
||||
STATICFILES_DIRS = [
|
||||
str(Path(PACKAGE_DIR) / 'themes' / ACTIVE_THEME / 'static'),
|
||||
str(Path(PACKAGE_DIR) / 'themes' / 'default' / 'static'),
|
||||
]
|
||||
|
||||
TEMPLATE_DIRS = [
|
||||
str(Path(PACKAGE_DIR) / 'themes' / ACTIVE_THEME),
|
||||
str(Path(PACKAGE_DIR) / 'themes' / 'default'),
|
||||
str(Path(PACKAGE_DIR) / 'themes'),
|
||||
]
|
||||
|
||||
TEMPLATES = [
|
||||
{
|
||||
'BACKEND': 'django.template.backends.django.DjangoTemplates',
|
||||
'DIRS': [
|
||||
str(Path(PYTHON_DIR) / 'themes' / ACTIVE_THEME),
|
||||
str(Path(PYTHON_DIR) / 'themes' / 'default'),
|
||||
str(Path(PYTHON_DIR) / 'themes'),
|
||||
],
|
||||
'DIRS': TEMPLATE_DIRS,
|
||||
'APP_DIRS': True,
|
||||
'OPTIONS': {
|
||||
'context_processors': [
|
||||
|
@ -66,7 +94,10 @@ TEMPLATES = [
|
|||
},
|
||||
]
|
||||
|
||||
WSGI_APPLICATION = 'core.wsgi.application'
|
||||
|
||||
################################################################################
|
||||
### External Service Settings
|
||||
################################################################################
|
||||
|
||||
DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME
|
||||
DATABASES = {
|
||||
|
@ -76,9 +107,27 @@ DATABASES = {
|
|||
}
|
||||
}
|
||||
|
||||
AUTHENTICATION_BACKENDS = [
|
||||
'django.contrib.auth.backends.ModelBackend',
|
||||
]
|
||||
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
|
||||
|
||||
|
||||
################################################################################
|
||||
### Security Settings
|
||||
################################################################################
|
||||
|
||||
SECRET_KEY = SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789-_+!.')
|
||||
|
||||
ALLOWED_HOSTS = ALLOWED_HOSTS.split(',')
|
||||
|
||||
SECURE_BROWSER_XSS_FILTER = True
|
||||
SECURE_CONTENT_TYPE_NOSNIFF = True
|
||||
|
||||
CSRF_COOKIE_SECURE = False
|
||||
SESSION_COOKIE_SECURE = False
|
||||
SESSION_COOKIE_DOMAIN = None
|
||||
SESSION_COOKIE_AGE = 1209600 # 2 weeks
|
||||
SESSION_EXPIRE_AT_BROWSER_CLOSE = False
|
||||
SESSION_SAVE_EVERY_REQUEST = True
|
||||
|
||||
AUTH_PASSWORD_VALIDATORS = [
|
||||
{'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'},
|
||||
{'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'},
|
||||
|
@ -86,30 +135,23 @@ AUTH_PASSWORD_VALIDATORS = [
|
|||
{'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
|
||||
]
|
||||
|
||||
################################################################################
|
||||
### Security Settings
|
||||
################################################################################
|
||||
SECURE_BROWSER_XSS_FILTER = True
|
||||
SECURE_CONTENT_TYPE_NOSNIFF = True
|
||||
SESSION_COOKIE_SECURE = False
|
||||
CSRF_COOKIE_SECURE = False
|
||||
SESSION_COOKIE_DOMAIN = None
|
||||
SESSION_EXPIRE_AT_BROWSER_CLOSE = False
|
||||
SESSION_SAVE_EVERY_REQUEST = True
|
||||
SESSION_COOKIE_AGE = 1209600 # 2 weeks
|
||||
LOGIN_URL = '/accounts/login/'
|
||||
LOGOUT_REDIRECT_URL = '/'
|
||||
PASSWORD_RESET_URL = '/accounts/password_reset/'
|
||||
|
||||
################################################################################
|
||||
### Shell Settings
|
||||
################################################################################
|
||||
|
||||
SHELL_PLUS = 'ipython'
|
||||
SHELL_PLUS_PRINT_SQL = False
|
||||
IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner']
|
||||
IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell'
|
||||
if IS_SHELL:
|
||||
os.environ['PYTHONSTARTUP'] = str(Path(PYTHON_DIR) / 'core' / 'welcome_message.py')
|
||||
os.environ['PYTHONSTARTUP'] = str(Path(PACKAGE_DIR) / 'core' / 'welcome_message.py')
|
||||
|
||||
|
||||
################################################################################
|
||||
### Internationalization & Localization Settings
|
||||
################################################################################
|
||||
|
||||
LANGUAGE_CODE = 'en-us'
|
||||
TIME_ZONE = 'UTC'
|
||||
USE_I18N = False
|
||||
|
@ -118,12 +160,3 @@ USE_TZ = False
|
|||
|
||||
DATETIME_FORMAT = 'Y-m-d g:iA'
|
||||
SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
|
||||
|
||||
|
||||
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
|
||||
|
||||
STATIC_URL = '/static/'
|
||||
STATICFILES_DIRS = [
|
||||
str(Path(PYTHON_DIR) / 'themes' / ACTIVE_THEME / 'static'),
|
||||
str(Path(PYTHON_DIR) / 'themes' / 'default' / 'static'),
|
||||
]
|
||||
|
|
|
@ -61,8 +61,7 @@ def save_mercury(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT)
|
|||
atomic_write(str(output_folder / "content.txt"), txtresult_json["content"])
|
||||
atomic_write(str(output_folder / "article.json"), result_json)
|
||||
|
||||
# parse out number of files downloaded from last line of stderr:
|
||||
# "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)"
|
||||
# keep the last 20 lines of the combined stdout + stderr output
|
||||
output_tail = [
|
||||
line.strip()
|
||||
for line in (result.stdout + result.stderr).decode().rsplit('\n', 20)[-20:]
|
||||
|
|
|
@ -15,8 +15,6 @@ from typing import Optional, List, Dict, Union, IO, TYPE_CHECKING
|
|||
if TYPE_CHECKING:
|
||||
from .index.schema import Link, ArchiveResult
|
||||
|
||||
from .index.json import MAIN_INDEX_HEADER
|
||||
|
||||
from .util import enforce_types
|
||||
from .config import (
|
||||
ConfigDict,
|
||||
|
|
|
@ -216,7 +216,7 @@ def version(quiet: bool=False,
|
|||
print(printable_dependency_version(name, dependency))
|
||||
|
||||
print()
|
||||
print('{white}[i] Code locations:{reset}'.format(**ANSI))
|
||||
print('{white}[i] Source-code locations:{reset}'.format(**ANSI))
|
||||
for name, folder in CODE_LOCATIONS.items():
|
||||
print(printable_folder_status(name, folder))
|
||||
|
||||
|
|
Loading…
Reference in a new issue