rename configfile to collection

This commit is contained in:
Nick Sweeting 2024-10-24 15:40:24 -07:00
parent 63bf902f35
commit 60f0458c77
No known key found for this signature in database
9 changed files with 41 additions and 37 deletions

View file

@ -14,7 +14,6 @@ from pydantic_pkgr import (
EnvProvider,
)
from archivebox.config import CONSTANTS
from archivebox.config.permissions import ARCHIVEBOX_USER
import abx
@ -34,6 +33,7 @@ class BaseBinProvider(BinProvider):
return [self]
class BaseBinary(Binary):
# TODO: formalize state diagram, final states, transitions, side effects, etc.
@staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None:

View file

@ -99,7 +99,7 @@ class BaseConfigSet(BaseSettings):
)
load_from_defaults: ClassVar[bool] = True
load_from_configfile: ClassVar[bool] = True
load_from_collection: ClassVar[bool] = True
load_from_environment: ClassVar[bool] = True
@classmethod
@ -128,7 +128,8 @@ class BaseConfigSet(BaseSettings):
try:
precedence_order = precedence_order or {
'defaults': init_settings,
'configfile': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
# 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
'environment': env_settings,
}
except Exception as err:
@ -144,14 +145,15 @@ class BaseConfigSet(BaseSettings):
precedence_order = {
'defaults': init_settings,
'configfile': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
# 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
'environment': env_settings,
}
if not cls.load_from_environment:
precedence_order.pop('environment')
if not cls.load_from_configfile:
precedence_order.pop('configfile')
if not cls.load_from_collection:
precedence_order.pop('collection')
if not cls.load_from_defaults:
precedence_order.pop('defaults')
@ -278,15 +280,15 @@ class BaseConfigSet(BaseSettings):
"""Get the dictionary of {key: value} config loaded from the default values"""
class OnlyDefaultsConfig(self.__class__):
load_from_defaults = True
load_from_configfile = False
load_from_collection = False
load_from_environment = False
return benedict(OnlyDefaultsConfig().model_dump(exclude_unset=False, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
def from_configfile(self) -> Dict[str, Any]:
"""Get the dictionary of {key: value} config loaded from the configfile ArchiveBox.conf"""
def from_collection(self) -> Dict[str, Any]:
"""Get the dictionary of {key: value} config loaded from the collection's ArchiveBox.conf file"""
class OnlyConfigFileConfig(self.__class__):
load_from_defaults = False
load_from_configfile = True
load_from_collection = True
load_from_environment = False
return benedict(OnlyConfigFileConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
@ -294,7 +296,7 @@ class BaseConfigSet(BaseSettings):
"""Get the dictionary of {key: value} config loaded from the environment variables"""
class OnlyEnvironmentConfig(self.__class__):
load_from_defaults = False
load_from_configfile = False
load_from_collection = False
load_from_environment = True
return benedict(OnlyEnvironmentConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))

View file

@ -4,10 +4,9 @@ import json
import os
from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple
from typing_extensions import Self
from pathlib import Path
from pydantic import model_validator, AfterValidator
from pydantic import AfterValidator
from pydantic_pkgr import BinName
from django.utils.functional import cached_property
from django.utils import timezone
@ -17,36 +16,22 @@ import abx
from .base_binary import BaseBinary
def no_empty_args(args: List[str]) -> List[str]:
def assert_no_empty_args(args: List[str]) -> List[str]:
assert all(len(arg) for arg in args)
return args
ExtractorName = Literal['wget', 'warc', 'media', 'singlefile'] | str
ExtractorName = Annotated[str, AfterValidator(lambda s: s.isidentifier())]
HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))]
CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(no_empty_args)]
CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_empty_args)]
class BaseExtractor:
name: ExtractorName
binary: BinName
output_path_func: HandlerFuncStr = 'self.get_output_path'
should_extract_func: HandlerFuncStr = 'self.should_extract'
extract_func: HandlerFuncStr = 'self.extract'
exec_func: HandlerFuncStr = 'self.exec'
default_args: CmdArgsList = []
extra_args: CmdArgsList = []
args: Optional[CmdArgsList] = None
@model_validator(mode='after')
def validate_model(self) -> Self:
if self.args is None:
self.args = [*self.default_args, *self.extra_args]
return self
def get_output_path(self, snapshot) -> Path:
return Path(self.__class__.__name__.lower())
@ -71,7 +56,7 @@ class BaseExtractor:
snapshot = Snapshot.objects.get(id=snapshot_id)
if not self.should_extract(snapshot):
if not self.should_extract(snapshot.url):
return {}
status = 'failed'

View file

@ -57,7 +57,7 @@ def get_HOOKS() -> Set[str]:
for hook_name in get_PLUGIN(plugin_id).hooks
}
def get_CONFIGS() -> Dict[str, 'BaseConfigSet']:
def get_CONFIGS() -> benedict: # Dict[str, 'BaseConfigSet']
return benedict({
config_id: configset
for plugin_configs in pm.hook.get_CONFIG()

View file

@ -88,7 +88,7 @@ def create_root_snapshot_from_seed(crawl):
def create_archiveresults_pending_from_snapshot(snapshot, config):
config = get_scope_config(
# defaults=settings.CONFIG_FROM_DEFAULTS,
# configfile=settings.CONFIG_FROM_FILE,
# collection=settings.CONFIG_FROM_FILE,
# environment=settings.CONFIG_FROM_ENVIRONMENT,
persona=archiveresult.snapshot.crawl.persona,
seed=archiveresult.snapshot.crawl.seed,

View file

@ -15,7 +15,7 @@ from archivebox.misc.logging import stderr
def get_real_name(key: str) -> str:
"""get the current canonical name for a given deprecated config key"""
"""get the up-to-date canonical name for a given old alias or current key"""
from django.conf import settings
for section in settings.CONFIGS.values():

View file

@ -1,3 +1,15 @@
"""
Constants are for things that never change at runtime
(but they can change from run to run or machine to machine).
DATA_DIR will never change at runtime, but you can run
archivebox from inside a different DATA_DIR on the same machine.
This is loaded very early in the archivebox startup flow, so nothing in this file
or imported from this file should import anything from archivebox.config.common,
django, other INSTALLED_APPS, or anything else that is not in the standard library.
"""
__package__ = 'archivebox.config'
import re
@ -197,10 +209,12 @@ class ConstantsDict(Mapping):
@classmethod
def __getitem__(cls, key: str):
# so it behaves like a dict[key] == dict.key or object attr
return getattr(cls, key)
@classmethod
def __benedict__(cls):
# when casting to benedict, only include uppercase keys that don't start with an underscore
return benedict({key: value for key, value in cls.__dict__.items() if key.isupper() and not key.startswith('_')})
@classmethod
@ -214,5 +228,6 @@ class ConstantsDict(Mapping):
CONSTANTS = ConstantsDict()
CONSTANTS_CONFIG = CONSTANTS.__benedict__()
# add all key: values to globals() for easier importing
globals().update(CONSTANTS)
# add all key: values to globals() for easier importing, e.g.:
# from archivebox.config.constants import IS_ROOT, PERSONAS_DIR, ...
# globals().update(CONSTANTS)

View file

@ -22,7 +22,7 @@ from archivebox.misc.logging import stderr, hint
from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR
from archivebox.config.common import SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG
from archivebox.config.permissions import SudoPermission, IN_DOCKER
from archivebox.config.configfile import (
from archivebox.config.collection import (
write_config_file,
load_all_config,
get_real_name,

View file

@ -126,6 +126,7 @@ def is_static_file(url: str):
def enforce_types(func):
"""
Enforce function arg and kwarg types at runtime using its python3 type hints
Simpler version of pydantic @validate_call decorator
"""
# TODO: check return type as well
@ -283,6 +284,7 @@ def get_headers(url: str, timeout: int=None) -> str:
def ansi_to_html(text: str) -> str:
"""
Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
Simple way to render colored CLI stdout/stderr properly in HTML (Textual/rich is probably better, though).
"""
TEMPLATE = '<span style="color: rgb{}"><br>'