mirror of https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-25 05:30:23 +00:00

commit a211461ffc (parent deb116eed4)
fix LIB_DIR and TMP_DIR loading when primary option isnt available

This commit is contained in:
21 changed files with 712 additions and 303 deletions
@@ -37,7 +37,8 @@ class BaseBinary(Binary):
 
     @staticmethod
     def symlink_to_lib(binary, bin_dir=None) -> None:
-        bin_dir = bin_dir or CONSTANTS.LIB_BIN_DIR
+        from archivebox.config.common import STORAGE_CONFIG
+        bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
 
         if not (binary.abspath and os.access(binary.abspath, os.R_OK)):
             return
@@ -55,9 +56,10 @@ class BaseBinary(Binary):
 
     @validate_call
     def load(self, fresh=False, **kwargs) -> Self:
+        from archivebox.config.common import STORAGE_CONFIG
         if fresh:
             binary = super().load(**kwargs)
-            self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
+            self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
         else:
             # get cached binary from db
             try:
@@ -72,16 +74,18 @@ class BaseBinary(Binary):
 
     @validate_call
     def install(self, **kwargs) -> Self:
+        from archivebox.config.common import STORAGE_CONFIG
         binary = super().install(**kwargs)
-        self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
+        self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
         return binary
 
     @validate_call
     def load_or_install(self, fresh=False, **kwargs) -> Self:
+        from archivebox.config.common import STORAGE_CONFIG
        try:
             binary = self.load(fresh=fresh)
             if binary and binary.version:
-                self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
+                self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
                 return binary
         except Exception:
             pass
 
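The net effect of these three hunks: every successful load/install now symlinks the binary into the user-configurable STORAGE_CONFIG.LIB_DIR / 'bin' instead of the removed CONSTANTS.LIB_BIN_DIR. A minimal sketch of the new flow (WgetBinary is a hypothetical BaseBinary subclass, not part of this commit):

    from archivebox.config.common import STORAGE_CONFIG

    binary = WgetBinary().load_or_install()   # hypothetical subclass of BaseBinary
    if binary and binary.version:
        # on success a symlink should now exist under the configurable lib dir:
        print(STORAGE_CONFIG.LIB_DIR / 'bin' / binary.name, '->', binary.abspath)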
@@ -1,8 +1,13 @@
 __package__ = 'abx.archivebox'
 
 import os
+import sys
+import re
 from pathlib import Path
-from typing import Type, Tuple, Callable, ClassVar
+from typing import Type, Tuple, Callable, ClassVar, Dict, Any
 
+import toml
+from rich import print
+
 from benedict import benedict
 from pydantic import model_validator, TypeAdapter
@@ -18,6 +23,11 @@ from . import toml_util
 PACKAGE_DIR = Path(__file__).resolve().parent.parent
 DATA_DIR = Path(os.getcwd()).resolve()
 
+ARCHIVEBOX_CONFIG_FILE = DATA_DIR / "ArchiveBox.conf"
+ARCHIVEBOX_CONFIG_FILE_BAK = ARCHIVEBOX_CONFIG_FILE.parent / ".ArchiveBox.conf.bak"
+
+AUTOFIXES_HEADER = "[AUTOFIXES]"
+AUTOFIXES_SUBHEADER = "# The following config was added automatically to fix problems detected at startup:"
 
 
 class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
@@ -53,7 +63,7 @@ class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
         super(TomlConfigSettingsSource, self).__init__(settings_cls, self.toml_data)
 
 
-class ArchiveBoxBaseConfig(BaseSettings):
+class BaseConfigSet(BaseSettings):
     """
     This is the base class for an ArchiveBox ConfigSet.
     It handles loading values from schema defaults, ArchiveBox.conf TOML config, and environment variables.
@@ -83,7 +93,7 @@ class ArchiveBoxBaseConfig(BaseSettings):
         loc_by_alias=False,
         validate_assignment=True,
         validate_return=True,
-        revalidate_instances="always",
+        revalidate_instances="subclass-instances",
     )
 
     load_from_defaults: ClassVar[bool] = True
@@ -101,9 +111,6 @@ class ArchiveBoxBaseConfig(BaseSettings):
     ) -> Tuple[PydanticBaseSettingsSource, ...]:
         """Defines the config precedence order: Schema defaults -> ArchiveBox.conf (TOML) -> Environment variables"""
 
-        ARCHIVEBOX_CONFIG_FILE = DATA_DIR / "ArchiveBox.conf"
-        ARCHIVEBOX_CONFIG_FILE_BAK = ARCHIVEBOX_CONFIG_FILE.parent / ".ArchiveBox.conf.bak"
-
         # import ipdb; ipdb.set_trace()
 
         precedence_order = {}
@@ -152,27 +159,36 @@ class ArchiveBoxBaseConfig(BaseSettings):
     def fill_defaults(self):
         """Populate any unset values using function provided as their default"""
 
-        for key, field in self.model_fields.items():
-            value = getattr(self, key)
-
-            if isinstance(value, Callable):
-                # if value is a function, execute it to get the actual value, passing existing config as a dict arg if expected
-                if func_takes_args_or_kwargs(value):
-                    # assemble dict of existing field values to pass to default factory functions
-                    config_so_far = benedict(self.model_dump(include=set(self.model_fields.keys()), warnings=False))
-                    computed_default = field.default(config_so_far)
-                else:
-                    # otherwise it's a pure function with no args, just call it
-                    computed_default = field.default()
-
-                # coerce/check to make sure default factory return value matches type annotation
-                TypeAdapter(field.annotation).validate_python(computed_default)
-
-                # set generated default value as final validated value
-                setattr(self, key, computed_default)
+        for key in self.model_fields.keys():
+            if isinstance(getattr(self, key), Callable):
+                if self.load_from_defaults:
+                    computed_default = self.get_default_value(key)
+                    # set generated default value as final validated value
+                    setattr(self, key, computed_default)
         return self
 
-    def update_in_place(self, warn=True, **kwargs):
+    def get_default_value(self, key):
+        """Get the default value for a given config key"""
+        field = self.model_fields[key]
+        value = getattr(self, key)
+
+        if isinstance(value, Callable):
+            # if value is a function, execute it to get the actual value, passing existing config as a dict arg if expected
+            if func_takes_args_or_kwargs(value):
+                # assemble dict of existing field values to pass to default factory functions
+                config_so_far = benedict(self.model_dump(include=set(self.model_fields.keys()), warnings=False))
+                computed_default = field.default(config_so_far)
+            else:
+                # otherwise it's a pure function with no args, just call it
+                computed_default = field.default()
+
+            # coerce/check to make sure default factory return value matches type annotation
+            TypeAdapter(field.annotation).validate_python(computed_default)
+
+            return computed_default
+        return value
+
+    def update_in_place(self, warn=True, persist=False, hint='', **kwargs):
         """
         Update the config with new values. Use this sparingly! We should almost never be updating config at runtime.
         Sets them in the environment so they propagate to spawned subprocesses / across future re-__init__()s and reload from environment
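For context, the dispatch preserved by get_default_value() treats a callable default either as a factory over the config-so-far or as a zero-arg function; a self-contained sketch of that rule (the takes_args_or_kwargs helper below is an assumed stand-in for abx's func_takes_args_or_kwargs):

    import inspect

    def takes_args_or_kwargs(func) -> bool:
        # assumed behavior of the func_takes_args_or_kwargs helper: does the callable accept any args?
        params = inspect.signature(func).parameters.values()
        return any(p.kind in (p.VAR_POSITIONAL, p.VAR_KEYWORD) or p.kind is p.POSITIONAL_OR_KEYWORD for p in params)

    config_so_far = {'IS_TTY': True}
    default = lambda c: c['IS_TTY']       # factory that wants the config-so-far dict
    value = default(config_so_far) if takes_args_or_kwargs(default) else default()
    assert value is True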
@@ -180,25 +196,106 @@ class ArchiveBoxBaseConfig(BaseSettings):
         Example acceptable use case: user config says SEARCH_BACKEND_ENGINE=sonic but sonic_client pip library is not installed so we cannot use it.
         SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep') can be used to reset it back to ripgrep so we can continue.
         """
+        from archivebox.misc.toml_util import CustomTOMLEncoder
+
         if warn:
-            print('[!] WARNING: Some of the provided user config values cannot be used, temporarily ignoring them:')
+            fix_scope = 'in ArchiveBox.conf' if persist else 'just for current run'
+            print(f'[yellow]:warning: WARNING: Some config cannot be used as-is, fixing automatically {fix_scope}:[/yellow] {hint}', file=sys.stderr)
 
+        # set the new values in the environment
         for key, value in kwargs.items():
             os.environ[key] = str(value)
             original_value = getattr(self, key)
             if warn:
                 print(f'    {key}={original_value} -> {value}')
 
+        # if persist=True, write config changes to data/ArchiveBox.conf [AUTOFIXES] section
+        try:
+            if persist and ARCHIVEBOX_CONFIG_FILE.is_file():
+                autofixes_to_add = benedict(kwargs).to_toml(encoder=CustomTOMLEncoder())
+
+                existing_config = ARCHIVEBOX_CONFIG_FILE.read_text().split(AUTOFIXES_HEADER, 1)[0].strip()
+                if AUTOFIXES_HEADER in existing_config:
+                    existing_autofixes = existing_config.split(AUTOFIXES_HEADER, 1)[-1].strip().replace(AUTOFIXES_SUBHEADER, '').replace(AUTOFIXES_HEADER, '').strip()
+                else:
+                    existing_autofixes = ''
+
+                new_config = '\n'.join(line for line in [
+                    existing_config,
+                    '\n' + AUTOFIXES_HEADER,
+                    AUTOFIXES_SUBHEADER,
+                    existing_autofixes,
+                    autofixes_to_add,
+                ] if line.strip()).strip() + '\n'
+                ARCHIVEBOX_CONFIG_FILE.write_text(new_config)
+        except Exception:
+            pass
         self.__init__()
+        if warn:
+            print(file=sys.stderr)
+
         return self
 
-    def as_legacy_config_schema(self):
+    @property
+    def toml_section_header(self):
+        """Convert the class name to a TOML section header e.g. ShellConfig -> SHELL_CONFIG"""
+        class_name = self.__class__.__name__
+        return re.sub('([A-Z]+)', r'_\1', class_name).upper().strip('_')
+
+
+    def from_defaults(self) -> Dict[str, Any]:
+        """Get the dictionary of {key: value} config loaded from the default values"""
+        class OnlyDefaultsConfig(self.__class__):
+            load_from_defaults = True
+            load_from_configfile = False
+            load_from_environment = False
+        return benedict(OnlyDefaultsConfig().model_dump(exclude_unset=False, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
+
+    def from_configfile(self) -> Dict[str, Any]:
+        """Get the dictionary of {key: value} config loaded from the configfile ArchiveBox.conf"""
+        class OnlyConfigFileConfig(self.__class__):
+            load_from_defaults = False
+            load_from_configfile = True
+            load_from_environment = False
+        return benedict(OnlyConfigFileConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
+
+    def from_environment(self) -> Dict[str, Any]:
+        """Get the dictionary of {key: value} config loaded from the environment variables"""
+        class OnlyEnvironmentConfig(self.__class__):
+            load_from_defaults = False
+            load_from_configfile = False
+            load_from_environment = True
+        return benedict(OnlyEnvironmentConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
+
+    def from_computed(self) -> Dict[str, Any]:
+        """Get the dictionary of {key: value} config loaded from the computed fields"""
+        return benedict(self.model_dump(include=set(self.model_computed_fields.keys())))
+
+
+    def to_toml_dict(self, defaults=False) -> Dict[str, Any]:
+        """Get the current config as a TOML-ready dict"""
+        config_dict = {}
+        for key, value in benedict(self).items():
+            if defaults or value != self.get_default_value(key):
+                config_dict[key] = value
+
+        return benedict({self.toml_section_header: config_dict})
+
+    def to_toml_str(self, defaults=False) -> str:
+        """Get the current config as a TOML string"""
+        from archivebox.misc.toml_util import CustomTOMLEncoder
+
+        toml_dict = self.to_toml_dict(defaults=defaults)
+        if not toml_dict[self.toml_section_header]:
+            # if the section is empty, don't write it
+            toml_dict.pop(self.toml_section_header)
+
+        return toml.dumps(toml_dict, encoder=CustomTOMLEncoder())
+
+    def as_legacy_config_schema(self) -> Dict[str, Any]:
         # shim for backwards compatibility with old config schema style
         model_values = self.model_dump()
         return benedict({
             key: {'type': field.annotation, 'default': model_values[key]}
             for key, field in self.model_fields.items()
         })
-
-
-class BaseConfigSet(ArchiveBoxBaseConfig):     # type: ignore[type-arg]
-
-    pass
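The new toml_section_header property derives a config set's TOML heading purely from its class name; the regex behaves like this (standalone check, not part of the commit):

    import re

    def section_header(class_name: str) -> str:
        # 'ShellConfig' -> '_Shell_Config' -> 'SHELL_CONFIG'
        return re.sub('([A-Z]+)', r'_\1', class_name).upper().strip('_')

    assert section_header('ShellConfig') == 'SHELL_CONFIG'
    assert section_header('StorageConfig') == 'STORAGE_CONFIG'

And with the new persist=True flag, update_in_place() both overrides the values in the environment and appends them to the [AUTOFIXES] section of ArchiveBox.conf, e.g. (hypothetical values):

    STORAGE_CONFIG.update_in_place(
        TMP_DIR='/tmp/archivebox/abc5',    # hypothetical replacement value
        persist=True,
        hint='(the configured TMP_DIR was not usable)',
    )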
@@ -18,13 +18,7 @@ def get_PLUGIN() -> Dict[str, Dict[str, Any]]:
 def get_CONFIG() -> Dict[str, BaseConfigSet]:
     return {}
 
-@hookspec
-def get_BINARIES() -> Dict[str, BaseBinary]:
-    return {}
-
-@hookspec
-def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
-    return {}
-
 @hookspec
 def get_EXTRACTORS() -> Dict[str, BaseExtractor]:
@@ -45,3 +39,14 @@ def get_SEARCHBACKENDS() -> Dict[str, BaseSearchBackend]:
 # @hookspec
 # def get_QUEUES():
 #     return {}
+
+
+##############################################################
+# provided by abx.pydantic_pkgr.hookspec:
+# @hookspec
+# def get_BINARIES() -> Dict[str, BaseBinary]:
+#     return {}
+
+# @hookspec
+# def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
+#     return {}
@@ -131,9 +131,12 @@ def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']:
 
 
 
-def get_scope_config(defaults=settings.CONFIG, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
+def get_scope_config(defaults: benedict | None = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
     """Get all the relevant config for the given scope, in correct precedence order"""
+
+    from django.conf import settings
+    default_config: benedict = defaults or settings.CONFIG
 
     snapshot = snapshot or (archiveresult and archiveresult.snapshot)
     crawl = crawl or (snapshot and snapshot.crawl)
     seed = seed or (crawl and crawl.seed)
@@ -147,7 +150,7 @@ def get_scope_config(defaults=settings.CONFIG, persona=None, seed=None, crawl=No
     extra_config = extra_config or {}
 
     return {
-        **defaults,            # defaults / config file / environment variables
+        **default_config,      # defaults / config file / environment variables
         **persona_config,      # lowest precedence
         **seed_config,
         **crawl_config,
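Because the merge is plain dict unpacking, later entries override earlier ones, so scope precedence is purely ordering (the snapshot/archiveresult/extra_config entries presumably continue the pattern past this hunk's context):

    # dict unpacking keeps the last value seen for each duplicate key:
    merged = {**{'TIMEOUT': 60, 'SAVE_WGET': True}, **{'TIMEOUT': 120}}
    assert merged == {'TIMEOUT': 120, 'SAVE_WGET': True}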
@@ -164,13 +164,18 @@ def run_subcommand(subcommand: str,
     # print('DATA_DIR is', DATA_DIR)
     # print('pwd is', os.getcwd())
 
-    cmd_requires_db = subcommand in archive_cmds
+    cmd_requires_db = (subcommand in archive_cmds)
     init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
 
     check_db = cmd_requires_db and not init_pending
 
     setup_django(in_memory_db=subcommand in fake_db, check_db=check_db)
 
+    for ignore_pattern in ('help', '-h', '--help', 'version', '--version'):
+        if ignore_pattern in sys.argv[:4]:
+            cmd_requires_db = False
+            break
+
     if subcommand in archive_cmds:
         if cmd_requires_db:
             check_migrations()
@@ -1,18 +1,18 @@
 __package__ = 'archivebox.config'
 
+import os
 import sys
 import shutil
+import tempfile
 from typing import Dict, Optional
 from pathlib import Path
 
 from rich import print
-from pydantic import Field, field_validator, computed_field
+from pydantic import Field, field_validator, computed_field, model_validator
 from django.utils.crypto import get_random_string
 
 from abx.archivebox.base_configset import BaseConfigSet
 
-
-
 from .constants import CONSTANTS
 from .version import get_COMMIT_HASH, get_BUILD_TIME
 from .permissions import IN_DOCKER
@@ -35,7 +35,6 @@ class ShellConfig(BaseConfigSet):
     VERSIONS_AVAILABLE: bool = False             # .check_for_update.get_versions_available_on_github(c)},
     CAN_UPGRADE: bool = False                    # .check_for_update.can_upgrade(c)},
 
-
     @computed_field
     @property
     def TERM_WIDTH(self) -> int:
@@ -57,6 +56,16 @@ SHELL_CONFIG = ShellConfig()
 
 
 class StorageConfig(BaseConfigSet):
+    # TMP_DIR must be a local, fast, readable/writable dir by archivebox user,
+    # must be a short path due to unix path length restrictions for socket files (<100 chars)
+    # must be a local SSD/tmpfs for speed and because bind mounts/network mounts/FUSE dont support unix sockets
+    TMP_DIR: Path = Field(default=CONSTANTS.DEFAULT_TMP_DIR)
+
+    # LIB_DIR must be a local, fast, readable/writable dir by archivebox user,
+    # must be able to contain executable binaries (up to 5GB size)
+    # should not be a remote/network/FUSE mount for speed reasons, otherwise extractors will be slow
+    LIB_DIR: Path = Field(default=CONSTANTS.DEFAULT_LIB_DIR)
+
     OUTPUT_PERMISSIONS: str = Field(default='644')
     RESTRICT_FILE_NAMES: str = Field(default='windows')
     ENFORCE_ATOMIC_WRITES: bool = Field(default=True)
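Because BaseConfigSet also loads from environment variables, the new fields can be overridden per-run without editing ArchiveBox.conf; a rough sketch (assuming the bare field name maps to the env var, as ArchiveBox's config loading does, and that the env var is set before the config class is instantiated):

    import os
    from pathlib import Path

    os.environ['TMP_DIR'] = '/tmp/abx'   # hypothetical short path, to stay under the ~100-char socket limit

    from archivebox.config.common import StorageConfig
    assert StorageConfig().TMP_DIR == Path('/tmp/abx')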
@@ -1,6 +1,5 @@
 __package__ = 'archivebox.config'
 
-import os
 import re
 import sys
 
@@ -97,14 +96,10 @@ class ConstantsDict(Mapping):
 
     # Runtime dirs
     TMP_DIR_NAME: str = 'tmp'
-    TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME / MACHINE_ID
+    DEFAULT_TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME / MACHINE_ID       # ./data/tmp/abc3244323
 
     LIB_DIR_NAME: str = 'lib'
-    LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / MACHINE_TYPE
-    LIB_PIP_DIR: Path = LIB_DIR / 'pip'
-    LIB_NPM_DIR: Path = LIB_DIR / 'npm'
-    LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'
-    LIB_BIN_DIR: Path = LIB_DIR / 'bin'
-    BIN_DIR: Path = LIB_BIN_DIR
+    DEFAULT_LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / MACHINE_TYPE     # ./data/lib/arm64-linux-docker
 
     # Config constants
     TIMEZONE: str = 'UTC'
@@ -198,91 +193,7 @@ class ConstantsDict(Mapping):
         ".archivebox_id",
         "Dockerfile",
     ))
 
-    CODE_LOCATIONS = benedict({
-        'PACKAGE_DIR': {
-            'path': (PACKAGE_DIR).resolve(),
-            'enabled': True,
-            'is_valid': os.access(PACKAGE_DIR / '__main__.py', os.X_OK),   # executable
-        },
-        'TEMPLATES_DIR': {
-            'path': TEMPLATES_DIR.resolve(),
-            'enabled': True,
-            'is_valid': os.access(STATIC_DIR, os.R_OK) and os.access(STATIC_DIR, os.X_OK),   # read + list
-        },
-        'CUSTOM_TEMPLATES_DIR': {
-            'path': CUSTOM_TEMPLATES_DIR.resolve(),
-            'enabled': os.path.isdir(CUSTOM_TEMPLATES_DIR),
-            'is_valid': os.path.isdir(CUSTOM_TEMPLATES_DIR) and os.access(CUSTOM_TEMPLATES_DIR, os.R_OK),   # read
-        },
-        'USER_PLUGINS_DIR': {
-            'path': USER_PLUGINS_DIR.resolve(),
-            'enabled': os.path.isdir(USER_PLUGINS_DIR),
-            'is_valid': os.path.isdir(USER_PLUGINS_DIR) and os.access(USER_PLUGINS_DIR, os.R_OK),   # read
-        },
-        'LIB_DIR': {
-            'path': LIB_DIR.resolve(),
-            'enabled': True,
-            'is_valid': os.path.isdir(LIB_DIR) and os.access(LIB_DIR, os.R_OK) and os.access(LIB_DIR, os.W_OK),   # read + write
-        },
-    })
-
-    DATA_LOCATIONS = benedict({
-        "DATA_DIR": {
-            "path": DATA_DIR.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isdir(DATA_DIR) and os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK),
-            "is_mount": os.path.ismount(DATA_DIR.resolve()),
-        },
-        "CONFIG_FILE": {
-            "path": CONFIG_FILE.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isfile(CONFIG_FILE) and os.access(CONFIG_FILE, os.R_OK) and os.access(CONFIG_FILE, os.W_OK),
-        },
-        "SQL_INDEX": {
-            "path": DATABASE_FILE.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isfile(DATABASE_FILE) and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
-            "is_mount": os.path.ismount(DATABASE_FILE.resolve()),
-        },
-        "QUEUE_DATABASE": {
-            "path": QUEUE_DATABASE_FILE.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isfile(QUEUE_DATABASE_FILE) and os.access(QUEUE_DATABASE_FILE, os.R_OK) and os.access(QUEUE_DATABASE_FILE, os.W_OK),
-            "is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()),
-        },
-        "ARCHIVE_DIR": {
-            "path": ARCHIVE_DIR.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isdir(ARCHIVE_DIR) and os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK),
-            "is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
-        },
-        "SOURCES_DIR": {
-            "path": SOURCES_DIR.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isdir(SOURCES_DIR) and os.access(SOURCES_DIR, os.R_OK) and os.access(SOURCES_DIR, os.W_OK),
-        },
-        "PERSONAS_DIR": {
-            "path": PERSONAS_DIR.resolve(),
-            "enabled": os.path.isdir(PERSONAS_DIR),
-            "is_valid": os.path.isdir(PERSONAS_DIR) and os.access(PERSONAS_DIR, os.R_OK) and os.access(PERSONAS_DIR, os.W_OK),   # read + write
-        },
-        "LOGS_DIR": {
-            "path": LOGS_DIR.resolve(),
-            "enabled": True,
-            "is_valid": os.path.isdir(LOGS_DIR) and os.access(LOGS_DIR, os.R_OK) and os.access(LOGS_DIR, os.W_OK),   # read + write
-        },
-        'TMP_DIR': {
-            'path': TMP_DIR.resolve(),
-            'enabled': True,
-            'is_valid': os.path.isdir(TMP_DIR) and os.access(TMP_DIR, os.R_OK) and os.access(TMP_DIR, os.W_OK),   # read + write
-        },
-        # "CACHE_DIR": {
-        #     "path": CACHE_DIR.resolve(),
-        #     "enabled": True,
-        #     "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK),   # read + write
-        # },
-    })
-
     @classmethod
     def __getitem__(cls, key: str):
@@ -258,6 +258,9 @@ def load_config_val(key: str,
 
     elif type is list or type is dict:
         return json.loads(val)
+
+    elif type is Path:
+        return Path(val)
 
     raise Exception('Config values can only be str, bool, int, or json')
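With the added branch, Path-typed values from the config file or environment are coerced instead of falling through to the final raise; a simplified sketch of the dispatch (the str/bool/int branches of the surrounding function are omitted here):

    import json
    from pathlib import Path

    def coerce(val: str, type):
        if type is list or type is dict:
            return json.loads(val)
        elif type is Path:
            return Path(val)
        raise Exception('Config values can only be str, bool, int, or json')

    assert coerce('/data/tmp', Path) == Path('/data/tmp')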
@@ -574,7 +577,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
         with SudoPermission(uid=0):
             # running as root is a special case where it's ok to be a bit slower
             # make sure data dir is always owned by the correct user
-            os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"')
+            os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}" 2>/dev/null')
             os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"/* 2>/dev/null')
 
     bump_startup_progress_bar()
@@ -1,12 +1,16 @@
 __package__ = 'archivebox.config'
 
 import os
+import socket
 import hashlib
+import tempfile
 import platform
 from pathlib import Path
 from functools import cache
 from datetime import datetime
 
+from benedict import benedict
+
 from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER
 
 #############################################################################################
@@ -88,7 +92,7 @@ def get_machine_type() -> str:
     return LIB_DIR_SCOPE
 
 
-def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True) -> bool:
+def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True, chown=True) -> bool:
     """Check if a given directory is writable by a specific user and group (fallback=try as current user is unable to check with provided uid)"""
     current_uid, current_gid = os.geteuid(), os.getegid()
     uid, gid = uid or current_uid, gid or current_gid
@@ -101,10 +105,197 @@ def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = No
         test_file.unlink()
         return True
     except (IOError, OSError, PermissionError):
-        pass
+        if chown:
+            # try fixing it using sudo permissions
+            with SudoPermission(uid=uid, fallback=fallback):
+                os.system(f'chown {uid}:{gid} "{dir_path}" 2>/dev/null')
+            return dir_is_writable(dir_path, uid=uid, gid=gid, fallback=fallback, chown=False)
     return False
 
+
+def assert_dir_can_contain_unix_sockets(dir_path: Path) -> bool:
+    """Check if a given directory can contain unix sockets (e.g. /tmp/supervisord.sock)"""
+    from archivebox.logging_util import pretty_path
+
+    try:
+        socket_path = str(dir_path / '.test_socket.sock')
+        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        try:
+            os.remove(socket_path)
+        except OSError:
+            pass
+        s.bind(socket_path)
+        s.close()
+        try:
+            os.remove(socket_path)
+        except OSError:
+            pass
+    except Exception as e:
+        raise Exception(f'ArchiveBox failed to create a test UNIX socket file in {pretty_path(dir_path, color=False)}') from e
+
+    return True
+
+
+def create_and_chown_dir(dir_path: Path) -> None:
+    with SudoPermission(uid=0, fallback=True):
+        dir_path.mkdir(parents=True, exist_ok=True)
+        os.system(f'chown {ARCHIVEBOX_USER} "{dir_path}" 2>/dev/null')
+        os.system(f'chown {ARCHIVEBOX_USER} "{dir_path}"/* 2>/dev/null &')
+
+
+@cache
+def get_or_create_working_tmp_dir(autofix=True, quiet=False):
+    from archivebox import CONSTANTS
+    from archivebox.config.common import STORAGE_CONFIG
+    from archivebox.misc.checks import check_tmp_dir
+
+    # try a few potential directories in order of preference
+    CANDIDATES = [
+        STORAGE_CONFIG.TMP_DIR,                                                  # <user-specified>
+        CONSTANTS.DEFAULT_TMP_DIR,                                               # ./data/tmp/<machine_id>
+        Path('/var/run/archivebox') / get_collection_id(),                       # /var/run/archivebox/abc5d8512
+        Path('/tmp') / 'archivebox' / get_collection_id(),                       # /tmp/archivebox/abc5d8512
+        Path('~/.tmp/archivebox').expanduser() / get_collection_id(),            # ~/.tmp/archivebox/abc5d8512
+        Path(tempfile.gettempdir()) / 'archivebox' / get_collection_id(),        # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/abc5d8512
+        Path(tempfile.gettempdir()) / 'archivebox' / get_collection_id()[:4],    # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/abc5d
+        Path(tempfile.gettempdir()) / 'abx' / get_collection_id()[:4],           # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/abx/abc5
+    ]
+    for candidate in CANDIDATES:
+        try:
+            create_and_chown_dir(candidate)
+        except Exception:
+            pass
+        if check_tmp_dir(candidate, throw=False, quiet=True, must_exist=True):
+            if autofix and STORAGE_CONFIG.TMP_DIR != candidate:
+                STORAGE_CONFIG.update_in_place(TMP_DIR=candidate, warn=not quiet)
+            return candidate
+
+    if not quiet:
+        raise OSError(f'ArchiveBox is unable to find a writable TMP_DIR, tried {CANDIDATES}!')
+
+
+@cache
+def get_or_create_working_lib_dir(autofix=True, quiet=False):
+    from archivebox import CONSTANTS
+    from archivebox.config.common import STORAGE_CONFIG
+    from archivebox.misc.checks import check_lib_dir
+
+    # try a few potential directories in order of preference
+    CANDIDATES = [
+        STORAGE_CONFIG.LIB_DIR,                                                  # <user-specified>
+        CONSTANTS.DEFAULT_LIB_DIR,                                               # ./data/lib/arm64-linux-docker
+        Path('/usr/local/share/archivebox') / get_collection_id(),               # /usr/local/share/archivebox/abc5
+        *([Path('/opt/homebrew/share/archivebox') / get_collection_id()] if os.path.isfile('/opt/homebrew/bin/archivebox') else []),   # /opt/homebrew/share/archivebox/abc5
+        Path('~/.local/share/archivebox').expanduser() / get_collection_id(),    # ~/.local/share/archivebox/abc5
+    ]
+
+    for candidate in CANDIDATES:
+        try:
+            create_and_chown_dir(candidate)
+        except Exception:
+            pass
+        if check_lib_dir(candidate, throw=False, quiet=True, must_exist=True):
+            if autofix and STORAGE_CONFIG.LIB_DIR != candidate:
+                STORAGE_CONFIG.update_in_place(LIB_DIR=candidate, warn=not quiet)
+            return candidate
+
+    if not quiet:
+        raise OSError(f'ArchiveBox is unable to find a writable LIB_DIR, tried {CANDIDATES}!')
+
+
+@cache
+def get_data_locations():
+    from archivebox.config import CONSTANTS
+    from archivebox.config.common import STORAGE_CONFIG
+
+    return benedict({
+        "DATA_DIR": {
+            "path": DATA_DIR.resolve(),
+            "enabled": True,
+            "is_valid": os.path.isdir(DATA_DIR) and os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK),
+            "is_mount": os.path.ismount(DATA_DIR.resolve()),
+        },
+        "CONFIG_FILE": {
+            "path": CONSTANTS.CONFIG_FILE.resolve(),
+            "enabled": True,
+            "is_valid": os.path.isfile(CONSTANTS.CONFIG_FILE) and os.access(CONSTANTS.CONFIG_FILE, os.R_OK) and os.access(CONSTANTS.CONFIG_FILE, os.W_OK),
+        },
+        "SQL_INDEX": {
+            "path": DATABASE_FILE.resolve(),
+            "enabled": True,
+            "is_valid": os.path.isfile(DATABASE_FILE) and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
+            "is_mount": os.path.ismount(DATABASE_FILE.resolve()),
+        },
+        "QUEUE_DATABASE": {
+            "path": CONSTANTS.QUEUE_DATABASE_FILE,
+            "enabled": True,
+            "is_valid": os.path.isfile(CONSTANTS.QUEUE_DATABASE_FILE) and os.access(CONSTANTS.QUEUE_DATABASE_FILE, os.R_OK) and os.access(CONSTANTS.QUEUE_DATABASE_FILE, os.W_OK),
+            "is_mount": os.path.ismount(CONSTANTS.QUEUE_DATABASE_FILE),
+        },
+        "ARCHIVE_DIR": {
+            "path": ARCHIVE_DIR.resolve(),
+            "enabled": True,
+            "is_valid": os.path.isdir(ARCHIVE_DIR) and os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK),
+            "is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
+        },
+        "SOURCES_DIR": {
+            "path": CONSTANTS.SOURCES_DIR.resolve(),
+            "enabled": True,
+            "is_valid": os.path.isdir(CONSTANTS.SOURCES_DIR) and os.access(CONSTANTS.SOURCES_DIR, os.R_OK) and os.access(CONSTANTS.SOURCES_DIR, os.W_OK),
+        },
+        "PERSONAS_DIR": {
+            "path": CONSTANTS.PERSONAS_DIR.resolve(),
+            "enabled": os.path.isdir(CONSTANTS.PERSONAS_DIR),
+            "is_valid": os.path.isdir(CONSTANTS.PERSONAS_DIR) and os.access(CONSTANTS.PERSONAS_DIR, os.R_OK) and os.access(CONSTANTS.PERSONAS_DIR, os.W_OK),   # read + write
+        },
+        "LOGS_DIR": {
+            "path": CONSTANTS.LOGS_DIR.resolve(),
+            "enabled": True,
+            "is_valid": os.path.isdir(CONSTANTS.LOGS_DIR) and os.access(CONSTANTS.LOGS_DIR, os.R_OK) and os.access(CONSTANTS.LOGS_DIR, os.W_OK),   # read + write
+        },
+        'TMP_DIR': {
+            'path': STORAGE_CONFIG.TMP_DIR.resolve(),
+            'enabled': True,
+            'is_valid': os.path.isdir(STORAGE_CONFIG.TMP_DIR) and os.access(STORAGE_CONFIG.TMP_DIR, os.R_OK) and os.access(STORAGE_CONFIG.TMP_DIR, os.W_OK),   # read + write
+        },
+        # "CACHE_DIR": {
+        #     "path": CACHE_DIR.resolve(),
+        #     "enabled": True,
+        #     "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK),   # read + write
+        # },
+    })
+
+
+@cache
+def get_code_locations():
+    from archivebox.config import CONSTANTS
+    from archivebox.config.common import STORAGE_CONFIG
+
+    return benedict({
+        'PACKAGE_DIR': {
+            'path': (PACKAGE_DIR).resolve(),
+            'enabled': True,
+            'is_valid': os.access(PACKAGE_DIR / '__main__.py', os.X_OK),   # executable
+        },
+        'TEMPLATES_DIR': {
+            'path': CONSTANTS.TEMPLATES_DIR.resolve(),
+            'enabled': True,
+            'is_valid': os.access(CONSTANTS.STATIC_DIR, os.R_OK) and os.access(CONSTANTS.STATIC_DIR, os.X_OK),   # read + list
+        },
+        'CUSTOM_TEMPLATES_DIR': {
+            'path': CONSTANTS.CUSTOM_TEMPLATES_DIR.resolve(),
+            'enabled': os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR),
+            'is_valid': os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR) and os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK),   # read
+        },
+        'USER_PLUGINS_DIR': {
+            'path': CONSTANTS.USER_PLUGINS_DIR.resolve(),
+            'enabled': os.path.isdir(CONSTANTS.USER_PLUGINS_DIR),
+            'is_valid': os.path.isdir(CONSTANTS.USER_PLUGINS_DIR) and os.access(CONSTANTS.USER_PLUGINS_DIR, os.R_OK),   # read
+        },
+        'LIB_DIR': {
+            'path': STORAGE_CONFIG.LIB_DIR.resolve(),
+            'enabled': True,
+            'is_valid': os.path.isdir(STORAGE_CONFIG.LIB_DIR) and os.access(STORAGE_CONFIG.LIB_DIR, os.R_OK) and os.access(STORAGE_CONFIG.LIB_DIR, os.W_OK),   # read + write
+        },
+    })
+
+
 # @cache
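Together these helpers implement the commit's fallback behavior: each candidate dir is created and chowned best-effort, validated with check_tmp_dir()/check_lib_dir(), and the first usable one is written back into STORAGE_CONFIG (and the environment, via update_in_place) when autofix=True. The intended call sites match the ones added to misc/checks.py further below:

    from archivebox.config.paths import get_or_create_working_tmp_dir, get_or_create_working_lib_dir

    tmp_dir = get_or_create_working_tmp_dir(autofix=True, quiet=False)   # may rewrite STORAGE_CONFIG.TMP_DIR
    lib_dir = get_or_create_working_lib_dir(autofix=True, quiet=False)   # may rewrite STORAGE_CONFIG.LIB_DIR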
@@ -510,7 +510,7 @@ def log_removal_finished(all_links: int, to_remove: int):
 ### Helpers
 
 @enforce_types
-def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=DATA_DIR) -> str:
+def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=DATA_DIR, color: bool=True) -> str:
     """convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc"""
     pwd = str(Path(pwd))   # .resolve()
     path = str(path)
@@ -520,7 +520,10 @@ def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=DATA_DIR) -> str:
 
     # replace long absolute paths with ./ relative ones to save on terminal output width
     if path.startswith(pwd) and (pwd != '/') and path != pwd:
-        path = path.replace(pwd, '[light_slate_blue].[/light_slate_blue]', 1)
+        if color:
+            path = path.replace(pwd, '[light_slate_blue].[/light_slate_blue]', 1)
+        else:
+            path = path.replace(pwd, '.', 1)
 
     # quote paths containing spaces
     if ' ' in path:
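The new color flag lets plain-text consumers (e.g. the exception message in assert_dir_can_contain_unix_sockets above, which calls pretty_path(dir_path, color=False)) skip the rich markup. Assuming DATA_DIR is /data:

    pretty_path('/data/archive/example', color=True)
    # -> '[light_slate_blue].[/light_slate_blue]/archive/example'
    pretty_path('/data/archive/example', color=False)
    # -> './archive/example'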
@@ -189,6 +189,7 @@ def version(quiet: bool=False,
     if quiet or '--version' in sys.argv:
         return
 
+    from rich.panel import Panel
     from rich.console import Console
     console = Console()
     prnt = console.print
|
@ -197,6 +198,7 @@ def version(quiet: bool=False,
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
|
from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
|
||||||
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID
|
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID
|
||||||
|
from archivebox.config.paths import get_data_locations, get_code_locations
|
||||||
|
|
||||||
from abx.archivebox.base_binary import BaseBinary, apt, brew, env
|
from abx.archivebox.base_binary import BaseBinary, apt, brew, env
|
||||||
|
|
||||||
|
@@ -221,7 +223,7 @@ def version(quiet: bool=False,
         f'PLATFORM={platform.platform()}',
         f'PYTHON={sys.implementation.name.title()}' + (' (venv)' if CONSTANTS.IS_INSIDE_VENV else ''),
     )
-    OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS.DATA_DIR.is_mount or CONSTANTS.DATA_LOCATIONS.ARCHIVE_DIR.is_mount
+    OUTPUT_IS_REMOTE_FS = get_data_locations().DATA_DIR.is_mount or get_data_locations().ARCHIVE_DIR.is_mount
     DATA_DIR_STAT = CONSTANTS.DATA_DIR.stat()
     prnt(
         f'EUID={os.geteuid()}:{os.getegid()} UID={RUNNING_AS_UID}:{RUNNING_AS_GID} PUID={ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}',
@@ -240,6 +242,21 @@ def version(quiet: bool=False,
         #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})',   # add this if we have more useful info to show eventually
     )
     prnt()
 
+    if not (os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) and os.access(CONSTANTS.CONFIG_FILE, os.R_OK)):
+        PANEL_TEXT = '\n'.join((
+            # '',
+            # f'[yellow]CURRENT DIR =[/yellow] [red]{os.getcwd()}[/red]',
+            '',
+            '[violet]Hint:[/violet] [green]cd[/green] into a collection [blue]DATA_DIR[/blue] and run [green]archivebox version[/green] again...',
+            '      [grey53]OR[/grey53] run [green]archivebox init[/green] to create a new collection in the current dir.',
+            '',
+            '      [i][grey53](this is [red]REQUIRED[/red] if you are opening a Github Issue to get help)[/grey53][/i]',
+            '',
+        ))
+        prnt(Panel(PANEL_TEXT, expand=False, border_style='grey53', title='[red]:exclamation: No collection [blue]DATA_DIR[/blue] is currently active[/red]', subtitle='Full version info is only available when inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]'))
+        prnt()
+        return
+
     prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]')
     failures = []
@@ -299,13 +316,13 @@ def version(quiet: bool=False,
 
     prnt()
     prnt('[deep_sky_blue3][i] Code locations:[/deep_sky_blue3]')
-    for name, path in CONSTANTS.CODE_LOCATIONS.items():
+    for name, path in get_code_locations().items():
         prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
 
     prnt()
     if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) or os.access(CONSTANTS.CONFIG_FILE, os.R_OK):
         prnt('[bright_yellow][i] Data locations:[/bright_yellow]')
-        for name, path in CONSTANTS.DATA_LOCATIONS.items():
+        for name, path in get_data_locations().items():
             prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
 
     from archivebox.misc.checks import check_data_dir_permissions
@@ -395,7 +412,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
     print(f'    √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}')
 
     # from django.contrib.auth.models import User
-    # if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exists():
+    # if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exclude(username='system').exists():
     #     print('{green}[+] Creating admin user account...{reset}'.format(**SHELL_CONFIG.ANSI))
     #     call_command("createsuperuser", interactive=True)
@@ -486,9 +503,13 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
         html_index.rename(f"{index_name}.html")
 
     CONSTANTS.PERSONAS_DIR.mkdir(parents=True, exist_ok=True)
-    CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True)
-    CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True)
+    CONSTANTS.DEFAULT_TMP_DIR.mkdir(parents=True, exist_ok=True)
+    CONSTANTS.DEFAULT_LIB_DIR.mkdir(parents=True, exist_ok=True)
+
+    from archivebox.config.common import STORAGE_CONFIG
+    STORAGE_CONFIG.TMP_DIR.mkdir(parents=True, exist_ok=True)
+    STORAGE_CONFIG.LIB_DIR.mkdir(parents=True, exist_ok=True)
 
     if install:
         run_subcommand('install', pwd=out_dir)
@@ -1115,7 +1136,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     from django.contrib.auth import get_user_model
     User = get_user_model()
 
-    if not User.objects.filter(is_superuser=True).exists():
+    if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
         stderr('\n[+] Don\'t forget to create a new admin user for the Web UI...', color='green')
         stderr('    archivebox manage createsuperuser')
         # run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
|
||||||
from django.core.management import call_command
|
from django.core.management import call_command
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
|
|
||||||
|
if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
|
||||||
|
print()
|
||||||
|
# print('[yellow][!] No admin accounts exist, you must create one to be able to log in to the Admin UI![/yellow]')
|
||||||
|
print('[violet]Hint:[/violet] To create an [bold]admin username & password[/bold] for the [deep_sky_blue3][underline][link=http://{host}:{port}/admin]Admin UI[/link][/underline][/deep_sky_blue3], run:')
|
||||||
|
print(' [green]archivebox manage createsuperuser[/green]')
|
||||||
|
print()
|
||||||
|
|
||||||
|
|
||||||
print('[green][+] Starting ArchiveBox webserver...[/green]')
|
host = '127.0.0.1'
|
||||||
print(' > Logging errors to ./logs/errors.log')
|
port = '8000'
|
||||||
if not User.objects.filter(is_superuser=True).exists():
|
|
||||||
print('[yellow][!] No admin users exist yet, you will not be able to edit links in the UI.[/yellow]')
|
|
||||||
print()
|
|
||||||
print(' [violet]Hint:[/violet] To create an admin user, run:')
|
|
||||||
print(' archivebox manage createsuperuser')
|
|
||||||
print()
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
host_and_port = [arg for arg in runserver_args if arg.replace('.', '').replace(':', '').isdigit()][0]
|
||||||
|
if ':' in host_and_port:
|
||||||
|
host, port = host_and_port.split(':')
|
||||||
|
else:
|
||||||
|
if '.' in host_and_port:
|
||||||
|
host = host_and_port
|
||||||
|
else:
|
||||||
|
port = host_and_port
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
print('[green][+] Starting ArchiveBox webserver...[/green]')
|
||||||
|
print(f' [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
|
||||||
|
print(f' [green]>[/green] Log in to ArchiveBox Admin UI on [deep_sky_blue3][link=http://{host}:{port}/admin]http://{host}:{port}/admin[/link][/deep_sky_blue3]')
|
||||||
|
print(' > Writing ArchiveBox error log to ./logs/errors.log')
|
||||||
|
|
||||||
if SHELL_CONFIG.DEBUG:
|
if SHELL_CONFIG.DEBUG:
|
||||||
if not reload:
|
if not reload:
|
||||||
runserver_args.append('--noreload') # '--insecure'
|
runserver_args.append('--noreload') # '--insecure'
|
||||||
call_command("runserver", *runserver_args)
|
call_command("runserver", *runserver_args)
|
||||||
else:
|
else:
|
||||||
host = '127.0.0.1'
|
|
||||||
port = '8000'
|
|
||||||
|
|
||||||
try:
|
|
||||||
host_and_port = [arg for arg in runserver_args if arg.replace('.', '').replace(':', '').isdigit()][0]
|
|
||||||
if ':' in host_and_port:
|
|
||||||
host, port = host_and_port.split(':')
|
|
||||||
else:
|
|
||||||
if '.' in host_and_port:
|
|
||||||
host = host_and_port
|
|
||||||
else:
|
|
||||||
port = host_and_port
|
|
||||||
except IndexError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
print(f' [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link][/deep_sky_blue4]')
|
|
||||||
|
|
||||||
from queues.supervisor_util import start_server_workers
|
from queues.supervisor_util import start_server_workers
|
||||||
|
|
||||||
print()
|
print()
|
||||||
|
|
||||||
start_server_workers(host=host, port=port, daemonize=False)
|
start_server_workers(host=host, port=port, daemonize=False)
|
||||||
|
|
||||||
print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
|
print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
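The host/port parsing hoisted out of the else-branch accepts the same runserver-style address forms as before; extracted for illustration (logic copied from the hunk above):

    def parse_host_port(runserver_args, host='127.0.0.1', port='8000'):
        # the first arg made of only digits/dots/colons is treated as an address
        try:
            host_and_port = [arg for arg in runserver_args if arg.replace('.', '').replace(':', '').isdigit()][0]
            if ':' in host_and_port:
                host, port = host_and_port.split(':')
            elif '.' in host_and_port:
                host = host_and_port
            else:
                port = host_and_port
        except IndexError:
            pass
        return host, port

    assert parse_host_port(['0.0.0.0:8001']) == ('0.0.0.0', '8001')
    assert parse_host_port(['9000']) == ('127.0.0.1', '9000')
    assert parse_host_port(['--noreload']) == ('127.0.0.1', '8000')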
@@ -5,16 +5,24 @@ import sys
 from pathlib import Path
 
 from rich import print
+from rich.panel import Panel
 
-# DO NOT ADD ANY TOP-LEVEL IMPORTS HERE
+# DO NOT ADD ANY TOP-LEVEL IMPORTS HERE to anything other than builtin python libraries
 # this file is imported by archivebox/__init__.py
 # and any imports here will be imported by EVERYTHING else
 # so this file should only be used for pure python checks
 # that don't need to import other parts of ArchiveBox
+
+# if a check needs to import other parts of ArchiveBox,
+# the imports should be done inside the check function
+# and you should make sure if you need to import any django stuff
+# that the check is called after django.setup() has been called
 
 
 def check_data_folder() -> None:
     from archivebox import DATA_DIR, ARCHIVE_DIR
+    from archivebox.config import CONSTANTS
+    from archivebox.config.paths import create_and_chown_dir, get_or_create_working_tmp_dir, get_or_create_working_lib_dir
 
     archive_dir_exists = os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()
     if not archive_dir_exists:
@@ -30,8 +38,21 @@ def check_data_folder() -> None:
         raise SystemExit(2)
 
+    # Create data dir subdirs
+    create_and_chown_dir(CONSTANTS.SOURCES_DIR)
+    create_and_chown_dir(CONSTANTS.PERSONAS_DIR / 'Default')
+    create_and_chown_dir(CONSTANTS.LOGS_DIR)
+    # create_and_chown_dir(CONSTANTS.CACHE_DIR)
+
+    # Create /tmp and /lib dirs if they don't exist
+    get_or_create_working_tmp_dir(autofix=True, quiet=False)
+    get_or_create_working_lib_dir(autofix=True, quiet=False)
+
+    # Check data dir permissions, /tmp, and /lib permissions
+    check_data_dir_permissions()
 
 def check_migrations():
-    from archivebox import DATA_DIR, CONSTANTS
+    from archivebox import DATA_DIR
     from ..index.sql import list_migrations
 
     pending_migrations = [name for status, name in list_migrations() if not status]
@ -45,13 +66,6 @@ def check_migrations():
|
||||||
print(' archivebox init', file=sys.stderr)
|
print(' archivebox init', file=sys.stderr)
|
||||||
raise SystemExit(3)
|
raise SystemExit(3)
|
||||||
|
|
||||||
CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True)
|
|
||||||
CONSTANTS.LOGS_DIR.mkdir(exist_ok=True)
|
|
||||||
# CONSTANTS.CACHE_DIR.mkdir(exist_ok=True)
|
|
||||||
(CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True)
|
|
||||||
(CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True)
|
|
||||||
|
|
||||||
|
|
||||||
def check_io_encoding():
|
def check_io_encoding():
|
||||||
PYTHON_ENCODING = (sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8')
|
PYTHON_ENCODING = (sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8')
|
||||||
|
|
||||||
@@ -128,3 +142,98 @@ def check_data_dir_permissions():
     STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions]https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions[/link]')
     STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid[/link]')
     STDERR.print(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts]https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts[/link]')
+
+    from archivebox.config.common import STORAGE_CONFIG
+
+    # Check /tmp dir permissions
+    check_tmp_dir(STORAGE_CONFIG.TMP_DIR, throw=False, must_exist=True)
+
+    # Check /lib dir permissions
+    check_lib_dir(STORAGE_CONFIG.LIB_DIR, throw=False, must_exist=True)
+
+
+def check_tmp_dir(tmp_dir=None, throw=False, quiet=False, must_exist=True):
+    from archivebox.config.paths import assert_dir_can_contain_unix_sockets, dir_is_writable, get_or_create_working_tmp_dir
+    from archivebox.misc.logging import STDERR
+    from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
+    from archivebox.config.common import STORAGE_CONFIG
+    from archivebox.logging_util import pretty_path
+
+    tmp_dir = tmp_dir or STORAGE_CONFIG.TMP_DIR
+    socket_file = tmp_dir.absolute().resolve() / "supervisord.sock"
+
+    if not must_exist and not os.path.isdir(tmp_dir):
+        # just check that its viable based on its length (because dir may not exist yet, we cant check if its writable)
+        return len(f'file://{socket_file}') <= 96
+
+    tmp_is_valid = False
+    try:
+        tmp_is_valid = dir_is_writable(tmp_dir)
+        tmp_is_valid = tmp_is_valid and assert_dir_can_contain_unix_sockets(tmp_dir)
+        assert tmp_is_valid, f'ArchiveBox user PUID={ARCHIVEBOX_USER} PGID={ARCHIVEBOX_GROUP} is unable to write to TMP_DIR={tmp_dir}'
+        assert len(f'file://{socket_file}') <= 96, f'ArchiveBox TMP_DIR={tmp_dir} is too long, dir containing unix socket files must be <90 chars.'
+        return True
+    except Exception as e:
+        if not quiet:
+            STDERR.print()
+            ERROR_TEXT = '\n'.join((
+                '',
+                f'[red]:cross_mark: ArchiveBox is unable to use TMP_DIR={pretty_path(tmp_dir)}[/red]',
+                f' [yellow]{e}[/yellow]',
+                '',
+                '[blue]Info:[/blue] [grey53]The TMP_DIR is used for the supervisord unix socket file and other temporary files.',
+                ' - It [red]must[/red] be on a local drive (not inside a docker volume, remote network drive, or FUSE mount).',
+                f' - It [red]must[/red] be readable and writable by the ArchiveBox user (PUID={ARCHIVEBOX_USER}, PGID={ARCHIVEBOX_GROUP}).',
+                ' - It [red]must[/red] be a *short* path (less than 90 characters) due to UNIX path length restrictions for sockets.',
+                ' - It [yellow]should[/yellow] be able to hold at least 200MB of data (in-progress downloads can be large).[/grey53]',
+                '',
+                '[violet]Hint:[/violet] Fix it by setting TMP_DIR to a path that meets these requirements, e.g.:',
+                f' [green]archivebox config --set TMP_DIR={get_or_create_working_tmp_dir(autofix=False, quiet=True) or "/tmp/archivebox"}[/green]',
+                '',
+            ))
+            STDERR.print(Panel(ERROR_TEXT, expand=False, border_style='red', title='[red]:cross_mark: Error with configured TMP_DIR[/red]', subtitle='Background workers may fail to start until fixed.'))
+            STDERR.print()
+        if throw:
+            raise OSError(f'TMP_DIR={tmp_dir} is invalid, ArchiveBox is unable to use it and the server will fail to start!') from e
+    return False
+
+
+def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_exist=True):
+    from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
+    from archivebox.misc.logging import STDERR
+    from archivebox.config.paths import dir_is_writable, get_or_create_working_lib_dir
+    from archivebox.config.common import STORAGE_CONFIG
+    from archivebox.logging_util import pretty_path
+
+    lib_dir = lib_dir or STORAGE_CONFIG.LIB_DIR
+
+    if not must_exist and not os.path.isdir(lib_dir):
+        return True
+
+    lib_is_valid = False
+    try:
+        lib_is_valid = dir_is_writable(lib_dir)
+        assert lib_is_valid, f'ArchiveBox user PUID={ARCHIVEBOX_USER} PGID={ARCHIVEBOX_GROUP} is unable to write to LIB_DIR={lib_dir}'
+        return True
+    except Exception as e:
+        if not quiet:
+            STDERR.print()
+            ERROR_TEXT = '\n'.join((
+                '',
+                f'[red]:cross_mark: ArchiveBox is unable to use LIB_DIR={pretty_path(lib_dir)}[/red]',
+                f' [yellow]{e}[/yellow]',
+                '',
+                '[blue]Info:[/blue] [grey53]The LIB_DIR is used to store ArchiveBox auto-installed plugin library and binary dependencies.',
+                f' - It [red]must[/red] be readable and writable by the ArchiveBox user (PUID={ARCHIVEBOX_USER}, PGID={ARCHIVEBOX_GROUP}).',
+                ' - It [yellow]should[/yellow] be on a local (ideally fast) drive like an SSD or HDD (not on a network drive or external HDD).',
+                ' - It [yellow]should[/yellow] be able to hold at least 1GB of data (some dependencies like Chrome can be large).[/grey53]',
+                '',
+                '[violet]Hint:[/violet] Fix it by setting LIB_DIR to a path that meets these requirements, e.g.:',
+                f' [green]archivebox config --set LIB_DIR={get_or_create_working_lib_dir(autofix=False, quiet=True) or "/usr/local/share/archivebox"}[/green]',
+                '',
+            ))
+            STDERR.print(Panel(ERROR_TEXT, expand=False, border_style='red', title='[red]:cross_mark: Error with configured LIB_DIR[/red]', subtitle='[yellow]Dependencies may not auto-install properly until fixed.[/yellow]'))
+            STDERR.print()
+        if throw:
+            raise OSError(f'LIB_DIR={lib_dir} is invalid, ArchiveBox is unable to use it and dependencies will fail to install.') from e
+    return False
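Note: the `<= 96` checks above exist because `bind()` on a UNIX domain socket fails once the absolute path exceeds the kernel's `sun_path` limit (104 bytes on macOS, 108 on Linux); the `file://` prefix adds 7 characters of slack, which appears to be where the "<90 chars" guidance comes from. A standalone probe of both the length rule and actual bindability (the helper names here are hypothetical, not ArchiveBox's):

import socket
import tempfile
from pathlib import Path

SAFE_SOCK_URI_LEN = 96  # conservative margin under the 104/108-byte kernel limits

def sock_path_would_fit(tmp_dir: Path) -> bool:
    sock = tmp_dir.absolute().resolve() / 'supervisord.sock'
    return len(f'file://{sock}') <= SAFE_SOCK_URI_LEN

def dir_can_hold_unix_socket(tmp_dir: Path) -> bool:
    probe = tmp_dir / '.probe.sock'
    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        s.bind(str(probe))  # raises OSError on length/permission/filesystem problems
        return True
    except OSError:
        return False
    finally:
        s.close()
        probe.unlink(missing_ok=True)

tmp = Path(tempfile.gettempdir())
print(sock_path_would_fit(tmp), dir_can_hold_unix_socket(tmp))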
@@ -49,7 +49,7 @@ if __name__ == '__main__':

     prnt('[i] :heavy_dollar_sign: Welcome to the ArchiveBox Shell!')
     prnt(' [deep_sky_blue4]Docs:[/deep_sky_blue4] [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage[/link]')
-    prnt(' [link=https://docs.archivebox.io/en/latest/modules.html]https://docs.archivebox.io/en/latest/modules.html[/link]')
+    prnt(' [link=https://docs.archivebox.io/en/dev/apidocs/archivebox/archivebox.html]https://docs.archivebox.io/en/dev/apidocs/archivebox/archivebox.html[/link]')
     prnt()
     prnt(' :grey_question: [violet]Hint[/] [i]Here are some examples to get started:[/]')
     prnt(' add[blink][deep_sky_blue4]?[/deep_sky_blue4][/blink] [grey53]# add ? after anything to get help[/]')
@@ -82,10 +82,10 @@ class JSONSchemaWithLambdas(GenerateJsonSchema):
         if isinstance(default, Callable):
             return '{{lambda ' + inspect.getsource(default).split('=lambda ')[-1].strip()[:-1] + '}}'
         return to_jsonable_python(
             default,
             timedelta_mode=config.ser_json_timedelta,
             bytes_mode=config.ser_json_bytes,
             serialize_unknown=True
         )

 # for computed_field properties render them like this instead:
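Note: the `inspect.getsource(...)` one-liner above renders a lambda default as a readable `{{lambda ...}}` placeholder instead of letting JSON schema generation choke on an unserializable function object. A simplified standalone variant (it slices from the `lambda` keyword rather than reproducing the exact `'=lambda '` split, which depends on how the real config defaults are written):

import inspect

MAX_RETRIES=lambda c: c['TIMEOUT'] // 10   # a config default defined as a lambda

def render_lambda_default(fn) -> str:
    src = inspect.getsource(fn)                             # whole defining source line
    body = src[src.index('lambda'):].strip().rstrip('),')   # keep just the lambda itself
    return '{{' + body + '}}'

print(render_lambda_default(MAX_RETRIES))   # -> {{lambda c: c['TIMEOUT'] // 10}}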
@@ -104,7 +104,10 @@ class ChromeBinary(BaseBinary):
     }

     @staticmethod
-    def symlink_to_lib(binary, bin_dir=CONSTANTS.LIB_BIN_DIR) -> None:
+    def symlink_to_lib(binary, bin_dir=None) -> None:
+        from archivebox.config.common import STORAGE_CONFIG
+        bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'

         if not (binary.abspath and os.access(binary.abspath, os.F_OK)):
             return
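Note: this `bin_dir=None` change is the heart of the commit: a default like `bin_dir=CONSTANTS.LIB_BIN_DIR` is evaluated once at import time, so a LIB_DIR fallback chosen later (because the primary location wasn't writable) was silently ignored. A minimal illustration of the pitfall and the fix, using a stand-in config object:

from pathlib import Path

class Config:
    LIB_DIR = Path('/usr/local/share/abx')

config = Config()

def symlink_stale(bin_dir=config.LIB_DIR / 'bin'):   # default frozen at def time
    return bin_dir

def symlink_fresh(bin_dir=None):                     # lookup deferred to call time
    return bin_dir or config.LIB_DIR / 'bin'

config.LIB_DIR = Path('/tmp/abx-fallback')   # e.g. primary dir wasn't writable
print(symlink_stale())    # /usr/local/share/abx/bin   (stale!)
print(symlink_fresh())    # /tmp/abx-fallback/bin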
@@ -3,8 +3,6 @@ __package__ = 'plugins_pkg.npm'
 from pathlib import Path
 from typing import Optional

-from pydantic import model_validator

 from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName

 from archivebox.config import DATA_DIR, CONSTANTS
@@ -14,7 +12,7 @@ from abx.archivebox.base_binary import BaseBinProvider


 OLD_NODE_BIN_PATH = DATA_DIR / 'node_modules' / '.bin'
-NEW_NODE_BIN_PATH = CONSTANTS.LIB_NPM_DIR / 'node_modules' / '.bin'
+NEW_NODE_BIN_PATH = CONSTANTS.DEFAULT_LIB_DIR / 'npm' / 'node_modules' / '.bin'


 class SystemNpmBinProvider(NpmProvider, BaseBinProvider):
@@ -27,12 +25,16 @@ class LibNpmBinProvider(NpmProvider, BaseBinProvider):
     name: BinProviderName = "lib_npm"
     PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'

-    npm_prefix: Optional[Path] = CONSTANTS.LIB_NPM_DIR
+    npm_prefix: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'npm'

-    @model_validator(mode='after')
-    def validate_path(self):
-        assert self.npm_prefix == NEW_NODE_BIN_PATH.parent.parent
-        return self
+    def setup(self) -> None:
+        # update paths from config if they arent the default
+        from archivebox.config.common import STORAGE_CONFIG
+        if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
+            self.npm_prefix = STORAGE_CONFIG.LIB_DIR / 'npm'
+            self.PATH = f'{STORAGE_CONFIG.LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
+
+        super().setup()


 SYS_NPM_BINPROVIDER = SystemNpmBinProvider()
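Note: the same setup()-time override pattern recurs in the pip, playwright, and puppeteer providers below: class attributes keep the compile-time DEFAULT_LIB_DIR paths, and setup() re-reads STORAGE_CONFIG so a runtime fallback LIB_DIR wins. A condensed standalone sketch (the base class here is a stand-in, since the real pydantic_pkgr providers carry more machinery):

from pathlib import Path

DEFAULT_LIB_DIR = Path('/usr/local/share/abx')

class StorageConfig:
    LIB_DIR = DEFAULT_LIB_DIR   # may be pointed at a fallback at runtime

STORAGE_CONFIG = StorageConfig()

class BaseProvider:
    def setup(self) -> None:
        pass   # the real base would create dirs, resolve binaries, etc.

class LibNpmProvider(BaseProvider):
    npm_prefix: Path = DEFAULT_LIB_DIR / 'npm'

    def setup(self) -> None:
        # re-read config at setup time so a non-default LIB_DIR takes effect
        if STORAGE_CONFIG.LIB_DIR != DEFAULT_LIB_DIR:
            self.npm_prefix = STORAGE_CONFIG.LIB_DIR / 'npm'
        super().setup()

STORAGE_CONFIG.LIB_DIR = Path('/tmp/abx')   # fallback chosen at runtime
provider = LibNpmProvider()
provider.setup()
print(provider.npm_prefix)   # /tmp/abx/npm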
@@ -49,7 +49,15 @@ class LibPipBinProvider(PipProvider, BaseBinProvider):
     name: BinProviderName = "lib_pip"
     INSTALLER_BIN: BinName = "pip"

-    pip_venv: Optional[Path] = CONSTANTS.LIB_PIP_DIR / 'venv'
+    pip_venv: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'pip' / 'venv'
+
+    def setup(self) -> None:
+        # update paths from config if they arent the default
+        from archivebox.config.common import STORAGE_CONFIG
+        if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
+            self.pip_venv = STORAGE_CONFIG.LIB_DIR / 'pip' / 'venv'
+
+        super().setup()

 SYS_PIP_BINPROVIDER = SystemPipBinProvider()
 PIPX_PIP_BINPROVIDER = SystemPipxBinProvider()
@@ -35,7 +35,7 @@ class PlaywrightBinProvider(BaseBinProvider):
     name: BinProviderName = "playwright"
     INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name

-    PATH: PATHStr = f"{CONSTANTS.LIB_BIN_DIR}:{DEFAULT_ENV_PATH}"
+    PATH: PATHStr = f"{CONSTANTS.DEFAULT_LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"

     playwright_browsers_dir: Path = (
         MACOS_PLAYWRIGHT_CACHE_DIR.expanduser()
@@ -56,6 +56,11 @@ class PlaywrightBinProvider(BaseBinProvider):
         return PLAYWRIGHT_BINARY.load().abspath

     def setup(self) -> None:
+        # update paths from config if they arent the default
+        from archivebox.config.common import STORAGE_CONFIG
+        if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
+            self.PATH = f"{STORAGE_CONFIG.LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
+
         assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized"

         if self.playwright_browsers_dir:
@@ -23,19 +23,16 @@ from abx.archivebox.base_binary import BaseBinProvider
 from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER


-LIB_DIR_BROWSERS = CONSTANTS.LIB_BROWSERS_DIR


 class PuppeteerBinProvider(BaseBinProvider):
     name: BinProviderName = "puppeteer"
     INSTALLER_BIN: BinName = "npx"

-    PATH: PATHStr = str(CONSTANTS.LIB_BIN_DIR)
+    PATH: PATHStr = str(CONSTANTS.DEFAULT_LIB_DIR / 'bin')

     euid: Optional[int] = ARCHIVEBOX_USER

-    puppeteer_browsers_dir: Path = LIB_DIR_BROWSERS
-    puppeteer_install_args: List[str] = ['--yes', "@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)]
+    puppeteer_browsers_dir: Path = CONSTANTS.DEFAULT_LIB_DIR / 'browsers'
+    puppeteer_install_args: List[str] = ['--yes', "@puppeteer/browsers", "install"]

     packages_handler: BinProviderOverrides = Field(default={
         "chrome": lambda:
@@ -45,6 +42,11 @@ class PuppeteerBinProvider(BaseBinProvider):
     _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}

     def setup(self) -> None:
+        # update paths from config
+        from archivebox.config.common import STORAGE_CONFIG
+        self.puppeteer_browsers_dir = STORAGE_CONFIG.LIB_DIR / 'browsers'
+        self.PATH = str(STORAGE_CONFIG.LIB_DIR / 'bin')
+
         assert SYS_NPM_BINPROVIDER.INSTALLER_BIN_ABSPATH, "NPM bin provider not initialized"

         if self.puppeteer_browsers_dir:
@@ -90,7 +92,7 @@ class PuppeteerBinProvider(BaseBinProvider):

         # print(f'[*] {self.__class__.__name__}: Installing {bin_name}: {self.INSTALLER_BIN_ABSPATH} install {packages}')

-        install_args = [*self.puppeteer_install_args]
+        install_args = [*self.puppeteer_install_args, "--path", str(self.puppeteer_browsers_dir)]

         proc = self.exec(bin_name=self.INSTALLER_BIN_ABSPATH, cmd=[*install_args, *packages])
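Note: moving `--path` out of the class-level `puppeteer_install_args` and into the install call means the browsers dir is read from `self` after setup() has applied any LIB_DIR override. The assembled command is equivalent to the following (the browser spec and path are illustrative):

import subprocess
from pathlib import Path

browsers_dir = Path('/tmp/abx/browsers')   # hypothetical LIB_DIR / 'browsers'

# equivalent of: npx --yes @puppeteer/browsers install --path <dir> chrome@stable
subprocess.run(
    ['npx', '--yes', '@puppeteer/browsers', 'install',
     '--path', str(browsers_dir), 'chrome@stable'],
    check=True,
)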
@@ -1,40 +0,0 @@
-import tempfile
-from pathlib import Path
-from functools import cache
-
-from archivebox.config import CONSTANTS
-from archivebox.config.paths import get_collection_id
-
-DATA_DIR = CONSTANTS.DATA_DIR
-LOGS_DIR = CONSTANTS.LOGS_DIR
-TMP_DIR = CONSTANTS.TMP_DIR
-
-SUPERVISORD_CONFIG_FILE = TMP_DIR / "supervisord.conf"
-PID_FILE = TMP_DIR / "supervisord.pid"
-SOCK_FILE = TMP_DIR / "supervisord.sock"
-LOG_FILE = TMP_DIR / "supervisord.log"
-WORKERS_DIR = TMP_DIR / "workers"
-
-@cache
-def get_sock_file():
-    """Get the path to the supervisord socket file, symlinking to a shorter path if needed due to unix path length limits"""
-    TMP_DIR.mkdir(parents=True, exist_ok=True)
-
-    if len(f'file://{SOCK_FILE.absolute().resolve()}') > 98:
-        # socket absolute paths cannot be longer than 104 bytes on macos, and 108 bytes on linux
-        # symlink it to a shorter path and use that instead
-
-        # place the actual socket file in a shorter tmp dir
-        # /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox_supervisord_3d1e544e.sock
-        shorter_sock_file = Path(tempfile.gettempdir()) / f"archivebox_supervisord_{get_collection_id()}.sock"
-
-        # symlink ./data/tmp/<collection_id>/supervisord.sock -> /var/folders/qy/abc234235/T/archivebox_supervisord_3d1e544e.sock
-        # for convenience/consistency
-        symlink = SOCK_FILE
-        symlink.unlink(missing_ok=True)
-        symlink.symlink_to(shorter_sock_file)
-
-        assert len(f'file://{shorter_sock_file}') <= 98, f'Failed to create supervisord SOCK_FILE, system tmp dir location is too long {shorter_sock_file} (unix only allows 108 characters for socket paths)'
-        return shorter_sock_file
-
-    return SOCK_FILE
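Note: the deleted settings.py above coped with over-long socket paths by parking the real socket in the system tmp dir and symlinking the long collection-local path to it; the replacement below validates TMP_DIR up front instead. A standalone sketch of the retired workaround (fixed collection id for illustration):

import tempfile
from pathlib import Path

MAX_SOCK_URI_LEN = 98   # stay under the 104 (macOS) / 108 (Linux) sun_path limits

def shortened_sock_path(preferred: Path, collection_id: str = '3d1e544e') -> Path:
    if len(f'file://{preferred.absolute().resolve()}') <= MAX_SOCK_URI_LEN:
        return preferred
    short = Path(tempfile.gettempdir()) / f'archivebox_supervisord_{collection_id}.sock'
    preferred.unlink(missing_ok=True)
    preferred.symlink_to(short)   # keep a convenient alias at the long path
    return short                  # bind()/connect() must use the short path itself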
@@ -1,23 +1,39 @@
 __package__ = 'archivebox.queues'

+import sys
 import time
 import signal
 import psutil
 import shutil
 import subprocess

+from typing import Dict, cast, Iterator
 from pathlib import Path
+from functools import cache

 from rich import print

-from typing import Dict, cast
-
 from supervisor.xmlrpc import SupervisorTransport
 from xmlrpc.client import ServerProxy

+from archivebox.config import CONSTANTS
+from archivebox.config.paths import get_or_create_working_tmp_dir
 from archivebox.config.permissions import ARCHIVEBOX_USER
+from archivebox.misc.logging import STDERR
+from archivebox.logging_util import pretty_path

-from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, get_sock_file, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR
-from typing import Iterator
+LOG_FILE_NAME = "supervisord.log"
+CONFIG_FILE_NAME = "supervisord.conf"
+PID_FILE_NAME = "supervisord.pid"
+WORKERS_DIR_NAME = "workers"

+@cache
+def get_sock_file():
+    """Get the path to the supervisord socket file, symlinking to a shorter path if needed due to unix path length limits"""
+    TMP_DIR = get_or_create_working_tmp_dir(autofix=True, quiet=False)
+    assert TMP_DIR, "Failed to find or create a writable TMP_DIR!"
+    socket_file = TMP_DIR / "supervisord.sock"
+
+    return socket_file
+
 def follow(file, sleep_sec=0.1) -> Iterator[str]:
     """ Yield each line from a file as they are written.
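Note: get_sock_file() is now the single source of truth for where supervisord state lives, and @cache ensures the (potentially slow, directory-creating) TMP_DIR resolution runs only once per process; every other state file is then derived from the socket's parent dir, as the functions below do. Condensed:

import tempfile
from functools import cache
from pathlib import Path

@cache
def get_sock_file() -> Path:
    # stand-in for get_or_create_working_tmp_dir(): resolve a writable dir once
    tmp_dir = Path(tempfile.gettempdir()) / 'archivebox'
    tmp_dir.mkdir(parents=True, exist_ok=True)
    return tmp_dir / 'supervisord.sock'

SOCK_FILE = get_sock_file()
PID_FILE = SOCK_FILE.parent / 'supervisord.pid'      # siblings derived on demand
CONFIG_FILE = SOCK_FILE.parent / 'supervisord.conf'
WORKERS_DIR = SOCK_FILE.parent / 'workers'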
@@ -35,24 +51,30 @@ def follow(file, sleep_sec=0.1) -> Iterator[str]:


 def create_supervisord_config():
+    SOCK_FILE = get_sock_file()
+    WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME
+    CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME
+    PID_FILE = SOCK_FILE.parent / PID_FILE_NAME
+    LOG_FILE = CONSTANTS.LOGS_DIR / LOG_FILE_NAME
+
     config_content = f"""
 [supervisord]
 nodaemon = true
 environment = IS_SUPERVISORD_PARENT="true"
-pidfile = {TMP_DIR}/{PID_FILE.name}
-logfile = {LOGS_DIR}/{LOG_FILE.name}
-childlogdir = {LOGS_DIR}
-directory = {DATA_DIR}
+pidfile = {PID_FILE}
+logfile = {LOG_FILE}
+childlogdir = {CONSTANTS.LOGS_DIR}
+directory = {CONSTANTS.DATA_DIR}
 strip_ansi = true
 nocleanup = true
 user = {ARCHIVEBOX_USER}

 [unix_http_server]
-file = {get_sock_file()}
+file = {SOCK_FILE}
 chmod = 0700

 [supervisorctl]
-serverurl = unix://{get_sock_file()}
+serverurl = unix://{SOCK_FILE}

 [rpcinterface:supervisor]
 supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
@@ -61,9 +83,14 @@ supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
 files = {WORKERS_DIR}/*.conf

 """
-    SUPERVISORD_CONFIG_FILE.write_text(config_content)
+    CONFIG_FILE.write_text(config_content)
+    Path.mkdir(WORKERS_DIR, exist_ok=True)
+    (WORKERS_DIR / 'initial_startup.conf').write_text('')  # hides error about "no files found to include" when supervisord starts

 def create_worker_config(daemon):
+    SOCK_FILE = get_sock_file()
+    WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME
+
     Path.mkdir(WORKERS_DIR, exist_ok=True)

     name = daemon['name']
@@ -80,13 +107,14 @@ def create_worker_config(daemon):


 def get_existing_supervisord_process():
+    SOCK_FILE = get_sock_file()
     try:
-        transport = SupervisorTransport(None, None, f"unix://{get_sock_file()}")
+        transport = SupervisorTransport(None, None, f"unix://{SOCK_FILE}")
         server = ServerProxy("http://localhost", transport=transport)
         current_state = cast(Dict[str, int | str], server.supervisor.getState())
         if current_state["statename"] == "RUNNING":
             pid = server.supervisor.getPID()
-            print(f"[🦸♂️] Supervisord connected (pid={pid}) via unix://{str(get_sock_file()).replace(str(DATA_DIR), '.')}.")
+            print(f"[🦸♂️] Supervisord connected (pid={pid}) via unix://{pretty_path(SOCK_FILE)}.")
             return server.supervisor
     except FileNotFoundError:
         return None
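Note: talking to supervisord over its socket works by bolting supervisor's SupervisorTransport onto the stdlib ServerProxy; the 'http://localhost' URL is a dummy because the transport already carries the unix socket path. A minimal standalone probe (socket path illustrative):

from xmlrpc.client import ServerProxy
from supervisor.xmlrpc import SupervisorTransport

SOCK = '/tmp/archivebox/supervisord.sock'   # illustrative path

transport = SupervisorTransport(None, None, f'unix://{SOCK}')
server = ServerProxy('http://localhost', transport=transport)   # host is ignored

try:
    state = server.supervisor.getState()   # e.g. {'statecode': 1, 'statename': 'RUNNING'}
    print(state['statename'], server.supervisor.getPID())
except OSError:   # covers FileNotFoundError / ConnectionRefusedError
    print('no supervisord reachable at', SOCK)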
@@ -95,58 +123,83 @@ def get_existing_supervisord_process():
         return None

 def stop_existing_supervisord_process():
+    SOCK_FILE = get_sock_file()
+    PID_FILE = SOCK_FILE.parent / PID_FILE_NAME
+
     try:
-        pid = int(PID_FILE.read_text())
-    except FileNotFoundError:
-        return
-    except ValueError:
-        PID_FILE.unlink()
-        return
-
-    try:
-        print(f"[🦸♂️] Stopping supervisord process (pid={pid})...")
-        proc = psutil.Process(pid)
-        proc.terminate()
-        proc.wait()
-    except Exception:
-        pass
-    try:
-        PID_FILE.unlink()
-    except FileNotFoundError:
-        pass
+        try:
+            pid = int(PID_FILE.read_text())
+        except (FileNotFoundError, ValueError):
+            return
+
+        try:
+            print(f"[🦸♂️] Stopping supervisord process (pid={pid})...")
+            proc = psutil.Process(pid)
+            proc.terminate()
+            proc.wait()
+        except (Exception, BrokenPipeError, IOError):
+            pass
+    finally:
+        try:
+            # clear PID file and socket file
+            PID_FILE.unlink(missing_ok=True)
+            get_sock_file().unlink(missing_ok=True)
+        except Exception:
+            pass

 def start_new_supervisord_process(daemonize=False):
+    SOCK_FILE = get_sock_file()
+    WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME
+    LOG_FILE = CONSTANTS.LOGS_DIR / LOG_FILE_NAME
+    CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME
+    PID_FILE = SOCK_FILE.parent / PID_FILE_NAME
+
     print(f"[🦸♂️] Supervisord starting{' in background' if daemonize else ''}...")
-    # Create a config file in the current working directory
+    pretty_log_path = pretty_path(LOG_FILE)
+    print(f" > Writing supervisord logs to: {pretty_log_path}")
+    print(f" > Writing task worker logs to: {pretty_log_path.replace('supervisord.log', 'worker_*.log')}")
+    print(f' > Using supervisord config file: {pretty_path(CONFIG_FILE)}')
+    print(f" > Using supervisord UNIX socket: {pretty_path(SOCK_FILE)}")
+    print()

     # clear out existing stale state files
     shutil.rmtree(WORKERS_DIR, ignore_errors=True)
     PID_FILE.unlink(missing_ok=True)
     get_sock_file().unlink(missing_ok=True)
-    SUPERVISORD_CONFIG_FILE.unlink(missing_ok=True)
+    CONFIG_FILE.unlink(missing_ok=True)

+    # create the supervisord config file
     create_supervisord_config()

     # Start supervisord
+    # panel = Panel(f"Starting supervisord with config: {SUPERVISORD_CONFIG_FILE}")
+    # with Live(panel, refresh_per_second=1) as live:
+
     subprocess.Popen(
-        f"supervisord --configuration={SUPERVISORD_CONFIG_FILE}",
+        f"supervisord --configuration={CONFIG_FILE}",
         stdin=None,
         shell=True,
         start_new_session=daemonize,
     )

     def exit_signal_handler(signum, frame):
-        if signum != 13:
-            print(f"\n[🦸♂️] Supervisord got stop signal ({signal.strsignal(signum)}). Terminating child processes...")
+        if signum == 2:
+            STDERR.print("\n[🛑] Got Ctrl+C. Terminating child processes...")
+        elif signum != 13:
+            STDERR.print(f"\n[🦸♂️] Supervisord got stop signal ({signal.strsignal(signum)}). Terminating child processes...")
         stop_existing_supervisord_process()
         raise SystemExit(0)

     # Monitor for termination signals and cleanup child processes
     if not daemonize:
-        signal.signal(signal.SIGINT, exit_signal_handler)
-        signal.signal(signal.SIGHUP, exit_signal_handler)
-        signal.signal(signal.SIGPIPE, exit_signal_handler)
-        signal.signal(signal.SIGTERM, exit_signal_handler)
+        try:
+            signal.signal(signal.SIGINT, exit_signal_handler)
+            signal.signal(signal.SIGHUP, exit_signal_handler)
+            signal.signal(signal.SIGPIPE, exit_signal_handler)
+            signal.signal(signal.SIGTERM, exit_signal_handler)
+        except Exception:
+            # signal handlers only work in main thread
+            pass
     # otherwise supervisord will containue in background even if parent proc is ends (aka daemon mode)

     time.sleep(2)
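Note: wrapping the signal.signal(...) registrations in try/except is needed because CPython only permits installing signal handlers from the main thread; when this function runs inside a worker thread, signal.signal raises ValueError. Demonstration:

import signal
import threading

def handler(signum, frame):
    raise SystemExit(0)

def try_register():
    try:
        signal.signal(signal.SIGTERM, handler)
        print('registered in', threading.current_thread().name)
    except ValueError as e:
        # 'signal only works in main thread of the main interpreter'
        print('skipped in', threading.current_thread().name, '->', e)

try_register()                              # main thread: works
t = threading.Thread(target=try_register)
t.start(); t.join()                         # worker thread: ValueError, safely skipped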
@@ -154,14 +207,32 @@ def start_new_supervisord_process(daemonize=False):
     return get_existing_supervisord_process()

 def get_or_create_supervisord_process(daemonize=False):
+    SOCK_FILE = get_sock_file()
+    WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME
+
     supervisor = get_existing_supervisord_process()
     if supervisor is None:
         stop_existing_supervisord_process()
         supervisor = start_new_supervisord_process(daemonize=daemonize)
         time.sleep(0.5)

+    # wait up to 5s in case supervisord is slow to start
+    if not supervisor:
+        for _ in range(10):
+            if supervisor is not None:
+                print()
+                break
+            sys.stdout.write('.')
+            sys.stdout.flush()
+            time.sleep(0.5)
+            supervisor = get_existing_supervisord_process()
+        else:
+            print()
+
     assert supervisor, "Failed to start supervisord or connect to it!"
     supervisor.getPID()  # make sure it doesn't throw an exception

+    (WORKERS_DIR / 'initial_startup.conf').unlink(missing_ok=True)
+
     return supervisor
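Note: the startup wait uses Python's for/else idiom: the else branch runs only if the loop finishes all ten retries without hitting break. The same shape in isolation:

import time

def server_is_up() -> bool:
    return False   # stand-in for get_existing_supervisord_process()

for attempt in range(10):
    if server_is_up():
        print(f'up after {attempt + 1} attempt(s)')
        break
    time.sleep(0.5)
else:
    # reached only when no iteration executed `break`
    raise TimeoutError('supervisord did not come up within ~5s')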
@@ -242,9 +313,9 @@ def tail_worker_logs(log_path: str):
             for line in follow(f):
                 if '://' in line:
                     live.console.print(f"Working on: {line.strip()}")
-                table.add_row("123124234", line.strip())
-    except KeyboardInterrupt:
-        print("\n[🛑] Got Ctrl+C, stopping gracefully...")
+                # table.add_row("123124234", line.strip())
+    except (KeyboardInterrupt, BrokenPipeError, IOError):
+        STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
     except SystemExit:
         pass
@@ -321,12 +392,12 @@ def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
     if not daemonize:
         try:
             watch_worker(supervisor, "worker_daphne")
-        except KeyboardInterrupt:
-            print("\n[🛑] Got Ctrl+C, stopping gracefully...")
+        except (KeyboardInterrupt, BrokenPipeError, IOError):
+            STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
         except SystemExit:
             pass
         except BaseException as e:
-            print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping web server gracefully...")
+            STDERR.print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping web server gracefully...")
             raise
         finally:
             stop_worker(supervisor, "worker_daphne")
@@ -350,12 +421,12 @@ def start_cli_workers(watch=False):
     if watch:
         try:
             watch_worker(supervisor, "worker_system_tasks")
-        except KeyboardInterrupt:
-            print("\n[🛑] Got Ctrl+C, stopping gracefully...")
+        except (KeyboardInterrupt, BrokenPipeError, IOError):
+            STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
         except SystemExit:
             pass
         except BaseException as e:
-            print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping web server gracefully...")
+            STDERR.print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping web server gracefully...")
             raise
         finally:
             stop_worker(supervisor, "worker_system_tasks")