mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-23 04:33:11 +00:00
improve config loading of TMP_DIR, LIB_DIR, move to separate files
This commit is contained in:
parent
7a895d9285
commit
cf1ea8f80f
49 changed files with 767 additions and 527 deletions
14
Dockerfile
14
Dockerfile
|
@ -287,22 +287,12 @@ WORKDIR "$DATA_DIR"
|
|||
RUN openssl rand -hex 16 > /etc/machine-id \
|
||||
&& chown -R "$DEFAULT_PUID:$DEFAULT_PGID" "/tmp"
|
||||
ENV IN_DOCKER=True \
|
||||
SYSTEM_LIB_DIR=/app/lib \
|
||||
SYSTEM_TMP_DIR=/tmp \
|
||||
SYSTEM_LIB_DIR=/usr/share/archivebox \
|
||||
SYSTEM_TMP_DIR=/tmp/archivebox \
|
||||
GOOGLE_API_KEY=no \
|
||||
GOOGLE_DEFAULT_CLIENT_ID=no \
|
||||
GOOGLE_DEFAULT_CLIENT_SECRET=no \
|
||||
ALLOWED_HOSTS=*
|
||||
## No need to set explicitly, these values will be autodetected by archivebox in docker:
|
||||
# WGET_BINARY="wget" \
|
||||
# YOUTUBEDL_BINARY="yt-dlp" \
|
||||
# CHROME_BINARY="/usr/bin/chromium-browser" \
|
||||
# USE_SINGLEFILE=True \
|
||||
# SINGLEFILE_BINARY="$NODE_MODULES/.bin/single-file" \
|
||||
# USE_READABILITY=True \
|
||||
# READABILITY_BINARY="$NODE_MODULES/.bin/readability-extractor" \
|
||||
# USE_MERCURY=True \
|
||||
# MERCURY_BINARY="$NODE_MODULES/.bin/postlight-parser"
|
||||
|
||||
# Print version for nice docker finish summary
|
||||
RUN (echo -e "\n\n[√] Finished Docker build succesfully. Saving build summary in: /VERSION.txt" \
|
||||
|
|
|
@ -13,7 +13,7 @@ __package__ = 'archivebox'
|
|||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
ASCII_LOGO = """
|
||||
|
@ -25,37 +25,36 @@ ASCII_LOGO = """
|
|||
╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═══╝ ╚══════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝
|
||||
"""
|
||||
|
||||
SYSTEM_TMP_DIR = Path(tempfile.gettempdir()) / 'archivebox'
|
||||
SYSTEM_TMP_DIR.mkdir(parents=True, exist_ok=True)
|
||||
os.environ['SYSTEM_TMP_DIR'] = str(SYSTEM_TMP_DIR)
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings'
|
||||
# detect ArchiveBox user's UID/GID based on data dir ownership
|
||||
from archivebox.config.permissions import drop_privileges # noqa
|
||||
drop_privileges()
|
||||
|
||||
# if we are outside a data dir, cd into an ephemeral tmp dir so that
|
||||
# we can run version/help without polluting cwd with an index.sqlite3
|
||||
if len(sys.argv) > 1 and sys.argv[1] in ('version', 'help'):
|
||||
current_dir = Path(os.getcwd()).resolve()
|
||||
if not (current_dir / 'index.sqlite3').exists():
|
||||
os.chdir(SYSTEM_TMP_DIR)
|
||||
from archivebox.misc.checks import check_not_root, check_io_encoding # noqa
|
||||
check_not_root()
|
||||
check_io_encoding()
|
||||
|
||||
# make sure PACKAGE_DIR is in sys.path so we can import all subfolders
|
||||
# without necessarily waiting for django to load them through INSTALLED_APPS
|
||||
PACKAGE_DIR = Path(__file__).resolve().parent
|
||||
if str(PACKAGE_DIR) not in sys.path:
|
||||
sys.path.append(str(PACKAGE_DIR))
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings'
|
||||
|
||||
|
||||
# print('INSTALLING MONKEY PATCHES')
|
||||
from .monkey_patches import * # noqa
|
||||
from archivebox.monkey_patches import * # noqa
|
||||
# print('DONE INSTALLING MONKEY PATCHES')
|
||||
|
||||
|
||||
# print('LOADING VENDORED LIBRARIES')
|
||||
from .vendor import load_vendored_libs # noqa
|
||||
from archivebox.vendor import load_vendored_libs # noqa
|
||||
load_vendored_libs()
|
||||
# print('DONE LOADING VENDORED LIBRARIES')
|
||||
|
||||
|
||||
from .config.constants import CONSTANTS, DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, VERSION # noqa
|
||||
from archivebox.config.constants import CONSTANTS # noqa
|
||||
from archivebox.config.paths import PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
|
||||
from archivebox.config.version import VERSION # noqa
|
||||
|
||||
__version__ = VERSION
|
||||
__author__ = 'Nick Sweeting'
|
||||
|
|
|
@ -12,12 +12,13 @@ from ninja import NinjaAPI, Swagger
|
|||
|
||||
# TODO: explore adding https://eadwincode.github.io/django-ninja-extra/
|
||||
|
||||
from archivebox.config import SHELL_CONFIG, VERSION
|
||||
from archivebox.config import VERSION
|
||||
from archivebox.config.version import get_COMMIT_HASH
|
||||
|
||||
from api.auth import API_AUTH_METHODS
|
||||
|
||||
|
||||
COMMIT_HASH = SHELL_CONFIG.COMMIT_HASH or 'unknown'
|
||||
COMMIT_HASH = get_COMMIT_HASH() or 'unknown'
|
||||
|
||||
html_description=f'''
|
||||
<h3>Welcome to your ArchiveBox server's REST API <code>[v1 ALPHA]</code> homepage!</h3>
|
||||
|
|
|
@ -13,7 +13,7 @@ from ..main import (
|
|||
schedule,
|
||||
)
|
||||
from archivebox.misc.util import ansi_to_html
|
||||
from archivebox.config import ARCHIVING_CONFIG
|
||||
from archivebox.config.common import ARCHIVING_CONFIG
|
||||
|
||||
|
||||
from .auth import API_AUTH_METHODS
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
__package__ = 'archivebox.cli'
|
||||
__command__ = 'archivebox'
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import threading
|
||||
|
@ -25,6 +26,10 @@ if len(sys.argv) > 1 and sys.argv[1] == 'setup':
|
|||
print(':warning: [bold red]DEPRECATED[/bold red] `archivebox setup` is deprecated, use `archivebox install` instead')
|
||||
sys.argv[1] = 'install'
|
||||
|
||||
if '--debug' in sys.argv:
|
||||
os.environ['DEBUG'] = 'True'
|
||||
sys.argv.remove('--debug')
|
||||
|
||||
|
||||
# def list_subcommands() -> Dict[str, str]:
|
||||
# """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
|
||||
|
@ -50,8 +55,8 @@ SUBCOMMAND_MODULES = {
|
|||
|
||||
'init': 'archivebox_init',
|
||||
'install': 'archivebox_install',
|
||||
##############################################
|
||||
'config': 'archivebox_config',
|
||||
|
||||
'add': 'archivebox_add',
|
||||
'remove': 'archivebox_remove',
|
||||
'update': 'archivebox_update',
|
||||
|
@ -63,7 +68,7 @@ SUBCOMMAND_MODULES = {
|
|||
'shell': 'archivebox_shell',
|
||||
'manage': 'archivebox_manage',
|
||||
|
||||
'oneshot': 'archivebox_oneshot',
|
||||
# 'oneshot': 'archivebox_oneshot',
|
||||
}
|
||||
|
||||
# every imported command module must have these properties in order to be valid
|
||||
|
@ -102,11 +107,11 @@ CLI_SUBCOMMANDS = LazySubcommands()
|
|||
|
||||
# these common commands will appear sorted before any others for ease-of-use
|
||||
meta_cmds = ('help', 'version') # dont require valid data folder at all
|
||||
main_cmds = ('init', 'config', 'setup', 'install') # dont require existing db present
|
||||
archive_cmds = ('add', 'remove', 'update', 'list', 'status') # require existing db present
|
||||
setup_cmds = ('init', 'setup', 'install') # require valid data folder, but dont require DB present in it yet
|
||||
archive_cmds = ('add', 'remove', 'update', 'list', 'status', 'schedule', 'server', 'shell', 'manage') # require valid data folder + existing db present
|
||||
fake_db = ("oneshot",) # use fake in-memory db
|
||||
|
||||
display_first = (*meta_cmds, *main_cmds, *archive_cmds)
|
||||
display_first = (*meta_cmds, *setup_cmds, *archive_cmds)
|
||||
|
||||
|
||||
IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler') # threads we dont have to wait for before exiting
|
||||
|
@ -157,14 +162,16 @@ def run_subcommand(subcommand: str,
|
|||
from archivebox.config.legacy import setup_django
|
||||
|
||||
# print('DATA_DIR is', DATA_DIR)
|
||||
# print('pwd is', os.getcwd())
|
||||
# print('pwd is', os.getcwd())
|
||||
|
||||
cmd_requires_db = subcommand in archive_cmds
|
||||
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
|
||||
|
||||
setup_django(in_memory_db=subcommand in fake_db, check_db=cmd_requires_db and not init_pending)
|
||||
check_db = cmd_requires_db and not init_pending
|
||||
|
||||
if subcommand not in meta_cmds:
|
||||
setup_django(in_memory_db=subcommand in fake_db, check_db=check_db)
|
||||
|
||||
if subcommand in archive_cmds:
|
||||
if cmd_requires_db:
|
||||
check_migrations()
|
||||
|
||||
|
|
|
@ -9,7 +9,8 @@ import argparse
|
|||
from typing import List, Optional, IO
|
||||
|
||||
from archivebox.misc.util import docstring
|
||||
from archivebox.config import DATA_DIR, ARCHIVING_CONFIG
|
||||
from archivebox.config import DATA_DIR
|
||||
from archivebox.config.common import ARCHIVING_CONFIG
|
||||
|
||||
from ..main import add
|
||||
from ..parsers import PARSERS
|
||||
|
|
|
@ -9,7 +9,8 @@ from pathlib import Path
|
|||
from typing import Optional, List, IO
|
||||
|
||||
from archivebox.misc.util import docstring
|
||||
from archivebox.config import DATA_DIR, SERVER_CONFIG
|
||||
from archivebox.config import DATA_DIR
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
from ..logging_util import SmartFormatter, reject_stdin
|
||||
from ..main import server
|
||||
|
||||
|
|
|
@ -1,27 +1,9 @@
|
|||
__package__ = 'archivebox.config'
|
||||
|
||||
from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR, VERSION
|
||||
from .defaults import (
|
||||
SHELL_CONFIG,
|
||||
STORAGE_CONFIG,
|
||||
GENERAL_CONFIG,
|
||||
SERVER_CONFIG,
|
||||
ARCHIVING_CONFIG,
|
||||
SEARCH_BACKEND_CONFIG,
|
||||
from .paths import (
|
||||
PACKAGE_DIR, # noqa
|
||||
DATA_DIR, # noqa
|
||||
ARCHIVE_DIR, # noqa
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
'CONSTANTS',
|
||||
'PACKAGE_DIR',
|
||||
'DATA_DIR',
|
||||
'ARCHIVE_DIR',
|
||||
'VERSION',
|
||||
'SHELL_CONFIG',
|
||||
'STORAGE_CONFIG',
|
||||
'GENERAL_CONFIG',
|
||||
'SERVER_CONFIG',
|
||||
'ARCHIVING_CONFIG',
|
||||
'SEARCH_BACKEND_CONFIG',
|
||||
'CONSTANTS_CONFIG',
|
||||
]
|
||||
from .constants import CONSTANTS, CONSTANTS_CONFIG # noqa
|
||||
from .version import VERSION # noqa
|
||||
|
|
|
@ -8,7 +8,7 @@ from abx.archivebox.base_hook import BaseHook
|
|||
|
||||
|
||||
from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
|
||||
from .defaults import (
|
||||
from .common import (
|
||||
ShellConfig, # noqa: F401
|
||||
StorageConfig, # noqa: F401
|
||||
GeneralConfig, # noqa: F401
|
||||
|
|
|
@ -1,47 +0,0 @@
|
|||
# def get_versions_available_on_github(config):
|
||||
# """
|
||||
# returns a dictionary containing the ArchiveBox GitHub release info for
|
||||
# the recommended upgrade version and the currently installed version
|
||||
# """
|
||||
|
||||
# # we only want to perform the (relatively expensive) check for new versions
|
||||
# # when its most relevant, e.g. when the user runs a long-running command
|
||||
# subcommand_run_by_user = sys.argv[3] if len(sys.argv) > 3 else 'help'
|
||||
# long_running_commands = ('add', 'schedule', 'update', 'status', 'server')
|
||||
# if subcommand_run_by_user not in long_running_commands:
|
||||
# return None
|
||||
|
||||
# github_releases_api = "https://api.github.com/repos/ArchiveBox/ArchiveBox/releases"
|
||||
# response = requests.get(github_releases_api)
|
||||
# if response.status_code != 200:
|
||||
# stderr(f'[!] Warning: GitHub API call to check for new ArchiveBox version failed! (status={response.status_code})', color='lightyellow', config=config)
|
||||
# return None
|
||||
# all_releases = response.json()
|
||||
|
||||
# installed_version = parse_version_string(config['VERSION'])
|
||||
|
||||
# # find current version or nearest older version (to link to)
|
||||
# current_version = None
|
||||
# for idx, release in enumerate(all_releases):
|
||||
# release_version = parse_version_string(release['tag_name'])
|
||||
# if release_version <= installed_version:
|
||||
# current_version = release
|
||||
# break
|
||||
|
||||
# current_version = current_version or all_releases[-1]
|
||||
|
||||
# # recommended version is whatever comes after current_version in the release list
|
||||
# # (perhaps too conservative to only recommend upgrading one version at a time, but it's safest)
|
||||
# try:
|
||||
# recommended_version = all_releases[idx+1]
|
||||
# except IndexError:
|
||||
# recommended_version = None
|
||||
|
||||
# return {'recommended_version': recommended_version, 'current_version': current_version}
|
||||
|
||||
# def can_upgrade(config):
|
||||
# if config['VERSIONS_AVAILABLE'] and config['VERSIONS_AVAILABLE']['recommended_version']:
|
||||
# recommended_version = parse_version_string(config['VERSIONS_AVAILABLE']['recommended_version']['tag_name'])
|
||||
# current_version = parse_version_string(config['VERSIONS_AVAILABLE']['current_version']['tag_name'])
|
||||
# return recommended_version > current_version
|
||||
# return False
|
|
@ -1,21 +1,21 @@
|
|||
__package__ = 'archivebox.config'
|
||||
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
|
||||
from typing import Dict, Optional
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from rich import print
|
||||
from pydantic import Field, field_validator, model_validator, computed_field
|
||||
from pydantic import Field, field_validator, computed_field
|
||||
from django.utils.crypto import get_random_string
|
||||
|
||||
from abx.archivebox.base_configset import BaseConfigSet
|
||||
|
||||
|
||||
from .constants import CONSTANTS, PACKAGE_DIR
|
||||
from .constants import CONSTANTS
|
||||
from .version import get_COMMIT_HASH, get_BUILD_TIME
|
||||
from .permissions import IN_DOCKER
|
||||
|
||||
###################### Config ##########################
|
||||
|
||||
|
@ -27,14 +27,8 @@ class ShellConfig(BaseConfigSet):
|
|||
USE_COLOR: bool = Field(default=lambda c: c.IS_TTY)
|
||||
SHOW_PROGRESS: bool = Field(default=lambda c: c.IS_TTY)
|
||||
|
||||
IN_DOCKER: bool = Field(default=False)
|
||||
IN_DOCKER: bool = Field(default=IN_DOCKER)
|
||||
IN_QEMU: bool = Field(default=False)
|
||||
|
||||
USER: str = Field(default=Path('~').expanduser().resolve().name)
|
||||
PUID: int = Field(default=os.getuid())
|
||||
PGID: int = Field(default=os.getgid())
|
||||
|
||||
PYTHON_ENCODING: str = Field(default=(sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8'))
|
||||
|
||||
ANSI: Dict[str, str] = Field(default=lambda c: CONSTANTS.DEFAULT_CLI_COLORS if c.USE_COLOR else CONSTANTS.DISABLED_CLI_COLORS)
|
||||
|
||||
|
@ -52,63 +46,12 @@ class ShellConfig(BaseConfigSet):
|
|||
@computed_field
|
||||
@property
|
||||
def COMMIT_HASH(self) -> Optional[str]:
|
||||
try:
|
||||
git_dir = PACKAGE_DIR / '../.git'
|
||||
ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
|
||||
commit_hash = git_dir.joinpath(ref).read_text().strip()
|
||||
return commit_hash
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
return list((PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
return get_COMMIT_HASH()
|
||||
|
||||
@computed_field
|
||||
@property
|
||||
def BUILD_TIME(self) -> str:
|
||||
if self.IN_DOCKER:
|
||||
docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
|
||||
return docker_build_end_time
|
||||
|
||||
src_last_modified_unix_timestamp = (PACKAGE_DIR / 'README.md').stat().st_mtime
|
||||
return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')
|
||||
|
||||
|
||||
@model_validator(mode='after')
|
||||
def validate_not_running_as_root(self):
|
||||
attempted_command = ' '.join(sys.argv[:3])
|
||||
if self.PUID == 0 and attempted_command not in ('setup', 'install'):
|
||||
# stderr('[!] ArchiveBox should never be run as root!', color='red')
|
||||
# stderr(' For more information, see the security overview documentation:')
|
||||
# stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
|
||||
print('[red][!] ArchiveBox should never be run as root![/red]', file=sys.stderr)
|
||||
print(' For more information, see the security overview documentation:', file=sys.stderr)
|
||||
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root', file=sys.stderr)
|
||||
|
||||
if self.IN_DOCKER:
|
||||
print('[red][!] When using Docker, you must run commands with [green]docker run[/green] instead of [yellow3]docker exec[/yellow3], e.g.:', file=sys.stderr)
|
||||
print(' docker compose run archivebox {attempted_command}', file=sys.stderr)
|
||||
print(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}', file=sys.stderr)
|
||||
print(' or:', file=sys.stderr)
|
||||
print(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
|
||||
print(f' docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
|
||||
raise SystemExit(2)
|
||||
|
||||
# check python locale
|
||||
if self.PYTHON_ENCODING != 'UTF-8':
|
||||
print(f'[red][X] Your system is running python3 scripts with a bad locale setting: {self.PYTHON_ENCODING} (it should be UTF-8).[/red]', file=sys.stderr)
|
||||
print(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)', file=sys.stderr)
|
||||
print(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"', file=sys.stderr)
|
||||
print('')
|
||||
print(' Confirm that it\'s fixed by opening a new shell and running:', file=sys.stderr)
|
||||
print(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8', file=sys.stderr)
|
||||
raise SystemExit(2)
|
||||
|
||||
return self
|
||||
return get_BUILD_TIME()
|
||||
|
||||
SHELL_CONFIG = ShellConfig()
|
||||
|
|
@ -1,115 +0,0 @@
|
|||
from pathlib import Path
|
||||
from typing import Optional, Dict, Union, Tuple, Callable, Pattern, Type, Any, List
|
||||
from mypy_extensions import TypedDict
|
||||
|
||||
from benedict import benedict
|
||||
|
||||
SimpleConfigValue = Union[str, bool, int, None, Pattern, Dict[str, Any]]
|
||||
SimpleConfigValueDict = Dict[str, SimpleConfigValue]
|
||||
SimpleConfigValueGetter = Callable[[], SimpleConfigValue]
|
||||
ConfigValue = Union[SimpleConfigValue, SimpleConfigValueDict, SimpleConfigValueGetter]
|
||||
|
||||
|
||||
|
||||
class BaseConfig(TypedDict):
|
||||
pass
|
||||
|
||||
class ConfigDict(BaseConfig, benedict, total=False):
|
||||
"""
|
||||
# Regenerate by pasting this quine into `archivebox shell` 🥚
|
||||
from archivebox.config import ConfigDict, CONFIG_DEFAULTS
|
||||
print('class ConfigDict(BaseConfig, total=False):')
|
||||
print(' ' + '"'*3 + ConfigDict.__doc__ + '"'*3)
|
||||
for section, configs in CONFIG_DEFAULTS.items():
|
||||
for key, attrs in configs.items():
|
||||
Type, default = attrs['type'], attrs['default']
|
||||
if default is None:
|
||||
print(f' {key}: Optional[{Type.__name__}]')
|
||||
else:
|
||||
print(f' {key}: {Type.__name__}')
|
||||
print()
|
||||
"""
|
||||
|
||||
IS_TTY: bool
|
||||
USE_COLOR: bool
|
||||
SHOW_PROGRESS: bool
|
||||
IN_DOCKER: bool
|
||||
|
||||
PACKAGE_DIR: Path
|
||||
CONFIG_FILE: Path
|
||||
ONLY_NEW: bool
|
||||
TIMEOUT: int
|
||||
MEDIA_TIMEOUT: int
|
||||
OUTPUT_PERMISSIONS: str
|
||||
RESTRICT_FILE_NAMES: str
|
||||
URL_DENYLIST: str
|
||||
|
||||
SECRET_KEY: Optional[str]
|
||||
BIND_ADDR: str
|
||||
ALLOWED_HOSTS: str
|
||||
DEBUG: bool
|
||||
PUBLIC_INDEX: bool
|
||||
PUBLIC_SNAPSHOTS: bool
|
||||
FOOTER_INFO: str
|
||||
|
||||
SAVE_TITLE: bool
|
||||
SAVE_FAVICON: bool
|
||||
SAVE_WGET: bool
|
||||
SAVE_WGET_REQUISITES: bool
|
||||
SAVE_SINGLEFILE: bool
|
||||
SAVE_READABILITY: bool
|
||||
SAVE_MERCURY: bool
|
||||
SAVE_PDF: bool
|
||||
SAVE_SCREENSHOT: bool
|
||||
SAVE_DOM: bool
|
||||
SAVE_WARC: bool
|
||||
SAVE_GIT: bool
|
||||
SAVE_MEDIA: bool
|
||||
SAVE_ARCHIVE_DOT_ORG: bool
|
||||
|
||||
RESOLUTION: str
|
||||
GIT_DOMAINS: str
|
||||
CHECK_SSL_VALIDITY: bool
|
||||
CURL_USER_AGENT: str
|
||||
WGET_USER_AGENT: str
|
||||
CHROME_USER_AGENT: str
|
||||
COOKIES_FILE: Union[str, Path, None]
|
||||
CHROME_USER_DATA_DIR: Union[str, Path, None]
|
||||
CHROME_TIMEOUT: int
|
||||
CHROME_HEADLESS: bool
|
||||
CHROME_SANDBOX: bool
|
||||
|
||||
USE_CURL: bool
|
||||
USE_WGET: bool
|
||||
USE_SINGLEFILE: bool
|
||||
USE_READABILITY: bool
|
||||
USE_MERCURY: bool
|
||||
USE_GIT: bool
|
||||
USE_CHROME: bool
|
||||
USE_YOUTUBEDL: bool
|
||||
CURL_BINARY: str
|
||||
GIT_BINARY: str
|
||||
WGET_BINARY: str
|
||||
SINGLEFILE_BINARY: str
|
||||
READABILITY_BINARY: str
|
||||
MERCURY_BINARY: str
|
||||
YOUTUBEDL_BINARY: str
|
||||
CHROME_BINARY: Optional[str]
|
||||
|
||||
YOUTUBEDL_ARGS: List[str]
|
||||
WGET_ARGS: List[str]
|
||||
CURL_ARGS: List[str]
|
||||
GIT_ARGS: List[str]
|
||||
TAG_SEPARATOR_PATTERN: str
|
||||
|
||||
|
||||
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
|
||||
ConfigDefaultValue = Union[ConfigValue, ConfigDefaultValueGetter]
|
||||
|
||||
ConfigDefault = TypedDict('ConfigDefault', {
|
||||
'default': ConfigDefaultValue,
|
||||
'type': Optional[Type],
|
||||
'aliases': Optional[Tuple[str, ...]],
|
||||
}, total=False)
|
||||
|
||||
ConfigDefaultDict = Dict[str, ConfigDefault]
|
|
@ -1,118 +1,115 @@
|
|||
__package__ = 'archivebox.config'
|
||||
|
||||
|
||||
import os
|
||||
import re
|
||||
import platform
|
||||
import tempfile
|
||||
|
||||
from typing import Dict
|
||||
from pathlib import Path
|
||||
import importlib.metadata
|
||||
from collections.abc import Mapping
|
||||
|
||||
from benedict import benedict
|
||||
|
||||
from ..misc.logging import DEFAULT_CLI_COLORS
|
||||
|
||||
from .paths import (
|
||||
PACKAGE_DIR,
|
||||
DATA_DIR,
|
||||
ARCHIVE_DIR,
|
||||
get_collection_id,
|
||||
get_LIB_DIR,
|
||||
get_TMP_DIR,
|
||||
)
|
||||
from .permissions import (
|
||||
IS_ROOT,
|
||||
IN_DOCKER,
|
||||
RUNNING_AS_UID,
|
||||
RUNNING_AS_GID,
|
||||
DEFAULT_PUID,
|
||||
DEFAULT_PGID,
|
||||
ARCHIVEBOX_USER,
|
||||
ARCHIVEBOX_GROUP,
|
||||
)
|
||||
from .version import detect_installed_version
|
||||
|
||||
###################### Config ##########################
|
||||
|
||||
PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir
|
||||
DATA_DIR: Path = Path(os.getcwd()).resolve() # archivebox user data dir
|
||||
ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir
|
||||
|
||||
def _detect_installed_version(PACKAGE_DIR: Path):
|
||||
"""Autodetect the installed archivebox version by using pip package metadata, pyproject.toml file, or package.json file"""
|
||||
try:
|
||||
# if in production install, use pip-installed package metadata
|
||||
return importlib.metadata.version(__package__ or 'archivebox').strip()
|
||||
except importlib.metadata.PackageNotFoundError:
|
||||
pass
|
||||
|
||||
try:
|
||||
# if in dev Git repo dir, use pyproject.toml file
|
||||
pyproject_config = (PACKAGE_DIR.parent / 'pyproject.toml').read_text().split('\n')
|
||||
for line in pyproject_config:
|
||||
if line.startswith('version = '):
|
||||
return line.split(' = ', 1)[-1].strip('"').strip()
|
||||
except FileNotFoundError:
|
||||
# building docs, pyproject.toml is not available
|
||||
pass
|
||||
|
||||
# raise Exception('Failed to detect installed archivebox version!')
|
||||
return 'dev'
|
||||
|
||||
VERSION: str = _detect_installed_version(PACKAGE_DIR)
|
||||
|
||||
|
||||
|
||||
|
||||
class ConstantsDict(Mapping):
|
||||
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'yes')
|
||||
OS = platform.system().lower() # darwin, linux, etc.
|
||||
ARCH = platform.machine().lower() # arm64, x86_64, etc.
|
||||
LIB_DIR_SCOPE = f'{ARCH}-{OS}' + ('-docker' if IN_DOCKER else '')
|
||||
|
||||
PACKAGE_DIR: Path = PACKAGE_DIR # archivebox source code dir
|
||||
DATA_DIR: Path = DATA_DIR # archivebox user data dir
|
||||
ARCHIVE_DIR: Path = ARCHIVE_DIR # archivebox snapshot data dir
|
||||
VERSION: str = VERSION
|
||||
PACKAGE_DIR: Path = PACKAGE_DIR
|
||||
DATA_DIR: Path = DATA_DIR
|
||||
ARCHIVE_DIR: Path = ARCHIVE_DIR
|
||||
COLLECTION_ID: str = get_collection_id(DATA_DIR)
|
||||
|
||||
# Host system
|
||||
VERSION: str = detect_installed_version(PACKAGE_DIR)
|
||||
OS: str = platform.system().lower() # darwin, linux, etc.
|
||||
ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc.
|
||||
IN_DOCKER: bool = IN_DOCKER
|
||||
|
||||
# Permissions
|
||||
IS_ROOT: bool = IS_ROOT
|
||||
ARCHIVEBOX_USER: int = ARCHIVEBOX_USER
|
||||
ARCHIVEBOX_GROUP: int = ARCHIVEBOX_GROUP
|
||||
RUNNING_AS_UID: int = RUNNING_AS_UID
|
||||
RUNNING_AS_GID: int = RUNNING_AS_GID
|
||||
DEFAULT_PUID: int = DEFAULT_PUID
|
||||
DEFAULT_PGID: int = DEFAULT_PGID
|
||||
|
||||
# Source code dirs
|
||||
PACKAGE_DIR_NAME: str = PACKAGE_DIR.name
|
||||
TEMPLATES_DIR_NAME: str = 'templates'
|
||||
TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME
|
||||
STATIC_DIR: Path = TEMPLATES_DIR / 'static'
|
||||
STATIC_DIR_NAME: str = 'static'
|
||||
STATIC_DIR: Path = TEMPLATES_DIR / STATIC_DIR_NAME
|
||||
|
||||
# Data dirs
|
||||
ARCHIVE_DIR_NAME: str = 'archive'
|
||||
SOURCES_DIR_NAME: str = 'sources'
|
||||
PERSONAS_DIR_NAME: str = 'personas'
|
||||
CRONTABS_DIR_NAME: str = 'crontabs'
|
||||
CACHE_DIR_NAME: str = 'cache'
|
||||
LOGS_DIR_NAME: str = 'logs'
|
||||
USER_PLUGINS_DIR_NAME: str = 'user_plugins'
|
||||
CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates'
|
||||
|
||||
ARCHIVE_DIR_NAME: str = 'archive'
|
||||
SOURCES_DIR_NAME: str = 'sources'
|
||||
PERSONAS_DIR_NAME: str = 'personas'
|
||||
CRONTABS_DIR_NAME: str = 'crontabs'
|
||||
CACHE_DIR_NAME: str = 'cache'
|
||||
LOGS_DIR_NAME: str = 'logs'
|
||||
LIB_DIR_NAME: str = 'lib'
|
||||
TMP_DIR_NAME: str = 'tmp'
|
||||
|
||||
SYSTEM_TMP_DIR: Path = Path(os.environ['SYSTEM_TMP_DIR']) if 'SYSTEM_TMP_DIR' in os.environ else (Path(tempfile.gettempdir()) / 'archivebox')
|
||||
# DATA_DIR_TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME / machineid.hashed_id('archivebox')[:16] # can't be used: socket path length restrictions break too often if the data dir is in some deep subdir (socket.error reported: AF_UNIX path too long)
|
||||
SYSTEM_LIB_DIR: Path = Path(os.environ['SYSTEM_LIB_DIR']) if 'SYSTEM_LIB_DIR' in os.environ else (PACKAGE_DIR / LIB_DIR_NAME)
|
||||
DATA_DIR_LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE
|
||||
|
||||
ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
|
||||
SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
|
||||
PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME
|
||||
CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME
|
||||
LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME
|
||||
LIB_DIR: Path = SYSTEM_LIB_DIR if IN_DOCKER else DATA_DIR_LIB_DIR # e.g. /app/lib or ./data/lib/arm64-darwin-docker
|
||||
TMP_DIR: Path = SYSTEM_TMP_DIR
|
||||
CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME
|
||||
CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
|
||||
USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME
|
||||
|
||||
# Data dir files
|
||||
CONFIG_FILENAME: str = 'ArchiveBox.conf'
|
||||
SQL_INDEX_FILENAME: str = 'index.sqlite3'
|
||||
QUEUE_DATABASE_FILENAME: str = 'queue.sqlite3'
|
||||
CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME
|
||||
DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME
|
||||
QUEUE_DATABASE_FILE: Path = DATA_DIR / QUEUE_DATABASE_FILENAME
|
||||
|
||||
JSON_INDEX_FILENAME: str = 'index.json'
|
||||
HTML_INDEX_FILENAME: str = 'index.html'
|
||||
ROBOTS_TXT_FILENAME: str = 'robots.txt'
|
||||
FAVICON_FILENAME: str = 'favicon.ico'
|
||||
|
||||
# Runtime dirs
|
||||
TMP_DIR_NAME: str = 'tmp'
|
||||
TMP_DIR: Path = get_TMP_DIR()
|
||||
LIB_DIR_NAME: str = 'lib'
|
||||
LIB_DIR: Path = get_LIB_DIR()
|
||||
LIB_PIP_DIR: Path = LIB_DIR / 'pip'
|
||||
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
|
||||
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'
|
||||
LIB_BIN_DIR: Path = LIB_DIR / 'bin'
|
||||
BIN_DIR: Path = LIB_BIN_DIR
|
||||
|
||||
CONFIG_FILENAME: str = 'ArchiveBox.conf'
|
||||
SQL_INDEX_FILENAME: str = 'index.sqlite3'
|
||||
QUEUE_DATABASE_FILENAME: str = 'queue.sqlite3'
|
||||
# Config constants
|
||||
TIMEZONE: str = 'UTC'
|
||||
DEFAULT_CLI_COLORS: Dict[str, str] = DEFAULT_CLI_COLORS
|
||||
DISABLED_CLI_COLORS: Dict[str, str] = benedict({k: '' for k in DEFAULT_CLI_COLORS})
|
||||
|
||||
CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME
|
||||
DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME
|
||||
QUEUE_DATABASE_FILE: Path = DATA_DIR / QUEUE_DATABASE_FILENAME
|
||||
|
||||
JSON_INDEX_FILENAME: str = 'index.json'
|
||||
HTML_INDEX_FILENAME: str = 'index.html'
|
||||
ROBOTS_TXT_FILENAME: str = 'robots.txt'
|
||||
FAVICON_FILENAME: str = 'favicon.ico'
|
||||
|
||||
TIMEZONE: str = 'UTC'
|
||||
DEFAULT_CLI_COLORS: Dict[str, str] = DEFAULT_CLI_COLORS
|
||||
DISABLED_CLI_COLORS: Dict[str, str] = benedict({k: '' for k in DEFAULT_CLI_COLORS})
|
||||
|
||||
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
|
||||
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
|
||||
|
||||
STATICFILE_EXTENSIONS: frozenset[str] = frozenset((
|
||||
# 99.999% of the time, URLs ending in these extensions are static files
|
||||
|
@ -136,17 +133,6 @@ class ConstantsDict(Mapping):
|
|||
# html, htm, shtml, xhtml, xml, aspx, php, cgi
|
||||
))
|
||||
|
||||
INGORED_PATHS: frozenset[str] = frozenset((
|
||||
".git",
|
||||
".svn",
|
||||
".DS_Store",
|
||||
".gitignore",
|
||||
"lost+found",
|
||||
".DS_Store",
|
||||
".env",
|
||||
"Dockerfile",
|
||||
".ArchiveBox.conf.bak",
|
||||
))
|
||||
PIP_RELATED_NAMES: frozenset[str] = frozenset((
|
||||
".venv",
|
||||
"venv",
|
||||
|
@ -160,7 +146,15 @@ class ConstantsDict(Mapping):
|
|||
"yarn.lock",
|
||||
))
|
||||
|
||||
DATA_DIR_NAMES: frozenset[str] = frozenset((
|
||||
# When initializing archivebox in a new directory, we check to make sure the dir is
|
||||
# actually empty so that we don't clobber someone's home directory or desktop by accident.
|
||||
# These files are exceptions to the is_empty check when we're trying to init a new dir,
|
||||
# as they could be from a previous archivebox version, system artifacts, dependencies, etc.
|
||||
ALLOWED_IN_DATA_DIR: frozenset[str] = frozenset((
|
||||
*PIP_RELATED_NAMES,
|
||||
*NPM_RELATED_NAMES,
|
||||
|
||||
### Dirs:
|
||||
ARCHIVE_DIR_NAME,
|
||||
SOURCES_DIR_NAME,
|
||||
LOGS_DIR_NAME,
|
||||
|
@ -171,9 +165,12 @@ class ConstantsDict(Mapping):
|
|||
CUSTOM_TEMPLATES_DIR_NAME,
|
||||
USER_PLUGINS_DIR_NAME,
|
||||
CRONTABS_DIR_NAME,
|
||||
))
|
||||
DATA_DIRS: frozenset[Path] = frozenset(DATA_DIR / dirname for dirname in DATA_DIR_NAMES)
|
||||
DATA_FILE_NAMES: frozenset[str] = frozenset((
|
||||
"static", # created by old static exports <v0.6.0
|
||||
"sonic", # created by docker bind mount / sonic FTS process
|
||||
".git",
|
||||
".svn",
|
||||
|
||||
### Files:
|
||||
CONFIG_FILENAME,
|
||||
SQL_INDEX_FILENAME,
|
||||
f"{SQL_INDEX_FILENAME}-wal",
|
||||
|
@ -188,43 +185,37 @@ class ConstantsDict(Mapping):
|
|||
FAVICON_FILENAME,
|
||||
CONFIG_FILENAME,
|
||||
f"{CONFIG_FILENAME}.bak",
|
||||
f".{CONFIG_FILENAME}.bak",
|
||||
"static_index.json",
|
||||
))
|
||||
|
||||
# When initializing archivebox in a new directory, we check to make sure the dir is
|
||||
# actually empty so that we don't clobber someone's home directory or desktop by accident.
|
||||
# These files are exceptions to the is_empty check when we're trying to init a new dir,
|
||||
# as they could be from a previous archivebox version, system artifacts, dependencies, etc.
|
||||
ALLOWED_IN_DATA_DIR: frozenset[str] = frozenset((
|
||||
*INGORED_PATHS,
|
||||
*PIP_RELATED_NAMES,
|
||||
*NPM_RELATED_NAMES,
|
||||
*DATA_DIR_NAMES,
|
||||
*DATA_FILE_NAMES,
|
||||
"static", # created by old static exports <v0.6.0
|
||||
"sonic", # created by docker bind mount
|
||||
".DS_Store",
|
||||
".gitignore",
|
||||
"lost+found",
|
||||
".DS_Store",
|
||||
".env",
|
||||
".collection_id",
|
||||
"Dockerfile",
|
||||
))
|
||||
|
||||
CODE_LOCATIONS = benedict({
|
||||
'PACKAGE_DIR': {
|
||||
'path': (PACKAGE_DIR).resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': (PACKAGE_DIR / '__main__.py').exists(),
|
||||
'is_valid': (PACKAGE_DIR / '__main__.py').exists(), # read + list
|
||||
},
|
||||
'TEMPLATES_DIR': {
|
||||
'path': TEMPLATES_DIR.resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': STATIC_DIR.exists(),
|
||||
'is_valid': STATIC_DIR.exists() and os.access(STATIC_DIR, os.R_OK) and os.access(STATIC_DIR, os.X_OK), # read + list
|
||||
},
|
||||
'LIB_DIR': {
|
||||
'path': LIB_DIR.resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': LIB_DIR.is_dir(),
|
||||
'is_valid': LIB_DIR.is_dir() and os.access(LIB_DIR, os.R_OK) and os.access(LIB_DIR, os.X_OK) and os.access(LIB_DIR, os.W_OK), # read + write
|
||||
},
|
||||
'TMP_DIR': {
|
||||
'path': TMP_DIR.resolve(),
|
||||
'enabled': True,
|
||||
'is_valid': TMP_DIR.is_dir(),
|
||||
'is_valid': TMP_DIR.is_dir() and os.access(TMP_DIR, os.R_OK) and os.access(TMP_DIR, os.X_OK) and os.access(TMP_DIR, os.W_OK), # read + write
|
||||
},
|
||||
})
|
||||
|
||||
|
@ -232,61 +223,61 @@ class ConstantsDict(Mapping):
|
|||
"DATA_DIR": {
|
||||
"path": DATA_DIR.resolve(),
|
||||
"enabled": True,
|
||||
"is_valid": DATABASE_FILE.exists(),
|
||||
"is_valid": DATABASE_FILE.exists() and os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK) and os.access(DATA_DIR, os.X_OK),
|
||||
"is_mount": os.path.ismount(DATA_DIR.resolve()),
|
||||
},
|
||||
"CONFIG_FILE": {
|
||||
"path": CONFIG_FILE.resolve(),
|
||||
"enabled": True,
|
||||
"is_valid": CONFIG_FILE.exists(),
|
||||
"is_valid": CONFIG_FILE.exists() and os.access(CONFIG_FILE, os.W_OK),
|
||||
},
|
||||
"SQL_INDEX": {
|
||||
"path": DATABASE_FILE.resolve(),
|
||||
"enabled": True,
|
||||
"is_valid": DATABASE_FILE.exists(),
|
||||
"is_valid": DATABASE_FILE.exists() and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
|
||||
"is_mount": os.path.ismount(DATABASE_FILE.resolve()),
|
||||
},
|
||||
"QUEUE_DATABASE": {
|
||||
"path": QUEUE_DATABASE_FILE.resolve(),
|
||||
"enabled": True,
|
||||
"is_valid": QUEUE_DATABASE_FILE.exists(),
|
||||
"is_valid": QUEUE_DATABASE_FILE.exists() and os.access(QUEUE_DATABASE_FILE, os.R_OK) and os.access(QUEUE_DATABASE_FILE, os.W_OK),
|
||||
"is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()),
|
||||
},
|
||||
"ARCHIVE_DIR": {
|
||||
"path": ARCHIVE_DIR.resolve(),
|
||||
"enabled": True,
|
||||
"is_valid": ARCHIVE_DIR.exists(),
|
||||
"is_valid": ARCHIVE_DIR.exists() and os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK) and os.access(ARCHIVE_DIR, os.X_OK),
|
||||
"is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
|
||||
},
|
||||
"SOURCES_DIR": {
|
||||
"path": SOURCES_DIR.resolve(),
|
||||
"enabled": True,
|
||||
"is_valid": SOURCES_DIR.exists(),
|
||||
"is_valid": SOURCES_DIR.exists() and os.access(SOURCES_DIR, os.R_OK) and os.access(SOURCES_DIR, os.W_OK) and os.access(SOURCES_DIR, os.X_OK),
|
||||
},
|
||||
"LOGS_DIR": {
|
||||
"path": LOGS_DIR.resolve(),
|
||||
"enabled": True,
|
||||
"is_valid": LOGS_DIR.is_dir(),
|
||||
"is_valid": LOGS_DIR.is_dir() and os.access(LOGS_DIR, os.R_OK) and os.access(LOGS_DIR, os.W_OK) and os.access(LOGS_DIR, os.X_OK), # read + write
|
||||
},
|
||||
# "CACHE_DIR": {
|
||||
# "path": CACHE_DIR.resolve(),
|
||||
# "enabled": True,
|
||||
# "is_valid": CACHE_DIR.is_dir(),
|
||||
# "is_valid": CACHE_DIR.is_dir() and os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK) and os.access(CACHE_DIR, os.X_OK), # read + write
|
||||
# },
|
||||
"PERSONAS_DIR": {
|
||||
"path": PERSONAS_DIR.resolve(),
|
||||
"enabled": PERSONAS_DIR.exists(),
|
||||
"is_valid": PERSONAS_DIR.is_dir(),
|
||||
"is_valid": PERSONAS_DIR.is_dir() and os.access(PERSONAS_DIR, os.R_OK) and os.access(PERSONAS_DIR, os.W_OK) and os.access(PERSONAS_DIR, os.X_OK), # read + write
|
||||
},
|
||||
'CUSTOM_TEMPLATES_DIR': {
|
||||
'path': CUSTOM_TEMPLATES_DIR.resolve(),
|
||||
'enabled': CUSTOM_TEMPLATES_DIR.exists(),
|
||||
'is_valid': CUSTOM_TEMPLATES_DIR.is_dir(),
|
||||
'is_valid': CUSTOM_TEMPLATES_DIR.is_dir() and os.access(CUSTOM_TEMPLATES_DIR, os.R_OK) and os.access(CUSTOM_TEMPLATES_DIR, os.X_OK), # read
|
||||
},
|
||||
'USER_PLUGINS_DIR': {
|
||||
'path': USER_PLUGINS_DIR.resolve(),
|
||||
'enabled': USER_PLUGINS_DIR.exists(),
|
||||
'is_valid': USER_PLUGINS_DIR.is_dir(),
|
||||
'is_valid': USER_PLUGINS_DIR.is_dir() and os.access(USER_PLUGINS_DIR, os.R_OK) and os.access(USER_PLUGINS_DIR, os.X_OK), # read
|
||||
},
|
||||
})
|
||||
|
||||
|
@ -314,5 +305,6 @@ globals().update(CONSTANTS)
|
|||
|
||||
|
||||
# these need to always exist as we need them to run almost everything
|
||||
# TODO: figure out a better time to make these than import-time
|
||||
CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True)
|
||||
CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
|
|
@ -22,41 +22,34 @@ Documentation:
|
|||
__package__ = 'archivebox.config'
|
||||
|
||||
import os
|
||||
import io
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import shutil
|
||||
|
||||
from hashlib import md5
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, Type, Tuple, Dict
|
||||
from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired
|
||||
from typing import Optional, Type, Tuple, Dict, Any
|
||||
from subprocess import run, DEVNULL
|
||||
from configparser import ConfigParser
|
||||
|
||||
from rich.progress import Progress
|
||||
from rich.console import Console
|
||||
from benedict import benedict
|
||||
from pydantic_pkgr import SemVer
|
||||
|
||||
import django
|
||||
from django.db.backends.sqlite3.base import Database as sqlite3
|
||||
|
||||
|
||||
from .constants import CONSTANTS, TIMEZONE
|
||||
from .constants import CONSTANTS
|
||||
from .constants import *
|
||||
from .config_stubs import (
|
||||
ConfigValue,
|
||||
ConfigDefaultValue,
|
||||
ConfigDefaultDict,
|
||||
)
|
||||
|
||||
from ..misc.logging import (
|
||||
stderr,
|
||||
hint, # noqa
|
||||
)
|
||||
|
||||
from .defaults import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
|
||||
from .common import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
|
||||
from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
|
||||
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
|
||||
from archivebox.plugins_extractor.wget.apps import WGET_CONFIG
|
||||
|
@ -67,7 +60,7 @@ LDAP = LDAP_CONFIG.LDAP_ENABLED
|
|||
|
||||
############################### Config Schema ##################################
|
||||
|
||||
CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
||||
CONFIG_SCHEMA: Dict[str, Dict[str, Any]] = {
|
||||
'SHELL_CONFIG': SHELL_CONFIG.as_legacy_config_schema(),
|
||||
|
||||
'SERVER_CONFIG': SERVER_CONFIG.as_legacy_config_schema(),
|
||||
|
@ -194,7 +187,7 @@ def get_real_name(key: str) -> str:
|
|||
|
||||
# These are derived/computed values calculated *after* all user-provided config values are ingested
|
||||
# they appear in `archivebox config` output and are intended to be read-only for the user
|
||||
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
||||
DYNAMIC_CONFIG_SCHEMA: Dict[str, Any] = {
|
||||
'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
|
||||
'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
|
||||
|
||||
|
@ -209,12 +202,12 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
|||
|
||||
|
||||
def load_config_val(key: str,
|
||||
default: ConfigDefaultValue=None,
|
||||
default: Any=None,
|
||||
type: Optional[Type]=None,
|
||||
aliases: Optional[Tuple[str, ...]]=None,
|
||||
config: Optional[benedict]=None,
|
||||
env_vars: Optional[os._Environ]=None,
|
||||
config_file_vars: Optional[Dict[str, str]]=None) -> ConfigValue:
|
||||
config_file_vars: Optional[Dict[str, str]]=None) -> Any:
|
||||
"""parse bool, int, and str key=value pairs from env"""
|
||||
|
||||
assert isinstance(config, dict)
|
||||
|
@ -372,7 +365,7 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA
|
|||
|
||||
|
||||
|
||||
def load_config(defaults: ConfigDefaultDict,
|
||||
def load_config(defaults: Dict[str, Any],
|
||||
config: Optional[benedict]=None,
|
||||
out_dir: Optional[str]=None,
|
||||
env_vars: Optional[os._Environ]=None,
|
||||
|
@ -505,7 +498,7 @@ def load_all_config():
|
|||
# add all final config values in CONFIG to globals in this file
|
||||
CONFIG: benedict = load_all_config()
|
||||
globals().update(CONFIG)
|
||||
# this lets us do: from .config import DEBUG, MEDIA_TIMEOUT, ...
|
||||
|
||||
|
||||
# print("FINISHED LOADING CONFIG USING SCHEMAS + FILE + ENV")
|
||||
|
||||
|
@ -521,8 +514,8 @@ globals().update(CONFIG)
|
|||
|
||||
|
||||
# Set timezone to UTC and umask to OUTPUT_PERMISSIONS
|
||||
assert TIMEZONE == 'UTC', f'The server timezone should always be set to UTC (got {TIMEZONE})' # noqa: F821
|
||||
os.environ["TZ"] = TIMEZONE # noqa: F821
|
||||
assert CONSTANTS.TIMEZONE == 'UTC', f'The server timezone should always be set to UTC (got {CONSTANTS.TIMEZONE})' # noqa: F821
|
||||
os.environ["TZ"] = CONSTANTS.TIMEZONE # noqa: F821
|
||||
os.umask(0o777 - int(STORAGE_CONFIG.DIR_OUTPUT_PERMISSIONS, base=8)) # noqa: F821
|
||||
|
||||
########################### Config Validity Checkers ###########################
|
||||
|
@ -533,7 +526,8 @@ if not SHELL_CONFIG.SHOW_PROGRESS:
|
|||
os.environ['TERM'] = 'dumb'
|
||||
|
||||
# recreate rich console obj based on new config values
|
||||
CONSOLE = Console()
|
||||
STDOUT = CONSOLE = Console()
|
||||
STDERR = Console(stderr=True)
|
||||
from ..misc import logging
|
||||
logging.CONSOLE = CONSOLE
|
||||
|
||||
|
@ -541,11 +535,11 @@ logging.CONSOLE = CONSOLE
|
|||
INITIAL_STARTUP_PROGRESS = None
|
||||
INITIAL_STARTUP_PROGRESS_TASK = 0
|
||||
|
||||
def bump_startup_progress_bar():
|
||||
def bump_startup_progress_bar(advance=1):
|
||||
global INITIAL_STARTUP_PROGRESS
|
||||
global INITIAL_STARTUP_PROGRESS_TASK
|
||||
if INITIAL_STARTUP_PROGRESS:
|
||||
INITIAL_STARTUP_PROGRESS.update(INITIAL_STARTUP_PROGRESS_TASK, advance=1) # type: ignore
|
||||
INITIAL_STARTUP_PROGRESS.update(INITIAL_STARTUP_PROGRESS_TASK, advance=advance) # type: ignore
|
||||
|
||||
|
||||
def setup_django_minimal():
|
||||
|
@ -559,6 +553,8 @@ DJANGO_SET_UP = False
|
|||
|
||||
|
||||
def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CONFIG, in_memory_db=False) -> None:
|
||||
from rich.panel import Panel
|
||||
|
||||
global INITIAL_STARTUP_PROGRESS
|
||||
global INITIAL_STARTUP_PROGRESS_TASK
|
||||
global DJANGO_SET_UP
|
||||
|
@ -568,7 +564,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
|
|||
# TODO: figure out why CLI entrypoints with init_pending are running this twice sometimes
|
||||
return
|
||||
|
||||
with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS:
|
||||
with Progress(transient=True, expand=True, console=STDERR) as INITIAL_STARTUP_PROGRESS:
|
||||
INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
|
||||
|
||||
output_dir = out_dir or CONSTANTS.DATA_DIR
|
||||
|
@ -595,7 +591,14 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
|
|||
else:
|
||||
# Otherwise use default sqlite3 file-based database and initialize django
|
||||
# without running migrations automatically (user runs them manually by calling init)
|
||||
django.setup()
|
||||
try:
|
||||
django.setup()
|
||||
except Exception as e:
|
||||
bump_startup_progress_bar(advance=1000)
|
||||
STDERR.print()
|
||||
STDERR.print(Panel(f'\n[red]{e.__class__.__name__}[/red]: [yellow]{e}[/yellow]\nPlease check your config and [blue]DATA_DIR[/blue] permissions.\n', title='\n\n[red][X] Error while trying to load database!', subtitle='[grey53]NO WRITES CAN BE PERFORMED[/grey53]', expand=False, style='bold red'))
|
||||
STDERR.print()
|
||||
return
|
||||
|
||||
bump_startup_progress_bar()
|
||||
|
||||
|
@ -608,6 +611,17 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
|
|||
f.write(f"\n> {command}; TS={ts} VERSION={CONSTANTS.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")
|
||||
|
||||
if check_db:
|
||||
# make sure the data dir is owned by a non-root user
|
||||
if CONSTANTS.DATA_DIR.stat().st_uid == 0:
|
||||
STDERR.print('[red][X] Error: ArchiveBox DATA_DIR cannot be owned by root![/red]')
|
||||
STDERR.print(f' {CONSTANTS.DATA_DIR}')
|
||||
STDERR.print()
|
||||
STDERR.print('[violet]Hint:[/violet] Are you running archivebox in the right folder? (and as a non-root user?)')
|
||||
STDERR.print(' cd path/to/your/archive/data')
|
||||
STDERR.print(' archivebox [command]')
|
||||
STDERR.print()
|
||||
raise SystemExit(9)
|
||||
|
||||
# Create cache table in DB if needed
|
||||
try:
|
||||
from django.core.cache import cache
|
||||
|
|
152
archivebox/config/paths.py
Normal file
152
archivebox/config/paths.py
Normal file
|
@ -0,0 +1,152 @@
|
|||
__package__ = 'archivebox.config'
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
from functools import cache
|
||||
from platformdirs import PlatformDirs
|
||||
|
||||
from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
|
||||
|
||||
#############################################################################################
|
||||
|
||||
PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir
|
||||
DATA_DIR: Path = Path(os.getcwd()).resolve() # archivebox user data dir
|
||||
ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir
|
||||
|
||||
#############################################################################################
|
||||
|
||||
@cache
def get_collection_id(DATA_DIR=DATA_DIR):
    """Get a short, stable, unique ID for the current collection.

    The ID is read from ``DATA_DIR/.collection_id`` when that file is readable
    and non-empty. Otherwise it is derived from the resolved DATA_DIR path
    (first 8 hex chars of its SHA-256) and written back to the file on a
    best-effort basis.
    """
    collection_id_file = DATA_DIR / '.collection_id'

    try:
        existing_id = collection_id_file.read_text().strip()
    except OSError:
        # missing or unreadable file (FileNotFoundError / PermissionError are OSError subclasses)
        existing_id = ''

    # ignore an empty ID file: an empty ID would make every path built from it
    # (e.g. TMP_DIR / collection_id) collapse onto its parent directory
    if existing_id:
        return existing_id

    hash_key = str(DATA_DIR.resolve()).encode()
    collection_id = hashlib.sha256(hash_key).hexdigest()[:8]
    try:
        collection_id_file.write_text(collection_id)
    except OSError:
        # DATA_DIR is not writable: skip persisting, the ID is recomputed from the path next time
        pass
    return collection_id
|
||||
|
||||
|
||||
def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True) -> bool:
    """Check if a given directory is writable by a specific user.

    Args:
        dir_path: directory to probe by creating and deleting a '.permissions_test' file in it.
        uid: user ID to run the probe as (defaults to the current effective UID).
        gid: group ID, only recorded in the probe file's contents (defaults to the
            current effective GID); the probe itself only switches the UID.
        fallback: passed to SudoPermission; if True, the probe runs as the current
            user when switching to ``uid`` is not permitted.

    Returns:
        True if the probe file could be written and removed, False on any OSError.
    """
    # compare against None explicitly: uid=0 / gid=0 (root) are valid values but falsy
    uid = os.geteuid() if uid is None else uid
    gid = os.getegid() if gid is None else gid

    test_file = dir_path / '.permissions_test'
    try:
        with SudoPermission(uid=uid, fallback=fallback):
            test_file.exists()   # result unused; presumably an early stat() probe of the dir
            test_file.write_text(f'Checking if PUID={uid} PGID={gid} can write to dir')
            test_file.unlink()
            return True
    except OSError:
        # IOError is an alias of OSError and PermissionError a subclass of it
        return False
|
||||
|
||||
|
||||
|
||||
@cache
def get_LIB_DIR():
    """
    Return the directory used for shared library/dependency files, creating it if needed.

    Uses the SYSTEM_LIB_DIR env var when set, otherwise the platform's site-wide data dir,
    falling back to the per-user data dir when the site-wide one cannot be created.

    - should be shared with other collections on the same host
    - must be scoped by CPU architecture, OS family, and archivebox version
    - should not be shared with other hosts/archivebox versions
    - must be writable by any archivebox user
    - should be persistent across reboots
    - can be on a docker bind mount but probably shouldn't be
    - ok to have a long path (doesn't contain SOCKETS)

    Raises:
        PermissionError: if the dir is not writable and we are not root (so cannot fix ownership).
    """
    from .version import detect_installed_version

    HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)

    if 'SYSTEM_LIB_DIR' in os.environ:
        lib_dir = Path(os.environ['SYSTEM_LIB_DIR'])
    else:
        with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True):
            lib_dir = HOST_DIRS.site_data_path

    # Docker: /usr/local/share/archivebox/0.8.5
    # Ubuntu: /usr/local/share/archivebox/0.8.5
    # macOS:  /Library/Application Support/archivebox
    try:
        with SudoPermission(uid=0, fallback=True):
            lib_dir.mkdir(parents=True, exist_ok=True)
    except PermissionError:
        # our user cannot create the site-wide dir, use the per-user data dir instead
        lib_dir = HOST_DIRS.user_data_path
        lib_dir.mkdir(parents=True, exist_ok=True)

    if not dir_is_writable(lib_dir):
        if not IS_ROOT:
            raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')

        # make sure lib dir is owned by the archivebox user, not root
        with SudoPermission(uid=0):
            try:
                # os.chown avoids interpolating an env-provided path into a root shell command
                os.chown(lib_dir, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP)
            except OSError:
                # best-effort, like the previous shell `chown` whose exit status was never checked
                pass

    return lib_dir
|
||||
|
||||
def _ensure_tmp_dir_is_writable(run_dir: Path) -> None:
    """Create run_dir if missing and make sure the archivebox user can write to it (chown as root if needed)."""
    with SudoPermission(uid=0, fallback=True):
        run_dir.mkdir(parents=True, exist_ok=True)

    if dir_is_writable(run_dir):
        return

    if not IS_ROOT:
        raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')

    with SudoPermission(uid=0, fallback=False):
        try:
            # os.chown avoids interpolating an env-provided path into a root shell command
            os.chown(run_dir, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP)
        except OSError:
            # best-effort, like the previous shell `chown` whose exit status was never checked
            pass


@cache
def get_TMP_DIR():
    """
    Return the per-collection runtime/tmp directory, creating it if needed.

    Uses SYSTEM_TMP_DIR/<collection_id> when the SYSTEM_TMP_DIR env var is set, otherwise the
    platform's site runtime dir, falling back to <system tempdir>/archivebox/<collection_id>
    when that path would be too long to hold a UNIX socket.

    - must NOT be inside DATA_DIR / inside a docker volume bind mount
    - must NOT have a long PATH (UNIX socket path length restrictions)
    - must NOT be shared with other collections/hosts
    - must be writable by archivebox user & root
    - must be cleared on every boot / not persisted
    - must be cleared on every archivebox version upgrade

    Raises:
        PermissionError: if the dir is not writable and we are not root (so cannot fix ownership).
        AssertionError: if the resulting path is too long for a socket path.
    """
    from .version import detect_installed_version

    HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)

    # the checks below compare against 95, below the 108 char socket path limit named in the error message
    max_path_len = 95
    too_long_msg = 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'

    if 'SYSTEM_TMP_DIR' in os.environ:
        run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
        _ensure_tmp_dir_is_writable(run_dir)
        # raise explicitly instead of `assert` so the check still runs under `python -O`
        if len(str(run_dir / 'supervisord.conf')) >= max_path_len:
            raise AssertionError(too_long_msg)
        return run_dir

    run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve()
    if len(str(run_dir)) + len('/supervisord.sock') >= max_path_len:
        # platform runtime dir is too deep for a socket path, try the system temp dir instead
        run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR)
        if len(str(run_dir)) + len('/supervisord.sock') >= max_path_len:
            raise AssertionError(too_long_msg)

    _ensure_tmp_dir_is_writable(run_dir)

    # Docker: /tmp/archivebox/0.8.5/abc324235
    # Ubuntu: /tmp/archivebox/0.8.5/abc324235
    # macOS:  /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/0.8.5/abc324235
    return run_dir
|
||||
|
70
archivebox/config/permissions.py
Normal file
70
archivebox/config/permissions.py
Normal file
|
@ -0,0 +1,70 @@
|
|||
__package__ = 'archivebox.config'
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from contextlib import contextmanager
|
||||
|
||||
#############################################################################################
|
||||
|
||||
# The collection data dir is the current working directory at import time
DATA_DIR = Path(os.getcwd())

# Ownership of the data dir, captured once at import time
DATA_DIR_STAT = Path(DATA_DIR).stat()
DATA_DIR_UID = DATA_DIR_STAT.st_uid
DATA_DIR_GID = DATA_DIR_STAT.st_gid
# Last-resort user/group IDs, used below when both other candidates are 0
DEFAULT_PUID = 911
DEFAULT_PGID = 911
# Real and effective IDs of the current process at import time
RUNNING_AS_UID = os.getuid()
RUNNING_AS_GID = os.getgid()
EUID = os.geteuid()
EGID = os.getegid()
# Last path component of the resolved home directory (presumably the username)
USER: str = Path('~').expanduser().resolve().name

# True when the process was started with real UID 0
IS_ROOT = RUNNING_AS_UID == 0
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')

# Only set PUID/PGID if not already present in the environment.
# `or` skips 0, so the value is the first non-root ID among: data dir owner, current user, default (911)
os.environ.setdefault('PUID', str(DATA_DIR_UID or RUNNING_AS_UID or DEFAULT_PUID))
os.environ.setdefault('PGID', str(DATA_DIR_GID or RUNNING_AS_GID or DEFAULT_PGID))

# Numeric user/group IDs read back from the PUID/PGID env vars
ARCHIVEBOX_USER = int(os.environ['PUID'])
ARCHIVEBOX_GROUP = int(os.environ['PGID'])
|
||||
|
||||
#############################################################################################
|
||||
|
||||
def drop_privileges():
    """If running as root, drop privileges to the user that owns the data dir (or PUID, or default=911)"""

    # archivebox should always run as the data dir owner / provided PUID, never as root.
    # Only the effective UID is switched here; code that needs root again (e.g. for installing
    # dependencies) should use the SudoPermission() context manager to regain it.
    started_as_root = os.getuid() == 0
    if started_as_root and os.geteuid() != ARCHIVEBOX_USER:
        os.seteuid(ARCHIVEBOX_USER)
|
||||
|
||||
|
||||
@contextmanager
def SudoPermission(uid=0, fallback=False):
    """Attempt to run code with sudo permissions for a given user (or root)

    Switches the effective UID to `uid` for the duration of the `with` body, then sets it
    to the owner of DATA_DIR afterwards. With fallback=True, a PermissionError while
    switching (in either direction) is ignored and execution continues as the current user.
    """

    already_target_user = os.geteuid() == uid
    if already_target_user:
        # effective UID already matches: nothing to switch and nothing to restore
        yield
        return

    try:
        # become the requested user
        os.seteuid(uid)
    except PermissionError as switch_err:
        if fallback:
            pass  # carry on and run the body as the current user
        else:
            raise PermissionError(f'Not enough permissions to run code as uid={uid}, please retry with sudo') from switch_err

    try:
        # hand control to the caller's `with` body
        yield
    finally:
        # afterwards the effective UID is set to the DATA_DIR owner (looked up fresh each time)
        owner_uid = DATA_DIR.stat().st_uid
        try:
            os.seteuid(owner_uid)
        except PermissionError as revert_err:
            if not fallback:
                raise PermissionError(f'Failed to revert uid={uid} back to {owner_uid} after running code with sudo') from revert_err
|
||||
|
121
archivebox/config/version.py
Normal file
121
archivebox/config/version.py
Normal file
|
@ -0,0 +1,121 @@
|
|||
__package__ = 'archivebox.config'
|
||||
|
||||
import os
|
||||
import importlib.metadata
|
||||
|
||||
from pathlib import Path
|
||||
from functools import cache
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
#############################################################################################
|
||||
|
||||
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')
|
||||
|
||||
PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir
|
||||
DATA_DIR: Path = Path(os.getcwd()).resolve() # archivebox user data dir
|
||||
ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir
|
||||
|
||||
#############################################################################################
|
||||
|
||||
|
||||
@cache
def detect_installed_version(PACKAGE_DIR: Path=PACKAGE_DIR):
    """Autodetect the installed archivebox version from pip package metadata or the pyproject.toml file.

    Returns:
        The version string, or 'dev' when neither source yields a version.
    """
    try:
        # if in production install, use pip-installed package metadata
        return importlib.metadata.version('archivebox').strip()
    except importlib.metadata.PackageNotFoundError:
        pass

    try:
        # if in dev Git repo dir, use pyproject.toml file
        pyproject_lines = (PACKAGE_DIR.parent / 'pyproject.toml').read_text().splitlines()
    except FileNotFoundError:
        # building docs, pyproject.toml is not available
        pyproject_lines = []

    for line in pyproject_lines:
        if line.startswith('version = '):
            # strip whitespace before the quotes so trailing spaces / '\r' don't leave a dangling quote
            return line.split(' = ', 1)[-1].strip().strip('"').strip()

    return 'dev'
|
||||
|
||||
|
||||
@cache
def get_COMMIT_HASH() -> Optional[str]:
    """Return the commit hash of the git checkout next to PACKAGE_DIR, or None if it cannot be read.

    Reads ../.git/HEAD: a detached HEAD holds the hash directly, otherwise the ref file it
    points to is read. If that fails, falls back to the first file found in ../.git/refs/heads/.
    """
    git_dir = PACKAGE_DIR / '../.git'

    try:
        head = (git_dir / 'HEAD').read_text().strip()
        is_bare_hash = bool(head) and all(ch in '0123456789abcdef' for ch in head.lower())
        if is_bare_hash:
            # detached HEAD: the file contains the commit hash itself, not "ref: <path>"
            return head
        ref = head.split(' ')[-1]
        return git_dir.joinpath(ref).read_text().strip()
    except (OSError, ValueError):
        # no git checkout, unreadable file, or undecodable contents
        pass

    try:
        return list((PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
    except (OSError, ValueError, IndexError):
        # IndexError: refs/heads/ is missing or empty
        pass

    return None
|
||||
|
||||
@cache
def get_BUILD_TIME() -> str:
    """Return a string describing when this archivebox build was produced.

    In Docker, this is the text following the last 'BUILD_END_TIME=' in /VERSION.txt (up to the
    end of that line). Otherwise it is the mtime of PACKAGE_DIR/README.md formatted as
    'YYYY-MM-DD HH:MM:SS <unix timestamp>' in local time.
    """
    if IN_DOCKER:
        version_txt = Path('/VERSION.txt').read_text()
        return version_txt.rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]

    src_last_modified_unix_timestamp = (PACKAGE_DIR / 'README.md').stat().st_mtime
    last_modified = datetime.fromtimestamp(src_last_modified_unix_timestamp)
    # '%s' (epoch seconds) is a platform-specific strftime extension that is not available
    # everywhere, so append the integer timestamp ourselves instead
    return f"{last_modified.strftime('%Y-%m-%d %H:%M:%S')} {int(src_last_modified_unix_timestamp)}"
|
||||
|
||||
|
||||
# def get_versions_available_on_github(config):
|
||||
# """
|
||||
# returns a dictionary containing the ArchiveBox GitHub release info for
|
||||
# the recommended upgrade version and the currently installed version
|
||||
# """
|
||||
|
||||
# # we only want to perform the (relatively expensive) check for new versions
|
||||
# # when its most relevant, e.g. when the user runs a long-running command
|
||||
# subcommand_run_by_user = sys.argv[3] if len(sys.argv) > 3 else 'help'
|
||||
# long_running_commands = ('add', 'schedule', 'update', 'status', 'server')
|
||||
# if subcommand_run_by_user not in long_running_commands:
|
||||
# return None
|
||||
|
||||
# github_releases_api = "https://api.github.com/repos/ArchiveBox/ArchiveBox/releases"
|
||||
# response = requests.get(github_releases_api)
|
||||
# if response.status_code != 200:
|
||||
# stderr(f'[!] Warning: GitHub API call to check for new ArchiveBox version failed! (status={response.status_code})', color='lightyellow', config=config)
|
||||
# return None
|
||||
# all_releases = response.json()
|
||||
|
||||
# installed_version = parse_version_string(config['VERSION'])
|
||||
|
||||
# # find current version or nearest older version (to link to)
|
||||
# current_version = None
|
||||
# for idx, release in enumerate(all_releases):
|
||||
# release_version = parse_version_string(release['tag_name'])
|
||||
# if release_version <= installed_version:
|
||||
# current_version = release
|
||||
# break
|
||||
|
||||
# current_version = current_version or all_releases[-1]
|
||||
|
||||
# # recommended version is whatever comes after current_version in the release list
|
||||
# # (perhaps too conservative to only recommend upgrading one version at a time, but it's safest)
|
||||
# try:
|
||||
# recommended_version = all_releases[idx+1]
|
||||
# except IndexError:
|
||||
# recommended_version = None
|
||||
|
||||
# return {'recommended_version': recommended_version, 'current_version': current_version}
|
||||
|
||||
# def can_upgrade(config):
|
||||
# if config['VERSIONS_AVAILABLE'] and config['VERSIONS_AVAILABLE']['recommended_version']:
|
||||
# recommended_version = parse_version_string(config['VERSIONS_AVAILABLE']['recommended_version']['tag_name'])
|
||||
# current_version = parse_version_string(config['VERSIONS_AVAILABLE']['current_version']['tag_name'])
|
||||
# return recommended_version > current_version
|
||||
# return False
|
||||
|
||||
|
||||
VERSION: str = detect_installed_version()
|
|
@ -5,7 +5,7 @@ from django.utils import timezone
|
|||
from django.contrib.auth.middleware import RemoteUserMiddleware
|
||||
from django.core.exceptions import ImproperlyConfigured
|
||||
|
||||
from archivebox.config import SERVER_CONFIG
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
|
||||
|
||||
def detect_timezone(request, activate: bool=True):
|
||||
|
|
|
@ -13,7 +13,8 @@ import abx.archivebox
|
|||
import abx.archivebox.use
|
||||
import abx.django.use
|
||||
|
||||
from archivebox.config import VERSION, DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS, SHELL_CONFIG, SERVER_CONFIG # noqa
|
||||
from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS
|
||||
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG # noqa
|
||||
|
||||
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
|
||||
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
|
||||
|
|
|
@ -27,7 +27,8 @@ from core.admin import result_url
|
|||
|
||||
from queues.tasks import bg_add
|
||||
|
||||
from archivebox.config import CONSTANTS_CONFIG, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG
|
||||
from archivebox.config import CONSTANTS_CONFIG, DATA_DIR, VERSION
|
||||
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG
|
||||
from archivebox.misc.util import base_url, htmlencode, ts_to_date_str
|
||||
|
||||
from .serve_static import serve_static_with_byterange_support
|
||||
|
|
|
@ -5,7 +5,8 @@ import io
|
|||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from archivebox.config import VERSION, ARCHIVING_CONFIG
|
||||
from archivebox.config import VERSION
|
||||
from archivebox.config.common import ARCHIVING_CONFIG
|
||||
from archivebox.config.legacy import SAVE_HTMLTOTEXT
|
||||
from archivebox.misc.system import atomic_write
|
||||
from archivebox.misc.util import enforce_types, is_static_file
|
||||
|
|
|
@ -12,9 +12,11 @@ from urllib.parse import urlparse
|
|||
from django.db.models import QuerySet, Q
|
||||
|
||||
|
||||
from archivebox.config import DATA_DIR, CONSTANTS, ARCHIVING_CONFIG, STORAGE_CONFIG, SEARCH_BACKEND_CONFIG
|
||||
from archivebox.misc.util import scheme, enforce_types, ExtendedEncoder
|
||||
from archivebox.misc.logging import stderr
|
||||
from archivebox.misc.util import scheme, enforce_types, ExtendedEncoder
|
||||
|
||||
from archivebox.config import DATA_DIR, CONSTANTS
|
||||
from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG, SEARCH_BACKEND_CONFIG
|
||||
from archivebox.config.legacy import URL_DENYLIST_PTN, URL_ALLOWLIST_PTN
|
||||
|
||||
from ..logging_util import (
|
||||
|
|
|
@ -16,7 +16,9 @@ from archivebox.misc.util import (
|
|||
htmlencode,
|
||||
urldecode,
|
||||
)
|
||||
from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG
|
||||
from archivebox.config import CONSTANTS, DATA_DIR, VERSION
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
from archivebox.config.version import get_COMMIT_HASH
|
||||
from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
|
||||
|
||||
from .schema import Link
|
||||
|
@ -56,7 +58,7 @@ def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) ->
|
|||
|
||||
return render_django_template(template, {
|
||||
'version': VERSION,
|
||||
'git_sha': SHELL_CONFIG.COMMIT_HASH or VERSION,
|
||||
'git_sha': get_COMMIT_HASH() or VERSION,
|
||||
'num_links': str(len(links)),
|
||||
'date_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d'),
|
||||
'time_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M'),
|
||||
|
|
|
@ -8,7 +8,8 @@ from pathlib import Path
|
|||
from datetime import datetime, timezone
|
||||
from typing import List, Optional, Iterator, Any, Union
|
||||
|
||||
from archivebox.config import VERSION, DATA_DIR, CONSTANTS, SERVER_CONFIG, SHELL_CONFIG
|
||||
from archivebox.config import VERSION, DATA_DIR, CONSTANTS
|
||||
from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG
|
||||
|
||||
from .schema import Link
|
||||
from archivebox.misc.system import atomic_write
|
||||
|
|
|
@ -9,7 +9,8 @@ from django.db.models import QuerySet
|
|||
from django.db import transaction
|
||||
|
||||
from archivebox.misc.util import enforce_types, parse_date
|
||||
from archivebox.config import DATA_DIR, GENERAL_CONFIG
|
||||
from archivebox.config import DATA_DIR
|
||||
from archivebox.config.common import GENERAL_CONFIG
|
||||
|
||||
from .schema import Link
|
||||
|
||||
|
|
|
@ -22,7 +22,8 @@ from rich.panel import Panel
|
|||
from rich_argparse import RichHelpFormatter
|
||||
from django.core.management.base import DjangoHelpFormatter
|
||||
|
||||
from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG
|
||||
from archivebox.config import CONSTANTS, DATA_DIR, VERSION
|
||||
from archivebox.config.common import SHELL_CONFIG
|
||||
from archivebox.misc.system import get_dir_size
|
||||
from archivebox.misc.util import enforce_types
|
||||
from archivebox.misc.logging import ANSI, stderr
|
||||
|
|
|
@ -14,13 +14,15 @@ from crontab import CronTab, CronSlices
|
|||
from django.db.models import QuerySet
|
||||
from django.utils import timezone
|
||||
|
||||
from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR, SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG
|
||||
from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR
|
||||
from archivebox.config.common import SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG
|
||||
from archivebox.config.permissions import SudoPermission, IN_DOCKER
|
||||
from .cli import (
|
||||
CLI_SUBCOMMANDS,
|
||||
run_subcommand,
|
||||
display_first,
|
||||
meta_cmds,
|
||||
main_cmds,
|
||||
setup_cmds,
|
||||
archive_cmds,
|
||||
)
|
||||
from .parsers import (
|
||||
|
@ -101,7 +103,7 @@ def help(out_dir: Path=DATA_DIR) -> None:
|
|||
) + '\n\n ' + '\n '.join(
|
||||
f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
|
||||
for cmd, func in all_subcommands.items()
|
||||
if cmd in main_cmds
|
||||
if cmd in setup_cmds
|
||||
) + '\n\n ' + '\n '.join(
|
||||
f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
|
||||
for cmd, func in all_subcommands.items()
|
||||
|
@ -119,10 +121,10 @@ def help(out_dir: Path=DATA_DIR) -> None:
|
|||
|
||||
[grey53]# using Docker:[/grey53]
|
||||
[blue]docker run[/blue] -v [light_slate_blue]$PWD:/data[/light_slate_blue] [grey53]-p 8000:8000[/grey53] -it [dark_green]archivebox/archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
|
||||
''' if SHELL_CONFIG.IN_DOCKER else ''
|
||||
DOCKER_DOCS = '\n [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Docker[/link]' if SHELL_CONFIG.IN_DOCKER else ''
|
||||
DOCKER_OUTSIDE_HINT = "\n [grey53]# outside of Docker:[/grey53]" if SHELL_CONFIG.IN_DOCKER else ''
|
||||
DOCKER_CMD_PREFIX = "[blue]docker ... [/blue]" if SHELL_CONFIG.IN_DOCKER else ''
|
||||
''' if IN_DOCKER else ''
|
||||
DOCKER_DOCS = '\n [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Docker[/link]' if IN_DOCKER else ''
|
||||
DOCKER_OUTSIDE_HINT = "\n [grey53]# outside of Docker:[/grey53]" if IN_DOCKER else ''
|
||||
DOCKER_CMD_PREFIX = "[blue]docker ... [/blue]" if IN_DOCKER else ''
|
||||
|
||||
print(f'''{DOCKER_USAGE}
|
||||
[deep_sky_blue4]Usage:[/deep_sky_blue4]{DOCKER_OUTSIDE_HINT}
|
||||
|
@ -158,7 +160,7 @@ def help(out_dir: Path=DATA_DIR) -> None:
|
|||
print(Panel(EXAMPLE_USAGE, expand=False, border_style='grey53', title='[green3]:white_check_mark: A collection [light_slate_blue]DATA DIR[/light_slate_blue] is currently active[/green3]', subtitle='Commands run inside this dir will only apply to this collection.'))
|
||||
else:
|
||||
DATA_SETUP_HELP = '\n'
|
||||
if SHELL_CONFIG.IN_DOCKER:
|
||||
if IN_DOCKER:
|
||||
DATA_SETUP_HELP += '[violet]Hint:[/violet] When using Docker, you need to mount a volume to use as your data dir:\n'
|
||||
DATA_SETUP_HELP += ' docker run [violet]-v /some/path/data:/data[/violet] archivebox/archivebox ...\n\n'
|
||||
DATA_SETUP_HELP += 'To load an [dark_blue]existing[/dark_blue] collection:\n'
|
||||
|
@ -190,6 +192,8 @@ def version(quiet: bool=False,
|
|||
|
||||
from plugins_auth.ldap.apps import LDAP_CONFIG
|
||||
from django.conf import settings
|
||||
from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
|
||||
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID
|
||||
|
||||
# 0.7.1
|
||||
# ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365
|
||||
|
@ -198,13 +202,14 @@ def version(quiet: bool=False,
|
|||
# DEBUG=False IS_TTY=True TZ=UTC SEARCH_BACKEND=ripgrep LDAP=False
|
||||
|
||||
p = platform.uname()
|
||||
COMMIT_HASH = get_COMMIT_HASH()
|
||||
prnt(
|
||||
'[dark_green]ArchiveBox[/dark_green] [dark_goldenrod]v{}[/dark_goldenrod]'.format(CONSTANTS.VERSION),
|
||||
f'COMMIT_HASH={SHELL_CONFIG.COMMIT_HASH[:7] if SHELL_CONFIG.COMMIT_HASH else "unknown"}',
|
||||
f'BUILD_TIME={SHELL_CONFIG.BUILD_TIME}',
|
||||
f'COMMIT_HASH={COMMIT_HASH[:7] if COMMIT_HASH else "unknown"}',
|
||||
f'BUILD_TIME={get_BUILD_TIME()}',
|
||||
)
|
||||
prnt(
|
||||
f'IN_DOCKER={SHELL_CONFIG.IN_DOCKER}',
|
||||
f'IN_DOCKER={IN_DOCKER}',
|
||||
f'IN_QEMU={SHELL_CONFIG.IN_QEMU}',
|
||||
f'ARCH={p.machine}',
|
||||
f'OS={p.system}',
|
||||
|
@ -212,11 +217,13 @@ def version(quiet: bool=False,
|
|||
f'PYTHON={sys.implementation.name.title()}',
|
||||
)
|
||||
OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS.DATA_DIR.is_mount or CONSTANTS.DATA_LOCATIONS.ARCHIVE_DIR.is_mount
|
||||
DATA_DIR_STAT = CONSTANTS.DATA_DIR.stat()
|
||||
prnt(
|
||||
f'EUID={os.geteuid()} UID={RUNNING_AS_UID} PUID={ARCHIVEBOX_USER} FS_UID={DATA_DIR_STAT.st_uid}',
|
||||
f'EGID={os.getegid()} GID={RUNNING_AS_GID} PGID={ARCHIVEBOX_GROUP} FS_GID={DATA_DIR_STAT.st_gid}',
|
||||
f'FS_PERMS={STORAGE_CONFIG.OUTPUT_PERMISSIONS}',
|
||||
f'FS_ATOMIC={STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES}',
|
||||
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
|
||||
f'FS_USER={SHELL_CONFIG.PUID}:{SHELL_CONFIG.PGID}',
|
||||
f'FS_PERMS={STORAGE_CONFIG.OUTPUT_PERMISSIONS}',
|
||||
)
|
||||
prnt(
|
||||
f'DEBUG={SHELL_CONFIG.DEBUG}',
|
||||
|
@ -261,8 +268,36 @@ def version(quiet: bool=False,
|
|||
else:
|
||||
prnt()
|
||||
prnt('[red][i] Data locations:[/red] (not in a data directory)')
|
||||
|
||||
|
||||
prnt()
|
||||
|
||||
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, DEFAULT_PUID, DEFAULT_PGID, IS_ROOT, USER
|
||||
|
||||
data_dir_stat = Path(DATA_DIR).stat()
|
||||
data_dir_uid, data_dir_gid = data_dir_stat.st_uid, data_dir_stat.st_gid
|
||||
data_owned_by_root = data_dir_uid == 0 or data_dir_gid == 0
|
||||
|
||||
data_owned_by_default_user = data_dir_uid == DEFAULT_PUID or data_dir_gid == DEFAULT_PGID
|
||||
data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) and not IS_ROOT
|
||||
data_not_writable = not (os.access(DATA_DIR, os.W_OK) and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK))
|
||||
if data_owned_by_root:
|
||||
prnt('[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]root[/red], ArchiveBox will refuse to run![/yellow]')
|
||||
elif data_owner_doesnt_match or data_not_writable:
|
||||
prnt(f'[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]{data_dir_uid}:{data_dir_gid}[/red], but ArchiveBox user is [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue] ({USER})! (ArchiveBox may not be able to write to the data dir)[/yellow]')
|
||||
else:
|
||||
prnt(f':information: [blue]DATA_DIR[/blue] is currently owned by [blue]{data_dir_uid}:{data_dir_gid}[/blue] (PUID:PGID)')
|
||||
|
||||
if data_owned_by_root or data_owner_doesnt_match or data_owned_by_default_user or data_not_writable:
|
||||
prnt(f'[violet]Hint:[/violet] If you encounter permissions errors, change [red]{data_dir_uid}[/red]:{data_dir_gid} (PUID:PGID) to match the user that will run ArchiveBox, e.g.:')
|
||||
prnt(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {DATA_DIR.resolve()}')
|
||||
prnt(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.LIB_DIR.resolve()}')
|
||||
prnt(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.TMP_DIR.resolve()}')
|
||||
prnt()
|
||||
prnt('[blue]More info:[/blue]')
|
||||
prnt(' [link=https://github.com/ArchiveBox/ArchiveBox#storage-requirements]https://github.com/ArchiveBox/ArchiveBox#storage-requirements[/link]')
|
||||
prnt(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions]https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions[/link]')
|
||||
prnt(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid[/link]')
|
||||
prnt(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts]https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts[/link]')
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
@ -948,23 +983,56 @@ def list_folders(links: List[Link],
|
|||
@enforce_types
|
||||
def install(out_dir: Path=DATA_DIR) -> None:
|
||||
"""Automatically install all ArchiveBox dependencies and extras"""
|
||||
|
||||
# if running as root:
|
||||
# - run init to create index + lib dir
|
||||
# - chown -R 911 DATA_DIR
|
||||
# - install all binaries as root
|
||||
# - chown -R 911 LIB_DIR
|
||||
# else:
|
||||
# - run init to create index + lib dir as current user
|
||||
# - install all binaries as current user
|
||||
# - recommend user re-run with sudo if any deps need to be installed as root
|
||||
|
||||
from rich import print
|
||||
from django.conf import settings
|
||||
|
||||
from archivebox import CONSTANTS
|
||||
from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
|
||||
|
||||
if not ARCHIVE_DIR.exists():
|
||||
run_subcommand('init', stdin=None, pwd=out_dir)
|
||||
|
||||
stderr('\n[+] Installing ArchiveBox dependencies automatically...', color='green')
|
||||
run_subcommand('init', stdin=None, pwd=out_dir) # must init full index because we need a db to store InstalledBinary entries in
|
||||
|
||||
print('\n[green][+] Installing ArchiveBox dependencies automatically...[/green]')
|
||||
|
||||
# we never want the data dir to be owned by root, so detect the owner of the existing DATA_DIR to try and guess the desired non-root UID
|
||||
if IS_ROOT:
|
||||
# if we have sudo/root permissions, take advantage of them just while installing dependencies
|
||||
print()
|
||||
print('[yellow]:warning: Using [red]root[/red] privileges only to install dependencies that need it, all other operations should be done as a [blue]non-root[/blue] user.[/yellow]')
|
||||
print(f' DATA_DIR, LIB_DIR, and TMP_DIR will be owned by [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue].')
|
||||
print()
|
||||
|
||||
for binary in reversed(list(settings.BINARIES.values())):
|
||||
providers = ' [grey53]or[/grey53] '.join(provider.name for provider in binary.binproviders_supported)
|
||||
print(f'[+] Locating / Installing [yellow]{binary.name}[/yellow] using [red]{providers}[/red]...')
|
||||
try:
|
||||
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
|
||||
if IS_ROOT:
|
||||
with SudoPermission(uid=0):
|
||||
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
|
||||
except Exception as e:
|
||||
print(f'[X] Failed to install {binary.name}: {e}')
|
||||
|
||||
if IS_ROOT:
|
||||
print(f'[yellow]:warning: Retrying {binary.name} installation with [red]sudo[/red]...[/yellow]')
|
||||
with SudoPermission(uid=0):
|
||||
try:
|
||||
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
|
||||
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
|
||||
except Exception as e:
|
||||
print(f'[red]:cross_mark: Failed to install {binary.name} as root: {e}[/red]')
|
||||
else:
|
||||
print(f'[red]:cross_mark: Failed to install {binary.name} as user {ARCHIVEBOX_USER}: {e}[/red]')
|
||||
|
||||
|
||||
from django.contrib.auth import get_user_model
|
||||
User = get_user_model()
|
||||
|
@ -974,12 +1042,13 @@ def install(out_dir: Path=DATA_DIR) -> None:
|
|||
stderr(' archivebox manage createsuperuser')
|
||||
# run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
|
||||
|
||||
stderr('\n[√] Set up ArchiveBox and its dependencies successfully.', color='green')
|
||||
print('\n[green][√] Set up ArchiveBox and its dependencies successfully.[/green]\n', file=sys.stderr)
|
||||
|
||||
from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
|
||||
|
||||
run_shell([ARCHIVEBOX_BINARY.load().abspath, 'version'], capture_output=False, cwd=out_dir)
|
||||
|
||||
|
||||
# backwards-compatibility:
|
||||
setup = install
|
||||
|
||||
|
@ -1100,6 +1169,7 @@ def schedule(add: bool=False,
|
|||
|
||||
check_data_folder()
|
||||
from archivebox.plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
|
||||
from archivebox.config.permissions import USER
|
||||
|
||||
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
|
||||
|
||||
|
@ -1156,7 +1226,7 @@ def schedule(add: bool=False,
|
|||
existing_jobs = list(cron.find_comment(CRON_COMMENT))
|
||||
|
||||
print()
|
||||
print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(SHELL_CONFIG.USER, len(existing_jobs), **SHELL_CONFIG.ANSI))
|
||||
print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **SHELL_CONFIG.ANSI))
|
||||
print('\n'.join(f' > {cmd}' if str(cmd) == str(new_job) else f' {cmd}' for cmd in existing_jobs))
|
||||
if total_runs > 60 and not quiet:
|
||||
stderr()
|
||||
|
@ -1170,7 +1240,7 @@ def schedule(add: bool=False,
|
|||
if existing_jobs:
|
||||
print('\n'.join(str(cmd) for cmd in existing_jobs))
|
||||
else:
|
||||
stderr('{red}[X] There are no ArchiveBox cron jobs scheduled for your user ({}).{reset}'.format(SHELL_CONFIG.USER, **SHELL_CONFIG.ANSI))
|
||||
stderr('{red}[X] There are no ArchiveBox cron jobs scheduled for your user ({}).{reset}'.format(USER, **SHELL_CONFIG.ANSI))
|
||||
stderr(' To schedule a new job, run:')
|
||||
stderr(' archivebox schedule --every=[timeperiod] --depth=1 https://example.com/some/rss/feed.xml')
|
||||
raise SystemExit(0)
|
||||
|
@ -1294,7 +1364,7 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=DATA_DIR) -> None:
|
|||
check_data_folder()
|
||||
from django.core.management import execute_from_command_line
|
||||
|
||||
if (args and "createsuperuser" in args) and (SHELL_CONFIG.IN_DOCKER and not SHELL_CONFIG.IS_TTY):
|
||||
if (args and "createsuperuser" in args) and (IN_DOCKER and not SHELL_CONFIG.IS_TTY):
|
||||
stderr('[!] Warning: you need to pass -it to use interactive commands in docker', color='lightyellow')
|
||||
stderr(' docker run -it archivebox manage {}'.format(' '.join(args or ['...'])), color='lightyellow')
|
||||
stderr('')
|
||||
|
|
|
@ -1,37 +1,44 @@
|
|||
__package__ = 'archivebox.misc'
|
||||
|
||||
from archivebox.config import DATA_DIR, ARCHIVE_DIR, CONSTANTS, SHELL_CONFIG
|
||||
import sys
|
||||
from rich import print
|
||||
|
||||
from .logging import stderr
|
||||
# DO NOT ADD ANY TOP-LEVEL IMPORTS HERE
|
||||
# this file is imported by archivebox/__init__.py
|
||||
# and any imports here will be imported by EVERYTHING else
|
||||
# so this file should only be used for pure python checks
|
||||
# that don't need to import other parts of ArchiveBox
|
||||
|
||||
|
||||
def check_data_folder() -> None:
|
||||
|
||||
from archivebox import DATA_DIR, ARCHIVE_DIR
|
||||
|
||||
archive_dir_exists = ARCHIVE_DIR.exists()
|
||||
if not archive_dir_exists:
|
||||
stderr('[X] No archivebox index found in the current directory.', color='red')
|
||||
stderr(f' {DATA_DIR}', color='lightyellow')
|
||||
stderr()
|
||||
stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**SHELL_CONFIG.ANSI))
|
||||
stderr(' cd path/to/your/archive/folder')
|
||||
stderr(' archivebox [command]')
|
||||
stderr()
|
||||
stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**SHELL_CONFIG.ANSI))
|
||||
stderr(' archivebox init')
|
||||
print('[red][X] No archivebox index found in the current directory.[/red]', file=sys.stderr)
|
||||
print(f' {DATA_DIR}', file=sys.stderr)
|
||||
print(file=sys.stderr)
|
||||
print(' [violet]Hint[/violet]: Are you running archivebox in the right folder?', file=sys.stderr)
|
||||
print(' cd path/to/your/archive/folder', file=sys.stderr)
|
||||
print(' archivebox [command]', file=sys.stderr)
|
||||
print(file=sys.stderr)
|
||||
print(' [violet]Hint[/violet]: To create a new archive collection or import existing data in this folder, run:', file=sys.stderr)
|
||||
print(' archivebox init', file=sys.stderr)
|
||||
raise SystemExit(2)
|
||||
|
||||
|
||||
|
||||
|
||||
def check_migrations():
|
||||
from archivebox import DATA_DIR, CONSTANTS
|
||||
from ..index.sql import list_migrations
|
||||
|
||||
pending_migrations = [name for status, name in list_migrations() if not status]
|
||||
|
||||
if pending_migrations:
|
||||
stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
|
||||
stderr(f' {DATA_DIR}')
|
||||
stderr()
|
||||
stderr(f' To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:')
|
||||
stderr(' archivebox init')
|
||||
print('[red][X] This collection was created with an older version of ArchiveBox and must be upgraded first.[/red]')
|
||||
print(f' {DATA_DIR}', file=sys.stderr)
|
||||
print(file=sys.stderr)
|
||||
print(f' [violet]Hint:[/violet] To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:', file=sys.stderr)
|
||||
print(' archivebox init', file=sys.stderr)
|
||||
raise SystemExit(3)
|
||||
|
||||
CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True)
|
||||
|
@ -39,3 +46,39 @@ def check_migrations():
|
|||
# CONSTANTS.CACHE_DIR.mkdir(exist_ok=True)
|
||||
(CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True)
|
||||
(CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True)
|
||||
|
||||
|
||||
def check_io_encoding():
    """Exit with status 2 unless the process's standard streams use UTF-8.

    The encoding is read from the first available stream among
    ``sys.__stdout__``, ``sys.stdout``, ``sys.__stderr__`` and ``sys.stderr``,
    upper-cased, with the spelling ``UTF8`` normalised to ``UTF-8``.

    Raises:
        SystemExit: with code 2, after printing fix-it instructions to stderr,
            when the detected encoding is anything other than UTF-8.
    """
    stream = sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr
    PYTHON_ENCODING = stream.encoding.upper().replace('UTF8', 'UTF-8')

    if PYTHON_ENCODING == 'UTF-8':
        return

    print(f'[red][X] Your system is running python3 scripts with a bad locale setting: {PYTHON_ENCODING} (it should be UTF-8).[/red]', file=sys.stderr)
    print(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)', file=sys.stderr)
    print(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"', file=sys.stderr)
    # the separator line belongs to the same message, so it goes to stderr too
    # (it was previously written to stdout, polluting piped output)
    print('', file=sys.stderr)
    print(' Confirm that it\'s fixed by opening a new shell and running:', file=sys.stderr)
    print(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8', file=sys.stderr)
    raise SystemExit(2)
|
||||
|
||||
|
||||
def check_not_root():
    """Abort with exit status 2 when ``IS_ROOT`` is set for a regular command.

    Invocations asking for help (``-h`` / ``--help`` / ``help``), the version
    (``--version`` / ``version``) or running ``setup`` / ``install`` are
    exempt and return normally even when ``IS_ROOT`` is true.

    Raises:
        SystemExit: with code 2, after printing guidance to stderr (plus
            Docker-specific example commands when ``IN_DOCKER`` is true).
    """
    # kept function-local: this module is meant to have no top-level ArchiveBox imports
    from archivebox.config.permissions import IS_ROOT, IN_DOCKER

    attempted_command = ' '.join(sys.argv[1:]) if len(sys.argv) > 1 else ''

    # sys.argv[:2] is the program name plus the first argument (the subcommand)
    first_args = sys.argv[:2]
    is_getting_help = '-h' in sys.argv or '--help' in sys.argv or 'help' in first_args
    is_getting_version = '--version' in sys.argv or 'version' in first_args
    is_installing = 'setup' in first_args or 'install' in first_args

    if not IS_ROOT or is_getting_help or is_getting_version or is_installing:
        return

    print('[red][!] ArchiveBox should never be run as root![/red]', file=sys.stderr)
    print(' For more information, see the security overview documentation:', file=sys.stderr)
    print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root', file=sys.stderr)

    if IN_DOCKER:
        print('[red][!] When using Docker, you must run commands with [green]docker run[/green] instead of [yellow3]docker exec[/yellow3], e.g.:', file=sys.stderr)
        # f-prefix was missing here, so the literal "{attempted_command}" placeholder was printed
        print(f' docker compose run archivebox {attempted_command}', file=sys.stderr)
        print(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}', file=sys.stderr)
        print(' or:', file=sys.stderr)
        print(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
        print(f' docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)

    # running as root is refused whether or not we are inside Docker
    raise SystemExit(2)
|
||||
|
|
|
@ -13,6 +13,7 @@ from rich.highlighter import Highlighter
|
|||
|
||||
# SETUP RICH CONSOLE / TTY detection / COLOR / PROGRESS BARS
|
||||
CONSOLE = Console()
|
||||
STDERR = Console(stderr=True)
|
||||
IS_TTY = CONSOLE.is_interactive
|
||||
|
||||
|
||||
|
@ -51,7 +52,7 @@ COLOR_DICT = defaultdict(lambda: [(0, 0, 0), (0, 0, 0)], {
|
|||
'37': [(255, 255, 255), (255, 255, 255)],
|
||||
})
|
||||
|
||||
# Logging Helpers
|
||||
# Logging Helpers (DEPRECATED, use rich.print instead going forward)
|
||||
def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[benedict]=None) -> None:
|
||||
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
||||
|
||||
|
|
|
@ -4,7 +4,6 @@ __package__ = 'archivebox.misc'
|
|||
import os
|
||||
import signal
|
||||
import shutil
|
||||
import getpass
|
||||
|
||||
from json import dump
|
||||
from pathlib import Path
|
||||
|
@ -14,7 +13,7 @@ from subprocess import _mswindows, PIPE, Popen, CalledProcessError, CompletedPro
|
|||
from crontab import CronTab
|
||||
from atomicwrites import atomic_write as lib_atomic_write
|
||||
|
||||
from archivebox.config import STORAGE_CONFIG
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
from archivebox.misc.util import enforce_types, ExtendedEncoder
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
__package__ = 'archivebox'
|
||||
__package__ = 'archivebox.misc'
|
||||
|
||||
import re
|
||||
import requests
|
||||
|
@ -25,10 +25,10 @@ except ImportError:
|
|||
detect_encoding = lambda rawdata: "utf-8"
|
||||
|
||||
|
||||
from archivebox.config.constants import STATICFILE_EXTENSIONS
|
||||
from archivebox.config import ARCHIVING_CONFIG
|
||||
from archivebox.config import CONSTANTS
|
||||
from archivebox.config.common import ARCHIVING_CONFIG
|
||||
|
||||
from .misc.logging import COLOR_DICT
|
||||
from .logging import COLOR_DICT
|
||||
|
||||
|
||||
### Parsing Helpers
|
||||
|
@ -120,7 +120,7 @@ def find_all_urls(urls_str: str):
|
|||
|
||||
def is_static_file(url: str):
|
||||
# TODO: the proper way is with MIME type detection + ext, not only extension
|
||||
return extension(url).lower() in STATICFILE_EXTENSIONS
|
||||
return extension(url).lower() in CONSTANTS.STATICFILE_EXTENSIONS
|
||||
|
||||
|
||||
def enforce_types(func):
|
||||
|
|
|
@ -13,7 +13,8 @@ from typing import IO, Tuple, List, Optional
|
|||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from archivebox.config import DATA_DIR, CONSTANTS, SHELL_CONFIG, ARCHIVING_CONFIG
|
||||
from archivebox.config import DATA_DIR, CONSTANTS
|
||||
from archivebox.config.common import SHELL_CONFIG, ARCHIVING_CONFIG
|
||||
from archivebox.misc.system import atomic_write
|
||||
from archivebox.misc.logging import stderr, hint
|
||||
from archivebox.misc.util import (
|
||||
|
|
|
@ -25,7 +25,8 @@ from abx.archivebox.base_binary import BaseBinary, env
|
|||
from abx.archivebox.base_hook import BaseHook
|
||||
|
||||
# Depends on Other Plugins:
|
||||
from archivebox.config import CONSTANTS, ARCHIVING_CONFIG, SHELL_CONFIG
|
||||
from archivebox.config import CONSTANTS
|
||||
from archivebox.config.common import ARCHIVING_CONFIG, SHELL_CONFIG
|
||||
from plugins_pkg.puppeteer.apps import PUPPETEER_BINPROVIDER
|
||||
from plugins_pkg.playwright.apps import PLAYWRIGHT_BINPROVIDER
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ from abx.archivebox.base_configset import BaseConfigSet
|
|||
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
||||
# from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
|
||||
|
||||
from archivebox.config import ARCHIVING_CONFIG
|
||||
from archivebox.config.common import ARCHIVING_CONFIG
|
||||
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
|
||||
from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ from abx.archivebox.base_configset import BaseConfigSet
|
|||
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
||||
from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
|
||||
|
||||
from archivebox.config import ARCHIVING_CONFIG
|
||||
from archivebox.config.common import ARCHIVING_CONFIG
|
||||
|
||||
|
||||
class GitConfig(BaseConfigSet):
|
||||
|
|
|
@ -5,14 +5,14 @@ from pathlib import Path
|
|||
from subprocess import run
|
||||
|
||||
from pydantic import InstanceOf, Field
|
||||
from pydantic_pkgr import BinProvider, BinName, bin_abspath
|
||||
from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict, bin_abspath
|
||||
|
||||
from abx.archivebox.base_plugin import BasePlugin, BaseHook
|
||||
from abx.archivebox.base_configset import BaseConfigSet
|
||||
from abx.archivebox.base_binary import BaseBinary, BinProviderName,ProviderLookupDict, env
|
||||
from abx.archivebox.base_binary import BaseBinary, env
|
||||
from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
|
||||
|
||||
from archivebox.config import ARCHIVING_CONFIG, STORAGE_CONFIG
|
||||
from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG
|
||||
from archivebox.plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
|
||||
|
||||
class MercuryConfig(BaseConfigSet):
|
||||
|
|
|
@ -16,7 +16,7 @@ from abx.archivebox.base_extractor import BaseExtractor
|
|||
from abx.archivebox.base_hook import BaseHook
|
||||
|
||||
# Depends on Other Plugins:
|
||||
from archivebox.config import ARCHIVING_CONFIG
|
||||
from archivebox.config.common import ARCHIVING_CONFIG
|
||||
from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
|
||||
|
||||
###################### Config ##########################
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
__package__ = 'archivebox.plugins_extractor.singlefile'
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional, ClassVar
|
||||
from typing import List, Dict, Optional
|
||||
# from typing_extensions import Self
|
||||
|
||||
# Depends on other PyPI/vendor packages:
|
||||
from pydantic import InstanceOf, Field, validate_call
|
||||
from pydantic import InstanceOf, Field
|
||||
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary
|
||||
|
||||
# Depends on other Django apps:
|
||||
|
@ -17,7 +17,7 @@ from abx.archivebox.base_queue import BaseQueue
|
|||
from abx.archivebox.base_hook import BaseHook
|
||||
|
||||
# Depends on Other Plugins:
|
||||
from archivebox.config import ARCHIVING_CONFIG
|
||||
from archivebox.config.common import ARCHIVING_CONFIG
|
||||
from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
|
||||
|
||||
###################### Config ##########################
|
||||
|
|
|
@ -14,7 +14,7 @@ from abx.archivebox.base_configset import BaseConfigSet
|
|||
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
||||
from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
|
||||
|
||||
from archivebox.config import ARCHIVING_CONFIG, STORAGE_CONFIG
|
||||
from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG
|
||||
from .wget_util import wget_output_path
|
||||
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ from abx.archivebox.base_configset import BaseConfigSet
|
|||
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
||||
from abx.archivebox.base_hook import BaseHook
|
||||
|
||||
from archivebox.config import ARCHIVING_CONFIG
|
||||
from archivebox.config.common import ARCHIVING_CONFIG
|
||||
from plugins_pkg.pip.apps import pip
|
||||
|
||||
###################### Config ##########################
|
||||
|
|
|
@ -18,7 +18,8 @@ from abx.archivebox.base_hook import BaseHook
|
|||
from abx.archivebox.base_searchbackend import BaseSearchBackend
|
||||
|
||||
# Depends on Other Plugins:
|
||||
from archivebox.config import CONSTANTS, SEARCH_BACKEND_CONFIG
|
||||
from archivebox.config import CONSTANTS
|
||||
from archivebox.config.common import SEARCH_BACKEND_CONFIG
|
||||
|
||||
###################### Config ##########################
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ from abx.archivebox.base_hook import BaseHook
|
|||
from abx.archivebox.base_searchbackend import BaseSearchBackend
|
||||
|
||||
# Depends on Other Plugins:
|
||||
from archivebox.config import SEARCH_BACKEND_CONFIG
|
||||
from archivebox.config.common import SEARCH_BACKEND_CONFIG
|
||||
|
||||
SONIC_LIB = None
|
||||
try:
|
||||
|
|
|
@ -17,7 +17,7 @@ from abx.archivebox.base_hook import BaseHook
|
|||
from abx.archivebox.base_searchbackend import BaseSearchBackend
|
||||
|
||||
# Depends on Other Plugins:
|
||||
from archivebox.config import SEARCH_BACKEND_CONFIG
|
||||
from archivebox.config.common import SEARCH_BACKEND_CONFIG
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
__package__ = 'archivebox.queues'
|
||||
|
||||
import os
|
||||
import time
|
||||
import signal
|
||||
import psutil
|
||||
|
@ -12,6 +13,8 @@ from typing import Dict, cast
|
|||
from supervisor.xmlrpc import SupervisorTransport
|
||||
from xmlrpc.client import ServerProxy
|
||||
|
||||
from archivebox.config.permissions import ARCHIVEBOX_USER
|
||||
|
||||
from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, SOCK_FILE, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR
|
||||
|
||||
from typing import Iterator
|
||||
|
@ -42,6 +45,7 @@ childlogdir = {LOGS_DIR}
|
|||
directory = {DATA_DIR}
|
||||
strip_ansi = true
|
||||
nocleanup = true
|
||||
user = {ARCHIVEBOX_USER}
|
||||
|
||||
[unix_http_server]
|
||||
file = {TMP_DIR}/{SOCK_FILE.name}
|
||||
|
|
|
@ -11,7 +11,7 @@ import abx.archivebox.use
|
|||
from archivebox.index.schema import Link
|
||||
from archivebox.misc.util import enforce_types
|
||||
from archivebox.misc.logging import stderr
|
||||
from archivebox.config import SEARCH_BACKEND_CONFIG
|
||||
from archivebox.config.common import SEARCH_BACKEND_CONFIG
|
||||
|
||||
|
||||
def log_index_started(url):
|
||||
|
|
|
@ -110,12 +110,11 @@ if [[ -d "$PLAYWRIGHT_BROWSERS_PATH/.links" ]]; then
|
|||
chown -h $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/.links/*
|
||||
fi
|
||||
|
||||
# also chown tmp dir
|
||||
mkdir -p /tmp/archivebox
|
||||
chmod 777 /tmp
|
||||
chown $PUID:$PGID /tmp/archivebox
|
||||
mkdir -p /app/lib
|
||||
chown $PUID:$PGID /app/lib /app/lib/*
|
||||
# also chown tmp dir and lib dir
|
||||
mkdir -p "$SYSTEM_TMP_DIR"
|
||||
chown $PUID:$PGID "$SYSTEM_TMP_DIR"
|
||||
mkdir -p "$SYSTEM_LIB_DIR"
|
||||
chown $PUID:$PGID "$SYSTEM_LIB_DIR" "$SYSTEM_LIB_DIR"/*
|
||||
|
||||
# (this check is written in blood in 2023, QEMU silently breaks things in ways that are not obvious)
|
||||
export IN_QEMU="$(pmap 1 | grep qemu >/dev/null && echo 'True' || echo 'False')"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[project]
|
||||
name = "archivebox"
|
||||
version = "0.8.5rc2"
|
||||
version = "0.8.5rc3"
|
||||
requires-python = ">=3.10"
|
||||
description = "Self-hosted internet archiving solution."
|
||||
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
|
||||
|
@ -77,6 +77,7 @@ dependencies = [
|
|||
"atomicwrites==1.4.1",
|
||||
"django-taggit==1.3.0",
|
||||
"base32-crockford==0.3.0",
|
||||
"platformdirs>=4.3.6",
|
||||
# "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
|
||||
# "pydantic-pkgr>=0.4.7",
|
||||
############# Plugin Dependencies ################
|
||||
|
@ -133,7 +134,6 @@ dev-dependencies = [
|
|||
"django-autotyping>=0.5.1",
|
||||
]
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["pdm-backend"]
|
||||
build-backend = "pdm.backend"
|
||||
|
|
|
@ -7,11 +7,11 @@ from pathlib import Path
|
|||
import json, shutil
|
||||
import sqlite3
|
||||
|
||||
from archivebox.config import OUTPUT_PERMISSIONS
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
|
||||
from .fixtures import *
|
||||
|
||||
DIR_PERMISSIONS = OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
|
||||
DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
|
||||
|
||||
def test_init(tmp_path, process):
|
||||
assert "Initializing a new ArchiveBox" in process.stdout.decode("utf-8")
|
||||
|
@ -57,7 +57,7 @@ def test_correct_permissions_output_folder(tmp_path, process):
|
|||
index_files = ['index.sqlite3', 'archive']
|
||||
for file in index_files:
|
||||
file_path = tmp_path / file
|
||||
assert oct(file_path.stat().st_mode)[-3:] in (OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
|
||||
assert oct(file_path.stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
|
||||
|
||||
def test_correct_permissions_add_command_results(tmp_path, process, disable_extractors_dict):
|
||||
os.chdir(tmp_path)
|
||||
|
@ -65,7 +65,7 @@ def test_correct_permissions_add_command_results(tmp_path, process, disable_extr
|
|||
env=disable_extractors_dict)
|
||||
archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
|
||||
for path in archived_item_path.iterdir():
|
||||
assert oct(path.stat().st_mode)[-3:] in (OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
|
||||
assert oct(path.stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
|
||||
|
||||
def test_collision_urls_different_timestamps(tmp_path, process, disable_extractors_dict):
|
||||
os.chdir(tmp_path)
|
||||
|
|
2
uv.lock
2
uv.lock
|
@ -41,7 +41,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "archivebox"
|
||||
version = "0.8.5rc2"
|
||||
version = "0.8.5rc3"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "atomicwrites" },
|
||||
|
|
Loading…
Reference in a new issue