move config into dedicated global app

Nick Sweeting 2024-09-30 15:59:05 -07:00
parent ee7f73bd7b
commit 3e5b6ddeae
No known key found for this signature in database
79 changed files with 494 additions and 525 deletions

View file

@ -1,52 +1,33 @@
__package__ = 'archivebox'
# print('INSTALLING MONKEY PATCHES')
from .monkey_patches import * # noqa
# print('DONE INSTALLING MONKEY PATCHES')
import os
import sys
import importlib.metadata
from pathlib import Path
PACKAGE_DIR = Path(__file__).resolve().parent # archivebox source code dir
DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir
ARCHIVE_DIR = DATA_DIR / 'archive'
PACKAGE_DIR = Path(__file__).resolve().parent # archivebox source code dir
DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir
ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir
# make sure PACKAGE_DIR is in sys.path so we can import all subfolders
# without necessarily waiting for django to load them through INSTALLED_APPS
if str(PACKAGE_DIR) not in sys.path:
sys.path.append(str(PACKAGE_DIR))
# load fallback libraries from vendor dir
from .vendor import load_vendored_libs
load_vendored_libs()
from .config.constants import CONSTANTS, VERSION, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
os.environ['OUTPUT_DIR'] = str(DATA_DIR)
os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings'
# print('INSTALLING MONKEY PATCHES')
from .monkey_patches import * # noqa
# print('DONE INSTALLING MONKEY PATCHES')
def _detect_installed_version():
try:
return importlib.metadata.version(__package__ or 'archivebox')
except importlib.metadata.PackageNotFoundError:
try:
pyproject_config = (PACKAGE_DIR / 'pyproject.toml').read_text()
for line in pyproject_config.splitlines():  # iterate lines, not characters
if line.startswith('version = '):
return line.split(' = ', 1)[-1].strip('"')
except FileNotFoundError:
# building docs, pyproject.toml is not available
return 'dev'
raise Exception('Failed to detect installed archivebox version!')
VERSION = _detect_installed_version()
# print('LOADING VENDOR LIBRARIES')
from .vendor import load_vendored_libs # noqa
load_vendored_libs()
# print('DONE LOADING VENDOR LIBRARIES')
__version__ = VERSION
from .constants import CONSTANTS
__author__ = 'Nick Sweeting'
__license__ = 'MIT'
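
For reference, the detection order implemented above can be exercised standalone: installed pip metadata first, then the pyproject.toml version line, then a 'dev' fallback. A condensed sketch (the detect_version helper name is hypothetical):

    import importlib.metadata
    from pathlib import Path

    def detect_version(package_dir: Path) -> str:
        # 1) prefer the installed pip package metadata
        try:
            return importlib.metadata.version('archivebox')
        except importlib.metadata.PackageNotFoundError:
            pass
        # 2) fall back to the version line in pyproject.toml
        try:
            for line in (package_dir / 'pyproject.toml').read_text().splitlines():
                if line.startswith('version = '):
                    return line.split(' = ', 1)[-1].strip('"')
        except FileNotFoundError:
            return 'dev'   # 3) e.g. building docs without a source checkout
        raise RuntimeError('Failed to detect installed archivebox version!')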

View file

@ -15,7 +15,8 @@ from pydantic_pkgr import (
)
import abx
import archivebox
from archivebox.config import CONSTANTS
from .base_hook import BaseHook, HookType
@ -54,7 +55,7 @@ class BaseBinary(BaseHook, Binary):
@staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None:
bin_dir = bin_dir or archivebox.CONSTANTS.LIB_BIN_DIR
bin_dir = bin_dir or CONSTANTS.LIB_BIN_DIR
if not (binary.abspath and binary.abspath.exists()):
return
@ -68,19 +69,19 @@ class BaseBinary(BaseHook, Binary):
@validate_call
def load(self, **kwargs) -> Self:
binary = super().load(**kwargs)
self.symlink_to_lib(binary=binary, bin_dir=archivebox.CONSTANTS.LIB_BIN_DIR)
self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
return binary
@validate_call
def install(self, **kwargs) -> Self:
binary = super().install(**kwargs)
self.symlink_to_lib(binary=binary, bin_dir=archivebox.CONSTANTS.LIB_BIN_DIR)
self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
return binary
@validate_call
def load_or_install(self, **kwargs) -> Self:
binary = super().load_or_install(**kwargs)
self.symlink_to_lib(binary=binary, bin_dir=archivebox.CONSTANTS.LIB_BIN_DIR)
self.symlink_to_lib(binary=binary, bin_dir=CONSTANTS.LIB_BIN_DIR)
return binary
@property
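
Conceptually, symlink_to_lib exposes whichever binary was loaded or installed under the shared CONSTANTS.LIB_BIN_DIR. A minimal sketch of that idea (not the actual pydantic_pkgr implementation; the function name is illustrative):

    from pathlib import Path

    def symlink_binary_sketch(abspath: Path, bin_dir: Path) -> None:
        bin_dir.mkdir(parents=True, exist_ok=True)
        link = bin_dir / abspath.name
        link.unlink(missing_ok=True)      # replace any stale symlink
        link.symlink_to(abspath)          # e.g. lib/bin/wget -> /usr/bin/wget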

View file

@ -127,7 +127,7 @@ class BasePlugin(BaseModel):
@abx.hookimpl
def register(self, settings):
from archivebox.config import bump_startup_progress_bar
from archivebox.config.legacy import bump_startup_progress_bar
self._is_registered = True
bump_startup_progress_bar()
@ -139,7 +139,7 @@ class BasePlugin(BaseModel):
def ready(self, settings=None):
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
from archivebox.config import bump_startup_progress_bar
from archivebox.config.legacy import bump_startup_progress_bar
assert self._is_registered, f"Tried to run {self.plugin_module}.ready() but it was never registered!"
self._is_ready = True

View file

@ -1,7 +1,7 @@
__package__ = 'abx.django'
import itertools
from benedict import benedict
# from benedict import benedict
from .. import pm

View file

@ -12,8 +12,7 @@ from ninja import NinjaAPI, Swagger
# TODO: explore adding https://eadwincode.github.io/django-ninja-extra/
import archivebox
from plugins_sys.config.apps import SHELL_CONFIG
from archivebox.config import SHELL_CONFIG, VERSION
from api.auth import API_AUTH_METHODS
@ -32,7 +31,7 @@ html_description=f'''
<li>📚 ArchiveBox Documentation: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Github Wiki</a></li>
<li>📜 See the API source code: <a href="https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/api"><code>archivebox/api/</code></a></li>
</ul>
<small>Served by ArchiveBox v{archivebox.VERSION} (<a href="https://github.com/ArchiveBox/ArchiveBox/commit/{COMMIT_HASH}"><code>{COMMIT_HASH[:8]}</code></a>), API powered by <a href="https://django-ninja.dev/"><code>django-ninja</code></a>.</small>
<small>Served by ArchiveBox v{VERSION} (<a href="https://github.com/ArchiveBox/ArchiveBox/commit/{COMMIT_HASH}"><code>{COMMIT_HASH[:8]}</code></a>), API powered by <a href="https://django-ninja.dev/"><code>django-ninja</code></a>.</small>
'''

View file

@ -13,7 +13,7 @@ from ..main import (
schedule,
)
from ..util import ansi_to_html
from ..config import ONLY_NEW
from ..config.legacy import ONLY_NEW
from .auth import API_AUTH_METHODS

View file

@ -4,7 +4,6 @@ __command__ = 'archivebox'
import sys
import argparse
import threading
import archivebox
from time import sleep
from collections.abc import Mapping
@ -12,6 +11,7 @@ from collections.abc import Mapping
from typing import Optional, List, IO, Union, Iterable
from pathlib import Path
from archivebox.config import DATA_DIR
from ..misc.checks import check_data_folder, check_migrations
from ..misc.logging import stderr
@ -149,7 +149,7 @@ def run_subcommand(subcommand: str,
subcommand_args = subcommand_args or []
if subcommand not in meta_cmds:
from ..config import setup_django, CONFIG
from ..config.legacy import setup_django, CONFIG
cmd_requires_db = subcommand in archive_cmds
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
@ -234,12 +234,12 @@ def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: st
subcommand=command.subcommand,
subcommand_args=command.subcommand_args,
stdin=stdin or None,
pwd=pwd or archivebox.DATA_DIR,
pwd=pwd or DATA_DIR,
)
run_subcommand(
subcommand=command.subcommand,
subcommand_args=command.subcommand_args,
stdin=stdin or None,
pwd=pwd or archivebox.DATA_DIR,
pwd=pwd or DATA_DIR,
)

View file

@ -11,7 +11,7 @@ from typing import List, Optional, IO
from ..main import add
from ..util import docstring
from ..parsers import PARSERS
from ..config import OUTPUT_DIR, ONLY_NEW
from ..config.legacy import OUTPUT_DIR, ONLY_NEW
from ..logging_util import SmartFormatter, accept_stdin, stderr

View file

@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import config
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, accept_stdin

View file

@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import help
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin

View file

@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import init
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin

View file

@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import list_all
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
from ..index import (
LINK_FILTERS,
get_indexed_folders,

View file

@ -9,7 +9,7 @@ from typing import Optional, List, IO
from ..main import manage
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
@docstring(manage.__doc__)

View file

@ -11,7 +11,7 @@ from typing import List, Optional, IO
from ..main import oneshot
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, accept_stdin, stderr

View file

@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import remove
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, accept_stdin

View file

@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import schedule
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin

View file

@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import server
from ..util import docstring
from ..config import OUTPUT_DIR, BIND_ADDR
from ..config.legacy import OUTPUT_DIR, BIND_ADDR
from ..logging_util import SmartFormatter, reject_stdin
@docstring(server.__doc__)

View file

@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import setup
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin

View file

@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import shell
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin

View file

@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import status
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin

View file

@ -10,7 +10,7 @@ from typing import List, Optional, IO
from ..main import update
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
from ..index import (
LINK_FILTERS,
get_indexed_folders,

View file

@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import version
from ..util import docstring
from ..config import OUTPUT_DIR
from ..config.legacy import OUTPUT_DIR
from ..logging_util import SmartFormatter, reject_stdin

View file

@ -32,7 +32,7 @@ os.environ.update(TEST_CONFIG)
from ..main import init
from ..index import load_main_index
from ..config import (
from ..config.legacy import (
SQL_INDEX_FILENAME,
JSON_INDEX_FILENAME,
HTML_INDEX_FILENAME,

View file

@ -0,0 +1,26 @@
__package__ = 'archivebox.config'
from .constants import CONSTANTS, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR, VERSION
from .defaults import (
SHELL_CONFIG,
STORAGE_CONFIG,
GENERAL_CONFIG,
SERVER_CONFIG,
ARCHIVING_CONFIG,
SEARCH_BACKEND_CONFIG,
)
__all__ = [
'CONSTANTS',
'PACKAGE_DIR',
'DATA_DIR',
'ARCHIVE_DIR',
'VERSION',
'SHELL_CONFIG',
'STORAGE_CONFIG',
'GENERAL_CONFIG',
'SERVER_CONFIG',
'ARCHIVING_CONFIG',
'SEARCH_BACKEND_CONFIG',
]
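
With this __init__.py in place, callers get one flat import surface for all config. A usage sketch (the printed values are illustrative, not guaranteed):

    from archivebox.config import CONSTANTS, SERVER_CONFIG, VERSION

    print(VERSION)                    # e.g. '0.8.5' or 'dev'
    print(CONSTANTS.DATABASE_FILE)    # <DATA_DIR>/index.sqlite3
    print(SERVER_CONFIG.FOOTER_INFO)  # configurable server footer text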

archivebox/config/apps.py (new file, +58 lines)
View file

@ -0,0 +1,58 @@
__package__ = 'archivebox.config'
from typing import List
from pydantic import InstanceOf
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_hook import BaseHook
from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
from .defaults import (
ShellConfig, # noqa: F401
StorageConfig, # noqa: F401
GeneralConfig, # noqa: F401
ServerConfig, # noqa: F401
ArchivingConfig, # noqa: F401
SearchBackendConfig, # noqa: F401
SHELL_CONFIG,
STORAGE_CONFIG,
GENERAL_CONFIG,
SERVER_CONFIG,
ARCHIVING_CONFIG,
SEARCH_BACKEND_CONFIG,
)
###################### Config ##########################
class ConfigPlugin(BasePlugin):
app_label: str = 'CONFIG'
verbose_name: str = 'Configuration'
hooks: List[InstanceOf[BaseHook]] = [
SHELL_CONFIG,
GENERAL_CONFIG,
STORAGE_CONFIG,
SERVER_CONFIG,
ARCHIVING_CONFIG,
SEARCH_BACKEND_CONFIG,
]
PLUGIN = ConfigPlugin()
DJANGO_APP = PLUGIN.AppConfig
# # register django apps
# @abx.hookimpl
# def get_INSTALLED_APPS():
# return [DJANGO_APP.name]
# # register configs
# @abx.hookimpl
# def register_CONFIG():
# return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values()
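
Other plugins are expected to follow the same shape as ConfigPlugin. A hypothetical user plugin, assuming BasePlugin and BaseHook behave as they are used above:

    from typing import List
    from pydantic import InstanceOf
    from abx.archivebox.base_plugin import BasePlugin
    from abx.archivebox.base_hook import BaseHook

    class MyPlugin(BasePlugin):
        app_label: str = 'MYPLUGIN'
        verbose_name: str = 'My Plugin'
        hooks: List[InstanceOf[BaseHook]] = []   # config sets, binaries, etc.

    PLUGIN = MyPlugin()
    DJANGO_APP = PLUGIN.AppConfig                # same pattern as ConfigPlugin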

View file

@ -1,27 +1,46 @@
__package__ = 'archivebox'
__package__ = 'archivebox.config'
import os
import re
from typing import Dict
from pathlib import Path
import importlib.metadata
from benedict import benedict
import archivebox
from .misc.logging import DEFAULT_CLI_COLORS
from ..misc.logging import DEFAULT_CLI_COLORS
###################### Config ##########################
VERSION = archivebox.VERSION
PACKAGE_DIR = archivebox.PACKAGE_DIR
DATA_DIR = archivebox.DATA_DIR
ARCHIVE_DIR = archivebox.ARCHIVE_DIR
PACKAGE_DIR = Path(__file__).resolve().parent.parent # archivebox source code dir
DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir
ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir
PACKAGE_DIR_NAME: str = archivebox.PACKAGE_DIR.name
def _detect_installed_version():
"""Autodetect the installed archivebox version by using pip package metadata or pyproject.toml file"""
try:
return importlib.metadata.version('archivebox')  # __package__ is 'archivebox.config' here, so pass the dist name explicitly
except importlib.metadata.PackageNotFoundError:
try:
pyproject_config = (PACKAGE_DIR / 'pyproject.toml').read_text()
for line in pyproject_config.splitlines():  # iterate lines, not characters
if line.startswith('version = '):
return line.split(' = ', 1)[-1].strip('"')
except FileNotFoundError:
# building docs, pyproject.toml is not available
return 'dev'
raise Exception('Failed to detect installed archivebox version!')
VERSION = _detect_installed_version()
__version__ = VERSION
PACKAGE_DIR_NAME: str = PACKAGE_DIR.name
TEMPLATES_DIR_NAME: str = 'templates'
TEMPLATES_DIR: Path = archivebox.PACKAGE_DIR / TEMPLATES_DIR_NAME
TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME
STATIC_DIR: Path = TEMPLATES_DIR / 'static'
USER_PLUGINS_DIR_NAME: str = 'user_plugins'
CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates'
@ -35,16 +54,16 @@ LOGS_DIR_NAME: str = 'logs'
LIB_DIR_NAME: str = 'lib'
TMP_DIR_NAME: str = 'tmp'
OUTPUT_DIR: Path = archivebox.DATA_DIR
ARCHIVE_DIR: Path = archivebox.DATA_DIR / ARCHIVE_DIR_NAME
SOURCES_DIR: Path = archivebox.DATA_DIR / SOURCES_DIR_NAME
PERSONAS_DIR: Path = archivebox.DATA_DIR / PERSONAS_DIR_NAME
CACHE_DIR: Path = archivebox.DATA_DIR / CACHE_DIR_NAME
LOGS_DIR: Path = archivebox.DATA_DIR / LOGS_DIR_NAME
LIB_DIR: Path = archivebox.DATA_DIR / LIB_DIR_NAME
TMP_DIR: Path = archivebox.DATA_DIR / TMP_DIR_NAME
CUSTOM_TEMPLATES_DIR: Path = archivebox.DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
USER_PLUGINS_DIR: Path = archivebox.DATA_DIR / USER_PLUGINS_DIR_NAME
OUTPUT_DIR: Path = DATA_DIR
ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME
CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME
LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME
LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME
TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME
CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME
LIB_PIP_DIR: Path = LIB_DIR / 'pip'
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
@ -55,9 +74,9 @@ BIN_DIR: Path = LIB_BIN_DIR
CONFIG_FILENAME: str = 'ArchiveBox.conf'
SQL_INDEX_FILENAME: str = 'index.sqlite3'
CONFIG_FILE: Path = archivebox.DATA_DIR / CONFIG_FILENAME
DATABASE_FILE: Path = archivebox.DATA_DIR / SQL_INDEX_FILENAME
QUEUE_DATABASE_FILE: Path = archivebox.DATA_DIR / SQL_INDEX_FILENAME.replace('index.', 'queue.')
CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME
DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME
QUEUE_DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME.replace('index.', 'queue.')
JSON_INDEX_FILENAME: str = 'index.json'
HTML_INDEX_FILENAME: str = 'index.html'
@ -125,7 +144,7 @@ DATA_DIR_NAMES: frozenset[str] = frozenset((
CUSTOM_TEMPLATES_DIR_NAME,
USER_PLUGINS_DIR_NAME,
))
DATA_DIRS: frozenset[Path] = frozenset(archivebox.DATA_DIR / dirname for dirname in DATA_DIR_NAMES)
DATA_DIRS: frozenset[Path] = frozenset(DATA_DIR / dirname for dirname in DATA_DIR_NAMES)
DATA_FILE_NAMES: frozenset[str] = frozenset((
CONFIG_FILENAME,
SQL_INDEX_FILENAME,
@ -160,9 +179,9 @@ ALLOWED_IN_OUTPUT_DIR: frozenset[str] = frozenset((
CODE_LOCATIONS = benedict({
'PACKAGE_DIR': {
'path': (archivebox.PACKAGE_DIR).resolve(),
'path': (PACKAGE_DIR).resolve(),
'enabled': True,
'is_valid': (archivebox.PACKAGE_DIR / '__main__.py').exists(),
'is_valid': (PACKAGE_DIR / '__main__.py').exists(),
},
'LIB_DIR': {
'path': LIB_DIR.resolve(),
@ -188,10 +207,10 @@ CODE_LOCATIONS = benedict({
DATA_LOCATIONS = benedict({
"OUTPUT_DIR": {
"path": archivebox.DATA_DIR.resolve(),
"path": DATA_DIR.resolve(),
"enabled": True,
"is_valid": DATABASE_FILE.exists(),
"is_mount": os.path.ismount(archivebox.DATA_DIR.resolve()),
"is_mount": os.path.ismount(DATA_DIR.resolve()),
},
"CONFIG_FILE": {
"path": CONFIG_FILE.resolve(),

View file

@ -1,24 +1,21 @@
__package__ = 'plugins_sys.config'
__package__ = 'archivebox.config'
import os
import sys
import shutil
from typing import List, ClassVar, Dict, Optional
from typing import ClassVar, Dict, Optional
from datetime import datetime
from pathlib import Path
from rich import print
from pydantic import InstanceOf, Field, field_validator, model_validator, computed_field
from pydantic import Field, field_validator, model_validator, computed_field
from django.utils.crypto import get_random_string
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_hook import BaseHook
import archivebox
from archivebox.constants import CONSTANTS, CONSTANTS_CONFIG # noqa
from .constants import CONSTANTS, PACKAGE_DIR
###################### Config ##########################
@ -26,7 +23,7 @@ from archivebox.constants import CONSTANTS, CONSTANTS_CONFIG # noqa
class ShellConfig(BaseConfigSet):
section: ClassVar[ConfigSectionName] = 'SHELL_CONFIG'
DEBUG: bool = Field(default=False)
DEBUG: bool = Field(default=lambda: '--debug' in sys.argv)
IS_TTY: bool = Field(default=sys.stdout.isatty())
USE_COLOR: bool = Field(default=lambda c: c.IS_TTY)
@ -56,7 +53,7 @@ class ShellConfig(BaseConfigSet):
@property
def COMMIT_HASH(self) -> Optional[str]:
try:
git_dir = archivebox.PACKAGE_DIR / '../.git'
git_dir = PACKAGE_DIR / '../.git'
ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
commit_hash = git_dir.joinpath(ref).read_text().strip()
return commit_hash
@ -64,7 +61,7 @@ class ShellConfig(BaseConfigSet):
pass
try:
return list((archivebox.PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
return list((PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
except Exception:
pass
@ -77,7 +74,7 @@ class ShellConfig(BaseConfigSet):
docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
return docker_build_end_time
src_last_modified_unix_timestamp = (archivebox.PACKAGE_DIR / 'config.py').stat().st_mtime
src_last_modified_unix_timestamp = (PACKAGE_DIR / 'package.json').stat().st_mtime
return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')
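
The COMMIT_HASH property above resolves the hash by hand rather than shelling out to git: HEAD is dereferenced to a ref, then the hash that ref points at is read. The same two-step lookup as a standalone sketch:

    from pathlib import Path

    def read_commit_hash(package_dir: Path) -> str | None:
        git_dir = package_dir / '../.git'
        try:
            # HEAD is either a bare hash or 'ref: refs/heads/<branch>'
            ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
            return git_dir.joinpath(ref).read_text().strip()
        except Exception:
            return None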
@ -227,39 +224,3 @@ class SearchBackendConfig(BaseConfigSet):
SEARCH_BACKEND_CONFIG = SearchBackendConfig()
class ConfigPlugin(BasePlugin):
app_label: str = 'CONFIG'
verbose_name: str = 'Configuration'
hooks: List[InstanceOf[BaseHook]] = [
SHELL_CONFIG,
GENERAL_CONFIG,
STORAGE_CONFIG,
SERVER_CONFIG,
ARCHIVING_CONFIG,
SEARCH_BACKEND_CONFIG,
]
# def register(self, settings, parent_plugin=None):
# try:
# super().register(settings, parent_plugin=parent_plugin)
# except Exception as e:
# print(f'[red][X] Error registering config plugin: {e}[/red]', file=sys.stderr)
PLUGIN = ConfigPlugin()
DJANGO_APP = PLUGIN.AppConfig
# # register django apps
# @abx.hookimpl
# def get_INSTALLED_APPS():
# return [DJANGO_APP.name]
# # register configs
# @abx.hookimpl
# def register_CONFIG():
# return PLUGIN.HOOKS_BY_TYPE['CONFIG'].values()

View file

@ -19,7 +19,7 @@ Documentation:
"""
__package__ = 'archivebox'
__package__ = 'archivebox.config'
import os
import io
@ -38,31 +38,27 @@ from configparser import ConfigParser
from rich.progress import Progress
from rich.console import Console
from benedict import benedict
from pydantic_pkgr import SemVer
import django
from django.db.backends.sqlite3.base import Database as sqlite3
import archivebox
from archivebox.constants import CONSTANTS
from archivebox.constants import *
from pydantic_pkgr import SemVer
from .constants import CONSTANTS, TIMEZONE, OUTPUT_DIR
from .constants import *
from .config_stubs import (
ConfigValue,
ConfigDict,
ConfigDefaultValue,
ConfigDefaultDict,
)
from .misc.logging import (
from ..misc.logging import (
stderr,
hint, # noqa
)
from .plugins_sys.config.apps import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
from .plugins_auth.ldap.apps import LDAP_CONFIG
from .plugins_extractor.favicon.apps import FAVICON_CONFIG
from .defaults import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
from ..plugins_auth.ldap.apps import LDAP_CONFIG
from ..plugins_extractor.favicon.apps import FAVICON_CONFIG
ANSI = SHELL_CONFIG.ANSI
LDAP = LDAP_CONFIG.LDAP_ENABLED
@ -218,7 +214,7 @@ def get_real_name(key: str) -> str:
# These are derived/computed values calculated *after* all user-provided config values are ingested
# they appear in `archivebox config` output and are intended to be read-only for the user
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'PACKAGE_DIR': {'default': lambda c: archivebox.PACKAGE_DIR.resolve()},
'PACKAGE_DIR': {'default': lambda c: CONSTANTS.PACKAGE_DIR.resolve()},
'TEMPLATES_DIR': {'default': lambda c: c['PACKAGE_DIR'] / CONSTANTS.TEMPLATES_DIR_NAME},
'CUSTOM_TEMPLATES_DIR': {'default': lambda c: c['CUSTOM_TEMPLATES_DIR'] and Path(c['CUSTOM_TEMPLATES_DIR'])},
@ -259,8 +255,8 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
# 'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)},
# 'DATA_LOCATIONS': {'default': lambda c: get_data_locations(c)},
'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
}
@ -273,7 +269,7 @@ def load_config_val(key: str,
default: ConfigDefaultValue=None,
type: Optional[Type]=None,
aliases: Optional[Tuple[str, ...]]=None,
config: Optional[ConfigDict]=None,
config: Optional[benedict]=None,
env_vars: Optional[os._Environ]=None,
config_file_vars: Optional[Dict[str, str]]=None) -> ConfigValue:
"""parse bool, int, and str key=value pairs from env"""
@ -334,16 +330,16 @@ def load_config_val(key: str,
raise Exception('Config values can only be str, bool, int, or json')
def load_config_file(out_dir: str | None=archivebox.DATA_DIR) -> Optional[ConfigDict]:
def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedict]:
"""load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
config_path = archivebox.CONSTANTS.CONFIG_FILE
config_path = CONSTANTS.CONFIG_FILE
if config_path.exists():
config_file = ConfigParser()
config_file.optionxform = str
config_file.read(config_path)
# flatten into one namespace
config_file_vars = ConfigDict({
config_file_vars = benedict({
key.upper(): val
for section, options in config_file.items()
for key, val in options.items()
@ -354,10 +350,10 @@ def load_config_file(out_dir: str | None=archivebox.DATA_DIR) -> Optional[Config
return None
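
The flattening above collapses all INI sections into one upper-cased namespace. A self-contained sketch of that behavior (the file path is assumed):

    from configparser import ConfigParser
    from benedict import benedict

    config_file = ConfigParser()
    config_file.optionxform = str    # preserve key case instead of lowercasing
    config_file.read('ArchiveBox.conf')

    config_file_vars = benedict({
        key.upper(): val
        for section, options in config_file.items()
        for key, val in options.items()
    })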
def write_config_file(config: Dict[str, str], out_dir: str | None=archivebox.DATA_DIR) -> ConfigDict:
def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA_DIR) -> benedict:
"""load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
from .system import atomic_write
from ..system import atomic_write
CONFIG_HEADER = (
"""# This is the config file for your ArchiveBox collection.
@ -373,7 +369,7 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=archivebox.DAT
""")
config_path = archivebox.CONSTANTS.CONFIG_FILE
config_path = CONSTANTS.CONFIG_FILE
if not config_path.exists():
atomic_write(config_path, CONFIG_HEADER)
@ -394,7 +390,7 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=archivebox.DAT
existing_config = dict(config_file[section])
else:
existing_config = {}
config_file[section] = ConfigDict({**existing_config, key: val})
config_file[section] = benedict({**existing_config, key: val})
# always make sure there's a SECRET_KEY defined for Django
existing_secret_key = None
@ -426,15 +422,15 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=archivebox.DAT
if Path(f'{config_path}.bak').exists():
os.remove(f'{config_path}.bak')
return {
return benedict({
key.upper(): CONFIG.get(key.upper())
for key in config.keys()
}
})
def load_config(defaults: ConfigDefaultDict,
config: Optional[ConfigDict]=None,
config: Optional[benedict]=None,
out_dir: Optional[str]=None,
env_vars: Optional[os._Environ]=None,
config_file_vars: Optional[Dict[str, str]]=None) -> benedict:
@ -442,7 +438,7 @@ def load_config(defaults: ConfigDefaultDict,
env_vars = env_vars or os.environ
config_file_vars = config_file_vars or load_config_file(out_dir=out_dir)
extended_config: ConfigDict = config.copy() if config else {}
extended_config = benedict(config.copy() if config else {})
for key, default in defaults.items():
try:
# print('LOADING CONFIG KEY:', key, 'DEFAULT=', default)
@ -614,7 +610,7 @@ def wget_supports_compression(config):
return False
def get_dependency_info(config: ConfigDict) -> ConfigValue:
def get_dependency_info(config: benedict) -> ConfigValue:
return {
# 'PYTHON_BINARY': {
# 'path': bin_path(config['PYTHON_BINARY']),
@ -733,7 +729,7 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
def load_all_config():
CONFIG: ConfigDict = ConfigDict()
CONFIG = benedict()
for section_name, section_config in CONFIG_SCHEMA.items():
# print('LOADING CONFIG SECTION:', section_name)
CONFIG = load_config(section_config, CONFIG)
@ -742,7 +738,7 @@ def load_all_config():
return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG)
# add all final config values in CONFIG to globals in this file
CONFIG: ConfigDict = load_all_config()
CONFIG: benedict = load_all_config()
globals().update(CONFIG)
# this lets us do: from .config import DEBUG, MEDIA_TIMEOUT, ...
@ -773,7 +769,7 @@ if not SHELL_CONFIG.SHOW_PROGRESS:
# recreate rich console obj based on new config values
CONSOLE = Console()
from .misc import logging
from ..misc import logging
logging.CONSOLE = CONSOLE
@ -788,8 +784,8 @@ def bump_startup_progress_bar():
def setup_django_minimal():
# sys.path.append(str(archivebox.PACKAGE_DIR))
# os.environ.setdefault('OUTPUT_DIR', str(archivebox.DATA_DIR))
# sys.path.append(str(CONSTANTS.PACKAGE_DIR))
# os.environ.setdefault('OUTPUT_DIR', str(CONSTANTS.DATA_DIR))
# os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
# django.setup()
raise Exception("don't use this anymore")
@ -797,7 +793,7 @@ def setup_django_minimal():
DJANGO_SET_UP = False
def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CONFIG, in_memory_db=False) -> None:
global INITIAL_STARTUP_PROGRESS
global INITIAL_STARTUP_PROGRESS_TASK
global DJANGO_SET_UP
@ -808,9 +804,9 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS:
INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
output_dir = out_dir or archivebox.DATA_DIR
output_dir = out_dir or CONSTANTS.DATA_DIR
assert isinstance(output_dir, Path) and isinstance(archivebox.PACKAGE_DIR, Path)
assert isinstance(output_dir, Path) and isinstance(CONSTANTS.PACKAGE_DIR, Path)
bump_startup_progress_bar()
try:
@ -842,7 +838,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
command = ' '.join(sys.argv)
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
f.write(f"\n> {command}; TS={ts} VERSION={archivebox.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")
f.write(f"\n> {command}; TS={ts} VERSION={CONSTANTS.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")
if check_db:
# Create cache table in DB if needed
@ -861,9 +857,9 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
for conn in connections.all():
conn.close_if_unusable_or_obsolete()
sql_index_path = archivebox.CONSTANTS.DATABASE_FILE
sql_index_path = CONSTANTS.DATABASE_FILE
assert sql_index_path.exists(), (
f'No database file {sql_index_path} found in: {archivebox.DATA_DIR} (Are you in an ArchiveBox collection directory?)')
f'No database file {sql_index_path} found in: {CONSTANTS.DATA_DIR} (Are you in an ArchiveBox collection directory?)')
bump_startup_progress_bar()
@ -876,7 +872,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: ConfigDict=C
logfire.configure()
logfire.instrument_django(is_sql_commentor_enabled=True)
logfire.info(f'Started ArchiveBox v{archivebox.VERSION}', argv=sys.argv)
logfire.info(f'Started ArchiveBox v{CONSTANTS.VERSION}', argv=sys.argv)
except KeyboardInterrupt:
raise SystemExit(2)
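
A minimal boot sequence using the legacy helpers above, assuming it is run from inside an existing collection directory:

    from pathlib import Path
    from archivebox.config.legacy import setup_django, CONFIG

    setup_django(out_dir=Path.cwd(), check_db=True, config=CONFIG)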

View file

@ -13,8 +13,7 @@ from django.utils.html import format_html, mark_safe
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
import archivebox
from archivebox.config import CONSTANTS
from archivebox.util import parse_date
@ -381,7 +380,7 @@ def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
assert request.user.is_superuser, "Must be a superuser to view configuration settings."
log_files = archivebox.CONSTANTS.LOGS_DIR.glob("*.log")
log_files = CONSTANTS.LOGS_DIR.glob("*.log")
log_files = sorted(log_files, key=os.path.getmtime)[::-1]
rows = {
@ -419,7 +418,7 @@ def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
from django.conf import settings
log_file = [logfile for logfile in archivebox.CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]
log_file = [logfile for logfile in CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]
log_text = log_file.read_text()
log_stat = log_file.stat()

View file

@ -18,11 +18,10 @@ from django.template import Template, RequestContext
from django.conf import settings
from django import forms
import archivebox
from signal_webhooks.admin import WebhookAdmin
from signal_webhooks.utils import get_webhook_model
# from abx.archivebox.admin import CustomPlugin
from archivebox.config import VERSION
from ..util import htmldecode, urldecode
@ -30,7 +29,7 @@ from core.models import Snapshot, ArchiveResult, Tag
from core.mixins import SearchResultsAdminMixin
from api.models import APIToken
from abid_utils.admin import ABIDModelAdmin
from queues.tasks import bg_archive_links, bg_archive_link, bg_add
from queues.tasks import bg_archive_links, bg_add
from index.html import snapshot_icons
from logging_util import printable_filesize
@ -40,7 +39,7 @@ from extractors import archive_links
CONFIG = settings.CONFIG
GLOBAL_CONTEXT = {'VERSION': archivebox.VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False}
GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False}
# Admin URLs
# /admin/

View file

@ -1,7 +1,7 @@
__package__ = 'archivebox.core'
from ..config import (
from ..config.legacy import (
LDAP
)

View file

@ -1,4 +1,4 @@
from ..config import (
from ..config.legacy import (
LDAP_CREATE_SUPERUSER
)

View file

@ -5,7 +5,7 @@ from django.utils import timezone
from django.contrib.auth.middleware import RemoteUserMiddleware
from django.core.exceptions import ImproperlyConfigured
from ..config import PUBLIC_SNAPSHOTS, REVERSE_PROXY_USER_HEADER, REVERSE_PROXY_WHITELIST
from ..config.legacy import PUBLIC_SNAPSHOTS, REVERSE_PROXY_USER_HEADER, REVERSE_PROXY_WHITELIST
def detect_timezone(request, activate: bool=True):

View file

@ -1,14 +1,18 @@
# Generated by Django 3.0.8 on 2020-11-04 12:25
import os
import json
from pathlib import Path
from django.db import migrations, models
import django.db.models.deletion
from config import CONFIG
from index.json import to_json
DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir
ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir
try:
JSONField = models.JSONField
except AttributeError:
@ -22,7 +26,7 @@ def forwards_func(apps, schema_editor):
snapshots = Snapshot.objects.all()
for snapshot in snapshots:
out_dir = Path(CONFIG['ARCHIVE_DIR']) / snapshot.timestamp
out_dir = ARCHIVE_DIR / snapshot.timestamp
try:
with open(out_dir / "index.json", "r") as f:
@ -57,7 +61,7 @@ def forwards_func(apps, schema_editor):
def verify_json_index_integrity(snapshot):
results = snapshot.archiveresult_set.all()
out_dir = Path(CONFIG['ARCHIVE_DIR']) / snapshot.timestamp
out_dir = ARCHIVE_DIR / snapshot.timestamp
with open(out_dir / "index.json", "r") as f:
index = json.load(f)

View file

@ -17,10 +17,9 @@ from django.db.models import Case, When, Value, IntegerField
from django.contrib import admin
from django.conf import settings
import archivebox
from archivebox.config import CONSTANTS
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
from queues.tasks import bg_archive_snapshot
from ..system import get_dir_size
@ -261,11 +260,11 @@ class Snapshot(ABIDModel):
@cached_property
def link_dir(self):
return str(archivebox.CONSTANTS.ARCHIVE_DIR / self.timestamp)
return str(CONSTANTS.ARCHIVE_DIR / self.timestamp)
@cached_property
def archive_path(self):
return '{}/{}'.format(archivebox.CONSTANTS.ARCHIVE_DIR_NAME, self.timestamp)
return '{}/{}'.format(CONSTANTS.ARCHIVE_DIR_NAME, self.timestamp)
@cached_property
def archive_size(self):
@ -375,17 +374,17 @@ class Snapshot(ABIDModel):
# def get_storage_dir(self, create=True, symlink=True) -> Path:
# date_str = self.bookmarked_at.strftime('%Y%m%d')
# domain_str = domain(self.url)
# abs_storage_dir = Path(archivebox.CONSTANTS.ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
# abs_storage_dir = Path(CONSTANTS.ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
# if create and not abs_storage_dir.is_dir():
# abs_storage_dir.mkdir(parents=True, exist_ok=True)
# if symlink:
# LINK_PATHS = [
# Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
# # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
# Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
# Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
# Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
# # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
# Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
# Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
# ]
# for link_path in LINK_PATHS:
# link_path.parent.mkdir(parents=True, exist_ok=True)
@ -524,18 +523,18 @@ class ArchiveResult(ABIDModel):
# def get_storage_dir(self, create=True, symlink=True):
# date_str = self.snapshot.bookmarked_at.strftime('%Y%m%d')
# domain_str = domain(self.snapshot.url)
# abs_storage_dir = Path(archivebox.CONSTANTS.ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
# abs_storage_dir = Path(CONSTANTS.ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
# if create and not abs_storage_dir.is_dir():
# abs_storage_dir.mkdir(parents=True, exist_ok=True)
# if symlink:
# LINK_PATHS = [
# Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
# # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
# # Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
# Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
# Path(archivebox.CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
# Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
# # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
# # Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
# Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
# Path(CONSTANTS.ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
# ]
# for link_path in LINK_PATHS:
# link_path.parent.mkdir(parents=True, exist_ok=True)

View file

@ -13,20 +13,15 @@ import abx.archivebox
import abx.archivebox.use
import abx.django.use
import archivebox
from archivebox.constants import CONSTANTS
from archivebox.config import VERSION, DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS # noqa
from ..config import CONFIG
from ..config.legacy import CONFIG
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]
VERSION = archivebox.VERSION
PACKAGE_DIR = archivebox.PACKAGE_DIR
DATA_DIR = archivebox.DATA_DIR
ARCHIVE_DIR = archivebox.ARCHIVE_DIR
################################################################################
### ArchiveBox Plugin Settings
@ -40,14 +35,14 @@ PLUGIN_HOOKSPECS = [
abx.register_hookspecs(PLUGIN_HOOKSPECS)
BUILTIN_PLUGIN_DIRS = {
'plugins_sys': archivebox.PACKAGE_DIR / 'plugins_sys',
'plugins_pkg': archivebox.PACKAGE_DIR / 'plugins_pkg',
'plugins_auth': archivebox.PACKAGE_DIR / 'plugins_auth',
'plugins_search': archivebox.PACKAGE_DIR / 'plugins_search',
'plugins_extractor': archivebox.PACKAGE_DIR / 'plugins_extractor',
'archivebox': PACKAGE_DIR,
'plugins_pkg': PACKAGE_DIR / 'plugins_pkg',
'plugins_auth': PACKAGE_DIR / 'plugins_auth',
'plugins_search': PACKAGE_DIR / 'plugins_search',
'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
}
USER_PLUGIN_DIRS = {
'user_plugins': archivebox.DATA_DIR / 'user_plugins',
'user_plugins': DATA_DIR / 'user_plugins',
}
BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)
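
USER_PLUGIN_DIRS is presumably scanned the same way as the builtin dirs; a hedged sketch of how the two sets would be merged (the merge itself is not shown in this hunk):

    USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS)
    ALL_PLUGINS = {**BUILTIN_PLUGINS, **USER_PLUGINS}   # hypothetical merge order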
@ -105,6 +100,7 @@ INSTALLED_APPS = [
'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions
# Our ArchiveBox-provided apps
#'config', # ArchiveBox config settings
'queues', # handles starting and managing background workers and processes
'abid_utils', # handles ABID ID creation, handling, and models
'core', # core django model with Snapshot, ArchiveResult, etc.
@ -481,41 +477,41 @@ ADMIN_DATA_VIEWS = {
},
{
"route": "binaries/",
"view": "plugins_sys.config.views.binaries_list_view",
"view": "archivebox.config.views.binaries_list_view",
"name": "Binaries",
"items": {
"route": "<str:key>/",
"view": "plugins_sys.config.views.binary_detail_view",
"view": "archivebox.config.views.binary_detail_view",
"name": "binary",
},
},
{
"route": "plugins/",
"view": "plugins_sys.config.views.plugins_list_view",
"view": "archivebox.config.views.plugins_list_view",
"name": "Plugins",
"items": {
"route": "<str:key>/",
"view": "plugins_sys.config.views.plugin_detail_view",
"view": "archivebox.config.views.plugin_detail_view",
"name": "plugin",
},
},
{
"route": "workers/",
"view": "plugins_sys.config.views.worker_list_view",
"view": "archivebox.config.views.worker_list_view",
"name": "Workers",
"items": {
"route": "<str:key>/",
"view": "plugins_sys.config.views.worker_detail_view",
"view": "archivebox.config.views.worker_detail_view",
"name": "worker",
},
},
{
"route": "logs/",
"view": "plugins_sys.config.views.log_list_view",
"view": "archivebox.config.views.log_list_view",
"name": "Logs",
"items": {
"route": "<str:key>/",
"view": "plugins_sys.config.views.log_detail_view",
"view": "archivebox.config.views.log_detail_view",
"name": "log",
},
},

View file

@ -7,7 +7,7 @@ import logging
import pydantic
import django.template
import archivebox
from archivebox.config import CONSTANTS
from ..misc.logging import IS_TTY
@ -52,7 +52,7 @@ class CustomOutboundWebhookLogFormatter(logging.Formatter):
ERROR_LOG = tempfile.NamedTemporaryFile().name
LOGS_DIR = archivebox.DATA_DIR / 'logs'
LOGS_DIR = CONSTANTS.LOGS_DIR
if LOGS_DIR.is_dir():
ERROR_LOG = (LOGS_DIR / 'errors.log')

View file

@ -10,7 +10,7 @@ from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthC
from .serve_static import serve_static
# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
# from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
# from .config.legacy import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
# GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE}

View file

@ -20,8 +20,6 @@ from django.utils.decorators import method_decorator
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
import archivebox
from archivebox.constants import CONSTANTS
from core.models import Snapshot
from core.forms import AddLinkForm
@ -29,10 +27,10 @@ from core.admin import result_url
from queues.tasks import bg_add
from ..plugins_sys.config.apps import SHELL_CONFIG, SERVER_CONFIG
from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG
from ..plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
from ..config import (
from ..config.legacy import (
CONFIG_SCHEMA,
DYNAMIC_CONFIG_SCHEMA,
USER_CONFIG,
@ -381,7 +379,7 @@ class PublicIndexView(ListView):
def get_context_data(self, **kwargs):
return {
**super().get_context_data(**kwargs),
'VERSION': archivebox.VERSION,
'VERSION': VERSION,
'COMMIT_HASH': SHELL_CONFIG.COMMIT_HASH,
'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
}
@ -451,7 +449,7 @@ class AddView(UserPassesTestMixin, FormView):
'title': "Add URLs",
# We can't just call request.build_absolute_uri in the template, because it would include query parameters
'absolute_add_path': self.request.build_absolute_uri(self.request.path),
'VERSION': archivebox.VERSION,
'VERSION': VERSION,
'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
'stdout': '',
}
@ -469,7 +467,7 @@ class AddView(UserPassesTestMixin, FormView):
"depth": depth,
"parser": parser,
"update_all": False,
"out_dir": archivebox.DATA_DIR,
"out_dir": DATA_DIR,
"created_by_id": self.request.user.pk,
}
if extractors:

View file

@ -10,7 +10,7 @@ from datetime import datetime, timezone
from django.db.models import QuerySet
from ..config import (
from ..config.legacy import (
SAVE_ALLOWLIST_PTN,
SAVE_DENYLIST_PTN,
)

View file

@ -12,7 +12,7 @@ from ..util import (
is_static_file,
dedupe,
)
from ..config import (
from ..config.legacy import (
TIMEOUT,
CURL_ARGS,
CURL_EXTRA_ARGS,
@ -24,6 +24,7 @@ from ..config import (
)
from ..logging_util import TimedProgress
def get_output_path():
return 'archive.org.txt'

View file

@ -11,7 +11,7 @@ from ..util import (
domain,
dedupe,
)
from ..config import CONFIG
from ..config.legacy import CONFIG
from ..logging_util import TimedProgress

View file

@ -14,7 +14,7 @@ from ..util import (
without_query,
without_fragment,
)
from ..config import CONFIG
from ..config.legacy import CONFIG
from ..logging_util import TimedProgress

View file

@ -11,7 +11,7 @@ from ..util import (
get_headers,
dedupe,
)
from ..config import (
from ..config.legacy import (
TIMEOUT,
CURL_BINARY,
CURL_ARGS,

View file

@ -1,13 +1,12 @@
__package__ = 'archivebox.extractors'
import archivebox
from html.parser import HTMLParser
import io
from pathlib import Path
from typing import Optional
from ..config import (
from archivebox.config import VERSION
from ..config.legacy import (
SAVE_HTMLTOTEXT,
TIMEOUT,
)
@ -154,7 +153,7 @@ def save_htmltotext(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
return ArchiveResult(
cmd=cmd,
pwd=str(out_dir),
cmd_version=archivebox.__version__,
cmd_version=VERSION,
output=output,
status=status,
index_texts=[extracted_text] if extracted_text else [],

View file

@ -13,7 +13,7 @@ from ..util import (
is_static_file,
dedupe,
)
from ..config import (
from ..config.legacy import (
TIMEOUT,
SAVE_MERCURY,
DEPENDENCIES,

View file

@ -12,7 +12,7 @@ from ..util import (
htmldecode,
dedupe,
)
from ..config import (
from ..config.legacy import (
TIMEOUT,
CHECK_SSL_VALIDITY,
SAVE_TITLE,

View file

@ -17,7 +17,7 @@ from ..util import (
urldecode,
dedupe,
)
from ..config import (
from ..config.legacy import (
WGET_ARGS,
WGET_EXTRA_ARGS,
TIMEOUT,

View file

@ -12,15 +12,14 @@ from urllib.parse import urlparse
from django.db.models import QuerySet, Q
import archivebox
from archivebox.config import DATA_DIR, CONSTANTS, SEARCH_BACKEND_CONFIG
from ..util import (
scheme,
enforce_types,
ExtendedEncoder,
)
from ..misc.logging import stderr
from ..config import (
from ..config.legacy import (
TIMEOUT,
URL_DENYLIST_PTN,
URL_ALLOWLIST_PTN,
@ -223,28 +222,28 @@ def timed_index_update(out_path: Path):
@enforce_types
def write_main_index(links: List[Link], out_dir: Path=archivebox.DATA_DIR, created_by_id: int | None=None) -> None:
def write_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None:
"""Writes links to sqlite3 file for a given list of links"""
log_indexing_process_started(len(links))
try:
with timed_index_update(archivebox.CONSTANTS.DATABASE_FILE):
with timed_index_update(CONSTANTS.DATABASE_FILE):
write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id)
os.chmod(archivebox.CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
os.chmod(CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
except (KeyboardInterrupt, SystemExit):
stderr('[!] Warning: Still writing index to disk...', color='lightyellow')
stderr(' Run archivebox init to fix any inconsistencies from an ungraceful exit.')
with timed_index_update(archivebox.CONSTANTS.DATABASE_FILE):
with timed_index_update(CONSTANTS.DATABASE_FILE):
write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id)
os.chmod(archivebox.CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
os.chmod(CONSTANTS.DATABASE_FILE, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
raise SystemExit(0)
log_indexing_process_finished()
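
Note the permission handling above: OUTPUT_PERMISSIONS is stored as an octal string, so it must be parsed with base 8 before being passed to os.chmod. For example:

    import os

    OUTPUT_PERMISSIONS = '644'                                    # example value
    os.chmod('index.sqlite3', int(OUTPUT_PERMISSIONS, base=8))    # 0o644, i.e. rw-r--r--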
@enforce_types
def load_main_index(out_dir: Path=archivebox.DATA_DIR, warn: bool=True) -> List[Link]:
def load_main_index(out_dir: Path=DATA_DIR, warn: bool=True) -> List[Link]:
"""parse and load existing index with any new links from import_path merged in"""
from core.models import Snapshot
try:
@ -254,8 +253,8 @@ def load_main_index(out_dir: Path=archivebox.DATA_DIR, warn: bool=True) -> List[
raise SystemExit(0)
@enforce_types
def load_main_index_meta(out_dir: Path=archivebox.DATA_DIR) -> Optional[dict]:
index_path = out_dir / archivebox.CONSTANTS.JSON_INDEX_FILENAME
def load_main_index_meta(out_dir: Path=DATA_DIR) -> Optional[dict]:
index_path = out_dir / CONSTANTS.JSON_INDEX_FILENAME
if index_path.exists():
with open(index_path, 'r', encoding='utf-8') as f:
meta_dict = pyjson.load(f)
@ -377,7 +376,6 @@ def q_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='
return snapshots.filter(q_filter)
def search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='search') -> QuerySet:
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
from ..search import query_search_index
if not SEARCH_BACKEND_CONFIG.USE_SEARCHING_BACKEND:
@ -406,7 +404,7 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
return search_filter(snapshots, filter_patterns, filter_type)
def get_indexed_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
def get_indexed_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""indexed links without checking archive status or data directory validity"""
links = (snapshot.as_link() for snapshot in snapshots.iterator(chunk_size=500))
return {
@ -414,7 +412,7 @@ def get_indexed_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[st
for link in links
}
def get_archived_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
def get_archived_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""indexed links that are archived with a valid data directory"""
links = (snapshot.as_link() for snapshot in snapshots.iterator(chunk_size=500))
return {
@ -422,7 +420,7 @@ def get_archived_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[s
for link in filter(is_archived, links)
}
def get_unarchived_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
def get_unarchived_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""indexed links that are unarchived with no data directory or an empty data directory"""
links = (snapshot.as_link() for snapshot in snapshots.iterator(chunk_size=500))
return {
@ -430,12 +428,12 @@ def get_unarchived_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict
for link in filter(is_unarchived, links)
}
def get_present_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
def get_present_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs that actually exist in the archive/ folder"""
all_folders = {}
for entry in (out_dir / archivebox.CONSTANTS.ARCHIVE_DIR_NAME).iterdir():
for entry in (out_dir / CONSTANTS.ARCHIVE_DIR_NAME).iterdir():
if entry.is_dir():
link = None
try:
@ -447,7 +445,7 @@ def get_present_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[st
return all_folders
def get_valid_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
def get_valid_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs with a valid index matched to the main index and archived content"""
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator(chunk_size=500)]
return {
@ -455,7 +453,7 @@ def get_valid_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str,
for link in filter(is_valid, links)
}
def get_invalid_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
def get_invalid_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs that are invalid for any reason: corrupted/duplicate/orphaned/unrecognized"""
duplicate = get_duplicate_folders(snapshots, out_dir=out_dir)
orphaned = get_orphaned_folders(snapshots, out_dir=out_dir)
@ -464,7 +462,7 @@ def get_invalid_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[st
return {**duplicate, **orphaned, **corrupted, **unrecognized}
def get_duplicate_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
def get_duplicate_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs that conflict with other directories that have the same link URL or timestamp"""
by_url = {}
by_timestamp = {}
@ -472,7 +470,7 @@ def get_duplicate_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[
data_folders = (
str(entry)
for entry in archivebox.CONSTANTS.ARCHIVE_DIR.iterdir()
for entry in CONSTANTS.ARCHIVE_DIR.iterdir()
if entry.is_dir() and not snapshots.filter(timestamp=entry.name).exists()
)
@ -498,11 +496,11 @@ def get_duplicate_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[
duplicate_folders[path] = link
return duplicate_folders
def get_orphaned_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
def get_orphaned_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs that contain a valid index but aren't listed in the main index"""
orphaned_folders = {}
for entry in archivebox.CONSTANTS.ARCHIVE_DIR.iterdir():
for entry in CONSTANTS.ARCHIVE_DIR.iterdir():
if entry.is_dir():
link = None
try:
@ -516,7 +514,7 @@ def get_orphaned_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[s
return orphaned_folders
def get_corrupted_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
def get_corrupted_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs that don't contain a valid index and aren't listed in the main index"""
corrupted = {}
for snapshot in snapshots.iterator(chunk_size=500):
@ -525,11 +523,11 @@ def get_corrupted_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[
corrupted[link.link_dir] = link
return corrupted
def get_unrecognized_folders(snapshots, out_dir: Path=archivebox.DATA_DIR) -> Dict[str, Optional[Link]]:
def get_unrecognized_folders(snapshots, out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
"""dirs that don't contain recognizable archive data and aren't listed in the main index"""
unrecognized_folders: Dict[str, Optional[Link]] = {}
for entry in (Path(out_dir) / archivebox.CONSTANTS.ARCHIVE_DIR_NAME).iterdir():
for entry in (Path(out_dir) / CONSTANTS.ARCHIVE_DIR_NAME).iterdir():
if entry.is_dir():
index_exists = (entry / "index.json").exists()
link = None
@ -594,10 +592,10 @@ def is_unarchived(link: Link) -> bool:
return not link.is_archived
def fix_invalid_folder_locations(out_dir: Path=archivebox.DATA_DIR) -> Tuple[List[str], List[str]]:
def fix_invalid_folder_locations(out_dir: Path=DATA_DIR) -> Tuple[List[str], List[str]]:
fixed = []
cant_fix = []
for entry in os.scandir(out_dir / archivebox.CONSTANTS.ARCHIVE_DIR_NAME):
for entry in os.scandir(out_dir / CONSTANTS.ARCHIVE_DIR_NAME):
if entry.is_dir(follow_symlinks=True):
if (Path(entry.path) / 'index.json').exists():
try:
@ -608,7 +606,7 @@ def fix_invalid_folder_locations(out_dir: Path=archivebox.DATA_DIR) -> Tuple[Lis
continue
if not entry.path.endswith(f'/{link.timestamp}'):
dest = out_dir / archivebox.CONSTANTS.ARCHIVE_DIR_NAME / link.timestamp
dest = out_dir / CONSTANTS.ARCHIVE_DIR_NAME / link.timestamp
if dest.exists():
cant_fix.append(entry.path)
else:

View file

@ -1,6 +1,5 @@
__package__ = 'archivebox.index'
import archivebox
from pathlib import Path
from datetime import datetime, timezone
from collections import defaultdict
@ -19,10 +18,11 @@ from ..util import (
htmlencode,
urldecode,
)
from ..config import (
from archivebox.config.legacy import (
SAVE_ARCHIVE_DOT_ORG,
PREVIEW_ORIGINALS,
)
from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG
MAIN_INDEX_TEMPLATE = 'static_index.html'
MINIMAL_INDEX_TEMPLATE = 'minimal_index.html'
@ -33,11 +33,9 @@ TITLE_LOADING_MSG = 'Not yet archived...'
### Main Links Index
@enforce_types
def parse_html_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[str]:
def parse_html_main_index(out_dir: Path=DATA_DIR) -> Iterator[str]:
"""parse an archive index html file and return the list of urls"""
from plugins_sys.config.constants import CONSTANTS
index_path = Path(out_dir) / CONSTANTS.HTML_INDEX_FILENAME
if index_path.exists():
with open(index_path, 'r', encoding='utf-8') as f:
@ -58,11 +56,9 @@ def generate_index_from_links(links: List[Link], with_headers: bool):
def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) -> str:
"""render the template for the entire main index"""
from plugins_sys.config.apps import SHELL_CONFIG, SERVER_CONFIG
return render_django_template(template, {
'version': archivebox.VERSION,
'git_sha': SHELL_CONFIG.COMMIT_HASH or archivebox.VERSION,
'version': VERSION,
'git_sha': SHELL_CONFIG.COMMIT_HASH or VERSION,
'num_links': str(len(links)),
'date_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d'),
'time_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M'),
@@ -75,7 +71,6 @@ def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) ->
@enforce_types
def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None:
from plugins_sys.config.constants import CONSTANTS
out_dir = out_dir or link.link_dir
rendered_html = link_details_template(link)

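The recurring change in this file is hoisting config imports out of function bodies into a single module-level import. A rough before/after sketch (bodies elided; the names come from the hunks above, the deferred-import motivation is an assumption):

    # before: deferred import inside each function
    def main_index_template(links):
        from plugins_sys.config.apps import SHELL_CONFIG, SERVER_CONFIG
        ...

    # after: one import at module scope
    from archivebox.config import SHELL_CONFIG, SERVER_CONFIG

    def main_index_template(links):
        ...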
View file

@@ -8,7 +8,7 @@ from pathlib import Path
from datetime import datetime, timezone
from typing import List, Optional, Iterator, Any, Union
import archivebox
from archivebox.config import VERSION, DATA_DIR, CONSTANTS, SERVER_CONFIG, SHELL_CONFIG
from .schema import Link
from ..system import atomic_write
@@ -19,7 +19,6 @@ from ..util import enforce_types
@enforce_types
def generate_json_index_from_links(links: List[Link], with_headers: bool):
from django.conf import settings
from plugins_sys.config.apps import SERVER_CONFIG
MAIN_INDEX_HEADER = {
'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
@@ -27,8 +26,8 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
'copyright_info': SERVER_CONFIG.FOOTER_INFO,
'meta': {
'project': 'ArchiveBox',
'version': archivebox.VERSION,
'git_sha': archivebox.VERSION, # not used anymore, but kept for backwards compatibility
'version': VERSION,
'git_sha': VERSION, # not used anymore, but kept for backwards compatibility
'website': 'https://ArchiveBox.io',
'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
'source': 'https://github.com/ArchiveBox/ArchiveBox',
@@ -52,11 +51,9 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
@enforce_types
def parse_json_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[Link]:
def parse_json_main_index(out_dir: Path=DATA_DIR) -> Iterator[Link]:
"""parse an archive index json file and return the list of links"""
from plugins_sys.config.constants import CONSTANTS
index_path = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
if index_path.exists():
with open(index_path, 'r', encoding='utf-8') as f:
@@ -68,7 +65,7 @@ def parse_json_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[Link]:
print(" {lightyellow}! Found an index.json in the project root but couldn't load links from it: {} {}".format(
err.__class__.__name__,
err,
**ANSI,
**SHELL_CONFIG.ANSI,
))
return ()
@@ -94,8 +91,6 @@ def parse_json_main_index(out_dir: Path=archivebox.DATA_DIR) -> Iterator[Link]:
def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:
"""write a json file with some info about the link"""
from plugins_sys.config.constants import CONSTANTS
out_dir = out_dir or link.link_dir
path = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
atomic_write(str(path), link._asdict(extended=True))
@@ -104,7 +99,6 @@ def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:
@enforce_types
def parse_json_link_details(out_dir: Union[Path, str], guess: bool=False) -> Optional[Link]:
"""load the json link index from a given directory"""
from plugins_sys.config.constants import CONSTANTS
existing_index = Path(out_dir) / CONSTANTS.JSON_INDEX_FILENAME
if existing_index.exists():
@@ -121,7 +115,6 @@ def parse_json_link_details(out_dir: Union[Path, str], guess: bool=False) -> Opt
def parse_json_links_details(out_dir: Union[Path, str]) -> Iterator[Link]:
"""read through all the archive data folders and return the parsed links"""
from plugins_sys.config.constants import CONSTANTS
for entry in os.scandir(CONSTANTS.ARCHIVE_DIR):
if entry.is_dir(follow_symlinks=True):

View file

@@ -17,7 +17,7 @@ from dataclasses import dataclass, asdict, field, fields
from django.utils.functional import cached_property
from archivebox.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME
from archivebox.config.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME
from plugins_extractor.favicon.apps import FAVICON_CONFIG
@@ -160,7 +160,7 @@ class Link:
return float(self.timestamp) > float(other.timestamp)
def typecheck(self) -> None:
from ..config import stderr, ANSI
from ..config.legacy import stderr, ANSI
try:
assert self.schema == self.__class__.__name__
assert isinstance(self.timestamp, str) and self.timestamp

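Options that have not yet been migrated into the new config app stay importable from a legacy shim; callers only gain a .legacy segment in the import path. A tiny usage sketch (assumed, based on the imports and stderr() calls elsewhere in this diff):

    from archivebox.config.legacy import stderr, ANSI

    stderr('[X] Something went wrong', color='red')
    print('{green}Done.{reset}'.format(**ANSI))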
View file

@@ -10,7 +10,7 @@ from django.db import transaction
from .schema import Link
from ..util import enforce_types, parse_date
from ..config import (
from ..config.legacy import (
OUTPUT_DIR,
TAG_SEPARATOR_PATTERN,
)

View file

@@ -4,10 +4,8 @@ import re
import os
import sys
import stat
import shutil
import time
import argparse
import archivebox
from math import log
from multiprocessing import Process
@@ -23,6 +21,7 @@ if TYPE_CHECKING:
from rich import print
from rich.panel import Panel
from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG
from .system import get_dir_size
from .util import enforce_types
from .misc.logging import ANSI, stderr
@@ -133,11 +132,8 @@ class TimedProgress:
def __init__(self, seconds, prefix=''):
from plugins_sys.config.apps import SHELL_CONFIG
self.SHOW_PROGRESS = SHELL_CONFIG.SHOW_PROGRESS
self.ANSI = SHELL_CONFIG.ANSI
self.TERM_WIDTH = lambda: shutil.get_terminal_size().columns # lambda so it live-updates when terminal is resized
if self.SHOW_PROGRESS:
self.p = Process(target=progress_bar, args=(seconds, prefix, self.ANSI))
@@ -169,7 +165,7 @@ class TimedProgress:
# clear whole terminal line
try:
sys.stdout.write('\r{}{}\r'.format((' ' * self.TERM_WIDTH()), self.ANSI['reset']))
sys.stdout.write('\r{}{}\r'.format((' ' * SHELL_CONFIG.TERM_WIDTH), self.ANSI['reset']))
except (IOError, BrokenPipeError):
# ignore when the parent proc has stopped listening to our stdout
pass
@@ -182,11 +178,11 @@ def progress_bar(seconds: int, prefix: str='', ANSI: Dict[str, str]=ANSI) -> Non
"""show timer in the form of progress bar, with percentage and seconds remaining"""
output_buf = (sys.stdout or sys.__stdout__ or sys.stderr or sys.__stderr__)
chunk = '█' if output_buf and output_buf.encoding.upper() == 'UTF-8' else '#'
last_width = TERM_WIDTH()
last_width = SHELL_CONFIG.TERM_WIDTH
chunks = last_width - len(prefix) - 20 # number of progress chunks to show (aka max bar width)
try:
for s in range(seconds * chunks):
max_width = TERM_WIDTH()
max_width = SHELL_CONFIG.TERM_WIDTH
if max_width < last_width:
# when the terminal size is shrunk, we have to write a newline
# otherwise the progress bar will keep wrapping incorrectly
@@ -224,7 +220,7 @@ def progress_bar(seconds: int, prefix: str='', ANSI: Dict[str, str]=ANSI) -> Non
sys.stdout.flush()
# uncomment to have it disappear when it hits 100% instead of staying full red:
# time.sleep(0.5)
# sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH()), ANSI['reset']))
# sys.stdout.write('\r{}{}\r'.format((' ' * SHELL_CONFIG.TERM_WIDTH), ANSI['reset']))
# sys.stdout.flush()
except (KeyboardInterrupt, BrokenPipeError):
print()
@@ -234,7 +230,7 @@ def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional
args = ' '.join(subcommand_args)
version_msg = '[dark_magenta]\\[i] [{now}] ArchiveBox v{VERSION}: [/dark_magenta][green4]archivebox [green3]{subcommand}[green2] {args}[/green2]'.format(
now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
VERSION=archivebox.__version__,
VERSION=VERSION,
subcommand=subcommand,
args=args,
)
@@ -256,7 +252,6 @@ def log_importing_started(urls: Union[str, List[str]], depth: int, index_only: b
))
def log_source_saved(source_file: str):
from plugins_sys.config.constants import CONSTANTS
print(' > Saved verbatim input to {}/{}'.format(CONSTANTS.SOURCES_DIR_NAME, source_file.rsplit('/', 1)[-1]))
def log_parsing_finished(num_parsed: int, parser_name: str):
@@ -289,14 +284,12 @@ def log_indexing_process_finished():
def log_indexing_started(out_path: str):
from plugins_sys.config.apps import SHELL_CONFIG
if SHELL_CONFIG.IS_TTY:
sys.stdout.write(f' > ./{Path(out_path).relative_to(archivebox.DATA_DIR)}')
sys.stdout.write(f' > ./{Path(out_path).relative_to(DATA_DIR)}')
def log_indexing_finished(out_path: str):
print(f'\r √ ./{Path(out_path).relative_to(archivebox.DATA_DIR)}')
print(f'\r √ ./{Path(out_path).relative_to(DATA_DIR)}')
### Archiving Stage
@@ -532,7 +525,7 @@ def log_shell_welcome_msg():
### Helpers
@enforce_types
def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=archivebox.DATA_DIR) -> str:
def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=DATA_DIR) -> str:
"""convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc"""
pwd = str(Path(pwd)) # .resolve()
path = str(path)

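One subtle behavior change in this file: the old TimedProgress re-read the terminal size through a lambda on every access, so the bar tracked live window resizes, while the new code reads SHELL_CONFIG.TERM_WIDTH. Whether that value still refreshes per access depends on how the config field is defined, which is outside this hunk. A sketch of the two shapes:

    import shutil

    # before: a callable, evaluated fresh on each use
    TERM_WIDTH = lambda: shutil.get_terminal_size().columns
    padding = ' ' * TERM_WIDTH()

    # after: an attribute lookup on the shell config
    from archivebox.config import SHELL_CONFIG
    padding = ' ' * SHELL_CONFIG.TERM_WIDTH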
View file

@@ -4,7 +4,6 @@ import os
import sys
import shutil
import platform
import archivebox
from typing import Dict, List, Optional, Iterable, IO, Union
from pathlib import Path
@@ -15,6 +14,7 @@ from crontab import CronTab, CronSlices
from django.db.models import QuerySet
from django.utils import timezone
from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR, SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG
from .cli import (
CLI_SUBCOMMANDS,
run_subcommand,
@@ -66,22 +66,9 @@ from .index.html import (
)
from .index.csv import links_to_csv
from .extractors import archive_links, archive_link, ignore_methods
from .misc.logging import stderr, hint, ANSI
from .misc.logging import stderr, hint
from .misc.checks import check_data_folder
from .config import (
ConfigDict,
IS_TTY,
DEBUG,
IN_DOCKER,
IN_QEMU,
PUID,
PGID,
TIMEZONE,
ONLY_NEW,
JSON_INDEX_FILENAME,
HTML_INDEX_FILENAME,
SQL_INDEX_FILENAME,
LDAP,
from .config.legacy import (
write_config_file,
DEPENDENCIES,
load_all_config,
@@ -104,15 +91,9 @@ from .logging_util import (
printable_dependency_version,
)
CONSTANTS = archivebox.CONSTANTS
VERSION = archivebox.VERSION
PACKAGE_DIR = archivebox.PACKAGE_DIR
OUTPUT_DIR = archivebox.DATA_DIR
ARCHIVE_DIR = archivebox.DATA_DIR / 'archive'
@enforce_types
def help(out_dir: Path=archivebox.DATA_DIR) -> None:
def help(out_dir: Path=DATA_DIR) -> None:
"""Print the ArchiveBox help message and usage"""
all_subcommands = CLI_SUBCOMMANDS
@@ -135,7 +116,7 @@ def help(out_dir: Path=archivebox.DATA_DIR) -> None:
)
if archivebox.CONSTANTS.DATABASE_FILE.exists():
if CONSTANTS.DATABASE_FILE.exists():
print('''{green}ArchiveBox v{}: The self-hosted internet archive.{reset}
{lightred}Active data directory:{reset}
@@ -161,17 +142,17 @@ def help(out_dir: Path=archivebox.DATA_DIR) -> None:
{lightred}Documentation:{reset}
https://github.com/ArchiveBox/ArchiveBox/wiki
'''.format(VERSION, out_dir, COMMANDS_HELP_TEXT, **ANSI))
'''.format(VERSION, out_dir, COMMANDS_HELP_TEXT, **SHELL_CONFIG.ANSI))
else:
print('{green}Welcome to ArchiveBox v{}!{reset}'.format(VERSION, **ANSI))
print('{green}Welcome to ArchiveBox v{}!{reset}'.format(VERSION, **SHELL_CONFIG.ANSI))
print()
if IN_DOCKER:
if SHELL_CONFIG.IN_DOCKER:
print('When using Docker, you need to mount a volume to use as your data dir:')
print(' docker run -v /some/path:/data archivebox ...')
print()
print('To import an existing archive (from a previous version of ArchiveBox):')
print(' 1. cd into your data dir OUTPUT_DIR (usually ArchiveBox/output) and run:')
print(' 1. cd into your data dir DATA_DIR (usually ArchiveBox/output) and run:')
print(' 2. archivebox init')
print()
print('To start a new archive:')
@@ -184,10 +165,9 @@ def help(out_dir: Path=archivebox.DATA_DIR) -> None:
@enforce_types
def version(quiet: bool=False,
out_dir: Path=OUTPUT_DIR) -> None:
out_dir: Path=DATA_DIR) -> None:
"""Print the ArchiveBox version and dependency information"""
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SHELL_CONFIG
from plugins_auth.ldap.apps import LDAP_CONFIG
from django.conf import settings
@@ -202,19 +182,19 @@ def version(quiet: bool=False,
p = platform.uname()
print(
'ArchiveBox v{}'.format(archivebox.__version__),
'ArchiveBox v{}'.format(CONSTANTS.VERSION),
f'COMMIT_HASH={SHELL_CONFIG.COMMIT_HASH[:7] if SHELL_CONFIG.COMMIT_HASH else "unknown"}',
f'BUILD_TIME={SHELL_CONFIG.BUILD_TIME}',
)
print(
f'IN_DOCKER={IN_DOCKER}',
f'IN_QEMU={IN_QEMU}',
f'IN_DOCKER={SHELL_CONFIG.IN_DOCKER}',
f'IN_QEMU={SHELL_CONFIG.IN_QEMU}',
f'ARCH={p.machine}',
f'OS={p.system}',
f'PLATFORM={platform.platform()}',
f'PYTHON={sys.implementation.name.title()}',
)
OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or CONSTANTS.DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS['DATA_DIR']['is_mount'] or CONSTANTS.DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
print(
f'FS_ATOMIC={STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES}',
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
@@ -224,14 +204,14 @@ def version(quiet: bool=False,
print(
f'DEBUG={SHELL_CONFIG.DEBUG}',
f'IS_TTY={SHELL_CONFIG.IS_TTY}',
f'TZ={TIMEZONE}',
f'TZ={CONSTANTS.TIMEZONE}',
f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}',
f'LDAP={LDAP_CONFIG.LDAP_ENABLED}',
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
)
print()
print('{white}[i] Old dependency versions:{reset}'.format(**ANSI))
print('{white}[i] Old dependency versions:{reset}'.format(**SHELL_CONFIG.ANSI))
for name, dependency in DEPENDENCIES.items():
print(printable_dependency_version(name, dependency))
@@ -240,7 +220,7 @@ def version(quiet: bool=False,
print()
print()
print('{white}[i] New dependency versions:{reset}'.format(**ANSI))
print('{white}[i] New dependency versions:{reset}'.format(**SHELL_CONFIG.ANSI))
for name, binary in settings.BINARIES.items():
err = None
try:
@@ -252,18 +232,18 @@ def version(quiet: bool=False,
print('', '√' if loaded_bin.is_valid else 'X', '', loaded_bin.name.ljust(21), str(loaded_bin.version).ljust(15), loaded_bin.abspath or str(err))
print()
print('{white}[i] Source-code locations:{reset}'.format(**ANSI))
print('{white}[i] Source-code locations:{reset}'.format(**SHELL_CONFIG.ANSI))
for name, path in CONSTANTS.CODE_LOCATIONS.items():
print(printable_folder_status(name, path))
print()
if CONSTANTS.DATABASE_FILE.exists() or CONSTANTS.ARCHIVE_DIR.exists() or CONSTANTS.CONFIG_FILE.exists():
print('{white}[i] Data locations:{reset}'.format(**ANSI))
print('{white}[i] Data locations:{reset}'.format(**SHELL_CONFIG.ANSI))
for name, path in CONSTANTS.DATA_LOCATIONS.items():
print(printable_folder_status(name, path))
else:
print()
print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**SHELL_CONFIG.ANSI))
print()
@@ -272,7 +252,7 @@ def version(quiet: bool=False,
def run(subcommand: str,
subcommand_args: Optional[List[str]],
stdin: Optional[IO]=None,
out_dir: Path=OUTPUT_DIR) -> None:
out_dir: Path=DATA_DIR) -> None:
"""Run a given ArchiveBox subcommand with the given list of args"""
run_subcommand(
subcommand=subcommand,
@@ -283,27 +263,27 @@ def run(subcommand: str,
@enforce_types
def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=archivebox.DATA_DIR) -> None:
def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=DATA_DIR) -> None:
"""Initialize a new ArchiveBox collection in the current directory"""
from core.models import Snapshot
out_dir.mkdir(exist_ok=True)
is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_OUTPUT_DIR)
is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_DATA_DIR)
if (out_dir / archivebox.CONSTANTS.JSON_INDEX_FILENAME).exists():
if (out_dir / CONSTANTS.JSON_INDEX_FILENAME).exists():
stderr("[!] This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.", color="lightyellow")
stderr(" You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.", color="lightyellow")
existing_index = archivebox.CONSTANTS.DATABASE_FILE.exists()
existing_index = CONSTANTS.DATABASE_FILE.exists()
if is_empty and not existing_index:
print('{green}[+] Initializing a new ArchiveBox v{} collection...{reset}'.format(VERSION, **ANSI))
print('{green}----------------------------------------------------------------------{reset}'.format(**ANSI))
print('{green}[+] Initializing a new ArchiveBox v{} collection...{reset}'.format(VERSION, **SHELL_CONFIG.ANSI))
print('{green}----------------------------------------------------------------------{reset}'.format(**SHELL_CONFIG.ANSI))
elif existing_index:
# TODO: properly detect and print the existing version in current index as well
print('{green}[*] Verifying and updating existing ArchiveBox collection to v{}...{reset}'.format(VERSION, **ANSI))
print('{green}----------------------------------------------------------------------{reset}'.format(**ANSI))
print('{green}[*] Verifying and updating existing ArchiveBox collection to v{}...{reset}'.format(VERSION, **SHELL_CONFIG.ANSI))
print('{green}----------------------------------------------------------------------{reset}'.format(**SHELL_CONFIG.ANSI))
else:
if force:
stderr('[!] This folder appears to already have files in it, but no index.sqlite3 is present.', color='lightyellow')
@@ -315,41 +295,41 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
" {lightred}Hint:{reset} To import an existing data folder make sure to cd into the folder first, \n"
" then run and run 'archivebox init' to pick up where you left off.\n\n"
" (Always make sure your data folder is backed up first before updating ArchiveBox)"
).format(**ANSI)
).format(**SHELL_CONFIG.ANSI)
)
raise SystemExit(2)
if existing_index:
print('\n{green}[*] Verifying archive folder structure...{reset}'.format(**ANSI))
print('\n{green}[*] Verifying archive folder structure...{reset}'.format(**SHELL_CONFIG.ANSI))
else:
print('\n{green}[+] Building archive folder structure...{reset}'.format(**ANSI))
print('\n{green}[+] Building archive folder structure...{reset}'.format(**SHELL_CONFIG.ANSI))
print(f' + ./{CONSTANTS.ARCHIVE_DIR.relative_to(OUTPUT_DIR)}, ./{CONSTANTS.SOURCES_DIR.relative_to(OUTPUT_DIR)}, ./{CONSTANTS.LOGS_DIR.relative_to(OUTPUT_DIR)}...')
print(f' + ./{CONSTANTS.ARCHIVE_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.SOURCES_DIR.relative_to(DATA_DIR)}, ./{CONSTANTS.LOGS_DIR.relative_to(DATA_DIR)}...')
Path(CONSTANTS.SOURCES_DIR).mkdir(exist_ok=True)
Path(CONSTANTS.ARCHIVE_DIR).mkdir(exist_ok=True)
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
print(f' + ./{CONSTANTS.CONFIG_FILE.relative_to(OUTPUT_DIR)}...')
print(f' + ./{CONSTANTS.CONFIG_FILE.relative_to(DATA_DIR)}...')
write_config_file({}, out_dir=out_dir)
if CONSTANTS.DATABASE_FILE.exists():
print('\n{green}[*] Verifying main SQL index and running any migrations needed...{reset}'.format(**ANSI))
print('\n{green}[*] Verifying main SQL index and running any migrations needed...{reset}'.format(**SHELL_CONFIG.ANSI))
else:
print('\n{green}[+] Building main SQL index and running initial migrations...{reset}'.format(**ANSI))
print('\n{green}[+] Building main SQL index and running initial migrations...{reset}'.format(**SHELL_CONFIG.ANSI))
for migration_line in apply_migrations(out_dir):
print(f' {migration_line}')
assert CONSTANTS.DATABASE_FILE.exists()
print()
print(f' √ ./{CONSTANTS.DATABASE_FILE.relative_to(OUTPUT_DIR)}')
print(f' √ ./{CONSTANTS.DATABASE_FILE.relative_to(DATA_DIR)}')
# from django.contrib.auth.models import User
# if IS_TTY and not User.objects.filter(is_superuser=True).exists():
# print('{green}[+] Creating admin user account...{reset}'.format(**ANSI))
# if SHELL_CONFIG.IS_TTY and not User.objects.filter(is_superuser=True).exists():
# print('{green}[+] Creating admin user account...{reset}'.format(**SHELL_CONFIG.ANSI))
# call_command("createsuperuser", interactive=True)
print()
print('{green}[*] Checking links from indexes and archive folders (safe to Ctrl+C)...{reset}'.format(**ANSI))
print('{green}[*] Checking links from indexes and archive folders (safe to Ctrl+C)...{reset}'.format(**SHELL_CONFIG.ANSI))
all_links = Snapshot.objects.none()
pending_links: Dict[str, Link] = {}
@@ -365,9 +345,9 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
# Links in data folders that dont match their timestamp
fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir)
if fixed:
print(' {lightyellow}√ Fixed {} data directory locations that didn\'t match their link timestamps.{reset}'.format(len(fixed), **ANSI))
print(' {lightyellow}√ Fixed {} data directory locations that didn\'t match their link timestamps.{reset}'.format(len(fixed), **SHELL_CONFIG.ANSI))
if cant_fix:
print(' {lightyellow}! Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **ANSI))
print(' {lightyellow}! Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **SHELL_CONFIG.ANSI))
# Links in JSON index but not in main index
orphaned_json_links = {
@@ -377,7 +357,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
}
if orphaned_json_links:
pending_links.update(orphaned_json_links)
print(' {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **ANSI))
print(' {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **SHELL_CONFIG.ANSI))
# Links in data dir indexes but not in main index
orphaned_data_dir_links = {
@@ -387,7 +367,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
}
if orphaned_data_dir_links:
pending_links.update(orphaned_data_dir_links)
print(' {lightyellow}√ Added {} orphaned links from existing archive directories.{reset}'.format(len(orphaned_data_dir_links), **ANSI))
print(' {lightyellow}√ Added {} orphaned links from existing archive directories.{reset}'.format(len(orphaned_data_dir_links), **SHELL_CONFIG.ANSI))
# Links in invalid/duplicate data dirs
invalid_folders = {
@@ -395,10 +375,10 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items()
}
if invalid_folders:
print(' {lightyellow}! Skipped adding {} invalid link data directories.{reset}'.format(len(invalid_folders), **ANSI))
print(' X ' + '\n X '.join(f'./{Path(folder).relative_to(OUTPUT_DIR)} {link}' for folder, link in invalid_folders.items()))
print(' {lightyellow}! Skipped adding {} invalid link data directories.{reset}'.format(len(invalid_folders), **SHELL_CONFIG.ANSI))
print(' X ' + '\n X '.join(f'./{Path(folder).relative_to(DATA_DIR)} {link}' for folder, link in invalid_folders.items()))
print()
print(' {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**ANSI))
print(' {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox status')
print(' archivebox list --status=invalid')
@@ -407,28 +387,27 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
stderr('[x] Stopped checking archive directories due to Ctrl-C/SIGTERM', color='red')
stderr(' Your archive data is safe, but you should re-run `archivebox init` to finish the process later.')
stderr()
stderr(' {lightred}Hint:{reset} In the future you can run a quick init without checking dirs like so:'.format(**ANSI))
stderr(' {lightred}Hint:{reset} In the future you can run a quick init without checking dirs like so:'.format(**SHELL_CONFIG.ANSI))
stderr(' archivebox init --quick')
raise SystemExit(1)
write_main_index(list(pending_links.values()), out_dir=out_dir)
print('\n{green}----------------------------------------------------------------------{reset}'.format(**ANSI))
print('\n{green}----------------------------------------------------------------------{reset}'.format(**SHELL_CONFIG.ANSI))
from django.contrib.auth.models import User
from plugins_sys.config.apps import SERVER_CONFIG
if (SERVER_CONFIG.ADMIN_USERNAME and SERVER_CONFIG.ADMIN_PASSWORD) and not User.objects.filter(username=SERVER_CONFIG.ADMIN_USERNAME).exists():
print('{green}[+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.{reset}'.format(**ANSI))
print('{green}[+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.{reset}'.format(**SHELL_CONFIG.ANSI))
User.objects.create_superuser(username=SERVER_CONFIG.ADMIN_USERNAME, password=SERVER_CONFIG.ADMIN_PASSWORD)
if existing_index:
print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**ANSI))
print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**SHELL_CONFIG.ANSI))
else:
print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links) + len(pending_links), **ANSI))
print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links) + len(pending_links), **SHELL_CONFIG.ANSI))
json_index = out_dir / JSON_INDEX_FILENAME
html_index = out_dir / HTML_INDEX_FILENAME
json_index = out_dir / CONSTANTS.JSON_INDEX_FILENAME
html_index = out_dir / CONSTANTS.HTML_INDEX_FILENAME
index_name = f"{date.today()}_index_old"
if json_index.exists():
json_index.rename(f"{index_name}.json")
@@ -440,7 +419,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
if Snapshot.objects.count() < 25: # hide the hints for experienced users
print()
print(' {lightred}Hint:{reset} To view your archive index, run:'.format(**ANSI))
print(' {lightred}Hint:{reset} To view your archive index, run:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox server # then visit http://127.0.0.1:8000')
print()
print(' To add new links, you can run:')
@@ -450,7 +429,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
print(' archivebox help')
@enforce_types
def status(out_dir: Path=OUTPUT_DIR) -> None:
def status(out_dir: Path=DATA_DIR) -> None:
"""Print out some info and statistics about the archive collection"""
check_data_folder(CONFIG)
@@ -459,8 +438,8 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
from django.contrib.auth import get_user_model
User = get_user_model()
print('{green}[*] Scanning archive main index...{reset}'.format(**ANSI))
print(ANSI['lightyellow'], f' {out_dir}/*', ANSI['reset'])
print('{green}[*] Scanning archive main index...{reset}'.format(**SHELL_CONFIG.ANSI))
print(SHELL_CONFIG.ANSI['lightyellow'], f' {out_dir}/*', SHELL_CONFIG.ANSI['reset'])
num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.')
size = printable_filesize(num_bytes)
print(f' Index size: {size} across {num_files} files')
@@ -469,15 +448,15 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
links = load_main_index(out_dir=out_dir)
num_sql_links = links.count()
num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir))
print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {SQL_INDEX_FILENAME})')
print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {CONSTANTS.SQL_INDEX_FILENAME})')
print(f' > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR.name}/*/index.json)')
print()
print('{green}[*] Scanning archive data directories...{reset}'.format(**ANSI))
print(ANSI['lightyellow'], f' {ARCHIVE_DIR}/*', ANSI['reset'])
print('{green}[*] Scanning archive data directories...{reset}'.format(**SHELL_CONFIG.ANSI))
print(SHELL_CONFIG.ANSI['lightyellow'], f' {ARCHIVE_DIR}/*', SHELL_CONFIG.ANSI['reset'])
num_bytes, num_dirs, num_files = get_dir_size(ARCHIVE_DIR)
size = printable_filesize(num_bytes)
print(f' Size: {size} across {num_files} files in {num_dirs} directories')
print(ANSI['black'])
print(SHELL_CONFIG.ANSI['black'])
num_indexed = len(get_indexed_folders(links, out_dir=out_dir))
num_archived = len(get_archived_folders(links, out_dir=out_dir))
num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir))
@@ -502,23 +481,23 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
print(f' > corrupted: {len(corrupted)}'.ljust(36), f'({get_corrupted_folders.__doc__})')
print(f' > unrecognized: {len(unrecognized)}'.ljust(36), f'({get_unrecognized_folders.__doc__})')
print(ANSI['reset'])
print(SHELL_CONFIG.ANSI['reset'])
if num_indexed:
print(' {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**ANSI))
print(' {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox list --status=<status> (e.g. indexed, corrupted, archived, etc.)')
if orphaned:
print(' {lightred}Hint:{reset} To automatically import orphaned data directories into the main index, run:'.format(**ANSI))
print(' {lightred}Hint:{reset} To automatically import orphaned data directories into the main index, run:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox init')
if num_invalid:
print(' {lightred}Hint:{reset} You may need to manually remove or fix some invalid data directories, afterwards make sure to run:'.format(**ANSI))
print(' {lightred}Hint:{reset} You may need to manually remove or fix some invalid data directories, afterwards make sure to run:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox init')
print()
print('{green}[*] Scanning recent archive changes and user logins:{reset}'.format(**ANSI))
print(ANSI['lightyellow'], f' {CONSTANTS.LOGS_DIR}/*', ANSI['reset'])
print('{green}[*] Scanning recent archive changes and user logins:{reset}'.format(**SHELL_CONFIG.ANSI))
print(SHELL_CONFIG.ANSI['lightyellow'], f' {CONSTANTS.LOGS_DIR}/*', SHELL_CONFIG.ANSI['reset'])
users = get_admins().values_list('username', flat=True)
print(f' UI users {len(users)}: {", ".join(users)}')
last_login = User.objects.order_by('last_login').last()
@@ -530,7 +509,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
if not users:
print()
print(' {lightred}Hint:{reset} You can create an admin user by running:'.format(**ANSI))
print(' {lightred}Hint:{reset} You can create an admin user by running:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox manage createsuperuser')
print()
@@ -538,19 +517,19 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
if not snapshot.downloaded_at:
continue
print(
ANSI['black'],
SHELL_CONFIG.ANSI['black'],
(
f' > {str(snapshot.downloaded_at)[:16]} '
f'[{snapshot.num_outputs} {("X", "√")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] '
f'"{snapshot.title}": {snapshot.url}'
)[:SHELL_CONFIG.TERM_WIDTH],
ANSI['reset'],
SHELL_CONFIG.ANSI['reset'],
)
print(ANSI['black'], ' ...', ANSI['reset'])
print(SHELL_CONFIG.ANSI['black'], ' ...', SHELL_CONFIG.ANSI['reset'])
@enforce_types
def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> List[Link]:
def oneshot(url: str, extractors: str="", out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> List[Link]:
"""
Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
You can run this to archive single pages without needing to create a whole collection with archivebox init.
@@ -571,7 +550,7 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR, created_by_i
def add(urls: Union[str, List[str]],
tag: str='',
depth: int=0,
update: bool=not ONLY_NEW,
update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
update_all: bool=False,
index_only: bool=False,
overwrite: bool=False,
@@ -580,7 +559,7 @@ def add(urls: Union[str, List[str]],
extractors: str="",
parser: str="auto",
created_by_id: int | None=None,
out_dir: Path=OUTPUT_DIR) -> List[Link]:
out_dir: Path=DATA_DIR) -> List[Link]:
"""Add a new URL or list of URLs to your archive"""
from core.models import Snapshot, Tag
@@ -693,7 +672,7 @@ def remove(filter_str: Optional[str]=None,
before: Optional[float]=None,
yes: bool=False,
delete: bool=False,
out_dir: Path=OUTPUT_DIR) -> List[Link]:
out_dir: Path=DATA_DIR) -> List[Link]:
"""Remove the specified URLs from the archive"""
check_data_folder(CONFIG)
@@ -767,7 +746,7 @@ def remove(filter_str: Optional[str]=None,
@enforce_types
def update(resume: Optional[float]=None,
only_new: bool=ONLY_NEW,
only_new: bool=ARCHIVING_CONFIG.ONLY_NEW,
index_only: bool=False,
overwrite: bool=False,
filter_patterns_str: Optional[str]=None,
@@ -777,7 +756,7 @@ def update(resume: Optional[float]=None,
after: Optional[str]=None,
before: Optional[str]=None,
extractors: str="",
out_dir: Path=OUTPUT_DIR) -> List[Link]:
out_dir: Path=DATA_DIR) -> List[Link]:
"""Import any new links from subscriptions and retry any previously failed/skipped links"""
from core.models import ArchiveResult
@@ -853,7 +832,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
json: bool=False,
html: bool=False,
with_headers: bool=False,
out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
out_dir: Path=DATA_DIR) -> Iterable[Link]:
"""List, filter, and export information about archive entries"""
check_data_folder(CONFIG)
@@ -902,7 +881,7 @@ def list_links(snapshots: Optional[QuerySet]=None,
filter_type: str='exact',
after: Optional[float]=None,
before: Optional[float]=None,
out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
out_dir: Path=DATA_DIR) -> Iterable[Link]:
check_data_folder(CONFIG)
@@ -926,7 +905,7 @@ def list_links(snapshots: Optional[QuerySet]=None,
@enforce_types
def list_folders(links: List[Link],
status: str,
out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
out_dir: Path=DATA_DIR) -> Dict[str, Optional[Link]]:
check_data_folder(CONFIG)
@@ -949,7 +928,7 @@ def list_folders(links: List[Link],
raise ValueError('Status not recognized.')
@enforce_types
def setup(out_dir: Path=OUTPUT_DIR) -> None:
def setup(out_dir: Path=DATA_DIR) -> None:
"""Automatically install all ArchiveBox dependencies and extras"""
from rich import print
@@ -996,7 +975,7 @@ def config(config_options_str: Optional[str]=None,
get: bool=False,
set: bool=False,
reset: bool=False,
out_dir: Path=OUTPUT_DIR) -> None:
out_dir: Path=DATA_DIR) -> None:
"""Get and set your ArchiveBox project configuration values"""
check_data_folder(CONFIG)
@@ -1014,7 +993,7 @@ def config(config_options_str: Optional[str]=None,
no_args = not (get or set or reset or config_options)
matching_config: ConfigDict = {}
matching_config = {}
if get or no_args:
if config_options:
config_options = [get_real_name(key) for key in config_options]
@@ -1054,11 +1033,11 @@ def config(config_options_str: Optional[str]=None,
if new_config:
before = CONFIG
matching_config = write_config_file(new_config, out_dir=OUTPUT_DIR)
matching_config = write_config_file(new_config, out_dir=DATA_DIR)
after = load_all_config()
print(printable_config(matching_config))
side_effect_changes: ConfigDict = {}
side_effect_changes = {}
for key, val in after.items():
if key in USER_CONFIG and (before[key] != after[key]) and (key not in matching_config):
side_effect_changes[key] = after[key]
@@ -1095,14 +1074,13 @@ def schedule(add: bool=False,
tag: str='',
depth: int=0,
overwrite: bool=False,
update: bool=not ONLY_NEW,
update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
import_path: Optional[str]=None,
out_dir: Path=OUTPUT_DIR):
out_dir: Path=DATA_DIR):
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
check_data_folder(CONFIG)
from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
from plugins_sys.config.apps import SHELL_CONFIG, CONSTANTS
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
@@ -1222,7 +1200,7 @@ def server(runserver_args: Optional[List[str]]=None,
init: bool=False,
quick_init: bool=False,
createsuperuser: bool=False,
out_dir: Path=OUTPUT_DIR) -> None:
out_dir: Path=DATA_DIR) -> None:
"""Run the ArchiveBox HTTP server"""
runserver_args = runserver_args or []
@@ -1238,10 +1216,6 @@ def server(runserver_args: Optional[List[str]]=None,
run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
print()
# setup config for django runserver
from . import config
config.SHOW_PROGRESS = False
config.DEBUG = config.DEBUG or debug
check_data_folder(CONFIG)
@@ -1250,20 +1224,17 @@ def server(runserver_args: Optional[List[str]]=None,
print('{green}[+] Starting ArchiveBox webserver... {reset}'.format(**ANSI))
print('{green}[+] Starting ArchiveBox webserver... {reset}'.format(**SHELL_CONFIG.ANSI))
print(' > Logging errors to ./logs/errors.log')
if not User.objects.filter(is_superuser=True).exists():
print('{lightyellow}[!] No admin users exist yet, you will not be able to edit links in the UI.{reset}'.format(**ANSI))
print('{lightyellow}[!] No admin users exist yet, you will not be able to edit links in the UI.{reset}'.format(**SHELL_CONFIG.ANSI))
print()
print(' To create an admin user, run:')
print(' archivebox manage createsuperuser')
print()
# toggle autoreloading when archivebox code changes
config.SHOW_PROGRESS = False
config.DEBUG = config.DEBUG or debug
if debug:
if SHELL_CONFIG.DEBUG:
if not reload:
runserver_args.append('--noreload') # '--insecure'
call_command("runserver", *runserver_args)
@@ -1295,13 +1266,13 @@ def server(runserver_args: Optional[List[str]]=None,
@enforce_types
def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
def manage(args: Optional[List[str]]=None, out_dir: Path=DATA_DIR) -> None:
"""Run an ArchiveBox Django management command"""
check_data_folder(CONFIG)
from django.core.management import execute_from_command_line
if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):
if (args and "createsuperuser" in args) and (SHELL_CONFIG.IN_DOCKER and not SHELL_CONFIG.IS_TTY):
stderr('[!] Warning: you need to pass -it to use interactive commands in docker', color='lightyellow')
stderr(' docker run -it archivebox manage {}'.format(' '.join(args or ['...'])), color='lightyellow')
stderr('')
@@ -1312,7 +1283,7 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
@enforce_types
def shell(out_dir: Path=OUTPUT_DIR) -> None:
def shell(out_dir: Path=DATA_DIR) -> None:
"""Enter an interactive ArchiveBox Django shell"""
check_data_folder(CONFIG)

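Across main.py the module-level ANSI dict is replaced by SHELL_CONFIG.ANSI while the **-unpacking idiom stays the same: the named color codes are expanded into the format string as keyword arguments. A one-line usage sketch:

    from archivebox.config import SHELL_CONFIG

    print('{green}[+] Starting ArchiveBox webserver... {reset}'.format(**SHELL_CONFIG.ANSI))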
View file

@@ -2,45 +2,42 @@ __package__ = 'archivebox.misc'
from benedict import benedict
import archivebox
from archivebox.config import DATA_DIR, ARCHIVE_DIR, CONSTANTS, SHELL_CONFIG
from .logging import stderr, ANSI
from .logging import stderr
def check_data_folder(config: benedict) -> None:
output_dir = archivebox.DATA_DIR
archive_dir_exists = (archivebox.CONSTANTS.ARCHIVE_DIR).exists()
archive_dir_exists = ARCHIVE_DIR.exists()
if not archive_dir_exists:
stderr('[X] No archivebox index found in the current directory.', color='red')
stderr(f' {output_dir}', color='lightyellow')
stderr(f' {DATA_DIR}', color='lightyellow')
stderr()
stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**ANSI))
stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**SHELL_CONFIG.ANSI))
stderr(' cd path/to/your/archive/folder')
stderr(' archivebox [command]')
stderr()
stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**ANSI))
stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**SHELL_CONFIG.ANSI))
stderr(' archivebox init')
raise SystemExit(2)
def check_migrations(config: benedict):
output_dir = archivebox.DATA_DIR
from ..index.sql import list_migrations
pending_migrations = [name for status, name in list_migrations() if not status]
if pending_migrations:
stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
stderr(f' {output_dir}')
stderr(f' {DATA_DIR}')
stderr()
stderr(f' To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:')
stderr(' archivebox init')
raise SystemExit(3)
archivebox.CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True)
archivebox.CONSTANTS.LOGS_DIR.mkdir(exist_ok=True)
archivebox.CONSTANTS.CACHE_DIR.mkdir(exist_ok=True)
(archivebox.CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True)
(archivebox.CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True)
CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True)
CONSTANTS.LOGS_DIR.mkdir(exist_ok=True)
CONSTANTS.CACHE_DIR.mkdir(exist_ok=True)
(CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True)
(CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True)

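Since check_data_folder() now resolves DATA_DIR and ARCHIVE_DIR itself, its config argument appears to be kept only for signature compatibility. A minimal call-site sketch (assumed usage, mirroring the check_data_folder(CONFIG) calls earlier in this diff):

    from benedict import benedict
    from archivebox.misc.checks import check_data_folder

    # exits with SystemExit(2) and a hint if no archive/ dir exists in the cwd
    check_data_folder(benedict({}))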
View file

@@ -14,7 +14,7 @@ from datetime import datetime, timezone
from pathlib import Path
from ..system import atomic_write
from ..config import (
from ..config.legacy import (
ANSI,
OUTPUT_DIR,
SOURCES_DIR_NAME,

View file

@@ -2,24 +2,25 @@ __package__ = 'archivebox.parsers'
import re
import archivebox
from typing import IO, Iterable, Optional
from configparser import ConfigParser
from pocket import Pocket
from archivebox.config import CONSTANTS
from ..index.schema import Link
from ..util import enforce_types
from ..system import atomic_write
from ..config import (
from ..config.legacy import (
POCKET_CONSUMER_KEY,
POCKET_ACCESS_TOKENS,
)
COUNT_PER_PAGE = 500
API_DB_PATH = archivebox.DATA_DIR / 'sources' / 'pocket_api.db'
API_DB_PATH = CONSTANTS.SOURCES_DIR / 'pocket_api.db'
# search for broken protocols that sometimes come from the Pocket API
_BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))')

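The parser's scratch database moves from a hand-built archivebox.DATA_DIR / 'sources' path to the CONSTANTS.SOURCES_DIR constant. A sketch of reading such a per-parser state file (ConfigParser is imported in this hunk; the read pattern itself is an assumption):

    from configparser import ConfigParser
    from archivebox.config import CONSTANTS

    API_DB_PATH = CONSTANTS.SOURCES_DIR / 'pocket_api.db'

    api_db = ConfigParser()
    api_db.read(API_DB_PATH)  # silently no-ops if the file doesn't exist yet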
View file

@@ -3,19 +3,20 @@ __package__ = "archivebox.parsers"
import re
import requests
import archivebox
from datetime import datetime
from typing import IO, Iterable, Optional
from configparser import ConfigParser
from archivebox.config import CONSTANTS
from ..index.schema import Link
from ..util import enforce_types
from ..system import atomic_write
from ..config import READWISE_READER_TOKENS
from ..config.legacy import READWISE_READER_TOKENS
API_DB_PATH = archivebox.DATA_DIR / "sources" / "readwise_reader_api.db"
API_DB_PATH = CONSTANTS.SOURCES_DIR / "readwise_reader_api.db"
class ReadwiseReaderAPI:

View file

@@ -5,8 +5,6 @@ import platform
from pathlib import Path
from typing import List, Optional, Dict, ClassVar
from django.conf import settings
# Depends on other PyPI/vendor packages:
from rich import print
from pydantic import InstanceOf, Field, model_validator
@@ -18,8 +16,6 @@ from pydantic_pkgr import (
bin_abspath,
)
import archivebox
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
@@ -29,7 +25,7 @@ from abx.archivebox.base_binary import BaseBinary, env
from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins:
from plugins_sys.config.apps import ARCHIVING_CONFIG, SHELL_CONFIG
from archivebox.config import CONSTANTS, ARCHIVING_CONFIG, SHELL_CONFIG
from plugins_pkg.puppeteer.apps import PUPPETEER_BINPROVIDER
from plugins_pkg.playwright.apps import PLAYWRIGHT_BINPROVIDER
@@ -217,7 +213,7 @@ class ChromeBinary(BaseBinary):
}
@staticmethod
def symlink_to_lib(binary, bin_dir=archivebox.CONSTANTS.LIB_BIN_DIR) -> None:
def symlink_to_lib(binary, bin_dir=CONSTANTS.LIB_BIN_DIR) -> None:
if not (binary.abspath and binary.abspath.exists()):
return

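Only the signature and the existence guard of ChromeBinary.symlink_to_lib appear in this hunk; the rest of the body below is an assumption, sketched to show what a lib-dir symlink helper of this shape typically does:

    from pathlib import Path

    def symlink_to_lib(binary, bin_dir: Path) -> None:
        if not (binary.abspath and binary.abspath.exists()):
            return
        bin_dir.mkdir(parents=True, exist_ok=True)  # assumed
        symlink = bin_dir / binary.name             # assumed naming
        if symlink.is_symlink():
            symlink.unlink()
        symlink.symlink_to(binary.abspath)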
View file

@@ -18,7 +18,7 @@ from abx.archivebox.base_extractor import BaseExtractor
from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins:
from plugins_sys.config.apps import ARCHIVING_CONFIG
from archivebox.config import ARCHIVING_CONFIG
from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
###################### Config ##########################

View file

@@ -19,7 +19,7 @@ from abx.archivebox.base_queue import BaseQueue
from abx.archivebox.base_hook import BaseHook
# Depends on Other Plugins:
from plugins_sys.config.apps import ARCHIVING_CONFIG
from archivebox.config import ARCHIVING_CONFIG
from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
###################### Config ##########################

View file

@@ -12,7 +12,7 @@ from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
from abx.archivebox.base_hook import BaseHook
from plugins_sys.config.apps import ARCHIVING_CONFIG
from archivebox.config import ARCHIVING_CONFIG
from plugins_pkg.pip.apps import pip
###################### Config ##########################

View file

@@ -1,16 +1,14 @@
__package__ = 'archivebox.plugins_pkg.npm'
import archivebox
__package__ = 'plugins_pkg.npm'
from pathlib import Path
from typing import List, Optional
from django.conf import settings
from pydantic import InstanceOf, model_validator
from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName
from archivebox.config import DATA_DIR, CONSTANTS
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
@@ -36,8 +34,8 @@ DEFAULT_GLOBAL_CONFIG = {
NPM_CONFIG = NpmDependencyConfigs(**DEFAULT_GLOBAL_CONFIG)
OLD_NODE_BIN_PATH = archivebox.DATA_DIR / 'node_modules' / '.bin'
NEW_NODE_BIN_PATH = archivebox.CONSTANTS.LIB_NPM_DIR / 'node_modules' / '.bin'
OLD_NODE_BIN_PATH = DATA_DIR / 'node_modules' / '.bin'
NEW_NODE_BIN_PATH = CONSTANTS.LIB_NPM_DIR / 'node_modules' / '.bin'
class SystemNpmProvider(NpmProvider, BaseBinProvider):
name: BinProviderName = "sys_npm"
@@ -48,7 +46,7 @@ class LibNpmProvider(NpmProvider, BaseBinProvider):
name: BinProviderName = "lib_npm"
PATH: PATHStr = str(OLD_NODE_BIN_PATH)
npm_prefix: Optional[Path] = archivebox.CONSTANTS.LIB_NPM_DIR
npm_prefix: Optional[Path] = CONSTANTS.LIB_NPM_DIR
@model_validator(mode='after')
def validate_path(self):

View file

@@ -3,18 +3,19 @@ __package__ = 'archivebox.plugins_pkg.pip'
import os
import sys
import inspect
import archivebox
from pathlib import Path
from typing import List, Dict, Optional, ClassVar
from pydantic import InstanceOf, Field, model_validator
import abx
import django
from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type]
from django.core.checks import Error, Tags
from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer
from archivebox.config import CONSTANTS, VERSION
import abx
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet, ConfigSectionName
from abx.archivebox.base_check import BaseCheck
@@ -70,7 +71,7 @@ class LibPipBinProvider(PipProvider, BaseBinProvider):
name: BinProviderName = "lib_pip"
INSTALLER_BIN: BinName = "pip"
pip_venv: Optional[Path] = archivebox.CONSTANTS.LIB_PIP_DIR / 'venv'
pip_venv: Optional[Path] = CONSTANTS.LIB_PIP_DIR / 'venv'
SYS_PIP_BINPROVIDER = SystemPipBinProvider()
PIPX_PIP_BINPROVIDER = SystemPipxBinProvider()
@@ -84,10 +85,10 @@ class ArchiveboxBinary(BaseBinary):
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
VENV_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: archivebox.__version__},
SYS_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: archivebox.__version__},
apt.name: {'packages': lambda: [], 'version': lambda: archivebox.__version__},
brew.name: {'packages': lambda: [], 'version': lambda: archivebox.__version__},
VENV_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: VERSION},
SYS_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: VERSION},
apt.name: {'packages': lambda: [], 'version': lambda: VERSION},
brew.name: {'packages': lambda: [], 'version': lambda: VERSION},
}
ARCHIVEBOX_BINARY = ArchiveboxBinary()

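The provider_overrides entries are zero-argument callables, so swapping archivebox.__version__ for the imported VERSION constant is a pure value change inside each lambda. The shape, trimmed to one provider for illustration:

    from archivebox.config import VERSION

    provider_overrides = {
        'apt': {'packages': lambda: [], 'version': lambda: VERSION},
    }
    assert provider_overrides['apt']['version']() == VERSION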
View file

@@ -2,8 +2,6 @@ import platform
from pathlib import Path
from typing import List, Optional, Dict, ClassVar
from django.conf import settings
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, computed_field, Field
from pydantic_pkgr import (
@@ -19,7 +17,7 @@ from pydantic_pkgr import (
DEFAULT_ENV_PATH,
)
import archivebox
from archivebox.config import CONSTANTS
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
@@ -47,7 +45,7 @@ class PlaywrightConfigs(BaseConfigSet):
PLAYWRIGHT_CONFIG = PlaywrightConfigs()
LIB_DIR_BROWSERS = archivebox.CONSTANTS.LIB_BROWSERS_DIR
LIB_DIR_BROWSERS = CONSTANTS.LIB_BROWSERS_DIR
@@ -65,7 +63,7 @@ class PlaywrightBinProvider(BaseBinProvider):
name: BinProviderName = "playwright"
INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name
PATH: PATHStr = f"{archivebox.CONSTANTS.LIB_BIN_DIR}:{DEFAULT_ENV_PATH}"
PATH: PATHStr = f"{CONSTANTS.LIB_BIN_DIR}:{DEFAULT_ENV_PATH}"
puppeteer_browsers_dir: Optional[Path] = (
Path("~/Library/Caches/ms-playwright").expanduser() # macos playwright cache dir

View file

@@ -2,8 +2,6 @@ import platform
from pathlib import Path
from typing import List, Optional, Dict, ClassVar
from django.conf import settings
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field
from pydantic_pkgr import (
@@ -16,7 +14,7 @@ from pydantic_pkgr import (
HostBinPath,
)
import archivebox
from archivebox.config import CONSTANTS
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
@@ -45,7 +43,7 @@ class PuppeteerConfigs(BaseConfigSet):
PUPPETEER_CONFIG = PuppeteerConfigs()
LIB_DIR_BROWSERS = archivebox.CONSTANTS.LIB_BROWSERS_DIR
LIB_DIR_BROWSERS = CONSTANTS.LIB_BROWSERS_DIR
class PuppeteerBinary(BaseBinary):
@@ -61,7 +59,7 @@ class PuppeteerBinProvider(BaseBinProvider):
name: BinProviderName = "puppeteer"
INSTALLER_BIN: BinName = "npx"
PATH: PATHStr = str(archivebox.CONSTANTS.LIB_BIN_DIR)
PATH: PATHStr = str(CONSTANTS.LIB_BIN_DIR)
puppeteer_browsers_dir: Optional[Path] = LIB_DIR_BROWSERS
puppeteer_install_args: List[str] = ["@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)]
@@ -140,7 +138,7 @@ PUPPETEER_BINPROVIDER = PuppeteerBinProvider()
# ALTERNATIVE INSTALL METHOD using Ansible:
# install_playbook = self.plugin_dir / 'install_puppeteer.yml'
# chrome_bin = run_playbook(install_playbook, data_dir=archivebox.DATA_DIR, quiet=quiet).BINARIES.chrome
# chrome_bin = run_playbook(install_playbook, data_dir=DATA_DIR, quiet=quiet).BINARIES.chrome
# return self.__class__.model_validate(
# {
# **self.model_dump(),

View file

@@ -6,8 +6,6 @@ from subprocess import run
from typing import List, Dict, ClassVar, Iterable
# from typing_extensions import Self
import archivebox
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName
@@ -20,7 +18,7 @@ from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_searchbackend import BaseSearchBackend
# Depends on Other Plugins:
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
from archivebox.config import CONSTANTS, SEARCH_BACKEND_CONFIG
###################### Config ##########################
@@ -38,7 +36,7 @@ class RipgrepConfig(BaseConfigSet):
'--files-with-matches',
'--regexp',
])
RIPGREP_SEARCH_DIR: Path = archivebox.CONSTANTS.ARCHIVE_DIR
RIPGREP_SEARCH_DIR: Path = CONSTANTS.ARCHIVE_DIR
RIPGREP_CONFIG = RipgrepConfig()

View file

@@ -1,11 +1,8 @@
__package__ = 'archivebox.plugins_search.sonic'
import os
import sys
from typing import List, Dict, ClassVar, Generator, cast
from django.conf import settings
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field, model_validator
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName
@@ -18,7 +15,7 @@ from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_searchbackend import BaseSearchBackend
# Depends on Other Plugins:
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
from archivebox.config import SEARCH_BACKEND_CONFIG
SONIC_LIB = None
try:

View file

@@ -17,7 +17,7 @@ from abx.archivebox.base_hook import BaseHook
from abx.archivebox.base_searchbackend import BaseSearchBackend
# Depends on Other Plugins:
from plugins_sys.config.apps import SEARCH_BACKEND_CONFIG
from archivebox.config import SEARCH_BACKEND_CONFIG

View file

@@ -1 +0,0 @@
from archivebox.constants import *

View file

@@ -1,11 +1,10 @@
from pathlib import Path
from archivebox.config import DATA_DIR, CONSTANTS
import archivebox
OUTPUT_DIR = archivebox.DATA_DIR
LOGS_DIR = archivebox.CONSTANTS.LOGS_DIR
TMP_DIR = archivebox.CONSTANTS.TMP_DIR
OUTPUT_DIR = DATA_DIR
LOGS_DIR = CONSTANTS.LOGS_DIR
TMP_DIR = CONSTANTS.TMP_DIR
Path.mkdir(TMP_DIR, exist_ok=True)
CONFIG_FILE = TMP_DIR / "supervisord.conf"

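Path.mkdir(TMP_DIR, exist_ok=True) above calls the unbound method on the class; the equivalent, more idiomatic spelling would be:

    TMP_DIR.mkdir(exist_ok=True)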
View file

@@ -6,9 +6,9 @@ from django.conf import settings
from archivebox.index.schema import Link
from archivebox.util import enforce_types
from archivebox.config import stderr
from archivebox.misc.logging import stderr
# from archivebox.plugins_sys.config.apps import settings.CONFIGS.SearchBackendConfig
# from archivebox.archivebox.config import settings.CONFIGS.SearchBackendConfig
from .utils import get_indexable_content, log_index_started

View file

@@ -1,7 +1,7 @@
from django.db.models import QuerySet
from archivebox.util import enforce_types
from archivebox.config import ANSI
from archivebox.config.legacy import ANSI
def log_index_started(url):
print('{green}[*] Indexing url: {} in the search index {reset}'.format(url, **ANSI))

View file

@@ -15,7 +15,7 @@ from crontab import CronTab
from atomicwrites import atomic_write as lib_atomic_write
from .util import enforce_types, ExtendedEncoder
from .config import OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES
from .config.legacy import OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES
def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False, text=False, start_new_session=True, **kwargs):

View file

@@ -25,8 +25,8 @@ except ImportError:
detect_encoding = lambda rawdata: "utf-8"
from archivebox.constants import STATICFILE_EXTENSIONS
from plugins_sys.config.apps import ARCHIVING_CONFIG
from archivebox.config.constants import STATICFILE_EXTENSIONS
from archivebox.config import ARCHIVING_CONFIG
from .misc.logging import COLOR_DICT