mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-21 19:53:06 +00:00
speed up startup time, add rich startup progressbar, split logging and checks into misc, fix search index import backend bug
This commit is contained in:
parent
7ffb81f61b
commit
64c7100cf9
22 changed files with 566 additions and 762 deletions
|
@ -1,5 +1,7 @@
|
|||
__package__ = 'archivebox'
|
||||
|
||||
# print('INSTALLING MONKEY PATCHES')
|
||||
|
||||
from .monkey_patches import *
|
||||
|
||||
import os
|
||||
|
@ -28,3 +30,5 @@ def _detect_installed_version():
|
|||
|
||||
|
||||
__version__ = _detect_installed_version()
|
||||
|
||||
# print('DONE INSTALLING MONKEY PATCHES')
|
||||
|
|
|
@ -1,16 +1,20 @@
|
|||
__package__ = 'archivebox.cli'
|
||||
__command__ = 'archivebox'
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import threading
|
||||
from time import sleep
|
||||
import archivebox
|
||||
|
||||
from typing import Optional, Dict, List, IO, Union, Iterable
|
||||
from time import sleep
|
||||
from collections.abc import Mapping
|
||||
|
||||
from typing import Optional, List, IO, Union, Iterable
|
||||
from pathlib import Path
|
||||
|
||||
from ..config import OUTPUT_DIR, check_data_folder, check_migrations, stderr
|
||||
|
||||
from ..misc.checks import check_data_folder, check_migrations
|
||||
from ..misc.logging import stderr
|
||||
|
||||
from importlib import import_module
|
||||
|
||||
|
@ -18,13 +22,46 @@ BUILTIN_LIST = list
|
|||
|
||||
CLI_DIR = Path(__file__).resolve().parent
|
||||
|
||||
# these common commands will appear sorted before any others for ease-of-use
|
||||
meta_cmds = ('help', 'version') # dont require valid data folder at all
|
||||
main_cmds = ('init', 'config', 'setup') # dont require existing db present
|
||||
archive_cmds = ('add', 'remove', 'update', 'list', 'status') # require existing db present
|
||||
fake_db = ("oneshot",) # use fake in-memory db
|
||||
|
||||
display_first = (*meta_cmds, *main_cmds, *archive_cmds)
|
||||
# def list_subcommands() -> Dict[str, str]:
|
||||
# """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
|
||||
# COMMANDS = []
|
||||
# for filename in os.listdir(CLI_DIR):
|
||||
# if is_cli_module(filename):
|
||||
# subcommand = filename.replace('archivebox_', '').replace('.py', '')
|
||||
# module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
||||
# assert is_valid_cli_module(module, subcommand)
|
||||
# COMMANDS.append((subcommand, module.main.__doc__))
|
||||
# globals()[subcommand] = module.main
|
||||
# display_order = lambda cmd: (
|
||||
# display_first.index(cmd[0])
|
||||
# if cmd[0] in display_first else
|
||||
# 100 + len(cmd[0])
|
||||
# )
|
||||
# return dict(sorted(COMMANDS, key=display_order))
|
||||
|
||||
# just define it statically, it's much faster:
|
||||
# Maps each subcommand name to the name of the module that implements it.
# Every module is named 'archivebox_<subcommand>'; key order is the order below.
SUBCOMMAND_MODULES = {
    subcommand: f'archivebox_{subcommand}'
    for subcommand in (
        'help', 'version',
        'init', 'config', 'setup',
        'add', 'remove', 'update', 'list', 'status',
        'schedule', 'server', 'shell', 'manage',
        'oneshot',
    )
}
|
||||
|
||||
# every imported command module must have these properties in order to be valid
|
||||
required_attrs = ('__package__', '__command__', 'main')
|
||||
|
@ -36,6 +73,38 @@ is_valid_cli_module = lambda module, subcommand: (
|
|||
and module.__command__.split(' ')[-1] == subcommand
|
||||
)
|
||||
|
||||
class LazySubcommands(Mapping):
    """Read-only mapping of subcommand name -> that subcommand's ``main`` callable.

    The keys come from SUBCOMMAND_MODULES. The module behind a key is imported
    only when that key is looked up, so creating the mapping, iterating it,
    taking its length, or testing membership imports nothing.
    """

    def keys(self):
        """Return the subcommand names (the keys of SUBCOMMAND_MODULES)."""
        return SUBCOMMAND_MODULES.keys()

    def values(self):
        """Return a list with the ``main`` of every subcommand (imports each module)."""
        return [self[key] for key in self.keys()]

    def items(self):
        """Return a list of ``(name, main)`` pairs (imports each module)."""
        return [(key, self[key]) for key in self.keys()]

    def __getitem__(self, key):
        """Import the module registered for ``key`` and return its ``main``.

        Raises KeyError when ``key`` is not in SUBCOMMAND_MODULES. When
        assertions are enabled, raises AssertionError if the imported module
        does not pass is_valid_cli_module.
        """
        module = import_module(f'.{SUBCOMMAND_MODULES[key]}', __package__)
        assert is_valid_cli_module(module, key)
        return module.main

    def __contains__(self, key):
        """Report whether ``key`` is a known subcommand without importing it.

        The ``__contains__`` inherited from Mapping is implemented by calling
        ``self[key]``, which here would import the subcommand module just to
        answer a membership test.
        """
        return key in SUBCOMMAND_MODULES

    def __iter__(self):
        """Iterate over the subcommand names."""
        return iter(SUBCOMMAND_MODULES.keys())

    def __len__(self):
        """Return the number of known subcommands."""
        return len(SUBCOMMAND_MODULES)
|
||||
|
||||
CLI_SUBCOMMANDS = LazySubcommands()
|
||||
|
||||
|
||||
# these common commands will appear sorted before any others for ease-of-use
|
||||
meta_cmds = ('help', 'version') # dont require valid data folder at all
|
||||
main_cmds = ('init', 'config', 'setup') # dont require existing db present
|
||||
archive_cmds = ('add', 'remove', 'update', 'list', 'status') # require existing db present
|
||||
fake_db = ("oneshot",) # use fake in-memory db
|
||||
|
||||
display_first = (*meta_cmds, *main_cmds, *archive_cmds)
|
||||
|
||||
|
||||
IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler') # threads we dont have to wait for before exiting
|
||||
|
||||
|
@ -71,29 +140,9 @@ def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: It
|
|||
raise Exception(f'Background threads failed to exit after {tries}s: {threads_summary}')
|
||||
|
||||
|
||||
def list_subcommands() -> Dict[str, str]:
    """Import every archivebox_<subcommand>.py module found in CLI_DIR.

    Returns a dict of subcommand name -> docstring of that module's ``main``.
    Names listed in display_first come first, in that order; all other names
    follow, ordered by name length. As a side effect each ``main`` is also
    stored in this module's globals under the subcommand name.
    """

    def sort_rank(name):
        if name in display_first:
            return display_first.index(name)
        return 100 + len(name)

    found = []
    for entry in os.listdir(CLI_DIR):
        if not is_cli_module(entry):
            continue
        name = entry.replace('archivebox_', '').replace('.py', '')
        cli_module = import_module('.archivebox_{}'.format(name), __package__)
        assert is_valid_cli_module(cli_module, name)
        found.append((name, cli_module.main.__doc__))
        globals()[name] = cli_module.main

    found.sort(key=lambda pair: sort_rank(pair[0]))
    return dict(found)
|
||||
|
||||
|
||||
def run_subcommand(subcommand: str,
|
||||
subcommand_args: List[str]=None,
|
||||
subcommand_args: List[str] | None = None,
|
||||
stdin: Optional[IO]=None,
|
||||
pwd: Union[Path, str, None]=None) -> None:
|
||||
"""Run a given ArchiveBox subcommand with the given list of args"""
|
||||
|
@ -101,18 +150,18 @@ def run_subcommand(subcommand: str,
|
|||
subcommand_args = subcommand_args or []
|
||||
|
||||
if subcommand not in meta_cmds:
|
||||
from ..config import setup_django
|
||||
from ..config import setup_django, CONFIG
|
||||
|
||||
cmd_requires_db = subcommand in archive_cmds
|
||||
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
|
||||
|
||||
if cmd_requires_db:
|
||||
check_data_folder(pwd)
|
||||
check_data_folder(CONFIG)
|
||||
|
||||
setup_django(in_memory_db=subcommand in fake_db, check_db=cmd_requires_db and not init_pending)
|
||||
|
||||
if cmd_requires_db:
|
||||
check_migrations()
|
||||
check_migrations(CONFIG)
|
||||
|
||||
module = import_module('.archivebox_{}'.format(subcommand), __package__)
|
||||
module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
|
||||
|
@ -121,17 +170,28 @@ def run_subcommand(subcommand: str,
|
|||
wait_for_bg_threads_to_exit(timeout=60)
|
||||
|
||||
|
||||
SUBCOMMANDS = list_subcommands()
|
||||
|
||||
|
||||
|
||||
class NotProvided:
    """Sentinel type for an argument that was not passed; instances are empty and falsy."""

    def __repr__(self) -> str:
        return '<not provided>'

    def __bool__(self) -> bool:
        return False

    def __len__(self) -> int:
        return 0
|
||||
|
||||
Omitted = Union[None, NotProvided]
|
||||
|
||||
OMITTED = NotProvided()
|
||||
|
||||
|
||||
def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided, pwd: Optional[str]=None) -> None:
|
||||
args = sys.argv[1:] if args is NotProvided else args
|
||||
stdin = sys.stdin if stdin is NotProvided else stdin
|
||||
def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: str | None=None) -> None:
|
||||
# print('STARTING CLI MAIN ENTRYPOINT')
|
||||
|
||||
args = sys.argv[1:] if args is OMITTED else args
|
||||
stdin = sys.stdin if stdin is OMITTED else stdin
|
||||
|
||||
subcommands = list_subcommands()
|
||||
parser = argparse.ArgumentParser(
|
||||
prog=__command__,
|
||||
description='ArchiveBox: The self-hosted internet archive',
|
||||
|
@ -141,19 +201,19 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
|
|||
group.add_argument(
|
||||
'--help', '-h',
|
||||
action='store_true',
|
||||
help=subcommands['help'],
|
||||
help=CLI_SUBCOMMANDS['help'].__doc__,
|
||||
)
|
||||
group.add_argument(
|
||||
'--version',
|
||||
action='store_true',
|
||||
help=subcommands['version'],
|
||||
help=CLI_SUBCOMMANDS['version'].__doc__,
|
||||
)
|
||||
group.add_argument(
|
||||
"subcommand",
|
||||
type=str,
|
||||
help= "The name of the subcommand to run",
|
||||
nargs='?',
|
||||
choices=subcommands.keys(),
|
||||
choices=CLI_SUBCOMMANDS.keys(),
|
||||
default=None,
|
||||
)
|
||||
parser.add_argument(
|
||||
|
@ -174,23 +234,13 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
|
|||
log_cli_command(
|
||||
subcommand=command.subcommand,
|
||||
subcommand_args=command.subcommand_args,
|
||||
stdin=stdin,
|
||||
pwd=pwd or OUTPUT_DIR
|
||||
stdin=stdin or None,
|
||||
pwd=pwd or archivebox.DATA_DIR,
|
||||
)
|
||||
|
||||
run_subcommand(
|
||||
subcommand=command.subcommand,
|
||||
subcommand_args=command.subcommand_args,
|
||||
stdin=stdin,
|
||||
pwd=pwd or OUTPUT_DIR,
|
||||
stdin=stdin or None,
|
||||
pwd=pwd or archivebox.DATA_DIR,
|
||||
)
|
||||
|
||||
|
||||
__all__ = (
|
||||
'SUBCOMMANDS',
|
||||
'list_subcommands',
|
||||
'run_subcommand',
|
||||
*SUBCOMMANDS.keys(),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -28,21 +28,19 @@ import sys
|
|||
import json
|
||||
import inspect
|
||||
import getpass
|
||||
import platform
|
||||
import shutil
|
||||
import requests
|
||||
|
||||
from hashlib import md5
|
||||
from pathlib import Path
|
||||
from benedict import benedict
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, Type, Tuple, Dict, Union, List
|
||||
from typing import Optional, Type, Tuple, Dict
|
||||
from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired
|
||||
from configparser import ConfigParser
|
||||
from collections import defaultdict
|
||||
import importlib.metadata
|
||||
|
||||
from pydantic_pkgr import SemVer
|
||||
from rich.progress import Progress
|
||||
|
||||
import django
|
||||
from django.db.backends.sqlite3.base import Database as sqlite3
|
||||
|
@ -56,6 +54,17 @@ from .config_stubs import (
|
|||
ConfigDefaultDict,
|
||||
)
|
||||
|
||||
from .misc.logging import (
|
||||
CONSOLE,
|
||||
SHOW_PROGRESS,
|
||||
DEFAULT_CLI_COLORS,
|
||||
ANSI,
|
||||
COLOR_DICT,
|
||||
stderr,
|
||||
hint,
|
||||
)
|
||||
from .misc.checks import check_system_config
|
||||
|
||||
# print('STARTING CONFIG LOADING')
|
||||
|
||||
# load fallback libraries from vendor dir
|
||||
|
@ -70,7 +79,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
|||
'SHELL_CONFIG': {
|
||||
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
|
||||
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
|
||||
'SHOW_PROGRESS': {'type': bool, 'default': lambda c: (c['IS_TTY'] and platform.system() != 'Darwin')}, # progress bars are buggy on mac, disable for now
|
||||
'SHOW_PROGRESS': {'type': bool, 'default': lambda c: c['IS_TTY']}, # progress bars are buggy on mac, disable for now
|
||||
'IN_DOCKER': {'type': bool, 'default': False},
|
||||
'IN_QEMU': {'type': bool, 'default': False},
|
||||
'PUID': {'type': int, 'default': os.getuid()},
|
||||
|
@ -306,32 +315,7 @@ ROBOTS_TXT_FILENAME = 'robots.txt'
|
|||
FAVICON_FILENAME = 'favicon.ico'
|
||||
CONFIG_FILENAME = 'ArchiveBox.conf'
|
||||
|
||||
DEFAULT_CLI_COLORS = benedict(
|
||||
{
|
||||
"reset": "\033[00;00m",
|
||||
"lightblue": "\033[01;30m",
|
||||
"lightyellow": "\033[01;33m",
|
||||
"lightred": "\033[01;35m",
|
||||
"red": "\033[01;31m",
|
||||
"green": "\033[01;32m",
|
||||
"blue": "\033[01;34m",
|
||||
"white": "\033[01;37m",
|
||||
"black": "\033[01;30m",
|
||||
}
|
||||
)
|
||||
ANSI = AttrDict({k: '' for k in DEFAULT_CLI_COLORS.keys()})
|
||||
|
||||
COLOR_DICT = defaultdict(lambda: [(0, 0, 0), (0, 0, 0)], {
|
||||
'00': [(0, 0, 0), (0, 0, 0)],
|
||||
'30': [(0, 0, 0), (0, 0, 0)],
|
||||
'31': [(255, 0, 0), (128, 0, 0)],
|
||||
'32': [(0, 200, 0), (0, 128, 0)],
|
||||
'33': [(255, 255, 0), (128, 128, 0)],
|
||||
'34': [(0, 0, 255), (0, 0, 128)],
|
||||
'35': [(255, 0, 255), (128, 0, 128)],
|
||||
'36': [(0, 255, 255), (0, 128, 128)],
|
||||
'37': [(255, 255, 255), (255, 255, 255)],
|
||||
})
|
||||
|
||||
STATICFILE_EXTENSIONS = {
|
||||
# 99.999% of the time, URLs ending in these extensions are static files
|
||||
|
@ -880,37 +864,6 @@ def parse_version_string(version: str) -> Tuple[int, int, int]:
|
|||
return tuple(int(part) for part in base.split('.'))[:3]
|
||||
|
||||
|
||||
# Logging Helpers
|
||||
def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
    """Write ``prefix`` followed by the space-joined ``args`` and a newline to sys.stdout.

    When ``color`` is truthy the text is wrapped in that color's code and the
    'reset' code. The codes come from DEFAULT_CLI_COLORS if ``config`` has a
    truthy 'USE_COLOR', otherwise from ANSI.
    """
    palette = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI

    if color:
        pieces = (palette[color], ' '.join(map(str, args)), palette['reset'])
    else:
        pieces = (' '.join(map(str, args)),)

    sys.stdout.write(prefix + ''.join(pieces) + '\n')
|
||||
|
||||
def stderr(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
    """Write ``prefix`` followed by the space-joined ``args`` and a newline to sys.stderr.

    When ``color`` is truthy the text is wrapped in that color's code and the
    'reset' code. The codes come from DEFAULT_CLI_COLORS if ``config`` has a
    truthy 'USE_COLOR', otherwise from ANSI.
    """
    palette = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI

    if color:
        pieces = (palette[color], ' '.join(map(str, args)), palette['reset'])
    else:
        pieces = (' '.join(map(str, args)),)

    sys.stderr.write(prefix + ''.join(pieces) + '\n')
|
||||
|
||||
def hint(text: Union[Tuple[str, ...], List[str], str], prefix=' ', config: Optional[ConfigDict]=None) -> None:
    """Print a 'Hint:' line via stderr(), followed by any continuation lines.

    ``text`` is either a single string, or a sequence whose first item is the
    hint line and whose remaining items are printed one per line after it.
    The color codes come from DEFAULT_CLI_COLORS if ``config`` has a truthy
    'USE_COLOR', otherwise from ANSI.
    """
    palette = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI

    if isinstance(text, str):
        headline, continuation = text, ()
    else:
        headline, continuation = text[0], text[1:]

    stderr('{}{lightred}Hint:{reset} {}'.format(prefix, headline, **palette))
    for extra_line in continuation:
        stderr('{} {}'.format(prefix, extra_line))
|
||||
|
||||
|
||||
# Dependency Metadata Helpers
|
||||
def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3) -> Optional[str]:
|
||||
|
@ -919,6 +872,10 @@ def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3)
|
|||
abspath = bin_path(binary)
|
||||
if not binary or not abspath:
|
||||
return None
|
||||
|
||||
return '999.999.999'
|
||||
|
||||
# Now handled by new BinProvider plugin system, no longer needed:
|
||||
|
||||
try:
|
||||
bin_env = os.environ | {'LANG': 'C'}
|
||||
|
@ -960,6 +917,9 @@ def bin_path(binary: Optional[str]) -> Optional[str]:
|
|||
return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
|
||||
|
||||
def bin_hash(binary: Optional[str]) -> Optional[str]:
|
||||
return 'UNUSED'
|
||||
# DEPRECATED: now handled by new BinProvider plugin system, no longer needed:
|
||||
|
||||
if binary is None:
|
||||
return None
|
||||
abs_path = bin_path(binary)
|
||||
|
@ -1329,246 +1289,123 @@ if not CONFIG['CHECK_SSL_VALIDITY']:
|
|||
|
||||
########################### Config Validity Checkers ###########################
|
||||
|
||||
INITIAL_STARTUP_PROGRESS = None
|
||||
INITIAL_STARTUP_PROGRESS_TASK = 0
|
||||
|
||||
def check_system_config(config: ConfigDict=CONFIG) -> None:
    """Check the runtime environment described by ``config`` and exit on fatal problems.

    Raises SystemExit(2) when running as root, when the Python or Django
    version is too old, or when the Python encoding is not UTF-8. A
    CHROME_USER_DATA_DIR that is unset, missing, or has no 'Default' profile
    is not fatal: ``config['CHROME_USER_DATA_DIR']`` is set to None instead,
    so this function mutates the ``config`` it is given.
    """
    # NOTE(review): block nesting below is reconstructed from the control flow,
    # because the indentation was flattened in the copy this was edited from - confirm.

    ### Check system environment
    if config['USER'] == 'root' or str(config['PUID']) == "0":
        stderr('[!] ArchiveBox should never be run as root!', color='red')
        stderr(' For more information, see the security overview documentation:')
        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')

        if config['IN_DOCKER']:
            attempted_command = ' '.join(sys.argv[:3])
            stderr('')
            stderr(' {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
            stderr(f' docker compose run archivebox {attempted_command}')
            stderr(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}')
            stderr(' or:')
            stderr(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"')
            stderr(f' docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"')

        raise SystemExit(2)

    ### Check Python environment
    if sys.version_info[:3] < (3, 7, 0):
        stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
        stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
        raise SystemExit(2)

    # Read the version from the config that was passed in (not the module-level
    # CONFIG), so the check and the message below always describe the same value.
    if int(config['DJANGO_VERSION'].split('.')[0]) < 3:
        stderr(f'[X] Django version is not new enough: {config["DJANGO_VERSION"]} (>3.0 is required)', color='red')
        stderr(' Upgrade django using pip or your system package manager: pip3 install --upgrade django')
        raise SystemExit(2)

    if config['PYTHON_ENCODING'] not in ('UTF-8', 'UTF8'):
        stderr(f'[X] Your system is running python3 scripts with a bad locale setting: {config["PYTHON_ENCODING"]} (it should be UTF-8).', color='red')
        stderr(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
        stderr(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"')
        stderr('')
        stderr(' Confirm that it\'s fixed by opening a new shell and running:')
        stderr(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8')
        raise SystemExit(2)

    # stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
    # stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
    if config['CHROME_USER_DATA_DIR'] is not None and Path(config['CHROME_USER_DATA_DIR']).exists():
        if not (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists():
            stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
            stderr(f' {config["CHROME_USER_DATA_DIR"]}')
            stderr(' Make sure you set it to a Chrome user data directory containing a Default profile folder.')
            stderr(' For more info see:')
            stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
            if '/Default' in str(config['CHROME_USER_DATA_DIR']):
                stderr()
                stderr(' Try removing /Default from the end e.g.:')
                stderr(' CHROME_USER_DATA_DIR="{}"'.format(str(config['CHROME_USER_DATA_DIR']).split('/Default')[0]))

            # hard error is too annoying here, instead just set it to nothing
            # raise SystemExit(2)
            config['CHROME_USER_DATA_DIR'] = None
    else:
        # unset or non-existent directory: normalize to None
        config['CHROME_USER_DATA_DIR'] = None
|
||||
|
||||
|
||||
def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
    """Print warnings about unusable dependencies and timeouts that are set too low.

    A dependency is reported when its entry in ``config['DEPENDENCIES']`` is
    enabled but not valid; that section is printed only when ``show_help`` is
    true. The TIMEOUT and MEDIA_TIMEOUT warnings do not depend on ``show_help``.
    """
    broken = {
        dep_name: dep_info
        for dep_name, dep_info in config['DEPENDENCIES'].items()
        if dep_info['enabled'] and not dep_info['is_valid']
    }
    if show_help and broken:
        stderr(f'[!] Warning: Missing {len(broken)} recommended dependencies', color='lightyellow')
        for dep_name, dep_info in broken.items():
            found_path = dep_info['path'] or 'unable to find binary'
            found_version = dep_info['version'] or 'unable to detect version'
            stderr(' ! {}: {} ({})'.format(dep_name, found_path, found_version))
            if dep_name in ('YOUTUBEDL_BINARY', 'CHROME_BINARY', 'SINGLEFILE_BINARY', 'READABILITY_BINARY', 'MERCURY_BINARY'):
                hint(
                    (
                        'To install all packages automatically run: archivebox setup',
                        f'or to disable it and silence this warning: archivebox config --set SAVE_{dep_name.rsplit("_", 1)[0]}=False',
                        '',
                    ),
                    prefix=' ',
                )
        stderr('')

    # The two TIMEOUT warnings differ only in their second and third lines.
    timeout = config['TIMEOUT']
    if timeout < 5:
        timeout_details = (
            ' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.',
            ' (Setting it to somewhere between 30 and 3000 seconds is recommended)',
        )
    elif config['USE_CHROME'] and timeout < 15:
        timeout_details = (
            ' Chrome will fail to archive all sites if set to less than ~15 seconds.',
            ' (Setting it to somewhere between 30 and 300 seconds is recommended)',
        )
    else:
        timeout_details = ()

    if timeout_details:
        stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={timeout} seconds)', color='red')
        for detail in timeout_details:
            stderr(detail)
        stderr()
        stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
        stderr()

    if config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20:
        stderr(f'[!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={config["MEDIA_TIMEOUT"]} seconds)', color='red')
        stderr(' youtube-dl/yt-dlp will fail to archive any media if set to less than ~20 seconds.')
        stderr(' (Setting it somewhere over 60 seconds is recommended)')
        stderr()
        stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
        stderr()
|
||||
|
||||
|
||||
def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG) -> None:
    """Exit with status 2 unless the data folder contains an archive directory.

    The folder checked is ``out_dir`` when given, otherwise
    ``config['OUTPUT_DIR']``; it must contain ARCHIVE_DIR_NAME. When it does
    not, an explanation with hints is printed via stderr() before exiting.
    """
    data_dir = out_dir or config['OUTPUT_DIR']
    assert isinstance(data_dir, (str, Path))

    if (Path(data_dir) / ARCHIVE_DIR_NAME).exists():
        return

    stderr('[X] No archivebox index found in the current directory.', color='red')
    stderr(f' {data_dir}', color='lightyellow')
    stderr()
    stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**config['ANSI']))
    stderr(' cd path/to/your/archive/folder')
    stderr(' archivebox [command]')
    stderr()
    stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**config['ANSI']))
    stderr(' archivebox init')
    raise SystemExit(2)
|
||||
|
||||
|
||||
def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG):
    """Exit with status 3 if any migration is pending, otherwise create the standard subfolders.

    The folder used is ``out_dir`` when given, otherwise ``config['OUTPUT_DIR']``.
    Migrations are read from list_migrations(); an entry counts as pending when
    its status is falsy. With nothing pending, the sources, logs, cache,
    lib/bin and personas/Default folders are created if they do not exist yet.
    """
    output_dir = out_dir or config['OUTPUT_DIR']
    from .index.sql import list_migrations

    pending = [name for applied, name in list_migrations() if not applied]
    if pending:
        stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
        stderr(f' {output_dir}')
        stderr()
        stderr(f' To upgrade it to the latest version and apply the {len(pending)} pending migrations, run:')
        stderr(' archivebox init')
        raise SystemExit(3)

    root = Path(output_dir)
    # (folder, create missing parent folders too?)
    required_folders = (
        (root / SOURCES_DIR_NAME, False),
        (root / LOGS_DIR_NAME, False),
        (root / CACHE_DIR_NAME, False),
        (root / LIB_DIR_NAME / 'bin', True),
        (root / PERSONAS_DIR_NAME / 'Default', True),
    )
    for folder, with_parents in required_folders:
        folder.mkdir(exist_ok=True, parents=with_parents)
|
||||
|
||||
|
||||
def bump_startup_progress_bar():
    """Advance the startup progress task by one step; do nothing when no progress bar is set."""
    # Both module-level names are only read here, so no ``global`` declaration is needed.
    progress = INITIAL_STARTUP_PROGRESS
    if not progress:
        return
    progress.update(INITIAL_STARTUP_PROGRESS_TASK, advance=1)   # type: ignore
|
||||
|
||||
def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
|
||||
check_system_config()
|
||||
global INITIAL_STARTUP_PROGRESS
|
||||
global INITIAL_STARTUP_PROGRESS_TASK
|
||||
|
||||
with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS:
|
||||
INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
|
||||
check_system_config(config)
|
||||
|
||||
output_dir = out_dir or Path(config['OUTPUT_DIR'])
|
||||
output_dir = out_dir or Path(config['OUTPUT_DIR'])
|
||||
|
||||
assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)
|
||||
assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)
|
||||
|
||||
try:
|
||||
from django.core.management import call_command
|
||||
|
||||
sys.path.append(str(config['PACKAGE_DIR']))
|
||||
os.environ.setdefault('OUTPUT_DIR', str(output_dir))
|
||||
assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py'
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
|
||||
|
||||
# Check to make sure JSON extension is available in our Sqlite3 instance
|
||||
bump_startup_progress_bar()
|
||||
try:
|
||||
cursor = sqlite3.connect(':memory:').cursor()
|
||||
cursor.execute('SELECT JSON(\'{"a": "b"}\')')
|
||||
except sqlite3.OperationalError as exc:
|
||||
stderr(f'[X] Your SQLite3 version is missing the required JSON1 extension: {exc}', color='red')
|
||||
hint([
|
||||
'Upgrade your Python version or install the extension manually:',
|
||||
'https://code.djangoproject.com/wiki/JSON1Extension'
|
||||
])
|
||||
from django.core.management import call_command
|
||||
|
||||
if in_memory_db:
|
||||
# some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
|
||||
# in those cases we create a temporary in-memory db and run the migrations
|
||||
# immediately to get a usable in-memory-database at startup
|
||||
os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
|
||||
django.setup()
|
||||
call_command("migrate", interactive=False, verbosity=0)
|
||||
else:
|
||||
# Otherwise use default sqlite3 file-based database and initialize django
|
||||
# without running migrations automatically (user runs them manually by calling init)
|
||||
django.setup()
|
||||
sys.path.append(str(config['PACKAGE_DIR']))
|
||||
os.environ.setdefault('OUTPUT_DIR', str(output_dir))
|
||||
assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py'
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
# log startup message to the error log
|
||||
with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
|
||||
command = ' '.join(sys.argv)
|
||||
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
|
||||
f.write(f"\n> {command}; TS={ts} VERSION={config['VERSION']} IN_DOCKER={config['IN_DOCKER']} IS_TTY={config['IS_TTY']}\n")
|
||||
|
||||
if check_db:
|
||||
# Enable WAL mode in sqlite3
|
||||
from django.db import connection
|
||||
with connection.cursor() as cursor:
|
||||
|
||||
# Set Journal mode to WAL to allow for multiple writers
|
||||
current_mode = cursor.execute("PRAGMA journal_mode")
|
||||
if current_mode != 'wal':
|
||||
cursor.execute("PRAGMA journal_mode=wal;")
|
||||
|
||||
# Set max blocking delay for concurrent writes and write sync mode
|
||||
# https://litestream.io/tips/#busy-timeout
|
||||
cursor.execute("PRAGMA busy_timeout = 5000;")
|
||||
cursor.execute("PRAGMA synchronous = NORMAL;")
|
||||
|
||||
# Create cache table in DB if needed
|
||||
# Check to make sure JSON extension is available in our Sqlite3 instance
|
||||
try:
|
||||
from django.core.cache import cache
|
||||
cache.get('test', None)
|
||||
except django.db.utils.OperationalError:
|
||||
call_command("createcachetable", verbosity=0)
|
||||
cursor = sqlite3.connect(':memory:').cursor()
|
||||
cursor.execute('SELECT JSON(\'{"a": "b"}\')')
|
||||
except sqlite3.OperationalError as exc:
|
||||
stderr(f'[X] Your SQLite3 version is missing the required JSON1 extension: {exc}', color='red')
|
||||
hint([
|
||||
'Upgrade your Python version or install the extension manually:',
|
||||
'https://code.djangoproject.com/wiki/JSON1Extension'
|
||||
])
|
||||
|
||||
bump_startup_progress_bar()
|
||||
|
||||
# if archivebox gets imported multiple times, we have to close
|
||||
# the sqlite3 whenever we init from scratch to avoid multiple threads
|
||||
# sharing the same connection by accident
|
||||
from django.db import connections
|
||||
for conn in connections.all():
|
||||
conn.close_if_unusable_or_obsolete()
|
||||
if in_memory_db:
|
||||
# some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
|
||||
# in those cases we create a temporary in-memory db and run the migrations
|
||||
# immediately to get a usable in-memory-database at startup
|
||||
os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
|
||||
django.setup()
|
||||
|
||||
bump_startup_progress_bar()
|
||||
call_command("migrate", interactive=False, verbosity=0)
|
||||
else:
|
||||
# Otherwise use default sqlite3 file-based database and initialize django
|
||||
# without running migrations automatically (user runs them manually by calling init)
|
||||
django.setup()
|
||||
|
||||
bump_startup_progress_bar()
|
||||
|
||||
sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
|
||||
assert sql_index_path.exists(), (
|
||||
f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)')
|
||||
from django.conf import settings
|
||||
|
||||
# log startup message to the error log
|
||||
with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
|
||||
command = ' '.join(sys.argv)
|
||||
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
|
||||
f.write(f"\n> {command}; TS={ts} VERSION={config['VERSION']} IN_DOCKER={config['IN_DOCKER']} IS_TTY={config['IS_TTY']}\n")
|
||||
|
||||
# https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
|
||||
if settings.DEBUG_LOGFIRE:
|
||||
from opentelemetry.instrumentation.sqlite3 import SQLite3Instrumentor
|
||||
SQLite3Instrumentor().instrument()
|
||||
if check_db:
|
||||
# Enable WAL mode in sqlite3
|
||||
from django.db import connection
|
||||
with connection.cursor() as cursor:
|
||||
|
||||
import logfire
|
||||
# Set Journal mode to WAL to allow for multiple writers
|
||||
current_mode = cursor.execute("PRAGMA journal_mode")
|
||||
if current_mode != 'wal':
|
||||
cursor.execute("PRAGMA journal_mode=wal;")
|
||||
|
||||
logfire.configure()
|
||||
logfire.instrument_django(is_sql_commentor_enabled=True)
|
||||
logfire.info(f'Started ArchiveBox v{CONFIG.VERSION}', argv=sys.argv)
|
||||
# Set max blocking delay for concurrent writes and write sync mode
|
||||
# https://litestream.io/tips/#busy-timeout
|
||||
cursor.execute("PRAGMA busy_timeout = 5000;")
|
||||
cursor.execute("PRAGMA synchronous = NORMAL;")
|
||||
|
||||
except KeyboardInterrupt:
|
||||
raise SystemExit(2)
|
||||
# Create cache table in DB if needed
|
||||
try:
|
||||
from django.core.cache import cache
|
||||
cache.get('test', None)
|
||||
except django.db.utils.OperationalError:
|
||||
call_command("createcachetable", verbosity=0)
|
||||
|
||||
bump_startup_progress_bar()
|
||||
|
||||
# if archivebox gets imported multiple times, we have to close
|
||||
# the sqlite3 whenever we init from scratch to avoid multiple threads
|
||||
# sharing the same connection by accident
|
||||
from django.db import connections
|
||||
for conn in connections.all():
|
||||
conn.close_if_unusable_or_obsolete()
|
||||
|
||||
sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
|
||||
assert sql_index_path.exists(), (
|
||||
f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)')
|
||||
|
||||
bump_startup_progress_bar()
|
||||
|
||||
# https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
|
||||
if settings.DEBUG_LOGFIRE:
|
||||
from opentelemetry.instrumentation.sqlite3 import SQLite3Instrumentor
|
||||
SQLite3Instrumentor().instrument()
|
||||
|
||||
import logfire
|
||||
|
||||
logfire.configure()
|
||||
logfire.instrument_django(is_sql_commentor_enabled=True)
|
||||
logfire.info(f'Started ArchiveBox v{CONFIG.VERSION}', argv=sys.argv)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
raise SystemExit(2)
|
||||
|
||||
INITIAL_STARTUP_PROGRESS = None
|
||||
INITIAL_STARTUP_PROGRESS_TASK = None
|
||||
|
|
|
@ -170,6 +170,7 @@ STATICFILES_DIRS = [
|
|||
*[
|
||||
str(plugin_dir / 'static')
|
||||
for plugin_dir in PLUGIN_DIRS.values()
|
||||
if (plugin_dir / 'static').is_dir()
|
||||
],
|
||||
str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'static'),
|
||||
]
|
||||
|
@ -179,6 +180,7 @@ TEMPLATE_DIRS = [
|
|||
*[
|
||||
str(plugin_dir / 'templates')
|
||||
for plugin_dir in PLUGIN_DIRS.values()
|
||||
if (plugin_dir / 'templates').is_dir()
|
||||
],
|
||||
str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'core'),
|
||||
str(PACKAGE_DIR / CONFIG.TEMPLATES_DIR_NAME / 'admin'),
|
||||
|
|
|
@ -141,18 +141,22 @@ SETTINGS_LOGGING = {
|
|||
"api": {
|
||||
"handlers": ["default", "logfile"],
|
||||
"level": "DEBUG",
|
||||
"propagate": False,
|
||||
},
|
||||
"checks": {
|
||||
"handlers": ["default", "logfile"],
|
||||
"level": "DEBUG",
|
||||
"propagate": False,
|
||||
},
|
||||
"core": {
|
||||
"handlers": ["default", "logfile"],
|
||||
"level": "DEBUG",
|
||||
"propagate": False,
|
||||
},
|
||||
"plugins_extractor": {
|
||||
"handlers": ["default", "logfile"],
|
||||
"level": "DEBUG",
|
||||
"propagate": False,
|
||||
},
|
||||
"httpx": {
|
||||
"handlers": ["outbound_webhooks"],
|
||||
|
@ -164,6 +168,7 @@ SETTINGS_LOGGING = {
|
|||
"handlers": ["default", "logfile"],
|
||||
"level": "INFO",
|
||||
"filters": ["noisyrequestsfilter"],
|
||||
"propagate": False,
|
||||
},
|
||||
"django.utils.autoreload": {
|
||||
"propagate": False,
|
||||
|
|
|
@ -230,7 +230,7 @@ def progress_bar(seconds: int, prefix: str='') -> None:
|
|||
print()
|
||||
|
||||
|
||||
def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str], pwd: str):
|
||||
def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str | IO], pwd: str):
|
||||
cmd = ' '.join(('archivebox', subcommand, *subcommand_args))
|
||||
stderr('{black}[i] [{now}] ArchiveBox v{VERSION}: {cmd}{reset}'.format(
|
||||
now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
|
||||
|
@ -526,11 +526,11 @@ def log_removal_finished(all_links: int, to_remove: int):
|
|||
|
||||
|
||||
def log_shell_welcome_msg():
|
||||
from .cli import list_subcommands
|
||||
from .cli import CLI_SUBCOMMANDS
|
||||
|
||||
print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
|
||||
print('{green}from core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
|
||||
print('{green}from cli import *\n {}{reset}'.format("\n ".join(list_subcommands().keys()), **ANSI))
|
||||
print('{green}from cli import *\n {}{reset}'.format("\n ".join(CLI_SUBCOMMANDS.keys()), **ANSI))
|
||||
print()
|
||||
print('[i] Welcome to the ArchiveBox Shell!')
|
||||
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')
|
||||
|
|
|
@ -16,7 +16,7 @@ from django.db.models import QuerySet
|
|||
from django.utils import timezone
|
||||
|
||||
from .cli import (
|
||||
list_subcommands,
|
||||
CLI_SUBCOMMANDS,
|
||||
run_subcommand,
|
||||
display_first,
|
||||
meta_cmds,
|
||||
|
@ -66,9 +66,9 @@ from .index.html import (
|
|||
)
|
||||
from .index.csv import links_to_csv
|
||||
from .extractors import archive_links, archive_link, ignore_methods
|
||||
from .misc.logging import stderr, hint
|
||||
from .misc.checks import check_data_folder, check_dependencies
|
||||
from .config import (
|
||||
stderr,
|
||||
hint,
|
||||
ConfigDict,
|
||||
ANSI,
|
||||
IS_TTY,
|
||||
|
@ -98,8 +98,6 @@ from .config import (
|
|||
SEARCH_BACKEND_ENGINE,
|
||||
LDAP,
|
||||
get_version,
|
||||
check_dependencies,
|
||||
check_data_folder,
|
||||
write_config_file,
|
||||
VERSION,
|
||||
VERSIONS_AVAILABLE,
|
||||
|
@ -146,7 +144,7 @@ from .logging_util import (
|
|||
def help(out_dir: Path=OUTPUT_DIR) -> None:
|
||||
"""Print the ArchiveBox help message and usage"""
|
||||
|
||||
all_subcommands = list_subcommands()
|
||||
all_subcommands = CLI_SUBCOMMANDS
|
||||
COMMANDS_HELP_TEXT = '\n '.join(
|
||||
f'{cmd.ljust(20)} {summary}'
|
||||
for cmd, summary in all_subcommands.items()
|
||||
|
@ -281,7 +279,7 @@ def version(quiet: bool=False,
|
|||
print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
|
||||
|
||||
print()
|
||||
check_dependencies()
|
||||
check_dependencies(CONFIG)
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
@ -469,7 +467,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
|
|||
def status(out_dir: Path=OUTPUT_DIR) -> None:
|
||||
"""Print out some info and statistics about the archive collection"""
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_data_folder(CONFIG)
|
||||
|
||||
from core.models import Snapshot
|
||||
from django.contrib.auth import get_user_model
|
||||
|
@ -609,8 +607,8 @@ def add(urls: Union[str, List[str]],
|
|||
run_subcommand('init', stdin=None, pwd=out_dir)
|
||||
|
||||
# Load list of links from the existing index
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_dependencies()
|
||||
check_data_folder(CONFIG)
|
||||
check_dependencies(CONFIG)
|
||||
new_links: List[Link] = []
|
||||
all_links = load_main_index(out_dir=out_dir)
|
||||
|
||||
|
@ -705,7 +703,7 @@ def remove(filter_str: Optional[str]=None,
|
|||
out_dir: Path=OUTPUT_DIR) -> List[Link]:
|
||||
"""Remove the specified URLs from the archive"""
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_data_folder(CONFIG)
|
||||
|
||||
if snapshots is None:
|
||||
if filter_str and filter_patterns:
|
||||
|
@ -792,8 +790,8 @@ def update(resume: Optional[float]=None,
|
|||
from core.models import ArchiveResult
|
||||
from .search import index_links
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_dependencies()
|
||||
check_data_folder(CONFIG)
|
||||
check_dependencies(CONFIG)
|
||||
new_links: List[Link] = [] # TODO: Remove input argument: only_new
|
||||
|
||||
extractors = extractors.split(",") if extractors else []
|
||||
|
@ -863,7 +861,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
|
|||
out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
|
||||
"""List, filter, and export information about archive entries"""
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_data_folder(CONFIG)
|
||||
|
||||
if filter_patterns and filter_patterns_str:
|
||||
stderr(
|
||||
|
@ -911,7 +909,7 @@ def list_links(snapshots: Optional[QuerySet]=None,
|
|||
before: Optional[float]=None,
|
||||
out_dir: Path=OUTPUT_DIR) -> Iterable[Link]:
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_data_folder(CONFIG)
|
||||
|
||||
if snapshots:
|
||||
all_snapshots = snapshots
|
||||
|
@ -935,7 +933,7 @@ def list_folders(links: List[Link],
|
|||
status: str,
|
||||
out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_data_folder(CONFIG)
|
||||
|
||||
STATUS_FUNCTIONS = {
|
||||
"indexed": get_indexed_folders,
|
||||
|
@ -1080,7 +1078,7 @@ def config(config_options_str: Optional[str]=None,
|
|||
out_dir: Path=OUTPUT_DIR) -> None:
|
||||
"""Get and set your ArchiveBox project configuration values"""
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_data_folder(CONFIG)
|
||||
|
||||
if config_options and config_options_str:
|
||||
stderr(
|
||||
|
@ -1183,7 +1181,7 @@ def schedule(add: bool=False,
|
|||
out_dir: Path=OUTPUT_DIR):
|
||||
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_data_folder(CONFIG)
|
||||
|
||||
Path(LOGS_DIR).mkdir(exist_ok=True)
|
||||
|
||||
|
@ -1324,7 +1322,7 @@ def server(runserver_args: Optional[List[str]]=None,
|
|||
config.SHOW_PROGRESS = False
|
||||
config.DEBUG = config.DEBUG or debug
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_data_folder(CONFIG)
|
||||
|
||||
from django.core.management import call_command
|
||||
from django.contrib.auth.models import User
|
||||
|
@ -1417,7 +1415,7 @@ def server(runserver_args: Optional[List[str]]=None,
|
|||
def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
|
||||
"""Run an ArchiveBox Django management command"""
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_data_folder(CONFIG)
|
||||
from django.core.management import execute_from_command_line
|
||||
|
||||
if (args and "createsuperuser" in args) and (IN_DOCKER and not IS_TTY):
|
||||
|
@ -1432,7 +1430,7 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=OUTPUT_DIR) -> None:
|
|||
def shell(out_dir: Path=OUTPUT_DIR) -> None:
|
||||
"""Enter an interactive ArchiveBox Django shell"""
|
||||
|
||||
check_data_folder(out_dir=out_dir)
|
||||
check_data_folder(CONFIG)
|
||||
|
||||
from django.core.management import call_command
|
||||
call_command("shell_plus")
|
||||
|
|
|
@ -7,7 +7,7 @@ if __name__ == '__main__':
|
|||
# versions of ./manage.py commands whenever possible. When that's not possible
|
||||
# (e.g. makemigrations), you can comment out this check temporarily
|
||||
|
||||
allowed_commands = ['makemigrations', 'migrate', 'startapp','squashmigrations', 'generate_stubs']
|
||||
allowed_commands = ['makemigrations', 'migrate', 'startapp','squashmigrations', 'generate_stubs', 'test']
|
||||
|
||||
if not any(cmd in sys.argv for cmd in allowed_commands):
|
||||
print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
|
||||
|
|
0
archivebox/misc/__init__.py
Normal file
0
archivebox/misc/__init__.py
Normal file
159
archivebox/misc/checks.py
Normal file
159
archivebox/misc/checks.py
Normal file
|
@ -0,0 +1,159 @@
|
|||
__package__ = 'archivebox.misc'
|
||||
|
||||
# TODO: migrate all of these to new plugantic/base_check.py Check system
|
||||
|
||||
import sys
|
||||
from benedict import benedict
|
||||
from pathlib import Path
|
||||
|
||||
from .logging import stderr, hint
|
||||
|
||||
|
||||
def check_system_config(config: benedict) -> None:
    """Sanity-check the runtime environment described by ``config``.

    Exits with ``SystemExit(2)`` when any of these hold:
      - the configured USER is ``root`` or PUID is ``0``
      - the running Python is older than 3.7
      - the major version in ``DJANGO_VERSION`` is below 3
      - ``PYTHON_ENCODING`` is not UTF-8

    Side effect: ``config['CHROME_USER_DATA_DIR']`` is reset to ``None`` when
    it is unset, points at a path that does not exist, or points at a
    directory that has no ``Default`` profile folder inside it.
    """
    ### Check system environment
    running_as_root = config['USER'] == 'root' or str(config['PUID']) == "0"
    if running_as_root:
        stderr('[!] ArchiveBox should never be run as root!', color='red')
        stderr(' For more information, see the security overview documentation:')
        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')

        if config['IN_DOCKER']:
            # echo back (at most) the first three argv items in the suggested commands
            attempted_command = ' '.join(sys.argv[:3])
            stderr('')
            stderr(' {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
            stderr(f' docker compose run archivebox {attempted_command}')
            stderr(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}')
            stderr(' or:')
            stderr(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"')
            stderr(f' docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"')

        raise SystemExit(2)

    ### Check Python environment
    if sys.version_info < (3, 7):
        stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
        stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
        raise SystemExit(2)

    django_major_version = int(config['DJANGO_VERSION'].split('.')[0])
    if django_major_version < 3:
        stderr(f'[X] Django version is not new enough: {config["DJANGO_VERSION"]} (>3.0 is required)', color='red')
        stderr(' Upgrade django using pip or your system package manager: pip3 install --upgrade django')
        raise SystemExit(2)

    if config['PYTHON_ENCODING'] not in ('UTF-8', 'UTF8'):
        stderr(f'[X] Your system is running python3 scripts with a bad locale setting: {config["PYTHON_ENCODING"]} (it should be UTF-8).', color='red')
        for help_line in (
            ' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)',
            ' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"',
            '',
            ' Confirm that it\'s fixed by opening a new shell and running:',
            ' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8',
        ):
            stderr(help_line)
        raise SystemExit(2)

    ### Check Chrome user data dir
    chrome_dir = config['CHROME_USER_DATA_DIR']
    if chrome_dir is None or not Path(chrome_dir).exists():
        # unset or nonexistent path: normalize to None
        config['CHROME_USER_DATA_DIR'] = None
    elif not (Path(chrome_dir) / 'Default').exists():
        stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
        stderr(f' {chrome_dir}')
        stderr(' Make sure you set it to a Chrome user data directory containing a Default profile folder.')
        stderr(' For more info see:')
        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
        if '/Default' in str(chrome_dir):
            stderr()
            stderr(' Try removing /Default from the end e.g.:')
            stderr(' CHROME_USER_DATA_DIR="{}"'.format(str(chrome_dir).split('/Default')[0]))

        # hard error is too annoying here, instead just set it to nothing
        config['CHROME_USER_DATA_DIR'] = None
|
||||
|
||||
|
||||
def check_dependencies(config: benedict, show_help: bool=True) -> None:
    """Print warnings about unusable dependencies and too-low timeouts.

    Args:
        config: mapping providing ``DEPENDENCIES`` (name -> info dict with
            ``enabled``, ``is_valid``, ``path``, ``version``), ``TIMEOUT``,
            ``USE_CHROME``, ``USE_YOUTUBEDL`` and ``MEDIA_TIMEOUT``.
        show_help: when false, the missing-dependency listing is skipped
            (the timeout warnings are still emitted).

    Only writes warnings via ``stderr``/``hint``; it never raises on its own.
    """
    # every dependency that is switched on but failed validation
    broken = []
    for dep_name, dep_info in config['DEPENDENCIES'].items():
        if dep_info['enabled'] and not dep_info['is_valid']:
            broken.append((dep_name, dep_info))

    if broken and show_help:
        stderr(f'[!] Warning: Missing {len(broken)} recommended dependencies', color='lightyellow')
        installable = ('YOUTUBEDL_BINARY', 'CHROME_BINARY', 'SINGLEFILE_BINARY', 'READABILITY_BINARY', 'MERCURY_BINARY')
        for dependency, info in broken:
            found_path = info['path'] or 'unable to find binary'
            found_version = info['version'] or 'unable to detect version'
            stderr(' ! {}: {} ({})'.format(dependency, found_path, found_version))
            if dependency in installable:
                hint(('To install all packages automatically run: archivebox setup',
                      f'or to disable it and silence this warning: archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False',
                      ''), prefix=' ')
        stderr('')

    timeout = config['TIMEOUT']
    if timeout < 5:
        stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
        stderr(' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.')
        stderr(' (Setting it to somewhere between 30 and 3000 seconds is recommended)')
        stderr()
        stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
        stderr()
    elif config['USE_CHROME'] and timeout < 15:
        stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
        stderr(' Chrome will fail to archive all sites if set to less than ~15 seconds.')
        stderr(' (Setting it to somewhere between 30 and 300 seconds is recommended)')
        stderr()
        stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
        stderr()

    media_timeout_too_low = config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20
    if media_timeout_too_low:
        stderr(f'[!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={config["MEDIA_TIMEOUT"]} seconds)', color='red')
        stderr(' youtube-dl/yt-dlp will fail to archive any media if set to less than ~20 seconds.')
        stderr(' (Setting it somewhere over 60 seconds is recommended)')
        stderr()
        stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
        stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
        stderr()
|
||||
|
||||
|
||||
|
||||
|
||||
def check_data_folder(config: benedict) -> None:
    """Exit with status 2 unless ``config['OUTPUT_DIR']`` has an ``archive`` subfolder.

    The presence of ``<OUTPUT_DIR>/archive`` is the only thing checked; when it
    is missing, usage hints are written via ``stderr`` before exiting.
    """
    output_dir = config['OUTPUT_DIR']

    if (Path(output_dir) / 'archive').exists():
        return

    stderr('[X] No archivebox index found in the current directory.', color='red')
    stderr(f' {output_dir}', color='lightyellow')
    stderr()
    stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**config['ANSI']))
    stderr(' cd path/to/your/archive/folder')
    stderr(' archivebox [command]')
    stderr()
    stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**config['ANSI']))
    stderr(' archivebox init')
    raise SystemExit(2)
|
||||
|
||||
|
||||
def check_migrations(config: benedict):
    """Exit with status 3 if any migration is unapplied, else ensure data subfolders exist.

    ``list_migrations()`` is consumed as ``(status, name)`` pairs; every pair
    with a falsy status counts as pending. When nothing is pending, the
    sources/logs/cache folders plus ``<lib>/bin`` and ``<personas>/Default``
    are created under ``config['OUTPUT_DIR']`` if they do not exist yet.
    """
    output_dir = config['OUTPUT_DIR']

    # imported lazily inside the function, as in the original
    from ..index.sql import list_migrations

    pending_migrations = [
        migration_name
        for is_applied, migration_name in list_migrations()
        if not is_applied
    ]

    if pending_migrations:
        stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
        stderr(f' {output_dir}')
        stderr()
        stderr(f' To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:')
        stderr(' archivebox init')
        raise SystemExit(3)

    # top-level folders: parent must already exist (no parents=True)
    for dir_name_key in ('SOURCES_DIR_NAME', 'LOGS_DIR_NAME', 'CACHE_DIR_NAME'):
        (Path(output_dir) / config[dir_name_key]).mkdir(exist_ok=True)

    # nested folders: intermediate directories are created as needed
    for dir_name_key, subfolder in (('LIB_DIR_NAME', 'bin'), ('PERSONAS_DIR_NAME', 'Default')):
        (Path(output_dir) / config[dir_name_key] / subfolder).mkdir(exist_ok=True, parents=True)
|
30
archivebox/misc/debugging.py
Normal file
30
archivebox/misc/debugging.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
from functools import wraps
|
||||
from time import time
|
||||
|
||||
def timed_function(func):
    """
    Very simple profiling decorator for debugging.

    Prints one ``[DEBUG][<ms>ms] <module>.<name>(...)`` line to stdout after
    every successful call of the wrapped function and returns the wrapped
    function's result unchanged. Nothing is printed if the call raises.

    Usage:
        @timed_function
        def my_func():
            ...

    More advanced alternatives:
        - viztracer ../.venv/bin/archivebox manage check # https://viztracer.readthedocs.io/en/latest/filter.html
        - python -m cProfile -o archivebox.prof ../.venv/bin/archivebox manage check; snakeviz archivebox.prof
        - Django Debug Toolbar + django-debug-toolbar-flamegraph
          + Django Requests Tracker (requests-tracker)
    """
    # perf_counter() is monotonic and high-resolution; the wall clock time()
    # can jump (NTP sync, DST, manual changes) and yield bogus durations.
    # Imported locally so no module-level import needs to change.
    from time import perf_counter

    @wraps(func)
    def wrap(*args, **kwargs):
        # label with the first positional argument's __module__ when it has one
        # (presumably `self`/`cls` for methods), else the function's own module
        if args and hasattr(args[0], '__module__'):
            module = args[0].__module__
        else:
            module = func.__module__

        ts_start = perf_counter()
        result = func(*args, **kwargs)
        ms_elapsed = int((perf_counter() - ts_start) * 1000)

        print(f'[DEBUG][{ms_elapsed}ms] {module}.{func.__name__}(...)')
        return result

    return wrap
|
77
archivebox/misc/logging.py
Normal file
77
archivebox/misc/logging.py
Normal file
|
@ -0,0 +1,77 @@
|
|||
__package__ = 'archivebox.misc'
|
||||
|
||||
# TODO: merge/dedupe this file with archivebox/logging_util.py
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional, Union, Tuple, List
|
||||
from collections import defaultdict
|
||||
from benedict import benedict
|
||||
from rich.console import Console
|
||||
|
||||
from ..config_stubs import ConfigDict
|
||||
|
||||
# SHOW_PROGRESS starts out tri-state: True when the env var explicitly enables
# it, otherwise None (undecided) until the console has been created below.
SHOW_PROGRESS = (
    True
    if os.environ.get('SHOW_PROGRESS', 'None') in ('True', '1', 'true', 'yes')
    else None
)

CONSOLE = Console(force_interactive=SHOW_PROGRESS)
if SHOW_PROGRESS is None:
    # no explicit opt-in: fall back to whatever the console reports
    SHOW_PROGRESS = CONSOLE.is_interactive

# ANSI escape sequences keyed by color name
DEFAULT_CLI_COLORS = benedict({
    "reset": "\033[00;00m",
    "lightblue": "\033[01;30m",
    "lightyellow": "\033[01;33m",
    "lightred": "\033[01;35m",
    "red": "\033[01;31m",
    "green": "\033[01;32m",
    "blue": "\033[01;34m",
    "white": "\033[01;37m",
    "black": "\033[01;30m",
})

# same keys as DEFAULT_CLI_COLORS, but every code is the empty string (no color)
ANSI = benedict({color_name: '' for color_name in DEFAULT_CLI_COLORS.keys()})

# pairs of RGB tuples keyed by two-digit code; unknown codes map to black/black
COLOR_DICT = defaultdict(
    lambda: [(0, 0, 0), (0, 0, 0)],
    {
        '00': [(0, 0, 0), (0, 0, 0)],
        '30': [(0, 0, 0), (0, 0, 0)],
        '31': [(255, 0, 0), (128, 0, 0)],
        '32': [(0, 200, 0), (0, 128, 0)],
        '33': [(255, 255, 0), (128, 128, 0)],
        '34': [(0, 0, 255), (0, 0, 128)],
        '35': [(255, 0, 255), (128, 0, 128)],
        '36': [(0, 255, 255), (0, 128, 128)],
        '37': [(255, 255, 255), (255, 255, 255)],
    },
)
|
||||
|
||||
# Logging Helpers
|
||||
def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
    """Write ``args`` (stringified, space-joined) plus a newline to ``sys.stdout``.

    Args:
        *args: values to print; each is passed through ``str()``.
        color: optional key into the color table; when given, the message is
            wrapped in that color code and the ``reset`` code.
        prefix: text emitted before the (possibly colored) message.
        config: optional mapping; real escape codes are used only when its
            ``USE_COLOR`` entry is truthy, otherwise the all-empty ``ANSI``
            table is used and ``color`` has no visible effect.
    """
    use_color = (config or {}).get('USE_COLOR')
    palette = DEFAULT_CLI_COLORS if use_color else ANSI

    if color:
        pieces = [palette[color], ' '.join(str(arg) for arg in args), palette['reset'], '\n']
    else:
        pieces = [' '.join(str(arg) for arg in args), '\n']

    sys.stdout.write(prefix + ''.join(pieces))
|
||||
|
||||
def stderr(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
    """Write ``args`` (stringified, space-joined) plus a newline to ``sys.stderr``.

    Args:
        *args: values to print; each is passed through ``str()``. With no
            args, only ``prefix`` and a newline are written.
        color: optional key into the color table; when given, the message is
            wrapped in that color code and the ``reset`` code.
        prefix: text emitted before the (possibly colored) message.
        config: optional mapping; real escape codes are used only when its
            ``USE_COLOR`` entry is truthy, otherwise the all-empty ``ANSI``
            table is used and ``color`` has no visible effect.
    """
    use_color = (config or {}).get('USE_COLOR')
    palette = DEFAULT_CLI_COLORS if use_color else ANSI

    if color:
        pieces = [palette[color], ' '.join(str(arg) for arg in args), palette['reset'], '\n']
    else:
        pieces = [' '.join(str(arg) for arg in args), '\n']

    sys.stderr.write(prefix + ''.join(pieces))
|
||||
|
||||
def hint(text: Union[Tuple[str, ...], List[str], str], prefix=' ', config: Optional[ConfigDict]=None) -> None:
    """Print a ``Hint:`` message to stderr.

    Args:
        text: a single string, or a tuple/list of lines. The first line is
            printed after the ``Hint:`` label; any remaining lines are
            printed below it, each preceded by ``prefix``.
        prefix: text placed in front of every emitted line.
        config: optional mapping; the label is colored only when its
            ``USE_COLOR`` entry is truthy.
    """
    use_color = (config or {}).get('USE_COLOR')
    palette = DEFAULT_CLI_COLORS if use_color else ANSI

    # normalize to "first line + continuation lines"
    if isinstance(text, str):
        first_line, continuation = text, ()
    else:
        first_line, continuation = text[0], text[1:]

    stderr('{}{lightred}Hint:{reset} {}'.format(prefix, first_line, **palette))
    for extra_line in continuation:
        stderr('{} {}'.format(prefix, extra_line))
|
|
@ -10,7 +10,6 @@ import datetime
|
|||
from django.utils import timezone
|
||||
timezone.utc = datetime.timezone.utc
|
||||
|
||||
|
||||
# monkey patch django-signals-webhooks to change how it shows up in Admin UI
|
||||
# from signal_webhooks.apps import DjangoSignalWebhooksConfig
|
||||
# DjangoSignalWebhooksConfig.verbose_name = 'API'
|
||||
|
|
6
archivebox/package-lock.json
generated
6
archivebox/package-lock.json
generated
|
@ -371,9 +371,9 @@
|
|||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/bare-events": {
|
||||
"version": "2.4.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.4.2.tgz",
|
||||
"integrity": "sha512-qMKFd2qG/36aA4GwvKq8MxnPgCQAmBWmSyLWsJcbn8v03wvIPQ/hG1Ms8bPzndZxMDoHpxez5VOS+gC9Yi24/Q==",
|
||||
"version": "2.5.0",
|
||||
"resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.5.0.tgz",
|
||||
"integrity": "sha512-/E8dDe9dsbLyh2qrZ64PEPadOQ0F4gbl1sUJOrmph7xOiIxfY8vwab/4bFLh4Y88/Hk/ujKcrQKc+ps0mv873A==",
|
||||
"license": "Apache-2.0",
|
||||
"optional": true
|
||||
},
|
||||
|
|
|
@ -3,6 +3,7 @@ __package__ = "archivebox.plugantic"
|
|||
from typing import Dict, List
|
||||
from typing_extensions import Self
|
||||
|
||||
from benedict import benedict
|
||||
from pydantic import Field, InstanceOf, validate_call
|
||||
from pydantic_pkgr import (
|
||||
Binary,
|
||||
|
@ -17,7 +18,6 @@ from pydantic_pkgr import (
|
|||
from django.conf import settings
|
||||
|
||||
from .base_hook import BaseHook, HookType
|
||||
from ..config_stubs import AttrDict
|
||||
|
||||
|
||||
class BaseBinProvider(BaseHook, BinProvider):
|
||||
|
@ -38,7 +38,7 @@ class BaseBinProvider(BaseHook, BinProvider):
|
|||
def register(self, settings, parent_plugin=None):
|
||||
# self._plugin = parent_plugin # for debugging only, never rely on this!
|
||||
|
||||
settings.BINPROVIDERS = getattr(settings, "BINPROVIDERS", None) or AttrDict({})
|
||||
settings.BINPROVIDERS = getattr(settings, "BINPROVIDERS", None) or benedict({})
|
||||
settings.BINPROVIDERS[self.id] = self
|
||||
|
||||
super().register(settings, parent_plugin=parent_plugin)
|
||||
|
@ -58,7 +58,7 @@ class BaseBinary(BaseHook, Binary):
|
|||
def register(self, settings, parent_plugin=None):
|
||||
# self._plugin = parent_plugin # for debugging only, never rely on this!
|
||||
|
||||
settings.BINARIES = getattr(settings, "BINARIES", None) or AttrDict({})
|
||||
settings.BINARIES = getattr(settings, "BINARIES", None) or benedict({})
|
||||
settings.BINARIES[self.id] = self
|
||||
|
||||
super().register(settings, parent_plugin=parent_plugin)
|
||||
|
|
|
@ -28,7 +28,7 @@ class BaseCheck(BaseHook):
|
|||
def register(self, settings, parent_plugin=None):
|
||||
# self._plugin = parent_plugin # backref to parent is for debugging only, never rely on this!
|
||||
|
||||
self.register_with_django_check_system() # (SIDE EFFECT)
|
||||
self.register_with_django_check_system(settings) # (SIDE EFFECT)
|
||||
|
||||
# install hook into settings.CHECKS
|
||||
settings.CHECKS = getattr(settings, "CHECKS", None) or AttrDict({})
|
||||
|
@ -37,12 +37,9 @@ class BaseCheck(BaseHook):
|
|||
# record installed hook in settings.HOOKS
|
||||
super().register(settings, parent_plugin=parent_plugin)
|
||||
|
||||
def register_with_django_check_system(self):
|
||||
|
||||
def register_with_django_check_system(self, settings):
|
||||
def run_check(app_configs, **kwargs) -> List[Warning]:
|
||||
from django.conf import settings
|
||||
import logging
|
||||
|
||||
return self.check(settings, logging.getLogger("checks"))
|
||||
|
||||
run_check.__name__ = self.id
|
||||
|
|
|
@ -96,14 +96,13 @@ class BaseHook(BaseModel):
|
|||
# e.g. /admin/environment/config/LdapConfig/
|
||||
return f"/admin/environment/{self.hook_type.lower()}/{self.id}/"
|
||||
|
||||
|
||||
def register(self, settings, parent_plugin=None):
|
||||
"""Load a record of an installed hook into global Django settings.HOOKS at runtime."""
|
||||
self._plugin = parent_plugin # for debugging only, never rely on this!
|
||||
|
||||
# assert json.dumps(self.model_json_schema(), indent=4), f"Hook {self.hook_module} has invalid JSON schema."
|
||||
|
||||
print(' -', self.hook_module, '.register()')
|
||||
# print(' -', self.hook_module, '.register()')
|
||||
|
||||
# record installed hook in settings.HOOKS
|
||||
settings.HOOKS[self.id] = self
|
||||
|
@ -118,7 +117,7 @@ class BaseHook(BaseModel):
|
|||
def ready(self, settings):
|
||||
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
|
||||
|
||||
print(' -', self.hook_module, '.ready()')
|
||||
# print(' -', self.hook_module, '.ready()')
|
||||
|
||||
assert self.id in settings.HOOKS, f"Tried to ready hook {self.hook_module} but it is not registered in settings.HOOKS."
|
||||
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
__package__ = 'archivebox.plugantic'
|
||||
|
||||
import json
|
||||
import inspect
|
||||
from pathlib import Path
|
||||
|
||||
|
@ -18,10 +17,11 @@ from pydantic import (
|
|||
computed_field,
|
||||
validate_call,
|
||||
)
|
||||
from benedict import benedict
|
||||
|
||||
from .base_hook import BaseHook, HookType
|
||||
|
||||
from ..config import AttrDict
|
||||
from ..config import bump_startup_progress_bar
|
||||
|
||||
|
||||
class BasePlugin(BaseModel):
|
||||
|
@ -90,7 +90,8 @@ class BasePlugin(BaseModel):
|
|||
|
||||
assert self.app_label and self.app_label and self.verbose_name, f'{self.__class__.__name__} is missing .name or .app_label or .verbose_name'
|
||||
|
||||
assert json.dumps(self.model_json_schema(), indent=4), f"Plugin {self.plugin_module} has invalid JSON schema."
|
||||
# assert json.dumps(self.model_json_schema(), indent=4), f"Plugin {self.plugin_module} has invalid JSON schema."
|
||||
|
||||
return self
|
||||
|
||||
@property
|
||||
|
@ -114,13 +115,13 @@ class BasePlugin(BaseModel):
|
|||
|
||||
@property
|
||||
def HOOKS_BY_ID(self) -> Dict[str, InstanceOf[BaseHook]]:
|
||||
return AttrDict({hook.id: hook for hook in self.hooks})
|
||||
return benedict({hook.id: hook for hook in self.hooks})
|
||||
|
||||
@property
|
||||
def HOOKS_BY_TYPE(self) -> Dict[HookType, Dict[str, InstanceOf[BaseHook]]]:
|
||||
hooks = AttrDict({})
|
||||
hooks = benedict({})
|
||||
for hook in self.hooks:
|
||||
hooks[hook.hook_type] = hooks.get(hook.hook_type) or AttrDict({})
|
||||
hooks[hook.hook_type] = hooks.get(hook.hook_type) or benedict({})
|
||||
hooks[hook.hook_type][hook.id] = hook
|
||||
return hooks
|
||||
|
||||
|
@ -131,10 +132,10 @@ class BasePlugin(BaseModel):
|
|||
from django.conf import settings as django_settings
|
||||
settings = django_settings
|
||||
|
||||
print()
|
||||
print(self.plugin_module_full, '.register()')
|
||||
# print()
|
||||
# print(self.plugin_module_full, '.register()')
|
||||
|
||||
assert json.dumps(self.model_json_schema(), indent=4), f'Plugin {self.plugin_module} has invalid JSON schema.'
|
||||
# assert json.dumps(self.model_json_schema(), indent=4), f'Plugin {self.plugin_module} has invalid JSON schema.'
|
||||
|
||||
assert self.id not in settings.PLUGINS, f'Tried to register plugin {self.plugin_module} but it conflicts with existing plugin of the same name ({self.app_label}).'
|
||||
|
||||
|
@ -149,6 +150,7 @@ class BasePlugin(BaseModel):
|
|||
|
||||
settings.PLUGINS[self.id]._is_registered = True
|
||||
# print('√ REGISTERED PLUGIN:', self.plugin_module)
|
||||
bump_startup_progress_bar()
|
||||
|
||||
def ready(self, settings=None):
|
||||
"""Runs any runtime code needed when AppConfig.ready() is called (after all models are imported)."""
|
||||
|
@ -157,8 +159,8 @@ class BasePlugin(BaseModel):
|
|||
from django.conf import settings as django_settings
|
||||
settings = django_settings
|
||||
|
||||
print()
|
||||
print(self.plugin_module_full, '.ready()')
|
||||
# print()
|
||||
# print(self.plugin_module_full, '.ready()')
|
||||
|
||||
assert (
|
||||
self.id in settings.PLUGINS and settings.PLUGINS[self.id]._is_registered
|
||||
|
@ -171,6 +173,7 @@ class BasePlugin(BaseModel):
|
|||
hook.ready(settings)
|
||||
|
||||
settings.PLUGINS[self.id]._is_ready = True
|
||||
bump_startup_progress_bar()
|
||||
|
||||
# @validate_call
|
||||
# def install_binaries(self) -> Self:
|
||||
|
|
|
@ -83,338 +83,3 @@ class JSONSchemaWithLambdas(GenerateJsonSchema):
|
|||
# for computed_field properties render them like this instead:
|
||||
# inspect.getsource(field.wrapped_property.fget).split('def ', 1)[-1].split('\n', 1)[-1].strip().strip('return '),
|
||||
|
||||
|
||||
|
||||
### Basic Assertions
|
||||
|
||||
# test_input = """
|
||||
# [SERVER_CONFIG]
|
||||
# IS_TTY=False
|
||||
# USE_COLOR=False
|
||||
# SHOW_PROGRESS=False
|
||||
# IN_DOCKER=False
|
||||
# IN_QEMU=False
|
||||
# PUID=501
|
||||
# PGID=20
|
||||
# OUTPUT_DIR=/opt/archivebox/data
|
||||
# CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
|
||||
# ONLY_NEW=True
|
||||
# TIMEOUT=60
|
||||
# MEDIA_TIMEOUT=3600
|
||||
# OUTPUT_PERMISSIONS=644
|
||||
# RESTRICT_FILE_NAMES=windows
|
||||
# URL_DENYLIST=\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$
|
||||
# URL_ALLOWLIST=None
|
||||
# ADMIN_USERNAME=None
|
||||
# ADMIN_PASSWORD=None
|
||||
# ENFORCE_ATOMIC_WRITES=True
|
||||
# TAG_SEPARATOR_PATTERN=[,]
|
||||
# SECRET_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||
# BIND_ADDR=127.0.0.1:8000
|
||||
# ALLOWED_HOSTS=*
|
||||
# DEBUG=False
|
||||
# PUBLIC_INDEX=True
|
||||
# PUBLIC_SNAPSHOTS=True
|
||||
# PUBLIC_ADD_VIEW=False
|
||||
# FOOTER_INFO=Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.
|
||||
# SNAPSHOTS_PER_PAGE=40
|
||||
# CUSTOM_TEMPLATES_DIR=None
|
||||
# TIME_ZONE=UTC
|
||||
# TIMEZONE=UTC
|
||||
# REVERSE_PROXY_USER_HEADER=Remote-User
|
||||
# REVERSE_PROXY_WHITELIST=
|
||||
# LOGOUT_REDIRECT_URL=/
|
||||
# PREVIEW_ORIGINALS=True
|
||||
# LDAP=False
|
||||
# LDAP_SERVER_URI=None
|
||||
# LDAP_BIND_DN=None
|
||||
# LDAP_BIND_PASSWORD=None
|
||||
# LDAP_USER_BASE=None
|
||||
# LDAP_USER_FILTER=None
|
||||
# LDAP_USERNAME_ATTR=None
|
||||
# LDAP_FIRSTNAME_ATTR=None
|
||||
# LDAP_LASTNAME_ATTR=None
|
||||
# LDAP_EMAIL_ATTR=None
|
||||
# LDAP_CREATE_SUPERUSER=False
|
||||
# SAVE_TITLE=True
|
||||
# SAVE_FAVICON=True
|
||||
# SAVE_WGET=True
|
||||
# SAVE_WGET_REQUISITES=True
|
||||
# SAVE_SINGLEFILE=True
|
||||
# SAVE_READABILITY=True
|
||||
# SAVE_MERCURY=True
|
||||
# SAVE_HTMLTOTEXT=True
|
||||
# SAVE_PDF=True
|
||||
# SAVE_SCREENSHOT=True
|
||||
# SAVE_DOM=True
|
||||
# SAVE_HEADERS=True
|
||||
# SAVE_WARC=True
|
||||
# SAVE_GIT=True
|
||||
# SAVE_MEDIA=True
|
||||
# SAVE_ARCHIVE_DOT_ORG=True
|
||||
# RESOLUTION=1440,2000
|
||||
# GIT_DOMAINS=github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht
|
||||
# CHECK_SSL_VALIDITY=True
|
||||
# MEDIA_MAX_SIZE=750m
|
||||
# USER_AGENT=None
|
||||
# CURL_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)
|
||||
# WGET_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5
|
||||
# CHROME_USER_AGENT=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)
|
||||
# COOKIES_FILE=None
|
||||
# CHROME_USER_DATA_DIR=None
|
||||
# CHROME_TIMEOUT=0
|
||||
# CHROME_HEADLESS=True
|
||||
# CHROME_SANDBOX=True
|
||||
# CHROME_EXTRA_ARGS=[]
|
||||
# YOUTUBEDL_ARGS=['--restrict-filenames', '--trim-filenames', '128', '--write-description', '--write-info-json', '--write-annotations', '--write-thumbnail', '--no-call-home', '--write-sub', '--write-auto-subs', '--convert-subs=srt', '--yes-playlist', '--continue', '--no-abort-on-error', '--ignore-errors', '--geo-bypass', '--add-metadata', '--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)']
|
||||
# YOUTUBEDL_EXTRA_ARGS=[]
|
||||
# WGET_ARGS=['--no-verbose', '--adjust-extension', '--convert-links', '--force-directories', '--backup-converted', '--span-hosts', '--no-parent', '-e', 'robots=off']
|
||||
# WGET_EXTRA_ARGS=[]
|
||||
# CURL_ARGS=['--silent', '--location', '--compressed']
|
||||
# CURL_EXTRA_ARGS=[]
|
||||
# GIT_ARGS=['--recursive']
|
||||
# SINGLEFILE_ARGS=[]
|
||||
# SINGLEFILE_EXTRA_ARGS=[]
|
||||
# MERCURY_ARGS=['--format=text']
|
||||
# MERCURY_EXTRA_ARGS=[]
|
||||
# FAVICON_PROVIDER=https://www.google.com/s2/favicons?domain={}
|
||||
# USE_INDEXING_BACKEND=True
|
||||
# USE_SEARCHING_BACKEND=True
|
||||
# SEARCH_BACKEND_ENGINE=ripgrep
|
||||
# SEARCH_BACKEND_HOST_NAME=localhost
|
||||
# SEARCH_BACKEND_PORT=1491
|
||||
# SEARCH_BACKEND_PASSWORD=SecretPassword
|
||||
# SEARCH_PROCESS_HTML=True
|
||||
# SONIC_COLLECTION=archivebox
|
||||
# SONIC_BUCKET=snapshots
|
||||
# SEARCH_BACKEND_TIMEOUT=90
|
||||
# FTS_SEPARATE_DATABASE=True
|
||||
# FTS_TOKENIZERS=porter unicode61 remove_diacritics 2
|
||||
# FTS_SQLITE_MAX_LENGTH=1000000000
|
||||
# USE_CURL=True
|
||||
# USE_WGET=True
|
||||
# USE_SINGLEFILE=True
|
||||
# USE_READABILITY=True
|
||||
# USE_MERCURY=True
|
||||
# USE_GIT=True
|
||||
# USE_CHROME=True
|
||||
# USE_NODE=True
|
||||
# USE_YOUTUBEDL=True
|
||||
# USE_RIPGREP=True
|
||||
# CURL_BINARY=curl
|
||||
# GIT_BINARY=git
|
||||
# WGET_BINARY=wget
|
||||
# SINGLEFILE_BINARY=single-file
|
||||
# READABILITY_BINARY=readability-extractor
|
||||
# MERCURY_BINARY=postlight-parser
|
||||
# YOUTUBEDL_BINARY=yt-dlp
|
||||
# NODE_BINARY=node
|
||||
# RIPGREP_BINARY=rg
|
||||
# CHROME_BINARY=chrome
|
||||
# POCKET_CONSUMER_KEY=None
|
||||
# USER=squash
|
||||
# PACKAGE_DIR=/opt/archivebox/archivebox
|
||||
# TEMPLATES_DIR=/opt/archivebox/archivebox/templates
|
||||
# ARCHIVE_DIR=/opt/archivebox/data/archive
|
||||
# SOURCES_DIR=/opt/archivebox/data/sources
|
||||
# LOGS_DIR=/opt/archivebox/data/logs
|
||||
# PERSONAS_DIR=/opt/archivebox/data/personas
|
||||
# URL_DENYLIST_PTN=re.compile('\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$', re.IGNORECASE|re.MULTILINE)
|
||||
# URL_ALLOWLIST_PTN=None
|
||||
# DIR_OUTPUT_PERMISSIONS=755
|
||||
# ARCHIVEBOX_BINARY=/opt/archivebox/.venv/bin/archivebox
|
||||
# VERSION=0.8.0
|
||||
# COMMIT_HASH=102e87578c6036bb0132dd1ebd17f8f05ffc880f
|
||||
# BUILD_TIME=2024-05-15 03:28:05 1715768885
|
||||
# VERSIONS_AVAILABLE=None
|
||||
# CAN_UPGRADE=False
|
||||
# PYTHON_BINARY=/opt/archivebox/.venv/bin/python3.10
|
||||
# PYTHON_ENCODING=UTF-8
|
||||
# PYTHON_VERSION=3.10.14
|
||||
# DJANGO_BINARY=/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py
|
||||
# DJANGO_VERSION=5.0.6 final (0)
|
||||
# SQLITE_BINARY=/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py
|
||||
# SQLITE_VERSION=2.6.0
|
||||
# CURL_VERSION=curl 8.4.0 (x86_64-apple-darwin23.0)
|
||||
# WGET_VERSION=GNU Wget 1.24.5
|
||||
# WGET_AUTO_COMPRESSION=True
|
||||
# RIPGREP_VERSION=ripgrep 14.1.0
|
||||
# SINGLEFILE_VERSION=None
|
||||
# READABILITY_VERSION=None
|
||||
# MERCURY_VERSION=None
|
||||
# GIT_VERSION=git version 2.44.0
|
||||
# YOUTUBEDL_VERSION=2024.04.09
|
||||
# CHROME_VERSION=Google Chrome 124.0.6367.207
|
||||
# NODE_VERSION=v21.7.3
|
||||
# """
|
||||
|
||||
|
||||
# expected_output = TOML_HEADER + '''[SERVER_CONFIG]
|
||||
# IS_TTY = false
|
||||
# USE_COLOR = false
|
||||
# SHOW_PROGRESS = false
|
||||
# IN_DOCKER = false
|
||||
# IN_QEMU = false
|
||||
# PUID = 501
|
||||
# PGID = 20
|
||||
# OUTPUT_DIR = "/opt/archivebox/data"
|
||||
# CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
|
||||
# ONLY_NEW = true
|
||||
# TIMEOUT = 60
|
||||
# MEDIA_TIMEOUT = 3600
|
||||
# OUTPUT_PERMISSIONS = 644
|
||||
# RESTRICT_FILE_NAMES = "windows"
|
||||
# URL_DENYLIST = "\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$"
|
||||
# URL_ALLOWLIST = null
|
||||
# ADMIN_USERNAME = null
|
||||
# ADMIN_PASSWORD = null
|
||||
# ENFORCE_ATOMIC_WRITES = true
|
||||
# TAG_SEPARATOR_PATTERN = "[,]"
|
||||
# SECRET_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
# BIND_ADDR = "127.0.0.1:8000"
|
||||
# ALLOWED_HOSTS = "*"
|
||||
# DEBUG = false
|
||||
# PUBLIC_INDEX = true
|
||||
# PUBLIC_SNAPSHOTS = true
|
||||
# PUBLIC_ADD_VIEW = false
|
||||
# FOOTER_INFO = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."
|
||||
# SNAPSHOTS_PER_PAGE = 40
|
||||
# CUSTOM_TEMPLATES_DIR = null
|
||||
# TIME_ZONE = "UTC"
|
||||
# TIMEZONE = "UTC"
|
||||
# REVERSE_PROXY_USER_HEADER = "Remote-User"
|
||||
# REVERSE_PROXY_WHITELIST = ""
|
||||
# LOGOUT_REDIRECT_URL = "/"
|
||||
# PREVIEW_ORIGINALS = true
|
||||
# LDAP = false
|
||||
# LDAP_SERVER_URI = null
|
||||
# LDAP_BIND_DN = null
|
||||
# LDAP_BIND_PASSWORD = null
|
||||
# LDAP_USER_BASE = null
|
||||
# LDAP_USER_FILTER = null
|
||||
# LDAP_USERNAME_ATTR = null
|
||||
# LDAP_FIRSTNAME_ATTR = null
|
||||
# LDAP_LASTNAME_ATTR = null
|
||||
# LDAP_EMAIL_ATTR = null
|
||||
# LDAP_CREATE_SUPERUSER = false
|
||||
# SAVE_TITLE = true
|
||||
# SAVE_FAVICON = true
|
||||
# SAVE_WGET = true
|
||||
# SAVE_WGET_REQUISITES = true
|
||||
# SAVE_SINGLEFILE = true
|
||||
# SAVE_READABILITY = true
|
||||
# SAVE_MERCURY = true
|
||||
# SAVE_HTMLTOTEXT = true
|
||||
# SAVE_PDF = true
|
||||
# SAVE_SCREENSHOT = true
|
||||
# SAVE_DOM = true
|
||||
# SAVE_HEADERS = true
|
||||
# SAVE_WARC = true
|
||||
# SAVE_GIT = true
|
||||
# SAVE_MEDIA = true
|
||||
# SAVE_ARCHIVE_DOT_ORG = true
|
||||
# RESOLUTION = [1440, 2000]
|
||||
# GIT_DOMAINS = "github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht"
|
||||
# CHECK_SSL_VALIDITY = true
|
||||
# MEDIA_MAX_SIZE = "750m"
|
||||
# USER_AGENT = null
|
||||
# CURL_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) curl/curl 8.4.0 (x86_64-apple-darwin23.0)"
|
||||
# WGET_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/) wget/GNU Wget 1.24.5"
|
||||
# CHROME_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/0.8.0 (+https://github.com/ArchiveBox/ArchiveBox/)"
|
||||
# COOKIES_FILE = null
|
||||
# CHROME_USER_DATA_DIR = null
|
||||
# CHROME_TIMEOUT = false
|
||||
# CHROME_HEADLESS = true
|
||||
# CHROME_SANDBOX = true
|
||||
# CHROME_EXTRA_ARGS = []
|
||||
# YOUTUBEDL_ARGS = ["--restrict-filenames", "--trim-filenames", "128", "--write-description", "--write-info-json", "--write-annotations", "--write-thumbnail", "--no-call-home", "--write-sub", "--write-auto-subs", "--convert-subs=srt", "--yes-playlist", "--continue", "--no-abort-on-error", "--ignore-errors", "--geo-bypass", "--add-metadata", "--format=(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)"]
|
||||
# YOUTUBEDL_EXTRA_ARGS = []
|
||||
# WGET_ARGS = ["--no-verbose", "--adjust-extension", "--convert-links", "--force-directories", "--backup-converted", "--span-hosts", "--no-parent", "-e", "robots=off"]
|
||||
# WGET_EXTRA_ARGS = []
|
||||
# CURL_ARGS = ["--silent", "--location", "--compressed"]
|
||||
# CURL_EXTRA_ARGS = []
|
||||
# GIT_ARGS = ["--recursive"]
|
||||
# SINGLEFILE_ARGS = []
|
||||
# SINGLEFILE_EXTRA_ARGS = []
|
||||
# MERCURY_ARGS = ["--format=text"]
|
||||
# MERCURY_EXTRA_ARGS = []
|
||||
# FAVICON_PROVIDER = "https://www.google.com/s2/favicons?domain={}"
|
||||
# USE_INDEXING_BACKEND = true
|
||||
# USE_SEARCHING_BACKEND = true
|
||||
# SEARCH_BACKEND_ENGINE = "ripgrep"
|
||||
# SEARCH_BACKEND_HOST_NAME = "localhost"
|
||||
# SEARCH_BACKEND_PORT = 1491
|
||||
# SEARCH_BACKEND_PASSWORD = "SecretPassword"
|
||||
# SEARCH_PROCESS_HTML = true
|
||||
# SONIC_COLLECTION = "archivebox"
|
||||
# SONIC_BUCKET = "snapshots"
|
||||
# SEARCH_BACKEND_TIMEOUT = 90
|
||||
# FTS_SEPARATE_DATABASE = true
|
||||
# FTS_TOKENIZERS = "porter unicode61 remove_diacritics 2"
|
||||
# FTS_SQLITE_MAX_LENGTH = 1000000000
|
||||
# USE_CURL = true
|
||||
# USE_WGET = true
|
||||
# USE_SINGLEFILE = true
|
||||
# USE_READABILITY = true
|
||||
# USE_MERCURY = true
|
||||
# USE_GIT = true
|
||||
# USE_CHROME = true
|
||||
# USE_NODE = true
|
||||
# USE_YOUTUBEDL = true
|
||||
# USE_RIPGREP = true
|
||||
# CURL_BINARY = "curl"
|
||||
# GIT_BINARY = "git"
|
||||
# WGET_BINARY = "wget"
|
||||
# SINGLEFILE_BINARY = "single-file"
|
||||
# READABILITY_BINARY = "readability-extractor"
|
||||
# MERCURY_BINARY = "postlight-parser"
|
||||
# YOUTUBEDL_BINARY = "yt-dlp"
|
||||
# NODE_BINARY = "node"
|
||||
# RIPGREP_BINARY = "rg"
|
||||
# CHROME_BINARY = "chrome"
|
||||
# POCKET_CONSUMER_KEY = null
|
||||
# USER = "squash"
|
||||
# PACKAGE_DIR = "/opt/archivebox/archivebox"
|
||||
# TEMPLATES_DIR = "/opt/archivebox/archivebox/templates"
|
||||
# ARCHIVE_DIR = "/opt/archivebox/data/archive"
|
||||
# SOURCES_DIR = "/opt/archivebox/data/sources"
|
||||
# LOGS_DIR = "/opt/archivebox/data/logs"
|
||||
# PERSONAS_DIR = "/opt/archivebox/data/personas"
|
||||
# URL_DENYLIST_PTN = "re.compile(\'\\\\.(css|js|otf|ttf|woff|woff2|gstatic\\\\.com|googleapis\\\\.com/css)(\\\\?.*)?$\', re.IGNORECASE|re.MULTILINE)"
|
||||
# URL_ALLOWLIST_PTN = null
|
||||
# DIR_OUTPUT_PERMISSIONS = 755
|
||||
# ARCHIVEBOX_BINARY = "/opt/archivebox/.venv/bin/archivebox"
|
||||
# VERSION = "0.8.0"
|
||||
# COMMIT_HASH = "102e87578c6036bb0132dd1ebd17f8f05ffc880f"
|
||||
# BUILD_TIME = "2024-05-15 03:28:05 1715768885"
|
||||
# VERSIONS_AVAILABLE = null
|
||||
# CAN_UPGRADE = false
|
||||
# PYTHON_BINARY = "/opt/archivebox/.venv/bin/python3.10"
|
||||
# PYTHON_ENCODING = "UTF-8"
|
||||
# PYTHON_VERSION = "3.10.14"
|
||||
# DJANGO_BINARY = "/opt/archivebox/.venv/lib/python3.10/site-packages/django/__init__.py"
|
||||
# DJANGO_VERSION = "5.0.6 final (0)"
|
||||
# SQLITE_BINARY = "/opt/homebrew/Cellar/python@3.10/3.10.14/Frameworks/Python.framework/Versions/3.10/lib/python3.10/sqlite3/dbapi2.py"
|
||||
# SQLITE_VERSION = "2.6.0"
|
||||
# CURL_VERSION = "curl 8.4.0 (x86_64-apple-darwin23.0)"
|
||||
# WGET_VERSION = "GNU Wget 1.24.5"
|
||||
# WGET_AUTO_COMPRESSION = true
|
||||
# RIPGREP_VERSION = "ripgrep 14.1.0"
|
||||
# SINGLEFILE_VERSION = null
|
||||
# READABILITY_VERSION = null
|
||||
# MERCURY_VERSION = null
|
||||
# GIT_VERSION = "git version 2.44.0"
|
||||
# YOUTUBEDL_VERSION = "2024.04.09"
|
||||
# CHROME_VERSION = "Google Chrome 124.0.6367.207"
|
||||
# NODE_VERSION = "v21.7.3"'''
|
||||
|
||||
|
||||
# first_output = convert(test_input) # make sure ini -> toml parses correctly
|
||||
# second_output = convert(first_output) # make sure toml -> toml parses/dumps consistently
|
||||
# assert first_output == second_output == expected_output # make sure parsing is idempotent
|
||||
|
||||
# # DEBUGGING
|
||||
# import sys
|
||||
# import difflib
|
||||
# sys.stdout.writelines(difflib.context_diff(first_output, second_output, fromfile='first', tofile='second'))
|
||||
# print(repr(second_output))
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
__package__ = 'archivebox.plugins_extractor.chrome'
|
||||
|
||||
import platform
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, ClassVar
|
||||
|
@ -77,40 +79,16 @@ def create_macos_app_symlink(target: Path, shortcut: Path):
|
|||
###################### Config ##########################
|
||||
|
||||
|
||||
class ChromeDependencyConfigs(BaseConfigSet):
|
||||
class ChromeConfig(BaseConfigSet):
    """Config options for the Chrome dependency (DEPENDENCY_CONFIG section)."""

    section: ClassVar[ConfigSectionName] = "DEPENDENCY_CONFIG"

    CHROME_BINARY: str = Field(default='chrome')
    CHROME_ARGS: List[str] | None = Field(default=None)
    CHROME_EXTRA_ARGS: List[str] = Field(default=[])
    # NOTE(review): the default below is a zero-argument callable rather than
    # a list; presumably BaseConfigSet evaluates callable defaults when the
    # config is loaded -- confirm before relying on the raw field default.
    CHROME_DEFAULT_ARGS: List[str] = Field(default=lambda: ['--timeout={TIMEOUT-10}'])
|
||||
|
||||
class ChromeConfigs(ChromeDependencyConfigs):
|
||||
# section: ConfigSectionName = 'ALL_CONFIGS'
|
||||
pass
|
||||
|
||||
DEFAULT_GLOBAL_CONFIG = {
|
||||
}
|
||||
|
||||
CHROME_CONFIG = ChromeConfigs(**DEFAULT_GLOBAL_CONFIG)
|
||||
CHROME_CONFIG = ChromeConfig()
|
||||
|
||||
|
||||
class ChromeBinary(BaseBinary):
|
||||
|
@ -133,6 +111,7 @@ class ChromeBinary(BaseBinary):
|
|||
def symlink_to_lib(binary, bin_dir=settings.CONFIG.BIN_DIR) -> None:
|
||||
if not (binary.abspath and binary.abspath.exists()):
|
||||
return
|
||||
|
||||
bin_dir.mkdir(parents=True, exist_ok=True)
|
||||
symlink = bin_dir / binary.name
|
||||
|
||||
|
@ -146,7 +125,6 @@ class ChromeBinary(BaseBinary):
|
|||
|
||||
CHROME_BINARY = ChromeBinary()
|
||||
|
||||
PLUGIN_BINARIES = [CHROME_BINARY]
|
||||
|
||||
class ChromePlugin(BasePlugin):
|
||||
app_label: str = 'chrome'
|
||||
|
|
|
@ -149,6 +149,7 @@ class CheckUserIsNotRoot(BaseCheck):
|
|||
)
|
||||
logger.debug('[√] UID is not root')
|
||||
return errors
|
||||
|
||||
|
||||
class CheckPipEnvironment(BaseCheck):
|
||||
label: str = "CheckPipEnvironment"
|
||||
|
|
|
@ -14,7 +14,7 @@ from .utils import get_indexable_content, log_index_started
|
|||
|
||||
|
||||
def import_backend():
    """Return the search backend selected by ``SEARCH_BACKEND_ENGINE``.

    Scans the values of ``settings.SEARCH_BACKENDS`` for the backend whose
    ``name`` equals ``settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE``.

    Returns:
        The first backend whose ``name`` matches the configured engine.

    Raises:
        Exception: if no backend in ``settings.SEARCH_BACKENDS`` has that name.
    """
    # Read the configured engine name once instead of on every iteration.
    engine = settings.CONFIGS.SearchBackendConfig.SEARCH_BACKEND_ENGINE

    # Iterate the backend objects via .values(); the loop over the bare
    # container that this replaces is dropped.
    for backend in settings.SEARCH_BACKENDS.values():
        if backend.name == engine:
            return backend

    raise Exception(f'Could not load {engine} as search backend')
|
||||
|
|
Loading…
Reference in a new issue