diff --git a/archivebox/base_models/models.py b/archivebox/base_models/models.py index 179e2e7e..ca11520c 100644 --- a/archivebox/base_models/models.py +++ b/archivebox/base_models/models.py @@ -21,6 +21,8 @@ from django.urls import reverse_lazy from django_stubs_ext.db.models import TypedModelMeta +from archivebox.index.json import to_json + from .abid import ( ABID, ABID_LEN, @@ -438,7 +440,7 @@ class ModelWithOutputDir(ABIDModel): def write_indexes(self): """Write the Snapshot json, html, and merkle indexes to its output dir""" - print(f'{self}.write_indexes()') + print(f'{type(self).__name__}[{self.ABID}].write_indexes()') self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True) self.migrate_output_dir() self.save_merkle_index() @@ -447,14 +449,14 @@ class ModelWithOutputDir(ABIDModel): def migrate_output_dir(self): """Move the output files to the new folder structure if needed""" - print(f'{self}.migrate_output_dir()') + print(f'{type(self).__name__}[{self.ABID}].migrate_output_dir()') self.migrate_from_0_7_2() self.migrate_from_0_8_6() # ... future migrations here def migrate_from_0_7_2(self) -> None: """Migrate output_dir generated by ArchiveBox <= 0.7.2 to current version""" - print(f'{self}.migrate_from_0_7_2()') + print(f'{type(self).__name__}[{self.ABID}].migrate_from_0_7_2()') # move /data/archive/ -> /data/archive/snapshots/ # update self.output_path = /data/archive/snapshots/ pass @@ -462,27 +464,28 @@ class ModelWithOutputDir(ABIDModel): def migrate_from_0_8_6(self) -> None: """Migrate output_dir generated by ArchiveBox <= 0.8.6 to current version""" # ... future migration code here ... 
- print(f'{self}.migrate_from_0_8_6()') + print(f'{type(self).__name__}[{self.ABID}].migrate_from_0_8_6()') pass def save_merkle_index(self, **kwargs) -> None: """Write the ./.index.merkle file to the output dir""" # write self.generate_merkle_tree() to self.output_dir / '.index.merkle' - print(f'{self}.save_merkle_index()') + print(f'{type(self).__name__}[{self.ABID}].save_merkle_index()') pass def save_html_index(self, **kwargs) -> None: # write self.as_html() to self.output_dir / 'index.html' - print(f'{self}.save_html_index()') + print(f'{type(self).__name__}[{self.ABID}].save_html_index()') pass def save_json_index(self, **kwargs) -> None: - print(f'{self}.save_json_index()') + print(f'{type(self).__name__}[{self.ABID}].save_json_index()') # write self.as_json() to self.output_dir / 'index.json' + (self.OUTPUT_DIR / 'index.json').write_text(to_json(self.as_json())) pass def save_symlinks_index(self) -> None: - print(f'{self}.save_symlinks_index()') + print(f'{type(self).__name__}[{self.ABID}].save_symlinks_index()') # ln -s ../../../../self.output_dir data/index/snapshots_by_date/2024-01-01/example.com/ # ln -s ../../../../self.output_dir data/index/snapshots_by_domain/example.com/2024-01-01/ # ln -s self.output_dir data/archive/1453452234234.21445 diff --git a/archivebox/cli/archivebox_install.py b/archivebox/cli/archivebox_install.py index 4f1ecdcd..44a46a64 100755 --- a/archivebox/cli/archivebox_install.py +++ b/archivebox/cli/archivebox_install.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 __package__ = 'archivebox.cli' -__command__ = 'archivebox install' import os import sys @@ -128,7 +127,9 @@ def install(binproviders: Optional[List[str]]=None, binaries: Optional[List[str] # if we are only installing a single binary, raise the exception so the user can see what went wrong raise - + from archivebox.config.django import setup_django + setup_django() + from django.contrib.auth import get_user_model User = get_user_model() diff --git 
a/archivebox/cli/archivebox_server.py b/archivebox/cli/archivebox_server.py index 470e0d8b..c369e6ce 100644 --- a/archivebox/cli/archivebox_server.py +++ b/archivebox/cli/archivebox_server.py @@ -1,59 +1,45 @@ #!/usr/bin/env python3 __package__ = 'archivebox.cli' -__command__ = 'archivebox server' -import sys -import argparse -from pathlib import Path -from typing import Optional, List, IO +from typing import Iterable -from archivebox.misc.util import docstring -from archivebox.config import DATA_DIR +import rich_click as click +from rich import print + +from archivebox.misc.util import docstring, enforce_types from archivebox.config.common import SERVER_CONFIG -from archivebox.misc.logging_util import SmartFormatter, reject_stdin - -# @enforce_types -def server(runserver_args: Optional[List[str]]=None, - reload: bool=False, - debug: bool=False, - init: bool=False, - quick_init: bool=False, - createsuperuser: bool=False, - daemonize: bool=False, - out_dir: Path=DATA_DIR) -> None: +@enforce_types +def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,), + reload: bool=False, + init: bool=False, + debug: bool=False, + daemonize: bool=False, + nothreading: bool=False) -> None: """Run the ArchiveBox HTTP server""" - from rich import print - - runserver_args = runserver_args or [] + runserver_args = list(runserver_args) if init: - run_subcommand('init', stdin=None, pwd=out_dir) - print() - elif quick_init: - run_subcommand('init', subcommand_args=['--quick'], stdin=None, pwd=out_dir) + from archivebox.cli.archivebox_init import init as archivebox_init + archivebox_init(quick=True) print() - if createsuperuser: - run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir) - print() - - + from archivebox.misc.checks import check_data_folder check_data_folder() from django.core.management import call_command from django.contrib.auth.models import User + from archivebox.config.common import SHELL_CONFIG + if not 
User.objects.filter(is_superuser=True).exclude(username='system').exists(): print() - # print('[yellow][!] No admin accounts exist, you must create one to be able to log in to the Admin UI![/yellow]') print('[violet]Hint:[/violet] To create an [bold]admin username & password[/bold] for the [deep_sky_blue3][underline][link=http://{host}:{port}/admin]Admin UI[/link][/underline][/deep_sky_blue3], run:') print(' [green]archivebox manage createsuperuser[/green]') print() - host = '127.0.0.1' port = '8000' @@ -78,80 +64,28 @@ def server(runserver_args: Optional[List[str]]=None, if SHELL_CONFIG.DEBUG: if not reload: runserver_args.append('--noreload') # '--insecure' + if nothreading: + runserver_args.append('--nothreading') call_command("runserver", *runserver_args) else: from workers.supervisord_util import start_server_workers print() - start_server_workers(host=host, port=port, daemonize=False) + start_server_workers(host=host, port=port, daemonize=daemonize) print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]") - +@click.command() +@click.argument('runserver_args', nargs=-1) +@click.option('--reload', is_flag=True, help='Enable auto-reloading when code or templates change') +@click.option('--debug', is_flag=True, help='Enable DEBUG=True mode with more verbose errors') +@click.option('--nothreading', is_flag=True, help='Force runserver to run in single-threaded mode') +@click.option('--init', is_flag=True, help='Run a full archivebox init/upgrade before starting the server') +@click.option('--daemonize', is_flag=True, help='Run the server in the background as a daemon') @docstring(server.__doc__) -def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None: - parser = argparse.ArgumentParser( - prog=__command__, - description=server.__doc__, - add_help=True, - formatter_class=SmartFormatter, - ) - parser.add_argument( - 'runserver_args', - nargs='*', - type=str, - default=[SERVER_CONFIG.BIND_ADDR], - 
help='Arguments to pass to Django runserver' - ) - parser.add_argument( - '--reload', - action='store_true', - help='Enable auto-reloading when code or templates change', - ) - parser.add_argument( - '--debug', - action='store_true', - help='Enable DEBUG=True mode with more verbose errors', - ) - parser.add_argument( - '--nothreading', - action='store_true', - help='Force runserver to run in single-threaded mode', - ) - parser.add_argument( - '--init', - action='store_true', - help='Run a full archivebox init/upgrade before starting the server', - ) - parser.add_argument( - '--quick-init', '-i', - action='store_true', - help='Run quick archivebox init/upgrade before starting the server', - ) - parser.add_argument( - '--createsuperuser', - action='store_true', - help='Run archivebox manage createsuperuser before starting the server', - ) - parser.add_argument( - '--daemonize', - action='store_true', - help='Run the server in the background as a daemon', - ) - command = parser.parse_args(args or ()) - reject_stdin(__command__, stdin) - - server( - runserver_args=command.runserver_args + (['--nothreading'] if command.nothreading else []), - reload=command.reload, - debug=command.debug, - init=command.init, - quick_init=command.quick_init, - createsuperuser=command.createsuperuser, - daemonize=command.daemonize, - out_dir=Path(pwd) if pwd else DATA_DIR, - ) +def main(**kwargs): + server(**kwargs) if __name__ == '__main__': - main(args=sys.argv[1:], stdin=sys.stdin) + main() diff --git a/archivebox/config/collection.py b/archivebox/config/collection.py index 00578fe6..920323f8 100644 --- a/archivebox/config/collection.py +++ b/archivebox/config/collection.py @@ -23,7 +23,7 @@ def get_real_name(key: str) -> str: for section in CONFIGS.values(): try: return section.aliases[key] - except KeyError: + except (KeyError, AttributeError): pass return key @@ -159,6 +159,9 @@ def write_config_file(config: Dict[str, str]) -> benedict: section = section_for_key(key) assert section 
is not None + if not hasattr(section, 'toml_section_header'): + raise ValueError(f'{key} is read-only (defined in {type(section).__module__}.{type(section).__name__}). Refusing to set.') + section_name = section.toml_section_header if section_name in config_file: diff --git a/archivebox/core/models.py b/archivebox/core/models.py index 5d9c5974..bdf01af4 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -192,7 +192,7 @@ class Snapshot(ModelWithOutputDir, ModelWithStateMachine, ABIDModel): objects = SnapshotManager() def save(self, *args, **kwargs): - print(f'{self}.save()') + print(f'Snapshot[{self.ABID}].save()') if self.pk: existing_snapshot = self.__class__.objects.filter(pk=self.pk).first() if existing_snapshot and existing_snapshot.status == self.StatusChoices.SEALED: @@ -634,7 +634,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithStateMachine, ABIDModel): return repr(self) def save(self, *args, write_indexes: bool=False, **kwargs): - print(f'{self}.save()') + print(f'ArchiveResult[{self.ABID}].save()') # if (self.pk and self.__class__.objects.filter(pk=self.pk).values_list('status', flat=True)[0] in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]): # raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further.') if self.pk: diff --git a/archivebox/misc/monkey_patches.py b/archivebox/misc/monkey_patches.py index 18a68af4..12ed05a1 100644 --- a/archivebox/misc/monkey_patches.py +++ b/archivebox/misc/monkey_patches.py @@ -23,13 +23,19 @@ timezone.utc = datetime.timezone.utc # Install rich for pretty tracebacks in console logs # https://rich.readthedocs.io/en/stable/traceback.html#traceback-handler -from rich.traceback import install +from rich.traceback import install # noqa TERM_WIDTH = (shutil.get_terminal_size((200, 10)).columns - 1) if sys.stdout.isatty() else 200 # os.environ.setdefault('COLUMNS', str(TERM_WIDTH)) install(show_locals=True, word_wrap=False, 
locals_max_length=10, locals_hide_dunder=True, suppress=[django, pydantic], extra_lines=2, width=TERM_WIDTH) +# Hide site-packages/sonic/client.py:115: SyntaxWarning +# https://github.com/xmonader/python-sonic-client/pull/18 +import warnings # noqa +warnings.filterwarnings("ignore", category=SyntaxWarning, module='sonic') + +# Make daphne log requests quieter and easier to read from daphne import access # noqa class ModifiedAccessLogGenerator(access.AccessLogGenerator): @@ -53,7 +59,7 @@ class ModifiedAccessLogGenerator(access.AccessLogGenerator): # clean up the log format to mostly match the same format as django.conf.settings.LOGGING rich formats self.stream.write( - "[%s] HTTP %s (%s) %s\n" + "%s HTTP %s %s %s\n" % ( date.strftime("%Y-%m-%d %H:%M:%S"), request, diff --git a/archivebox/workers/actor.py b/archivebox/workers/actor.py index e4d57ae6..920203a3 100644 --- a/archivebox/workers/actor.py +++ b/archivebox/workers/actor.py @@ -490,13 +490,13 @@ class ActorType(Generic[ModelType]): # abx.pm.hook.on_actor_shutdown(actor=self, last_obj=last_obj, last_error=last_error) def on_tick_start(self, obj_to_process: ModelType) -> None: - print(f'🏃‍♂️ {self}.on_tick_start() {obj_to_process.ABID} {obj_to_process.status} {obj_to_process.retry_at}') + # print(f'🏃‍♂️ {self}.on_tick_start() {obj_to_process.ABID} {obj_to_process.status} {obj_to_process.retry_at}') # abx.pm.hook.on_actor_tick_start(actor=self, obj_to_process=obj) # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') pass def on_tick_end(self, obj_to_process: ModelType) -> None: - print(f'🏃‍♂️ {self}.on_tick_end() {obj_to_process.ABID} {obj_to_process.status} {obj_to_process.retry_at}') + # print(f'🏃‍♂️ {self}.on_tick_end() {obj_to_process.ABID} {obj_to_process.status} {obj_to_process.retry_at}') # abx.pm.hook.on_actor_tick_end(actor=self, obj_to_process=obj_to_process) # self.timer.end() pass