working archivebox update CLI cmd

This commit is contained in:
Nick Sweeting 2024-11-19 02:31:59 -08:00
parent a0edf218e8
commit c9a05c9d94
No known key found for this signature in database
7 changed files with 61 additions and 114 deletions

View file

@ -21,6 +21,8 @@ from django.urls import reverse_lazy
from django_stubs_ext.db.models import TypedModelMeta
from archivebox.index.json import to_json
from .abid import (
ABID,
ABID_LEN,
@ -438,7 +440,7 @@ class ModelWithOutputDir(ABIDModel):
def write_indexes(self):
"""Write the Snapshot json, html, and merkle indexes to its output dir"""
print(f'{self}.write_indexes()')
print(f'{type(self).__name__}[{self.ABID}].write_indexes()')
self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
self.migrate_output_dir()
self.save_merkle_index()
@ -447,14 +449,14 @@ class ModelWithOutputDir(ABIDModel):
def migrate_output_dir(self):
"""Move the output files to the new folder structure if needed"""
print(f'{self}.migrate_output_dir()')
print(f'{type(self).__name__}[{self.ABID}].migrate_output_dir()')
self.migrate_from_0_7_2()
self.migrate_from_0_8_6()
# ... future migrations here
def migrate_from_0_7_2(self) -> None:
"""Migrate output_dir generated by ArchiveBox <= 0.7.2 to current version"""
print(f'{self}.migrate_from_0_7_2()')
print(f'{type(self).__name__}[{self.ABID}].migrate_from_0_7_2()')
# move /data/archive/<timestamp> -> /data/archive/snapshots/<abid>
# update self.output_path = /data/archive/snapshots/<abid>
pass
@ -462,27 +464,28 @@ class ModelWithOutputDir(ABIDModel):
def migrate_from_0_8_6(self) -> None:
"""Migrate output_dir generated by ArchiveBox <= 0.8.6 to current version"""
# ... future migration code here ...
print(f'{self}.migrate_from_0_8_6()')
print(f'{type(self).__name__}[{self.ABID}].migrate_from_0_8_6()')
pass
def save_merkle_index(self, **kwargs) -> None:
"""Write the ./.index.merkle file to the output dir"""
# write self.generate_merkle_tree() to self.output_dir / '.index.merkle'
print(f'{self}.save_merkle_index()')
print(f'{type(self).__name__}[{self.ABID}].save_merkle_index()')
pass
def save_html_index(self, **kwargs) -> None:
# write self.as_html() to self.output_dir / 'index.html'
print(f'{self}.save_html_index()')
print(f'{type(self).__name__}[{self.ABID}].save_html_index()')
pass
def save_json_index(self, **kwargs) -> None:
print(f'{self}.save_json_index()')
print(f'{type(self).__name__}[{self.ABID}].save_json_index()')
# write self.as_json() to self.output_dir / 'index.json'
(self.OUTPUT_DIR / 'index.json').write_text(to_json(self.as_json()))
pass
def save_symlinks_index(self) -> None:
print(f'{self}.save_symlinks_index()')
print(f'{type(self).__name__}[{self.ABID}].save_symlinks_index()')
# ln -s ../../../../self.output_dir data/index/snapshots_by_date/2024-01-01/example.com/<abid>
# ln -s ../../../../self.output_dir data/index/snapshots_by_domain/example.com/2024-01-01/<abid>
# ln -s self.output_dir data/archive/1453452234234.21445

View file

@ -1,7 +1,6 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox install'
import os
import sys
@ -128,7 +127,9 @@ def install(binproviders: Optional[List[str]]=None, binaries: Optional[List[str]
# if we are only installing a single binary, raise the exception so the user can see what went wrong
raise
from archivebox.config.django import setup_django
setup_django()
from django.contrib.auth import get_user_model
User = get_user_model()

View file

@ -1,59 +1,45 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox server'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from typing import Iterable
from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR
import rich_click as click
from rich import print
from archivebox.misc.util import docstring, enforce_types
from archivebox.config.common import SERVER_CONFIG
from archivebox.misc.logging_util import SmartFormatter, reject_stdin
# @enforce_types
def server(runserver_args: Optional[List[str]]=None,
reload: bool=False,
debug: bool=False,
init: bool=False,
quick_init: bool=False,
createsuperuser: bool=False,
daemonize: bool=False,
out_dir: Path=DATA_DIR) -> None:
@enforce_types
def server(runserver_args: Iterable[str]=(SERVER_CONFIG.BIND_ADDR,),
reload: bool=False,
init: bool=False,
debug: bool=False,
daemonize: bool=False,
nothreading: bool=False) -> None:
"""Run the ArchiveBox HTTP server"""
from rich import print
runserver_args = runserver_args or []
runserver_args = list(runserver_args)
if init:
run_subcommand('init', stdin=None, pwd=out_dir)
print()
elif quick_init:
run_subcommand('init', subcommand_args=['--quick'], stdin=None, pwd=out_dir)
from archivebox.cli.archivebox_init import init as archivebox_init
archivebox_init(quick=True)
print()
if createsuperuser:
run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
print()
from archivebox.misc.checks import check_data_folder
check_data_folder()
from django.core.management import call_command
from django.contrib.auth.models import User
from archivebox.config.common import SHELL_CONFIG
if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
print()
# print('[yellow][!] No admin accounts exist, you must create one to be able to log in to the Admin UI![/yellow]')
print('[violet]Hint:[/violet] To create an [bold]admin username & password[/bold] for the [deep_sky_blue3][underline][link=http://{host}:{port}/admin]Admin UI[/link][/underline][/deep_sky_blue3], run:')
print(' [green]archivebox manage createsuperuser[/green]')
print()
host = '127.0.0.1'
port = '8000'
@ -78,80 +64,28 @@ def server(runserver_args: Optional[List[str]]=None,
if SHELL_CONFIG.DEBUG:
if not reload:
runserver_args.append('--noreload') # '--insecure'
if nothreading:
runserver_args.append('--nothreading')
call_command("runserver", *runserver_args)
else:
from workers.supervisord_util import start_server_workers
print()
start_server_workers(host=host, port=port, daemonize=False)
start_server_workers(host=host, port=port, daemonize=daemonize)
print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
@click.command()
@click.argument('runserver_args', nargs=-1)
@click.option('--reload', is_flag=True, help='Enable auto-reloading when code or templates change')
@click.option('--debug', is_flag=True, help='Enable DEBUG=True mode with more verbose errors')
@click.option('--nothreading', is_flag=True, help='Force runserver to run in single-threaded mode')
@click.option('--init', is_flag=True, help='Run a full archivebox init/upgrade before starting the server')
@click.option('--daemonize', is_flag=True, help='Run the server in the background as a daemon')
@docstring(server.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
parser = argparse.ArgumentParser(
prog=__command__,
description=server.__doc__,
add_help=True,
formatter_class=SmartFormatter,
)
parser.add_argument(
'runserver_args',
nargs='*',
type=str,
default=[SERVER_CONFIG.BIND_ADDR],
help='Arguments to pass to Django runserver'
)
parser.add_argument(
'--reload',
action='store_true',
help='Enable auto-reloading when code or templates change',
)
parser.add_argument(
'--debug',
action='store_true',
help='Enable DEBUG=True mode with more verbose errors',
)
parser.add_argument(
'--nothreading',
action='store_true',
help='Force runserver to run in single-threaded mode',
)
parser.add_argument(
'--init',
action='store_true',
help='Run a full archivebox init/upgrade before starting the server',
)
parser.add_argument(
'--quick-init', '-i',
action='store_true',
help='Run quick archivebox init/upgrade before starting the server',
)
parser.add_argument(
'--createsuperuser',
action='store_true',
help='Run archivebox manage createsuperuser before starting the server',
)
parser.add_argument(
'--daemonize',
action='store_true',
help='Run the server in the background as a daemon',
)
command = parser.parse_args(args or ())
reject_stdin(__command__, stdin)
server(
runserver_args=command.runserver_args + (['--nothreading'] if command.nothreading else []),
reload=command.reload,
debug=command.debug,
init=command.init,
quick_init=command.quick_init,
createsuperuser=command.createsuperuser,
daemonize=command.daemonize,
out_dir=Path(pwd) if pwd else DATA_DIR,
)
def main(**kwargs):
server(**kwargs)
if __name__ == '__main__':
main(args=sys.argv[1:], stdin=sys.stdin)
main()

View file

@ -23,7 +23,7 @@ def get_real_name(key: str) -> str:
for section in CONFIGS.values():
try:
return section.aliases[key]
except KeyError:
except (KeyError, AttributeError):
pass
return key
@ -159,6 +159,9 @@ def write_config_file(config: Dict[str, str]) -> benedict:
section = section_for_key(key)
assert section is not None
if not hasattr(section, 'toml_section_header'):
raise ValueError(f'{key} is read-only (defined in {type(section).__module__}.{type(section).__name__}). Refusing to set.')
section_name = section.toml_section_header
if section_name in config_file:

View file

@ -192,7 +192,7 @@ class Snapshot(ModelWithOutputDir, ModelWithStateMachine, ABIDModel):
objects = SnapshotManager()
def save(self, *args, **kwargs):
print(f'{self}.save()')
print(f'Snapshot[{self.ABID}].save()')
if self.pk:
existing_snapshot = self.__class__.objects.filter(pk=self.pk).first()
if existing_snapshot and existing_snapshot.status == self.StatusChoices.SEALED:
@ -634,7 +634,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithStateMachine, ABIDModel):
return repr(self)
def save(self, *args, write_indexes: bool=False, **kwargs):
print(f'{self}.save()')
print(f'ArchiveResult[{self.ABID}].save()')
# if (self.pk and self.__class__.objects.filter(pk=self.pk).values_list('status', flat=True)[0] in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]):
# raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further.')
if self.pk:

View file

@ -23,13 +23,19 @@ timezone.utc = datetime.timezone.utc
# Install rich for pretty tracebacks in console logs
# https://rich.readthedocs.io/en/stable/traceback.html#traceback-handler
from rich.traceback import install
from rich.traceback import install # noqa
TERM_WIDTH = (shutil.get_terminal_size((200, 10)).columns - 1) if sys.stdout.isatty() else 200
# os.environ.setdefault('COLUMNS', str(TERM_WIDTH))
install(show_locals=True, word_wrap=False, locals_max_length=10, locals_hide_dunder=True, suppress=[django, pydantic], extra_lines=2, width=TERM_WIDTH)
# Hide site-packages/sonic/client.py:115: SyntaxWarning
# https://github.com/xmonader/python-sonic-client/pull/18
import warnings # noqa
warnings.filterwarnings("ignore", category=SyntaxWarning, module='sonic')
# Make daphne log requests quieter and easier to read
from daphne import access # noqa
class ModifiedAccessLogGenerator(access.AccessLogGenerator):
@ -53,7 +59,7 @@ class ModifiedAccessLogGenerator(access.AccessLogGenerator):
# clean up the log format to mostly match the same format as django.conf.settings.LOGGING rich formats
self.stream.write(
"[%s] HTTP %s (%s) %s\n"
"%s HTTP %s %s %s\n"
% (
date.strftime("%Y-%m-%d %H:%M:%S"),
request,

View file

@ -490,13 +490,13 @@ class ActorType(Generic[ModelType]):
# abx.pm.hook.on_actor_shutdown(actor=self, last_obj=last_obj, last_error=last_error)
def on_tick_start(self, obj_to_process: ModelType) -> None:
print(f'🏃‍♂️ {self}.on_tick_start() {obj_to_process.ABID} {obj_to_process.status} {obj_to_process.retry_at}')
# print(f'🏃‍♂️ {self}.on_tick_start() {obj_to_process.ABID} {obj_to_process.status} {obj_to_process.retry_at}')
# abx.pm.hook.on_actor_tick_start(actor=self, obj_to_process=obj)
# self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ')
pass
def on_tick_end(self, obj_to_process: ModelType) -> None:
print(f'🏃‍♂️ {self}.on_tick_end() {obj_to_process.ABID} {obj_to_process.status} {obj_to_process.retry_at}')
# print(f'🏃‍♂️ {self}.on_tick_end() {obj_to_process.ABID} {obj_to_process.status} {obj_to_process.retry_at}')
# abx.pm.hook.on_actor_tick_end(actor=self, obj_to_process=obj_to_process)
# self.timer.end()
pass