better help text output

This commit is contained in:
Nick Sweeting 2024-10-02 19:46:31 -07:00
parent 968adf64da
commit 035a14b6ea
No known key found for this signature in database
9 changed files with 206 additions and 131 deletions

View file

@ -1,11 +1,12 @@
#!/usr/bin/env python3
#
# █████╗ ██████╗ ██████╗██╗ ██╗██╗██╗ ██╗███████╗██████╗ ██████╗ ██╗ ██╗
# ██╔══██╗██╔══██╗██╔════╝██║ ██║██║██║ ██║██╔════╝██╔══██╗██╔═══██╗╚██╗██╔╝
# ███████║██████╔╝██║ ███████║██║██║ ██║█████╗ ██████╔╝██║ ██║ ╚███╔╝
# ██╔══██║██╔══██╗██║ ██╔══██║██║╚██╗ ██╔╝██╔══╝ ██╔══██╗██║ ██║ ██╔██╗
# ██║ ██║██║ ██║╚██████╗██║ ██║██║ ╚████╔╝ ███████╗██████╔╝╚██████╔╝██╔╝ ██╗
# ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═══╝ ╚══════╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝
ASCII_LOGO = """
"""
# Welcome to the ArchiveBox source code! Thanks for checking it out!
#
# "We are swimming upstream against a great torrent of disorganization.
@ -49,45 +50,47 @@ __version__ = VERSION
__author__ = 'Nick Sweeting'
__license__ = 'MIT'
# ██████████████████████████████████████████████████████████████████████████████████████████████████
# ██████████████████████████████████████████████████████████████████████████████████████████████████
# ██████████████████████████████████████████████████████████████████████████████████████████████████
# ██████████████████████████████████████████████████████████████████████████████████████████████████
# ██████████████████████████████████████████████████████████████████████████████████████████████████
# ██████████████████████████████████████████████████████████████████████████████████████████████████
# ██████████████████████████████████████████████████████████████████████████████████████████████████
# ██ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ████████████████████████████████████ ██
# ██ ██ █████████████████████████ █ ██
# ██ ██ █████████████████████████ █ ██
# ██ ██ █████████████████████████ █ ██
# ██ ██ █████████████████████████ █ ██
# ██ ██ █████████████████████████ █ ██
# ██ ██ █████████████████████████ █ ██
# ██ ██ █████████████████████████ █ ██
# ██ ██ █████████████████████████ █ ██
# ██ ██ █████████████████████████ █ ██
# ██ ████████████████████████████████████ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ██████████████████████████████████████████ ██
# ██ ██████████████████████████████████████████ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ██
# ██ ██
# ████████████████████████████████████████████████████████████████████████████████
ASCII_ICON = """
"""

View file

@ -228,7 +228,7 @@ def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: st
elif command.help or command.subcommand is None:
command.subcommand = 'help'
if command.subcommand not in ('help', 'version', 'status'):
if command.subcommand not in ('version',):
from ..logging_util import log_cli_command
log_cli_command(

View file

@ -1,10 +1,60 @@
from archivebox.logging_util import log_shell_welcome_msg
__package__ = 'archivebox.core'
from rich.console import Console
# helpful imports that make the shell easier to work with out-of-the-box:
import re # noqa
import os # noqa
import sys # noqa
import json # noqa
import psutil # noqa
import django # noqa
import pydantic # noqa
import requests # noqa
import subprocess # noqa
import archivebox # noqa
import abx # noqa
from benedict import benedict # noqa
from django.utils import timezone # noqa
from datetime import datetime, timedelta # noqa
from django.conf import settings # noqa
from archivebox import CONSTANTS # noqa
from ..main import * # noqa
from ..cli import CLI_SUBCOMMANDS
CONFIG = settings.FLAT_CONFIG
CLI_COMMAND_NAMES = ", ".join(CLI_SUBCOMMANDS.keys())
if __name__ == '__main__':
# load the rich extension for ipython for pretty printing
# https://rich.readthedocs.io/en/stable/introduction.html#ipython-extension
get_ipython().run_line_magic('load_ext', 'rich')
get_ipython().run_line_magic('load_ext', 'rich') # type: ignore # noqa
# prnt = print with cropping using ... ellipsis for helptext that doesn't matter that much
console = Console()
prnt = lambda *args, **kwargs: console.print(*args, overflow='ellipsis', soft_wrap=True, **kwargs)
# print the welcome message
log_shell_welcome_msg()
# Show the user which general-purpose modules are listed as pre-imported in the shell
# (module name spelled "requests", matching the `import requests` statement in this file).
prnt('[green]import re, os, sys, psutil, subprocess, requests, json, pydantic, benedict, django, abx[/]')
prnt('[yellow4]# ArchiveBox Imports[/]')
prnt('[yellow4]import archivebox[/]')
prnt('[yellow4]from archivebox.main import {}[/]'.format(CLI_COMMAND_NAMES))
prnt()
if console.width >= 80:
from archivebox.misc.logging import rainbow
prnt(rainbow(archivebox.ASCII_LOGO))
prnt('[i] :heavy_dollar_sign: Welcome to the ArchiveBox Shell!')
prnt(' [deep_sky_blue4]Docs:[/deep_sky_blue4] [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage[/link]')
prnt(' [link=https://docs.archivebox.io/en/latest/modules.html]https://docs.archivebox.io/en/latest/modules.html[/link]')
prnt()
prnt(' :grey_question: [violet]Hint[/] [i]Here are some examples to get started:[/]')
prnt(' add[blink][deep_sky_blue4]?[/deep_sky_blue4][/blink] [grey53]# add ? after anything to get help[/]')
prnt(' add("https://example.com/some/new/url") [grey53]# call CLI methods from the shell[/]')
prnt(' snap = Snapshot.objects.filter(url__contains="https://example.com").last() [grey53]# query for individual snapshots[/]')
prnt(' archivebox.plugins_extractor.wget.apps.WGET_EXTRACTOR.extract(snap.id) [grey53]# call an extractor directly[/]')
prnt(' snap.archiveresult_set.all() [grey53]# see extractor results[/]')
prnt(' bool(re.compile(CONFIG.URL_DENYLIST).search("https://example.com/abc.exe")) [grey53]# test out a config change[/]')

View file

@ -5,7 +5,6 @@ import os
import sys
import stat
import time
import argparse
from math import log
from multiprocessing import Process
@ -20,6 +19,8 @@ if TYPE_CHECKING:
from rich import print
from rich.panel import Panel
from rich_argparse import RichHelpFormatter
from django.core.management.base import DjangoHelpFormatter
from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG
from archivebox.misc.system import get_dir_size
@ -79,12 +80,12 @@ def get_fd_info(fd) -> Dict[str, Any]:
class SmartFormatter(argparse.HelpFormatter):
class SmartFormatter(DjangoHelpFormatter, RichHelpFormatter):
"""Patched formatter that prints newlines in argparse help strings"""
def _split_lines(self, text, width):
if '\n' in text:
return text.splitlines()
return argparse.HelpFormatter._split_lines(self, text, width)
return RichHelpFormatter._split_lines(self, text, width)
def reject_stdin(caller: str, stdin: Optional[IO]=sys.stdin) -> None:
@ -505,23 +506,6 @@ def log_removal_finished(all_links: int, to_remove: int):
print(f' Index now contains {all_links - to_remove} links.')
def log_shell_welcome_msg():
from .cli import CLI_SUBCOMMANDS
print('[green]# ArchiveBox Imports[/]')
print('[green]from core.models import Snapshot, ArchiveResult, Tag, User[/]')
print('[green]from cli import *\n {}[/]'.format("\n ".join(CLI_SUBCOMMANDS.keys())))
print()
print('[i] Welcome to the ArchiveBox Shell!')
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')
print()
print(' [violet]Hint:[/] Example use:')
print(' print(Snapshot.objects.filter(is_archived=True).count())')
print(' Snapshot.objects.get(url="https://example.com").as_json()')
print(' add("https://example.com/some/new/url")')
### Helpers
@enforce_types

View file

@ -90,73 +90,89 @@ from .logging_util import (
def help(out_dir: Path=DATA_DIR) -> None:
"""Print the ArchiveBox help message and usage"""
from rich import print
from rich.panel import Panel
all_subcommands = CLI_SUBCOMMANDS
COMMANDS_HELP_TEXT = '\n '.join(
f'{cmd.ljust(20)} {func.__doc__}'
f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
for cmd, func in all_subcommands.items()
if cmd in meta_cmds
) + '\n\n ' + '\n '.join(
f'{cmd.ljust(20)} {func.__doc__}'
f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
for cmd, func in all_subcommands.items()
if cmd in main_cmds
) + '\n\n ' + '\n '.join(
f'{cmd.ljust(20)} {func.__doc__}'
f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
for cmd, func in all_subcommands.items()
if cmd in archive_cmds
) + '\n\n ' + '\n '.join(
f'{cmd.ljust(20)} {func.__doc__}'
f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
for cmd, func in all_subcommands.items()
if cmd not in display_first
)
DOCKER_USAGE = '''
[dodger_blue3]Docker Usage:[/dodger_blue3]
[grey53]# using Docker Compose:[/grey53]
[blue]docker compose run[/blue] [dark_green]archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
[grey53]# using Docker:[/grey53]
[blue]docker run[/blue] -v [light_slate_blue]$PWD:/data[/light_slate_blue] [grey53]-p 8000:8000[/grey53] -it [dark_green]archivebox/archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
''' if SHELL_CONFIG.IN_DOCKER else ''
DOCKER_DOCS = '\n [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Docker[/link]' if SHELL_CONFIG.IN_DOCKER else ''
DOCKER_OUTSIDE_HINT = "\n [grey53]# outside of Docker:[/grey53]" if SHELL_CONFIG.IN_DOCKER else ''
DOCKER_CMD_PREFIX = "[blue]docker ... [/blue]" if SHELL_CONFIG.IN_DOCKER else ''
print(f'''{DOCKER_USAGE}
[deep_sky_blue4]Usage:[/deep_sky_blue4]{DOCKER_OUTSIDE_HINT}
[dark_green]archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
[deep_sky_blue4]Commands:[/deep_sky_blue4]
{COMMANDS_HELP_TEXT}
[deep_sky_blue4]Documentation:[/deep_sky_blue4]
[link=https://github.com/ArchiveBox/ArchiveBox/wiki]https://github.com/ArchiveBox/ArchiveBox/wiki[/link]{DOCKER_DOCS}
[link=https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#cli-usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Usage[/link]
[link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration[/link]
''')
if CONSTANTS.DATABASE_FILE.exists():
print('''{green}ArchiveBox v{}: The self-hosted internet archive.{reset}
pretty_out_dir = str(out_dir).replace(str(Path('~').expanduser()), '~')
EXAMPLE_USAGE = f'''
[light_slate_blue]DATA DIR[/light_slate_blue]: [yellow]{pretty_out_dir}[/yellow]
{lightred}Active data directory:{reset}
{}
[violet]Hint:[/violet] [i]Common maintenance tasks:[/i]
[dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# make sure database is up-to-date (safe to run multiple times)[/grey53]
[dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# make sure plugins are up-to-date (wget, chrome, singlefile, etc.)[/grey53]
[dark_green]archivebox[/dark_green] [green]status[/green] [grey53]# get a health checkup report on your collection[/grey53]
[dark_green]archivebox[/dark_green] [green]update[/green] [grey53]# retry any previously failed or interrupted archiving tasks[/grey53]
{lightred}Usage:{reset}
archivebox [command] [--help] [--version] [...args]
{lightred}Commands:{reset}
{}
{lightred}Example Use:{reset}
mkdir -p ~/archivebox/data; cd ~/archivebox/data
archivebox init
archivebox install
archivebox version
archivebox status
archivebox add https://example.com/some/page
archivebox add --depth=1 ~/Downloads/bookmarks_export.html
archivebox list --sort=timestamp --csv=timestamp,url,is_archived
archivebox schedule --every=day https://example.com/some/feed.rss
archivebox update --resume=15109948213.123
{lightred}Documentation:{reset}
https://github.com/ArchiveBox/ArchiveBox/wiki
'''.format(VERSION, out_dir, COMMANDS_HELP_TEXT, **SHELL_CONFIG.ANSI))
[violet]Hint:[/violet] [i]More example usage:[/i]
[dark_green]archivebox[/dark_green] [green]add[/green] --depth=1 "https://example.com/some/page"
[dark_green]archivebox[/dark_green] [green]list[/green] --sort=timestamp --csv=timestamp,downloaded_at,url,title
[dark_green]archivebox[/dark_green] [green]schedule[/green] --every=day --depth=1 "https://example.com/some/feed.rss"
[dark_green]archivebox[/dark_green] [green]server[/green] [blue]0.0.0.0:8000[/blue] [grey53]# Start the Web UI / API server[/grey53]
'''
print(Panel(EXAMPLE_USAGE, expand=False, border_style='grey53', title='[green3]:white_check_mark: A collection [light_slate_blue]DATA DIR[/light_slate_blue] is currently active[/green3]', subtitle='Commands run inside this dir will only apply to this collection.'))
else:
print('{green}Welcome to ArchiveBox v{}!{reset}'.format(VERSION, **SHELL_CONFIG.ANSI))
print()
DATA_SETUP_HELP = '\n'
if SHELL_CONFIG.IN_DOCKER:
print('When using Docker, you need to mount a volume to use as your data dir:')
print(' docker run -v /some/path:/data archivebox ...')
print()
print('To import an existing archive (from a previous version of ArchiveBox):')
print(' 1. cd into your data dir DATA_DIR (usually ArchiveBox/output) and run:')
print(' 2. archivebox init')
print()
print('To start a new archive:')
print(' 1. Create an empty directory, then cd into it and run:')
print(' 2. archivebox init')
print()
print('For more information, see the documentation here:')
print(' https://github.com/ArchiveBox/ArchiveBox/wiki')
DATA_SETUP_HELP += '[violet]Hint:[/violet] When using Docker, you need to mount a volume to use as your data dir:\n'
DATA_SETUP_HELP += ' docker run [violet]-v /some/path/data:/data[/violet] archivebox/archivebox ...\n\n'
DATA_SETUP_HELP += 'To load an [dark_blue]existing[/dark_blue] collection:\n'
DATA_SETUP_HELP += ' 1. [green]cd[/green] ~/archivebox/data [grey53]# go into existing [light_slate_blue]DATA DIR[/light_slate_blue] (can be anywhere)[/grey53]\n'
DATA_SETUP_HELP += f' 2. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# migrate to latest version (safe to run multiple times)[/grey53]\n'
DATA_SETUP_HELP += f' 3. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# auto-update all plugins (wget, chrome, singlefile, etc.)[/grey53]\n'
DATA_SETUP_HELP += f' 4. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]help[/green] [grey53]# ...get help with next steps... [/grey53]\n\n'
DATA_SETUP_HELP += 'To start a [sea_green1]new[/sea_green1] collection:\n'
DATA_SETUP_HELP += ' 1. [green]mkdir[/green] ~/archivebox/data [grey53]# create a new, empty [light_slate_blue]DATA DIR[/light_slate_blue] (can be anywhere)[/grey53]\n'
DATA_SETUP_HELP += ' 2. [green]cd[/green] ~/archivebox/data [grey53]# cd into the new directory[/grey53]\n'
DATA_SETUP_HELP += f' 3. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]init[/green] [grey53]# initialize ArchiveBox in the new data dir[/grey53]\n'
DATA_SETUP_HELP += f' 4. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]install[/green] [grey53]# auto-install all plugins (wget, chrome, singlefile, etc.)[/grey53]\n'
DATA_SETUP_HELP += f' 5. {DOCKER_CMD_PREFIX}[dark_green]archivebox[/dark_green] [green]help[/green] [grey53]# ... get help with next steps... [/grey53]\n'
print(Panel(DATA_SETUP_HELP, expand=False, border_style='grey53', title='[red]:cross_mark: No collection is currently active[/red]', subtitle='All archivebox [green]commands[/green] should be run from inside a collection [light_slate_blue]DATA DIR[/light_slate_blue]'))
@enforce_types
@ -423,7 +439,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
if Snapshot.objects.count() < 25: # hide the hints for experienced users
print()
print(' {lightred}Hint:{reset} To view your archive index, run:'.format(**SHELL_CONFIG.ANSI))
print(' archivebox server # then visit http://127.0.0.1:8000')
print(f' archivebox server # then visit [deep_sky_blue4][link=http://127.0.0.1:8000]http://127.0.0.1:8000[/link]')
print()
print(' To add new links, you can run:')
print(" archivebox add < ~/some/path/to/list_of_links.txt")
@ -1202,9 +1218,12 @@ def server(runserver_args: Optional[List[str]]=None,
init: bool=False,
quick_init: bool=False,
createsuperuser: bool=False,
daemonize: bool=False,
out_dir: Path=DATA_DIR) -> None:
"""Run the ArchiveBox HTTP server"""
from rich import print
runserver_args = runserver_args or []
if init:
@ -1226,12 +1245,12 @@ def server(runserver_args: Optional[List[str]]=None,
print('{green}[+] Starting ArchiveBox webserver... {reset}'.format(**SHELL_CONFIG.ANSI))
print('[green][+] Starting ArchiveBox webserver...[/green]')
print(' > Logging errors to ./logs/errors.log')
if not User.objects.filter(is_superuser=True).exists():
print('{lightyellow}[!] No admin users exist yet, you will not be able to edit links in the UI.{reset}'.format(**SHELL_CONFIG.ANSI))
print('[yellow][!] No admin users exist yet, you will not be able to edit links in the UI.[/yellow]')
print()
print(' To create an admin user, run:')
print(' [violet]Hint:[/violet] To create an admin user, run:')
print(' archivebox manage createsuperuser')
print()
@ -1256,15 +1275,15 @@ def server(runserver_args: Optional[List[str]]=None,
except IndexError:
pass
print(f' > Starting ArchiveBox webserver on http://{host}:{port}/')
print(f' [blink][green]>[/green][/blink] Starting ArchiveBox webserver on [deep_sky_blue4][link=http://{host}:{port}]http://{host}:{port}[/link]')
from queues.supervisor_util import start_server_workers
print()
start_server_workers(host=host, port=port)
start_server_workers(host=host, port=port, daemonize=False)
print("\n[🟩] ArchiveBox server shut down gracefully.")
print("\n[i][green][🟩] ArchiveBox server shut down gracefully.[/green][/i]")
@enforce_types

View file

@ -5,14 +5,25 @@ __package__ = 'archivebox.misc'
import sys
from typing import Optional, Union, Tuple, List
from collections import defaultdict
from random import randint
from benedict import benedict
from rich.console import Console
from rich.highlighter import Highlighter
# SETUP RICH CONSOLE / TTY detection / COLOR / PROGRESS BARS
CONSOLE = Console()
IS_TTY = CONSOLE.is_interactive
class RainbowHighlighter(Highlighter):
    """Highlighter that styles every character of a text separately.

    Each character position gets its own ``color(N)`` style, where ``N`` is
    drawn from ``randint(90, 98)`` for that position.
    """

    def highlight(self, text):
        """Apply a randomly numbered color style to each character of ``text``.

        The styles are applied by calling ``text.stylize`` once per character
        position; nothing is returned.
        """
        position = 0
        total = len(text)
        while position < total:
            style = f"color({randint(90, 98)})"
            # each span covers exactly one character: [position, position + 1)
            text.stylize(style, position, position + 1)
            position += 1
rainbow = RainbowHighlighter()
DEFAULT_CLI_COLORS = benedict(
{
"reset": "\033[00;00m",

View file

@ -30,7 +30,7 @@ TERM_WIDTH = (shutil.get_terminal_size((200, 10)).columns - 1) if sys.stdout.isa
install(show_locals=True, word_wrap=False, locals_max_length=10, locals_hide_dunder=True, suppress=[django, pydantic], extra_lines=2, width=TERM_WIDTH)
from daphne import access
from daphne import access # noqa
class ModifiedAccessLogGenerator(access.AccessLogGenerator):
"""Kludge workaround until daphne uses the Python logging framework. https://github.com/django/daphne/pull/473/files"""
@ -62,4 +62,11 @@ class ModifiedAccessLogGenerator(access.AccessLogGenerator):
)
)
access.AccessLogGenerator.write_entry = ModifiedAccessLogGenerator.write_entry
access.AccessLogGenerator.write_entry = ModifiedAccessLogGenerator.write_entry # type: ignore
# fix benedict objects to pretty-print/repr more nicely with rich
# https://stackoverflow.com/a/79048811/2156113
# https://rich.readthedocs.io/en/stable/pretty.html#rich-repr-protocol
import benedict # noqa
benedict.benedict.__rich_repr__ = lambda self: (dict(self),) # type: ignore

View file

@ -5,7 +5,7 @@ import signal
import psutil
import subprocess
from pathlib import Path
from rich.pretty import pprint
from rich import print
from typing import Dict, cast

View file

@ -74,6 +74,7 @@ dependencies = [
"python-crontab>=3.2.0", # for: archivebox schedule
"croniter>=3.0.3", # for: archivebox schedule
"ipython>=8.27.0", # for: archivebox shell
"py-machineid>=0.6.0", # for: machine/detect.py calculating machine guid
"python-benedict[io,parse]>=0.33.2",
"pydantic-settings>=2.5.2",
############# VENDORED LIBS ######################
@ -86,7 +87,7 @@ dependencies = [
"base32-crockford==0.3.0",
############# Extractor Dependencies #############
"yt-dlp>=2024.8.6", # for: media
"py-machineid>=0.6.0",
"rich-argparse>=1.5.2",
]
# pdm lock --group=':all'