mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-24 21:23:22 +00:00
fix archivebox init and archivebox install CLI commands
This commit is contained in:
parent
5f01fc8307
commit
a0edf218e8
2 changed files with 68 additions and 118 deletions
|
@ -1,30 +1,36 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
__package__ = 'archivebox.cli'
|
||||
__command__ = 'archivebox init'
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Optional, List, IO
|
||||
|
||||
from rich import print
|
||||
import rich_click as click
|
||||
|
||||
from archivebox.misc.util import docstring, enforce_types
|
||||
|
||||
|
||||
from archivebox.misc.util import docstring
|
||||
from archivebox.config import DATA_DIR
|
||||
from archivebox.misc.logging_util import SmartFormatter, reject_stdin
|
||||
|
||||
|
||||
def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Path=DATA_DIR) -> None:
|
||||
@enforce_types
|
||||
def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=False) -> None:
|
||||
"""Initialize a new ArchiveBox collection in the current directory"""
|
||||
|
||||
from core.models import Snapshot
|
||||
from rich import print
|
||||
install = install or setup
|
||||
|
||||
from archivebox.config import CONSTANTS, VERSION, DATA_DIR
|
||||
from archivebox.config.common import SERVER_CONFIG
|
||||
from archivebox.config.collection import write_config_file
|
||||
from archivebox.index import load_main_index, write_main_index, fix_invalid_folder_locations, get_invalid_folders
|
||||
from archivebox.index.schema import Link
|
||||
from archivebox.index.json import parse_json_main_index, parse_json_links_details
|
||||
from archivebox.index.sql import apply_migrations
|
||||
|
||||
# if os.access(out_dir / CONSTANTS.JSON_INDEX_FILENAME, os.F_OK):
|
||||
# print("[red]:warning: This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.[/red]", file=sys.stderr)
|
||||
# print("[red] You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.[/red]", file=sys.stderr)
|
||||
|
||||
is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_DATA_DIR)
|
||||
is_empty = not len(set(os.listdir(DATA_DIR)) - CONSTANTS.ALLOWED_IN_DATA_DIR)
|
||||
existing_index = os.path.isfile(CONSTANTS.DATABASE_FILE)
|
||||
if is_empty and not existing_index:
|
||||
print(f'[turquoise4][+] Initializing a new ArchiveBox v{VERSION} collection...[/turquoise4]')
|
||||
|
@ -62,7 +68,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
|
|||
|
||||
# create the .archivebox_id file with a unique ID for this collection
|
||||
from archivebox.config.paths import _get_collection_id
|
||||
_get_collection_id(CONSTANTS.DATA_DIR, force_create=True)
|
||||
_get_collection_id(DATA_DIR, force_create=True)
|
||||
|
||||
# create the ArchiveBox.conf file
|
||||
write_config_file({'SECRET_KEY': SERVER_CONFIG.SECRET_KEY})
|
||||
|
@ -73,7 +79,10 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
|
|||
else:
|
||||
print('\n[green][+] Building main SQL index and running initial migrations...[/green]')
|
||||
|
||||
for migration_line in apply_migrations(out_dir):
|
||||
from archivebox.config.django import setup_django
|
||||
setup_django()
|
||||
|
||||
for migration_line in apply_migrations(DATA_DIR):
|
||||
sys.stdout.write(f' {migration_line}\n')
|
||||
|
||||
assert os.path.isfile(CONSTANTS.DATABASE_FILE) and os.access(CONSTANTS.DATABASE_FILE, os.R_OK)
|
||||
|
@ -88,11 +97,13 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
|
|||
print()
|
||||
print('[dodger_blue3][*] Checking links from indexes and archive folders (safe to Ctrl+C)...[/dodger_blue3]')
|
||||
|
||||
from core.models import Snapshot
|
||||
|
||||
all_links = Snapshot.objects.none()
|
||||
pending_links: Dict[str, Link] = {}
|
||||
pending_links: dict[str, Link] = {}
|
||||
|
||||
if existing_index:
|
||||
all_links = load_main_index(out_dir=out_dir, warn=False)
|
||||
all_links = load_main_index(DATA_DIR, warn=False)
|
||||
print(f' √ Loaded {all_links.count()} links from existing main index.')
|
||||
|
||||
if quick:
|
||||
|
@ -100,7 +111,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
|
|||
else:
|
||||
try:
|
||||
# Links in data folders that dont match their timestamp
|
||||
fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir)
|
||||
fixed, cant_fix = fix_invalid_folder_locations(DATA_DIR)
|
||||
if fixed:
|
||||
print(f' [yellow]√ Fixed {len(fixed)} data directory locations that didn\'t match their link timestamps.[/yellow]')
|
||||
if cant_fix:
|
||||
|
@ -109,7 +120,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
|
|||
# Links in JSON index but not in main index
|
||||
orphaned_json_links = {
|
||||
link.url: link
|
||||
for link in parse_json_main_index(out_dir)
|
||||
for link in parse_json_main_index(DATA_DIR)
|
||||
if not all_links.filter(url=link.url).exists()
|
||||
}
|
||||
if orphaned_json_links:
|
||||
|
@ -119,7 +130,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
|
|||
# Links in data dir indexes but not in main index
|
||||
orphaned_data_dir_links = {
|
||||
link.url: link
|
||||
for link in parse_json_links_details(out_dir)
|
||||
for link in parse_json_links_details(DATA_DIR)
|
||||
if not all_links.filter(url=link.url).exists()
|
||||
}
|
||||
if orphaned_data_dir_links:
|
||||
|
@ -129,7 +140,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
|
|||
# Links in invalid/duplicate data dirs
|
||||
invalid_folders = {
|
||||
folder: link
|
||||
for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items()
|
||||
for folder, link in get_invalid_folders(all_links, DATA_DIR).items()
|
||||
}
|
||||
if invalid_folders:
|
||||
print(f' [red]! Skipped adding {len(invalid_folders)} invalid link data directories.[/red]')
|
||||
|
@ -148,7 +159,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
|
|||
print(' archivebox init --quick', file=sys.stderr)
|
||||
raise SystemExit(1)
|
||||
|
||||
write_main_index(list(pending_links.values()), out_dir=out_dir)
|
||||
write_main_index(list(pending_links.values()), DATA_DIR)
|
||||
|
||||
print('\n[green]----------------------------------------------------------------------[/green]')
|
||||
|
||||
|
@ -163,13 +174,6 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
|
|||
else:
|
||||
print(f'[green][√] Done. A new ArchiveBox collection was initialized ({len(all_links) + len(pending_links)} links).[/green]')
|
||||
|
||||
json_index = out_dir / CONSTANTS.JSON_INDEX_FILENAME
|
||||
html_index = out_dir / CONSTANTS.HTML_INDEX_FILENAME
|
||||
index_name = f"{date.today()}_index_old"
|
||||
if os.access(json_index, os.F_OK):
|
||||
json_index.rename(f"{index_name}.json")
|
||||
if os.access(html_index, os.F_OK):
|
||||
html_index.rename(f"{index_name}.html")
|
||||
|
||||
CONSTANTS.PERSONAS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
CONSTANTS.DEFAULT_TMP_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
@ -180,7 +184,8 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
|
|||
STORAGE_CONFIG.LIB_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if install:
|
||||
run_subcommand('install', pwd=out_dir)
|
||||
from archivebox.cli.archivebox_install import install as install_method
|
||||
install_method()
|
||||
|
||||
if Snapshot.objects.count() < 25: # hide the hints for experienced users
|
||||
print()
|
||||
|
@ -194,44 +199,16 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
|
|||
print(' archivebox help')
|
||||
|
||||
|
||||
@docstring(init.__doc__)
|
||||
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
prog=__command__,
|
||||
description=init.__doc__,
|
||||
add_help=True,
|
||||
formatter_class=SmartFormatter,
|
||||
)
|
||||
parser.add_argument(
|
||||
'--force', # '-f',
|
||||
action='store_true',
|
||||
help='Ignore unrecognized files in current directory and initialize anyway',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--quick', '-q',
|
||||
action='store_true',
|
||||
help='Run any updates or migrations without rechecking all snapshot dirs',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--install', #'-s',
|
||||
action='store_true',
|
||||
help='Automatically install dependencies and extras used for archiving',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--setup', #'-s',
|
||||
action='store_true',
|
||||
help='DEPRECATED: equivalent to --install',
|
||||
)
|
||||
command = parser.parse_args(args or ())
|
||||
reject_stdin(__command__, stdin)
|
||||
|
||||
init(
|
||||
force=command.force,
|
||||
quick=command.quick,
|
||||
install=command.install or command.setup,
|
||||
out_dir=pwd or DATA_DIR,
|
||||
)
|
||||
|
||||
@click.command()
|
||||
@click.option('--force', '-f', is_flag=True, help='Ignore unrecognized files in current directory and initialize anyway')
|
||||
@click.option('--quick', '-q', is_flag=True, help='Run any updates or migrations without rechecking all snapshot dirs')
|
||||
@click.option('--install', '-s', is_flag=True, help='Automatically install dependencies and extras used for archiving')
|
||||
@click.option('--setup', '-s', is_flag=True, help='DEPRECATED: equivalent to --install')
|
||||
@docstring(init.__doc__)
|
||||
def main(**kwargs) -> None:
|
||||
init(**kwargs)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(args=sys.argv[1:], stdin=sys.stdin)
|
||||
main()
|
||||
|
|
|
@ -5,16 +5,16 @@ __command__ = 'archivebox install'
|
|||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Optional, List, IO
|
||||
from typing import Optional, List
|
||||
|
||||
from archivebox.misc.util import docstring
|
||||
from archivebox.config import DATA_DIR
|
||||
from archivebox.misc.logging_util import SmartFormatter, reject_stdin
|
||||
import rich_click as click
|
||||
from rich import print
|
||||
|
||||
from archivebox.misc.util import docstring, enforce_types
|
||||
|
||||
|
||||
def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, binaries: Optional[List[str]]=None, dry_run: bool=False) -> None:
|
||||
@enforce_types
|
||||
def install(binproviders: Optional[List[str]]=None, binaries: Optional[List[str]]=None, dry_run: bool=False) -> None:
|
||||
"""Automatically install all ArchiveBox dependencies and extras"""
|
||||
|
||||
# if running as root:
|
||||
|
@ -27,13 +27,17 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
|
|||
# - install all binaries as current user
|
||||
# - recommend user re-run with sudo if any deps need to be installed as root
|
||||
|
||||
from rich import print
|
||||
|
||||
from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
|
||||
from archivebox.config.paths import get_or_create_working_lib_dir
|
||||
import abx
|
||||
import archivebox
|
||||
from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission
|
||||
from archivebox.config.paths import DATA_DIR, ARCHIVE_DIR, get_or_create_working_lib_dir
|
||||
from archivebox.misc.logging import stderr
|
||||
from archivebox.cli.archivebox_init import init
|
||||
from archivebox.misc.system import run as run_shell
|
||||
|
||||
|
||||
if not (os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()):
|
||||
run_subcommand('init', stdin=None, pwd=out_dir) # must init full index because we need a db to store InstalledBinary entries in
|
||||
init() # must init full index because we need a db to store InstalledBinary entries in
|
||||
|
||||
print('\n[green][+] Installing ArchiveBox dependencies automatically...[/green]')
|
||||
|
||||
|
@ -143,49 +147,18 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
|
|||
if binaries:
|
||||
extra_args.append(f'--binaries={",".join(binaries)}')
|
||||
|
||||
proc = run_shell([ARCHIVEBOX_BINARY.load().abspath, 'version', *extra_args], capture_output=False, cwd=out_dir)
|
||||
proc = run_shell([ARCHIVEBOX_BINARY.load().abspath, 'version', *extra_args], capture_output=False, cwd=DATA_DIR)
|
||||
raise SystemExit(proc.returncode)
|
||||
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option('--binproviders', '-p', type=str, help='Select binproviders to use DEFAULT=env,apt,brew,sys_pip,venv_pip,lib_pip,pipx,sys_npm,lib_npm,puppeteer,playwright (all)', default=None)
|
||||
@click.option('--binaries', '-b', type=str, help='Select binaries to install DEFAULT=curl,wget,git,yt-dlp,chrome,single-file,readability-extractor,postlight-parser,... (all)', default=None)
|
||||
@click.option('--dry-run', '-d', is_flag=True, help='Show what would be installed without actually installing anything', default=False)
|
||||
@docstring(install.__doc__)
|
||||
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog=__command__,
|
||||
description=install.__doc__,
|
||||
add_help=True,
|
||||
formatter_class=SmartFormatter,
|
||||
)
|
||||
parser.add_argument(
|
||||
'--binproviders', '-p',
|
||||
type=str,
|
||||
help='Select binproviders to use DEFAULT=env,apt,brew,sys_pip,venv_pip,lib_pip,pipx,sys_npm,lib_npm,puppeteer,playwright (all)',
|
||||
default=None,
|
||||
)
|
||||
parser.add_argument(
|
||||
'--binaries', '-b',
|
||||
type=str,
|
||||
help='Select binaries to install DEFAULT=curl,wget,git,yt-dlp,chrome,single-file,readability-extractor,postlight-parser,... (all)',
|
||||
default=None,
|
||||
)
|
||||
parser.add_argument(
|
||||
'--dry-run', '-d',
|
||||
action='store_true',
|
||||
help='Show what would be installed without actually installing anything',
|
||||
default=False,
|
||||
)
|
||||
command = parser.parse_args(args or ()) # noqa
|
||||
reject_stdin(__command__, stdin)
|
||||
|
||||
install(
|
||||
# force=command.force,
|
||||
out_dir=Path(pwd) if pwd else DATA_DIR,
|
||||
binaries=command.binaries.split(',') if command.binaries else None,
|
||||
binproviders=command.binproviders.split(',') if command.binproviders else None,
|
||||
dry_run=command.dry_run,
|
||||
)
|
||||
def main(**kwargs) -> None:
|
||||
install(**kwargs)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(args=sys.argv[1:], stdin=sys.stdin)
|
||||
main()
|
||||
|
|
Loading…
Reference in a new issue