improve install flow with sudo, check package managers, and fix docker build

Nick Sweeting 2024-10-09 00:41:16 -07:00
parent e4f543f54a
commit 613caec8eb
10 changed files with 161 additions and 95 deletions


@@ -2,7 +2,7 @@ __package__ = 'archivebox.config'

 import os
 import re
-import platform
+import sys

 from typing import Dict
 from pathlib import Path
@@ -56,6 +56,7 @@ class ConstantsDict(Mapping):
     RUNNING_AS_GID: int = RUNNING_AS_GID
     DEFAULT_PUID: int = DEFAULT_PUID
     DEFAULT_PGID: int = DEFAULT_PGID
+    IS_INSIDE_VENV: bool = sys.prefix != sys.base_prefix

     # Source code dirs
     PACKAGE_DIR_NAME: str = PACKAGE_DIR.name
@@ -209,15 +210,20 @@ class ConstantsDict(Mapping):
             'enabled': True,
             'is_valid': os.access(STATIC_DIR, os.R_OK) and os.access(STATIC_DIR, os.X_OK),      # read + list
         },
+        'CUSTOM_TEMPLATES_DIR': {
+            'path': CUSTOM_TEMPLATES_DIR.resolve(),
+            'enabled': os.path.isdir(CUSTOM_TEMPLATES_DIR),
+            'is_valid': os.path.isdir(CUSTOM_TEMPLATES_DIR) and os.access(CUSTOM_TEMPLATES_DIR, os.R_OK),      # read
+        },
+        'USER_PLUGINS_DIR': {
+            'path': USER_PLUGINS_DIR.resolve(),
+            'enabled': os.path.isdir(USER_PLUGINS_DIR),
+            'is_valid': os.path.isdir(USER_PLUGINS_DIR) and os.access(USER_PLUGINS_DIR, os.R_OK),      # read
+        },
         'LIB_DIR': {
             'path': LIB_DIR.resolve(),
             'enabled': True,
-            'is_valid': os.access(LIB_DIR, os.R_OK) and os.access(LIB_DIR, os.X_OK) and os.access(LIB_DIR, os.W_OK),      # read + write
+            'is_valid': os.path.isdir(LIB_DIR) and os.access(LIB_DIR, os.R_OK) and os.access(LIB_DIR, os.W_OK),      # read + write
         },
-        'TMP_DIR': {
-            'path': TMP_DIR.resolve(),
-            'enabled': True,
-            'is_valid': os.access(TMP_DIR, os.R_OK) and os.access(TMP_DIR, os.X_OK) and os.access(TMP_DIR, os.W_OK),      # read + write
-        },
     })
@@ -225,62 +231,57 @@ class ConstantsDict(Mapping):
         "DATA_DIR": {
             "path": DATA_DIR.resolve(),
             "enabled": True,
-            "is_valid": os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK) and os.access(DATA_DIR, os.X_OK),
+            "is_valid": os.path.isdir(DATA_DIR) and os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK),
             "is_mount": os.path.ismount(DATA_DIR.resolve()),
         },
         "CONFIG_FILE": {
             "path": CONFIG_FILE.resolve(),
             "enabled": True,
-            "is_valid": os.access(CONFIG_FILE, os.R_OK) and os.access(CONFIG_FILE, os.W_OK),
+            "is_valid": os.path.isfile(CONFIG_FILE) and os.access(CONFIG_FILE, os.R_OK) and os.access(CONFIG_FILE, os.W_OK),
         },
         "SQL_INDEX": {
             "path": DATABASE_FILE.resolve(),
             "enabled": True,
-            "is_valid": os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
+            "is_valid": os.path.isfile(DATABASE_FILE) and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
             "is_mount": os.path.ismount(DATABASE_FILE.resolve()),
         },
         "QUEUE_DATABASE": {
             "path": QUEUE_DATABASE_FILE.resolve(),
             "enabled": True,
-            "is_valid": os.access(QUEUE_DATABASE_FILE, os.R_OK) and os.access(QUEUE_DATABASE_FILE, os.W_OK),
+            "is_valid": os.path.isfile(QUEUE_DATABASE_FILE) and os.access(QUEUE_DATABASE_FILE, os.R_OK) and os.access(QUEUE_DATABASE_FILE, os.W_OK),
             "is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()),
         },
         "ARCHIVE_DIR": {
             "path": ARCHIVE_DIR.resolve(),
             "enabled": True,
-            "is_valid": os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK) and os.access(ARCHIVE_DIR, os.X_OK),
+            "is_valid": os.path.isdir(ARCHIVE_DIR) and os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK),
             "is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
         },
         "SOURCES_DIR": {
             "path": SOURCES_DIR.resolve(),
             "enabled": True,
-            "is_valid": os.access(SOURCES_DIR, os.R_OK) and os.access(SOURCES_DIR, os.W_OK) and os.access(SOURCES_DIR, os.X_OK),
+            "is_valid": os.path.isdir(SOURCES_DIR) and os.access(SOURCES_DIR, os.R_OK) and os.access(SOURCES_DIR, os.W_OK),
         },
+        "PERSONAS_DIR": {
+            "path": PERSONAS_DIR.resolve(),
+            "enabled": os.path.isdir(PERSONAS_DIR),
+            "is_valid": os.path.isdir(PERSONAS_DIR) and os.access(PERSONAS_DIR, os.R_OK) and os.access(PERSONAS_DIR, os.W_OK),      # read + write
+        },
         "LOGS_DIR": {
             "path": LOGS_DIR.resolve(),
             "enabled": True,
-            "is_valid": os.access(LOGS_DIR, os.R_OK) and os.access(LOGS_DIR, os.W_OK) and os.access(LOGS_DIR, os.X_OK),      # read + write
+            "is_valid": os.path.isdir(LOGS_DIR) and os.access(LOGS_DIR, os.R_OK) and os.access(LOGS_DIR, os.W_OK),      # read + write
         },
+        'TMP_DIR': {
+            'path': TMP_DIR.resolve(),
+            'enabled': True,
+            'is_valid': os.path.isdir(TMP_DIR) and os.access(TMP_DIR, os.R_OK) and os.access(TMP_DIR, os.W_OK),      # read + write
+        },
         # "CACHE_DIR": {
         #     "path": CACHE_DIR.resolve(),
         #     "enabled": True,
-        #     "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK) and os.access(CACHE_DIR, os.X_OK),      # read + write
+        #     "is_valid": os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK),      # read + write
         # },
-        "PERSONAS_DIR": {
-            "path": PERSONAS_DIR.resolve(),
-            "enabled": os.access(PERSONAS_DIR, os.R_OK),
-            "is_valid": os.access(PERSONAS_DIR, os.R_OK) and os.access(PERSONAS_DIR, os.W_OK) and os.access(PERSONAS_DIR, os.X_OK),      # read + write
-        },
-        'CUSTOM_TEMPLATES_DIR': {
-            'path': CUSTOM_TEMPLATES_DIR.resolve(),
-            'enabled': os.access(CUSTOM_TEMPLATES_DIR, os.R_OK),
-            'is_valid': os.access(CUSTOM_TEMPLATES_DIR, os.R_OK) and os.access(CUSTOM_TEMPLATES_DIR, os.X_OK),      # read
-        },
-        'USER_PLUGINS_DIR': {
-            'path': USER_PLUGINS_DIR.resolve(),
-            'enabled': os.access(USER_PLUGINS_DIR, os.R_OK),
-            'is_valid': os.access(USER_PLUGINS_DIR, os.R_OK) and os.access(USER_PLUGINS_DIR, os.X_OK),      # read
-        },
     })

     @classmethod
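
Note: the reworked checks above pair an existence test (os.path.isdir / os.path.isfile) with permission probes (os.access), and venv detection compares sys.prefix against sys.base_prefix. A minimal standalone sketch of both patterns (the dir_is_valid helper name is hypothetical, not part of this commit):

    import os
    import sys
    from pathlib import Path

    # inside a virtualenv, sys.prefix points at the venv while sys.base_prefix
    # still points at the interpreter's real installation
    IS_INSIDE_VENV = sys.prefix != sys.base_prefix

    def dir_is_valid(path: Path, writable: bool = False) -> bool:
        """True if path is an existing, readable (and optionally writable) directory."""
        if not os.path.isdir(path):
            return False
        if not os.access(path, os.R_OK):
            return False
        return os.access(path, os.W_OK) if writable else True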


@@ -5,8 +5,9 @@ import hashlib
 import platform
 from pathlib import Path
 from functools import cache
+from datetime import datetime

-from .permissions import SudoPermission
+from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER

 #############################################################################################
@@ -30,14 +31,28 @@ def get_collection_id(DATA_DIR=DATA_DIR) -> str:
     except (OSError, FileNotFoundError, PermissionError):
         pass

-    hash_key = str(DATA_DIR.resolve()).encode()
-    collection_id = hashlib.sha256(hash_key).hexdigest()[:8]
+    # hash the machine_id + collection dir path + creation time to get a unique collection_id
+    machine_id = get_machine_id()
+    collection_path = DATA_DIR.resolve()
+    try:
+        creation_date = DATA_DIR.stat().st_ctime
+    except Exception:
+        creation_date = datetime.now().isoformat()
+    collection_id = hashlib.sha256(f'{machine_id}:{collection_path}@{creation_date}'.encode()).hexdigest()[:8]

     try:
         # only persist collection_id file if we already have an index.sqlite3 file present
         # otherwise we might be running in a directory that is not a collection, no point creating cruft files
         if os.path.isfile(DATABASE_FILE) and os.access(DATA_DIR, os.W_OK):
             collection_id_file.write_text(collection_id)
+
+            # if we're running as root right now, make sure the collection_id file is owned by the archivebox user
+            if IS_ROOT:
+                with SudoPermission(uid=0):
+                    if ARCHIVEBOX_USER == 0:
+                        os.system(f'chmod 777 "{collection_id_file}"')
+                    else:
+                        os.system(f'chown {ARCHIVEBOX_USER} "{collection_id_file}"')
     except (OSError, FileNotFoundError, PermissionError):
         pass

     return collection_id
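
The collection_id is now derived from the machine id, the resolved collection path, and the directory's creation time, so two collections at the same path on different machines no longer hash to the same id. A standalone sketch of the derivation (get_machine_id() comes from the surrounding module in the real code; here it is just a parameter):

    import hashlib
    from datetime import datetime
    from pathlib import Path

    def derive_collection_id(data_dir: Path, machine_id: str) -> str:
        """Hash machine id + collection path + creation time into a short stable id."""
        try:
            creation_date = data_dir.stat().st_ctime
        except Exception:
            creation_date = datetime.now().isoformat()   # fallback when stat() fails
        key = f'{machine_id}:{data_dir.resolve()}@{creation_date}'
        return hashlib.sha256(key.encode()).hexdigest()[:8]

    print(derive_collection_id(Path('.'), 'example-machine-id'))   # e.g. 'a1b2c3d4'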


@@ -195,6 +195,8 @@ def version(quiet: bool=False,
     from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
     from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID
+    from abx.archivebox.base_binary import BaseBinary, apt, brew, env

     # 0.7.1
     # ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365
     # IN_DOCKER=False IN_QEMU=False ARCH=arm64 OS=Darwin PLATFORM=macOS-14.2-arm64-arm-64bit PYTHON=Cpython
@@ -214,7 +216,7 @@
         f'ARCH={p.machine}',
         f'OS={p.system}',
         f'PLATFORM={platform.platform()}',
-        f'PYTHON={sys.implementation.name.title()}',
+        f'PYTHON={sys.implementation.name.title()}' + (' (venv)' if CONSTANTS.IS_INSIDE_VENV else ''),
     )
     OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS.DATA_DIR.is_mount or CONSTANTS.DATA_LOCATIONS.ARCHIVE_DIR.is_mount
     DATA_DIR_STAT = CONSTANTS.DATA_DIR.stat()
@@ -228,14 +230,15 @@
     prnt(
         f'DEBUG={SHELL_CONFIG.DEBUG}',
         f'IS_TTY={SHELL_CONFIG.IS_TTY}',
-        f'TZ={CONSTANTS.TIMEZONE}',
+        f'SUDO={CONSTANTS.IS_ROOT}',
+        f'ID={CONSTANTS.MACHINE_ID}:{CONSTANTS.COLLECTION_ID}',
         f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}',
         f'LDAP={LDAP_CONFIG.LDAP_ENABLED}',
         #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})',  # add this if we have more useful info to show eventually
     )
     prnt()

-    prnt('[pale_green1][i] Dependency versions:[/pale_green1]')
+    prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]')
     failures = []
     for name, binary in reversed(list(settings.BINARIES.items())):
         if binary.name == 'archivebox':
@@ -247,7 +250,7 @@
         except Exception as e:
             err = e
             loaded_bin = binary
-        provider_summary = f'[dark_sea_green3]{loaded_bin.binprovider.name.ljust(10)}[/dark_sea_green3]' if loaded_bin.binprovider else '[grey23]not found[/grey23]'
+        provider_summary = f'[dark_sea_green3]{loaded_bin.binprovider.name.ljust(10)}[/dark_sea_green3]' if loaded_bin.binprovider else '[grey23]not found[/grey23]      '
         if loaded_bin.abspath:
             abspath = str(loaded_bin.abspath).replace(str(DATA_DIR), '[light_slate_blue].[/light_slate_blue]').replace(str(Path('~').expanduser()), '~')
             if ' ' in abspath:
@@ -258,6 +261,25 @@
         if not loaded_bin.is_valid:
             failures.append(loaded_bin.name)

+    prnt()
+    prnt('[gold3][i] Package Managers:[/gold3]')
+    for name, binprovider in reversed(list(settings.BINPROVIDERS.items())):
+        err = None
+
+        # TODO: implement a BinProvider.BINARY() method that gets the loaded binary for a binprovider's INSTALLER_BIN
+        loaded_bin = binprovider.INSTALLER_BINARY or BaseBinary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew])
+
+        abspath = None
+        if loaded_bin.abspath:
+            abspath = str(loaded_bin.abspath).replace(str(DATA_DIR), '.').replace(str(Path('~').expanduser()), '~')
+            if ' ' in abspath:
+                abspath = abspath.replace(' ', r'\ ')
+
+        PATH = str(binprovider.PATH).replace(str(DATA_DIR), '[light_slate_blue].[/light_slate_blue]').replace(str(Path('~').expanduser()), '~')
+        ownership_summary = f'UID=[blue]{str(binprovider.euid).ljust(4)}[/blue]'
+        provider_summary = f'[dark_sea_green3]{str(abspath).ljust(52)}[/dark_sea_green3]' if abspath else f'[grey23]{"not available".ljust(52)}[/grey23]'
+        prnt('', '[green]√[/green]' if binprovider.is_valid else '[red]X[/red]', '', binprovider.name.ljust(11), provider_summary, ownership_summary, f'PATH={PATH}' if abspath else '', overflow='ellipsis', soft_wrap=True)
+
     prnt()
     prnt('[deep_sky_blue3][i] Source-code locations:[/deep_sky_blue3]')
     for name, path in CONSTANTS.CODE_LOCATIONS.items():
@@ -278,11 +300,9 @@
     prnt()

     if failures:
         raise SystemExit(1)
-    else:
-        raise SystemExit(0)
+    raise SystemExit(0)


 @enforce_types
 def run(subcommand: str,
@@ -451,6 +471,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
     if os.access(html_index, os.F_OK):
         html_index.rename(f"{index_name}.html")

+    CONSTANTS.PERSONAS_DIR.mkdir(parents=True, exist_ok=True)
     CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True)
     CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True)
@@ -985,7 +1006,7 @@ def install(out_dir: Path=DATA_DIR) -> None:
     from django.conf import settings
     from archivebox import CONSTANTS
-    from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
+    from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, USER

     if not (os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()):
         run_subcommand('init', stdin=None, pwd=out_dir)   # must init full index because we need a db to store InstalledBinary entries in
@@ -994,15 +1015,17 @@
     # we never want the data dir to be owned by root, detect owner of existing owner of DATA_DIR to try and guess desired non-root UID
     if IS_ROOT:
+        EUID = os.geteuid()
+
         # if we have sudo/root permissions, take advantage of them just while installing dependencies
         print()
-        print('[yellow]:warning: Using [red]root[/red] privileges only to install dependencies that need it, all other operations should be done as a [blue]non-root[/blue] user.[/yellow]')
+        print(f'[yellow]:warning: Running as [blue]{USER}[/blue] ({EUID}) with [red]sudo[/red] only for dependencies that need it.[/yellow]')
         print(f'    DATA_DIR, LIB_DIR, and TMP_DIR will be owned by [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue].')
         print()

-    package_manager_names = ', '.join(binprovider.name for binprovider in reversed(list(settings.BINPROVIDERS.values())))
-    print(f'[+] Setting up package managers [yellow]{package_manager_names}[/yellow]...')
+    package_manager_names = ', '.join(f'[yellow]{binprovider.name}[/yellow]' for binprovider in reversed(list(settings.BINPROVIDERS.values())))
+    print(f'[+] Setting up package managers {package_manager_names}...')
     for binprovider in reversed(list(settings.BINPROVIDERS.values())):
         try:
             binprovider.setup()
@@ -1016,9 +1039,11 @@
     for binary in reversed(list(settings.BINARIES.values())):
         providers = ' [grey53]or[/grey53] '.join(provider.name for provider in binary.binproviders_supported)
-        print(f'[+] Locating / Installing [yellow]{binary.name}[/yellow] using [red]{providers}[/red]...')
+        print(f'[+] Detecting / Installing [yellow]{binary.name.ljust(22)}[/yellow] using [red]{providers}[/red]...')
         try:
-            print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
+            with SudoPermission(uid=0, fallback=True):
+                # print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
+                binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'})
             if IS_ROOT:
                 with SudoPermission(uid=0):
                     if ARCHIVEBOX_USER == 0:
@@ -1026,19 +1051,7 @@
                     else:
                         os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
         except Exception as e:
-            if IS_ROOT:
-                print(f'[yellow]:warning: Retrying {binary.name} installation with [red]sudo[/red]...[/yellow]')
-                with SudoPermission(uid=0):
-                    try:
-                        print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
-                        if ARCHIVEBOX_USER == 0:
-                            os.system(f'chmod -R 777 "{CONSTANTS.LIB_DIR.resolve()}"')
-                        else:
-                            os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
-                    except Exception as e:
-                        print(f'[red]:cross_mark: Failed to install {binary.name} as root: {e}[/red]')
-            else:
-                print(f'[red]:cross_mark: Failed to install {binary.name} as user {ARCHIVEBOX_USER}: {e}[/red]')
+            print(f'[red]:cross_mark: Failed to install {binary.name} as user {ARCHIVEBOX_USER}: {e}[/red]')

     from django.contrib.auth import get_user_model
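
The nested retry-as-root branch is gone: each binary install is now attempted once inside SudoPermission(uid=0, fallback=True), which elevates when root/sudo is available and continues as the current user otherwise. A condensed sketch of the new loop, reusing the names imported in the diff above (SudoPermission, IS_ROOT, ARCHIVEBOX_USER):

    import os

    from archivebox.config.permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER

    def install_all(binaries, lib_dir):
        """One install attempt per binary, elevating via sudo only when available."""
        for binary in binaries:
            try:
                with SudoPermission(uid=0, fallback=True):
                    binary.load_or_install(fresh=True)
                if IS_ROOT:
                    # never leave LIB_DIR owned by root after an elevated install
                    os.system(f'chown -R {ARCHIVEBOX_USER} "{lib_dir}"')
            except Exception as e:
                print(f'Failed to install {binary.name} as user {ARCHIVEBOX_USER}: {e}')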


@@ -2,6 +2,7 @@ __package__ = 'archivebox.misc'

 # TODO: merge/dedupe this file with archivebox/logging_util.py

+import sys
 from typing import Optional, Union, Tuple, List
 from collections import defaultdict
@@ -16,7 +17,6 @@ CONSOLE = Console()
 STDERR = Console(stderr=True)
 IS_TTY = CONSOLE.is_interactive

 class RainbowHighlighter(Highlighter):
     def highlight(self, text):
         for index in range(len(text)):


@@ -46,24 +46,28 @@ class SinglefileBinary(BaseBinary):
     binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]

     provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
-        env.name: {
-            'abspath': lambda:
-                bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=env.PATH)
-                or bin_abspath('single-file', PATH=env.PATH)
-                or bin_abspath('single-file-node.js', PATH=env.PATH),
-        },
         LIB_NPM_BINPROVIDER.name: {
             "abspath": lambda:
-                bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=env.PATH)
+                bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=LIB_NPM_BINPROVIDER.PATH)
                 or bin_abspath("single-file", PATH=LIB_NPM_BINPROVIDER.PATH)
                 or bin_abspath("single-file-node.js", PATH=LIB_NPM_BINPROVIDER.PATH),
             "packages": lambda:
                 [f"single-file-cli@>={SINGLEFILE_MIN_VERSION} <{SINGLEFILE_MAX_VERSION}"],
         },
         SYS_NPM_BINPROVIDER.name: {
+            "abspath": lambda:
+                bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=SYS_NPM_BINPROVIDER.PATH)
+                or bin_abspath("single-file", PATH=SYS_NPM_BINPROVIDER.PATH)
+                or bin_abspath("single-file-node.js", PATH=SYS_NPM_BINPROVIDER.PATH),
             "packages": lambda:
                 [],   # prevent modifying system global npm packages
         },
+        env.name: {
+            'abspath': lambda:
+                bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=env.PATH)
+                or bin_abspath('single-file', PATH=env.PATH)
+                or bin_abspath('single-file-node.js', PATH=env.PATH),
+        },
     }

     def install(self, binprovider_name: Optional[BinProviderName]=None, **kwargs) -> ShallowBinary:
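
Before this change, the lib_npm override searched env.PATH, so a system-wide single-file could shadow the lib-dir install; each override now resolves against its own provider's PATH. The lookup itself is just short-circuiting `or`s over bin_abspath (same call signature as in the diff); a sketch with a hypothetical helper name:

    from pydantic_pkgr import bin_abspath

    def find_singlefile(configured_name: str, search_path: str):
        """Try the user-configured name first, then known aliases, all on one provider's PATH."""
        return (
            bin_abspath(configured_name, PATH=search_path)
            or bin_abspath('single-file', PATH=search_path)
            or bin_abspath('single-file-node.js', PATH=search_path)
        )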


@@ -42,7 +42,7 @@ class SystemNpmProvider(NpmProvider, BaseBinProvider):

 class LibNpmProvider(NpmProvider, BaseBinProvider):
     name: BinProviderName = "lib_npm"
-    PATH: PATHStr = str(OLD_NODE_BIN_PATH)
+    PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'

     npm_prefix: Optional[Path] = CONSTANTS.LIB_NPM_DIR
@@ -56,13 +56,6 @@ SYS_NPM_BINPROVIDER = SystemNpmProvider()
 LIB_NPM_BINPROVIDER = LibNpmProvider()
 npm = LIB_NPM_BINPROVIDER

-class NpmBinary(BaseBinary):
-    name: BinName = 'npm'
-    binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
-
-NPM_BINARY = NpmBinary()

 class NodeBinary(BaseBinary):
     name: BinName = 'node'
     binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
@@ -71,6 +64,22 @@ class NodeBinary(BaseBinary):

 NODE_BINARY = NodeBinary()

+class NpmBinary(BaseBinary):
+    name: BinName = 'npm'
+    binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
+
+NPM_BINARY = NpmBinary()
+
+class NpxBinary(BaseBinary):
+    name: BinName = 'npx'
+    binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
+
+NPX_BINARY = NpxBinary()

 class NpmPlugin(BasePlugin):
     app_label: str = 'npm'
@@ -82,6 +91,7 @@ class NpmPlugin(BasePlugin):
         LIB_NPM_BINPROVIDER,
         NODE_BINARY,
         NPM_BINARY,
+        NPX_BINARY,
     ]
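
LibNpmProvider now searches the new node bin dir before the old one: PATH entries are consulted left to right, so the first match wins when a package exists in both places. A sketch of the precedence (the concrete paths below are illustrative, not the real values of NEW_NODE_BIN_PATH / OLD_NODE_BIN_PATH):

    # hypothetical example values; the real constants come from ArchiveBox's config
    NEW_NODE_BIN_PATH = '/data/lib/npm/node_modules/.bin'
    OLD_NODE_BIN_PATH = '/data/node_modules/.bin'

    # earlier entries win, so binaries installed into the new lib dir take precedence
    PATH = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
    print(PATH.split(':'))   # search order: new dir first, old dir second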


@@ -2,13 +2,13 @@ __package__ = 'archivebox.plugins_pkg.pip'

 import os
 import sys
-import inspect
 from pathlib import Path
 from typing import List, Dict, Optional

 from pydantic import InstanceOf, Field, model_validator, validate_call

 import django
+import django.db.backends.sqlite3.base
 from django.db.backends.sqlite3.base import Database as django_sqlite3     # type: ignore[import-type]
 from django.core.checks import Error, Tags
 from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer, bin_abspath
@@ -54,11 +54,13 @@ class SystemPipxBinProvider(PipProvider, BaseBinProvider):
     pip_venv: Optional[Path] = None                                         # global pipx scope

+IS_INSIDE_VENV = sys.prefix != sys.base_prefix

 class VenvPipBinProvider(PipProvider, BaseBinProvider):
     name: BinProviderName = "venv_pip"
     INSTALLER_BIN: BinName = "pip"
-    pip_venv: Optional[Path] = Path(os.environ.get("VIRTUAL_ENV", None) or '/tmp/NotInsideAVenv')
+    pip_venv: Optional[Path] = Path(sys.prefix if IS_INSIDE_VENV else os.environ.get("VIRTUAL_ENV", '/tmp/NotInsideAVenv/lib'))

     def setup(self):
         """never attempt to create a venv here, this is just used to detect if we are inside an existing one"""
@@ -78,6 +80,9 @@ LIB_PIP_BINPROVIDER = LibPipBinProvider()
 pip = LIB_PIP_BINPROVIDER

 # ensure python libraries are importable from these locations (if archivebox wasnt executed from one of these then they wont already be in sys.path)
+assert VENV_PIP_BINPROVIDER.pip_venv is not None
+assert LIB_PIP_BINPROVIDER.pip_venv is not None
+
 site_packages_dir = 'lib/python{}.{}/site-packages'.format(*sys.version_info[:2])
 if os.environ.get("VIRTUAL_ENV", None):
     sys.path.append(str(VENV_PIP_BINPROVIDER.pip_venv / site_packages_dir))
@@ -127,17 +132,22 @@ class PythonBinary(BaseBinary):

 PYTHON_BINARY = PythonBinary()

+LOADED_SQLITE_PATH = Path(django.db.backends.sqlite3.base.__file__)
+LOADED_SQLITE_VERSION = SemVer(django_sqlite3.version)
+LOADED_SQLITE_FROM_VENV = str(LOADED_SQLITE_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve()))

 class SqliteBinary(BaseBinary):
     name: BinName = 'sqlite'
     binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
     provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
         VENV_PIP_BINPROVIDER.name: {
-            "abspath": lambda: Path(inspect.getfile(django_sqlite3)),
-            "version": lambda: SemVer(django_sqlite3.version),
+            "abspath": lambda: LOADED_SQLITE_PATH if LOADED_SQLITE_FROM_VENV else None,
+            "version": lambda: LOADED_SQLITE_VERSION if LOADED_SQLITE_FROM_VENV else None,
         },
         SYS_PIP_BINPROVIDER.name: {
-            "abspath": lambda: Path(inspect.getfile(django_sqlite3)),
-            "version": lambda: SemVer(django_sqlite3.version),
+            "abspath": lambda: LOADED_SQLITE_PATH if not LOADED_SQLITE_FROM_VENV else None,
+            "version": lambda: LOADED_SQLITE_VERSION if not LOADED_SQLITE_FROM_VENV else None,
         },
     }
@@ -166,18 +176,22 @@ class SqliteBinary(BaseBinary):

 SQLITE_BINARY = SqliteBinary()

+LOADED_DJANGO_PATH = Path(django.__file__)
+LOADED_DJANGO_VERSION = SemVer(django.VERSION[:3])
+LOADED_DJANGO_FROM_VENV = str(LOADED_DJANGO_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve()))

 class DjangoBinary(BaseBinary):
     name: BinName = 'django'
     binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
     provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
         VENV_PIP_BINPROVIDER.name: {
-            "abspath": lambda: inspect.getfile(django),
-            "version": lambda: django.VERSION[:3],
+            "abspath": lambda: LOADED_DJANGO_PATH if LOADED_DJANGO_FROM_VENV else None,
+            "version": lambda: LOADED_DJANGO_VERSION if LOADED_DJANGO_FROM_VENV else None,
         },
         SYS_PIP_BINPROVIDER.name: {
-            "abspath": lambda: inspect.getfile(django),
-            "version": lambda: django.VERSION[:3],
+            "abspath": lambda: LOADED_DJANGO_PATH if not LOADED_DJANGO_FROM_VENV else None,
+            "version": lambda: LOADED_DJANGO_VERSION if not LOADED_DJANGO_FROM_VENV else None,
         },
     }
@@ -206,6 +220,13 @@ class PipBinary(BaseBinary):

 PIP_BINARY = PipBinary()

+class PipxBinary(BaseBinary):
+    name: BinName = "pipx"
+    binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
+
+PIPX_BINARY = PipxBinary()

 class CheckUserIsNotRoot(BaseCheck):
     label: str = 'CheckUserIsNotRoot'
     tag: str = Tags.database
@@ -262,6 +283,7 @@ class PipPlugin(BasePlugin):
         VENV_PIP_BINPROVIDER,
         LIB_PIP_BINPROVIDER,
         PIP_BINARY,
+        PIPX_BINARY,
         ARCHIVEBOX_BINARY,
         PYTHON_BINARY,
         SQLITE_BINARY,
@@ -270,6 +292,7 @@ class PipPlugin(BasePlugin):
         PIP_ENVIRONMENT_CHECK,
     ]

 PLUGIN = PipPlugin()
 # PLUGIN.register(settings)
 DJANGO_APP = PLUGIN.AppConfig
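
The sqlite and django overrides no longer report the same inspect.getfile() result for both providers: the already-imported module is resolved once at import time, and attributed to venv_pip only if its file lives under the venv prefix (otherwise to sys_pip). A standalone sketch of that attribution check (assumes django is importable; the helper name is hypothetical):

    import sys
    from pathlib import Path

    import django

    LOADED_DJANGO_PATH = Path(django.__file__)

    def loaded_from_venv(module_path: Path) -> bool:
        """True if an already-imported module resolves to somewhere under the active venv."""
        if sys.prefix == sys.base_prefix:
            return False                          # not running inside a venv at all
        venv_root = str(Path(sys.prefix).resolve())
        return str(module_path.resolve()).startswith(venv_root)

    print(loaded_from_venv(LOADED_DJANGO_PATH))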

@@ -1 +1 @@
-Subproject commit fa47402471ccb1f2e5ed33806e3fd3e2dee590c8
+Subproject commit 830b3738f49109a05c8068df12f1e2167901953f


@@ -110,11 +110,11 @@ if [[ -d "$PLAYWRIGHT_BROWSERS_PATH/.links" ]]; then
     chown -h $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/.links/*
 fi

-# also chown tmp dir and lib dir
-mkdir -p "$SYSTEM_TMP_DIR"
-chown $PUID:$PGID "$SYSTEM_TMP_DIR"
-mkdir -p "$SYSTEM_LIB_DIR"
-chown $PUID:$PGID "$SYSTEM_LIB_DIR" "$SYSTEM_LIB_DIR"/*
+# also create and chown tmp dir and lib dirs
+mkdir -p "$DATA_DIR"/lib/bin
+chown $PUID:$PGID "$DATA_DIR"/lib "$DATA_DIR"/lib/*
+mkdir -p "$DATA_DIR"/tmp/workers
+chown $PUID:$PGID "$DATA_DIR"/tmp "$DATA_DIR"/tmp/*

 # (this check is written in blood in 2023, QEMU silently breaks things in ways that are not obvious)
 export IN_QEMU="$(pmap 1 | grep qemu >/dev/null && echo 'True' || echo 'False')"
@@ -177,7 +177,7 @@ else
 fi

 # symlink etc crontabs into place
-mkdir -p "$DATA_DIR/crontabs"
+mkdir -p "$DATA_DIR"/crontabs
 if ! test -L /var/spool/cron/crontabs; then
     # move files from old location into new data dir location
     for existing_file in /var/spool/cron/crontabs/*; do
@@ -187,7 +187,7 @@ if ! test -L /var/spool/cron/crontabs; then
     rm -Rf /var/spool/cron/crontabs
     ln -sf "$DATA_DIR/crontabs" /var/spool/cron/crontabs
 fi
-chown -R $PUID "$DATA_DIR/crontabs"
+chown -R $PUID "$DATA_DIR"/crontabs

 # set DBUS_SYSTEM_BUS_ADDRESS & DBUS_SESSION_BUS_ADDRESS
 # (dbus is not actually needed, it makes chrome log fewer warnings but isn't worth making our docker images bigger)
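
The entrypoint previously chowned $SYSTEM_TMP_DIR and $SYSTEM_LIB_DIR; it now creates lib/ and tmp/ inside $DATA_DIR and hands them to the unprivileged $PUID:$PGID user, matching where the Python side expects LIB_DIR and TMP_DIR. The same setup expressed as a Python sketch (the real logic is the shell above; subdir names taken from the diff):

    import os
    from pathlib import Path

    def setup_data_subdirs(data_dir: Path, puid: int, pgid: int) -> None:
        """Create lib/ and tmp/ under the data dir and chown them to the runtime user."""
        (data_dir / 'lib' / 'bin').mkdir(parents=True, exist_ok=True)
        (data_dir / 'tmp' / 'workers').mkdir(parents=True, exist_ok=True)
        for top in ('lib', 'tmp'):
            root = data_dir / top
            for path in [root, *root.iterdir()]:
                os.chown(path, puid, pgid)        # needs root, same as the entrypoint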


@@ -78,7 +78,7 @@ dependencies = [
     "django-taggit==1.3.0",
     "base32-crockford==0.3.0",
     # "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
-    "pydantic-pkgr>=0.4.13",
+    "pydantic-pkgr>=0.4.16",
     ############# Plugin Dependencies ################
     "sonic-client>=1.0.0",
     "yt-dlp>=2024.8.6",   # for: media