fuck it go back to nested lib and tmp dirs with supervisord sock workaround

This commit is contained in:
Nick Sweeting 2024-10-08 17:48:59 -07:00
parent df68f416fb
commit 4b34b729ab
No known key found for this signature in database
6 changed files with 52 additions and 21 deletions

View file

@ -16,9 +16,9 @@ from .paths import (
PACKAGE_DIR,
DATA_DIR,
ARCHIVE_DIR,
get_collection_id,
get_LIB_DIR,
get_TMP_DIR,
# get_collection_id,
# get_LIB_DIR,
# get_TMP_DIR,
)
from .permissions import (
IS_ROOT,
@ -39,13 +39,14 @@ class ConstantsDict(Mapping):
PACKAGE_DIR: Path = PACKAGE_DIR
DATA_DIR: Path = DATA_DIR
ARCHIVE_DIR: Path = ARCHIVE_DIR
COLLECTION_ID: str = get_collection_id(DATA_DIR)
# COLLECTION_ID: str = get_collection_id(DATA_DIR)
# Host system
VERSION: str = detect_installed_version(PACKAGE_DIR)
OS: str = platform.system().lower() # darwin, linux, etc.
ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc.
IN_DOCKER: bool = IN_DOCKER
LIB_DIR_SCOPE: str = f'{ARCH}-{OS}-docker' if IN_DOCKER else f'{ARCH}-{OS}'
# Permissions
IS_ROOT: bool = IS_ROOT
@ -95,9 +96,11 @@ class ConstantsDict(Mapping):
# Runtime dirs
TMP_DIR_NAME: str = 'tmp'
TMP_DIR: Path = get_TMP_DIR()
# TMP_DIR: Path = get_TMP_DIR()
TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME
LIB_DIR_NAME: str = 'lib'
LIB_DIR: Path = get_LIB_DIR()
# LIB_DIR: Path = get_LIB_DIR()
LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE
LIB_PIP_DIR: Path = LIB_DIR / 'pip'
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'

View file

@ -5,9 +5,10 @@ import sys
import tempfile
import hashlib
from pathlib import Path
from functools import cache
from platformdirs import PlatformDirs
from rich import print
from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
@ -91,7 +92,7 @@ def get_LIB_DIR():
lib_dir = HOST_DIRS.user_data_path
lib_dir.mkdir(parents=True, exist_ok=True)
if not dir_is_writable(lib_dir):
if IS_ROOT or not dir_is_writable(lib_dir, uid=ARCHIVEBOX_USER):
if IS_ROOT:
# make sure lib dir is owned by the archivebox user, not root
with SudoPermission(uid=0):
@ -130,7 +131,7 @@ def get_TMP_DIR():
run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
with SudoPermission(uid=0, fallback=True):
run_dir.mkdir(parents=True, exist_ok=True)
if not dir_is_writable(run_dir):
if not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
if IS_ROOT:
with SudoPermission(uid=0, fallback=False):
if ARCHIVEBOX_USER == 0:
@ -153,7 +154,7 @@ def get_TMP_DIR():
with SudoPermission(uid=0, fallback=True):
run_dir.mkdir(parents=True, exist_ok=True)
if not dir_is_writable(run_dir):
if IS_ROOT or not dir_is_writable(run_dir, uid=ARCHIVEBOX_USER):
if IS_ROOT:
with SudoPermission(uid=0):
if ARCHIVEBOX_USER == 0:

View file

@ -450,6 +450,9 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
json_index.rename(f"{index_name}.json")
if os.access(html_index, os.F_OK):
html_index.rename(f"{index_name}.html")
CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True)
CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True)
if install:
run_subcommand('install', pwd=out_dir)
@ -1004,14 +1007,20 @@ def install(out_dir: Path=DATA_DIR) -> None:
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
if IS_ROOT:
with SudoPermission(uid=0):
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
if ARCHIVEBOX_USER == 0:
os.system(f'chmod -R 777 "{CONSTANTS.LIB_DIR.resolve()}"')
else:
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
except Exception as e:
if IS_ROOT:
print(f'[yellow]:warning: Retrying {binary.name} installation with [red]sudo[/red]...[/yellow]')
with SudoPermission(uid=0):
try:
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
if ARCHIVEBOX_USER == 0:
os.system(f'chmod -R 777 "{CONSTANTS.LIB_DIR.resolve()}"')
else:
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
except Exception as e:
print(f'[red]:cross_mark: Failed to install {binary.name} as root: {e}[/red]')
else:

View file

@ -109,8 +109,8 @@ def check_data_dir_permissions():
data_owned_by_root = data_dir_uid == 0
# data_owned_by_default_user = data_dir_uid == DEFAULT_PUID or data_dir_gid == DEFAULT_PGID
data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) and not IS_ROOT
data_not_writable = not (os.access(DATA_DIR, os.W_OK) and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK))
data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) if not IS_ROOT else False
data_not_writable = not (os.isdir(DATA_DIR) and os.access(DATA_DIR, os.W_OK)) # and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK))
if data_owned_by_root:
STDERR.print('\n[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]root[/red], ArchiveBox will refuse to run![/yellow]')
elif data_owner_doesnt_match or data_not_writable:

View file

@ -1,14 +1,33 @@
import tempfile
from pathlib import Path
from archivebox.config import CONSTANTS
from archivebox.config.paths import get_collection_id
DATA_DIR = CONSTANTS.DATA_DIR
LOGS_DIR = CONSTANTS.LOGS_DIR
TMP_DIR = CONSTANTS.TMP_DIR
Path.mkdir(TMP_DIR, exist_ok=True)
SUPERVISORD_CONFIG_FILE = TMP_DIR / "supervisord.conf"
PID_FILE = TMP_DIR / "supervisord.pid"
SOCK_FILE = TMP_DIR / "supervisord.sock"
LOG_FILE = TMP_DIR / "supervisord.log"
WORKERS_DIR = TMP_DIR / "workers"
def get_sock_file():
    """Return the filesystem path to use for the supervisord unix socket.

    Makes sure TMP_DIR exists. When the path of SOCK_FILE is at most 100
    characters long it is returned unchanged. Otherwise a symlink pointing at
    SOCK_FILE is created in the system temp dir and the path of that symlink
    is returned instead.

    Raises:
        AssertionError: if the symlink path in the system temp dir would
            itself be longer than 100 characters.
    """
    TMP_DIR.mkdir(parents=True, exist_ok=True)

    if len(str(SOCK_FILE)) <= 100:
        return SOCK_FILE

    # socket absolute paths cannot be longer than 108 characters on some systems
    # symlink it to a shorter path and use that instead
    short_sock = Path(tempfile.gettempdir()) / f"archivebox_supervisord_{get_collection_id()}.sock"

    # validate the path that will actually be handed out, before touching the filesystem
    assert len(str(short_sock)) <= 100, f'Failed to create supervisord SOCK_FILE, system tmp dir location is too long {short_sock} (unix only allows 108 characters for socket paths)'

    # create the link under a temporary name, then move it over the final name
    # so that any existing symlink is overwritten atomically
    tmp_link = short_sock.with_name(short_sock.name + '.tmp')
    tmp_link.unlink(missing_ok=True)
    tmp_link.symlink_to(SOCK_FILE)
    tmp_link.replace(short_sock)

    # Path objects are immutable: after the move only short_sock exists on disk,
    # so return that and never the temporary name
    return short_sock

View file

@ -1,6 +1,5 @@
__package__ = 'archivebox.queues'
import os
import time
import signal
import psutil
@ -15,7 +14,7 @@ from xmlrpc.client import ServerProxy
from archivebox.config.permissions import ARCHIVEBOX_USER
from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, SOCK_FILE, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR
from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, get_sock_file, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR
from typing import Iterator
@ -48,11 +47,11 @@ nocleanup = true
user = {ARCHIVEBOX_USER}
[unix_http_server]
file = {TMP_DIR}/{SOCK_FILE.name}
file = {get_sock_file()}
chmod = 0700
[supervisorctl]
serverurl = unix://{TMP_DIR}/{SOCK_FILE.name}
serverurl = unix://{get_sock_file()}
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
@ -81,12 +80,12 @@ def create_worker_config(daemon):
def get_existing_supervisord_process():
try:
transport = SupervisorTransport(None, None, f"unix://{SOCK_FILE}")
transport = SupervisorTransport(None, None, f"unix://{get_sock_file()}")
server = ServerProxy("http://localhost", transport=transport)
current_state = cast(Dict[str, int | str], server.supervisor.getState())
if current_state["statename"] == "RUNNING":
pid = server.supervisor.getPID()
print(f"[🦸‍♂️] Supervisord connected (pid={pid}) via unix://{str(SOCK_FILE).replace(str(TMP_DIR), 'tmp')}.")
print(f"[🦸‍♂️] Supervisord connected (pid={pid}) via unix://{str(get_sock_file()).replace(str(TMP_DIR), 'tmp')}.")
return server.supervisor
except FileNotFoundError:
return None