diff --git a/Dockerfile b/Dockerfile index 0c5a4e80..2b510416 100644 --- a/Dockerfile +++ b/Dockerfile @@ -74,12 +74,7 @@ ENV CODE_DIR=/app \ PLAYWRIGHT_BROWSERS_PATH=/browsers # TODO: add TMP_DIR and LIB_DIR? -# Application-level paths -ENV APP_VENV=/app/.venv \ - NODE_MODULES=/app/node_modules - # Build shell config -ENV PATH="$PATH:$GLOBAL_VENV/bin:$APP_VENV/bin:$NODE_MODULES/.bin" SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "errtrace", "-o", "nounset", "-c"] ######### System Environment #################################### @@ -99,7 +94,6 @@ RUN (echo "[i] Docker build for ArchiveBox $(cat /VERSION.txt) starting..." \ && echo "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \ && echo "BUILD_START_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ} LANG=${LANG}" \ && echo \ - && echo "GLOBAL_VENV=${GLOBAL_VENV} APP_VENV=${APP_VENV} NODE_MODULES=${NODE_MODULES}" \ && echo "PYTHON=${PYTHON_VERSION} NODE=${NODE_VERSION} PATH=${PATH}" \ && echo "CODE_DIR=${CODE_DIR} DATA_DIR=${DATA_DIR}" \ && echo \ diff --git a/archivebox/__init__.py b/archivebox/__init__.py index d60e2122..cb0b2cd1 100755 --- a/archivebox/__init__.py +++ b/archivebox/__init__.py @@ -1,12 +1,5 @@ #!/usr/bin/env python3 -ASCII_LOGO = """ - █████╗ ██████╗ ██████╗██╗ ██╗██╗██╗ ██╗███████╗ ██████╗ ██████╗ ██╗ ██╗ -██╔══██╗██╔══██╗██╔════╝██║ ██║██║██║ ██║██╔════╝ ██╔══██╗██╔═══██╗╚██╗██╔╝ -███████║██████╔╝██║ ███████║██║██║ ██║█████╗ ██████╔╝██║ ██║ ╚███╔╝ -██╔══██║██╔══██╗██║ ██╔══██║██║╚██╗ ██╔╝██╔══╝ ██╔══██╗██║ ██║ ██╔██╗ -██║ ██║██║ ██║╚██████╗██║ ██║██║ ╚████╔╝ ███████╗ ██████╔╝╚██████╔╝██╔╝ ██╗ -╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═══╝ ╚══════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝ -""" + # Welcome to the ArchiveBox source code! Thanks for checking it out! # # "We are swimming upstream against a great torrent of disorganization. @@ -23,14 +16,25 @@ import sys import tempfile from pathlib import Path -USING_TMP_DATA_DIR = None +ASCII_LOGO = """ + █████╗ ██████╗ ██████╗██╗ ██╗██╗██╗ ██╗███████╗ ██████╗ ██████╗ ██╗ ██╗ +██╔══██╗██╔══██╗██╔════╝██║ ██║██║██║ ██║██╔════╝ ██╔══██╗██╔═══██╗╚██╗██╔╝ +███████║██████╔╝██║ ███████║██║██║ ██║█████╗ ██████╔╝██║ ██║ ╚███╔╝ +██╔══██║██╔══██╗██║ ██╔══██║██║╚██╗ ██╔╝██╔══╝ ██╔══██╗██║ ██║ ██╔██╗ +██║ ██║██║ ██║╚██████╗██║ ██║██║ ╚████╔╝ ███████╗ ██████╔╝╚██████╔╝██╔╝ ██╗ +╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═══╝ ╚══════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝ +""" +SYSTEM_TMP_DIR = Path(tempfile.gettempdir()) / 'archivebox' +SYSTEM_TMP_DIR.mkdir(parents=True, exist_ok=True) +os.environ['SYSTEM_TMP_DIR'] = str(SYSTEM_TMP_DIR) + +# if we are outside a data dir, cd into an ephemeral tmp dir so that +# we can run version/help without polluting cwd with an index.sqlite3 if len(sys.argv) > 1 and sys.argv[1] in ('version', 'help'): current_dir = Path(os.getcwd()).resolve() if not (current_dir / 'index.sqlite3').exists(): - USING_TMP_DATA_DIR = Path(tempfile.gettempdir()) / 'archivebox' - USING_TMP_DATA_DIR.mkdir(parents=True, exist_ok=True) - os.chdir(USING_TMP_DATA_DIR) + os.chdir(SYSTEM_TMP_DIR) # make sure PACKAGE_DIR is in sys.path so we can import all subfolders # without necessarily waiting for django to load them thorugh INSTALLED_APPS diff --git a/archivebox/__main__.py b/archivebox/__main__.py index 3aa62867..1b6ea657 100755 --- a/archivebox/__main__.py +++ b/archivebox/__main__.py @@ -2,7 +2,9 @@ """This is the main entry point for the ArchiveBox CLI.""" __package__ = 'archivebox' +import archivebox # noqa # make sure monkey patches are applied before anything else import sys + from .cli import main ASCII_LOGO_MINI = r""" diff --git a/archivebox/config/constants.py b/archivebox/config/constants.py index ac17744b..8f38eecf 100644 --- a/archivebox/config/constants.py +++ b/archivebox/config/constants.py @@ -4,6 +4,8 @@ __package__ = 'archivebox.config' import os import re import platform +import machineid +import tempfile from typing import Dict from pathlib import Path @@ -53,6 +55,17 @@ def _detect_installed_version(PACKAGE_DIR: Path): VERSION: str = _detect_installed_version(PACKAGE_DIR) +# Ensure system tmp dir and data dir exist as we need them to run almost everything +if 'SYSTEM_TMP_DIR' in os.environ: + SYSTEM_TMP_DIR = Path(os.environ['SYSTEM_TMP_DIR']) +else: + SYSTEM_TMP_DIR = Path(tempfile.gettempdir()) / 'archivebox' + SYSTEM_TMP_DIR.mkdir(parents=True, exist_ok=True) + +DATA_DIR_TMP_DIR = DATA_DIR / 'tmp' / machineid.hashed_id('archivebox')[:16] +DATA_DIR_TMP_DIR.mkdir(parents=True, exist_ok=True) + + class ConstantsDict(Mapping): IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'yes') OS = platform.system().lower() # darwin, linux, etc. @@ -81,13 +94,16 @@ class ConstantsDict(Mapping): LIB_DIR_NAME: str = 'lib' TMP_DIR_NAME: str = 'tmp' + SYSTEM_TMP_DIR: Path = SYSTEM_TMP_DIR + DATA_DIR_TMP_DIR: Path = DATA_DIR_TMP_DIR + ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE # e.g. data/lib/arm64-darwin-docker - TMP_DIR: Path = (Path('/tmp') if IN_DOCKER else DATA_DIR) / TMP_DIR_NAME + TMP_DIR: Path = SYSTEM_TMP_DIR if IN_DOCKER else DATA_DIR_TMP_DIR # e.g. /var/folders/bk/63jsns1s.../T/archivebox or ./data/tmp/abcwe324234 CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME diff --git a/archivebox/vendor/pydantic-pkgr b/archivebox/vendor/pydantic-pkgr index 0f610c2a..5bb42056 160000 --- a/archivebox/vendor/pydantic-pkgr +++ b/archivebox/vendor/pydantic-pkgr @@ -1 +1 @@ -Subproject commit 0f610c2ab688d81711acec73c73d4286ba14d033 +Subproject commit 5bb42056bda9269e600885d83369b89f8dd916a5 diff --git a/pdm.lock b/pdm.lock index 5fa5d8bf..6312aee1 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "all", "ldap", "sonic"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:be2740879f6045b108b48e90997be10a6a670805e7682a0c86fc3cd1e98811c4" +content_hash = "sha256:0aa8e150b08d1c571c1f8bfa844fdb3ad0995f9b01eb9199b6449ed0230acbd5" [[metadata.targets]] requires_python = "==3.12.*" @@ -1158,7 +1158,7 @@ files = [ [[package]] name = "pydantic-pkgr" -version = "0.3.9" +version = "0.4.2" requires_python = ">=3.10" summary = "System package manager APIs in strongly typed Python" groups = ["default"] @@ -1169,8 +1169,8 @@ dependencies = [ "typing-extensions>=4.11.0", ] files = [ - {file = "pydantic_pkgr-0.3.9-py3-none-any.whl", hash = "sha256:8cdece5142b79127cdf96baf84cefdc0b811297e3b0e13526f88e10019ae60c1"}, - {file = "pydantic_pkgr-0.3.9.tar.gz", hash = "sha256:f811600e9222b98b7d52df27375cab92ccfa702020f80a46076c4e5eeb099dc4"}, + {file = "pydantic_pkgr-0.4.2-py3-none-any.whl", hash = "sha256:b78e421a58c1777098792236ed6da50b70167e18579c6e4353309ab121972f7b"}, + {file = "pydantic_pkgr-0.4.2.tar.gz", hash = "sha256:879654052a22122484bebd2616c4ade6887f2f6fb3afae397937a5bb23473f76"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index 72f96364..f711e458 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,7 +83,7 @@ dependencies = [ ############# VENDORED LIBS ###################### # these can be safely omitted when installation subsystem does not provide these as packages (e.g. apt/debian) # archivebox will automatically load fallback vendored copies bundled via archivebox/vendor/__init__.py - "pydantic-pkgr>=0.3.9", + "pydantic-pkgr>=0.4.2", "atomicwrites==1.4.1", "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7", "django-taggit==1.3.0", diff --git a/requirements.txt b/requirements.txt index 6059e64b..92e31f3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -77,7 +77,7 @@ pycparser==2.22; platform_python_implementation != "PyPy" and python_version == pycryptodomex==3.21.0; python_version == "3.12" pydantic==2.9.2; python_version == "3.12" pydantic-core==2.23.4; python_version == "3.12" -pydantic-pkgr==0.3.9; python_version == "3.12" +pydantic-pkgr==0.4.2; python_version == "3.12" pydantic-settings==2.5.2; python_version == "3.12" pygments==2.18.0; python_version == "3.12" pyopenssl==24.2.1; python_version == "3.12"