Mirror of https://github.com/ArchiveBox/ArchiveBox, synced 2024-11-24 21:23:22 +00:00.
fix tmp data dir resolution when running help or version outside data dir
This commit is contained in:
parent
f321d25f4c
commit
12f32c4690
12 changed files with 30 additions and 208 deletions
|
@ -20,21 +20,26 @@ __package__ = 'archivebox'
|
|||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
PACKAGE_DIR = Path(__file__).resolve().parent # archivebox source code dir
|
||||
DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir
|
||||
ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir
|
||||
USING_TMP_DATA_DIR = None
|
||||
|
||||
if len(sys.argv) > 1 and sys.argv[1] in ('version', 'help'):
|
||||
current_dir = Path(os.getcwd()).resolve()
|
||||
if not (current_dir / 'index.sqlite3').exists():
|
||||
USING_TMP_DATA_DIR = Path(tempfile.gettempdir()) / 'archivebox'
|
||||
USING_TMP_DATA_DIR.mkdir(parents=True, exist_ok=True)
|
||||
os.chdir(USING_TMP_DATA_DIR)
|
||||
|
||||
# make sure PACKAGE_DIR is in sys.path so we can import all subfolders
|
||||
# without necessarily waiting for django to load them thorugh INSTALLED_APPS
|
||||
PACKAGE_DIR = Path(__file__).resolve().parent
|
||||
if str(PACKAGE_DIR) not in sys.path:
|
||||
sys.path.append(str(PACKAGE_DIR))
|
||||
|
||||
from .config.constants import CONSTANTS, VERSION # noqa
|
||||
from .config.constants import CONSTANTS, DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, VERSION # noqa
|
||||
|
||||
os.environ['ARCHIVEBOX_PACKAGE_DIR'] = str(PACKAGE_DIR)
|
||||
os.environ['ARCHIVEBOX_DATA_DIR'] = str(DATA_DIR)
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings'
|
||||
|
||||
# print('INSTALLING MONKEY PATCHES')
|
||||
|
|
|
@ -2,9 +2,7 @@
|
|||
"""This is the main entry point for the ArchiveBox CLI."""
|
||||
__package__ = 'archivebox'
|
||||
|
||||
import archivebox # noqa # import archivebox/__init__.py to apply monkey patches, load vendored libs, etc.
|
||||
import sys
|
||||
|
||||
from .cli import main
|
||||
|
||||
ASCII_LOGO_MINI = r"""
|
||||
|
|
|
@ -18,7 +18,7 @@ from . import toml_util
|
|||
|
||||
|
||||
PACKAGE_DIR = Path(__file__).resolve().parent.parent
|
||||
DATA_DIR = Path(os.curdir).resolve()
|
||||
DATA_DIR = Path(os.getcwd()).resolve()
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
__package__ = 'abx.archivebox'
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple
|
||||
from typing_extensions import Self
|
||||
|
@ -189,7 +190,7 @@ class BaseExtractor(BaseHook):
|
|||
|
||||
# TODO: move this to a hookimpl
|
||||
def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None):
|
||||
cwd = cwd or Path('.')
|
||||
cwd = cwd or Path(os.getcwd())
|
||||
binary = self.load_binary(installed_binary=installed_binary)
|
||||
|
||||
return binary.exec(cmd=args, cwd=cwd)
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
__package__ = 'archivebox.cli'
|
||||
__command__ = 'archivebox'
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import threading
|
||||
import tempfile
|
||||
|
||||
from time import sleep
|
||||
from collections.abc import Mapping
|
||||
|
@ -11,10 +13,6 @@ from collections.abc import Mapping
|
|||
from typing import Optional, List, IO, Union, Iterable
|
||||
from pathlib import Path
|
||||
|
||||
from archivebox.config import DATA_DIR
|
||||
from archivebox.misc.checks import check_migrations
|
||||
from archivebox.misc.logging import stderr
|
||||
|
||||
from importlib import import_module
|
||||
|
||||
BUILTIN_LIST = list
|
||||
|
@ -135,9 +133,10 @@ def wait_for_bg_threads_to_exit(thread_names: Iterable[str]=(), ignore_names: It
|
|||
if blocking_threads:
|
||||
sleep(1)
|
||||
if tries == 5: # only show stderr message if we need to wait more than 5s
|
||||
stderr(
|
||||
print(
|
||||
f'[…] Waiting up to {timeout}s for background jobs (e.g. webhooks) to finish...',
|
||||
threads_summary,
|
||||
file=sys.stderr,
|
||||
)
|
||||
else:
|
||||
return tries
|
||||
|
@ -154,8 +153,12 @@ def run_subcommand(subcommand: str,
|
|||
|
||||
subcommand_args = subcommand_args or []
|
||||
|
||||
from archivebox.misc.checks import check_migrations
|
||||
from archivebox.config.legacy import setup_django
|
||||
|
||||
# print('DATA_DIR is', DATA_DIR)
|
||||
# print('pwd is', os.getcwd())
|
||||
|
||||
cmd_requires_db = subcommand in archive_cmds
|
||||
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
|
||||
|
||||
|
@ -237,12 +240,10 @@ def main(args: List[str] | Omitted=OMITTED, stdin: IO | Omitted=OMITTED, pwd: st
|
|||
subcommand=command.subcommand,
|
||||
subcommand_args=command.subcommand_args,
|
||||
stdin=stdin or None,
|
||||
pwd=pwd or DATA_DIR,
|
||||
)
|
||||
|
||||
run_subcommand(
|
||||
subcommand=command.subcommand,
|
||||
subcommand_args=command.subcommand_args,
|
||||
stdin=stdin or None,
|
||||
pwd=pwd or DATA_DIR,
|
||||
)
|
||||
|
|
|
@ -17,7 +17,7 @@ from ..misc.logging import DEFAULT_CLI_COLORS
|
|||
###################### Config ##########################
|
||||
|
||||
PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir
|
||||
DATA_DIR: Path = Path(os.curdir).resolve() # archivebox user data dir
|
||||
DATA_DIR: Path = Path(os.getcwd()).resolve() # archivebox user data dir
|
||||
ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir
|
||||
|
||||
def _detect_installed_version(PACKAGE_DIR: Path):
|
||||
|
|
|
@ -207,11 +207,6 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
|||
# 'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},
|
||||
# 'SAVE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
|
||||
|
||||
|
||||
# 'DEPENDENCIES': {'default': lambda c: get_dependency_info(c)},
|
||||
# 'CODE_LOCATIONS': {'default': lambda c: get_code_locations(c)},
|
||||
# 'DATA_LOCATIONS': {'default': lambda c: get_data_locations(c)},
|
||||
|
||||
'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
|
||||
'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
|
||||
}
|
||||
|
@ -427,74 +422,6 @@ def load_config(defaults: ConfigDefaultDict,
|
|||
|
||||
|
||||
|
||||
# Dependency Metadata Helpers
|
||||
def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3) -> Optional[str]:
|
||||
"""check the presence and return valid version line of a specified binary"""
|
||||
|
||||
abspath = bin_path(binary)
|
||||
if not binary or not abspath:
|
||||
return None
|
||||
|
||||
return '999.999.999'
|
||||
|
||||
# Now handled by new BinProvider plugin system, no longer needed:
|
||||
|
||||
try:
|
||||
bin_env = os.environ | {'LANG': 'C'}
|
||||
is_cmd_str = cmd and isinstance(cmd, str)
|
||||
version_str = (
|
||||
run(cmd or [abspath, "--version"], timeout=timeout, shell=is_cmd_str, stdout=PIPE, stderr=STDOUT, env=bin_env)
|
||||
.stdout.strip()
|
||||
.decode()
|
||||
)
|
||||
if not version_str:
|
||||
version_str = (
|
||||
run(cmd or [abspath, "--version"], timeout=timeout, shell=is_cmd_str, stdout=PIPE, stderr=STDOUT)
|
||||
.stdout.strip()
|
||||
.decode()
|
||||
)
|
||||
|
||||
# take first 3 columns of first line of version info
|
||||
semver = SemVer.parse(version_str)
|
||||
if semver:
|
||||
return str(semver)
|
||||
except (OSError, TimeoutExpired):
|
||||
pass
|
||||
# stderr(f'[X] Unable to find working version of dependency: {binary}', color='red')
|
||||
# stderr(' Make sure it\'s installed, then confirm it\'s working by running:')
|
||||
# stderr(f' {binary} --version')
|
||||
# stderr()
|
||||
# stderr(' If you don\'t want to install it, you can disable it via config. See here for more info:')
|
||||
# stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Install')
|
||||
return None
|
||||
|
||||
def bin_path(binary: Optional[str]) -> Optional[str]:
|
||||
if binary is None:
|
||||
return None
|
||||
|
||||
node_modules_bin = Path('.') / 'node_modules' / '.bin' / binary
|
||||
if node_modules_bin.exists():
|
||||
return str(node_modules_bin.resolve())
|
||||
|
||||
return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
|
||||
|
||||
def bin_hash(binary: Optional[str]) -> Optional[str]:
|
||||
return 'UNUSED'
|
||||
# DEPRECATED: now handled by new BinProvider plugin system, no longer needed:
|
||||
|
||||
if binary is None:
|
||||
return None
|
||||
abs_path = bin_path(binary)
|
||||
if abs_path is None or not Path(abs_path).exists():
|
||||
return None
|
||||
|
||||
file_hash = md5()
|
||||
with io.open(abs_path, mode='rb') as f:
|
||||
for chunk in iter(lambda: f.read(io.DEFAULT_BUFFER_SIZE), b''):
|
||||
file_hash.update(chunk)
|
||||
|
||||
return f'md5:{file_hash.hexdigest()}'
|
||||
|
||||
def find_chrome_binary() -> Optional[str]:
|
||||
"""find any installed chrome binaries in the default locations"""
|
||||
# Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
|
||||
|
@ -567,116 +494,6 @@ def wget_supports_compression(config):
|
|||
return False
|
||||
|
||||
|
||||
def get_dependency_info(config: benedict) -> ConfigValue:
|
||||
return {
|
||||
# 'PYTHON_BINARY': {
|
||||
# 'path': bin_path(config['PYTHON_BINARY']),
|
||||
# 'version': config['PYTHON_VERSION'],
|
||||
# 'hash': bin_hash(config['PYTHON_BINARY']),
|
||||
# 'enabled': True,
|
||||
# 'is_valid': bool(config['PYTHON_VERSION']),
|
||||
# },
|
||||
# 'SQLITE_BINARY': {
|
||||
# 'path': bin_path(config['SQLITE_BINARY']),
|
||||
# 'version': config['SQLITE_VERSION'],
|
||||
# 'hash': bin_hash(config['SQLITE_BINARY']),
|
||||
# 'enabled': True,
|
||||
# 'is_valid': bool(config['SQLITE_VERSION']),
|
||||
# },
|
||||
# 'DJANGO_BINARY': {
|
||||
# 'path': bin_path(config['DJANGO_BINARY']),
|
||||
# 'version': config['DJANGO_VERSION'],
|
||||
# 'hash': bin_hash(config['DJANGO_BINARY']),
|
||||
# 'enabled': True,
|
||||
# 'is_valid': bool(config['DJANGO_VERSION']),
|
||||
# },
|
||||
# 'ARCHIVEBOX_BINARY': {
|
||||
# 'path': bin_path(config['ARCHIVEBOX_BINARY']),
|
||||
# 'version': config['VERSION'],
|
||||
# 'hash': bin_hash(config['ARCHIVEBOX_BINARY']),
|
||||
# 'enabled': True,
|
||||
# 'is_valid': True,
|
||||
# },
|
||||
|
||||
# 'CURL_BINARY': {
|
||||
# 'path': bin_path(config['CURL_BINARY']),
|
||||
# 'version': config['CURL_VERSION'],
|
||||
# 'hash': bin_hash(config['CURL_BINARY']),
|
||||
# 'enabled': config['USE_CURL'],
|
||||
# 'is_valid': bool(config['CURL_VERSION']),
|
||||
# },
|
||||
# 'WGET_BINARY': {
|
||||
# 'path': bin_path(config['WGET_BINARY']),
|
||||
# 'version': config['WGET_VERSION'],
|
||||
# 'hash': bin_hash(config['WGET_BINARY']),
|
||||
# 'enabled': config['USE_WGET'],
|
||||
# 'is_valid': bool(config['WGET_VERSION']),
|
||||
# },
|
||||
# 'NODE_BINARY': {
|
||||
# 'path': bin_path(config['NODE_BINARY']),
|
||||
# 'version': config['NODE_VERSION'],
|
||||
# 'hash': bin_hash(config['NODE_BINARY']),
|
||||
# 'enabled': config['USE_NODE'],
|
||||
# 'is_valid': bool(config['NODE_VERSION']),
|
||||
# },
|
||||
# 'MERCURY_BINARY': {
|
||||
# 'path': bin_path(config['MERCURY_BINARY']),
|
||||
# 'version': config['MERCURY_VERSION'],
|
||||
# 'hash': bin_hash(config['MERCURY_BINARY']),
|
||||
# 'enabled': config['USE_MERCURY'],
|
||||
# 'is_valid': bool(config['MERCURY_VERSION']),
|
||||
# },
|
||||
# 'GIT_BINARY': {
|
||||
# 'path': bin_path(config['GIT_BINARY']),
|
||||
# 'version': config['GIT_VERSION'],
|
||||
# 'hash': bin_hash(config['GIT_BINARY']),
|
||||
# 'enabled': config['USE_GIT'],
|
||||
# 'is_valid': bool(config['GIT_VERSION']),
|
||||
# },
|
||||
# 'SINGLEFILE_BINARY': {
|
||||
# 'path': bin_path(config['SINGLEFILE_BINARY']),
|
||||
# 'version': config['SINGLEFILE_VERSION'],
|
||||
# 'hash': bin_hash(config['SINGLEFILE_BINARY']),
|
||||
# 'enabled': config['USE_SINGLEFILE'],
|
||||
# 'is_valid': bool(config['SINGLEFILE_VERSION']),
|
||||
# },
|
||||
# 'READABILITY_BINARY': {
|
||||
# 'path': bin_path(config['READABILITY_BINARY']),
|
||||
# 'version': config['READABILITY_VERSION'],
|
||||
# 'hash': bin_hash(config['READABILITY_BINARY']),
|
||||
# 'enabled': config['USE_READABILITY'],
|
||||
# 'is_valid': bool(config['READABILITY_VERSION']),
|
||||
# },
|
||||
# 'YOUTUBEDL_BINARY': {
|
||||
# 'path': bin_path(config['YOUTUBEDL_BINARY']),
|
||||
# 'version': config['YOUTUBEDL_VERSION'],
|
||||
# 'hash': bin_hash(config['YOUTUBEDL_BINARY']),
|
||||
# 'enabled': config['USE_YOUTUBEDL'],
|
||||
# 'is_valid': bool(config['YOUTUBEDL_VERSION']),
|
||||
# },
|
||||
# 'CHROME_BINARY': {
|
||||
# 'path': bin_path(config['CHROME_BINARY']),
|
||||
# 'version': config['CHROME_VERSION'],
|
||||
# 'hash': bin_hash(config['CHROME_BINARY']),
|
||||
# 'enabled': config['USE_CHROME'],
|
||||
# 'is_valid': bool(config['CHROME_VERSION']),
|
||||
# },
|
||||
# 'RIPGREP_BINARY': {
|
||||
# 'path': bin_path(config['RIPGREP_BINARY']),
|
||||
# 'version': config['RIPGREP_VERSION'],
|
||||
# 'hash': bin_hash(config['RIPGREP_BINARY']),
|
||||
# 'enabled': config['USE_RIPGREP'],
|
||||
# 'is_valid': bool(config['RIPGREP_VERSION']),
|
||||
# },
|
||||
# 'SONIC_BINARY': {
|
||||
# 'path': bin_path(config['SONIC_BINARY']),
|
||||
# 'version': config['SONIC_VERSION'],
|
||||
# 'hash': bin_hash(config['SONIC_BINARY']),
|
||||
# 'enabled': config['USE_SONIC'],
|
||||
# 'is_valid': bool(config['SONIC_VERSION']),
|
||||
# },
|
||||
}
|
||||
|
||||
# ******************************************************************************
|
||||
# ******************************************************************************
|
||||
# ******************************** Load Config *********************************
|
||||
|
|
|
@ -9,7 +9,7 @@ import django.db.models.deletion
|
|||
|
||||
from index.json import to_json
|
||||
|
||||
DATA_DIR = Path(os.curdir).resolve() # archivebox user data dir
|
||||
DATA_DIR = Path(os.getcwd()).resolve() # archivebox user data dir
|
||||
ARCHIVE_DIR = DATA_DIR / 'archive' # archivebox snapshot data dir
|
||||
|
||||
|
||||
|
|
|
@ -227,7 +227,7 @@ def progress_bar(seconds: int, prefix: str='', ANSI: Dict[str, str]=ANSI) -> Non
|
|||
print()
|
||||
|
||||
|
||||
def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str | IO], pwd: str):
|
||||
def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str | IO], pwd: str='.'):
|
||||
args = ' '.join(subcommand_args)
|
||||
version_msg = '[dark_magenta]\\[{now}][/dark_magenta] [dark_red]ArchiveBox[/dark_red] [dark_goldenrod]v{VERSION}[/dark_goldenrod]: [green4]archivebox [green3]{subcommand}[green2] {args}[/green2]'.format(
|
||||
now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'),
|
||||
|
|
|
@ -15,7 +15,7 @@ import machineid # https://github.com/keygen-sh/py-machineid
|
|||
from rich import print
|
||||
|
||||
PACKAGE_DIR = Path(__file__).parent
|
||||
DATA_DIR = Path('.').resolve()
|
||||
DATA_DIR = Path(os.getcwd()).resolve()
|
||||
|
||||
def get_vm_info():
|
||||
hw_in_docker = bool(os.getenv('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE'))
|
||||
|
|
|
@ -138,7 +138,7 @@ def help(out_dir: Path=DATA_DIR) -> None:
|
|||
''')
|
||||
|
||||
|
||||
if CONSTANTS.DATABASE_FILE.exists():
|
||||
if CONSTANTS.ARCHIVE_DIR.exists():
|
||||
pretty_out_dir = str(out_dir).replace(str(Path('~').expanduser()), '~')
|
||||
EXAMPLE_USAGE = f'''
|
||||
[light_slate_blue]DATA DIR[/light_slate_blue]: [yellow]{pretty_out_dir}[/yellow]
|
||||
|
@ -254,7 +254,7 @@ def version(quiet: bool=False,
|
|||
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
|
||||
|
||||
prnt()
|
||||
if CONSTANTS.DATABASE_FILE.exists() or CONSTANTS.ARCHIVE_DIR.exists() or CONSTANTS.CONFIG_FILE.exists():
|
||||
if CONSTANTS.ARCHIVE_DIR.exists() or CONSTANTS.CONFIG_FILE.exists():
|
||||
prnt('[bright_yellow][i] Data locations:[/bright_yellow]')
|
||||
for name, path in CONSTANTS.DATA_LOCATIONS.items():
|
||||
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
|
||||
|
|
|
@ -111,10 +111,10 @@ def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], over
|
|||
os.chmod(path, int(STORAGE_CONFIG.OUTPUT_PERMISSIONS, base=8))
|
||||
|
||||
@enforce_types
|
||||
def chmod_file(path: str, cwd: str='.') -> None:
|
||||
def chmod_file(path: str, cwd: str='') -> None:
|
||||
"""chmod -R <permissions> <cwd>/<path>"""
|
||||
|
||||
root = Path(cwd) / path
|
||||
root = Path(cwd or os.getcwd()) / path
|
||||
if not root.exists():
|
||||
raise Exception('Failed to chmod: {} does not exist (did the previous step fail?)'.format(path))
|
||||
|
||||
|
|
Loading…
Reference in a new issue