__package__ = 'archivebox.config' import os import re import platform from typing import Dict from pathlib import Path from collections.abc import Mapping from benedict import benedict from ..misc.logging import DEFAULT_CLI_COLORS from .paths import ( PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR, get_collection_id, get_LIB_DIR, get_TMP_DIR, ) from .permissions import ( IS_ROOT, IN_DOCKER, RUNNING_AS_UID, RUNNING_AS_GID, DEFAULT_PUID, DEFAULT_PGID, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, ) from .version import detect_installed_version ###################### Config ########################## class ConstantsDict(Mapping): PACKAGE_DIR: Path = PACKAGE_DIR DATA_DIR: Path = DATA_DIR ARCHIVE_DIR: Path = ARCHIVE_DIR COLLECTION_ID: str = get_collection_id(DATA_DIR) # Host system VERSION: str = detect_installed_version(PACKAGE_DIR) OS: str = platform.system().lower() # darwin, linux, etc. ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc. IN_DOCKER: bool = IN_DOCKER # Permissions IS_ROOT: bool = IS_ROOT ARCHIVEBOX_USER: int = ARCHIVEBOX_USER ARCHIVEBOX_GROUP: int = ARCHIVEBOX_GROUP RUNNING_AS_UID: int = RUNNING_AS_UID RUNNING_AS_GID: int = RUNNING_AS_GID DEFAULT_PUID: int = DEFAULT_PUID DEFAULT_PGID: int = DEFAULT_PGID # Source code dirs PACKAGE_DIR_NAME: str = PACKAGE_DIR.name TEMPLATES_DIR_NAME: str = 'templates' TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME STATIC_DIR_NAME: str = 'static' STATIC_DIR: Path = TEMPLATES_DIR / STATIC_DIR_NAME # Data dirs ARCHIVE_DIR_NAME: str = 'archive' SOURCES_DIR_NAME: str = 'sources' PERSONAS_DIR_NAME: str = 'personas' CRONTABS_DIR_NAME: str = 'crontabs' CACHE_DIR_NAME: str = 'cache' LOGS_DIR_NAME: str = 'logs' USER_PLUGINS_DIR_NAME: str = 'user_plugins' CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates' ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME # Data dir files CONFIG_FILENAME: str = 'ArchiveBox.conf' SQL_INDEX_FILENAME: str = 'index.sqlite3' QUEUE_DATABASE_FILENAME: str = 'queue.sqlite3' CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME QUEUE_DATABASE_FILE: Path = DATA_DIR / QUEUE_DATABASE_FILENAME JSON_INDEX_FILENAME: str = 'index.json' HTML_INDEX_FILENAME: str = 'index.html' ROBOTS_TXT_FILENAME: str = 'robots.txt' FAVICON_FILENAME: str = 'favicon.ico' # Runtime dirs TMP_DIR_NAME: str = 'tmp' TMP_DIR: Path = get_TMP_DIR() LIB_DIR_NAME: str = 'lib' LIB_DIR: Path = get_LIB_DIR() LIB_PIP_DIR: Path = LIB_DIR / 'pip' LIB_NPM_DIR: Path = LIB_DIR / 'npm' LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers' LIB_BIN_DIR: Path = LIB_DIR / 'bin' BIN_DIR: Path = LIB_BIN_DIR # Config constants TIMEZONE: str = 'UTC' DEFAULT_CLI_COLORS: Dict[str, str] = DEFAULT_CLI_COLORS DISABLED_CLI_COLORS: Dict[str, str] = benedict({k: '' for k in DEFAULT_CLI_COLORS}) ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE STATICFILE_EXTENSIONS: frozenset[str] = frozenset(( # 99.999% of the time, URLs ending in these extensions are static files # that can be downloaded as-is, not html pages that need to be rendered 'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp', 'svg', 'svgz', 'webp', 'ps', 'eps', 'ai', 'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v', 'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8', 'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx', 'atom', 'rss', 'css', 'js', 'json', 'dmg', 'iso', 'img', 'rar', 'war', 'hqx', 'zip', 'gz', 'bz2', '7z', # Less common extensions to consider adding later # jar, swf, bin, com, exe, dll, deb # ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm, # pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf, # ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml # These are always treated as pages, not as static files, never add them: # html, htm, shtml, xhtml, xml, aspx, php, cgi )) PIP_RELATED_NAMES: frozenset[str] = frozenset(( ".venv", "venv", "virtualenv", ".virtualenv", )) NPM_RELATED_NAMES: frozenset[str] = frozenset(( "node_modules", "package.json", "package-lock.json", "yarn.lock", )) # When initializing archivebox in a new directory, we check to make sure the dir is # actually empty so that we dont clobber someone's home directory or desktop by accident. # These files are exceptions to the is_empty check when we're trying to init a new dir, # as they could be from a previous archivebox version, system artifacts, dependencies, etc. ALLOWED_IN_DATA_DIR: frozenset[str] = frozenset(( *PIP_RELATED_NAMES, *NPM_RELATED_NAMES, ### Dirs: ARCHIVE_DIR_NAME, SOURCES_DIR_NAME, LOGS_DIR_NAME, CACHE_DIR_NAME, LIB_DIR_NAME, TMP_DIR_NAME, PERSONAS_DIR_NAME, CUSTOM_TEMPLATES_DIR_NAME, USER_PLUGINS_DIR_NAME, CRONTABS_DIR_NAME, "static", # created by old static exports