2024-09-30 22:59:05 +00:00
__package__ = ' archivebox.config '
2024-09-27 07:41:21 +00:00
2024-09-24 08:25:55 +00:00
import os
import sys
2024-09-25 12:10:09 +00:00
import shutil
2024-09-24 08:25:55 +00:00
2024-09-30 23:50:36 +00:00
from typing import Dict , Optional
2024-09-25 12:10:09 +00:00
from datetime import datetime
2024-09-24 08:25:55 +00:00
from pathlib import Path
2024-09-27 07:41:21 +00:00
from rich import print
2024-09-30 22:59:05 +00:00
from pydantic import Field , field_validator , model_validator , computed_field
2024-09-25 12:10:09 +00:00
from django . utils . crypto import get_random_string
2024-09-24 08:25:55 +00:00
2024-09-30 23:50:36 +00:00
from abx . archivebox . base_configset import BaseConfigSet
2024-09-27 07:41:21 +00:00
2024-09-30 22:59:05 +00:00
from . constants import CONSTANTS , PACKAGE_DIR
2024-09-24 08:25:55 +00:00
###################### Config ##########################
class ShellConfig(BaseConfigSet):
    """Settings describing the shell / process environment ArchiveBox runs in.

    Several defaults (IS_TTY, USER, PUID, PGID, PYTHON_ENCODING) are plain
    values computed once, when this class body is evaluated at import time.
    Other defaults are callables handed to ``Field`` unchanged.
    NOTE(review): presumably BaseConfigSet resolves callable defaults lazily,
    passing the config object as ``c`` -- confirm against base_configset.
    """

    DEBUG: bool = Field(default=lambda: '--debug' in sys.argv)

    IS_TTY: bool = Field(default=sys.stdout.isatty())
    USE_COLOR: bool = Field(default=lambda c: c.IS_TTY)
    SHOW_PROGRESS: bool = Field(default=lambda c: c.IS_TTY)

    IN_DOCKER: bool = Field(default=False)
    IN_QEMU: bool = Field(default=False)

    # final path component of the current user's resolved home directory
    USER: str = Field(default=Path('~').expanduser().resolve().name)
    PUID: int = Field(default=os.getuid())
    PGID: int = Field(default=os.getgid())

    # encoding of the first available standard stream, normalised to upper case
    # with the spelling 'UTF8' rewritten as 'UTF-8'
    PYTHON_ENCODING: str = Field(default=(sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8'))

    ANSI: Dict[str, str] = Field(default=lambda c: CONSTANTS.DEFAULT_CLI_COLORS if c.USE_COLOR else CONSTANTS.DISABLED_CLI_COLORS)

    VERSIONS_AVAILABLE: bool = False    # .check_for_update.get_versions_available_on_github(c)},
    CAN_UPGRADE: bool = False           # .check_for_update.can_upgrade(c)},

    @computed_field
    @property
    def TERM_WIDTH(self) -> int:
        """Return the column width to assume for output.

        A fixed 200 columns when not attached to a TTY, otherwise the detected
        terminal width (falling back to 140 columns if it cannot be queried).
        """
        if not self.IS_TTY:
            return 200
        return shutil.get_terminal_size((140, 10)).columns

    @computed_field
    @property
    def COMMIT_HASH(self) -> Optional[str]:
        """Return the commit hash of the git checkout next to PACKAGE_DIR, if any.

        Best-effort: every filesystem/parsing failure is swallowed and the next
        strategy is tried; ``None`` is returned when nothing could be read.
        """
        git_dir = PACKAGE_DIR / '../.git'

        try:
            head = (git_dir / 'HEAD').read_text().strip()
            if head and not head.startswith('ref:'):
                # detached HEAD: the file holds the commit hash itself rather
                # than a "ref: refs/heads/<branch>" pointer
                return head
            ref = head.split(' ')[-1]
            return git_dir.joinpath(ref).read_text().strip()
        except Exception:
            # deliberately best-effort (no .git dir, packed refs, bad encoding, ...)
            pass

        try:
            # fall back to whichever branch head the directory listing yields first
            return next((PACKAGE_DIR / '../.git/refs/heads/').glob('*')).read_text().strip()
        except Exception:
            pass

        return None

    @computed_field
    @property
    def BUILD_TIME(self) -> str:
        """Return a string describing when this build was produced.

        In Docker: the text following the last ``BUILD_END_TIME=`` marker in
        /VERSION.txt, up to the end of that line.  Otherwise: the modification
        time of PACKAGE_DIR/package.json, formatted with strftime.
        """
        if self.IN_DOCKER:
            docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
            return docker_build_end_time

        src_last_modified_unix_timestamp = (PACKAGE_DIR / 'package.json').stat().st_mtime
        # NOTE(review): the trailing %s is not one of the directives Python
        # documents as portable; its output depends on the platform's strftime.
        return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')

    @model_validator(mode='after')
    def validate_not_running_as_root(self):
        """Abort when running as root (PUID 0) or with a non-UTF-8 encoding.

        The ``setup`` and ``install`` subcommands are exempt from the root check.
        Messages go to stderr via rich's ``print`` (so [red]...[/red] is markup).

        Raises:
            SystemExit: with status 2 when either check fails.
        """
        attempted_command = ' '.join(sys.argv[:3])
        # argv[0] is the program itself, so the exemption has to be tested
        # against the subcommand; comparing the joined argv could never match.
        subcommand = sys.argv[1] if len(sys.argv) > 1 else ''

        if self.PUID == 0 and subcommand not in ('setup', 'install'):
            print('[red][!] ArchiveBox should never be run as root![/red]', file=sys.stderr)
            print('For more information, see the security overview documentation:', file=sys.stderr)
            print('https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root', file=sys.stderr)

            if self.IN_DOCKER:
                print('[red][!] When using Docker, you must run commands with [green]docker run[/green] instead of [yellow3]docker exec[/yellow3], e.g.:', file=sys.stderr)
                # f-prefix added: the command was previously printed as a literal "{attempted_command}"
                print(f'docker compose run archivebox {attempted_command}', file=sys.stderr)
                print(f'docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}', file=sys.stderr)
                print('or:', file=sys.stderr)
                print(f'docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
                print(f'docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
            raise SystemExit(2)

        # check python locale
        if self.PYTHON_ENCODING != 'UTF-8':
            print(f'[red][X] Your system is running python3 scripts with a bad locale setting: {self.PYTHON_ENCODING} (it should be UTF-8).[/red]', file=sys.stderr)
            print('To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)', file=sys.stderr)
            print('Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"', file=sys.stderr)
            # blank separator goes to stderr like the rest of the message
            print('', file=sys.stderr)
            print('Confirm that it\'s fixed by opening a new shell and running:', file=sys.stderr)
            print('python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8', file=sys.stderr)
            raise SystemExit(2)

        return self


SHELL_CONFIG = ShellConfig()
class StorageConfig(BaseConfigSet):
    """Settings controlling how output files are written to disk."""

    OUTPUT_PERMISSIONS: str = Field(default='644')
    RESTRICT_FILE_NAMES: str = Field(default='windows')
    ENFORCE_ATOMIC_WRITES: bool = Field(default=True)

    # not supposed to be user settable:
    # derived from OUTPUT_PERMISSIONS by mapping each digit 6 -> 7 and 4 -> 5
    # (so the default '644' becomes '755')
    DIR_OUTPUT_PERMISSIONS: str = Field(
        default=lambda c: c['OUTPUT_PERMISSIONS'].translate(str.maketrans('64', '75')),
    )


STORAGE_CONFIG = StorageConfig()
class GeneralConfig(BaseConfigSet):
    """General settings that do not belong to a more specific config section."""

    # regular expression (a character class containing only a comma)
    # NOTE(review): presumably used to split tag strings -- confirm at call sites
    TAG_SEPARATOR_PATTERN: str = Field(default=r'[,]')


GENERAL_CONFIG = GeneralConfig()
class ServerConfig(BaseConfigSet):
    """Settings for the web server: binding, hosts, visibility and admin login."""

    # NOTE(review): the default is a fresh random 50-character string each time
    # the callable is evaluated -- confirm whether it is persisted anywhere.
    SECRET_KEY: str = Field(default=lambda: get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_'))

    # listen on all interfaces inside Docker, loopback only otherwise
    BIND_ADDR: str = Field(default=lambda: '0.0.0.0:8000' if SHELL_CONFIG.IN_DOCKER else '127.0.0.1:8000')
    ALLOWED_HOSTS: str = Field(default='*')
    # comma-separated origins; the configured BIND_ADDR is appended as the last one
    CSRF_TRUSTED_ORIGINS: str = Field(default=lambda c: 'http://localhost:8000,http://127.0.0.1:8000,http://0.0.0.0:8000,http://{}'.format(c.BIND_ADDR))

    SNAPSHOTS_PER_PAGE: int = Field(default=40)
    FOOTER_INFO: str = Field(default='Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.')
    # CUSTOM_TEMPLATES_DIR: Path = Field(default=None)    # this is now a constant

    PUBLIC_INDEX: bool = Field(default=True)
    PUBLIC_SNAPSHOTS: bool = Field(default=True)
    PUBLIC_ADD_VIEW: bool = Field(default=False)

    # both default to None, so the annotations must admit None as well
    ADMIN_USERNAME: str | None = Field(default=None)
    ADMIN_PASSWORD: str | None = Field(default=None)

    REVERSE_PROXY_USER_HEADER: str = Field(default='Remote-User')
    REVERSE_PROXY_WHITELIST: str = Field(default='')
    LOGOUT_REDIRECT_URL: str = Field(default='/')
    PREVIEW_ORIGINALS: bool = Field(default=True)


SERVER_CONFIG = ServerConfig()
class ArchivingConfig(BaseConfigSet):
    """Settings shared by the archive methods: timeouts, user agent, URL filters."""

    ONLY_NEW: bool = Field(default=True)

    TIMEOUT: int = Field(default=60)
    MEDIA_TIMEOUT: int = Field(default=3600)

    MEDIA_MAX_SIZE: str = Field(default='750m')
    RESOLUTION: str = Field(default='1440,2000')
    CHECK_SSL_VALIDITY: bool = Field(default=True)
    # the literal {VERSION} placeholder is stored as-is here
    # NOTE(review): presumably substituted by the consumer -- confirm
    USER_AGENT: str = Field(default='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)')
    COOKIES_FILE: Path | None = Field(default=None)

    URL_DENYLIST: str = Field(default=r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', alias='URL_BLACKLIST')
    URL_ALLOWLIST: str | None = Field(default=None, alias='URL_WHITELIST')

    # GIT_DOMAINS: str = Field(default='github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht')
    # WGET_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' wget/{WGET_VERSION}')
    # CURL_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' curl/{CURL_VERSION}')
    # CHROME_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'])
    # CHROME_USER_DATA_DIR: str | None = Field(default=None)
    # CHROME_TIMEOUT: int = Field(default=0)
    # CHROME_HEADLESS: bool = Field(default=True)
    # CHROME_SANDBOX: bool = Field(default=lambda: not SHELL_CONFIG.IN_DOCKER)

    @field_validator('TIMEOUT', mode='after')
    @classmethod
    def validate_timeout(cls, v):
        """Warn on stderr when TIMEOUT is below 5 seconds; the value is returned unchanged."""
        # only warn when the value really is below the 5 second minimum that the
        # message refers to (the warning used to be printed for every value)
        if v < 5:
            print(f'[red][!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={v} seconds)[/red]', file=sys.stderr)
            print('You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.', file=sys.stderr)
            print('(Setting it to somewhere between 30 and 3000 seconds is recommended)', file=sys.stderr)
            print(file=sys.stderr)
            print('If you want to make ArchiveBox run faster, disable specific archive methods instead:', file=sys.stderr)
            print('https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles', file=sys.stderr)
            print(file=sys.stderr)
        return v

    @field_validator('CHECK_SSL_VALIDITY', mode='after')
    @classmethod
    def validate_check_ssl_validity(cls, v):
        """SIDE EFFECT: disable "you really shouldnt disable ssl" warnings emitted by requests"""
        if not v:
            # imported lazily so the libraries are only loaded when needed
            import requests
            import urllib3

            # silence InsecureRequestWarning through both import paths
            requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        return v


ARCHIVING_CONFIG = ArchivingConfig()
class SearchBackendConfig(BaseConfigSet):
    """Settings for the full-text search backend."""

    # indexing and searching can be toggled independently
    USE_INDEXING_BACKEND: bool = Field(default=True)
    USE_SEARCHING_BACKEND: bool = Field(default=True)

    SEARCH_BACKEND_ENGINE: str = Field(default='ripgrep')
    SEARCH_PROCESS_HTML: bool = Field(default=True)
    # NOTE(review): unit is presumably seconds -- confirm where it is consumed
    SEARCH_BACKEND_TIMEOUT: int = Field(default=10)


SEARCH_BACKEND_CONFIG = SearchBackendConfig()