config and attr access improvements

This commit is contained in:
Nick Sweeting 2024-08-20 18:31:21 -07:00
parent 4ae186dfca
commit 0285aa52a0
No known key found for this signature in database
15 changed files with 203 additions and 187 deletions

View file

@ -115,7 +115,7 @@ def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
if isinstance(uri, bytes):
uri_str: str = uri.decode()
else:
uri_str = uri
uri_str = str(uri)
# only hash the domain part of URLs
if '://' in uri_str:

View file

@ -15,6 +15,7 @@ from charidfield import CharIDField # type: ignore[import-untyped]
from django.conf import settings
from django.db import models
from django.utils import timezone
from django.db.utils import OperationalError
from django.contrib.auth import get_user_model
@ -115,7 +116,8 @@ class ABIDModel(models.Model):
raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')
if not ts:
ts = datetime.utcfromtimestamp(0)
# default to unix epoch with 00:00:00 UTC
ts = datetime.fromtimestamp(0, timezone.utc) # equivalent to: ts = datetime.utcfromtimestamp(0)
print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())
if not uri:
@ -146,7 +148,13 @@ class ABIDModel(models.Model):
"""
ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
"""
abid = None
# if object is not yet saved to DB, always generate fresh ABID from values
if self._state.adding:
return self.generate_abid()
# otherwise DB is single source of truth, load ABID from existing db pk
abid: ABID | None = None
try:
abid = abid or ABID.parse(self.pk)
except Exception:
@ -158,12 +166,7 @@ class ABIDModel(models.Model):
pass
try:
abid = abid or ABID.parse(self.uuid)
except Exception:
pass
try:
abid = abid or ABID.parse(self.abid)
abid = abid or ABID.parse(cast(str, self.abid))
except Exception:
pass

View file

@ -1,6 +1,6 @@
__package__ = 'archivebox.api'
from typing import Optional
from typing import Optional, cast
from django.http import HttpRequest
from django.contrib.auth import login
@ -18,12 +18,13 @@ def auth_using_token(token, request: Optional[HttpRequest]=None) -> Optional[Abs
submitted_empty_form = token in ('string', '', None)
if submitted_empty_form:
assert request is not None, 'No request provided for API key authentication'
user = request.user # see if user is authed via django session and use that as the default
else:
try:
token = APIToken.objects.get(token=token)
if token.is_valid():
user = token.user
user = token.created_by
except APIToken.DoesNotExist:
pass
@ -38,6 +39,7 @@ def auth_using_password(username, password, request: Optional[HttpRequest]=None)
submitted_empty_form = (username, password) in (('string', 'string'), ('', ''), (None, None))
if submitted_empty_form:
assert request is not None, 'No request provided for API key authentication'
user = request.user # see if user is authed via django session and use that as the default
else:
user = authenticate(
@ -47,8 +49,9 @@ def auth_using_password(username, password, request: Optional[HttpRequest]=None)
if not user:
print('[❌] Failed to authenticate API user using API Key:', request)
user = None
return user
return cast(AbstractBaseUser | None, user)
### Base Auth Types

View file

@ -12,7 +12,8 @@ from signal_webhooks.models import WebhookBase
from django_stubs_ext.db.models import TypedModelMeta
from abid_utils.models import ABIDModel, ABIDField
from abid_utils.models import ABIDModel, ABIDField, get_or_create_system_user_pk
def generate_secret_token() -> str:
@ -32,15 +33,13 @@ class APIToken(ABIDModel):
abid_rand_src = 'self.id'
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
uuid = models.UUIDField(blank=True, null=True, editable=False, unique=True)
abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
created = models.DateTimeField(auto_now_add=True)
expires = models.DateTimeField(null=True, blank=True)
token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
expires = models.DateTimeField(null=True, blank=True)
class Meta(TypedModelMeta):
verbose_name = "API Key"
@ -50,7 +49,7 @@ class APIToken(ABIDModel):
return self.token
def __repr__(self) -> str:
return f'<APIToken user={self.user.username} token=************{self.token[-4:]}>'
return f'<APIToken user={self.created_by.username} token=************{self.token[-4:]}>'
def __json__(self) -> dict:
return {
@ -63,10 +62,6 @@ class APIToken(ABIDModel):
"expires": self.expires_as_iso8601,
}
@property
def ulid(self):
return self.get_abid().ulid
@property
def expires_as_iso8601(self):
"""Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none."""
@ -100,10 +95,15 @@ class OutboundWebhook(ABIDModel, WebhookBase):
abid_subtype_src = 'self.ref'
abid_rand_src = 'self.id'
id = models.UUIDField(blank=True, null=True, unique=True, editable=True)
uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
created = models.DateTimeField(auto_now_add=True)
modified = models.DateTimeField(auto_now=True)
# More fields here: WebhookBase...
WebhookBase._meta.get_field('name').help_text = (
'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).')
WebhookBase._meta.get_field('signal').help_text = (

View file

@ -309,9 +309,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
# snapshot = Snapshot.objects.create(**payload.dict())
# return snapshot
#
# @router.put("/snapshot/{snapshot_uuid}", response=SnapshotSchema)
# def update_snapshot(request, snapshot_uuid: str, payload: SnapshotSchema):
# snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
# snapshot = get_object_or_404(Snapshot, uuid=snapshot_id)
#
# for attr, value in payload.dict().items():
# setattr(snapshot, attr, value)
@ -319,9 +319,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
#
# return snapshot
#
# @router.delete("/snapshot/{snapshot_uuid}")
# def delete_snapshot(request, snapshot_uuid: str):
# snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid)
# @router.delete("/snapshot/{snapshot_id}")
# def delete_snapshot(request, snapshot_id: str):
# snapshot = get_object_or_404(Snapshot, uuid=snapshot_id)
# snapshot.delete()
# return {"success": True}

View file

@ -44,6 +44,7 @@ from collections import defaultdict
import importlib.metadata
from .config_stubs import (
AttrDict,
SimpleConfigValueDict,
ConfigValue,
ConfigDict,
@ -379,6 +380,29 @@ ALLOWED_IN_OUTPUT_DIR = {
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
# Schema entries that expose this module's constants in the same
# {'default': <callable>} shape as the other DYNAMIC_CONFIG_SCHEMA entries
# (DYNAMIC_CONFIG_SCHEMA spreads this dict in via **CONSTANTS).
# Every lambda accepts the `c` argument that schema defaults are called with,
# ignores it, and returns the module-level name, looked up when the lambda runs.
# NOTE(review): 'ANSI' is also defined explicitly in DYNAMIC_CONFIG_SCHEMA after the
# **CONSTANTS spread, so that later entry replaces the one below — confirm intended.
CONSTANTS = {
"PACKAGE_DIR_NAME": {'default': lambda c: PACKAGE_DIR_NAME},
"TEMPLATES_DIR_NAME": {'default': lambda c: TEMPLATES_DIR_NAME},
"ARCHIVE_DIR_NAME": {'default': lambda c: ARCHIVE_DIR_NAME},
"SOURCES_DIR_NAME": {'default': lambda c: SOURCES_DIR_NAME},
"LOGS_DIR_NAME": {'default': lambda c: LOGS_DIR_NAME},
"CACHE_DIR_NAME": {'default': lambda c: CACHE_DIR_NAME},
"PERSONAS_DIR_NAME": {'default': lambda c: PERSONAS_DIR_NAME},
"CRONTABS_DIR_NAME": {'default': lambda c: CRONTABS_DIR_NAME},
"SQL_INDEX_FILENAME": {'default': lambda c: SQL_INDEX_FILENAME},
"JSON_INDEX_FILENAME": {'default': lambda c: JSON_INDEX_FILENAME},
"HTML_INDEX_FILENAME": {'default': lambda c: HTML_INDEX_FILENAME},
"ROBOTS_TXT_FILENAME": {'default': lambda c: ROBOTS_TXT_FILENAME},
"FAVICON_FILENAME": {'default': lambda c: FAVICON_FILENAME},
"CONFIG_FILENAME": {'default': lambda c: CONFIG_FILENAME},
"DEFAULT_CLI_COLORS": {'default': lambda c: DEFAULT_CLI_COLORS},
"ANSI": {'default': lambda c: ANSI},
"COLOR_DICT": {'default': lambda c: COLOR_DICT},
"STATICFILE_EXTENSIONS": {'default': lambda c: STATICFILE_EXTENSIONS},
"ALLOWED_IN_OUTPUT_DIR": {'default': lambda c: ALLOWED_IN_OUTPUT_DIR},
"ALLOWDENYLIST_REGEX_FLAGS": {'default': lambda c: ALLOWDENYLIST_REGEX_FLAGS},
}
############################## Version Config ##################################
def get_system_user() -> str:
@ -498,9 +522,13 @@ def can_upgrade(config):
############################## Derived Config ##################################
# These are derived/computed values calculated *after* all user-provided config values are ingested
# they appear in `archivebox config` output and are intended to be read-only for the user
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
**CONSTANTS,
'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
'USER': {'default': lambda c: get_system_user()},
'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
@ -678,28 +706,29 @@ def load_config_val(key: str,
raise Exception('Config values can only be str, bool, int, or json')
def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]:
def load_config_file(out_dir: str | None=None) -> Optional[ConfigDict]:
"""load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
assert out_dir and out_dir.is_dir()
config_path = Path(out_dir) / CONFIG_FILENAME
if config_path.exists():
config_file = ConfigParser()
config_file.optionxform = str
config_file.read(config_path)
# flatten into one namespace
config_file_vars = {
config_file_vars = ConfigDict({
key.upper(): val
for section, options in config_file.items()
for key, val in options.items()
}
})
# print('[i] Loaded config file', os.path.abspath(config_path))
# print(config_file_vars)
return config_file_vars
return None
def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
def write_config_file(config: Dict[str, str], out_dir: str | None=None) -> ConfigDict:
"""write config values to the ini-formatted config file at OUTPUT_DIR/Archivebox.conf"""
from .system import atomic_write
@ -740,7 +769,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
existing_config = dict(config_file[section])
else:
existing_config = {}
config_file[section] = {**existing_config, key: val}
config_file[section] = ConfigDict({**existing_config, key: val})
# always make sure there's a SECRET_KEY defined for Django
existing_secret_key = None
@ -815,7 +844,7 @@ def load_config(defaults: ConfigDefaultDict,
# raise
raise SystemExit(2)
return extended_config
return AttrDict(extended_config)
def parse_version_string(version: str) -> Tuple[int, int, int]:
@ -1198,14 +1227,14 @@ def get_chrome_info(config: ConfigDict) -> ConfigValue:
def load_all_config():
CONFIG: ConfigDict = {}
CONFIG: ConfigDict = ConfigDict()
for section_name, section_config in CONFIG_SCHEMA.items():
CONFIG = load_config(section_config, CONFIG)
return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG)
# add all final config values in CONFIG to globals in this file
CONFIG = load_all_config()
CONFIG: ConfigDict = load_all_config()
globals().update(CONFIG)
# this lets us do: from .config import DEBUG, MEDIA_TIMEOUT, ...

View file

@ -9,11 +9,15 @@ SimpleConfigValueDict = Dict[str, SimpleConfigValue]
SimpleConfigValueGetter = Callable[[], SimpleConfigValue]
ConfigValue = Union[SimpleConfigValue, SimpleConfigValueDict, SimpleConfigValueGetter]
# dict subclass whose items can also be read and written as attributes
# (e.g. cfg.DEBUG as well as cfg['DEBUG']).
class AttrDict(dict):
def __init__(self, *args, **kwargs):
# accepts the same positional/keyword arguments as dict()
super().__init__(*args, **kwargs)
# use the dict itself as the instance attribute namespace, so attribute
# access and item access share the same storage
self.__dict__ = self
class BaseConfig(TypedDict):
pass
class ConfigDict(BaseConfig, total=False):
class ConfigDict(BaseConfig, AttrDict, total=False):
"""
# Regenerate by pasting this quine into `archivebox shell` 🥚
from archivebox.config import ConfigDict, CONFIG_DEFAULTS
@ -28,6 +32,7 @@ class ConfigDict(BaseConfig, total=False):
print(f' {key}: {Type.__name__}')
print()
"""
IS_TTY: bool
USE_COLOR: bool
SHOW_PROGRESS: bool

View file

@ -7,6 +7,7 @@ from io import StringIO
from pathlib import Path
from contextlib import redirect_stdout
from datetime import datetime, timezone
from typing import Dict, Any
from django.contrib import admin
from django.db.models import Count, Q
@ -16,10 +17,12 @@ from django.utils.safestring import mark_safe
from django.shortcuts import render, redirect
from django.contrib.auth import get_user_model
from django.core.exceptions import ValidationError
from django.conf import settings
from django import forms
from signal_webhooks.admin import WebhookAdmin, get_webhook_model
from signal_webhooks.admin import WebhookAdmin
from signal_webhooks.utils import get_webhook_model
# from plugantic.admin import CustomPlugin
from ..util import htmldecode, urldecode, ansi_to_html
@ -34,16 +37,11 @@ from index.html import snapshot_icons
from logging_util import printable_filesize
from main import add, remove
from extractors import archive_links
from config import (
OUTPUT_DIR,
SNAPSHOTS_PER_PAGE,
VERSION,
VERSIONS_AVAILABLE,
CAN_UPGRADE
)
GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE}
CONFIG = settings.CONFIG
GLOBAL_CONTEXT = {'VERSION': CONFIG.VERSION, 'VERSIONS_AVAILABLE': CONFIG.VERSIONS_AVAILABLE, 'CAN_UPGRADE': CONFIG.CAN_UPGRADE}
# Admin URLs
# /admin/
@ -74,7 +72,7 @@ class ArchiveBoxAdmin(admin.AdminSite):
return redirect(f'/admin/login/?next={request.path}')
request.current_app = self.name
context = {
context: Dict[str, Any] = {
**self.each_context(request),
'title': 'Add URLs',
}
@ -92,7 +90,7 @@ class ArchiveBoxAdmin(admin.AdminSite):
"urls": url,
"depth": depth,
"update_all": False,
"out_dir": OUTPUT_DIR,
"out_dir": CONFIG.OUTPUT_DIR,
}
add_stdout = StringIO()
with redirect_stdout(add_stdout):
@ -101,7 +99,7 @@ class ArchiveBoxAdmin(admin.AdminSite):
context.update({
"stdout": ansi_to_html(add_stdout.getvalue().strip()),
"form": AddLinkForm()
"form": AddLinkForm(),
})
else:
context["form"] = form
@ -118,12 +116,14 @@ archivebox_admin.disable_action('delete_selected')
# archivebox_admin.register(CustomPlugin)
# patch admin with methods to add data views (implemented by admin_data_views package)
# https://github.com/MrThearMan/django-admin-data-views
# https://mrthearman.github.io/django-admin-data-views/setup/
############### Additional sections are defined in settings.ADMIN_DATA_VIEWS #########
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin)
@ -146,7 +146,7 @@ class ArchiveResultInline(admin.TabularInline):
class TagInline(admin.TabularInline):
model = Tag.snapshot_set.through
model = Tag.snapshot_set.through # type: ignore
# fk_name = 'snapshot'
fields = ('id', 'tag')
extra = 1
@ -241,7 +241,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
autocomplete_fields = ['tags']
inlines = [TagInline, ArchiveResultInline]
list_per_page = SNAPSHOTS_PER_PAGE
list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
action_form = SnapshotActionForm
@ -433,7 +433,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
# Monkey patch here plus core_tags.py
self.change_list_template = 'private_index_grid.html'
self.list_per_page = SNAPSHOTS_PER_PAGE
self.list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
self.list_max_show_all = self.list_per_page
# Call monkey patched view
@ -458,7 +458,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
archive_links([
snapshot.as_link()
for snapshot in queryset
], out_dir=OUTPUT_DIR)
], out_dir=CONFIG.OUTPUT_DIR)
@admin.action(
description="⬇️ Title"
@ -467,7 +467,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
archive_links([
snapshot.as_link()
for snapshot in queryset
], overwrite=True, methods=('title','favicon'), out_dir=OUTPUT_DIR)
], overwrite=True, methods=('title','favicon'), out_dir=CONFIG.OUTPUT_DIR)
@admin.action(
description="Re-Snapshot"
@ -485,13 +485,13 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
archive_links([
snapshot.as_link()
for snapshot in queryset
], overwrite=True, out_dir=OUTPUT_DIR)
], overwrite=True, out_dir=CONFIG.OUTPUT_DIR)
@admin.action(
description="Delete"
)
def delete_snapshots(self, request, queryset):
remove(snapshots=queryset, yes=True, delete=True, out_dir=OUTPUT_DIR)
remove(snapshots=queryset, yes=True, delete=True, out_dir=CONFIG.OUTPUT_DIR)
@admin.action(
@ -578,7 +578,7 @@ class ArchiveResultAdmin(admin.ModelAdmin):
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
ordering = ['-start_ts']
list_per_page = SNAPSHOTS_PER_PAGE
list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
@admin.display(
description='Snapshot Info'
@ -620,7 +620,7 @@ class ArchiveResultAdmin(admin.ModelAdmin):
)
def output_summary(self, result):
snapshot_dir = Path(OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
snapshot_dir = Path(CONFIG.OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
output_str = format_html(
'<pre style="display: inline-block">{}</pre><br/>',
result.output,

View file

@ -1,7 +1,7 @@
__package__ = 'archivebox.core'
from typing import Optional, List, Dict
from typing import Optional, List, Dict, Iterable
from django_stubs_ext.db.models import TypedModelMeta
import json
@ -17,10 +17,10 @@ from django.utils.text import slugify
from django.core.cache import cache
from django.urls import reverse, reverse_lazy
from django.db.models import Case, When, Value, IntegerField
from django.conf import settings
from abid_utils.models import ABIDModel, ABIDField
from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME
from ..system import get_dir_size
from ..util import parse_date, base_url
from ..index.schema import Link
@ -72,6 +72,7 @@ class Tag(ABIDModel):
slug = models.SlugField(unique=True, blank=False, max_length=100, editable=False)
# slug is autoset on save from name, never set it manually
snapshot_set: models.Manager['Snapshot']
class Meta(TypedModelMeta):
verbose_name = "Tag"
@ -154,6 +155,8 @@ class Snapshot(ABIDModel):
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
archiveresult_set: models.Manager['ArchiveResult']
@property
def uuid(self):
return self.id
@ -246,11 +249,11 @@ class Snapshot(ABIDModel):
@cached_property
def link_dir(self):
return str(ARCHIVE_DIR / self.timestamp)
return str(settings.CONFIG.ARCHIVE_DIR / self.timestamp)
@cached_property
def archive_path(self):
return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)
return '{}/{}'.format(settings.CONFIG.ARCHIVE_DIR_NAME, self.timestamp)
@cached_property
def archive_size(self):
@ -284,7 +287,7 @@ class Snapshot(ABIDModel):
@cached_property
def status_code(self) -> Optional[str]:
return self.headers and self.headers.get('Status-Code')
return self.headers.get('Status-Code') if self.headers else None
@cached_property
def history(self) -> dict:
@ -322,7 +325,7 @@ class Snapshot(ABIDModel):
return None
def save_tags(self, tags: List[str]=()) -> None:
def save_tags(self, tags: Iterable[str]=()) -> None:
tags_id = []
for tag in tags:
if tag.strip():
@ -334,17 +337,17 @@ class Snapshot(ABIDModel):
# def get_storage_dir(self, create=True, symlink=True) -> Path:
# date_str = self.added.strftime('%Y%m%d')
# domain_str = domain(self.url)
# abs_storage_dir = Path(ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
# abs_storage_dir = Path(settings.CONFIG.ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
# if create and not abs_storage_dir.is_dir():
# abs_storage_dir.mkdir(parents=True, exist_ok=True)
# if symlink:
# LINK_PATHS = [
# Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
# # Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
# Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
# Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
# Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
# # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
# Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
# Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
# ]
# for link_path in LINK_PATHS:
# link_path.parent.mkdir(parents=True, exist_ok=True)
@ -439,8 +442,8 @@ class ArchiveResult(ABIDModel):
should be used for user-facing iframe embeds of this result
"""
if hasattr(self.extractor_module, 'get_embed_path'):
return self.extractor_module.get_embed_path(self)
if get_embed_path_func := getattr(self.extractor_module, 'get_embed_path', None):
return get_embed_path_func(self)
return self.extractor_module.get_output_path()
@ -455,18 +458,18 @@ class ArchiveResult(ABIDModel):
# def get_storage_dir(self, create=True, symlink=True):
# date_str = self.snapshot.added.strftime('%Y%m%d')
# domain_str = domain(self.snapshot.url)
# abs_storage_dir = Path(ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
# abs_storage_dir = Path(settings.CONFIG.ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
# if create and not abs_storage_dir.is_dir():
# abs_storage_dir.mkdir(parents=True, exist_ok=True)
# if symlink:
# LINK_PATHS = [
# Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
# # Path(ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
# # Path(ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
# Path(ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
# Path(ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
# Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
# # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
# # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
# Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
# Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
# ]
# for link_path in LINK_PATHS:
# link_path.parent.mkdir(parents=True, exist_ok=True)

View file

@ -9,32 +9,9 @@ import tempfile
from pathlib import Path
from django.utils.crypto import get_random_string
from ..config import (
CONFIG,
DEBUG,
SECRET_KEY,
ALLOWED_HOSTS,
PACKAGE_DIR,
TEMPLATES_DIR_NAME,
CUSTOM_TEMPLATES_DIR,
SQL_INDEX_FILENAME,
OUTPUT_DIR,
ARCHIVE_DIR,
LOGS_DIR,
CACHE_DIR,
TIMEZONE,
LDAP,
LDAP_SERVER_URI,
LDAP_BIND_DN,
LDAP_BIND_PASSWORD,
LDAP_USER_BASE,
LDAP_USER_FILTER,
LDAP_USERNAME_ATTR,
LDAP_FIRSTNAME_ATTR,
LDAP_LASTNAME_ATTR,
LDAP_EMAIL_ATTR,
)
from ..config import CONFIG
from ..config_stubs import AttrDict
assert isinstance(CONFIG, AttrDict)
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
@ -53,12 +30,12 @@ LOGOUT_REDIRECT_URL = os.environ.get('LOGOUT_REDIRECT_URL', '/')
PASSWORD_RESET_URL = '/accounts/password_reset/'
APPEND_SLASH = True
DEBUG = DEBUG or ('--debug' in sys.argv)
DEBUG = CONFIG.DEBUG or ('--debug' in sys.argv)
# add plugins folders to system path, and load plugins in installed_apps
BUILTIN_PLUGINS_DIR = PACKAGE_DIR / 'plugins'
USER_PLUGINS_DIR = OUTPUT_DIR / 'plugins'
BUILTIN_PLUGINS_DIR = CONFIG.PACKAGE_DIR / 'plugins'
USER_PLUGINS_DIR = CONFIG.OUTPUT_DIR / 'plugins'
sys.path.insert(0, str(BUILTIN_PLUGINS_DIR))
sys.path.insert(0, str(USER_PLUGINS_DIR))
@ -127,7 +104,7 @@ AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.ModelBackend',
]
if LDAP:
if CONFIG.LDAP:
try:
import ldap
from django_auth_ldap.config import LDAPSearch
@ -138,23 +115,23 @@ if LDAP:
global AUTH_LDAP_USER_SEARCH
global AUTH_LDAP_USER_ATTR_MAP
AUTH_LDAP_SERVER_URI = LDAP_SERVER_URI
AUTH_LDAP_BIND_DN = LDAP_BIND_DN
AUTH_LDAP_BIND_PASSWORD = LDAP_BIND_PASSWORD
AUTH_LDAP_SERVER_URI = CONFIG.LDAP_SERVER_URI
AUTH_LDAP_BIND_DN = CONFIG.LDAP_BIND_DN
AUTH_LDAP_BIND_PASSWORD = CONFIG.LDAP_BIND_PASSWORD
assert AUTH_LDAP_SERVER_URI and LDAP_USERNAME_ATTR and LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True'
assert AUTH_LDAP_SERVER_URI and CONFIG.LDAP_USERNAME_ATTR and CONFIG.LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True'
AUTH_LDAP_USER_SEARCH = LDAPSearch(
LDAP_USER_BASE,
CONFIG.LDAP_USER_BASE,
ldap.SCOPE_SUBTREE,
'(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')',
'(&(' + CONFIG.LDAP_USERNAME_ATTR + '=%(user)s)' + CONFIG.LDAP_USER_FILTER + ')',
)
AUTH_LDAP_USER_ATTR_MAP = {
'username': LDAP_USERNAME_ATTR,
'first_name': LDAP_FIRSTNAME_ATTR,
'last_name': LDAP_LASTNAME_ATTR,
'email': LDAP_EMAIL_ATTR,
'username': CONFIG.LDAP_USERNAME_ATTR,
'first_name': CONFIG.LDAP_FIRSTNAME_ATTR,
'last_name': CONFIG.LDAP_LASTNAME_ATTR,
'email': CONFIG.LDAP_EMAIL_ATTR,
}
AUTHENTICATION_BACKENDS = [
@ -206,6 +183,15 @@ if DEBUG_TOOLBAR:
]
MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware']
# Only when DEBUG is on: add the django_autotyping app and point its
# STUBS_GENERATION.LOCAL_STUBS_DIR setting at <PACKAGE_DIR>/typings.
if DEBUG:
# imported inside the branch, so the package is only needed when DEBUG is on
from django_autotyping.typing import AutotypingSettingsDict
INSTALLED_APPS += ['django_autotyping']
AUTOTYPING: AutotypingSettingsDict = {
"STUBS_GENERATION": {
"LOCAL_STUBS_DIR": Path(CONFIG.PACKAGE_DIR) / "typings",
}
}
# https://github.com/bensi94/Django-Requests-Tracker (improved version of django-debug-toolbar)
# Must delete archivebox/templates/admin to use because it relies on some things we override
@ -224,15 +210,15 @@ if DEBUG_REQUESTS_TRACKER:
STATIC_URL = '/static/'
STATICFILES_DIRS = [
*([str(CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_DIR else []),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'static'),
*([str(CONFIG.CUSTOM_TEMPLATES_DIR / 'static')] if CONFIG.CUSTOM_TEMPLATES_DIR else []),
str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME / 'static'),
]
TEMPLATE_DIRS = [
*([str(CUSTOM_TEMPLATES_DIR)] if CUSTOM_TEMPLATES_DIR else []),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'core'),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'admin'),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME),
*([str(CONFIG.CUSTOM_TEMPLATES_DIR)] if CONFIG.CUSTOM_TEMPLATES_DIR else []),
str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME / 'core'),
str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME / 'admin'),
str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME),
]
TEMPLATES = [
@ -258,10 +244,10 @@ TEMPLATES = [
CACHE_DB_FILENAME = 'cache.sqlite3'
CACHE_DB_PATH = CACHE_DIR / CACHE_DB_FILENAME
CACHE_DB_PATH = CONFIG.CACHE_DIR / CACHE_DB_FILENAME
CACHE_DB_TABLE = 'django_cache'
DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME
DATABASE_FILE = Path(CONFIG.OUTPUT_DIR) / CONFIG.SQL_INDEX_FILENAME
DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(DATABASE_FILE))
DATABASES = {
@ -272,7 +258,7 @@ DATABASES = {
'timeout': 60,
'check_same_thread': False,
},
'TIME_ZONE': TIMEZONE,
'TIME_ZONE': CONFIG.TIMEZONE,
# DB setup is sometimes modified at runtime by setup_django() in config.py
},
# 'cache': {
@ -282,7 +268,7 @@ DATABASES = {
# 'timeout': 60,
# 'check_same_thread': False,
# },
# 'TIME_ZONE': TIMEZONE,
# 'TIME_ZONE': CONFIG.TIMEZONE,
# },
}
MIGRATION_MODULES = {'signal_webhooks': None}
@ -312,7 +298,7 @@ STORAGES = {
"BACKEND": "django.core.files.storage.FileSystemStorage",
"OPTIONS": {
"base_url": "/archive/",
"location": ARCHIVE_DIR,
"location": CONFIG.ARCHIVE_DIR,
},
},
# "personas": {
@ -328,9 +314,9 @@ STORAGES = {
### Security Settings
################################################################################
SECRET_KEY = SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_')
SECRET_KEY = CONFIG.SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_')
ALLOWED_HOSTS = ALLOWED_HOSTS.split(',')
ALLOWED_HOSTS = CONFIG.ALLOWED_HOSTS.split(',')
SECURE_BROWSER_XSS_FILTER = True
SECURE_CONTENT_TYPE_NOSNIFF = True
@ -361,7 +347,7 @@ SHELL_PLUS_PRINT_SQL = False
IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner']
IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell'
if IS_SHELL:
os.environ['PYTHONSTARTUP'] = str(Path(PACKAGE_DIR) / 'core' / 'welcome_message.py')
os.environ['PYTHONSTARTUP'] = str(Path(CONFIG.PACKAGE_DIR) / 'core' / 'welcome_message.py')
################################################################################
@ -373,10 +359,10 @@ USE_I18N = True
USE_TZ = True
DATETIME_FORMAT = 'Y-m-d g:iA'
SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
TIME_ZONE = TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent
TIME_ZONE = CONFIG.TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent
from django.conf.locale.en import formats as en_formats
from django.conf.locale.en import formats as en_formats # type: ignore
en_formats.DATETIME_FORMAT = DATETIME_FORMAT
en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT
@ -410,8 +396,8 @@ class NoisyRequestsFilter(logging.Filter):
return 1
if LOGS_DIR.exists():
ERROR_LOG = (LOGS_DIR / 'errors.log')
if CONFIG.LOGS_DIR.exists():
ERROR_LOG = (CONFIG.LOGS_DIR / 'errors.log')
else:
# historically too many edge cases here around creating log dir w/ correct permissions early on
# if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr

View file

@ -46,7 +46,7 @@ urlpatterns = [
# path('jet_api/', include('jet_django.urls')), Enable to use https://www.jetadmin.io/integrations/django
path('index.html', RedirectView.as_view(url='/')),
path('index.json', static.serve, {'document_root': settings.OUTPUT_DIR, 'path': 'index.json'}),
path('index.json', static.serve, {'document_root': settings.CONFIG.OUTPUT_DIR, 'path': 'index.json'}),
path('', HomepageView.as_view(), name='Home'),
]
urlpatterns += staticfiles_urlpatterns()

View file

@ -11,27 +11,18 @@ from ..util import (
domain,
dedupe,
)
from ..config import (
TIMEOUT,
SAVE_FAVICON,
FAVICON_PROVIDER,
CURL_BINARY,
CURL_ARGS,
CURL_EXTRA_ARGS,
CURL_VERSION,
CHECK_SSL_VALIDITY,
CURL_USER_AGENT,
)
from ..config import CONFIG
from ..logging_util import TimedProgress
@enforce_types
def should_save_favicon(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
out_dir = out_dir or Path(link.link_dir)
def should_save_favicon(link: Link, out_dir: str | Path | None=None, overwrite: bool=False) -> bool:
assert link.link_dir
out_dir = Path(out_dir or link.link_dir)
if not overwrite and (out_dir / 'favicon.ico').exists():
return False
return SAVE_FAVICON
return CONFIG.SAVE_FAVICON
@enforce_types
def get_output_path():
@ -39,24 +30,26 @@ def get_output_path():
@enforce_types
def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
def save_favicon(link: Link, out_dir: str | Path | None=None, timeout: int=CONFIG.TIMEOUT) -> ArchiveResult:
"""download site favicon from google's favicon api"""
out_dir = out_dir or link.link_dir
out_dir = Path(out_dir or link.link_dir)
assert out_dir.exists()
output: ArchiveOutput = 'favicon.ico'
# later options take precedence
options = [
*CURL_ARGS,
*CURL_EXTRA_ARGS,
*CONFIG.CURL_ARGS,
*CONFIG.CURL_EXTRA_ARGS,
'--max-time', str(timeout),
'--output', str(output),
*(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
*([] if CHECK_SSL_VALIDITY else ['--insecure']),
*(['--user-agent', '{}'.format(CONFIG.CURL_USER_AGENT)] if CONFIG.CURL_USER_AGENT else []),
*([] if CONFIG.CHECK_SSL_VALIDITY else ['--insecure']),
]
cmd = [
CURL_BINARY,
CONFIG.CURL_BINARY,
*dedupe(options),
FAVICON_PROVIDER.format(domain(link.url)),
CONFIG.FAVICON_PROVIDER.format(domain(link.url)),
]
status = 'failed'
timer = TimedProgress(timeout, prefix=' ')
@ -72,7 +65,7 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
return ArchiveResult(
cmd=cmd,
pwd=str(out_dir),
cmd_version=CURL_VERSION,
cmd_version=CONFIG.CURL_VERSION,
output=output,
status=status,
**timer.stats,

View file

@ -14,15 +14,7 @@ from ..util import (
without_query,
without_fragment,
)
from ..config import (
TIMEOUT,
SAVE_GIT,
GIT_BINARY,
GIT_ARGS,
GIT_VERSION,
GIT_DOMAINS,
CHECK_SSL_VALIDITY
)
from ..config import CONFIG
from ..logging_util import TimedProgress
@ -50,17 +42,17 @@ def should_save_git(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
return False
is_clonable_url = (
(domain(link.url) in GIT_DOMAINS)
(domain(link.url) in CONFIG.GIT_DOMAINS)
or (extension(link.url) == 'git')
)
if not is_clonable_url:
return False
return SAVE_GIT
return CONFIG.SAVE_GIT
@enforce_types
def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=CONFIG.TIMEOUT) -> ArchiveResult:
"""download full site using git"""
out_dir = out_dir or Path(link.link_dir)
@ -68,10 +60,10 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
output_path = out_dir / output
output_path.mkdir(exist_ok=True)
cmd = [
GIT_BINARY,
CONFIG.GIT_BINARY,
'clone',
*GIT_ARGS,
*([] if CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
*CONFIG.GIT_ARGS,
*([] if CONFIG.CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
without_query(without_fragment(link.url)),
]
status = 'succeeded'
@ -96,7 +88,7 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
return ArchiveResult(
cmd=cmd,
pwd=str(out_dir),
cmd_version=GIT_VERSION,
cmd_version=CONFIG.GIT_VERSION,
output=output,
status=status,
**timer.stats,

View file

@ -192,7 +192,7 @@ class Link:
if extended:
info.update({
'snapshot_id': self.snapshot_id,
'snapshot_uuid': self.snapshot_uuid,
'snapshot_old_id': self.snapshot_old_id,
'snapshot_abid': self.snapshot_abid,
'link_dir': self.link_dir,
@ -266,15 +266,15 @@ class Link:
@cached_property
def snapshot(self):
from core.models import Snapshot
return Snapshot.objects.only('id').get(url=self.url)
return Snapshot.objects.only('id', 'old_id', 'abid').get(url=self.url)
@cached_property
def snapshot_id(self):
return str(self.snapshot.pk)
@cached_property
def snapshot_uuid(self):
return str(self.snapshot.id)
def snapshot_old_id(self):
return str(self.snapshot.old_id)
@cached_property
def snapshot_abid(self):

View file

@ -7,7 +7,9 @@ if __name__ == '__main__':
# versions of ./manage.py commands whenever possible. When that's not possible
# (e.g. makemigrations), you can comment out this check temporarily
if not ('makemigrations' in sys.argv or 'migrate' in sys.argv or 'startapp' in sys.argv or 'squashmigrations' in sys.argv):
allowed_commands = ['makemigrations', 'migrate', 'startapp','squashmigrations', 'generate_stubs']
if not any(cmd in sys.argv for cmd in allowed_commands):
print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
print()
print(' Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:')