use constants in more places

This commit is contained in:
Nick Sweeting 2024-09-26 02:41:09 -07:00
parent eb360f188a
commit ed45f58758
No known key found for this signature in database
5 changed files with 53 additions and 64 deletions

View file

@ -2,7 +2,6 @@ __package__ = 'archivebox.core'
from typing import Callable
import threading
from pathlib import Path
from django.shortcuts import render, redirect
@ -12,6 +11,7 @@ from django.views import View
from django.views.generic.list import ListView
from django.views.generic import FormView
from django.db.models import Q
from django.conf import settings
from django.contrib import messages
from django.contrib.auth.mixins import UserPassesTestMixin
from django.views.decorators.csrf import csrf_exempt
@ -20,6 +20,8 @@ from django.utils.decorators import method_decorator
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
import archivebox
from archivebox.constants import CONSTANTS
from core.models import Snapshot
from core.forms import AddLinkForm
@ -27,28 +29,17 @@ from core.admin import result_url
from queues.tasks import bg_add
from ..plugins_sys.config.apps import SHELL_CONFIG, SERVER_CONFIG
from ..plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
from ..config import (
OUTPUT_DIR,
PUBLIC_INDEX,
PUBLIC_SNAPSHOTS,
PUBLIC_ADD_VIEW,
VERSION,
COMMIT_HASH,
FOOTER_INFO,
SNAPSHOTS_PER_PAGE,
CONFIG,
CONFIG_SCHEMA,
DYNAMIC_CONFIG_SCHEMA,
USER_CONFIG,
SAVE_ARCHIVE_DOT_ORG,
PREVIEW_ORIGINALS,
CONSTANTS,
)
from ..logging_util import printable_filesize
from ..main import add
from ..util import base_url, ansi_to_html, htmlencode, urldecode, urlencode, ts_to_date_str
from ..util import base_url, htmlencode, ts_to_date_str
from ..search import query_search_index
from ..extractors.wget import wget_output_path
from .serve_static import serve_static_with_byterange_support
@ -57,7 +48,7 @@ class HomepageView(View):
if request.user.is_authenticated:
return redirect('/admin/core/snapshot/')
if PUBLIC_INDEX:
if SERVER_CONFIG.PUBLIC_INDEX:
return redirect('/public')
return redirect(f'/admin/login/?next={request.path}')
@ -166,8 +157,8 @@ class SnapshotView(View):
'status_color': 'success' if link.is_archived else 'danger',
'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
'warc_path': warc_path,
'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG,
'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
'best_result': best_result,
# 'tags_str': 'somealskejrewlkrjwer,werlmwrwlekrjewlkrjwer324m532l,4m32,23m324234',
@ -176,7 +167,7 @@ class SnapshotView(View):
def get(self, request, path):
if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
if not request.user.is_authenticated and not SERVER_CONFIG.PUBLIC_SNAPSHOTS:
return redirect(f'/admin/login/?next={request.path}')
snapshot = None
@ -381,15 +372,15 @@ class SnapshotView(View):
class PublicIndexView(ListView):
template_name = 'public_index.html'
model = Snapshot
paginate_by = SNAPSHOTS_PER_PAGE
paginate_by = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
ordering = ['-bookmarked_at', '-created_at']
def get_context_data(self, **kwargs):
return {
**super().get_context_data(**kwargs),
'VERSION': VERSION,
'COMMIT_HASH': COMMIT_HASH,
'FOOTER_INFO': FOOTER_INFO,
'VERSION': archivebox.VERSION,
'COMMIT_HASH': SHELL_CONFIG.COMMIT_HASH,
'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
}
def get_queryset(self, **kwargs):
@ -428,7 +419,7 @@ class PublicIndexView(ListView):
return qs.distinct()
def get(self, *args, **kwargs):
if PUBLIC_INDEX or self.request.user.is_authenticated:
if SERVER_CONFIG.PUBLIC_INDEX or self.request.user.is_authenticated:
response = super().get(*args, **kwargs)
return response
else:
@ -449,7 +440,7 @@ class AddView(UserPassesTestMixin, FormView):
return super().get_initial()
def test_func(self):
return PUBLIC_ADD_VIEW or self.request.user.is_authenticated
return SERVER_CONFIG.PUBLIC_ADD_VIEW or self.request.user.is_authenticated
def get_context_data(self, **kwargs):
return {
@ -457,8 +448,8 @@ class AddView(UserPassesTestMixin, FormView):
'title': "Add URLs",
# We can't just call request.build_absolute_uri in the template, because it would include query parameters
'absolute_add_path': self.request.build_absolute_uri(self.request.path),
'VERSION': VERSION,
'FOOTER_INFO': FOOTER_INFO,
'VERSION': archivebox.VERSION,
'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
'stdout': '',
}
@ -475,7 +466,7 @@ class AddView(UserPassesTestMixin, FormView):
"depth": depth,
"parser": parser,
"update_all": False,
"out_dir": OUTPUT_DIR,
"out_dir": archivebox.DATA_DIR,
"created_by_id": self.request.user.pk,
}
if extractors:

View file

@ -9,8 +9,6 @@ These are the old types we used to use before ArchiveBox v0.4 (before we switche
__package__ = 'archivebox.index'
from pathlib import Path
from datetime import datetime, timezone, timedelta
from typing import List, Dict, Any, Optional, Union
@ -19,9 +17,13 @@ from dataclasses import dataclass, asdict, field, fields
from django.utils.functional import cached_property
from archivebox.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME
from plugins_extractor.favicon.apps import FAVICON_CONFIG
from ..system import get_dir_size
from ..util import ts_to_date_str, parse_date
from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME, FAVICON_PROVIDER
class ArchiveError(Exception):
def __init__(self, message, hints=None):
@ -88,7 +90,7 @@ class ArchiveResult:
info['start_ts'] = parse_date(info['start_ts'])
info['end_ts'] = parse_date(info['end_ts'])
if "pwd" not in keys:
info["pwd"] = str(Path(OUTPUT_DIR) / ARCHIVE_DIR_NAME / json_info["timestamp"])
info["pwd"] = str(ARCHIVE_DIR / json_info["timestamp"])
if "cmd_version" not in keys:
info["cmd_version"] = "Undefined"
if "cmd" not in keys:
@ -281,12 +283,10 @@ class Link:
@property
def link_dir(self) -> str:
from ..config import CONFIG
return str(Path(CONFIG['ARCHIVE_DIR']) / self.timestamp)
return str(ARCHIVE_DIR / self.timestamp)
@property
def archive_path(self) -> str:
from ..config import ARCHIVE_DIR_NAME
return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)
@property
@ -385,7 +385,6 @@ class Link:
@property
def is_archived(self) -> bool:
from ..config import ARCHIVE_DIR
from ..util import domain
output_paths = (
@ -402,7 +401,7 @@ class Link:
)
return any(
(Path(ARCHIVE_DIR) / self.timestamp / path).exists()
(ARCHIVE_DIR / self.timestamp / path).exists()
for path in output_paths
)
@ -438,7 +437,7 @@ class Link:
canonical = {
'index_path': 'index.html',
'favicon_path': 'favicon.ico',
'google_favicon_path': FAVICON_PROVIDER.format(self.domain),
'google_favicon_path': FAVICON_CONFIG.FAVICON_PROVIDER.format(self.domain),
'wget_path': wget_output_path(self),
'warc_path': 'warc/',
'singlefile_path': 'singlefile.html',

View file

@ -12,6 +12,8 @@ from django.utils.html import format_html, mark_safe
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
import archivebox
from ..config_stubs import AttrDict
from ..util import parse_date
@ -378,9 +380,8 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
assert request.user.is_superuser, "Must be a superuser to view configuration settings."
from django.conf import settings
log_files = settings.CONFIG.LOGS_DIR.glob("*.log")
log_files = archivebox.CONSTANTS.LOGS_DIR.glob("*.log")
log_files = sorted(log_files, key=os.path.getmtime)[::-1]
rows = {
@ -418,7 +419,7 @@ def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
from django.conf import settings
log_file = [logfile for logfile in settings.CONFIG.LOGS_DIR.glob('*.log') if key in logfile.name][0]
log_file = [logfile for logfile in archivebox.CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]
log_text = log_file.read_text()
log_stat = log_file.stat()

View file

@ -37,7 +37,7 @@ class RipgrepConfig(BaseConfigSet):
'--files-with-matches',
'--regexp',
])
RIPGREP_SEARCH_DIR: str = Field(default=lambda: str(settings.ARCHIVE_DIR))
RIPGREP_SEARCH_DIR: Path = archivebox.CONSTANTS.ARCHIVE_DIR
RIPGREP_CONFIG = RipgrepConfig()
@ -81,7 +81,7 @@ class RipgrepSearchBackend(BaseSearchBackend):
ripgrep_binary.abspath,
*RIPGREP_CONFIG.RIPGREP_ARGS_DEFAULT,
text,
RIPGREP_CONFIG.RIPGREP_SEARCH_DIR,
str(RIPGREP_CONFIG.RIPGREP_SEARCH_DIR),
]
proc = run(cmd, timeout=SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_TIMEOUT, capture_output=True, text=True)
timestamps = set()

View file

@ -18,13 +18,19 @@ from requests.exceptions import RequestException, ReadTimeout
from base32_crockford import encode as base32_encode # type: ignore
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
try:
import chardet
detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"]
except ImportError:
detect_encoding = lambda rawdata: "utf-8"
from archivebox.constants import STATICFILE_EXTENSIONS
from archivebox.plugins_sys.config.apps import ARCHIVING_CONFIG
from .misc.logging import COLOR_DICT
### Parsing Helpers
# All of these are (str) -> str
@ -114,7 +120,6 @@ def find_all_urls(urls_str: str):
def is_static_file(url: str):
# TODO: the proper way is with MIME type detection + ext, not only extension
from .config import STATICFILE_EXTENSIONS
return extension(url).lower() in STATICFILE_EXTENSIONS
@ -206,25 +211,20 @@ def parse_date(date: Any) -> Optional[datetime]:
@enforce_types
def download_url(url: str, timeout: int=None) -> str:
"""Download the contents of a remote url and return the text"""
from .config import (
TIMEOUT,
CHECK_SSL_VALIDITY,
WGET_USER_AGENT,
COOKIES_FILE,
)
timeout = timeout or TIMEOUT
timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
session = requests.Session()
if COOKIES_FILE and Path(COOKIES_FILE).is_file():
cookie_jar = http.cookiejar.MozillaCookieJar(COOKIES_FILE)
if ARCHIVING_CONFIG.COOKIES_FILE and Path(ARCHIVING_CONFIG.COOKIES_FILE).is_file():
cookie_jar = http.cookiejar.MozillaCookieJar(ARCHIVING_CONFIG.COOKIES_FILE)
cookie_jar.load(ignore_discard=True, ignore_expires=True)
for cookie in cookie_jar:
session.cookies.set(cookie.name, cookie.value, domain=cookie.domain, path=cookie.path)
response = session.get(
url,
headers={'User-Agent': WGET_USER_AGENT},
verify=CHECK_SSL_VALIDITY,
headers={'User-Agent': ARCHIVING_CONFIG.USER_AGENT},
verify=ARCHIVING_CONFIG.CHECK_SSL_VALIDITY,
timeout=timeout,
)
@ -243,14 +243,13 @@ def download_url(url: str, timeout: int=None) -> str:
@enforce_types
def get_headers(url: str, timeout: int=None) -> str:
"""Download the contents of a remote url and return the headers"""
from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT
timeout = timeout or TIMEOUT
timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
try:
response = requests.head(
url,
headers={'User-Agent': WGET_USER_AGENT},
verify=CHECK_SSL_VALIDITY,
headers={'User-Agent': ARCHIVING_CONFIG.USER_AGENT},
verify=ARCHIVING_CONFIG.CHECK_SSL_VALIDITY,
timeout=timeout,
allow_redirects=True,
)
@ -261,8 +260,8 @@ def get_headers(url: str, timeout: int=None) -> str:
except RequestException:
response = requests.get(
url,
headers={'User-Agent': WGET_USER_AGENT},
verify=CHECK_SSL_VALIDITY,
headers={'User-Agent': ARCHIVING_CONFIG.USER_AGENT},
verify=ARCHIVING_CONFIG.CHECK_SSL_VALIDITY,
timeout=timeout,
stream=True
)
@ -285,7 +284,6 @@ def ansi_to_html(text: str) -> str:
"""
Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
"""
from .config import COLOR_DICT
TEMPLATE = '<span style="color: rgb{}"><br>'
text = text.replace('[m', '</span>')