mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-25 05:30:23 +00:00
use constants in more places
This commit is contained in:
parent eb360f188a
commit ed45f58758
5 changed files with 53 additions and 64 deletions
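
Summary of the refactor: the files below stop reading settings through the monolithic archivebox.config module (or the settings.CONFIG shim) and instead read attributes off typed config objects (SERVER_CONFIG, SHELL_CONFIG, ARCHIVING_CONFIG, ARCHIVEDOTORG_CONFIG, FAVICON_CONFIG) or the static CONSTANTS namespace. The five files touched are the core views, the legacy index schema, the admin data views for workers and logs, the ripgrep search backend, and the networking helpers in util.py. A minimal sketch of the pattern, assuming pydantic-style config sets (class internals and defaults here are illustrative, not ArchiveBox's actual implementation):

    from pathlib import Path
    from pydantic import BaseModel

    class ServerConfig(BaseModel):
        # runtime-tunable settings, read as attributes at call time
        PUBLIC_INDEX: bool = False
        SNAPSHOTS_PER_PAGE: int = 40

    SERVER_CONFIG = ServerConfig()

    class ConstantsNamespace:
        # fixed paths derived once from the data dir
        DATA_DIR: Path = Path('.').resolve()
        ARCHIVE_DIR: Path = DATA_DIR / 'archive'
        LOGS_DIR: Path = DATA_DIR / 'logs'

    CONSTANTS = ConstantsNamespace()

    # before: from ..config import PUBLIC_INDEX
    # after:
    if SERVER_CONFIG.PUBLIC_INDEX:
        print('serving public index from', CONSTANTS.ARCHIVE_DIR)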
archivebox/core/views.py

@@ -2,7 +2,6 @@ __package__ = 'archivebox.core'

 from typing import Callable

 import threading
 from pathlib import Path

 from django.shortcuts import render, redirect

@@ -12,6 +11,7 @@ from django.views import View
 from django.views.generic.list import ListView
 from django.views.generic import FormView
 from django.db.models import Q
 from django.conf import settings
 from django.contrib import messages
 from django.contrib.auth.mixins import UserPassesTestMixin
 from django.views.decorators.csrf import csrf_exempt

@@ -20,6 +20,8 @@ from django.utils.decorators import method_decorator
 from admin_data_views.typing import TableContext, ItemContext
 from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink

+import archivebox
+from archivebox.constants import CONSTANTS

 from core.models import Snapshot
 from core.forms import AddLinkForm

@@ -27,28 +29,17 @@ from core.admin import result_url

 from queues.tasks import bg_add

+from ..plugins_sys.config.apps import SHELL_CONFIG, SERVER_CONFIG
+from ..plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG

-from ..config import (
-    OUTPUT_DIR,
-    PUBLIC_INDEX,
-    PUBLIC_SNAPSHOTS,
-    PUBLIC_ADD_VIEW,
-    VERSION,
-    COMMIT_HASH,
-    FOOTER_INFO,
-    SNAPSHOTS_PER_PAGE,
-    CONFIG,
-    CONFIG_SCHEMA,
-    DYNAMIC_CONFIG_SCHEMA,
-    USER_CONFIG,
-    SAVE_ARCHIVE_DOT_ORG,
-    PREVIEW_ORIGINALS,
-    CONSTANTS,
-)
 from ..logging_util import printable_filesize
 from ..main import add
-from ..util import base_url, ansi_to_html, htmlencode, urldecode, urlencode, ts_to_date_str
+from ..util import base_url, htmlencode, ts_to_date_str
 from ..search import query_search_index
 from ..extractors.wget import wget_output_path
 from .serve_static import serve_static_with_byterange_support

@@ -57,7 +48,7 @@ class HomepageView(View):
         if request.user.is_authenticated:
             return redirect('/admin/core/snapshot/')

-        if PUBLIC_INDEX:
+        if SERVER_CONFIG.PUBLIC_INDEX:
             return redirect('/public')

         return redirect(f'/admin/login/?next={request.path}')

@@ -166,8 +157,8 @@ class SnapshotView(View):
             'status_color': 'success' if link.is_archived else 'danger',
             'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
             'warc_path': warc_path,
-            'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
-            'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
+            'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG,
+            'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
             'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
             'best_result': best_result,
             # 'tags_str': 'somealskejrewlkrjwer,werlmwrwlekrjewlkrjwer324m532l,4m32,23m324234',

@@ -176,7 +167,7 @@ class SnapshotView(View):

     def get(self, request, path):
-        if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
+        if not request.user.is_authenticated and not SERVER_CONFIG.PUBLIC_SNAPSHOTS:
             return redirect(f'/admin/login/?next={request.path}')

         snapshot = None

@@ -381,15 +372,15 @@ class SnapshotView(View):
 class PublicIndexView(ListView):
     template_name = 'public_index.html'
     model = Snapshot
-    paginate_by = SNAPSHOTS_PER_PAGE
+    paginate_by = SERVER_CONFIG.SNAPSHOTS_PER_PAGE
     ordering = ['-bookmarked_at', '-created_at']

     def get_context_data(self, **kwargs):
         return {
             **super().get_context_data(**kwargs),
-            'VERSION': VERSION,
-            'COMMIT_HASH': COMMIT_HASH,
-            'FOOTER_INFO': FOOTER_INFO,
+            'VERSION': archivebox.VERSION,
+            'COMMIT_HASH': SHELL_CONFIG.COMMIT_HASH,
+            'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
         }

     def get_queryset(self, **kwargs):

@@ -428,7 +419,7 @@ class PublicIndexView(ListView):
         return qs.distinct()

     def get(self, *args, **kwargs):
-        if PUBLIC_INDEX or self.request.user.is_authenticated:
+        if SERVER_CONFIG.PUBLIC_INDEX or self.request.user.is_authenticated:
             response = super().get(*args, **kwargs)
             return response
         else:

@@ -449,7 +440,7 @@ class AddView(UserPassesTestMixin, FormView):
         return super().get_initial()

     def test_func(self):
-        return PUBLIC_ADD_VIEW or self.request.user.is_authenticated
+        return SERVER_CONFIG.PUBLIC_ADD_VIEW or self.request.user.is_authenticated

     def get_context_data(self, **kwargs):
         return {

@@ -457,8 +448,8 @@ class AddView(UserPassesTestMixin, FormView):
             'title': "Add URLs",
             # We can't just call request.build_absolute_uri in the template, because it would include query parameters
             'absolute_add_path': self.request.build_absolute_uri(self.request.path),
-            'VERSION': VERSION,
-            'FOOTER_INFO': FOOTER_INFO,
+            'VERSION': archivebox.VERSION,
+            'FOOTER_INFO': SERVER_CONFIG.FOOTER_INFO,
             'stdout': '',
         }

@@ -475,7 +466,7 @@ class AddView(UserPassesTestMixin, FormView):
             "depth": depth,
             "parser": parser,
             "update_all": False,
-            "out_dir": OUTPUT_DIR,
+            "out_dir": archivebox.DATA_DIR,
             "created_by_id": self.request.user.pk,
         }
         if extractors:
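
A practical note on why the views now read SERVER_CONFIG.PUBLIC_INDEX instead of importing PUBLIC_INDEX: `from module import NAME` copies the value into the importing module at import time, so a later config reload never reaches the copy, while attribute access defers the lookup to each request. A self-contained illustration (names hypothetical):

    class Config:
        PUBLIC_INDEX = False

    CONFIG = Config()
    PUBLIC_INDEX = CONFIG.PUBLIC_INDEX   # frozen copy, like `from ..config import PUBLIC_INDEX`

    CONFIG.PUBLIC_INDEX = True           # e.g. config reloaded at runtime

    print(PUBLIC_INDEX)                  # False -- the copy never sees the update
    print(CONFIG.PUBLIC_INDEX)           # True  -- attribute access reads the live value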

archivebox/index/schema.py

@@ -9,8 +9,6 @@ These are the old types we used to use before ArchiveBox v0.4 (before we switched

 __package__ = 'archivebox.index'

 from pathlib import Path
 from datetime import datetime, timezone, timedelta
 from typing import List, Dict, Any, Optional, Union

@@ -19,9 +17,13 @@ from dataclasses import dataclass, asdict, field, fields

 from django.utils.functional import cached_property

+from archivebox.constants import ARCHIVE_DIR, ARCHIVE_DIR_NAME
+
+from plugins_extractor.favicon.apps import FAVICON_CONFIG
+
 from ..system import get_dir_size
 from ..util import ts_to_date_str, parse_date
-from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME, FAVICON_PROVIDER


 class ArchiveError(Exception):
     def __init__(self, message, hints=None):

@@ -88,7 +90,7 @@ class ArchiveResult:
         info['start_ts'] = parse_date(info['start_ts'])
         info['end_ts'] = parse_date(info['end_ts'])
         if "pwd" not in keys:
-            info["pwd"] = str(Path(OUTPUT_DIR) / ARCHIVE_DIR_NAME / json_info["timestamp"])
+            info["pwd"] = str(ARCHIVE_DIR / json_info["timestamp"])
         if "cmd_version" not in keys:
             info["cmd_version"] = "Undefined"
         if "cmd" not in keys:

@@ -281,12 +283,10 @@ class Link:

     @property
     def link_dir(self) -> str:
-        from ..config import CONFIG
-        return str(Path(CONFIG['ARCHIVE_DIR']) / self.timestamp)
+        return str(ARCHIVE_DIR / self.timestamp)

     @property
     def archive_path(self) -> str:
-        from ..config import ARCHIVE_DIR_NAME
         return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)

     @property

@@ -385,7 +385,6 @@ class Link:

     @property
     def is_archived(self) -> bool:
-        from ..config import ARCHIVE_DIR
         from ..util import domain

         output_paths = (

@@ -402,7 +401,7 @@ class Link:
         )

         return any(
-            (Path(ARCHIVE_DIR) / self.timestamp / path).exists()
+            (ARCHIVE_DIR / self.timestamp / path).exists()
             for path in output_paths
         )

@@ -438,7 +437,7 @@ class Link:
         canonical = {
             'index_path': 'index.html',
             'favicon_path': 'favicon.ico',
-            'google_favicon_path': FAVICON_PROVIDER.format(self.domain),
+            'google_favicon_path': FAVICON_CONFIG.FAVICON_PROVIDER.format(self.domain),
             'wget_path': wget_output_path(self),
             'warc_path': 'warc/',
             'singlefile_path': 'singlefile.html',
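
The schema.py hunks also rely on ARCHIVE_DIR now being a pathlib.Path rather than a str, which is what lets `str(Path(OUTPUT_DIR) / ARCHIVE_DIR_NAME / timestamp)` collapse to `str(ARCHIVE_DIR / timestamp)`. Roughly (paths here are placeholders):

    from pathlib import Path

    # before: only string pieces existed, so every call site re-wrapped them
    OUTPUT_DIR = '/data'
    ARCHIVE_DIR_NAME = 'archive'
    pwd_old = str(Path(OUTPUT_DIR) / ARCHIVE_DIR_NAME / '1726000000.0')

    # after: one ready-made Path constant
    ARCHIVE_DIR = Path('/data') / 'archive'
    pwd_new = str(ARCHIVE_DIR / '1726000000.0')

    assert pwd_old == pwd_new  # same result, one import and no re-wrapping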

(admin data views for workers and logs)

@@ -12,6 +12,8 @@ from django.utils.html import format_html, mark_safe
 from admin_data_views.typing import TableContext, ItemContext
 from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink

+import archivebox
+
 from ..config_stubs import AttrDict
 from ..util import parse_date

@@ -378,9 +380,8 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
 def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
     assert request.user.is_superuser, "Must be a superuser to view configuration settings."

-    from django.conf import settings

-    log_files = settings.CONFIG.LOGS_DIR.glob("*.log")
+    log_files = archivebox.CONSTANTS.LOGS_DIR.glob("*.log")
     log_files = sorted(log_files, key=os.path.getmtime)[::-1]

     rows = {

@@ -418,7 +419,7 @@ def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:

     from django.conf import settings

-    log_file = [logfile for logfile in settings.CONFIG.LOGS_DIR.glob('*.log') if key in logfile.name][0]
+    log_file = [logfile for logfile in archivebox.CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]

     log_text = log_file.read_text()
     log_stat = log_file.stat()
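
In the log views above, only the source of LOGS_DIR changes (archivebox.CONSTANTS instead of the settings.CONFIG shim); the listing logic stays the same. As a standalone sketch of what that logic does (the directory path is a placeholder):

    import os
    from pathlib import Path

    LOGS_DIR = Path('/tmp/archivebox-logs')   # stands in for archivebox.CONSTANTS.LOGS_DIR

    log_files = LOGS_DIR.glob('*.log')                         # lazy iterator of matching paths
    log_files = sorted(log_files, key=os.path.getmtime)[::-1]  # newest first, as in log_list_view
    for log_file in log_files:
        print(log_file.name, log_file.stat().st_size)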

(ripgrep search backend plugin)

@@ -37,7 +37,7 @@ class RipgrepConfig(BaseConfigSet):
         '--files-with-matches',
         '--regexp',
     ])
-    RIPGREP_SEARCH_DIR: str = Field(default=lambda: str(settings.ARCHIVE_DIR))
+    RIPGREP_SEARCH_DIR: Path = archivebox.CONSTANTS.ARCHIVE_DIR

 RIPGREP_CONFIG = RipgrepConfig()

@@ -81,7 +81,7 @@ class RipgrepSearchBackend(BaseSearchBackend):
             ripgrep_binary.abspath,
             *RIPGREP_CONFIG.RIPGREP_ARGS_DEFAULT,
             text,
-            RIPGREP_CONFIG.RIPGREP_SEARCH_DIR,
+            str(RIPGREP_CONFIG.RIPGREP_SEARCH_DIR),
         ]
         proc = run(cmd, timeout=SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_TIMEOUT, capture_output=True, text=True)
         timestamps = set()
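
Since RIPGREP_SEARCH_DIR is now typed as a Path instead of a str, the one place it enters a subprocess argv wraps it in str() so the command stays a uniform list of strings. A reduced sketch of the same call shape (binary name, pattern, and timeout are placeholders):

    from pathlib import Path
    from subprocess import run

    RIPGREP_SEARCH_DIR = Path('/data/archive')   # stands in for the config value

    cmd = [
        'rg',                          # stands in for ripgrep_binary.abspath
        '--files-with-matches',
        '--regexp', 'example.com',
        str(RIPGREP_SEARCH_DIR),       # Path -> str so argv entries are all strings
    ]
    proc = run(cmd, timeout=60, capture_output=True, text=True)
    print(proc.stdout)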

archivebox/util.py

@@ -18,13 +18,19 @@ from requests.exceptions import RequestException, ReadTimeout

 from base32_crockford import encode as base32_encode  # type: ignore
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding

 try:
     import chardet
     detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"]
 except ImportError:
     detect_encoding = lambda rawdata: "utf-8"

+from archivebox.constants import STATICFILE_EXTENSIONS
+from archivebox.plugins_sys.config.apps import ARCHIVING_CONFIG
+
+from .misc.logging import COLOR_DICT


 ### Parsing Helpers

 # All of these are (str) -> str

@@ -114,7 +120,6 @@ def find_all_urls(urls_str: str):

 def is_static_file(url: str):
     # TODO: the proper way is with MIME type detection + ext, not only extension
-    from .config import STATICFILE_EXTENSIONS
     return extension(url).lower() in STATICFILE_EXTENSIONS


@@ -206,25 +211,20 @@ def parse_date(date: Any) -> Optional[datetime]:
 @enforce_types
 def download_url(url: str, timeout: int=None) -> str:
     """Download the contents of a remote url and return the text"""
-    from .config import (
-        TIMEOUT,
-        CHECK_SSL_VALIDITY,
-        WGET_USER_AGENT,
-        COOKIES_FILE,
-    )
-    timeout = timeout or TIMEOUT
+    timeout = timeout or ARCHIVING_CONFIG.TIMEOUT

     session = requests.Session()

-    if COOKIES_FILE and Path(COOKIES_FILE).is_file():
-        cookie_jar = http.cookiejar.MozillaCookieJar(COOKIES_FILE)
+    if ARCHIVING_CONFIG.COOKIES_FILE and Path(ARCHIVING_CONFIG.COOKIES_FILE).is_file():
+        cookie_jar = http.cookiejar.MozillaCookieJar(ARCHIVING_CONFIG.COOKIES_FILE)
         cookie_jar.load(ignore_discard=True, ignore_expires=True)
         for cookie in cookie_jar:
             session.cookies.set(cookie.name, cookie.value, domain=cookie.domain, path=cookie.path)

     response = session.get(
         url,
-        headers={'User-Agent': WGET_USER_AGENT},
-        verify=CHECK_SSL_VALIDITY,
+        headers={'User-Agent': ARCHIVING_CONFIG.USER_AGENT},
+        verify=ARCHIVING_CONFIG.CHECK_SSL_VALIDITY,
         timeout=timeout,
     )

@@ -243,14 +243,13 @@ def download_url(url: str, timeout: int=None) -> str:
 @enforce_types
 def get_headers(url: str, timeout: int=None) -> str:
     """Download the contents of a remote url and return the headers"""
-    from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT
-    timeout = timeout or TIMEOUT
+    timeout = timeout or ARCHIVING_CONFIG.TIMEOUT

     try:
         response = requests.head(
             url,
-            headers={'User-Agent': WGET_USER_AGENT},
-            verify=CHECK_SSL_VALIDITY,
+            headers={'User-Agent': ARCHIVING_CONFIG.USER_AGENT},
+            verify=ARCHIVING_CONFIG.CHECK_SSL_VALIDITY,
             timeout=timeout,
             allow_redirects=True,
         )

@@ -261,8 +260,8 @@ def get_headers(url: str, timeout: int=None) -> str:
     except RequestException:
         response = requests.get(
             url,
-            headers={'User-Agent': WGET_USER_AGENT},
-            verify=CHECK_SSL_VALIDITY,
+            headers={'User-Agent': ARCHIVING_CONFIG.USER_AGENT},
+            verify=ARCHIVING_CONFIG.CHECK_SSL_VALIDITY,
             timeout=timeout,
             stream=True
         )

@@ -285,7 +284,6 @@ def ansi_to_html(text: str) -> str:
     """
     Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
     """
-    from .config import COLOR_DICT

     TEMPLATE = '<span style="color: rgb{}"><br>'
     text = text.replace('[m', '</span>')
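
The util.py changes are mechanical renames (TIMEOUT becomes ARCHIVING_CONFIG.TIMEOUT, CHECK_SSL_VALIDITY becomes ARCHIVING_CONFIG.CHECK_SSL_VALIDITY, and note that WGET_USER_AGENT is renamed to the more general ARCHIVING_CONFIG.USER_AGENT); the cookie-loading behavior itself is unchanged. For reference, the mechanism download_url uses, as a standalone sketch (the cookies path is a placeholder; the file must be in Netscape/Mozilla format):

    import http.cookiejar
    import requests

    session = requests.Session()
    cookie_jar = http.cookiejar.MozillaCookieJar('/tmp/cookies.txt')
    cookie_jar.load(ignore_discard=True, ignore_expires=True)
    for cookie in cookie_jar:
        # copy each cookie into the session, preserving its domain/path scope
        session.cookies.set(cookie.name, cookie.value, domain=cookie.domain, path=cookie.path)

    response = session.get('https://example.com', timeout=60)
    print(response.status_code)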