ArchiveBox/archivebox/core/views.py

__package__ = 'archivebox.core'

from io import StringIO
from contextlib import redirect_stdout

from django.shortcuts import render, redirect
from django.http import HttpResponse, Http404
from django.utils.html import format_html, mark_safe
from django.views import View, static
from django.views.generic.list import ListView
from django.views.generic import FormView
from django.db.models import Q
from django.contrib.auth.mixins import UserPassesTestMixin
from django.views.decorators.csrf import csrf_exempt
from django.utils.decorators import method_decorator

from core.models import Snapshot
from core.forms import AddLinkForm

from ..config import (
    OUTPUT_DIR,
    PUBLIC_INDEX,
    PUBLIC_SNAPSHOTS,
    PUBLIC_ADD_VIEW,
    VERSION,
    FOOTER_INFO,
    SNAPSHOTS_PER_PAGE,
)
from ..main import add
from ..util import base_url, ansi_to_html
from ..search import query_search_index


class HomepageView(View):
    """Landing view that only redirects; it never renders a page itself."""

    def get(self, request):
        """Send the visitor to the admin list, the public index, or the login page."""
        if request.user.is_authenticated:
            # Logged-in users go straight to the Snapshot admin list.
            destination = '/admin/core/snapshot/'
        elif PUBLIC_INDEX:
            # Anonymous visitors may browse when the public index is enabled.
            destination = '/public'
        else:
            # Otherwise ask for a login, coming back to the requested path afterwards.
            destination = f'/admin/login/?next={request.path}'
        return redirect(destination)


class SnapshotView(View):
    """Serve a Snapshot's archived files, or redirect to a Snapshot found by URL.

    ``path`` is the captured URL argument. Its first segment (the "slug")
    decides how the request is handled:

    * digits (optionally with dots): treated as a Snapshot timestamp or a
      numeric id prefix; the remainder of ``path`` is served out of that
      Snapshot's ``link_dir`` with ``django.views.static.serve``.
    * anything else: treated as a URL (or id prefix) and redirected to the
      matching Snapshot's ``index.html``.

    Lookups that match nothing, match several Snapshots, or name a file that
    is missing from the snapshot dir get an HTML explanation with status 404.
    """
    # render static html index from filesystem archive/<timestamp>/index.html

    def get(self, request, path):
        """Dispatch ``path`` to the timestamp/id handler or the URL handler."""
        if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:
            return redirect(f'/admin/login/?next={request.path}')

        # "<slug>/<file...>"; a bare "<slug>" defaults to the snapshot index page
        slug, slash, archivefile = path.partition('/')
        if not slash:
            archivefile = 'index.html'

        # slug is a timestamp
        if slug.replace('.', '').isdigit():
            # missing trailing slash -> redirect to index
            if not slash:
                return redirect(f'{path}/index.html')
            return self._serve_snapshot_file(request, slug, archivefile)

        # slug is a URL
        return self._redirect_to_snapshot_for_url(path)

    def _serve_snapshot_file(self, request, slug, archivefile):
        """Serve ``archivefile`` from the Snapshot whose timestamp/id matches ``slug``."""
        try:
            snapshot = Snapshot.objects.get(Q(timestamp=slug) | Q(id__startswith=slug))
        except Snapshot.DoesNotExist:
            # No exact hit: if the slug is a prefix of existing timestamps,
            # offer those as choices instead of a plain "not found".
            if Snapshot.objects.filter(timestamp__startswith=slug).exists():
                return self._multiple_matches_response(
                    'timestamp/UUID',
                    slug,
                    Snapshot.objects.filter(timestamp__startswith=slug),
                )
            # Snapshot does not exist
            return HttpResponse(
                format_html(
                    (
                        '<center><br/><br/><br/>'
                        'No Snapshot directories match the given timestamp or UUID: <code>{}</code><br/><br/>'
                        'You can <a href="/add/" target="_top">add a new Snapshot</a>, or return to the <a href="/" target="_top">Main Index</a>'
                        '</center>'
                    ),
                    slug,
                ),
                content_type="text/html",
                status=404,
            )
        except Snapshot.MultipleObjectsReturned:
            return self._multiple_matches_response(
                'timestamp/UUID',
                slug,
                Snapshot.objects.filter(timestamp__startswith=slug),
            )

        try:
            response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)
        except Http404:
            # Snapshot dir exists but file within does not e.g. 124235.324234/screenshot.png
            # All dynamic values go through format_html arguments so they are
            # escaped and cannot be mistaken for format placeholders.
            return HttpResponse(
                format_html(
                    (
                        '<center><br/><br/><br/>'
                        'Snapshot <a href="/archive/{timestamp}/index.html" target="_top"><b><code>[{timestamp}]</code></b></a> exists in DB, but resource <b><code>{timestamp}/'
                        '{archivefile}'
                        '</code></b> does not exist in <a href="/archive/{timestamp}/" target="_top">snapshot dir</a> yet.<br/><br/>'
                        'Maybe this resource type is not available for this Snapshot,<br/>or the archiving process has not completed yet?<br/>'
                        '<pre><code># run this cmd to finish archiving this Snapshot<br/>archivebox update -t timestamp {timestamp}</code></pre><br/><br/>'
                        '<div style="text-align: left; width: 100%; max-width: 400px">'
                        '<i><b>Next steps:</b></i><br/>'
                        '- list all the <a href="/archive/{timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>'
                        '- view the <a href="/archive/{timestamp}/index.html" target="_top">Snapshot <code>./index.html</code></a><br/>'
                        '- go to the <a href="/admin/core/snapshot/{snapshot_id}/change/" target="_top">Snapshot admin</a> to edit<br/>'
                        '- go to the <a href="/admin/core/snapshot/?id__startswith={snapshot_id}" target="_top">Snapshot actions</a> to re-archive<br/>'
                        '- or return to <a href="/" target="_top">the main index...</a></div>'
                        '</center>'
                    ),
                    timestamp=snapshot.timestamp,
                    snapshot_id=snapshot.id,
                    archivefile=archivefile,
                ),
                content_type="text/html",
                status=404,
            )

        # Point crawlers/clients back at the original page
        response["Link"] = f'<{snapshot.url}>; rel="canonical"'
        return response

    def _redirect_to_snapshot_for_url(self, path):
        """Redirect to the index of the single Snapshot whose URL (or id prefix) matches ``path``."""
        try:
            try:
                # try exact match on full url first
                snapshot = Snapshot.objects.get(
                    Q(url='http://' + path) | Q(url='https://' + path) | Q(id__startswith=path)
                )
            except Snapshot.DoesNotExist:
                # fall back to match on exact base_url
                try:
                    snapshot = Snapshot.objects.get(
                        Q(url='http://' + base_url(path)) | Q(url='https://' + base_url(path))
                    )
                except Snapshot.DoesNotExist:
                    # fall back to matching base_url as prefix
                    snapshot = Snapshot.objects.get(
                        Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
                    )
            return redirect(f'/archive/{snapshot.timestamp}/index.html')
        except Snapshot.DoesNotExist:
            return HttpResponse(
                format_html(
                    (
                        '<center><br/><br/><br/>'
                        'No Snapshots match the given url: <code>{}</code><br/><br/><br/>'
                        'Return to the <a href="/" target="_top">Main Index</a>, or:<br/><br/>'
                        '+ <i><a href="/add/?url={}" target="_top">Add a new Snapshot for <code>{}</code></a><br/><br/></i>'
                        '</center>'
                    ),
                    base_url(path),
                    path if '://' in path else f'https://{path}',
                    path,
                ),
                content_type="text/html",
                status=404,
            )
        except Snapshot.MultipleObjectsReturned:
            return self._multiple_matches_response(
                'URL',
                base_url(path),
                Snapshot.objects.filter(
                    Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
                ),
            )

    @staticmethod
    def _multiple_matches_response(kind, needle, candidates):
        """Build the 404 page listing every Snapshot in ``candidates``, newest first.

        ``kind`` names what was searched for ('timestamp/UUID' or 'URL') and
        ``needle`` is the value the user supplied; both are HTML-escaped.
        """
        snapshot_hrefs = mark_safe('<br/>').join(
            format_html(
                '{} <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>',
                snap.added.strftime('%Y-%m-%d %H:%M:%S'),
                snap.timestamp,
                snap.timestamp,
                snap.url,
                snap.title or '',
            )
            for snap in candidates.only('url', 'timestamp', 'title', 'added').order_by('-added')
        )
        # The footer is a constant, so it is marked safe directly: format_html()
        # without arguments is deprecated in Django 5.0 and an error in 6.0.
        footer = mark_safe(
            '</pre><br/>'
            'Choose a Snapshot to proceed or go back to the <a href="/" target="_top">Main Index</a>'
        )
        return HttpResponse(
            format_html(
                'Multiple Snapshots match the given {} <code>{}</code><br/><pre>',
                kind,
                needle,
            ) + snapshot_hrefs + footer,
            content_type="text/html",
            status=404,
        )


class PublicIndexView(ListView):
    """Paginated list of Snapshots, newest first, with an optional ``?q=`` search.

    Shown to anonymous visitors only when PUBLIC_INDEX is enabled; otherwise
    they are redirected to the admin login.
    """
    template_name = 'public_index.html'
    model = Snapshot
    paginate_by = SNAPSHOTS_PER_PAGE
    ordering = ['-added']

    def get_context_data(self, **kwargs):
        """Extend the default list context with the version and footer text."""
        return {
            **super().get_context_data(**kwargs),
            'VERSION': VERSION,
            'FOOTER_INFO': FOOTER_INFO,
        }

    def get_queryset(self, **kwargs):
        """Return all Snapshots, narrowed by the ``q`` GET parameter when present.

        A query matches on title, url, timestamp or tag name (case-insensitive
        substring), plus whatever the search backend returns for it.
        """
        qs = super().get_queryset(**kwargs)
        query = self.request.GET.get('q')
        if query and query.strip():
            qs = qs.filter(Q(title__icontains=query) | Q(url__icontains=query) | Q(timestamp__icontains=query) | Q(tags__name__icontains=query))
            try:
                qs = qs | query_search_index(query)
            except Exception as err:
                # Best effort: a failing search backend must not break the basic DB search
                print(f'[!] Error while using search backend: {err.__class__.__name__} {err}')
            # The tags join yields one row per matching tag, so a Snapshot with
            # several matching tags would otherwise be listed (and counted by
            # the paginator) more than once. Applied after the OR-combine so
            # both sides of `|` have the same distinct state when merged.
            qs = qs.distinct()
        return qs

    def get(self, *args, **kwargs):
        """Render the index if it is public or the user is logged in, else redirect to login."""
        if PUBLIC_INDEX or self.request.user.is_authenticated:
            response = super().get(*args, **kwargs)
            return response
        else:
            return redirect(f'/admin/login/?next={self.request.path}')

@method_decorator(csrf_exempt, name='dispatch')
class AddView(UserPassesTestMixin, FormView):
    """Form page for submitting URLs to archive.

    Access is granted when PUBLIC_ADD_VIEW is enabled or the user is
    authenticated. CSRF checks are disabled for this view's dispatch.
    """
    template_name = "add.html"
    form_class = AddLinkForm

    def get_initial(self):
        """Prefill the AddLinkForm with the 'url' GET parameter"""
        if self.request.method == 'GET':
            url = self.request.GET.get('url', None)
            if url:
                # Scheme-less input is assumed to be https
                return {'url': url if '://' in url else f'https://{url}'}

        return super().get_initial()

    def test_func(self):
        """UserPassesTestMixin hook: allow public use or any logged-in user."""
        return PUBLIC_ADD_VIEW or self.request.user.is_authenticated

    def get_context_data(self, **kwargs):
        """Extend the form context with page title, add URL, version info and empty stdout."""
        return {
            **super().get_context_data(**kwargs),
            'title': "Add URLs",
            # We can't just call request.build_absolute_uri in the template, because it would include query parameters
            'absolute_add_path': self.request.build_absolute_uri(self.request.path),
            'VERSION': VERSION,
            'FOOTER_INFO': FOOTER_INFO,
            'stdout': '',
        }

    def form_valid(self, form):
        """Run ``add()`` with the submitted values and re-render the page with its output.

        Everything ``add()`` prints is captured, echoed to the real stdout,
        and shown on the page (converted from ANSI to HTML) next to a fresh
        empty form.
        """
        url = form.cleaned_data["url"]
        print(f'[+] Adding URL: {url}')
        parser = form.cleaned_data["parser"]
        tag = form.cleaned_data["tag"]
        # Anything other than the literal "0" is treated as depth 1
        depth = 0 if form.cleaned_data["depth"] == "0" else 1
        extractors = ','.join(form.cleaned_data["archive_methods"])
        input_kwargs = {
            "urls": url,
            "tag": tag,
            "depth": depth,
            "parser": parser,
            "update_all": False,
            "out_dir": OUTPUT_DIR,
        }
        # Only pass extractors when some were selected, so add() keeps its own default otherwise
        if extractors:
            input_kwargs.update({"extractors": extractors})

        add_stdout = StringIO()
        with redirect_stdout(add_stdout):
            add(**input_kwargs)
        captured_output = add_stdout.getvalue()
        # Echo to the real stdout only after the redirect has ended; printing
        # inside the `with` block would append the captured text to the buffer
        # again and show it twice on the page.
        print(captured_output)

        context = self.get_context_data()

        context.update({
            "stdout": ansi_to_html(captured_output.strip()),
            "form": AddLinkForm()
        })
        return render(template_name=self.template_name, request=self.request, context=context)


class HealthCheckView(View):
    """
    Minimal endpoint for service discovery tools: always answers with plain text "OK"
    """
    def get(self, request):
        """
        Reply to a GET request with HTTP 200 and the body "OK"
        """
        ok_response = HttpResponse('OK', status=200, content_type='text/plain')
        return ok_response
make archivebox server work for urls, hashes, and timestamps 2019-05-01 03:13:21 +00:00			`__package__ = 'archivebox.core'`
wip initial django setup 2019-04-02 20:36:41 +00:00
add public add view + toggle setting 2020-08-25 18:15:42 +00:00			`from io import StringIO`
			`from contextlib import redirect_stdout`

make archivebox server work for urls, hashes, and timestamps 2019-05-01 03:13:21 +00:00			`from django.shortcuts import render, redirect`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`from django.http import HttpResponse, Http404`
			`from django.utils.html import format_html, mark_safe`
make archivebox server work for urls, hashes, and timestamps 2019-05-01 03:13:21 +00:00			`from django.views import View, static`
new public view derived from django 2020-08-20 14:04:34 +00:00			`from django.views.generic.list import ListView`
refactor: Change View to FormView 2020-08-28 14:58:32 +00:00			`from django.views.generic import FormView`
also search url, timestamp, tags on public index 2021-01-29 14:08:03 +00:00			`from django.db.models import Q`
refactor: Change View to FormView 2020-08-28 14:58:32 +00:00			`from django.contrib.auth.mixins import UserPassesTestMixin`
Exempt /add route from CSRF 2021-07-02 00:55:51 +00:00			`from django.views.decorators.csrf import csrf_exempt`
			`from django.utils.decorators import method_decorator`
search view inherits from modified public view 2020-08-20 20:43:28 +00:00
rename model Page to Snapshot 2019-05-01 03:44:51 +00:00			`from core.models import Snapshot`
add public add view + toggle setting 2020-08-25 18:15:42 +00:00			`from core.forms import AddLinkForm`

expose more django server config options 2019-05-02 23:15:16 +00:00			`from ..config import (`
			`OUTPUT_DIR,`
			`PUBLIC_INDEX,`
			`PUBLIC_SNAPSHOTS,`
fix public index missing template context 2020-11-28 06:29:34 +00:00			`PUBLIC_ADD_VIEW,`
			`VERSION,`
			`FOOTER_INFO,`
add new SNAPSHOTS_PER_PAGE pagination limit config 2021-02-16 01:42:00 +00:00			`SNAPSHOTS_PER_PAGE,`
expose more django server config options 2019-05-02 23:15:16 +00:00			`)`
allow full-text search from public index 2021-04-10 09:13:56 +00:00			`from ..main import add`
add public add view + toggle setting 2020-08-25 18:15:42 +00:00			`from ..util import base_url, ansi_to_html`
allow full-text search from public index 2021-04-10 09:13:56 +00:00			`from ..search import query_search_index`
feat: Allow feed loading from the add links view 2020-07-02 20:54:25 +00:00
first views for archivebox server 2019-04-17 09:42:21 +00:00
cleanup templates and views 2021-01-30 10:35:07 +00:00			`class HomepageView(View):`
first views for archivebox server 2019-04-17 09:42:21 +00:00			`def get(self, request):`
use proper url naming instead of hardcoding paths 2020-07-28 03:56:35 +00:00			`if request.user.is_authenticated:`
			`return redirect('/admin/core/snapshot/')`

			`if PUBLIC_INDEX:`
cleanup templates and views 2021-01-30 10:35:07 +00:00			`return redirect('/public')`
healthcheck endpoint 2021-10-03 17:12:03 +00:00
use proper url naming instead of hardcoding paths 2020-07-28 03:56:35 +00:00			`return redirect(f'/admin/login/?next={request.path}')`

add manage command and shell welcome message 2019-04-22 23:08:01 +00:00
cleanup templates and views 2021-01-30 10:35:07 +00:00			`class SnapshotView(View):`
			`# render static html index from filesystem archive/<timestamp>/index.html`

make archivebox server work for urls, hashes, and timestamps 2019-05-01 03:13:21 +00:00			`def get(self, request, path):`
expose more django server config options 2019-05-02 23:15:16 +00:00			`if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS:`
			`return redirect(f'/admin/login/?next={request.path}')`

make archivebox server work for urls, hashes, and timestamps 2019-05-01 03:13:21 +00:00			`try:`
			`slug, archivefile = path.split('/', 1)`
			`except (IndexError, ValueError):`
			`slug, archivefile = path.split('/', 1)[0], 'index.html'`

			`# slug is a timestamp`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`if slug.replace('.','').isdigit():`

			`# missing trailing slash -> redirect to index`
			`if '/' not in path:`
			`return redirect(f'{path}/index.html')`

			`try:`
			`try:`
cross link snapshot index, admin, and actions pages using uuids 2021-02-18 13:04:50 +00:00			`snapshot = Snapshot.objects.get(Q(timestamp=slug) \| Q(id__startswith=slug))`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`response = static.serve(request, archivefile, document_root=snapshot.link_dir, show_indexes=True)`
			`response["Link"] = f'<{snapshot.url}>; rel="canonical"'`
			`return response`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`except Snapshot.DoesNotExist:`
			`if Snapshot.objects.filter(timestamp__startswith=slug).exists():`
			`raise Snapshot.MultipleObjectsReturned`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`else:`
			`raise`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`except Snapshot.DoesNotExist:`
			`# Snapshot does not exist`
			`return HttpResponse(`
			`format_html(`
			`(`
			`'<center><br/><br/><br/>'`
cross link snapshot index, admin, and actions pages using uuids 2021-02-18 13:04:50 +00:00			`'No Snapshot directories match the given timestamp or UUID: <code>{}</code><br/><br/>'`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`'You can <a href="/add/" target="_top">add a new Snapshot</a>, or return to the <a href="/" target="_top">Main Index</a>'`
			`'</center>'`
			`),`
			`slug,`
			`path,`
			`),`
			`content_type="text/html",`
			`status=404,`
			`)`
			`except Snapshot.MultipleObjectsReturned:`
			`snapshot_hrefs = mark_safe('<br/>').join(`
			`format_html(`
			`'{} <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>',`
			`snap.added.strftime('%Y-%m-%d %H:%M:%S'),`
			`snap.timestamp,`
			`snap.timestamp,`
			`snap.url,`
			`snap.title or '',`
			`)`
			`for snap in Snapshot.objects.filter(timestamp__startswith=slug).only('url', 'timestamp', 'title', 'added').order_by('-added')`
			`)`
			`return HttpResponse(`
			`format_html(`
			`(`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`'Multiple Snapshots match the given timestamp/UUID <code>{}</code><br/><pre>'`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`),`
			`slug,`
			`) + snapshot_hrefs + format_html(`
			`(`
			`'</pre><br/>'`
			`'Choose a Snapshot to proceed or go back to the <a href="/" target="_top">Main Index</a>'`
			`)`
			`),`
			`content_type="text/html",`
			`status=404,`
			`)`
			`except Http404:`
			`# Snapshot dir exists but file within does not e.g. 124235.324234/screenshot.png`
			`return HttpResponse(`
			`format_html(`
			`(`
			`'<center><br/><br/><br/>'`
cross link snapshot index, admin, and actions pages using uuids 2021-02-18 13:04:50 +00:00			`f'Snapshot <a href="/archive/{snapshot.timestamp}/index.html" target="_top"><b><code>[{snapshot.timestamp}]</code></b></a> exists in DB, but resource <b><code>{snapshot.timestamp}/'`
			`'{}'`
			`f'</code></b> does not exist in <a href="/archive/{snapshot.timestamp}/" target="_top">snapshot dir</a> yet.<br/><br/>'`
			`'Maybe this resource type is not availabe for this Snapshot,<br/>or the archiving process has not completed yet?<br/>'`
			`f'<pre><code># run this cmd to finish archiving this Snapshot<br/>archivebox update -t timestamp {snapshot.timestamp}</code></pre><br/><br/>'`
			`'<div class="text-align: left; width: 100%; max-width: 400px">'`
			`'<i><b>Next steps:</i></b><br/>'`
			`f'- list all the <a href="/archive/{snapshot.timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>'`
			`f'- view the <a href="/archive/{snapshot.timestamp}/index.html" target="_top">Snapshot <code>./index.html</code></a><br/>'`
			`f'- go to the <a href="/admin/core/snapshot/{snapshot.id}/change/" target="_top">Snapshot admin</a> to edit<br/>'`
			`f'- go to the <a href="/admin/core/snapshot/?id__startswith={snapshot.id}" target="_top">Snapshot actions</a> to re-archive<br/>'`
			`'- or return to <a href="/" target="_top">the main index...</a></div>'`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`'</center>'`
			`),`
			`archivefile,`
			`),`
			`content_type="text/html",`
			`status=404,`
			`)`
make archivebox server work for urls, hashes, and timestamps 2019-05-01 03:13:21 +00:00			`# slug is a URL`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`try:`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`try:`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`# try exact match on full url first`
			`snapshot = Snapshot.objects.get(`
cross link snapshot index, admin, and actions pages using uuids 2021-02-18 13:04:50 +00:00			`Q(url='http://' + path) \| Q(url='https://' + path) \| Q(id__startswith=path)`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`)`
			`except Snapshot.DoesNotExist:`
			`# fall back to match on exact base_url`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`try:`
			`snapshot = Snapshot.objects.get(`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`Q(url='http://' + base_url(path)) \| Q(url='https://' + base_url(path))`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`)`
			`except Snapshot.DoesNotExist:`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`# fall back to matching base_url as prefix`
			`snapshot = Snapshot.objects.get(`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`Q(url__startswith='http://' + base_url(path)) \| Q(url__startswith='https://' + base_url(path))`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`)`
			`return redirect(f'/archive/{snapshot.timestamp}/index.html')`
			`except Snapshot.DoesNotExist:`
			`return HttpResponse(`
			`format_html(`
			`(`
			`'<center><br/><br/><br/>'`
only add https to url if needed when prompting to save new snapshot 2021-02-18 07:34:03 +00:00			`'No Snapshots match the given url: <code>{}</code><br/><br/><br/>'`
			`'Return to the <a href="/" target="_top">Main Index</a>, or:<br/><br/>'`
			`'+ <i><a href="/add/?url={}" target="_top">Add a new Snapshot for <code>{}</code></a><br/><br/></i>'`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`'</center>'`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`),`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`base_url(path),`
only add https to url if needed when prompting to save new snapshot 2021-02-18 07:34:03 +00:00			`path if '://' in path else f'https://{path}',`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`path,`
			`),`
			`content_type="text/html",`
			`status=404,`
			`)`
			`except Snapshot.MultipleObjectsReturned:`
			`snapshot_hrefs = mark_safe('<br/>').join(`
			`format_html(`
			`'{} <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>',`
			`snap.added.strftime('%Y-%m-%d %H:%M:%S'),`
			`snap.timestamp,`
			`snap.timestamp,`
			`snap.url,`
			`snap.title or '',`
speed up the Snapshot handling view and show index page when extractor output is missing or multiple snapshots returned 2021-02-16 01:52:08 +00:00			`)`
tweak snapshot asset serving logic to show multiple choices in case of conflict 2021-02-16 20:51:56 +00:00			`for snap in Snapshot.objects.filter(`
			`Q(url__startswith='http://' + base_url(path)) \| Q(url__startswith='https://' + base_url(path))`
			`).only('url', 'timestamp', 'title', 'added').order_by('-added')`
			`)`
			`return HttpResponse(`
			`format_html(`
			`(`
			`'Multiple Snapshots match the given URL <code>{}</code><br/><pre>'`
			`),`
			`base_url(path),`
			`) + snapshot_hrefs + format_html(`
			`(`
			`'</pre><br/>'`
			`'Choose a Snapshot to proceed or go back to the <a href="/" target="_top">Main Index</a>'`
			`)`
			`),`
			`content_type="text/html",`
			`status=404,`
			`)`
healthcheck endpoint 2021-10-03 17:12:03 +00:00
new public view derived from django 2020-08-20 14:04:34 +00:00
cleanup templates and views 2021-01-30 10:35:07 +00:00			`class PublicIndexView(ListView):`
			`template_name = 'public_index.html'`
new public view derived from django 2020-08-20 14:04:34 +00:00			`model = Snapshot`
add new SNAPSHOTS_PER_PAGE pagination limit config 2021-02-16 01:42:00 +00:00			`paginate_by = SNAPSHOTS_PER_PAGE`
fix snapshot icon caching and ordering 2021-04-01 06:22:15 +00:00			`ordering = ['-added']`
search view inherits from modified public view 2020-08-20 20:43:28 +00:00
fix public index missing template context 2020-11-28 06:29:34 +00:00			`def get_context_data(self, **kwargs):`
			`return {`
			`super().get_context_data(kwargs),`
			`'VERSION': VERSION,`
			`'FOOTER_INFO': FOOTER_INFO,`
			`}`

healthcheck endpoint 2021-10-03 17:12:03 +00:00			`def get_queryset(self, **kwargs):`
fix color option in docker 2021-02-16 06:26:26 +00:00			`qs = super().get_queryset(**kwargs)`
unify public archive view 2020-08-25 19:31:09 +00:00			`query = self.request.GET.get('q')`
allow full-text search from public index 2021-04-10 09:13:56 +00:00			`if query and query.strip():`
also search url, timestamp, tags on public index 2021-01-29 14:08:03 +00:00			`qs = qs.filter(Q(title__icontains=query) \| Q(url__icontains=query) \| Q(timestamp__icontains=query) \| Q(tags__name__icontains=query))`
dont prevent search when backend throws an exception or times out 2021-04-10 12:18:13 +00:00			`try:`
			`qs = qs \| query_search_index(query)`
			`except Exception as err:`
			`print(f'[!] Error while using search backend: {err.__class__.__name__} {err}')`
search view inherits from modified public view 2020-08-20 20:43:28 +00:00			`return qs`

			`def get(self, args, *kwargs):`
			`if PUBLIC_INDEX or self.request.user.is_authenticated:`
			`response = super().get(args, *kwargs)`
			`return response`
			`else:`
			`return redirect(f'/admin/login/?next={self.request.path}')`

Exempt /add route from CSRF 2021-07-02 00:55:51 +00:00			`@method_decorator(csrf_exempt, name='dispatch')`
refactor: Change View to FormView 2020-08-28 14:58:32 +00:00			`class AddView(UserPassesTestMixin, FormView):`
cleanup templates and views 2021-01-30 10:35:07 +00:00			`template_name = "add.html"`
refactor: Change View to FormView 2020-08-28 14:58:32 +00:00			`form_class = AddLinkForm`

Add a bookmarklet The bookmarklet lets you quickly open the Add page with the URL already populated in the URLs box. 2020-10-03 19:57:55 +00:00			`def get_initial(self):`
			`"""Prefill the AddLinkForm with the 'url' GET parameter"""`
			`if self.request.method == 'GET':`
			`url = self.request.GET.get('url', None)`
			`if url:`
only add https to url if needed when prompting to save new snapshot 2021-02-18 07:34:03 +00:00			`return {'url': url if '://' in url else f'https://{url}'}`
healthcheck endpoint 2021-10-03 17:12:03 +00:00
only add https to url if needed when prompting to save new snapshot 2021-02-18 07:34:03 +00:00			`return super().get_initial()`
Add a bookmarklet The bookmarklet lets you quickly open the Add page with the URL already populated in the URLs box. 2020-10-03 19:57:55 +00:00
refactor: Change View to FormView 2020-08-28 14:58:32 +00:00			`def test_func(self):`
			`return PUBLIC_ADD_VIEW or self.request.user.is_authenticated`

fix static html num_outputs info 2020-11-28 07:01:53 +00:00			`def get_context_data(self, **kwargs):`
			`return {`
			`super().get_context_data(kwargs),`
			`'title': "Add URLs",`
			`# We can't just call request.build_absolute_uri in the template, because it would include query parameters`
			`'absolute_add_path': self.request.build_absolute_uri(self.request.path),`
			`'VERSION': VERSION,`
			`'FOOTER_INFO': FOOTER_INFO,`
fix missing stdout template var on /add when not rendering success page 2021-02-16 01:54:47 +00:00			`'stdout': '',`
fix static html num_outputs info 2020-11-28 07:01:53 +00:00			`}`
refactor: Change View to FormView 2020-08-28 14:58:32 +00:00
			`def form_valid(self, form):`
			`url = form.cleaned_data["url"]`
			`print(f'[+] Adding URL: {url}')`
add form parser option 2021-04-01 06:34:16 +00:00			`parser = form.cleaned_data["parser"]`
support adding urls with tags directly via CLI and add page 2021-03-27 08:30:15 +00:00			`tag = form.cleaned_data["tag"]`
refactor: Change View to FormView 2020-08-28 14:58:32 +00:00			`depth = 0 if form.cleaned_data["depth"] == "0" else 1`
Update archivebox/core/views.py Cleaner handling of the archive methods input Co-authored-by: Nick Sweeting <git@sweeting.me> 2020-12-10 17:45:30 +00:00			`extractors = ','.join(form.cleaned_data["archive_methods"])`
refactor: Change View to FormView 2020-08-28 14:58:32 +00:00			`input_kwargs = {`
			`"urls": url,`
support adding urls with tags directly via CLI and add page 2021-03-27 08:30:15 +00:00			`"tag": tag,`
refactor: Change View to FormView 2020-08-28 14:58:32 +00:00			`"depth": depth,`
add form parser option 2021-04-01 06:34:16 +00:00			`"parser": parser,`
refactor: Change View to FormView 2020-08-28 14:58:32 +00:00			`"update_all": False,`
			`"out_dir": OUTPUT_DIR,`
			`}`
Add selector for archive modes 2020-12-10 15:51:57 +00:00			`if extractors:`
update instead of append 2020-12-10 16:08:27 +00:00			`input_kwargs.update({"extractors": extractors})`
refactor: Change View to FormView 2020-08-28 14:58:32 +00:00			`add_stdout = StringIO()`
			`with redirect_stdout(add_stdout):`
			`add(**input_kwargs)`
			`print(add_stdout.getvalue())`

			`context = self.get_context_data()`

			`context.update({`
			`"stdout": ansi_to_html(add_stdout.getvalue().strip()),`
			`"form": AddLinkForm()`
			`})`
fix: Remove unused imports 2020-08-28 15:06:48 +00:00			`return render(template_name=self.template_name, request=self.request, context=context)`
healthcheck endpoint 2021-10-03 17:12:03 +00:00

			`class HealthCheckView(View):`
			`"""`
			`A Django view that renders plain text "OK" for service discovery tools`
			`"""`
			`def get(self, request):`
			`"""`
			`Handle a GET request`
			`"""`
			`return HttpResponse(`
			`'OK',`
			`content_type='text/plain',`
			`status=200`
			`)`