minor snapshot details ui fixes and migrations log msg improvements

2025-02-18 22:38:26 +00:00 · 2024-06-03 04:00:18 -07:00 · 2024-06-03 04:00:18 -07:00 · de489d3c60
commit de489d3c60
parent 78f0ae469e
3 changed files with 18 additions and 13 deletions
--- a/archivebox/core/migrations/0024_auto_20240513_1143.py
+++ b/archivebox/core/migrations/0024_auto_20240513_1143.py
@@ -47,12 +47,14 @@ def calculate_abid(self):
 def copy_snapshot_uuids(apps, schema_editor):
    print('   Copying snapshot.id -> snapshot.uuid...')
    Snapshot = apps.get_model("core", "Snapshot")
    for snapshot in Snapshot.objects.all():
        snapshot.uuid = snapshot.id
        snapshot.save(update_fields=["uuid"])
 def generate_snapshot_abids(apps, schema_editor):
    print('   Generating snapshot.abid values...')
    Snapshot = apps.get_model("core", "Snapshot")
    for snapshot in Snapshot.objects.all():
        snapshot.abid_prefix = 'snp_'
@@ -65,6 +67,7 @@ def generate_snapshot_abids(apps, schema_editor):
        snapshot.save(update_fields=["abid"])
 def generate_archiveresult_abids(apps, schema_editor):
    print('   Generating ArchiveResult.abid values... (may take an hour or longer for large collections...)')
    ArchiveResult = apps.get_model("core", "ArchiveResult")
    Snapshot = apps.get_model("core", "Snapshot")
    for result in ArchiveResult.objects.all():
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -90,7 +90,7 @@ class SnapshotView(View):
                archiveresults[result.extractor] = result_info
        existing_files = {result['path'] for result in archiveresults.values()}
-        min_size_threshold = 128  # bytes
+        min_size_threshold = 10_000  # bytes
        allowed_extensions = {
            'txt',
            'html',
@@ -108,12 +108,14 @@ class SnapshotView(View):
            'md',
        }
        # iterate through all the files in the snapshot dir and add the biggest ones to the result list
-        for result_file in Path(snapshot.link_dir).glob('*/*/*'):
+        snap_dir = Path(snapshot.link_dir)
+        for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
            extension = result_file.suffix.lstrip('.').lower()
            if result_file.is_dir() or result_file.name.startswith('.') or extension not in allowed_extensions:
                continue
-            if result_file.name in existing_files:
+            if result_file.name in existing_files or result_file.name == 'index.html':
                continue
            file_size = result_file.stat().st_size or 0
@@ -121,7 +123,7 @@ class SnapshotView(View):
            if file_size > min_size_threshold:
                archiveresults[result_file.name] = {
                    'name': result_file.stem,
-                    'path': result_file.relative_to(snapshot.link_dir),
+                    'path': result_file.relative_to(snap_dir),
                    'ts': ts_to_date_str(result_file.stat().st_mtime or 0),
                    'size': file_size,
                }
@@ -140,7 +142,7 @@ class SnapshotView(View):
        link_info = link._asdict(extended=True)
        try:
-            warc_path = 'warc/' + list(Path(snapshot.link_dir).glob('warc/*.warc.*'))[0].name
+            warc_path = 'warc/' + list(Path(snap_dir).glob('warc/*.warc.*'))[0].name
        except IndexError:
            warc_path = 'warc/'
@@ -160,7 +162,7 @@ class SnapshotView(View):
            'warc_path': warc_path,
            'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
            'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
-            'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name'])),
+            'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
            'best_result': best_result,
            # 'tags_str': 'somealskejrewlkrjwer,werlmwrwlekrjewlkrjwer324m532l,4m32,23m324234',
        }
--- a/archivebox/templates/core/snapshot_live.html
+++ b/archivebox/templates/core/snapshot_live.html
@@ -401,13 +401,13 @@
                        <div class="col-lg-2">
                            <div class="card {% if forloop.first %}selected-card{% endif %}">
                                <div class="card-body">
-                                    <a href="{{result.path}}" target="preview" title="./{{result.path}} (downloaded {{result.ts}})">
+                                    <a href="{{result.path|urlencode}}" target="preview" title="./{{result.path}} (downloaded {{result.ts}})">
-                                        <h4>{{result.name}} <small>({{result.size|filesizeformat}})</small></h4>
+                                        <h4>{{result.name|truncatechars:24}} <small>({{result.size|filesizeformat}})</small></h4>
                                        <!-- <p class="card-text" ><code>./{{result.path|truncatechars:30}}</code></p> -->
                                    </a>
                                    <!--<a href="{{result.path}}" target="preview"><h4 class="card-title">{{result.name}}</h4></a>-->
                                </div>
-                                <iframe class="card-img-top" src="{{result.path}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>
+                                <iframe class="card-img-top" src="{{result.path|urlencode}}?autoplay=0" allow="autoplay 'none'; fullscreen 'none'; navigation-override 'none'; " sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>
                            </div>
                        </div>
                    {% endfor %}
@@ -419,7 +419,7 @@
                                <a href="./" target="preview">
                                    <h4>Headers, JSON, etc.</h4>
                                </a>
-                                <!--<a href="{{result.path}}" target="preview"><h4 class="card-title">{{result.name}}</h4></a>-->
+                                <!--<a href="{{result.path|urlencode}}" target="preview"><h4 class="card-title">{{result.name}}</h4></a>-->
                            </div>
                            <iframe class="card-img-top" src="./" sandbox="" scrolling="no" loading="lazy"></iframe>
                        </div>
@@ -430,7 +430,7 @@
-        <iframe id="main-frame" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{best_result.path}}" name="preview"></iframe>
+        <iframe id="main-frame" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{best_result.path|urlencode}}" name="preview"></iframe>
@@ -444,9 +444,9 @@
                    this.src = this.src + '#toolbar=0'
                }
                this.onload = function() {
-                    if (this.src.endsWith('.pdf')) {
+                    if (this.src.includes('.pdf')) {
                        this.removeAttribute('sandbox')
-                        this.src = this.src + '#toolbar=0'
+                        this.src = this.src.split('?autoplay=')[0] + '#toolbar=0'
                    }
                    try {
                        // doesnt work if frame origin rules prevent accessing its DOM via JS