refactor: list command is functional

2024-11-28 15:10:40 +00:00 · 2020-12-31 12:59:06 -05:00 · 2020-12-31 12:59:06 -05:00 · a4e1bebc46
commit a4e1bebc46
parent c51d789ad4
5 changed files with 35 additions and 23 deletions
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@ -2,7 +2,7 @@ __package__ = 'archivebox.core'

 import uuid
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Dict, Optional, List

 from django.db import models, transaction
 from django.utils.functional import cached_property
@ -91,6 +91,11 @@ class Snapshot(models.Model):
        title = self.title or '-'
        return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})'

+    def field_names(self):  # takes self: as_csv() calls self.field_names()
+        fields = self._meta.get_fields()  # Django Options API; yields field objects exposing .name
+        exclude = ["tags", "archiveresult"] # Exclude relationships for now
+        return [field.name for field in fields if field.name not in exclude]
+

    @classmethod
    def from_json(cls, info: dict):
@ -105,6 +110,11 @@ class Snapshot(models.Model):
            for key in args
        }

+
+    def as_csv(self, cols: Optional[List[str]]=None, separator: str=',', ljust: int=0) -> str:
+        from ..index.csv import to_csv  # imported inside the method, not at module top (presumably to avoid a circular import - confirm)
+        return to_csv(self, cols=cols or self.field_names(), separator=separator, ljust=ljust)  # cols falls back to self.field_names() when None or empty
+
    def as_link(self) -> Link:
        return Link.from_json(self.as_json())

--- a/archivebox/index/csv.py
+++ b/archivebox/index/csv.py
@ -2,12 +2,14 @@ __package__ = 'archivebox.index'

 from typing import List, Optional, Any

+from django.db.models import Model
+
 from ..util import enforce_types
 from .schema import Link


@enforce_types
-def links_to_csv(links: List[Link],
+def snapshots_to_csv(snapshots: List[Model],
                 cols: Optional[List[str]]=None,
                 header: bool=True,
                 separator: str=',',
@ -20,8 +22,8 @@ def links_to_csv(links: List[Link],
        header_str = separator.join(col.ljust(ljust) for col in cols)

    row_strs = (
-        link.to_csv(cols=cols, ljust=ljust, separator=separator)
-        for link in links
+        snapshot.as_csv(cols=cols, ljust=ljust, separator=separator)
+        for snapshot in snapshots
    )

    return '\n'.join((header_str, *row_strs))
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@ -47,24 +47,24 @@ def parse_html_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[str]:
    return ()

@enforce_types
-def generate_index_from_links(links: List[Link], with_headers: bool):
+def generate_index_from_snapshots(snapshots: List[Model], with_headers: bool):
    if with_headers:
-        output = main_index_template(links)
+        output = main_index_template(snapshots)
    else:
-        output = main_index_template(links, template=MINIMAL_INDEX_TEMPLATE)
+        output = main_index_template(snapshots, template=MINIMAL_INDEX_TEMPLATE)
    return output

@enforce_types
-def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) -> str:
+def main_index_template(snapshots: List[Model], template: str=MAIN_INDEX_TEMPLATE) -> str:
    """render the template for the entire main index"""

    return render_django_template(template, {
        'version': VERSION,
        'git_sha': GIT_SHA,
-        'num_links': str(len(links)),
+        'num_links': str(len(snapshots)),
        'date_updated': datetime.now().strftime('%Y-%m-%d'),
        'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
-        'links': [link._asdict(extended=True) for link in links],
+        'links': [snapshot.as_json() for snapshot in snapshots],
        'FOOTER_INFO': FOOTER_INFO,
    })

--- a/archivebox/index/json.py
+++ b/archivebox/index/json.py
@ -41,17 +41,17 @@ MAIN_INDEX_HEADER = {
 }

@enforce_types
-def generate_json_index_from_links(links: List[Link], with_headers: bool):
+def generate_json_index_from_snapshots(snapshots: List[Model], with_headers: bool):
    if with_headers:
        output = {
            **MAIN_INDEX_HEADER,
-            'num_links': len(links),
+            'num_links': len(snapshots),
            'updated': datetime.now(),
            'last_run_cmd': sys.argv,
-            'links': links,
+            'links': snapshots,
        }
    else:
-        output = links
+        output = snapshots 
    return to_json(output, indent=4, sort_keys=True)


--- a/archivebox/main.py
+++ b/archivebox/main.py
@ -49,7 +49,7 @@ from .index import (
 from .index.json import (
    parse_json_main_index,
    parse_json_snapshot_details,
-    generate_json_index_from_links,
+    generate_json_index_from_snapshots,
 )
 from .index.sql import (
    get_admins,
@ -57,9 +57,9 @@ from .index.sql import (
    remove_from_sql_main_index,
 )
 from .index.html import (
-    generate_index_from_links,
+    generate_index_from_snapshots,
 )
-from .index.csv import links_to_csv
+from .index.csv import snapshots_to_csv
 from .extractors import archive_snapshots, archive_snapshot, ignore_methods
 from .config import (
    stderr,
@ -646,7 +646,7 @@ def remove(filter_str: Optional[str]=None,
    log_list_started(filter_patterns, filter_type)
    timer = TimedProgress(360, prefix='      ')
    try:
-        snapshots = list_links(**list_kwargs)
+        snapshots = list_snapshots(**list_kwargs)
    finally:
        timer.end()

@ -771,7 +771,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
    elif filter_patterns_str:
        filter_patterns = filter_patterns_str.split('\n')

-    snapshots = list_links(
+    snapshots = list_snapshots(
        filter_patterns=filter_patterns,
        filter_type=filter_type,
        before=before,
@ -782,17 +782,17 @@ def list_all(filter_patterns_str: Optional[str]=None,
        snapshots = snapshots.order_by(sort)

    folders = list_folders(
-        links=snapshots,
+        snapshots=snapshots,
        status=status,
        out_dir=out_dir,
    )

    if json: 
-        output = generate_json_index_from_links(folders.values(), with_headers)
+        output = generate_json_index_from_snapshots(folders.values(), with_headers)
    elif html:
-        output = generate_index_from_links(folders.values(), with_headers)
+        output = generate_index_from_snapshots(folders.values(), with_headers)
    elif csv:
-        output = links_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
+        output = snapshots_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
    else:
        output = printable_folders(folders, with_headers=with_headers)
    print(output)