refactor: oneshot command is functional

2024-11-24 13:13:09 +00:00 · 2020-12-31 12:19:14 -05:00 · 2020-12-31 12:19:14 -05:00 · c51d789ad4
commit c51d789ad4
parent 973f8b6abc
5 changed files with 13 additions and 10 deletions
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -14,7 +14,7 @@ from ..index.schema import Link
 from ..config import CONFIG

 #EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
-EXTRACTORS = ["title", "wget"]
+EXTRACTORS = [("title", "title"), ("wget", "wget")]
 STATUS_CHOICES = [
    ("succeeded", "succeeded"),
    ("failed", "failed"),
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -41,6 +41,7 @@ class MainIndex(View):


 class LinkDetails(View):
+
    def get(self, request, path):
        # missing trailing slash -> redirect to index
        if '/' not in path:
--- a/archivebox/index/sql.py
+++ b/archivebox/index/sql.py
@@ -61,7 +61,7 @@ def write_sql_snapshot_details(snapshot: Model, out_dir: Path=OUTPUT_DIR) -> Non
        try:
            snap = Snapshot.objects.get(url=snapshot.url)
        except Snapshot.DoesNotExist:
-            snap = write_snapshot_to_sql_index(snapshot)
+            snap = write_snapshot_to_index(snapshot)
        snap.title = snapshot.title

        # TODO: If there are actual tags, this will break
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -22,7 +22,7 @@ from .cli import (
 from .parsers import (
    save_text_as_source,
    save_file_as_source,
-    parse_links_memory,
+    parse_snapshots_memory,
 )
 from .index.schema import Link
 from .util import enforce_types                         # type: ignore
@@ -516,8 +516,8 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
    Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
    You can run this to archive single pages without needing to create a whole collection with archivebox init.
    """
-    oneshot_link, _ = parse_links_memory([url])
-    if len(oneshot_link) > 1:
+    oneshot_snapshots, _ = parse_snapshots_memory([url])
+    if len(oneshot_snapshots) > 1:
        stderr(
                '[X] You should pass a single url to the oneshot command',
                color='red'
@@ -525,8 +525,10 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
        raise SystemExit(2)

    methods = extractors.split(",") if extractors else ignore_methods(['title'])
-    archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
-    return oneshot_link
+    snapshot = oneshot_snapshots[0]
+    snapshot.save() # Oneshot uses an in-memory database, so this is safe
+    archive_snapshot(snapshot, out_dir=out_dir, methods=methods)
+    return snapshot

 @enforce_types
 def add(urls: Union[str, List[str]],
--- a/archivebox/parsers/__init__.py
+++ b/archivebox/parsers/__init__.py
@@ -66,7 +66,7 @@ PARSERS = (


 @enforce_types
-def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
+def parse_snapshots_memory(urls: List[str], root_url: Optional[str]=None):
    """
    parse a list of URLS without touching the filesystem
    """
@@ -77,12 +77,12 @@ def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
    file = StringIO()
    file.writelines(urls)
    file.name = "io_string"
-    links, parser = run_parser_functions(file, timer, root_url=root_url)
+    snapshots, parser = run_parser_functions(file, timer, root_url=root_url)
    timer.end()

    if parser is None:
        return [], 'Failed to parse'
-    return links, parser
+    return snapshots, parser
    

@enforce_types