mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-24 13:13:09 +00:00
refactor: oneshot command is functional
This commit is contained in:
parent
973f8b6abc
commit
c51d789ad4
5 changed files with 13 additions and 10 deletions
|
@ -14,7 +14,7 @@ from ..index.schema import Link
|
|||
from ..config import CONFIG
|
||||
|
||||
#EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
|
||||
EXTRACTORS = ["title", "wget"]
|
||||
EXTRACTORS = [("title", "title"), ("wget", "wget")]
|
||||
STATUS_CHOICES = [
|
||||
("succeeded", "succeeded"),
|
||||
("failed", "failed"),
|
||||
|
|
|
@ -41,6 +41,7 @@ class MainIndex(View):
|
|||
|
||||
|
||||
class LinkDetails(View):
|
||||
|
||||
def get(self, request, path):
|
||||
# missing trailing slash -> redirect to index
|
||||
if '/' not in path:
|
||||
|
|
|
@ -61,7 +61,7 @@ def write_sql_snapshot_details(snapshot: Model, out_dir: Path=OUTPUT_DIR) -> Non
|
|||
try:
|
||||
snap = Snapshot.objects.get(url=snapshot.url)
|
||||
except Snapshot.DoesNotExist:
|
||||
snap = write_snapshot_to_sql_index(snapshot)
|
||||
snap = write_snapshot_to_index(snapshot)
|
||||
snap.title = snapshot.title
|
||||
|
||||
# TODO: If there are actual tags, this will break
|
||||
|
|
|
@ -22,7 +22,7 @@ from .cli import (
|
|||
from .parsers import (
|
||||
save_text_as_source,
|
||||
save_file_as_source,
|
||||
parse_links_memory,
|
||||
parse_snapshots_memory,
|
||||
)
|
||||
from .index.schema import Link
|
||||
from .util import enforce_types # type: ignore
|
||||
|
@ -516,8 +516,8 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
|
|||
Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
|
||||
You can run this to archive single pages without needing to create a whole collection with archivebox init.
|
||||
"""
|
||||
oneshot_link, _ = parse_links_memory([url])
|
||||
if len(oneshot_link) > 1:
|
||||
oneshot_snapshots, _ = parse_snapshots_memory([url])
|
||||
if len(oneshot_snapshots) > 1:
|
||||
stderr(
|
||||
'[X] You should pass a single url to the oneshot command',
|
||||
color='red'
|
||||
|
@ -525,8 +525,10 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
|
|||
raise SystemExit(2)
|
||||
|
||||
methods = extractors.split(",") if extractors else ignore_methods(['title'])
|
||||
archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
|
||||
return oneshot_link
|
||||
snapshot = oneshot_snapshots[0]
|
||||
snapshot.save() # Oneshot uses an in-memory database, so this is safe
|
||||
archive_snapshot(snapshot, out_dir=out_dir, methods=methods)
|
||||
return snapshot
|
||||
|
||||
@enforce_types
|
||||
def add(urls: Union[str, List[str]],
|
||||
|
|
|
@ -66,7 +66,7 @@ PARSERS = (
|
|||
|
||||
|
||||
@enforce_types
|
||||
def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
|
||||
def parse_snapshots_memory(urls: List[str], root_url: Optional[str]=None):
|
||||
"""
|
||||
parse a list of URLs without touching the filesystem
|
||||
"""
|
||||
|
@ -77,12 +77,12 @@ def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
|
|||
file = StringIO()
|
||||
file.writelines(urls)
|
||||
file.name = "io_string"
|
||||
links, parser = run_parser_functions(file, timer, root_url=root_url)
|
||||
snapshots, parser = run_parser_functions(file, timer, root_url=root_url)
|
||||
timer.end()
|
||||
|
||||
if parser is None:
|
||||
return [], 'Failed to parse'
|
||||
return links, parser
|
||||
return snapshots, parser
|
||||
|
||||
|
||||
@enforce_types
|
||||
|
|
Loading…
Reference in a new issue