From ca7f48042be39fcfd4f832c221127dacb16fd759 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 29 Dec 2020 12:46:51 -0500 Subject: [PATCH] refactor: singlefile uses snapshot instead of link --- archivebox/extractors/singlefile.py | 16 +++++++++------- archivebox/extractors/wget.py | 2 +- package-lock.json | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/archivebox/extractors/singlefile.py b/archivebox/extractors/singlefile.py index 2e5c3896..8dd91936 100644 --- a/archivebox/extractors/singlefile.py +++ b/archivebox/extractors/singlefile.py @@ -5,7 +5,9 @@ from pathlib import Path from typing import Optional import json -from ..index.schema import Link, ArchiveResult, ArchiveError +from django.db.models import Model + +from ..index.schema import ArchiveResult, ArchiveError from ..system import run, chmod_file from ..util import ( enforce_types, @@ -23,9 +25,9 @@ from ..logging_util import TimedProgress @enforce_types -def should_save_singlefile(link: Link, out_dir: Optional[Path]=None) -> bool: - out_dir = out_dir or Path(link.link_dir) - if is_static_file(link.url): +def should_save_singlefile(snapshot: Model, out_dir: Optional[Path]=None) -> bool: + out_dir = out_dir or Path(snapshot.snapshot_dir) + if is_static_file(snapshot.url): return False output = out_dir / 'singlefile.html' @@ -33,10 +35,10 @@ def should_save_singlefile(link: Link, out_dir: Optional[Path]=None) -> bool: @enforce_types -def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult: +def save_singlefile(snapshot: Model, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult: """download full site using single-file""" - out_dir = out_dir or Path(link.link_dir) + out_dir = out_dir or Path(snapshot.snapshot_dir) output = str(out_dir.absolute() / "singlefile.html") browser_args = chrome_args(TIMEOUT=0) @@ -47,7 +49,7 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO DEPENDENCIES['SINGLEFILE_BINARY']['path'], '--browser-executable-path={}'.format(CHROME_BINARY), browser_args, - link.url, + snapshot.url, output ] diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py index b62c9edf..6b7b10a4 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/extractors/wget.py @@ -8,7 +8,7 @@ from datetime import datetime from django.db.models import Model -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from ..index.schema import ArchiveResult, ArchiveOutput, ArchiveError from ..system import run, chmod_file from ..util import ( enforce_types, diff --git a/package-lock.json b/package-lock.json index 9df2c490..099deddf 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "archivebox", - "version": "0.4.21", + "version": "0.5.1", "lockfileVersion": 1, "requires": true, "dependencies": {