From 2db03245398f0a6c7fcda77a3ebc5688e3836396 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 7 Jul 2020 09:49:28 -0500 Subject: [PATCH] feat: depth=0 crawls the current page only --- archivebox/cli/archivebox_add.py | 14 +++++++++++--- tests/test_args.py | 12 ++++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index 77a11bd0..5bbccb19 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -53,14 +53,22 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional help="Recursively archive all linked pages up to this many hops away" ) command = parser.parse_args(args or ()) - import_str = accept_stdin(stdin) + #import_str = accept_stdin(stdin) add( - import_str=import_str, - import_path=command.import_path, + import_str=command.import_path, + import_path=None, update_all=command.update_all, index_only=command.index_only, out_dir=pwd or OUTPUT_DIR, ) + #if command.depth == 1: + # add( + # import_str=None, + # import_path=command.import_path, + # update_all=command.update_all, + # index_only=command.index_only, + # out_dir=pwd or OUTPUT_DIR, + # ) if __name__ == '__main__': diff --git a/tests/test_args.py b/tests/test_args.py index b8df1941..59d43fee 100644 --- a/tests/test_args.py +++ b/tests/test_args.py @@ -1,7 +1,15 @@ import subprocess +import json from .fixtures import * -def test_depth_flag_is_accepted(tmp_path, process): +def test_depth_flag_is_accepted(process): arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True) - assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode('utf-8') \ No newline at end of file + assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode('utf-8') + +def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process): + arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True) + archived_item_path = list(tmp_path.glob('archive/**/*'))[0] + with open(archived_item_path / "index.json", "r") as f: + output_json = json.load(f) + assert output_json["base_url"] == "example.com" \ No newline at end of file