From 8cfe6f4afbcc839fc497dd5e0a536cab0c4091d4 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 9 May 2022 20:15:55 -0700 Subject: [PATCH] cleanup update flag handling and show better logging to clarify when its working --- archivebox/cli/archivebox_add.py | 9 ++++++++- archivebox/main.py | 18 +++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index 2c3d7ce3..ed05584c 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -30,11 +30,17 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3", ) parser.add_argument( - '--update-all', #'-n', + '--update', #'-u', action='store_true', default=not ONLY_NEW, # when ONLY_NEW=True we skip updating old links help="Also retry previously skipped/failed links when adding new links", ) + parser.add_argument( + '--update-all', #'-n', + action='store_true', + default=False, + help="Also update ALL links in index when finished adding new links", + ) parser.add_argument( '--index-only', #'-o', action='store_true', @@ -104,6 +110,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional urls=stdin_urls or urls, depth=command.depth, tag=command.tag, + update=command.update, update_all=command.update_all, index_only=command.index_only, overwrite=command.overwrite, diff --git a/archivebox/main.py b/archivebox/main.py index d4e7d9c3..b330d496 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -4,8 +4,9 @@ import os import sys import shutil import platform +from django.utils import timezone from pathlib import Path -from datetime import date +from datetime import date, datetime from typing import Dict, List, Optional, Iterable, IO, Union from crontab import CronTab, CronSlices @@ -554,7 +555,8 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR): def add(urls: Union[str, List[str]], tag: str='', depth: int=0, - update_all: bool=not ONLY_NEW, + update: bool=not ONLY_NEW, + update_all: bool=False, index_only: bool=False, overwrite: bool=False, # duplicate: bool=False, # TODO: reuse the logic from admin.py resnapshot to allow adding multiple snapshots by appending timestamp automatically @@ -621,11 +623,21 @@ def add(urls: Union[str, List[str]], if extractors: archive_kwargs["methods"] = extractors - if update_all: + stderr() + + ts = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S') + + if update: + stderr(f'[*] [{ts}] Archiving + updating', len(imported_links), 'URLs from added set...', color='green') + archive_links(imported_links, overwrite=overwrite, **archive_kwargs) + elif update_all: + stderr(f'[*] [{ts}] Archiving + updating ', len(all_links), 'URLs from entire library...', color='green') archive_links(all_links, overwrite=overwrite, **archive_kwargs) elif overwrite: + stderr(f'[*] [{ts}] Archiving + overwriting', len(imported_links), 'URLs from added set...', color='green') archive_links(imported_links, overwrite=True, **archive_kwargs) elif new_links: + stderr(f'[*] [{ts}] Archiving', len(imported_links), 'URLs from added set...', color='green') archive_links(new_links, overwrite=False, **archive_kwargs)