From 8ac6096b39e17fbc8a29d92b29605532fbcd1412 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 15 Jun 2017 17:32:37 -0500 Subject: [PATCH] print timestamps and show number of links in index --- archive.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/archive.py b/archive.py index 46a7ce12..c6a0f2ba 100755 --- a/archive.py +++ b/archive.py @@ -262,7 +262,7 @@ def dump_index(links, service): def dump_website(link, service, overwrite=False): """download the DOM, PDF, and a screenshot into a folder named after the link's timestamp""" - print('[+] [{time}] Archiving "{title}": {base_url}'.format(**link)) + print('[+] [{timestamp} ({time})] "{title}": {base_url}'.format(**link)) out_dir = ''.join((service, '/archive/{timestamp}')).format(**link) if not os.path.exists(out_dir): @@ -302,19 +302,25 @@ def create_archive(export_file, service, resume=None): links = parse_pinboard_export(f) elif service == "bookmarks": links = parse_bookmarks_export(f) - links = list(reversed(sorted(links, key=lambda l: l['timestamp']))) # most recent first + + links = valid_links(links) # remove chrome://, about:, mailto: etc. + links = uniquefied_links(links) # fix duplicate timestamps, returns sorted list if resume: - links = [link for link in links if link['timestamp'] >= resume] + try: + links = [link for link in links if float(link['timestamp']) >= float(resume)] + except TypeError: + print('Resume value and all timestamp values must be valid numbers.') if not links: print('[X] No links found in {}, is it a {} export file?'.format(export_file, service)) raise SystemExit(1) + dump_index(links, service) run(['chmod', '-R', '755', service], timeout=10) - print('[*] [{}] Created archive index.'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))) + print('[*] [{}] Created archive index with {} links.'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), len(links))) check_dependencies()