Mirror of https://github.com/ArchiveBox/ArchiveBox, synced 2024-11-22 20:23:12 +00:00
better progress output
This commit is contained in:
parent 33ba29ea90
commit 56d382235f

5 changed files with 28 additions and 29 deletions
@@ -25,8 +25,10 @@ from config import (
     ONLY_NEW,
     OUTPUT_PERMISSIONS,
     OUTPUT_DIR,
     REPO_DIR,
     ANSI,
     TIMEOUT,
+    SHOW_PROGRESS,
+    GIT_SHA,
 )
 from util import (
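For context, SHOW_PROGRESS is now imported from config alongside the other settings. Its definition is not part of this diff; a minimal sketch of how a boolean flag like this could be derived from the environment (the env lookup and default are assumed, not taken from the repo):

# Sketch only: how a SHOW_PROGRESS-style flag could be read from the environment.
# The variable name matches the import above; everything else is an assumption.
import os

SHOW_PROGRESS = os.getenv('SHOW_PROGRESS', 'True').lower() == 'true'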
@@ -69,21 +71,13 @@ def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
     all_links = validate_links(existing_links + all_links)

     num_new_links = len(all_links) - len(existing_links)
-    if num_new_links and not only_new:
-        print('{green}[+] [{}] Adding {} new links to index from {} ({} format){reset}'.format(
-            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-            num_new_links,
-            pretty_path(import_path),
-            parser_name,
-            **ANSI,
-        ))
-    # else:
-    #     print('[*] [{}] No new links added to {}/index.json{}'.format(
-    #         datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-    #         archive_path,
-    #         ' from {}'.format(import_path) if import_path else '',
-    #         **ANSI,
-    #     ))
+    if SHOW_PROGRESS:
+        print()
+    print('    > Adding {} new links to index from {} (parsed as {} format)'.format(
+        num_new_links,
+        pretty_path(import_path),
+        parser_name,
+    ))

     if only_new:
         return new_links(all_links, existing_links)
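The net change in merge_links: the multi-line colored announcement and the commented-out else branch are dropped in favor of a short indented status line, with a leading blank line emitted only when progress output is on. A self-contained sketch of that pattern; SHOW_PROGRESS, pretty_path, and the example arguments are stand-ins for illustration, not the module's real state:

# Standalone sketch of the new status line; pretty_path is a hypothetical helper
# and the example values below are invented for illustration.
import os

SHOW_PROGRESS = True

def pretty_path(path):
    # hypothetical helper: shorten a path for display
    return path.replace(os.path.expanduser('~'), '~')

def report_new_links(num_new_links, import_path, parser_name):
    if SHOW_PROGRESS:
        print()
    print('    > Adding {} new links to index from {} (parsed as {} format)'.format(
        num_new_links,
        pretty_path(import_path),
        parser_name,
    ))

report_new_links(12, os.path.expanduser('~/Downloads/bookmarks.html'), 'Netscape HTML')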
@@ -102,7 +96,7 @@ def update_archive(archive_path, links, source=None, resume=None, append=True):
             **ANSI,
         ))
     else:
-        print('{green}[▶] [{}] Downloading content for {} pages in archive...{reset}'.format(
+        print('{green}[▶] [{}] Updating content for {} pages in archive...{reset}'.format(
             datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
             len(links),
             **ANSI,
@@ -119,7 +113,7 @@ def update_archive(archive_path, links, source=None, resume=None, append=True):
     else:
         duration = '{0:.2f} sec'.format(seconds, 2)

-    print('{}[√] [{}] Update of {} links complete ({}){}'.format(
+    print('{}[√] [{}] Update of {} pages complete ({}){}'.format(
         ANSI['green'],
         datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
         len(links),
@@ -129,6 +123,7 @@ def update_archive(archive_path, links, source=None, resume=None, append=True):
     print('    - {} entries skipped'.format(_RESULTS_TOTALS['skipped']))
     print('    - {} entries updated'.format(_RESULTS_TOTALS['succeded']))
     print('    - {} errors'.format(_RESULTS_TOTALS['failed']))
+    print('    To view your archive, open: {}/index.html'.format(OUTPUT_DIR.replace(REPO_DIR + '/', '')))


 if __name__ == '__main__':
@@ -134,8 +134,8 @@ def log_link_archive(link_dir, link, update_existing):
     ))

     print('    > {}{}'.format(pretty_path(link_dir), '' if update_existing else ' (new)'))
-    if link['type']:
-        print('      i {}'.format(link['type']))
+    # if link['type']:
+    #     print('      i {}'.format(link['type']))



@@ -28,14 +28,16 @@ def write_links_index(out_dir, links):
     if not os.path.exists(out_dir):
         os.makedirs(out_dir)

-    write_json_links_index(out_dir, links)
-    write_html_links_index(out_dir, links)
-
-    print('{green}[√] [{}] Updated main index files:{reset}'.format(
+    print('{green}[*] [{}] Updating main index files...{reset}'.format(
         datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-        **ANSI))
+        **ANSI,
+    ))
+    write_json_links_index(out_dir, links)
+    print('    > {}/index.json'.format(pretty_path(out_dir)))

+    write_html_links_index(out_dir, links)
+    print('    > {}/index.html'.format(pretty_path(out_dir)))


 def write_json_links_index(out_dir, links):
     """write the json link index to a given path"""
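write_links_index now announces the update before doing any work and reports each index path right after its writer runs, rather than printing a single "Updated" line at the end. A rough sketch of that announce-then-report ordering; the writer bodies, the dropped ANSI coloring, and the example path are placeholders for illustration:

# Sketch of the announce-then-report ordering; the writer bodies are placeholders.
from datetime import datetime

def write_json_links_index(out_dir, links):
    pass  # placeholder: would write out_dir/index.json

def write_html_links_index(out_dir, links):
    pass  # placeholder: would write out_dir/index.html

def write_links_index(out_dir, links):
    print('[*] [{}] Updating main index files...'.format(
        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    ))
    write_json_links_index(out_dir, links)
    print('    > {}/index.json'.format(out_dir))

    write_html_links_index(out_dir, links)
    print('    > {}/index.html'.format(out_dir))

write_links_index('output/archive', [])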
@@ -18,6 +18,7 @@ Parsed link schema: {
 """

 import re
+import sys
 import json
 import urllib
 from collections import OrderedDict
@@ -25,7 +26,7 @@ import xml.etree.ElementTree as etree

 from datetime import datetime

-from config import ANSI
+from config import ANSI, SHOW_PROGRESS
 from util import (
     domain,
     base_url,
@@ -60,6 +61,8 @@ def parse_links(path):
         path.rsplit('/', 1)[-1],
         **ANSI,
     ))
+    if SHOW_PROGRESS:
+        sys.stdout.write('    ')

     for parser_name, parser_func in get_parsers(file).items():
         # otherwise try all parsers until one works
@@ -72,8 +75,6 @@ def parse_links(path):
             # print('[!] Parser {} failed: {} {}'.format(parser_name, err.__class__.__name__, err))
             pass
-
-    print()

     return links, parser_name


@@ -233,8 +233,9 @@ def fetch_page_title(url, default=True):
         default = url

     try:
-        sys.stdout.write('.')
-        sys.stdout.flush()
+        if SHOW_PROGRESS:
+            sys.stdout.write('.')
+            sys.stdout.flush()
         html_content = urllib.request.urlopen(url, timeout=10).read().decode('utf-8')
         match = re.search('<title>(.*?)</title>', html_content)
         return match.group(1) if match else default or None
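Taken together, the parse_links and fetch_page_title changes gate the dot-style progress output on SHOW_PROGRESS: a leading indent is written once, then each title fetch appends a flushed '.' to the same line. A minimal self-contained sketch of the pattern; the URL list and the simulated fetch below are invented for illustration:

# Minimal sketch of SHOW_PROGRESS-gated dot progress on one output line;
# the URL list and the simulated fetch are invented for illustration.
import sys
import time

SHOW_PROGRESS = True

def fetch_page_title(url):
    if SHOW_PROGRESS:
        sys.stdout.write('.')
        sys.stdout.flush()
    time.sleep(0.05)  # stand-in for the real urllib request and title parse
    return url

if SHOW_PROGRESS:
    sys.stdout.write('    ')  # leading indent, as parse_links now does
for url in ('https://example.com/{}'.format(i) for i in range(5)):
    fetch_page_title(url)
print()  # finish the dot line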