better progress output

Nick Sweeting 2019-02-06 22:06:21 -08:00
parent 33ba29ea90
commit 56d382235f
5 changed files with 28 additions and 29 deletions

View file

@@ -25,8 +25,10 @@ from config import (
     ONLY_NEW,
     OUTPUT_PERMISSIONS,
     OUTPUT_DIR,
+    REPO_DIR,
     ANSI,
     TIMEOUT,
+    SHOW_PROGRESS,
     GIT_SHA,
 )
 from util import (
@@ -69,21 +71,13 @@ def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
     all_links = validate_links(existing_links + all_links)
 
     num_new_links = len(all_links) - len(existing_links)
-    if num_new_links and not only_new:
-        print('{green}[+] [{}] Adding {} new links to index from {} ({} format){reset}'.format(
-            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-            num_new_links,
-            pretty_path(import_path),
-            parser_name,
-            **ANSI,
-        ))
-    # else:
-    #     print('[*] [{}] No new links added to {}/index.json{}'.format(
-    #         datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-    #         archive_path,
-    #         ' from {}'.format(import_path) if import_path else '',
-    #         **ANSI,
-    #     ))
+    if SHOW_PROGRESS:
+        print()
+    print(' > Adding {} new links to index from {} (parsed as {} format)'.format(
+        num_new_links,
+        pretty_path(import_path),
+        parser_name,
+    ))
 
     if only_new:
         return new_links(all_links, existing_links)
@@ -102,7 +96,7 @@ def update_archive(archive_path, links, source=None, resume=None, append=True):
             **ANSI,
         ))
     else:
-        print('{green}[▶] [{}] Downloading content for {} pages in archive...{reset}'.format(
+        print('{green}[▶] [{}] Updating content for {} pages in archive...{reset}'.format(
             datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
             len(links),
             **ANSI,
@@ -119,7 +113,7 @@ def update_archive(archive_path, links, source=None, resume=None, append=True):
     else:
         duration = '{0:.2f} sec'.format(seconds, 2)
 
-    print('{}[√] [{}] Update of {} links complete ({}){}'.format(
+    print('{}[√] [{}] Update of {} pages complete ({}){}'.format(
         ANSI['green'],
         datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
         len(links),
@@ -129,6 +123,7 @@ def update_archive(archive_path, links, source=None, resume=None, append=True):
     print(' - {} entries skipped'.format(_RESULTS_TOTALS['skipped']))
     print(' - {} entries updated'.format(_RESULTS_TOTALS['succeded']))
     print(' - {} errors'.format(_RESULTS_TOTALS['failed']))
+    print(' To view your archive, open: {}/index.html'.format(OUTPUT_DIR.replace(REPO_DIR + '/', '')))
 
 
 if __name__ == '__main__':

View file

@@ -134,8 +134,8 @@ def log_link_archive(link_dir, link, update_existing):
     ))
 
     print(' > {}{}'.format(pretty_path(link_dir), '' if update_existing else ' (new)'))
 
-    if link['type']:
-        print(' i {}'.format(link['type']))
+    # if link['type']:
+    #     print(' i {}'.format(link['type']))
 

View file

@@ -28,14 +28,16 @@ def write_links_index(out_dir, links):
     if not os.path.exists(out_dir):
         os.makedirs(out_dir)
 
-    write_json_links_index(out_dir, links)
-    write_html_links_index(out_dir, links)
-    print('{green}[√] [{}] Updated main index files:{reset}'.format(
+    print('{green}[*] [{}] Updating main index files...{reset}'.format(
         datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-        **ANSI))
+        **ANSI,
+    ))
+    write_json_links_index(out_dir, links)
     print(' > {}/index.json'.format(pretty_path(out_dir)))
+
+    write_html_links_index(out_dir, links)
     print(' > {}/index.html'.format(pretty_path(out_dir)))
 
 
 def write_json_links_index(out_dir, links):
     """write the json link index to a given path"""

View file

@@ -18,6 +18,7 @@ Parsed link schema: {
 """
 
 import re
+import sys
 import json
 import urllib
 from collections import OrderedDict
@@ -25,7 +26,7 @@ import xml.etree.ElementTree as etree
 from datetime import datetime
 
-from config import ANSI
+from config import ANSI, SHOW_PROGRESS
 from util import (
     domain,
     base_url,
@@ -60,6 +61,8 @@ def parse_links(path):
         path.rsplit('/', 1)[-1],
         **ANSI,
     ))
+    if SHOW_PROGRESS:
+        sys.stdout.write(' ')
 
     for parser_name, parser_func in get_parsers(file).items():
         # otherwise try all parsers until one works
@@ -72,8 +75,6 @@ def parse_links(path):
             # print('[!] Parser {} failed: {} {}'.format(parser_name, err.__class__.__name__, err))
             pass
 
-    print()
-
     return links, parser_name

View file

@@ -233,8 +233,9 @@ def fetch_page_title(url, default=True):
         default = url
 
     try:
-        sys.stdout.write('.')
-        sys.stdout.flush()
+        if SHOW_PROGRESS:
+            sys.stdout.write('.')
+            sys.stdout.flush()
         html_content = urllib.request.urlopen(url, timeout=10).read().decode('utf-8')
         match = re.search('<title>(.*?)</title>', html_content)
        return match.group(1) if match else default or None
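
The change applied across all five files is one pattern: gate each incidental progress write behind the SHOW_PROGRESS config flag, and flush stdout so dots appear as the work happens rather than when the buffer fills. A minimal standalone sketch of that pattern, assuming only a boolean SHOW_PROGRESS flag; fetch_with_progress and its placeholder workload are hypothetical, not part of this commit:

import sys

SHOW_PROGRESS = True  # stand-in for the flag imported from config in this commit

def fetch_with_progress(urls):
    """Do some per-URL work, printing one dot per item when progress output is enabled."""
    results = []
    for url in urls:
        if SHOW_PROGRESS:
            sys.stdout.write('.')
            sys.stdout.flush()  # flush so the dot appears before the work finishes, not after buffering
        results.append(len(url))  # placeholder for the real fetch/parse work
    if SHOW_PROGRESS:
        print()  # terminate the dot line before any summary output
    return results

fetch_with_progress(['https://example.com'] * 5)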