diff --git a/archivebox/archive_methods.py b/archivebox/archive_methods.py index 8b0bf103..283365a4 100644 --- a/archivebox/archive_methods.py +++ b/archivebox/archive_methods.py @@ -91,13 +91,13 @@ def archive_link(link_dir, link): if method_name not in link['history']: link['history'][method_name] = [] - if not should_run(link_dir, link): + if should_run(link_dir, link): + if skipped_entirely: + skipped_entirely = False + print() + else: continue - if skipped_entirely: - skipped_entirely = False - print() - log_archive_method_started(method_name) result = method_function(link_dir, link) log_archive_method_finished(result) diff --git a/archivebox/logs.py b/archivebox/logs.py index 36b92682..3d6a804b 100644 --- a/archivebox/logs.py +++ b/archivebox/logs.py @@ -26,89 +26,7 @@ def pretty_path(path): return path.replace(REPO_DIR + '/', '') -def log_link_archiving_started(link_dir, link, is_new): - # [*] [2019-03-22 13:46:45] "Log Structured Merge Trees - ben stopford" - # http://www.benstopford.com/2015/02/14/log-structured-merge-trees/ - # > output/archive/1478739709 - - print('\n[{symbol_color}{symbol}{reset}] [{symbol_color}{now}{reset}] "{title}"'.format( - symbol_color=ANSI['green' if is_new else 'black'], - symbol='+' if is_new else '*', - now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), - title=link['title'] or link['url'], - **ANSI, - )) - print(' {blue}{url}{reset}'.format(url=link['url'], **ANSI)) - sys.stdout.write(' > {}{}'.format( - pretty_path(link_dir), - ' (new)' if is_new else '', - )) - -def log_link_archiving_finished(link_dir, link, is_new, skipped_entirely): - if all(output == 'succeeded' for output in link['latest']): - _LAST_RUN_STATS['succeeded'] += 1 - elif skipped_entirely or all(output == 'skipped' for output in link['latest']): - _LAST_RUN_STATS['skipped'] += 1 - else: - _LAST_RUN_STATS['failed'] += 1 - # import ipdb; ipdb.set_trace() - - if skipped_entirely: - print('\r √ {}{}'.format( - pretty_path(link_dir), - ' (new)' if is_new else '', - )) - -def log_archive_method_started(method): - print(' > {}'.format(method)) - -def log_archive_method_finished(result): - """quote the argument with whitespace in a command so the user can - copy-paste the outputted string directly to run the cmd - """ - required_keys = ('cmd', 'pwd', 'output', 'status', 'start_ts', 'end_ts') - assert ( - isinstance(result, dict) - and all(key in result for key in required_keys) - and ('output' in result) - ), 'Archive method did not return a valid result.' - - # Prettify CMD string and make it save to copy-paste by quoting arguments - quoted_cmd = ' '.join( - '"{}"'.format(arg) if ' ' in arg else arg - for arg in result['cmd'] - ) - - if result['status'] == 'failed': - # Prettify error output hints string and limit to five lines - hints = getattr(result['output'], 'hints', None) or () - if hints: - hints = hints if isinstance(hints, (list, tuple)) else hints.split('\n') - hints = ( - ' {}{}{}'.format(ANSI['lightyellow'], line.strip(), ANSI['reset']) - for line in hints[:5] if line.strip() - ) - - # Collect and prefix output lines with indentation - output_lines = [ - '{}Failed:{} {}{}'.format( - ANSI['red'], - result['output'].__class__.__name__.replace('ArchiveError', ''), - result['output'], - ANSI['reset'] - ), - *hints, - '{}Run to see full output:{}'.format(ANSI['lightred'], ANSI['reset']), - ' cd {};'.format(result['pwd']), - ' {}'.format(quoted_cmd), - ] - print('\n'.join( - ' {}'.format(line) - for line in output_lines - if line - )) - -### Logging Helpers +### Parsing Stage def log_parsing_started(source_file): start_ts = datetime.now() @@ -125,6 +43,9 @@ def log_parsing_finished(num_new_links, parser_name): parser_name, )) + +### Indexing Stage + def log_indexing_process_started(): start_ts = datetime.now() _LAST_RUN_STATS['index_start_ts'] = start_ts @@ -141,6 +62,9 @@ def log_indexing_finished(out_dir, out_file): _LAST_RUN_STATS['index_end_ts'] = end_ts print('\r √ {}/{}'.format(pretty_path(out_dir), out_file)) + +### Archiving Stage + def log_archiving_started(num_links, resume): start_ts = datetime.now() _LAST_RUN_STATS['start_ts'] = start_ts @@ -196,3 +120,88 @@ def log_archiving_finished(num_links): print(' - {} entries updated'.format(_LAST_RUN_STATS['succeeded'])) print(' - {} errors'.format(_LAST_RUN_STATS['failed'])) print(' To view your archive, open: {}/index.html'.format(OUTPUT_DIR.replace(REPO_DIR + '/', ''))) + + +def log_link_archiving_started(link_dir, link, is_new): + # [*] [2019-03-22 13:46:45] "Log Structured Merge Trees - ben stopford" + # http://www.benstopford.com/2015/02/14/log-structured-merge-trees/ + # > output/archive/1478739709 + + print('\n[{symbol_color}{symbol}{reset}] [{symbol_color}{now}{reset}] "{title}"'.format( + symbol_color=ANSI['green' if is_new else 'black'], + symbol='+' if is_new else '*', + now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + title=link['title'] or link['url'], + **ANSI, + )) + print(' {blue}{url}{reset}'.format(url=link['url'], **ANSI)) + sys.stdout.write(' > {}{}'.format( + pretty_path(link_dir), + ' (new)' if is_new else '', + )) + +def log_link_archiving_finished(link_dir, link, is_new, skipped_entirely): + from util import latest_output + + if all(output == 'succeeded' for output in latest_output(link).values()): + _LAST_RUN_STATS['succeeded'] += 1 + elif any(output == 'failed' for output in latest_output(link).values()): + _LAST_RUN_STATS['failed'] += 1 + else: + _LAST_RUN_STATS['skipped'] += 1 + + if skipped_entirely: + print('\r √ {}{}'.format( + pretty_path(link_dir), + ' (new)' if is_new else '', + )) + + +def log_archive_method_started(method): + print(' > {}'.format(method)) + +def log_archive_method_finished(result): + """quote the argument with whitespace in a command so the user can + copy-paste the outputted string directly to run the cmd + """ + required_keys = ('cmd', 'pwd', 'output', 'status', 'start_ts', 'end_ts') + assert ( + isinstance(result, dict) + and all(key in result for key in required_keys) + and ('output' in result) + ), 'Archive method did not return a valid result.' + + # Prettify CMD string and make it safe to copy-paste by quoting arguments + quoted_cmd = ' '.join( + '"{}"'.format(arg) if ' ' in arg else arg + for arg in result['cmd'] + ) + + if result['status'] == 'failed': + # Prettify error output hints string and limit to five lines + hints = getattr(result['output'], 'hints', None) or () + if hints: + hints = hints if isinstance(hints, (list, tuple)) else hints.split('\n') + hints = ( + ' {}{}{}'.format(ANSI['lightyellow'], line.strip(), ANSI['reset']) + for line in hints[:5] if line.strip() + ) + + # Collect and prefix output lines with indentation + output_lines = [ + '{}Failed:{} {}{}'.format( + ANSI['red'], + result['output'].__class__.__name__.replace('ArchiveError', ''), + result['output'], + ANSI['reset'] + ), + *hints, + '{}Run to see full output:{}'.format(ANSI['lightred'], ANSI['reset']), + ' cd {};'.format(result['pwd']), + ' {}'.format(quoted_cmd), + ] + print('\n'.join( + ' {}'.format(line) + for line in output_lines + if line + )) diff --git a/archivebox/parse.py b/archivebox/parse.py index fa7eaffe..ce6b0358 100644 --- a/archivebox/parse.py +++ b/archivebox/parse.py @@ -69,7 +69,7 @@ def parse_links(source_file): pass timer.end() - return [], 'Plain Text' + return [], 'Failed to parse' ### Import Parser Functions