mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-26 06:00:22 +00:00
show which format file was parsed as
This commit is contained in:
parent
ae0c20dc76
commit
c32a385e8f
2 changed files with 5 additions and 4 deletions
|
@@ -59,7 +59,7 @@ def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
|
|||
all_links = []
|
||||
if import_path:
|
||||
# parse and validate the import file
|
||||
raw_links = parse_links(import_path)
|
||||
raw_links, parser_name = parse_links(import_path)
|
||||
all_links = validate_links(raw_links)
|
||||
|
||||
# merge existing links in archive_path and new links
|
||||
|
@@ -70,11 +70,12 @@ def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
|
|||
|
||||
num_new_links = len(all_links) - len(existing_links)
|
||||
if num_new_links and not only_new:
|
||||
print('[{green}+{reset}] [{}] Adding {} new links from {} to {}/index.json'.format(
|
||||
print('[{green}+{reset}] [{}] Adding {} new links from {} to {}/index.json (parsed as {})'.format(
|
||||
datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
||||
num_new_links,
|
||||
pretty_path(import_path),
|
||||
pretty_path(archive_path),
|
||||
parser_name,
|
||||
**ANSI,
|
||||
))
|
||||
# else:
|
||||
|
|
|
@@ -53,7 +53,7 @@ def parse_links(path):
|
|||
|
||||
links = []
|
||||
with open(path, 'r', encoding='utf-8') as file:
|
||||
for parser_func in get_parsers(file).values():
|
||||
for parser_name, parser_func in get_parsers(file).items():
|
||||
# otherwise try all parsers until one works
|
||||
try:
|
||||
links += list(parser_func(file))
|
||||
|
@@ -63,7 +63,7 @@ def parse_links(path):
|
|||
# parser not supported on this file
|
||||
pass
|
||||
|
||||
return links
|
||||
return links, parser_name
|
||||
|
||||
|
||||
def parse_pocket_export(html_file):
|
||||
|
|
Loading…
Reference in a new issue