From 58c9b47d433b5ef68d5fd8fa510e2bd37aff60ba Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Mon, 25 Mar 2019 16:27:50 -0400
Subject: [PATCH] fix rss parsing when items have newlines between them

---
 archivebox/parse.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/archivebox/parse.py b/archivebox/parse.py
index ce6b0358..baaa447e 100644
--- a/archivebox/parse.py
+++ b/archivebox/parse.py
@@ -154,7 +154,8 @@ def parse_rss_export(rss_file):
     """Parse RSS XML-format files into links"""
 
     rss_file.seek(0)
-    items = rss_file.read().split('</item>\n<item>')
+    items = rss_file.read().split('<item>')
+    items = items[1:] if items else []
     for item in items:
         # example item:
         # <item>
@@ -166,7 +167,7 @@ def parse_rss_export(rss_file):
         # </item>
 
         trailing_removed = item.split('</item>', 1)[0]
-        leading_removed = trailing_removed.split('<item>', 1)[-1]
+        leading_removed = trailing_removed.split('<item>', 1)[-1].strip()
         rows = leading_removed.split('\n')
 
         def get_row(key):