mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-26 06:00:22 +00:00
Fix RSS parsing when items have newlines between them
This commit is contained in:
parent
c48b1bbb3c
commit
58c9b47d43
1 changed file with 3 additions and 2 deletions
|
@@ -154,7 +154,8 @@ def parse_rss_export(rss_file):
|
|||
"""Parse RSS XML-format files into links"""
|
||||
|
||||
rss_file.seek(0)
|
||||
items = rss_file.read().split('</item>\n<item>')
|
||||
items = rss_file.read().split('<item>')
|
||||
items = items[1:] if items else []
|
||||
for item in items:
|
||||
# example item:
|
||||
# <item>
|
||||
|
@@ -166,7 +167,7 @@ def parse_rss_export(rss_file):
|
|||
# </item>
|
||||
|
||||
trailing_removed = item.split('</item>', 1)[0]
|
||||
leading_removed = trailing_removed.split('<item>', 1)[-1]
|
||||
leading_removed = trailing_removed.split('<item>', 1)[-1].strip()
|
||||
rows = leading_removed.split('\n')
|
||||
|
||||
def get_row(key):
|
||||
|
|
Loading…
Reference in a new issue