Save the URL as the title for static files or non-HTML files

This commit is contained in:
Nick Sweeting 2021-01-30 22:01:49 -05:00
parent 24e24934f7
commit 385daf9af8

View file

@ -62,9 +62,6 @@ class TitleParser(HTMLParser):
@enforce_types
def should_save_title(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
if is_static_file(link.url):
return False
# if link already has valid title, skip it
if not overwrite and link.title and not link.title.lower().startswith('http'):
return False
@ -113,7 +110,11 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -
timestamp=link.timestamp)\
.update(title=output)
else:
raise ArchiveError('Unable to detect page title')
# if no content was returned, don't save a title (because it might be a temporary error)
if not html:
raise ArchiveError('Unable to detect page title')
# output = html[:128] # use first bit of content as the title
output = link.base_url # use the filename as the title (better UX)
except Exception as err:
status = 'failed'
output = err