mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-22 20:23:12 +00:00
save the url as title for staticfiles or non html files
This commit is contained in:
parent
24e24934f7
commit
385daf9af8
1 changed file with 5 additions and 4 deletions
|
@ -62,9 +62,6 @@ class TitleParser(HTMLParser):
|
|||
|
||||
@enforce_types
|
||||
def should_save_title(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
|
||||
if is_static_file(link.url):
|
||||
return False
|
||||
|
||||
# if link already has valid title, skip it
|
||||
if not overwrite and link.title and not link.title.lower().startswith('http'):
|
||||
return False
|
||||
|
@ -113,7 +110,11 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -
|
|||
timestamp=link.timestamp)\
|
||||
.update(title=output)
|
||||
else:
|
||||
raise ArchiveError('Unable to detect page title')
|
||||
# if no content was returned, don't save a title (because it might be a temporary error)
|
||||
if not html:
|
||||
raise ArchiveError('Unable to detect page title')
|
||||
# output = html[:128] # use first bit of content as the title
|
||||
output = link.base_url # use the filename as the title (better UX)
|
||||
except Exception as err:
|
||||
status = 'failed'
|
||||
output = err
|
||||
|
|
Loading…
Reference in a new issue