mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 14:44:18 +00:00
fix fetch page title default
This commit is contained in:
parent
67d103a293
commit
d35c6cf8b5
1 changed file with 7 additions and 3 deletions
|
@@ -212,15 +212,19 @@ def download_url(url):
|
|||
return source_path
|
||||
|
||||
|
||||
def fetch_page_title(url, default=None):
|
||||
def fetch_page_title(url, default=True):
|
||||
"""Attempt to guess a page's title by downloading the html"""
|
||||
|
||||
if default is True:
|
||||
default = url
|
||||
|
||||
try:
|
||||
html_content = urllib.request.urlopen(url).read().decode('utf-8')
|
||||
|
||||
match = re.search('<title>(.*?)</title>', html_content)
|
||||
return match.group(1) if match else default
|
||||
return match.group(1) if match else default or None
|
||||
except Exception:
|
||||
if default is False:
|
||||
raise
|
||||
return default
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue