mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-26 22:20:21 +00:00
fix parsing of chrome and ff histories
This commit is contained in:
parent
9ec1f81bd5
commit
5498822a97
1 changed file with 9 additions and 4 deletions
|
@@ -93,14 +93,19 @@ def parse_json_export(json_file):
|
||||||
# {"href":"http:\/\/www.reddit.com\/r\/example","description":"title here","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android"}]
|
# {"href":"http:\/\/www.reddit.com\/r\/example","description":"title here","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android"}]
|
||||||
if line:
|
if line:
|
||||||
erg = line
|
erg = line
|
||||||
time = datetime.strptime(erg['time'].split(',', 1)[0], '%Y-%m-%dT%H:%M:%SZ')
|
if erg.get('timestamp'):
|
||||||
|
timestamp = str(erg['timestamp']/10000000) # chrome/ff histories use a very precise timestamp
|
||||||
|
elif erg.get('time'):
|
||||||
|
timestamp = str(datetime.strptime(erg['time'].split(',', 1)[0], '%Y-%m-%dT%H:%M:%SZ').timestamp())
|
||||||
|
else:
|
||||||
|
timestamp = str(datetime.now().timestamp())
|
||||||
info = {
|
info = {
|
||||||
'url': erg['href'],
|
'url': erg['href'],
|
||||||
'domain': domain(erg['href']),
|
'domain': domain(erg['href']),
|
||||||
'base_url': base_url(erg['href']),
|
'base_url': base_url(erg['href']),
|
||||||
'timestamp': erg.get('timestamp') or str(time.timestamp()),
|
'timestamp': timestamp,
|
||||||
'tags': erg['tags'],
|
'tags': erg.get('tags') or '',
|
||||||
'title': erg['description'].replace(' — Readability', ''),
|
'title': (erg.get('description') or '').replace(' — Readability', ''),
|
||||||
'sources': [json_file.name],
|
'sources': [json_file.name],
|
||||||
}
|
}
|
||||||
info['type'] = get_link_type(info)
|
info['type'] = get_link_type(info)
|
||||||
|
|
Loading…
Reference in a new issue