Add support for micro-, nanosecond timestamps in importer (#151)

This commit is contained in:
Sascha Ißbrücker 2021-08-26 12:33:54 +02:00 committed by GitHub
parent 55a0d189dd
commit e47c00bd07
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 85 additions and 5 deletions

View file

@ -1,6 +1,5 @@
import logging
from dataclasses import dataclass
from datetime import datetime
from django.contrib.auth.models import User
from django.utils import timezone
@ -8,6 +7,7 @@ from django.utils import timezone
from bookmarks.models import Bookmark, parse_tag_string
from bookmarks.services.parser import parse, NetscapeBookmark
from bookmarks.services.tags import get_or_create_tags
from bookmarks.utils import parse_timestamp
logger = logging.getLogger(__name__)
@ -47,7 +47,7 @@ def _import_bookmark_tag(netscape_bookmark: NetscapeBookmark, user: User):
bookmark.url = netscape_bookmark.href
if netscape_bookmark.date_added:
bookmark.date_added = datetime.utcfromtimestamp(int(netscape_bookmark.date_added)).astimezone()
bookmark.date_added = parse_timestamp(netscape_bookmark.date_added)
else:
bookmark.date_added = timezone.now()
bookmark.date_modified = bookmark.date_added

View file

@ -11,10 +11,10 @@
<DT><A HREF="https://example.com/1" ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag1">test title 1</A>
<DD>test description 1
<DT><A HREF="https://example.com/2" ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag2">test title 2</A>
<DT><A HREF="https://example.com/2" ADD_DATE="1616337559000" PRIVATE="0" TOREAD="0" TAGS="tag2">test title 2</A>
<DD>test description 2
<DT><A HREF="https://example.com/3" ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag3">test title 3</A>
<DT><A HREF="https://example.com/3" ADD_DATE="1616337559000000" PRIVATE="0" TOREAD="0" TAGS="tag3">test title 3</A>
<DD>test description 3
</DL><p>

View file

@ -1,9 +1,10 @@
from unittest.mock import patch
from dateutil.relativedelta import relativedelta
from django.test import TestCase
from django.utils import timezone
from bookmarks.utils import humanize_absolute_date, humanize_relative_date
from bookmarks.utils import humanize_absolute_date, humanize_relative_date, parse_timestamp
class UtilsTestCase(TestCase):
@ -63,3 +64,45 @@ class UtilsTestCase(TestCase):
# Regression: Test that subsequent calls use current date instead of cached date (#107)
with patch.object(timezone, 'now', return_value=timezone.datetime(2021, 1, 13)):
self.assertEqual(humanize_relative_date(timezone.datetime(2021, 1, 13)), 'Today')
def verify_timestamp(self, date, factor=1):
    """
    Round-trip helper for the parse_timestamp tests.

    Multiplies the Unix timestamp of `date` by `factor`, truncates it to an
    integer, passes its string form to parse_timestamp and asserts that the
    parsed value equals `date`.
    """
    scaled_timestamp = int(date.timestamp() * factor)
    self.assertEqual(date, parse_timestamp(str(scaled_timestamp)))
def test_parse_timestamp_fails_for_invalid_timestamps(self):
    """A string that is not an integer must be rejected with ValueError."""
    self.assertRaises(ValueError, parse_timestamp, 'invalid')
def test_parse_timestamp_parses_millisecond_timestamps(self):
    """
    Round-trips unscaled (factor 1, i.e. second-based) timestamps for the
    current time and for dates fifty years before and after it.

    Despite "millisecond" in the name, no scaling is applied here; the name
    is kept so existing test selections keep working.
    """
    now = timezone.now().replace(microsecond=0)
    # `years=` (plural) shifts the date relative to `now`. The singular
    # `year=` replaces the year with the absolute value 50, which made both
    # offsets collapse to the same date in year 50 AD.
    fifty_years_ago = now - relativedelta(years=50)
    fifty_years_from_now = now + relativedelta(years=50)

    self.verify_timestamp(now)
    self.verify_timestamp(fifty_years_ago)
    self.verify_timestamp(fifty_years_from_now)
def test_parse_timestamp_parses_microsecond_timestamps(self):
    # Round-trips timestamps scaled by 1000 (second-based value * 1000).
    # NOTE(review): relativedelta(year=50) replaces the year with 50 rather
    # than shifting by fifty years (that would be `years=50`). Switching it
    # would exercise a 1970s value that, once scaled, presumably still fits
    # the range accepted on the first parsing attempt - confirm the intent
    # before changing.
    reference = timezone.now().replace(microsecond=0)
    year_override = relativedelta(year=50)
    moments = (reference, reference - year_override, reference + year_override)

    for moment in moments:
        self.verify_timestamp(moment, 1000)
def test_parse_timestamp_parses_nanosecond_timestamps(self):
    # Round-trips timestamps scaled by 1000000 (second-based value * 10^6).
    # NOTE(review): relativedelta(year=50) replaces the year with 50 rather
    # than shifting by fifty years (that would be `years=50`). Switching it
    # would exercise a 1970s value that, once scaled, presumably still fits
    # the range accepted on an earlier parsing attempt - confirm the intent
    # before changing.
    reference = timezone.now().replace(microsecond=0)
    year_override = relativedelta(year=50)
    moments = (reference, reference - year_override, reference + year_override)

    for moment in moments:
        self.verify_timestamp(moment, 1000000)
def test_parse_timestamp_fails_for_out_of_range_timestamp(self):
    """A current timestamp scaled by 10^9 must be rejected with ValueError."""
    current = timezone.now().replace(microsecond=0)
    self.assertRaises(ValueError, self.verify_timestamp, current, 1000000000)

View file

@ -58,3 +58,40 @@ def humanize_relative_date(value: datetime, now: Optional[datetime] = None):
return 'Yesterday'
else:
return weekday_names[value.isoweekday()]
def parse_timestamp(value: str):
    """
    Parses a string timestamp into a timezone-aware datetime value in UTC

    The value is first interpreted as seconds since the Unix epoch.
    If that fails because the value is outside the range a datetime can
    represent, it is interpreted as milliseconds, and finally as microseconds.
    The first interpretation that yields a valid datetime wins, so a value
    that is small enough to be valid in a coarser unit is never retried in a
    finer one.
    :param value: string containing an integer Unix timestamp
    :return: timezone-aware datetime (UTC) for the parsed instant
    :raises ValueError: if the value is not an integer, or is out of range
        for all three interpretations
    """
    # Imported locally under an alias so it cannot clash with another
    # `timezone` name that may exist at module level.
    from datetime import timezone as dt_timezone

    try:
        timestamp = int(value)
    except ValueError as err:
        raise ValueError(f'{value} is not a valid timestamp') from err

    # Seconds, milliseconds, microseconds - expressed as the divisor that
    # converts the raw value to seconds.
    for divisor in (1, 1000, 1000000):
        try:
            seconds = timestamp if divisor == 1 else timestamp / divisor
            # Passing tz explicitly yields an aware UTC datetime directly.
            # utcfromtimestamp().astimezone() would treat the naive UTC value
            # as local time and shift the instant on non-UTC hosts.
            return datetime.fromtimestamp(seconds, tz=dt_timezone.utc)
        except (OverflowError, ValueError, OSError):
            # Value exceeds the max. allowed timestamp for this unit,
            # try the next finer one
            continue

    # Timestamp is out of range
    raise ValueError(f'{value} exceeds maximum value for a timestamp')