From e47c00bd07a33f016ea8e437baea88070a5f8dfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sascha=20I=C3=9Fbr=C3=BCcker?= Date: Thu, 26 Aug 2021 12:33:54 +0200 Subject: [PATCH] Add support for micro-, nanosecond timestamps in importer (#151) --- bookmarks/services/importer.py | 4 +- .../resources/simple_valid_import_file.html | 4 +- bookmarks/tests/test_utils.py | 45 ++++++++++++++++++- bookmarks/utils.py | 37 +++++++++++++++ 4 files changed, 85 insertions(+), 5 deletions(-) diff --git a/bookmarks/services/importer.py b/bookmarks/services/importer.py index dd3067b..ae75388 100644 --- a/bookmarks/services/importer.py +++ b/bookmarks/services/importer.py @@ -1,6 +1,5 @@ import logging from dataclasses import dataclass -from datetime import datetime from django.contrib.auth.models import User from django.utils import timezone @@ -8,6 +7,7 @@ from django.utils import timezone from bookmarks.models import Bookmark, parse_tag_string from bookmarks.services.parser import parse, NetscapeBookmark from bookmarks.services.tags import get_or_create_tags +from bookmarks.utils import parse_timestamp logger = logging.getLogger(__name__) @@ -47,7 +47,7 @@ def _import_bookmark_tag(netscape_bookmark: NetscapeBookmark, user: User): bookmark.url = netscape_bookmark.href if netscape_bookmark.date_added: - bookmark.date_added = datetime.utcfromtimestamp(int(netscape_bookmark.date_added)).astimezone() + bookmark.date_added = parse_timestamp(netscape_bookmark.date_added) else: bookmark.date_added = timezone.now() bookmark.date_modified = bookmark.date_added diff --git a/bookmarks/tests/resources/simple_valid_import_file.html b/bookmarks/tests/resources/simple_valid_import_file.html index 74435aa..2298667 100644 --- a/bookmarks/tests/resources/simple_valid_import_file.html +++ b/bookmarks/tests/resources/simple_valid_import_file.html @@ -11,10 +11,10 @@
test title 1
test description 1 -
test title 2 +
test title 2
test description 2 -
test title 3 +
test title 3
test description 3

\ No newline at end of file diff --git a/bookmarks/tests/test_utils.py b/bookmarks/tests/test_utils.py index 74845d0..924910c 100644 --- a/bookmarks/tests/test_utils.py +++ b/bookmarks/tests/test_utils.py @@ -1,9 +1,10 @@ from unittest.mock import patch +from dateutil.relativedelta import relativedelta from django.test import TestCase from django.utils import timezone -from bookmarks.utils import humanize_absolute_date, humanize_relative_date +from bookmarks.utils import humanize_absolute_date, humanize_relative_date, parse_timestamp class UtilsTestCase(TestCase): @@ -63,3 +64,45 @@ class UtilsTestCase(TestCase): # Regression: Test that subsequent calls use current date instead of cached date (#107) with patch.object(timezone, 'now', return_value=timezone.datetime(2021, 1, 13)): self.assertEqual(humanize_relative_date(timezone.datetime(2021, 1, 13)), 'Today') + + def verify_timestamp(self, date, factor=1): + timestamp_string = str(int(date.timestamp() * factor)) + parsed_date = parse_timestamp(timestamp_string) + self.assertEqual(date, parsed_date) + + def test_parse_timestamp_fails_for_invalid_timestamps(self): + with self.assertRaises(ValueError): + parse_timestamp('invalid') + + def test_parse_timestamp_parses_millisecond_timestamps(self): + now = timezone.now().replace(microsecond=0) + fifty_years_ago = now - relativedelta(year=50) + fifty_years_from_now = now + relativedelta(year=50) + + self.verify_timestamp(now) + self.verify_timestamp(fifty_years_ago) + self.verify_timestamp(fifty_years_from_now) + + def test_parse_timestamp_parses_microsecond_timestamps(self): + now = timezone.now().replace(microsecond=0) + fifty_years_ago = now - relativedelta(year=50) + fifty_years_from_now = now + relativedelta(year=50) + + self.verify_timestamp(now, 1000) + self.verify_timestamp(fifty_years_ago, 1000) + self.verify_timestamp(fifty_years_from_now, 1000) + + def test_parse_timestamp_parses_nanosecond_timestamps(self): + now = timezone.now().replace(microsecond=0) + 
def parse_timestamp(value: str):
    """
    Parses a string timestamp into a timezone-aware datetime value.

    The unit of the timestamp is not known up front, so it is guessed:
    the value is first read as seconds since the Unix epoch. If that is
    outside the range a datetime can represent, it is read as milliseconds,
    and finally as microseconds. A value that is out of range for all three
    units (for example a nanosecond timestamp) is rejected.

    Because the unit is guessed from the magnitude, a millisecond or
    microsecond value that is small enough to also be a valid number of
    seconds is always read as seconds.

    :param value: string containing an integer timestamp
    :return: aware datetime, expressed in the system's local time zone,
        for the instant the timestamp refers to
    :raises ValueError: if value is not an integer, or if it is out of range
        for every supported unit
    """
    # Imported locally under an alias so it cannot collide with whatever the
    # surrounding module binds to the name `timezone` (e.g. Django's helper).
    from datetime import timezone as datetime_timezone

    try:
        timestamp = int(value)
    except ValueError as err:
        raise ValueError(f'{value} is not a valid timestamp') from err

    # Number of timestamp units per second, tried from coarsest to finest:
    # seconds, milliseconds, microseconds
    for units_per_second in (1, 1000, 1000000):
        # Integer arithmetic keeps the sub-second part exact instead of
        # relying on float division
        seconds, remainder = divmod(timestamp, units_per_second)
        microsecond = remainder * 1000000 // units_per_second
        try:
            # Build the datetime as aware UTC first. Converting a *naive* UTC
            # value with astimezone() would treat it as local time and shift
            # the result by the local UTC offset.
            utc_date = datetime.fromtimestamp(seconds, tz=datetime_timezone.utc)
            return utc_date.replace(microsecond=microsecond).astimezone()
        except (OverflowError, ValueError, OSError):
            # Out of range when read in this unit, try the next finer one
            continue

    # Timestamp is out of range for every supported unit
    raise ValueError(f'{value} exceeds maximum value for a timestamp')