From 880b425df64f40944d78e307d52b845e01318b77 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sat, 30 Mar 2019 17:57:39 -0400 Subject: [PATCH] add note about saving timestamp strings independently --- archivebox/util.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/archivebox/util.py b/archivebox/util.py index c47741d5..617c7e2e 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -373,6 +373,11 @@ def parse_date(date: Any) -> Optional[datetime]: # anything from hours to decades, depending on which app, OS, # and sytem time configuration was used for the original timestamp # more info: https://github.com/pirate/ArchiveBox/issues/119 + + # Note: always always always store the original timestamp string + # somewhere independently of the parsed datetime, so that later + # bugs don't repeatedly misparse and rewrite increasingly worse dates. + # the correct date can always be re-derived from the timestamp str timestamp = float(date) EARLIEST_POSSIBLE = 473403600.0 # 1985 @@ -389,6 +394,12 @@ def parse_date(date: Any) -> Optional[datetime]: # number is microseconds return datetime.fromtimestamp(timestamp / (1000*1000)) + else: + # continue to the end and raise a parsing failed error. + # we don't want to even attempt parsing timestamp strings that + # aren't within these ranges + pass + if '-' in date: try: return datetime.fromisoformat(date)