From e6ce8a89be1fa57ac8d8f25b5ccef92dbf563617 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Sun, 20 Aug 2023 07:32:48 -0500 Subject: [PATCH] try and fix `into datetime` to accept more dt formats (#10063) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This PR tries to fix `into datetime`. The problem was that it supported only a few input formats and the `--format` flag was clunky. `--format` is still a bit clunky, but it works. The big change here is that, when no `--format` is given, string input is first passed to `dtparse` to convert it into a datetime. ### Before ```nushell ❯ '20220604' | into datetime Thu, 01 Jan 1970 00:00:00 +0000 (53 years ago) ``` ### After ```nushell ❯ '20220604' | into datetime Sat, 04 Jun 2022 00:00:00 -0500 (a year ago) ``` ## Supported Input Formats `dtparse` should support all of these formats, which are taken from its [repo](https://github.com/bspeice/dtparse/blob/master/build_pycompat.py). ```python 'test_parse_default': [ "Thu Sep 25 10:36:28", "Sep 10:36:28", "10:36:28", "10:36", "Sep 2003", "Sep", "2003", "10h36m28.5s", "10h36m28s", "10h36m", "10h", "10 h 36", "10 h 36.5", "36 m 5", "36 m 5 s", "36 m 05", "36 m 05 s", "10h am", "10h pm", "10am", "10pm", "10:00 am", "10:00 pm", "10:00am", "10:00pm", "10:00a.m", "10:00p.m", "10:00a.m.", "10:00p.m.", "October", "31-Dec-00", "0:01:02", "12h 01m02s am", "12:08 PM", "01h02m03", "01h02", "01h02s", "01m02", "01m02h", "2004 10 Apr 11h30m", # testPertain 'Sep 03', 'Sep of 03', # test_hmBY - Note: This appears to be Python 3 only, no idea why '02:17NOV2017', # Weekdays "Thu Sep 10:36:28", "Thu 10:36:28", "Wed", "Wednesday" ], 'test_parse_simple': [ "Thu Sep 25 10:36:28 2003", "Thu Sep 25 2003", "2003-09-25T10:49:41", "2003-09-25T10:49", "2003-09-25T10", "2003-09-25", "20030925T104941", "20030925T1049", "20030925T10", "20030925", "2003-09-25 10:49:41,502", "199709020908", "19970902090807", "2003-09-25", "09-25-2003", "25-09-2003", 
"10-09-2003", "10-09-03", "2003.09.25", "09.25.2003", "25.09.2003", "10.09.2003", "10.09.03", "2003/09/25", "09/25/2003", "25/09/2003", "10/09/2003", "10/09/03", "2003 09 25", "09 25 2003", "25 09 2003", "10 09 2003", "10 09 03", "25 09 03", "03 25 Sep", "25 03 Sep", " July 4 , 1976 12:01:02 am ", "Wed, July 10, '96", "1996.July.10 AD 12:08 PM", "July 4, 1976", "7 4 1976", "4 jul 1976", "7-4-76", "19760704", "0:01:02 on July 4, 1976", "0:01:02 on July 4, 1976", "July 4, 1976 12:01:02 am", "Mon Jan 2 04:24:27 1995", "04.04.95 00:22", "Jan 1 1999 11:23:34.578", "950404 122212", "3rd of May 2001", "5th of March 2001", "1st of May 2003", '0099-01-01T00:00:00', '0031-01-01T00:00:00', "20080227T21:26:01.123456789", '13NOV2017', '0003-03-04', 'December.0031.30', # testNoYearFirstNoDayFirst '090107', # test_mstridx '2015-15-May', ], 'test_parse_tzinfo': [ 'Thu Sep 25 10:36:28 BRST 2003', '2003 10:36:28 BRST 25 Sep Thu', ], 'test_parse_offset': [ 'Thu, 25 Sep 2003 10:49:41 -0300', '2003-09-25T10:49:41.5-03:00', '2003-09-25T10:49:41-03:00', '20030925T104941.5-0300', '20030925T104941-0300', # dtparse-specific "2018-08-10 10:00:00 UTC+3", "2018-08-10 03:36:47 PM GMT-4", "2018-08-10 04:15:00 AM Z-02:00" ], 'test_parse_dayfirst': [ '10-09-2003', '10.09.2003', '10/09/2003', '10 09 2003', # testDayFirst '090107', # testUnambiguousDayFirst '2015 09 25' ], 'test_parse_yearfirst': [ '10-09-03', '10.09.03', '10/09/03', '10 09 03', # testYearFirst '090107', # testUnambiguousYearFirst '2015 09 25' ], 'test_parse_dfyf': [ # testDayFirstYearFirst '090107', # testUnambiguousDayFirstYearFirst '2015 09 25' ], 'test_unspecified_fallback': [ 'April 2009', 'Feb 2007', 'Feb 2008' ], 'test_parse_ignoretz': [ 'Thu Sep 25 10:36:28 BRST 2003', '1996.07.10 AD at 15:08:56 PDT', 'Tuesday, April 12, 1952 AD 3:30:42pm PST', 'November 5, 1994, 8:15:30 am EST', '1994-11-05T08:15:30-05:00', '1994-11-05T08:15:30Z', '1976-07-04T00:01:02Z', '1986-07-05T08:15:30z', 'Tue Apr 4 00:22:12 PDT 1995' ], 
'test_fuzzy_tzinfo': [ 'Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.' ], 'test_fuzzy_tokens_tzinfo': [ 'Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.' ], 'test_fuzzy_simple': [ 'I have a meeting on March 1, 1974', # testFuzzyAMPMProblem 'On June 8th, 2020, I am going to be the first man on Mars', # testFuzzyAMPMProblem 'Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003', # testFuzzyAMPMProblem 'Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset', # testFuzzyAMPMProblem 'Jan 29, 1945 14:45 AM I going to see you there?', # testFuzzyIgnoreAMPM '2017-07-17 06:15:', # test_idx_check ], ``` # User-Facing Changes # Tests + Formatting # After Submitting --- .../src/conversions/into/datetime.rs | 122 ++++++++++++------ 1 file changed, 79 insertions(+), 43 deletions(-) diff --git a/crates/nu-command/src/conversions/into/datetime.rs b/crates/nu-command/src/conversions/into/datetime.rs index dd33eb6772..8fce51f56a 100644 --- a/crates/nu-command/src/conversions/into/datetime.rs +++ b/crates/nu-command/src/conversions/into/datetime.rs @@ -87,7 +87,7 @@ impl Command for SubCommand { .named( "format", SyntaxShape::String, - "Specify expected format of string input to parse to datetime. Use --list to see options", + "Specify expected format of INPUT string to parse to datetime. Use --list to see options", Some('f'), ) .switch( @@ -236,6 +236,20 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value { let timezone = &args.zone_options; let dateformat = &args.format_options; + // Let's try dtparse first + if matches!(input, Value::String { .. }) && dateformat.is_none() { + if let Ok(input_val) = input.as_spanned_string() { + match parse_date_from_string(&input_val.item, input_val.span) { + Ok(date) => { + return Value::Date { + val: date, + span: input_val.span, + } + } + Err(err) => err, + }; + } + } const HOUR: i32 = 60 * 60; // Check to see if input looks like a Unix timestamp (i.e. 
can it be parsed to an int?) @@ -256,51 +270,72 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value { } }; - if let Ok(ts) = timestamp { - macro_rules! match_datetime { - ($expr:expr) => { - match $expr { - dt => Value::Date { - val: dt.into(), - span: head, + if dateformat.is_none() { + if let Ok(ts) = timestamp { + return match timezone { + // note all these `.timestamp_nanos()` could overflow if we didn't check range in ` | into int`. + + // default to UTC + None => Value::Date { + val: Utc.timestamp_nanos(ts).into(), + span: head, + }, + Some(Spanned { item, span }) => match item { + Zone::Utc => { + let dt = Utc.timestamp_nanos(ts); + Value::Date { + val: dt.into(), + span: *span, + } + } + Zone::Local => { + let dt = Local.timestamp_nanos(ts); + Value::Date { + val: dt.into(), + span: *span, + } + } + Zone::East(i) => match FixedOffset::east_opt((*i as i32) * HOUR) { + Some(eastoffset) => { + let dt = eastoffset.timestamp_nanos(ts); + Value::Date { + val: dt, + span: *span, + } + } + None => Value::Error { + error: Box::new(ShellError::DatetimeParseError( + input.debug_value(), + *span, + )), + }, }, - } + Zone::West(i) => match FixedOffset::west_opt((*i as i32) * HOUR) { + Some(westoffset) => { + let dt = westoffset.timestamp_nanos(ts); + Value::Date { + val: dt, + span: *span, + } + } + None => Value::Error { + error: Box::new(ShellError::DatetimeParseError( + input.debug_value(), + *span, + )), + }, + }, + Zone::Error => Value::Error { + // This is an argument error, not an input error + error: Box::new(ShellError::TypeMismatch { + err_message: "Invalid timezone or offset".to_string(), + span: *span, + }), + }, + }, }; - } - - return match timezone { - // note all these `.timestamp_nanos()` could overflow if we didn't check range in ` | into int`. 
- - // default to UTC - None => Value::Date { - val: Utc.timestamp_nanos(ts).into(), - span: head, - }, - Some(Spanned { item, span }) => match item { - Zone::Utc => match_datetime!(Utc.timestamp_nanos(ts)), - Zone::Local => match_datetime!(Local.timestamp_nanos(ts)), - Zone::East(i) => match FixedOffset::east_opt((*i as i32) * HOUR) { - Some(eastoffset) => match_datetime!(eastoffset.timestamp_nanos(ts)), - None => Value::Error { - error: Box::new(ShellError::DatetimeParseError(input.debug_value(), *span)), - }, - }, - Zone::West(i) => match FixedOffset::west_opt((*i as i32) * HOUR) { - Some(westoffset) => match_datetime!(westoffset.timestamp_nanos(ts)), - None => Value::Error { - error: Box::new(ShellError::DatetimeParseError(input.debug_value(), *span)), - }, - }, - Zone::Error => Value::Error { - // This is an argument error, not an input error - error: Box::new(ShellError::TypeMismatch { - err_message: "Invalid timezone or offset".to_string(), - span: *span, - }), - }, - }, }; - }; + } // If input is not a timestamp, try parsing it as a string match input { @@ -314,6 +349,7 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value { } } }, + // Tries to automatically parse the date // (i.e. without a format string) // and assumes the system's local timezone if none is specified