try and fix into datetime to accept more dt formats (#10063)

# Description

This PR tries to fix `into datetime`. The problem was that it didn't
support many input formats and the `--format` was clunky. `--format` is
still a bit clunky but can work. The big change here is that it first
tries to use `dtparse` to convert text into datetime.

### Before
```nushell
❯ '20220604' | into datetime
Thu, 01 Jan 1970 00:00:00 +0000 (53 years ago)
```
### After
```nushell
❯ '20220604' | into datetime
Sat, 04 Jun 2022 00:00:00 -0500 (a year ago)
```
## Supported Input Formats
`dtparse` should support all these formats. Taken from their
[repo](https://github.com/bspeice/dtparse/blob/master/build_pycompat.py).
```python
    'test_parse_default': [
        "Thu Sep 25 10:36:28",
        "Sep 10:36:28", "10:36:28", "10:36", "Sep 2003", "Sep", "2003",
        "10h36m28.5s", "10h36m28s", "10h36m", "10h", "10 h 36", "10 h 36.5",
        "36 m 5", "36 m 5 s", "36 m 05", "36 m 05 s", "10h am", "10h pm",
        "10am", "10pm", "10:00 am", "10:00 pm", "10:00am", "10:00pm",
        "10:00a.m", "10:00p.m", "10:00a.m.", "10:00p.m.",
        "October", "31-Dec-00", "0:01:02", "12h 01m02s am", "12:08 PM",
        "01h02m03", "01h02", "01h02s", "01m02", "01m02h", "2004 10 Apr 11h30m",
        # testPertain
        'Sep 03', 'Sep of 03',
        # test_hmBY - Note: This appears to be Python 3 only, no idea why
        '02:17NOV2017',
        # Weekdays
        "Thu Sep 10:36:28", "Thu 10:36:28", "Wed", "Wednesday"
    ],
    'test_parse_simple': [
        "Thu Sep 25 10:36:28 2003", "Thu Sep 25 2003", "2003-09-25T10:49:41",
        "2003-09-25T10:49", "2003-09-25T10", "2003-09-25", "20030925T104941",
        "20030925T1049", "20030925T10", "20030925", "2003-09-25 10:49:41,502",
        "199709020908", "19970902090807", "2003-09-25", "09-25-2003",
        "25-09-2003", "10-09-2003", "10-09-03", "2003.09.25", "09.25.2003",
        "25.09.2003", "10.09.2003", "10.09.03", "2003/09/25", "09/25/2003",
        "25/09/2003", "10/09/2003", "10/09/03", "2003 09 25", "09 25 2003",
        "25 09 2003", "10 09 2003", "10 09 03", "25 09 03", "03 25 Sep",
        "25 03 Sep", "  July   4 ,  1976   12:01:02   am  ",
        "Wed, July 10, '96", "1996.July.10 AD 12:08 PM", "July 4, 1976",
        "7 4 1976", "4 jul 1976", "7-4-76", "19760704",
        "0:01:02 on July 4, 1976", "0:01:02 on July 4, 1976",
        "July 4, 1976 12:01:02 am", "Mon Jan  2 04:24:27 1995",
        "04.04.95 00:22", "Jan 1 1999 11:23:34.578", "950404 122212",
        "3rd of May 2001", "5th of March 2001", "1st of May 2003",
        '0099-01-01T00:00:00', '0031-01-01T00:00:00',
        "20080227T21:26:01.123456789", '13NOV2017', '0003-03-04',
        'December.0031.30',
        # testNoYearFirstNoDayFirst
        '090107',
        # test_mstridx
        '2015-15-May',
    ],
    'test_parse_tzinfo': [
        'Thu Sep 25 10:36:28 BRST 2003', '2003 10:36:28 BRST 25 Sep Thu',
    ],
    'test_parse_offset': [
        'Thu, 25 Sep 2003 10:49:41 -0300', '2003-09-25T10:49:41.5-03:00',
        '2003-09-25T10:49:41-03:00', '20030925T104941.5-0300',
        '20030925T104941-0300',
        # dtparse-specific
        "2018-08-10 10:00:00 UTC+3", "2018-08-10 03:36:47 PM GMT-4", "2018-08-10 04:15:00 AM Z-02:00"
    ],
    'test_parse_dayfirst': [
        '10-09-2003', '10.09.2003', '10/09/2003', '10 09 2003',
        # testDayFirst
        '090107',
        # testUnambiguousDayFirst
        '2015 09 25'
    ],
    'test_parse_yearfirst': [
        '10-09-03', '10.09.03', '10/09/03', '10 09 03',
        # testYearFirst
        '090107',
        # testUnambiguousYearFirst
        '2015 09 25'
    ],
    'test_parse_dfyf': [
        # testDayFirstYearFirst
        '090107',
        # testUnambiguousDayFirstYearFirst
        '2015 09 25'
    ],
    'test_unspecified_fallback': [
        'April 2009', 'Feb 2007', 'Feb 2008'
    ],
    'test_parse_ignoretz': [
        'Thu Sep 25 10:36:28 BRST 2003', '1996.07.10 AD at 15:08:56 PDT',
        'Tuesday, April 12, 1952 AD 3:30:42pm PST',
        'November 5, 1994, 8:15:30 am EST', '1994-11-05T08:15:30-05:00',
        '1994-11-05T08:15:30Z', '1976-07-04T00:01:02Z', '1986-07-05T08:15:30z',
        'Tue Apr 4 00:22:12 PDT 1995'
    ],
    'test_fuzzy_tzinfo': [
        'Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.'
    ],
    'test_fuzzy_tokens_tzinfo': [
        'Today is 25 of September of 2003, exactly at 10:49:41 with timezone -03:00.'
    ],
    'test_fuzzy_simple': [
        'I have a meeting on March 1, 1974', # testFuzzyAMPMProblem
        'On June 8th, 2020, I am going to be the first man on Mars', # testFuzzyAMPMProblem
        'Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003', # testFuzzyAMPMProblem
        'Meet me at 3:00 AM on December 3rd, 2003 at the AM/PM on Sunset', # testFuzzyAMPMProblem
        'Jan 29, 1945 14:45 AM I going to see you there?', # testFuzzyIgnoreAMPM
        '2017-07-17 06:15:', # test_idx_check
    ],
```
# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect -A clippy::result_large_err` to check that
you're using the standard code style
- `cargo test --workspace` to check that all tests pass
- `cargo run -- -c "use std testing; testing run-tests --path
crates/nu-std"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
Darren Schroeder 2023-08-20 07:32:48 -05:00 committed by GitHub
parent 74f8081290
commit e6ce8a89be
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -87,7 +87,7 @@ impl Command for SubCommand {
.named(
"format",
SyntaxShape::String,
"Specify expected format of string input to parse to datetime. Use --list to see options",
"Specify expected format of INPUT string to parse to datetime. Use --list to see options",
Some('f'),
)
.switch(
@ -236,6 +236,20 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
let timezone = &args.zone_options;
let dateformat = &args.format_options;
// Let's try dtparse first
if matches!(input, Value::String { .. }) && dateformat.is_none() {
if let Ok(input_val) = input.as_spanned_string() {
match parse_date_from_string(&input_val.item, input_val.span) {
Ok(date) => {
return Value::Date {
val: date,
span: input_val.span,
}
}
Err(err) => err,
};
}
}
const HOUR: i32 = 60 * 60;
// Check to see if input looks like a Unix timestamp (i.e. can it be parsed to an int?)
@ -256,51 +270,72 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
}
};
if let Ok(ts) = timestamp {
macro_rules! match_datetime {
($expr:expr) => {
match $expr {
dt => Value::Date {
val: dt.into(),
span: head,
if dateformat.is_none() {
if let Ok(ts) = timestamp {
return match timezone {
// note all these `.timestamp_nanos()` could overflow if we didn't check range in `<date> | into int`.
// default to UTC
None => Value::Date {
val: Utc.timestamp_nanos(ts).into(),
span: head,
},
Some(Spanned { item, span }) => match item {
Zone::Utc => {
let dt = Utc.timestamp_nanos(ts);
Value::Date {
val: dt.into(),
span: *span,
}
}
Zone::Local => {
let dt = Local.timestamp_nanos(ts);
Value::Date {
val: dt.into(),
span: *span,
}
}
Zone::East(i) => match FixedOffset::east_opt((*i as i32) * HOUR) {
Some(eastoffset) => {
let dt = eastoffset.timestamp_nanos(ts);
Value::Date {
val: dt,
span: *span,
}
}
None => Value::Error {
error: Box::new(ShellError::DatetimeParseError(
input.debug_value(),
*span,
)),
},
},
}
Zone::West(i) => match FixedOffset::west_opt((*i as i32) * HOUR) {
Some(westoffset) => {
let dt = westoffset.timestamp_nanos(ts);
Value::Date {
val: dt,
span: *span,
}
}
None => Value::Error {
error: Box::new(ShellError::DatetimeParseError(
input.debug_value(),
*span,
)),
},
},
Zone::Error => Value::Error {
// This is an argument error, not an input error
error: Box::new(ShellError::TypeMismatch {
err_message: "Invalid timezone or offset".to_string(),
span: *span,
}),
},
},
};
}
return match timezone {
// note all these `.timestamp_nanos()` could overflow if we didn't check range in `<date> | into int`.
// default to UTC
None => Value::Date {
val: Utc.timestamp_nanos(ts).into(),
span: head,
},
Some(Spanned { item, span }) => match item {
Zone::Utc => match_datetime!(Utc.timestamp_nanos(ts)),
Zone::Local => match_datetime!(Local.timestamp_nanos(ts)),
Zone::East(i) => match FixedOffset::east_opt((*i as i32) * HOUR) {
Some(eastoffset) => match_datetime!(eastoffset.timestamp_nanos(ts)),
None => Value::Error {
error: Box::new(ShellError::DatetimeParseError(input.debug_value(), *span)),
},
},
Zone::West(i) => match FixedOffset::west_opt((*i as i32) * HOUR) {
Some(westoffset) => match_datetime!(westoffset.timestamp_nanos(ts)),
None => Value::Error {
error: Box::new(ShellError::DatetimeParseError(input.debug_value(), *span)),
},
},
Zone::Error => Value::Error {
// This is an argument error, not an input error
error: Box::new(ShellError::TypeMismatch {
err_message: "Invalid timezone or offset".to_string(),
span: *span,
}),
},
},
};
};
}
// If input is not a timestamp, try parsing it as a string
match input {
@ -314,6 +349,7 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
}
}
},
// Tries to automatically parse the date
// (i.e. without a format string)
// and assumes the system's local timezone if none is specified