mirror of
https://github.com/getzola/zola
synced 2024-12-12 05:12:29 +00:00
Fix YAML date parsing (#2549)
* Refine YAML date regex This commit does a few changes: - Introduce a new regex - it is a bit off-spec (it allows one-digit months and days in date-only mode) - uses named groups - avoids group duplication - parses offset once Fixes #2538 * Fix nanosecond parsing * Rename variables for brewity * Add tests
This commit is contained in:
parent
aa2847aa1e
commit
56122defde
1 changed files with 152 additions and 43 deletions
|
@ -9,46 +9,26 @@ use serde::{Deserialize, Deserializer};
|
|||
|
||||
pub fn parse_yaml_datetime(date_string: &str) -> Result<time::OffsetDateTime> {
|
||||
// See https://github.com/getzola/zola/issues/2071#issuecomment-1530610650
|
||||
let re = Regex::new(r#"^"?([0-9]{4})-([0-9][0-9]?)-([0-9][0-9]?)([Tt]|[ \t]+)([0-9][0-9]?):([0-9]{2}):([0-9]{2})\.([0-9]*)?Z?([ \t]([-+][0-9][0-9]?)(:([0-9][0-9]?))?Z?|([-+][0-9]{2})?:([0-9]{2})?)?|([0-9]{4})-([0-9]{2})-([0-9]{2})"?$"#).unwrap();
|
||||
let re = Regex::new(r#"^"?(?P<year>[0-9]{4})-(?P<month>[0-9][0-9]?)-(?P<day>[0-9][0-9]?)(?:(?:[Tt]|[ \t]+)(?P<hour>[0-9][0-9]?):(?P<minute>[0-9]{2}):(?P<second>[0-9]{2})(?P<fraction>\.[0-9]{0,9})?[ \t]*(?:(?P<utc>Z)|(?P<offset>(?P<offset_hour>[-+][0-9][0-9]?)(?::(?P<offset_minute>[0-9][0-9]))?))?)?"?$"#).unwrap();
|
||||
let captures = if let Some(captures_) = re.captures(date_string) {
|
||||
Ok(captures_)
|
||||
} else {
|
||||
Err(anyhow!("Error parsing YAML datetime"))
|
||||
}?;
|
||||
let year =
|
||||
if let Some(cap) = captures.get(1) { cap } else { captures.get(15).unwrap() }.as_str();
|
||||
let month =
|
||||
if let Some(cap) = captures.get(2) { cap } else { captures.get(16).unwrap() }.as_str();
|
||||
let day =
|
||||
if let Some(cap) = captures.get(3) { cap } else { captures.get(17).unwrap() }.as_str();
|
||||
let hours = if let Some(hours_) = captures.get(5) { hours_.as_str() } else { "0" };
|
||||
let minutes = if let Some(minutes_) = captures.get(6) { minutes_.as_str() } else { "0" };
|
||||
let seconds = if let Some(seconds_) = captures.get(7) { seconds_.as_str() } else { "0" };
|
||||
let fractional_seconds_raw =
|
||||
if let Some(fractionals) = captures.get(8) { fractionals.as_str() } else { "" };
|
||||
let fractional_seconds_intermediate = fractional_seconds_raw.trim_end_matches("0");
|
||||
let year = captures.name("year").unwrap().as_str();
|
||||
let month = captures.name("month").unwrap().as_str();
|
||||
let day = captures.name("day").unwrap().as_str();
|
||||
let hour = if let Some(hour_) = captures.name("hour") { hour_.as_str() } else { "0" };
|
||||
let minute = if let Some(minute_) = captures.name("minute") { minute_.as_str() } else { "0" };
|
||||
let second = if let Some(second_) = captures.name("second") { second_.as_str() } else { "0" };
|
||||
let fraction_raw =
|
||||
if let Some(fraction_) = captures.name("fraction") { fraction_.as_str() } else { "" };
|
||||
let fraction_intermediate = fraction_raw.trim_end_matches("0");
|
||||
//
|
||||
// Prepare for eventual conversion into nanoseconds
|
||||
let fractional_seconds = if fractional_seconds_intermediate.len() > 0
|
||||
&& fractional_seconds_intermediate.len() <= 9
|
||||
{
|
||||
fractional_seconds_intermediate
|
||||
} else {
|
||||
"0"
|
||||
};
|
||||
let maybe_timezone_hour_1 = captures.get(10);
|
||||
let maybe_timezone_minute_1 = captures.get(12);
|
||||
let maybe_timezone_hour_2 = captures.get(13);
|
||||
let maybe_timezone_minute_2 = captures.get(14);
|
||||
let maybe_timezone_hour;
|
||||
let maybe_timezone_minute;
|
||||
if maybe_timezone_hour_2.is_some() {
|
||||
maybe_timezone_hour = maybe_timezone_hour_2;
|
||||
maybe_timezone_minute = maybe_timezone_minute_2;
|
||||
} else {
|
||||
maybe_timezone_hour = maybe_timezone_hour_1;
|
||||
maybe_timezone_minute = maybe_timezone_minute_1;
|
||||
}
|
||||
let fraction = if fraction_intermediate.len() > 0 { fraction_intermediate } else { "0" };
|
||||
let maybe_timezone_hour = captures.name("offset_hour");
|
||||
let maybe_timezone_minute = captures.name("offset_minute");
|
||||
|
||||
let mut offset_datetime = time::OffsetDateTime::UNIX_EPOCH;
|
||||
|
||||
|
@ -67,10 +47,10 @@ pub fn parse_yaml_datetime(date_string: &str) -> Result<time::OffsetDateTime> {
|
|||
.replace_year(year.parse().unwrap())?
|
||||
.replace_month(time::Month::try_from(month.parse::<u8>().unwrap())?)?
|
||||
.replace_day(day.parse().unwrap())?
|
||||
.replace_hour(hours.parse().unwrap())?
|
||||
.replace_minute(minutes.parse().unwrap())?
|
||||
.replace_second(seconds.parse().unwrap())?
|
||||
.replace_nanosecond(fractional_seconds.parse::<u32>().unwrap() * 100_000_000)?)
|
||||
.replace_hour(hour.parse().unwrap())?
|
||||
.replace_minute(minute.parse().unwrap())?
|
||||
.replace_second(second.parse().unwrap())?
|
||||
.replace_nanosecond((fraction.parse::<f64>().unwrap_or(0.0) * 1_000_000_000.0) as u32)?)
|
||||
}
|
||||
|
||||
/// Used as an attribute when we want to convert from TOML to a string date
|
||||
|
@ -167,23 +147,31 @@ mod tests {
|
|||
use time::macros::datetime;
|
||||
|
||||
#[test]
|
||||
fn yaml_spec_examples_pass() {
|
||||
fn yaml_draft_timestamp_pass() {
|
||||
// tests only the values from the YAML 1.1 Timestamp Draft
|
||||
// See https://yaml.org/type/timestamp.html
|
||||
let canonical = "2001-12-15T02:59:43.1Z";
|
||||
let valid_iso8601 = "2001-12-14t21:59:43.10-05:00";
|
||||
let space_separated = "2001-12-14 21:59:43.10 -5";
|
||||
let no_time_zone = "2001-12-15 2:59:43.10";
|
||||
let date = "2002-12-14";
|
||||
assert_eq!(parse_yaml_datetime(canonical).unwrap(), datetime!(2001-12-15 2:59:43.1 +0));
|
||||
assert_eq!(
|
||||
parse_yaml_datetime(canonical).unwrap(),
|
||||
datetime!(2001-12-15 02:59:43.100 +00:00)
|
||||
);
|
||||
assert_eq!(
|
||||
parse_yaml_datetime(valid_iso8601).unwrap(),
|
||||
datetime!(2001-12-14 21:59:43.1 -5)
|
||||
datetime!(2001-12-14 21:59:43.100 -05:00)
|
||||
);
|
||||
assert_eq!(
|
||||
parse_yaml_datetime(space_separated).unwrap(),
|
||||
datetime!(2001-12-14 21:59:43.1 -5)
|
||||
datetime!(2001-12-14 21:59:43.100 -05:00)
|
||||
);
|
||||
assert_eq!(parse_yaml_datetime(no_time_zone).unwrap(), datetime!(2001-12-15 2:59:43.1 +0));
|
||||
assert_eq!(parse_yaml_datetime(date).unwrap(), datetime!(2002-12-14 0:00:00 +0));
|
||||
assert_eq!(
|
||||
parse_yaml_datetime(no_time_zone).unwrap(),
|
||||
datetime!(2001-12-15 02:59:43.100 +00:00)
|
||||
);
|
||||
assert_eq!(parse_yaml_datetime(date).unwrap(), datetime!(2002-12-14 00:00:00.000 +00:00));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -218,4 +206,125 @@ mod tests {
|
|||
let unparseable_time = "2001-12-15:59:4x.1Z";
|
||||
assert!(parse_yaml_datetime(unparseable_time).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn toml_test_pass() {
|
||||
// tests subset from toml-test
|
||||
// Taken from https://github.com/toml-lang/toml-test/tree/a80ce8268cbcf5ea95f02b2e6d6cc38406ce28c9/tests/valid/datetime
|
||||
let space = "1987-07-05 17:45:00Z";
|
||||
// Z is not allowed to be lowercase
|
||||
let lower = "1987-07-05t17:45:00Z";
|
||||
|
||||
let first_offset = "0001-01-01 00:00:00Z";
|
||||
let first_local = "0001-01-01 00:00:00";
|
||||
let first_date = "0001-01-01";
|
||||
let last_offset = "9999-12-31 23:59:59Z";
|
||||
let last_local = "9999-12-31 23:59:59";
|
||||
let last_date = "9999-12-31";
|
||||
|
||||
// valid leap years
|
||||
let datetime_2000 = "2000-02-29 15:15:15Z";
|
||||
let datetime_2024 = "2024-02-29 15:15:15Z";
|
||||
|
||||
// milliseconds
|
||||
let ms1 = "1987-07-05T17:45:56.123Z";
|
||||
let ms2 = "1987-07-05T17:45:56.6Z";
|
||||
|
||||
// timezones
|
||||
let utc = "1987-07-05T17:45:56Z";
|
||||
let pdt = "1987-07-05T17:45:56-05:00";
|
||||
let nzst = "1987-07-05T17:45:56+12:00";
|
||||
let nzdt = "1987-07-05T17:45:56+13:00"; // DST
|
||||
|
||||
assert_eq!(parse_yaml_datetime(space).unwrap(), datetime!(1987-07-05 17:45:00.000 +00:00));
|
||||
assert_eq!(parse_yaml_datetime(lower).unwrap(), datetime!(1987-07-05 17:45:00.000 +00:00));
|
||||
|
||||
assert_eq!(
|
||||
parse_yaml_datetime(first_offset).unwrap(),
|
||||
datetime!(0001-01-01 00:00:00.000 +00:00)
|
||||
);
|
||||
assert_eq!(
|
||||
parse_yaml_datetime(first_local).unwrap(),
|
||||
datetime!(0001-01-01 00:00:00.000 +00:00)
|
||||
);
|
||||
assert_eq!(
|
||||
parse_yaml_datetime(first_date).unwrap(),
|
||||
datetime!(0001-01-01 00:00:00.000 +00:00)
|
||||
);
|
||||
assert_eq!(
|
||||
parse_yaml_datetime(last_offset).unwrap(),
|
||||
datetime!(9999-12-31 23:59:59.000 +00:00)
|
||||
);
|
||||
assert_eq!(
|
||||
parse_yaml_datetime(last_local).unwrap(),
|
||||
datetime!(9999-12-31 23:59:59.000 +00:00)
|
||||
);
|
||||
assert_eq!(
|
||||
parse_yaml_datetime(last_date).unwrap(),
|
||||
datetime!(9999-12-31 00:00:00.000 +00:00)
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_yaml_datetime(datetime_2000).unwrap(),
|
||||
datetime!(2000-02-29 15:15:15.000 +00:00)
|
||||
);
|
||||
assert_eq!(
|
||||
parse_yaml_datetime(datetime_2024).unwrap(),
|
||||
datetime!(2024-02-29 15:15:15.000 +00:00)
|
||||
);
|
||||
|
||||
assert_eq!(parse_yaml_datetime(ms1).unwrap(), datetime!(1987-07-05 17:45:56.123 +00:00));
|
||||
assert_eq!(parse_yaml_datetime(ms2).unwrap(), datetime!(1987-07-05 17:45:56.600 +00:00));
|
||||
|
||||
assert_eq!(parse_yaml_datetime(utc).unwrap(), datetime!(1987-07-05 17:45:56.000 +00:00));
|
||||
assert_eq!(parse_yaml_datetime(pdt).unwrap(), datetime!(1987-07-05 22:45:56.000 +00:00));
|
||||
assert_eq!(parse_yaml_datetime(nzst).unwrap(), datetime!(1987-07-05 05:45:56.000 +00:00));
|
||||
assert_eq!(parse_yaml_datetime(nzdt).unwrap(), datetime!(1987-07-05 04:45:56.000 +00:00));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn toml_test_fail() {
|
||||
let not_a_leap_year = "2100-02-29T15:15:15Z";
|
||||
assert!(parse_yaml_datetime(not_a_leap_year).is_err());
|
||||
|
||||
let feb_30 = "1988-02-30T15:15:15Z";
|
||||
assert!(parse_yaml_datetime(feb_30).is_err());
|
||||
|
||||
let hour_over = "2006-01-01T24:00:00-00:00";
|
||||
assert!(parse_yaml_datetime(hour_over).is_err());
|
||||
|
||||
let mday_over = "2006-01-32T00:00:00-00:00";
|
||||
assert!(parse_yaml_datetime(mday_over).is_err());
|
||||
|
||||
let mday_under = "2006-01-00T00:00:00-00:00";
|
||||
assert!(parse_yaml_datetime(mday_under).is_err());
|
||||
|
||||
let minute_over = "2006-01-01T00:60:00-00:00";
|
||||
assert!(parse_yaml_datetime(minute_over).is_err());
|
||||
|
||||
let month_over = "2006-13-01T00:00:00-00:00";
|
||||
assert!(parse_yaml_datetime(month_over).is_err());
|
||||
|
||||
let month_under = "2007-00-01T00:00:00-00:00";
|
||||
assert!(parse_yaml_datetime(month_under).is_err());
|
||||
|
||||
let no_secs = "1987-07-05T17:45Z";
|
||||
assert!(parse_yaml_datetime(no_secs).is_err());
|
||||
|
||||
let no_sep = "1987-07-0517:45:00Z";
|
||||
assert!(parse_yaml_datetime(no_sep).is_err());
|
||||
|
||||
// 'time' supports up until ±25:59:59
|
||||
let offset_overflow = "1985-06-18 17:04:07+26:00";
|
||||
assert!(parse_yaml_datetime(offset_overflow).is_err());
|
||||
|
||||
let offset_overflow = "1985-06-18 17:04:07+12:61";
|
||||
assert!(parse_yaml_datetime(offset_overflow).is_err());
|
||||
|
||||
let second_overflow = "2006-01-01T00:00:61-00:00";
|
||||
assert!(parse_yaml_datetime(second_overflow).is_err());
|
||||
|
||||
let y10k = "10000-01-01 00:00:00z";
|
||||
assert!(parse_yaml_datetime(y10k).is_err());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue