mirror of
https://github.com/getzola/zola
synced 2025-01-06 00:48:47 +00:00
Fix YAML date parsing (#2549)
* Refine YAML date regex This commit makes a few changes: - Introduce a new regex - it is a bit off-spec (it allows one-digit months and days in date-only mode) - uses named groups - avoids group duplication - parses offset once Fixes #2538 * Fix nanosecond parsing * Rename variables for brevity * Add tests
This commit is contained in:
parent
aa2847aa1e
commit
56122defde
1 changed files with 152 additions and 43 deletions
|
@ -9,46 +9,26 @@ use serde::{Deserialize, Deserializer};
|
||||||
|
|
||||||
pub fn parse_yaml_datetime(date_string: &str) -> Result<time::OffsetDateTime> {
|
pub fn parse_yaml_datetime(date_string: &str) -> Result<time::OffsetDateTime> {
|
||||||
// See https://github.com/getzola/zola/issues/2071#issuecomment-1530610650
|
// See https://github.com/getzola/zola/issues/2071#issuecomment-1530610650
|
||||||
let re = Regex::new(r#"^"?([0-9]{4})-([0-9][0-9]?)-([0-9][0-9]?)([Tt]|[ \t]+)([0-9][0-9]?):([0-9]{2}):([0-9]{2})\.([0-9]*)?Z?([ \t]([-+][0-9][0-9]?)(:([0-9][0-9]?))?Z?|([-+][0-9]{2})?:([0-9]{2})?)?|([0-9]{4})-([0-9]{2})-([0-9]{2})"?$"#).unwrap();
|
let re = Regex::new(r#"^"?(?P<year>[0-9]{4})-(?P<month>[0-9][0-9]?)-(?P<day>[0-9][0-9]?)(?:(?:[Tt]|[ \t]+)(?P<hour>[0-9][0-9]?):(?P<minute>[0-9]{2}):(?P<second>[0-9]{2})(?P<fraction>\.[0-9]{0,9})?[ \t]*(?:(?P<utc>Z)|(?P<offset>(?P<offset_hour>[-+][0-9][0-9]?)(?::(?P<offset_minute>[0-9][0-9]))?))?)?"?$"#).unwrap();
|
||||||
let captures = if let Some(captures_) = re.captures(date_string) {
|
let captures = if let Some(captures_) = re.captures(date_string) {
|
||||||
Ok(captures_)
|
Ok(captures_)
|
||||||
} else {
|
} else {
|
||||||
Err(anyhow!("Error parsing YAML datetime"))
|
Err(anyhow!("Error parsing YAML datetime"))
|
||||||
}?;
|
}?;
|
||||||
let year =
|
let year = captures.name("year").unwrap().as_str();
|
||||||
if let Some(cap) = captures.get(1) { cap } else { captures.get(15).unwrap() }.as_str();
|
let month = captures.name("month").unwrap().as_str();
|
||||||
let month =
|
let day = captures.name("day").unwrap().as_str();
|
||||||
if let Some(cap) = captures.get(2) { cap } else { captures.get(16).unwrap() }.as_str();
|
let hour = if let Some(hour_) = captures.name("hour") { hour_.as_str() } else { "0" };
|
||||||
let day =
|
let minute = if let Some(minute_) = captures.name("minute") { minute_.as_str() } else { "0" };
|
||||||
if let Some(cap) = captures.get(3) { cap } else { captures.get(17).unwrap() }.as_str();
|
let second = if let Some(second_) = captures.name("second") { second_.as_str() } else { "0" };
|
||||||
let hours = if let Some(hours_) = captures.get(5) { hours_.as_str() } else { "0" };
|
let fraction_raw =
|
||||||
let minutes = if let Some(minutes_) = captures.get(6) { minutes_.as_str() } else { "0" };
|
if let Some(fraction_) = captures.name("fraction") { fraction_.as_str() } else { "" };
|
||||||
let seconds = if let Some(seconds_) = captures.get(7) { seconds_.as_str() } else { "0" };
|
let fraction_intermediate = fraction_raw.trim_end_matches("0");
|
||||||
let fractional_seconds_raw =
|
|
||||||
if let Some(fractionals) = captures.get(8) { fractionals.as_str() } else { "" };
|
|
||||||
let fractional_seconds_intermediate = fractional_seconds_raw.trim_end_matches("0");
|
|
||||||
//
|
//
|
||||||
// Prepare for eventual conversion into nanoseconds
|
// Prepare for eventual conversion into nanoseconds
|
||||||
let fractional_seconds = if fractional_seconds_intermediate.len() > 0
|
let fraction = if fraction_intermediate.len() > 0 { fraction_intermediate } else { "0" };
|
||||||
&& fractional_seconds_intermediate.len() <= 9
|
let maybe_timezone_hour = captures.name("offset_hour");
|
||||||
{
|
let maybe_timezone_minute = captures.name("offset_minute");
|
||||||
fractional_seconds_intermediate
|
|
||||||
} else {
|
|
||||||
"0"
|
|
||||||
};
|
|
||||||
let maybe_timezone_hour_1 = captures.get(10);
|
|
||||||
let maybe_timezone_minute_1 = captures.get(12);
|
|
||||||
let maybe_timezone_hour_2 = captures.get(13);
|
|
||||||
let maybe_timezone_minute_2 = captures.get(14);
|
|
||||||
let maybe_timezone_hour;
|
|
||||||
let maybe_timezone_minute;
|
|
||||||
if maybe_timezone_hour_2.is_some() {
|
|
||||||
maybe_timezone_hour = maybe_timezone_hour_2;
|
|
||||||
maybe_timezone_minute = maybe_timezone_minute_2;
|
|
||||||
} else {
|
|
||||||
maybe_timezone_hour = maybe_timezone_hour_1;
|
|
||||||
maybe_timezone_minute = maybe_timezone_minute_1;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut offset_datetime = time::OffsetDateTime::UNIX_EPOCH;
|
let mut offset_datetime = time::OffsetDateTime::UNIX_EPOCH;
|
||||||
|
|
||||||
|
@ -67,10 +47,10 @@ pub fn parse_yaml_datetime(date_string: &str) -> Result<time::OffsetDateTime> {
|
||||||
.replace_year(year.parse().unwrap())?
|
.replace_year(year.parse().unwrap())?
|
||||||
.replace_month(time::Month::try_from(month.parse::<u8>().unwrap())?)?
|
.replace_month(time::Month::try_from(month.parse::<u8>().unwrap())?)?
|
||||||
.replace_day(day.parse().unwrap())?
|
.replace_day(day.parse().unwrap())?
|
||||||
.replace_hour(hours.parse().unwrap())?
|
.replace_hour(hour.parse().unwrap())?
|
||||||
.replace_minute(minutes.parse().unwrap())?
|
.replace_minute(minute.parse().unwrap())?
|
||||||
.replace_second(seconds.parse().unwrap())?
|
.replace_second(second.parse().unwrap())?
|
||||||
.replace_nanosecond(fractional_seconds.parse::<u32>().unwrap() * 100_000_000)?)
|
.replace_nanosecond((fraction.parse::<f64>().unwrap_or(0.0) * 1_000_000_000.0) as u32)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Used as an attribute when we want to convert from TOML to a string date
|
/// Used as an attribute when we want to convert from TOML to a string date
|
||||||
|
@ -167,23 +147,31 @@ mod tests {
|
||||||
use time::macros::datetime;
|
use time::macros::datetime;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn yaml_spec_examples_pass() {
|
fn yaml_draft_timestamp_pass() {
|
||||||
|
// tests only the values from the YAML 1.1 Timestamp Draft
|
||||||
|
// See https://yaml.org/type/timestamp.html
|
||||||
let canonical = "2001-12-15T02:59:43.1Z";
|
let canonical = "2001-12-15T02:59:43.1Z";
|
||||||
let valid_iso8601 = "2001-12-14t21:59:43.10-05:00";
|
let valid_iso8601 = "2001-12-14t21:59:43.10-05:00";
|
||||||
let space_separated = "2001-12-14 21:59:43.10 -5";
|
let space_separated = "2001-12-14 21:59:43.10 -5";
|
||||||
let no_time_zone = "2001-12-15 2:59:43.10";
|
let no_time_zone = "2001-12-15 2:59:43.10";
|
||||||
let date = "2002-12-14";
|
let date = "2002-12-14";
|
||||||
assert_eq!(parse_yaml_datetime(canonical).unwrap(), datetime!(2001-12-15 2:59:43.1 +0));
|
assert_eq!(
|
||||||
|
parse_yaml_datetime(canonical).unwrap(),
|
||||||
|
datetime!(2001-12-15 02:59:43.100 +00:00)
|
||||||
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse_yaml_datetime(valid_iso8601).unwrap(),
|
parse_yaml_datetime(valid_iso8601).unwrap(),
|
||||||
datetime!(2001-12-14 21:59:43.1 -5)
|
datetime!(2001-12-14 21:59:43.100 -05:00)
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse_yaml_datetime(space_separated).unwrap(),
|
parse_yaml_datetime(space_separated).unwrap(),
|
||||||
datetime!(2001-12-14 21:59:43.1 -5)
|
datetime!(2001-12-14 21:59:43.100 -05:00)
|
||||||
);
|
);
|
||||||
assert_eq!(parse_yaml_datetime(no_time_zone).unwrap(), datetime!(2001-12-15 2:59:43.1 +0));
|
assert_eq!(
|
||||||
assert_eq!(parse_yaml_datetime(date).unwrap(), datetime!(2002-12-14 0:00:00 +0));
|
parse_yaml_datetime(no_time_zone).unwrap(),
|
||||||
|
datetime!(2001-12-15 02:59:43.100 +00:00)
|
||||||
|
);
|
||||||
|
assert_eq!(parse_yaml_datetime(date).unwrap(), datetime!(2002-12-14 00:00:00.000 +00:00));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -218,4 +206,125 @@ mod tests {
|
||||||
let unparseable_time = "2001-12-15:59:4x.1Z";
|
let unparseable_time = "2001-12-15:59:4x.1Z";
|
||||||
assert!(parse_yaml_datetime(unparseable_time).is_err());
|
assert!(parse_yaml_datetime(unparseable_time).is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn toml_test_pass() {
|
||||||
|
// tests subset from toml-test
|
||||||
|
// Taken from https://github.com/toml-lang/toml-test/tree/a80ce8268cbcf5ea95f02b2e6d6cc38406ce28c9/tests/valid/datetime
|
||||||
|
let space = "1987-07-05 17:45:00Z";
|
||||||
|
// A lowercase 't' separator is accepted, but the UTC designator 'Z' is not allowed to be lowercase
|
||||||
|
let lower = "1987-07-05t17:45:00Z";
|
||||||
|
|
||||||
|
let first_offset = "0001-01-01 00:00:00Z";
|
||||||
|
let first_local = "0001-01-01 00:00:00";
|
||||||
|
let first_date = "0001-01-01";
|
||||||
|
let last_offset = "9999-12-31 23:59:59Z";
|
||||||
|
let last_local = "9999-12-31 23:59:59";
|
||||||
|
let last_date = "9999-12-31";
|
||||||
|
|
||||||
|
// valid leap years
|
||||||
|
let datetime_2000 = "2000-02-29 15:15:15Z";
|
||||||
|
let datetime_2024 = "2024-02-29 15:15:15Z";
|
||||||
|
|
||||||
|
// milliseconds
|
||||||
|
let ms1 = "1987-07-05T17:45:56.123Z";
|
||||||
|
let ms2 = "1987-07-05T17:45:56.6Z";
|
||||||
|
|
||||||
|
// timezones
|
||||||
|
let utc = "1987-07-05T17:45:56Z";
|
||||||
|
let pdt = "1987-07-05T17:45:56-05:00";
|
||||||
|
let nzst = "1987-07-05T17:45:56+12:00";
|
||||||
|
let nzdt = "1987-07-05T17:45:56+13:00"; // DST
|
||||||
|
|
||||||
|
assert_eq!(parse_yaml_datetime(space).unwrap(), datetime!(1987-07-05 17:45:00.000 +00:00));
|
||||||
|
assert_eq!(parse_yaml_datetime(lower).unwrap(), datetime!(1987-07-05 17:45:00.000 +00:00));
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
parse_yaml_datetime(first_offset).unwrap(),
|
||||||
|
datetime!(0001-01-01 00:00:00.000 +00:00)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
parse_yaml_datetime(first_local).unwrap(),
|
||||||
|
datetime!(0001-01-01 00:00:00.000 +00:00)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
parse_yaml_datetime(first_date).unwrap(),
|
||||||
|
datetime!(0001-01-01 00:00:00.000 +00:00)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
parse_yaml_datetime(last_offset).unwrap(),
|
||||||
|
datetime!(9999-12-31 23:59:59.000 +00:00)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
parse_yaml_datetime(last_local).unwrap(),
|
||||||
|
datetime!(9999-12-31 23:59:59.000 +00:00)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
parse_yaml_datetime(last_date).unwrap(),
|
||||||
|
datetime!(9999-12-31 00:00:00.000 +00:00)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
parse_yaml_datetime(datetime_2000).unwrap(),
|
||||||
|
datetime!(2000-02-29 15:15:15.000 +00:00)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
parse_yaml_datetime(datetime_2024).unwrap(),
|
||||||
|
datetime!(2024-02-29 15:15:15.000 +00:00)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(parse_yaml_datetime(ms1).unwrap(), datetime!(1987-07-05 17:45:56.123 +00:00));
|
||||||
|
assert_eq!(parse_yaml_datetime(ms2).unwrap(), datetime!(1987-07-05 17:45:56.600 +00:00));
|
||||||
|
|
||||||
|
assert_eq!(parse_yaml_datetime(utc).unwrap(), datetime!(1987-07-05 17:45:56.000 +00:00));
|
||||||
|
assert_eq!(parse_yaml_datetime(pdt).unwrap(), datetime!(1987-07-05 22:45:56.000 +00:00));
|
||||||
|
assert_eq!(parse_yaml_datetime(nzst).unwrap(), datetime!(1987-07-05 05:45:56.000 +00:00));
|
||||||
|
assert_eq!(parse_yaml_datetime(nzdt).unwrap(), datetime!(1987-07-05 04:45:56.000 +00:00));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn toml_test_fail() {
|
||||||
|
let not_a_leap_year = "2100-02-29T15:15:15Z";
|
||||||
|
assert!(parse_yaml_datetime(not_a_leap_year).is_err());
|
||||||
|
|
||||||
|
let feb_30 = "1988-02-30T15:15:15Z";
|
||||||
|
assert!(parse_yaml_datetime(feb_30).is_err());
|
||||||
|
|
||||||
|
let hour_over = "2006-01-01T24:00:00-00:00";
|
||||||
|
assert!(parse_yaml_datetime(hour_over).is_err());
|
||||||
|
|
||||||
|
let mday_over = "2006-01-32T00:00:00-00:00";
|
||||||
|
assert!(parse_yaml_datetime(mday_over).is_err());
|
||||||
|
|
||||||
|
let mday_under = "2006-01-00T00:00:00-00:00";
|
||||||
|
assert!(parse_yaml_datetime(mday_under).is_err());
|
||||||
|
|
||||||
|
let minute_over = "2006-01-01T00:60:00-00:00";
|
||||||
|
assert!(parse_yaml_datetime(minute_over).is_err());
|
||||||
|
|
||||||
|
let month_over = "2006-13-01T00:00:00-00:00";
|
||||||
|
assert!(parse_yaml_datetime(month_over).is_err());
|
||||||
|
|
||||||
|
let month_under = "2007-00-01T00:00:00-00:00";
|
||||||
|
assert!(parse_yaml_datetime(month_under).is_err());
|
||||||
|
|
||||||
|
let no_secs = "1987-07-05T17:45Z";
|
||||||
|
assert!(parse_yaml_datetime(no_secs).is_err());
|
||||||
|
|
||||||
|
let no_sep = "1987-07-0517:45:00Z";
|
||||||
|
assert!(parse_yaml_datetime(no_sep).is_err());
|
||||||
|
|
||||||
|
// The `time` crate only supports UTC offsets up to ±25:59:59
|
||||||
|
let offset_overflow = "1985-06-18 17:04:07+26:00";
|
||||||
|
assert!(parse_yaml_datetime(offset_overflow).is_err());
|
||||||
|
|
||||||
|
let offset_overflow = "1985-06-18 17:04:07+12:61";
|
||||||
|
assert!(parse_yaml_datetime(offset_overflow).is_err());
|
||||||
|
|
||||||
|
let second_overflow = "2006-01-01T00:00:61-00:00";
|
||||||
|
assert!(parse_yaml_datetime(second_overflow).is_err());
|
||||||
|
|
||||||
|
let y10k = "10000-01-01 00:00:00z";
|
||||||
|
assert!(parse_yaml_datetime(y10k).is_err());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue