From e787f81544fb8051a59a151d6ece5eae2a4a555b Mon Sep 17 00:00:00 2001 From: Serial <69764315+Serial-ATA@users.noreply.github.com> Date: Mon, 9 Sep 2024 06:55:21 -0400 Subject: [PATCH] Timestamp: Support parsing without separators --- CHANGELOG.md | 1 + lofty/src/tag/items/timestamp.rs | 114 ++++++++++++++++++++++++++++--- 2 files changed, 106 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a6e6833..3dbc86b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - **MusePack**: Fix potential panic when the beginning silence makes up the entire sample count ([PR](https://github.com/Serial-ATA/lofty-rs/pull/449)) +- **Timestamp**: Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/453)) ## [0.21.1] - 2024-08-28 diff --git a/lofty/src/tag/items/timestamp.rs b/lofty/src/tag/items/timestamp.rs index 8b149056..411a05e8 100644 --- a/lofty/src/tag/items/timestamp.rs +++ b/lofty/src/tag/items/timestamp.rs @@ -79,6 +79,8 @@ impl Timestamp { /// The maximum length of a timestamp in bytes pub const MAX_LENGTH: usize = 19; + const SEPARATORS: [u8; 3] = [b'-', b'T', b':']; + /// Read a [`Timestamp`] /// /// NOTE: This will take [`Self::MAX_LENGTH`] bytes from the reader. Ensure that it only contains the timestamp @@ -94,10 +96,8 @@ impl Timestamp { macro_rules! read_segment { ($expr:expr) => { match $expr { + Ok((_, 0)) => break, Ok((val, _)) => Some(val as u8), - Err(LoftyError { - kind: ErrorKind::Io(io), - }) if matches!(io.kind(), std::io::ErrorKind::UnexpectedEof) => break, Err(e) => return Err(e.into()), } }; @@ -118,6 +118,12 @@ impl Timestamp { return Ok(None); } + // It is valid for a timestamp to contain no separators, but this will lower our tolerance + // for common mistakes. 
We ignore the "T" separator here because it is **ALWAYS** required. + let timestamp_contains_separators = content + .iter() + .any(|&b| b != b'T' && Self::SEPARATORS.contains(&b)); + let reader = &mut &content[..]; // We need to very that the year is exactly 4 bytes long. This doesn't matter for other segments. @@ -129,14 +135,33 @@ impl Timestamp { } timestamp.year = year; + if reader.is_empty() { + return Ok(Some(timestamp)); + } #[allow(clippy::never_loop)] loop { - timestamp.month = read_segment!(Self::segment::<2>(reader, Some(b'-'), parse_mode)); - timestamp.day = read_segment!(Self::segment::<2>(reader, Some(b'-'), parse_mode)); + timestamp.month = read_segment!(Self::segment::<2>( + reader, + timestamp_contains_separators.then_some(b'-'), + parse_mode + )); + timestamp.day = read_segment!(Self::segment::<2>( + reader, + timestamp_contains_separators.then_some(b'-'), + parse_mode + )); timestamp.hour = read_segment!(Self::segment::<2>(reader, Some(b'T'), parse_mode)); - timestamp.minute = read_segment!(Self::segment::<2>(reader, Some(b':'), parse_mode)); - timestamp.second = read_segment!(Self::segment::<2>(reader, Some(b':'), parse_mode)); + timestamp.minute = read_segment!(Self::segment::<2>( + reader, + timestamp_contains_separators.then_some(b':'), + parse_mode + )); + timestamp.second = read_segment!(Self::segment::<2>( + reader, + timestamp_contains_separators.then_some(b':'), + parse_mode + )); break; } @@ -148,7 +173,9 @@ impl Timestamp { sep: Option<u8>, parse_mode: ParsingMode, ) -> Result<(u16, usize)> { - const SEPARATORS: [u8; 3] = [b'-', b'T', b':']; + if content.is_empty() { + return Ok((0, 0)); + } if let Some(sep) = sep { let byte = content.read_u8()?; @@ -181,7 +208,10 @@ impl Timestamp { // // The easiest way to check for a missing digit is to see if we're just eating into // the next segment's separator. 
- if sep.is_some() && SEPARATORS.contains(&i) && parse_mode != ParsingMode::Strict { + if sep.is_some() + && Self::SEPARATORS.contains(&i) + && parse_mode != ParsingMode::Strict + { break; } @@ -370,4 +400,70 @@ mod tests { let empty_timestamp_strict = Timestamp::parse(&mut "".as_bytes(), ParsingMode::Strict); assert!(empty_timestamp_strict.is_err()); } + + #[test_log::test] + fn timestamp_no_separators() { + let timestamp = "20240603T140849"; + let parsed_timestamp = + Timestamp::parse(&mut timestamp.as_bytes(), ParsingMode::BestAttempt).unwrap(); + assert_eq!(parsed_timestamp, Some(expected())); + } + + #[test_log::test] + fn timestamp_decode_partial_no_separators() { + let partial_timestamps: [(&[u8], Timestamp); 6] = [ + ( + b"2024", + Timestamp { + year: 2024, + ..Timestamp::default() + }, + ), + ( + b"202406", + Timestamp { + year: 2024, + month: Some(6), + ..Timestamp::default() + }, + ), + ( + b"20240603", + Timestamp { + year: 2024, + month: Some(6), + day: Some(3), + ..Timestamp::default() + }, + ), + ( + b"20240603T14", + Timestamp { + year: 2024, + month: Some(6), + day: Some(3), + hour: Some(14), + ..Timestamp::default() + }, + ), + ( + b"20240603T1408", + Timestamp { + year: 2024, + month: Some(6), + day: Some(3), + hour: Some(14), + minute: Some(8), + ..Timestamp::default() + }, + ), + (b"20240603T140849", expected()), + ]; + + for (data, expected) in partial_timestamps { + let parsed_timestamp = Timestamp::parse(&mut &data[..], ParsingMode::Strict) + .unwrap_or_else(|e| panic!("{e}: {}", std::str::from_utf8(data).unwrap())); + assert_eq!(parsed_timestamp, Some(expected)); + } + } }