Timestamp: Support parsing without separators

2024-11-10 06:34:18 +00:00 · 2024-09-09 06:55:21 -04:00 · 2024-09-09 06:55:21 -04:00 · e787f81544
commit e787f81544
parent a0298587c0
2 changed files with 106 additions and 9 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Fixed
 - **MusePack**: Fix potential panic when the beginning silence makes up the entire sample count ([PR](https://github.com/Serial-ATA/lofty-rs/pull/449))
+- **Timestamp**: Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/453))

 ## [0.21.1] - 2024-08-28

--- a/lofty/src/tag/items/timestamp.rs
+++ b/lofty/src/tag/items/timestamp.rs
@ -79,6 +79,8 @@ impl Timestamp {
 	/// The maximum length of a timestamp in bytes
 	pub const MAX_LENGTH: usize = 19;

+	const SEPARATORS: [u8; 3] = [b'-', b'T', b':'];
+
 	/// Read a [`Timestamp`]
 	///
 	/// NOTE: This will take [`Self::MAX_LENGTH`] bytes from the reader. Ensure that it only contains the timestamp
@ -94,10 +96,8 @@ impl Timestamp {
 		macro_rules! read_segment {
 			($expr:expr) => {
 				match $expr {
+					Ok((_, 0)) => break,
 					Ok((val, _)) => Some(val as u8),
-					Err(LoftyError {
-						kind: ErrorKind::Io(io),
-					}) if matches!(io.kind(), std::io::ErrorKind::UnexpectedEof) => break,
 					Err(e) => return Err(e.into()),
 				}
 			};
@ -118,6 +118,12 @@ impl Timestamp {
 			return Ok(None);
 		}

+		// It is valid for a timestamp to contain no separators, but this will lower our tolerance
+		// for common mistakes. We ignore the "T" separator here because it is **ALWAYS** required.
+		let timestamp_contains_separators = content
+			.iter()
+			.any(|&b| b != b'T' && Self::SEPARATORS.contains(&b));
+
 		let reader = &mut &content[..];

 		// We need to verify that the year is exactly 4 bytes long. This doesn't matter for other segments.
@ -129,14 +135,33 @@ impl Timestamp {
 		}

 		timestamp.year = year;
+		if reader.is_empty() {
+			return Ok(Some(timestamp));
+		}

 		#[allow(clippy::never_loop)]
 		loop {
-			timestamp.month = read_segment!(Self::segment::<2>(reader, Some(b'-'), parse_mode));
-			timestamp.day = read_segment!(Self::segment::<2>(reader, Some(b'-'), parse_mode));
+			timestamp.month = read_segment!(Self::segment::<2>(
+				reader,
+				timestamp_contains_separators.then_some(b'-'),
+				parse_mode
+			));
+			timestamp.day = read_segment!(Self::segment::<2>(
+				reader,
+				timestamp_contains_separators.then_some(b'-'),
+				parse_mode
+			));
 			timestamp.hour = read_segment!(Self::segment::<2>(reader, Some(b'T'), parse_mode));
-			timestamp.minute = read_segment!(Self::segment::<2>(reader, Some(b':'), parse_mode));
-			timestamp.second = read_segment!(Self::segment::<2>(reader, Some(b':'), parse_mode));
+			timestamp.minute = read_segment!(Self::segment::<2>(
+				reader,
+				timestamp_contains_separators.then_some(b':'),
+				parse_mode
+			));
+			timestamp.second = read_segment!(Self::segment::<2>(
+				reader,
+				timestamp_contains_separators.then_some(b':'),
+				parse_mode
+			));
 			break;
 		}

@ -148,7 +173,9 @@ impl Timestamp {
 		sep: Option<u8>,
 		parse_mode: ParsingMode,
 	) -> Result<(u16, usize)> {
-		const SEPARATORS: [u8; 3] = [b'-', b'T', b':'];
+		if content.is_empty() {
+			return Ok((0, 0));
+		}

 		if let Some(sep) = sep {
 			let byte = content.read_u8()?;
@ -181,7 +208,10 @@ impl Timestamp {
 				//
 				// The easiest way to check for a missing digit is to see if we're just eating into
 				// the next segment's separator.
-				if sep.is_some() && SEPARATORS.contains(&i) && parse_mode != ParsingMode::Strict {
+				if sep.is_some()
+					&& Self::SEPARATORS.contains(&i)
+					&& parse_mode != ParsingMode::Strict
+				{
 					break;
 				}

@ -370,4 +400,70 @@ mod tests {
 		let empty_timestamp_strict = Timestamp::parse(&mut "".as_bytes(), ParsingMode::Strict);
 		assert!(empty_timestamp_strict.is_err());
 	}
+
+	#[test_log::test]
+	fn timestamp_no_separators() {
+		let timestamp = "20240603T140849";
+		let parsed_timestamp =
+			Timestamp::parse(&mut timestamp.as_bytes(), ParsingMode::BestAttempt).unwrap();
+		assert_eq!(parsed_timestamp, Some(expected()));
+	}
+
+	#[test_log::test]
+	fn timestamp_decode_partial_no_separators() {
+		let partial_timestamps: [(&[u8], Timestamp); 6] = [
+			(
+				b"2024",
+				Timestamp {
+					year: 2024,
+					..Timestamp::default()
+				},
+			),
+			(
+				b"202406",
+				Timestamp {
+					year: 2024,
+					month: Some(6),
+					..Timestamp::default()
+				},
+			),
+			(
+				b"20240603",
+				Timestamp {
+					year: 2024,
+					month: Some(6),
+					day: Some(3),
+					..Timestamp::default()
+				},
+			),
+			(
+				b"20240603T14",
+				Timestamp {
+					year: 2024,
+					month: Some(6),
+					day: Some(3),
+					hour: Some(14),
+					..Timestamp::default()
+				},
+			),
+			(
+				b"20240603T1408",
+				Timestamp {
+					year: 2024,
+					month: Some(6),
+					day: Some(3),
+					hour: Some(14),
+					minute: Some(8),
+					..Timestamp::default()
+				},
+			),
+			(b"20240603T140849", expected()),
+		];
+
+		for (data, expected) in partial_timestamps {
+			let parsed_timestamp = Timestamp::parse(&mut &data[..], ParsingMode::Strict)
+				.unwrap_or_else(|e| panic!("{e}: {}", std::str::from_utf8(data).unwrap()));
+			assert_eq!(parsed_timestamp, Some(expected));
+		}
+	}
 }