Timestamp: Allow short-circuiting on partially invalid inputs

This commit is contained in:
Serial 2024-09-15 11:46:45 -04:00 committed by Alex
parent 98da062776
commit 7571d968e9
3 changed files with 153 additions and 29 deletions

View file

@ -28,7 +28,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- **MusePack**: Fix potential panic when the beginning silence makes up the entire sample count ([PR](https://github.com/Serial-ATA/lofty-rs/pull/449))
- **Timestamp**: Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/issues/453))
- **Timestamp**:
- Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/453))
- `Timestamp::parse` will now short-circuit when possible in `ParsingMode::{BestAttempt, Relaxed}` ([issue](https://github.com/Serial-ATA/lofty-rs/issues/462)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/463))
- For example, the timestamp "2024-06-03 14:08:49" contains a space instead of the required "T" marker.
In `ParsingMode::Strict`, this would be an error. Otherwise, the parser will just stop once it hits the space
and return the timestamp up to that point.
- **ID3v2**:
- `ItemKey::Director` will now be written correctly as a TXXX frame ([PR](https://github.com/Serial-ATA/lofty-rs/issues/454))
- When skipping invalid frames in `ParsingMode::{BestAttempt, Relaxed}`, the parser will no longer be able to go out of the bounds

View file

@ -1,5 +1,5 @@
use crate::config::ParsingMode;
use crate::error::{ErrorKind, LoftyError, Result};
use crate::error::Result;
use crate::id3::v2::{FrameFlags, FrameHeader, FrameId};
use crate::macros::err;
use crate::tag::items::Timestamp;
@ -77,15 +77,19 @@ impl<'a> TimestampFrame<'a> {
return Ok(None);
};
let Some(encoding) = TextEncoding::from_u8(encoding_byte) else {
return Err(LoftyError::new(ErrorKind::TextDecode(
"Found invalid encoding",
)));
if parse_mode != ParsingMode::Relaxed {
err!(TextDecode("Found invalid encoding"))
}
return Ok(None);
};
let value = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
if !value.is_ascii() {
if parse_mode == ParsingMode::Strict {
err!(BadTimestamp("Timestamp contains non-ASCII characters"))
}
return Ok(None);
}
let header = FrameHeader::new(id, frame_flags);
let mut frame = TimestampFrame {
@ -96,7 +100,18 @@ impl<'a> TimestampFrame<'a> {
let reader = &mut value.as_bytes();
let Some(timestamp) = Timestamp::parse(reader, parse_mode)? else {
let result;
match Timestamp::parse(reader, parse_mode) {
Ok(timestamp) => result = timestamp,
Err(e) => {
if parse_mode != ParsingMode::Relaxed {
return Err(e);
}
return Ok(None);
},
}
let Some(timestamp) = result else {
// Timestamp is empty
return Ok(None);
};
@ -105,7 +120,7 @@ impl<'a> TimestampFrame<'a> {
Ok(Some(frame))
}
/// Convert an [`TimestampFrame`] to a byte vec
/// Convert a [`TimestampFrame`] to a byte vec
///
/// # Errors
///

View file

@ -98,7 +98,7 @@ impl Timestamp {
match $expr {
Ok((_, 0)) => break,
Ok((val, _)) => Some(val as u8),
Err(e) => return Err(e.into()),
Err(e) => return Err(e),
}
};
}
@ -126,7 +126,7 @@ impl Timestamp {
let reader = &mut &content[..];
// We need to very that the year is exactly 4 bytes long. This doesn't matter for other segments.
// We need to verify that the year is exactly 4 bytes long. This doesn't matter for other segments.
let (year, bytes_read) = Self::segment::<4>(reader, None, parse_mode)?;
if bytes_read != 4 {
err!(BadTimestamp(
@ -173,22 +173,31 @@ impl Timestamp {
sep: Option<u8>,
parse_mode: ParsingMode,
) -> Result<(u16, usize)> {
const STOP_PARSING: (u16, usize) = (0, 0);
if content.is_empty() {
return Ok((0, 0));
return Ok(STOP_PARSING);
}
if let Some(sep) = sep {
let byte = content.read_u8()?;
if byte != sep {
if parse_mode == ParsingMode::Strict {
err!(BadTimestamp("Expected a separator"))
}
return Ok(STOP_PARSING);
}
}
if content.len() < SIZE {
if parse_mode == ParsingMode::Strict {
err!(BadTimestamp("Timestamp segment is too short"))
}
let mut num = 0;
return Ok(STOP_PARSING);
}
let mut num = None;
let mut byte_count = 0;
for i in content[..SIZE].iter().copied() {
// Common spec violation: Timestamps may use spaces instead of zeros, so the month of June
@ -202,6 +211,8 @@ impl Timestamp {
continue;
}
// TODO: This is a spec violation for ID3v2, but not for ISO 8601 in general. Maybe consider
// making this a warning and allow it for all parsing modes?
if !i.is_ascii_digit() {
// Another spec violation, timestamps in the wild may not use a zero or a space, so
// we would have to treat "06", "6", and " 6" as valid.
@ -220,13 +231,23 @@ impl Timestamp {
))
}
num = num * 10 + u16::from(i - b'0');
num = Some(num.unwrap_or(0) * 10 + u16::from(i - b'0'));
byte_count += 1;
}
let Some(parsed_num) = num else {
assert_ne!(
parse_mode,
ParsingMode::Strict,
"The timestamp segment is empty, the parser should've failed before this point."
);
return Ok(STOP_PARSING);
};
*content = &content[byte_count..];
Ok((num, byte_count))
Ok((parsed_num, byte_count))
}
pub(crate) fn verify(&self) -> Result<()> {
@ -316,24 +337,86 @@ mod tests {
assert_eq!(timestamp.to_string().len(), 7);
}
#[test_log::test]
fn reject_broken_timestamps() {
let broken_timestamps: &[&[u8]] = &[
fn broken_timestamps() -> [(&'static [u8], Timestamp); 7] {
[
(
b"2024-",
Timestamp {
year: 2024,
..Timestamp::default()
},
),
(
b"2024-06-",
Timestamp {
year: 2024,
month: Some(6),
..Timestamp::default()
},
),
(
b"2024--",
Timestamp {
year: 2024,
..Timestamp::default()
},
),
(
b"2024- -",
Timestamp {
year: 2024,
..Timestamp::default()
},
),
(
b"2024-06-03T",
Timestamp {
year: 2024,
month: Some(6),
day: Some(3),
..Timestamp::default()
},
),
(
b"2024:06",
Timestamp {
year: 2024,
..Timestamp::default()
},
),
(
b"2024-0-",
];
Timestamp {
year: 2024,
month: Some(0),
..Timestamp::default()
},
),
]
}
for timestamp in broken_timestamps {
let parsed_timestamp = Timestamp::parse(&mut &timestamp[..], ParsingMode::BestAttempt);
#[test_log::test]
fn reject_broken_timestamps_strict() {
for (timestamp, _) in broken_timestamps() {
let parsed_timestamp = Timestamp::parse(&mut &timestamp[..], ParsingMode::Strict);
assert!(parsed_timestamp.is_err());
}
}
#[test_log::test]
fn accept_broken_timestamps_best_attempt() {
for (timestamp, partial_result) in broken_timestamps() {
let parsed_timestamp = Timestamp::parse(&mut &timestamp[..], ParsingMode::BestAttempt);
assert!(parsed_timestamp.is_ok());
assert_eq!(
parsed_timestamp.unwrap(),
Some(partial_result),
"{}",
timestamp.escape_ascii()
);
}
}
#[test_log::test]
fn timestamp_decode_partial() {
let partial_timestamps: [(&[u8], Timestamp); 6] = [
@ -466,4 +549,25 @@ mod tests {
assert_eq!(parsed_timestamp, Some(expected));
}
}
#[test_log::test]
fn timestamp_no_time_marker() {
let timestamp = "2024-06-03 14:08:49";
let parsed_timestamp_strict =
Timestamp::parse(&mut timestamp.as_bytes(), ParsingMode::Strict);
assert!(parsed_timestamp_strict.is_err());
let parsed_timestamp_best_attempt =
Timestamp::parse(&mut timestamp.as_bytes(), ParsingMode::BestAttempt).unwrap();
assert_eq!(
parsed_timestamp_best_attempt,
Some(Timestamp {
year: 2024,
month: Some(6),
day: Some(3),
..Timestamp::default()
})
);
}
}