mirror of
https://github.com/Serial-ATA/lofty-rs
synced 2024-12-04 18:09:11 +00:00
Timestamp: Allow short-circuiting on partially invalid inputs
This commit is contained in:
parent
98da062776
commit
7571d968e9
3 changed files with 153 additions and 29 deletions
|
@ -28,7 +28,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
### Fixed
|
||||
- **MusePack**: Fix potential panic when the beginning silence makes up the entire sample count ([PR](https://github.com/Serial-ATA/lofty-rs/pull/449))
|
||||
- **Timestamp**: Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/issues/453))
|
||||
- **Timestamp**:
|
||||
- Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/453))
|
||||
- `Timestamp::parse` will now short-circuit when possible in `ParsingMode::{BestAttempt, Relaxed}` ([issue](https://github.com/Serial-ATA/lofty-rs/issues/462)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/463))
|
||||
- For example, the timestamp "2024-06-03 14:08:49" contains a space instead of the required "T" marker.
|
||||
In `ParsingMode::Strict`, this would be an error. Otherwise, the parser will just stop once it hits the space
|
||||
and return the timestamp up to that point.
|
||||
- **ID3v2**:
|
||||
- `ItemKey::Director` will now be written correctly as a TXXX frame ([PR](https://github.com/Serial-ATA/lofty-rs/issues/454))
|
||||
- When skipping invalid frames in `ParsingMode::{BestAttempt, Relaxed}`, the parser will no longer be able to go out of the bounds
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::config::ParsingMode;
|
||||
use crate::error::{ErrorKind, LoftyError, Result};
|
||||
use crate::error::Result;
|
||||
use crate::id3::v2::{FrameFlags, FrameHeader, FrameId};
|
||||
use crate::macros::err;
|
||||
use crate::tag::items::Timestamp;
|
||||
|
@ -77,14 +77,18 @@ impl<'a> TimestampFrame<'a> {
|
|||
return Ok(None);
|
||||
};
|
||||
let Some(encoding) = TextEncoding::from_u8(encoding_byte) else {
|
||||
return Err(LoftyError::new(ErrorKind::TextDecode(
|
||||
"Found invalid encoding",
|
||||
)));
|
||||
if parse_mode != ParsingMode::Relaxed {
|
||||
err!(TextDecode("Found invalid encoding"))
|
||||
}
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let value = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
|
||||
if !value.is_ascii() {
|
||||
err!(BadTimestamp("Timestamp contains non-ASCII characters"))
|
||||
if parse_mode == ParsingMode::Strict {
|
||||
err!(BadTimestamp("Timestamp contains non-ASCII characters"))
|
||||
}
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let header = FrameHeader::new(id, frame_flags);
|
||||
|
@ -96,7 +100,18 @@ impl<'a> TimestampFrame<'a> {
|
|||
|
||||
let reader = &mut value.as_bytes();
|
||||
|
||||
let Some(timestamp) = Timestamp::parse(reader, parse_mode)? else {
|
||||
let result;
|
||||
match Timestamp::parse(reader, parse_mode) {
|
||||
Ok(timestamp) => result = timestamp,
|
||||
Err(e) => {
|
||||
if parse_mode != ParsingMode::Relaxed {
|
||||
return Err(e);
|
||||
}
|
||||
return Ok(None);
|
||||
},
|
||||
}
|
||||
|
||||
let Some(timestamp) = result else {
|
||||
// Timestamp is empty
|
||||
return Ok(None);
|
||||
};
|
||||
|
@ -105,7 +120,7 @@ impl<'a> TimestampFrame<'a> {
|
|||
Ok(Some(frame))
|
||||
}
|
||||
|
||||
/// Convert an [`TimestampFrame`] to a byte vec
|
||||
/// Convert a [`TimestampFrame`] to a byte vec
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
|
|
|
@ -98,7 +98,7 @@ impl Timestamp {
|
|||
match $expr {
|
||||
Ok((_, 0)) => break,
|
||||
Ok((val, _)) => Some(val as u8),
|
||||
Err(e) => return Err(e.into()),
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -126,7 +126,7 @@ impl Timestamp {
|
|||
|
||||
let reader = &mut &content[..];
|
||||
|
||||
// We need to very that the year is exactly 4 bytes long. This doesn't matter for other segments.
|
||||
// We need to verify that the year is exactly 4 bytes long. This doesn't matter for other segments.
|
||||
let (year, bytes_read) = Self::segment::<4>(reader, None, parse_mode)?;
|
||||
if bytes_read != 4 {
|
||||
err!(BadTimestamp(
|
||||
|
@ -173,22 +173,31 @@ impl Timestamp {
|
|||
sep: Option<u8>,
|
||||
parse_mode: ParsingMode,
|
||||
) -> Result<(u16, usize)> {
|
||||
const STOP_PARSING: (u16, usize) = (0, 0);
|
||||
|
||||
if content.is_empty() {
|
||||
return Ok((0, 0));
|
||||
return Ok(STOP_PARSING);
|
||||
}
|
||||
|
||||
if let Some(sep) = sep {
|
||||
let byte = content.read_u8()?;
|
||||
if byte != sep {
|
||||
err!(BadTimestamp("Expected a separator"))
|
||||
if parse_mode == ParsingMode::Strict {
|
||||
err!(BadTimestamp("Expected a separator"))
|
||||
}
|
||||
return Ok(STOP_PARSING);
|
||||
}
|
||||
}
|
||||
|
||||
if content.len() < SIZE {
|
||||
err!(BadTimestamp("Timestamp segment is too short"))
|
||||
if parse_mode == ParsingMode::Strict {
|
||||
err!(BadTimestamp("Timestamp segment is too short"))
|
||||
}
|
||||
|
||||
return Ok(STOP_PARSING);
|
||||
}
|
||||
|
||||
let mut num = 0;
|
||||
let mut num = None;
|
||||
let mut byte_count = 0;
|
||||
for i in content[..SIZE].iter().copied() {
|
||||
// Common spec violation: Timestamps may use spaces instead of zeros, so the month of June
|
||||
|
@ -202,6 +211,8 @@ impl Timestamp {
|
|||
continue;
|
||||
}
|
||||
|
||||
// TODO: This is a spec violation for ID3v2, but not for ISO 8601 in general. Maybe consider
|
||||
// making this a warning and allow it for all parsing modes?
|
||||
if !i.is_ascii_digit() {
|
||||
// Another spec violation, timestamps in the wild may not use a zero or a space, so
|
||||
// we would have to treat "06", "6", and " 6" as valid.
|
||||
|
@ -220,13 +231,23 @@ impl Timestamp {
|
|||
))
|
||||
}
|
||||
|
||||
num = num * 10 + u16::from(i - b'0');
|
||||
num = Some(num.unwrap_or(0) * 10 + u16::from(i - b'0'));
|
||||
byte_count += 1;
|
||||
}
|
||||
|
||||
let Some(parsed_num) = num else {
|
||||
assert_ne!(
|
||||
parse_mode,
|
||||
ParsingMode::Strict,
|
||||
"The timestamp segment is empty, the parser should've failed before this point."
|
||||
);
|
||||
|
||||
return Ok(STOP_PARSING);
|
||||
};
|
||||
|
||||
*content = &content[byte_count..];
|
||||
|
||||
Ok((num, byte_count))
|
||||
Ok((parsed_num, byte_count))
|
||||
}
|
||||
|
||||
pub(crate) fn verify(&self) -> Result<()> {
|
||||
|
@ -316,24 +337,86 @@ mod tests {
|
|||
assert_eq!(timestamp.to_string().len(), 7);
|
||||
}
|
||||
|
||||
#[test_log::test]
|
||||
fn reject_broken_timestamps() {
|
||||
let broken_timestamps: &[&[u8]] = &[
|
||||
b"2024-",
|
||||
b"2024-06-",
|
||||
b"2024--",
|
||||
b"2024- -",
|
||||
b"2024-06-03T",
|
||||
b"2024:06",
|
||||
b"2024-0-",
|
||||
];
|
||||
fn broken_timestamps() -> [(&'static [u8], Timestamp); 7] {
|
||||
[
|
||||
(
|
||||
b"2024-",
|
||||
Timestamp {
|
||||
year: 2024,
|
||||
..Timestamp::default()
|
||||
},
|
||||
),
|
||||
(
|
||||
b"2024-06-",
|
||||
Timestamp {
|
||||
year: 2024,
|
||||
month: Some(6),
|
||||
..Timestamp::default()
|
||||
},
|
||||
),
|
||||
(
|
||||
b"2024--",
|
||||
Timestamp {
|
||||
year: 2024,
|
||||
..Timestamp::default()
|
||||
},
|
||||
),
|
||||
(
|
||||
b"2024- -",
|
||||
Timestamp {
|
||||
year: 2024,
|
||||
..Timestamp::default()
|
||||
},
|
||||
),
|
||||
(
|
||||
b"2024-06-03T",
|
||||
Timestamp {
|
||||
year: 2024,
|
||||
month: Some(6),
|
||||
day: Some(3),
|
||||
..Timestamp::default()
|
||||
},
|
||||
),
|
||||
(
|
||||
b"2024:06",
|
||||
Timestamp {
|
||||
year: 2024,
|
||||
..Timestamp::default()
|
||||
},
|
||||
),
|
||||
(
|
||||
b"2024-0-",
|
||||
Timestamp {
|
||||
year: 2024,
|
||||
month: Some(0),
|
||||
..Timestamp::default()
|
||||
},
|
||||
),
|
||||
]
|
||||
}
|
||||
|
||||
for timestamp in broken_timestamps {
|
||||
let parsed_timestamp = Timestamp::parse(&mut ×tamp[..], ParsingMode::BestAttempt);
|
||||
#[test_log::test]
|
||||
fn reject_broken_timestamps_strict() {
|
||||
for (timestamp, _) in broken_timestamps() {
|
||||
let parsed_timestamp = Timestamp::parse(&mut ×tamp[..], ParsingMode::Strict);
|
||||
assert!(parsed_timestamp.is_err());
|
||||
}
|
||||
}
|
||||
|
||||
#[test_log::test]
|
||||
fn accept_broken_timestamps_best_attempt() {
|
||||
for (timestamp, partial_result) in broken_timestamps() {
|
||||
let parsed_timestamp = Timestamp::parse(&mut ×tamp[..], ParsingMode::BestAttempt);
|
||||
assert!(parsed_timestamp.is_ok());
|
||||
assert_eq!(
|
||||
parsed_timestamp.unwrap(),
|
||||
Some(partial_result),
|
||||
"{}",
|
||||
timestamp.escape_ascii()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test_log::test]
|
||||
fn timestamp_decode_partial() {
|
||||
let partial_timestamps: [(&[u8], Timestamp); 6] = [
|
||||
|
@ -466,4 +549,25 @@ mod tests {
|
|||
assert_eq!(parsed_timestamp, Some(expected));
|
||||
}
|
||||
}
|
||||
|
||||
#[test_log::test]
|
||||
fn timestamp_no_time_marker() {
|
||||
let timestamp = "2024-06-03 14:08:49";
|
||||
|
||||
let parsed_timestamp_strict =
|
||||
Timestamp::parse(&mut timestamp.as_bytes(), ParsingMode::Strict);
|
||||
assert!(parsed_timestamp_strict.is_err());
|
||||
|
||||
let parsed_timestamp_best_attempt =
|
||||
Timestamp::parse(&mut timestamp.as_bytes(), ParsingMode::BestAttempt).unwrap();
|
||||
assert_eq!(
|
||||
parsed_timestamp_best_attempt,
|
||||
Some(Timestamp {
|
||||
year: 2024,
|
||||
month: Some(6),
|
||||
day: Some(3),
|
||||
..Timestamp::default()
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue