mirror of
https://github.com/Serial-ATA/lofty-rs
synced 2024-11-10 06:34:18 +00:00
ID3v2: Parse timestamp frames
This commit is contained in:
parent
c1f117f5a4
commit
0f8ce7a4f0
11 changed files with 591 additions and 6 deletions
|
@ -49,6 +49,8 @@ pub enum ErrorKind {
|
|||
FakeTag,
|
||||
/// Errors that arise while decoding text
|
||||
TextDecode(&'static str),
|
||||
/// Arises when decoding OR encoding a problematic [`Timestamp`](crate::tag::items::Timestamp)
|
||||
BadTimestamp(&'static str),
|
||||
/// Errors that arise while reading/writing ID3v2 tags
|
||||
Id3v2(Id3v2Error),
|
||||
|
||||
|
@ -66,6 +68,8 @@ pub enum ErrorKind {
|
|||
StrFromUtf8(std::str::Utf8Error),
|
||||
/// Represents all cases of [`std::io::Error`].
|
||||
Io(std::io::Error),
|
||||
/// Represents all cases of [`std::fmt::Error`].
|
||||
Fmt(std::fmt::Error),
|
||||
/// Failure to allocate enough memory
|
||||
Alloc(TryReserveError),
|
||||
/// This should **never** be encountered
|
||||
|
@ -477,6 +481,14 @@ impl From<std::io::Error> for LoftyError {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<std::fmt::Error> for LoftyError {
|
||||
fn from(input: std::fmt::Error) -> Self {
|
||||
Self {
|
||||
kind: ErrorKind::Fmt(input),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::string::FromUtf8Error> for LoftyError {
|
||||
fn from(input: std::string::FromUtf8Error) -> Self {
|
||||
Self {
|
||||
|
@ -517,6 +529,7 @@ impl Display for LoftyError {
|
|||
ErrorKind::StringFromUtf8(ref err) => write!(f, "{err}"),
|
||||
ErrorKind::StrFromUtf8(ref err) => write!(f, "{err}"),
|
||||
ErrorKind::Io(ref err) => write!(f, "{err}"),
|
||||
ErrorKind::Fmt(ref err) => write!(f, "{err}"),
|
||||
ErrorKind::Alloc(ref err) => write!(f, "{err}"),
|
||||
|
||||
ErrorKind::UnknownFormat => {
|
||||
|
@ -532,6 +545,9 @@ impl Display for LoftyError {
|
|||
),
|
||||
ErrorKind::FakeTag => write!(f, "Reading: Expected a tag, found invalid data"),
|
||||
ErrorKind::TextDecode(message) => write!(f, "Text decoding: {message}"),
|
||||
ErrorKind::BadTimestamp(message) => {
|
||||
write!(f, "Encountered an invalid timestamp: {message}")
|
||||
},
|
||||
ErrorKind::Id3v2(ref id3v2_err) => write!(f, "{id3v2_err}"),
|
||||
ErrorKind::BadAtom(message) => write!(f, "MP4 Atom: {message}"),
|
||||
ErrorKind::AtomMismatch => write!(
|
||||
|
|
|
@ -5,7 +5,8 @@ use crate::id3::v2::header::Id3v2Version;
|
|||
use crate::id3::v2::items::{
|
||||
AttachedPictureFrame, CommentFrame, EventTimingCodesFrame, ExtendedTextFrame, ExtendedUrlFrame,
|
||||
KeyValueFrame, OwnershipFrame, Popularimeter, PrivateFrame, RelativeVolumeAdjustmentFrame,
|
||||
TextInformationFrame, UniqueFileIdentifierFrame, UnsynchronizedTextFrame, UrlLinkFrame,
|
||||
TextInformationFrame, TimestampFrame, UniqueFileIdentifierFrame, UnsynchronizedTextFrame,
|
||||
UrlLinkFrame,
|
||||
};
|
||||
use crate::macros::err;
|
||||
use crate::util::text::TextEncoding;
|
||||
|
@ -41,6 +42,7 @@ pub(super) fn parse_content<R: Read>(
|
|||
"WFED" | "GRP1" | "MVNM" | "MVIN" => TextInformationFrame::parse(reader, version)?.map(FrameValue::Text),
|
||||
_ if id.starts_with('W') => UrlLinkFrame::parse(reader)?.map(FrameValue::Url),
|
||||
"POPM" => Some(FrameValue::Popularimeter(Popularimeter::parse(reader)?)),
|
||||
"TDEN" | "TDOR" | "TDRC" | "TDRL" | "TDTG" => TimestampFrame::parse(reader, parse_mode)?.map(FrameValue::Timestamp),
|
||||
// SYLT, GEOB, and any unknown frames
|
||||
_ => {
|
||||
let mut content = Vec::new();
|
||||
|
|
|
@ -7,7 +7,8 @@ use super::header::Id3v2Version;
|
|||
use super::items::{
|
||||
AttachedPictureFrame, CommentFrame, EventTimingCodesFrame, ExtendedTextFrame, ExtendedUrlFrame,
|
||||
KeyValueFrame, OwnershipFrame, Popularimeter, PrivateFrame, RelativeVolumeAdjustmentFrame,
|
||||
TextInformationFrame, UniqueFileIdentifierFrame, UnsynchronizedTextFrame, UrlLinkFrame,
|
||||
TextInformationFrame, TimestampFrame, UniqueFileIdentifierFrame, UnsynchronizedTextFrame,
|
||||
UrlLinkFrame,
|
||||
};
|
||||
use super::util::upgrade::{upgrade_v2, upgrade_v3};
|
||||
use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
|
||||
|
@ -189,6 +190,8 @@ pub enum FrameValue {
|
|||
EventTimingCodes(EventTimingCodesFrame),
|
||||
/// Represents a "PRIV" frame
|
||||
Private(PrivateFrame),
|
||||
/// Represents a timestamp for the "TDEN", "TDOR", "TDRC", "TDRL", and "TDTG" frames
|
||||
Timestamp(TimestampFrame),
|
||||
/// Binary data
|
||||
///
|
||||
/// NOTES:
|
||||
|
@ -220,7 +223,8 @@ impl FrameValue {
|
|||
FrameValue::Binary(binary) => binary.is_empty(),
|
||||
FrameValue::Popularimeter(_)
|
||||
| FrameValue::RelativeVolumeAdjustment(_)
|
||||
| FrameValue::Ownership(_) => {
|
||||
| FrameValue::Ownership(_)
|
||||
| FrameValue::Timestamp(_) => {
|
||||
// Undefined.
|
||||
return None;
|
||||
},
|
||||
|
@ -336,6 +340,12 @@ impl From<PrivateFrame> for FrameValue {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<TimestampFrame> for FrameValue {
|
||||
fn from(value: TimestampFrame) -> Self {
|
||||
Self::Timestamp(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl FrameValue {
|
||||
pub(super) fn as_bytes(&self) -> Result<Vec<u8>> {
|
||||
Ok(match self {
|
||||
|
@ -353,6 +363,7 @@ impl FrameValue {
|
|||
FrameValue::Ownership(frame) => frame.as_bytes()?,
|
||||
FrameValue::EventTimingCodes(frame) => frame.as_bytes(),
|
||||
FrameValue::Private(frame) => frame.as_bytes(),
|
||||
FrameValue::Timestamp(frame) => frame.as_bytes()?,
|
||||
FrameValue::Binary(binary) => binary.clone(),
|
||||
})
|
||||
}
|
||||
|
@ -374,6 +385,7 @@ impl FrameValue {
|
|||
FrameValue::Ownership(_) => "Ownership",
|
||||
FrameValue::EventTimingCodes(_) => "EventTimingCodes",
|
||||
FrameValue::Private(_) => "Private",
|
||||
FrameValue::Timestamp(_) => "Timestamp",
|
||||
FrameValue::Binary(_) => "Binary",
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@ mod private_frame;
|
|||
mod relative_volume_adjustment_frame;
|
||||
mod sync_text;
|
||||
mod text_information_frame;
|
||||
mod timestamp_frame;
|
||||
mod unique_file_identifier;
|
||||
mod url_link_frame;
|
||||
|
||||
|
@ -31,5 +32,6 @@ pub use relative_volume_adjustment_frame::{
|
|||
};
|
||||
pub use sync_text::{SyncTextContentType, SynchronizedText, TimestampFormat};
|
||||
pub use text_information_frame::TextInformationFrame;
|
||||
pub use timestamp_frame::TimestampFrame;
|
||||
pub use unique_file_identifier::UniqueFileIdentifierFrame;
|
||||
pub use url_link_frame::UrlLinkFrame;
|
||||
|
|
92
lofty/src/id3/v2/items/timestamp_frame.rs
Normal file
92
lofty/src/id3/v2/items/timestamp_frame.rs
Normal file
|
@ -0,0 +1,92 @@
|
|||
use crate::config::ParsingMode;
|
||||
use crate::error::{ErrorKind, LoftyError, Result};
|
||||
use crate::macros::err;
|
||||
use crate::tag::items::Timestamp;
|
||||
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
|
||||
|
||||
use std::io::Read;
|
||||
|
||||
use byteorder::ReadBytesExt;
|
||||
|
||||
/// An `ID3v2` timestamp frame
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
#[allow(missing_docs)]
|
||||
pub struct TimestampFrame {
|
||||
pub encoding: TextEncoding,
|
||||
pub timestamp: Timestamp,
|
||||
}
|
||||
|
||||
impl PartialOrd for TimestampFrame {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for TimestampFrame {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.timestamp.cmp(&other.timestamp)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for TimestampFrame {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
encoding: TextEncoding::UTF8,
|
||||
timestamp: Timestamp::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TimestampFrame {
|
||||
/// Read a [`TimestampFrame`]
|
||||
///
|
||||
/// NOTE: This expects the frame header to have already been skipped
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// * Failure to read from `reader`
|
||||
#[allow(clippy::never_loop)]
|
||||
pub fn parse<R>(reader: &mut R, parse_mode: ParsingMode) -> Result<Option<Self>>
|
||||
where
|
||||
R: Read,
|
||||
{
|
||||
let Ok(encoding_byte) = reader.read_u8() else {
|
||||
return Ok(None);
|
||||
};
|
||||
let Some(encoding) = TextEncoding::from_u8(encoding_byte) else {
|
||||
return Err(LoftyError::new(ErrorKind::TextDecode(
|
||||
"Found invalid encoding",
|
||||
)));
|
||||
};
|
||||
|
||||
let value = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
|
||||
if !value.is_ascii() {
|
||||
err!(BadTimestamp("Timestamp contains non-ASCII characters"))
|
||||
}
|
||||
|
||||
let mut frame = TimestampFrame {
|
||||
encoding,
|
||||
timestamp: Timestamp::default(),
|
||||
};
|
||||
|
||||
let reader = &mut value.as_bytes();
|
||||
|
||||
frame.timestamp = Timestamp::parse(reader, parse_mode)?;
|
||||
Ok(Some(frame))
|
||||
}
|
||||
|
||||
/// Convert an [`TimestampFrame`] to a byte vec
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// * The timestamp is invalid
|
||||
/// * Failure to write to the buffer
|
||||
pub fn as_bytes(&self) -> Result<Vec<u8>> {
|
||||
self.timestamp.verify()?;
|
||||
|
||||
let mut encoded_text = encode_text(&self.timestamp.to_string(), self.encoding, false);
|
||||
encoded_text.insert(0, self.encoding as u8);
|
||||
|
||||
Ok(encoded_text)
|
||||
}
|
||||
}
|
|
@ -16,7 +16,7 @@ use crate::id3::v2::util::mappings::TIPL_MAPPINGS;
|
|||
use crate::id3::v2::util::pairs::{
|
||||
format_number_pair, set_number, NUMBER_PAIR_KEYS, NUMBER_PAIR_SEPARATOR,
|
||||
};
|
||||
use crate::id3::v2::KeyValueFrame;
|
||||
use crate::id3::v2::{KeyValueFrame, TimestampFrame};
|
||||
use crate::mp4::AdvisoryRating;
|
||||
use crate::picture::{Picture, PictureType, TOMBSTONE_PICTURE};
|
||||
use crate::tag::{
|
||||
|
@ -29,7 +29,9 @@ use crate::util::text::{decode_text, TextDecodeOptions, TextEncoding};
|
|||
use std::borrow::Cow;
|
||||
use std::io::{Cursor, Write};
|
||||
use std::ops::Deref;
|
||||
use std::str::FromStr;
|
||||
|
||||
use crate::tag::items::Timestamp;
|
||||
use lofty_attr::tag;
|
||||
|
||||
const USER_DEFINED_TEXT_FRAME_ID: &str = "TXXX";
|
||||
|
@ -1201,6 +1203,20 @@ impl SplitTag for Id3v2Tag {
|
|||
// round trips?
|
||||
return true; // Keep frame
|
||||
},
|
||||
FrameValue::Timestamp(frame)
|
||||
if !matches!(item_key, ItemKey::Unknown(_)) =>
|
||||
{
|
||||
if frame.timestamp.verify().is_err() {
|
||||
return true; // Keep frame
|
||||
}
|
||||
|
||||
tag.items.push(TagItem::new(
|
||||
item_key,
|
||||
ItemValue::Text(frame.timestamp.to_string()),
|
||||
));
|
||||
|
||||
return false; // Frame consumed
|
||||
},
|
||||
FrameValue::Text(TextInformationFrame { value: content, .. }) => {
|
||||
for c in content.split(V4_MULTI_VALUE_SEPARATOR) {
|
||||
tag.items.push(TagItem::new(
|
||||
|
@ -1227,7 +1243,8 @@ impl SplitTag for Id3v2Tag {
|
|||
| FrameValue::RelativeVolumeAdjustment(_)
|
||||
| FrameValue::Ownership(_)
|
||||
| FrameValue::EventTimingCodes(_)
|
||||
| FrameValue::Private(_) => {
|
||||
| FrameValue::Private(_)
|
||||
| FrameValue::Timestamp(_) => {
|
||||
return true; // Keep unsupported frame
|
||||
},
|
||||
};
|
||||
|
@ -1442,6 +1459,40 @@ impl MergeTag for SplitTagRemainder {
|
|||
}
|
||||
}
|
||||
|
||||
// Timestamps
|
||||
for item_key in [&ItemKey::RecordingDate, &ItemKey::OriginalReleaseDate] {
|
||||
let Some(text) = tag.take_strings(item_key).next() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let frame_id = item_key
|
||||
.map_key(TagType::Id3v2, false)
|
||||
.expect("valid frame id");
|
||||
|
||||
let frame_value;
|
||||
match Timestamp::from_str(&text) {
|
||||
Ok(timestamp) => {
|
||||
frame_value = FrameValue::Timestamp(TimestampFrame {
|
||||
encoding: TextEncoding::UTF8,
|
||||
timestamp,
|
||||
})
|
||||
},
|
||||
Err(_) => {
|
||||
// We can just preserve it as a text frame
|
||||
frame_value = FrameValue::Text(TextInformationFrame {
|
||||
encoding: TextEncoding::UTF8,
|
||||
value: text,
|
||||
});
|
||||
},
|
||||
}
|
||||
|
||||
merged.insert(Frame {
|
||||
id: FrameId::Valid(Cow::Borrowed(frame_id)),
|
||||
value: frame_value,
|
||||
flags: FrameFlags::default(),
|
||||
});
|
||||
}
|
||||
|
||||
// Insert all remaining items as single frames and deduplicate as needed
|
||||
for item in tag.items {
|
||||
merged.insert_item(item);
|
||||
|
|
|
@ -2,7 +2,9 @@ use crate::config::ParsingMode;
|
|||
use crate::id3::v2::header::Id3v2Header;
|
||||
use crate::id3::v2::items::Popularimeter;
|
||||
use crate::id3::v2::util::pairs::DEFAULT_NUMBER_IN_PAIR;
|
||||
use crate::id3::v2::TimestampFrame;
|
||||
use crate::picture::MimeType;
|
||||
use crate::tag::items::Timestamp;
|
||||
use crate::tag::utils::test_utils::read_path;
|
||||
|
||||
use super::*;
|
||||
|
@ -1354,3 +1356,50 @@ fn itunes_advisory_roundtrip() {
|
|||
|
||||
assert_eq!(tag.advisory_rating(), Some(AdvisoryRating::Explicit));
|
||||
}
|
||||
|
||||
// An `Id3v2Tag` holding a "TDRC" timestamp frame must survive the trip through a generic `Tag`.
#[test]
fn timestamp_roundtrip() {
    let timestamp_frame = Frame::new(
        "TDRC",
        FrameValue::Timestamp(TimestampFrame {
            encoding: TextEncoding::UTF8,
            timestamp: Timestamp {
                year: 2024,
                month: Some(6),
                day: Some(3),
                hour: Some(14),
                minute: Some(8),
                second: Some(49),
            },
        }),
        FrameFlags::default(),
    )
    .unwrap();

    let mut id3v2 = Id3v2Tag::default();
    id3v2.insert(timestamp_frame);

    // ID3v2 -> generic tag
    let generic: Tag = id3v2.into();
    assert_eq!(generic.len(), 1);
    assert_eq!(
        generic.get_string(&ItemKey::RecordingDate),
        Some("2024-06-03T14:08:49")
    );

    // generic tag -> ID3v2
    let id3v2: Id3v2Tag = generic.into();
    assert_eq!(id3v2.frames.len(), 1);

    let roundtripped = id3v2.frames.first().unwrap();
    assert_eq!(roundtripped.id, FrameId::Valid(Cow::Borrowed("TDRC")));

    let FrameValue::Timestamp(TimestampFrame { timestamp, .. }) = &roundtripped.value else {
        panic!("Expected a TimestampFrame")
    };

    assert_eq!(timestamp.year, 2024);
    assert_eq!(timestamp.month, Some(6));
    assert_eq!(timestamp.day, Some(3));
    assert_eq!(timestamp.hour, Some(14));
    assert_eq!(timestamp.minute, Some(8));
    assert_eq!(timestamp.second, Some(49));
}
|
||||
|
|
|
@ -34,7 +34,8 @@ fn verify_frame(frame: &FrameRef<'_>) -> Result<()> {
|
|||
| ("UFID", FrameValue::UniqueFileIdentifier(_))
|
||||
| ("POPM", FrameValue::Popularimeter(_))
|
||||
| ("TIPL" | "TMCL", FrameValue::KeyValue { .. })
|
||||
| ("WFED" | "GRP1" | "MVNM" | "MVIN", FrameValue::Text { .. }) => Ok(()),
|
||||
| ("WFED" | "GRP1" | "MVNM" | "MVIN", FrameValue::Text { .. })
|
||||
| ("TDEN" | "TDOR" | "TDRC" | "TDRL" | "TDTG", FrameValue::Timestamp(_)) => Ok(()),
|
||||
(id, FrameValue::Text { .. }) if id.starts_with('T') => Ok(()),
|
||||
(id, FrameValue::Url(_)) if id.starts_with('W') => Ok(()),
|
||||
(id, frame_value) => Err(Id3v2Error::new(Id3v2ErrorKind::BadFrame(
|
||||
|
|
5
lofty/src/tag/items/mod.rs
Normal file
5
lofty/src/tag/items/mod.rs
Normal file
|
@ -0,0 +1,5 @@
|
|||
//! Various generic representations of tag items
|
||||
|
||||
mod timestamp;
|
||||
|
||||
pub use timestamp::Timestamp;
|
354
lofty/src/tag/items/timestamp.rs
Normal file
354
lofty/src/tag/items/timestamp.rs
Normal file
|
@ -0,0 +1,354 @@
|
|||
use crate::config::ParsingMode;
|
||||
use crate::error::{ErrorKind, LoftyError, Result};
|
||||
|
||||
use std::fmt::Display;
|
||||
use std::io::Read;
|
||||
use std::str::FromStr;
|
||||
|
||||
use crate::macros::err;
|
||||
use byteorder::ReadBytesExt;
|
||||
|
||||
/// A subset of the ISO 8601 timestamp format
///
/// Only the year is mandatory. Every other segment is optional, and is only
/// meaningful when the segment before it is present as well (a day needs a
/// month, an hour needs a day, and so on).
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Default)]
pub struct Timestamp {
    /// The year, at most `9999` in a valid timestamp
    pub year: u16,
    /// The month, at most `12` in a valid timestamp
    pub month: Option<u8>,
    /// The day of the month, at most `31` in a valid timestamp
    pub day: Option<u8>,
    /// The hour, at most `23` in a valid timestamp
    pub hour: Option<u8>,
    /// The minute, at most `59` in a valid timestamp
    pub minute: Option<u8>,
    /// The second, at most `59` in a valid timestamp
    pub second: Option<u8>,
}
|
||||
|
||||
impl PartialOrd for Timestamp {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for Timestamp {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.year
|
||||
.cmp(&other.year)
|
||||
.then(self.month.cmp(&other.month))
|
||||
.then(self.day.cmp(&other.day))
|
||||
.then(self.hour.cmp(&other.hour))
|
||||
.then(self.minute.cmp(&other.minute))
|
||||
.then(self.second.cmp(&other.second))
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Timestamp {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:04}", self.year)?;
|
||||
|
||||
if let Some(month) = self.month {
|
||||
write!(f, "-{:02}", month)?;
|
||||
|
||||
if let Some(day) = self.day {
|
||||
write!(f, "-{:02}", day)?;
|
||||
|
||||
if let Some(hour) = self.hour {
|
||||
write!(f, "T{:02}", hour)?;
|
||||
|
||||
if let Some(minute) = self.minute {
|
||||
write!(f, ":{:02}", minute)?;
|
||||
|
||||
if let Some(second) = self.second {
|
||||
write!(f, ":{:02}", second)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Timestamp {
|
||||
type Err = LoftyError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self> {
|
||||
Timestamp::parse(&mut s.as_bytes(), ParsingMode::BestAttempt)
|
||||
}
|
||||
}
|
||||
|
||||
impl Timestamp {
|
||||
/// The maximum length of a timestamp in bytes
|
||||
pub const MAX_LENGTH: usize = 19;
|
||||
|
||||
/// Read a [`Timestamp`]
|
||||
///
|
||||
/// NOTE: This will take [`Self::MAX_LENGTH`] bytes from the reader. Ensure that it only contains the timestamp
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// * Failure to read from `reader`
|
||||
/// * The timestamp is invalid
|
||||
pub fn parse<R>(reader: &mut R, parse_mode: ParsingMode) -> Result<Self>
|
||||
where
|
||||
R: Read,
|
||||
{
|
||||
macro_rules! read_segment {
|
||||
($expr:expr) => {
|
||||
match $expr {
|
||||
Ok((val, _)) => Some(val as u8),
|
||||
Err(LoftyError {
|
||||
kind: ErrorKind::Io(io),
|
||||
}) if matches!(io.kind(), std::io::ErrorKind::UnexpectedEof) => break,
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
let mut timestamp = Timestamp::default();
|
||||
|
||||
let mut content = Vec::with_capacity(Self::MAX_LENGTH);
|
||||
reader
|
||||
.take(Self::MAX_LENGTH as u64)
|
||||
.read_to_end(&mut content)?;
|
||||
|
||||
let reader = &mut &content[..];
|
||||
|
||||
// We need to very that the year is exactly 4 bytes long. This doesn't matter for other segments.
|
||||
let (year, bytes_read) = Self::segment::<4>(reader, None, parse_mode)?;
|
||||
if bytes_read != 4 {
|
||||
err!(BadTimestamp(
|
||||
"Encountered an invalid year length (should be 4 digits)"
|
||||
))
|
||||
}
|
||||
|
||||
timestamp.year = year;
|
||||
|
||||
#[allow(clippy::never_loop)]
|
||||
loop {
|
||||
timestamp.month = read_segment!(Self::segment::<2>(reader, Some(b'-'), parse_mode));
|
||||
timestamp.day = read_segment!(Self::segment::<2>(reader, Some(b'-'), parse_mode));
|
||||
timestamp.hour = read_segment!(Self::segment::<2>(reader, Some(b'T'), parse_mode));
|
||||
timestamp.minute = read_segment!(Self::segment::<2>(reader, Some(b':'), parse_mode));
|
||||
timestamp.second = read_segment!(Self::segment::<2>(reader, Some(b':'), parse_mode));
|
||||
break;
|
||||
}
|
||||
|
||||
Ok(timestamp)
|
||||
}
|
||||
|
||||
fn segment<const SIZE: usize>(
|
||||
content: &mut &[u8],
|
||||
sep: Option<u8>,
|
||||
parse_mode: ParsingMode,
|
||||
) -> Result<(u16, usize)> {
|
||||
const SEPARATORS: [u8; 3] = [b'-', b'T', b':'];
|
||||
|
||||
if let Some(sep) = sep {
|
||||
let byte = content.read_u8()?;
|
||||
if byte != sep {
|
||||
err!(BadTimestamp("Expected a separator"))
|
||||
}
|
||||
}
|
||||
|
||||
if content.len() < SIZE {
|
||||
err!(BadTimestamp("Timestamp segment is too short"))
|
||||
}
|
||||
|
||||
let mut num = 0;
|
||||
let mut byte_count = 0;
|
||||
for i in content[..SIZE].iter().copied() {
|
||||
// Common spec violation: Timestamps may use spaces instead of zeros, so the month of June
|
||||
// could be written as " 6" rather than "06" for example.
|
||||
if i == b' ' {
|
||||
if parse_mode == ParsingMode::Strict {
|
||||
err!(BadTimestamp("Timestamp contains spaces"))
|
||||
}
|
||||
|
||||
byte_count += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if !i.is_ascii_digit() {
|
||||
// Another spec violation, timestamps in the wild may not use a zero or a space, so
|
||||
// we would have to treat "06", "6", and " 6" as valid.
|
||||
//
|
||||
// The easiest way to check for a missing digit is to see if we're just eating into
|
||||
// the next segment's separator.
|
||||
if sep.is_some() && SEPARATORS.contains(&i) && parse_mode != ParsingMode::Strict {
|
||||
break;
|
||||
}
|
||||
|
||||
err!(BadTimestamp(
|
||||
"Timestamp segment contains non-digit characters"
|
||||
))
|
||||
}
|
||||
|
||||
num = num * 10 + u16::from(i - b'0');
|
||||
byte_count += 1;
|
||||
}
|
||||
|
||||
*content = &content[byte_count..];
|
||||
|
||||
Ok((num, byte_count))
|
||||
}
|
||||
|
||||
pub(crate) fn verify(&self) -> Result<()> {
|
||||
fn verify_field(field: Option<u8>, limit: u8, parent: Option<u8>) -> bool {
|
||||
if let Some(field) = field {
|
||||
return parent.is_some() && field <= limit;
|
||||
}
|
||||
return true; // Field does not exist, so it's valid
|
||||
}
|
||||
|
||||
if self.year > 9999
|
||||
|| !verify_field(self.month, 12, Some(self.year as u8))
|
||||
|| !verify_field(self.day, 31, self.month)
|
||||
|| !verify_field(self.hour, 23, self.day)
|
||||
|| !verify_field(self.minute, 59, self.hour)
|
||||
|| !verify_field(self.second, 59, self.minute)
|
||||
{
|
||||
err!(BadTimestamp(
|
||||
"Timestamp contains segment(s) that exceed their limits"
|
||||
))
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::config::ParsingMode;
    use crate::tag::items::timestamp::Timestamp;

    // The fully specified timestamp used as the reference value throughout these tests
    fn expected() -> Timestamp {
        // 2024-06-03T14:08:49
        Timestamp {
            year: 2024,
            month: Some(6),
            day: Some(3),
            hour: Some(14),
            minute: Some(8),
            second: Some(49),
        }
    }

    #[test]
    fn timestamp_decode() {
        let content = "2024-06-03T14:08:49";
        let parsed_timestamp =
            Timestamp::parse(&mut content.as_bytes(), ParsingMode::Strict).unwrap();

        assert_eq!(parsed_timestamp, expected());
    }

    #[test]
    fn timestamp_decode_no_zero() {
        // Zeroes are not used
        let content = "2024-6-3T14:8:49";

        let parsed_timestamp =
            Timestamp::parse(&mut content.as_bytes(), ParsingMode::BestAttempt).unwrap();

        assert_eq!(parsed_timestamp, expected());
    }

    #[test]
    fn timestamp_decode_zero_substitution() {
        // Zeros are replaced by spaces
        let content = "2024- 6- 3T14: 8:49";

        let parsed_timestamp =
            Timestamp::parse(&mut content.as_bytes(), ParsingMode::BestAttempt).unwrap();

        assert_eq!(parsed_timestamp, expected());
    }

    #[test]
    fn timestamp_encode() {
        let encoded = expected().to_string();
        assert_eq!(encoded, "2024-06-03T14:08:49");
    }

    #[test]
    fn timestamp_encode_invalid() {
        let mut timestamp = expected();

        // Hour, minute, and second have a dependency on day
        timestamp.day = None;
        assert_eq!(timestamp.to_string().len(), 7);
    }

    #[test]
    fn reject_broken_timestamps() {
        let broken_timestamps: &[&[u8]] = &[
            b"2024-",
            b"2024-06-",
            b"2024--",
            b"2024- -",
            b"2024-06-03T",
            b"2024:06",
            b"2024-0-",
        ];

        for timestamp in broken_timestamps {
            let parsed_timestamp = Timestamp::parse(&mut &timestamp[..], ParsingMode::BestAttempt);
            assert!(parsed_timestamp.is_err());
        }
    }

    #[test]
    fn timestamp_decode_partial() {
        let partial_timestamps: [(&[u8], Timestamp); 6] = [
            (
                b"2024",
                Timestamp {
                    year: 2024,
                    ..Timestamp::default()
                },
            ),
            (
                b"2024-06",
                Timestamp {
                    year: 2024,
                    month: Some(6),
                    ..Timestamp::default()
                },
            ),
            (
                b"2024-06-03",
                Timestamp {
                    year: 2024,
                    month: Some(6),
                    day: Some(3),
                    ..Timestamp::default()
                },
            ),
            (
                b"2024-06-03T14",
                Timestamp {
                    year: 2024,
                    month: Some(6),
                    day: Some(3),
                    hour: Some(14),
                    ..Timestamp::default()
                },
            ),
            (
                b"2024-06-03T14:08",
                Timestamp {
                    year: 2024,
                    month: Some(6),
                    day: Some(3),
                    hour: Some(14),
                    minute: Some(8),
                    ..Timestamp::default()
                },
            ),
            (b"2024-06-03T14:08:49", expected()),
        ];

        for (data, expected) in partial_timestamps {
            let parsed_timestamp = Timestamp::parse(&mut &data[..], ParsingMode::Strict).unwrap();
            assert_eq!(parsed_timestamp, expected);
        }
    }
}
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
mod accessor;
|
||||
pub(crate) mod item;
|
||||
pub mod items;
|
||||
mod split_merge_tag;
|
||||
mod tag_ext;
|
||||
mod tag_type;
|
||||
|
|
Loading…
Reference in a new issue