From a6b7823d732c98d3d18b4607297d7eb87897b5f1 Mon Sep 17 00:00:00 2001 From: Serial <69764315+Serial-ATA@users.noreply.github.com> Date: Sun, 23 Apr 2023 18:48:21 -0400 Subject: [PATCH] ID3v2: Stop eagerly allocating frame content --- src/id3/v2/frame/content.rs | 33 +++-- src/id3/v2/frame/mod.rs | 4 +- src/id3/v2/frame/read.rs | 150 +++++++++++++++------ src/id3/v2/items/attached_picture_frame.rs | 21 +-- src/id3/v2/items/extended_text_frame.rs | 81 +++++------ src/id3/v2/items/extended_url_frame.rs | 18 +-- src/id3/v2/items/identifier.rs | 20 +-- src/id3/v2/items/language_frame.rs | 32 +++-- src/id3/v2/items/popularimeter.rs | 20 ++- src/id3/v2/items/text_information_frame.rs | 16 ++- src/id3/v2/items/url_link_frame.rs | 14 +- src/util/text.rs | 17 ++- tests/picture/format_parsers.rs | 4 +- 13 files changed, 276 insertions(+), 154 deletions(-) diff --git a/src/id3/v2/frame/content.rs b/src/id3/v2/frame/content.rs index b789015a..0ed91c17 100644 --- a/src/id3/v2/frame/content.rs +++ b/src/id3/v2/frame/content.rs @@ -8,31 +8,38 @@ use crate::id3::v2::ID3v2Version; use crate::macros::err; use crate::util::text::TextEncoding; +use std::io::Read; + #[rustfmt::skip] -pub(super) fn parse_content( - content: &mut &[u8], +pub(super) fn parse_content( + reader: &mut R, id: &str, version: ID3v2Version, ) -> Result> { Ok(match id { // The ID was previously upgraded, but the content remains unchanged, so version is necessary "APIC" => { - let attached_picture = AttachedPictureFrame::parse(content, version)?; + let attached_picture = AttachedPictureFrame::parse(reader, version)?; Some(FrameValue::Picture(attached_picture)) }, - "TXXX" => ExtendedTextFrame::parse(content, version)?.map(FrameValue::UserText), - "WXXX" => ExtendedUrlFrame::parse(content, version)?.map(FrameValue::UserUrl), - "COMM" => CommentFrame::parse(content, version)?.map(FrameValue::Comment), - "USLT" => UnsynchronizedTextFrame::parse(content, version)?.map(FrameValue::UnsynchronizedText), - "UFID" => UniqueFileIdentifierFrame::parse(content)?.map(FrameValue::UniqueFileIdentifier), - _ if id.starts_with('T') => TextInformationFrame::parse(content, version)?.map(FrameValue::Text), + "TXXX" => ExtendedTextFrame::parse(reader, version)?.map(FrameValue::UserText), + "WXXX" => ExtendedUrlFrame::parse(reader, version)?.map(FrameValue::UserUrl), + "COMM" => CommentFrame::parse(reader, version)?.map(FrameValue::Comment), + "USLT" => UnsynchronizedTextFrame::parse(reader, version)?.map(FrameValue::UnsynchronizedText), + "UFID" => UniqueFileIdentifierFrame::parse(reader)?.map(FrameValue::UniqueFileIdentifier), + _ if id.starts_with('T') => TextInformationFrame::parse(reader, version)?.map(FrameValue::Text), // Apple proprietary frames // WFED (Podcast URL), GRP1 (Grouping), MVNM (Movement Name), MVIN (Movement Number) - "WFED" | "GRP1" | "MVNM" | "MVIN" => TextInformationFrame::parse(content, version)?.map(FrameValue::Text), - _ if id.starts_with('W') => UrlLinkFrame::parse(content)?.map(FrameValue::Url), - "POPM" => Some(FrameValue::Popularimeter(Popularimeter::parse(content)?)), + "WFED" | "GRP1" | "MVNM" | "MVIN" => TextInformationFrame::parse(reader, version)?.map(FrameValue::Text), + _ if id.starts_with('W') => UrlLinkFrame::parse(reader)?.map(FrameValue::Url), + "POPM" => Some(FrameValue::Popularimeter(Popularimeter::parse(reader)?)), // SYLT, GEOB, and any unknown frames - _ => Some(FrameValue::Binary(content.to_vec())), + _ => { + let mut content = Vec::new(); + reader.read_to_end(&mut content)?; + + Some(FrameValue::Binary(content)) + }, }) } diff --git a/src/id3/v2/frame/mod.rs b/src/id3/v2/frame/mod.rs index 72e196af..38a9b099 100644 --- a/src/id3/v2/frame/mod.rs +++ b/src/id3/v2/frame/mod.rs @@ -362,7 +362,7 @@ impl From for Option> { }) }, (FrameId::Valid(ref s), ItemValue::Binary(text)) if s == "POPM" => { - FrameValue::Popularimeter(Popularimeter::parse(&text).ok()?) + FrameValue::Popularimeter(Popularimeter::parse(&mut &text[..]).ok()?) }, (_, item_value) => { let Ok(value) = item_value.try_into() else { @@ -496,7 +496,7 @@ impl<'a> TryFrom<&'a TagItem> for FrameRef<'a> { }) }, ("POPM", ItemValue::Binary(contents)) => { - FrameValue::Popularimeter(Popularimeter::parse(contents)?) + FrameValue::Popularimeter(Popularimeter::parse(&mut &contents[..])?) }, (_, value) => value.try_into()?, }; diff --git a/src/id3/v2/frame/read.rs b/src/id3/v2/frame/read.rs index 891ff43a..781928a6 100644 --- a/src/id3/v2/frame/read.rs +++ b/src/id3/v2/frame/read.rs @@ -2,8 +2,8 @@ use super::header::{parse_header, parse_v2_header}; use super::Frame; use crate::error::{Id3v2Error, Id3v2ErrorKind, Result}; use crate::id3::v2::frame::content::parse_content; -use crate::id3::v2::util::synchsafe::SynchsafeInteger; -use crate::id3::v2::{FrameValue, ID3v2Version}; +use crate::id3::v2::util::synchsafe::{SynchsafeInteger, UnsynchronizedStream}; +use crate::id3::v2::{FrameFlags, FrameId, FrameValue, ID3v2Version}; use crate::macros::try_vec; use std::io::Read; @@ -58,50 +58,122 @@ impl<'a> Frame<'a> { size -= 4; } - let mut content = try_vec![0; size as usize]; - reader.read_exact(&mut content)?; - - if flags.unsynchronisation { - content = crate::id3::v2::util::synchsafe::unsynch_content(content.as_slice())?; - } - - #[cfg(feature = "id3v2_compression_support")] - if flags.compression { - // This is guaranteed to be set above - let data_length_indicator = flags.data_length_indicator.unwrap() as usize; - - let mut decompressed = Vec::with_capacity(data_length_indicator); - flate2::read::ZlibDecoder::new(&content[..]).read_to_end(&mut decompressed)?; - if data_length_indicator != decompressed.len() { - log::debug!("Frame data length indicator does not match true decompressed length"); - } - - content = decompressed - } - - #[cfg(not(feature = "id3v2_compression_support"))] - if flags.compression { - return Err(Id3v2Error::new(Id3v2ErrorKind::CompressedFrameEncountered).into()); - } - // Frames must have at least 1 byte, *after* all of the additional data flags can provide if size == 0 { return Err(Id3v2Error::new(Id3v2ErrorKind::BadFrameLength).into()); } - let value = if flags.encryption.is_some() { - if flags.data_length_indicator.is_none() { - return Err(Id3v2Error::new(Id3v2ErrorKind::MissingDataLengthIndicator).into()); - } + // Restrict the reader to the frame content + let mut reader = reader.take(u64::from(size)); - Some(FrameValue::Binary(content)) - } else { - parse_content(&mut &content[..], id.as_str(), version)? - }; + // It seems like the flags are applied in the order: + // + // unsynchronization -> compression -> encryption + // + // Which all have their own needs, so this gets a little messy... + match flags { + // Possible combinations: + // + // * unsynchronized + compressed + encrypted + // * unsynchronized + compressed + // * unsynchronized + encrypted + // * unsynchronized + FrameFlags { + unsynchronisation: true, + .. + } => { + let mut unsynchronized_reader = UnsynchronizedStream::new(reader); - match value { - Some(value) => Ok((Some(Self { id, value, flags }), false)), - None => Ok((None, false)), + if flags.compression { + let mut compression_reader = handle_compression(unsynchronized_reader)?; + + if flags.encryption.is_some() { + return handle_encryption(&mut compression_reader, size, id, flags); + } + + return parse_frame(&mut compression_reader, id, flags, version); + } + + if flags.encryption.is_some() { + return handle_encryption(&mut unsynchronized_reader, size, id, flags); + } + + return parse_frame(&mut unsynchronized_reader, id, flags, version); + }, + // Possible combinations: + // + // * compressed + encrypted + // * compressed + FrameFlags { + compression: true, .. + } => { + let mut compression_reader = handle_compression(reader)?; + + if flags.encryption.is_some() { + return handle_encryption(&mut compression_reader, size, id, flags); + } + + return parse_frame(&mut compression_reader, id, flags, version); + }, + // Possible combinations: + // + // * encrypted + FrameFlags { + encryption: Some(_), + .. + } => { + return handle_encryption(&mut reader, size, id, flags); + }, + // Everything else that doesn't have special flags + _ => { + return parse_frame(&mut reader, id, flags, version); + }, } } } + +#[cfg(feature = "id3v2_compression_support")] +#[allow(clippy::unnecessary_wraps)] +fn handle_compression(reader: R) -> Result> { + Ok(flate2::read::ZlibDecoder::new(reader)) +} + +#[cfg(not(feature = "id3v2_compression_support"))] +fn handle_compression(reader: &mut R) -> flate2::read::ZlibDecoder { + return Err(Id3v2Error::new(Id3v2ErrorKind::CompressedFrameEncountered).into()); +} + +fn handle_encryption( + reader: &mut R, + size: u32, + id: FrameId<'static>, + flags: FrameFlags, +) -> Result<(Option>, bool)> { + if flags.data_length_indicator.is_none() { + return Err(Id3v2Error::new(Id3v2ErrorKind::MissingDataLengthIndicator).into()); + } + + let mut content = try_vec![0; size as usize]; + reader.read_exact(&mut content)?; + + let encrypted_frame = Frame { + id, + value: FrameValue::Binary(content), + flags, + }; + + // Nothing further we can do with encrypted frames + Ok((Some(encrypted_frame), false)) +} + +fn parse_frame( + reader: &mut R, + id: FrameId<'static>, + flags: FrameFlags, + version: ID3v2Version, +) -> Result<(Option>, bool)> { + match parse_content(reader, id.as_str(), version)? { + Some(value) => Ok((Some(Frame { id, value, flags }), false)), + None => Ok((None, false)), + } +} diff --git a/src/id3/v2/items/attached_picture_frame.rs b/src/id3/v2/items/attached_picture_frame.rs index 1d971c7a..7386e6a5 100644 --- a/src/id3/v2/items/attached_picture_frame.rs +++ b/src/id3/v2/items/attached_picture_frame.rs @@ -5,7 +5,7 @@ use crate::picture::{MimeType, Picture, PictureType}; use crate::util::text::{encode_text, TextEncoding}; use std::borrow::Cow; -use std::io::{Cursor, Read, Write as _}; +use std::io::{Read, Write as _}; use byteorder::{ReadBytesExt as _, WriteBytesExt as _}; @@ -33,17 +33,18 @@ impl AttachedPictureFrame { /// ID3v2.2: /// /// * The format is not "PNG" or "JPG" - pub fn parse(bytes: &[u8], version: ID3v2Version) -> Result { - let mut cursor = Cursor::new(bytes); - - let encoding = match TextEncoding::from_u8(cursor.read_u8()?) { + pub fn parse(reader: &mut R, version: ID3v2Version) -> Result + where + R: Read, + { + let encoding = match TextEncoding::from_u8(reader.read_u8()?) { Some(encoding) => encoding, None => err!(NotAPicture), }; let mime_type = if version == ID3v2Version::V2 { let mut format = [0; 3]; - cursor.read_exact(&mut format)?; + reader.read_exact(&mut format)?; match format { [b'P', b'N', b'G'] => MimeType::Png, @@ -56,18 +57,18 @@ impl AttachedPictureFrame { }, } } else { - (crate::util::text::decode_text(&mut cursor, TextEncoding::UTF8, true)?.text_or_none()) + (crate::util::text::decode_text(reader, TextEncoding::UTF8, true)?.text_or_none()) .map_or(MimeType::None, |mime_type| MimeType::from_str(&mime_type)) }; - let pic_type = PictureType::from_u8(cursor.read_u8()?); + let pic_type = PictureType::from_u8(reader.read_u8()?); - let description = crate::util::text::decode_text(&mut cursor, encoding, true)? + let description = crate::util::text::decode_text(reader, encoding, true)? .text_or_none() .map(Cow::from); let mut data = Vec::new(); - cursor.read_to_end(&mut data)?; + reader.read_to_end(&mut data)?; let picture = Picture { pic_type, diff --git a/src/id3/v2/items/extended_text_frame.rs b/src/id3/v2/items/extended_text_frame.rs index 479f91a5..01c7e338 100644 --- a/src/id3/v2/items/extended_text_frame.rs +++ b/src/id3/v2/items/extended_text_frame.rs @@ -4,7 +4,7 @@ use crate::id3::v2::ID3v2Version; use crate::util::text::{decode_text, encode_text, read_to_terminator, utf16_decode, TextEncoding}; use std::hash::{Hash, Hasher}; -use std::io::{Cursor, Read}; +use std::io::Read; use byteorder::ReadBytesExt; @@ -48,53 +48,58 @@ impl ExtendedTextFrame { /// ID3v2.2: /// /// * The encoding is not [`TextEncoding::Latin1`] or [`TextEncoding::UTF16`] - pub fn parse(content: &[u8], version: ID3v2Version) -> Result> { - if content.len() < 2 { + pub fn parse(reader: &mut R, version: ID3v2Version) -> Result> + where + R: Read, + { + let Ok(encoding_byte) = reader.read_u8() else { return Ok(None); - } + }; - let mut content = &mut &content[..]; - let encoding = verify_encoding(content.read_u8()?, version)?; - - let mut endianness: fn([u8; 2]) -> u16 = u16::from_le_bytes; - if encoding == TextEncoding::UTF16 { - let mut cursor = Cursor::new(content); - let mut bom = [0; 2]; - cursor.read_exact(&mut bom)?; - - match [bom[0], bom[1]] { - [0xFF, 0xFE] => endianness = u16::from_le_bytes, - [0xFE, 0xFF] => endianness = u16::from_be_bytes, - // We'll catch an invalid BOM below - _ => {}, - }; - - content = cursor.into_inner(); - } - - let description = decode_text(content, encoding, true)?.content; + let encoding = verify_encoding(encoding_byte, version)?; + let description = decode_text(reader, encoding, true)?; let frame_content; + if encoding != TextEncoding::UTF16 { + frame_content = decode_text(reader, encoding, false)?.content; + + return Ok(Some(ExtendedTextFrame { + encoding, + description: description.content, + content: frame_content, + })); + } + // It's possible for the description to be the only string with a BOM - if encoding == TextEncoding::UTF16 { - if content.len() >= 2 && (content[..2] == [0xFF, 0xFE] || content[..2] == [0xFE, 0xFF]) - { - frame_content = decode_text(content, encoding, false)?.content; - } else { - frame_content = match read_to_terminator(content, TextEncoding::UTF16) { - Some(raw_text) => utf16_decode(&raw_text, endianness).map_err(|_| { - Into::::into(Id3v2Error::new(Id3v2ErrorKind::BadSyncText)) - })?, - None => String::new(), - } + 'utf16: { + let bom = description.bom; + let Some(raw_text) = read_to_terminator(reader, TextEncoding::UTF16) else { + // Nothing left to do + frame_content = String::new(); + break 'utf16; + }; + + if raw_text.starts_with(&[0xFF, 0xFE]) || raw_text.starts_with(&[0xFE, 0xFF]) { + frame_content = + decode_text(&mut &raw_text[..], TextEncoding::UTF16, false)?.content; + break 'utf16; } - } else { - frame_content = decode_text(content, encoding, false)?.content; + + let endianness = match bom { + [0xFF, 0xFE] => u16::from_le_bytes, + [0xFE, 0xFF] => u16::from_be_bytes, + // Handled in description decoding + _ => unreachable!(), + }; + + frame_content = utf16_decode(&raw_text, endianness).map_err(|_| { + Into::::into(Id3v2Error::new(Id3v2ErrorKind::BadSyncText)) + })?; } Ok(Some(ExtendedTextFrame { encoding, - description, + description: description.content, content: frame_content, })) } diff --git a/src/id3/v2/items/extended_url_frame.rs b/src/id3/v2/items/extended_url_frame.rs index 856e5c8f..a82f1917 100644 --- a/src/id3/v2/items/extended_url_frame.rs +++ b/src/id3/v2/items/extended_url_frame.rs @@ -4,6 +4,7 @@ use crate::id3::v2::ID3v2Version; use crate::util::text::{decode_text, encode_text, TextEncoding}; use std::hash::{Hash, Hasher}; +use std::io::Read; use byteorder::ReadBytesExt; @@ -47,16 +48,17 @@ impl ExtendedUrlFrame { /// ID3v2.2: /// /// * The encoding is not [`TextEncoding::Latin1`] or [`TextEncoding::UTF16`] - pub fn parse(content: &[u8], version: ID3v2Version) -> Result> { - if content.len() < 2 { + pub fn parse(reader: &mut R, version: ID3v2Version) -> Result> + where + R: Read, + { + let Ok(encoding_byte) = reader.read_u8() else { return Ok(None); - } + }; - let content = &mut &content[..]; - - let encoding = verify_encoding(content.read_u8()?, version)?; - let description = decode_text(content, encoding, true)?.content; - let content = decode_text(content, TextEncoding::Latin1, false)?.content; + let encoding = verify_encoding(encoding_byte, version)?; + let description = decode_text(reader, encoding, true)?.content; + let content = decode_text(reader, TextEncoding::Latin1, false)?.content; Ok(Some(ExtendedUrlFrame { encoding, diff --git a/src/id3/v2/items/identifier.rs b/src/id3/v2/items/identifier.rs index 70bcac56..abf04bcc 100644 --- a/src/id3/v2/items/identifier.rs +++ b/src/id3/v2/items/identifier.rs @@ -1,9 +1,10 @@ -use std::hash::{Hash, Hasher}; - use crate::error::{Id3v2Error, Id3v2ErrorKind}; use crate::util::text::{decode_text, encode_text}; use crate::{Result, TextEncoding}; +use std::hash::{Hash, Hasher}; +use std::io::Read; + /// An `ID3v2` unique file identifier frame (UFID). #[derive(Clone, Debug, Eq)] pub struct UniqueFileIdentifierFrame { @@ -19,15 +20,16 @@ impl UniqueFileIdentifierFrame { /// # Errors /// /// Owner is missing or improperly encoded - pub fn parse(input: &mut &[u8]) -> Result> { - if input.is_empty() { - return Ok(None); - } - - let Some(owner) = decode_text(input, TextEncoding::Latin1, true)?.text_or_none() else { + pub fn parse(reader: &mut R) -> Result> + where + R: Read, + { + let Some(owner) = decode_text(reader, TextEncoding::Latin1, true)?.text_or_none() else { return Err(Id3v2Error::new(Id3v2ErrorKind::MissingUfidOwner).into()); }; - let identifier = input.to_vec(); + + let mut identifier = Vec::new(); + reader.read_to_end(&mut identifier)?; Ok(Some(Self { owner, identifier })) } diff --git a/src/id3/v2/items/language_frame.rs b/src/id3/v2/items/language_frame.rs index a693fd37..c7fab9ee 100644 --- a/src/id3/v2/items/language_frame.rs +++ b/src/id3/v2/items/language_frame.rs @@ -19,19 +19,21 @@ struct LanguageFrame { } impl LanguageFrame { - fn parse(content: &[u8], version: ID3v2Version) -> Result> { - if content.len() < 5 { + fn parse(reader: &mut R, version: ID3v2Version) -> Result> + where + R: Read, + { + let Ok(encoding_byte) = reader.read_u8() else { return Ok(None); - } + }; - let content = &mut &content[..]; - let encoding = verify_encoding(content.read_u8()?, version)?; + let encoding = verify_encoding(encoding_byte, version)?; let mut language = [0; 3]; - content.read_exact(&mut language)?; + reader.read_exact(&mut language)?; - let description = decode_text(content, encoding, true)?.content; - let content = decode_text(content, encoding, false)?.content; + let description = decode_text(reader, encoding, true)?.content; + let content = decode_text(reader, encoding, false)?.content; Ok(Some(Self { encoding, @@ -111,8 +113,11 @@ impl CommentFrame { /// ID3v2.2: /// /// * The encoding is not [`TextEncoding::Latin1`] or [`TextEncoding::UTF16`] - pub fn parse(content: &[u8], version: ID3v2Version) -> Result> { - Ok(LanguageFrame::parse(content, version)?.map(Into::into)) + pub fn parse(reader: &mut R, version: ID3v2Version) -> Result> + where + R: Read, + { + Ok(LanguageFrame::parse(reader, version)?.map(Into::into)) } /// Convert a [`CommentFrame`] to a byte vec @@ -183,8 +188,11 @@ impl UnsynchronizedTextFrame { /// ID3v2.2: /// /// * The encoding is not [`TextEncoding::Latin1`] or [`TextEncoding::UTF16`] - pub fn parse(content: &[u8], version: ID3v2Version) -> Result> { - Ok(LanguageFrame::parse(content, version)?.map(Into::into)) + pub fn parse(reader: &mut R, version: ID3v2Version) -> Result> + where + R: Read, + { + Ok(LanguageFrame::parse(reader, version)?.map(Into::into)) } /// Convert a [`UnsynchronizedTextFrame`] to a byte vec diff --git a/src/id3/v2/items/popularimeter.rs b/src/id3/v2/items/popularimeter.rs index 289c4539..5008bd2d 100644 --- a/src/id3/v2/items/popularimeter.rs +++ b/src/id3/v2/items/popularimeter.rs @@ -1,8 +1,10 @@ use crate::error::Result; use crate::util::text::{decode_text, encode_text, TextEncoding}; -use byteorder::ReadBytesExt; use std::hash::{Hash, Hasher}; +use std::io::Read; + +use byteorder::ReadBytesExt; /// The contents of a popularimeter ("POPM") frame /// @@ -30,21 +32,25 @@ impl Popularimeter { /// /// * Email is improperly encoded /// * `bytes` doesn't contain enough data - pub fn parse(mut bytes: &[u8]) -> Result { - let content = &mut bytes; + pub fn parse(reader: &mut R) -> Result + where + R: Read, + { + let email = decode_text(reader, TextEncoding::Latin1, true)?; + let rating = reader.read_u8()?; - let email = decode_text(content, TextEncoding::Latin1, true)?; - let rating = content.read_u8()?; + let mut counter_content = Vec::new(); + reader.read_to_end(&mut counter_content)?; let counter; - let remaining_size = content.len(); + let remaining_size = counter_content.len(); if remaining_size > 8 { counter = u64::MAX; } else { let mut counter_bytes = [0; 8]; let counter_start_pos = 8 - remaining_size; - counter_bytes[counter_start_pos..].copy_from_slice(content); + counter_bytes[counter_start_pos..].copy_from_slice(&counter_content); counter = u64::from_be_bytes(counter_bytes); } diff --git a/src/id3/v2/items/text_information_frame.rs b/src/id3/v2/items/text_information_frame.rs index bf480aba..abd93a42 100644 --- a/src/id3/v2/items/text_information_frame.rs +++ b/src/id3/v2/items/text_information_frame.rs @@ -5,6 +5,8 @@ use crate::util::text::{decode_text, encode_text, TextEncoding}; use byteorder::ReadBytesExt; +use std::io::Read; + /// An `ID3v2` text frame #[derive(Clone, Debug, Eq, PartialEq, Hash)] pub struct TextInformationFrame { @@ -26,14 +28,16 @@ impl TextInformationFrame { /// ID3v2.2: /// /// * The encoding is not [`TextEncoding::Latin1`] or [`TextEncoding::UTF16`] - pub fn parse(content: &[u8], version: ID3v2Version) -> Result> { - if content.len() < 2 { + pub fn parse(reader: &mut R, version: ID3v2Version) -> Result> + where + R: Read, + { + let Ok(encoding_byte) = reader.read_u8() else { return Ok(None); - } + }; - let content = &mut &content[..]; - let encoding = verify_encoding(content.read_u8()?, version)?; - let value = decode_text(content, encoding, true)?.content; + let encoding = verify_encoding(encoding_byte, version)?; + let value = decode_text(reader, encoding, true)?.content; Ok(Some(TextInformationFrame { encoding, value })) } diff --git a/src/id3/v2/items/url_link_frame.rs b/src/id3/v2/items/url_link_frame.rs index 83ccc034..6c9720ea 100644 --- a/src/id3/v2/items/url_link_frame.rs +++ b/src/id3/v2/items/url_link_frame.rs @@ -1,6 +1,8 @@ use crate::error::Result; use crate::util::text::{decode_text, encode_text, TextEncoding}; +use std::io::Read; + /// An `ID3v2` URL frame #[derive(Clone, Debug, Eq, PartialEq, Hash)] pub struct UrlLinkFrame(pub(crate) String); @@ -13,14 +15,16 @@ impl UrlLinkFrame { /// # Errors /// /// * Unable to decode the text as [`TextEncoding::Latin1`] - pub fn parse(content: &[u8]) -> Result> { - if content.is_empty() { + pub fn parse(reader: &mut R) -> Result> + where + R: Read, + { + let url = decode_text(reader, TextEncoding::Latin1, true)?; + if url.bytes_read == 0 { return Ok(None); } - let url = decode_text(&mut &content[..], TextEncoding::Latin1, true)?.content; - - Ok(Some(UrlLinkFrame(url))) + Ok(Some(UrlLinkFrame(url.content))) } /// Convert an [`UrlLinkFrame`] to a byte vec diff --git a/src/util/text.rs b/src/util/text.rs index ef93fd7b..5dbc430b 100644 --- a/src/util/text.rs +++ b/src/util/text.rs @@ -41,6 +41,7 @@ impl TextEncoding { pub(crate) struct DecodeTextResult { pub(crate) content: String, pub(crate) bytes_read: usize, + pub(crate) bom: [u8; 2], } impl DecodeTextResult { @@ -56,6 +57,7 @@ impl DecodeTextResult { const EMPTY_DECODED_TEXT: DecodeTextResult = DecodeTextResult { content: String::new(), bytes_read: 0, + bom: [0, 0], }; pub(crate) fn decode_text( @@ -93,6 +95,7 @@ where raw_bytes = bytes; } + let mut bom = [0, 0]; let read_string = match encoding { TextEncoding::Latin1 => raw_bytes.iter().map(|c| *c as char).collect::(), TextEncoding::UTF16 => { @@ -105,8 +108,14 @@ where } match (raw_bytes[0], raw_bytes[1]) { - (0xFE, 0xFF) => utf16_decode(&raw_bytes[2..], u16::from_be_bytes)?, - (0xFF, 0xFE) => utf16_decode(&raw_bytes[2..], u16::from_le_bytes)?, + (0xFE, 0xFF) => { + bom = [0xFE, 0xFF]; + utf16_decode(&raw_bytes[2..], u16::from_be_bytes)? + }, + (0xFF, 0xFE) => { + bom = [0xFF, 0xFE]; + utf16_decode(&raw_bytes[2..], u16::from_le_bytes)? + }, _ => err!(TextDecode("UTF-16 string has an invalid byte order mark")), } }, @@ -122,6 +131,7 @@ where Ok(DecodeTextResult { content: read_string, bytes_read, + bom, }) } @@ -261,7 +271,8 @@ mod tests { ) .unwrap(); - assert_eq!(be_utf16_decode, le_utf16_decode); + assert_eq!(be_utf16_decode.content, le_utf16_decode.content); + assert_eq!(be_utf16_decode.bytes_read, le_utf16_decode.bytes_read); assert_eq!(be_utf16_decode.content, TEST_STRING.to_string()); let utf8_decode = diff --git a/tests/picture/format_parsers.rs b/tests/picture/format_parsers.rs index d69b0699..7500f828 100644 --- a/tests/picture/format_parsers.rs +++ b/tests/picture/format_parsers.rs @@ -28,7 +28,7 @@ fn create_original_picture() -> Picture { fn id3v24_apic() { let buf = get_buf("tests/picture/assets/png_640x628.apic"); - let apic = AttachedPictureFrame::parse(&buf, ID3v2Version::V4).unwrap(); + let apic = AttachedPictureFrame::parse(&mut &buf[..], ID3v2Version::V4).unwrap(); assert_eq!(create_original_picture(), apic.picture); } @@ -52,7 +52,7 @@ fn as_apic_bytes() { fn id3v22_pic() { let buf = get_buf("tests/picture/assets/png_640x628.pic"); - let pic = AttachedPictureFrame::parse(&buf, ID3v2Version::V2).unwrap(); + let pic = AttachedPictureFrame::parse(&mut &buf[..], ID3v2Version::V2).unwrap(); assert_eq!(create_original_picture(), pic.picture); }