ID3v2: Support UTF-16 TIPL frames with single BOM

This changes the signature of `decode_text` to take the new `TextDecodeOptions`. This allows us to specify a UTF-16 BOM ahead of time.

closes #306
This commit is contained in:
Serial 2023-12-26 13:52:17 -05:00 committed by Alex
parent 7e93b3c1f7
commit 3c33cb1f43
17 changed files with 216 additions and 69 deletions

View file

@ -2,7 +2,7 @@ use crate::error::{Id3v2Error, Id3v2ErrorKind, Result};
use crate::id3::v2::header::Id3v2Version;
use crate::macros::err;
use crate::picture::{MimeType, Picture, PictureType};
use crate::util::text::{encode_text, TextEncoding};
use crate::util::text::{encode_text, TextDecodeOptions, TextEncoding};
use std::borrow::Cow;
use std::io::{Read, Write as _};
@ -58,16 +58,24 @@ impl AttachedPictureFrame {
},
}
} else {
let mime_type_str =
crate::util::text::decode_text(reader, TextEncoding::UTF8, true)?.text_or_none();
let mime_type_str = crate::util::text::decode_text(
reader,
TextDecodeOptions::new()
.encoding(TextEncoding::Latin1)
.terminated(true),
)?
.text_or_none();
mime_type = mime_type_str.map(|mime_type_str| MimeType::from_str(&mime_type_str));
};
let pic_type = PictureType::from_u8(reader.read_u8()?);
let description = crate::util::text::decode_text(reader, encoding, true)?
.text_or_none()
.map(Cow::from);
let description = crate::util::text::decode_text(
reader,
TextDecodeOptions::new().encoding(encoding).terminated(true),
)?
.text_or_none()
.map(Cow::from);
let mut data = Vec::new();
reader.read_to_end(&mut data)?;

View file

@ -1,5 +1,5 @@
use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
use crate::util::text::{decode_text, encode_text, TextEncoding};
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
use std::hash::{Hash, Hasher};
@ -96,11 +96,21 @@ impl AudioTextFrame {
let encoding = TextEncoding::from_u8(content.read_u8()?)
.ok_or_else(|| LoftyError::new(ErrorKind::TextDecode("Found invalid encoding")))?;
let mime_type = decode_text(content, TextEncoding::Latin1, true)?.content;
let mime_type = decode_text(
content,
TextDecodeOptions::new()
.encoding(TextEncoding::Latin1)
.terminated(true),
)?
.content;
let flags = AudioTextFrameFlags::from_u8(content.read_u8()?);
let equivalent_text = decode_text(content, encoding, true)?.content;
let equivalent_text = decode_text(
content,
TextDecodeOptions::new().encoding(encoding).terminated(true),
)?
.content;
Ok(Self {
encoding,

View file

@ -1,5 +1,5 @@
use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
use crate::util::text::{decode_text, encode_text, TextEncoding};
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
use std::io::{Cursor, Read};
@ -36,9 +36,17 @@ impl GeneralEncapsulatedObject {
let mut cursor = Cursor::new(&data[1..]);
let mime_type = decode_text(&mut cursor, TextEncoding::Latin1, true)?;
let file_name = decode_text(&mut cursor, encoding, true)?;
let descriptor = decode_text(&mut cursor, encoding, true)?;
let mime_type = decode_text(
&mut cursor,
TextDecodeOptions::new()
.encoding(TextEncoding::Latin1)
.terminated(true),
)?;
let text_decode_options = TextDecodeOptions::new().encoding(encoding).terminated(true);
let file_name = decode_text(&mut cursor, text_decode_options)?;
let descriptor = decode_text(&mut cursor, text_decode_options)?;
let mut data = Vec::new();
cursor.read_to_end(&mut data)?;

View file

@ -1,7 +1,9 @@
use crate::error::{Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
use crate::id3::v2::frame::content::verify_encoding;
use crate::id3::v2::header::Id3v2Version;
use crate::util::text::{decode_text, encode_text, utf16_decode_bytes, TextEncoding};
use crate::util::text::{
decode_text, encode_text, utf16_decode_bytes, TextDecodeOptions, TextEncoding,
};
use std::hash::{Hash, Hasher};
use std::io::Read;
@ -57,11 +59,15 @@ impl ExtendedTextFrame {
};
let encoding = verify_encoding(encoding_byte, version)?;
let description = decode_text(reader, encoding, true)?;
let description = decode_text(
reader,
TextDecodeOptions::new().encoding(encoding).terminated(true),
)?;
let frame_content;
if encoding != TextEncoding::UTF16 {
frame_content = decode_text(reader, encoding, false)?.content;
frame_content =
decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
return Ok(Some(ExtendedTextFrame {
encoding,

View file

@ -1,7 +1,7 @@
use crate::error::Result;
use crate::id3::v2::frame::content::verify_encoding;
use crate::id3::v2::header::Id3v2Version;
use crate::util::text::{decode_text, encode_text, TextEncoding};
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
use std::hash::{Hash, Hasher};
use std::io::Read;
@ -57,8 +57,16 @@ impl ExtendedUrlFrame {
};
let encoding = verify_encoding(encoding_byte, version)?;
let description = decode_text(reader, encoding, true)?.content;
let content = decode_text(reader, TextEncoding::Latin1, false)?.content;
let description = decode_text(
reader,
TextDecodeOptions::new().encoding(encoding).terminated(true),
)?
.content;
let content = decode_text(
reader,
TextDecodeOptions::new().encoding(TextEncoding::Latin1),
)?
.content;
Ok(Some(ExtendedUrlFrame {
encoding,

View file

@ -1,7 +1,7 @@
use crate::error::Result;
use crate::id3::v2::frame::content::verify_encoding;
use crate::id3::v2::header::Id3v2Version;
use crate::util::text::{decode_text, encode_text, TextEncoding};
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
use byteorder::ReadBytesExt;
@ -40,9 +40,28 @@ impl KeyValueFrame {
let mut values = Vec::new();
let mut text_decode_options = TextDecodeOptions::new().encoding(encoding).terminated(true);
// We have to read the first key/value pair separately because it may be the only string with a BOM
let first_key = decode_text(reader, text_decode_options)?;
if first_key.bytes_read == 0 {
return Ok(None);
}
if encoding == TextEncoding::UTF16 {
text_decode_options = text_decode_options.bom(first_key.bom);
}
values.push((
first_key.content,
decode_text(reader, text_decode_options)?.content,
));
loop {
let key = decode_text(reader, encoding, true)?;
let value = decode_text(reader, encoding, true)?;
let key = decode_text(reader, text_decode_options)?;
let value = decode_text(reader, text_decode_options)?;
if key.bytes_read == 0 || value.bytes_read == 0 {
break;
}

View file

@ -1,7 +1,7 @@
use crate::error::{Id3v2Error, Id3v2ErrorKind, Result};
use crate::id3::v2::frame::content::verify_encoding;
use crate::id3::v2::header::Id3v2Version;
use crate::util::text::{decode_text, encode_text, TextEncoding};
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
use std::hash::{Hash, Hasher};
use std::io::Read;
@ -32,8 +32,12 @@ impl LanguageFrame {
let mut language = [0; 3];
reader.read_exact(&mut language)?;
let description = decode_text(reader, encoding, true)?.content;
let content = decode_text(reader, encoding, false)?.content;
let description = decode_text(
reader,
TextDecodeOptions::new().encoding(encoding).terminated(true),
)?
.content;
let content = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
Ok(Some(Self {
encoding,

View file

@ -1,5 +1,7 @@
use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
use crate::util::text::{decode_text, encode_text, utf8_decode_str, TextEncoding};
use crate::util::text::{
decode_text, encode_text, utf8_decode_str, TextDecodeOptions, TextEncoding,
};
use std::hash::Hash;
use std::io::Read;
@ -45,14 +47,20 @@ impl OwnershipFrame {
let encoding = TextEncoding::from_u8(encoding_byte)
.ok_or_else(|| LoftyError::new(ErrorKind::TextDecode("Found invalid encoding")))?;
let price_paid = decode_text(reader, TextEncoding::Latin1, true)?.content;
let price_paid = decode_text(
reader,
TextDecodeOptions::new()
.encoding(TextEncoding::Latin1)
.terminated(true),
)?
.content;
let mut date_bytes = [0u8; 8];
reader.read_exact(&mut date_bytes)?;
let date_of_purchase = utf8_decode_str(&date_bytes)?.to_owned();
let seller = decode_text(reader, encoding, false)?.content;
let seller = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
Ok(Some(OwnershipFrame {
encoding,

View file

@ -1,5 +1,5 @@
use crate::error::Result;
use crate::util::text::{decode_text, encode_text, TextEncoding};
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
use std::hash::{Hash, Hasher};
use std::io::Read;
@ -36,7 +36,12 @@ impl Popularimeter {
where
R: Read,
{
let email = decode_text(reader, TextEncoding::Latin1, true)?;
let email = decode_text(
reader,
TextDecodeOptions::new()
.encoding(TextEncoding::Latin1)
.terminated(true),
)?;
let rating = reader.read_u8()?;
let mut counter_content = Vec::new();

View file

@ -1,5 +1,5 @@
use crate::error::Result;
use crate::util::text::{decode_text, encode_text, TextEncoding};
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
use std::io::Read;
@ -28,7 +28,12 @@ impl PrivateFrame {
where
R: Read,
{
let Ok(owner) = decode_text(reader, TextEncoding::Latin1, true) else {
let Ok(owner) = decode_text(
reader,
TextDecodeOptions::new()
.encoding(TextEncoding::Latin1)
.terminated(true),
) else {
return Ok(None);
};

View file

@ -1,7 +1,7 @@
use crate::error::{Id3v2Error, Id3v2ErrorKind, Result};
use crate::macros::try_vec;
use crate::probe::ParsingMode;
use crate::util::text::{decode_text, encode_text, TextEncoding};
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
@ -110,7 +110,13 @@ impl RelativeVolumeAdjustmentFrame {
where
R: Read,
{
let identification = decode_text(reader, TextEncoding::Latin1, true)?.content;
let identification = decode_text(
reader,
TextDecodeOptions::new()
.encoding(TextEncoding::Latin1)
.terminated(true),
)?
.content;
let mut channels = HashMap::new();
while let Ok(channel_type_byte) = reader.read_u8() {

View file

@ -1,7 +1,8 @@
use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
use crate::macros::err;
use crate::util::text::{
decode_text, encode_text, read_to_terminator, utf16_decode_bytes, TextEncoding,
decode_text, encode_text, read_to_terminator, utf16_decode_bytes, TextDecodeOptions,
TextEncoding,
};
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
@ -106,9 +107,12 @@ impl SynchronizedText {
.ok_or_else(|| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
let mut cursor = Cursor::new(&data[6..]);
let description = crate::util::text::decode_text(&mut cursor, encoding, true)
.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?
.text_or_none();
let description = crate::util::text::decode_text(
&mut cursor,
TextDecodeOptions::new().encoding(encoding).terminated(true),
)
.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?
.text_or_none();
let mut endianness: fn([u8; 2]) -> u16 = u16::from_le_bytes;
@ -154,8 +158,11 @@ impl SynchronizedText {
}
}
let decoded_text = decode_text(&mut cursor, encoding, true)
.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
let decoded_text = decode_text(
&mut cursor,
TextDecodeOptions::new().encoding(encoding).terminated(true),
)
.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
pos += decoded_text.bytes_read as u64;
Ok(decoded_text.content)

View file

@ -1,7 +1,7 @@
use crate::error::Result;
use crate::id3::v2::frame::content::verify_encoding;
use crate::id3::v2::header::Id3v2Version;
use crate::util::text::{decode_text, encode_text, TextEncoding};
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
use byteorder::ReadBytesExt;
@ -37,7 +37,7 @@ impl TextInformationFrame {
};
let encoding = verify_encoding(encoding_byte, version)?;
let value = decode_text(reader, encoding, false)?.content;
let value = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
Ok(Some(TextInformationFrame { encoding, value }))
}

View file

@ -1,7 +1,7 @@
use crate::error::{Id3v2Error, Id3v2ErrorKind, Result};
use crate::macros::parse_mode_choice;
use crate::probe::ParsingMode;
use crate::util::text::{decode_text, encode_text, TextEncoding};
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
use std::hash::{Hash, Hasher};
use std::io::Read;
@ -25,7 +25,12 @@ impl UniqueFileIdentifierFrame {
where
R: Read,
{
let owner_decode_result = decode_text(reader, TextEncoding::Latin1, true)?;
let owner_decode_result = decode_text(
reader,
TextDecodeOptions::new()
.encoding(TextEncoding::Latin1)
.terminated(true),
)?;
let owner;
match owner_decode_result.text_or_none() {

View file

@ -1,5 +1,5 @@
use crate::error::Result;
use crate::util::text::{decode_text, encode_text, TextEncoding};
use crate::util::text::{decode_text, encode_text, TextDecodeOptions, TextEncoding};
use std::io::Read;
@ -19,7 +19,12 @@ impl UrlLinkFrame {
where
R: Read,
{
let url = decode_text(reader, TextEncoding::Latin1, true)?;
let url = decode_text(
reader,
TextDecodeOptions::new()
.encoding(TextEncoding::Latin1)
.terminated(true),
)?;
if url.bytes_read == 0 {
return Ok(None);
}

View file

@ -17,7 +17,7 @@ use crate::picture::{Picture, PictureType, TOMBSTONE_PICTURE};
use crate::tag::item::{ItemKey, ItemValue, TagItem};
use crate::tag::{try_parse_year, Tag, TagType};
use crate::traits::{Accessor, MergeTag, SplitTag, TagExt};
use crate::util::text::{decode_text, TextEncoding};
use crate::util::text::{decode_text, TextDecodeOptions, TextEncoding};
use std::borrow::Cow;
use std::convert::TryInto;
@ -1084,9 +1084,10 @@ impl SplitTag for Id3v2Tag {
) => {
if owner == MUSICBRAINZ_UFID_OWNER {
let mut identifier = Cursor::new(identifier);
let Ok(recording_id) =
decode_text(&mut identifier, TextEncoding::Latin1, false)
else {
let Ok(recording_id) = decode_text(
&mut identifier,
TextDecodeOptions::new().encoding(TextEncoding::Latin1),
) else {
return true; // Keep frame
};
tag.items.push(TagItem::new(

View file

@ -60,20 +60,54 @@ const EMPTY_DECODED_TEXT: DecodeTextResult = DecodeTextResult {
bom: [0, 0],
};
pub(crate) fn decode_text<R>(
reader: &mut R,
encoding: TextEncoding,
terminated: bool,
) -> Result<DecodeTextResult>
/// Settings that tell [`decode_text`] how to read and interpret a string.
///
/// Start from [`TextDecodeOptions::new`] and chain the setters, e.g.
/// `TextDecodeOptions::new().encoding(encoding).terminated(true)`.
#[derive(Clone, Copy, Debug)]
pub(crate) struct TextDecodeOptions {
    /// The encoding the text is decoded with.
    pub encoding: TextEncoding,
    /// When `true`, the text is read up to its null terminator instead of
    /// taking the non-terminated path.
    pub terminated: bool,
    /// Byte order mark to use for UTF-16 text.
    ///
    /// `[0, 0]` means "not specified": the decoder then takes the mark from
    /// the first two bytes of the text itself.
    pub bom: [u8; 2],
}

impl TextDecodeOptions {
    /// Creates a set of options holding the [`Default`] values.
    pub(crate) fn new() -> Self {
        <Self as Default>::default()
    }

    /// Returns the options with `encoding` replaced; other fields are kept.
    pub(crate) fn encoding(self, encoding: TextEncoding) -> Self {
        Self { encoding, ..self }
    }

    /// Returns the options with `terminated` replaced; other fields are kept.
    pub(crate) fn terminated(self, terminated: bool) -> Self {
        Self { terminated, ..self }
    }

    /// Returns the options with `bom` replaced; other fields are kept.
    pub(crate) fn bom(self, bom: [u8; 2]) -> Self {
        Self { bom, ..self }
    }
}

impl Default for TextDecodeOptions {
    /// UTF-8, not terminated, and no byte order mark specified.
    fn default() -> Self {
        let encoding = TextEncoding::UTF8;
        let terminated = false;
        let bom = [0u8; 2];

        Self {
            encoding,
            terminated,
            bom,
        }
    }
}
pub(crate) fn decode_text<R>(reader: &mut R, options: TextDecodeOptions) -> Result<DecodeTextResult>
where
R: Read,
{
let raw_bytes;
let bytes_read;
if terminated {
if let Some(bytes) = read_to_terminator(reader, encoding) {
let null_terminator_length = match encoding {
if options.terminated {
if let Some(bytes) = read_to_terminator(reader, options.encoding) {
let null_terminator_length = match options.encoding {
TextEncoding::Latin1 | TextEncoding::UTF8 => 1,
TextEncoding::UTF16 | TextEncoding::UTF16BE => 2,
};
@ -96,7 +130,7 @@ where
}
let mut bom = [0, 0];
let read_string = match encoding {
let read_string = match options.encoding {
TextEncoding::Latin1 => latin1_decode(&raw_bytes),
TextEncoding::UTF16 => {
if raw_bytes.len() < 2 {
@ -107,12 +141,19 @@ where
err!(TextDecode("UTF-16 string has an odd length"));
}
match (raw_bytes[0], raw_bytes[1]) {
(0xFE, 0xFF) => {
let bom_to_check;
if options.bom == [0, 0] {
bom_to_check = [raw_bytes[0], raw_bytes[1]];
} else {
bom_to_check = options.bom;
}
match bom_to_check {
[0xFE, 0xFF] => {
bom = [0xFE, 0xFF];
utf16_decode_bytes(&raw_bytes[2..], u16::from_be_bytes)?
},
(0xFF, 0xFE) => {
[0xFF, 0xFE] => {
bom = [0xFF, 0xFE];
utf16_decode_bytes(&raw_bytes[2..], u16::from_le_bytes)?
},
@ -280,7 +321,7 @@ fn utf16_encode(
#[cfg(test)]
mod tests {
use crate::util::text::TextEncoding;
use crate::util::text::{TextDecodeOptions, TextEncoding};
use std::io::Cursor;
const TEST_STRING: &str = "l\u{00f8}ft\u{00a5}";
@ -303,16 +344,14 @@ mod tests {
&mut Cursor::new(&[
0xFE, 0xFF, 0x00, 0x6C, 0x00, 0xF8, 0x00, 0x66, 0x00, 0x74, 0x00, 0xA5, 0x00, 0x00,
]),
TextEncoding::UTF16,
false,
TextDecodeOptions::new().encoding(TextEncoding::UTF16),
)
.unwrap();
let le_utf16_decode = super::decode_text(
&mut Cursor::new(&[
0xFF, 0xFE, 0x6C, 0x00, 0xF8, 0x00, 0x66, 0x00, 0x74, 0x00, 0xA5, 0x00, 0x00, 0x00,
]),
TextEncoding::UTF16,
false,
TextDecodeOptions::new().encoding(TextEncoding::UTF16),
)
.unwrap();
@ -320,8 +359,11 @@ mod tests {
assert_eq!(be_utf16_decode.bytes_read, le_utf16_decode.bytes_read);
assert_eq!(be_utf16_decode.content, TEST_STRING.to_string());
let utf8_decode =
super::decode_text(&mut TEST_STRING.as_bytes(), TextEncoding::UTF8, false).unwrap();
let utf8_decode = super::decode_text(
&mut TEST_STRING.as_bytes(),
TextDecodeOptions::new().encoding(TextEncoding::UTF8),
)
.unwrap();
assert_eq!(utf8_decode.content, TEST_STRING.to_string());
}