mirror of
https://github.com/Serial-ATA/lofty-rs
synced 2024-12-13 06:02:32 +00:00
ID3v2: Support decoding UTF-16 T/WXXX frames with missing content BOM
closes #53
This commit is contained in:
parent
1d5d397dfa
commit
efff0eb306
5 changed files with 47 additions and 7 deletions
|
@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
- **AIFF**: Fixed division by zero panic during property reading ([issue](https://github.com/Serial-ATA/lofty-rs/issues/56))
|
- **AIFF**: Fixed division by zero panic during property reading ([issue](https://github.com/Serial-ATA/lofty-rs/issues/56))
|
||||||
|
- **ID3v2**: Support decoding UTF-16 T/WXXX frames with missing content BOM ([issue](https://github.com/Serial-ATA/lofty-rs/issues/53))
|
||||||
|
|
||||||
## [0.6.3] - 2022-05-18
|
## [0.6.3] - 2022-05-18
|
||||||
|
|
||||||
|
|
|
@ -2,13 +2,15 @@ use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
|
||||||
use crate::id3::v2::frame::FrameValue;
|
use crate::id3::v2::frame::FrameValue;
|
||||||
use crate::id3::v2::items::encoded_text_frame::EncodedTextFrame;
|
use crate::id3::v2::items::encoded_text_frame::EncodedTextFrame;
|
||||||
use crate::id3::v2::items::language_frame::LanguageFrame;
|
use crate::id3::v2::items::language_frame::LanguageFrame;
|
||||||
use crate::id3::v2::util::text_utils::{decode_text, TextEncoding};
|
use crate::id3::v2::items::popularimeter::Popularimeter;
|
||||||
|
use crate::id3::v2::util::text_utils::{
|
||||||
|
decode_text, read_to_terminator, utf16_decode, TextEncoding,
|
||||||
|
};
|
||||||
use crate::id3::v2::Id3v2Version;
|
use crate::id3::v2::Id3v2Version;
|
||||||
use crate::picture::Picture;
|
use crate::picture::Picture;
|
||||||
|
|
||||||
use std::io::Read;
|
use std::io::{Cursor, Read};
|
||||||
|
|
||||||
use crate::id3::v2::items::popularimeter::Popularimeter;
|
|
||||||
use byteorder::ReadBytesExt;
|
use byteorder::ReadBytesExt;
|
||||||
|
|
||||||
pub(super) fn parse_content(
|
pub(super) fn parse_content(
|
||||||
|
@ -39,7 +41,7 @@ pub(super) fn parse_content(
|
||||||
|
|
||||||
// There are 2 possibilities for the frame's content: text or link.
|
// There are 2 possibilities for the frame's content: text or link.
|
||||||
fn parse_user_defined(
|
fn parse_user_defined(
|
||||||
content: &mut &[u8],
|
mut content: &mut &[u8],
|
||||||
link: bool,
|
link: bool,
|
||||||
version: Id3v2Version,
|
version: Id3v2Version,
|
||||||
) -> Result<Option<FrameValue>> {
|
) -> Result<Option<FrameValue>> {
|
||||||
|
@ -49,6 +51,22 @@ fn parse_user_defined(
|
||||||
|
|
||||||
let encoding = verify_encoding(content.read_u8()?, version)?;
|
let encoding = verify_encoding(content.read_u8()?, version)?;
|
||||||
|
|
||||||
|
let mut endianness: fn([u8; 2]) -> u16 = u16::from_le_bytes;
|
||||||
|
if encoding == TextEncoding::UTF16 {
|
||||||
|
let mut cursor = Cursor::new(content);
|
||||||
|
let mut bom = [0; 2];
|
||||||
|
cursor.read_exact(&mut bom)?;
|
||||||
|
|
||||||
|
match [bom[0], bom[1]] {
|
||||||
|
[0xFF, 0xFE] => endianness = u16::from_le_bytes,
|
||||||
|
[0xFE, 0xFF] => endianness = u16::from_be_bytes,
|
||||||
|
// We'll catch an invalid BOM below
|
||||||
|
_ => {},
|
||||||
|
};
|
||||||
|
|
||||||
|
content = cursor.into_inner();
|
||||||
|
}
|
||||||
|
|
||||||
let description = decode_text(content, encoding, true)?.unwrap_or_default();
|
let description = decode_text(content, encoding, true)?.unwrap_or_default();
|
||||||
|
|
||||||
Ok(Some(if link {
|
Ok(Some(if link {
|
||||||
|
@ -60,12 +78,28 @@ fn parse_user_defined(
|
||||||
content,
|
content,
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
let content = decode_text(content, encoding, false)?.unwrap_or_default();
|
let frame_content;
|
||||||
|
// It's possible for the description to be the only string with a BOM
|
||||||
|
if encoding == TextEncoding::UTF16 {
|
||||||
|
if content.len() >= 2 && (content[..2] == [0xFF, 0xFE] || content[..2] == [0xFE, 0xFF])
|
||||||
|
{
|
||||||
|
frame_content = decode_text(content, encoding, false)?.unwrap_or_default();
|
||||||
|
} else {
|
||||||
|
frame_content = match read_to_terminator(content, TextEncoding::UTF16) {
|
||||||
|
Some(raw_text) => utf16_decode(&*raw_text, endianness).map_err(|_| {
|
||||||
|
Into::<LoftyError>::into(Id3v2Error::new(Id3v2ErrorKind::BadSyncText))
|
||||||
|
})?,
|
||||||
|
None => String::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
frame_content = decode_text(content, encoding, false)?.unwrap_or_default();
|
||||||
|
}
|
||||||
|
|
||||||
FrameValue::UserText(EncodedTextFrame {
|
FrameValue::UserText(EncodedTextFrame {
|
||||||
encoding,
|
encoding,
|
||||||
description,
|
description,
|
||||||
content,
|
content: frame_content,
|
||||||
})
|
})
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
|
@ -140,7 +140,7 @@ impl SynchronizedText {
|
||||||
.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
|
.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
|
||||||
|
|
||||||
// Encountered text that doesn't include a BOM
|
// Encountered text that doesn't include a BOM
|
||||||
if bom != [0xFF, 0xFE] || bom != [0xFE, 0xFF] {
|
if bom != [0xFF, 0xFE] && bom != [0xFE, 0xFF] {
|
||||||
cursor.seek(SeekFrom::Current(-2))?;
|
cursor.seek(SeekFrom::Current(-2))?;
|
||||||
|
|
||||||
if let Some(raw_text) = read_to_terminator(&mut cursor, TextEncoding::UTF16)
|
if let Some(raw_text) = read_to_terminator(&mut cursor, TextEncoding::UTF16)
|
||||||
|
|
|
@ -1059,4 +1059,9 @@ mod tests {
|
||||||
let tag: Id3v2Tag = tag.into();
|
let tag: Id3v2Tag = tag.into();
|
||||||
assert_eq!(tag.artist(), Some("foo/bar/baz"))
|
assert_eq!(tag.artist(), Some("foo/bar/baz"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn utf16_txxx_with_single_bom() {
|
||||||
|
let _ = read_tag("tests/tags/assets/id3v2/issue_53.id3v24");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
BIN
tests/tags/assets/id3v2/issue_53.id3v24
Normal file
BIN
tests/tags/assets/id3v2/issue_53.id3v24
Normal file
Binary file not shown.
Loading…
Reference in a new issue