mirror of
https://github.com/Serial-ATA/lofty-rs
synced 2024-12-13 14:12:31 +00:00
ID3v2: Support decoding UTF-16 T/WXXX frames with missing content BOM
closes #53
This commit is contained in:
parent
1d5d397dfa
commit
efff0eb306
5 changed files with 47 additions and 7 deletions
|
@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
### Fixed
|
||||
- **AIFF**: Fixed division by zero panic during property reading ([issue](https://github.com/Serial-ATA/lofty-rs/issues/56))
|
||||
- **ID3v2**: Support decoding UTF-16 T/WXXX frames with missing content BOM ([issue](https://github.com/Serial-ATA/lofty-rs/issues/53))
|
||||
|
||||
## [0.6.3] - 2022-05-18
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ use crate::error::{ErrorKind, Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
|
|||
use crate::id3::v2::frame::FrameValue;
|
||||
use crate::id3::v2::items::encoded_text_frame::EncodedTextFrame;
|
||||
use crate::id3::v2::items::language_frame::LanguageFrame;
|
||||
use crate::id3::v2::util::text_utils::{decode_text, TextEncoding};
|
||||
use crate::id3::v2::items::popularimeter::Popularimeter;
|
||||
use crate::id3::v2::util::text_utils::{
|
||||
decode_text, read_to_terminator, utf16_decode, TextEncoding,
|
||||
};
|
||||
use crate::id3::v2::Id3v2Version;
|
||||
use crate::picture::Picture;
|
||||
|
||||
use std::io::Read;
|
||||
use std::io::{Cursor, Read};
|
||||
|
||||
use crate::id3::v2::items::popularimeter::Popularimeter;
|
||||
use byteorder::ReadBytesExt;
|
||||
|
||||
pub(super) fn parse_content(
|
||||
|
@ -39,7 +41,7 @@ pub(super) fn parse_content(
|
|||
|
||||
// There are 2 possibilities for the frame's content: text or link.
|
||||
fn parse_user_defined(
|
||||
content: &mut &[u8],
|
||||
mut content: &mut &[u8],
|
||||
link: bool,
|
||||
version: Id3v2Version,
|
||||
) -> Result<Option<FrameValue>> {
|
||||
|
@ -49,6 +51,22 @@ fn parse_user_defined(
|
|||
|
||||
let encoding = verify_encoding(content.read_u8()?, version)?;
|
||||
|
||||
let mut endianness: fn([u8; 2]) -> u16 = u16::from_le_bytes;
|
||||
if encoding == TextEncoding::UTF16 {
|
||||
let mut cursor = Cursor::new(content);
|
||||
let mut bom = [0; 2];
|
||||
cursor.read_exact(&mut bom)?;
|
||||
|
||||
match [bom[0], bom[1]] {
|
||||
[0xFF, 0xFE] => endianness = u16::from_le_bytes,
|
||||
[0xFE, 0xFF] => endianness = u16::from_be_bytes,
|
||||
// We'll catch an invalid BOM below
|
||||
_ => {},
|
||||
};
|
||||
|
||||
content = cursor.into_inner();
|
||||
}
|
||||
|
||||
let description = decode_text(content, encoding, true)?.unwrap_or_default();
|
||||
|
||||
Ok(Some(if link {
|
||||
|
@ -60,12 +78,28 @@ fn parse_user_defined(
|
|||
content,
|
||||
})
|
||||
} else {
|
||||
let content = decode_text(content, encoding, false)?.unwrap_or_default();
|
||||
let frame_content;
|
||||
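// Some writers emit a BOM only on the description and omit it from the content.
// In that case, decode the content using the byte order detected from the description's BOM.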
|
||||
if encoding == TextEncoding::UTF16 {
|
||||
if content.len() >= 2 && (content[..2] == [0xFF, 0xFE] || content[..2] == [0xFE, 0xFF])
|
||||
{
|
||||
frame_content = decode_text(content, encoding, false)?.unwrap_or_default();
|
||||
} else {
|
||||
frame_content = match read_to_terminator(content, TextEncoding::UTF16) {
|
||||
Some(raw_text) => utf16_decode(&*raw_text, endianness).map_err(|_| {
|
||||
Into::<LoftyError>::into(Id3v2Error::new(Id3v2ErrorKind::BadSyncText))
|
||||
})?,
|
||||
None => String::new(),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
frame_content = decode_text(content, encoding, false)?.unwrap_or_default();
|
||||
}
|
||||
|
||||
FrameValue::UserText(EncodedTextFrame {
|
||||
encoding,
|
||||
description,
|
||||
content,
|
||||
content: frame_content,
|
||||
})
|
||||
}))
|
||||
}
|
||||
|
|
|
@ -140,7 +140,7 @@ impl SynchronizedText {
|
|||
.map_err(|_| Id3v2Error::new(Id3v2ErrorKind::BadSyncText))?;
|
||||
|
||||
// Encountered text that doesn't include a BOM
|
||||
if bom != [0xFF, 0xFE] || bom != [0xFE, 0xFF] {
|
||||
if bom != [0xFF, 0xFE] && bom != [0xFE, 0xFF] {
|
||||
cursor.seek(SeekFrom::Current(-2))?;
|
||||
|
||||
if let Some(raw_text) = read_to_terminator(&mut cursor, TextEncoding::UTF16)
|
||||
|
|
|
@ -1059,4 +1059,9 @@ mod tests {
|
|||
let tag: Id3v2Tag = tag.into();
|
||||
assert_eq!(tag.artist(), Some("foo/bar/baz"))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn utf16_txxx_with_single_bom() {
|
||||
let _ = read_tag("tests/tags/assets/id3v2/issue_53.id3v24");
|
||||
}
|
||||
}
|
||||
|
|
BIN
tests/tags/assets/id3v2/issue_53.id3v24
Normal file
BIN
tests/tags/assets/id3v2/issue_53.id3v24
Normal file
Binary file not shown.
Loading…
Reference in a new issue