ID3v2: Properly handle multi-value UTF-16 encoded frames

This commit is contained in:
Serial 2023-10-26 13:50:44 -04:00 committed by Alex
parent 9985a55e02
commit 23c334e1de
3 changed files with 16 additions and 12 deletions

View file

@@ -21,7 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
parse it as another atom definition. As the specification is broad, there is no way for us to say *with certainty*
that an identifier is invalid. Now we unfortunately have to guess the validity based on the commonly known atoms.
For this, we follow [TagLib]'s [checks](https://github.com/taglib/taglib/blob/b40b834b1bdbd74593c5619e969e793d4d4886d9/taglib/mp4/mp4atom.cpp#L89).
- **ID3v2**: No longer error on inputs shorter than 128 bytes (the length of an ID3v1 tag). ([PR](https://github.com/Serial-ATA/lofty-rs/pull/270))
- **ID3v1**: No longer error on inputs shorter than 128 bytes (the length of an ID3v1 tag). ([PR](https://github.com/Serial-ATA/lofty-rs/pull/270))
- **ID3v2**: No longer error on multi-value UTF-16 encoded text frames ([issue](https://github.com/Serial-ATA/lofty-rs/issues/265)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/284))
### Removed
- **MP4**: `Ilst::{track_total, disc_number, disc_total}` ([PR](https://github.com/Serial-ATA/lofty-rs/pull/269))

View file

@@ -1,9 +1,7 @@
use crate::error::{Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
use crate::id3::v2::frame::content::verify_encoding;
use crate::id3::v2::header::Id3v2Version;
use crate::util::text::{
decode_text, encode_text, read_to_terminator, utf16_decode_bytes, TextEncoding,
};
use crate::util::text::{decode_text, encode_text, utf16_decode_bytes, TextEncoding};
use std::hash::{Hash, Hasher};
use std::io::Read;
@@ -74,17 +72,19 @@ impl ExtendedTextFrame {
// It's possible for the description to be the only string with a BOM
'utf16: {
let bom = description.bom;
let Some(raw_text) = read_to_terminator(reader, TextEncoding::UTF16) else {
let mut raw_text = Vec::new();
reader.read_to_end(&mut raw_text)?;
if raw_text.is_empty() {
// Nothing left to do
frame_content = String::new();
break 'utf16;
};
}
let mut bom = description.bom;
if raw_text.starts_with(&[0xFF, 0xFE]) || raw_text.starts_with(&[0xFE, 0xFF]) {
frame_content =
decode_text(&mut &raw_text[..], TextEncoding::UTF16, false)?.content;
break 'utf16;
// The text specifies a BOM
bom = [raw_text[0], raw_text[1]];
}
let endianness = match bom {

View file

@@ -207,8 +207,11 @@ pub(crate) fn utf16_decode_bytes(bytes: &[u8], endianness: fn([u8; 2]) -> u16) -
let unverified: Vec<u16> = bytes
.chunks_exact(2)
.map_while(|c| match c {
[0, 0] => None,
// In ID3v2, it is possible to have multiple UTF-16 strings separated by null.
// This also makes it possible for us to encounter multiple BOMs in a single string.
// We must filter them out.
.filter_map(|c| match c {
[0xFF, 0xFE] | [0xFE, 0xFF] => None,
_ => Some(endianness(c.try_into().unwrap())), // Infallible
})
.collect();