mirror of
https://github.com/Serial-ATA/lofty-rs
synced 2025-03-04 14:57:17 +00:00
ID3v2: Properly handle multi-value UTF-16 encoded frames
This commit is contained in:
parent
9985a55e02
commit
23c334e1de
3 changed files with 16 additions and 12 deletions
|
@ -21,7 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
parse it as another atom definition. As the specification is broad, there is no way for us to say *with certainty*
|
||||
that an identifier is invalid. Now we unfortunately have to guess the validity based on the commonly known atoms.
|
||||
For this, we follow [TagLib]'s [checks](https://github.com/taglib/taglib/blob/b40b834b1bdbd74593c5619e969e793d4d4886d9/taglib/mp4/mp4atom.cpp#L89).
|
||||
- **ID3v2**: No longer error on inputs shorter than 128 bytes (the length of an ID3v1 tag). ([PR](https://github.com/Serial-ATA/lofty-rs/pull/270))
|
||||
- **ID3v1**: No longer error on inputs shorter than 128 bytes (the length of an ID3v1 tag). ([PR](https://github.com/Serial-ATA/lofty-rs/pull/270))
|
||||
- **ID3v2**: No longer error on multi-value UTF-16 encoded text frames ([issue](https://github.com/Serial-ATA/lofty-rs/issues/265)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/284))
|
||||
|
||||
### Removed
|
||||
- **MP4**: `Ilst::{track_total, disc_number, disc_total}` ([PR](https://github.com/Serial-ATA/lofty-rs/pull/269))
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
use crate::error::{Id3v2Error, Id3v2ErrorKind, LoftyError, Result};
|
||||
use crate::id3::v2::frame::content::verify_encoding;
|
||||
use crate::id3::v2::header::Id3v2Version;
|
||||
use crate::util::text::{
|
||||
decode_text, encode_text, read_to_terminator, utf16_decode_bytes, TextEncoding,
|
||||
};
|
||||
use crate::util::text::{decode_text, encode_text, utf16_decode_bytes, TextEncoding};
|
||||
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::io::Read;
|
||||
|
@ -74,17 +72,19 @@ impl ExtendedTextFrame {
|
|||
|
||||
// It's possible for the description to be the only string with a BOM
|
||||
'utf16: {
|
||||
let bom = description.bom;
|
||||
let Some(raw_text) = read_to_terminator(reader, TextEncoding::UTF16) else {
|
||||
let mut raw_text = Vec::new();
|
||||
reader.read_to_end(&mut raw_text)?;
|
||||
|
||||
if raw_text.is_empty() {
|
||||
// Nothing left to do
|
||||
frame_content = String::new();
|
||||
break 'utf16;
|
||||
};
|
||||
}
|
||||
|
||||
let mut bom = description.bom;
|
||||
if raw_text.starts_with(&[0xFF, 0xFE]) || raw_text.starts_with(&[0xFE, 0xFF]) {
|
||||
frame_content =
|
||||
decode_text(&mut &raw_text[..], TextEncoding::UTF16, false)?.content;
|
||||
break 'utf16;
|
||||
// The text specifies a BOM
|
||||
bom = [raw_text[0], raw_text[1]];
|
||||
}
|
||||
|
||||
let endianness = match bom {
|
||||
|
|
|
@ -207,8 +207,11 @@ pub(crate) fn utf16_decode_bytes(bytes: &[u8], endianness: fn([u8; 2]) -> u16) -
|
|||
|
||||
let unverified: Vec<u16> = bytes
|
||||
.chunks_exact(2)
|
||||
.map_while(|c| match c {
|
||||
[0, 0] => None,
|
||||
// In ID3v2, it is possible to have multiple UTF-16 strings separated by null.
|
||||
// This also makes it possible for us to encounter multiple BOMs in a single string.
|
||||
// We must filter them out.
|
||||
.filter_map(|c| match c {
|
||||
[0xFF, 0xFE] | [0xFE, 0xFF] => None,
|
||||
_ => Some(endianness(c.try_into().unwrap())), // Infallible
|
||||
})
|
||||
.collect();
|
||||
|
|
Loading…
Add table
Reference in a new issue