RIFF INFO: Ignore invalid text encodings

This commit is contained in:
Serial 2024-11-20 01:38:06 -05:00 committed by Alex
parent 0578ee4dfd
commit 3d291d1739
4 changed files with 40 additions and 10 deletions

View file

@ -34,6 +34,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `gnre` present + `©gen` present, `©gen` takes precedence and `gnre` is discarded
- With [ParseOptions::implicit_conversions](https://docs.rs/lofty/latest/lofty/config/struct.ParseOptions.html#method.implicit_conversions)
set to `false`, `gnre` will be retained as an atom of type `Unknown`.
- **RIFF INFO**: Ignore text decoding errors when not using `ParsingMode::Strict` ([issue](https://github.com/Serial-ATA/lofty-rs/issues/373))
- RIFF INFO tags may be encoded with a non-UTF-8 system encoding, which we have no way of knowing. It's no longer an error to read these files;
it's just unlikely that anything useful will come out of the RIFF INFO tags.
### Fixed
- **MusePack**: Fix potential panic when the beginning silence makes up the entire sample count ([PR](https://github.com/Serial-ATA/lofty-rs/pull/449))

View file

@ -93,7 +93,13 @@ where
err!(SizeMismatch);
}
super::tag::read::parse_riff_info(data, &mut chunks, end, &mut riff_info)?;
super::tag::read::parse_riff_info(
data,
&mut chunks,
end,
&mut riff_info,
parse_options.parsing_mode,
)?;
},
_ => {
data.seek(SeekFrom::Current(-4))?;

View file

@ -354,7 +354,7 @@ pub(crate) fn tagitems_into_riff<'a>(
#[cfg(test)]
mod tests {
use crate::config::WriteOptions;
use crate::config::{ParsingMode, WriteOptions};
use crate::iff::chunk::Chunks;
use crate::iff::wav::RiffInfoList;
use crate::prelude::*;
@ -383,6 +383,7 @@ mod tests {
&mut Chunks::<LittleEndian>::new(tag.len() as u64),
(tag.len() - 1) as u64,
&mut parsed_tag,
ParsingMode::Strict,
)
.unwrap();
@ -399,6 +400,7 @@ mod tests {
&mut Chunks::<LittleEndian>::new(tag.len() as u64),
(tag.len() - 1) as u64,
&mut parsed_tag,
ParsingMode::Strict,
)
.unwrap();
@ -415,6 +417,7 @@ mod tests {
&mut Chunks::<LittleEndian>::new(tag.len() as u64),
(tag.len() - 13) as u64,
&mut temp_parsed_tag,
ParsingMode::Strict,
)
.unwrap();
@ -433,6 +436,7 @@ mod tests {
&mut Chunks::<LittleEndian>::new(tag_bytes.len() as u64),
(tag_bytes.len() - 1) as u64,
&mut riff_info,
ParsingMode::Strict,
)
.unwrap();

View file

@ -1,5 +1,6 @@
use super::RiffInfoList;
use crate::error::Result;
use crate::config::ParsingMode;
use crate::error::{ErrorKind, Result};
use crate::iff::chunk::Chunks;
use crate::macros::decode_err;
use crate::util::text::utf8_decode_str;
@ -13,24 +14,40 @@ pub(in crate::iff::wav) fn parse_riff_info<R>(
chunks: &mut Chunks<LittleEndian>,
end: u64,
tag: &mut RiffInfoList,
parse_mode: ParsingMode,
) -> Result<()>
where
R: Read + Seek,
{
while data.stream_position()? != end && chunks.next(data).is_ok() {
let key_str = utf8_decode_str(&chunks.fourcc)
.map_err(|_| decode_err!(Wav, "Non UTF-8 item key found in RIFF INFO"))?;
.map_err(|_| decode_err!(Wav, "Invalid item key found in RIFF INFO"))?;
if !verify_key(key_str) {
decode_err!(@BAIL Wav, "RIFF INFO item key contains invalid characters");
}
tag.items.push((
key_str.to_owned(),
chunks
.read_cstring(data)
.map_err(|_| decode_err!(Wav, "Failed to read RIFF INFO item value"))?,
));
let key = key_str.to_owned();
let value;
match chunks.read_cstring(data) {
Ok(cstr) => value = cstr,
Err(e) => {
if parse_mode == ParsingMode::Strict {
decode_err!(@BAIL Wav, "Failed to read RIFF INFO item value")
}
// RIFF INFO tags have no standard text encoding, so they will occasionally default
// to the system encoding, which isn't always UTF-8. In reality, if one item fails
// they likely all will, but we'll keep trying.
if matches!(e.kind(), ErrorKind::StringFromUtf8(_)) {
continue;
}
return Err(e);
},
}
tag.items.push((key, value));
}
Ok(())