RIFF INFO: Ignore invalid text encodings

2024-12-04 18:09:11 +00:00 · 2024-11-20 01:38:06 -05:00 · 2024-11-20 01:38:06 -05:00 · 3d291d1739
commit 3d291d1739
parent 0578ee4dfd
4 changed files with 40 additions and 10 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -34,6 +34,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
    - `gnre` present + `©gen` present, `©gen` takes precedence and `gnre` is discarded
      - With [ParsingOptions::implicit_conversions](https://docs.rs/lofty/latest/lofty/config/struct.ParseOptions.html#method.implicit_conversions)
        set to `false`, `gnre` will be retained as an atom of type `Unknown`.
+- **RIFF INFO**: Ignore text decoding errors when not using `ParsingMode::Strict` ([issue](https://github.com/Serial-ATA/lofty-rs/issues/373))
+  - RIFF INFO tags may be encoded with a non-UTF-8 system encoding that we have no way of knowing. It's no longer an error to read these files;
+    it's just unlikely that anything useful will come out of the RIFF INFO tags.

 ### Fixed
 - **MusePack**: Fix potential panic when the beginning silence makes up the entire sample count ([PR](https://github.com/Serial-ATA/lofty-rs/pull/449))
--- a/lofty/src/iff/wav/read.rs
+++ b/lofty/src/iff/wav/read.rs
@ -93,7 +93,13 @@ where
 							err!(SizeMismatch);
 						}

-						super::tag::read::parse_riff_info(data, &mut chunks, end, &mut riff_info)?;
+						super::tag::read::parse_riff_info(
+							data,
+							&mut chunks,
+							end,
+							&mut riff_info,
+							parse_options.parsing_mode,
+						)?;
 					},
 					_ => {
 						data.seek(SeekFrom::Current(-4))?;
--- a/lofty/src/iff/wav/tag/mod.rs
+++ b/lofty/src/iff/wav/tag/mod.rs
@ -354,7 +354,7 @@ pub(crate) fn tagitems_into_riff<'a>(

 #[cfg(test)]
 mod tests {
-	use crate::config::WriteOptions;
+	use crate::config::{ParsingMode, WriteOptions};
 	use crate::iff::chunk::Chunks;
 	use crate::iff::wav::RiffInfoList;
 	use crate::prelude::*;
@ -383,6 +383,7 @@ mod tests {
 			&mut Chunks::<LittleEndian>::new(tag.len() as u64),
 			(tag.len() - 1) as u64,
 			&mut parsed_tag,
+			ParsingMode::Strict,
 		)
 		.unwrap();

@ -399,6 +400,7 @@ mod tests {
 			&mut Chunks::<LittleEndian>::new(tag.len() as u64),
 			(tag.len() - 1) as u64,
 			&mut parsed_tag,
+			ParsingMode::Strict,
 		)
 		.unwrap();

@ -415,6 +417,7 @@ mod tests {
 			&mut Chunks::<LittleEndian>::new(tag.len() as u64),
 			(tag.len() - 13) as u64,
 			&mut temp_parsed_tag,
+			ParsingMode::Strict,
 		)
 		.unwrap();

@ -433,6 +436,7 @@ mod tests {
 			&mut Chunks::<LittleEndian>::new(tag_bytes.len() as u64),
 			(tag_bytes.len() - 1) as u64,
 			&mut riff_info,
+			ParsingMode::Strict,
 		)
 		.unwrap();

--- a/lofty/src/iff/wav/tag/read.rs
+++ b/lofty/src/iff/wav/tag/read.rs
@ -1,5 +1,6 @@
 use super::RiffInfoList;
-use crate::error::Result;
+use crate::config::ParsingMode;
+use crate::error::{ErrorKind, Result};
 use crate::iff::chunk::Chunks;
 use crate::macros::decode_err;
 use crate::util::text::utf8_decode_str;
@ -13,24 +14,40 @@ pub(in crate::iff::wav) fn parse_riff_info<R>(
 	chunks: &mut Chunks<LittleEndian>,
 	end: u64,
 	tag: &mut RiffInfoList,
+	parse_mode: ParsingMode,
 ) -> Result<()>
 where
 	R: Read + Seek,
 {
 	while data.stream_position()? != end && chunks.next(data).is_ok() {
 		let key_str = utf8_decode_str(&chunks.fourcc)
-			.map_err(|_| decode_err!(Wav, "Non UTF-8 item key found in RIFF INFO"))?;
+			.map_err(|_| decode_err!(Wav, "Invalid item key found in RIFF INFO"))?;

 		if !verify_key(key_str) {
 			decode_err!(@BAIL Wav, "RIFF INFO item key contains invalid characters");
 		}

-		tag.items.push((
-			key_str.to_owned(),
-			chunks
-				.read_cstring(data)
-				.map_err(|_| decode_err!(Wav, "Failed to read RIFF INFO item value"))?,
-		));
+		let key = key_str.to_owned();
+		let value;
+		match chunks.read_cstring(data) {
+			Ok(cstr) => value = cstr,
+			Err(e) => {
+				if parse_mode == ParsingMode::Strict {
+					decode_err!(@BAIL Wav, "Failed to read RIFF INFO item value")
+				}
+
+				// RIFF INFO tags have no standard text encoding, so they will occasionally default
+				// to the system encoding, which isn't always UTF-8. In reality, if one item fails
+				// they likely all will, but we'll keep trying.
+				if matches!(e.kind(), ErrorKind::StringFromUtf8(_)) {
+					continue;
+				}
+
+				return Err(e);
+			},
+		}
+
+		tag.items.push((key, value));
 	}

 	Ok(())