ID3: Search through junk in find_id3v2()

This commit is contained in:
Serial 2024-03-28 12:29:35 -04:00 committed by Alex
parent 87028cfa18
commit a54861b02c
7 changed files with 119 additions and 81 deletions

View file

@ -6,7 +6,7 @@ use crate::error::Result;
use crate::id3::v1::tag::Id3v1Tag;
use crate::id3::v2::read::parse_id3v2;
use crate::id3::v2::tag::Id3v2Tag;
use crate::id3::{find_id3v1, find_id3v2, find_lyrics3v2, ID3FindResults};
use crate::id3::{find_id3v1, find_id3v2, find_lyrics3v2, FindId3v2Config, ID3FindResults};
use crate::macros::decode_err;
use crate::probe::ParseOptions;
@ -28,7 +28,9 @@ where
let mut ape_tag: Option<ApeTag> = None;
// ID3v2 tags are unsupported in APE files, but still possible
if let ID3FindResults(Some(header), Some(content)) = find_id3v2(data, true)? {
if let ID3FindResults(Some(header), Some(content)) =
find_id3v2(data, FindId3v2Config::READ_TAG)?
{
log::warn!("Encountered an ID3v2 tag. This tag cannot be rewritten to the APE file!");
stream_len -= u64::from(header.size);

View file

@ -3,7 +3,7 @@ use super::ApeTagRef;
use crate::ape::constants::APE_PREAMBLE;
use crate::ape::tag::read;
use crate::error::Result;
use crate::id3::{find_id3v1, find_id3v2, find_lyrics3v2};
use crate::id3::{find_id3v1, find_id3v2, find_lyrics3v2, FindId3v2Config};
use crate::macros::{decode_err, err};
use crate::probe::Probe;
use crate::tag::item::ItemValueRef;
@ -28,7 +28,7 @@ where
let data = probe.into_inner();
// We don't actually need the ID3v2 tag, but reading it will seek to the end of it if it exists
find_id3v2(data, false)?;
find_id3v2(data, FindId3v2Config::NO_READ_TAG)?;
let mut ape_preamble = [0; 8];
data.read_exact(&mut ape_preamble)?;

View file

@ -7,7 +7,7 @@ use crate::flac::block::{
BLOCK_ID_VORBIS_COMMENTS,
};
use crate::id3::v2::read::parse_id3v2;
use crate::id3::{find_id3v2, ID3FindResults};
use crate::id3::{find_id3v2, FindId3v2Config, ID3FindResults};
use crate::macros::decode_err;
use crate::ogg::read::read_comments;
use crate::picture::Picture;
@ -48,7 +48,9 @@ where
};
// It is possible for a FLAC file to contain an ID3v2 tag
if let ID3FindResults(Some(header), Some(content)) = find_id3v2(data, true)? {
if let ID3FindResults(Some(header), Some(content)) =
find_id3v2(data, FindId3v2Config::READ_TAG)?
{
log::warn!("Encountered an ID3v2 tag. This tag cannot be rewritten to the FLAC file!");
let reader = &mut &*content;

View file

@ -96,22 +96,58 @@ where
Ok(ID3FindResults(header, id3v1))
}
/// Options controlling how `find_id3v2()` looks for an ID3v2 tag.
#[derive(Copy, Clone, Debug)]
pub(crate) struct FindId3v2Config {
/// When `true`, the tag's content is read into a buffer after its header is parsed.
pub(crate) read: bool,
/// When `Some(n)`, up to `n` bytes from the current stream position are scanned for
/// the `ID3` marker before the header is parsed; if the marker is not found in that
/// window, no tag is reported.
///
/// When `None`, no scan is done and the header is parsed at the current position.
pub(crate) allowed_junk_window: Option<u64>,
}
/// Preset configurations for the common cases where no junk search is wanted.
impl FindId3v2Config {
/// Do not read the tag content and do not search through junk (`read: false`,
/// no junk window).
pub(crate) const NO_READ_TAG: Self = Self {
read: false,
allowed_junk_window: None,
};
/// Read the tag content, but do not search through junk (`read: true`,
/// no junk window).
pub(crate) const READ_TAG: Self = Self {
read: true,
allowed_junk_window: None,
};
}
pub(crate) fn find_id3v2<R>(
data: &mut R,
read: bool,
config: FindId3v2Config,
) -> Result<ID3FindResults<Id3v2Header, Option<Vec<u8>>>>
where
R: Read + Seek,
{
log::debug!("Searching for an ID3v2 tag");
log::debug!(
"Searching for an ID3v2 tag at offset: {}",
data.stream_position()?
);
let mut header = None;
let mut id3v2 = None;
if let Some(junk_window) = config.allowed_junk_window {
let mut id3v2_search_window = data.by_ref().take(junk_window);
let Some(id3v2_offset) = find_id3v2_in_junk(&mut id3v2_search_window)? else {
return Ok(ID3FindResults(None, None));
};
log::warn!(
"Found an ID3v2 tag preceded by junk data, offset: {}",
id3v2_offset
);
data.seek(SeekFrom::Current(-3))?;
}
if let Ok(id3v2_header) = Id3v2Header::parse(data) {
log::debug!("Found an ID3v2 tag, parsing");
if read {
if config.read {
let mut tag = try_vec![0; id3v2_header.size as usize];
data.read_exact(&mut tag)?;
@ -131,3 +167,25 @@ where
Ok(ID3FindResults(header, id3v2))
}
/// Scans `reader` one byte at a time for the 3-byte `ID3` marker.
///
/// Returns the offset of the marker's first byte, counted from where `reader`
/// was positioned on entry, or `None` if the reader runs out before the marker
/// is seen. On a match, `reader` is left just past the marker.
///
/// # Errors
///
/// Any error produced while reading a byte is propagated.
fn find_id3v2_in_junk<R>(reader: &mut R) -> Result<Option<u64>>
where
    R: Read,
{
    const MARKER: [u8; 3] = *b"ID3";

    // Sliding window over the three most recently read bytes
    let mut window = [0_u8; 3];
    for (position, next_byte) in reader.bytes().enumerate() {
        window.rotate_left(1);
        window[2] = next_byte?;

        if window == MARKER {
            // `position` is the index of the marker's last byte
            return Ok(Some((position - 2) as u64));
        }
    }

    Ok(None)
}

View file

@ -4,11 +4,11 @@ mod frame;
use super::Id3v2TagFlags;
use crate::error::Result;
use crate::file::FileType;
use crate::id3::find_id3v2;
use crate::id3::v2::frame::FrameRef;
use crate::id3::v2::tag::Id3v2TagRef;
use crate::id3::v2::util::synchsafe::SynchsafeInteger;
use crate::id3::v2::Id3v2Tag;
use crate::id3::{find_id3v2, FindId3v2Config};
use crate::macros::err;
use crate::probe::Probe;
@ -82,7 +82,8 @@ pub(crate) fn write_id3v2<'a, I: Iterator<Item = FrameRef<'a>> + Clone + 'a>(
let id3v2 = create_tag(tag)?;
// find_id3v2 will seek us to the end of the tag
find_id3v2(data, false)?;
// TODO: Search through junk
find_id3v2(data, FindId3v2Config::NO_READ_TAG)?;
let mut file_bytes = Vec::new();
data.read_to_end(&mut file_bytes)?;

View file

@ -4,7 +4,7 @@ use crate::ape::header::read_ape_header;
use crate::error::Result;
use crate::id3::v2::header::Id3v2Header;
use crate::id3::v2::read::parse_id3v2;
use crate::id3::{find_id3v1, find_lyrics3v2, ID3FindResults};
use crate::id3::{find_id3v1, find_lyrics3v2, FindId3v2Config, ID3FindResults};
use crate::macros::{decode_err, err};
use crate::mpeg::header::HEADER_MASK;
use crate::probe::{ParseOptions, ParsingMode};
@ -89,53 +89,48 @@ where
#[allow(clippy::neg_multiply)]
reader.seek(SeekFrom::Current(-1 * header.len() as i64))?;
#[allow(clippy::used_underscore_binding)]
if let Some((_first_frame_header, _first_frame_offset)) = find_next_frame(reader)? {
// TODO: We are manually searching through junk here, this could potentially be moved into `find_id3v2()`
if file.id3v2_tag.is_none()
&& parse_options.parsing_mode != ParsingMode::Strict
&& _first_frame_offset > 0
{
reader.seek(SeekFrom::Start(0))?;
let search_window_size =
std::cmp::min(_first_frame_offset, parse_options.max_junk_bytes as u64);
let mut id3v2_search_window = reader.take(search_window_size);
// TODO: A whole lot of code duplication here, it's nearly identical to what we did above
if let Some(id3v2_offset) = find_id3v2_in_junk(&mut id3v2_search_window)? {
log::warn!(
"Found an ID3v2 tag preceded by junk data, offset: {}",
id3v2_offset
);
reader.seek(SeekFrom::Current(-3))?;
let header = Id3v2Header::parse(reader)?;
let skip_footer = header.flags.footer;
let id3v2 = parse_id3v2(reader, header, parse_options.parsing_mode)?;
if let Some(existing_tag) = &mut file.id3v2_tag {
// https://github.com/Serial-ATA/lofty-rs/issues/87
// Duplicate tags should have their frames appended to the previous
for frame in id3v2.frames {
existing_tag.insert(frame);
}
continue;
}
if skip_footer {
reader.seek(SeekFrom::Current(10))?;
}
file.id3v2_tag = Some(id3v2);
}
}
first_frame_offset = _first_frame_offset;
first_frame_header = Some(_first_frame_header);
let Some((_first_frame_header, _first_frame_offset)) = find_next_frame(reader)?
else {
break;
};
if file.id3v2_tag.is_none()
&& parse_options.parsing_mode != ParsingMode::Strict
&& _first_frame_offset > 0
{
reader.seek(SeekFrom::Start(0))?;
let search_window_size =
std::cmp::min(_first_frame_offset, parse_options.max_junk_bytes as u64);
let config = FindId3v2Config {
read: true,
allowed_junk_window: Some(search_window_size),
};
if let ID3FindResults(Some(header), Some(id3v2_bytes)) =
crate::id3::find_id3v2(reader, config)?
{
let reader = &mut &*id3v2_bytes;
let id3v2 = parse_id3v2(reader, header, parse_options.parsing_mode)?;
if let Some(existing_tag) = &mut file.id3v2_tag {
// https://github.com/Serial-ATA/lofty-rs/issues/87
// Duplicate tags should have their frames appended to the previous
for frame in id3v2.frames {
existing_tag.insert(frame);
}
continue;
}
file.id3v2_tag = Some(id3v2);
}
}
first_frame_offset = _first_frame_offset;
first_frame_header = Some(_first_frame_header);
break;
},
}
}
@ -233,25 +228,3 @@ where
Ok(None)
}
/// Looks for the `ID3` marker in the (potential) junk data that sits between
/// the start of the file and the first frame.
///
/// The returned value is the offset of the marker's first byte relative to the
/// reader's starting position; `None` means the reader was exhausted without a match.
fn find_id3v2_in_junk<R>(reader: &mut R) -> Result<Option<u64>>
where
    R: Read,
{
    let mut last_three = [0_u8; 3];

    for (offset, read_result) in reader.bytes().enumerate() {
        let current = read_result?;
        // Drop the oldest byte and append the newest
        last_three = [last_three[1], last_three[2], current];

        if &last_three == b"ID3" {
            return Ok(Some((offset - 2) as u64));
        }
    }

    Ok(None)
}

View file

@ -4,7 +4,7 @@ use super::sv8::MpcSv8Properties;
use super::{MpcFile, MpcProperties, MpcStreamVersion};
use crate::error::Result;
use crate::id3::v2::read::parse_id3v2;
use crate::id3::{find_id3v1, find_id3v2, find_lyrics3v2, ID3FindResults};
use crate::id3::{find_id3v1, find_id3v2, find_lyrics3v2, FindId3v2Config, ID3FindResults};
use crate::probe::ParseOptions;
use crate::traits::SeekStreamLen;
@ -25,7 +25,9 @@ where
// ID3v2 tags are unsupported in MPC files, but still possible
#[allow(unused_variables)]
if let ID3FindResults(Some(header), Some(content)) = find_id3v2(reader, true)? {
if let ID3FindResults(Some(header), Some(content)) =
find_id3v2(reader, FindId3v2Config::READ_TAG)?
{
let reader = &mut &*content;
let id3v2 = parse_id3v2(reader, header, parse_options.parsing_mode)?;