From 24da5bac31da0ecd7473196892ad4679551bb299 Mon Sep 17 00:00:00 2001 From: Serial <69764315+Serial-ATA@users.noreply.github.com> Date: Sat, 4 May 2024 16:03:22 -0400 Subject: [PATCH] MPEG: Improve duration estimation --- lofty/src/mpeg/header.rs | 100 ++++++++++++++++++++++++++++------ lofty/src/mpeg/properties.rs | 46 ++++++---------- lofty/src/mpeg/read.rs | 8 ++- lofty/src/properties/tests.rs | 6 +- 4 files changed, 108 insertions(+), 52 deletions(-) diff --git a/lofty/src/mpeg/header.rs b/lofty/src/mpeg/header.rs index 507398f7..f57eacc2 100644 --- a/lofty/src/mpeg/header.rs +++ b/lofty/src/mpeg/header.rs @@ -1,4 +1,5 @@ use super::constants::{BITRATES, PADDING_SIZES, SAMPLES, SAMPLE_RATES, SIDE_INFORMATION_SIZES}; +use crate::config::ParsingMode; use crate::error::Result; use crate::macros::decode_err; @@ -49,10 +50,11 @@ where // Unlike `search_for_frame_sync`, since this has the `Seek` bound, it will seek the reader // back to the start of the header. const REV_FRAME_SEARCH_BOUNDS: u64 = 1024; -pub(super) fn rev_search_for_frame_sync( +pub(super) fn rev_search_for_frame_header( input: &mut R, pos: &mut u64, -) -> std::io::Result> + parse_mode: ParsingMode, +) -> Result> where R: Read + Seek, { @@ -61,15 +63,49 @@ where *pos -= search_bounds; input.seek(SeekFrom::Start(*pos))?; - let ret = search_for_frame_sync(&mut input.take(search_bounds)); - if let Ok(Some(_)) = ret { - // Seek to the start of the frame sync - input.seek(SeekFrom::Current(-2))?; + let mut buf = Vec::with_capacity(search_bounds as usize); + input.take(search_bounds).read_to_end(&mut buf)?; + + let mut frame_sync = [0u8; 2]; + for (i, byte) in buf.iter().rev().enumerate() { + frame_sync[1] = frame_sync[0]; + frame_sync[0] = *byte; + if verify_frame_sync(frame_sync) { + let relative_frame_start = (search_bounds as usize) - (i + 1); + if relative_frame_start + 4 > buf.len() { + if parse_mode == ParsingMode::Strict { + decode_err!(@BAIL Mpeg, "Expected full frame header (incomplete stream?)") + } + + log::warn!("MPEG: Incomplete frame header, giving up"); + break; + } + + let header = Header::read(u32::from_be_bytes([ + frame_sync[0], + frame_sync[1], + buf[relative_frame_start + 2], + buf[relative_frame_start + 3], + ])); + + // We need to check if the header is actually valid. For + // all we know, we could be in some junk (ex. 0xFF_FF_FF_FF). + if header.is_none() { + continue; + } + + // Seek to the start of the frame sync + *pos += relative_frame_start as u64; + input.seek(SeekFrom::Start(*pos))?; + + return Ok(header); + } } - ret + Ok(None) } +/// See [`cmp_header()`]. pub(crate) enum HeaderCmpResult { Equal, Undetermined, @@ -80,6 +116,18 @@ pub(crate) enum HeaderCmpResult { // If they aren't equal, something is broken. pub(super) const HEADER_MASK: u32 = 0xFFFE_0C00; +/// Compares the versions, layers, and sample rates of two frame headers. +/// +/// It is safe to assume that the reader will no longer produce valid headers if [`HeaderCmpResult::Undetermined`] +/// is returned. +/// +/// To compare two already constructed [`Header`]s, use [`Header::cmp()`]. +/// +/// ## Returns +/// +/// - [`HeaderCmpResult::Equal`] if the headers are equal. +/// - [`HeaderCmpResult::NotEqual`] if the headers are not equal. +/// - [`HeaderCmpResult::Undetermined`] if the comparison could not be made (Some IO error occurred). pub(crate) fn cmp_header( reader: &mut R, header_size: u32, @@ -162,7 +210,7 @@ pub enum Emphasis { CCIT_J17, } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub(crate) struct Header { pub(crate) sample_rate: u32, pub(crate) len: u32, @@ -269,14 +317,21 @@ impl Header { Some(header) } + + /// Equivalent of [`cmp_header()`], but for an already constructed `Header`. + pub(super) fn cmp(self, other: &Self) -> bool { + self.version == other.version + && self.layer == other.layer + && self.sample_rate == other.sample_rate + } } -pub(super) struct XingHeader { +pub(super) struct VbrHeader { pub frames: u32, pub size: u32, } -impl XingHeader { +impl VbrHeader { pub(super) fn read(reader: &mut &[u8]) -> Result> { let reader_len = reader.len(); @@ -331,7 +386,9 @@ impl XingHeader { #[cfg(test)] mod tests { + use crate::config::ParsingMode; use crate::tag::utils::test_utils::read_path; + use std::io::{Cursor, Read, Seek, SeekFrom}; #[test] @@ -347,21 +404,30 @@ mod tests { } #[test] - fn rev_search_for_frame_sync() { - fn test(reader: &mut R, expected_result: Option) { + #[rustfmt::skip] + fn rev_search_for_frame_header() { + fn test(reader: &mut R, expected_reader_position: Option) { // We have to start these at the end to do a reverse search, of course :) let mut pos = reader.seek(SeekFrom::End(0)).unwrap(); - let ret = super::rev_search_for_frame_sync(reader, &mut pos).unwrap(); - assert_eq!(ret, expected_result); + let ret = super::rev_search_for_frame_header(reader, &mut pos, ParsingMode::Strict); + + if expected_reader_position.is_some() { + assert!(ret.is_ok()); + assert!(ret.unwrap().is_some()); + assert_eq!(Some(pos), expected_reader_position); + return; + } + + assert!(ret.unwrap().is_none()); } - test(&mut Cursor::new([0xFF, 0xFB, 0x00]), Some(0)); - test(&mut Cursor::new([0x00, 0x00, 0x01, 0xFF, 0xFB]), Some(3)); + test(&mut Cursor::new([0xFF, 0xFB, 0x52, 0xC4]), Some(0)); + test(&mut Cursor::new([0x00, 0x00, 0x01, 0xFF, 0xFB, 0x52, 0xC4]), Some(3)); test(&mut Cursor::new([0x01, 0xFF]), None); let bytes = read_path("tests/files/assets/rev_frame_sync_search.mp3"); let mut reader = Cursor::new(bytes); - test(&mut reader, Some(283)); + test(&mut reader, Some(595)); } } diff --git a/lofty/src/mpeg/properties.rs b/lofty/src/mpeg/properties.rs index 80a50cc0..c703a94b 100644 --- a/lofty/src/mpeg/properties.rs +++ b/lofty/src/mpeg/properties.rs @@ -1,14 +1,13 @@ -use super::header::{ChannelMode, Emphasis, Header, Layer, MpegVersion, XingHeader}; +use super::header::{ChannelMode, Emphasis, Header, Layer, MpegVersion, VbrHeader}; +use crate::config::ParsingMode; use crate::error::Result; -use crate::mpeg::header::{cmp_header, rev_search_for_frame_sync, HeaderCmpResult, HEADER_MASK}; +use crate::mpeg::header::rev_search_for_frame_header; use crate::properties::{ChannelMask, FileProperties}; use crate::util::math::RoundedDivision; use std::io::{Read, Seek, SeekFrom}; use std::time::Duration; -use byteorder::{BigEndian, ReadBytesExt}; - /// An MPEG file's audio properties #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] #[non_exhaustive] @@ -127,8 +126,9 @@ pub(super) fn read_properties( reader: &mut R, first_frame: (Header, u64), mut last_frame_offset: u64, - xing_header: Option, + xing_header: Option, file_length: u64, + parse_mode: ParsingMode, ) -> Result<()> where R: Read + Seek, @@ -177,29 +177,17 @@ where reader.seek(SeekFrom::Start(last_frame_offset))?; let mut last_frame = None; - let mut pos = reader.stream_position()?; + let mut pos = last_frame_offset; while pos > 0 { - match rev_search_for_frame_sync(reader, &mut pos) { - // Found a frame sync, attempt to read a header - Ok(Some(_)) => { + match rev_search_for_frame_header(reader, &mut pos, parse_mode) { + // Found a frame header + Ok(Some(header)) => { // Move `last_frame_offset` back to the actual position - last_frame_offset = reader.stream_position()?; - let last_frame_data = reader.read_u32::()?; + last_frame_offset = pos; - if let Some(last_frame_header) = Header::read(last_frame_data) { - match cmp_header( - reader, - 4, - last_frame_header.len, - last_frame_data, - HEADER_MASK, - ) { - HeaderCmpResult::Equal | HeaderCmpResult::Undetermined => { - last_frame = Some(last_frame_header); - break; - }, - HeaderCmpResult::NotEqual => {}, - } + if header.cmp(&first_frame_header) { + last_frame = Some(header); + break; } }, // Encountered some IO error, just break @@ -211,11 +199,11 @@ where if let Some(last_frame_header) = last_frame { let stream_len = last_frame_offset - first_frame_offset + u64::from(last_frame_header.len); - let length = ((stream_len as f64) * 8.0) / f64::from(properties.audio_bitrate) + 0.5; + let length = (stream_len * 8).div_round(u64::from(properties.audio_bitrate)); - if length > 0.0 { - properties.overall_bitrate = (((file_length as f64) * 8.0) / length) as u32; - properties.duration = Duration::from_millis(length as u64); + if length > 0 { + properties.overall_bitrate = ((file_length * 8) / length) as u32; + properties.duration = Duration::from_millis(length); } } diff --git a/lofty/src/mpeg/read.rs b/lofty/src/mpeg/read.rs index 628e2368..41ef39d6 100644 --- a/lofty/src/mpeg/read.rs +++ b/lofty/src/mpeg/read.rs @@ -1,4 +1,4 @@ -use super::header::{cmp_header, search_for_frame_sync, Header, HeaderCmpResult, XingHeader}; +use super::header::{cmp_header, search_for_frame_sync, Header, HeaderCmpResult, VbrHeader}; use super::{MpegFile, MpegProperties}; use crate::ape::header::read_ape_header; use crate::config::{ParseOptions, ParsingMode}; @@ -6,6 +6,7 @@ use crate::error::Result; use crate::id3::v2::header::Id3v2Header; use crate::id3::v2::read::parse_id3v2; use crate::id3::{find_id3v1, find_lyrics3v2, FindId3v2Config, ID3FindResults}; +use crate::io::SeekStreamLen; use crate::macros::{decode_err, err}; use crate::mpeg::header::HEADER_MASK; @@ -182,9 +183,9 @@ where let mut xing_reader = [0; 32]; reader.read_exact(&mut xing_reader)?; - let xing_header = XingHeader::read(&mut &xing_reader[..])?; + let xing_header = VbrHeader::read(&mut &xing_reader[..])?; - let file_length = reader.seek(SeekFrom::End(0))?; + let file_length = reader.stream_len_hack()?; super::properties::read_properties( &mut file.properties, @@ -193,6 +194,7 @@ where last_frame_offset, xing_header, file_length, + parse_options.parsing_mode, )?; } diff --git a/lofty/src/properties/tests.rs b/lofty/src/properties/tests.rs index 1a06c2bd..fc755f61 100644 --- a/lofty/src/properties/tests.rs +++ b/lofty/src/properties/tests.rs @@ -75,7 +75,7 @@ const MP1_PROPERTIES: MpegProperties = MpegProperties { copyright: false, original: true, duration: Duration::from_millis(588), // FFmpeg reports 576, possibly an issue - overall_bitrate: 383, // TODO: FFmpeg reports 392 + overall_bitrate: 384, // TODO: FFmpeg reports 392 audio_bitrate: 384, sample_rate: 32000, channels: 2, @@ -89,8 +89,8 @@ const MP2_PROPERTIES: MpegProperties = MpegProperties { mode_extension: None, copyright: false, original: true, - duration: Duration::from_millis(1344), // TODO: FFmpeg reports 1440 here - overall_bitrate: 411, // FFmpeg reports 384, related to above issue + duration: Duration::from_millis(1440), + overall_bitrate: 384, audio_bitrate: 384, sample_rate: 48000, channels: 2,