diff --git a/src/ape/properties.rs b/src/ape/properties.rs index 3bf088fa..f22e8450 100644 --- a/src/ape/properties.rs +++ b/src/ape/properties.rs @@ -74,7 +74,7 @@ impl ApeProperties { self.sample_rate } - /// bits per sample + /// Bits per sample pub fn bit_depth(&self) -> u8 { self.bit_depth } @@ -155,7 +155,6 @@ where return Err(LoftyError::Ape("File contains no frames")); } - // Unused let bits_per_sample = header_read.read_u16::()?; let channels = header_read.read_u16::()?; @@ -212,7 +211,6 @@ where let compression_level = header_first.read_u16::()?; - // Unused let format_flags = header_first.read_u16::()?; // https://github.com/fernandotcl/monkeys-audio/blob/5fe956c7e67c13daa80518a4cc7001e9fa185297/src/MACLib/MACLib.h#L74 let bit_depth = if (format_flags & 0b1) == 1 { diff --git a/src/error.rs b/src/error.rs index 62a10372..78d3ab1c 100644 --- a/src/error.rs +++ b/src/error.rs @@ -16,7 +16,7 @@ pub enum LoftyError { // File data related errors /// Provided an empty file - EmptyFile, + EmptyFile, // TODO: Remove this /// Attempting to read/write an abnormally large amount of data TooMuchData, diff --git a/src/iff/wav/properties.rs b/src/iff/wav/properties.rs index 739422e2..f69bc69a 100644 --- a/src/iff/wav/properties.rs +++ b/src/iff/wav/properties.rs @@ -91,7 +91,7 @@ impl WavProperties { self.sample_rate } - /// bits per sample + /// Bits per sample pub fn bit_depth(&self) -> u8 { self.bit_depth } diff --git a/src/mp3/header.rs b/src/mp3/header.rs index 49924a07..aac7da7a 100644 --- a/src/mp3/header.rs +++ b/src/mp3/header.rs @@ -9,11 +9,11 @@ pub(crate) fn verify_frame_sync(frame_sync: [u8; 2]) -> bool { frame_sync[0] == 0xFF && frame_sync[1] >> 5 == 0b111 } -/// Searches for a frame sync (11 bits with the value 1 like `0b1111_1111_111`) in the input reader. -/// The search starts at the beginning of the reader and returns the index relative to this beginning. -/// Only the first match is returned and on no match, [`None`] is returned instead. 
-/// -/// Note that the search searches in 8 bit steps, i.e. the first 8 bits need to be byte aligned. +// Searches for a frame sync (11 set bits) in the reader. +// The search starts at the beginning of the reader and returns the index relative to this beginning. +// This will return the first match, if one is found. +// +// Note that the search advances in 8 bit steps, i.e. the first 8 bits need to be byte aligned. pub(crate) fn search_for_frame_sync(input: &mut R) -> std::io::Result> where R: Read, @@ -25,16 +25,18 @@ where if let Some(byte) = iterator.next() { buffer[0] = byte?; } - // create a stream of overlapping 2 byte pairs - // example: [0x01, 0x02, 0x03, 0x04] should be analyzed as + // Create a stream of overlapping 2 byte pairs + // + // Example: + // [0x01, 0x02, 0x03, 0x04] should be analyzed as // [0x01, 0x02], [0x02, 0x03], [0x03, 0x04] for (index, byte) in iterator.enumerate() { buffer[1] = byte?; - // check the two bytes in the buffer + // Check the two bytes in the buffer if verify_frame_sync(buffer) { return Ok(Some(index as u64)); } - // if they do not match, copy the last byte in the buffer to the front for the next iteration + // If they do not match, copy the last byte in the buffer to the front for the next iteration buffer[0] = buffer[1]; } Ok(None) diff --git a/src/mp3/read.rs b/src/mp3/read.rs index 10bc466d..5499ae1c 100644 --- a/src/mp3/read.rs +++ b/src/mp3/read.rs @@ -80,17 +80,18 @@ where continue; } }, - // metadata blocks might be followed by junk bytes before the first MP3 frame begins + // Tags might be followed by junk bytes before the first MP3 frame begins _ => { - // seek back the length of the temporary header buffer - // so that all bytes are included in the search for a frame sync + // Seek back by the length of the temporary header buffer, so those bytes are included + // in the frame sync search #[allow(clippy::neg_multiply)] let start_of_search_area = reader.seek(SeekFrom::Current(-1 * header.len() as i64))?; + if let 
Some(first_mp3_frame_start_relative) = search_for_frame_sync(reader)? { let first_mp3_frame_start_absolute = start_of_search_area + first_mp3_frame_start_relative; - // read the first four bytes of the found frame + // Seek back to the start of the frame and read the header reader.seek(SeekFrom::Start(first_mp3_frame_start_absolute))?; let header = Header::read(reader.read_u32::()?)?; diff --git a/src/mp4/properties.rs b/src/mp4/properties.rs index f39d4bde..6086e400 100644 --- a/src/mp4/properties.rs +++ b/src/mp4/properties.rs @@ -32,6 +32,7 @@ pub struct Mp4Properties { overall_bitrate: u32, audio_bitrate: u32, sample_rate: u32, + bit_depth: Option, channels: u8, } @@ -42,7 +43,7 @@ impl From for FileProperties { overall_bitrate: Some(input.overall_bitrate), audio_bitrate: Some(input.audio_bitrate), sample_rate: Some(input.sample_rate), - bit_depth: None, + bit_depth: input.bit_depth, channels: Some(input.channels), } } @@ -56,6 +57,7 @@ impl Mp4Properties { overall_bitrate: u32, audio_bitrate: u32, sample_rate: u32, + bit_depth: Option, channels: u8, ) -> Self { Self { @@ -64,6 +66,7 @@ impl Mp4Properties { overall_bitrate, audio_bitrate, sample_rate, + bit_depth, channels, } } @@ -88,6 +91,11 @@ impl Mp4Properties { self.sample_rate } + /// Bits per sample + pub fn bit_depth(&self) -> Option { + self.bit_depth + } + /// Channel count pub fn channels(&self) -> u8 { self.channels @@ -201,6 +209,7 @@ where overall_bitrate: 0, audio_bitrate: 0, sample_rate: 0, + bit_depth: None, channels: 0, }; @@ -281,7 +290,7 @@ where // Descriptor length (1) // Elementary stream ID (2) // Flags (1) - let _info = stsd.read_u32::()?; + stsd.seek(SeekFrom::Current(4))?; // There is another descriptor embedded in the previous one let mut specific_config = [0; 4]; @@ -291,12 +300,11 @@ where if specific_config == [0x04, 0x80, 0x80, 0x80] { // Skipping 10 bytes // Descriptor length (1) - // MPEG4 Audio (1) + // Codec (1) // Stream type (1) // Buffer size (3) // Max bitrate (4) - let 
mut info = [0; 10]; - stsd.read_exact(&mut info)?; + stsd.seek(SeekFrom::Current(10))?; let average_bitrate = stsd.read_u32::()?; @@ -337,15 +345,20 @@ where if alac.ident == AtomIdent::Fourcc(*b"alac") { properties.codec = Mp4Codec::ALAC; - // Skipping 13 bytes + // Skipping 9 bytes // Version (4) // Samples per frame (4) // Compatible version (1) + data.seek(SeekFrom::Current(9))?; + // Sample size (1) + properties.bit_depth = Some(data.read_u8()?); + + // Skipping 3 bytes // Rice history mult (1) // Rice initial history (1) // Rice parameter limit (1) - data.seek(SeekFrom::Current(13))?; + data.seek(SeekFrom::Current(3))?; properties.channels = data.read_u8()?; diff --git a/src/probe.rs b/src/probe.rs index d68b1529..4ad58276 100644 --- a/src/probe.rs +++ b/src/probe.rs @@ -153,48 +153,43 @@ impl Probe { // (36 is just a guess as to how long the data for estimating the file type might be) let mut buf = [0; 36]; - // read the first 36 bytes and seek back to the starting position let starting_position = self.inner.stream_position()?; + // Read (up to) 36 bytes let buf_len = std::io::copy( &mut self.inner.by_ref().take(buf.len() as u64), &mut Cursor::new(&mut buf[..]), )? as usize; + self.inner.seek(SeekFrom::Start(starting_position))?; - // estimate the file type by using these 36 bytes - // note that any error from `from_buffer_inner` are suppressed, as it returns an error on unknown format + // Guess the file type by using these 36 bytes + // Note that `from_buffer_inner` does not fail; an unrecognized format is reported as `(None, None)` match FileType::from_buffer_inner(&buf[..buf_len]) { - // the file type was guessed based on these bytes - Ok((Some(f_ty), _)) => Ok(Some(f_ty)), - // the first data block is ID3 data; this means other data can follow (e.g. 
APE or MP3 frames) - Ok((None, id3_len)) => { - // the position right after the ID3 block is the internal size value (id3_len) - // added to the length of the ID3 header (which is 10 bytes), - // as the size does not include the header itself + // We were able to determine a file type + (Some(f_ty), _) => Ok(Some(f_ty)), + // The file starts with an ID3v2 tag; this means other data can follow (e.g. APE or MP3 frames) + (None, Some(id3_len)) => { + // `id3_len` is the size of the tag, not including the header (10 bytes) let position_after_id3_block = self .inner .seek(SeekFrom::Current(i64::from(10 + id3_len)))?; - let file_type_after_id3_block = { - // try to guess the file type after the ID3 block by inspecting the first 3 bytes - let mut ident = [0; 3]; - std::io::copy( - &mut self.inner.by_ref().take(ident.len() as u64), - &mut Cursor::new(&mut ident[..]), - )?; + // try to guess the file type after the ID3 block by inspecting the first 3 bytes + let mut ident = [0; 3]; + std::io::copy( + &mut self.inner.by_ref().take(ident.len() as u64), + &mut Cursor::new(&mut ident[..]), + )?; - if &ident == b"MAC" { - Ok(Some(FileType::APE)) - } else { - // potentially some junk bytes are between the ID3 block and the following MP3 block - // search for any possible sync bits after the ID3 block - self.inner.seek(SeekFrom::Start(position_after_id3_block))?; - if search_for_frame_sync(&mut self.inner)?.is_some() { - Ok(Some(FileType::MP3)) - } else { - Ok(None) - } - } + self.inner.seek(SeekFrom::Start(position_after_id3_block))?; + + let file_type_after_id3_block = match &ident { + b"MAC" => Ok(Some(FileType::APE)), + // Search for a frame sync, which may be preceded by junk + _ if search_for_frame_sync(&mut self.inner)?.is_some() => { + Ok(Some(FileType::MP3)) + }, + _ => Ok(None), }; // before returning any result for a file type, seek back to the front @@ -270,7 +265,7 @@ mod tests { use crate::Probe; #[test] - fn mp3_file_id3v2_3() { + fn mp3_id3v2_trailing_junk() { // 
test data that contains 4 bytes of junk (0x20) between the ID3 portion and the first MP3 frame let data: [&[u8]; 4] = [ // ID3v2.3 header (10 bytes) diff --git a/src/types/file.rs b/src/types/file.rs index 2ba78b62..d2b32994 100644 --- a/src/types/file.rs +++ b/src/types/file.rs @@ -286,41 +286,59 @@ impl FileType { /// Attempts to extract a [`FileType`] from a buffer /// - /// NOTE: This is for use in [`Probe::guess_file_type`](crate::Probe::guess_file_type), it + /// NOTES: + /// + /// * This is for use in [`Probe::guess_file_type`], it /// is recommended to use it that way + /// * This **will not** search past tags at the start of the buffer. + /// For this behavior, use [`Probe::guess_file_type`]. + /// + /// [`Probe::guess_file_type`]: crate::Probe::guess_file_type pub fn from_buffer(buf: &[u8]) -> Option { match Self::from_buffer_inner(buf) { - Ok((Some(f_ty), _)) => Some(f_ty), + (Some(f_ty), _) => Some(f_ty), + // We make no attempt to search past an ID3v2 tag here, since + // we only provided a fixed-sized buffer to search from. 
+ // + // That case is handled in `Probe::guess_file_type` _ => None, } } - pub(crate) fn from_buffer_inner(buf: &[u8]) -> Result<(Option, u32)> { + // TODO: APE tags in the beginning of the file + pub(crate) fn from_buffer_inner(buf: &[u8]) -> (Option, Option) { use crate::id3::v2::unsynch_u32; + // Start out with an empty return: (File type, id3 size) + // Only one can be set + let mut ret = (None, None); + if buf.is_empty() { - return Err(LoftyError::EmptyFile); + return ret; } match Self::quick_type_guess(buf) { - Some(f_ty) => Ok((Some(f_ty), 0)), + Some(f_ty) => ret.0 = Some(f_ty), // Special case for ID3, gets checked in `Probe::guess_file_type` - None if buf.len() >= 11 && &buf[..3] == b"ID3" => { - let size = unsynch_u32(u32::from_be_bytes( - buf[6..10] - .try_into() - .map_err(|_| LoftyError::UnknownFormat)?, - )); - - Ok((None, size)) + // The bare minimum size for an ID3v2 header is 10 bytes + None if buf.len() >= 10 && &buf[..3] == b"ID3" => { + // This is infallible, but preferable to an unwrap + if let Ok(arr) = buf[6..10].try_into() { + // Set the ID3v2 size + ret.1 = Some(unsynch_u32(u32::from_be_bytes(arr))); + } }, - None => Err(LoftyError::UnknownFormat), + // We aren't able to determine a format + _ => {}, } + + ret } fn quick_type_guess(buf: &[u8]) -> Option { use crate::mp3::header::verify_frame_sync; + // Safe to unwrap, since we return early on an empty buffer match buf.first().unwrap() { 77 if buf.starts_with(b"MAC") => Some(Self::APE), 255 if buf.len() >= 2 && verify_frame_sync([buf[0], buf[1]]) => Some(Self::MP3), diff --git a/tests/files/assets/b.m4a b/tests/files/assets/b.m4a new file mode 100644 index 00000000..665cc978 Binary files /dev/null and b/tests/files/assets/b.m4a differ diff --git a/tests/properties.rs b/tests/properties.rs index a6142df0..2b0b7919 100644 --- a/tests/properties.rs +++ b/tests/properties.rs @@ -46,6 +46,17 @@ const MP4_PROPERTIES: Mp4Properties = Mp4Properties::new( 135, 124, 48000, + None, + 2, +); + +const 
ALAC_PROPERTIES: Mp4Properties = Mp4Properties::new( + Mp4Codec::ALAC, + Duration::from_millis(1428), + 331, + 124, + 48000, + Some(16), 2, ); @@ -126,6 +137,14 @@ fn mp4_properties() { ) } +#[test] +fn alac_properties() { + assert_eq!( + get_properties::("tests/files/assets/b.m4a").bit_depth(), + ALAC_PROPERTIES.bit_depth() + ) +} + #[test] fn opus_properties() { assert_eq!(