Merge branch 'main' into wave-depth

This commit is contained in:
sagu 2022-01-24 16:35:24 +01:00 committed by GitHub
commit 6325665ab2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 115 additions and 69 deletions

View file

@ -74,7 +74,7 @@ impl ApeProperties {
self.sample_rate
}
/// bits per sample
/// Bits per sample
pub fn bit_depth(&self) -> u8 {
self.bit_depth
}
@ -155,7 +155,6 @@ where
return Err(LoftyError::Ape("File contains no frames"));
}
// Unused
let bits_per_sample = header_read.read_u16::<LittleEndian>()?;
let channels = header_read.read_u16::<LittleEndian>()?;
@ -212,7 +211,6 @@ where
let compression_level = header_first.read_u16::<LittleEndian>()?;
// Unused
let format_flags = header_first.read_u16::<LittleEndian>()?;
// https://github.com/fernandotcl/monkeys-audio/blob/5fe956c7e67c13daa80518a4cc7001e9fa185297/src/MACLib/MACLib.h#L74
let bit_depth = if (format_flags & 0b1) == 1 {

View file

@ -16,7 +16,7 @@ pub enum LoftyError {
// File data related errors
/// Provided an empty file
EmptyFile,
EmptyFile, // TODO: Remove this
/// Attempting to read/write an abnormally large amount of data
TooMuchData,

View file

@ -91,7 +91,7 @@ impl WavProperties {
self.sample_rate
}
/// bits per sample
/// Bits per sample
pub fn bit_depth(&self) -> u8 {
self.bit_depth
}

View file

@ -9,11 +9,11 @@ pub(crate) fn verify_frame_sync(frame_sync: [u8; 2]) -> bool {
frame_sync[0] == 0xFF && frame_sync[1] >> 5 == 0b111
}
/// Searches for a frame sync (11 bits with the value 1 like `0b1111_1111_111`) in the input reader.
/// The search starts at the beginning of the reader and returns the index relative to this beginning.
/// Only the first match is returned and on no match, [`None`] is returned instead.
///
/// Note that the search searches in 8 bit steps, i.e. the first 8 bits need to be byte aligned.
// Searches for a frame sync (11 set bits) in the reader.
// The search starts at the beginning of the reader and returns the index relative to this beginning.
// This will return the first match, if one is found.
//
// Note that the search advances in 8 bit steps, i.e. the first 8 bits of the sync need to be byte aligned.
pub(crate) fn search_for_frame_sync<R>(input: &mut R) -> std::io::Result<Option<u64>>
where
R: Read,
@ -25,16 +25,18 @@ where
if let Some(byte) = iterator.next() {
buffer[0] = byte?;
}
// create a stream of overlapping 2 byte pairs
// example: [0x01, 0x02, 0x03, 0x04] should be analyzed as
// Create a stream of overlapping 2 byte pairs
//
// Example:
// [0x01, 0x02, 0x03, 0x04] should be analyzed as
// [0x01, 0x02], [0x02, 0x03], [0x03, 0x04]
for (index, byte) in iterator.enumerate() {
buffer[1] = byte?;
// check the two bytes in the buffer
// Check the two bytes in the buffer
if verify_frame_sync(buffer) {
return Ok(Some(index as u64));
}
// if they do not match, copy the last byte in the buffer to the front for the next iteration
// If they do not match, copy the last byte in the buffer to the front for the next iteration
buffer[0] = buffer[1];
}
Ok(None)

View file

@ -80,17 +80,18 @@ where
continue;
}
},
// metadata blocks might be followed by junk bytes before the first MP3 frame begins
// Tags might be followed by junk bytes before the first MP3 frame begins
_ => {
// seek back the length of the temporary header buffer
// so that all bytes are included in the search for a frame sync
// Seek back the length of the temporary header buffer, to include its bytes
// in the frame sync search
#[allow(clippy::neg_multiply)]
let start_of_search_area = reader.seek(SeekFrom::Current(-1 * header.len() as i64))?;
if let Some(first_mp3_frame_start_relative) = search_for_frame_sync(reader)? {
let first_mp3_frame_start_absolute =
start_of_search_area + first_mp3_frame_start_relative;
// read the first four bytes of the found frame
// Seek back to the start of the frame and read the header
reader.seek(SeekFrom::Start(first_mp3_frame_start_absolute))?;
let header = Header::read(reader.read_u32::<BigEndian>()?)?;

View file

@ -32,6 +32,7 @@ pub struct Mp4Properties {
overall_bitrate: u32,
audio_bitrate: u32,
sample_rate: u32,
bit_depth: Option<u8>,
channels: u8,
}
@ -42,7 +43,7 @@ impl From<Mp4Properties> for FileProperties {
overall_bitrate: Some(input.overall_bitrate),
audio_bitrate: Some(input.audio_bitrate),
sample_rate: Some(input.sample_rate),
bit_depth: None,
bit_depth: input.bit_depth,
channels: Some(input.channels),
}
}
@ -56,6 +57,7 @@ impl Mp4Properties {
overall_bitrate: u32,
audio_bitrate: u32,
sample_rate: u32,
bit_depth: Option<u8>,
channels: u8,
) -> Self {
Self {
@ -64,6 +66,7 @@ impl Mp4Properties {
overall_bitrate,
audio_bitrate,
sample_rate,
bit_depth,
channels,
}
}
@ -88,6 +91,11 @@ impl Mp4Properties {
self.sample_rate
}
/// Bits per sample
pub fn bit_depth(&self) -> Option<u8> {
self.bit_depth
}
/// Channel count
pub fn channels(&self) -> u8 {
self.channels
@ -201,6 +209,7 @@ where
overall_bitrate: 0,
audio_bitrate: 0,
sample_rate: 0,
bit_depth: None,
channels: 0,
};
@ -281,7 +290,7 @@ where
// Descriptor length (1)
// Elementary stream ID (2)
// Flags (1)
let _info = stsd.read_u32::<BigEndian>()?;
stsd.seek(SeekFrom::Current(4))?;
// There is another descriptor embedded in the previous one
let mut specific_config = [0; 4];
@ -291,12 +300,11 @@ where
if specific_config == [0x04, 0x80, 0x80, 0x80] {
// Skipping 10 bytes
// Descriptor length (1)
// MPEG4 Audio (1)
// Codec (1)
// Stream type (1)
// Buffer size (3)
// Max bitrate (4)
let mut info = [0; 10];
stsd.read_exact(&mut info)?;
stsd.seek(SeekFrom::Current(10))?;
let average_bitrate = stsd.read_u32::<BigEndian>()?;
@ -337,15 +345,20 @@ where
if alac.ident == AtomIdent::Fourcc(*b"alac") {
properties.codec = Mp4Codec::ALAC;
// Skipping 13 bytes
// Skipping 9 bytes
// Version (4)
// Samples per frame (4)
// Compatible version (1)
data.seek(SeekFrom::Current(9))?;
// Sample size (1)
properties.bit_depth = Some(data.read_u8()?);
// Skipping 3 bytes
// Rice history mult (1)
// Rice initial history (1)
// Rice parameter limit (1)
data.seek(SeekFrom::Current(13))?;
data.seek(SeekFrom::Current(3))?;
properties.channels = data.read_u8()?;

View file

@ -153,48 +153,43 @@ impl<R: Read + Seek> Probe<R> {
// (36 is just a guess as to how long the data for estimating the file type might be)
let mut buf = [0; 36];
// read the first 36 bytes and seek back to the starting position
let starting_position = self.inner.stream_position()?;
// Read (up to) 36 bytes
let buf_len = std::io::copy(
&mut self.inner.by_ref().take(buf.len() as u64),
&mut Cursor::new(&mut buf[..]),
)? as usize;
self.inner.seek(SeekFrom::Start(starting_position))?;
// estimate the file type by using these 36 bytes
// note that any error from `from_buffer_inner` are suppressed, as it returns an error on unknown format
// Guess the file type by using these 36 bytes
// Note that `from_buffer_inner` does not return an error on an unknown format; it yields `(None, None)` instead
match FileType::from_buffer_inner(&buf[..buf_len]) {
// the file type was guessed based on these bytes
Ok((Some(f_ty), _)) => Ok(Some(f_ty)),
// the first data block is ID3 data; this means other data can follow (e.g. APE or MP3 frames)
Ok((None, id3_len)) => {
// the position right after the ID3 block is the internal size value (id3_len)
// added to the length of the ID3 header (which is 10 bytes),
// as the size does not include the header itself
// We were able to determine a file type
(Some(f_ty), _) => Ok(Some(f_ty)),
// The file starts with an ID3v2 tag; this means other data can follow (e.g. APE or MP3 frames)
(None, Some(id3_len)) => {
// `id3_len` is the size of the tag, not including the header (10 bytes)
let position_after_id3_block = self
.inner
.seek(SeekFrom::Current(i64::from(10 + id3_len)))?;
let file_type_after_id3_block = {
// try to guess the file type after the ID3 block by inspecting the first 3 bytes
let mut ident = [0; 3];
std::io::copy(
&mut self.inner.by_ref().take(ident.len() as u64),
&mut Cursor::new(&mut ident[..]),
)?;
// try to guess the file type after the ID3 block by inspecting the first 3 bytes
let mut ident = [0; 3];
std::io::copy(
&mut self.inner.by_ref().take(ident.len() as u64),
&mut Cursor::new(&mut ident[..]),
)?;
if &ident == b"MAC" {
Ok(Some(FileType::APE))
} else {
// potentially some junk bytes are between the ID3 block and the following MP3 block
// search for any possible sync bits after the ID3 block
self.inner.seek(SeekFrom::Start(position_after_id3_block))?;
if search_for_frame_sync(&mut self.inner)?.is_some() {
Ok(Some(FileType::MP3))
} else {
Ok(None)
}
}
self.inner.seek(SeekFrom::Start(position_after_id3_block))?;
let file_type_after_id3_block = match &ident {
b"MAC" => Ok(Some(FileType::APE)),
// Search for a frame sync, which may be preceded by junk
_ if search_for_frame_sync(&mut self.inner)?.is_some() => {
Ok(Some(FileType::MP3))
},
_ => Ok(None),
};
// before returning any result for a file type, seek back to the front
@ -270,7 +265,7 @@ mod tests {
use crate::Probe;
#[test]
fn mp3_file_id3v2_3() {
fn mp3_id3v2_trailing_junk() {
// test data that contains 4 bytes of junk (0x20) between the ID3 portion and the first MP3 frame
let data: [&[u8]; 4] = [
// ID3v2.3 header (10 bytes)

View file

@ -286,41 +286,59 @@ impl FileType {
/// Attempts to extract a [`FileType`] from a buffer
///
/// NOTE: This is for use in [`Probe::guess_file_type`](crate::Probe::guess_file_type), it
/// NOTES:
///
/// * This is for use in [`Probe::guess_file_type`]; it
/// is recommended to use it that way
/// * This **will not** search past tags at the start of the buffer.
/// For this behavior, use [`Probe::guess_file_type`].
///
/// [`Probe::guess_file_type`]: crate::Probe::guess_file_type
pub fn from_buffer(buf: &[u8]) -> Option<Self> {
match Self::from_buffer_inner(buf) {
Ok((Some(f_ty), _)) => Some(f_ty),
(Some(f_ty), _) => Some(f_ty),
// We make no attempt to search past an ID3v2 tag here, since
// we are only provided a fixed-size buffer to search from.
//
// That case is handled in `Probe::guess_file_type`
_ => None,
}
}
pub(crate) fn from_buffer_inner(buf: &[u8]) -> Result<(Option<Self>, u32)> {
// TODO: APE tags in the beginning of the file
pub(crate) fn from_buffer_inner(buf: &[u8]) -> (Option<Self>, Option<u32>) {
use crate::id3::v2::unsynch_u32;
// Start out with an empty return: (File type, id3 size)
// Only one can be set
let mut ret = (None, None);
if buf.is_empty() {
return Err(LoftyError::EmptyFile);
return ret;
}
match Self::quick_type_guess(buf) {
Some(f_ty) => Ok((Some(f_ty), 0)),
Some(f_ty) => ret.0 = Some(f_ty),
// Special case for ID3, gets checked in `Probe::guess_file_type`
None if buf.len() >= 11 && &buf[..3] == b"ID3" => {
let size = unsynch_u32(u32::from_be_bytes(
buf[6..10]
.try_into()
.map_err(|_| LoftyError::UnknownFormat)?,
));
Ok((None, size))
// The bare minimum size for an ID3v2 header is 10 bytes
None if buf.len() >= 10 && &buf[..3] == b"ID3" => {
// This is infallible, but preferable to an unwrap
if let Ok(arr) = buf[6..10].try_into() {
// Set the ID3v2 size
ret.1 = Some(unsynch_u32(u32::from_be_bytes(arr)));
}
},
None => Err(LoftyError::UnknownFormat),
// We aren't able to determine a format
_ => {},
}
ret
}
fn quick_type_guess(buf: &[u8]) -> Option<Self> {
use crate::mp3::header::verify_frame_sync;
// Safe to unwrap, since we return early on an empty buffer
match buf.first().unwrap() {
77 if buf.starts_with(b"MAC") => Some(Self::APE),
255 if buf.len() >= 2 && verify_frame_sync([buf[0], buf[1]]) => Some(Self::MP3),

BIN
tests/files/assets/b.m4a Normal file

Binary file not shown.

View file

@ -46,6 +46,17 @@ const MP4_PROPERTIES: Mp4Properties = Mp4Properties::new(
135,
124,
48000,
None,
2,
);
const ALAC_PROPERTIES: Mp4Properties = Mp4Properties::new(
Mp4Codec::ALAC,
Duration::from_millis(1428),
331,
124,
48000,
Some(16),
2,
);
@ -126,6 +137,14 @@ fn mp4_properties() {
)
}
#[test]
fn alac_properties() {
assert_eq!(
get_properties::<Mp4File>("tests/files/assets/b.m4a").bit_depth(),
ALAC_PROPERTIES.bit_depth()
)
}
#[test]
fn opus_properties() {
assert_eq!(