From fafda6243d451f3999904f0b80893ae3cc3b10f8 Mon Sep 17 00:00:00 2001
From: Serial <69764315+Serial-ATA@users.noreply.github.com>
Date: Wed, 1 Sep 2021 17:23:50 -0400
Subject: [PATCH] Start work to parse mp4 atoms

---
Review notes (this area is ignored when the patch is applied):

* Line breaks, indentation and generic arguments (`<R>`, `<Self>`, `::<BigEndian>`, ...)
  were missing from the reviewed copy and are reconstructed from usage. The return types on
  the `parse_id3v2` and `_read_from` context lines, and all context whitespace, are
  best-effort: verify them against the tree before applying.
* The blob ids on the `index` lines and the hash on the first line predate the fixes below.
* `mod trak;` / `super::trak::Trak` refer to a module this patch does not create, and nothing
  visible here converts `Mp4File` into `TaggedFile`. Both have to come from elsewhere.
* Fixes compared to the reviewed revision:
  - atom.rs: a size of 0 now yields a `len` that includes the 8 byte header like every other
    case does; extended sizes smaller than the 16 byte header are rejected.
  - ilst.rs: track/disc numbers and totals are stored when they are non-zero (the checks were
    inverted); a short number pair is an error instead of a slice panic; "data", "mean" and
    "name" sizes are checked before subtracting; "mean"/"name" skip their version/flags and no
    longer read 12 bytes too many; freeform identifiers get the ':' between mean and name
    (TODO confirm against the `ItemKey` mapping); 1 and 3 byte signed integers are
    sign-extended.
  - moov.rs: the byte counters in `meta_from_udta` start at the amount actually consumed.
  - read.rs: `skip_unneeded` accounts for the 16 byte extended header and no longer truncates
    the size to 32 bits; the "ftyp" size is checked; `read_properties` is an explicit stub
    instead of an empty body that does not type-check.

 benches/read_file.rs     |   4 +-
 src/logic/id3/v2/read.rs |   2 -
 src/logic/mp4/atom.rs    |  81 +++++++++++++
 src/logic/mp4/ilst.rs    | 240 +++++++++++++++++++++++++++++++++++++++
 src/logic/mp4/mod.rs     |  52 +++++++++
 src/logic/mp4/moov.rs    |  96 ++++++++++++++++
 src/logic/mp4/read.rs    |  95 ++++++++++++++++
 src/probe.rs             |  21 ++--
 8 files changed, 577 insertions(+), 14 deletions(-)
 create mode 100644 src/logic/mp4/atom.rs
 create mode 100644 src/logic/mp4/ilst.rs
 create mode 100644 src/logic/mp4/mod.rs
 create mode 100644 src/logic/mp4/moov.rs
 create mode 100644 src/logic/mp4/read.rs

diff --git a/benches/read_file.rs b/benches/read_file.rs
index 1e74bda6..5f03983c 100644
--- a/benches/read_file.rs
+++ b/benches/read_file.rs
@@ -12,7 +12,7 @@ macro_rules! test_read {
 test_read!(read_aiff, "tests/assets/a_text.aiff");
 test_read!(read_ape, "tests/assets/a.ape");
 test_read!(read_flac, "tests/assets/a.flac");
-// test_read!(read_m4a, "tests/assets/a.m4a"); TODO
+test_read!(read_m4a, "tests/assets/a.m4a");
 test_read!(read_mp3, "tests/assets/a.mp3");
 test_read!(read_vorbis, "tests/assets/a.ogg");
 test_read!(read_opus, "tests/assets/a.opus");
@@ -23,7 +23,7 @@ fn bench_sig(c: &mut Criterion) {
     g.bench_function("AIFF", |b| b.iter(read_aiff));
     g.bench_function("APE", |b| b.iter(read_ape));
     g.bench_function("FLAC", |b| b.iter(read_flac));
-    // g.bench_function("MP4", |b| b.iter(read_m4a));
+    g.bench_function("MP4", |b| b.iter(read_m4a));
     g.bench_function("MP3", |b| b.iter(read_mp3));
     g.bench_function("VORBIS", |b| b.iter(read_vorbis));
     g.bench_function("OPUS", |b| b.iter(read_opus));
diff --git a/src/logic/id3/v2/read.rs b/src/logic/id3/v2/read.rs
index 3d03e1c6..8eb7842d 100644
--- a/src/logic/id3/v2/read.rs
+++ b/src/logic/id3/v2/read.rs
@@ -1,5 +1,4 @@
 use crate::error::Result;
-#[cfg(feature = "id3v2_restrictions")]
 use crate::logic::id3::decode_u32;
 use crate::logic::id3::v2::frame::content::FrameContent;
 use crate::logic::id3::v2::frame::Frame;
@@ -13,7 +12,6 @@ use crate::{LoftyError, TagType};
 
 use std::io::Read;
 
-#[cfg(feature = "id3v2_restrictions")]
 use byteorder::{BigEndian, ReadBytesExt};
 
 pub(crate) fn parse_id3v2(bytes: &mut &[u8]) -> Result<Tag> {
diff --git a/src/logic/mp4/atom.rs b/src/logic/mp4/atom.rs
new file mode 100644
index 00000000..8ca6b8ab
--- /dev/null
+++ b/src/logic/mp4/atom.rs
@@ -0,0 +1,81 @@
+use crate::error::{LoftyError, Result};
+
+use std::io::{Read, Seek, SeekFrom};
+
+use byteorder::{BigEndian, ReadBytesExt};
+
+/// The header of a single MP4 atom
+pub(crate) struct Atom {
+    /// Total size of the atom in bytes, header included
+    pub(crate) len: u64,
+    /// Whether the size came from the 64-bit extended size field (16 byte header)
+    pub(crate) extended: bool,
+    /// The four character identifier, with a leading 0xA9 mapped to '©'
+    pub(crate) ident: String,
+}
+
+impl Atom {
+    /// Reads an atom header, leaving `data` positioned at the atom's content
+    ///
+    /// # Errors
+    ///
+    /// * `data` ends before the header is complete
+    /// * The size is smaller than the header itself
+    /// * The identifier is not valid UTF-8
+    pub(crate) fn read<R>(data: &mut R) -> Result<Self>
+    where
+        R: Read + Seek,
+    {
+        let len = data.read_u32::<BigEndian>()?;
+
+        let mut ident = [0; 4];
+        data.read_exact(&mut ident)?;
+
+        let (len, extended) = match len {
+            // The atom extends to the end of the file
+            0 => {
+                let pos = data.seek(SeekFrom::Current(0))?;
+                let end = data.seek(SeekFrom::End(0))?;
+
+                data.seek(SeekFrom::Start(pos))?;
+
+                // `pos` is already past the 8 byte header, which `len` has to include
+                (end - pos + 8, false)
+            },
+            // There's an extended length, it has to cover its own 16 byte header
+            1 => {
+                let len = data.read_u64::<BigEndian>()?;
+
+                if len < 16 {
+                    return Err(LoftyError::BadAtom(
+                        "Found an invalid extended length (< 16)",
+                    ));
+                }
+
+                (len, true)
+            },
+            _ if len < 8 => return Err(LoftyError::BadAtom("Found an invalid length (< 8)")),
+            _ => (u64::from(len), false),
+        };
+
+        let ident = if ident[0] == 0xA9 {
+            let end = simdutf8::basic::from_utf8(&ident[1..])
+                .map_err(|_| LoftyError::BadAtom("Encountered a non UTF-8 atom identifier"))?;
+
+            let mut ident = String::from('\u{a9}');
+            ident.push_str(end);
+
+            ident
+        } else {
+            simdutf8::basic::from_utf8(&ident)
+                .map_err(|_| LoftyError::BadAtom("Encountered a non UTF-8 atom identifier"))?
+                .to_string()
+        };
+
+        Ok(Self {
+            len,
+            extended,
+            ident,
+        })
+    }
+}
diff --git a/src/logic/mp4/ilst.rs b/src/logic/mp4/ilst.rs
new file mode 100644
index 00000000..a879bc42
--- /dev/null
+++ b/src/logic/mp4/ilst.rs
@@ -0,0 +1,240 @@
+use super::read::skip_unneeded;
+use crate::error::{LoftyError, Result};
+use crate::logic::id3::v2::util::text_utils::utf16_decode;
+use crate::logic::id3::v2::TextEncoding;
+use crate::logic::mp4::atom::Atom;
+use crate::types::item::ItemKey;
+use crate::types::picture::{MimeType, Picture, PictureInformation, PictureType};
+use crate::types::tag::{ItemValue, Tag, TagItem, TagType};
+
+use std::borrow::Cow;
+use std::io::{Cursor, Read, Seek, SeekFrom};
+
+use byteorder::{BigEndian, ReadBytesExt};
+
+/// Parses the content of an "ilst" atom into a [`Tag`]
+///
+/// `len` is the size of the content, not counting the "ilst" header. Cover art ("covr") is
+/// stored as a picture, and track/disc number pairs are split into separate number and
+/// total items.
+pub(crate) fn parse_ilst<R>(data: &mut R, len: u64) -> Result<Option<Tag>>
+where
+    R: Read + Seek,
+{
+    let mut contents = vec![0; len as usize];
+    data.read_exact(&mut contents)?;
+
+    let mut cursor = Cursor::new(contents);
+
+    let mut tag = Tag::new(TagType::Mp4Atom);
+
+    while let Ok(atom) = Atom::read(&mut cursor) {
+        // Safe to unwrap here since ItemKey::Unknown exists
+        let key = match &*atom.ident {
+            "free" | "skip" => {
+                skip_unneeded(&mut cursor, atom.extended, atom.len)?;
+                continue;
+            },
+            "covr" => {
+                let (mime_type, picture) = match parse_data(&mut cursor)? {
+                    (ItemValue::Binary(picture), 13) => (MimeType::Jpeg, picture),
+                    (ItemValue::Binary(picture), 14) => (MimeType::Png, picture),
+                    (ItemValue::Binary(picture), 27) => (MimeType::Bmp, picture),
+                    // GIF is deprecated
+                    (ItemValue::Binary(picture), 12) => (MimeType::Gif, picture),
+                    // Type 0 is implicit
+                    (ItemValue::Binary(picture), 0) => (MimeType::None, picture),
+                    _ => return Err(LoftyError::BadAtom("\"covr\" atom has an unknown type")),
+                };
+
+                tag.push_picture(Picture {
+                    pic_type: PictureType::Other,
+                    text_encoding: TextEncoding::UTF8,
+                    mime_type,
+                    description: None,
+                    information: PictureInformation {
+                        width: 0,
+                        height: 0,
+                        color_depth: 0,
+                        num_colors: 0,
+                    },
+                    data: Cow::from(picture),
+                });
+
+                continue;
+            },
+            "----" => ItemKey::from_key(&TagType::Mp4Atom, &*parse_freeform(&mut cursor)?),
+            other => ItemKey::from_key(&TagType::Mp4Atom, other),
+        }
+        .unwrap();
+
+        let data = parse_data(&mut cursor)?.0;
+
+        match key {
+            ItemKey::TrackNumber | ItemKey::DiscNumber => {
+                if let ItemValue::Binary(pair) = data {
+                    // 2 reserved bytes, then the number and the total as 16 bit integers
+                    let mut pair = pair.get(2..6).ok_or(LoftyError::BadAtom(
+                        "Expected atom data to include integer pair",
+                    ))?;
+
+                    let number = u32::from(pair.read_u16::<BigEndian>()?);
+                    let total = u32::from(pair.read_u16::<BigEndian>()?);
+
+                    // Storing a zero is pointless, only insert the values that are present
+                    if total != 0 {
+                        match key {
+                            ItemKey::TrackNumber => tag.insert_item_unchecked(TagItem::new(
+                                ItemKey::TrackTotal,
+                                ItemValue::UInt(total),
+                            )),
+                            ItemKey::DiscNumber => tag.insert_item_unchecked(TagItem::new(
+                                ItemKey::DiscTotal,
+                                ItemValue::UInt(total),
+                            )),
+                            _ => unreachable!(),
+                        }
+                    }
+
+                    if number != 0 {
+                        tag.insert_item_unchecked(TagItem::new(key, ItemValue::UInt(number)))
+                    }
+                } else {
+                    return Err(LoftyError::BadAtom(
+                        "Expected atom data to include integer pair",
+                    ));
+                }
+            },
+            _ => tag.insert_item_unchecked(TagItem::new(key, data)),
+        }
+    }
+
+    Ok(Some(tag))
+}
+
+/// Reads a "data" atom, returning its value along with the type code from its flags
+fn parse_data<R>(data: &mut R) -> Result<(ItemValue, u32)>
+where
+    R: Read + Seek,
+{
+    let atom = Atom::read(data)?;
+
+    if atom.ident != "data" {
+        return Err(LoftyError::BadAtom("Expected atom \"data\" to follow name"));
+    }
+
+    // We don't care about the version
+    let _version = data.read_u8()?;
+
+    let mut flags = [0; 3];
+    data.read_exact(&mut flags)?;
+
+    let flags = u32::from_be_bytes([0, flags[0], flags[1], flags[2]]);
+
+    // We don't care about the locale
+    data.seek(SeekFrom::Current(4))?;
+
+    // Header (8, or 16 when extended) + version and flags (4) + locale (4)
+    let consumed: u64 = if atom.extended { 24 } else { 16 };
+    let content_len = atom.len.checked_sub(consumed).ok_or(LoftyError::BadAtom(
+        "\"data\" atom is too small to hold a value",
+    ))?;
+
+    let mut content = vec![0; content_len as usize];
+    data.read_exact(&mut content)?;
+
+    // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/Metadata/Metadata.html#//apple_ref/doc/uid/TP40000939-CH1-SW35
+    let value = match flags {
+        1 => ItemValue::Text(String::from_utf8(content)?),
+        2 => ItemValue::Text(utf16_decode(&*content, u16::from_be_bytes)?),
+        15 => ItemValue::Locator(String::from_utf8(content)?),
+        22 | 76 | 77 | 78 => parse_uint(&*content)?,
+        21 | 66 | 67 | 74 => parse_int(&*content)?,
+        _ => ItemValue::Binary(content),
+    };
+
+    Ok((value, flags))
+}
+
+/// Converts 1, 2, 3, 4, or 8 big endian bytes into an unsigned integer value
+fn parse_uint(bytes: &[u8]) -> Result<ItemValue> {
+    Ok(match bytes.len() {
+        1 => ItemValue::UInt(u32::from(bytes[0])),
+        2 => ItemValue::UInt(u32::from(u16::from_be_bytes([bytes[0], bytes[1]]))),
+        3 => ItemValue::UInt(u32::from_be_bytes([0, bytes[0], bytes[1], bytes[2]])),
+        4 => ItemValue::UInt(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])),
+        8 => ItemValue::UInt64(u64::from_be_bytes([
+            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
+        ])),
+        _ => {
+            return Err(LoftyError::BadAtom(
+                "Unexpected atom size for type \"BE unsigned integer\"",
+            ))
+        },
+    })
+}
+
+/// Converts 1, 2, 3, 4, or 8 big endian bytes into a signed integer value
+fn parse_int(bytes: &[u8]) -> Result<ItemValue> {
+    Ok(match bytes.len() {
+        1 => ItemValue::Int(i32::from(i8::from_be_bytes([bytes[0]]))),
+        2 => ItemValue::Int(i32::from(i16::from_be_bytes([bytes[0], bytes[1]]))),
+        // Fill the upper 3 bytes, so the arithmetic shift sign-extends the value
+        3 => ItemValue::Int(i32::from_be_bytes([bytes[0], bytes[1], bytes[2], 0]) >> 8),
+        4 => ItemValue::Int(i32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])),
+        8 => ItemValue::Int64(i64::from_be_bytes([
+            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
+        ])),
+        _ => {
+            return Err(LoftyError::BadAtom(
+                "Unexpected atom size for type \"BE signed integer\"",
+            ))
+        },
+    })
+}
+
+/// Builds the "----:mean:name" identifier of a freeform atom
+fn parse_freeform<R>(data: &mut R) -> Result<String>
+where
+    R: Read + Seek,
+{
+    let mut freeform = String::new();
+    freeform.push_str("----:");
+
+    freeform_chunk(data, "mean", &mut freeform)?;
+    freeform.push(':');
+    freeform_chunk(data, "name", &mut freeform)?;
+
+    Ok(freeform)
+}
+
+/// Appends the string stored in the freeform child atom `name` to `freeform`
+fn freeform_chunk<R>(data: &mut R, name: &str, freeform: &mut String) -> Result<()>
+where
+    R: Read + Seek,
+{
+    let atom = Atom::read(data)?;
+
+    if atom.ident != name {
+        return Err(LoftyError::BadAtom(
+            "Found freeform identifier \"----\" with no trailing \"mean\" or \"name\" atoms",
+        ));
+    }
+
+    // Version (1) and flags (3)
+    data.seek(SeekFrom::Current(4))?;
+
+    // The header (8) and the version/flags (4) are not part of the string
+    let content_len = atom.len.checked_sub(12).ok_or(LoftyError::BadAtom(
+        "Found a freeform \"mean\" or \"name\" atom that is too small",
+    ))?;
+
+    let mut content = vec![0; content_len as usize];
+    data.read_exact(&mut content)?;
+
+    freeform.push_str(std::str::from_utf8(&*content).map_err(|_| {
+        LoftyError::BadAtom("Found a non UTF-8 string while reading freeform identifier")
+    })?);
+
+    Ok(())
+}
diff --git a/src/logic/mp4/mod.rs b/src/logic/mp4/mod.rs
new file mode 100644
index 00000000..00454950
--- /dev/null
+++ b/src/logic/mp4/mod.rs
@@ -0,0 +1,52 @@
+mod atom;
+mod ilst;
+mod moov;
+pub(crate) mod read;
+mod trak;
+
+use crate::types::file::AudioFile;
+use crate::{FileProperties, Result, Tag, TagType};
+
+use std::io::{Read, Seek};
+
+#[allow(dead_code)]
+/// An MP4 file
+pub struct Mp4File {
+    /// The file format from ftyp's "major brand" (Ex. "M4A ")
+    pub(crate) ftyp: String,
+    /// The [`Tag`] parsed from the ilst atom, not guaranteed
+    pub(crate) ilst: Option<Tag>,
+    /// The file's audio properties
+    pub(crate) properties: FileProperties,
+}
+
+impl AudioFile for Mp4File {
+    fn read_from<R>(reader: &mut R) -> Result<Self>
+    where
+        R: Read + Seek,
+    {
+        self::read::read_from(reader)
+    }
+
+    fn properties(&self) -> &FileProperties {
+        &self.properties
+    }
+
+    fn contains_tag(&self) -> bool {
+        self.ilst.is_some()
+    }
+
+    fn contains_tag_type(&self, tag_type: &TagType) -> bool {
+        match tag_type {
+            TagType::Mp4Atom => self.ilst.is_some(),
+            _ => false,
+        }
+    }
+}
+
+impl Mp4File {
+    /// Returns a reference to the "ilst" tag if it exists
+    pub fn ilst(&self) -> Option<&Tag> {
+        self.ilst.as_ref()
+    }
+}
diff --git a/src/logic/mp4/moov.rs b/src/logic/mp4/moov.rs
new file mode 100644
index 00000000..db9144ca
--- /dev/null
+++ b/src/logic/mp4/moov.rs
@@ -0,0 +1,96 @@
+use super::atom::Atom;
+use super::ilst::parse_ilst;
+use super::read::skip_unneeded;
+use super::trak::Trak;
+use crate::error::Result;
+use crate::types::tag::Tag;
+
+use byteorder::{BigEndian, ReadBytesExt};
+use std::io::{Read, Seek, SeekFrom};
+
+/// The parts of the "moov" atom that are needed
+pub(crate) struct Moov {
+    pub(crate) traks: Vec<Trak>,
+    // Represents a parsed moov.udta.meta.ilst since we don't need anything else
+    pub(crate) meta: Option<Tag>,
+}
+
+impl Moov {
+    /// Walks the atoms following the "moov" header, parsing "udta" and skipping the rest
+    ///
+    /// Reading stops at the first atom header that can't be read, such as the end of `data`.
+    pub(crate) fn parse<R>(data: &mut R) -> Result<Self>
+    where
+        R: Read + Seek,
+    {
+        let mut traks = Vec::new();
+        let mut meta = None;
+
+        while let Ok(atom) = Atom::read(data) {
+            match &*atom.ident {
+                //"trak" => traks.push(Trak::parse(data, &atom)?),
+                "udta" => {
+                    meta = meta_from_udta(data, atom.len - 8)?;
+                },
+                _ => skip_unneeded(data, atom.extended, atom.len)?,
+            }
+        }
+
+        Ok(Self { traks, meta })
+    }
+}
+
+/// Looks for "meta" inside of "udta", and for "ilst" inside of that, parsing it when found
+///
+/// `len` is the size of the "udta" content, not counting its header.
+fn meta_from_udta<R>(data: &mut R, len: u64) -> Result<Option<Tag>>
+where
+    R: Read + Seek,
+{
+    // Nothing of the content has been consumed yet
+    let mut read = 0;
+    let mut meta = (false, 0_u64);
+
+    while read < len {
+        let atom = Atom::read(data)?;
+
+        if &*atom.ident == "meta" {
+            meta = (true, atom.len);
+            break;
+        }
+
+        read += atom.len;
+        skip_unneeded(data, atom.extended, atom.len)?;
+    }
+
+    if !meta.0 {
+        return Ok(None);
+    }
+
+    // The meta atom has 4 bytes we don't care about
+    // Version (1)
+    // Flags (3)
+    let _version_flags = data.read_u32::<BigEndian>()?;
+
+    // The header (8) and the version/flags (4) count towards the size of "meta"
+    read = 12;
+    let mut islt = (false, 0_u64);
+
+    while read < meta.1 {
+        let atom = Atom::read(data)?;
+
+        if &*atom.ident == "ilst" {
+            islt = (true, atom.len);
+            break;
+        }
+
+        read += atom.len;
+        skip_unneeded(data, atom.extended, atom.len)?;
+    }
+
+    if !islt.0 {
+        return Ok(None);
+    }
+
+    parse_ilst(data, islt.1 - 8)
+}
diff --git a/src/logic/mp4/read.rs b/src/logic/mp4/read.rs
new file mode 100644
index 00000000..620408fe
--- /dev/null
+++ b/src/logic/mp4/read.rs
@@ -0,0 +1,95 @@
+use super::atom::Atom;
+use super::moov::Moov;
+use super::trak::Trak;
+use super::Mp4File;
+use crate::types::properties::FileProperties;
+use crate::error::{LoftyError, Result};
+
+use std::io::{Read, Seek, SeekFrom};
+
+/// Reads the "ftyp" atom that has to start the file, returning its major brand
+fn verify_mp4<R>(data: &mut R) -> Result<String>
+where
+    R: Read + Seek,
+{
+    let atom = Atom::read(data)?;
+
+    if atom.ident != "ftyp" {
+        return Err(LoftyError::UnknownFormat);
+    }
+
+    let mut major_brand = vec![0; 4];
+    data.read_exact(&mut major_brand)?;
+
+    // The header (8) and the major brand (4) have already been consumed
+    let remaining = atom.len.checked_sub(12).ok_or(LoftyError::BadAtom(
+        "\"ftyp\" atom is too small to hold a major brand",
+    ))?;
+
+    data.seek(SeekFrom::Current(remaining as i64))?;
+
+    String::from_utf8(major_brand)
+        .map_err(|_| LoftyError::BadAtom("Unable to parse \"ftyp\"'s major brand"))
+}
+
+/// Stub for reading the audio properties, nothing in this module calls it yet
+fn read_properties<R>(data: &mut R, traks: &[Trak]) -> Result<FileProperties>
+where
+    R: Read + Seek,
+{
+    todo!()
+}
+
+/// Reads an MP4 file, which has to start with "ftyp" and contain a "moov" atom
+#[allow(clippy::similar_names)]
+pub(crate) fn read_from<R>(data: &mut R) -> Result<Mp4File>
+where
+    R: Read + Seek,
+{
+    let ftyp = verify_mp4(data)?;
+
+    let mut moov = false;
+
+    while let Ok(atom) = Atom::read(data) {
+        if &*atom.ident == "moov" {
+            moov = true;
+            break;
+        }
+
+        skip_unneeded(data, atom.extended, atom.len)?;
+    }
+
+    if !moov {
+        return Err(LoftyError::Mp4("No \"moov\" atom found"));
+    }
+
+    let moov = Moov::parse(data)?;
+
+    Ok(Mp4File {
+        ftyp,
+        ilst: moov.meta,
+        properties: Default::default(),
+    })
+}
+
+/// Skips the content of an atom whose header has just been read
+///
+/// `ext` and `len` are the `extended` and `len` fields of that [`Atom`].
+pub(crate) fn skip_unneeded<R>(data: &mut R, ext: bool, len: u64) -> Result<()>
+where
+    R: Read + Seek,
+{
+    // An extended header has an additional 8 byte size field
+    let header_len = if ext { 16 } else { 8 };
+
+    let content_len = len
+        .checked_sub(header_len)
+        .ok_or(LoftyError::BadAtom("Found an atom smaller than its own header"))?;
+
+    let pos = data.seek(SeekFrom::Current(0))?;
+    let end = pos.checked_add(content_len).ok_or(LoftyError::TooMuchData)?;
+
+    data.seek(SeekFrom::Start(end))?;
+
+    Ok(())
+}
diff --git a/src/probe.rs b/src/probe.rs
index a9205b77..89eb76f4 100644
--- a/src/probe.rs
+++ b/src/probe.rs
@@ -8,6 +8,7 @@ use crate::logic::ogg::vorbis::VorbisFile;
 use crate::types::file::AudioFile;
 use crate::{FileType, LoftyError, Result, TaggedFile};
 
+use crate::logic::mp4::Mp4File;
 use std::io::{Cursor, Read, Seek};
 use std::path::Path;
 
@@ -85,14 +86,14 @@ fn _read_from<R>(reader: &mut R, file_type: FileType) -> Result<TaggedFile>
 where
     R: Read + Seek,
 {
-    match file_type {
-        FileType::AIFF => Ok(AiffFile::read_from(reader)?.into()),
-        FileType::APE => Ok(ApeFile::read_from(reader)?.into()),
-        FileType::FLAC => Ok(FlacFile::read_from(reader)?.into()),
-        FileType::MP3 => Ok(MpegFile::read_from(reader)?.into()),
-        FileType::Opus => Ok(OpusFile::read_from(reader)?.into()),
-        FileType::Vorbis => Ok(VorbisFile::read_from(reader)?.into()),
-        FileType::WAV => Ok(WavFile::read_from(reader)?.into()),
-        _ => Err(LoftyError::UnknownFormat), // FileType::MP4 => {}, TODO,
-    }
+    Ok(match file_type {
+        FileType::AIFF => AiffFile::read_from(reader)?.into(),
+        FileType::APE => ApeFile::read_from(reader)?.into(),
+        FileType::FLAC => FlacFile::read_from(reader)?.into(),
+        FileType::MP3 => MpegFile::read_from(reader)?.into(),
+        FileType::Opus => OpusFile::read_from(reader)?.into(),
+        FileType::Vorbis => VorbisFile::read_from(reader)?.into(),
+        FileType::WAV => WavFile::read_from(reader)?.into(),
+        FileType::MP4 => Mp4File::read_from(reader)?.into(),
+    })
 }