Start work to parse mp4 atoms

This commit is contained in:
Serial 2021-09-01 17:23:50 -04:00
parent fa97d27eb1
commit fafda6243d
8 changed files with 503 additions and 14 deletions

View file

@ -12,7 +12,7 @@ macro_rules! test_read {
test_read!(read_aiff, "tests/assets/a_text.aiff");
test_read!(read_ape, "tests/assets/a.ape");
test_read!(read_flac, "tests/assets/a.flac");
// test_read!(read_m4a, "tests/assets/a.m4a"); TODO
test_read!(read_m4a, "tests/assets/a.m4a");
test_read!(read_mp3, "tests/assets/a.mp3");
test_read!(read_vorbis, "tests/assets/a.ogg");
test_read!(read_opus, "tests/assets/a.opus");
@ -23,7 +23,7 @@ fn bench_sig(c: &mut Criterion) {
g.bench_function("AIFF", |b| b.iter(read_aiff));
g.bench_function("APE", |b| b.iter(read_ape));
g.bench_function("FLAC", |b| b.iter(read_flac));
// g.bench_function("MP4", |b| b.iter(read_m4a));
g.bench_function("MP4", |b| b.iter(read_m4a));
g.bench_function("MP3", |b| b.iter(read_mp3));
g.bench_function("VORBIS", |b| b.iter(read_vorbis));
g.bench_function("OPUS", |b| b.iter(read_opus));

View file

@ -1,5 +1,4 @@
use crate::error::Result;
#[cfg(feature = "id3v2_restrictions")]
use crate::logic::id3::decode_u32;
use crate::logic::id3::v2::frame::content::FrameContent;
use crate::logic::id3::v2::frame::Frame;
@ -13,7 +12,6 @@ use crate::{LoftyError, TagType};
use std::io::Read;
#[cfg(feature = "id3v2_restrictions")]
use byteorder::{BigEndian, ReadBytesExt};
pub(crate) fn parse_id3v2(bytes: &mut &[u8]) -> Result<Tag> {

59
src/logic/mp4/atom.rs Normal file
View file

@ -0,0 +1,59 @@
use crate::error::{LoftyError, Result};
use std::io::{Read, Seek, SeekFrom};
use byteorder::{BigEndian, ReadBytesExt};
/// The header of a single MP4 atom, as produced by `Atom::read`
pub(crate) struct Atom {
/// The atom's length in bytes, taken from the 32-bit size field, the 64-bit
/// extended size field, or derived from the end of the stream when the size field is 0
pub(crate) len: u64,
/// Whether the length was read from the 64-bit extended size field
pub(crate) extended: bool,
/// The 4 byte identifier decoded as UTF-8 (a leading 0xA9 byte is stored as '©')
pub(crate) ident: String,
}
impl Atom {
    /// Reads an atom header from `data`
    ///
    /// On success the reader is positioned directly after the header (8 bytes, or 16
    /// bytes when an extended length is present), and the returned `len` is the total
    /// size of the atom *including* that header.
    ///
    /// # Errors
    ///
    /// * Any I/O error from `data`
    /// * `LoftyError::BadAtom` if the length is invalid or the identifier is not UTF-8
    pub(crate) fn read<R>(data: &mut R) -> Result<Self>
    where
        R: Read + Seek,
    {
        let len = data.read_u32::<BigEndian>()?;

        let mut ident = [0; 4];
        data.read_exact(&mut ident)?;

        let (len, extended) = match len {
            // The atom extends to the end of the file
            0 => {
                let pos = data.seek(SeekFrom::Current(0))?;
                let end = data.seek(SeekFrom::End(0))?;
                data.seek(SeekFrom::Start(pos))?;

                // `pos` is already past the 8 byte header. Add it back so that `len`
                // is the total atom size, like in every other case.
                (end.saturating_sub(pos) + 8, false)
            },
            // There's an extended length
            1 => {
                let extended_len = data.read_u64::<BigEndian>()?;

                // The extended length has to cover its own 16 byte header
                if extended_len < 16 {
                    return Err(LoftyError::BadAtom(
                        "Found an invalid extended length (< 16)",
                    ));
                }

                (extended_len, true)
            },
            _ if len < 8 => return Err(LoftyError::BadAtom("Found an invalid length (< 8)")),
            _ => (u64::from(len), false),
        };

        // Identifiers starting with 0xA9 ('©' in MacRoman) are not valid UTF-8 as-is,
        // so the first byte is mapped by hand and only the remainder gets validated
        let ident = if ident[0] == 0xA9 {
            let end = simdutf8::basic::from_utf8(&ident[1..])
                .map_err(|_| LoftyError::BadAtom("Encountered a non UTF-8 atom identifier"))?;

            let mut ident = String::from('\u{a9}');
            ident.push_str(end);

            ident
        } else {
            simdutf8::basic::from_utf8(&ident)
                .map_err(|_| LoftyError::BadAtom("Encountered a non UTF-8 atom identifier"))?
                .to_string()
        };

        Ok(Self {
            len,
            extended,
            ident,
        })
    }
}

210
src/logic/mp4/ilst.rs Normal file
View file

@ -0,0 +1,210 @@
use super::read::skip_unneeded;
use crate::error::{LoftyError, Result};
use crate::logic::id3::v2::util::text_utils::utf16_decode;
use crate::logic::id3::v2::TextEncoding;
use crate::logic::mp4::atom::Atom;
use crate::types::item::ItemKey;
use crate::types::picture::{MimeType, Picture, PictureInformation, PictureType};
use crate::types::tag::{ItemValue, Tag, TagItem, TagType};
use std::borrow::Cow;
use std::io::{Cursor, Read, Seek, SeekFrom};
use byteorder::{BigEndian, ReadBytesExt};
/// Parses the content of an "ilst" atom into a [`Tag`]
///
/// `len` is the size of the atom's content (its header excluded); exactly that many
/// bytes are read from `data` up front and then parsed from memory.
///
/// * "free" and "skip" atoms are skipped
/// * "covr" atoms are stored as pictures
/// * "----" (freeform) atoms are keyed by their "mean" and "name" children
/// * Track and disc numbers are split into separate number/total items,
///   each only stored when it is non-zero
///
/// # Errors
///
/// * Any I/O error from `data`
/// * `LoftyError::BadAtom` if an item is malformed
pub(crate) fn parse_ilst<R>(data: &mut R, len: u64) -> Result<Option<Tag>>
where
    R: Read + Seek,
{
    let mut contents = vec![0; len as usize];
    data.read_exact(&mut contents)?;

    let mut cursor = Cursor::new(contents);

    let mut tag = Tag::new(TagType::Mp4Atom);

    // Running out of atoms surfaces as a read error, which ends the loop
    while let Ok(atom) = Atom::read(&mut cursor) {
        let key = match &*atom.ident {
            "free" | "skip" => {
                skip_unneeded(&mut cursor, atom.extended, atom.len)?;
                continue;
            },
            "covr" => {
                let (mime_type, picture) = match parse_data(&mut cursor)? {
                    (ItemValue::Binary(picture), 13) => (MimeType::Jpeg, picture),
                    (ItemValue::Binary(picture), 14) => (MimeType::Png, picture),
                    (ItemValue::Binary(picture), 27) => (MimeType::Bmp, picture),
                    // GIF is deprecated
                    (ItemValue::Binary(picture), 12) => (MimeType::Gif, picture),
                    // Type 0 is implicit
                    (ItemValue::Binary(picture), 0) => (MimeType::None, picture),
                    _ => return Err(LoftyError::BadAtom("\"covr\" atom has an unknown type")),
                };

                tag.push_picture(Picture {
                    pic_type: PictureType::Other,
                    text_encoding: TextEncoding::UTF8,
                    mime_type,
                    description: None,
                    information: PictureInformation {
                        width: 0,
                        height: 0,
                        color_depth: 0,
                        num_colors: 0,
                    },
                    data: Cow::from(picture),
                });

                continue;
            },
            "----" => ItemKey::from_key(&TagType::Mp4Atom, &*parse_freeform(&mut cursor)?),
            other => ItemKey::from_key(&TagType::Mp4Atom, other),
        }
        // Safe to unwrap here since ItemKey::Unknown exists
        .unwrap();

        let data = parse_data(&mut cursor)?.0;

        match key {
            ItemKey::TrackNumber | ItemKey::DiscNumber => {
                // The pair is stored as: reserved (2), number (2), total (2), ...
                let pair = match data {
                    ItemValue::Binary(pair) if pair.len() >= 6 => pair,
                    _ => {
                        return Err(LoftyError::BadAtom(
                            "Expected atom data to include integer pair",
                        ))
                    },
                };

                let pair = &mut &pair[2..6];

                let number = u32::from(pair.read_u16::<BigEndian>()?);
                let total = u32::from(pair.read_u16::<BigEndian>()?);

                // A value of 0 means "not set", so only real values are stored
                if total != 0 {
                    let total_key = match key {
                        ItemKey::TrackNumber => ItemKey::TrackTotal,
                        ItemKey::DiscNumber => ItemKey::DiscTotal,
                        _ => unreachable!(),
                    };

                    tag.insert_item_unchecked(TagItem::new(total_key, ItemValue::UInt(total)));
                }

                if number != 0 {
                    tag.insert_item_unchecked(TagItem::new(key, ItemValue::UInt(number)));
                }
            },
            _ => tag.insert_item_unchecked(TagItem::new(key, data)),
        }
    }

    Ok(Some(tag))
}
/// Reads a "data" atom and decodes its content according to its type indicator
///
/// Returns the decoded value together with the raw type indicator (the 3 flag bytes),
/// so callers can tell apart values that decode to the same [`ItemValue`] variant.
///
/// # Errors
///
/// * Any I/O error from `data`
/// * `LoftyError::BadAtom` if the next atom is not "data" or is too small
/// * A decoding error if the content does not match its declared type
fn parse_data<R>(data: &mut R) -> Result<(ItemValue, u32)>
where
    R: Read + Seek,
{
    let atom = Atom::read(data)?;

    if atom.ident != "data" {
        return Err(LoftyError::BadAtom("Expected atom \"data\" to follow name"));
    }

    // Size and identifier (8, or 16 with an extended size), version/flags (4), locale (4)
    let header_len: u64 = if atom.extended { 24 } else { 16 };

    // Checked up front so that a truncated atom is reported instead of underflowing
    let content_len = atom
        .len
        .checked_sub(header_len)
        .ok_or(LoftyError::BadAtom("\"data\" atom is too small to hold its header"))?;

    // We don't care about the version
    let _version = data.read_u8()?;

    let mut flags = [0; 3];
    data.read_exact(&mut flags)?;

    let flags = u32::from_be_bytes([0, flags[0], flags[1], flags[2]]);

    // We don't care about the locale
    data.seek(SeekFrom::Current(4))?;

    let mut content = vec![0; content_len as usize];
    data.read_exact(&mut content)?;

    // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/Metadata/Metadata.html#//apple_ref/doc/uid/TP40000939-CH1-SW35
    let value = match flags {
        1 => ItemValue::Text(String::from_utf8(content)?),
        2 => ItemValue::Text(utf16_decode(&*content, u16::from_be_bytes)?),
        15 => ItemValue::Locator(String::from_utf8(content)?),
        22 | 76 | 77 | 78 => parse_uint(&*content)?,
        21 | 66 | 67 | 74 => parse_int(&*content)?,
        _ => ItemValue::Binary(content),
    };

    Ok((value, flags))
}
/// Decodes a big-endian unsigned integer of 1, 2, 3, 4, or 8 bytes
///
/// Widths up to 4 bytes produce `ItemValue::UInt`, 8 bytes produce `ItemValue::UInt64`.
///
/// # Errors
///
/// `LoftyError::BadAtom` for any other length
fn parse_uint(bytes: &[u8]) -> Result<ItemValue> {
    let value = match *bytes {
        [a] => ItemValue::UInt(u32::from(a)),
        [a, b] => ItemValue::UInt(u32::from(u16::from_be_bytes([a, b]))),
        // Pad the missing most significant byte
        [a, b, c] => ItemValue::UInt(u32::from_be_bytes([0, a, b, c])),
        [a, b, c, d] => ItemValue::UInt(u32::from_be_bytes([a, b, c, d])),
        [a, b, c, d, e, f, g, h] => {
            ItemValue::UInt64(u64::from_be_bytes([a, b, c, d, e, f, g, h]))
        },
        _ => {
            return Err(LoftyError::BadAtom(
                "Unexpected atom size for type \"BE unsigned integer\"",
            ))
        },
    };

    Ok(value)
}
/// Decodes a big-endian signed (two's complement) integer of 1, 2, 3, 4, or 8 bytes
///
/// Widths up to 4 bytes produce `ItemValue::Int`, 8 bytes produce `ItemValue::Int64`.
/// Every width is sign-extended, so a single `0xFF` byte decodes to `-1`.
///
/// # Errors
///
/// `LoftyError::BadAtom` for any other length
fn parse_int(bytes: &[u8]) -> Result<ItemValue> {
    let value = match *bytes {
        [a] => ItemValue::Int(i32::from(i8::from_be_bytes([a]))),
        [a, b] => ItemValue::Int(i32::from(i16::from_be_bytes([a, b]))),
        // Place the 3 bytes at the top of the word, then let the arithmetic
        // shift carry the sign bit back down
        [a, b, c] => ItemValue::Int(i32::from_be_bytes([a, b, c, 0]) >> 8),
        [a, b, c, d] => ItemValue::Int(i32::from_be_bytes([a, b, c, d])),
        [a, b, c, d, e, f, g, h] => {
            ItemValue::Int64(i64::from_be_bytes([a, b, c, d, e, f, g, h]))
        },
        _ => {
            return Err(LoftyError::BadAtom(
                "Unexpected atom size for type \"BE signed integer\"",
            ))
        },
    };

    Ok(value)
}
/// Builds the identifier of a freeform ("----") item
///
/// The result has the form `----:<mean>:<name>`, where both parts are read from
/// the "mean" and "name" atoms that are expected to follow, in that order.
///
/// # Errors
///
/// Whatever `freeform_chunk` reports for a missing or malformed "mean"/"name" atom
fn parse_freeform<R>(data: &mut R) -> Result<String>
where
    R: Read + Seek,
{
    let mut freeform = String::from("----:");

    freeform_chunk(data, "mean", &mut freeform)?;
    // Keep the two parts distinguishable in the resulting key
    freeform.push(':');
    freeform_chunk(data, "name", &mut freeform)?;

    Ok(freeform)
}
/// Reads one part of a freeform identifier and appends its text to `freeform`
///
/// The next atom in `data` must be named `name` ("mean" or "name"). Its content is
/// 4 bytes of version/flags followed by a UTF-8 string that fills the rest of the atom.
///
/// # Errors
///
/// * Any I/O error from `data`
/// * `LoftyError::BadAtom` if the atom has another name, is too small,
///   or does not contain valid UTF-8
fn freeform_chunk<R>(data: &mut R, name: &str, freeform: &mut String) -> Result<()>
where
    R: Read + Seek,
{
    let atom = Atom::read(data)?;

    if atom.ident != name {
        return Err(LoftyError::BadAtom(
            "Found freeform identifier \"----\" with no trailing \"mean\" or \"name\" atoms",
        ));
    }

    // Size and identifier (8, or 16 with an extended size), plus version/flags (4)
    let header_len: u64 = if atom.extended { 20 } else { 12 };

    let content_len = atom.len.checked_sub(header_len).ok_or(LoftyError::BadAtom(
        "Found a freeform identifier atom that is too small",
    ))?;

    // Version (1)
    // Flags (3)
    let mut version_flags = [0; 4];
    data.read_exact(&mut version_flags)?;

    let mut content = vec![0; content_len as usize];
    data.read_exact(&mut content)?;

    let text = std::str::from_utf8(&content).map_err(|_| {
        LoftyError::BadAtom("Found a non UTF-8 string while reading freeform identifier")
    })?;

    freeform.push_str(text);

    Ok(())
}

52
src/logic/mp4/mod.rs Normal file
View file

@ -0,0 +1,52 @@
mod atom;
mod ilst;
mod moov;
pub(crate) mod read;
mod trak;
use crate::types::file::AudioFile;
use crate::{FileProperties, Result, Tag, TagType};
use std::io::{Read, Seek};
#[allow(dead_code)]
// NOTE(review): presumably needed because `ftyp` is stored but not read anywhere yet;
// drop the attribute once every field is used
/// An MP4 file
pub struct Mp4File {
/// The file format from ftyp's "major brand" (Ex. "M4A ")
pub(crate) ftyp: String,
/// The [`Tag`] parsed from the ilst atom, `None` when the file has no such atom
pub(crate) ilst: Option<Tag>,
/// The file's audio properties
pub(crate) properties: FileProperties,
}
impl AudioFile for Mp4File {
    /// Parses an MP4 file from `reader`, see `read::read_from`
    fn read_from<R>(reader: &mut R) -> Result<Self>
    where
        R: Read + Seek,
    {
        self::read::read_from(reader)
    }

    /// The audio properties read from the file
    fn properties(&self) -> &FileProperties {
        &self.properties
    }

    /// Whether an "ilst" tag was found
    fn contains_tag(&self) -> bool {
        self.ilst.is_some()
    }

    /// Whether a tag of `tag_type` was found, which is only possible for [`TagType::Mp4Atom`]
    fn contains_tag_type(&self, tag_type: &TagType) -> bool {
        matches!(tag_type, TagType::Mp4Atom) && self.ilst.is_some()
    }
}
impl Mp4File {
/// Returns a reference to the "ilst" tag if it exists
///
/// This is `None` when no "ilst" tag was stored for the file.
pub fn ilst(&self) -> Option<&Tag> {
self.ilst.as_ref()
}
}

87
src/logic/mp4/moov.rs Normal file
View file

@ -0,0 +1,87 @@
use super::atom::Atom;
use super::ilst::parse_ilst;
use super::read::skip_unneeded;
use super::trak::Trak;
use crate::error::Result;
use crate::types::tag::Tag;
use byteorder::{BigEndian, ReadBytesExt};
use std::io::{Read, Seek, SeekFrom};
/// The parts of a "moov" atom that are kept after parsing
pub(crate) struct Moov {
/// The "trak" atoms that were collected
pub(crate) traks: Vec<Trak>,
// Represents a parsed moov.udta.meta.ilst since we don't need anything else
pub(crate) meta: Option<Tag>,
}
impl Moov {
    /// Parses the children of a "moov" atom
    ///
    /// `data` has to be positioned directly after the "moov" header. Atoms are read
    /// until reading one fails; only "udta" is inspected, everything else is skipped.
    ///
    /// # Errors
    ///
    /// Any error from parsing "udta" or from skipping an atom
    pub(crate) fn parse<R>(data: &mut R) -> Result<Self>
    where
        R: Read + Seek,
    {
        // TODO: collect the tracks once `Trak::parse` is available
        //"trak" => traks.push(Trak::parse(data, &atom)?),
        let traks = Vec::new();
        let mut meta = None;

        while let Ok(atom) = Atom::read(data) {
            match &*atom.ident {
                "udta" => {
                    // Only the content is left to read. The header is 16 bytes when
                    // the atom uses an extended size, and a bogus length must not
                    // underflow.
                    let header_len: u64 = if atom.extended { 16 } else { 8 };

                    meta = meta_from_udta(data, atom.len.saturating_sub(header_len))?;
                },
                _ => skip_unneeded(data, atom.extended, atom.len)?,
            }
        }

        Ok(Self { traks, meta })
    }
}
/// Finds and parses udta.meta.ilst
///
/// `data` has to be positioned at the start of the "udta" content, and `len` is the
/// size of that content (the "udta" header excluded).
///
/// Returns `Ok(None)` if there is no "meta" atom in "udta" or no "ilst" atom in "meta".
///
/// # Errors
///
/// Any error from reading/skipping atoms or from parsing the "ilst" atom
fn meta_from_udta<R>(data: &mut R, len: u64) -> Result<Option<Tag>>
where
    R: Read + Seek,
{
    // An atom header takes at least 8 bytes, anything shorter that is left over
    // (such as an optional terminator) cannot be another child
    const MIN_ATOM_LEN: u64 = 8;

    let mut read = 0_u64;
    let mut meta = None;

    while len.saturating_sub(read) >= MIN_ATOM_LEN {
        let atom = Atom::read(data)?;

        if atom.ident == "meta" {
            meta = Some(atom);
            break;
        }

        read = read.saturating_add(atom.len);
        skip_unneeded(data, atom.extended, atom.len)?;
    }

    let meta = match meta {
        Some(meta) => meta,
        None => return Ok(None),
    };

    // The meta atom has 4 bytes we don't care about
    // Version (1)
    // Flags (3)
    let _version_flags = data.read_u32::<BigEndian>()?;

    // What is left for children: the total size minus the header and version/flags
    let meta_header_len: u64 = if meta.extended { 20 } else { 12 };
    let meta_content_len = meta.len.saturating_sub(meta_header_len);

    let mut read = 0_u64;
    let mut ilst = None;

    while meta_content_len.saturating_sub(read) >= MIN_ATOM_LEN {
        let atom = Atom::read(data)?;

        if atom.ident == "ilst" {
            ilst = Some(atom);
            break;
        }

        read = read.saturating_add(atom.len);
        skip_unneeded(data, atom.extended, atom.len)?;
    }

    match ilst {
        Some(ilst) => {
            let ilst_header_len: u64 = if ilst.extended { 16 } else { 8 };

            parse_ilst(data, ilst.len.saturating_sub(ilst_header_len))
        },
        None => Ok(None),
    }
}

82
src/logic/mp4/read.rs Normal file
View file

@ -0,0 +1,82 @@
use super::atom::Atom;
use super::moov::Moov;
use super::trak::Trak;
use super::Mp4File;
use crate::types::properties::FileProperties;
use crate::error::{LoftyError, Result};
use std::io::{Read, Seek, SeekFrom};
/// Checks that the stream starts with an "ftyp" atom and returns its major brand
///
/// On success the reader is positioned directly after the "ftyp" atom.
///
/// # Errors
///
/// * Any I/O error from `data`
/// * `LoftyError::UnknownFormat` if the first atom is not "ftyp"
/// * `LoftyError::BadAtom` if the atom is too small or the major brand is not UTF-8
/// * `LoftyError::TooMuchData` if the end of the atom is not addressable
fn verify_mp4<R>(data: &mut R) -> Result<String>
where
    R: Read + Seek,
{
    let atom = Atom::read(data)?;

    if atom.ident != "ftyp" {
        return Err(LoftyError::UnknownFormat);
    }

    // Size and identifier (8, or 16 with an extended size), plus the major brand (4)
    let consumed: u64 = if atom.extended { 20 } else { 12 };

    // Checked before reading so a truncated atom cannot make us read into the next one
    let remaining = atom.len.checked_sub(consumed).ok_or(LoftyError::BadAtom(
        "\"ftyp\" atom is too small to contain a major brand",
    ))?;

    let mut major_brand = vec![0; 4];
    data.read_exact(&mut major_brand)?;

    // Skip the minor version and the compatible brands
    let pos = data.seek(SeekFrom::Current(0))?;
    let end = pos.checked_add(remaining).ok_or(LoftyError::TooMuchData)?;
    data.seek(SeekFrom::Start(end))?;

    String::from_utf8(major_brand)
        .map_err(|_| LoftyError::BadAtom("Unable to parse \"ftyp\"'s major brand"))
}
fn read_properties<R>(data: &mut R, traks: &[Trak]) -> Result<FileProperties>
where
R: Read + Seek,
{}
#[allow(clippy::similar_names)]
pub(crate) fn read_from<R>(data: &mut R) -> Result<Mp4File>
where
R: Read + Seek,
{
let ftyp = verify_mp4(data)?;
let mut moov = false;
while let Ok(atom) = Atom::read(data) {
if &*atom.ident == "moov" {
moov = true;
break;
}
skip_unneeded(data, atom.extended, atom.len)?;
}
if !moov {
return Err(LoftyError::Mp4("No \"moov\" atom found"));
}
let moov = Moov::parse(data)?;
Ok(Mp4File {
ftyp,
ilst: moov.meta,
properties: Default::default(),
})
}
/// Skips the content of an atom whose header has just been read
///
/// `len` is the total size of the atom, and `ext` tells whether that size came from
/// the 64-bit extended size field. The header that was already consumed is 8 bytes,
/// or 16 bytes for an extended atom, so only the rest of the atom is skipped.
///
/// # Errors
///
/// * Any I/O error from `data`
/// * `LoftyError::BadAtom` if `len` is smaller than the atom's own header
/// * `LoftyError::TooMuchData` if the end of the atom is not addressable
pub(crate) fn skip_unneeded<R>(data: &mut R, ext: bool, len: u64) -> Result<()>
where
    R: Read + Seek,
{
    let header_len: u64 = if ext { 16 } else { 8 };

    let remaining = len.checked_sub(header_len).ok_or(LoftyError::BadAtom(
        "Found an atom length smaller than its header",
    ))?;

    // Seeking to an absolute position avoids squeezing the length into an `i64`
    let pos = data.seek(SeekFrom::Current(0))?;
    let end = pos.checked_add(remaining).ok_or(LoftyError::TooMuchData)?;

    data.seek(SeekFrom::Start(end))?;

    Ok(())
}

View file

@ -8,6 +8,7 @@ use crate::logic::ogg::vorbis::VorbisFile;
use crate::types::file::AudioFile;
use crate::{FileType, LoftyError, Result, TaggedFile};
use crate::logic::mp4::Mp4File;
use std::io::{Cursor, Read, Seek};
use std::path::Path;
@ -85,14 +86,14 @@ fn _read_from<R>(reader: &mut R, file_type: FileType) -> Result<TaggedFile>
where
R: Read + Seek,
{
match file_type {
FileType::AIFF => Ok(AiffFile::read_from(reader)?.into()),
FileType::APE => Ok(ApeFile::read_from(reader)?.into()),
FileType::FLAC => Ok(FlacFile::read_from(reader)?.into()),
FileType::MP3 => Ok(MpegFile::read_from(reader)?.into()),
FileType::Opus => Ok(OpusFile::read_from(reader)?.into()),
FileType::Vorbis => Ok(VorbisFile::read_from(reader)?.into()),
FileType::WAV => Ok(WavFile::read_from(reader)?.into()),
_ => Err(LoftyError::UnknownFormat), // FileType::MP4 => {}, TODO,
}
Ok(match file_type {
FileType::AIFF => AiffFile::read_from(reader)?.into(),
FileType::APE => ApeFile::read_from(reader)?.into(),
FileType::FLAC => FlacFile::read_from(reader)?.into(),
FileType::MP3 => MpegFile::read_from(reader)?.into(),
FileType::Opus => OpusFile::read_from(reader)?.into(),
FileType::Vorbis => VorbisFile::read_from(reader)?.into(),
FileType::WAV => WavFile::read_from(reader)?.into(),
FileType::MP4 => Mp4File::read_from(reader)?.into(),
})
}