EBML: Implement reader/Parse the header for properties

This commit is contained in:
Serial 2023-09-05 13:59:55 -04:00
parent eb435e8a92
commit d94ec8c770
No known key found for this signature in database
GPG key ID: DA95198DC17C4568
5 changed files with 376 additions and 5 deletions

237
src/ebml/element_reader.rs Normal file
View file

@ -0,0 +1,237 @@
use crate::ebml::vint::VInt;
use crate::error::Result;
use crate::macros::decode_err;
use std::io::Read;
use byteorder::{BigEndian, ReadBytesExt};
/// The header of a single element: an ID followed by a size.
///
/// Both values are parsed as [`VInt`]s by `ElementHeader::read`, ID first.
pub struct ElementHeader {
/// The ID of the element, used to look it up among the known elements
pub(crate) id: VInt,
/// The size of the element; callers pass `size.value()` as the number of bytes to skip
pub(crate) size: VInt,
}
impl ElementHeader {
    /// Parses an element header from `reader`.
    ///
    /// The ID is read first, then the size. `max_vint_length` is handed to
    /// [`VInt::parse`] for both values.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by [`VInt::parse`].
    fn read<R>(reader: &mut R, max_vint_length: u8) -> Result<Self>
    where
        R: Read,
    {
        // Order matters: the ID precedes the size in the stream
        let id = VInt::parse(reader, max_vint_length)?;
        let size = VInt::parse(reader, max_vint_length)?;

        Ok(Self { id, size })
    }
}
/// The data type an element is declared with in the `define_master_elements!` table.
///
/// NOTE(review): the variant names appear to mirror the element types of RFC 8794
/// (linked elsewhere in this file) — confirm against the spec.
#[derive(Copy, Clone)]
pub enum ElementDataType {
SignedInt,
UnsignedInt,
Float,
String,
Utf8,
Date,
Master,
Binary,
}
/// Static description of a known master element.
#[derive(Copy, Clone)]
struct MasterElement {
// Identifier yielded to callers when this master element is encountered
id: ElementIdent,
// Known children, as (raw element ID, descriptor) pairs; searched linearly by ID
children: &'static [(VInt, ChildElementDescriptor)],
}
/// Describes a known child of a master element.
#[derive(Copy, Clone)]
pub(crate) struct ChildElementDescriptor {
/// Which element this is
pub(crate) ident: ElementIdent,
/// The data type declared for the element's content
pub(crate) data_type: ElementDataType,
}
// Generates the element tables from a declarative list of master elements.
//
// Each entry has the form:
//
//     Name: { id: <literal>, children: [ ChildName: { <id literal>, <ElementDataType variant> }, ... ] }
//
// The expansion defines:
// * `ElementIdent`, an enum with one variant per master element and one per child
//   (so every name used across all entries must be unique)
// * `MASTER_ELEMENTS`, a lazily built map from a master element's ID to its `MasterElement`
macro_rules! define_master_elements {
($(
$_readable_ident:ident : {
id: $vint_id:literal,
children: [
$($_readable_child_ident:ident : { $child_id:literal, $data_ty:ident }),* $(,)?
] $(,)?
}
),+ $(,)?) => {
// Flat list of every master and child name
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) enum ElementIdent {
$(
$_readable_ident,
$($_readable_child_ident,)*
)+
}
// Built on first access; keyed by the master element's ID wrapped in a `VInt`
static MASTER_ELEMENTS: once_cell::sync::Lazy<std::collections::HashMap<VInt, MasterElement>> = once_cell::sync::Lazy::new(|| {
let mut m = std::collections::HashMap::new();
$(
m.insert(
VInt($vint_id),
MasterElement {
id: ElementIdent::$_readable_ident,
// The children are stored as a slice of (ID, descriptor) pairs
children: &[$((VInt($child_id), ChildElementDescriptor {
ident: ElementIdent::$_readable_child_ident,
data_type: ElementDataType::$data_ty,
})),*][..]
}
);
)+
m
});
}
}
// The master elements known to the reader, along with their recognised children.
//
// NOTE(review): `DocTypeExtension` is declared as its own master element and is not
// listed among the children of `EBML`, so a lookup in `EBML`'s children will not find
// it — confirm this is the intended nesting.
define_master_elements! {
// The EBML header
EBML: {
id: 0x1A45DFA3,
children: [
EBMLVersion: { 0x4286, UnsignedInt },
EBMLReadVersion: { 0x42F7, UnsignedInt },
EBMLMaxIDLength: { 0x42F2, UnsignedInt },
EBMLMaxSizeLength: { 0x42F3, UnsignedInt },
DocType: { 0x4282, String },
DocTypeVersion: { 0x4287, UnsignedInt },
DocTypeReadVersion: { 0x4285, UnsignedInt },
],
},
DocTypeExtension: {
id: 0x4281,
children: [
DocTypeExtensionName: { 0x4283, String },
DocTypeExtensionVersion: { 0x4284, UnsignedInt },
],
},
}
// Mutable parsing state carried by an `ElementReader` between calls.
struct ElementReaderContext {
/// Current master element
current_master: Option<MasterElement>,
/// Remaining length of the master element
// NOTE(review): in the code visible here this is only ever assigned from a master
// element's header size and compared against zero; nothing decrements it as
// children are consumed — confirm whether that bookkeeping lives elsewhere.
master_length: u64,
/// Maximum size in octets of all element IDs
max_id_length: u8,
/// Maximum size in octets of all element data sizes
max_size_length: u8,
}
impl Default for ElementReaderContext {
    /// Creates a context with no current master element and the default
    /// ID/size length limits.
    fn default() -> Self {
        // https://www.rfc-editor.org/rfc/rfc8794.html#name-ebmlmaxidlength-element
        const DEFAULT_MAX_ID_LENGTH: u8 = 4;
        // https://www.rfc-editor.org/rfc/rfc8794.html#name-ebmlmaxsizelength-element
        const DEFAULT_MAX_SIZE_LENGTH: u8 = 8;

        Self {
            current_master: None,
            master_length: 0,
            max_id_length: DEFAULT_MAX_ID_LENGTH,
            max_size_length: DEFAULT_MAX_SIZE_LENGTH,
        }
    }
}
/// What `ElementReader::next` produced.
pub(crate) enum ElementReaderYield {
/// A known master element: its identifier and the size from its header
Master((ElementIdent, u64)),
/// A known child of the current master element: its descriptor and the size from its header
Child((ChildElementDescriptor, u64)),
/// An element whose ID was not recognised; the raw header is handed back
Unknown(ElementHeader),
/// End of input
// NOTE(review): no code visible here constructs this variant — confirm where the
// end of the stream is meant to be detected.
Eof,
}
/// Reads elements one header at a time from an underlying reader.
pub struct ElementReader<R> {
// The source the element headers and data are read from
reader: R,
// The current master element and the ID/size length limits
ctx: ElementReaderContext,
}
impl<R> ElementReader<R>
where
R: Read,
{
pub(crate) fn new(reader: R) -> Self {
Self {
reader,
ctx: ElementReaderContext::default(),
}
}
pub(crate) fn set_max_id_length(&mut self, len: u8) {
self.ctx.max_id_length = len
}
pub(crate) fn set_max_size_length(&mut self, len: u8) {
self.ctx.max_size_length = len
}
fn next_master(&mut self) -> Result<ElementReaderYield> {
let header = ElementHeader::read(&mut self.reader, self.ctx.max_size_length)?;
let Some(master) = MASTER_ELEMENTS.get(&header.id) else {
// We encountered an unknown master element
return Ok(ElementReaderYield::Unknown(header));
};
self.ctx.current_master = Some(*master);
self.ctx.master_length = header.size.value();
Ok(ElementReaderYield::Master((
master.id,
self.ctx.master_length,
)))
}
pub(crate) fn next(&mut self) -> Result<ElementReaderYield> {
let Some(current_master) = self.ctx.current_master else {
return self.next_master();
};
if self.ctx.master_length == 0 {
return self.next_master();
}
let header = ElementHeader::read(&mut self.reader, self.ctx.max_size_length)?;
let Some((_, child)) = current_master
.children
.iter()
.find(|(id, _)| *id == header.id)
else {
return Ok(ElementReaderYield::Unknown(header));
};
Ok(ElementReaderYield::Child((*child, header.size.value())))
}
pub(crate) fn skip(&mut self, length: u64) -> Result<()> {
std::io::copy(&mut self.reader.by_ref().take(length), &mut std::io::sink())?;
Ok(())
}
pub(crate) fn read_signed_int(&mut self) -> Result<i64> {
todo!()
}
pub(crate) fn read_unsigned_int(&mut self) -> Result<u64> {
todo!()
}
pub(crate) fn read_float(&mut self, element_length: u64) -> Result<f64> {
Ok(match element_length {
0 => 0.0,
4 => self.reader.read_f32::<BigEndian>()? as f64,
8 => self.reader.read_f64::<BigEndian>()?,
_ => decode_err!(@BAIL Ebml, "Invalid size for float element"),
})
}
pub(crate) fn read_string(&mut self) -> Result<String> {
todo!()
}
pub(crate) fn read_utf8(&mut self) -> Result<String> {
todo!()
}
pub(crate) fn read_date(&mut self) -> Result<String> {
todo!()
}
pub(crate) fn read_binary(&mut self) -> Result<Vec<u8>> {
todo!()
}
}

View file

@ -1,4 +1,5 @@
//! EBML specific items
mod element_reader;
mod properties;
mod read;
mod tag;

View file

@ -1,8 +1,28 @@
use crate::properties::FileProperties;
#[derive(Debug, Clone, PartialEq, Default)]
/// Values collected from the EBML header
///
/// Each field corresponds to the header element of the same name.
pub struct EbmlHeaderProperties {
// `EBMLVersion`
pub(crate) version: u64,
// `EBMLReadVersion`
pub(crate) read_version: u64,
// `EBMLMaxIDLength`
pub(crate) max_id_length: u8,
// `EBMLMaxSizeLength`
pub(crate) max_size_length: u8,
// `DocType`
pub(crate) doc_type: String,
// `DocTypeVersion`
pub(crate) doc_type_version: u64,
// `DocTypeReadVersion`
pub(crate) doc_type_read_version: u64,
}
/// A named, versioned extension
///
/// NOTE(review): presumably one `DocTypeExtension` entry from the header
/// (`DocTypeExtensionName` / `DocTypeExtensionVersion`) — nothing visible here fills it in yet.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct EbmlExtension {
pub(crate) name: String,
pub(crate) version: u64,
}
/// EBML audio properties
#[derive(Debug, Clone, PartialEq, Default)]
pub struct EbmlProperties {}
pub struct EbmlProperties {
pub(crate) header: EbmlHeaderProperties,
pub(crate) extensions: Vec<EbmlExtension>,
}
impl From<EbmlProperties> for FileProperties {
fn from(_input: EbmlProperties) -> Self {

View file

@ -1,12 +1,125 @@
use super::EbmlFile;
use crate::ebml::element_reader::{ElementIdent, ElementReader, ElementReaderYield};
use crate::ebml::EbmlProperties;
use crate::error::Result;
use crate::macros::decode_err;
use crate::probe::ParseOptions;
use std::io::{Read, Seek};
pub(super) fn read_from<R>(_reader: &mut R, _parse_options: ParseOptions) -> Result<EbmlFile>
pub(super) fn read_from<R>(reader: &mut R, parse_options: ParseOptions) -> Result<EbmlFile>
where
R: Read + Seek,
{
todo!()
// Default initialize the properties up here since we end up discovering
// new ones all scattered throughout the file
let mut properties = EbmlProperties::default();
let mut element_reader = ElementReader::new(reader);
// First we need to go through the elements in the EBML master element
read_ebml_header(&mut element_reader, parse_options, &mut properties)?;
loop {
let ident;
let data_ty;
let size;
let res = element_reader.next()?;
match res {
ElementReaderYield::Master(_) => continue,
ElementReaderYield::Child((child, size_)) => {
ident = child.ident;
data_ty = child.data_type;
size = size_;
},
ElementReaderYield::Unknown(element) => {
log::debug!("Encountered unknown EBML element: {}", element.id.0);
element_reader.skip(element.size.value())?;
continue;
},
ElementReaderYield::Eof => break,
}
}
Ok(EbmlFile {
ebml_tag: None,
properties,
})
}
fn read_ebml_header<R>(
element_reader: &mut ElementReader<R>,
parse_options: ParseOptions,
properties: &mut EbmlProperties,
) -> Result<()>
where
R: Read + Seek,
{
match element_reader.next() {
Ok(ElementReaderYield::Master((ElementIdent::EBML, _))) => {},
Ok(_) => decode_err!(@BAIL Ebml, "File does not start with an EBML master element"),
Err(e) => return Err(e),
}
loop {
let ident;
let data_ty;
let size;
let res = element_reader.next()?;
match res {
// The only expected master element in the header is `DocTypeExtension`
ElementReaderYield::Master((ElementIdent::DocTypeExtension, _)) => continue,
ElementReaderYield::Child((child, size_)) => {
ident = child.ident;
data_ty = child.data_type;
size = size_;
},
ElementReaderYield::Unknown(element) => {
log::debug!(
"Encountered unknown EBML element in header: {}",
element.id.0
);
element_reader.skip(element.size.value())?;
continue;
},
_ => break,
}
if ident == ElementIdent::EBMLMaxIDLength {
properties.header.max_id_length = element_reader.read_unsigned_int()? as u8;
element_reader.set_max_id_length(properties.header.max_id_length);
continue;
}
if ident == ElementIdent::EBMLMaxSizeLength {
properties.header.max_size_length = element_reader.read_unsigned_int()? as u8;
element_reader.set_max_size_length(properties.header.max_size_length);
continue;
}
// Anything else in the header is unnecessary, and only read for the properties
// struct
if !parse_options.read_properties {
element_reader.skip(size)?;
continue;
}
match ident {
ElementIdent::EBMLVersion => {
properties.header.version = element_reader.read_unsigned_int()?
},
ElementIdent::EBMLReadVersion => {
properties.header.read_version = element_reader.read_unsigned_int()?
},
ElementIdent::DocType => properties.header.doc_type = element_reader.read_string()?,
ElementIdent::DocTypeVersion => {
properties.header.doc_type_version = element_reader.read_unsigned_int()?
},
_ => element_reader.skip(size)?,
}
}
Ok(())
}

View file

@ -11,8 +11,8 @@ use byteorder::{ReadBytesExt, WriteBytesExt};
///
/// To ensure safe construction of `VInt`s, users must create them through [`VInt::parse`] or [`VInt::from_u64`].
#[repr(transparent)]
// `Hash` allows a `VInt` to be used as a `HashMap` key
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct VInt(pub(crate) u64);
impl VInt {
// Each octet will shave a single bit off each byte