From d94ec8c7705483602065e512830a7cfc2a716d25 Mon Sep 17 00:00:00 2001 From: Serial <69764315+Serial-ATA@users.noreply.github.com> Date: Tue, 5 Sep 2023 13:59:55 -0400 Subject: [PATCH] EBML: Implement reader/Parse the header for properties --- src/ebml/element_reader.rs | 237 +++++++++++++++++++++++++++++++++++++ src/ebml/mod.rs | 1 + src/ebml/properties.rs | 22 +++- src/ebml/read.rs | 117 +++++++++++++++++- src/ebml/vint.rs | 4 +- 5 files changed, 376 insertions(+), 5 deletions(-) create mode 100644 src/ebml/element_reader.rs diff --git a/src/ebml/element_reader.rs b/src/ebml/element_reader.rs new file mode 100644 index 00000000..9d6064fd --- /dev/null +++ b/src/ebml/element_reader.rs @@ -0,0 +1,237 @@ +use crate::ebml::vint::VInt; +use crate::error::Result; +use crate::macros::decode_err; + +use std::io::Read; + +use byteorder::{BigEndian, ReadBytesExt}; + +pub struct ElementHeader { + pub(crate) id: VInt, + pub(crate) size: VInt, +} + +impl ElementHeader { + fn read(reader: &mut R, max_vint_length: u8) -> Result + where + R: Read, + { + Ok(Self { + id: VInt::parse(reader, max_vint_length)?, + size: VInt::parse(reader, max_vint_length)?, + }) + } +} + +#[derive(Copy, Clone)] +pub enum ElementDataType { + SignedInt, + UnsignedInt, + Float, + String, + Utf8, + Date, + Master, + Binary, +} + +#[derive(Copy, Clone)] +struct MasterElement { + id: ElementIdent, + children: &'static [(VInt, ChildElementDescriptor)], +} + +#[derive(Copy, Clone)] +pub(crate) struct ChildElementDescriptor { + pub(crate) ident: ElementIdent, + pub(crate) data_type: ElementDataType, +} + +macro_rules! define_master_elements { + ($( + $_readable_ident:ident : { + id: $vint_id:literal, + children: [ + $($_readable_child_ident:ident : { $child_id:literal, $data_ty:ident }),* $(,)? + ] $(,)? + } + ),+ $(,)?) => { + #[derive(Copy, Clone, Eq, PartialEq)] + pub(crate) enum ElementIdent { + $( + $_readable_ident, + $($_readable_child_ident,)* + )+ + } + + static MASTER_ELEMENTS: once_cell::sync::Lazy> = once_cell::sync::Lazy::new(|| { + let mut m = std::collections::HashMap::new(); + $( + m.insert( + VInt($vint_id), + MasterElement { + id: ElementIdent::$_readable_ident, + children: &[$((VInt($child_id), ChildElementDescriptor { + ident: ElementIdent::$_readable_child_ident, + data_type: ElementDataType::$data_ty, + })),*][..] + } + ); + )+ + m + }); + } +} + +define_master_elements! { + EBML: { + id: 0x1A45DFA3, + children: [ + EBMLVersion: { 0x4286, UnsignedInt }, + EBMLReadVersion: { 0x42F7, UnsignedInt }, + EBMLMaxIDLength: { 0x42F2, UnsignedInt }, + EBMLMaxSizeLength: { 0x42F3, UnsignedInt }, + DocType: { 0x4282, String }, + DocTypeVersion: { 0x4287, UnsignedInt }, + DocTypeReadVersion: { 0x4285, UnsignedInt }, + ], + }, + DocTypeExtension: { + id: 0x4281, + children: [ + DocTypeExtensionName: { 0x4283, String }, + DocTypeExtensionVersion: { 0x4284, UnsignedInt }, + ], + }, +} + +struct ElementReaderContext { + /// Current master element + current_master: Option, + /// Remaining length of the master element + master_length: u64, + /// Maximum size in octets of all element IDs + max_id_length: u8, + /// Maximum size in octets of all element data sizes + max_size_length: u8, +} + +impl Default for ElementReaderContext { + fn default() -> Self { + Self { + current_master: None, + master_length: 0, + // https://www.rfc-editor.org/rfc/rfc8794.html#name-ebmlmaxidlength-element + max_id_length: 4, + // https://www.rfc-editor.org/rfc/rfc8794.html#name-ebmlmaxsizelength-element + max_size_length: 8, + } + } +} + +pub(crate) enum ElementReaderYield { + Master((ElementIdent, u64)), + Child((ChildElementDescriptor, u64)), + Unknown(ElementHeader), + Eof, +} + +pub struct ElementReader { + reader: R, + ctx: ElementReaderContext, +} + +impl ElementReader +where + R: Read, +{ + pub(crate) fn new(reader: R) -> Self { + Self { + reader, + ctx: ElementReaderContext::default(), + } + } + + pub(crate) fn set_max_id_length(&mut self, len: u8) { + self.ctx.max_id_length = len + } + + pub(crate) fn set_max_size_length(&mut self, len: u8) { + self.ctx.max_size_length = len + } + + fn next_master(&mut self) -> Result { + let header = ElementHeader::read(&mut self.reader, self.ctx.max_size_length)?; + let Some(master) = MASTER_ELEMENTS.get(&header.id) else { + // We encountered an unknown master element + return Ok(ElementReaderYield::Unknown(header)); + }; + + self.ctx.current_master = Some(*master); + self.ctx.master_length = header.size.value(); + Ok(ElementReaderYield::Master(( + master.id, + self.ctx.master_length, + ))) + } + + pub(crate) fn next(&mut self) -> Result { + let Some(current_master) = self.ctx.current_master else { + return self.next_master(); + }; + + if self.ctx.master_length == 0 { + return self.next_master(); + } + + let header = ElementHeader::read(&mut self.reader, self.ctx.max_size_length)?; + + let Some((_, child)) = current_master + .children + .iter() + .find(|(id, _)| *id == header.id) + else { + return Ok(ElementReaderYield::Unknown(header)); + }; + + Ok(ElementReaderYield::Child((*child, header.size.value()))) + } + + pub(crate) fn skip(&mut self, length: u64) -> Result<()> { + std::io::copy(&mut self.reader.by_ref().take(length), &mut std::io::sink())?; + Ok(()) + } + + pub(crate) fn read_signed_int(&mut self) -> Result { + todo!() + } + + pub(crate) fn read_unsigned_int(&mut self) -> Result { + todo!() + } + + pub(crate) fn read_float(&mut self, element_length: u64) -> Result { + Ok(match element_length { + 0 => 0.0, + 4 => self.reader.read_f32::()? as f64, + 8 => self.reader.read_f64::()?, + _ => decode_err!(@BAIL Ebml, "Invalid size for float element"), + }) + } + + pub(crate) fn read_string(&mut self) -> Result { + todo!() + } + + pub(crate) fn read_utf8(&mut self) -> Result { + todo!() + } + + pub(crate) fn read_date(&mut self) -> Result { + todo!() + } + + pub(crate) fn read_binary(&mut self) -> Result> { + todo!() + } +} diff --git a/src/ebml/mod.rs b/src/ebml/mod.rs index 734c71ff..8cff2245 100644 --- a/src/ebml/mod.rs +++ b/src/ebml/mod.rs @@ -1,4 +1,5 @@ //! EBML specific items +mod element_reader; mod properties; mod read; mod tag; diff --git a/src/ebml/properties.rs b/src/ebml/properties.rs index 3bc080a3..27f3acb9 100644 --- a/src/ebml/properties.rs +++ b/src/ebml/properties.rs @@ -1,8 +1,28 @@ use crate::properties::FileProperties; +#[derive(Debug, Clone, PartialEq, Default)] +pub struct EbmlHeaderProperties { + pub(crate) version: u64, + pub(crate) read_version: u64, + pub(crate) max_id_length: u8, + pub(crate) max_size_length: u8, + pub(crate) doc_type: String, + pub(crate) doc_type_version: u64, + pub(crate) doc_type_read_version: u64, +} + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct EbmlExtension { + pub(crate) name: String, + pub(crate) version: u64, +} + /// EBML audio properties #[derive(Debug, Clone, PartialEq, Default)] -pub struct EbmlProperties {} +pub struct EbmlProperties { + pub(crate) header: EbmlHeaderProperties, + pub(crate) extensions: Vec, +} impl From for FileProperties { fn from(_input: EbmlProperties) -> Self { diff --git a/src/ebml/read.rs b/src/ebml/read.rs index ec95bb6f..513504a5 100644 --- a/src/ebml/read.rs +++ b/src/ebml/read.rs @@ -1,12 +1,125 @@ use super::EbmlFile; +use crate::ebml::element_reader::{ElementIdent, ElementReader, ElementReaderYield}; +use crate::ebml::EbmlProperties; use crate::error::Result; +use crate::macros::decode_err; use crate::probe::ParseOptions; use std::io::{Read, Seek}; -pub(super) fn read_from(_reader: &mut R, _parse_options: ParseOptions) -> Result +pub(super) fn read_from(reader: &mut R, parse_options: ParseOptions) -> Result where R: Read + Seek, { - todo!() + // Default initialize the properties up here since we end up discovering + // new ones all scattered throughout the file + let mut properties = EbmlProperties::default(); + + let mut element_reader = ElementReader::new(reader); + + // First we need to go through the elements in the EBML master element + read_ebml_header(&mut element_reader, parse_options, &mut properties)?; + + loop { + let ident; + let data_ty; + let size; + + let res = element_reader.next()?; + match res { + ElementReaderYield::Master(_) => continue, + ElementReaderYield::Child((child, size_)) => { + ident = child.ident; + data_ty = child.data_type; + size = size_; + }, + ElementReaderYield::Unknown(element) => { + log::debug!("Encountered unknown EBML element: {}", element.id.0); + element_reader.skip(element.size.value())?; + continue; + }, + ElementReaderYield::Eof => break, + } + } + + Ok(EbmlFile { + ebml_tag: None, + properties, + }) +} + +fn read_ebml_header( + element_reader: &mut ElementReader, + parse_options: ParseOptions, + properties: &mut EbmlProperties, +) -> Result<()> +where + R: Read + Seek, +{ + match element_reader.next() { + Ok(ElementReaderYield::Master((ElementIdent::EBML, _))) => {}, + Ok(_) => decode_err!(@BAIL Ebml, "File does not start with an EBML master element"), + Err(e) => return Err(e), + } + + loop { + let ident; + let data_ty; + let size; + + let res = element_reader.next()?; + match res { + // The only expected master element in the header is `DocTypeExtension` + ElementReaderYield::Master((ElementIdent::DocTypeExtension, _)) => continue, + ElementReaderYield::Child((child, size_)) => { + ident = child.ident; + data_ty = child.data_type; + size = size_; + }, + ElementReaderYield::Unknown(element) => { + log::debug!( + "Encountered unknown EBML element in header: {}", + element.id.0 + ); + element_reader.skip(element.size.value())?; + continue; + }, + _ => break, + } + + if ident == ElementIdent::EBMLMaxIDLength { + properties.header.max_id_length = element_reader.read_unsigned_int()? as u8; + element_reader.set_max_id_length(properties.header.max_id_length); + continue; + } + + if ident == ElementIdent::EBMLMaxSizeLength { + properties.header.max_size_length = element_reader.read_unsigned_int()? as u8; + element_reader.set_max_size_length(properties.header.max_size_length); + continue; + } + + // Anything else in the header is unnecessary, and only read for the properties + // struct + if !parse_options.read_properties { + element_reader.skip(size)?; + continue; + } + + match ident { + ElementIdent::EBMLVersion => { + properties.header.version = element_reader.read_unsigned_int()? + }, + ElementIdent::EBMLReadVersion => { + properties.header.read_version = element_reader.read_unsigned_int()? + }, + ElementIdent::DocType => properties.header.doc_type = element_reader.read_string()?, + ElementIdent::DocTypeVersion => { + properties.header.doc_type_version = element_reader.read_unsigned_int()? + }, + _ => element_reader.skip(size)?, + } + } + + Ok(()) } diff --git a/src/ebml/vint.rs b/src/ebml/vint.rs index e7b43a91..8e7bb4ef 100644 --- a/src/ebml/vint.rs +++ b/src/ebml/vint.rs @@ -11,8 +11,8 @@ use byteorder::{ReadBytesExt, WriteBytesExt}; /// /// To ensure safe construction of `VInt`s, users must create them through [`VInt::parse`] or [`VInt::from_u64`]. #[repr(transparent)] -#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] -pub struct VInt(u64); +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] +pub struct VInt(pub(crate) u64); impl VInt { // Each octet will shave a single bit off each byte