lofty-rs/src/probe.rs

553 lines
15 KiB
Rust
Raw Normal View History

2021-12-22 14:49:43 +00:00
use crate::ape::ApeFile;
2022-07-12 18:46:30 +00:00
use crate::error::Result;
2022-03-04 14:07:17 +00:00
use crate::file::{AudioFile, FileType, TaggedFile};
2022-03-18 19:06:42 +00:00
use crate::flac::FlacFile;
2021-12-22 14:49:43 +00:00
use crate::iff::aiff::AiffFile;
use crate::iff::wav::WavFile;
2022-07-12 18:46:30 +00:00
use crate::macros::err;
use crate::mp3::header::search_for_frame_sync;
2022-07-24 20:25:08 +00:00
use crate::mp3::MPEGFile;
2021-12-22 14:49:43 +00:00
use crate::mp4::Mp4File;
use crate::ogg::opus::OpusFile;
2022-01-31 23:19:11 +00:00
use crate::ogg::speex::SpeexFile;
2021-12-22 14:49:43 +00:00
use crate::ogg::vorbis::VorbisFile;
use crate::resolve::CUSTOM_RESOLVERS;
2022-05-30 02:14:40 +00:00
use crate::wavpack::WavPackFile;
2021-08-02 21:25:31 +00:00
2021-12-05 22:02:22 +00:00
use std::fs::File;
use std::io::{BufReader, Cursor, Read, Seek, SeekFrom};
2021-08-02 21:25:31 +00:00
use std::path::Path;
2021-12-05 22:02:22 +00:00
/// A format agnostic reader
///
/// This provides a way to determine the [`FileType`] of a reader, for when a concrete
/// type is not known.
2021-12-06 12:18:07 +00:00
///
/// ## Usage
///
/// When reading from a path, the [`FileType`] will be inferred from the path, rather than the
/// open file.
///
2022-07-06 03:26:53 +00:00
/// ```rust,no_run
2021-12-06 12:18:07 +00:00
/// # use lofty::{LoftyError, Probe};
/// # fn main() -> Result<(), LoftyError> {
/// use lofty::FileType;
///
2022-07-06 02:51:02 +00:00
/// let probe = Probe::open("path/to/my.mp3")?;
2021-12-06 12:18:07 +00:00
///
/// // Inferred from the `mp3` extension
2022-07-24 20:25:08 +00:00
/// assert_eq!(probe.file_type(), Some(FileType::MPEG));
2021-12-06 12:18:07 +00:00
/// # Ok(())
/// # }
/// ```
///
/// When a path isn't available, or is unreliable, content-based detection is also possible.
///
2022-07-06 03:26:53 +00:00
/// ```rust,no_run
2021-12-06 12:18:07 +00:00
/// # use lofty::{LoftyError, Probe};
/// # fn main() -> Result<(), LoftyError> {
/// use lofty::FileType;
///
/// // Our same path probe with a guessed file type
2022-07-06 02:51:02 +00:00
/// let probe = Probe::open("path/to/my.mp3")?.guess_file_type()?;
2021-12-06 12:18:07 +00:00
///
2021-12-23 15:00:54 +00:00
/// // Inferred from the file's content
2022-07-24 20:25:08 +00:00
/// assert_eq!(probe.file_type(), Some(FileType::MPEG));
2021-12-06 12:18:07 +00:00
/// # Ok(())
/// # }
/// ```
///
/// Or with another reader
///
/// ```rust
/// # use lofty::{LoftyError, Probe};
/// # fn main() -> Result<(), LoftyError> {
/// use lofty::FileType;
/// use std::io::Cursor;
///
/// static MAC_HEADER: &[u8; 3] = b"MAC";
///
/// let probe = Probe::new(Cursor::new(MAC_HEADER)).guess_file_type()?;
///
/// // Inferred from the MAC header
/// assert_eq!(probe.file_type(), Some(FileType::APE));
/// # Ok(())
/// # }
/// ```
2021-12-05 22:02:22 +00:00
pub struct Probe<R: Read> {
inner: R,
f_ty: Option<FileType>,
}
2021-08-02 21:25:31 +00:00
2021-12-05 22:02:22 +00:00
impl<R: Read> Probe<R> {
2021-08-02 21:25:31 +00:00
/// Create a new `Probe`
2022-07-06 03:26:53 +00:00
///
/// Before creating a `Probe`, consider wrapping it in a [`BufReader`](std::io::BufReader) for better
/// performance.
///
/// # Examples
///
/// ```rust
/// use lofty::Probe;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// # fn main() -> lofty::Result<()> {
/// # let path = "tests/files/assets/minimal/full_test.mp3";
/// let file = File::open(path)?;
/// let reader = BufReader::new(file);
///
/// let probe = Probe::new(reader);
/// # Ok(()) }
/// ```
2021-12-05 22:02:22 +00:00
pub fn new(reader: R) -> Self {
Self {
inner: reader,
f_ty: None,
}
2021-08-02 21:25:31 +00:00
}
2021-12-05 22:02:22 +00:00
/// Create a new `Probe` with a specified [`FileType`]
2022-07-06 03:26:53 +00:00
///
/// Before creating a `Probe`, consider wrapping it in a [`BufReader`](std::io::BufReader) for better
/// performance.
///
/// # Examples
///
/// ```rust
/// use lofty::{FileType, Probe};
/// use std::fs::File;
/// use std::io::BufReader;
///
/// # fn main() -> lofty::Result<()> {
/// # let my_mp3_path = "tests/files/assets/minimal/full_test.mp3";
/// // We know the file is going to be an MP3,
/// // so we can skip the format detection
/// let file = File::open(my_mp3_path)?;
/// let reader = BufReader::new(file);
///
2022-07-24 20:25:08 +00:00
/// let probe = Probe::with_file_type(reader, FileType::MPEG);
2022-07-06 03:26:53 +00:00
/// # Ok(()) }
/// ```
2021-12-05 22:02:22 +00:00
pub fn with_file_type(reader: R, file_type: FileType) -> Self {
Self {
inner: reader,
f_ty: Some(file_type),
}
}
/// Returns the current [`FileType`]
2022-07-06 03:26:53 +00:00
///
/// # Examples
///
/// ```rust
/// use lofty::{FileType, Probe};
///
/// # fn main() -> lofty::Result<()> {
/// # let reader = std::io::Cursor::new(&[]);
/// let probe = Probe::new(reader);
///
/// let file_type = probe.file_type();
/// # Ok(()) }
/// ```
2021-12-05 22:02:22 +00:00
pub fn file_type(&self) -> Option<FileType> {
self.f_ty
}
/// Set the [`FileType`] with which to read the file
2022-07-06 03:26:53 +00:00
///
/// # Examples
///
/// ```rust
/// use lofty::{FileType, Probe};
///
/// # fn main() -> lofty::Result<()> {
/// # let reader = std::io::Cursor::new(&[]);
/// let mut probe = Probe::new(reader);
/// assert_eq!(probe.file_type(), None);
///
2022-07-24 20:25:08 +00:00
/// probe.set_file_type(FileType::MPEG);
2022-07-06 03:26:53 +00:00
///
2022-07-24 20:25:08 +00:00
/// assert_eq!(probe.file_type(), Some(FileType::MPEG));
2022-07-06 03:26:53 +00:00
/// # Ok(()) }
/// ```
2021-12-05 22:02:22 +00:00
pub fn set_file_type(&mut self, file_type: FileType) {
self.f_ty = Some(file_type)
}
/// Extract the reader
2022-07-06 03:26:53 +00:00
///
/// # Examples
///
/// ```rust
/// use lofty::{FileType, Probe};
///
/// # fn main() -> lofty::Result<()> {
/// # let reader = std::io::Cursor::new(&[]);
/// let probe = Probe::new(reader);
///
/// let reader = probe.into_inner();
/// # Ok(()) }
/// ```
2021-12-05 22:02:22 +00:00
pub fn into_inner(self) -> R {
self.inner
}
}
impl Probe<BufReader<File>> {
/// Opens a file for reading
///
/// This will initially guess the [`FileType`] from the path, but
/// this can be overwritten with [`Probe::guess_file_type`] or [`Probe::set_file_type`]
///
/// # Errors
///
/// * `path` does not exist
2022-07-06 03:26:53 +00:00
///
/// # Examples
///
/// ```rust,no_run
/// use lofty::{FileType, Probe};
///
/// # fn main() -> lofty::Result<()> {
/// let probe = Probe::open("path/to/my.mp3")?;
///
/// // Guessed from the "mp3" extension, see `FileType::from_ext`
2022-07-24 20:25:08 +00:00
/// assert_eq!(probe.file_type(), Some(FileType::MPEG));
2022-07-06 03:26:53 +00:00
/// # Ok(()) }
/// ```
2021-12-05 22:02:22 +00:00
pub fn open<P>(path: P) -> Result<Self>
2021-08-19 19:07:40 +00:00
where
2021-12-05 22:02:22 +00:00
P: AsRef<Path>,
2021-08-19 19:07:40 +00:00
{
2021-12-05 22:02:22 +00:00
let path = path.as_ref();
Ok(Self {
inner: BufReader::new(File::open(path)?),
2022-02-18 00:00:38 +00:00
f_ty: FileType::from_path(path),
2021-12-05 22:02:22 +00:00
})
2021-08-19 19:07:40 +00:00
}
2021-12-05 22:02:22 +00:00
}
2021-08-02 21:25:31 +00:00
2021-12-05 22:02:22 +00:00
impl<R: Read + Seek> Probe<R> {
/// Attempts to get the [`FileType`] based on the data in the reader
2021-08-19 19:07:40 +00:00
///
2021-12-05 22:02:22 +00:00
/// On success, the file type will be replaced
///
/// # Errors
///
/// All errors that occur within this function are [`std::io::Error`].
/// If an error does occur, there is likely an issue with the provided
/// reader, and the entire `Probe` should be discarded.
2022-07-06 03:26:53 +00:00
///
/// # Examples
///
/// ```rust
/// use lofty::{FileType, Probe};
///
/// # fn main() -> lofty::Result<()> {
/// # let path = "tests/files/assets/minimal/full_test.mp3";
/// # let file = std::fs::File::open(path)?;
/// # let reader = std::io::BufReader::new(file);
/// let probe = Probe::new(reader).guess_file_type()?;
///
/// // Determined the file is MP3 from the content
2022-07-24 20:25:08 +00:00
/// assert_eq!(probe.file_type(), Some(FileType::MPEG));
2022-07-06 03:26:53 +00:00
/// # Ok(()) }
/// ```
2022-01-27 20:53:41 +00:00
pub fn guess_file_type(mut self) -> std::io::Result<Self> {
2021-12-05 22:02:22 +00:00
let f_ty = self.guess_inner()?;
self.f_ty = f_ty.or(self.f_ty);
2021-08-02 21:25:31 +00:00
2021-12-05 22:02:22 +00:00
Ok(self)
2021-08-02 21:25:31 +00:00
}
2021-12-05 23:05:14 +00:00
#[allow(clippy::shadow_unrelated)]
fn guess_inner(&mut self) -> std::io::Result<Option<FileType>> {
// temporary buffer for storing 36 bytes
// (36 is just a guess as to how long the data for estimating the file type might be)
2021-12-05 22:02:22 +00:00
let mut buf = [0; 36];
let starting_position = self.inner.stream_position()?;
2022-01-23 19:11:16 +00:00
// Read (up to) 36 bytes
2021-12-05 22:02:22 +00:00
let buf_len = std::io::copy(
&mut self.inner.by_ref().take(buf.len() as u64),
2021-12-05 22:02:22 +00:00
&mut Cursor::new(&mut buf[..]),
)? as usize;
2022-01-23 19:11:16 +00:00
self.inner.seek(SeekFrom::Start(starting_position))?;
2021-12-05 22:02:22 +00:00
2022-01-23 19:11:16 +00:00
// Guess the file type by using these 36 bytes
2021-12-05 22:02:22 +00:00
match FileType::from_buffer_inner(&buf[..buf_len]) {
2022-01-23 19:11:16 +00:00
// We were able to determine a file type
(Some(f_ty), _) => Ok(Some(f_ty)),
// The file starts with an ID3v2 tag; this means other data can follow (e.g. APE or MP3 frames)
(None, Some(id3_len)) => {
// `id3_len` is the size of the tag, not including the header (10 bytes)
let position_after_id3_block = self
.inner
2021-12-05 22:02:22 +00:00
.seek(SeekFrom::Current(i64::from(10 + id3_len)))?;
2022-03-16 20:59:55 +00:00
// try to guess the file type after the ID3 block by inspecting the first 4 bytes
let mut ident = [0; 4];
2022-01-23 19:11:16 +00:00
std::io::copy(
&mut self.inner.by_ref().take(ident.len() as u64),
&mut Cursor::new(&mut ident[..]),
)?;
self.inner.seek(SeekFrom::Start(position_after_id3_block))?;
2022-01-23 19:11:16 +00:00
let file_type_after_id3_block = match &ident {
2022-03-16 20:59:55 +00:00
[b'M', b'A', b'C', ..] => Ok(Some(FileType::APE)),
b"fLaC" => Ok(Some(FileType::FLAC)),
2022-01-23 19:11:16 +00:00
// Search for a frame sync, which may be preceded by junk
_ if search_for_frame_sync(&mut self.inner)?.is_some() => {
2022-07-24 20:25:08 +00:00
Ok(Some(FileType::MPEG))
2022-01-23 19:11:16 +00:00
},
_ => Ok(None),
};
// before returning any result for a file type, seek back to the front
self.inner.seek(SeekFrom::Start(starting_position))?;
file_type_after_id3_block
2021-12-05 22:02:22 +00:00
},
2022-07-24 20:08:46 +00:00
_ => {
if let Ok(lock) = CUSTOM_RESOLVERS.lock() {
#[allow(clippy::significant_drop_in_scrutinee)]
for (_, resolve) in lock.iter() {
if let ret @ Some(_) = resolve.guess(&buf[..buf_len]) {
return Ok(ret);
}
}
}
Ok(None)
},
2021-12-05 22:02:22 +00:00
}
2021-08-02 21:25:31 +00:00
}
2021-12-05 22:02:22 +00:00
/// Attempts to extract a [`TaggedFile`] from the reader
2021-08-02 21:25:31 +00:00
///
2021-12-22 01:20:24 +00:00
/// If `read_properties` is false, the properties will be zeroed out.
///
2021-08-02 21:25:31 +00:00
/// # Errors
///
2021-12-05 22:02:22 +00:00
/// * No file type
/// - This expects the file type to have been set already, either with
/// [`Probe::guess_file_type`] or [`Probe::set_file_type`]. When reading from
/// paths, this is not necessary.
/// * The reader contains invalid data
2022-07-06 03:26:53 +00:00
///
2022-07-24 20:08:46 +00:00
/// # Panics
///
/// If an unregistered `FileType` ([`FileType::Custom`]) is encountered. See [`crate::resolve::register_custom_resolver`].
///
2022-07-06 03:26:53 +00:00
/// # Examples
///
/// ```rust
/// use lofty::{FileType, Probe};
///
/// # fn main() -> lofty::Result<()> {
/// # let path = "tests/files/assets/minimal/full_test.mp3";
/// # let file = std::fs::File::open(path)?;
/// # let reader = std::io::BufReader::new(file);
/// let probe = Probe::new(reader).guess_file_type()?;
///
/// let parsed_file = probe.read(true)?;
/// # Ok(()) }
/// ```
2021-12-22 01:20:24 +00:00
pub fn read(mut self, read_properties: bool) -> Result<TaggedFile> {
2021-12-05 22:02:22 +00:00
let reader = &mut self.inner;
match self.f_ty {
Some(f_type) => Ok(match f_type {
2021-12-22 01:20:24 +00:00
FileType::AIFF => AiffFile::read_from(reader, read_properties)?.into(),
FileType::APE => ApeFile::read_from(reader, read_properties)?.into(),
FileType::FLAC => FlacFile::read_from(reader, read_properties)?.into(),
2022-07-24 20:25:08 +00:00
FileType::MPEG => MPEGFile::read_from(reader, read_properties)?.into(),
2021-12-22 01:20:24 +00:00
FileType::Opus => OpusFile::read_from(reader, read_properties)?.into(),
FileType::Vorbis => VorbisFile::read_from(reader, read_properties)?.into(),
FileType::WAV => WavFile::read_from(reader, read_properties)?.into(),
FileType::MP4 => Mp4File::read_from(reader, read_properties)?.into(),
2022-01-31 23:19:11 +00:00
FileType::Speex => SpeexFile::read_from(reader, read_properties)?.into(),
2022-05-30 02:14:40 +00:00
FileType::WavPack => WavPackFile::read_from(reader, read_properties)?.into(),
2022-07-24 20:08:46 +00:00
FileType::Custom(c) => {
if let Some(r) = crate::resolve::lookup_resolver(c) {
r.read_from(reader, read_properties)?
} else {
panic!(
"Encountered an unregistered custom `FileType` named `{}`",
c
);
}
},
}),
2022-07-12 18:46:30 +00:00
None => err!(UnknownFormat),
2021-08-19 19:07:40 +00:00
}
}
2021-12-05 22:02:22 +00:00
}
2021-08-02 21:25:31 +00:00
2021-12-05 22:02:22 +00:00
/// Read a [`TaggedFile`] from a [File]
///
/// # Errors
///
/// See:
///
/// * [`Probe::guess_file_type`]
/// * [`Probe::read`]
2022-07-06 03:26:53 +00:00
///
/// # Examples
///
/// ```rust
/// use lofty::read_from;
/// use std::fs::File;
///
/// # fn main() -> lofty::Result<()> {
/// # let path = "tests/files/assets/minimal/full_test.mp3";
/// let mut file = File::open(path)?;
///
/// let parsed_file = read_from(&mut file, true)?;
/// # Ok(()) }
/// ```
2021-12-22 01:20:24 +00:00
pub fn read_from(file: &mut File, read_properties: bool) -> Result<TaggedFile> {
Probe::new(BufReader::new(file))
.guess_file_type()?
.read(read_properties)
2021-12-05 22:02:22 +00:00
}
/// Read a [`TaggedFile`] from a path
///
/// NOTE: This will determine the [`FileType`] from the extension
///
/// # Errors
///
/// See:
///
/// * [`Probe::open`]
/// * [`Probe::read`]
2022-07-06 03:26:53 +00:00
///
/// # Examples
///
/// ```rust
/// use lofty::read_from_path;
///
/// # fn main() -> lofty::Result<()> {
/// # let path = "tests/files/assets/minimal/full_test.mp3";
/// let parsed_file = read_from_path(path, true)?;
/// # Ok(()) }
/// ```
2021-12-22 01:20:24 +00:00
pub fn read_from_path<P>(path: P, read_properties: bool) -> Result<TaggedFile>
2021-12-05 22:02:22 +00:00
where
P: AsRef<Path>,
{
2021-12-22 01:20:24 +00:00
Probe::open(path)?.read(read_properties)
2021-08-02 21:25:31 +00:00
}
#[cfg(test)]
mod tests {
    use crate::{FileType, Probe};

    use std::fs::File;

    #[test]
    fn mp3_id3v2_trailing_junk() {
        // The stream assembled here has 4 bytes of junk (0x20) sitting between the
        // ID3 portion and the first MP3 frame

        // ID3v2.3 header (10 bytes)
        const ID3_HEADER: &[u8] = &[0x49, 0x44, 0x33, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23];
        // TALB frame
        const TALB_FRAME: &[u8] = &[
            0x54, 0x41, 0x4C, 0x42, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x01, 0xFF, 0xFE, 0x61,
            0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00, 0x61,
            0x00, 0x61, 0x00, 0x61, 0x00, 0x61, 0x00,
        ];
        // 4 bytes of junk
        const JUNK: &[u8] = &[0x20, 0x20, 0x20, 0x20];
        // start of MP3 frame (not all bytes are shown in this slice)
        const FRAME_START: &[u8] = &[
            0xFF, 0xFB, 0x50, 0xC4, 0x00, 0x03, 0xC0, 0x00, 0x01, 0xA4, 0x00, 0x00, 0x00, 0x20,
            0x00, 0x00, 0x34, 0x80, 0x00, 0x00, 0x04,
        ];

        let bytes: Vec<u8> = [ID3_HEADER, TALB_FRAME, JUNK, FRAME_START].concat();
        let reader = std::io::Cursor::new(&bytes);

        let probe = Probe::new(reader).guess_file_type().unwrap();
        assert_eq!(probe.file_type(), Some(FileType::MPEG));
    }

    // Checks both detection strategies against the same file
    fn test_probe(path: &str, expected_file_type_guess: FileType) {
        test_probe_file(path, expected_file_type_guess);
        test_probe_path(path, expected_file_type_guess);
    }

    // Test from file contents
    fn test_probe_file(path: &str, expected_file_type_guess: FileType) {
        let mut file = File::open(path).unwrap();
        let guessed = Probe::new(&mut file)
            .guess_file_type()
            .unwrap()
            .file_type();

        assert_eq!(guessed, Some(expected_file_type_guess));
    }

    // Test from file extension
    fn test_probe_path(path: &str, expected_file_type_guess: FileType) {
        let inferred = Probe::open(path).unwrap().file_type();

        assert_eq!(inferred, Some(expected_file_type_guess));
    }

    #[test]
    fn probe_aiff() {
        let path = "tests/files/assets/minimal/full_test.aiff";
        test_probe(path, FileType::AIFF);
    }

    #[test]
    fn probe_ape_with_id3v2() {
        let path = "tests/files/assets/minimal/full_test.ape";
        test_probe(path, FileType::APE);
    }

    #[test]
    fn probe_flac() {
        let path = "tests/files/assets/minimal/full_test.flac";
        test_probe(path, FileType::FLAC);
    }

    #[test]
    fn probe_flac_with_id3v2() {
        let path = "tests/files/assets/flac_with_id3v2.flac";
        test_probe(path, FileType::FLAC);
    }

    #[test]
    fn probe_mp3_with_id3v2() {
        let path = "tests/files/assets/minimal/full_test.mp3";
        test_probe(path, FileType::MPEG);
    }

    #[test]
    fn probe_vorbis() {
        let path = "tests/files/assets/minimal/full_test.ogg";
        test_probe(path, FileType::Vorbis);
    }

    #[test]
    fn probe_opus() {
        let path = "tests/files/assets/minimal/full_test.opus";
        test_probe(path, FileType::Opus);
    }

    #[test]
    fn probe_speex() {
        let path = "tests/files/assets/minimal/full_test.spx";
        test_probe(path, FileType::Speex);
    }

    #[test]
    fn probe_mp4() {
        let path = "tests/files/assets/minimal/m4a_codec_aac.m4a";
        test_probe(path, FileType::MP4);
    }

    #[test]
    fn probe_wav() {
        let path = "tests/files/assets/minimal/wav_format_pcm.wav";
        test_probe(path, FileType::WAV);
    }
}