From c526df57b8e10b94b096476c5eaf20eae52e7061 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 6 Oct 2021 21:08:11 +0200 Subject: [PATCH] tac: opportunistically use memory maps Since tac must read its input files completely before it can start processing them from the end, it is particularly well suited to using memory maps, which let it benefit from the page cache maintained by the operating system to bring the necessary data into memory as required. This also covers situations where the input is stdin, provided that stdin is not a pipe but, for example, a file descriptor set up by the user's shell through an input redirection. --- Cargo.lock | 10 ++++++ src/uu/tac/Cargo.toml | 3 ++ src/uu/tac/src/tac.rs | 72 +++++++++++++++++++++++++++++++++---------- 3 files changed, 68 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6c30a182d..3633928c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1064,6 +1064,15 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" +[[package]] +name = "memmap2" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4647a11b578fead29cdbb34d4adef8dd3dc35b876c9c6d5240d83f205abfe96e" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.6.4" @@ -3025,6 +3034,7 @@ version = "0.0.7" dependencies = [ "clap", "memchr 2.4.0", + "memmap2", "regex", "uucore", "uucore_procs", diff --git a/src/uu/tac/Cargo.toml b/src/uu/tac/Cargo.toml index 1e436e916..00803c8d2 100644 --- a/src/uu/tac/Cargo.toml +++ b/src/uu/tac/Cargo.toml @@ -1,3 +1,5 @@ +# spell-checker:ignore memmap + [package] name = "uu_tac" version = "0.0.7" @@ -16,6 +18,7 @@ path = "src/tac.rs" [dependencies] memchr = "2" +memmap2 = "0.5" regex = "1" clap = { version = "2.33", features = ["wrap_help"] } uucore = { version=">=0.0.9", package="uucore", path="../../uucore" } diff --git a/src/uu/tac/src/tac.rs b/src/uu/tac/src/tac.rs 
index 370e92230..cdb2d74e3 100644 --- a/src/uu/tac/src/tac.rs +++ b/src/uu/tac/src/tac.rs @@ -5,15 +5,19 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore (ToDO) sbytes slen dlen memmem +// spell-checker:ignore (ToDO) sbytes slen dlen memmem memmap Mmap mmap SIGBUS #[macro_use] extern crate uucore; use clap::{crate_version, App, Arg}; use memchr::memmem; +use memmap2::Mmap; use std::io::{stdin, stdout, BufWriter, Read, Write}; -use std::{fs::read, path::Path}; +use std::{ + fs::{read, File}, + path::Path, +}; use uucore::display::Quotable; use uucore::InvalidEncodingHandling; @@ -220,14 +224,23 @@ fn tac(filenames: Vec<&str>, before: bool, regex: bool, separator: &str) -> i32 }; for &filename in &filenames { - let data = if filename == "-" { - let mut data = Vec::new(); - if let Err(e) = stdin().read_to_end(&mut data) { - show_error!("failed to read from stdin: {}", e); - exit_code = 1; - continue; + let mmap; + let buf; + + let data: &[u8] = if filename == "-" { + if let Some(mmap1) = try_mmap_stdin() { + mmap = mmap1; + &mmap + } else { + let mut buf1 = Vec::new(); + if let Err(e) = stdin().read_to_end(&mut buf1) { + show_error!("failed to read from stdin: {}", e); + exit_code = 1; + continue; + } + buf = buf1; + &buf } - data } else { let path = Path::new(filename); if path.is_dir() || path.metadata().is_err() { @@ -242,22 +255,47 @@ fn tac(filenames: Vec<&str>, before: bool, regex: bool, separator: &str) -> i32 exit_code = 1; continue; } - match read(path) { - Ok(data) => data, - Err(e) => { - show_error!("failed to read {}: {}", filename.quote(), e); - exit_code = 1; - continue; + + if let Some(mmap1) = try_mmap_path(path) { + mmap = mmap1; + &mmap + } else { + match read(path) { + Ok(buf1) => { + buf = buf1; + &buf + } + Err(e) => { + show_error!("failed to read {}: {}", filename.quote(), e); + exit_code = 1; + continue; + } } } }; if let Some(pattern) 
= &pattern { - buffer_tac_regex(&data, pattern, before) + buffer_tac_regex(data, pattern, before) } else { - buffer_tac(&data, before, separator) + buffer_tac(data, before, separator) } .unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e)); } exit_code } + +fn try_mmap_stdin() -> Option<Mmap> { + // SAFETY: If the file is truncated while we map it, SIGBUS will be raised + // and our process will be terminated, thus preventing access of invalid memory. + unsafe { Mmap::map(&stdin()).ok() } +} + +fn try_mmap_path(path: &Path) -> Option<Mmap> { + let file = File::open(path).ok()?; + + // SAFETY: If the file is truncated while we map it, SIGBUS will be raised + // and our process will be terminated, thus preventing access of invalid memory. + let mmap = unsafe { Mmap::map(&file).ok()? }; + + Some(mmap) +}