tac: opportunistically use memory maps

Since tac must read its input files completely before it can start
processing them from the end, it is particularly well suited to memory
maps: the page cache maintained by the operating system brings the
necessary data into memory as required.

This also includes situations where the input is stdin, provided that
stdin is not a pipe but, for example, a file descriptor set up by the
user's shell through an input redirection.
This commit is contained in:
Adam Reichold 2021-10-06 21:08:11 +02:00
parent 4eab275235
commit c526df57b8
3 changed files with 68 additions and 17 deletions

10
Cargo.lock generated
View file

@ -1064,6 +1064,15 @@ version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
[[package]]
name = "memmap2"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4647a11b578fead29cdbb34d4adef8dd3dc35b876c9c6d5240d83f205abfe96e"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "memoffset" name = "memoffset"
version = "0.6.4" version = "0.6.4"
@ -3025,6 +3034,7 @@ version = "0.0.7"
dependencies = [ dependencies = [
"clap", "clap",
"memchr 2.4.0", "memchr 2.4.0",
"memmap2",
"regex", "regex",
"uucore", "uucore",
"uucore_procs", "uucore_procs",

View file

@ -1,3 +1,5 @@
# spell-checker:ignore memmap
[package] [package]
name = "uu_tac" name = "uu_tac"
version = "0.0.7" version = "0.0.7"
@ -16,6 +18,7 @@ path = "src/tac.rs"
[dependencies] [dependencies]
memchr = "2" memchr = "2"
memmap2 = "0.5"
regex = "1" regex = "1"
clap = { version = "2.33", features = ["wrap_help"] } clap = { version = "2.33", features = ["wrap_help"] }
uucore = { version=">=0.0.9", package="uucore", path="../../uucore" } uucore = { version=">=0.0.9", package="uucore", path="../../uucore" }

View file

@ -5,15 +5,19 @@
// * For the full copyright and license information, please view the LICENSE // * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code. // * file that was distributed with this source code.
// spell-checker:ignore (ToDO) sbytes slen dlen memmem // spell-checker:ignore (ToDO) sbytes slen dlen memmem memmap Mmap mmap SIGBUS
#[macro_use] #[macro_use]
extern crate uucore; extern crate uucore;
use clap::{crate_version, App, Arg}; use clap::{crate_version, App, Arg};
use memchr::memmem; use memchr::memmem;
use memmap2::Mmap;
use std::io::{stdin, stdout, BufWriter, Read, Write}; use std::io::{stdin, stdout, BufWriter, Read, Write};
use std::{fs::read, path::Path}; use std::{
fs::{read, File},
path::Path,
};
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::InvalidEncodingHandling; use uucore::InvalidEncodingHandling;
@ -220,14 +224,23 @@ fn tac(filenames: Vec<&str>, before: bool, regex: bool, separator: &str) -> i32
}; };
for &filename in &filenames { for &filename in &filenames {
let data = if filename == "-" { let mmap;
let mut data = Vec::new(); let buf;
if let Err(e) = stdin().read_to_end(&mut data) {
let data: &[u8] = if filename == "-" {
if let Some(mmap1) = try_mmap_stdin() {
mmap = mmap1;
&mmap
} else {
let mut buf1 = Vec::new();
if let Err(e) = stdin().read_to_end(&mut buf1) {
show_error!("failed to read from stdin: {}", e); show_error!("failed to read from stdin: {}", e);
exit_code = 1; exit_code = 1;
continue; continue;
} }
data buf = buf1;
&buf
}
} else { } else {
let path = Path::new(filename); let path = Path::new(filename);
if path.is_dir() || path.metadata().is_err() { if path.is_dir() || path.metadata().is_err() {
@ -242,22 +255,47 @@ fn tac(filenames: Vec<&str>, before: bool, regex: bool, separator: &str) -> i32
exit_code = 1; exit_code = 1;
continue; continue;
} }
if let Some(mmap1) = try_mmap_path(path) {
mmap = mmap1;
&mmap
} else {
match read(path) { match read(path) {
Ok(data) => data, Ok(buf1) => {
buf = buf1;
&buf
}
Err(e) => { Err(e) => {
show_error!("failed to read {}: {}", filename.quote(), e); show_error!("failed to read {}: {}", filename.quote(), e);
exit_code = 1; exit_code = 1;
continue; continue;
} }
} }
}
}; };
if let Some(pattern) = &pattern { if let Some(pattern) = &pattern {
buffer_tac_regex(&data, pattern, before) buffer_tac_regex(data, pattern, before)
} else { } else {
buffer_tac(&data, before, separator) buffer_tac(data, before, separator)
} }
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e)); .unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
} }
exit_code exit_code
} }
/// Attempts to memory-map the process's standard input.
///
/// Returns `Some(mapping)` when the mapping succeeds and `None` when it
/// fails for any reason; the underlying error is discarded so the caller
/// can fall back to reading stdin instead.
fn try_mmap_stdin() -> Option<Mmap> {
    let input = stdin();
    // SAFETY: If the file is truncated while we map it, SIGBUS will be raised
    // and our process will be terminated, thus preventing access of invalid memory.
    let mapping = unsafe { Mmap::map(&input) };
    mapping.ok()
}
/// Attempts to open the file at `path` and memory-map its contents.
///
/// Returns `None` if either opening the file or creating the mapping
/// fails; the error is discarded so the caller can fall back to reading
/// the file instead.
fn try_mmap_path(path: &Path) -> Option<Mmap> {
    let handle = match File::open(path) {
        Ok(opened) => opened,
        Err(_) => return None,
    };
    // SAFETY: If the file is truncated while we map it, SIGBUS will be raised
    // and our process will be terminated, thus preventing access of invalid memory.
    let mapping = unsafe { Mmap::map(&handle) };
    mapping.ok()
}