mirror of
https://github.com/uutils/coreutils
synced 2024-12-13 06:42:42 +00:00
tac: opportunistically use memory maps
Since tac must read its input files completely before it can start processing them from the end, it is particularly well suited to using memory maps, which let it benefit from the page cache maintained by the operating system to bring the necessary data into memory as required. This also covers the case where the input is stdin and stdin is not a pipe but, for example, a file descriptor set up by the user's shell through an input redirection.
This commit is contained in:
parent
4eab275235
commit
c526df57b8
3 changed files with 68 additions and 17 deletions
10
Cargo.lock
generated
10
Cargo.lock
generated
|
@ -1064,6 +1064,15 @@ version = "2.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
|
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memmap2"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4647a11b578fead29cdbb34d4adef8dd3dc35b876c9c6d5240d83f205abfe96e"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memoffset"
|
name = "memoffset"
|
||||||
version = "0.6.4"
|
version = "0.6.4"
|
||||||
|
@ -3025,6 +3034,7 @@ version = "0.0.7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"memchr 2.4.0",
|
"memchr 2.4.0",
|
||||||
|
"memmap2",
|
||||||
"regex",
|
"regex",
|
||||||
"uucore",
|
"uucore",
|
||||||
"uucore_procs",
|
"uucore_procs",
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
# spell-checker:ignore memmap
|
||||||
|
|
||||||
[package]
|
[package]
|
||||||
name = "uu_tac"
|
name = "uu_tac"
|
||||||
version = "0.0.7"
|
version = "0.0.7"
|
||||||
|
@ -16,6 +18,7 @@ path = "src/tac.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
memchr = "2"
|
memchr = "2"
|
||||||
|
memmap2 = "0.5"
|
||||||
regex = "1"
|
regex = "1"
|
||||||
clap = { version = "2.33", features = ["wrap_help"] }
|
clap = { version = "2.33", features = ["wrap_help"] }
|
||||||
uucore = { version=">=0.0.9", package="uucore", path="../../uucore" }
|
uucore = { version=">=0.0.9", package="uucore", path="../../uucore" }
|
||||||
|
|
|
@ -5,15 +5,19 @@
|
||||||
// * For the full copyright and license information, please view the LICENSE
|
// * For the full copyright and license information, please view the LICENSE
|
||||||
// * file that was distributed with this source code.
|
// * file that was distributed with this source code.
|
||||||
|
|
||||||
// spell-checker:ignore (ToDO) sbytes slen dlen memmem
|
// spell-checker:ignore (ToDO) sbytes slen dlen memmem memmap Mmap mmap SIGBUS
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate uucore;
|
extern crate uucore;
|
||||||
|
|
||||||
use clap::{crate_version, App, Arg};
|
use clap::{crate_version, App, Arg};
|
||||||
use memchr::memmem;
|
use memchr::memmem;
|
||||||
|
use memmap2::Mmap;
|
||||||
use std::io::{stdin, stdout, BufWriter, Read, Write};
|
use std::io::{stdin, stdout, BufWriter, Read, Write};
|
||||||
use std::{fs::read, path::Path};
|
use std::{
|
||||||
|
fs::{read, File},
|
||||||
|
path::Path,
|
||||||
|
};
|
||||||
use uucore::display::Quotable;
|
use uucore::display::Quotable;
|
||||||
use uucore::InvalidEncodingHandling;
|
use uucore::InvalidEncodingHandling;
|
||||||
|
|
||||||
|
@ -220,14 +224,23 @@ fn tac(filenames: Vec<&str>, before: bool, regex: bool, separator: &str) -> i32
|
||||||
};
|
};
|
||||||
|
|
||||||
for &filename in &filenames {
|
for &filename in &filenames {
|
||||||
let data = if filename == "-" {
|
let mmap;
|
||||||
let mut data = Vec::new();
|
let buf;
|
||||||
if let Err(e) = stdin().read_to_end(&mut data) {
|
|
||||||
|
let data: &[u8] = if filename == "-" {
|
||||||
|
if let Some(mmap1) = try_mmap_stdin() {
|
||||||
|
mmap = mmap1;
|
||||||
|
&mmap
|
||||||
|
} else {
|
||||||
|
let mut buf1 = Vec::new();
|
||||||
|
if let Err(e) = stdin().read_to_end(&mut buf1) {
|
||||||
show_error!("failed to read from stdin: {}", e);
|
show_error!("failed to read from stdin: {}", e);
|
||||||
exit_code = 1;
|
exit_code = 1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
data
|
buf = buf1;
|
||||||
|
&buf
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
let path = Path::new(filename);
|
let path = Path::new(filename);
|
||||||
if path.is_dir() || path.metadata().is_err() {
|
if path.is_dir() || path.metadata().is_err() {
|
||||||
|
@ -242,22 +255,47 @@ fn tac(filenames: Vec<&str>, before: bool, regex: bool, separator: &str) -> i32
|
||||||
exit_code = 1;
|
exit_code = 1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(mmap1) = try_mmap_path(path) {
|
||||||
|
mmap = mmap1;
|
||||||
|
&mmap
|
||||||
|
} else {
|
||||||
match read(path) {
|
match read(path) {
|
||||||
Ok(data) => data,
|
Ok(buf1) => {
|
||||||
|
buf = buf1;
|
||||||
|
&buf
|
||||||
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
show_error!("failed to read {}: {}", filename.quote(), e);
|
show_error!("failed to read {}: {}", filename.quote(), e);
|
||||||
exit_code = 1;
|
exit_code = 1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(pattern) = &pattern {
|
if let Some(pattern) = &pattern {
|
||||||
buffer_tac_regex(&data, pattern, before)
|
buffer_tac_regex(data, pattern, before)
|
||||||
} else {
|
} else {
|
||||||
buffer_tac(&data, before, separator)
|
buffer_tac(data, before, separator)
|
||||||
}
|
}
|
||||||
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
|
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
|
||||||
}
|
}
|
||||||
exit_code
|
exit_code
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn try_mmap_stdin() -> Option<Mmap> {
|
||||||
|
// SAFETY: If the file is truncated while we map it, SIGBUS will be raised
|
||||||
|
// and our process will be terminated, thus preventing access of invalid memory.
|
||||||
|
unsafe { Mmap::map(&stdin()).ok() }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn try_mmap_path(path: &Path) -> Option<Mmap> {
|
||||||
|
let file = File::open(path).ok()?;
|
||||||
|
|
||||||
|
// SAFETY: If the file is truncated while we map it, SIGBUS will be raised
|
||||||
|
// and our process will be terminated, thus preventing access of invalid memory.
|
||||||
|
let mmap = unsafe { Mmap::map(&file).ok()? };
|
||||||
|
|
||||||
|
Some(mmap)
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue