mirror of
https://github.com/uutils/coreutils
synced 2024-12-12 22:32:53 +00:00
tac: add support for --regex option to tac
Add support for `tac --regex`, where the line separator is interpreted as a regular expression.
This commit is contained in:
parent
92a1f1422e
commit
664c7a6ec5
4 changed files with 152 additions and 6 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3051,6 +3051,7 @@ version = "0.0.7"
|
|||
dependencies = [
|
||||
"clap",
|
||||
"memchr 2.4.0",
|
||||
"regex",
|
||||
"uucore",
|
||||
"uucore_procs",
|
||||
]
|
||||
|
|
|
@ -16,6 +16,7 @@ path = "src/tac.rs"
|
|||
|
||||
[dependencies]
|
||||
memchr = "2"
|
||||
regex = "1"
|
||||
clap = { version = "2.33", features = ["wrap_help"] }
|
||||
uucore = { version=">=0.0.9", package="uucore", path="../../uucore" }
|
||||
uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" }
|
||||
|
|
|
@ -69,7 +69,7 @@ pub fn uu_app() -> App<'static, 'static> {
|
|||
Arg::with_name(options::REGEX)
|
||||
.short("r")
|
||||
.long(options::REGEX)
|
||||
.help("interpret the sequence as a regular expression (NOT IMPLEMENTED)")
|
||||
.help("interpret the sequence as a regular expression")
|
||||
.takes_value(false),
|
||||
)
|
||||
.arg(
|
||||
|
@ -82,6 +82,82 @@ pub fn uu_app() -> App<'static, 'static> {
|
|||
.arg(Arg::with_name(options::FILE).hidden(true).multiple(true))
|
||||
}
|
||||
|
||||
/// Print lines of a buffer in reverse, with line separator given as a regex.
|
||||
///
|
||||
/// `data` contains the bytes of the file.
|
||||
///
|
||||
/// `pattern` is the regular expression given as a
|
||||
/// [`regex::bytes::Regex`] (not a [`regex::Regex`], since the input is
|
||||
/// given as a slice of bytes). If `before` is `true`, then each match
|
||||
/// of this pattern in `data` is interpreted as the start of a line. If
|
||||
/// `before` is `false`, then each match of this pattern is interpreted
|
||||
/// as the end of a line.
|
||||
///
|
||||
/// This function writes each line in `data` to [`std::io::Stdout`] in
|
||||
/// reverse.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// If there is a problem writing to `stdout`, then this function
|
||||
/// returns [`std::io::Error`].
|
||||
fn buffer_tac_regex(
|
||||
data: &[u8],
|
||||
pattern: regex::bytes::Regex,
|
||||
before: bool,
|
||||
) -> std::io::Result<()> {
|
||||
let mut out = stdout();
|
||||
|
||||
// The index of the line separator for the current line.
|
||||
//
|
||||
// As we scan through the `data` from right to left, we update this
|
||||
// variable each time we find a new line separator. We restrict our
|
||||
// regular expression search to only those bytes up to the line
|
||||
// separator.
|
||||
let mut this_line_end = data.len();
|
||||
|
||||
// The index of the start of the next line in the `data`.
|
||||
//
|
||||
// As we scan through the `data` from right to left, we update this
|
||||
// variable each time we find a new line.
|
||||
//
|
||||
// If `before` is `true`, then each line starts immediately before
|
||||
// the line separator. Otherwise, each line starts immediately after
|
||||
// the line separator.
|
||||
let mut following_line_start = data.len();
|
||||
|
||||
// Iterate over each byte in the buffer in reverse. When we find a
|
||||
// line separator, write the line to stdout.
|
||||
//
|
||||
// The `before` flag controls whether the line separator appears at
|
||||
// the end of the line (as in "abc\ndef\n") or at the beginning of
|
||||
// the line (as in "/abc/def").
|
||||
for i in (0..data.len()).rev() {
|
||||
// Determine if there is a match for `pattern` starting at index
|
||||
// `i` in `data`. Only search up to the line ending that was
|
||||
// found previously.
|
||||
if let Some(match_) = pattern.find_at(&data[..this_line_end], i) {
|
||||
// Record this index as the ending of the current line.
|
||||
this_line_end = i;
|
||||
|
||||
// The length of the match (that is, the line separator), in bytes.
|
||||
let slen = match_.end() - match_.start();
|
||||
|
||||
if before {
|
||||
out.write_all(&data[i..following_line_start])?;
|
||||
following_line_start = i;
|
||||
} else {
|
||||
out.write_all(&data[i + slen..following_line_start])?;
|
||||
following_line_start = i + slen;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// After the loop terminates, write whatever bytes are remaining at
|
||||
// the beginning of the buffer.
|
||||
out.write_all(&data[0..following_line_start])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write lines from `data` to stdout in reverse.
|
||||
///
|
||||
/// This function writes to [`stdout`] each line appearing in `data`,
|
||||
|
@ -132,7 +208,7 @@ fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()>
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn tac(filenames: Vec<String>, before: bool, _: bool, separator: &str) -> i32 {
|
||||
fn tac(filenames: Vec<String>, before: bool, regex: bool, separator: &str) -> i32 {
|
||||
let mut exit_code = 0;
|
||||
|
||||
for filename in &filenames {
|
||||
|
@ -168,9 +244,13 @@ fn tac(filenames: Vec<String>, before: bool, _: bool, separator: &str) -> i32 {
|
|||
exit_code = 1;
|
||||
continue;
|
||||
};
|
||||
|
||||
buffer_tac(&data, before, separator)
|
||||
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
|
||||
if regex {
|
||||
let pattern = crash_if_err!(1, regex::bytes::Regex::new(separator));
|
||||
buffer_tac_regex(&data, pattern, before)
|
||||
} else {
|
||||
buffer_tac(&data, before, separator)
|
||||
}
|
||||
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
|
||||
}
|
||||
exit_code
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa
|
||||
// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa axyz zyax zyxa
|
||||
use crate::common::util::*;
|
||||
|
||||
#[test]
|
||||
|
@ -205,3 +205,67 @@ fn test_null_separator() {
|
|||
.succeeds()
|
||||
.stdout_is("b\0a\0");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_regex() {
|
||||
new_ucmd!()
|
||||
.args(&["-r", "-s", "[xyz]+"])
|
||||
.pipe_in("axyz")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_is("zyax");
|
||||
|
||||
new_ucmd!()
|
||||
.args(&["-r", "-s", ":+"])
|
||||
.pipe_in("a:b::c:::d::::")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_is(":::d:::c::b:a:");
|
||||
|
||||
new_ucmd!()
|
||||
.args(&["-r", "-s", r"[\+]+[-]+[\+]+"])
|
||||
// line 0 1 2
|
||||
// |--||-----||--------|
|
||||
.pipe_in("a+-+b++--++c+d-e+---+")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
// line 2 1 0
|
||||
// |--------||-----||--|
|
||||
.stdout_is("c+d-e+---+b++--++a+-+");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_regex_before() {
|
||||
new_ucmd!()
|
||||
.args(&["-b", "-r", "-s", "[xyz]+"])
|
||||
.pipe_in("axyz")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_is("zyxa");
|
||||
|
||||
new_ucmd!()
|
||||
.args(&["-b", "-r", "-s", ":+"])
|
||||
.pipe_in(":a::b:::c::::d")
|
||||
.succeeds()
|
||||
.stdout_is(":d::::c:::b::a");
|
||||
|
||||
// Because `tac` searches for matches of the regular expression from
|
||||
// right to left, the second to last line is
|
||||
//
|
||||
// +--++b
|
||||
//
|
||||
// not
|
||||
//
|
||||
// ++--++b
|
||||
//
|
||||
new_ucmd!()
|
||||
.args(&["-b", "-r", "-s", r"[\+]+[-]+[\+]+"])
|
||||
// line 0 1 2
|
||||
// |---||----||--------|
|
||||
.pipe_in("+-+a++--++b+---+c+d-e")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
// line 2 1 0
|
||||
// |--------||----||---|
|
||||
.stdout_is("+---+c+d-e+--++b+-+a+");
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue