mirror of
https://github.com/uutils/coreutils
synced 2024-12-13 14:52:41 +00:00
tac: add support for the --regex option
Add support for `tac --regex`, where the line separator is interpreted as a regular expression.
This commit is contained in:
parent
92a1f1422e
commit
664c7a6ec5
4 changed files with 152 additions and 6 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3051,6 +3051,7 @@ version = "0.0.7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"memchr 2.4.0",
|
"memchr 2.4.0",
|
||||||
|
"regex",
|
||||||
"uucore",
|
"uucore",
|
||||||
"uucore_procs",
|
"uucore_procs",
|
||||||
]
|
]
|
||||||
|
|
|
@ -16,6 +16,7 @@ path = "src/tac.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
memchr = "2"
|
memchr = "2"
|
||||||
|
regex = "1"
|
||||||
clap = { version = "2.33", features = ["wrap_help"] }
|
clap = { version = "2.33", features = ["wrap_help"] }
|
||||||
uucore = { version=">=0.0.9", package="uucore", path="../../uucore" }
|
uucore = { version=">=0.0.9", package="uucore", path="../../uucore" }
|
||||||
uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" }
|
uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" }
|
||||||
|
|
|
@ -69,7 +69,7 @@ pub fn uu_app() -> App<'static, 'static> {
|
||||||
Arg::with_name(options::REGEX)
|
Arg::with_name(options::REGEX)
|
||||||
.short("r")
|
.short("r")
|
||||||
.long(options::REGEX)
|
.long(options::REGEX)
|
||||||
.help("interpret the sequence as a regular expression (NOT IMPLEMENTED)")
|
.help("interpret the sequence as a regular expression")
|
||||||
.takes_value(false),
|
.takes_value(false),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
|
@ -82,6 +82,82 @@ pub fn uu_app() -> App<'static, 'static> {
|
||||||
.arg(Arg::with_name(options::FILE).hidden(true).multiple(true))
|
.arg(Arg::with_name(options::FILE).hidden(true).multiple(true))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Print lines of a buffer in reverse, with line separator given as a regex.
|
||||||
|
///
|
||||||
|
/// `data` contains the bytes of the file.
|
||||||
|
///
|
||||||
|
/// `pattern` is the regular expression given as a
|
||||||
|
/// [`regex::bytes::Regex`] (not a [`regex::Regex`], since the input is
|
||||||
|
/// given as a slice of bytes). If `before` is `true`, then each match
|
||||||
|
/// of this pattern in `data` is interpreted as the start of a line. If
|
||||||
|
/// `before` is `false`, then each match of this pattern is interpreted
|
||||||
|
/// as the end of a line.
|
||||||
|
///
|
||||||
|
/// This function writes each line in `data` to [`std::io::Stdout`] in
|
||||||
|
/// reverse.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// If there is a problem writing to `stdout`, then this function
|
||||||
|
/// returns [`std::io::Error`].
|
||||||
|
fn buffer_tac_regex(
|
||||||
|
data: &[u8],
|
||||||
|
pattern: regex::bytes::Regex,
|
||||||
|
before: bool,
|
||||||
|
) -> std::io::Result<()> {
|
||||||
|
let mut out = stdout();
|
||||||
|
|
||||||
|
// The index of the line separator for the current line.
|
||||||
|
//
|
||||||
|
// As we scan through the `data` from right to left, we update this
|
||||||
|
// variable each time we find a new line separator. We restrict our
|
||||||
|
// regular expression search to only those bytes up to the line
|
||||||
|
// separator.
|
||||||
|
let mut this_line_end = data.len();
|
||||||
|
|
||||||
|
// The index of the start of the next line in the `data`.
|
||||||
|
//
|
||||||
|
// As we scan through the `data` from right to left, we update this
|
||||||
|
// variable each time we find a new line.
|
||||||
|
//
|
||||||
|
// If `before` is `true`, then each line starts immediately before
|
||||||
|
// the line separator. Otherwise, each line starts immediately after
|
||||||
|
// the line separator.
|
||||||
|
let mut following_line_start = data.len();
|
||||||
|
|
||||||
|
// Iterate over each byte in the buffer in reverse. When we find a
|
||||||
|
// line separator, write the line to stdout.
|
||||||
|
//
|
||||||
|
// The `before` flag controls whether the line separator appears at
|
||||||
|
// the end of the line (as in "abc\ndef\n") or at the beginning of
|
||||||
|
// the line (as in "/abc/def").
|
||||||
|
for i in (0..data.len()).rev() {
|
||||||
|
// Determine if there is a match for `pattern` starting at index
|
||||||
|
// `i` in `data`. Only search up to the line ending that was
|
||||||
|
// found previously.
|
||||||
|
if let Some(match_) = pattern.find_at(&data[..this_line_end], i) {
|
||||||
|
// Record this index as the ending of the current line.
|
||||||
|
this_line_end = i;
|
||||||
|
|
||||||
|
// The length of the match (that is, the line separator), in bytes.
|
||||||
|
let slen = match_.end() - match_.start();
|
||||||
|
|
||||||
|
if before {
|
||||||
|
out.write_all(&data[i..following_line_start])?;
|
||||||
|
following_line_start = i;
|
||||||
|
} else {
|
||||||
|
out.write_all(&data[i + slen..following_line_start])?;
|
||||||
|
following_line_start = i + slen;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// After the loop terminates, write whatever bytes are remaining at
|
||||||
|
// the beginning of the buffer.
|
||||||
|
out.write_all(&data[0..following_line_start])?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Write lines from `data` to stdout in reverse.
|
/// Write lines from `data` to stdout in reverse.
|
||||||
///
|
///
|
||||||
/// This function writes to [`stdout`] each line appearing in `data`,
|
/// This function writes to [`stdout`] each line appearing in `data`,
|
||||||
|
@ -132,7 +208,7 @@ fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()>
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tac(filenames: Vec<String>, before: bool, _: bool, separator: &str) -> i32 {
|
fn tac(filenames: Vec<String>, before: bool, regex: bool, separator: &str) -> i32 {
|
||||||
let mut exit_code = 0;
|
let mut exit_code = 0;
|
||||||
|
|
||||||
for filename in &filenames {
|
for filename in &filenames {
|
||||||
|
@ -168,9 +244,13 @@ fn tac(filenames: Vec<String>, before: bool, _: bool, separator: &str) -> i32 {
|
||||||
exit_code = 1;
|
exit_code = 1;
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
if regex {
|
||||||
buffer_tac(&data, before, separator)
|
let pattern = crash_if_err!(1, regex::bytes::Regex::new(separator));
|
||||||
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
|
buffer_tac_regex(&data, pattern, before)
|
||||||
|
} else {
|
||||||
|
buffer_tac(&data, before, separator)
|
||||||
|
}
|
||||||
|
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
|
||||||
}
|
}
|
||||||
exit_code
|
exit_code
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa
|
// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa axyz zyax zyxa
|
||||||
use crate::common::util::*;
|
use crate::common::util::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -205,3 +205,67 @@ fn test_null_separator() {
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_is("b\0a\0");
|
.stdout_is("b\0a\0");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// With `-r`, the argument of `-s` is a regular expression and each match
/// terminates a line; the lines are expected on stdout in reverse order.
#[test]
fn test_regex() {
    // Each entry is (separator regex, piped input, expected stdout).
    let cases = [
        ("[xyz]+", "axyz", "zyax"),
        (":+", "a:b::c:::d::::", ":::d:::c::b:a:"),
        // Input lines, in order:    "a+-+", "b++--++", "c+d-e+---+"
        // Expected output, in order: "c+d-e+---+", "b++--++", "a+-+"
        (
            r"[\+]+[-]+[\+]+",
            "a+-+b++--++c+d-e+---+",
            "c+d-e+---+b++--++a+-+",
        ),
    ];

    for &(separator, input, expected) in &cases {
        new_ucmd!()
            .args(&["-r", "-s", separator])
            .pipe_in(input)
            .succeeds()
            .no_stderr()
            .stdout_is(expected);
    }
}
|
||||||
|
|
||||||
|
/// With `-b -r`, each match of the separator regex begins a line rather
/// than ending one; the lines are expected on stdout in reverse order.
#[test]
fn test_regex_before() {
    // Each entry is (separator regex, piped input, expected stdout,
    // whether stderr is additionally asserted to be empty).
    let cases = [
        ("[xyz]+", "axyz", "zyxa", true),
        (":+", ":a::b:::c::::d", ":d::::c:::b::a", false),
        // Because `tac` searches for matches of the regular expression
        // from right to left, the second to last line is
        //
        //     +--++b
        //
        // not
        //
        //     ++--++b
        //
        // Input lines, in order:    "+-+a+", "+--++b", "+---+c+d-e"
        // Expected output, in order: "+---+c+d-e", "+--++b", "+-+a+"
        (
            r"[\+]+[-]+[\+]+",
            "+-+a++--++b+---+c+d-e",
            "+---+c+d-e+--++b+-+a+",
            true,
        ),
    ];

    for &(separator, input, expected, expect_empty_stderr) in &cases {
        if expect_empty_stderr {
            new_ucmd!()
                .args(&["-b", "-r", "-s", separator])
                .pipe_in(input)
                .succeeds()
                .no_stderr()
                .stdout_is(expected);
        } else {
            new_ucmd!()
                .args(&["-b", "-r", "-s", separator])
                .pipe_in(input)
                .succeeds()
                .stdout_is(expected);
        }
    }
}
|
||||||
|
|
Loading…
Reference in a new issue