Merge pull request #2904 from jfinkels/tail-lines-positive-number-file

tail: fix a bug in tail [ -n | -c ] +NUM <file>
This commit is contained in:
Sylvestre Ledru 2022-01-22 10:18:38 +01:00 committed by GitHub
commit 8a787fe028
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 155 additions and 8 deletions

View file

@ -226,7 +226,7 @@ fn uu_tail(settings: &Settings) -> UResult<()> {
.map_err_context(|| format!("cannot open {} for reading", filename.quote()))?;
let md = file.metadata().unwrap();
if is_seekable(&mut file) && get_block_size(&md) > 0 {
bounded_tail(&mut file, settings);
bounded_tail(&mut file, &settings.mode, settings.beginning);
if settings.follow {
let reader = BufReader::new(file);
readers.push((Box::new(reader), filename));
@ -386,6 +386,83 @@ fn follow<T: BufRead>(readers: &mut [(T, &String)], settings: &Settings) -> URes
Ok(())
}
/// Compute the byte offset just past the `num_delimiters`-th `delimiter`.
///
/// The `reader` is consumed from its current position, one
/// delimiter-terminated chunk at a time. The lengths of the chunks
/// (delimiter included) are summed, so the result is the offset of the
/// byte that immediately follows the last delimiter counted. When EOF
/// is hit before `num_delimiters` delimiters have been seen, the
/// result is simply the number of bytes that were read.
///
/// The `reader` is wrapped in a `BufReader` internally, so callers
/// should not rely on where the underlying reader is positioned after
/// this call; use the returned offset instead.
///
/// # Errors
///
/// Any I/O error reported while reading from `reader` is returned
/// unchanged.
///
/// # Examples
///
/// Skipping two newline-terminated lines:
///
/// ```rust,ignore
/// use std::io::Cursor;
///
/// let mut reader = Cursor::new("a\nb\nc\nd\ne\n");
/// assert_eq!(forwards_thru_file(&mut reader, 2, b'\n').unwrap(), 4);
/// ```
///
/// Asking for zero delimiters reads nothing and yields zero:
///
/// ```rust,ignore
/// use std::io::Cursor;
///
/// let mut reader = Cursor::new("a\n");
/// assert_eq!(forwards_thru_file(&mut reader, 0, b'\n').unwrap(), 0);
/// ```
///
/// Asking for more delimiters than the input holds yields the total
/// number of bytes read:
///
/// ```rust,ignore
/// use std::io::Cursor;
///
/// let mut reader = Cursor::new("a\n");
/// assert_eq!(forwards_thru_file(&mut reader, 2, b'\n').unwrap(), 2);
/// ```
fn forwards_thru_file<R>(
    reader: &mut R,
    num_delimiters: usize,
    delimiter: u8,
) -> std::io::Result<usize>
where
    R: Read,
{
    let mut buffered = BufReader::new(reader);
    // Scratch space for `read_until`; only the chunk lengths matter.
    let mut scratch = Vec::new();
    let mut offset = 0;
    let mut remaining = num_delimiters;
    while remaining > 0 {
        let consumed = buffered.read_until(delimiter, &mut scratch)?;
        if consumed == 0 {
            // EOF before all requested delimiters were found.
            break;
        }
        offset += consumed;
        scratch.clear();
        remaining -= 1;
    }
    Ok(offset)
}
/// Iterate over bytes in the file, in reverse, until we find the
/// `num_delimiters`-th instance of `delimiter`. The `file` is left positioned
/// just after that delimiter.
@ -432,14 +509,24 @@ fn backwards_thru_file(file: &mut File, num_delimiters: usize, delimiter: u8) {
/// end of the file, and then read the file "backwards" in blocks of size
/// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up
/// being a nice performance win for very large files.
fn bounded_tail(file: &mut File, settings: &Settings) {
fn bounded_tail(file: &mut File, mode: &FilterMode, beginning: bool) {
// Find the position in the file to start printing from.
match settings.mode {
FilterMode::Lines(count, delimiter) => {
backwards_thru_file(file, count as usize, delimiter);
match (mode, beginning) {
(FilterMode::Lines(count, delimiter), false) => {
backwards_thru_file(file, *count, *delimiter);
}
FilterMode::Bytes(count) => {
file.seek(SeekFrom::End(-(count as i64))).unwrap();
(FilterMode::Lines(count, delimiter), true) => {
let i = forwards_thru_file(file, (*count).max(1) - 1, *delimiter).unwrap();
file.seek(SeekFrom::Start(i as u64)).unwrap();
}
(FilterMode::Bytes(count), false) => {
file.seek(SeekFrom::End(-(*count as i64))).unwrap();
}
(FilterMode::Bytes(count), true) => {
// GNU `tail` seems to index bytes and lines starting at 1, not
// at 0. It seems to treat `+0` and `+1` as the same thing.
file.seek(SeekFrom::Start(((*count).max(1) - 1) as u64))
.unwrap();
}
}
@ -534,3 +621,32 @@ fn get_block_size(md: &Metadata) -> u64 {
md.len()
}
}
#[cfg(test)]
mod tests {
    use crate::forwards_thru_file;
    use std::io::Cursor;

    /// Run `forwards_thru_file` over `input`, counting newline delimiters,
    /// and return the resulting offset.
    fn offset_after_newlines(input: &str, num_delimiters: usize) -> usize {
        let mut cursor = Cursor::new(input);
        forwards_thru_file(&mut cursor, num_delimiters, b'\n').unwrap()
    }

    /// Requesting zero delimiters yields offset zero.
    #[test]
    fn test_forwards_thru_file_zero() {
        assert_eq!(offset_after_newlines("a\n", 0), 0);
    }

    /// The offset lands just past the second newline.
    #[test]
    fn test_forwards_thru_file_basic() {
        // Byte offsets:  a=0 \n=1 b=2 \n=3 c=4 ...
        assert_eq!(offset_after_newlines("a\nb\nc\nd\ne\n", 2), 4);
    }

    /// Requesting more delimiters than exist yields the input length.
    #[test]
    fn test_forwards_thru_file_past_end() {
        assert_eq!(offset_after_newlines("x\n", 2), 2);
    }
}

View file

@ -3,7 +3,7 @@
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile bogusfile
// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile bogusfile siette ocho nueve diez
extern crate tail;
@ -358,6 +358,37 @@ fn test_positive_lines() {
.stdout_is("c\nd\ne\n");
}
/// Test for printing a file starting at line NUM: `tail -n +7 infile`.
///
/// Runs `tail -n +7 foobar.txt` and checks that the command succeeds and
/// prints exactly the five lines below. With `+7`, output is expected to
/// begin at the seventh line (the first six are skipped); this presumably
/// matches lines 7 onward of the `foobar.txt` fixture, which is not shown here.
#[test]
fn test_positive_lines_file() {
new_ucmd!()
.args(&["-n", "+7", "foobar.txt"])
.succeeds()
.stdout_is(
"siette
ocho
nueve
diez
once
",
);
}
/// Test for printing a file starting at byte NUM: `tail -c +42 infile`.
///
/// Runs `tail -c +42 foobar.txt` and checks that the command succeeds and
/// prints exactly the text below. With `+42`, output is expected to begin at
/// the 42nd byte (the first 41 are skipped); the leading `ho` is presumably
/// the end of a line that the offset falls inside of in the `foobar.txt`
/// fixture, which is not shown here.
#[test]
fn test_positive_bytes_file() {
new_ucmd!()
.args(&["-c", "+42", "foobar.txt"])
.succeeds()
.stdout_is(
"ho
nueve
diez
once
",
);
}
/// Test for reading all but the first NUM lines: `tail -3`.
#[test]
fn test_obsolete_syntax_positive_lines() {