diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 9ef69a77c..cf1f5c3c5 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -226,7 +226,7 @@ fn uu_tail(settings: &Settings) -> UResult<()> { .map_err_context(|| format!("cannot open {} for reading", filename.quote()))?; let md = file.metadata().unwrap(); if is_seekable(&mut file) && get_block_size(&md) > 0 { - bounded_tail(&mut file, settings); + bounded_tail(&mut file, &settings.mode, settings.beginning); if settings.follow { let reader = BufReader::new(file); readers.push((Box::new(reader), filename)); @@ -386,6 +386,83 @@ fn follow(readers: &mut [(T, &String)], settings: &Settings) -> URes Ok(()) } +/// Find the index after the given number of instances of a given byte. +/// +/// This function reads through a given reader until `num_delimiters` +/// instances of `delimiter` have been seen, returning the index of +/// the byte immediately following that delimiter. If there are fewer +/// than `num_delimiters` instances of `delimiter`, this returns the +/// total number of bytes read from the `reader` until EOF. +/// +/// # Errors +/// +/// This function returns an error if there is an error during reading +/// from `reader`. 
/// Find the index just past the `num_delimiters`-th instance of `delimiter`.
///
/// Reads from `reader` until `num_delimiters` instances of `delimiter`
/// have been seen and returns the index of the byte immediately
/// following the last of them. If the input ends before that many
/// delimiters were found, the total number of bytes read up to EOF is
/// returned instead.
///
/// The reader is wrapped in a `BufReader`, which reads ahead in chunks,
/// so after this call the underlying reader is not necessarily
/// positioned at the returned index. Callers that need the stream
/// positioned there must seek to it explicitly.
///
/// # Errors
///
/// Returns any error produced while reading from `reader`.
///
/// # Examples
///
/// Basic usage:
///
/// ```rust,ignore
/// use std::io::Cursor;
///
/// let mut reader = Cursor::new("a\nb\nc\nd\ne\n");
/// let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap();
/// assert_eq!(i, 4);
/// ```
///
/// If `num_delimiters` is zero, then this function always returns
/// zero:
///
/// ```rust,ignore
/// use std::io::Cursor;
///
/// let mut reader = Cursor::new("a\n");
/// let i = forwards_thru_file(&mut reader, 0, b'\n').unwrap();
/// assert_eq!(i, 0);
/// ```
///
/// If there are fewer than `num_delimiters` instances of `delimiter`
/// in the reader, then this function returns the total number of
/// bytes read:
///
/// ```rust,ignore
/// use std::io::Cursor;
///
/// let mut reader = Cursor::new("a\n");
/// let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap();
/// assert_eq!(i, 2);
/// ```
fn forwards_thru_file<R>(
    reader: &mut R,
    num_delimiters: usize,
    delimiter: u8,
) -> std::io::Result<usize>
where
    R: Read,
{
    let mut reader = BufReader::new(reader);

    // Scratch buffer reused for every segment; only the segment lengths
    // are needed, never the contents.
    let mut buf = Vec::new();
    let mut total = 0;
    for _ in 0..num_delimiters {
        buf.clear();
        let n = reader.read_until(delimiter, &mut buf)?;
        if n == 0 {
            // EOF reached before the requested number of delimiters.
            break;
        }
        total += n;
    }
    Ok(total)
}
-fn bounded_tail(file: &mut File, settings: &Settings) { +fn bounded_tail(file: &mut File, mode: &FilterMode, beginning: bool) { // Find the position in the file to start printing from. - match settings.mode { - FilterMode::Lines(count, delimiter) => { - backwards_thru_file(file, count as usize, delimiter); + match (mode, beginning) { + (FilterMode::Lines(count, delimiter), false) => { + backwards_thru_file(file, *count, *delimiter); } - FilterMode::Bytes(count) => { - file.seek(SeekFrom::End(-(count as i64))).unwrap(); + (FilterMode::Lines(count, delimiter), true) => { + let i = forwards_thru_file(file, (*count).max(1) - 1, *delimiter).unwrap(); + file.seek(SeekFrom::Start(i as u64)).unwrap(); + } + (FilterMode::Bytes(count), false) => { + file.seek(SeekFrom::End(-(*count as i64))).unwrap(); + } + (FilterMode::Bytes(count), true) => { + // GNU `tail` seems to index bytes and lines starting at 1, not + // at 0. It seems to treat `+0` and `+1` as the same thing. + file.seek(SeekFrom::Start(((*count).max(1) - 1) as u64)) + .unwrap(); } } @@ -534,3 +621,32 @@ fn get_block_size(md: &Metadata) -> u64 { md.len() } } + +#[cfg(test)] +mod tests { + + use crate::forwards_thru_file; + use std::io::Cursor; + + #[test] + fn test_forwards_thru_file_zero() { + let mut reader = Cursor::new("a\n"); + let i = forwards_thru_file(&mut reader, 0, b'\n').unwrap(); + assert_eq!(i, 0); + } + + #[test] + fn test_forwards_thru_file_basic() { + // 01 23 45 67 89 + let mut reader = Cursor::new("a\nb\nc\nd\ne\n"); + let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap(); + assert_eq!(i, 4); + } + + #[test] + fn test_forwards_thru_file_past_end() { + let mut reader = Cursor::new("x\n"); + let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap(); + assert_eq!(i, 2); + } +} diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index e863e34b7..f4d932e79 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -3,7 +3,7 @@ // * For the full copyright and 
license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile bogusfile +// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile bogusfile siette ocho nueve diez extern crate tail; @@ -358,6 +358,37 @@ fn test_positive_lines() { .stdout_is("c\nd\ne\n"); } +/// Test for reading all but the first NUM lines of a file: `tail -n +3 infile`. +#[test] +fn test_positive_lines_file() { + new_ucmd!() + .args(&["-n", "+7", "foobar.txt"]) + .succeeds() + .stdout_is( + "siette +ocho +nueve +diez +once +", + ); +} + +/// Test for reading all but the first NUM bytes of a file: `tail -c +3 infile`. +#[test] +fn test_positive_bytes_file() { + new_ucmd!() + .args(&["-c", "+42", "foobar.txt"]) + .succeeds() + .stdout_is( + "ho +nueve +diez +once +", + ); +} + /// Test for reading all but the first NUM lines: `tail -3`. #[test] fn test_obsolete_syntax_positive_lines() {