Merge pull request #2904 from jfinkels/tail-lines-positive-number-file

tail: fix a bug in tail [ -n | -c ] +NUM <file>
2024-11-16 09:48:03 +00:00 · 2022-01-22 10:18:38 +01:00 · 2022-01-22 10:18:38 +01:00 · 8a787fe028
commit 8a787fe028
parent 8c298e97a5 f595edaded
2 changed files with 155 additions and 8 deletions
--- a/src/uu/tail/src/tail.rs
+++ b/src/uu/tail/src/tail.rs
@ -226,7 +226,7 @@ fn uu_tail(settings: &Settings) -> UResult<()> {
                .map_err_context(|| format!("cannot open {} for reading", filename.quote()))?;
            let md = file.metadata().unwrap();
            if is_seekable(&mut file) && get_block_size(&md) > 0 {
-                bounded_tail(&mut file, settings);
+                bounded_tail(&mut file, &settings.mode, settings.beginning);
                if settings.follow {
                    let reader = BufReader::new(file);
                    readers.push((Box::new(reader), filename));
@ -386,6 +386,83 @@ fn follow<T: BufRead>(readers: &mut [(T, &String)], settings: &Settings) -> URes
    Ok(())
 }

+/// Find the index after the given number of instances of a given byte.
+///
+/// This function reads through a given reader until `num_delimiters`
+/// instances of `delimiter` have been seen, returning the index of
+/// the byte immediately following that delimiter. If there are fewer
+/// than `num_delimiters` instances of `delimiter`, this returns the
+/// total number of bytes read from the `reader` until EOF.
+///
+/// # Errors
+///
+/// This function returns an error if there is an error during reading
+/// from `reader`.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```rust,ignore
+/// use std::io::Cursor;
+///
+/// let mut reader = Cursor::new("a\nb\nc\nd\ne\n");
+/// let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap();
+/// assert_eq!(i, 4);
+/// ```
+///
+/// If `num_delimiters` is zero, then this function always returns
+/// zero:
+///
+/// ```rust,ignore
+/// use std::io::Cursor;
+///
+/// let mut reader = Cursor::new("a\n");
+/// let i = forwards_thru_file(&mut reader, 0, b'\n').unwrap();
+/// assert_eq!(i, 0);
+/// ```
+///
+/// If there are fewer than `num_delimiters` instances of `delimiter`
+/// in the reader, then this function returns the total number of
+/// bytes read:
+///
+/// ```rust,ignore
+/// use std::io::Cursor;
+///
+/// let mut reader = Cursor::new("a\n");
+/// let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap();
+/// assert_eq!(i, 2);
+/// ```
+fn forwards_thru_file<R>(
+    reader: &mut R,
+    num_delimiters: usize,
+    delimiter: u8,
+) -> std::io::Result<usize>
+where
+    R: Read,
+{
+    let mut reader = BufReader::new(reader);
+
+    let mut buf = vec![];
+    let mut total = 0;
+    for _ in 0..num_delimiters {
+        match reader.read_until(delimiter, &mut buf) {
+            Ok(0) => {
+                return Ok(total);
+            }
+            Ok(n) => {
+                total += n;
+                buf.clear();
+                continue;
+            }
+            Err(e) => {
+                return Err(e);
+            }
+        }
+    }
+    Ok(total)
+}
+
 /// Iterate over bytes in the file, in reverse, until we find the
 /// `num_delimiters` instance of `delimiter`. The `file` is left seek'd to the
 /// position just after that delimiter.
@ -432,14 +509,24 @@ fn backwards_thru_file(file: &mut File, num_delimiters: usize, delimiter: u8) {
 /// end of the file, and then read the file "backwards" in blocks of size
 /// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up
 /// being a nice performance win for very large files.
-fn bounded_tail(file: &mut File, settings: &Settings) {
+fn bounded_tail(file: &mut File, mode: &FilterMode, beginning: bool) {
    // Find the position in the file to start printing from.
-    match settings.mode {
-        FilterMode::Lines(count, delimiter) => {
-            backwards_thru_file(file, count as usize, delimiter);
+    match (mode, beginning) {
+        (FilterMode::Lines(count, delimiter), false) => {
+            backwards_thru_file(file, *count, *delimiter);
        }
-        FilterMode::Bytes(count) => {
-            file.seek(SeekFrom::End(-(count as i64))).unwrap();
+        (FilterMode::Lines(count, delimiter), true) => {
+            let i = forwards_thru_file(file, (*count).max(1) - 1, *delimiter).unwrap();
+            file.seek(SeekFrom::Start(i as u64)).unwrap();
+        }
+        (FilterMode::Bytes(count), false) => {
+            file.seek(SeekFrom::End(-(*count as i64))).unwrap();
+        }
+        (FilterMode::Bytes(count), true) => {
+            // GNU `tail` seems to index bytes and lines starting at 1, not
+            // at 0. It seems to treat `+0` and `+1` as the same thing.
+            file.seek(SeekFrom::Start(((*count).max(1) - 1) as u64))
+                .unwrap();
        }
    }

@ -534,3 +621,32 @@ fn get_block_size(md: &Metadata) -> u64 {
        md.len()
    }
 }
+
+#[cfg(test)]
+mod tests {
+
+    use crate::forwards_thru_file;
+    use std::io::Cursor;
+
+    #[test]
+    fn test_forwards_thru_file_zero() {
+        let mut reader = Cursor::new("a\n");
+        let i = forwards_thru_file(&mut reader, 0, b'\n').unwrap();
+        assert_eq!(i, 0);
+    }
+
+    #[test]
+    fn test_forwards_thru_file_basic() {
+        //                   01 23 45 67 89
+        let mut reader = Cursor::new("a\nb\nc\nd\ne\n");
+        let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap();
+        assert_eq!(i, 4);
+    }
+
+    #[test]
+    fn test_forwards_thru_file_past_end() {
+        let mut reader = Cursor::new("x\n");
+        let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap();
+        assert_eq!(i, 2);
+    }
+}
--- a/tests/by-util/test_tail.rs
+++ b/tests/by-util/test_tail.rs
@ -3,7 +3,7 @@
 //  * For the full copyright and license information, please view the LICENSE
 //  * file that was distributed with this source code.

-// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile bogusfile
+// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile bogusfile siette ocho nueve diez

 extern crate tail;

@ -358,6 +358,37 @@ fn test_positive_lines() {
        .stdout_is("c\nd\ne\n");
 }

+/// Test for printing a file from line NUM onward (skipping the first NUM - 1 lines): `tail -n +7 foobar.txt`.
+#[test]
+fn test_positive_lines_file() {
+    new_ucmd!()
+        .args(&["-n", "+7", "foobar.txt"])
+        .succeeds()
+        .stdout_is(
+            "siette
+ocho
+nueve
+diez
+once
+",
+        );
+}
+
+/// Test for printing a file from byte NUM onward (skipping the first NUM - 1 bytes): `tail -c +42 foobar.txt`.
+#[test]
+fn test_positive_bytes_file() {
+    new_ucmd!()
+        .args(&["-c", "+42", "foobar.txt"])
+        .succeeds()
+        .stdout_is(
+            "ho
+nueve
+diez
+once
+",
+        );
+}
+
 /// Test for reading all but the first NUM lines: `tail -3`.
 #[test]
 fn test_obsolete_syntax_positive_lines() {