od: implement --skip-bytes and --read-bytes

2025-02-18 15:08:53 +00:00 · 2016-08-07 01:33:23 +02:00 · 2016-08-07 01:33:23 +02:00 · 459db47c2b
commit 459db47c2b
parent 2b10cc47ff
5 changed files with 428 additions and 12 deletions
--- a/src/od/multifilereader.rs
+++ b/src/od/multifilereader.rs
@ -16,7 +16,11 @@ pub enum InputSource<'a> {
 pub struct MultifileReader<'a> {
    ni: Vec<InputSource<'a>>,
    curr_file: Option<Box<io::Read>>,
-    pub any_err: bool,
+    any_err: bool,
 }
 /// Lets a reader expose an error flag to its caller.
 pub trait HasError {
    /// Returns the implementor's error flag.
    // NOTE(review): presumably `true` once any input could not be opened or
    // read; confirm against the implementors.
    fn has_error(&self) -> bool;
 }
 impl<'b> MultifileReader<'b> {
@ -109,6 +113,13 @@ impl<'b> io::Read for MultifileReader<'b> {
    }
 }
 /// Exposes the now-private `any_err` field through the `HasError` trait.
 impl<'b> HasError for MultifileReader<'b> {
    /// Returns the current value of `any_err`.
    fn has_error(&self) -> bool {
        self.any_err
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src/od/od.rs
+++ b/src/od/od.rs
@ -17,11 +17,13 @@ extern crate byteorder;
 extern crate uucore;
 mod multifilereader;
 mod partialreader;
 mod byteorder_io;
 mod formatteriteminfo;
 mod prn_int;
 mod prn_char;
 mod prn_float;
 mod parse_nrofbytes;
 #[cfg(test)]
 mod mockstream;
@ -31,10 +33,12 @@ use std::io::Write;
 use unindent::*;
 use byteorder_io::*;
 use multifilereader::*;
 use partialreader::*;
 use prn_int::*;
 use prn_char::*;
 use prn_float::*;
 use formatteriteminfo::*;
 use parse_nrofbytes::*;
 //This is available in some versions of std, but not all that we target.
 macro_rules! hashmap {
@ -216,14 +220,42 @@ pub fn uumain(args: Vec<String>) -> i32 {
        let output_duplicates = matches.opt_present("v");
-        odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..], output_duplicates)
+        let skip_bytes = match matches.opt_default("skip-bytes", "0") {
            None => 0,
            Some(s) => {
                match parse_number_of_bytes(&s) {
                    Ok(i) => { i }
                    Err(_) => {
                        disp_err!("Invalid argument --skip-bytes={}", s);
                        return 1;
                    }
                }
            }
        };
        let read_bytes = match matches.opt_str("read-bytes") {
            None => None,
            Some(s) => {
                match  parse_number_of_bytes(&s) {
                    Ok(i) => { Some(i) }
                    Err(_) => {
                        disp_err!("Invalid argument --read-bytes={}", s);
                        return 1;
                    }
                }
            }
        };
        odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..],
                output_duplicates, skip_bytes, read_bytes)
 }
 fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
-        fnames: Vec<InputSource>, formats: &[FormatterItemInfo], output_duplicates: bool) -> i32 {
+        fnames: Vec<InputSource>, formats: &[FormatterItemInfo], output_duplicates: bool,
        skip_bytes: usize, read_bytes: Option<usize>) -> i32 {
-    let mut mf = MultifileReader::new(fnames);
+    let mf = MultifileReader::new(fnames);
-    let mut addr = 0;
+    let mut input = PartialReader::new(mf, skip_bytes, read_bytes);
    let mut addr = skip_bytes;
    let mut duplicate_line = false;
    let mut previous_bytes: Vec<u8> = Vec::new();
    let mut bytes: Vec<u8> = Vec::with_capacity(line_bytes);
@ -270,11 +302,9 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
        // print each line data (or multi-format raster of several lines describing the same data).
        // TODO: we need to read more data in case a multi-byte sequence starts at the end of the line
-        match mf.read(bytes.as_mut_slice()) {
+        match input.read(bytes.as_mut_slice()) {
            Ok(0) => {
-                if input_offset_base != Radix::NoPrefix {
+                print_final_offset(input_offset_base, addr);
                    print!("{}\n", print_with_radix(input_offset_base, addr)); // print final offset
                }
                break;
            }
            Ok(n) => {
@ -310,13 +340,15 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
                addr += n;
            }
-            Err(_) => {
+            Err(e) => {
-                break;
+                show_error!("{}", e);
                print_final_offset(input_offset_base, addr);
                return 1;
            }
        };
    }
-    if mf.any_err {
+    if input.has_error() {
        1
    } else {
        0
@ -421,6 +453,12 @@ fn print_with_radix(r: Radix, x: usize) -> String{
    }
 }
 /// Prints the closing offset line `x` formatted with radix `r`, unless
 /// offsets are disabled (`Radix::NoPrefix`), in which case nothing is printed.
 fn print_final_offset(r: Radix, x: usize) {
    // Nothing to print when the offset column is switched off.
    if r == Radix::NoPrefix {
        return;
    }
    let offset = print_with_radix(r, x);
    print!("{}\n", offset);
 }
 struct SpacedFormatterItemInfo {
    frm: FormatterItemInfo,
    spacing: [usize; MAX_BYTES_PER_UNIT],
--- a/src/od/parse_nrofbytes.rs
+++ b/src/od/parse_nrofbytes.rs
@ -0,0 +1,129 @@
 /// Parses a byte count such as the argument of `--skip-bytes` or
 /// `--read-bytes`.
 ///
 /// The number is hexadecimal when prefixed with `0x`/`0X`, octal when it
 /// starts with `0`, and decimal otherwise. It may be followed by a
 /// multiplier suffix:
 ///
 /// * `b` - 512 (not available for hexadecimal input, where `b` is a digit)
 /// * `k`/`K`, `m`/`M`, `G`, `T`, `P`, `E` - powers of 1024
 /// * `kB`/`KB`, `mB`/`MB`, `GB`, `TB`, `PB`, `EB` - powers of 1000 (not
 ///   available for hexadecimal input, where `B` is a digit)
 ///
 /// `T`, `P` and `E` are only recognised on 64-bit targets.
 ///
 /// # Errors
 ///
 /// Returns `Err("parse failed")` when the digits are missing or invalid for
 /// the radix, when the suffix is unknown, or when the resulting byte count
 /// does not fit in a `usize`.
 pub fn parse_number_of_bytes(s: &String) -> Result<usize, &'static str> {
    let mut start = 0;
    let mut len = s.len();
    let mut radix = 10;
    let mut multiply = 1;

    if s.starts_with("0x") || s.starts_with("0X") {
        start = 2;
        radix = 16;
    }
    else if s.starts_with("0") {
        radix = 8;
    }

    let mut ends_with = s.chars().rev();
    match ends_with.next() {
        Some('b') if radix != 16 => {
            multiply = 512;
            len -= 1;
        },
        Some('k') | Some('K') => {
            multiply = 1024;
            len -= 1;
        }
        Some('m') | Some('M') => {
            multiply = 1024*1024;
            len -= 1;
        }
        Some('G') => {
            multiply = 1024*1024*1024;
            len -= 1;
        }
        #[cfg(target_pointer_width = "64")]
        Some('T') => {
            multiply = 1024*1024*1024*1024;
            len -= 1;
        }
        #[cfg(target_pointer_width = "64")]
        Some('P') => {
            multiply = 1024*1024*1024*1024*1024;
            len -= 1;
        }
        #[cfg(target_pointer_width = "64")]
        Some('E') => {
            multiply = 1024*1024*1024*1024*1024*1024;
            len -= 1;
        }
        Some('B') if radix != 16 => {
            multiply = match ends_with.next() {
                Some('k') | Some('K') => 1000,
                Some('m') | Some('M') => 1000*1000,
                Some('G') => 1000*1000*1000,
                #[cfg(target_pointer_width = "64")]
                Some('T') => 1000*1000*1000*1000,
                #[cfg(target_pointer_width = "64")]
                Some('P') => 1000*1000*1000*1000*1000,
                #[cfg(target_pointer_width = "64")]
                Some('E') => 1000*1000*1000*1000*1000*1000,
                _ => return Err("parse failed"),
            };
            // Only shrink the slice once both suffix characters are known to
            // exist; doing it earlier underflows for the lone input "B".
            len -= 2;
        },
        _ => {},
    }

    match usize::from_str_radix(&s[start..len], radix) {
        // A huge count combined with a multiplier must be rejected rather
        // than panicking (debug) or silently wrapping (release).
        Ok(i) => i.checked_mul(multiply).ok_or("parse failed"),
        Err(_) => Err("parse failed"),
    }
 }
 /// Convenience wrapper so callers holding a `&str` can use
 /// `parse_number_of_bytes`, which takes a `&String`.
 #[allow(dead_code)]
 fn parse_number_of_bytes_str(s: &str) -> Result<usize, &'static str> {
    let owned = s.to_owned();
    parse_number_of_bytes(&owned)
 }
 #[test]
 fn test_parse_number_of_bytes() {
    // Inputs that must parse, paired with the expected number of bytes.
    let accepted: &[(&str, usize)] = &[
        // plain decimal numbers
        ("0", 0),
        ("5", 5),
        ("999", 999),
        // decimal numbers with 512- and 1024-based suffixes
        ("2b", 2*512),
        ("2k", 2*1024),
        ("4K", 4*1024),
        ("2m", 2*1048576),
        ("4M", 4*1048576),
        ("1G", 1073741824),
        // decimal numbers with 1000-based suffixes
        ("2kB", 2000),
        ("4KB", 4000),
        ("2mB", 2000000),
        ("4MB", 4000000),
        ("2GB", 2000000000),
        // octal input
        ("010", 8),
        ("010b", 8*512),
        ("010k", 8*1024),
        ("010m", 8*1048576),
        // hex input
        ("0xf", 15),
        ("0XF", 15),
        ("0x1b", 27),
        ("0x10k", 16*1024),
        ("0x10m", 16*1048576),
    ];
    for &(text, expected) in accepted {
        assert_eq!(expected, parse_number_of_bytes_str(text).unwrap());
    }

    // Inputs that must be rejected.
    let rejected = ["", "-1", "1e2", "xyz", "b", "1Y", "∞"];
    for text in rejected.iter() {
        parse_number_of_bytes_str(text).unwrap_err();
    }
 }
 #[test]
 #[cfg(target_pointer_width = "64")]
 fn test_parse_number_of_bytes_64bits() {
    // Suffixes whose multipliers only fit in a 64-bit `usize`.
    let accepted: &[(&str, usize)] = &[
        // powers of 1024
        ("1T", 1099511627776),
        ("1P", 1125899906842624),
        ("1E", 1152921504606846976),
        // powers of 1000
        ("2TB", 2000000000000),
        ("2PB", 2000000000000000),
        ("2EB", 2000000000000000000),
    ];
    for &(text, expected) in accepted {
        assert_eq!(expected, parse_number_of_bytes_str(text).unwrap());
    }
 }
--- a/src/od/partialreader.rs
+++ b/src/od/partialreader.rs
@ -0,0 +1,204 @@
 use std::cmp;
 use std::io;
 use std::io::Read;
 use multifilereader::HasError;
 /// When a large number of bytes must be skipped, it will be read into a
 /// dynamically allocated buffer. The buffer will be limited to this size.
 const MAX_SKIP_BUFFER: usize = 64*1024;

 /// Wrapper for `std::io::Read` which can skip bytes at the beginning
 /// of the input, and it can limit the returned bytes to a particular
 /// number of bytes.
 pub struct PartialReader<R> {
    /// The wrapped reader.
    inner: R,
    /// Number of bytes that still have to be discarded before any data is
    /// returned.
    skip: usize,
    /// Number of bytes that may still be returned; `None` means unlimited.
    limit: Option<usize>,
 }

 impl<R> PartialReader<R> {
    /// Create a new `PartialReader` wrapping `inner`, which will skip
    /// `skip` bytes, and limits the output to `limit` bytes. Set `limit`
    /// to `None` if there should be no limit.
    pub fn new(inner: R, skip: usize, limit: Option<usize>) -> Self {
        PartialReader {
            inner: inner,
            skip: skip,
            limit: limit,
        }
    }
 }

 impl<R: Read> Read for PartialReader<R> {
    /// Reads into `out`, first discarding any bytes that still have to be
    /// skipped and never returning more than the remaining limit.
    ///
    /// Returns `Ok(0)` once the limit is exhausted. Any error from the inner
    /// reader is passed through; in particular the skip phase uses
    /// `read_exact`, so an input shorter than the skip count yields the
    /// error `read_exact` reports for a premature end of input.
    fn read(&mut self, out: &mut [u8]) -> io::Result<usize> {
        if self.skip > 0 {
            let buf_size = cmp::min(self.skip, MAX_SKIP_BUFFER);
            // The scratch buffer is zero-initialised: exposing uninitialised
            // memory (via `set_len`) to an arbitrary `Read` implementation is
            // undefined behaviour.
            let mut bytes: Vec<u8> = vec![0; buf_size];

            while self.skip > 0 {
                let skip_count = cmp::min(self.skip, buf_size);

                match self.inner.read_exact(&mut bytes[..skip_count]) {
                    Err(e) => return Err(e),
                    Ok(()) => self.skip -= skip_count,
                }
            }
        }
        match self.limit {
            None => self.inner.read(out),
            Some(0) => Ok(0),
            Some(ref mut limit) => {
                // Never hand the inner reader more room than the limit allows.
                let slice = if *limit > out.len() { out } else { &mut out[0..*limit] };
                match self.inner.read(slice) {
                    Err(e) => Err(e),
                    Ok(r) => {
                        *limit -= r;
                        Ok(r)
                    },
                }
            },
        }
    }
 }
 /// A `PartialReader` keeps no error state of its own; it forwards the
 /// question to the reader it wraps.
 impl<R: HasError> HasError for PartialReader<R> {
    /// Returns whatever the wrapped reader reports.
    fn has_error(&self) -> bool {
        R::has_error(&self.inner)
    }
 }
 // Unit tests for `PartialReader`: plain pass-through, skipping, limiting,
 // and error propagation from the wrapped reader.
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::io::{Cursor, Read, ErrorKind};
    use std::error::Error;
    use mockstream::*;
    // No skip, no limit: the data is passed through unchanged.
    #[test]
    fn test_read_without_limits() {
        let mut v = [0; 10];
        let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, None);
        assert_eq!(sut.read(v.as_mut()).unwrap(), 8);
        assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]);
    }
    // No skip, no limit: an error from the inner reader is passed through.
    #[test]
    fn test_read_without_limits_with_error() {
        let mut v = [0; 10];
        let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3);
        let mut sut = PartialReader::new(f, 0, None);
        let error = sut.read(v.as_mut()).unwrap_err();
        assert_eq!(error.kind(), ErrorKind::PermissionDenied);
        assert_eq!(error.description(), "No access");
    }
    // Skipping 2 bytes: the first read starts at 'c'.
    #[test]
    fn test_read_skipping_bytes() {
        let mut v = [0; 10];
        let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 2, None);
        assert_eq!(sut.read(v.as_mut()).unwrap(), 6);
        assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0, 0, 0]);
    }
    // Skipping more bytes than the input holds is reported as UnexpectedEof.
    #[test]
    fn test_read_skipping_all() {
        let mut v = [0; 10];
        let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 20, None);
        let error = sut.read(v.as_mut()).unwrap_err();
        assert_eq!(error.kind(), ErrorKind::UnexpectedEof);
    }
    // An error raised by the inner reader while skipping is passed through.
    #[test]
    fn test_read_skipping_with_error() {
        let mut v = [0; 10];
        let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3);
        let mut sut = PartialReader::new(f, 2, None);
        let error = sut.read(v.as_mut()).unwrap_err();
        assert_eq!(error.kind(), ErrorKind::PermissionDenied);
        assert_eq!(error.description(), "No access");
    }
    // The skipped range spans two chained inner readers ("a" + "bcdefgh").
    #[test]
    fn test_read_skipping_with_two_reads_during_skip() {
        let mut v = [0; 10];
        let c = Cursor::new(&b"a"[..])
                .chain(Cursor::new(&b"bcdefgh"[..]));
        let mut sut = PartialReader::new(c, 2, None);
        assert_eq!(sut.read(v.as_mut()).unwrap(), 6);
        assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0, 0, 0]);
    }
    // A skip count of usize::max_value() must fail without allocating a
    // buffer of that size.
    #[test]
    fn test_read_skipping_huge_number() {
        let mut v = [0; 10];
        // test if it does not eat all memory....
        let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), usize::max_value(), None);
        sut.read(v.as_mut()).unwrap_err();
    }
    // A limit of 0 yields no data at all.
    #[test]
    fn test_read_limitting_all() {
        let mut v = [0; 10];
        let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(0));
        assert_eq!(sut.read(v.as_mut()).unwrap(), 0);
    }
    // A limit smaller than the input truncates the read to 6 bytes.
    #[test]
    fn test_read_limitting() {
        let mut v = [0; 10];
        let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(6));
        assert_eq!(sut.read(v.as_mut()).unwrap(), 6);
        assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0, 0, 0, 0]);
    }
    // With a limit set, an error from the inner reader is still passed through.
    #[test]
    fn test_read_limitting_with_error() {
        let mut v = [0; 10];
        let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3);
        let mut sut = PartialReader::new(f, 0, Some(6));
        let error = sut.read(v.as_mut()).unwrap_err();
        assert_eq!(error.kind(), ErrorKind::PermissionDenied);
        assert_eq!(error.description(), "No access");
    }
    // A limit larger than the input returns all 8 available bytes.
    #[test]
    fn test_read_limitting_with_large_limit() {
        let mut v = [0; 10];
        let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(20));
        assert_eq!(sut.read(v.as_mut()).unwrap(), 8);
        assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]);
    }
    // The limit is tracked across calls: 3 + 3 bytes, then end of input.
    #[test]
    fn test_read_limitting_with_multiple_reads() {
        let mut v = [0; 3];
        let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(6));
        assert_eq!(sut.read(v.as_mut()).unwrap(), 3);
        assert_eq!(v, [0x61, 0x62, 0x63]);
        assert_eq!(sut.read(v.as_mut()).unwrap(), 3);
        assert_eq!(v, [0x64, 0x65, 0x66]);
        assert_eq!(sut.read(v.as_mut()).unwrap(), 0);
    }
    // Skip and limit combined: skip "ab", then return exactly "cdef".
    #[test]
    fn test_read_skipping_and_limitting() {
        let mut v = [0; 10];
        let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 2, Some(4));
        assert_eq!(sut.read(v.as_mut()).unwrap(), 4);
        assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0, 0, 0, 0, 0, 0]);
    }
 }
--- a/tests/test_od.rs
+++ b/tests/test_od.rs
@ -475,3 +475,37 @@ fn test_invalid_offset(){
    assert!(!result.success);
 }
 // `--skip-bytes=5` drops "abcde": the dump starts at offset 5 with 'f' and
 // the final offset line is 21 (octal for the 17 input bytes).
 #[test]
 fn test_skip_bytes(){
    let input = "abcdefghijklmnopq";
    let result = new_ucmd!().arg("-c").arg("--skip-bytes=5").run_piped_stdin(input.as_bytes());
    assert_empty_stderr!(result);
    assert!(result.success);
    assert_eq!(result.stdout, unindent("
            0000005   f   g   h   i   j   k   l   m   n   o   p   q
            0000021
            "));
 }
 // Asking to skip 10 bytes of a 5-byte input must make the command fail.
 #[test]
 fn test_skip_bytes_error(){
    let input = "12345";
    let result = new_ucmd!().arg("--skip-bytes=10").run_piped_stdin(input.as_bytes());
    assert!(!result.success);
 }
 // `--read-bytes=27` covers the 26 letters plus the newline; the digits that
 // follow lie beyond the limit.
 // NOTE(review): `ALPHA_OUT` is defined elsewhere in this file; presumably it
 // is the dump of exactly those 27 bytes - confirm.
 #[test]
 fn test_read_bytes(){
    let input = "abcdefghijklmnopqrstuvwxyz\n12345678";
    let result = new_ucmd!().arg("--endian=little").arg("--read-bytes=27").run_piped_stdin(input.as_bytes());
    assert_empty_stderr!(result);
    assert!(result.success);
    assert_eq!(result.stdout, unindent(ALPHA_OUT));
 }