From 459db47c2bbe3ca75ed7a8c3096b15977209d630 Mon Sep 17 00:00:00 2001 From: Wim Hueskes Date: Sun, 7 Aug 2016 01:33:23 +0200 Subject: [PATCH] od: implement --skip-bytes and --read-bytes --- src/od/multifilereader.rs | 13 ++- src/od/od.rs | 60 +++++++++-- src/od/parse_nrofbytes.rs | 129 ++++++++++++++++++++++++ src/od/partialreader.rs | 204 ++++++++++++++++++++++++++++++++++++++ tests/test_od.rs | 34 +++++++ 5 files changed, 428 insertions(+), 12 deletions(-) create mode 100644 src/od/parse_nrofbytes.rs create mode 100644 src/od/partialreader.rs diff --git a/src/od/multifilereader.rs b/src/od/multifilereader.rs index 6df298f2b..6ab66cf68 100644 --- a/src/od/multifilereader.rs +++ b/src/od/multifilereader.rs @@ -16,7 +16,11 @@ pub enum InputSource<'a> { pub struct MultifileReader<'a> { ni: Vec>, curr_file: Option>, - pub any_err: bool, + any_err: bool, +} + +pub trait HasError { + fn has_error(&self) -> bool; } impl<'b> MultifileReader<'b> { @@ -109,6 +113,13 @@ impl<'b> io::Read for MultifileReader<'b> { } } +impl<'b> HasError for MultifileReader<'b> { + fn has_error(&self) -> bool { + self.any_err + } +} + + #[cfg(test)] mod tests { use super::*; diff --git a/src/od/od.rs b/src/od/od.rs index c8ab51f2e..2b91a0e29 100644 --- a/src/od/od.rs +++ b/src/od/od.rs @@ -17,11 +17,13 @@ extern crate byteorder; extern crate uucore; mod multifilereader; +mod partialreader; mod byteorder_io; mod formatteriteminfo; mod prn_int; mod prn_char; mod prn_float; +mod parse_nrofbytes; #[cfg(test)] mod mockstream; @@ -31,10 +33,12 @@ use std::io::Write; use unindent::*; use byteorder_io::*; use multifilereader::*; +use partialreader::*; use prn_int::*; use prn_char::*; use prn_float::*; use formatteriteminfo::*; +use parse_nrofbytes::*; //This is available in some versions of std, but not all that we target. macro_rules! hashmap { @@ -216,14 +220,42 @@ pub fn uumain(args: Vec) -> i32 { let output_duplicates = matches.opt_present("v"); - odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..], output_duplicates) + let skip_bytes = match matches.opt_default("skip-bytes", "0") { + None => 0, + Some(s) => { + match parse_number_of_bytes(&s) { + Ok(i) => { i } + Err(_) => { + disp_err!("Invalid argument --skip-bytes={}", s); + return 1; + } + } + } + }; + let read_bytes = match matches.opt_str("read-bytes") { + None => None, + Some(s) => { + match parse_number_of_bytes(&s) { + Ok(i) => { Some(i) } + Err(_) => { + disp_err!("Invalid argument --read-bytes={}", s); + return 1; + } + } + } + }; + + odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..], + output_duplicates, skip_bytes, read_bytes) } fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, - fnames: Vec, formats: &[FormatterItemInfo], output_duplicates: bool) -> i32 { + fnames: Vec, formats: &[FormatterItemInfo], output_duplicates: bool, + skip_bytes: usize, read_bytes: Option) -> i32 { - let mut mf = MultifileReader::new(fnames); - let mut addr = 0; + let mf = MultifileReader::new(fnames); + let mut input = PartialReader::new(mf, skip_bytes, read_bytes); + let mut addr = skip_bytes; let mut duplicate_line = false; let mut previous_bytes: Vec = Vec::new(); let mut bytes: Vec = Vec::with_capacity(line_bytes); @@ -270,11 +302,9 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, // print each line data (or multi-format raster of several lines describing the same data). // TODO: we need to read more data in case a multi-byte sequence starts at the end of the line - match mf.read(bytes.as_mut_slice()) { + match input.read(bytes.as_mut_slice()) { Ok(0) => { - if input_offset_base != Radix::NoPrefix { - print!("{}\n", print_with_radix(input_offset_base, addr)); // print final offset - } + print_final_offset(input_offset_base, addr); break; } Ok(n) => { @@ -310,13 +340,15 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, addr += n; } - Err(_) => { - break; + Err(e) => { + show_error!("{}", e); + print_final_offset(input_offset_base, addr); + return 1; } }; } - if mf.any_err { + if input.has_error() { 1 } else { 0 @@ -421,6 +453,12 @@ fn print_with_radix(r: Radix, x: usize) -> String{ } } +fn print_final_offset(r: Radix, x: usize) { + if r != Radix::NoPrefix { + print!("{}\n", print_with_radix(r, x)); + } +} + struct SpacedFormatterItemInfo { frm: FormatterItemInfo, spacing: [usize; MAX_BYTES_PER_UNIT], diff --git a/src/od/parse_nrofbytes.rs b/src/od/parse_nrofbytes.rs new file mode 100644 index 000000000..780c5f30b --- /dev/null +++ b/src/od/parse_nrofbytes.rs @@ -0,0 +1,129 @@ + +pub fn parse_number_of_bytes(s: &String) -> Result { + let mut start = 0; + let mut len = s.len(); + let mut radix = 10; + let mut multiply = 1; + + if s.starts_with("0x") || s.starts_with("0X") { + start = 2; + radix = 16; + } + else if s.starts_with("0") { + radix = 8; + } + + let mut ends_with = s.chars().rev(); + match ends_with.next() { + Some('b') if radix != 16 => { + multiply = 512; + len -= 1; + }, + Some('k') | Some('K') => { + multiply = 1024; + len -= 1; + } + Some('m') | Some('M') => { + multiply = 1024*1024; + len -= 1; + } + Some('G') => { + multiply = 1024*1024*1024; + len -= 1; + } + #[cfg(target_pointer_width = "64")] + Some('T') => { + multiply = 1024*1024*1024*1024; + len -= 1; + } + #[cfg(target_pointer_width = "64")] + Some('P') => { + multiply = 1024*1024*1024*1024*1024; + len -= 1; + } + #[cfg(target_pointer_width = "64")] + Some('E') => { + multiply = 1024*1024*1024*1024*1024*1024; + len -= 1; + } + Some('B') if radix != 16 => { + len -= 2; + multiply = match ends_with.next() { + Some('k') | Some('K') => 1000, + Some('m') | Some('M') => 1000*1000, + Some('G') => 1000*1000*1000, + #[cfg(target_pointer_width = "64")] + Some('T') => 1000*1000*1000*1000, + #[cfg(target_pointer_width = "64")] + Some('P') => 1000*1000*1000*1000*1000, + #[cfg(target_pointer_width = "64")] + Some('E') => 1000*1000*1000*1000*1000*1000, + _ => return Err("parse failed"), + } + }, + _ => {}, + } + + match usize::from_str_radix(&s[start..len], radix) { + Ok(i) => Ok(i * multiply), + Err(_) => Err("parse failed"), + } +} + +#[allow(dead_code)] +fn parse_number_of_bytes_str(s: &str) -> Result { + parse_number_of_bytes(&String::from(s)) +} + +#[test] +fn test_parse_number_of_bytes() { + // normal decimal numbers + assert_eq!(0, parse_number_of_bytes_str("0").unwrap()); + assert_eq!(5, parse_number_of_bytes_str("5").unwrap()); + assert_eq!(999, parse_number_of_bytes_str("999").unwrap()); + assert_eq!(2*512, parse_number_of_bytes_str("2b").unwrap()); + assert_eq!(2*1024, parse_number_of_bytes_str("2k").unwrap()); + assert_eq!(4*1024, parse_number_of_bytes_str("4K").unwrap()); + assert_eq!(2*1048576, parse_number_of_bytes_str("2m").unwrap()); + assert_eq!(4*1048576, parse_number_of_bytes_str("4M").unwrap()); + assert_eq!(1073741824, parse_number_of_bytes_str("1G").unwrap()); + assert_eq!(2000, parse_number_of_bytes_str("2kB").unwrap()); + assert_eq!(4000, parse_number_of_bytes_str("4KB").unwrap()); + assert_eq!(2000000, parse_number_of_bytes_str("2mB").unwrap()); + assert_eq!(4000000, parse_number_of_bytes_str("4MB").unwrap()); + assert_eq!(2000000000, parse_number_of_bytes_str("2GB").unwrap()); + + // octal input + assert_eq!(8, parse_number_of_bytes_str("010").unwrap()); + assert_eq!(8*512, parse_number_of_bytes_str("010b").unwrap()); + assert_eq!(8*1024, parse_number_of_bytes_str("010k").unwrap()); + assert_eq!(8*1048576, parse_number_of_bytes_str("010m").unwrap()); + + // hex input + assert_eq!(15, parse_number_of_bytes_str("0xf").unwrap()); + assert_eq!(15, parse_number_of_bytes_str("0XF").unwrap()); + assert_eq!(27, parse_number_of_bytes_str("0x1b").unwrap()); + assert_eq!(16*1024, parse_number_of_bytes_str("0x10k").unwrap()); + assert_eq!(16*1048576, parse_number_of_bytes_str("0x10m").unwrap()); + + // invalid input + parse_number_of_bytes_str("").unwrap_err(); + parse_number_of_bytes_str("-1").unwrap_err(); + parse_number_of_bytes_str("1e2").unwrap_err(); + parse_number_of_bytes_str("xyz").unwrap_err(); + parse_number_of_bytes_str("b").unwrap_err(); + parse_number_of_bytes_str("1Y").unwrap_err(); + parse_number_of_bytes_str("∞").unwrap_err(); +} + +#[test] +#[cfg(target_pointer_width = "64")] +fn test_parse_number_of_bytes_64bits() { + assert_eq!(1099511627776, parse_number_of_bytes_str("1T").unwrap()); + assert_eq!(1125899906842624, parse_number_of_bytes_str("1P").unwrap()); + assert_eq!(1152921504606846976, parse_number_of_bytes_str("1E").unwrap()); + + assert_eq!(2000000000000, parse_number_of_bytes_str("2TB").unwrap()); + assert_eq!(2000000000000000, parse_number_of_bytes_str("2PB").unwrap()); + assert_eq!(2000000000000000000, parse_number_of_bytes_str("2EB").unwrap()); +} diff --git a/src/od/partialreader.rs b/src/od/partialreader.rs new file mode 100644 index 000000000..e68b267e0 --- /dev/null +++ b/src/od/partialreader.rs @@ -0,0 +1,204 @@ +use std::cmp; +use std::io; +use std::io::Read; +use multifilereader::HasError; + +/// When a large number of bytes must be skipped, it will be read into a +/// dynamically allocated buffer. The buffer will be limited to this size. +const MAX_SKIP_BUFFER: usize = 64*1024; + +/// Wrapper for `std::io::Read` which can skip bytes at the beginning +/// of the input, and it can limit the returned bytes to a particular +/// number of bytes. +pub struct PartialReader { + inner: R, + skip: usize, + limit: Option, +} + +impl PartialReader { + /// Create a new `PartialReader` wrapping `inner`, which will skip + /// `skip` bytes, and limits the output to `limit` bytes. Set `limit` + /// to `None` if there should be no limit. + pub fn new(inner: R, skip: usize, limit: Option) -> Self { + PartialReader { + inner: inner, + skip: skip, + limit: limit, + } + } +} + +impl Read for PartialReader { + fn read(&mut self, out: &mut [u8]) -> io::Result { + if self.skip > 0 { + let buf_size = cmp::min(self.skip, MAX_SKIP_BUFFER); + let mut bytes: Vec = Vec::with_capacity(buf_size); + unsafe { bytes.set_len(buf_size); } + + while self.skip > 0 { + let skip_count = cmp::min(self.skip, buf_size); + + match self.inner.read_exact(&mut bytes[..skip_count]) { + Err(e) => return Err(e), + Ok(()) => self.skip -= skip_count, + } + } + } + match self.limit { + None => self.inner.read(out), + Some(0) => Ok(0), + Some(ref mut limit) => { + let slice = if *limit > out.len() { out } else { &mut out[0..*limit] }; + match self.inner.read(slice) { + Err(e) => Err(e), + Ok(r) => { + *limit -= r; + Ok(r) + }, + } + }, + } + } +} + +impl HasError for PartialReader { + fn has_error(&self) -> bool { + self.inner.has_error() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Cursor, Read, ErrorKind}; + use std::error::Error; + use mockstream::*; + + #[test] + fn test_read_without_limits() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, None); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 8); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); + } + + #[test] + fn test_read_without_limits_with_error() { + let mut v = [0; 10]; + let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3); + let mut sut = PartialReader::new(f, 0, None); + + let error = sut.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::PermissionDenied); + assert_eq!(error.description(), "No access"); + } + + #[test] + fn test_read_skipping_bytes() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 2, None); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 6); + assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0, 0, 0]); + } + + #[test] + fn test_read_skipping_all() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 20, None); + + let error = sut.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::UnexpectedEof); + } + + #[test] + fn test_read_skipping_with_error() { + let mut v = [0; 10]; + let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3); + let mut sut = PartialReader::new(f, 2, None); + + let error = sut.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::PermissionDenied); + assert_eq!(error.description(), "No access"); + } + + #[test] + fn test_read_skipping_with_two_reads_during_skip() { + let mut v = [0; 10]; + let c = Cursor::new(&b"a"[..]) + .chain(Cursor::new(&b"bcdefgh"[..])); + let mut sut = PartialReader::new(c, 2, None); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 6); + assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0, 0, 0]); + } + + #[test] + fn test_read_skipping_huge_number() { + let mut v = [0; 10]; + // test if it does not eat all memory.... + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), usize::max_value(), None); + + sut.read(v.as_mut()).unwrap_err(); + } + + #[test] + fn test_read_limitting_all() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(0)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 0); + } + + #[test] + fn test_read_limitting() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(6)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 6); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0, 0, 0, 0]); + } + + #[test] + fn test_read_limitting_with_error() { + let mut v = [0; 10]; + let f = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3); + let mut sut = PartialReader::new(f, 0, Some(6)); + + let error = sut.read(v.as_mut()).unwrap_err(); + assert_eq!(error.kind(), ErrorKind::PermissionDenied); + assert_eq!(error.description(), "No access"); + } + + #[test] + fn test_read_limitting_with_large_limit() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(20)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 8); + assert_eq!(v, [0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0, 0]); + } + + #[test] + fn test_read_limitting_with_multiple_reads() { + let mut v = [0; 3]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 0, Some(6)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 3); + assert_eq!(v, [0x61, 0x62, 0x63]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 3); + assert_eq!(v, [0x64, 0x65, 0x66]); + assert_eq!(sut.read(v.as_mut()).unwrap(), 0); + } + + #[test] + fn test_read_skipping_and_limitting() { + let mut v = [0; 10]; + let mut sut = PartialReader::new(Cursor::new(&b"abcdefgh"[..]), 2, Some(4)); + + assert_eq!(sut.read(v.as_mut()).unwrap(), 4); + assert_eq!(v, [0x63, 0x64, 0x65, 0x66, 0, 0, 0, 0, 0, 0]); + } +} diff --git a/tests/test_od.rs b/tests/test_od.rs index af4eca612..26f3d5b60 100644 --- a/tests/test_od.rs +++ b/tests/test_od.rs @@ -475,3 +475,37 @@ fn test_invalid_offset(){ assert!(!result.success); } + +#[test] +fn test_skip_bytes(){ + + let input = "abcdefghijklmnopq"; + let result = new_ucmd!().arg("-c").arg("--skip-bytes=5").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(" + 0000005 f g h i j k l m n o p q + 0000021 + ")); +} + +#[test] +fn test_skip_bytes_error(){ + + let input = "12345"; + let result = new_ucmd!().arg("--skip-bytes=10").run_piped_stdin(input.as_bytes()); + + assert!(!result.success); +} + +#[test] +fn test_read_bytes(){ + + let input = "abcdefghijklmnopqrstuvwxyz\n12345678"; + let result = new_ucmd!().arg("--endian=little").arg("--read-bytes=27").run_piped_stdin(input.as_bytes()); + + assert_empty_stderr!(result); + assert!(result.success); + assert_eq!(result.stdout, unindent(ALPHA_OUT)); +}