od: implement --skip-bytes and --read-bytes

This commit is contained in:
Wim Hueskes 2016-08-07 01:33:23 +02:00
parent 2b10cc47ff
commit 459db47c2b
5 changed files with 428 additions and 12 deletions

View file

@ -16,7 +16,11 @@ pub enum InputSource<'a> {
pub struct MultifileReader<'a> { pub struct MultifileReader<'a> {
ni: Vec<InputSource<'a>>, ni: Vec<InputSource<'a>>,
curr_file: Option<Box<io::Read>>, curr_file: Option<Box<io::Read>>,
pub any_err: bool, any_err: bool,
}
pub trait HasError {
fn has_error(&self) -> bool;
} }
impl<'b> MultifileReader<'b> { impl<'b> MultifileReader<'b> {
@ -109,6 +113,13 @@ impl<'b> io::Read for MultifileReader<'b> {
} }
} }
impl<'b> HasError for MultifileReader<'b> {
fn has_error(&self) -> bool {
self.any_err
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View file

@ -17,11 +17,13 @@ extern crate byteorder;
extern crate uucore; extern crate uucore;
mod multifilereader; mod multifilereader;
mod partialreader;
mod byteorder_io; mod byteorder_io;
mod formatteriteminfo; mod formatteriteminfo;
mod prn_int; mod prn_int;
mod prn_char; mod prn_char;
mod prn_float; mod prn_float;
mod parse_nrofbytes;
#[cfg(test)] #[cfg(test)]
mod mockstream; mod mockstream;
@ -31,10 +33,12 @@ use std::io::Write;
use unindent::*; use unindent::*;
use byteorder_io::*; use byteorder_io::*;
use multifilereader::*; use multifilereader::*;
use partialreader::*;
use prn_int::*; use prn_int::*;
use prn_char::*; use prn_char::*;
use prn_float::*; use prn_float::*;
use formatteriteminfo::*; use formatteriteminfo::*;
use parse_nrofbytes::*;
//This is available in some versions of std, but not all that we target. //This is available in some versions of std, but not all that we target.
macro_rules! hashmap { macro_rules! hashmap {
@ -216,14 +220,42 @@ pub fn uumain(args: Vec<String>) -> i32 {
let output_duplicates = matches.opt_present("v"); let output_duplicates = matches.opt_present("v");
odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..], output_duplicates) let skip_bytes = match matches.opt_default("skip-bytes", "0") {
None => 0,
Some(s) => {
match parse_number_of_bytes(&s) {
Ok(i) => { i }
Err(_) => {
disp_err!("Invalid argument --skip-bytes={}", s);
return 1;
}
}
}
};
let read_bytes = match matches.opt_str("read-bytes") {
None => None,
Some(s) => {
match parse_number_of_bytes(&s) {
Ok(i) => { Some(i) }
Err(_) => {
disp_err!("Invalid argument --read-bytes={}", s);
return 1;
}
}
}
};
odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..],
output_duplicates, skip_bytes, read_bytes)
} }
fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder, fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
fnames: Vec<InputSource>, formats: &[FormatterItemInfo], output_duplicates: bool) -> i32 { fnames: Vec<InputSource>, formats: &[FormatterItemInfo], output_duplicates: bool,
skip_bytes: usize, read_bytes: Option<usize>) -> i32 {
let mut mf = MultifileReader::new(fnames); let mf = MultifileReader::new(fnames);
let mut addr = 0; let mut input = PartialReader::new(mf, skip_bytes, read_bytes);
let mut addr = skip_bytes;
let mut duplicate_line = false; let mut duplicate_line = false;
let mut previous_bytes: Vec<u8> = Vec::new(); let mut previous_bytes: Vec<u8> = Vec::new();
let mut bytes: Vec<u8> = Vec::with_capacity(line_bytes); let mut bytes: Vec<u8> = Vec::with_capacity(line_bytes);
@ -270,11 +302,9 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
// print each line data (or multi-format raster of several lines describing the same data). // print each line data (or multi-format raster of several lines describing the same data).
// TODO: we need to read more data in case a multi-byte sequence starts at the end of the line // TODO: we need to read more data in case a multi-byte sequence starts at the end of the line
match mf.read(bytes.as_mut_slice()) { match input.read(bytes.as_mut_slice()) {
Ok(0) => { Ok(0) => {
if input_offset_base != Radix::NoPrefix { print_final_offset(input_offset_base, addr);
print!("{}\n", print_with_radix(input_offset_base, addr)); // print final offset
}
break; break;
} }
Ok(n) => { Ok(n) => {
@ -310,13 +340,15 @@ fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
addr += n; addr += n;
} }
Err(_) => { Err(e) => {
break; show_error!("{}", e);
print_final_offset(input_offset_base, addr);
return 1;
} }
}; };
} }
if mf.any_err { if input.has_error() {
1 1
} else { } else {
0 0
@ -421,6 +453,12 @@ fn print_with_radix(r: Radix, x: usize) -> String{
} }
} }
/// Prints the offset `x` formatted with radix `r` on a line of its own.
///
/// Nothing is printed when `r` is `Radix::NoPrefix`.
fn print_final_offset(r: Radix, x: usize) {
    if r == Radix::NoPrefix {
        return;
    }
    println!("{}", print_with_radix(r, x));
}
struct SpacedFormatterItemInfo { struct SpacedFormatterItemInfo {
frm: FormatterItemInfo, frm: FormatterItemInfo,
spacing: [usize; MAX_BYTES_PER_UNIT], spacing: [usize; MAX_BYTES_PER_UNIT],

129
src/od/parse_nrofbytes.rs Normal file
View file

@ -0,0 +1,129 @@
/// Parses a byte count such as the argument of `--skip-bytes` or
/// `--read-bytes`.
///
/// The number is decimal by default, octal when it starts with `0`, and
/// hexadecimal when it starts with `0x` or `0X`. It may be followed by one
/// multiplier suffix:
///
/// * `b`: 512
/// * `k`/`K`, `m`/`M`, `G`, `T`, `P`, `E`: powers of 1024
/// * `kB`/`KB`, `mB`/`MB`, `GB`, `TB`, `PB`, `EB`: powers of 1000
///
/// `T`, `P` and `E` (and their `B` forms) are only recognised on 64-bit
/// targets. In hexadecimal input `b`, `B` and `E` are digits, not suffixes.
///
/// Returns `Err("parse failed")` when the text is not a valid number, when
/// the suffix is unknown, or when the result does not fit in a `usize`.
pub fn parse_number_of_bytes(s: &String) -> Result<usize, &'static str> {
    let (start, radix) = if s.starts_with("0x") || s.starts_with("0X") {
        (2, 16)
    } else if s.starts_with("0") {
        (0, 8)
    } else {
        (0, 10)
    };
    let is_hex = radix == 16;

    // Walk the string from the back to recognise a one or two character suffix.
    let mut tail = s.chars().rev();
    let (suffix_len, multiply): (usize, usize) = match tail.next() {
        Some('b') if !is_hex => (1, 512),
        Some('k') | Some('K') => (1, 1024),
        Some('m') | Some('M') => (1, 1024 * 1024),
        Some('G') => (1, 1024 * 1024 * 1024),
        #[cfg(target_pointer_width = "64")]
        Some('T') => (1, 1024 * 1024 * 1024 * 1024),
        #[cfg(target_pointer_width = "64")]
        Some('P') => (1, 1024 * 1024 * 1024 * 1024 * 1024),
        // `E` is a valid hex digit, so it must not be eaten as a suffix there.
        #[cfg(target_pointer_width = "64")]
        Some('E') if !is_hex => (1, 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
        Some('B') if !is_hex => {
            // Validate the prefix letter before touching any length, so a
            // lone "B" is rejected instead of underflowing.
            let factor = match tail.next() {
                Some('k') | Some('K') => 1000,
                Some('m') | Some('M') => 1000 * 1000,
                Some('G') => 1000 * 1000 * 1000,
                #[cfg(target_pointer_width = "64")]
                Some('T') => 1000 * 1000 * 1000 * 1000,
                #[cfg(target_pointer_width = "64")]
                Some('P') => 1000 * 1000 * 1000 * 1000 * 1000,
                #[cfg(target_pointer_width = "64")]
                Some('E') => 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
                _ => return Err("parse failed"),
            };
            (2, factor)
        }
        _ => (0, 1),
    };

    // Every recognised suffix character is ASCII, so `end` stays on a char
    // boundary and cannot drop below `start`.
    let end = s.len() - suffix_len;
    match usize::from_str_radix(&s[start..end], radix) {
        // Reject values that do not fit instead of panicking or wrapping.
        Ok(i) => i.checked_mul(multiply).ok_or("parse failed"),
        Err(_) => Err("parse failed"),
    }
}
/// Convenience wrapper that lets callers pass a string slice to
/// `parse_number_of_bytes`.
#[allow(dead_code)]
fn parse_number_of_bytes_str(s: &str) -> Result<usize, &'static str> {
    let owned = s.to_owned();
    parse_number_of_bytes(&owned)
}
#[test]
fn test_parse_number_of_bytes() {
    // Inputs that must parse, paired with the expected number of bytes.
    let accepted: &[(&str, usize)] = &[
        // normal decimal numbers
        ("0", 0),
        ("5", 5),
        ("999", 999),
        ("2b", 2 * 512),
        ("2k", 2 * 1024),
        ("4K", 4 * 1024),
        ("2m", 2 * 1048576),
        ("4M", 4 * 1048576),
        ("1G", 1073741824),
        ("2kB", 2000),
        ("4KB", 4000),
        ("2mB", 2000000),
        ("4MB", 4000000),
        ("2GB", 2000000000),
        // octal input
        ("010", 8),
        ("010b", 8 * 512),
        ("010k", 8 * 1024),
        ("010m", 8 * 1048576),
        // hex input
        ("0xf", 15),
        ("0XF", 15),
        ("0x1b", 27),
        ("0x10k", 16 * 1024),
        ("0x10m", 16 * 1048576),
    ];
    for &(text, expected) in accepted.iter() {
        assert_eq!(expected, parse_number_of_bytes_str(text).unwrap());
    }

    // Inputs that must be rejected.
    let rejected = ["", "-1", "1e2", "xyz", "b", "1Y", ""];
    for text in rejected.iter() {
        parse_number_of_bytes_str(text).unwrap_err();
    }
}
#[test]
#[cfg(target_pointer_width = "64")]
fn test_parse_number_of_bytes_64bits() {
    // Suffixes whose multipliers only fit in a 64-bit `usize`.
    let accepted: &[(&str, usize)] = &[
        ("1T", 1099511627776),
        ("1P", 1125899906842624),
        ("1E", 1152921504606846976),
        ("2TB", 2000000000000),
        ("2PB", 2000000000000000),
        ("2EB", 2000000000000000000),
    ];
    for &(text, expected) in accepted.iter() {
        assert_eq!(expected, parse_number_of_bytes_str(text).unwrap());
    }
}

204
src/od/partialreader.rs Normal file
View file

@ -0,0 +1,204 @@
use std::cmp;
use std::io;
use std::io::Read;
use multifilereader::HasError;
/// When a large number of bytes must be skipped, they are read into a
/// temporary heap buffer and discarded. The buffer is capped at this size.
const MAX_SKIP_BUFFER: usize = 64 * 1024;

/// Wrapper for `std::io::Read` which can skip bytes at the beginning
/// of the input, and it can limit the returned bytes to a particular
/// number of bytes.
pub struct PartialReader<R> {
    /// The wrapped reader.
    inner: R,
    /// Number of bytes that still have to be discarded before data is returned.
    skip: usize,
    /// Number of bytes that may still be returned; `None` means no limit.
    limit: Option<usize>,
}

impl<R> PartialReader<R> {
    /// Create a new `PartialReader` wrapping `inner`, which will skip
    /// `skip` bytes, and limits the output to `limit` bytes. Set `limit`
    /// to `None` if there should be no limit.
    pub fn new(inner: R, skip: usize, limit: Option<usize>) -> Self {
        PartialReader {
            inner: inner,
            skip: skip,
            limit: limit,
        }
    }
}

impl<R: Read> Read for PartialReader<R> {
    /// Reads into `out`, first discarding any bytes that remain to be
    /// skipped and never returning more than the remaining limit.
    ///
    /// # Errors
    ///
    /// Any error of the wrapped reader is passed on. If the input ends
    /// before all bytes to skip were consumed, the error produced by
    /// `read_exact` (kind `UnexpectedEof`) is returned.
    fn read(&mut self, out: &mut [u8]) -> io::Result<usize> {
        if self.skip > 0 {
            let buf_size = cmp::min(self.skip, MAX_SKIP_BUFFER);
            // Zero-initialised scratch space: passing uninitialised memory to
            // an arbitrary `Read` implementation is not sound.
            let mut scratch = vec![0u8; buf_size];

            while self.skip > 0 {
                let chunk = cmp::min(self.skip, buf_size);
                match self.inner.read_exact(&mut scratch[..chunk]) {
                    Err(e) => return Err(e),
                    Ok(()) => self.skip -= chunk,
                }
            }
        }

        match self.limit {
            None => self.inner.read(out),
            Some(0) => Ok(0),
            Some(ref mut limit) => {
                // Never hand the inner reader more room than the limit allows.
                let wanted = cmp::min(*limit, out.len());
                match self.inner.read(&mut out[..wanted]) {
                    Err(e) => Err(e),
                    Ok(n) => {
                        *limit -= n;
                        Ok(n)
                    }
                }
            }
        }
    }
}
impl<R: HasError> HasError for PartialReader<R> {
fn has_error(&self) -> bool {
self.inner.has_error()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Cursor, Read, ErrorKind};
    use std::error::Error;
    use mockstream::*;

    /// Wraps the eight bytes `abcdefgh` in a `PartialReader`.
    fn letters(skip: usize, limit: Option<usize>) -> PartialReader<Cursor<&'static [u8]>> {
        PartialReader::new(Cursor::new(&b"abcdefgh"[..]), skip, limit)
    }

    /// Checks that `err` is the "No access" permission error used by the
    /// failing-stream tests.
    fn assert_no_access(err: ::std::io::Error) {
        assert_eq!(err.kind(), ErrorKind::PermissionDenied);
        assert_eq!(err.description(), "No access");
    }

    #[test]
    fn test_read_without_limits() {
        let mut v = [0u8; 10];
        let mut sut = letters(0, None);

        assert_eq!(sut.read(&mut v).unwrap(), 8);
        assert_eq!(&v[..], &b"abcdefgh\0\0"[..]);
    }

    #[test]
    fn test_read_without_limits_with_error() {
        let mut v = [0u8; 10];
        let stream = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3);
        let mut sut = PartialReader::new(stream, 0, None);

        assert_no_access(sut.read(&mut v).unwrap_err());
    }

    #[test]
    fn test_read_skipping_bytes() {
        let mut v = [0u8; 10];
        let mut sut = letters(2, None);

        assert_eq!(sut.read(&mut v).unwrap(), 6);
        assert_eq!(&v[..], &b"cdefgh\0\0\0\0"[..]);
    }

    #[test]
    fn test_read_skipping_all() {
        let mut v = [0u8; 10];
        let mut sut = letters(20, None);

        let error = sut.read(&mut v).unwrap_err();
        assert_eq!(error.kind(), ErrorKind::UnexpectedEof);
    }

    #[test]
    fn test_read_skipping_with_error() {
        let mut v = [0u8; 10];
        let stream = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3);
        let mut sut = PartialReader::new(stream, 2, None);

        assert_no_access(sut.read(&mut v).unwrap_err());
    }

    #[test]
    fn test_read_skipping_with_two_reads_during_skip() {
        let mut v = [0u8; 10];
        // The skipped range straddles the boundary between the two chained inputs.
        let first = Cursor::new(&b"a"[..]);
        let second = Cursor::new(&b"bcdefgh"[..]);
        let mut sut = PartialReader::new(first.chain(second), 2, None);

        assert_eq!(sut.read(&mut v).unwrap(), 6);
        assert_eq!(&v[..], &b"cdefgh\0\0\0\0"[..]);
    }

    #[test]
    fn test_read_skipping_huge_number() {
        let mut v = [0u8; 10];
        // test if it does not eat all memory....
        let mut sut = letters(usize::max_value(), None);

        sut.read(&mut v).unwrap_err();
    }

    #[test]
    fn test_read_limitting_all() {
        let mut v = [0u8; 10];
        let mut sut = letters(0, Some(0));

        assert_eq!(sut.read(&mut v).unwrap(), 0);
    }

    #[test]
    fn test_read_limitting() {
        let mut v = [0u8; 10];
        let mut sut = letters(0, Some(6));

        assert_eq!(sut.read(&mut v).unwrap(), 6);
        assert_eq!(&v[..], &b"abcdef\0\0\0\0"[..]);
    }

    #[test]
    fn test_read_limitting_with_error() {
        let mut v = [0u8; 10];
        let stream = FailingMockStream::new(ErrorKind::PermissionDenied, "No access", 3);
        let mut sut = PartialReader::new(stream, 0, Some(6));

        assert_no_access(sut.read(&mut v).unwrap_err());
    }

    #[test]
    fn test_read_limitting_with_large_limit() {
        let mut v = [0u8; 10];
        let mut sut = letters(0, Some(20));

        assert_eq!(sut.read(&mut v).unwrap(), 8);
        assert_eq!(&v[..], &b"abcdefgh\0\0"[..]);
    }

    #[test]
    fn test_read_limitting_with_multiple_reads() {
        let mut v = [0u8; 3];
        let mut sut = letters(0, Some(6));

        assert_eq!(sut.read(&mut v).unwrap(), 3);
        assert_eq!(&v[..], &b"abc"[..]);
        assert_eq!(sut.read(&mut v).unwrap(), 3);
        assert_eq!(&v[..], &b"def"[..]);
        assert_eq!(sut.read(&mut v).unwrap(), 0);
    }

    #[test]
    fn test_read_skipping_and_limitting() {
        let mut v = [0u8; 10];
        let mut sut = letters(2, Some(4));

        assert_eq!(sut.read(&mut v).unwrap(), 4);
        assert_eq!(&v[..], &b"cdef\0\0\0\0\0\0"[..]);
    }
}

View file

@ -475,3 +475,37 @@ fn test_invalid_offset(){
assert!(!result.success); assert!(!result.success);
} }
// Pipes 17 bytes through `-c --skip-bytes=5` and checks that the dump starts
// at offset 0000005 and ends with the final offset 0000021 (octal for 17).
#[test]
fn test_skip_bytes(){
let input = "abcdefghijklmnopq";
let result = new_ucmd!().arg("-c").arg("--skip-bytes=5").run_piped_stdin(input.as_bytes());
assert_empty_stderr!(result);
assert!(result.success);
assert_eq!(result.stdout, unindent("
0000005 f g h i j k l m n o p q
0000021
"));
}
// Asks to skip 10 bytes of a 5 byte input and expects the command to fail.
#[test]
fn test_skip_bytes_error(){
let input = "12345";
let result = new_ucmd!().arg("--skip-bytes=10").run_piped_stdin(input.as_bytes());
assert!(!result.success);
}
// Limits reading to 27 bytes, i.e. the 26 letters plus the newline, so the
// trailing "12345678" must not show up in the output.
// NOTE(review): ALPHA_OUT is defined outside this view; presumably it is the
// expected dump of the alphabet followed by a newline. Confirm.
#[test]
fn test_read_bytes(){
let input = "abcdefghijklmnopqrstuvwxyz\n12345678";
let result = new_ucmd!().arg("--endian=little").arg("--read-bytes=27").run_piped_stdin(input.as_bytes());
assert_empty_stderr!(result);
assert!(result.success);
assert_eq!(result.stdout, unindent(ALPHA_OUT));
}