coreutils/src/od/od.rs

492 lines
17 KiB
Rust
Raw Normal View History

#![crate_name = "uu_od"]
/*
* This file is part of the uutils coreutils package.
*
* (c) Ben Hirsch <benhirsch24@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
extern crate getopts;
use std::fs::File;
use std::io::Read;
use std::mem;
use std::io::BufReader;
use std::io::Write;
use std::io;
// Map-literal helper. An equivalent is available in some versions of std,
// but not in all of the versions that we target.
//
// `hashmap![k1 => v1, k2 => v2]` evaluates to a `HashMap` holding the listed
// pairs; a key that is repeated keeps the value that was listed last.
macro_rules! hashmap {
    ($($k:expr => $v:expr),*) => {{
        let mut table = ::std::collections::HashMap::new();
        $(
            table.insert($k, $v);
        )*
        table
    }};
}
2015-02-03 21:34:45 +00:00
/// Numeric base used when printing the file offset at the left margin.
#[derive(Debug)]
enum Radix {
    /// Base 10.
    Decimal,
    /// Base 16.
    Hexadecimal,
    /// Base 8.
    Octal,
    /// Base 2.
    Binary,
}
/// One input to be dumped.
#[derive(Debug)]
enum InputSource<'a> {
    /// A file, identified by the (borrowed) name it should be opened with.
    FileName(&'a str),
    /// The process's standard input.
    Stdin,
}
pub fn uumain(args: Vec<String>) -> i32 {
let mut opts = getopts::Options::new();
opts.optopt("A", "address-radix",
"Select the base in which file offsets are printed.", "RADIX");
opts.optopt("j", "skip-bytes",
"Skip bytes input bytes before formatting and writing.", "BYTES");
opts.optopt("N", "read-bytes",
"limit dump to BYTES input bytes", "BYTES");
opts.optopt("S", "strings",
("output strings of at least BYTES graphic chars. 3 is assumed when \
BYTES is not specified."),
"BYTES");
opts.optflag("a", "", "named characters, ignoring high-order bit");
opts.optflag("b", "", "octal bytes");
opts.optflag("c", "", "ASCII characters or backslash escapes");
opts.optflag("d", "", "unsigned decimal 2-byte units");
opts.optflag("o", "", "unsigned decimal 2-byte units");
opts.optflag("I", "", "decimal 2-byte units");
opts.optflag("L", "", "decimal 2-byte units");
opts.optflag("i", "", "decimal 2-byte units");
opts.optflag("O", "", "octal 4-byte units");
opts.optflag("s", "", "decimal 4-byte units");
opts.optopt("t", "format", "select output format or formats", "TYPE");
opts.optflag("v", "output-duplicates", "do not use * to mark line suppression");
opts.optopt("w", "width",
("output BYTES bytes per output line. 32 is implied when BYTES is not \
specified."),
"BYTES");
opts.optflag("h", "help", "display this help and exit.");
opts.optflag("", "version", "output version information and exit.");
let matches = match opts.parse(&args[1..]) {
Ok(m) => m,
Err(f) => panic!("Invalid options\n{}", f)
};
let input_offset_base = match parse_radix(matches.opt_str("A")) {
Ok(r) => r,
Err(f) => { panic!("Invalid -A/--address-radix\n{}", f) }
};
// Gather up file names - args which don't start with '-'
let stdnionly = [InputSource::Stdin];
let inputs = args[1..]
.iter()
.filter_map(|w| match w as &str {
"--" => Some(InputSource::Stdin),
o if o.starts_with("-") => None,
x => Some(InputSource::FileName(x)),
})
.collect::<Vec<_>>();
// If no input files named, use stdin.
let inputs = if inputs.len() == 0 {
&stdnionly[..]
} else {
&inputs[..]
};
// Gather up format flags, we don't use getopts becase we need keep them in order.
let flags = args[1..]
.iter()
.filter_map(|w| match w as &str {
"--" => None,
o if o.starts_with("-") => Some(&o[1..]),
_ => None,
})
.collect::<Vec<_>>();
// At the moment, char (-a & -c)formats need the driver to set up a
// line by inserting a different # of of spaces at the start.
struct OdFormater {
writer: fn(p: u64, itembytes: usize),
offmarg: usize,
};
let oct = OdFormater {
writer: print_item_oct, offmarg: 2
};
let hex = OdFormater {
writer: print_item_hex, offmarg: 2
};
let dec_u = OdFormater {
writer: print_item_dec_u, offmarg: 2
};
let dec_s = OdFormater {
writer: print_item_dec_s, offmarg: 2
};
let a_char = OdFormater {
writer: print_item_a, offmarg: 1
};
let c_char = OdFormater {
writer: print_item_c, offmarg: 1
};
fn mkfmt(itembytes: usize, fmtspec: &OdFormater) -> OdFormat {
OdFormat {
itembytes: itembytes,
writer: fmtspec.writer,
offmarg: fmtspec.offmarg,
}
}
// TODO: -t fmts
let known_formats = hashmap![
"a" => (1, &a_char),
"B" => (2, &oct) ,
"b" => (1, &oct),
"c" => (1, &c_char),
"D" => (4, &dec_u),
// TODO: support floats
// "e" => (8, &flo64),
// "F" => (8, &flo64),
// "F" => (4, &flo32),
"H" => (4, &hex),
"X" => (4, &hex) ,
"o" => (2, &oct),
"x" => (2, &hex),
"h" => (2, &hex),
"I" => (2, &dec_s),
"L" => (2, &dec_s),
"i" => (2, &dec_s),
"O" => (4, &oct),
"s" => (2, &dec_u)
];
let mut formats = Vec::new();
for flag in flags.iter() {
match known_formats.get(flag) {
None => {} // not every option is a format
Some(r) => {
let (itembytes, fmtspec) = *r;
formats.push(mkfmt(itembytes, fmtspec))
}
}
}
if formats.is_empty() {
formats.push(mkfmt(2, &oct)); // 2 byte octal is the default
}
odfunc(&input_offset_base, &inputs, &formats[..])
}
/// Number of input bytes shown on one output line.
const LINEBYTES: usize = 16;
/// Size in bytes of the unit used when padding out a short line.
const WORDBYTES: usize = 2;
fn odfunc(input_offset_base: &Radix, fnames: &[InputSource], formats: &[OdFormat]) -> i32 {
let mut mf = MultifileReader::new(fnames);
let mut addr = 0;
let bytes = &mut [b'\x00'; LINEBYTES];
loop {
// print each line data (or multi-format raster of several lines describing the same data).
print_with_radix(input_offset_base, addr); // print offset
// if printing in multiple formats offset is printed only once
match mf.f_read(bytes) {
Ok(0) => {
print!("\n");
break;
}
Ok(n) => {
let mut first = true; // First line of a multi-format raster.
for f in formats {
if !first {
// this takes the space of the file offset on subsequent
// lines of multi-format rasters.
print!(" ");
}
first = false;
print!("{:>width$}", "", width = f.offmarg);// 4 spaces after offset - we print 2 more before each word
for b in 0..n / f.itembytes {
let mut p: u64 = 0;
for i in 0..f.itembytes {
p |= (bytes[(f.itembytes * b) + i] as u64) << (8 * i);
}
(f.writer)(p, f.itembytes);
}
// not enough byte for a whole element, this should only happen on the last line.
if n % f.itembytes != 0 {
let b = n / f.itembytes;
let mut p2: u64 = 0;
for i in 0..(n % f.itembytes) {
p2 |= (bytes[(f.itembytes * b) + i] as u64) << (8 * i);
}
(f.writer)(p2, f.itembytes);
}
// Add extra spaces to pad out the short, presumably last, line.
if n < LINEBYTES {
// calc # of items we did not print, must be short at least WORDBYTES to be missing any.
let words_short = (LINEBYTES - n) / WORDBYTES;
// XXX this is running short for -c & -a
print!("{:>width$}", "", width = (words_short) * (6 + 2));
}
print!("\n");
}
addr += n;
}
Err(_) => {
break;
}
};
}
if mf.any_err {
1
} else {
0
}
}
/// Parses the radix used for the file byte offset printed at the left margin.
///
/// `None` selects the default (octal). Otherwise the string must be exactly
/// one of `d`, `x`, `o` or `b`; anything else yields an error message.
fn parse_radix(radix_str: Option<String>) -> Result<Radix, &'static str> {
    const BAD_RADIX: &'static str = "Radix must be one of [d, o, b, x]\n";

    let spec = match radix_str {
        Some(s) => s,
        None => return Ok(Radix::Octal),
    };

    match &spec[..] {
        "d" => Ok(Radix::Decimal),
        "x" => Ok(Radix::Hexadecimal),
        "o" => Ok(Radix::Octal),
        "b" => Ok(Radix::Binary),
        _ => Err(BAD_RADIX),
    }
}
/// Writes `x` to stdout in base `r`, zero-padded to at least 7 digits.
fn print_with_radix(r: &Radix, x: usize) {
    // TODO(keunwoo): field widths should be based on sizeof(x), or chosen
    // dynamically based on the expected range of address values. Binary in
    // particular is not great here.
    match r {
        &Radix::Octal => print!("{:07o}", x),
        &Radix::Decimal => print!("{:07}", x),
        &Radix::Hexadecimal => print!("{:07X}", x),
        &Radix::Binary => print!("{:07b}", x),
    }
}
// MultifileReader - concatenate all our input, file or stdin.
struct MultifileReader<'a> {
ni: std::slice::Iter<'a, InputSource<'a>>,
curr_file: Option<Box<io::Read>>,
any_err: bool,
}
impl<'b> MultifileReader<'b> {
    /// Creates a reader over `fnames` and opens the first usable input.
    fn new<'a>(fnames: &'a [InputSource]) -> MultifileReader<'a> {
        let mut mf = MultifileReader {
            ni: fnames.iter(),
            curr_file: None, // normally this means done; call next_file()
            any_err: false,
        };
        mf.next_file();
        mf
    }

    /// Advances to the next input that can be opened.
    ///
    /// Inputs that fail to open are reported on stderr, recorded in
    /// `any_err` and skipped. When no input is left `curr_file` is `None`.
    fn next_file(&mut self) {
        // The loop retries with subsequent files on error; normally it runs once.
        loop {
            let input = match self.ni.next() {
                Some(input) => input,
                None => {
                    self.curr_file = None;
                    return;
                }
            };
            match *input {
                InputSource::Stdin => {
                    self.curr_file = Some(Box::new(BufReader::new(std::io::stdin())));
                    return;
                }
                InputSource::FileName(fname) => {
                    match File::open(fname) {
                        Ok(f) => {
                            self.curr_file = Some(Box::new(BufReader::new(f)));
                            return;
                        }
                        Err(e) => {
                            // If any file can't be opened, print an error at
                            // the time that the file is needed, then move on
                            // to the next file. This matches the behavior of
                            // the original `od`.
                            let _ = writeln!(&mut std::io::stderr(), "od: '{}': {}", fname, e);
                            self.any_err = true
                        }
                    }
                }
            }
        }
    }

    /// Fills `buf` with bytes read from the list of inputs.
    ///
    /// Returns `Ok(<number of bytes read>)`. I/O errors are handled here and
    /// never returned: a failing input is reported on stderr, recorded in
    /// `any_err`, and reading continues with the next input.
    /// The buffer is filled completely unless the input has run out; once a
    /// call returns short (< `buf.len()`), all subsequent calls return `Ok(0)`.
    fn f_read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let mut xfrd = 0;
        // While the buffer is not full... this may go through several files.
        'fillloop: while xfrd < buf.len() {
            match self.curr_file {
                None => break,
                Some(ref mut curr_file) => {
                    loop {
                        // stdin may return on 'return' (enter), even though
                        // the buffer isn't full, so keep reading.
                        xfrd += match curr_file.read(&mut buf[xfrd..]) {
                            Ok(0) => break,
                            Ok(n) => n,
                            // An interrupted read transferred nothing; retry.
                            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => 0,
                            Err(e) => {
                                // Give up on this input (e.g. a directory)
                                // but keep going with the remaining ones.
                                let _ = writeln!(&mut std::io::stderr(), "od: I/O: {}", e);
                                self.any_err = true;
                                break
                            }
                        };
                        if xfrd == buf.len() {
                            // Transferred all that was asked for.
                            break 'fillloop;
                        }
                    }
                }
            }
            self.next_file();
        }
        Ok(xfrd)
    }
}
/// One output format: how large an item is and how it is written.
struct OdFormat {
    /// Number of input bytes that make up one item.
    itembytes: usize,
    /// Prints a single item, given its value and its size in bytes.
    writer: fn(p: u64, itembytes: usize),
    /// Number of spaces printed before the first item of a line.
    offmarg: usize,
}
// TODO: use some sort of byte iterator, instead of passing bytes in u64
/// Prints `p` in octal, occupying 4 columns per byte of the item:
/// 3 zero-padded digits per byte, preceded by one space per byte.
fn print_item_oct(p: u64, itembytes: usize) {
    let digits = 3 * itembytes;
    let lead = 4 * itembytes - digits;
    print!("{:>lead$}{:0digits$o}", "", p, lead = lead, digits = digits);
}
/// Prints `p` in lowercase hexadecimal, occupying 4 columns per byte of the
/// item: 2 zero-padded digits per byte, preceded by two spaces per byte.
fn print_item_hex(p: u64, itembytes: usize) {
    let digits = 2 * itembytes;
    let lead = 4 * itembytes - digits;
    print!("{:>lead$}{:0digits$x}", "", p, lead = lead, digits = digits);
}
/// Interprets the low `itembytes` bytes of `item` as a two's-complement
/// number and widens it to `i64`. Bits above the item are ignored.
///
/// A width of 0 yields 0; widths of 8 bytes or more reinterpret the whole
/// value.
fn sign_extend(item: u64, itembytes: usize) -> i64 {
    if itembytes == 0 {
        return 0;
    }
    if itembytes >= 8 {
        // Already full width; shifting by 64 bits would overflow.
        return item as i64;
    }
    // Move the item's sign bit into bit 63, then let the arithmetic right
    // shift of the signed value copy it back down over the vacated bits.
    let unused_bits = 64 - 8 * itembytes;
    ((item << unused_bits) as i64) >> unused_bits
}
/// Prints `p` as a signed decimal number, right-aligned in 4 columns per
/// byte of the item.
fn print_item_dec_s(p: u64, itembytes: usize) {
    // The item arrives as raw bits; recover its sign first.
    let signed = sign_extend(p, itembytes);
    let column = 4 * itembytes;
    print!("{:column$}", signed, column = column);
}
/// Prints `p` as an unsigned decimal number, right-aligned in 4 columns per
/// byte of the item.
fn print_item_dec_u(p: u64, itembytes: usize) {
    let column = 4 * itembytes;
    print!("{:column$}", p, column = column);
}
// TODO: multi-byte chars
// Quoth the man page: Multi-byte characters are displayed in the area corresponding to the first byte of the character. The remaining bytes are shown as `**'.
/// Text printed by the `-a` format, indexed by byte value: names for the
/// control characters, `sp` for space, the printable characters themselves,
/// `del`, and two hex digits for 0x80 through 0x9f.
static A_CHRS: [&'static str; 160] = [
    // 0x00 - 0x1f
    "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
    "bs",  "ht",  "nl",  "vt",  "ff",  "cr",  "so",  "si",
    "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
    "can", "em",  "sub", "esc", "fs",  "gs",  "rs",  "us",
    // 0x20 - 0x3f
    "sp",  "!",   "\"",  "#",   "$",   "%",   "&",   "'",
    "(",   ")",   "*",   "+",   ",",   "-",   ".",   "/",
    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",
    "8",   "9",   ":",   ";",   "<",   "=",   ">",   "?",
    // 0x40 - 0x5f
    "@",   "A",   "B",   "C",   "D",   "E",   "F",   "G",
    "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
    "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",
    "X",   "Y",   "Z",   "[",   "\\",  "]",   "^",   "_",
    // 0x60 - 0x7f
    "`",   "a",   "b",   "c",   "d",   "e",   "f",   "g",
    "h",   "i",   "j",   "k",   "l",   "m",   "n",   "o",
    "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",
    "x",   "y",   "z",   "{",   "|",   "}",   "~",   "del",
    // 0x80 - 0x9f
    "80",  "81",  "82",  "83",  "84",  "85",  "86",  "87",
    "88",  "89",  "8a",  "8b",  "8c",  "8d",  "8e",  "8f",
    "90",  "91",  "92",  "93",  "94",  "95",  "96",  "97",
    "98",  "99",  "9a",  "9b",  "9c",  "9d",  "9e",  "9f",
];
/// Prints the byte in `p` as a named character (`-a`), right-aligned in
/// 4 columns. The item size argument is unused: items are always one byte.
///
/// As the `-a` help text states, the high-order bit is ignored, so bytes
/// 0x80-0xff print the same name as the byte with that bit cleared.
fn print_item_a(p: u64, _: usize) {
    // itembytes == 1
    let b = (p & 0x7f) as usize;
    // The fallback cannot trigger for a 7-bit index into the name table; it
    // only guards against the table being shortened.
    print!("{:>4}", A_CHRS.get(b).unwrap_or(&"?"));
}
/// Text printed by the `-c` format for byte values 0 through 126: backslash
/// escapes where one is listed, 3-digit octal for the other control
/// characters, and the printable characters themselves.
static C_CHRS: [&'static str; 127] = [
    // 0x00 - 0x1f
    "\\0", "001", "002", "003", "004", "005", "006", "\\a",
    "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "016", "017",
    "020", "021", "022", "023", "024", "025", "026", "027",
    "030", "031", "032", "033", "034", "035", "036", "037",
    // 0x20 - 0x3f
    " ",   "!",   "\"",  "#",   "$",   "%",   "&",   "'",
    "(",   ")",   "*",   "+",   ",",   "-",   ".",   "/",
    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",
    "8",   "9",   ":",   ";",   "<",   "=",   ">",   "?",
    // 0x40 - 0x5f
    "@",   "A",   "B",   "C",   "D",   "E",   "F",   "G",
    "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
    "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",
    "X",   "Y",   "Z",   "[",   "\\",  "]",   "^",   "_",
    // 0x60 - 0x7e
    "`",   "a",   "b",   "c",   "d",   "e",   "f",   "g",
    "h",   "i",   "j",   "k",   "l",   "m",   "n",   "o",
    "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",
    "x",   "y",   "z",   "{",   "|",   "}",   "~",
];
fn print_item_c(p: u64, _: usize) {
// itembytes == 1
let b = (p & 0xff) as usize;
if b < C_CHRS.len() {
match C_CHRS.get(b as usize) {
Some(s) => print!("{:>4}", s),
None => print!("{:>4}", b),
}
}
}