// coreutils/src/od/od.rs

#![crate_name = "uu_od"]

/*
 * This file is part of the uutils coreutils package.
 *
 * (c) Ben Hirsch <benhirsch24@gmail.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

extern crate getopts;
extern crate byteorder;

#[macro_use]
extern crate uucore;

mod multifilereader;
mod partialreader;
mod peekreader;
mod byteorder_io;
mod formatteriteminfo;
mod prn_int;
mod prn_char;
mod prn_float;
mod parse_nrofbytes;
mod parse_formats;
mod parse_inputs;
#[cfg(test)]
mod mockstream;

use std::cmp;
use std::io::Write;
use byteorder_io::*;
use multifilereader::*;
use partialreader::*;
use peekreader::*;
use formatteriteminfo::*;
use parse_nrofbytes::parse_number_of_bytes;
use parse_formats::{parse_format_flags, ParsedFormatterItemInfo};
use prn_char::format_ascii_dump;
use parse_inputs::{parse_inputs, CommandLineInputs};

// Version string printed by --version; taken from the package version at compile time.
static VERSION: &'static str = env!("CARGO_PKG_VERSION");
// Largest size in bytes of a single formatted unit that odfunc accepts; also
// the length of the per-formatter spacing table.
const MAX_BYTES_PER_UNIT: usize = 8;
// Extra room appended to the line buffer and passed to peek_read as the peek
// size on every read.
const PEEK_BUFFER_SIZE: usize = 4; // utf-8 can be 4 bytes

/// Base in which the offset column is printed (chosen with -A/--address-radix).
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum Radix {
    /// Offsets are printed as decimal numbers.
    Decimal,
    /// Offsets are printed as hexadecimal numbers.
    Hexadecimal,
    /// Offsets are printed as octal numbers; used when no radix is given.
    Octal,
    /// No offset is printed.
    NoPrefix,
}

/// Help text handed to `getopts` as the usage brief when help is requested.
static USAGE: &'static str =
r#"Usage:
    od [OPTION]... [--] [FILENAME]...
    od [-abcdDefFhHiIlLoOsxX] [FILENAME] [[+][0x]OFFSET[.][b]]
    od --traditional [OPTION]... [FILENAME] [[+][0x]OFFSET[.][b] [[+][0x]LABEL[.][b]]]

Displays data in various human-readable formats. If multiple formats are
specified, the output will contain all formats in the order they appear on the
commandline. Each format will be printed on a new line. Only the line
containing the first format will be prefixed with the offset.

If no filename is specified, or it is "-", stdin will be used. After a "--", no
more options will be recognised. This allows for filenames starting with a "-".

If a filename is a valid number which can be used as an offset in the second
form, you can force it to be recognised as a filename if you include an option
like "-j0", which is only valid in the first form.

RADIX is one of o,d,x,n for octal, decimal, hexadecimal or none.

BYTES is decimal by default, octal if prefixed with a "0", or hexadecimal if
prefixed with "0x". The suffixes b, KB, K, MB, M, GB, G, will multiply the
number with 512, 1000, 1024, 1000^2, 1024^2, 1000^3, 1024^3.

OFFSET and LABEL are octal by default, hexadecimal if prefixed with "0x" or
decimal if a "." suffix is added. The "b" suffix will multiply with 512.

TYPE contains one or more format specifications consisting of:
    a       for printable 7-bits ASCII
    c       for utf-8 characters or octal for undefined characters
    d[SIZE] for signed decimal
    f[SIZE] for floating point
    o[SIZE] for octal
    u[SIZE] for unsigned decimal
    x[SIZE] for hexadecimal
SIZE is the number of bytes which can be the number 1, 2, 4, 8 or 16,
    or C, I, S, L for 1, 2, 4, 8 bytes for integer types,
    or F, D, L for 4, 8, 16 bytes for floating point.
Any type specification can have a "z" suffix, which will add an ASCII dump at
    the end of the line.

If an error occurred, a diagnostic message will be printed to stderr, and the
exitcode will be non-zero."#;

/// Entry point of `od`: parses the command line and dumps the selected inputs.
///
/// `args` is the complete argument vector; element 0 (the program name) is
/// skipped for option parsing, while the whole vector is handed to
/// `parse_format_flags` to collect the output formats.
///
/// Returns the exit code: 0 after printing help or version information,
/// 1 when an argument is invalid, otherwise whatever `odfunc` returns.
pub fn uumain(args: Vec<String>) -> i32 {
    let mut opts = getopts::Options::new();

    opts.optopt("A", "address-radix",
                "Select the base in which file offsets are printed.", "RADIX");
    opts.optopt("j", "skip-bytes",
                "Skip BYTES input bytes before formatting and writing.", "BYTES");
    opts.optopt("N", "read-bytes",
                "limit dump to BYTES input bytes", "BYTES");
    opts.optopt("", "endian", "byte order to use for multi-byte formats", "big|little");
    opts.optopt("S", "strings",
                ("output strings of at least BYTES graphic chars. 3 is assumed when \
                 BYTES is not specified."),
                "BYTES");
    opts.optflagmulti("a", "", "named characters, ignoring high-order bit");
    opts.optflagmulti("b", "", "octal bytes");
    opts.optflagmulti("c", "", "ASCII characters or backslash escapes");
    opts.optflagmulti("d", "", "unsigned decimal 2-byte units");
    opts.optflagmulti("D", "", "unsigned decimal 4-byte units");
    opts.optflagmulti("o", "", "octal 2-byte units");

    opts.optflagmulti("I", "", "decimal 8-byte units");
    opts.optflagmulti("L", "", "decimal 8-byte units");
    opts.optflagmulti("i", "", "decimal 4-byte units");
    opts.optflagmulti("l", "", "decimal 8-byte units");
    opts.optflagmulti("x", "", "hexadecimal 2-byte units");
    opts.optflagmulti("h", "", "hexadecimal 2-byte units");

    opts.optflagmulti("O", "", "octal 4-byte units");
    opts.optflagmulti("s", "", "decimal 2-byte units");
    opts.optflagmulti("X", "", "hexadecimal 4-byte units");
    opts.optflagmulti("H", "", "hexadecimal 4-byte units");

    opts.optflagmulti("e", "", "floating point double precision (64-bit) units");
    opts.optflagmulti("f", "", "floating point single precision (32-bit) units");
    opts.optflagmulti("F", "", "floating point double precision (64-bit) units");

    opts.optmulti("t", "format", "select output format or formats", "TYPE");
    opts.optflag("v", "output-duplicates", "do not use * to mark line suppression");
    opts.optflagopt("w", "width",
                ("output BYTES bytes per output line. 32 is implied when BYTES is not \
                 specified."),
                "BYTES");
    // Help has no short form: "-h" is already taken by the hexadecimal
    // 2-byte format flag registered above.
    opts.optflag("", "help", "display this help and exit.");
    opts.optflag("", "version", "output version information and exit.");
    opts.optflag("", "traditional", "compatibility mode with one input, offset and label.");

    let matches = match opts.parse(&args[1..]) {
        Ok(m) => m,
        Err(f) => {
            disp_err!("{}", f);
            return 1;
        }
    };

    if matches.opt_present("help") {
        println!("{}", opts.usage(&USAGE));
        return 0;
    }
    if matches.opt_present("version") {
        println!("{} {}", executable!(), VERSION);
        return 0;
    }

    let input_offset_base = match parse_radix(matches.opt_str("A")) {
        Ok(r) => r,
        Err(f) => {
            disp_err!("Invalid -A/--address-radix\n{}", f);
            return 1;
        }
    };

    let byte_order = match matches.opt_str("endian").as_ref().map(String::as_ref) {
        None => { ByteOrder::Native },
        Some("little") => { ByteOrder::Little },
        Some("big") => { ByteOrder::Big },
        Some(s) => {
            disp_err!("Invalid argument --endian={}", s);
            return 1;
        }
    };

    let mut skip_bytes = match matches.opt_default("skip-bytes", "0") {
        None => 0,
        Some(s) => {
            match parse_number_of_bytes(&s) {
                Ok(i) => { i }
                Err(_) => {
                    disp_err!("Invalid argument --skip-bytes={}", s);
                    return 1;
                }
            }
        }
    };

    let mut label: Option<usize> = None;

    // A file-plus-offset command line overrides the skip count and may
    // supply a label.
    let input_strings = match parse_inputs(&matches) {
        Ok(CommandLineInputs::FileNames(v)) => v,
        Ok(CommandLineInputs::FileAndOffset((f, s, l))) => {
            skip_bytes = s;
            label = l;
            vec![f]
        },
        Err(e) => {
            disp_err!("Invalid inputs: {}", e);
            return 1;
        }
    };
    let inputs = input_strings
        .iter()
        .map(|w| match w as &str {
            "-" => InputSource::Stdin,
            x => InputSource::FileName(x),
        })
        .collect::<Vec<_>>();

    let formats = match parse_format_flags(&args) {
        Ok(f) => f,
        Err(e) => {
            disp_err!("{}", e);
            return 1;
        }
    };

    // No -w: 16 bytes per line. A bare -w: 32 bytes per line.
    let mut line_bytes = match matches.opt_default("w", "32") {
        None => 16,
        Some(s) => {
            match s.parse::<usize>() {
                Ok(i) => { i }
                Err(_) => {
                    // Reported like the other BYTES options instead of
                    // silently picking an arbitrary width.
                    disp_err!("Invalid argument --width={}", s);
                    return 1;
                }
            }
        }
    };
    // Size of the largest unit among all formats (at least 1); a line has to
    // hold a whole, non-zero number of those units.
    let min_bytes = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size));
    if line_bytes == 0 || line_bytes % min_bytes != 0 {
        show_warning!("invalid width {}; using {} instead", line_bytes, min_bytes);
        line_bytes = min_bytes;
    }

    let output_duplicates = matches.opt_present("v");

    let read_bytes = match matches.opt_str("read-bytes") {
        None => None,
        Some(s) => {
            match parse_number_of_bytes(&s) {
                Ok(i) => { Some(i) }
                Err(_) => {
                    disp_err!("Invalid argument --read-bytes={}", s);
                    return 1;
                }
            }
        }
    };

    odfunc(line_bytes, input_offset_base, byte_order, inputs, &formats[..],
            output_duplicates, skip_bytes, read_bytes, label)
}

// TODO: refactor, too many arguments
/// Reads the input one line at a time and prints each line in every
/// requested format.
///
/// * `line_bytes` - number of input bytes shown per output line.
/// * `input_offset_base` - radix of the offset column.
/// * `byte_order` - byte order passed on to `print_bytes`.
/// * `fnames` - input sources, handed to `MultifileReader`.
/// * `formats` - output formats; each one yields its own output line.
/// * `output_duplicates` - when false, a run of full lines identical to the
///   previously printed line is replaced by a single `*`.
/// * `skip_bytes` - passed to `PartialReader`; also the first offset printed.
/// * `read_bytes` - passed to `PartialReader` as an optional limit.
/// * `label` - optional second offset, advanced together with the offset.
///
/// Returns 1 if a read fails or the reader reports an error, otherwise 0.
///
/// # Panics
///
/// Panics if the largest format unit is bigger than `MAX_BYTES_PER_UNIT`.
fn odfunc(line_bytes: usize, input_offset_base: Radix, byte_order: ByteOrder,
        fnames: Vec<InputSource>, formats: &[ParsedFormatterItemInfo], output_duplicates: bool,
        skip_bytes: usize, read_bytes: Option<usize>, mut label: Option<usize>) -> i32 {

    let mf = MultifileReader::new(fnames);
    let pr = PartialReader::new(mf, skip_bytes, read_bytes);
    let mut input = PeekReader::new(pr);
    let mut addr = skip_bytes;
    let mut duplicate_line = false;
    let mut previous_bytes: Vec<u8> = Vec::new();
    // Zero-initialised on purpose: handing out a slice over uninitialised
    // memory (with_capacity + set_len) violates the contract of Vec::set_len.
    let mut bytes: Vec<u8> = vec![0; line_bytes + PEEK_BUFFER_SIZE];

    // Size in bytes of the largest unit among all formats.
    let byte_size_block = formats.iter().fold(1, |max, next| cmp::max(max, next.formatter_item_info.byte_size));
    // Widest rendering of one such block among all formats.
    let print_width_block = formats
        .iter()
        .fold(1, |max, next| {
            cmp::max(max, next.formatter_item_info.print_width * (byte_size_block / next.formatter_item_info.byte_size))
        });
    let print_width_line = print_width_block * (line_bytes / byte_size_block);

    if byte_size_block > MAX_BYTES_PER_UNIT {
        panic!("{}-bits types are unsupported. Current max={}-bits.",
                8 * byte_size_block,
                8 * MAX_BYTES_PER_UNIT);
    }

    let mut spaced_formatters: Vec<SpacedFormatterItemInfo> = formats
        .iter()
        .map(|f| SpacedFormatterItemInfo { frm: *f, spacing: [0; MAX_BYTES_PER_UNIT] })
        .collect();

    // calculate proper alignment for each item
    for sf in &mut spaced_formatters {
        let mut byte_size = sf.frm.formatter_item_info.byte_size;
        let mut items_in_block = byte_size_block / byte_size;
        let thisblock_width = sf.frm.formatter_item_info.print_width * items_in_block;
        let mut missing_spacing = print_width_block - thisblock_width;

        // Spread the missing width evenly over the items, then hand what is
        // left to every 2nd, 4th, ... item until nothing can be divided.
        while items_in_block > 0 {
            let avg_spacing: usize = missing_spacing / items_in_block;
            for i in 0..items_in_block {
                sf.spacing[i * byte_size] += avg_spacing;
                missing_spacing -= avg_spacing;
            }
            // this assumes the size of all types is a power of 2 (1, 2, 4, 8, 16, ...)
            items_in_block /= 2;
            byte_size *= 2;
        }
    }

    loop {
        // print each line data (or multi-format raster of several lines describing the same data).
        // TODO: we need to read more data in case a multi-byte sequence starts at the end of the line

        match input.peek_read(bytes.as_mut_slice(), PEEK_BUFFER_SIZE) {
            Ok((0, _)) => {
                print_final_offset(input_offset_base, addr, label);
                break;
            }
            Ok((n, peekbytes)) => {
                // not enough byte for a whole element, this should only happen on the last line.
                if n != line_bytes {
                    // set zero bytes in the part of the buffer that will be used, but is not filled.
                    let max_used = cmp::min(n + MAX_BYTES_PER_UNIT, line_bytes);

                    for i in n..max_used {
                        bytes[i] = 0;
                    }
                }

                if !output_duplicates
                        && n == line_bytes
                        && !previous_bytes.is_empty()
                        && previous_bytes[..line_bytes] == bytes[..line_bytes] {
                    // Only the first line of a run of duplicates prints the marker.
                    if !duplicate_line {
                        duplicate_line = true;
                        println!("*");
                    }
                }
                else {
                    duplicate_line = false;
                    if n == line_bytes {
                        // save a copy of the input unless it is the last line
                        previous_bytes.clone_from(&bytes);
                    }

                    print_bytes(byte_order, &bytes, n, peekbytes,
                        &print_with_radix(input_offset_base, addr, label),
                        &spaced_formatters, byte_size_block, print_width_line);
                }

                addr += n;
                if let Some(l) = label {
                    label = Some(l + n);
                }
            }
            Err(e) => {
                show_error!("{}", e);
                print_final_offset(input_offset_base, addr, label);
                return 1;
            }
        };
    }

    if input.has_error() {
        1
    } else {
        0
    }
}

/// Prints one chunk of input once per format: the first output line starts
/// with `prefix`, every following line is indented by the width of `prefix`.
///
/// * `bytes` - buffer holding the data; the first `length` bytes are
///   formatted, and multi-byte writers are additionally given the next
///   `peekbytes` bytes.
/// * `byte_size_block` - modulus used to pick the spacing entry for an item.
/// * `print_width_line` - width the formatted items are padded to before an
///   ASCII dump is appended.
///
/// # Panics
///
/// Panics if a format uses a byte size its writer kind does not support.
fn print_bytes(byte_order: ByteOrder, bytes: &[u8], length: usize, peekbytes: usize, prefix: &str,
        formats: &[SpacedFormatterItemInfo], byte_size_block: usize, print_width_line: usize) {
    // Width of the blank filler that stands in for the offset on all lines
    // but the first one of a multi-format raster.
    let prefix_width = prefix.chars().count();

    for (index, spaced) in formats.iter().enumerate() {
        let info = &spaced.frm.formatter_item_info;
        let item_size = info.byte_size;
        let mut line = String::new();

        let mut start = 0;
        while start < length {
            let end = start + item_size;

            // Alignment padding in front of the item.
            line.push_str(&format!("{:>width$}",
                    "",
                    width = spaced.spacing[start % byte_size_block]));

            match info.formatter {
                FormatWriter::IntWriter(func) => {
                    let value: u64 = match item_size {
                        1 => bytes[start] as u64,
                        2 => byte_order.read_u16(&bytes[start..end]) as u64,
                        4 => byte_order.read_u32(&bytes[start..end]) as u64,
                        8 => byte_order.read_u64(&bytes[start..end]),
                        _ => { panic!("Invalid byte_size: {}", info.byte_size); }
                    };
                    line.push_str(&func(value, item_size, info.print_width));
                }
                FormatWriter::FloatWriter(func) => {
                    let value: f64 = match item_size {
                        4 => byte_order.read_f32(&bytes[start..end]) as f64,
                        8 => byte_order.read_f64(&bytes[start..end]),
                        _ => { panic!("Invalid byte_size: {}", info.byte_size); }
                    };
                    line.push_str(&func(value));
                }
                FormatWriter::MultibyteWriter(func) => {
                    // Multi-byte writers see everything up to the end of the peeked data.
                    line.push_str(&func(&bytes[start..length+peekbytes]));
                }
            }
            start = end;
        }

        if spaced.frm.add_ascii_dump {
            // Pad short lines so the dump column lines up.
            let padding = print_width_line.saturating_sub(line.chars().count());
            line.push_str(&format!("{:>width$}  {}",
                    "",
                    format_ascii_dump(&bytes[..length]),
                    width=padding));
        }

        if index == 0 {
            // the offset is printed only once per raster
            print!("{}", prefix);
        }
        else {
            print!("{:>width$}", "", width=prefix_width);
        }
        print!("{}\n", line);
    }
}

// Parses the argument of -A/--address-radix, which selects how the file byte
// offset at the left margin is printed. A missing argument selects octal;
// anything other than exactly one of `d`, `o`, `n`, `x` is an error.
fn parse_radix(radix_str: Option<String>) -> Result<Radix, &'static str> {
    let arg = match radix_str {
        Some(arg) => arg,
        None => return Ok(Radix::Octal),
    };

    // Only a single-byte argument can name a radix.
    if arg.len() != 1 {
        return Err("Radix must be one of [d, o, n, x]\n");
    }

    match arg.as_bytes()[0] {
        b'd' => Ok(Radix::Decimal),
        b'x' => Ok(Radix::Hexadecimal),
        b'o' => Ok(Radix::Octal),
        b'n' => Ok(Radix::NoPrefix),
        _ => Err("Radix must be one of [d, o, n, x]\n"),
    }
}

/// Formats the offset `x` in radix `r`, zero-padded, followed by the label
/// in parentheses when one is given. With `Radix::NoPrefix` the offset is
/// left out and a label, if any, is shown in octal.
fn print_with_radix(r: Radix, x: usize, label: Option<usize>) -> String{
    match label {
        None => match r {
            Radix::Decimal => format!("{:07}", x),
            Radix::Hexadecimal => format!("{:06X}", x),
            Radix::Octal => format!("{:07o}", x),
            Radix::NoPrefix => String::from(""),
        },
        Some(l) => match r {
            Radix::Decimal => format!("{:07} ({:07})", x, l),
            Radix::Hexadecimal => format!("{:06X} ({:06X})", x, l),
            Radix::Octal => format!("{:07o} ({:07o})", x, l),
            Radix::NoPrefix => format!("({:07o})", l),
        },
    }
}

/// Writes the closing offset line (offset plus optional label, then a
/// newline). Nothing is written when offsets are disabled with
/// `Radix::NoPrefix` and there is no label (--traditional) to show.
fn print_final_offset(r: Radix, x: usize, label: Option<usize>) {
    if r == Radix::NoPrefix && label.is_none() {
        return;
    }

    let line = print_with_radix(r, x, label);
    print!("{}\n", line);
}

/// A parsed output format together with the alignment padding computed for it.
struct SpacedFormatterItemInfo {
    /// The format as it was parsed from the command line.
    frm: ParsedFormatterItemInfo,
    /// Number of spaces to emit before the item that starts at a given byte
    /// position within a block; looked up with `offset % byte_size_block`.
    spacing: [usize; MAX_BYTES_PER_UNIT],
}