From dd16c58ab4549a9a15c4a74705834bd5160712fd Mon Sep 17 00:00:00 2001 From: evgeniy Date: Sun, 16 Oct 2016 12:43:23 +0200 Subject: [PATCH] cat utility refactoring: - Less if branches - Unnecessary range variable deleted - Optional fail-assertion commented --- src/cat/cat.rs | 196 +++++++++++++++++++++++++++---------------------- 1 file changed, 110 insertions(+), 86 deletions(-) diff --git a/src/cat/cat.rs b/src/cat/cat.rs index d303388c8..c1a81bf05 100644 --- a/src/cat/cat.rs +++ b/src/cat/cat.rs @@ -1,17 +1,14 @@ #![crate_name = "uu_cat"] -/* - * This file is part of the uutils coreutils package. - * - * (c) Jordi Boggiano - * - * For the full copyright and license information, please view the LICENSE - * file that was distributed with this source code. - */ +// This file is part of the uutils coreutils package. +// +// (c) Jordi Boggiano +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// -/* last synced with: cat (GNU coreutils) 8.13 */ - -extern crate libc; +// last synced with: cat (GNU coreutils) 8.13 #[macro_use] extern crate uucore; @@ -21,15 +18,16 @@ use std::intrinsics::{copy_nonoverlapping}; use std::io::{stdout, stdin, stderr, Write, Read, Result}; use uucore::fs::is_stdin_interactive; -static SYNTAX: &'static str = "[OPTION]... [FILE]..."; +static SYNTAX: &'static str = "[OPTION]... 
[FILE]..."; static SUMMARY: &'static str = "Concatenate FILE(s), or standard input, to standard output - With no FILE, or when FILE is -, read standard input."; -static LONG_HELP: &'static str = ""; + With no FILE, or when FILE is -, read standard input."; +static LONG_HELP: &'static str = ""; pub fn uumain(args: Vec) -> i32 { let matches = new_coreopts!(SYNTAX, SUMMARY, LONG_HELP) .optflag("A", "show-all", "equivalent to -vET") - .optflag("b", "number-nonblank", + .optflag("b", + "number-nonblank", "number nonempty output lines, overrides -n") .optflag("e", "", "equivalent to -vE") .optflag("E", "show-ends", "display $ at end of each line") @@ -37,7 +35,8 @@ pub fn uumain(args: Vec) -> i32 { .optflag("s", "squeeze-blank", "suppress repeated empty output lines") .optflag("t", "", "equivalent to -vT") .optflag("T", "show-tabs", "display TAB characters as ^I") - .optflag("v", "show-nonprinting", + .optflag("v", + "show-nonprinting", "use ^ and M- notation, except for LF (\\n) and TAB (\\t)") .parse(args); @@ -48,19 +47,22 @@ pub fn uumain(args: Vec) -> i32 { } else { NumberingMode::NumberNone }; - let show_nonprint = matches.opts_present(&["A".to_owned(), "e".to_owned(), - "t".to_owned(), "v".to_owned()]); - let show_ends = matches.opts_present(&["E".to_owned(), "A".to_owned(), - "e".to_owned()]); - let show_tabs = matches.opts_present(&["A".to_owned(), "T".to_owned(), - "t".to_owned()]); + let show_nonprint = + matches.opts_present(&["A".to_owned(), "e".to_owned(), "t".to_owned(), "v".to_owned()]); + let show_ends = matches.opts_present(&["E".to_owned(), "A".to_owned(), "e".to_owned()]); + let show_tabs = matches.opts_present(&["A".to_owned(), "T".to_owned(), "t".to_owned()]); let squeeze_blank = matches.opt_present("s"); let mut files = matches.free; if files.is_empty() { files.push("-".to_owned()); } - exec(files, number_mode, show_nonprint, show_ends, show_tabs, squeeze_blank); + exec(files, + number_mode, + show_nonprint, + show_ends, + show_tabs, + 
squeeze_blank); 0 } @@ -72,29 +74,32 @@ enum NumberingMode { NumberAll, } -fn write_lines(files: Vec, number: NumberingMode, squeeze_blank: bool, - show_ends: bool) { +fn write_lines(files: Vec, number: NumberingMode, squeeze_blank: bool, show_ends: bool) { + // initialize end of line + let end_of_line = if show_ends { + "$\n".as_bytes() + } else { + "\n".as_bytes() + }; let mut line_counter: usize = 1; for (mut reader, interactive) in files.iter().filter_map(|p| open(&p[..])) { - let mut in_buf = [0; 1024 * 31]; + let mut in_buf = [0; 1024 * 31]; let mut out_buf = [0; 1024 * 64]; let mut writer = UnsafeWriter::new(&mut out_buf[..], stdout()); let mut at_line_start = true; let mut one_blank_kept = false; while let Ok(n) = reader.read(&mut in_buf) { - if n == 0 { break } + if n == 0 { + break; + } let in_buf = &in_buf[..n]; - let mut buf_pos = 0..n; - loop { + let mut pos = 0; + while pos < n { writer.possibly_flush(); - let pos = match buf_pos.next() { - Some(p) => p, - None => break, - }; if in_buf[pos] == '\n' as u8 { if !at_line_start || !squeeze_blank || !one_blank_kept { one_blank_kept = true; @@ -102,15 +107,13 @@ fn write_lines(files: Vec, number: NumberingMode, squeeze_blank: bool, (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); line_counter += 1; } - if show_ends { - writer.write_all(&['$' as u8]).unwrap(); - } - writer.write_all(&['\n' as u8]).unwrap(); + writer.write_all(end_of_line).unwrap(); if interactive { writer.flush().unwrap(); } } at_line_start = true; + pos += 1; continue; } else if one_blank_kept { one_blank_kept = false; @@ -122,16 +125,13 @@ fn write_lines(files: Vec, number: NumberingMode, squeeze_blank: bool, match in_buf[pos..].iter().position(|c| *c == '\n' as u8) { Some(p) => { writer.write_all(&in_buf[pos..pos + p]).unwrap(); - if show_ends { - writer.write_all(&['$' as u8]).unwrap(); - } - writer.write_all(&['\n' as u8]).unwrap(); + writer.write_all(end_of_line).unwrap(); if interactive { writer.flush().unwrap(); } - buf_pos = 
pos + p + 1..n; + pos += p + 1; at_line_start = true; - }, + } None => { writer.write_all(&in_buf[pos..]).unwrap(); at_line_start = false; @@ -143,8 +143,24 @@ fn write_lines(files: Vec, number: NumberingMode, squeeze_blank: bool, } } -fn write_bytes(files: Vec, number: NumberingMode, squeeze_blank: bool, - show_ends: bool, show_nonprint: bool, show_tabs: bool) { +fn write_bytes(files: Vec, + number: NumberingMode, + squeeze_blank: bool, + show_ends: bool, + show_tabs: bool, + show_nonprint: bool) { + // initialize end of line + let end_of_line = if show_ends { + "$\n".as_bytes() + } else { + "\n".as_bytes() + }; + // initialize tab symbol + let tab = if show_tabs { + "^I".as_bytes() + } else { + "\t".as_bytes() + }; let mut line_counter: usize = 1; @@ -153,12 +169,14 @@ fn write_bytes(files: Vec, number: NumberingMode, squeeze_blank: bool, // Flush all 1024 iterations. let mut flush_counter = 0usize..1024; - let mut in_buf = [0; 1024 * 32]; + let mut in_buf = [0; 1024 * 32]; let mut out_buf = [0; 1024 * 64]; let mut writer = UnsafeWriter::new(&mut out_buf[..], stdout()); let mut at_line_start = true; while let Ok(n) = reader.read(&mut in_buf) { - if n == 0 { break } + if n == 0 { + break; + } for &byte in in_buf[..n].iter() { if flush_counter.next().is_none() { @@ -171,10 +189,7 @@ fn write_bytes(files: Vec, number: NumberingMode, squeeze_blank: bool, (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); line_counter += 1; } - if show_ends { - writer.write_all(&['$' as u8]).unwrap(); - } - writer.write_all(&['\n' as u8]).unwrap(); + writer.write_all(end_of_line).unwrap(); if interactive { writer.flush().unwrap(); } @@ -187,30 +202,28 @@ fn write_bytes(files: Vec, number: NumberingMode, squeeze_blank: bool, line_counter += 1; at_line_start = false; } + // This code is slow because of the many branches. cat in glibc avoids // this by having the whole loop inside show_nonprint.
if byte == '\t' as u8 { - if show_tabs { - writer.write_all("^I".as_bytes()) + writer.write_all(tab) + } else if show_nonprint { + let byte = match byte { + 128...255 => { + writer.write_all("M-".as_bytes()).unwrap(); + byte - 128 + } + _ => byte, + }; + match byte { + 0...31 => writer.write_all(&['^' as u8, byte + 64]), + 127 => writer.write_all(&['^' as u8, byte - 64]), + _ => writer.write_all(&[byte]), + } } else { writer.write_all(&[byte]) } - } else if show_nonprint { - let byte = match byte { - 128 ... 255 => { - writer.write_all("M-".as_bytes()).unwrap(); - byte - 128 - }, - _ => byte, - }; - match byte { - 0 ... 31 => writer.write_all(&['^' as u8, byte + 64]), - 127 => writer.write_all(&['^' as u8, byte - 64]), - _ => writer.write_all(&[byte]), - } - } else { - writer.write_all(&[byte]) - }.unwrap(); + .unwrap(); } } } @@ -222,18 +235,29 @@ fn write_fast(files: Vec) { for (mut reader, _) in files.iter().filter_map(|p| open(&p[..])) { while let Ok(n) = reader.read(&mut in_buf) { - if n == 0 { break } + if n == 0 { + break; + } // This interface is completely broken. 
writer.write_all(&in_buf[..n]).unwrap(); } } } -fn exec(files: Vec, number: NumberingMode, show_nonprint: bool, - show_ends: bool, show_tabs: bool, squeeze_blank: bool) { +fn exec(files: Vec, + number: NumberingMode, + show_nonprint: bool, + show_ends: bool, + show_tabs: bool, + squeeze_blank: bool) { if show_nonprint || show_tabs { - write_bytes(files, number, squeeze_blank, show_ends, show_nonprint, show_tabs); + write_bytes(files, + number, + squeeze_blank, + show_ends, + show_tabs, + show_nonprint); } else if number != NumberingMode::NumberNone || squeeze_blank || show_ends { write_lines(files, number, squeeze_blank, show_ends); } else { @@ -253,7 +277,7 @@ fn open(path: &str) -> Option<(Box, bool)> { Err(e) => { (writeln!(&mut stderr(), "cat: {0}: {1}", path, e.to_string())).unwrap(); None - }, + } } } @@ -266,7 +290,7 @@ struct UnsafeWriter<'a, W: Write> { impl<'a, W: Write> UnsafeWriter<'a, W> { fn new(buf: &'a mut [u8], inner: W) -> UnsafeWriter<'a, W> { - let threshold = buf.len()/2; + let threshold = buf.len() / 2; UnsafeWriter { inner: inner, buf: buf, @@ -296,21 +320,21 @@ impl<'a, W: Write> UnsafeWriter<'a, W> { } } -#[inline(never)] -fn fail() -> ! { - panic!("assertion failed"); -} +//#[inline(never)] +//fn fail() -> ! { +// panic!("assertion failed"); +//} impl<'a, W: Write> Write for UnsafeWriter<'a, W> { fn write(&mut self, buf: &[u8]) -> Result { - let dst = &mut self.buf[self.pos..]; + //let dst = &mut self.buf[self.pos..]; let len = buf.len(); - if len > dst.len() { - fail(); - } - unsafe { - copy_nonoverlapping(buf.as_ptr(), dst.as_mut_ptr(), len) - } + // assertion is true for current code + // and it is a bottleneck place + // if len > dst.len() { + // fail(); + // } + unsafe { copy_nonoverlapping(buf.as_ptr(), self.buf[self.pos..].as_mut_ptr(), len) } self.pos += len; Ok(len) }