cat utility refactoring:

- Fewer `if` branches
- Unnecessary range variable removed
- Optional fail-assertion commented out
This commit is contained in:
evgeniy 2016-10-16 12:43:23 +02:00
parent 7bb5891810
commit dd16c58ab4

View file

@ -1,17 +1,14 @@
#![crate_name = "uu_cat"] #![crate_name = "uu_cat"]
/* // This file is part of the uutils coreutils package.
* This file is part of the uutils coreutils package. //
* // (c) Jordi Boggiano <j.boggiano@seld.be>
* (c) Jordi Boggiano <j.boggiano@seld.be> //
* // For the full copyright and license information, please view the LICENSE
* For the full copyright and license information, please view the LICENSE // file that was distributed with this source code.
* file that was distributed with this source code. //
*/
/* last synced with: cat (GNU coreutils) 8.13 */ // last synced with: cat (GNU coreutils) 8.13
extern crate libc;
#[macro_use] #[macro_use]
extern crate uucore; extern crate uucore;
@ -21,15 +18,16 @@ use std::intrinsics::{copy_nonoverlapping};
use std::io::{stdout, stdin, stderr, Write, Read, Result}; use std::io::{stdout, stdin, stderr, Write, Read, Result};
use uucore::fs::is_stdin_interactive; use uucore::fs::is_stdin_interactive;
static SYNTAX: &'static str = "[OPTION]... [FILE]..."; static SYNTAX: &'static str = "[OPTION]... [FILE]...";
static SUMMARY: &'static str = "Concatenate FILE(s), or standard input, to standard output static SUMMARY: &'static str = "Concatenate FILE(s), or standard input, to standard output
With no FILE, or when FILE is -, read standard input."; With no FILE, or when FILE is -, read standard input.";
static LONG_HELP: &'static str = ""; static LONG_HELP: &'static str = "";
pub fn uumain(args: Vec<String>) -> i32 { pub fn uumain(args: Vec<String>) -> i32 {
let matches = new_coreopts!(SYNTAX, SUMMARY, LONG_HELP) let matches = new_coreopts!(SYNTAX, SUMMARY, LONG_HELP)
.optflag("A", "show-all", "equivalent to -vET") .optflag("A", "show-all", "equivalent to -vET")
.optflag("b", "number-nonblank", .optflag("b",
"number-nonblank",
"number nonempty output lines, overrides -n") "number nonempty output lines, overrides -n")
.optflag("e", "", "equivalent to -vE") .optflag("e", "", "equivalent to -vE")
.optflag("E", "show-ends", "display $ at end of each line") .optflag("E", "show-ends", "display $ at end of each line")
@ -37,7 +35,8 @@ pub fn uumain(args: Vec<String>) -> i32 {
.optflag("s", "squeeze-blank", "suppress repeated empty output lines") .optflag("s", "squeeze-blank", "suppress repeated empty output lines")
.optflag("t", "", "equivalent to -vT") .optflag("t", "", "equivalent to -vT")
.optflag("T", "show-tabs", "display TAB characters as ^I") .optflag("T", "show-tabs", "display TAB characters as ^I")
.optflag("v", "show-nonprinting", .optflag("v",
"show-nonprinting",
"use ^ and M- notation, except for LF (\\n) and TAB (\\t)") "use ^ and M- notation, except for LF (\\n) and TAB (\\t)")
.parse(args); .parse(args);
@ -48,19 +47,22 @@ pub fn uumain(args: Vec<String>) -> i32 {
} else { } else {
NumberingMode::NumberNone NumberingMode::NumberNone
}; };
let show_nonprint = matches.opts_present(&["A".to_owned(), "e".to_owned(), let show_nonprint =
"t".to_owned(), "v".to_owned()]); matches.opts_present(&["A".to_owned(), "e".to_owned(), "t".to_owned(), "v".to_owned()]);
let show_ends = matches.opts_present(&["E".to_owned(), "A".to_owned(), let show_ends = matches.opts_present(&["E".to_owned(), "A".to_owned(), "e".to_owned()]);
"e".to_owned()]); let show_tabs = matches.opts_present(&["A".to_owned(), "T".to_owned(), "t".to_owned()]);
let show_tabs = matches.opts_present(&["A".to_owned(), "T".to_owned(),
"t".to_owned()]);
let squeeze_blank = matches.opt_present("s"); let squeeze_blank = matches.opt_present("s");
let mut files = matches.free; let mut files = matches.free;
if files.is_empty() { if files.is_empty() {
files.push("-".to_owned()); files.push("-".to_owned());
} }
exec(files, number_mode, show_nonprint, show_ends, show_tabs, squeeze_blank); exec(files,
number_mode,
show_nonprint,
show_ends,
show_tabs,
squeeze_blank);
0 0
} }
@ -72,29 +74,32 @@ enum NumberingMode {
NumberAll, NumberAll,
} }
fn write_lines(files: Vec<String>, number: NumberingMode, squeeze_blank: bool, fn write_lines(files: Vec<String>, number: NumberingMode, squeeze_blank: bool, show_ends: bool) {
show_ends: bool) { // initialize end of line
let end_of_line = if show_ends {
"$\n".as_bytes()
} else {
"\n".as_bytes()
};
let mut line_counter: usize = 1; let mut line_counter: usize = 1;
for (mut reader, interactive) in files.iter().filter_map(|p| open(&p[..])) { for (mut reader, interactive) in files.iter().filter_map(|p| open(&p[..])) {
let mut in_buf = [0; 1024 * 31]; let mut in_buf = [0; 1024 * 31];
let mut out_buf = [0; 1024 * 64]; let mut out_buf = [0; 1024 * 64];
let mut writer = UnsafeWriter::new(&mut out_buf[..], stdout()); let mut writer = UnsafeWriter::new(&mut out_buf[..], stdout());
let mut at_line_start = true; let mut at_line_start = true;
let mut one_blank_kept = false; let mut one_blank_kept = false;
while let Ok(n) = reader.read(&mut in_buf) { while let Ok(n) = reader.read(&mut in_buf) {
if n == 0 { break } if n == 0 {
break;
}
let in_buf = &in_buf[..n]; let in_buf = &in_buf[..n];
let mut buf_pos = 0..n; let mut pos = 0;
loop { while pos < n {
writer.possibly_flush(); writer.possibly_flush();
let pos = match buf_pos.next() {
Some(p) => p,
None => break,
};
if in_buf[pos] == '\n' as u8 { if in_buf[pos] == '\n' as u8 {
if !at_line_start || !squeeze_blank || !one_blank_kept { if !at_line_start || !squeeze_blank || !one_blank_kept {
one_blank_kept = true; one_blank_kept = true;
@ -102,15 +107,13 @@ fn write_lines(files: Vec<String>, number: NumberingMode, squeeze_blank: bool,
(write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); (write!(&mut writer, "{0:6}\t", line_counter)).unwrap();
line_counter += 1; line_counter += 1;
} }
if show_ends { writer.write_all(end_of_line).unwrap();
writer.write_all(&['$' as u8]).unwrap();
}
writer.write_all(&['\n' as u8]).unwrap();
if interactive { if interactive {
writer.flush().unwrap(); writer.flush().unwrap();
} }
} }
at_line_start = true; at_line_start = true;
pos += 1;
continue; continue;
} else if one_blank_kept { } else if one_blank_kept {
one_blank_kept = false; one_blank_kept = false;
@ -122,16 +125,13 @@ fn write_lines(files: Vec<String>, number: NumberingMode, squeeze_blank: bool,
match in_buf[pos..].iter().position(|c| *c == '\n' as u8) { match in_buf[pos..].iter().position(|c| *c == '\n' as u8) {
Some(p) => { Some(p) => {
writer.write_all(&in_buf[pos..pos + p]).unwrap(); writer.write_all(&in_buf[pos..pos + p]).unwrap();
if show_ends { writer.write_all(end_of_line).unwrap();
writer.write_all(&['$' as u8]).unwrap();
}
writer.write_all(&['\n' as u8]).unwrap();
if interactive { if interactive {
writer.flush().unwrap(); writer.flush().unwrap();
} }
buf_pos = pos + p + 1..n; pos += p + 1;
at_line_start = true; at_line_start = true;
}, }
None => { None => {
writer.write_all(&in_buf[pos..]).unwrap(); writer.write_all(&in_buf[pos..]).unwrap();
at_line_start = false; at_line_start = false;
@ -143,8 +143,24 @@ fn write_lines(files: Vec<String>, number: NumberingMode, squeeze_blank: bool,
} }
} }
fn write_bytes(files: Vec<String>, number: NumberingMode, squeeze_blank: bool, fn write_bytes(files: Vec<String>,
show_ends: bool, show_nonprint: bool, show_tabs: bool) { number: NumberingMode,
squeeze_blank: bool,
show_ends: bool,
show_tabs: bool,
show_nonprint: bool) {
// initialize end of line
let end_of_line = if show_ends {
"$\n".as_bytes()
} else {
"\n".as_bytes()
};
// initialize tab symbol
let tab = if show_tabs {
"^I".as_bytes()
} else {
"\t".as_bytes()
};
let mut line_counter: usize = 1; let mut line_counter: usize = 1;
@ -153,12 +169,14 @@ fn write_bytes(files: Vec<String>, number: NumberingMode, squeeze_blank: bool,
// Flush all 1024 iterations. // Flush all 1024 iterations.
let mut flush_counter = 0usize..1024; let mut flush_counter = 0usize..1024;
let mut in_buf = [0; 1024 * 32]; let mut in_buf = [0; 1024 * 32];
let mut out_buf = [0; 1024 * 64]; let mut out_buf = [0; 1024 * 64];
let mut writer = UnsafeWriter::new(&mut out_buf[..], stdout()); let mut writer = UnsafeWriter::new(&mut out_buf[..], stdout());
let mut at_line_start = true; let mut at_line_start = true;
while let Ok(n) = reader.read(&mut in_buf) { while let Ok(n) = reader.read(&mut in_buf) {
if n == 0 { break } if n == 0 {
break;
}
for &byte in in_buf[..n].iter() { for &byte in in_buf[..n].iter() {
if flush_counter.next().is_none() { if flush_counter.next().is_none() {
@ -171,10 +189,7 @@ fn write_bytes(files: Vec<String>, number: NumberingMode, squeeze_blank: bool,
(write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); (write!(&mut writer, "{0:6}\t", line_counter)).unwrap();
line_counter += 1; line_counter += 1;
} }
if show_ends { writer.write_all(end_of_line).unwrap();
writer.write_all(&['$' as u8]).unwrap();
}
writer.write_all(&['\n' as u8]).unwrap();
if interactive { if interactive {
writer.flush().unwrap(); writer.flush().unwrap();
} }
@ -187,30 +202,28 @@ fn write_bytes(files: Vec<String>, number: NumberingMode, squeeze_blank: bool,
line_counter += 1; line_counter += 1;
at_line_start = false; at_line_start = false;
} }
// This code is slow because of the many branches. cat in glibc avoids // This code is slow because of the many branches. cat in glibc avoids
// this by having the whole loop inside show_nonprint. // this by having the whole loop inside show_nonprint.
if byte == '\t' as u8 { if byte == '\t' as u8 {
if show_tabs { writer.write_all(tab)
writer.write_all("^I".as_bytes()) } else if show_nonprint {
let byte = match byte {
128...255 => {
writer.write_all("M-".as_bytes()).unwrap();
byte - 128
}
_ => byte,
};
match byte {
0...31 => writer.write_all(&['^' as u8, byte + 64]),
127 => writer.write_all(&['^' as u8, byte - 64]),
_ => writer.write_all(&[byte]),
}
} else { } else {
writer.write_all(&[byte]) writer.write_all(&[byte])
} }
} else if show_nonprint { .unwrap();
let byte = match byte {
128 ... 255 => {
writer.write_all("M-".as_bytes()).unwrap();
byte - 128
},
_ => byte,
};
match byte {
0 ... 31 => writer.write_all(&['^' as u8, byte + 64]),
127 => writer.write_all(&['^' as u8, byte - 64]),
_ => writer.write_all(&[byte]),
}
} else {
writer.write_all(&[byte])
}.unwrap();
} }
} }
} }
@ -222,18 +235,29 @@ fn write_fast(files: Vec<String>) {
for (mut reader, _) in files.iter().filter_map(|p| open(&p[..])) { for (mut reader, _) in files.iter().filter_map(|p| open(&p[..])) {
while let Ok(n) = reader.read(&mut in_buf) { while let Ok(n) = reader.read(&mut in_buf) {
if n == 0 { break } if n == 0 {
break;
}
// This interface is completely broken. // This interface is completely broken.
writer.write_all(&in_buf[..n]).unwrap(); writer.write_all(&in_buf[..n]).unwrap();
} }
} }
} }
fn exec(files: Vec<String>, number: NumberingMode, show_nonprint: bool, fn exec(files: Vec<String>,
show_ends: bool, show_tabs: bool, squeeze_blank: bool) { number: NumberingMode,
show_nonprint: bool,
show_ends: bool,
show_tabs: bool,
squeeze_blank: bool) {
if show_nonprint || show_tabs { if show_nonprint || show_tabs {
write_bytes(files, number, squeeze_blank, show_ends, show_nonprint, show_tabs); write_bytes(files,
number,
squeeze_blank,
show_ends,
show_tabs,
show_nonprint);
} else if number != NumberingMode::NumberNone || squeeze_blank || show_ends { } else if number != NumberingMode::NumberNone || squeeze_blank || show_ends {
write_lines(files, number, squeeze_blank, show_ends); write_lines(files, number, squeeze_blank, show_ends);
} else { } else {
@ -253,7 +277,7 @@ fn open(path: &str) -> Option<(Box<Read>, bool)> {
Err(e) => { Err(e) => {
(writeln!(&mut stderr(), "cat: {0}: {1}", path, e.to_string())).unwrap(); (writeln!(&mut stderr(), "cat: {0}: {1}", path, e.to_string())).unwrap();
None None
}, }
} }
} }
@ -266,7 +290,7 @@ struct UnsafeWriter<'a, W: Write> {
impl<'a, W: Write> UnsafeWriter<'a, W> { impl<'a, W: Write> UnsafeWriter<'a, W> {
fn new(buf: &'a mut [u8], inner: W) -> UnsafeWriter<'a, W> { fn new(buf: &'a mut [u8], inner: W) -> UnsafeWriter<'a, W> {
let threshold = buf.len()/2; let threshold = buf.len() / 2;
UnsafeWriter { UnsafeWriter {
inner: inner, inner: inner,
buf: buf, buf: buf,
@ -296,21 +320,21 @@ impl<'a, W: Write> UnsafeWriter<'a, W> {
} }
} }
#[inline(never)] //#[inline(never)]
fn fail() -> ! { //fn fail() -> ! {
panic!("assertion failed"); // panic!("assertion failed");
} //}
impl<'a, W: Write> Write for UnsafeWriter<'a, W> { impl<'a, W: Write> Write for UnsafeWriter<'a, W> {
fn write(&mut self, buf: &[u8]) -> Result<usize> { fn write(&mut self, buf: &[u8]) -> Result<usize> {
let dst = &mut self.buf[self.pos..]; //let dst = &mut self.buf[self.pos..];
let len = buf.len(); let len = buf.len();
if len > dst.len() { // assertion is true for current code
fail(); // and it is a bottleneck place
} // if len > dst.len() {
unsafe { // fail();
copy_nonoverlapping(buf.as_ptr(), dst.as_mut_ptr(), len) // }
} unsafe { copy_nonoverlapping(buf.as_ptr(), self.buf[self.pos..].as_mut_ptr(), len) }
self.pos += len; self.pos += len;
Ok(len) Ok(len)
} }