2014-07-06 08:13:36 +00:00
|
|
|
#![crate_name = "wc"]
|
2013-11-17 09:41:40 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This file is part of the uutils coreutils package.
|
|
|
|
*
|
|
|
|
* (c) Boden Garman <bpgarman@gmail.com>
|
|
|
|
*
|
|
|
|
* For the full copyright and license information, please view the LICENSE
|
|
|
|
* file that was distributed with this source code.
|
|
|
|
*/
|
|
|
|
|
2014-03-31 16:40:21 +00:00
|
|
|
#![feature(macro_rules)]
|
2014-02-07 06:39:07 +00:00
|
|
|
|
2014-02-16 21:29:31 +00:00
|
|
|
extern crate getopts;
|
2014-04-07 22:43:34 +00:00
|
|
|
extern crate libc;
|
2013-11-17 09:41:40 +00:00
|
|
|
|
2014-01-22 11:31:28 +00:00
|
|
|
use std::str::from_utf8;
|
2014-07-10 02:51:51 +00:00
|
|
|
use std::io::{print, stdin_raw, File, BufferedReader};
|
2014-06-09 01:49:06 +00:00
|
|
|
use StdResult = std::result::Result;
|
2014-02-07 06:39:07 +00:00
|
|
|
use getopts::Matches;
|
|
|
|
|
2014-02-23 22:17:48 +00:00
|
|
|
#[path = "../common/util.rs"]
|
2014-02-07 06:39:07 +00:00
|
|
|
mod util;
|
2013-11-17 09:41:40 +00:00
|
|
|
|
|
|
|
struct Result {
|
2014-05-25 09:20:52 +00:00
|
|
|
filename: String,
|
2013-11-17 09:41:40 +00:00
|
|
|
bytes: uint,
|
|
|
|
chars: uint,
|
|
|
|
lines: uint,
|
|
|
|
words: uint,
|
|
|
|
max_line_length: uint,
|
|
|
|
}
|
|
|
|
|
2014-02-07 06:39:07 +00:00
|
|
|
// Name of this utility. NOTE(review): nothing in this file reads it directly;
// presumably the macros from `util` (`crash!`, `show_error!`) expand to a
// reference to it — confirm against ../common/util.rs.
static NAME: &'static str = "wc";
|
|
|
|
|
2014-06-08 07:56:37 +00:00
|
|
|
pub fn uumain(args: Vec<String>) -> int {
|
2014-05-16 08:32:58 +00:00
|
|
|
let program = args.get(0).clone();
|
2014-05-30 08:35:54 +00:00
|
|
|
let opts = [
|
2014-02-07 06:39:07 +00:00
|
|
|
getopts::optflag("c", "bytes", "print the byte counts"),
|
|
|
|
getopts::optflag("m", "chars", "print the character counts"),
|
|
|
|
getopts::optflag("l", "lines", "print the newline counts"),
|
|
|
|
getopts::optflag("L", "max-line-length", "print the length of the longest line"),
|
|
|
|
getopts::optflag("w", "words", "print the word counts"),
|
|
|
|
getopts::optflag("h", "help", "display this help and exit"),
|
|
|
|
getopts::optflag("V", "version", "output version information and exit"),
|
2013-11-17 09:41:40 +00:00
|
|
|
];
|
|
|
|
|
2014-02-07 06:39:07 +00:00
|
|
|
let matches = match getopts::getopts(args.tail(), opts) {
|
2013-11-17 09:41:40 +00:00
|
|
|
Ok(m) => m,
|
|
|
|
Err(f) => {
|
2014-06-15 10:50:40 +00:00
|
|
|
crash!(1, "Invalid options\n{}", f)
|
2013-11-17 09:41:40 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
if matches.opt_present("help") {
|
2014-01-13 09:05:02 +00:00
|
|
|
println!("Usage:");
|
2013-11-17 09:41:40 +00:00
|
|
|
println!(" {0:s} [OPTION]... [FILE]...", program);
|
2014-01-13 09:05:02 +00:00
|
|
|
println!("");
|
2014-05-17 10:32:14 +00:00
|
|
|
print(getopts::usage("Print newline, word and byte counts for each FILE", opts).as_slice());
|
2014-01-13 09:05:02 +00:00
|
|
|
println!("");
|
|
|
|
println!("With no FILE, or when FILE is -, read standard input.");
|
2014-06-08 07:56:37 +00:00
|
|
|
return 0;
|
2013-11-17 09:41:40 +00:00
|
|
|
}
|
|
|
|
|
2014-01-22 11:31:28 +00:00
|
|
|
if matches.opt_present("version") {
|
2014-01-13 09:05:02 +00:00
|
|
|
println!("wc 1.0.0");
|
2014-06-08 07:56:37 +00:00
|
|
|
return 0;
|
2013-11-17 09:41:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
let mut files = matches.free.clone();
|
|
|
|
if files.is_empty() {
|
2014-05-28 06:33:39 +00:00
|
|
|
files = vec!("-".to_string());
|
2013-11-17 09:41:40 +00:00
|
|
|
}
|
|
|
|
|
2014-06-09 01:49:06 +00:00
|
|
|
match wc(files, &matches) {
|
|
|
|
Ok(()) => ( /* pass */ ),
|
|
|
|
Err(e) => return e
|
|
|
|
}
|
2014-06-08 07:56:37 +00:00
|
|
|
|
2014-06-12 04:41:53 +00:00
|
|
|
0
|
2013-11-17 09:41:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// ASCII whitespace bytes used when input cannot be decoded as UTF-8.
static CR: u8 = '\r' as u8;
static LF: u8 = '\n' as u8;
static SPACE: u8 = ' ' as u8;
static TAB: u8 = '\t' as u8;
// Vertical tab. This used to be 0x16 (SYN, "synchronous idle"), which is a
// control character and not whitespace; the whitespace byte is 0x0B.
static VT: u8 = 0x0B as u8;
static FF: u8 = 0x0C as u8;

/// Returns `true` when `byte` is an ASCII whitespace byte that separates
/// words inside a line: space, horizontal tab, carriage return, vertical tab
/// or form feed.
///
/// LF is deliberately not part of the set, so callers that relied on the
/// previous behaviour for the line terminator are unaffected.
#[inline(always)]
fn is_word_seperator(byte: u8) -> bool {
    byte == SPACE || byte == TAB || byte == CR || byte == VT || byte == FF
}
|
|
|
|
|
2014-06-09 01:49:06 +00:00
|
|
|
pub fn wc(files: Vec<String>, matches: &Matches) -> StdResult<(), int> {
|
2013-11-17 09:41:40 +00:00
|
|
|
let mut total_line_count: uint = 0;
|
|
|
|
let mut total_word_count: uint = 0;
|
|
|
|
let mut total_char_count: uint = 0;
|
|
|
|
let mut total_byte_count: uint = 0;
|
|
|
|
let mut total_longest_line_length: uint = 0;
|
|
|
|
|
2014-04-26 05:03:08 +00:00
|
|
|
let mut results = vec!();
|
2013-11-17 09:41:40 +00:00
|
|
|
let mut max_str_len: uint = 0;
|
|
|
|
|
|
|
|
for path in files.iter() {
|
2014-05-28 06:33:39 +00:00
|
|
|
let mut reader = match open(path.to_string()) {
|
2014-06-09 01:49:06 +00:00
|
|
|
Ok(f) => f,
|
|
|
|
Err(e) => { return Err(e); }
|
2013-11-17 09:41:40 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
let mut line_count: uint = 0;
|
|
|
|
let mut word_count: uint = 0;
|
|
|
|
let mut byte_count: uint = 0;
|
|
|
|
let mut char_count: uint = 0;
|
|
|
|
let mut current_char_count: uint = 0;
|
|
|
|
let mut longest_line_length: uint = 0;
|
|
|
|
|
|
|
|
loop {
|
|
|
|
// reading from a TTY seems to raise a condition on, rather than return Some(0) like a file.
|
|
|
|
// hence the option wrapped in a result here
|
2014-02-05 17:36:29 +00:00
|
|
|
match reader.read_until(LF) {
|
|
|
|
Ok(raw_line) => {
|
2013-11-30 01:13:39 +00:00
|
|
|
// GNU 'wc' only counts lines that end in LF as lines
|
2014-01-22 11:31:28 +00:00
|
|
|
if raw_line.iter().last().unwrap() == &LF {
|
2013-11-30 01:13:39 +00:00
|
|
|
line_count += 1;
|
|
|
|
}
|
|
|
|
|
2013-11-17 09:41:40 +00:00
|
|
|
byte_count += raw_line.iter().len();
|
|
|
|
|
|
|
|
// try and convert the bytes to UTF-8 first
|
2014-04-26 05:03:08 +00:00
|
|
|
match from_utf8(raw_line.as_slice()) {
|
2013-11-17 09:41:40 +00:00
|
|
|
Some(line) => {
|
2014-06-09 08:31:42 +00:00
|
|
|
word_count += line.words().count();
|
|
|
|
current_char_count = line.chars().count();
|
2013-11-17 09:41:40 +00:00
|
|
|
char_count += current_char_count;
|
|
|
|
},
|
|
|
|
None => {
|
2014-06-09 08:31:42 +00:00
|
|
|
word_count += raw_line.as_slice().split(|&x| is_word_seperator(x)).count();
|
2013-11-17 09:41:40 +00:00
|
|
|
for byte in raw_line.iter() {
|
|
|
|
match byte.is_ascii() {
|
|
|
|
true => {
|
|
|
|
current_char_count += 1;
|
|
|
|
}
|
|
|
|
false => { }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
char_count += current_char_count;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-22 11:31:28 +00:00
|
|
|
if current_char_count > longest_line_length {
|
2013-11-30 01:57:15 +00:00
|
|
|
// we subtract one here because `line.iter().len()` includes the LF
|
|
|
|
// matches GNU 'wc' behaviour
|
|
|
|
longest_line_length = current_char_count - 1;
|
2013-11-17 09:41:40 +00:00
|
|
|
}
|
|
|
|
},
|
|
|
|
_ => break
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
results.push(Result {
|
2014-05-28 06:33:39 +00:00
|
|
|
filename: path.to_string(),
|
2013-11-17 09:41:40 +00:00
|
|
|
bytes: byte_count,
|
|
|
|
chars: char_count,
|
|
|
|
lines: line_count,
|
|
|
|
words: word_count,
|
|
|
|
max_line_length: longest_line_length,
|
|
|
|
});
|
|
|
|
|
|
|
|
total_line_count += line_count;
|
|
|
|
total_word_count += word_count;
|
|
|
|
total_char_count += char_count;
|
|
|
|
total_byte_count += byte_count;
|
|
|
|
|
2014-01-22 11:31:28 +00:00
|
|
|
if longest_line_length > total_longest_line_length {
|
2013-11-17 09:41:40 +00:00
|
|
|
total_longest_line_length = longest_line_length;
|
|
|
|
}
|
|
|
|
|
|
|
|
// used for formatting
|
2014-07-09 08:29:50 +00:00
|
|
|
max_str_len = total_byte_count.to_string().len();
|
2013-11-17 09:41:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for result in results.iter() {
|
2014-05-23 12:28:40 +00:00
|
|
|
print_stats(result.filename.as_slice(), result.lines, result.words, result.chars, result.bytes, result.max_line_length, matches, max_str_len);
|
2013-11-17 09:41:40 +00:00
|
|
|
}
|
|
|
|
|
2014-01-22 11:31:28 +00:00
|
|
|
if files.len() > 1 {
|
2014-05-23 12:28:40 +00:00
|
|
|
print_stats("total", total_line_count, total_word_count, total_char_count, total_byte_count, total_longest_line_length, matches, max_str_len);
|
2013-11-17 09:41:40 +00:00
|
|
|
}
|
2014-06-09 01:49:06 +00:00
|
|
|
|
2014-06-12 04:41:53 +00:00
|
|
|
Ok(())
|
2013-11-17 09:41:40 +00:00
|
|
|
}
|
|
|
|
|
2014-05-23 12:28:40 +00:00
|
|
|
/// Prints one row of counts followed by the file name.
///
/// The columns selected in `matches` are printed in the order lines, words,
/// characters, bytes, maximum line length. When none of those options is
/// present, the default columns lines, words and bytes are printed.
///
/// Every number is right-aligned. The first printed column is `max_str_len`
/// wide and each later column is one wider, so that adjacent columns are
/// always separated by at least one space. A `filename` of `"-"` is not
/// printed; any other name follows the counts after a single space.
fn print_stats(filename: &str, line_count: uint, word_count: uint, char_count: uint,
               byte_count: uint, longest_line_length: uint, matches: &Matches, max_str_len: uint) {
    let show_lines = matches.opt_present("lines");
    let show_words = matches.opt_present("words");
    let show_chars = matches.opt_present("chars");
    let show_bytes = matches.opt_present("bytes");
    let show_max_line_length = matches.opt_present("max-line-length");

    // defaults: with no selection at all, show lines, words and bytes
    let use_defaults = !(show_lines || show_words || show_chars || show_bytes
                         || show_max_line_length);

    // (selected, value) pairs in output order
    let columns = [
        (show_lines || use_defaults, line_count),
        (show_words || use_defaults, word_count),
        (show_chars, char_count),
        (show_bytes || use_defaults, byte_count),
        (show_max_line_length, longest_line_length),
    ];

    let mut width = max_str_len;
    for &(selected, value) in columns.iter() {
        if selected {
            print!("{:1$}", value, width);
            // every column after the first gets one extra leading space
            width = max_str_len + 1;
        }
    }

    if filename != "-" {
        println!(" {}", filename);
    }
    else {
        println!("");
    }
}
|
|
|
|
|
2014-06-09 01:49:06 +00:00
|
|
|
fn open(path: String) -> StdResult<BufferedReader<Box<Reader>>, int> {
|
2014-05-23 12:28:40 +00:00
|
|
|
if "-" == path.as_slice() {
|
2014-07-10 02:51:51 +00:00
|
|
|
let reader = box stdin_raw() as Box<Reader>;
|
2014-06-09 01:49:06 +00:00
|
|
|
return Ok(BufferedReader::new(reader));
|
2013-11-17 09:41:40 +00:00
|
|
|
}
|
|
|
|
|
2014-02-05 17:36:29 +00:00
|
|
|
match File::open(&std::path::Path::new(path.as_slice())) {
|
2013-11-17 09:41:40 +00:00
|
|
|
Ok(fd) => {
|
2014-05-09 00:12:57 +00:00
|
|
|
let reader = box fd as Box<Reader>;
|
2014-06-12 04:41:53 +00:00
|
|
|
Ok(BufferedReader::new(reader))
|
2013-11-17 09:41:40 +00:00
|
|
|
},
|
|
|
|
Err(e) => {
|
2014-07-09 08:29:50 +00:00
|
|
|
show_error!("wc: {0:s}: {1:s}", path, e.desc.to_string());
|
2014-06-12 04:41:53 +00:00
|
|
|
Err(1)
|
2013-11-17 09:41:40 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|