2015-12-08 02:42:08 +00:00
|
|
|
#![crate_name = "uu_uniq"]
|
2015-01-10 19:31:55 +00:00
|
|
|
|
2014-07-06 00:27:22 +00:00
|
|
|
/*
|
|
|
|
* This file is part of the uutils coreutils package.
|
|
|
|
*
|
|
|
|
* (c) Chirag B Jadwani <chirag.jadwani@gmail.com>
|
|
|
|
*
|
|
|
|
* For the full copyright and license information, please view the LICENSE
|
|
|
|
* file that was distributed with this source code.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
extern crate getopts;
|
|
|
|
|
2015-11-24 01:00:51 +00:00
|
|
|
#[macro_use]
|
|
|
|
extern crate uucore;
|
|
|
|
|
2015-05-21 18:42:42 +00:00
|
|
|
use getopts::{Matches, Options};
|
2015-04-27 19:25:01 +00:00
|
|
|
use std::fs::File;
|
2015-05-21 18:42:42 +00:00
|
|
|
use std::io::{BufRead, BufReader, BufWriter, Read, stdin, stdout, Write};
|
2015-04-27 19:25:01 +00:00
|
|
|
use std::path::Path;
|
2015-05-21 18:42:42 +00:00
|
|
|
use std::str::FromStr;
|
2014-07-06 00:27:22 +00:00
|
|
|
|
|
|
|
// Utility name printed in the help and version output.
static NAME: &'static str = "uniq";
|
2015-11-25 09:52:10 +00:00
|
|
|
// Version string, read from the CARGO_PKG_VERSION environment variable at compile time.
static VERSION: &'static str = env!("CARGO_PKG_VERSION");
|
2014-07-06 00:27:22 +00:00
|
|
|
|
|
|
|
/// Settings that control how adjacent lines are compared and which of them
/// are written out.
struct Uniq {
    /// Suppress groups that consist of a single line.
    repeats_only: bool,
    /// Suppress the leading line of groups that contain more than one line.
    uniques_only: bool,
    /// Also print the lines after the first one of each group.
    all_repeated: bool,
    /// Group delimiting method: `"prepend"`, `"separate"`, or empty for none.
    delimiters: String,
    /// Prefix every printed line with a count.
    show_counts: bool,
    /// Number of leading whitespace-separated fields left out of the comparison.
    skip_fields: Option<usize>,
    /// Number of characters skipped (after field skipping) before comparing.
    slice_start: Option<usize>,
    /// Upper bound on the number of characters compared.
    slice_stop: Option<usize>,
    /// Fold ASCII lower-case letters to upper case before comparing.
    ignore_case: bool,
    /// Use a 0 byte instead of a newline as the line terminator.
    zero_terminated: bool,
}
|
|
|
|
|
|
|
|
impl Uniq {
|
2015-04-27 19:25:01 +00:00
|
|
|
pub fn print_uniq<R: Read, W: Write>(&self, reader: &mut BufReader<R>, writer: &mut BufWriter<W>) {
|
2014-07-06 00:27:22 +00:00
|
|
|
let mut lines: Vec<String> = vec!();
|
|
|
|
let mut first_line_printed = false;
|
2015-04-27 19:25:01 +00:00
|
|
|
let delimiters = &self.delimiters[..];
|
2016-08-07 01:25:52 +00:00
|
|
|
let line_terminator = self.get_line_terminator();
|
2014-07-06 00:27:22 +00:00
|
|
|
|
2016-08-07 01:25:52 +00:00
|
|
|
for io_line in reader.split(line_terminator) {
|
|
|
|
let line = String::from_utf8(crash_if_err!(1, io_line)).unwrap();
|
2016-02-29 05:46:58 +00:00
|
|
|
if !lines.is_empty() && self.cmp_key(&lines[0]) != self.cmp_key(&line) {
|
2014-07-06 00:27:22 +00:00
|
|
|
let print_delimiter = delimiters == "prepend" || (delimiters == "separate" && first_line_printed);
|
|
|
|
first_line_printed |= self.print_lines(writer, &lines, print_delimiter);
|
|
|
|
lines.truncate(0);
|
|
|
|
}
|
|
|
|
lines.push(line);
|
|
|
|
}
|
|
|
|
if !lines.is_empty() {
|
|
|
|
let print_delimiter = delimiters == "prepend" || (delimiters == "separate" && first_line_printed);
|
|
|
|
self.print_lines(writer, &lines, print_delimiter);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-05 19:42:52 +00:00
|
|
|
fn skip_fields(&self, line: &str) -> String {
|
2016-02-29 05:46:58 +00:00
|
|
|
if let Some(skip_fields) = self.skip_fields {
|
|
|
|
if line.split_whitespace().count() > skip_fields {
|
|
|
|
let mut field = 0;
|
|
|
|
let mut i = 0;
|
|
|
|
while field < skip_fields && i < line.len() {
|
|
|
|
while i < line.len() && line.chars().nth(i).unwrap().is_whitespace() {
|
|
|
|
i = i + 1;
|
2015-07-31 17:37:40 +00:00
|
|
|
}
|
2016-02-29 05:46:58 +00:00
|
|
|
while i < line.len() && !line.chars().nth(i).unwrap().is_whitespace() {
|
|
|
|
i = i + 1;
|
|
|
|
}
|
|
|
|
field = field + 1;
|
|
|
|
}
|
|
|
|
line[i..].to_owned()
|
|
|
|
} else {
|
|
|
|
"".to_owned()
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
line[..].to_owned()
|
2015-07-31 17:37:40 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-07 01:25:52 +00:00
|
|
|
fn get_line_terminator(&self) -> u8 {
|
|
|
|
if self.zero_terminated {
|
|
|
|
0
|
|
|
|
} else {
|
|
|
|
'\n' as u8
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-05 19:42:52 +00:00
|
|
|
fn cmp_key(&self, line: &str) -> String {
|
2016-02-29 05:46:58 +00:00
|
|
|
let fields_to_check = &self.skip_fields(line);
|
|
|
|
let len = fields_to_check.len();
|
2014-07-06 00:27:22 +00:00
|
|
|
if len > 0 {
|
2016-02-29 05:46:58 +00:00
|
|
|
fields_to_check.chars()
|
2016-02-29 05:40:38 +00:00
|
|
|
.skip(self.slice_start.unwrap_or(0))
|
|
|
|
.take(self.slice_stop.unwrap_or(len))
|
2015-04-27 19:25:01 +00:00
|
|
|
.map(|c| match c {
|
|
|
|
'a' ... 'z' if self.ignore_case => ((c as u8) - 32) as char,
|
|
|
|
_ => c,
|
|
|
|
}).collect()
|
2014-07-06 00:27:22 +00:00
|
|
|
} else {
|
2016-02-29 05:46:58 +00:00
|
|
|
fields_to_check.to_owned()
|
2014-07-06 00:27:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-05 19:42:52 +00:00
|
|
|
fn print_lines<W: Write>(&self, writer: &mut BufWriter<W>, lines: &[String], print_delimiter: bool) -> bool {
|
2014-07-06 00:27:22 +00:00
|
|
|
let mut first_line_printed = false;
|
|
|
|
let mut count = if self.all_repeated { 1 } else { lines.len() };
|
|
|
|
if lines.len() == 1 && !self.repeats_only
|
|
|
|
|| lines.len() > 1 && !self.uniques_only {
|
2014-07-20 01:13:55 +00:00
|
|
|
self.print_line(writer, &lines[0], count, print_delimiter);
|
2014-07-06 00:27:22 +00:00
|
|
|
first_line_printed = true;
|
|
|
|
count += 1;
|
|
|
|
}
|
|
|
|
if self.all_repeated {
|
2015-04-27 19:25:01 +00:00
|
|
|
for line in lines[1..].iter() {
|
2014-07-06 00:27:22 +00:00
|
|
|
self.print_line(writer, line, count, print_delimiter && !first_line_printed);
|
|
|
|
first_line_printed = true;
|
|
|
|
count += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
first_line_printed
|
|
|
|
}
|
|
|
|
|
2016-01-05 19:42:52 +00:00
|
|
|
fn print_line<W: Write>(&self, writer: &mut BufWriter<W>, line: &str, count: usize, print_delimiter: bool) {
|
2016-08-07 01:25:52 +00:00
|
|
|
let line_terminator = self.get_line_terminator();
|
|
|
|
|
2014-07-06 00:27:22 +00:00
|
|
|
if print_delimiter {
|
2016-08-07 01:25:52 +00:00
|
|
|
crash_if_err!(1, writer.write_all(&[line_terminator]));
|
2014-07-06 00:27:22 +00:00
|
|
|
}
|
2015-04-27 19:25:01 +00:00
|
|
|
|
|
|
|
crash_if_err!(1, if self.show_counts {
|
|
|
|
writer.write_all(format!("{:7} {}", count, line).as_bytes())
|
|
|
|
} else {
|
|
|
|
writer.write_all(line.as_bytes())
|
|
|
|
});
|
2016-08-07 01:25:52 +00:00
|
|
|
crash_if_err!(1, writer.write_all(&[line_terminator]));
|
2014-07-06 00:27:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-21 18:42:42 +00:00
|
|
|
fn opt_parsed<T: FromStr>(opt_name: &str, matches: &Matches) -> Option<T> {
|
2014-07-06 00:27:22 +00:00
|
|
|
matches.opt_str(opt_name).map(|arg_str| {
|
2015-02-03 21:19:13 +00:00
|
|
|
let opt_val: Option<T> = arg_str.parse().ok();
|
2014-07-06 00:27:22 +00:00
|
|
|
opt_val.unwrap_or_else(||
|
|
|
|
crash!(1, "Invalid argument for {}: {}", opt_name, arg_str))
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2015-02-06 13:48:07 +00:00
|
|
|
pub fn uumain(args: Vec<String>) -> i32 {
|
2015-05-21 18:42:42 +00:00
|
|
|
let mut opts = Options::new();
|
|
|
|
|
|
|
|
opts.optflag("c", "count", "prefix lines by the number of occurrences");
|
|
|
|
opts.optflag("d", "repeated", "only print duplicate lines");
|
|
|
|
opts.optflagopt(
|
|
|
|
"D",
|
|
|
|
"all-repeated",
|
|
|
|
"print all duplicate lines delimit-method={none(default),prepend,separate} Delimiting is done with blank lines",
|
|
|
|
"delimit-method"
|
|
|
|
);
|
2015-07-31 17:37:40 +00:00
|
|
|
opts.optopt("f", "skip-fields", "avoid comparing the first N fields", "N");
|
2015-05-21 18:42:42 +00:00
|
|
|
opts.optopt("s", "skip-chars", "avoid comparing the first N characters", "N");
|
|
|
|
opts.optopt("w", "check-chars", "compare no more than N characters in lines", "N");
|
|
|
|
opts.optflag("i", "ignore-case", "ignore differences in case when comparing");
|
|
|
|
opts.optflag("u", "unique", "only print unique lines");
|
2016-08-07 01:25:52 +00:00
|
|
|
opts.optflag("z", "zero-terminated", "end lines with 0 byte, not newline");
|
2015-05-21 18:42:42 +00:00
|
|
|
opts.optflag("h", "help", "display this help and exit");
|
|
|
|
opts.optflag("V", "version", "output version information and exit");
|
|
|
|
|
|
|
|
let matches = match opts.parse(&args[1..]) {
|
2014-07-06 00:27:22 +00:00
|
|
|
Ok(m) => m,
|
|
|
|
Err(f) => crash!(1, "{}", f)
|
|
|
|
};
|
|
|
|
|
|
|
|
if matches.opt_present("help") {
|
|
|
|
println!("{} {}", NAME, VERSION);
|
|
|
|
println!("");
|
|
|
|
println!("Usage:");
|
2015-05-21 18:42:42 +00:00
|
|
|
println!(" {0} [OPTION]... [FILE]...", NAME);
|
2014-07-06 00:27:22 +00:00
|
|
|
println!("");
|
2015-05-21 18:42:42 +00:00
|
|
|
print!("{}", opts.usage("Filter adjacent matching lines from INPUT (or standard input),\n\
|
|
|
|
writing to OUTPUT (or standard output)."));
|
2014-07-06 00:27:22 +00:00
|
|
|
println!("");
|
|
|
|
println!("Note: '{0}' does not detect repeated lines unless they are adjacent.\n\
|
2015-05-21 18:42:42 +00:00
|
|
|
You may want to sort the input first, or use 'sort -u' without '{0}'.\n", NAME);
|
2014-07-06 00:27:22 +00:00
|
|
|
} else if matches.opt_present("version") {
|
|
|
|
println!("{} {}", NAME, VERSION);
|
|
|
|
} else {
|
|
|
|
let (in_file_name, out_file_name) = match matches.free.len() {
|
2016-01-05 19:42:52 +00:00
|
|
|
0 => ("-".to_owned(), "-".to_owned()),
|
|
|
|
1 => (matches.free[0].clone(), "-".to_owned()),
|
2014-07-20 01:13:55 +00:00
|
|
|
2 => (matches.free[0].clone(), matches.free[1].clone()),
|
2014-07-06 00:27:22 +00:00
|
|
|
_ => {
|
2014-07-20 01:13:55 +00:00
|
|
|
crash!(1, "Extra operand: {}", matches.free[2]);
|
2014-07-06 00:27:22 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
let uniq = Uniq {
|
|
|
|
repeats_only: matches.opt_present("repeated") || matches.opt_present("all-repeated"),
|
|
|
|
uniques_only: matches.opt_present("unique"),
|
|
|
|
all_repeated: matches.opt_present("all-repeated"),
|
|
|
|
delimiters: match matches.opt_default("all-repeated", "none") {
|
2015-04-27 19:25:01 +00:00
|
|
|
Some(ref opt_arg) if opt_arg != "none" => {
|
2016-01-05 19:42:52 +00:00
|
|
|
let rep_args = ["prepend".to_owned(), "separate".to_owned()];
|
2014-07-06 00:27:22 +00:00
|
|
|
if !rep_args.contains(opt_arg) {
|
|
|
|
crash!(1, "Incorrect argument for all-repeated: {}", opt_arg.clone());
|
|
|
|
}
|
|
|
|
opt_arg.clone()
|
|
|
|
},
|
2016-01-05 19:42:52 +00:00
|
|
|
_ => "".to_owned()
|
2014-07-06 00:27:22 +00:00
|
|
|
},
|
|
|
|
show_counts: matches.opt_present("count"),
|
2015-07-31 17:37:40 +00:00
|
|
|
skip_fields: opt_parsed("skip-fields", &matches),
|
2014-07-06 00:27:22 +00:00
|
|
|
slice_start: opt_parsed("skip-chars", &matches),
|
|
|
|
slice_stop: opt_parsed("check-chars", &matches),
|
|
|
|
ignore_case: matches.opt_present("ignore-case"),
|
2016-08-07 01:25:52 +00:00
|
|
|
zero_terminated: matches.opt_present("zero-terminated"),
|
2014-07-06 00:27:22 +00:00
|
|
|
};
|
|
|
|
uniq.print_uniq(&mut open_input_file(in_file_name),
|
|
|
|
&mut open_output_file(out_file_name));
|
|
|
|
}
|
|
|
|
0
|
|
|
|
}
|
|
|
|
|
2015-04-27 19:25:01 +00:00
|
|
|
fn open_input_file(in_file_name: String) -> BufReader<Box<Read+'static>> {
|
|
|
|
let in_file = if in_file_name == "-" {
|
|
|
|
Box::new(stdin()) as Box<Read>
|
2014-07-06 00:27:22 +00:00
|
|
|
} else {
|
2015-04-27 19:25:01 +00:00
|
|
|
let path = Path::new(&in_file_name[..]);
|
|
|
|
let in_file = File::open(&path);
|
2014-12-22 00:39:02 +00:00
|
|
|
let r = crash_if_err!(1, in_file);
|
2015-04-27 19:25:01 +00:00
|
|
|
Box::new(r) as Box<Read>
|
2014-07-06 00:27:22 +00:00
|
|
|
};
|
2015-04-27 19:25:01 +00:00
|
|
|
BufReader::new(in_file)
|
2014-07-06 00:27:22 +00:00
|
|
|
}
|
|
|
|
|
2015-04-27 19:25:01 +00:00
|
|
|
fn open_output_file(out_file_name: String) -> BufWriter<Box<Write+'static>> {
|
|
|
|
let out_file = if out_file_name == "-" {
|
|
|
|
Box::new(stdout()) as Box<Write>
|
2014-07-06 00:27:22 +00:00
|
|
|
} else {
|
2015-04-27 19:25:01 +00:00
|
|
|
let path = Path::new(&out_file_name[..]);
|
|
|
|
let in_file = File::create(&path);
|
2014-12-22 00:39:02 +00:00
|
|
|
let w = crash_if_err!(1, in_file);
|
2015-04-27 19:25:01 +00:00
|
|
|
Box::new(w) as Box<Write>
|
2014-07-06 00:27:22 +00:00
|
|
|
};
|
2015-04-27 19:25:01 +00:00
|
|
|
BufWriter::new(out_file)
|
2014-07-06 00:27:22 +00:00
|
|
|
}
|