2014-07-10 02:11:19 +00:00
|
|
|
#![crate_name = "shuf"]
|
2015-02-22 11:21:06 +00:00
|
|
|
#![feature(collections, core, old_io, old_path, rand, rustc_private)]
|
2014-07-10 01:19:59 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This file is part of the uutils coreutils package.
|
|
|
|
*
|
|
|
|
* (c) Arcterus <arcterus@mail.com>
|
|
|
|
*
|
|
|
|
* For the full copyright and license information, please view the LICENSE
|
|
|
|
* file that was distributed with this source code.
|
|
|
|
*/
|
|
|
|
|
|
|
|
extern crate getopts;
|
|
|
|
extern crate libc;
|
|
|
|
|
|
|
|
use std::cmp;
|
2015-01-29 07:29:31 +00:00
|
|
|
use std::old_io as io;
|
|
|
|
use std::old_io::IoResult;
|
2014-07-10 01:19:59 +00:00
|
|
|
use std::iter::{range_inclusive, RangeInclusive};
|
2015-01-08 13:04:02 +00:00
|
|
|
use std::rand::{self, Rng};
|
2015-01-10 18:07:08 +00:00
|
|
|
use std::usize;
|
2014-07-10 01:19:59 +00:00
|
|
|
|
|
|
|
#[path = "../common/util.rs"]
|
2015-01-08 12:54:22 +00:00
|
|
|
#[macro_use]
|
2014-07-10 01:19:59 +00:00
|
|
|
mod util;
|
|
|
|
|
|
|
|
enum Mode {
|
|
|
|
Default,
|
|
|
|
Echo,
|
2015-01-10 18:07:08 +00:00
|
|
|
InputRange(RangeInclusive<usize>)
|
2014-07-10 01:19:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Program name printed in the help and version output.
static NAME: &'static str = "shuf";

/// Version string printed in the help and version output.
static VERSION: &'static str = "0.0.1";
|
|
|
|
|
2015-02-06 13:48:07 +00:00
|
|
|
pub fn uumain(args: Vec<String>) -> i32 {
|
2014-07-20 01:13:55 +00:00
|
|
|
let program = args[0].clone();
|
2014-07-10 01:19:59 +00:00
|
|
|
|
|
|
|
let opts = [
|
|
|
|
getopts::optflag("e", "echo", "treat each ARG as an input line"),
|
|
|
|
getopts::optopt("i", "input-range", "treat each number LO through HI as an input line", "LO-HI"),
|
|
|
|
getopts::optopt("n", "head-count", "output at most COUNT lines", "COUNT"),
|
|
|
|
getopts::optopt("o", "output", "write result to FILE instead of standard output", "FILE"),
|
|
|
|
getopts::optopt("", "random-source", "get random bytes from FILE", "FILE"),
|
|
|
|
getopts::optflag("r", "repeat", "output lines can be repeated"),
|
|
|
|
getopts::optflag("z", "zero-terminated", "end lines with 0 byte, not newline"),
|
|
|
|
getopts::optflag("h", "help", "display this help and exit"),
|
|
|
|
getopts::optflag("V", "version", "output version information and exit")
|
|
|
|
];
|
2014-11-19 20:55:25 +00:00
|
|
|
let mut matches = match getopts::getopts(args.tail(), &opts) {
|
2014-07-10 01:19:59 +00:00
|
|
|
Ok(m) => m,
|
|
|
|
Err(f) => {
|
|
|
|
crash!(1, "{}", f)
|
|
|
|
}
|
|
|
|
};
|
|
|
|
if matches.opt_present("help") {
|
|
|
|
println!("{name} v{version}
|
|
|
|
|
|
|
|
Usage:
|
|
|
|
{prog} [OPTION]... [FILE]
|
|
|
|
{prog} -e [OPTION]... [ARG]...
|
|
|
|
{prog} -i LO-HI [OPTION]...\n
|
|
|
|
{usage}
|
|
|
|
With no FILE, or when FILE is -, read standard input.",
|
|
|
|
name = NAME, version = VERSION, prog = program,
|
2014-11-19 20:55:25 +00:00
|
|
|
usage = getopts::usage("Write a random permutation of the input lines to standard output.", &opts));
|
2014-07-10 01:19:59 +00:00
|
|
|
} else if matches.opt_present("version") {
|
|
|
|
println!("{} v{}", NAME, VERSION);
|
|
|
|
} else {
|
|
|
|
let echo = matches.opt_present("echo");
|
|
|
|
let mode = match matches.opt_str("input-range") {
|
|
|
|
Some(range) => {
|
|
|
|
if echo {
|
|
|
|
show_error!("cannot specify more than one mode");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
match parse_range(range) {
|
2014-11-19 20:50:37 +00:00
|
|
|
Ok(m) => Mode::InputRange(m),
|
2014-07-10 01:19:59 +00:00
|
|
|
Err((msg, code)) => {
|
|
|
|
show_error!("{}", msg);
|
|
|
|
return code;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-07-10 02:30:38 +00:00
|
|
|
None => {
|
|
|
|
if echo {
|
2014-11-19 20:50:37 +00:00
|
|
|
Mode::Echo
|
2014-07-10 02:30:38 +00:00
|
|
|
} else {
|
|
|
|
if matches.free.len() == 0 {
|
|
|
|
matches.free.push("-".to_string());
|
|
|
|
}
|
2014-11-19 20:50:37 +00:00
|
|
|
Mode::Default
|
2014-07-10 02:30:38 +00:00
|
|
|
}
|
|
|
|
}
|
2014-07-10 01:19:59 +00:00
|
|
|
};
|
|
|
|
let repeat = matches.opt_present("repeat");
|
|
|
|
let zero = matches.opt_present("zero-terminated");
|
|
|
|
let count = match matches.opt_str("head-count") {
|
2015-01-10 18:07:08 +00:00
|
|
|
Some(cnt) => match cnt.parse::<usize>() {
|
2015-02-03 21:19:13 +00:00
|
|
|
Ok(val) => val,
|
|
|
|
Err(e) => {
|
|
|
|
show_error!("'{}' is not a valid count: {}", cnt, e);
|
2014-07-10 01:19:59 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
},
|
2015-01-10 18:07:08 +00:00
|
|
|
None => usize::MAX
|
2014-07-10 01:19:59 +00:00
|
|
|
};
|
|
|
|
let output = matches.opt_str("output");
|
|
|
|
let random = matches.opt_str("random-source");
|
|
|
|
match shuf(matches.free, mode, repeat, zero, count, output, random) {
|
|
|
|
Err(f) => {
|
|
|
|
show_error!("{}", f);
|
|
|
|
return 1;
|
|
|
|
},
|
|
|
|
_ => {}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
0
|
|
|
|
}
|
|
|
|
|
2015-01-10 18:07:08 +00:00
|
|
|
fn shuf(input: Vec<String>, mode: Mode, repeat: bool, zero: bool, count: usize, output: Option<String>, random: Option<String>) -> IoResult<()> {
|
2014-07-10 01:19:59 +00:00
|
|
|
match mode {
|
2014-11-19 20:50:37 +00:00
|
|
|
Mode::Echo => shuf_lines(input, repeat, zero, count, output, random),
|
|
|
|
Mode::InputRange(range) => shuf_lines(range.map(|num| num.to_string()).collect(), repeat, zero, count, output, random),
|
|
|
|
Mode::Default => {
|
2014-09-17 15:11:39 +00:00
|
|
|
let lines: Vec<String> = input.into_iter().flat_map(|filename| {
|
2014-07-10 02:30:38 +00:00
|
|
|
let slice = filename.as_slice();
|
|
|
|
let mut file_buf;
|
|
|
|
let mut stdin_buf;
|
|
|
|
let mut file = io::BufferedReader::new(
|
|
|
|
if slice == "-" {
|
|
|
|
stdin_buf = io::stdio::stdin_raw();
|
|
|
|
&mut stdin_buf as &mut Reader
|
|
|
|
} else {
|
|
|
|
file_buf = crash_if_err!(1, io::File::open(&Path::new(slice)));
|
|
|
|
&mut file_buf as &mut Reader
|
|
|
|
}
|
|
|
|
);
|
2014-07-10 01:19:59 +00:00
|
|
|
let mut lines = vec!();
|
|
|
|
for line in file.lines() {
|
|
|
|
let mut line = crash_if_err!(1, line);
|
2014-09-27 19:05:36 +00:00
|
|
|
line.pop();
|
2014-07-10 01:19:59 +00:00
|
|
|
lines.push(line);
|
|
|
|
}
|
2014-09-17 15:11:39 +00:00
|
|
|
lines.into_iter()
|
2014-07-10 01:19:59 +00:00
|
|
|
}).collect();
|
|
|
|
shuf_lines(lines, repeat, zero, count, output, random)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-03 18:59:23 +00:00
|
|
|
enum WrappedRng {
|
|
|
|
RngFile(rand::reader::ReaderRng<io::File>),
|
2015-01-08 20:59:07 +00:00
|
|
|
RngDefault(rand::ThreadRng),
|
2014-11-03 18:59:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
impl WrappedRng {
|
|
|
|
fn next_u32(&mut self) -> u32 {
|
|
|
|
match self {
|
2015-01-10 12:48:42 +00:00
|
|
|
&mut WrappedRng::RngFile(ref mut r) => r.next_u32(),
|
|
|
|
&mut WrappedRng::RngDefault(ref mut r) => r.next_u32(),
|
2014-11-03 18:59:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-10 18:07:08 +00:00
|
|
|
fn shuf_lines(mut lines: Vec<String>, repeat: bool, zero: bool, count: usize, outname: Option<String>, random: Option<String>) -> IoResult<()> {
|
2014-07-10 01:19:59 +00:00
|
|
|
let mut output = match outname {
|
2015-01-10 12:48:42 +00:00
|
|
|
Some(name) => Box::new(io::BufferedWriter::new(try!(io::File::create(&Path::new(name))))) as Box<Writer>,
|
|
|
|
None => Box::new(io::stdout()) as Box<Writer>
|
2014-07-10 01:19:59 +00:00
|
|
|
};
|
|
|
|
let mut rng = match random {
|
2014-11-19 20:50:37 +00:00
|
|
|
Some(name) => WrappedRng::RngFile(rand::reader::ReaderRng::new(try!(io::File::open(&Path::new(name))))),
|
2015-01-08 20:59:07 +00:00
|
|
|
None => WrappedRng::RngDefault(rand::thread_rng()),
|
2014-07-10 01:19:59 +00:00
|
|
|
};
|
|
|
|
let mut len = lines.len();
|
|
|
|
let max = if repeat { count } else { cmp::min(count, len) };
|
|
|
|
for _ in range(0, max) {
|
2015-01-10 18:07:08 +00:00
|
|
|
let idx = rng.next_u32() as usize % len;
|
2014-07-20 01:13:55 +00:00
|
|
|
try!(write!(output, "{}{}", lines[idx], if zero { '\0' } else { '\n' }));
|
2014-07-10 01:19:59 +00:00
|
|
|
if !repeat {
|
|
|
|
lines.remove(idx);
|
|
|
|
len -= 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2015-02-06 13:48:07 +00:00
|
|
|
fn parse_range(input_range: String) -> Result<RangeInclusive<usize>, (String, i32)> {
|
2014-07-10 01:19:59 +00:00
|
|
|
let split: Vec<&str> = input_range.as_slice().split('-').collect();
|
|
|
|
if split.len() != 2 {
|
|
|
|
Err(("invalid range format".to_string(), 1))
|
|
|
|
} else {
|
2015-01-10 18:07:08 +00:00
|
|
|
let begin = match split[0].parse::<usize>() {
|
2015-02-03 21:19:13 +00:00
|
|
|
Ok(m) => m,
|
|
|
|
Err(e)=> return Err((format!("{} is not a valid number: {}", split[0], e), 1))
|
2014-07-10 01:19:59 +00:00
|
|
|
};
|
2015-01-10 18:07:08 +00:00
|
|
|
let end = match split[1].parse::<usize>() {
|
2015-02-03 21:19:13 +00:00
|
|
|
Ok(m) => m,
|
|
|
|
Err(e)=> return Err((format!("{} is not a valid number: {}", split[1], e), 1))
|
2014-07-10 01:19:59 +00:00
|
|
|
};
|
|
|
|
Ok(range_inclusive(begin, end))
|
|
|
|
}
|
|
|
|
}
|