Merge pull request #372 from Armavica/unexpand

Implement unexpand
This commit is contained in:
Arcterus 2014-07-28 15:20:04 -07:00
commit b3aeebb344
5 changed files with 337 additions and 1 deletions

View file

@ -223,6 +223,10 @@ path = "tty/tty.rs"
name = "uname"
path = "uname/uname.rs"
[[bin]]
name = "unexpand"
path = "unexpand/unexpand.rs"
[[bin]]
name = "uniq"
path = "uniq/uniq.rs"

View file

@ -76,6 +76,7 @@ PROGS := \
true \
truncate \
tsort \
unexpand \
unlink \
uniq \
wc \
@ -133,6 +134,7 @@ TEST_PROGS := \
seq \
tr \
truncate \
unexpand
TEST ?= $(TEST_PROGS)

View file

@ -164,7 +164,6 @@ To do
- stty
- tail (not all features implemented)
- test (not all features implemented)
- unexpand
- uniq (in progress)
- who

228
src/unexpand/unexpand.rs Normal file
View file

@ -0,0 +1,228 @@
#![crate_name = "unexpand"]
/*
* This file is part of the uutils coreutils package.
*
* (c) Virgile Andreani <virgile.andreani@anbuco.fr>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
#![feature(macro_rules)]
extern crate getopts;
extern crate libc;
use std::io;
use std::from_str;
#[path = "../common/util.rs"]
mod util;
static NAME: &'static str = "unexpand";
static VERSION: &'static str = "0.0.1";
static DEFAULT_TABSTOP: uint = 8;
/// Parses the argument of `-t`/`--tabs`: a comma separated list of tab stops.
///
/// Returns the stops in the order they were given. The input is rejected
/// through `crash!` when an entry is not an unsigned number, when an entry is
/// 0, or when the entries are not in strictly ascending order.
fn tabstops_parse(s: String) -> Vec<uint> {
    let words = s.as_slice().split(',').collect::<Vec<&str>>();

    let nums = words.move_iter()
        .map(|sn| from_str::from_str::<uint>(sn)
            .unwrap_or_else(
                || crash!(1, "{}\n", "tab size contains invalid character(s)"))
            )
        .collect::<Vec<uint>>();

    if nums.iter().any(|&n| n == 0) {
        crash!(1, "{}\n", "tab size cannot be 0");
    }

    // Each stop must be strictly greater than the previous one, so a repeated
    // stop such as "4,4" is rejected. Zeros were refused above, which lets the
    // comparison start from 0 and still accept any first stop.
    match nums.iter().fold((true, 0), |(acc, last), &n| (acc && last < n, n)) {
        (false, _) => crash!(1, "{}\n", "tab sizes must be ascending"),
        _ => {}
    }

    nums
}
/// Settings for one run, derived from the parsed command line.
struct Options {
    /// Inputs to process, in order; `"-"` is substituted when none were named.
    files: Vec<String>,
    /// Tab stops, either the default or the parsed `-t` argument.
    tabstops: Vec<uint>,
    /// Whether blanks after the first non-blank of a line are converted too.
    aflag: bool
}

impl Options {
    /// Builds the settings from the matches returned by `getopts`.
    fn new(matches: getopts::Matches) -> Options {
        let tabstops = match matches.opt_str("t") {
            Some(list) => tabstops_parse(list),
            None => vec!(DEFAULT_TABSTOP)
        };

        // Both `--all` and `--tabs` request conversion of every blank run;
        // `--first-only` cancels that again.
        let whole_line = matches.opt_present("all") || matches.opt_present("tabs");
        let aflag = whole_line && !matches.opt_present("first-only");

        // `free` is moved out of `matches` here, so this has to come last.
        let files =
            if !matches.free.is_empty() {
                matches.free
            } else {
                vec!("-".to_string())
            };

        Options { files: files, tabstops: tabstops, aflag: aflag }
    }
}
/// Program entry point.
///
/// Parses `args` (skipping the first element), prints the usage text for
/// `--help` or the version line for `-V`, and otherwise runs the conversion.
/// Every path that returns yields 0; a parse failure goes through `crash!`.
pub fn uumain(args: Vec<String>) -> int {
    let spec = [
        getopts::optflag("a", "all", "convert all blanks, instead of just initial blanks"),
        getopts::optflag("", "first-only", "convert only leading sequences of blanks (overrides -a)"),
        // `-t` is declared twice so that the usage text shows both the N and
        // the LIST form.
        getopts::optopt("t", "tabs", "have tabs N characters apart instead of 8 (enables -a)", "N"),
        getopts::optopt("t", "tabs", "use comma separated LIST of tab positions (enables -a)", "LIST"),
        getopts::optflag("h", "help", "display this help and exit"),
        getopts::optflag("V", "version", "output version information and exit"),
    ];

    let parsed = match getopts::getopts(args.tail(), spec) {
        Ok(found) => found,
        Err(why) => crash!(1, "{}", why)
    };

    if parsed.opt_present("help") {
        println!("Usage: {:s} [OPTION]... [FILE]...", NAME);
        let help_text = getopts::usage(
            "Convert blanks in each FILE to tabs, writing to standard output.\n\
            With no FILE, or when FILE is -, read standard input.", spec);
        io::print(help_text.as_slice());
        return 0;
    }

    if parsed.opt_present("V") {
        println!("{} v{}", NAME, VERSION);
        return 0;
    }

    unexpand(Options::new(parsed));

    0
}
/// Opens one input for buffered reading.
///
/// The path `"-"` selects standard input; any other path is opened as a file.
/// When the file cannot be opened the failure is reported through `crash!`
/// together with the error returned by `File::open`, so that causes other
/// than a missing file (for example denied access) are not mislabelled.
fn open(path: String) -> io::BufferedReader<Box<Reader>> {
    if path.as_slice() == "-" {
        return io::BufferedReader::new(box io::stdio::stdin_raw() as Box<Reader>);
    }

    let file = match io::File::open(&Path::new(path.as_slice())) {
        Ok(f) => f,
        Err(e) => crash!(1, "{}: {}\n", path, e)
    };
    io::BufferedReader::new(box file as Box<Reader>)
}
/// Tells whether column `col` is a tab stop.
///
/// A one-element `tabstops` is treated as a repeating width: every multiple
/// of it is a stop. Any other length is binary-searched for `col` as a list
/// of stop positions (the search relies on the list being sorted).
fn is_tabstop(tabstops: &[uint], col: uint) -> bool {
    if tabstops.len() == 1 {
        col % tabstops[0] == 0
    } else {
        tabstops.bsearch(|&stop| stop.cmp(&col)).is_some()
    }
}
/// Number of columns from `col` to the next tab stop after it.
///
/// With a one-element `tabstops` (a repeating width) there is always a next
/// stop. With a list, `None` is returned once `col` is at or past the last
/// listed stop.
fn to_next_stop(tabstops: &[uint], col: uint) -> Option<uint> {
    if tabstops.len() == 1 {
        let width = tabstops[0];
        return Some(width - col % width);
    }

    // The stop found here is greater than `col`, so the distance is a plain
    // difference.
    tabstops.iter()
        .find(|&&stop| stop > col)
        .map(|&stop| stop - col)
}
/// Writes a run of blanks that ends at column `col` and is `nspaces` columns
/// wide, using tabs where possible.
///
/// Tabs are only used when the run is wider than one column or when `init`
/// is set; in that case a tab is written for every stop that lies within the
/// run. Whatever remains up to `col` is written as spaces.
fn unexpandspan(output: &mut io::LineBufferedWriter<io::stdio::StdWriter>,
                tabstops: &[uint], nspaces: uint, col: uint, init: bool) {
    // Column where the run starts; it advances as tabs are written.
    let mut pos = col - nspaces;
    let use_tabs = init || nspaces > 1;

    while use_tabs {
        match to_next_stop(tabstops, pos) {
            Some(step) if pos + step <= col => {
                safe_write!(output, "{:c}", '\t');
                pos += step;
            }
            _ => break
        }
    }

    // Pad with spaces: an empty string formatted to a width of `col - pos`.
    safe_write!(output, "{:1$s}", "", col - pos);
}
/// Converts every input listed in `options.files` and writes the result to
/// standard output.
///
/// For each input the loop keeps three pieces of state:
/// * `col`     - the column the next character would occupy,
/// * `nspaces` - width of the run of blanks read but not yet written,
/// * `init`    - whether only blanks have been seen on the current line.
///
/// Blanks are buffered (counted in `nspaces`) while conversion applies, that
/// is while `init` is set or `options.aflag` is on, and are written through
/// `unexpandspan` as soon as a non-blank, a newline, a backspace or the end
/// of the input is reached.
fn unexpand(options: Options) {
    let mut output = io::stdout();
    let ts = options.tabstops.as_slice();

    for file in options.files.move_iter() {
        let mut col = 0;
        let mut nspaces = 0;
        let mut init = true;
        for c in open(file).chars() {
            match c {
                Ok(' ') => {
                    if init || options.aflag {
                        nspaces += 1;
                    } else {
                        nspaces = 0;
                        safe_write!(output, "{:c}", ' ');
                    }
                    col += 1;
                }
                Ok('\t') if nspaces > 0 => {
                    if is_tabstop(ts, col) {
                        // The pending run ends exactly on a stop, so all of
                        // it (even a lone space) can be written as tabs. The
                        // run may have crossed several stops, so it is
                        // flushed through `unexpandspan` rather than
                        // replaced by a single tab.
                        unexpandspan(&mut output, ts, nspaces, col, true);
                        nspaces = 0;
                    }
                    match to_next_stop(ts, col) {
                        Some(to_next) => {
                            // Treat the tab as that many pending blanks.
                            nspaces += to_next;
                            col += to_next;
                        }
                        None => {
                            // Past the last listed stop: flush the pending
                            // run first (while `col` still marks its end),
                            // then copy the tab as a one-column character.
                            unexpandspan(&mut output, ts, nspaces, col, init);
                            nspaces = 0;
                            col += 1;
                            safe_write!(output, "{:c}", '\t');
                        }
                    }
                }
                Ok('\t') => {
                    // No blanks are pending. The tab is copied as is, but it
                    // still moves the cursor to the next stop and, being a
                    // blank itself, leaves `init` untouched; otherwise later
                    // runs on this line would be converted against wrong
                    // columns.
                    col += match to_next_stop(ts, col) {
                        Some(to_next) => to_next,
                        None => 1
                    };
                    safe_write!(output, "{:c}", '\t');
                }
                Ok('\x08') => { // '\b'
                    if init || options.aflag {
                        unexpandspan(&mut output, ts, nspaces, col, init)
                    }
                    nspaces = 0;
                    // A backspace moves one column back, but not before 0.
                    if col > 0 { col -= 1; }
                    init = false;
                    safe_write!(output, "{:c}", '\x08');
                }
                Ok('\n') => {
                    if init || options.aflag {
                        unexpandspan(&mut output, ts, nspaces, col, init)
                    }
                    // A new line starts again in leading-blank state.
                    nspaces = 0;
                    col = 0;
                    init = true;
                    safe_write!(output, "{:c}", '\n');
                }
                Ok(c) => {
                    if init || options.aflag {
                        unexpandspan(&mut output, ts, nspaces, col, init)
                    }
                    nspaces = 0;
                    col += 1;
                    init = false;
                    safe_write!(output, "{:c}", c);
                }
                // Any read error ends this input (presumably this is also
                // how the end of the input shows up).
                Err(_) => break
            }
        }
        // Write blanks that were still pending when the input ended.
        if init || options.aflag {
            unexpandspan(&mut output, ts, nspaces, col, init)
        }
    }
}

103
test/unexpand.rs Normal file
View file

@ -0,0 +1,103 @@
use std::io::process::Command;
static PROGNAME: &'static str = "./unexpand";
/// Runs the program under test with `args`, feeds `input` to its standard
/// input and returns the bytes it wrote to standard output.
///
/// Fails the calling test if the process cannot be spawned, if writing the
/// input fails, or if collecting the output fails.
fn run(input: &str, args: &[&'static str]) -> Vec<u8> {
    let mut child = Command::new(PROGNAME).args(args).spawn().unwrap();

    // The stdin handle is taken out of the child and used as a temporary, so
    // it goes away at the end of this statement.
    child.stdin.take_unwrap().write_str(input).unwrap();

    match child.wait_with_output() {
        Ok(finished) => finished.output,
        Err(err) => fail!("{}", err),
    }
}
#[test]
fn unexpand_init_0() {
    // Leading blanks with `-t4`: only the last line is expected to start
    // with a tab.
    let produced = run(" 1\n 2\n 3\n 4\n", ["-t4"]);
    assert_eq!(produced.as_slice(), b" 1\n 2\n 3\n\t4\n");
}
#[test]
fn unexpand_init_1() {
    // Leading blanks with `-t4`: every line is expected to start with a tab,
    // the last one with two.
    let produced = run(" 5\n 6\n 7\n 8\n", ["-t4"]);
    assert_eq!(produced.as_slice(), b"\t 5\n\t 6\n\t 7\n\t\t8\n");
}
#[test]
fn unexpand_init_list_0() {
    // Same kind of input as the single-width case, but with the stop list
    // `2,4`.
    let produced = run(" 1\n 2\n 3\n 4\n", ["-t2,4"]);
    assert_eq!(produced.as_slice(), b" 1\n\t2\n\t 3\n\t\t4\n");
}
#[test]
fn unexpand_init_list_1() {
    // Blanks that lie past the last listed stop are expected to be left as
    // spaces.
    let produced = run(" 5\n 6\n 7\n 8\n", ["-t2,4"]);
    assert_eq!(produced.as_slice(), b"\t\t 5\n\t\t 6\n\t\t 7\n\t\t 8\n");
}
#[test]
fn unexpand_aflag_0() {
    // Without any option, blanks between words are expected to come out
    // unchanged.
    let produced = run("e E\nf F\ng G\nh H\n", []);
    assert_eq!(produced.as_slice(), b"e E\nf F\ng G\nh H\n");
}
#[test]
fn unexpand_aflag_1() {
    // With `-a`, blanks between words are expected to be converted on the
    // last two lines.
    let produced = run("e E\nf F\ng G\nh H\n", ["-a"]);
    assert_eq!(produced.as_slice(), b"e E\nf F\ng\tG\nh\t H\n");
}
#[test]
fn unexpand_aflag_2() {
    // `-t8` is expected to give the same result as `-a` for this input.
    let produced = run("e E\nf F\ng G\nh H\n", ["-t8"]);
    assert_eq!(produced.as_slice(), b"e E\nf F\ng\tG\nh\t H\n");
}
#[test]
fn unexpand_first_only_0() {
    // With `-t3` alone, the blanks between `A` and `B` are expected to be
    // converted as well.
    let produced = run(" A B", ["-t3"]);
    assert_eq!(produced.as_slice(), b"\t\t A\t B");
}
#[test]
fn unexpand_first_only_1() {
    // Adding `--first-only` is expected to leave the blanks between `A` and
    // `B` alone while the leading ones are still converted.
    let produced = run(" A B", ["-t3", "--first-only"]);
    assert_eq!(produced.as_slice(), b"\t\t A B");
}
#[test]
fn unexpand_trailing_space_0() { // evil
    // A single space in front of a field that starts with a non-blank must
    // stay a space, except at the beginning of a line.
    let produced = run("123 \t1\n123 1\n123 \n123 ", ["-t4"]);
    assert_eq!(produced.as_slice(), b"123\t\t1\n123 1\n123 \n123 ");
}
#[test]
fn unexpand_trailing_space_1() { // super evil
    // Same rule about single spaces, exercised with a tab width of 1.
    let produced = run(" abc d e f g ", ["-t1"]);
    assert_eq!(produced.as_slice(), b"\tabc d e\t\tf\t\tg ");
}
#[test]
fn unexpand_spaces_follow_tabs_0() {
    // Spaces directly in front of the first tab can be absorbed by that tab.
    let produced = run(" \t\t A", []);
    assert_eq!(produced.as_slice(), b"\t\t A");
}
#[test]
fn unexpand_spaces_follow_tabs_1() { // evil
    // Walk-through for the stop list `1,4,5`:
    //   'a'     stays 'a'       (first stop, 1)
    //   ' \t'   becomes '\t'    (second stop, 4)
    //   ' '     becomes '\t'    (third stop, 5)
    //   ' B \t' stays ' B \t'   (the list is exhausted, nothing may change)
    let produced = run("a \t B \t", ["-t1,4,5"]);
    assert_eq!(produced.as_slice(), b"a\t\t B \t");
}