coreutils/cut/ranges.rs
polyphemus 798af52077 Implement fields cutting
Adds an implementation for cut_fields() and creates a separate function
for the --output-delimiter, for performance reasons.

This implementation relies on ::read_until() to find the newline for us
but read_until() allocates a vector every time to return its result.
This is not ideal and should be improved upon by passing a buffer to
read().

This follows/implements the POSIX specification and all the GNU
conventions. It is a drop-in replacement for GNU cut.

One improvement over GNU is that the --delimiter option takes a UTF-8
character, as opposed to a single byte only in GNU cut.

Performance is about two times slower than that of GNU cut.

Remove ranges' sentinel value. All cut functions iterate over the ranges,
so the sentinel only adds an extra iteration instead of improving
performance.
2014-06-27 17:39:49 +02:00

112 lines
3.3 KiB
Rust

/*
* This file is part of the uutils coreutils package.
*
* (c) Rolf Morel <rolfmorel@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
use std;
/// An inclusive span of positions selected by one item of a cut list.
///
/// Ranges produced by the `FromStr` impl in this file always satisfy
/// `1 <= low <= high`; an open-ended item such as `N-` is stored with
/// `high` set to `std::uint::MAX`.
///
/// The derived ordering compares `low` first and then `high` (field
/// declaration order); `Range::from_list` sorts with it before merging, so
/// the field order must not be changed.
#[deriving(PartialEq,Eq,PartialOrd,Ord,Show)]
pub struct Range {
/// First position covered by the range.
pub low: uint,
/// Last position covered by the range (inclusive).
pub high: uint,
}
impl std::from_str::FromStr for Range {
    /// Parses a single list item into a `Range`.
    ///
    /// Accepted forms (positions must be at least 1):
    ///
    /// * `N`   - the single position N (`low == high == N`)
    /// * `N-`  - from N to the end (`high` is `std::uint::MAX`)
    /// * `-M`  - from 1 to M
    /// * `N-M` - from N to M, which requires `N <= M`
    ///
    /// Returns `None` for anything else, including a lone `-`, a zero
    /// position, non-numeric text, or an item with more than one dash.
    fn from_str(s: &str) -> Option<Range> {
        use std::uint::MAX;

        // Split on the first dash only; any further dash stays in the second
        // piece and makes its numeric parse fail.
        let mut pieces = s.splitn('-', 1);
        match (pieces.next(), pieces.next()) {
            // No dash at all: a single position.
            (Some(only), None) => {
                match from_str::<uint>(only) {
                    Some(pos) if pos > 0 => Some(Range { low: pos, high: pos }),
                    _ => None
                }
            }
            (Some(before), Some(after)) => {
                if after.len() == 0 {
                    // "N-": open-ended towards the end of the line.
                    match from_str::<uint>(before) {
                        Some(low) if low > 0 => Some(Range { low: low, high: MAX }),
                        _ => None
                    }
                } else if before.len() == 0 {
                    // "-M": starts at the first position.
                    match from_str::<uint>(after) {
                        Some(high) if high >= 1 => Some(Range { low: 1, high: high }),
                        _ => None
                    }
                } else {
                    // "N-M": both bounds given; reject reversed bounds.
                    match (from_str::<uint>(before), from_str::<uint>(after)) {
                        (Some(low), Some(high)) if low > 0 && low <= high => {
                            Some(Range { low: low, high: high })
                        }
                        _ => None
                    }
                }
            }
            // splitn always yields at least one piece.
            _ => unreachable!()
        }
    }
}
impl Range {
    /// Parses a comma-separated list of range items into a sorted vector of
    /// ranges in which overlapping entries have been merged.
    ///
    /// Every item is parsed through the `FromStr` impl of `Range`. The first
    /// item that fails to parse aborts the call with
    /// `Err("range '<item>' was invalid")`.
    ///
    /// Ranges that merely touch (for example `1-2,3-4`) are left as separate
    /// entries; only ranges whose `low` is not greater than the previous
    /// `high` are folded together.
    pub fn from_list(list: &str) -> Result<Vec<Range>, String> {
        let mut parsed = Vec::new();
        for piece in list.split(',') {
            match from_str::<Range>(piece) {
                None => return Err(format!("range '{}' was invalid", piece)),
                Some(parsed_range) => parsed.push(parsed_range)
            }
        }

        // Sorting by (low, high) puts every range that can overlap `keep`
        // directly after it.
        parsed.sort();

        let mut keep = 0;
        while keep < parsed.len() {
            let next = keep + 1;
            // Absorb each following range that starts inside the kept one;
            // removing it shifts the next candidate into the same slot.
            while next < parsed.len() && parsed.get(next).low <= parsed.get(keep).high {
                let absorbed_high = parsed.remove(next).unwrap().high;
                if absorbed_high > parsed.get(keep).high {
                    parsed.get_mut(keep).high = absorbed_high;
                }
            }
            keep += 1;
        }

        Ok(parsed)
    }
}
/// Builds the list of ranges covering every position from 1 up to
/// `std::uint::MAX` that is *not* covered by `ranges`.
///
/// The computation walks neighbouring pairs, so it presumably expects the
/// sorted, merged output of `Range::from_list` - TODO confirm against callers.
/// An empty `ranges` yields an empty result.
pub fn complement(ranges: &Vec<Range>) -> Vec<Range> {
    use std::uint;

    let count = ranges.len();
    let mut gaps = Vec::with_capacity(count + 1);

    // Gap in front of the first range, if it does not start at position 1.
    if count > 0 && ranges.get(0).low > 1 {
        gaps.push(Range { low: 1, high: ranges.get(0).low - 1 });
    }

    let mut idx = 0;
    while idx < count {
        let current = ranges.get(idx);
        if idx + 1 < count {
            // Gap between this range and the next one, unless they are adjacent.
            let following = ranges.get(idx + 1);
            if current.high + 1 != following.low {
                gaps.push(Range {
                    low: current.high + 1,
                    high: following.low - 1
                });
            }
        } else if current.high < uint::MAX {
            // Tail gap after the final range, unless it is already open-ended.
            gaps.push(Range {
                low: current.high + 1,
                high: uint::MAX
            });
        }
        idx += 1;
    }

    gaps
}