Condensed much of the weird stuff in tr into a function; passes more GNU tests

Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
Hanif Bin Ariffin 2021-07-25 15:51:40 +08:00
parent b7a0ad15a7
commit 5a0870bb30
2 changed files with 156 additions and 133 deletions

View file

@ -26,44 +26,6 @@ mod unicode_table {
pub static BLANK: &'static [char] = &[SPACE, HT]; pub static BLANK: &'static [char] = &[SPACE, HT];
} }
/// An endless iterator that hands back the same `char` on every step.
struct Repeat(char);

impl Repeat {
    /// Wraps `element` so that it is yielded forever.
    fn new(element: char) -> Repeat {
        Self(element)
    }
}

impl Iterator for Repeat {
    type Item = char;

    /// Never runs dry: always returns the stored character.
    fn next(&mut self) -> Option<char> {
        let Repeat(ch) = *self;
        Some(ch)
    }

    /// Every element is identical, so the "last" one is answered directly
    /// rather than by walking the (endless) sequence.
    fn last(self) -> Option<char> {
        let Repeat(ch) = self;
        Some(ch)
    }

    /// Every element is identical, so a single call of the predicate
    /// decides the outcome.
    fn any<F>(&mut self, mut f: F) -> bool
    where
        Self: Sized,
        F: FnMut(Self::Item) -> bool,
    {
        let Repeat(ch) = *self;
        f(ch)
    }
}
/// Builds a closure suitable for `filter_map` over `(index, item)` pairs.
///
/// With `Some(limit)`, the closure keeps only items whose index is strictly
/// below `limit`; with `None`, every item is kept.
fn truncate_iterator<T>(input: Option<usize>) -> impl Fn((usize, T)) -> Option<T> {
    move |(idx, item)| {
        // No limit means "keep everything"; otherwise keep indices below the limit.
        let keep = input.map_or(true, |limit| idx < limit);
        if keep {
            Some(item)
        } else {
            None
        }
    }
}
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub enum Sequence { pub enum Sequence {
Char(char), Char(char),
@ -89,8 +51,8 @@ impl Sequence {
match self { match self {
Sequence::Char(c) => Box::new(std::iter::once(*c)), Sequence::Char(c) => Box::new(std::iter::once(*c)),
Sequence::CharRange(l, r) => Box::new((*l..=*r).flat_map(char::from_u32)), Sequence::CharRange(l, r) => Box::new((*l..=*r).flat_map(char::from_u32)),
Sequence::CharStar(c) => Box::new(Repeat::new(*c)), Sequence::CharStar(c) => Box::new(std::iter::repeat(*c)),
Sequence::CharRepeat(c, n) => Box::new(Repeat::new(*c).take(*n)), Sequence::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)),
Sequence::Alnum => Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z')), Sequence::Alnum => Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z')),
Sequence::Alpha => Box::new(('A'..='Z').chain('a'..='z')), Sequence::Alpha => Box::new(('A'..='Z').chain('a'..='z')),
Sequence::Blank => Box::new(unicode_table::BLANK.into_iter().cloned()), Sequence::Blank => Box::new(unicode_table::BLANK.into_iter().cloned()),
@ -140,22 +102,99 @@ impl Sequence {
pub fn last(&self) -> Option<char> { pub fn last(&self) -> Option<char> {
match self { match self {
Sequence::CharStar(c) => Some(*c), Sequence::CharStar(c) => Some(*c),
// TODO: Can be optimized further...
rest => rest.flatten().last(), rest => rest.flatten().last(),
} }
} }
pub fn len(&self) -> Option<usize> { // Hide all the nasty sh*t in here
match self { pub fn solve_set_characters(
Sequence::CharStar(_) => None, set1: &Vec<Sequence>,
// TODO: Is there a fix for this? set2: &Vec<Sequence>,
rest => Some(rest.flatten().count()), ) -> Result<(Vec<char>, Vec<char>), String> {
let is_char_star = |s: &&Sequence| -> bool {
match s {
Sequence::CharStar(_) => true,
_ => false,
}
};
let set1_star_count = set1.iter().filter(is_char_star).count();
if set1_star_count == 0 {
let set2_star_count = set2.iter().filter(is_char_star).count();
if set2_star_count < 2 {
let char_star = set2.iter().find_map(|s| match s {
Sequence::CharStar(c) => Some(c),
_ => None,
});
let mut partition = set2.as_slice().split(|s| match s {
Sequence::CharStar(_) => true,
_ => false,
});
let set1_len = set1.iter().flat_map(Sequence::flatten).count();
let set2_len = set2
.iter()
.filter_map(|s| match s {
Sequence::CharStar(_) => None,
r => Some(r),
})
.flat_map(Sequence::flatten)
.count();
let star_compensate_len = set1_len.saturating_sub(set2_len);
let set2_solved = match (partition.next(), partition.next()) {
(None, None) => match char_star {
Some(c) => std::iter::repeat(*c).take(star_compensate_len).collect(),
None => std::iter::empty().collect(),
},
(None, Some(set2_b)) => {
if let Some(c) = char_star {
std::iter::repeat(*c)
.take(star_compensate_len)
.chain(set2_b.iter().flat_map(Sequence::flatten))
.collect()
} else {
set2_b.iter().flat_map(Sequence::flatten).collect()
}
}
(Some(set2_a), None) => match char_star {
Some(c) => set2_a
.iter()
.flat_map(Sequence::flatten)
.chain(std::iter::repeat(*c).take(star_compensate_len))
.collect(),
None => set2_a.iter().flat_map(Sequence::flatten).collect(),
},
(Some(set2_a), Some(set2_b)) => match char_star {
Some(c) => set2_a
.iter()
.flat_map(Sequence::flatten)
.chain(std::iter::repeat(*c).take(star_compensate_len))
.chain(set2_b.iter().flat_map(Sequence::flatten))
.collect(),
None => set2_a
.iter()
.chain(set2_b.iter())
.flat_map(Sequence::flatten)
.collect(),
},
};
let set1_solved = set1.iter().flat_map(Sequence::flatten).collect();
return Ok((set1_solved, set2_solved));
} else {
Err(format!(
"{}: only one [c*] repeat construct may appear in string2",
executable!()
))
}
} else {
Err(format!(
"{}: the [c*] repeat construct may not appear in string1",
executable!()
))
} }
} }
} }
impl Sequence { impl Sequence {
pub fn parse_set_string(input: &str) -> Vec<Sequence> { pub fn from_str(input: &str) -> Vec<Sequence> {
many0(alt(( many0(alt((
alt(( alt((
Sequence::parse_char_range_octal_leftright, Sequence::parse_char_range_octal_leftright,
@ -385,28 +424,20 @@ impl SymbolTranslator for DeleteOperation {
pub struct TranslateOperationComplement { pub struct TranslateOperationComplement {
iter: u32, iter: u32,
set2_iter: usize,
set1: Vec<char>, set1: Vec<char>,
set2: Box<dyn Iterator<Item = char>>, set2: Vec<char>,
fallback: char, fallback: char,
translation_map: HashMap<char, char>, translation_map: HashMap<char, char>,
} }
impl TranslateOperationComplement { impl TranslateOperationComplement {
fn new( fn new(set1: Vec<char>, set2: Vec<char>, fallback: char) -> TranslateOperationComplement {
set1: Vec<Sequence>,
set2: Vec<Sequence>,
set1_truncate_length: Option<usize>,
fallback: char,
) -> TranslateOperationComplement {
TranslateOperationComplement { TranslateOperationComplement {
iter: 0, iter: 0,
set1: set1 set2_iter: 0,
.iter() set1,
.flat_map(Sequence::flatten) set2,
.enumerate()
.filter_map(truncate_iterator(set1_truncate_length))
.collect(),
set2: Box::new(set2.into_iter().flat_map(|c| Sequence::flatten(&c))),
fallback, fallback,
translation_map: HashMap::new(), translation_map: HashMap::new(),
} }
@ -419,23 +450,11 @@ pub struct TranslateOperationStandard {
} }
impl TranslateOperationStandard { impl TranslateOperationStandard {
fn new( fn new(set1: Vec<char>, set2: Vec<char>, fallback: char) -> TranslateOperationStandard {
set1: Vec<Sequence>,
set2: Vec<Sequence>,
set1_truncate_length: Option<usize>,
fallback: char,
) -> TranslateOperationStandard {
TranslateOperationStandard { TranslateOperationStandard {
translation_map: set1 translation_map: set1
.iter() .into_iter()
.flat_map(Sequence::flatten) .zip(set2.into_iter().chain(std::iter::repeat(fallback)))
.zip(
set2.iter()
.flat_map(Sequence::flatten)
.chain(Repeat(fallback)),
)
.enumerate()
.filter_map(truncate_iterator(set1_truncate_length))
.collect::<HashMap<_, _>>(), .collect::<HashMap<_, _>>(),
} }
} }
@ -461,40 +480,27 @@ impl TranslateOperation {
pub fn new( pub fn new(
set1: Vec<Sequence>, set1: Vec<Sequence>,
set2: Vec<Sequence>, set2: Vec<Sequence>,
truncate_set1: bool, truncate_set1_flag: bool,
complement: bool, complement: bool,
) -> TranslateOperation { ) -> Result<TranslateOperation, String> {
let fallback = set2 let (mut set1_solved, set2_solved) = Sequence::solve_set_characters(&set1, &set2)?;
.iter() if truncate_set1_flag {
.rev() set1_solved.truncate(set2_solved.len());
.next() }
.map(Sequence::last) let fallback = set2.iter().map(Sequence::last).last().flatten().expect(
.flatten() format!(
.unwrap(); "{}: when not truncating set1, string2 must be non-empty",
let set1_truncate_length = if truncate_set1 { executable!()
set2.iter() )
.map(Sequence::len) .as_str(),
.reduce(|a, b| match (a, b) { );
(Some(l), Some(r)) => Some(l + r),
_ => None,
})
.flatten()
} else {
None
};
if complement { if complement {
TranslateOperation::Complement(TranslateOperationComplement::new( Ok(TranslateOperation::Complement(
set1, TranslateOperationComplement::new(set1_solved, set2_solved, fallback),
set2,
set1_truncate_length,
fallback,
)) ))
} else { } else {
TranslateOperation::Standard(TranslateOperationStandard::new( Ok(TranslateOperation::Standard(
set1, TranslateOperationStandard::new(set1_solved, set2_solved, fallback),
set2,
set1_truncate_length,
fallback,
)) ))
} }
} }
@ -511,6 +517,7 @@ impl SymbolTranslator for TranslateOperation {
), ),
TranslateOperation::Complement(TranslateOperationComplement { TranslateOperation::Complement(TranslateOperationComplement {
iter, iter,
set2_iter,
set1, set1,
set2, set2,
fallback, fallback,
@ -525,11 +532,12 @@ impl SymbolTranslator for TranslateOperation {
Some(*c) Some(*c)
} else { } else {
while translation_map.get(&current).is_none() { while translation_map.get(&current).is_none() {
if let Some(p) = set2.next() { if let Some(value) = set2.get(*set2_iter) {
let (next_index, next_value) = let (next_iter, next_key) =
TranslateOperation::next_complement_char(*iter, &*set1); TranslateOperation::next_complement_char(*iter, &*set1);
*iter = next_index; *iter = next_iter;
translation_map.insert(next_value, p); *set2_iter = set2_iter.saturating_add(1);
translation_map.insert(next_key, *value);
} else { } else {
translation_map.insert(current, *fallback); translation_map.insert(current, *fallback);
} }
@ -622,9 +630,7 @@ fn test_parse_octal() {
for a in '0'..='7' { for a in '0'..='7' {
for b in '0'..='7' { for b in '0'..='7' {
for c in '0'..='7' { for c in '0'..='7' {
assert!( assert!(Sequence::from_str(format!("\\{}{}{}", a, b, c).as_str()).len() == 1);
Sequence::parse_set_string(format!("\\{}{}{}", a, b, c).as_str()).len() == 1
);
} }
} }
} }

View file

@ -69,7 +69,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
if sets.is_empty() { if sets.is_empty() {
show_error!( show_error!(
"missing operand\nTry `{} --help` for more information.", "missing operand\nTry '{} --help' for more information.",
executable!() executable!()
); );
return 1; return 1;
@ -77,7 +77,16 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
if !(delete_flag || squeeze_flag) && sets.len() < 2 { if !(delete_flag || squeeze_flag) && sets.len() < 2 {
show_error!( show_error!(
"missing operand after '{}'\nTry `{} --help` for more information.", "missing operand after '{}'\nTry '{} --help' for more information.",
sets[0],
executable!()
);
return 1;
}
if sets.len() > 2 {
show_error!(
"extra operand '{}'\nTry '{} --help' for more information.",
sets[0], sets[0],
executable!() executable!()
); );
@ -95,50 +104,58 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
let mut delete_buffer = vec![]; let mut delete_buffer = vec![];
{ {
let mut delete_writer = BufWriter::new(&mut delete_buffer); let mut delete_writer = BufWriter::new(&mut delete_buffer);
let delete_op = let delete_op = DeleteOperation::new(Sequence::from_str(&sets[0]), complement_flag);
DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag);
translate_input(&mut locked_stdin, &mut delete_writer, delete_op); translate_input(&mut locked_stdin, &mut delete_writer, delete_op);
} }
{ {
let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes()); let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes());
let squeeze_op = let op = SqueezeOperation::new(Sequence::from_str(&sets[1]), complement_flag);
SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), complement_flag); translate_input(&mut squeeze_reader, &mut buffered_stdout, op);
translate_input(&mut squeeze_reader, &mut buffered_stdout, squeeze_op);
} }
} else { } else {
let op = DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); let op = DeleteOperation::new(Sequence::from_str(&sets[0]), complement_flag);
translate_input(&mut locked_stdin, &mut buffered_stdout, op); translate_input(&mut locked_stdin, &mut buffered_stdout, op);
} }
} else if squeeze_flag { } else if squeeze_flag {
if sets.len() < 2 { if sets.len() < 2 {
let op = SqueezeOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); let op = SqueezeOperation::new(Sequence::from_str(&sets[0]), complement_flag);
translate_input(&mut locked_stdin, &mut buffered_stdout, op); translate_input(&mut locked_stdin, &mut buffered_stdout, op);
} else { } else {
let mut translate_buffer = vec![]; let mut translate_buffer = vec![];
{ {
let mut writer = BufWriter::new(&mut translate_buffer); let mut writer = BufWriter::new(&mut translate_buffer);
let translate_op = TranslateOperation::new( match TranslateOperation::new(
Sequence::parse_set_string(&sets[0]), Sequence::from_str(&sets[0]),
Sequence::parse_set_string(&sets[1]), Sequence::from_str(&sets[1]),
truncate_set1_flag, truncate_set1_flag,
complement_flag, complement_flag,
); ) {
translate_input(&mut locked_stdin, &mut writer, translate_op); Ok(op) => translate_input(&mut locked_stdin, &mut writer, op),
Err(s) => {
show_error!("{}", s);
return 1;
}
};
} }
{ {
let mut reader = BufReader::new(translate_buffer.as_bytes()); let mut reader = BufReader::new(translate_buffer.as_bytes());
let squeeze_op = SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), false); let squeeze_op = SqueezeOperation::new(Sequence::from_str(&sets[1]), false);
translate_input(&mut reader, &mut buffered_stdout, squeeze_op); translate_input(&mut reader, &mut buffered_stdout, squeeze_op);
} }
} }
} else { } else {
let op = TranslateOperation::new( match TranslateOperation::new(
Sequence::parse_set_string(&sets[0]), Sequence::from_str(&sets[0]),
Sequence::parse_set_string(&sets[1]), Sequence::from_str(&sets[1]),
truncate_set1_flag, truncate_set1_flag,
complement_flag, complement_flag,
); ) {
translate_input(&mut locked_stdin, &mut buffered_stdout, op); Ok(op) => translate_input(&mut locked_stdin, &mut buffered_stdout, op),
Err(s) => {
show_error!("{}", s);
return 1;
}
};
} }
0 0