From 5a0870bb3005e738dab920ff29a73dc7b440414b Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 25 Jul 2021 15:51:40 +0800 Subject: [PATCH] Condensed many of the weird stuff in tr in a function...passes more GNU tests Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/operation.rs | 232 +++++++++++++++++++------------------ src/uu/tr/src/tr.rs | 57 +++++---- 2 files changed, 156 insertions(+), 133 deletions(-) diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 2ff43b2a5..72845e531 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -26,44 +26,6 @@ mod unicode_table { pub static BLANK: &'static [char] = &[SPACE, HT]; } -struct Repeat(char); - -impl Repeat { - fn new(element: char) -> Repeat { - Repeat(element) - } -} - -impl Iterator for Repeat { - type Item = char; - - fn next(&mut self) -> Option { - Some(self.0) - } - - fn last(self) -> Option { - Some(self.0) - } - - fn any(&mut self, mut f: F) -> bool - where - Self: Sized, - F: FnMut(Self::Item) -> bool, - { - f(self.0) - } -} - -fn truncate_iterator(input: Option) -> impl Fn((usize, T)) -> Option { - move |(idx, c)| match input { - Some(s) => match s.cmp(&idx) { - std::cmp::Ordering::Greater => Some(c), - _ => None, - }, - None => Some(c), - } -} - #[derive(Debug, Clone, Copy)] pub enum Sequence { Char(char), @@ -89,8 +51,8 @@ impl Sequence { match self { Sequence::Char(c) => Box::new(std::iter::once(*c)), Sequence::CharRange(l, r) => Box::new((*l..=*r).flat_map(char::from_u32)), - Sequence::CharStar(c) => Box::new(Repeat::new(*c)), - Sequence::CharRepeat(c, n) => Box::new(Repeat::new(*c).take(*n)), + Sequence::CharStar(c) => Box::new(std::iter::repeat(*c)), + Sequence::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)), Sequence::Alnum => Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z')), Sequence::Alpha => Box::new(('A'..='Z').chain('a'..='z')), Sequence::Blank => Box::new(unicode_table::BLANK.into_iter().cloned()), @@ -140,22 +102,99 @@ impl Sequence { pub fn last(&self) -> Option { match self { Sequence::CharStar(c) => Some(*c), - // TODO: Can be optimized further... rest => rest.flatten().last(), } } - pub fn len(&self) -> Option { - match self { - Sequence::CharStar(_) => None, - // TODO: Is there a fix for this? - rest => Some(rest.flatten().count()), + // Hide all the nasty sh*t in here + pub fn solve_set_characters( + set1: &Vec, + set2: &Vec, + ) -> Result<(Vec, Vec), String> { + let is_char_star = |s: &&Sequence| -> bool { + match s { + Sequence::CharStar(_) => true, + _ => false, + } + }; + let set1_star_count = set1.iter().filter(is_char_star).count(); + if set1_star_count == 0 { + let set2_star_count = set2.iter().filter(is_char_star).count(); + if set2_star_count < 2 { + let char_star = set2.iter().find_map(|s| match s { + Sequence::CharStar(c) => Some(c), + _ => None, + }); + let mut partition = set2.as_slice().split(|s| match s { + Sequence::CharStar(_) => true, + _ => false, + }); + let set1_len = set1.iter().flat_map(Sequence::flatten).count(); + let set2_len = set2 + .iter() + .filter_map(|s| match s { + Sequence::CharStar(_) => None, + r => Some(r), + }) + .flat_map(Sequence::flatten) + .count(); + let star_compensate_len = set1_len.saturating_sub(set2_len); + let set2_solved = match (partition.next(), partition.next()) { + (None, None) => match char_star { + Some(c) => std::iter::repeat(*c).take(star_compensate_len).collect(), + None => std::iter::empty().collect(), + }, + (None, Some(set2_b)) => { + if let Some(c) = char_star { + std::iter::repeat(*c) + .take(star_compensate_len) + .chain(set2_b.iter().flat_map(Sequence::flatten)) + .collect() + } else { + set2_b.iter().flat_map(Sequence::flatten).collect() + } + } + (Some(set2_a), None) => match char_star { + Some(c) => set2_a + .iter() + .flat_map(Sequence::flatten) + .chain(std::iter::repeat(*c).take(star_compensate_len)) + .collect(), + None => set2_a.iter().flat_map(Sequence::flatten).collect(), + }, + (Some(set2_a), Some(set2_b)) => match char_star { + Some(c) => set2_a + .iter() + .flat_map(Sequence::flatten) + .chain(std::iter::repeat(*c).take(star_compensate_len)) + .chain(set2_b.iter().flat_map(Sequence::flatten)) + .collect(), + None => set2_a + .iter() + .chain(set2_b.iter()) + .flat_map(Sequence::flatten) + .collect(), + }, + }; + let set1_solved = set1.iter().flat_map(Sequence::flatten).collect(); + return Ok((set1_solved, set2_solved)); + } else { + Err(format!( + "{}: only one [c*] repeat construct may appear in string2", + executable!() + )) + } + } else { + Err(format!( + "{}: the [c*] repeat construct may not appear in string1", + executable!() + )) } } } impl Sequence { - pub fn parse_set_string(input: &str) -> Vec { + pub fn from_str(input: &str) -> Vec { many0(alt(( alt(( Sequence::parse_char_range_octal_leftright, @@ -385,28 +424,20 @@ impl SymbolTranslator for DeleteOperation { pub struct TranslateOperationComplement { iter: u32, + set2_iter: usize, set1: Vec, - set2: Box>, + set2: Vec, fallback: char, translation_map: HashMap, } impl TranslateOperationComplement { - fn new( - set1: Vec, - set2: Vec, - set1_truncate_length: Option, - fallback: char, - ) -> TranslateOperationComplement { + fn new(set1: Vec, set2: Vec, fallback: char) -> TranslateOperationComplement { TranslateOperationComplement { iter: 0, - set1: set1 - .iter() - .flat_map(Sequence::flatten) - .enumerate() - .filter_map(truncate_iterator(set1_truncate_length)) - .collect(), - set2: Box::new(set2.into_iter().flat_map(|c| Sequence::flatten(&c))), + set2_iter: 0, + set1, + set2, fallback, translation_map: HashMap::new(), } @@ -419,23 +450,11 @@ pub struct TranslateOperationStandard { } impl TranslateOperationStandard { - fn new( - set1: Vec, - set2: Vec, - set1_truncate_length: Option, - fallback: char, - ) -> TranslateOperationStandard { + fn new(set1: Vec, set2: Vec, fallback: char) -> TranslateOperationStandard { TranslateOperationStandard { translation_map: set1 - .iter() - .flat_map(Sequence::flatten) - .zip( - set2.iter() - .flat_map(Sequence::flatten) - .chain(Repeat(fallback)), - ) - .enumerate() - .filter_map(truncate_iterator(set1_truncate_length)) + .into_iter() + .zip(set2.into_iter().chain(std::iter::repeat(fallback))) .collect::>(), } } @@ -461,40 +480,27 @@ impl TranslateOperation { pub fn new( set1: Vec, set2: Vec, - truncate_set1: bool, + truncate_set1_flag: bool, complement: bool, - ) -> TranslateOperation { - let fallback = set2 - .iter() - .rev() - .next() - .map(Sequence::last) - .flatten() - .unwrap(); - let set1_truncate_length = if truncate_set1 { - set2.iter() - .map(Sequence::len) - .reduce(|a, b| match (a, b) { - (Some(l), Some(r)) => Some(l + r), - _ => None, - }) - .flatten() - } else { - None - }; + ) -> Result { + let (mut set1_solved, set2_solved) = Sequence::solve_set_characters(&set1, &set2)?; + if truncate_set1_flag { + set1_solved.truncate(set2_solved.len()); + } + let fallback = set2.iter().map(Sequence::last).last().flatten().expect( + format!( + "{}: when not truncating set1, string2 must be non-empty", + executable!() + ) + .as_str(), + ); if complement { - TranslateOperation::Complement(TranslateOperationComplement::new( - set1, - set2, - set1_truncate_length, - fallback, + Ok(TranslateOperation::Complement( + TranslateOperationComplement::new(set1_solved, set2_solved, fallback), )) } else { - TranslateOperation::Standard(TranslateOperationStandard::new( - set1, - set2, - set1_truncate_length, - fallback, + Ok(TranslateOperation::Standard( + TranslateOperationStandard::new(set1_solved, set2_solved, fallback), )) } } @@ -511,6 +517,7 @@ impl SymbolTranslator for TranslateOperation { ), TranslateOperation::Complement(TranslateOperationComplement { iter, + set2_iter, set1, set2, fallback, @@ -525,11 +532,12 @@ impl SymbolTranslator for TranslateOperation { Some(*c) } else { while translation_map.get(¤t).is_none() { - if let Some(p) = set2.next() { - let (next_index, next_value) = + if let Some(value) = set2.get(*set2_iter) { + let (next_iter, next_key) = TranslateOperation::next_complement_char(*iter, &*set1); - *iter = next_index; - translation_map.insert(next_value, p); + *iter = next_iter; + *set2_iter = set2_iter.saturating_add(1); + translation_map.insert(next_key, *value); } else { translation_map.insert(current, *fallback); } @@ -622,9 +630,7 @@ fn test_parse_octal() { for a in '0'..='7' { for b in '0'..='7' { for c in '0'..='7' { - assert!( - Sequence::parse_set_string(format!("\\{}{}{}", a, b, c).as_str()).len() == 1 - ); + assert!(Sequence::from_str(format!("\\{}{}{}", a, b, c).as_str()).len() == 1); } } } diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index f024fd6db..59e4852b2 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -69,7 +69,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if sets.is_empty() { show_error!( - "missing operand\nTry `{} --help` for more information.", + "missing operand\nTry '{} --help' for more information.", executable!() ); return 1; @@ -77,7 +77,16 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if !(delete_flag || squeeze_flag) && sets.len() < 2 { show_error!( - "missing operand after '{}'\nTry `{} --help` for more information.", + "missing operand after '{}'\nTry '{} --help' for more information.", + sets[0], + executable!() + ); + return 1; + } + + if sets.len() > 2 { + show_error!( + "extra operand '{}'\nTry '{} --help' for more information.", sets[0], executable!() ); @@ -95,50 +104,58 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let mut delete_buffer = vec![]; { let mut delete_writer = BufWriter::new(&mut delete_buffer); - let delete_op = - DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); + let delete_op = DeleteOperation::new(Sequence::from_str(&sets[0]), complement_flag); translate_input(&mut locked_stdin, &mut delete_writer, delete_op); } { let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes()); - let squeeze_op = - SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), complement_flag); - translate_input(&mut squeeze_reader, &mut buffered_stdout, squeeze_op); + let op = SqueezeOperation::new(Sequence::from_str(&sets[1]), complement_flag); + translate_input(&mut squeeze_reader, &mut buffered_stdout, op); } } else { - let op = DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); + let op = DeleteOperation::new(Sequence::from_str(&sets[0]), complement_flag); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } } else if squeeze_flag { if sets.len() < 2 { - let op = SqueezeOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag); + let op = SqueezeOperation::new(Sequence::from_str(&sets[0]), complement_flag); translate_input(&mut locked_stdin, &mut buffered_stdout, op); } else { let mut translate_buffer = vec![]; { let mut writer = BufWriter::new(&mut translate_buffer); - let translate_op = TranslateOperation::new( - Sequence::parse_set_string(&sets[0]), - Sequence::parse_set_string(&sets[1]), + match TranslateOperation::new( + Sequence::from_str(&sets[0]), + Sequence::from_str(&sets[1]), truncate_set1_flag, complement_flag, - ); - translate_input(&mut locked_stdin, &mut writer, translate_op); + ) { + Ok(op) => translate_input(&mut locked_stdin, &mut writer, op), + Err(s) => { + show_error!("{}", s); + return 1; + } + }; } { let mut reader = BufReader::new(translate_buffer.as_bytes()); - let squeeze_op = SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), false); + let squeeze_op = SqueezeOperation::new(Sequence::from_str(&sets[1]), false); translate_input(&mut reader, &mut buffered_stdout, squeeze_op); } } } else { - let op = TranslateOperation::new( - Sequence::parse_set_string(&sets[0]), - Sequence::parse_set_string(&sets[1]), + match TranslateOperation::new( + Sequence::from_str(&sets[0]), + Sequence::from_str(&sets[1]), truncate_set1_flag, complement_flag, - ); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + ) { + Ok(op) => translate_input(&mut locked_stdin, &mut buffered_stdout, op), + Err(s) => { + show_error!("{}", s); + return 1; + } + }; } 0