From 05d297351043a9d91a74800752317c5b3e1d0ce8 Mon Sep 17 00:00:00 2001 From: Hanif Bin Ariffin Date: Sun, 18 Jul 2021 14:09:26 +0800 Subject: [PATCH] Reimplemented everything using new expansion module Signed-off-by: Hanif Bin Ariffin --- src/uu/tr/src/expand.rs | 146 ----------------------- src/uu/tr/src/operation.rs | 22 ++-- src/uu/tr/src/tr.rs | 233 ++++++------------------------------- 3 files changed, 46 insertions(+), 355 deletions(-) delete mode 100644 src/uu/tr/src/expand.rs diff --git a/src/uu/tr/src/expand.rs b/src/uu/tr/src/expand.rs deleted file mode 100644 index 5d960921e..000000000 --- a/src/uu/tr/src/expand.rs +++ /dev/null @@ -1,146 +0,0 @@ -// * This file is part of the uutils coreutils package. -// * -// * (c) Michael Gehring -// * (c) kwantam -// * * 2015-04-28 ~ created `expand` module to eliminate most allocs during setup -// * -// * For the full copyright and license information, please view the LICENSE -// * file that was distributed with this source code. - -// spell-checker:ignore (ToDO) allocs slen unesc - -use std::char::from_u32; -use std::cmp::min; -use std::iter::Peekable; -use std::ops::RangeInclusive; - -/// Parse a backslash escape sequence to the corresponding character. Assumes -/// the string starts from the character _after_ the `\` and is not empty. -/// -/// Returns a tuple containing the character and the number of characters -/// consumed from the input. The alphabetic escape sequences consume 1 -/// character; octal escape sequences consume 1 to 3 octal digits. 
-#[inline] -fn parse_sequence(s: &str) -> (char, usize) { - let mut s = s.chars(); - let c = s.next().expect("invalid escape: empty string"); - - if ('0'..='7').contains(&c) { - let mut v = c.to_digit(8).unwrap(); - let mut consumed = 1; - let bits_per_digit = 3; - - for c in s.take(2) { - match c.to_digit(8) { - Some(c) => { - v = (v << bits_per_digit) | c; - consumed += 1; - } - None => break, - } - } - - (from_u32(v).expect("invalid octal escape"), consumed) - } else { - ( - match c { - 'a' => 0x07u8 as char, - 'b' => 0x08u8 as char, - 'f' => 0x0cu8 as char, - 'v' => 0x0bu8 as char, - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - c => c, - }, - 1, - ) - } -} - -struct Unescape<'a> { - string: &'a str, -} - -impl<'a> Iterator for Unescape<'a> { - type Item = char; - - #[inline] - fn size_hint(&self) -> (usize, Option) { - let slen = self.string.len(); - (min(slen, 1), None) - } - - #[inline] - fn next(&mut self) -> Option { - if self.string.is_empty() { - return None; - } - - // is the next character an escape? - let (ret, idx) = match self.string.chars().next().unwrap() { - '\\' if self.string.len() > 1 => { - // yes---it's \ and it's not the last char in a string - // we know that \ is 1 byte long so we can index into the string safely - let (c, consumed) = parse_sequence(&self.string[1..]); - - (Some(c), 1 + consumed) - } - c => (Some(c), c.len_utf8()), // not an escape char - }; - - self.string = &self.string[idx..]; // advance the pointer to the next char - ret - } -} - -pub struct ExpandSet<'a> { - range: RangeInclusive, - unesc: Peekable>, -} - -impl<'a> Iterator for ExpandSet<'a> { - type Item = char; - - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.unesc.size_hint() - } - - #[inline] - fn next(&mut self) -> Option { - // while the Range has elements, try to return chars from it - // but make sure that they actually turn out to be Chars! 
- for n in &mut self.range { - if let Some(c) = from_u32(n) { - return Some(c); - } - } - - if let Some(first) = self.unesc.next() { - // peek ahead - if self.unesc.peek() == Some(&'-') && self.unesc.size_hint().0 > 1 { - self.unesc.next(); // this is the '-' - let last = self.unesc.next().unwrap(); // this is the end of the range - - { - self.range = first as u32 + 1..=last as u32; - } - } - - return Some(first); // in any case, return the next char - } - - None - } -} - -impl<'a> ExpandSet<'a> { - #[inline] - pub fn new(s: &'a str) -> ExpandSet<'a> { - ExpandSet { - range: 0..=0, - unesc: Unescape { string: s }.peekable(), - } - } -} diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 72c0158f3..dd3e722ca 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -422,7 +422,7 @@ impl SqueezeOperationNew { impl SymbolTranslatorNew for SqueezeOperationNew { fn translate(&mut self, current: char) -> Option<char> { if self.complement { - if self.squeeze_set.iter().any(|c| c.eq(&current)) { + let next = if self.squeeze_set.iter().any(|c| c.eq(&current)) { Some(current) } else { match self.previous { @@ -439,33 +439,35 @@ impl SymbolTranslatorNew for SqueezeOperationNew { Some(current) } } - } + }; + self.previous = Some(current); + next } else { - if self.squeeze_set.iter().any(|c| c.eq(&current)) { + let next = if self.squeeze_set.iter().any(|c| c.eq(&current)) { match self.previous { Some(v) => { if v.eq(&current) { None } else { - self.previous = Some(current); Some(current) } } - None => { - self.previous = Some(current); - Some(current) - } + None => Some(current), } } else { Some(current) - } + }; + self.previous = Some(current); + next } } } -pub fn translate_input_new<T>(input: &mut dyn BufRead, output: &mut dyn Write, mut translator: T) +pub fn translate_input_new<T, R, W>(input: &mut R, output: &mut W, mut translator: T) where T: SymbolTranslatorNew, + R: BufRead, + W: Write, { let mut buf = String::new(); let mut output_buf = String::new(); diff --git 
a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 286e7b023..c21bc679e 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -14,22 +14,18 @@ extern crate uucore; extern crate nom; -mod expand; mod operation; -use bit_set::BitSet; use clap::{crate_version, App, Arg}; -use fnv::FnvHashMap; +use nom::AsBytes; use operation::{translate_input_new, Sequence, SqueezeOperationNew, TranslateOperationNew}; -use std::io::{stdin, stdout, BufRead, BufWriter, Write}; +use std::io::{stdin, stdout, BufReader, BufWriter}; -use crate::{expand::ExpandSet, operation::DeleteOperationNew}; +use crate::operation::DeleteOperationNew; use uucore::InvalidEncodingHandling; static ABOUT: &str = "translate or delete characters"; -const BUFFER_LEN: usize = 1024; - mod options { pub const COMPLEMENT: &str = "complement"; pub const DELETE: &str = "delete"; @@ -38,190 +34,6 @@ mod options { pub const SETS: &str = "sets"; } -trait SymbolTranslator { - fn translate(&self, c: char, prev_c: char) -> Option; -} - -struct DeleteOperation { - bset: BitSet, - complement: bool, -} - -impl SymbolTranslator for DeleteOperation { - fn translate(&self, c: char, _prev_c: char) -> Option { - let uc = c as usize; - if self.complement == self.bset.contains(uc) { - Some(c) - } else { - None - } - } -} - -struct SqueezeOperation { - squeeze_set: BitSet, - complement: bool, -} - -impl SqueezeOperation { - fn new(squeeze_set: ExpandSet, complement: bool) -> SqueezeOperation { - SqueezeOperation { - squeeze_set: squeeze_set.map(|c| c as usize).collect(), - complement, - } - } -} - -impl SymbolTranslator for SqueezeOperation { - fn translate(&self, c: char, prev_c: char) -> Option { - if prev_c == c && self.complement != self.squeeze_set.contains(c as usize) { - None - } else { - Some(c) - } - } -} - -struct DeleteAndSqueezeOperation { - delete_set: BitSet, - squeeze_set: BitSet, - complement: bool, -} - -impl DeleteAndSqueezeOperation { - fn new( - delete_set: ExpandSet, - squeeze_set: ExpandSet, - complement: 
bool, - ) -> DeleteAndSqueezeOperation { - DeleteAndSqueezeOperation { - delete_set: delete_set.map(|c| c as usize).collect(), - squeeze_set: squeeze_set.map(|c| c as usize).collect(), - complement, - } - } -} - -impl SymbolTranslator for DeleteAndSqueezeOperation { - fn translate(&self, c: char, prev_c: char) -> Option { - if self.complement != self.delete_set.contains(c as usize) - || prev_c == c && self.squeeze_set.contains(c as usize) - { - None - } else { - Some(c) - } - } -} - -struct TranslateOperation { - translate_map: FnvHashMap, - complement: bool, - s2_last: char, -} - -impl TranslateOperation { - fn new( - set1: ExpandSet, - set2: &mut ExpandSet, - truncate: bool, - complement: bool, - ) -> TranslateOperation { - let mut map = FnvHashMap::default(); - let mut s2_prev = '_'; - for i in set1 { - let s2_next = set2.next(); - - if s2_next.is_none() && truncate { - map.insert(i as usize, i); - } else { - s2_prev = s2_next.unwrap_or(s2_prev); - map.insert(i as usize, s2_prev); - } - } - TranslateOperation { - translate_map: map, - complement, - s2_last: set2.last().unwrap_or(s2_prev), - } - } -} - -impl SymbolTranslator for TranslateOperation { - fn translate(&self, c: char, _prev_c: char) -> Option { - if self.complement { - Some(if self.translate_map.contains_key(&(c as usize)) { - c - } else { - self.s2_last - }) - } else { - Some(*self.translate_map.get(&(c as usize)).unwrap_or(&c)) - } - } -} - -struct TranslateAndSqueezeOperation { - translate: TranslateOperation, - squeeze: SqueezeOperation, -} - -impl TranslateAndSqueezeOperation { - fn new(sets: Vec, truncate: bool, complement: bool) -> TranslateAndSqueezeOperation { - let set1 = ExpandSet::new(sets[0].as_ref()); - let set1_ = ExpandSet::new(sets[0].as_ref()); - let mut set2 = ExpandSet::new(sets[1].as_ref()); - let set2_ = ExpandSet::new(sets[1].as_ref()); - TranslateAndSqueezeOperation { - translate: TranslateOperation::new(set1, &mut set2, truncate, complement), - squeeze: 
SqueezeOperation::new(if complement { set1_ } else { set2_ }, complement), - } - } -} - -impl SymbolTranslator for TranslateAndSqueezeOperation { - fn translate(&self, c: char, prev_c: char) -> Option { - // `unwrap()` will never panic because `Translate.translate()` - // always returns `Some`. - self.squeeze - .translate(self.translate.translate(c, 0 as char).unwrap(), prev_c) - } -} - -fn translate_input( - input: &mut dyn BufRead, - output: &mut dyn Write, - translator: T, -) { - let mut buf = String::with_capacity(BUFFER_LEN + 4); - let mut output_buf = String::with_capacity(BUFFER_LEN + 4); - - while let Ok(length) = input.read_line(&mut buf) { - let mut prev_c = 0 as char; - if length == 0 { - break; - } - { - // isolation to make borrow checker happy - let filtered = buf.chars().filter_map(|c| { - let res = translator.translate(c, prev_c); - // Set `prev_c` to the post-translate character. This - // allows the squeeze operation to correctly function - // after the translate operation. - if let Some(rc) = res { - prev_c = rc; - } - res - }); - - output_buf.extend(filtered); - output.write_all(output_buf.as_bytes()).unwrap(); - } - buf.clear(); - output_buf.clear(); - } -} - fn get_usage() -> String { format!("{} [OPTION]... 
SET1 [SET2]", executable!()) } @@ -280,12 +92,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if delete_flag { if squeeze_flag { - let op = DeleteAndSqueezeOperation::new( - ExpandSet::new(sets[0].as_ref()), - ExpandSet::new(sets[1].as_ref()), - complement_flag, - ); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + let mut delete_buffer = vec![]; + { + let mut delete_writer = BufWriter::new(&mut delete_buffer); + let delete_op = + DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag); + translate_input_new(&mut locked_stdin, &mut delete_writer, delete_op); + } + { + let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes()); + let squeeze_op = + SqueezeOperationNew::new(Sequence::parse_set_string(&sets[1]), complement_flag); + translate_input_new(&mut squeeze_reader, &mut buffered_stdout, squeeze_op); + } } else { let op = DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag); translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); @@ -294,10 +113,26 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if sets.len() < 2 { let op = SqueezeOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag); + translate_input_new(&mut locked_stdin, &mut buffered_stdout, op); } else { - let op = TranslateAndSqueezeOperation::new(sets, truncate_set1_flag, complement_flag); - translate_input(&mut locked_stdin, &mut buffered_stdout, op); + let mut translate_buffer = vec![]; + { + let mut writer = BufWriter::new(&mut translate_buffer); + let translate_op = TranslateOperationNew::new( + Sequence::parse_set_string(&sets[0]), + Sequence::parse_set_string(&sets[1]), + truncate_set1_flag, + complement_flag, + ); + translate_input_new(&mut locked_stdin, &mut writer, translate_op); + } + { + let mut reader = BufReader::new(translate_buffer.as_bytes()); + let squeeze_op = + SqueezeOperationNew::new(Sequence::parse_set_string(&sets[1]), false); + translate_input_new(&mut reader, &mut 
buffered_stdout, squeeze_op); + } } } else { let op = TranslateOperationNew::new(