mirror of
https://github.com/uutils/coreutils
synced 2024-12-18 00:53:25 +00:00
Reimplemented everything using new expansion module
Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
c4e04c5384
commit
05d2973510
3 changed files with 46 additions and 355 deletions
|
@ -1,146 +0,0 @@
|
|||
// * This file is part of the uutils coreutils package.
|
||||
// *
|
||||
// * (c) Michael Gehring <mg@ebfe.org>
|
||||
// * (c) kwantam <kwantam@gmail.com>
|
||||
// * * 2015-04-28 ~ created `expand` module to eliminate most allocs during setup
|
||||
// *
|
||||
// * For the full copyright and license information, please view the LICENSE
|
||||
// * file that was distributed with this source code.
|
||||
|
||||
// spell-checker:ignore (ToDO) allocs slen unesc
|
||||
|
||||
use std::char::from_u32;
|
||||
use std::cmp::min;
|
||||
use std::iter::Peekable;
|
||||
use std::ops::RangeInclusive;
|
||||
|
||||
/// Parse a backslash escape sequence to the corresponding character. Assumes
/// the string starts from the character _after_ the `\` and is not empty.
///
/// Returns a tuple containing the character and the number of **bytes**
/// consumed from the input, so the caller can use the count directly as a
/// `&str` slice offset. The alphabetic escape sequences and octal digits are
/// ASCII, so for them the byte count equals the character count: alphabetic
/// escapes consume 1 byte and octal escapes consume 1 to 3 bytes. Any other
/// character is returned unchanged and consumes its full UTF-8 length.
///
/// # Panics
///
/// Panics if `s` is empty.
#[inline]
fn parse_sequence(s: &str) -> (char, usize) {
    let mut rest = s.chars();
    let first = rest.next().expect("invalid escape: empty string");

    if let Some(mut value) = first.to_digit(8) {
        // Octal escape: one leading digit plus up to two more.
        let bits_per_digit = 3;
        let mut consumed = 1;

        for c in rest.take(2) {
            match c.to_digit(8) {
                Some(digit) => {
                    value = (value << bits_per_digit) | digit;
                    consumed += 1;
                }
                None => break,
            }
        }

        // Three octal digits encode at most 0o777, which is always a valid
        // scalar value, so this conversion cannot fail.
        (from_u32(value).expect("invalid octal escape"), consumed)
    } else {
        let unescaped = match first {
            'a' => 0x07u8 as char,
            'b' => 0x08u8 as char,
            'f' => 0x0cu8 as char,
            'v' => 0x0bu8 as char,
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            other => other,
        };

        // Report the UTF-8 width rather than a flat 1: an escaped non-ASCII
        // character occupies several bytes, and a count of 1 would leave the
        // caller slicing in the middle of that character.
        (unescaped, first.len_utf8())
    }
}
|
||||
|
||||
struct Unescape<'a> {
|
||||
string: &'a str,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Unescape<'a> {
|
||||
type Item = char;
|
||||
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let slen = self.string.len();
|
||||
(min(slen, 1), None)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.string.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// is the next character an escape?
|
||||
let (ret, idx) = match self.string.chars().next().unwrap() {
|
||||
'\\' if self.string.len() > 1 => {
|
||||
// yes---it's \ and it's not the last char in a string
|
||||
// we know that \ is 1 byte long so we can index into the string safely
|
||||
let (c, consumed) = parse_sequence(&self.string[1..]);
|
||||
|
||||
(Some(c), 1 + consumed)
|
||||
}
|
||||
c => (Some(c), c.len_utf8()), // not an escape char
|
||||
};
|
||||
|
||||
self.string = &self.string[idx..]; // advance the pointer to the next char
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ExpandSet<'a> {
|
||||
range: RangeInclusive<u32>,
|
||||
unesc: Peekable<Unescape<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for ExpandSet<'a> {
|
||||
type Item = char;
|
||||
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
self.unesc.size_hint()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// while the Range has elements, try to return chars from it
|
||||
// but make sure that they actually turn out to be Chars!
|
||||
for n in &mut self.range {
|
||||
if let Some(c) = from_u32(n) {
|
||||
return Some(c);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(first) = self.unesc.next() {
|
||||
// peek ahead
|
||||
if self.unesc.peek() == Some(&'-') && self.unesc.size_hint().0 > 1 {
|
||||
self.unesc.next(); // this is the '-'
|
||||
let last = self.unesc.next().unwrap(); // this is the end of the range
|
||||
|
||||
{
|
||||
self.range = first as u32 + 1..=last as u32;
|
||||
}
|
||||
}
|
||||
|
||||
return Some(first); // in any case, return the next char
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ExpandSet<'a> {
|
||||
#[inline]
|
||||
pub fn new(s: &'a str) -> ExpandSet<'a> {
|
||||
ExpandSet {
|
||||
range: 0..=0,
|
||||
unesc: Unescape { string: s }.peekable(),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -422,7 +422,7 @@ impl SqueezeOperationNew {
|
|||
impl SymbolTranslatorNew for SqueezeOperationNew {
|
||||
fn translate(&mut self, current: char) -> Option<char> {
|
||||
if self.complement {
|
||||
if self.squeeze_set.iter().any(|c| c.eq(¤t)) {
|
||||
let next = if self.squeeze_set.iter().any(|c| c.eq(¤t)) {
|
||||
Some(current)
|
||||
} else {
|
||||
match self.previous {
|
||||
|
@ -439,33 +439,35 @@ impl SymbolTranslatorNew for SqueezeOperationNew {
|
|||
Some(current)
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
self.previous = Some(current);
|
||||
next
|
||||
} else {
|
||||
if self.squeeze_set.iter().any(|c| c.eq(¤t)) {
|
||||
let next = if self.squeeze_set.iter().any(|c| c.eq(¤t)) {
|
||||
match self.previous {
|
||||
Some(v) => {
|
||||
if v.eq(¤t) {
|
||||
None
|
||||
} else {
|
||||
self.previous = Some(current);
|
||||
Some(current)
|
||||
}
|
||||
}
|
||||
None => {
|
||||
self.previous = Some(current);
|
||||
Some(current)
|
||||
}
|
||||
None => Some(current),
|
||||
}
|
||||
} else {
|
||||
Some(current)
|
||||
}
|
||||
};
|
||||
self.previous = Some(current);
|
||||
next
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn translate_input_new<T>(input: &mut dyn BufRead, output: &mut dyn Write, mut translator: T)
|
||||
pub fn translate_input_new<T, R, W>(input: &mut R, output: &mut W, mut translator: T)
|
||||
where
|
||||
T: SymbolTranslatorNew,
|
||||
R: BufRead,
|
||||
W: Write,
|
||||
{
|
||||
let mut buf = String::new();
|
||||
let mut output_buf = String::new();
|
||||
|
|
|
@ -14,22 +14,18 @@
|
|||
extern crate uucore;
|
||||
extern crate nom;
|
||||
|
||||
mod expand;
|
||||
mod operation;
|
||||
|
||||
use bit_set::BitSet;
|
||||
use clap::{crate_version, App, Arg};
|
||||
use fnv::FnvHashMap;
|
||||
use nom::AsBytes;
|
||||
use operation::{translate_input_new, Sequence, SqueezeOperationNew, TranslateOperationNew};
|
||||
use std::io::{stdin, stdout, BufRead, BufWriter, Write};
|
||||
use std::io::{stdin, stdout, BufReader, BufWriter};
|
||||
|
||||
use crate::{expand::ExpandSet, operation::DeleteOperationNew};
|
||||
use crate::operation::DeleteOperationNew;
|
||||
use uucore::InvalidEncodingHandling;
|
||||
|
||||
static ABOUT: &str = "translate or delete characters";
|
||||
|
||||
const BUFFER_LEN: usize = 1024;
|
||||
|
||||
mod options {
|
||||
pub const COMPLEMENT: &str = "complement";
|
||||
pub const DELETE: &str = "delete";
|
||||
|
@ -38,190 +34,6 @@ mod options {
|
|||
pub const SETS: &str = "sets";
|
||||
}
|
||||
|
||||
/// A per-character transformation applied to the input stream.
trait SymbolTranslator {
    /// Decides what to emit for `symbol`, given `previous`, the character
    /// supplied by the caller as the one preceding it.
    ///
    /// Returns `Some` with the character to output, or `None` to drop the
    /// symbol.
    fn translate(&self, symbol: char, previous: char) -> Option<char>;
}
|
||||
|
||||
struct DeleteOperation {
|
||||
bset: BitSet,
|
||||
complement: bool,
|
||||
}
|
||||
|
||||
impl SymbolTranslator for DeleteOperation {
|
||||
fn translate(&self, c: char, _prev_c: char) -> Option<char> {
|
||||
let uc = c as usize;
|
||||
if self.complement == self.bset.contains(uc) {
|
||||
Some(c)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct SqueezeOperation {
|
||||
squeeze_set: BitSet,
|
||||
complement: bool,
|
||||
}
|
||||
|
||||
impl SqueezeOperation {
|
||||
fn new(squeeze_set: ExpandSet, complement: bool) -> SqueezeOperation {
|
||||
SqueezeOperation {
|
||||
squeeze_set: squeeze_set.map(|c| c as usize).collect(),
|
||||
complement,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTranslator for SqueezeOperation {
|
||||
fn translate(&self, c: char, prev_c: char) -> Option<char> {
|
||||
if prev_c == c && self.complement != self.squeeze_set.contains(c as usize) {
|
||||
None
|
||||
} else {
|
||||
Some(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct DeleteAndSqueezeOperation {
|
||||
delete_set: BitSet,
|
||||
squeeze_set: BitSet,
|
||||
complement: bool,
|
||||
}
|
||||
|
||||
impl DeleteAndSqueezeOperation {
|
||||
fn new(
|
||||
delete_set: ExpandSet,
|
||||
squeeze_set: ExpandSet,
|
||||
complement: bool,
|
||||
) -> DeleteAndSqueezeOperation {
|
||||
DeleteAndSqueezeOperation {
|
||||
delete_set: delete_set.map(|c| c as usize).collect(),
|
||||
squeeze_set: squeeze_set.map(|c| c as usize).collect(),
|
||||
complement,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTranslator for DeleteAndSqueezeOperation {
|
||||
fn translate(&self, c: char, prev_c: char) -> Option<char> {
|
||||
if self.complement != self.delete_set.contains(c as usize)
|
||||
|| prev_c == c && self.squeeze_set.contains(c as usize)
|
||||
{
|
||||
None
|
||||
} else {
|
||||
Some(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct TranslateOperation {
|
||||
translate_map: FnvHashMap<usize, char>,
|
||||
complement: bool,
|
||||
s2_last: char,
|
||||
}
|
||||
|
||||
impl TranslateOperation {
|
||||
fn new(
|
||||
set1: ExpandSet,
|
||||
set2: &mut ExpandSet,
|
||||
truncate: bool,
|
||||
complement: bool,
|
||||
) -> TranslateOperation {
|
||||
let mut map = FnvHashMap::default();
|
||||
let mut s2_prev = '_';
|
||||
for i in set1 {
|
||||
let s2_next = set2.next();
|
||||
|
||||
if s2_next.is_none() && truncate {
|
||||
map.insert(i as usize, i);
|
||||
} else {
|
||||
s2_prev = s2_next.unwrap_or(s2_prev);
|
||||
map.insert(i as usize, s2_prev);
|
||||
}
|
||||
}
|
||||
TranslateOperation {
|
||||
translate_map: map,
|
||||
complement,
|
||||
s2_last: set2.last().unwrap_or(s2_prev),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTranslator for TranslateOperation {
|
||||
fn translate(&self, c: char, _prev_c: char) -> Option<char> {
|
||||
if self.complement {
|
||||
Some(if self.translate_map.contains_key(&(c as usize)) {
|
||||
c
|
||||
} else {
|
||||
self.s2_last
|
||||
})
|
||||
} else {
|
||||
Some(*self.translate_map.get(&(c as usize)).unwrap_or(&c))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct TranslateAndSqueezeOperation {
|
||||
translate: TranslateOperation,
|
||||
squeeze: SqueezeOperation,
|
||||
}
|
||||
|
||||
impl TranslateAndSqueezeOperation {
|
||||
fn new(sets: Vec<String>, truncate: bool, complement: bool) -> TranslateAndSqueezeOperation {
|
||||
let set1 = ExpandSet::new(sets[0].as_ref());
|
||||
let set1_ = ExpandSet::new(sets[0].as_ref());
|
||||
let mut set2 = ExpandSet::new(sets[1].as_ref());
|
||||
let set2_ = ExpandSet::new(sets[1].as_ref());
|
||||
TranslateAndSqueezeOperation {
|
||||
translate: TranslateOperation::new(set1, &mut set2, truncate, complement),
|
||||
squeeze: SqueezeOperation::new(if complement { set1_ } else { set2_ }, complement),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTranslator for TranslateAndSqueezeOperation {
|
||||
fn translate(&self, c: char, prev_c: char) -> Option<char> {
|
||||
// `unwrap()` will never panic because `Translate.translate()`
|
||||
// always returns `Some`.
|
||||
self.squeeze
|
||||
.translate(self.translate.translate(c, 0 as char).unwrap(), prev_c)
|
||||
}
|
||||
}
|
||||
|
||||
fn translate_input<T: SymbolTranslator>(
|
||||
input: &mut dyn BufRead,
|
||||
output: &mut dyn Write,
|
||||
translator: T,
|
||||
) {
|
||||
let mut buf = String::with_capacity(BUFFER_LEN + 4);
|
||||
let mut output_buf = String::with_capacity(BUFFER_LEN + 4);
|
||||
|
||||
while let Ok(length) = input.read_line(&mut buf) {
|
||||
let mut prev_c = 0 as char;
|
||||
if length == 0 {
|
||||
break;
|
||||
}
|
||||
{
|
||||
// isolation to make borrow checker happy
|
||||
let filtered = buf.chars().filter_map(|c| {
|
||||
let res = translator.translate(c, prev_c);
|
||||
// Set `prev_c` to the post-translate character. This
|
||||
// allows the squeeze operation to correctly function
|
||||
// after the translate operation.
|
||||
if let Some(rc) = res {
|
||||
prev_c = rc;
|
||||
}
|
||||
res
|
||||
});
|
||||
|
||||
output_buf.extend(filtered);
|
||||
output.write_all(output_buf.as_bytes()).unwrap();
|
||||
}
|
||||
buf.clear();
|
||||
output_buf.clear();
|
||||
}
|
||||
}
|
||||
|
||||
fn get_usage() -> String {
|
||||
format!("{} [OPTION]... SET1 [SET2]", executable!())
|
||||
}
|
||||
|
@ -280,12 +92,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
|
||||
if delete_flag {
|
||||
if squeeze_flag {
|
||||
let op = DeleteAndSqueezeOperation::new(
|
||||
ExpandSet::new(sets[0].as_ref()),
|
||||
ExpandSet::new(sets[1].as_ref()),
|
||||
complement_flag,
|
||||
);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
let mut delete_buffer = vec![];
|
||||
{
|
||||
let mut delete_writer = BufWriter::new(&mut delete_buffer);
|
||||
let delete_op =
|
||||
DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
||||
translate_input_new(&mut locked_stdin, &mut delete_writer, delete_op);
|
||||
}
|
||||
{
|
||||
let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes());
|
||||
let squeeze_op =
|
||||
SqueezeOperationNew::new(Sequence::parse_set_string(&sets[1]), complement_flag);
|
||||
translate_input_new(&mut squeeze_reader, &mut buffered_stdout, squeeze_op);
|
||||
}
|
||||
} else {
|
||||
let op = DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
||||
translate_input_new(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
|
@ -294,10 +113,26 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
if sets.len() < 2 {
|
||||
let op =
|
||||
SqueezeOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
||||
|
||||
translate_input_new(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
} else {
|
||||
let op = TranslateAndSqueezeOperation::new(sets, truncate_set1_flag, complement_flag);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
let mut translate_buffer = vec![];
|
||||
{
|
||||
let mut writer = BufWriter::new(&mut translate_buffer);
|
||||
let translate_op = TranslateOperationNew::new(
|
||||
Sequence::parse_set_string(&sets[0]),
|
||||
Sequence::parse_set_string(&sets[1]),
|
||||
truncate_set1_flag,
|
||||
complement_flag,
|
||||
);
|
||||
translate_input_new(&mut locked_stdin, &mut writer, translate_op);
|
||||
}
|
||||
{
|
||||
let mut reader = BufReader::new(translate_buffer.as_bytes());
|
||||
let squeeze_op =
|
||||
SqueezeOperationNew::new(Sequence::parse_set_string(&sets[1]), false);
|
||||
translate_input_new(&mut reader, &mut buffered_stdout, squeeze_op);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let op = TranslateOperationNew::new(
|
||||
|
|
Loading…
Reference in a new issue