mirror of
https://github.com/uutils/coreutils
synced 2024-12-18 00:53:25 +00:00
Reimplemented everything using new expansion module
Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
c4e04c5384
commit
05d2973510
3 changed files with 46 additions and 355 deletions
|
@ -1,146 +0,0 @@
|
||||||
// * This file is part of the uutils coreutils package.
|
|
||||||
// *
|
|
||||||
// * (c) Michael Gehring <mg@ebfe.org>
|
|
||||||
// * (c) kwantam <kwantam@gmail.com>
|
|
||||||
// * * 2015-04-28 ~ created `expand` module to eliminate most allocs during setup
|
|
||||||
// *
|
|
||||||
// * For the full copyright and license information, please view the LICENSE
|
|
||||||
// * file that was distributed with this source code.
|
|
||||||
|
|
||||||
// spell-checker:ignore (ToDO) allocs slen unesc
|
|
||||||
|
|
||||||
use std::char::from_u32;
|
|
||||||
use std::cmp::min;
|
|
||||||
use std::iter::Peekable;
|
|
||||||
use std::ops::RangeInclusive;
|
|
||||||
|
|
||||||
/// Decodes the escape sequence that follows a backslash.
///
/// `s` is the text immediately *after* the `\` and must not be empty.
///
/// Returns the decoded character and the number of bytes of `s` that belong
/// to the escape:
///
/// * an octal escape consumes 1 to 3 octal digits (1 byte each);
/// * `a`, `b`, `f`, `v`, `n`, `r` and `t` decode to the matching control
///   character and consume 1 byte;
/// * any other character stands for itself and consumes its full UTF-8
///   length, so the caller can safely slice the input at the returned offset.
///
/// # Panics
///
/// Panics if `s` is empty.
#[inline]
fn parse_sequence(s: &str) -> (char, usize) {
    let mut rest = s.chars();
    let first = rest.next().expect("invalid escape: empty string");

    if let Some(mut value) = first.to_digit(8) {
        // Octal escape: the leading digit plus at most two more octal digits.
        let mut consumed = 1;
        while consumed < 3 {
            match rest.next().and_then(|c| c.to_digit(8)) {
                Some(digit) => {
                    value = (value << 3) | digit;
                    consumed += 1;
                }
                None => break,
            }
        }

        // Three octal digits give at most 0o777, which is always a valid
        // scalar value, so this `expect` cannot fire.
        return (from_u32(value).expect("invalid octal escape"), consumed);
    }

    match first {
        'a' => ('\x07', 1),
        'b' => ('\x08', 1),
        'f' => ('\x0c', 1),
        'v' => ('\x0b', 1),
        'n' => ('\n', 1),
        'r' => ('\r', 1),
        't' => ('\t', 1),
        // The caller advances by byte offset, so report the real encoded
        // width; reporting 1 for a multi-byte character would make it slice
        // in the middle of that character.
        other => (other, other.len_utf8()),
    }
}
|
|
||||||
|
|
||||||
struct Unescape<'a> {
|
|
||||||
string: &'a str,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Iterator for Unescape<'a> {
|
|
||||||
type Item = char;
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
|
||||||
let slen = self.string.len();
|
|
||||||
(min(slen, 1), None)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
if self.string.is_empty() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
// is the next character an escape?
|
|
||||||
let (ret, idx) = match self.string.chars().next().unwrap() {
|
|
||||||
'\\' if self.string.len() > 1 => {
|
|
||||||
// yes---it's \ and it's not the last char in a string
|
|
||||||
// we know that \ is 1 byte long so we can index into the string safely
|
|
||||||
let (c, consumed) = parse_sequence(&self.string[1..]);
|
|
||||||
|
|
||||||
(Some(c), 1 + consumed)
|
|
||||||
}
|
|
||||||
c => (Some(c), c.len_utf8()), // not an escape char
|
|
||||||
};
|
|
||||||
|
|
||||||
self.string = &self.string[idx..]; // advance the pointer to the next char
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct ExpandSet<'a> {
|
|
||||||
range: RangeInclusive<u32>,
|
|
||||||
unesc: Peekable<Unescape<'a>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Iterator for ExpandSet<'a> {
|
|
||||||
type Item = char;
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
|
||||||
self.unesc.size_hint()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
// while the Range has elements, try to return chars from it
|
|
||||||
// but make sure that they actually turn out to be Chars!
|
|
||||||
for n in &mut self.range {
|
|
||||||
if let Some(c) = from_u32(n) {
|
|
||||||
return Some(c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(first) = self.unesc.next() {
|
|
||||||
// peek ahead
|
|
||||||
if self.unesc.peek() == Some(&'-') && self.unesc.size_hint().0 > 1 {
|
|
||||||
self.unesc.next(); // this is the '-'
|
|
||||||
let last = self.unesc.next().unwrap(); // this is the end of the range
|
|
||||||
|
|
||||||
{
|
|
||||||
self.range = first as u32 + 1..=last as u32;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return Some(first); // in any case, return the next char
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> ExpandSet<'a> {
|
|
||||||
#[inline]
|
|
||||||
pub fn new(s: &'a str) -> ExpandSet<'a> {
|
|
||||||
ExpandSet {
|
|
||||||
range: 0..=0,
|
|
||||||
unesc: Unescape { string: s }.peekable(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -422,7 +422,7 @@ impl SqueezeOperationNew {
|
||||||
impl SymbolTranslatorNew for SqueezeOperationNew {
|
impl SymbolTranslatorNew for SqueezeOperationNew {
|
||||||
fn translate(&mut self, current: char) -> Option<char> {
|
fn translate(&mut self, current: char) -> Option<char> {
|
||||||
if self.complement {
|
if self.complement {
|
||||||
if self.squeeze_set.iter().any(|c| c.eq(¤t)) {
|
let next = if self.squeeze_set.iter().any(|c| c.eq(¤t)) {
|
||||||
Some(current)
|
Some(current)
|
||||||
} else {
|
} else {
|
||||||
match self.previous {
|
match self.previous {
|
||||||
|
@ -439,33 +439,35 @@ impl SymbolTranslatorNew for SqueezeOperationNew {
|
||||||
Some(current)
|
Some(current)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
self.previous = Some(current);
|
||||||
|
next
|
||||||
} else {
|
} else {
|
||||||
if self.squeeze_set.iter().any(|c| c.eq(¤t)) {
|
let next = if self.squeeze_set.iter().any(|c| c.eq(¤t)) {
|
||||||
match self.previous {
|
match self.previous {
|
||||||
Some(v) => {
|
Some(v) => {
|
||||||
if v.eq(¤t) {
|
if v.eq(¤t) {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
self.previous = Some(current);
|
|
||||||
Some(current)
|
Some(current)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => {
|
None => Some(current),
|
||||||
self.previous = Some(current);
|
|
||||||
Some(current)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Some(current)
|
Some(current)
|
||||||
}
|
};
|
||||||
|
self.previous = Some(current);
|
||||||
|
next
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn translate_input_new<T>(input: &mut dyn BufRead, output: &mut dyn Write, mut translator: T)
|
pub fn translate_input_new<T, R, W>(input: &mut R, output: &mut W, mut translator: T)
|
||||||
where
|
where
|
||||||
T: SymbolTranslatorNew,
|
T: SymbolTranslatorNew,
|
||||||
|
R: BufRead,
|
||||||
|
W: Write,
|
||||||
{
|
{
|
||||||
let mut buf = String::new();
|
let mut buf = String::new();
|
||||||
let mut output_buf = String::new();
|
let mut output_buf = String::new();
|
||||||
|
|
|
@ -14,22 +14,18 @@
|
||||||
extern crate uucore;
|
extern crate uucore;
|
||||||
extern crate nom;
|
extern crate nom;
|
||||||
|
|
||||||
mod expand;
|
|
||||||
mod operation;
|
mod operation;
|
||||||
|
|
||||||
use bit_set::BitSet;
|
|
||||||
use clap::{crate_version, App, Arg};
|
use clap::{crate_version, App, Arg};
|
||||||
use fnv::FnvHashMap;
|
use nom::AsBytes;
|
||||||
use operation::{translate_input_new, Sequence, SqueezeOperationNew, TranslateOperationNew};
|
use operation::{translate_input_new, Sequence, SqueezeOperationNew, TranslateOperationNew};
|
||||||
use std::io::{stdin, stdout, BufRead, BufWriter, Write};
|
use std::io::{stdin, stdout, BufReader, BufWriter};
|
||||||
|
|
||||||
use crate::{expand::ExpandSet, operation::DeleteOperationNew};
|
use crate::operation::DeleteOperationNew;
|
||||||
use uucore::InvalidEncodingHandling;
|
use uucore::InvalidEncodingHandling;
|
||||||
|
|
||||||
static ABOUT: &str = "translate or delete characters";
|
static ABOUT: &str = "translate or delete characters";
|
||||||
|
|
||||||
const BUFFER_LEN: usize = 1024;
|
|
||||||
|
|
||||||
mod options {
|
mod options {
|
||||||
pub const COMPLEMENT: &str = "complement";
|
pub const COMPLEMENT: &str = "complement";
|
||||||
pub const DELETE: &str = "delete";
|
pub const DELETE: &str = "delete";
|
||||||
|
@ -38,190 +34,6 @@ mod options {
|
||||||
pub const SETS: &str = "sets";
|
pub const SETS: &str = "sets";
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A per-character transformation step.
trait SymbolTranslator {
    /// Processes the input character `c`.
    ///
    /// `prev_c` is the previously emitted character; callers pass `'\0'` when
    /// they have no previous character. Returns `Some(ch)` with the character
    /// to emit, or `None` when `c` should be dropped from the output.
    fn translate(&self, c: char, prev_c: char) -> Option<char>;
}
|
|
||||||
|
|
||||||
struct DeleteOperation {
|
|
||||||
bset: BitSet,
|
|
||||||
complement: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SymbolTranslator for DeleteOperation {
|
|
||||||
fn translate(&self, c: char, _prev_c: char) -> Option<char> {
|
|
||||||
let uc = c as usize;
|
|
||||||
if self.complement == self.bset.contains(uc) {
|
|
||||||
Some(c)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct SqueezeOperation {
|
|
||||||
squeeze_set: BitSet,
|
|
||||||
complement: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SqueezeOperation {
|
|
||||||
fn new(squeeze_set: ExpandSet, complement: bool) -> SqueezeOperation {
|
|
||||||
SqueezeOperation {
|
|
||||||
squeeze_set: squeeze_set.map(|c| c as usize).collect(),
|
|
||||||
complement,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SymbolTranslator for SqueezeOperation {
|
|
||||||
fn translate(&self, c: char, prev_c: char) -> Option<char> {
|
|
||||||
if prev_c == c && self.complement != self.squeeze_set.contains(c as usize) {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(c)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct DeleteAndSqueezeOperation {
|
|
||||||
delete_set: BitSet,
|
|
||||||
squeeze_set: BitSet,
|
|
||||||
complement: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DeleteAndSqueezeOperation {
|
|
||||||
fn new(
|
|
||||||
delete_set: ExpandSet,
|
|
||||||
squeeze_set: ExpandSet,
|
|
||||||
complement: bool,
|
|
||||||
) -> DeleteAndSqueezeOperation {
|
|
||||||
DeleteAndSqueezeOperation {
|
|
||||||
delete_set: delete_set.map(|c| c as usize).collect(),
|
|
||||||
squeeze_set: squeeze_set.map(|c| c as usize).collect(),
|
|
||||||
complement,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SymbolTranslator for DeleteAndSqueezeOperation {
|
|
||||||
fn translate(&self, c: char, prev_c: char) -> Option<char> {
|
|
||||||
if self.complement != self.delete_set.contains(c as usize)
|
|
||||||
|| prev_c == c && self.squeeze_set.contains(c as usize)
|
|
||||||
{
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(c)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct TranslateOperation {
|
|
||||||
translate_map: FnvHashMap<usize, char>,
|
|
||||||
complement: bool,
|
|
||||||
s2_last: char,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TranslateOperation {
|
|
||||||
fn new(
|
|
||||||
set1: ExpandSet,
|
|
||||||
set2: &mut ExpandSet,
|
|
||||||
truncate: bool,
|
|
||||||
complement: bool,
|
|
||||||
) -> TranslateOperation {
|
|
||||||
let mut map = FnvHashMap::default();
|
|
||||||
let mut s2_prev = '_';
|
|
||||||
for i in set1 {
|
|
||||||
let s2_next = set2.next();
|
|
||||||
|
|
||||||
if s2_next.is_none() && truncate {
|
|
||||||
map.insert(i as usize, i);
|
|
||||||
} else {
|
|
||||||
s2_prev = s2_next.unwrap_or(s2_prev);
|
|
||||||
map.insert(i as usize, s2_prev);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
TranslateOperation {
|
|
||||||
translate_map: map,
|
|
||||||
complement,
|
|
||||||
s2_last: set2.last().unwrap_or(s2_prev),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SymbolTranslator for TranslateOperation {
|
|
||||||
fn translate(&self, c: char, _prev_c: char) -> Option<char> {
|
|
||||||
if self.complement {
|
|
||||||
Some(if self.translate_map.contains_key(&(c as usize)) {
|
|
||||||
c
|
|
||||||
} else {
|
|
||||||
self.s2_last
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
Some(*self.translate_map.get(&(c as usize)).unwrap_or(&c))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct TranslateAndSqueezeOperation {
|
|
||||||
translate: TranslateOperation,
|
|
||||||
squeeze: SqueezeOperation,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TranslateAndSqueezeOperation {
|
|
||||||
fn new(sets: Vec<String>, truncate: bool, complement: bool) -> TranslateAndSqueezeOperation {
|
|
||||||
let set1 = ExpandSet::new(sets[0].as_ref());
|
|
||||||
let set1_ = ExpandSet::new(sets[0].as_ref());
|
|
||||||
let mut set2 = ExpandSet::new(sets[1].as_ref());
|
|
||||||
let set2_ = ExpandSet::new(sets[1].as_ref());
|
|
||||||
TranslateAndSqueezeOperation {
|
|
||||||
translate: TranslateOperation::new(set1, &mut set2, truncate, complement),
|
|
||||||
squeeze: SqueezeOperation::new(if complement { set1_ } else { set2_ }, complement),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SymbolTranslator for TranslateAndSqueezeOperation {
|
|
||||||
fn translate(&self, c: char, prev_c: char) -> Option<char> {
|
|
||||||
// `unwrap()` will never panic because `Translate.translate()`
|
|
||||||
// always returns `Some`.
|
|
||||||
self.squeeze
|
|
||||||
.translate(self.translate.translate(c, 0 as char).unwrap(), prev_c)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn translate_input<T: SymbolTranslator>(
|
|
||||||
input: &mut dyn BufRead,
|
|
||||||
output: &mut dyn Write,
|
|
||||||
translator: T,
|
|
||||||
) {
|
|
||||||
let mut buf = String::with_capacity(BUFFER_LEN + 4);
|
|
||||||
let mut output_buf = String::with_capacity(BUFFER_LEN + 4);
|
|
||||||
|
|
||||||
while let Ok(length) = input.read_line(&mut buf) {
|
|
||||||
let mut prev_c = 0 as char;
|
|
||||||
if length == 0 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
{
|
|
||||||
// isolation to make borrow checker happy
|
|
||||||
let filtered = buf.chars().filter_map(|c| {
|
|
||||||
let res = translator.translate(c, prev_c);
|
|
||||||
// Set `prev_c` to the post-translate character. This
|
|
||||||
// allows the squeeze operation to correctly function
|
|
||||||
// after the translate operation.
|
|
||||||
if let Some(rc) = res {
|
|
||||||
prev_c = rc;
|
|
||||||
}
|
|
||||||
res
|
|
||||||
});
|
|
||||||
|
|
||||||
output_buf.extend(filtered);
|
|
||||||
output.write_all(output_buf.as_bytes()).unwrap();
|
|
||||||
}
|
|
||||||
buf.clear();
|
|
||||||
output_buf.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_usage() -> String {
|
fn get_usage() -> String {
|
||||||
format!("{} [OPTION]... SET1 [SET2]", executable!())
|
format!("{} [OPTION]... SET1 [SET2]", executable!())
|
||||||
}
|
}
|
||||||
|
@ -280,12 +92,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
|
|
||||||
if delete_flag {
|
if delete_flag {
|
||||||
if squeeze_flag {
|
if squeeze_flag {
|
||||||
let op = DeleteAndSqueezeOperation::new(
|
let mut delete_buffer = vec![];
|
||||||
ExpandSet::new(sets[0].as_ref()),
|
{
|
||||||
ExpandSet::new(sets[1].as_ref()),
|
let mut delete_writer = BufWriter::new(&mut delete_buffer);
|
||||||
complement_flag,
|
let delete_op =
|
||||||
);
|
DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
||||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
translate_input_new(&mut locked_stdin, &mut delete_writer, delete_op);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes());
|
||||||
|
let squeeze_op =
|
||||||
|
SqueezeOperationNew::new(Sequence::parse_set_string(&sets[1]), complement_flag);
|
||||||
|
translate_input_new(&mut squeeze_reader, &mut buffered_stdout, squeeze_op);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
let op = DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
let op = DeleteOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
||||||
translate_input_new(&mut locked_stdin, &mut buffered_stdout, op);
|
translate_input_new(&mut locked_stdin, &mut buffered_stdout, op);
|
||||||
|
@ -294,10 +113,26 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
if sets.len() < 2 {
|
if sets.len() < 2 {
|
||||||
let op =
|
let op =
|
||||||
SqueezeOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
SqueezeOperationNew::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
||||||
|
|
||||||
translate_input_new(&mut locked_stdin, &mut buffered_stdout, op);
|
translate_input_new(&mut locked_stdin, &mut buffered_stdout, op);
|
||||||
} else {
|
} else {
|
||||||
let op = TranslateAndSqueezeOperation::new(sets, truncate_set1_flag, complement_flag);
|
let mut translate_buffer = vec![];
|
||||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
{
|
||||||
|
let mut writer = BufWriter::new(&mut translate_buffer);
|
||||||
|
let translate_op = TranslateOperationNew::new(
|
||||||
|
Sequence::parse_set_string(&sets[0]),
|
||||||
|
Sequence::parse_set_string(&sets[1]),
|
||||||
|
truncate_set1_flag,
|
||||||
|
complement_flag,
|
||||||
|
);
|
||||||
|
translate_input_new(&mut locked_stdin, &mut writer, translate_op);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let mut reader = BufReader::new(translate_buffer.as_bytes());
|
||||||
|
let squeeze_op =
|
||||||
|
SqueezeOperationNew::new(Sequence::parse_set_string(&sets[1]), false);
|
||||||
|
translate_input_new(&mut reader, &mut buffered_stdout, squeeze_op);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let op = TranslateOperationNew::new(
|
let op = TranslateOperationNew::new(
|
||||||
|
|
Loading…
Reference in a new issue