mirror of
https://github.com/uutils/coreutils
synced 2024-12-18 00:53:25 +00:00
Condensed many of the weird stuff in tr in a function...passes more GNU tests
Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
b7a0ad15a7
commit
5a0870bb30
2 changed files with 156 additions and 133 deletions
|
@@ -26,44 +26,6 @@ mod unicode_table {
|
|||
pub static BLANK: &'static [char] = &[SPACE, HT];
|
||||
}
|
||||
|
||||
struct Repeat(char);
|
||||
|
||||
impl Repeat {
|
||||
fn new(element: char) -> Repeat {
|
||||
Repeat(element)
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Repeat {
|
||||
type Item = char;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
Some(self.0)
|
||||
}
|
||||
|
||||
fn last(self) -> Option<Self::Item> {
|
||||
Some(self.0)
|
||||
}
|
||||
|
||||
fn any<F>(&mut self, mut f: F) -> bool
|
||||
where
|
||||
Self: Sized,
|
||||
F: FnMut(Self::Item) -> bool,
|
||||
{
|
||||
f(self.0)
|
||||
}
|
||||
}
|
||||
|
||||
fn truncate_iterator<T>(input: Option<usize>) -> impl Fn((usize, T)) -> Option<T> {
|
||||
move |(idx, c)| match input {
|
||||
Some(s) => match s.cmp(&idx) {
|
||||
std::cmp::Ordering::Greater => Some(c),
|
||||
_ => None,
|
||||
},
|
||||
None => Some(c),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum Sequence {
|
||||
Char(char),
|
||||
|
@@ -89,8 +51,8 @@ impl Sequence {
|
|||
match self {
|
||||
Sequence::Char(c) => Box::new(std::iter::once(*c)),
|
||||
Sequence::CharRange(l, r) => Box::new((*l..=*r).flat_map(char::from_u32)),
|
||||
Sequence::CharStar(c) => Box::new(Repeat::new(*c)),
|
||||
Sequence::CharRepeat(c, n) => Box::new(Repeat::new(*c).take(*n)),
|
||||
Sequence::CharStar(c) => Box::new(std::iter::repeat(*c)),
|
||||
Sequence::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)),
|
||||
Sequence::Alnum => Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z')),
|
||||
Sequence::Alpha => Box::new(('A'..='Z').chain('a'..='z')),
|
||||
Sequence::Blank => Box::new(unicode_table::BLANK.into_iter().cloned()),
|
||||
|
@@ -140,22 +102,99 @@ impl Sequence {
|
|||
pub fn last(&self) -> Option<char> {
|
||||
match self {
|
||||
Sequence::CharStar(c) => Some(*c),
|
||||
// TODO: Can be optimized further...
|
||||
rest => rest.flatten().last(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn len(&self) -> Option<usize> {
|
||||
match self {
|
||||
Sequence::CharStar(_) => None,
|
||||
// TODO: Is there a fix for this?
|
||||
rest => Some(rest.flatten().count()),
|
||||
// Hide all the nasty sh*t in here
|
||||
pub fn solve_set_characters(
|
||||
set1: &Vec<Sequence>,
|
||||
set2: &Vec<Sequence>,
|
||||
) -> Result<(Vec<char>, Vec<char>), String> {
|
||||
let is_char_star = |s: &&Sequence| -> bool {
|
||||
match s {
|
||||
Sequence::CharStar(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
};
|
||||
let set1_star_count = set1.iter().filter(is_char_star).count();
|
||||
if set1_star_count == 0 {
|
||||
let set2_star_count = set2.iter().filter(is_char_star).count();
|
||||
if set2_star_count < 2 {
|
||||
let char_star = set2.iter().find_map(|s| match s {
|
||||
Sequence::CharStar(c) => Some(c),
|
||||
_ => None,
|
||||
});
|
||||
let mut partition = set2.as_slice().split(|s| match s {
|
||||
Sequence::CharStar(_) => true,
|
||||
_ => false,
|
||||
});
|
||||
let set1_len = set1.iter().flat_map(Sequence::flatten).count();
|
||||
let set2_len = set2
|
||||
.iter()
|
||||
.filter_map(|s| match s {
|
||||
Sequence::CharStar(_) => None,
|
||||
r => Some(r),
|
||||
})
|
||||
.flat_map(Sequence::flatten)
|
||||
.count();
|
||||
let star_compensate_len = set1_len.saturating_sub(set2_len);
|
||||
let set2_solved = match (partition.next(), partition.next()) {
|
||||
(None, None) => match char_star {
|
||||
Some(c) => std::iter::repeat(*c).take(star_compensate_len).collect(),
|
||||
None => std::iter::empty().collect(),
|
||||
},
|
||||
(None, Some(set2_b)) => {
|
||||
if let Some(c) = char_star {
|
||||
std::iter::repeat(*c)
|
||||
.take(star_compensate_len)
|
||||
.chain(set2_b.iter().flat_map(Sequence::flatten))
|
||||
.collect()
|
||||
} else {
|
||||
set2_b.iter().flat_map(Sequence::flatten).collect()
|
||||
}
|
||||
}
|
||||
(Some(set2_a), None) => match char_star {
|
||||
Some(c) => set2_a
|
||||
.iter()
|
||||
.flat_map(Sequence::flatten)
|
||||
.chain(std::iter::repeat(*c).take(star_compensate_len))
|
||||
.collect(),
|
||||
None => set2_a.iter().flat_map(Sequence::flatten).collect(),
|
||||
},
|
||||
(Some(set2_a), Some(set2_b)) => match char_star {
|
||||
Some(c) => set2_a
|
||||
.iter()
|
||||
.flat_map(Sequence::flatten)
|
||||
.chain(std::iter::repeat(*c).take(star_compensate_len))
|
||||
.chain(set2_b.iter().flat_map(Sequence::flatten))
|
||||
.collect(),
|
||||
None => set2_a
|
||||
.iter()
|
||||
.chain(set2_b.iter())
|
||||
.flat_map(Sequence::flatten)
|
||||
.collect(),
|
||||
},
|
||||
};
|
||||
let set1_solved = set1.iter().flat_map(Sequence::flatten).collect();
|
||||
return Ok((set1_solved, set2_solved));
|
||||
} else {
|
||||
Err(format!(
|
||||
"{}: only one [c*] repeat construct may appear in string2",
|
||||
executable!()
|
||||
))
|
||||
}
|
||||
} else {
|
||||
Err(format!(
|
||||
"{}: the [c*] repeat construct may not appear in string1",
|
||||
executable!()
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Sequence {
|
||||
pub fn parse_set_string(input: &str) -> Vec<Sequence> {
|
||||
pub fn from_str(input: &str) -> Vec<Sequence> {
|
||||
many0(alt((
|
||||
alt((
|
||||
Sequence::parse_char_range_octal_leftright,
|
||||
|
@@ -385,28 +424,20 @@ impl SymbolTranslator for DeleteOperation {
|
|||
|
||||
pub struct TranslateOperationComplement {
|
||||
iter: u32,
|
||||
set2_iter: usize,
|
||||
set1: Vec<char>,
|
||||
set2: Box<dyn Iterator<Item = char>>,
|
||||
set2: Vec<char>,
|
||||
fallback: char,
|
||||
translation_map: HashMap<char, char>,
|
||||
}
|
||||
|
||||
impl TranslateOperationComplement {
|
||||
fn new(
|
||||
set1: Vec<Sequence>,
|
||||
set2: Vec<Sequence>,
|
||||
set1_truncate_length: Option<usize>,
|
||||
fallback: char,
|
||||
) -> TranslateOperationComplement {
|
||||
fn new(set1: Vec<char>, set2: Vec<char>, fallback: char) -> TranslateOperationComplement {
|
||||
TranslateOperationComplement {
|
||||
iter: 0,
|
||||
set1: set1
|
||||
.iter()
|
||||
.flat_map(Sequence::flatten)
|
||||
.enumerate()
|
||||
.filter_map(truncate_iterator(set1_truncate_length))
|
||||
.collect(),
|
||||
set2: Box::new(set2.into_iter().flat_map(|c| Sequence::flatten(&c))),
|
||||
set2_iter: 0,
|
||||
set1,
|
||||
set2,
|
||||
fallback,
|
||||
translation_map: HashMap::new(),
|
||||
}
|
||||
|
@@ -419,23 +450,11 @@ pub struct TranslateOperationStandard {
|
|||
}
|
||||
|
||||
impl TranslateOperationStandard {
|
||||
fn new(
|
||||
set1: Vec<Sequence>,
|
||||
set2: Vec<Sequence>,
|
||||
set1_truncate_length: Option<usize>,
|
||||
fallback: char,
|
||||
) -> TranslateOperationStandard {
|
||||
fn new(set1: Vec<char>, set2: Vec<char>, fallback: char) -> TranslateOperationStandard {
|
||||
TranslateOperationStandard {
|
||||
translation_map: set1
|
||||
.iter()
|
||||
.flat_map(Sequence::flatten)
|
||||
.zip(
|
||||
set2.iter()
|
||||
.flat_map(Sequence::flatten)
|
||||
.chain(Repeat(fallback)),
|
||||
)
|
||||
.enumerate()
|
||||
.filter_map(truncate_iterator(set1_truncate_length))
|
||||
.into_iter()
|
||||
.zip(set2.into_iter().chain(std::iter::repeat(fallback)))
|
||||
.collect::<HashMap<_, _>>(),
|
||||
}
|
||||
}
|
||||
|
@@ -461,40 +480,27 @@ impl TranslateOperation {
|
|||
pub fn new(
|
||||
set1: Vec<Sequence>,
|
||||
set2: Vec<Sequence>,
|
||||
truncate_set1: bool,
|
||||
truncate_set1_flag: bool,
|
||||
complement: bool,
|
||||
) -> TranslateOperation {
|
||||
let fallback = set2
|
||||
.iter()
|
||||
.rev()
|
||||
.next()
|
||||
.map(Sequence::last)
|
||||
.flatten()
|
||||
.unwrap();
|
||||
let set1_truncate_length = if truncate_set1 {
|
||||
set2.iter()
|
||||
.map(Sequence::len)
|
||||
.reduce(|a, b| match (a, b) {
|
||||
(Some(l), Some(r)) => Some(l + r),
|
||||
_ => None,
|
||||
})
|
||||
.flatten()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
) -> Result<TranslateOperation, String> {
|
||||
let (mut set1_solved, set2_solved) = Sequence::solve_set_characters(&set1, &set2)?;
|
||||
if truncate_set1_flag {
|
||||
set1_solved.truncate(set2_solved.len());
|
||||
}
|
||||
let fallback = set2.iter().map(Sequence::last).last().flatten().expect(
|
||||
format!(
|
||||
"{}: when not truncating set1, string2 must be non-empty",
|
||||
executable!()
|
||||
)
|
||||
.as_str(),
|
||||
);
|
||||
if complement {
|
||||
TranslateOperation::Complement(TranslateOperationComplement::new(
|
||||
set1,
|
||||
set2,
|
||||
set1_truncate_length,
|
||||
fallback,
|
||||
Ok(TranslateOperation::Complement(
|
||||
TranslateOperationComplement::new(set1_solved, set2_solved, fallback),
|
||||
))
|
||||
} else {
|
||||
TranslateOperation::Standard(TranslateOperationStandard::new(
|
||||
set1,
|
||||
set2,
|
||||
set1_truncate_length,
|
||||
fallback,
|
||||
Ok(TranslateOperation::Standard(
|
||||
TranslateOperationStandard::new(set1_solved, set2_solved, fallback),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
@@ -511,6 +517,7 @@ impl SymbolTranslator for TranslateOperation {
|
|||
),
|
||||
TranslateOperation::Complement(TranslateOperationComplement {
|
||||
iter,
|
||||
set2_iter,
|
||||
set1,
|
||||
set2,
|
||||
fallback,
|
||||
|
@@ -525,11 +532,12 @@ impl SymbolTranslator for TranslateOperation {
|
|||
Some(*c)
|
||||
} else {
|
||||
while translation_map.get(&current).is_none() {
|
||||
if let Some(p) = set2.next() {
|
||||
let (next_index, next_value) =
|
||||
if let Some(value) = set2.get(*set2_iter) {
|
||||
let (next_iter, next_key) =
|
||||
TranslateOperation::next_complement_char(*iter, &*set1);
|
||||
*iter = next_index;
|
||||
translation_map.insert(next_value, p);
|
||||
*iter = next_iter;
|
||||
*set2_iter = set2_iter.saturating_add(1);
|
||||
translation_map.insert(next_key, *value);
|
||||
} else {
|
||||
translation_map.insert(current, *fallback);
|
||||
}
|
||||
|
@@ -622,9 +630,7 @@ fn test_parse_octal() {
|
|||
for a in '0'..='7' {
|
||||
for b in '0'..='7' {
|
||||
for c in '0'..='7' {
|
||||
assert!(
|
||||
Sequence::parse_set_string(format!("\\{}{}{}", a, b, c).as_str()).len() == 1
|
||||
);
|
||||
assert!(Sequence::from_str(format!("\\{}{}{}", a, b, c).as_str()).len() == 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -69,7 +69,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
|
||||
if sets.is_empty() {
|
||||
show_error!(
|
||||
"missing operand\nTry `{} --help` for more information.",
|
||||
"missing operand\nTry '{} --help' for more information.",
|
||||
executable!()
|
||||
);
|
||||
return 1;
|
||||
|
@@ -77,7 +77,16 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
|
||||
if !(delete_flag || squeeze_flag) && sets.len() < 2 {
|
||||
show_error!(
|
||||
"missing operand after '{}'\nTry `{} --help` for more information.",
|
||||
"missing operand after '{}'\nTry '{} --help' for more information.",
|
||||
sets[0],
|
||||
executable!()
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if sets.len() > 2 {
|
||||
show_error!(
|
||||
"extra operand '{}'\nTry '{} --help' for more information.",
|
||||
sets[0],
|
||||
executable!()
|
||||
);
|
||||
|
@@ -95,50 +104,58 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
let mut delete_buffer = vec![];
|
||||
{
|
||||
let mut delete_writer = BufWriter::new(&mut delete_buffer);
|
||||
let delete_op =
|
||||
DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
||||
let delete_op = DeleteOperation::new(Sequence::from_str(&sets[0]), complement_flag);
|
||||
translate_input(&mut locked_stdin, &mut delete_writer, delete_op);
|
||||
}
|
||||
{
|
||||
let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes());
|
||||
let squeeze_op =
|
||||
SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), complement_flag);
|
||||
translate_input(&mut squeeze_reader, &mut buffered_stdout, squeeze_op);
|
||||
let op = SqueezeOperation::new(Sequence::from_str(&sets[1]), complement_flag);
|
||||
translate_input(&mut squeeze_reader, &mut buffered_stdout, op);
|
||||
}
|
||||
} else {
|
||||
let op = DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
||||
let op = DeleteOperation::new(Sequence::from_str(&sets[0]), complement_flag);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
}
|
||||
} else if squeeze_flag {
|
||||
if sets.len() < 2 {
|
||||
let op = SqueezeOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
||||
let op = SqueezeOperation::new(Sequence::from_str(&sets[0]), complement_flag);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
} else {
|
||||
let mut translate_buffer = vec![];
|
||||
{
|
||||
let mut writer = BufWriter::new(&mut translate_buffer);
|
||||
let translate_op = TranslateOperation::new(
|
||||
Sequence::parse_set_string(&sets[0]),
|
||||
Sequence::parse_set_string(&sets[1]),
|
||||
match TranslateOperation::new(
|
||||
Sequence::from_str(&sets[0]),
|
||||
Sequence::from_str(&sets[1]),
|
||||
truncate_set1_flag,
|
||||
complement_flag,
|
||||
);
|
||||
translate_input(&mut locked_stdin, &mut writer, translate_op);
|
||||
) {
|
||||
Ok(op) => translate_input(&mut locked_stdin, &mut writer, op),
|
||||
Err(s) => {
|
||||
show_error!("{}", s);
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
}
|
||||
{
|
||||
let mut reader = BufReader::new(translate_buffer.as_bytes());
|
||||
let squeeze_op = SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), false);
|
||||
let squeeze_op = SqueezeOperation::new(Sequence::from_str(&sets[1]), false);
|
||||
translate_input(&mut reader, &mut buffered_stdout, squeeze_op);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let op = TranslateOperation::new(
|
||||
Sequence::parse_set_string(&sets[0]),
|
||||
Sequence::parse_set_string(&sets[1]),
|
||||
match TranslateOperation::new(
|
||||
Sequence::from_str(&sets[0]),
|
||||
Sequence::from_str(&sets[1]),
|
||||
truncate_set1_flag,
|
||||
complement_flag,
|
||||
);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
) {
|
||||
Ok(op) => translate_input(&mut locked_stdin, &mut buffered_stdout, op),
|
||||
Err(s) => {
|
||||
show_error!("{}", s);
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
0
|
||||
|
|
Loading…
Reference in a new issue