mirror of
https://github.com/uutils/coreutils
synced 2024-12-18 09:03:14 +00:00
Condensed many of the weird stuff in tr in a function...passes more GNU tests
Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
b7a0ad15a7
commit
5a0870bb30
2 changed files with 156 additions and 133 deletions
|
@ -26,44 +26,6 @@ mod unicode_table {
|
||||||
pub static BLANK: &'static [char] = &[SPACE, HT];
|
pub static BLANK: &'static [char] = &[SPACE, HT];
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Repeat(char);
|
|
||||||
|
|
||||||
impl Repeat {
|
|
||||||
fn new(element: char) -> Repeat {
|
|
||||||
Repeat(element)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Iterator for Repeat {
|
|
||||||
type Item = char;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
Some(self.0)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn last(self) -> Option<Self::Item> {
|
|
||||||
Some(self.0)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn any<F>(&mut self, mut f: F) -> bool
|
|
||||||
where
|
|
||||||
Self: Sized,
|
|
||||||
F: FnMut(Self::Item) -> bool,
|
|
||||||
{
|
|
||||||
f(self.0)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn truncate_iterator<T>(input: Option<usize>) -> impl Fn((usize, T)) -> Option<T> {
|
|
||||||
move |(idx, c)| match input {
|
|
||||||
Some(s) => match s.cmp(&idx) {
|
|
||||||
std::cmp::Ordering::Greater => Some(c),
|
|
||||||
_ => None,
|
|
||||||
},
|
|
||||||
None => Some(c),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub enum Sequence {
|
pub enum Sequence {
|
||||||
Char(char),
|
Char(char),
|
||||||
|
@ -89,8 +51,8 @@ impl Sequence {
|
||||||
match self {
|
match self {
|
||||||
Sequence::Char(c) => Box::new(std::iter::once(*c)),
|
Sequence::Char(c) => Box::new(std::iter::once(*c)),
|
||||||
Sequence::CharRange(l, r) => Box::new((*l..=*r).flat_map(char::from_u32)),
|
Sequence::CharRange(l, r) => Box::new((*l..=*r).flat_map(char::from_u32)),
|
||||||
Sequence::CharStar(c) => Box::new(Repeat::new(*c)),
|
Sequence::CharStar(c) => Box::new(std::iter::repeat(*c)),
|
||||||
Sequence::CharRepeat(c, n) => Box::new(Repeat::new(*c).take(*n)),
|
Sequence::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)),
|
||||||
Sequence::Alnum => Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z')),
|
Sequence::Alnum => Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z')),
|
||||||
Sequence::Alpha => Box::new(('A'..='Z').chain('a'..='z')),
|
Sequence::Alpha => Box::new(('A'..='Z').chain('a'..='z')),
|
||||||
Sequence::Blank => Box::new(unicode_table::BLANK.into_iter().cloned()),
|
Sequence::Blank => Box::new(unicode_table::BLANK.into_iter().cloned()),
|
||||||
|
@ -140,22 +102,99 @@ impl Sequence {
|
||||||
pub fn last(&self) -> Option<char> {
|
pub fn last(&self) -> Option<char> {
|
||||||
match self {
|
match self {
|
||||||
Sequence::CharStar(c) => Some(*c),
|
Sequence::CharStar(c) => Some(*c),
|
||||||
// TODO: Can be optimized further...
|
|
||||||
rest => rest.flatten().last(),
|
rest => rest.flatten().last(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn len(&self) -> Option<usize> {
|
// Hide all the nasty sh*t in here
|
||||||
match self {
|
pub fn solve_set_characters(
|
||||||
Sequence::CharStar(_) => None,
|
set1: &Vec<Sequence>,
|
||||||
// TODO: Is there a fix for this?
|
set2: &Vec<Sequence>,
|
||||||
rest => Some(rest.flatten().count()),
|
) -> Result<(Vec<char>, Vec<char>), String> {
|
||||||
|
let is_char_star = |s: &&Sequence| -> bool {
|
||||||
|
match s {
|
||||||
|
Sequence::CharStar(_) => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let set1_star_count = set1.iter().filter(is_char_star).count();
|
||||||
|
if set1_star_count == 0 {
|
||||||
|
let set2_star_count = set2.iter().filter(is_char_star).count();
|
||||||
|
if set2_star_count < 2 {
|
||||||
|
let char_star = set2.iter().find_map(|s| match s {
|
||||||
|
Sequence::CharStar(c) => Some(c),
|
||||||
|
_ => None,
|
||||||
|
});
|
||||||
|
let mut partition = set2.as_slice().split(|s| match s {
|
||||||
|
Sequence::CharStar(_) => true,
|
||||||
|
_ => false,
|
||||||
|
});
|
||||||
|
let set1_len = set1.iter().flat_map(Sequence::flatten).count();
|
||||||
|
let set2_len = set2
|
||||||
|
.iter()
|
||||||
|
.filter_map(|s| match s {
|
||||||
|
Sequence::CharStar(_) => None,
|
||||||
|
r => Some(r),
|
||||||
|
})
|
||||||
|
.flat_map(Sequence::flatten)
|
||||||
|
.count();
|
||||||
|
let star_compensate_len = set1_len.saturating_sub(set2_len);
|
||||||
|
let set2_solved = match (partition.next(), partition.next()) {
|
||||||
|
(None, None) => match char_star {
|
||||||
|
Some(c) => std::iter::repeat(*c).take(star_compensate_len).collect(),
|
||||||
|
None => std::iter::empty().collect(),
|
||||||
|
},
|
||||||
|
(None, Some(set2_b)) => {
|
||||||
|
if let Some(c) = char_star {
|
||||||
|
std::iter::repeat(*c)
|
||||||
|
.take(star_compensate_len)
|
||||||
|
.chain(set2_b.iter().flat_map(Sequence::flatten))
|
||||||
|
.collect()
|
||||||
|
} else {
|
||||||
|
set2_b.iter().flat_map(Sequence::flatten).collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(Some(set2_a), None) => match char_star {
|
||||||
|
Some(c) => set2_a
|
||||||
|
.iter()
|
||||||
|
.flat_map(Sequence::flatten)
|
||||||
|
.chain(std::iter::repeat(*c).take(star_compensate_len))
|
||||||
|
.collect(),
|
||||||
|
None => set2_a.iter().flat_map(Sequence::flatten).collect(),
|
||||||
|
},
|
||||||
|
(Some(set2_a), Some(set2_b)) => match char_star {
|
||||||
|
Some(c) => set2_a
|
||||||
|
.iter()
|
||||||
|
.flat_map(Sequence::flatten)
|
||||||
|
.chain(std::iter::repeat(*c).take(star_compensate_len))
|
||||||
|
.chain(set2_b.iter().flat_map(Sequence::flatten))
|
||||||
|
.collect(),
|
||||||
|
None => set2_a
|
||||||
|
.iter()
|
||||||
|
.chain(set2_b.iter())
|
||||||
|
.flat_map(Sequence::flatten)
|
||||||
|
.collect(),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
let set1_solved = set1.iter().flat_map(Sequence::flatten).collect();
|
||||||
|
return Ok((set1_solved, set2_solved));
|
||||||
|
} else {
|
||||||
|
Err(format!(
|
||||||
|
"{}: only one [c*] repeat construct may appear in string2",
|
||||||
|
executable!()
|
||||||
|
))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Err(format!(
|
||||||
|
"{}: the [c*] repeat construct may not appear in string1",
|
||||||
|
executable!()
|
||||||
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Sequence {
|
impl Sequence {
|
||||||
pub fn parse_set_string(input: &str) -> Vec<Sequence> {
|
pub fn from_str(input: &str) -> Vec<Sequence> {
|
||||||
many0(alt((
|
many0(alt((
|
||||||
alt((
|
alt((
|
||||||
Sequence::parse_char_range_octal_leftright,
|
Sequence::parse_char_range_octal_leftright,
|
||||||
|
@ -385,28 +424,20 @@ impl SymbolTranslator for DeleteOperation {
|
||||||
|
|
||||||
pub struct TranslateOperationComplement {
|
pub struct TranslateOperationComplement {
|
||||||
iter: u32,
|
iter: u32,
|
||||||
|
set2_iter: usize,
|
||||||
set1: Vec<char>,
|
set1: Vec<char>,
|
||||||
set2: Box<dyn Iterator<Item = char>>,
|
set2: Vec<char>,
|
||||||
fallback: char,
|
fallback: char,
|
||||||
translation_map: HashMap<char, char>,
|
translation_map: HashMap<char, char>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TranslateOperationComplement {
|
impl TranslateOperationComplement {
|
||||||
fn new(
|
fn new(set1: Vec<char>, set2: Vec<char>, fallback: char) -> TranslateOperationComplement {
|
||||||
set1: Vec<Sequence>,
|
|
||||||
set2: Vec<Sequence>,
|
|
||||||
set1_truncate_length: Option<usize>,
|
|
||||||
fallback: char,
|
|
||||||
) -> TranslateOperationComplement {
|
|
||||||
TranslateOperationComplement {
|
TranslateOperationComplement {
|
||||||
iter: 0,
|
iter: 0,
|
||||||
set1: set1
|
set2_iter: 0,
|
||||||
.iter()
|
set1,
|
||||||
.flat_map(Sequence::flatten)
|
set2,
|
||||||
.enumerate()
|
|
||||||
.filter_map(truncate_iterator(set1_truncate_length))
|
|
||||||
.collect(),
|
|
||||||
set2: Box::new(set2.into_iter().flat_map(|c| Sequence::flatten(&c))),
|
|
||||||
fallback,
|
fallback,
|
||||||
translation_map: HashMap::new(),
|
translation_map: HashMap::new(),
|
||||||
}
|
}
|
||||||
|
@ -419,23 +450,11 @@ pub struct TranslateOperationStandard {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TranslateOperationStandard {
|
impl TranslateOperationStandard {
|
||||||
fn new(
|
fn new(set1: Vec<char>, set2: Vec<char>, fallback: char) -> TranslateOperationStandard {
|
||||||
set1: Vec<Sequence>,
|
|
||||||
set2: Vec<Sequence>,
|
|
||||||
set1_truncate_length: Option<usize>,
|
|
||||||
fallback: char,
|
|
||||||
) -> TranslateOperationStandard {
|
|
||||||
TranslateOperationStandard {
|
TranslateOperationStandard {
|
||||||
translation_map: set1
|
translation_map: set1
|
||||||
.iter()
|
.into_iter()
|
||||||
.flat_map(Sequence::flatten)
|
.zip(set2.into_iter().chain(std::iter::repeat(fallback)))
|
||||||
.zip(
|
|
||||||
set2.iter()
|
|
||||||
.flat_map(Sequence::flatten)
|
|
||||||
.chain(Repeat(fallback)),
|
|
||||||
)
|
|
||||||
.enumerate()
|
|
||||||
.filter_map(truncate_iterator(set1_truncate_length))
|
|
||||||
.collect::<HashMap<_, _>>(),
|
.collect::<HashMap<_, _>>(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -461,40 +480,27 @@ impl TranslateOperation {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
set1: Vec<Sequence>,
|
set1: Vec<Sequence>,
|
||||||
set2: Vec<Sequence>,
|
set2: Vec<Sequence>,
|
||||||
truncate_set1: bool,
|
truncate_set1_flag: bool,
|
||||||
complement: bool,
|
complement: bool,
|
||||||
) -> TranslateOperation {
|
) -> Result<TranslateOperation, String> {
|
||||||
let fallback = set2
|
let (mut set1_solved, set2_solved) = Sequence::solve_set_characters(&set1, &set2)?;
|
||||||
.iter()
|
if truncate_set1_flag {
|
||||||
.rev()
|
set1_solved.truncate(set2_solved.len());
|
||||||
.next()
|
}
|
||||||
.map(Sequence::last)
|
let fallback = set2.iter().map(Sequence::last).last().flatten().expect(
|
||||||
.flatten()
|
format!(
|
||||||
.unwrap();
|
"{}: when not truncating set1, string2 must be non-empty",
|
||||||
let set1_truncate_length = if truncate_set1 {
|
executable!()
|
||||||
set2.iter()
|
)
|
||||||
.map(Sequence::len)
|
.as_str(),
|
||||||
.reduce(|a, b| match (a, b) {
|
);
|
||||||
(Some(l), Some(r)) => Some(l + r),
|
|
||||||
_ => None,
|
|
||||||
})
|
|
||||||
.flatten()
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
if complement {
|
if complement {
|
||||||
TranslateOperation::Complement(TranslateOperationComplement::new(
|
Ok(TranslateOperation::Complement(
|
||||||
set1,
|
TranslateOperationComplement::new(set1_solved, set2_solved, fallback),
|
||||||
set2,
|
|
||||||
set1_truncate_length,
|
|
||||||
fallback,
|
|
||||||
))
|
))
|
||||||
} else {
|
} else {
|
||||||
TranslateOperation::Standard(TranslateOperationStandard::new(
|
Ok(TranslateOperation::Standard(
|
||||||
set1,
|
TranslateOperationStandard::new(set1_solved, set2_solved, fallback),
|
||||||
set2,
|
|
||||||
set1_truncate_length,
|
|
||||||
fallback,
|
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -511,6 +517,7 @@ impl SymbolTranslator for TranslateOperation {
|
||||||
),
|
),
|
||||||
TranslateOperation::Complement(TranslateOperationComplement {
|
TranslateOperation::Complement(TranslateOperationComplement {
|
||||||
iter,
|
iter,
|
||||||
|
set2_iter,
|
||||||
set1,
|
set1,
|
||||||
set2,
|
set2,
|
||||||
fallback,
|
fallback,
|
||||||
|
@ -525,11 +532,12 @@ impl SymbolTranslator for TranslateOperation {
|
||||||
Some(*c)
|
Some(*c)
|
||||||
} else {
|
} else {
|
||||||
while translation_map.get(&current).is_none() {
|
while translation_map.get(&current).is_none() {
|
||||||
if let Some(p) = set2.next() {
|
if let Some(value) = set2.get(*set2_iter) {
|
||||||
let (next_index, next_value) =
|
let (next_iter, next_key) =
|
||||||
TranslateOperation::next_complement_char(*iter, &*set1);
|
TranslateOperation::next_complement_char(*iter, &*set1);
|
||||||
*iter = next_index;
|
*iter = next_iter;
|
||||||
translation_map.insert(next_value, p);
|
*set2_iter = set2_iter.saturating_add(1);
|
||||||
|
translation_map.insert(next_key, *value);
|
||||||
} else {
|
} else {
|
||||||
translation_map.insert(current, *fallback);
|
translation_map.insert(current, *fallback);
|
||||||
}
|
}
|
||||||
|
@ -622,9 +630,7 @@ fn test_parse_octal() {
|
||||||
for a in '0'..='7' {
|
for a in '0'..='7' {
|
||||||
for b in '0'..='7' {
|
for b in '0'..='7' {
|
||||||
for c in '0'..='7' {
|
for c in '0'..='7' {
|
||||||
assert!(
|
assert!(Sequence::from_str(format!("\\{}{}{}", a, b, c).as_str()).len() == 1);
|
||||||
Sequence::parse_set_string(format!("\\{}{}{}", a, b, c).as_str()).len() == 1
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,7 +69,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
|
|
||||||
if sets.is_empty() {
|
if sets.is_empty() {
|
||||||
show_error!(
|
show_error!(
|
||||||
"missing operand\nTry `{} --help` for more information.",
|
"missing operand\nTry '{} --help' for more information.",
|
||||||
executable!()
|
executable!()
|
||||||
);
|
);
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -77,7 +77,16 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
|
|
||||||
if !(delete_flag || squeeze_flag) && sets.len() < 2 {
|
if !(delete_flag || squeeze_flag) && sets.len() < 2 {
|
||||||
show_error!(
|
show_error!(
|
||||||
"missing operand after '{}'\nTry `{} --help` for more information.",
|
"missing operand after '{}'\nTry '{} --help' for more information.",
|
||||||
|
sets[0],
|
||||||
|
executable!()
|
||||||
|
);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if sets.len() > 2 {
|
||||||
|
show_error!(
|
||||||
|
"extra operand '{}'\nTry '{} --help' for more information.",
|
||||||
sets[0],
|
sets[0],
|
||||||
executable!()
|
executable!()
|
||||||
);
|
);
|
||||||
|
@ -95,50 +104,58 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
let mut delete_buffer = vec![];
|
let mut delete_buffer = vec![];
|
||||||
{
|
{
|
||||||
let mut delete_writer = BufWriter::new(&mut delete_buffer);
|
let mut delete_writer = BufWriter::new(&mut delete_buffer);
|
||||||
let delete_op =
|
let delete_op = DeleteOperation::new(Sequence::from_str(&sets[0]), complement_flag);
|
||||||
DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
|
||||||
translate_input(&mut locked_stdin, &mut delete_writer, delete_op);
|
translate_input(&mut locked_stdin, &mut delete_writer, delete_op);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes());
|
let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes());
|
||||||
let squeeze_op =
|
let op = SqueezeOperation::new(Sequence::from_str(&sets[1]), complement_flag);
|
||||||
SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), complement_flag);
|
translate_input(&mut squeeze_reader, &mut buffered_stdout, op);
|
||||||
translate_input(&mut squeeze_reader, &mut buffered_stdout, squeeze_op);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let op = DeleteOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
let op = DeleteOperation::new(Sequence::from_str(&sets[0]), complement_flag);
|
||||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||||
}
|
}
|
||||||
} else if squeeze_flag {
|
} else if squeeze_flag {
|
||||||
if sets.len() < 2 {
|
if sets.len() < 2 {
|
||||||
let op = SqueezeOperation::new(Sequence::parse_set_string(&sets[0]), complement_flag);
|
let op = SqueezeOperation::new(Sequence::from_str(&sets[0]), complement_flag);
|
||||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||||
} else {
|
} else {
|
||||||
let mut translate_buffer = vec![];
|
let mut translate_buffer = vec![];
|
||||||
{
|
{
|
||||||
let mut writer = BufWriter::new(&mut translate_buffer);
|
let mut writer = BufWriter::new(&mut translate_buffer);
|
||||||
let translate_op = TranslateOperation::new(
|
match TranslateOperation::new(
|
||||||
Sequence::parse_set_string(&sets[0]),
|
Sequence::from_str(&sets[0]),
|
||||||
Sequence::parse_set_string(&sets[1]),
|
Sequence::from_str(&sets[1]),
|
||||||
truncate_set1_flag,
|
truncate_set1_flag,
|
||||||
complement_flag,
|
complement_flag,
|
||||||
);
|
) {
|
||||||
translate_input(&mut locked_stdin, &mut writer, translate_op);
|
Ok(op) => translate_input(&mut locked_stdin, &mut writer, op),
|
||||||
|
Err(s) => {
|
||||||
|
show_error!("{}", s);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
let mut reader = BufReader::new(translate_buffer.as_bytes());
|
let mut reader = BufReader::new(translate_buffer.as_bytes());
|
||||||
let squeeze_op = SqueezeOperation::new(Sequence::parse_set_string(&sets[1]), false);
|
let squeeze_op = SqueezeOperation::new(Sequence::from_str(&sets[1]), false);
|
||||||
translate_input(&mut reader, &mut buffered_stdout, squeeze_op);
|
translate_input(&mut reader, &mut buffered_stdout, squeeze_op);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let op = TranslateOperation::new(
|
match TranslateOperation::new(
|
||||||
Sequence::parse_set_string(&sets[0]),
|
Sequence::from_str(&sets[0]),
|
||||||
Sequence::parse_set_string(&sets[1]),
|
Sequence::from_str(&sets[1]),
|
||||||
truncate_set1_flag,
|
truncate_set1_flag,
|
||||||
complement_flag,
|
complement_flag,
|
||||||
);
|
) {
|
||||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
Ok(op) => translate_input(&mut locked_stdin, &mut buffered_stdout, op),
|
||||||
|
Err(s) => {
|
||||||
|
show_error!("{}", s);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
0
|
0
|
||||||
|
|
Loading…
Reference in a new issue