tr: A [:lower:]/[:upper:] in set2 must be matched in set1

If there is a [:lower:] or [:upper:] in set2, then there must be a [:lower:] or [:upper:] at the
same logical position in set1.

So

tr -t [:upper:] [:lower:] works
tr -t 1[:upper:] [:lower:] doesn't
This commit is contained in:
Christian von Elm 2024-05-24 16:19:04 +02:00
parent 4534f359f2
commit 7e8aaa8ad4
2 changed files with 141 additions and 97 deletions

View file

@ -34,6 +34,7 @@ pub enum BadSequence {
InvalidRepeatCount(String),
EmptySet2WhenNotTruncatingSet1,
ClassExceptLowerUpperInSet2,
ClassInSet2NotMatchedBySet1,
}
impl Display for BadSequence {
@ -58,6 +59,9 @@ impl Display for BadSequence {
Self::ClassExceptLowerUpperInSet2 => {
write!(f, "when translating, the only character classes that may appear in set2 are 'upper' and 'lower'")
}
Self::ClassInSet2NotMatchedBySet1 => {
write!(f, "when translating, every 'upper'/'lower' in set2 must be matched by a 'upper'/'lower' in the same position in set1")
}
}
}
}
@ -91,18 +95,22 @@ pub enum Sequence {
}
impl Sequence {
pub fn flatten(&self) -> Box<dyn Iterator<Item = u8>> {
pub fn flatten_non_lower_upper(&self) -> Box<dyn Iterator<Item = Self>> {
match self {
Self::Char(c) => Box::new(std::iter::once(*c)),
Self::CharRange(l, r) => Box::new(*l..=*r),
Self::CharStar(c) => Box::new(std::iter::repeat(*c)),
Self::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)),
Self::Char(c) => Box::new(std::iter::once(*c).map(Self::Char)),
Self::CharRange(l, r) => Box::new((*l..=*r).map(Self::Char)),
Self::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n).map(Self::Char)),
Self::Class(class) => match class {
Class::Alnum => Box::new((b'0'..=b'9').chain(b'A'..=b'Z').chain(b'a'..=b'z')),
Class::Alpha => Box::new((b'A'..=b'Z').chain(b'a'..=b'z')),
Class::Blank => Box::new(unicode_table::BLANK.iter().cloned()),
Class::Control => Box::new((0..=31).chain(std::iter::once(127))),
Class::Digit => Box::new(b'0'..=b'9'),
Class::Alnum => Box::new(
(b'0'..=b'9')
.chain(b'A'..=b'Z')
.chain(b'a'..=b'z')
.map(Self::Char),
),
Class::Alpha => Box::new((b'A'..=b'Z').chain(b'a'..=b'z').map(Self::Char)),
Class::Blank => Box::new(unicode_table::BLANK.iter().cloned().map(Self::Char)),
Class::Control => Box::new((0..=31).chain(std::iter::once(127)).map(Self::Char)),
Class::Digit => Box::new((b'0'..=b'9').map(Self::Char)),
Class::Graph => Box::new(
(48..=57) // digit
.chain(65..=90) // uppercase
@ -112,9 +120,9 @@ impl Sequence {
.chain(58..=64)
.chain(91..=96)
.chain(123..=126)
.chain(std::iter::once(32)), // space
.chain(std::iter::once(32))
.map(Self::Char), // space
),
Class::Lower => Box::new(b'a'..=b'z'),
Class::Print => Box::new(
(48..=57) // digit
.chain(65..=90) // uppercase
@ -123,13 +131,37 @@ impl Sequence {
.chain(33..=47)
.chain(58..=64)
.chain(91..=96)
.chain(123..=126),
.chain(123..=126)
.map(Self::Char),
),
Class::Punct => Box::new((33..=47).chain(58..=64).chain(91..=96).chain(123..=126)),
Class::Space => Box::new(unicode_table::SPACES.iter().cloned()),
Class::Upper => Box::new(b'A'..=b'Z'),
Class::Xdigit => Box::new((b'0'..=b'9').chain(b'A'..=b'F').chain(b'a'..=b'f')),
Class::Punct => Box::new(
(33..=47)
.chain(58..=64)
.chain(91..=96)
.chain(123..=126)
.map(Self::Char),
),
Class::Space => Box::new(unicode_table::SPACES.iter().cloned().map(Self::Char)),
Class::Xdigit => Box::new(
(b'0'..=b'9')
.chain(b'A'..=b'F')
.chain(b'a'..=b'f')
.map(Self::Char),
),
s => Box::new(std::iter::once(Self::Class(*s))),
},
s => Box::new(std::iter::once(*s)),
}
}
/// Expands this sequence element into its individual items, including the
/// `lower` and `upper` classes.
///
/// `[:lower:]` becomes `Self::Char` items for `b'a'..=b'z'` and `[:upper:]`
/// becomes `Self::Char` items for `b'A'..=b'Z'`. Every other element is
/// handed to `flatten_non_lower_upper` unchanged.
pub fn flatten_all(&self) -> Box<dyn Iterator<Item = Self>> {
    match self {
        Self::Class(Class::Lower) => Box::new((b'a'..=b'z').map(Self::Char)),
        Self::Class(Class::Upper) => Box::new((b'A'..=b'Z').map(Self::Char)),
        // Anything that is not upper/lower is already handled by the other flattener.
        other => other.flatten_non_lower_upper(),
    }
}
@ -141,90 +173,97 @@ impl Sequence {
truncate_set1_flag: bool,
translating: bool,
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
let set1 = Self::from_str(set1_str)?;
let is_char_star = |s: &&Self| -> bool { matches!(s, Self::CharStar(_)) };
let set1_star_count = set1.iter().filter(is_char_star).count();
if set1_star_count == 0 {
let set2 = Self::from_str(set2_str)?;
if translating
&& set2.iter().any(|&x| {
matches!(x, Self::Class(_))
&& !matches!(x, Self::Class(Class::Upper) | Self::Class(Class::Lower))
})
{
return Err(BadSequence::ClassExceptLowerUpperInSet2);
let to_u8 = |s: Self| -> Option<u8> {
match s {
Self::Char(c) => Some(c),
_ => None,
}
};
let set2_star_count = set2.iter().filter(is_char_star).count();
if set2_star_count < 2 {
let char_star = set2.iter().find_map(|s| match s {
Self::CharStar(c) => Some(c),
_ => None,
});
let mut partition = set2.as_slice().split(|s| matches!(s, Self::CharStar(_)));
let set1_len = set1.iter().flat_map(Self::flatten).count();
let set2_len = set2
.iter()
.filter_map(|s| match s {
Self::CharStar(_) => None,
r => Some(r),
})
.flat_map(Self::flatten)
.count();
let star_compensate_len = set1_len.saturating_sub(set2_len);
let (left, right) = (partition.next(), partition.next());
let set2_solved: Vec<_> = match (left, right) {
(None, None) => match char_star {
Some(c) => std::iter::repeat(*c).take(star_compensate_len).collect(),
None => std::iter::empty().collect(),
},
(None, Some(set2_b)) => {
if let Some(c) = char_star {
std::iter::repeat(*c)
.take(star_compensate_len)
.chain(set2_b.iter().flat_map(Self::flatten))
.collect()
} else {
set2_b.iter().flat_map(Self::flatten).collect()
}
}
(Some(set2_a), None) => match char_star {
Some(c) => set2_a
.iter()
.flat_map(Self::flatten)
.chain(std::iter::repeat(*c).take(star_compensate_len))
.collect(),
None => set2_a.iter().flat_map(Self::flatten).collect(),
},
(Some(set2_a), Some(set2_b)) => match char_star {
Some(c) => set2_a
.iter()
.flat_map(Self::flatten)
.chain(std::iter::repeat(*c).take(star_compensate_len))
.chain(set2_b.iter().flat_map(Self::flatten))
.collect(),
None => set2_a
.iter()
.chain(set2_b.iter())
.flat_map(Self::flatten)
.collect(),
},
};
let mut set1_solved: Vec<_> = set1.iter().flat_map(Self::flatten).collect();
if complement_flag {
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
}
if truncate_set1_flag {
set1_solved.truncate(set2_solved.len());
}
Ok((set1_solved, set2_solved))
} else {
Err(BadSequence::MultipleCharRepeatInSet2)
}
} else {
Err(BadSequence::CharRepeatInSet1)
let set1 = Self::from_str(set1_str)?;
if set1.iter().filter(is_char_star).count() != 0 {
return Err(BadSequence::CharRepeatInSet1);
}
let mut set2 = Self::from_str(set2_str)?;
if set2.iter().filter(is_char_star).count() > 1 {
return Err(BadSequence::MultipleCharRepeatInSet2);
}
if translating
&& set2.iter().any(|&x| {
matches!(x, Self::Class(_))
&& !matches!(x, Self::Class(Class::Upper) | Self::Class(Class::Lower))
})
{
return Err(BadSequence::ClassExceptLowerUpperInSet2);
}
let mut set1_solved: Vec<u8> = set1
.iter()
.flat_map(Self::flatten_all)
.filter_map(to_u8)
.collect();
if complement_flag {
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
}
let set1_len = set1_solved.len();
let set2_len = set2
.iter()
.filter_map(|s| match s {
Self::CharStar(_) => None,
r => Some(r),
})
.flat_map(Self::flatten_all)
.count();
let star_compensate_len = set1_len.saturating_sub(set2_len);
//Replace CharStar with CharRepeat
set2 = set2
.iter()
.filter_map(|s| match s {
Self::CharStar(0) => None,
Self::CharStar(c) => Some(Self::CharRepeat(*c, star_compensate_len)),
r => Some(*r),
})
.collect();
//Flatten everything but upper/lower into Char
let set1_flattened: Vec<_> = set1
.iter()
.flat_map(Self::flatten_non_lower_upper)
.collect();
set2 = set2
.iter()
.flat_map(Self::flatten_non_lower_upper)
.collect();
if set2
.iter()
.zip(
set1_flattened
.iter()
.chain(std::iter::repeat(&Self::Char(0))),
)
.any(|x| matches!(x.0, Self::Class(_)) && !matches!(x.1, Self::Class(_)))
{
return Err(BadSequence::ClassInSet2NotMatchedBySet1);
}
let set2_solved: Vec<_> = set2
.iter()
.flat_map(Self::flatten_all)
.filter_map(to_u8)
.collect();
//Truncation is done dead last. It has no influence on the other conversion steps
if truncate_set1_flag {
set1_solved.truncate(set2_solved.len());
}
Ok((set1_solved, set2_solved))
}
}

View file

@ -1369,3 +1369,8 @@ fn check_ignore_truncate_when_squeezing() {
fn check_disallow_blank_in_set2_when_translating() {
    // When translating, only the 'upper' and 'lower' classes may appear in set2,
    // so a `[:blank:]` there must make the command fail.
    let args = ["-t", "1234", "[:blank:]"];
    new_ucmd!().args(&args).fails();
}
#[test]
fn check_class_in_set2_must_be_matched_in_set1() {
    // set1 has a plain character ('1') in the position where set2 has `[:upper:]`,
    // so the class in set2 is not matched by a class in set1 and the command must fail.
    let args = ["-t", "1[:upper:]", "[:upper:]"];
    new_ucmd!().args(&args).fails();
}