tr: A [:lower:]/[:upper:] in set2 must be matched in set1

If there is a [:lower:] or [:upper:] in set2, then there must be a [:lower:] or [:upper:] at the
same logical position in set1

So

tr -t [:upper:] [:lower:] works
tr -t 1[:upper:] [:lower:] doesnt
This commit is contained in:
Christian von Elm 2024-05-24 16:19:04 +02:00
parent 4534f359f2
commit 7e8aaa8ad4
2 changed files with 141 additions and 97 deletions

View file

@ -34,6 +34,7 @@ pub enum BadSequence {
InvalidRepeatCount(String), InvalidRepeatCount(String),
EmptySet2WhenNotTruncatingSet1, EmptySet2WhenNotTruncatingSet1,
ClassExceptLowerUpperInSet2, ClassExceptLowerUpperInSet2,
ClassInSet2NotMatchedBySet1,
} }
impl Display for BadSequence { impl Display for BadSequence {
@ -58,6 +59,9 @@ impl Display for BadSequence {
Self::ClassExceptLowerUpperInSet2 => { Self::ClassExceptLowerUpperInSet2 => {
write!(f, "when translating, the only character classes that may appear in set2 are 'upper' and 'lower'") write!(f, "when translating, the only character classes that may appear in set2 are 'upper' and 'lower'")
} }
Self::ClassInSet2NotMatchedBySet1 => {
write!(f, "when translating, every 'upper'/'lower' in set2 must be matched by a 'upper'/'lower' in the same position in set1")
}
} }
} }
} }
@ -91,18 +95,22 @@ pub enum Sequence {
} }
impl Sequence { impl Sequence {
pub fn flatten(&self) -> Box<dyn Iterator<Item = u8>> { pub fn flatten_non_lower_upper(&self) -> Box<dyn Iterator<Item = Self>> {
match self { match self {
Self::Char(c) => Box::new(std::iter::once(*c)), Self::Char(c) => Box::new(std::iter::once(*c).map(Self::Char)),
Self::CharRange(l, r) => Box::new(*l..=*r), Self::CharRange(l, r) => Box::new((*l..=*r).map(Self::Char)),
Self::CharStar(c) => Box::new(std::iter::repeat(*c)), Self::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n).map(Self::Char)),
Self::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)),
Self::Class(class) => match class { Self::Class(class) => match class {
Class::Alnum => Box::new((b'0'..=b'9').chain(b'A'..=b'Z').chain(b'a'..=b'z')), Class::Alnum => Box::new(
Class::Alpha => Box::new((b'A'..=b'Z').chain(b'a'..=b'z')), (b'0'..=b'9')
Class::Blank => Box::new(unicode_table::BLANK.iter().cloned()), .chain(b'A'..=b'Z')
Class::Control => Box::new((0..=31).chain(std::iter::once(127))), .chain(b'a'..=b'z')
Class::Digit => Box::new(b'0'..=b'9'), .map(Self::Char),
),
Class::Alpha => Box::new((b'A'..=b'Z').chain(b'a'..=b'z').map(Self::Char)),
Class::Blank => Box::new(unicode_table::BLANK.iter().cloned().map(Self::Char)),
Class::Control => Box::new((0..=31).chain(std::iter::once(127)).map(Self::Char)),
Class::Digit => Box::new((b'0'..=b'9').map(Self::Char)),
Class::Graph => Box::new( Class::Graph => Box::new(
(48..=57) // digit (48..=57) // digit
.chain(65..=90) // uppercase .chain(65..=90) // uppercase
@ -112,9 +120,9 @@ impl Sequence {
.chain(58..=64) .chain(58..=64)
.chain(91..=96) .chain(91..=96)
.chain(123..=126) .chain(123..=126)
.chain(std::iter::once(32)), // space .chain(std::iter::once(32))
.map(Self::Char), // space
), ),
Class::Lower => Box::new(b'a'..=b'z'),
Class::Print => Box::new( Class::Print => Box::new(
(48..=57) // digit (48..=57) // digit
.chain(65..=90) // uppercase .chain(65..=90) // uppercase
@ -123,13 +131,37 @@ impl Sequence {
.chain(33..=47) .chain(33..=47)
.chain(58..=64) .chain(58..=64)
.chain(91..=96) .chain(91..=96)
.chain(123..=126), .chain(123..=126)
.map(Self::Char),
), ),
Class::Punct => Box::new((33..=47).chain(58..=64).chain(91..=96).chain(123..=126)), Class::Punct => Box::new(
Class::Space => Box::new(unicode_table::SPACES.iter().cloned()), (33..=47)
Class::Upper => Box::new(b'A'..=b'Z'), .chain(58..=64)
Class::Xdigit => Box::new((b'0'..=b'9').chain(b'A'..=b'F').chain(b'a'..=b'f')), .chain(91..=96)
.chain(123..=126)
.map(Self::Char),
),
Class::Space => Box::new(unicode_table::SPACES.iter().cloned().map(Self::Char)),
Class::Xdigit => Box::new(
(b'0'..=b'9')
.chain(b'A'..=b'F')
.chain(b'a'..=b'f')
.map(Self::Char),
),
s => Box::new(std::iter::once(Self::Class(*s))),
}, },
s => Box::new(std::iter::once(*s)),
}
}
pub fn flatten_all(&self) -> Box<dyn Iterator<Item = Self>> {
match self {
Self::Class(class) => match class {
Class::Lower => Box::new((b'a'..=b'z').map(Self::Char)),
Class::Upper => Box::new((b'A'..=b'Z').map(Self::Char)),
s => Self::Class(*s).flatten_non_lower_upper(),
},
s => s.flatten_non_lower_upper(),
} }
} }
@ -141,11 +173,23 @@ impl Sequence {
truncate_set1_flag: bool, truncate_set1_flag: bool,
translating: bool, translating: bool,
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> { ) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
let set1 = Self::from_str(set1_str)?;
let is_char_star = |s: &&Self| -> bool { matches!(s, Self::CharStar(_)) }; let is_char_star = |s: &&Self| -> bool { matches!(s, Self::CharStar(_)) };
let set1_star_count = set1.iter().filter(is_char_star).count(); let to_u8 = |s: Self| -> Option<u8> {
if set1_star_count == 0 { match s {
let set2 = Self::from_str(set2_str)?; Self::Char(c) => Some(c),
_ => None,
}
};
let set1 = Self::from_str(set1_str)?;
if set1.iter().filter(is_char_star).count() != 0 {
return Err(BadSequence::CharRepeatInSet1);
}
let mut set2 = Self::from_str(set2_str)?;
if set2.iter().filter(is_char_star).count() > 1 {
return Err(BadSequence::MultipleCharRepeatInSet2);
}
if translating if translating
&& set2.iter().any(|&x| { && set2.iter().any(|&x| {
@ -156,75 +200,70 @@ impl Sequence {
return Err(BadSequence::ClassExceptLowerUpperInSet2); return Err(BadSequence::ClassExceptLowerUpperInSet2);
} }
let set2_star_count = set2.iter().filter(is_char_star).count(); let mut set1_solved: Vec<u8> = set1
if set2_star_count < 2 { .iter()
let char_star = set2.iter().find_map(|s| match s { .flat_map(Self::flatten_all)
Self::CharStar(c) => Some(c), .filter_map(to_u8)
_ => None, .collect();
}); if complement_flag {
let mut partition = set2.as_slice().split(|s| matches!(s, Self::CharStar(_))); set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
let set1_len = set1.iter().flat_map(Self::flatten).count(); }
let set1_len = set1_solved.len();
let set2_len = set2 let set2_len = set2
.iter() .iter()
.filter_map(|s| match s { .filter_map(|s| match s {
Self::CharStar(_) => None, Self::CharStar(_) => None,
r => Some(r), r => Some(r),
}) })
.flat_map(Self::flatten) .flat_map(Self::flatten_all)
.count(); .count();
let star_compensate_len = set1_len.saturating_sub(set2_len); let star_compensate_len = set1_len.saturating_sub(set2_len);
let (left, right) = (partition.next(), partition.next());
let set2_solved: Vec<_> = match (left, right) { //Replace CharStar with CharRepeat
(None, None) => match char_star { set2 = set2
Some(c) => std::iter::repeat(*c).take(star_compensate_len).collect(),
None => std::iter::empty().collect(),
},
(None, Some(set2_b)) => {
if let Some(c) = char_star {
std::iter::repeat(*c)
.take(star_compensate_len)
.chain(set2_b.iter().flat_map(Self::flatten))
.collect()
} else {
set2_b.iter().flat_map(Self::flatten).collect()
}
}
(Some(set2_a), None) => match char_star {
Some(c) => set2_a
.iter() .iter()
.flat_map(Self::flatten) .filter_map(|s| match s {
.chain(std::iter::repeat(*c).take(star_compensate_len)) Self::CharStar(0) => None,
.collect(), Self::CharStar(c) => Some(Self::CharRepeat(*c, star_compensate_len)),
None => set2_a.iter().flat_map(Self::flatten).collect(), r => Some(*r),
}, })
(Some(set2_a), Some(set2_b)) => match char_star { .collect();
Some(c) => set2_a
//Flatten everything but upper/lower into Char
let set1_flattened: Vec<_> = set1
.iter() .iter()
.flat_map(Self::flatten) .flat_map(Self::flatten_non_lower_upper)
.chain(std::iter::repeat(*c).take(star_compensate_len)) .collect();
.chain(set2_b.iter().flat_map(Self::flatten)) set2 = set2
.collect(),
None => set2_a
.iter() .iter()
.chain(set2_b.iter()) .flat_map(Self::flatten_non_lower_upper)
.flat_map(Self::flatten) .collect();
.collect(),
}, if set2
}; .iter()
let mut set1_solved: Vec<_> = set1.iter().flat_map(Self::flatten).collect(); .zip(
if complement_flag { set1_flattened
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect(); .iter()
.chain(std::iter::repeat(&Self::Char(0))),
)
.any(|x| matches!(x.0, Self::Class(_)) && !matches!(x.1, Self::Class(_)))
{
return Err(BadSequence::ClassInSet2NotMatchedBySet1);
} }
let set2_solved: Vec<_> = set2
.iter()
.flat_map(Self::flatten_all)
.filter_map(to_u8)
.collect();
//Truncation is done dead last. It has no influence on the other conversion steps
if truncate_set1_flag { if truncate_set1_flag {
set1_solved.truncate(set2_solved.len()); set1_solved.truncate(set2_solved.len());
} }
Ok((set1_solved, set2_solved)) Ok((set1_solved, set2_solved))
} else {
Err(BadSequence::MultipleCharRepeatInSet2)
}
} else {
Err(BadSequence::CharRepeatInSet1)
}
} }
} }

View file

@ -1369,3 +1369,8 @@ fn check_ignore_truncate_when_squeezing() {
fn check_disallow_blank_in_set2_when_translating() { fn check_disallow_blank_in_set2_when_translating() {
new_ucmd!().args(&["-t", "1234", "[:blank:]"]).fails(); new_ucmd!().args(&["-t", "1234", "[:blank:]"]).fails();
} }
#[test]
fn check_class_in_set2_must_be_matched_in_set1() {
new_ucmd!().args(&["-t", "1[:upper:]", "[:upper:]"]).fails();
}