tr: A [:lower:]/[:upper:] in set2 must be matched in set1

If there is a [:lower:] or [:upper:] in set2, then there must be a [:lower:] or [:upper:] at the
same logical position in set1.

So

tr -t [:upper:] [:lower:] works
tr -t 1[:upper:] [:lower:] doesn't
This commit is contained in:
Christian von Elm 2024-05-24 16:19:04 +02:00
parent 4534f359f2
commit 7e8aaa8ad4
2 changed files with 141 additions and 97 deletions

View file

@ -34,6 +34,7 @@ pub enum BadSequence {
InvalidRepeatCount(String), InvalidRepeatCount(String),
EmptySet2WhenNotTruncatingSet1, EmptySet2WhenNotTruncatingSet1,
ClassExceptLowerUpperInSet2, ClassExceptLowerUpperInSet2,
ClassInSet2NotMatchedBySet1,
} }
impl Display for BadSequence { impl Display for BadSequence {
@ -58,6 +59,9 @@ impl Display for BadSequence {
Self::ClassExceptLowerUpperInSet2 => { Self::ClassExceptLowerUpperInSet2 => {
write!(f, "when translating, the only character classes that may appear in set2 are 'upper' and 'lower'") write!(f, "when translating, the only character classes that may appear in set2 are 'upper' and 'lower'")
} }
Self::ClassInSet2NotMatchedBySet1 => {
write!(f, "when translating, every 'upper'/'lower' in set2 must be matched by a 'upper'/'lower' in the same position in set1")
}
} }
} }
} }
@ -91,18 +95,22 @@ pub enum Sequence {
} }
impl Sequence { impl Sequence {
pub fn flatten(&self) -> Box<dyn Iterator<Item = u8>> { pub fn flatten_non_lower_upper(&self) -> Box<dyn Iterator<Item = Self>> {
match self { match self {
Self::Char(c) => Box::new(std::iter::once(*c)), Self::Char(c) => Box::new(std::iter::once(*c).map(Self::Char)),
Self::CharRange(l, r) => Box::new(*l..=*r), Self::CharRange(l, r) => Box::new((*l..=*r).map(Self::Char)),
Self::CharStar(c) => Box::new(std::iter::repeat(*c)), Self::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n).map(Self::Char)),
Self::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)),
Self::Class(class) => match class { Self::Class(class) => match class {
Class::Alnum => Box::new((b'0'..=b'9').chain(b'A'..=b'Z').chain(b'a'..=b'z')), Class::Alnum => Box::new(
Class::Alpha => Box::new((b'A'..=b'Z').chain(b'a'..=b'z')), (b'0'..=b'9')
Class::Blank => Box::new(unicode_table::BLANK.iter().cloned()), .chain(b'A'..=b'Z')
Class::Control => Box::new((0..=31).chain(std::iter::once(127))), .chain(b'a'..=b'z')
Class::Digit => Box::new(b'0'..=b'9'), .map(Self::Char),
),
Class::Alpha => Box::new((b'A'..=b'Z').chain(b'a'..=b'z').map(Self::Char)),
Class::Blank => Box::new(unicode_table::BLANK.iter().cloned().map(Self::Char)),
Class::Control => Box::new((0..=31).chain(std::iter::once(127)).map(Self::Char)),
Class::Digit => Box::new((b'0'..=b'9').map(Self::Char)),
Class::Graph => Box::new( Class::Graph => Box::new(
(48..=57) // digit (48..=57) // digit
.chain(65..=90) // uppercase .chain(65..=90) // uppercase
@ -112,9 +120,9 @@ impl Sequence {
.chain(58..=64) .chain(58..=64)
.chain(91..=96) .chain(91..=96)
.chain(123..=126) .chain(123..=126)
.chain(std::iter::once(32)), // space .chain(std::iter::once(32))
.map(Self::Char), // space
), ),
Class::Lower => Box::new(b'a'..=b'z'),
Class::Print => Box::new( Class::Print => Box::new(
(48..=57) // digit (48..=57) // digit
.chain(65..=90) // uppercase .chain(65..=90) // uppercase
@ -123,13 +131,37 @@ impl Sequence {
.chain(33..=47) .chain(33..=47)
.chain(58..=64) .chain(58..=64)
.chain(91..=96) .chain(91..=96)
.chain(123..=126), .chain(123..=126)
.map(Self::Char),
), ),
Class::Punct => Box::new((33..=47).chain(58..=64).chain(91..=96).chain(123..=126)), Class::Punct => Box::new(
Class::Space => Box::new(unicode_table::SPACES.iter().cloned()), (33..=47)
Class::Upper => Box::new(b'A'..=b'Z'), .chain(58..=64)
Class::Xdigit => Box::new((b'0'..=b'9').chain(b'A'..=b'F').chain(b'a'..=b'f')), .chain(91..=96)
.chain(123..=126)
.map(Self::Char),
),
Class::Space => Box::new(unicode_table::SPACES.iter().cloned().map(Self::Char)),
Class::Xdigit => Box::new(
(b'0'..=b'9')
.chain(b'A'..=b'F')
.chain(b'a'..=b'f')
.map(Self::Char),
),
s => Box::new(std::iter::once(Self::Class(*s))),
}, },
s => Box::new(std::iter::once(*s)),
}
}
pub fn flatten_all(&self) -> Box<dyn Iterator<Item = Self>> {
match self {
Self::Class(class) => match class {
Class::Lower => Box::new((b'a'..=b'z').map(Self::Char)),
Class::Upper => Box::new((b'A'..=b'Z').map(Self::Char)),
s => Self::Class(*s).flatten_non_lower_upper(),
},
s => s.flatten_non_lower_upper(),
} }
} }
@ -141,90 +173,97 @@ impl Sequence {
truncate_set1_flag: bool, truncate_set1_flag: bool,
translating: bool, translating: bool,
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> { ) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
let set1 = Self::from_str(set1_str)?;
let is_char_star = |s: &&Self| -> bool { matches!(s, Self::CharStar(_)) }; let is_char_star = |s: &&Self| -> bool { matches!(s, Self::CharStar(_)) };
let set1_star_count = set1.iter().filter(is_char_star).count(); let to_u8 = |s: Self| -> Option<u8> {
if set1_star_count == 0 { match s {
let set2 = Self::from_str(set2_str)?; Self::Char(c) => Some(c),
_ => None,
if translating
&& set2.iter().any(|&x| {
matches!(x, Self::Class(_))
&& !matches!(x, Self::Class(Class::Upper) | Self::Class(Class::Lower))
})
{
return Err(BadSequence::ClassExceptLowerUpperInSet2);
} }
};
let set2_star_count = set2.iter().filter(is_char_star).count(); let set1 = Self::from_str(set1_str)?;
if set2_star_count < 2 { if set1.iter().filter(is_char_star).count() != 0 {
let char_star = set2.iter().find_map(|s| match s { return Err(BadSequence::CharRepeatInSet1);
Self::CharStar(c) => Some(c),
_ => None,
});
let mut partition = set2.as_slice().split(|s| matches!(s, Self::CharStar(_)));
let set1_len = set1.iter().flat_map(Self::flatten).count();
let set2_len = set2
.iter()
.filter_map(|s| match s {
Self::CharStar(_) => None,
r => Some(r),
})
.flat_map(Self::flatten)
.count();
let star_compensate_len = set1_len.saturating_sub(set2_len);
let (left, right) = (partition.next(), partition.next());
let set2_solved: Vec<_> = match (left, right) {
(None, None) => match char_star {
Some(c) => std::iter::repeat(*c).take(star_compensate_len).collect(),
None => std::iter::empty().collect(),
},
(None, Some(set2_b)) => {
if let Some(c) = char_star {
std::iter::repeat(*c)
.take(star_compensate_len)
.chain(set2_b.iter().flat_map(Self::flatten))
.collect()
} else {
set2_b.iter().flat_map(Self::flatten).collect()
}
}
(Some(set2_a), None) => match char_star {
Some(c) => set2_a
.iter()
.flat_map(Self::flatten)
.chain(std::iter::repeat(*c).take(star_compensate_len))
.collect(),
None => set2_a.iter().flat_map(Self::flatten).collect(),
},
(Some(set2_a), Some(set2_b)) => match char_star {
Some(c) => set2_a
.iter()
.flat_map(Self::flatten)
.chain(std::iter::repeat(*c).take(star_compensate_len))
.chain(set2_b.iter().flat_map(Self::flatten))
.collect(),
None => set2_a
.iter()
.chain(set2_b.iter())
.flat_map(Self::flatten)
.collect(),
},
};
let mut set1_solved: Vec<_> = set1.iter().flat_map(Self::flatten).collect();
if complement_flag {
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
}
if truncate_set1_flag {
set1_solved.truncate(set2_solved.len());
}
Ok((set1_solved, set2_solved))
} else {
Err(BadSequence::MultipleCharRepeatInSet2)
}
} else {
Err(BadSequence::CharRepeatInSet1)
} }
let mut set2 = Self::from_str(set2_str)?;
if set2.iter().filter(is_char_star).count() > 1 {
return Err(BadSequence::MultipleCharRepeatInSet2);
}
if translating
&& set2.iter().any(|&x| {
matches!(x, Self::Class(_))
&& !matches!(x, Self::Class(Class::Upper) | Self::Class(Class::Lower))
})
{
return Err(BadSequence::ClassExceptLowerUpperInSet2);
}
let mut set1_solved: Vec<u8> = set1
.iter()
.flat_map(Self::flatten_all)
.filter_map(to_u8)
.collect();
if complement_flag {
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
}
let set1_len = set1_solved.len();
let set2_len = set2
.iter()
.filter_map(|s| match s {
Self::CharStar(_) => None,
r => Some(r),
})
.flat_map(Self::flatten_all)
.count();
let star_compensate_len = set1_len.saturating_sub(set2_len);
//Replace CharStar with CharRepeat
set2 = set2
.iter()
.filter_map(|s| match s {
Self::CharStar(0) => None,
Self::CharStar(c) => Some(Self::CharRepeat(*c, star_compensate_len)),
r => Some(*r),
})
.collect();
//Flatten everything but upper/lower into Char
let set1_flattened: Vec<_> = set1
.iter()
.flat_map(Self::flatten_non_lower_upper)
.collect();
set2 = set2
.iter()
.flat_map(Self::flatten_non_lower_upper)
.collect();
if set2
.iter()
.zip(
set1_flattened
.iter()
.chain(std::iter::repeat(&Self::Char(0))),
)
.any(|x| matches!(x.0, Self::Class(_)) && !matches!(x.1, Self::Class(_)))
{
return Err(BadSequence::ClassInSet2NotMatchedBySet1);
}
let set2_solved: Vec<_> = set2
.iter()
.flat_map(Self::flatten_all)
.filter_map(to_u8)
.collect();
//Truncation is done dead last. It has no influence on the other conversion steps
if truncate_set1_flag {
set1_solved.truncate(set2_solved.len());
}
Ok((set1_solved, set2_solved))
} }
} }

View file

@ -1369,3 +1369,8 @@ fn check_ignore_truncate_when_squeezing() {
fn check_disallow_blank_in_set2_when_translating() { fn check_disallow_blank_in_set2_when_translating() {
new_ucmd!().args(&["-t", "1234", "[:blank:]"]).fails(); new_ucmd!().args(&["-t", "1234", "[:blank:]"]).fails();
} }
/// A character class in set2 needs a character class at the same position in
/// set1; otherwise `tr` is expected to reject the invocation.
#[test]
fn check_class_in_set2_must_be_matched_in_set1() {
    // set1 starts with the literal `1`, so its `[:upper:]` is shifted one
    // position to the right of the `[:upper:]` that opens set2.
    let unmatched_class_args = ["-t", "1[:upper:]", "[:upper:]"];
    new_ucmd!().args(&unmatched_class_args).fails();
}