mirror of
https://github.com/uutils/coreutils
synced 2024-12-13 14:52:41 +00:00
tr: A [:lower:]/[:upper:] in set2 must be matched in set1
If there is a [:lower:] or [:upper:] in set2, then there must be a [:lower:] or [:upper:] at the same logical position in set1. So `tr -t [:upper:] [:lower:]` works, but `tr -t 1[:upper:] [:lower:]` doesn't.
This commit is contained in:
parent
4534f359f2
commit
7e8aaa8ad4
2 changed files with 141 additions and 97 deletions
|
@ -34,6 +34,7 @@ pub enum BadSequence {
|
||||||
InvalidRepeatCount(String),
|
InvalidRepeatCount(String),
|
||||||
EmptySet2WhenNotTruncatingSet1,
|
EmptySet2WhenNotTruncatingSet1,
|
||||||
ClassExceptLowerUpperInSet2,
|
ClassExceptLowerUpperInSet2,
|
||||||
|
ClassInSet2NotMatchedBySet1,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for BadSequence {
|
impl Display for BadSequence {
|
||||||
|
@ -58,6 +59,9 @@ impl Display for BadSequence {
|
||||||
Self::ClassExceptLowerUpperInSet2 => {
|
Self::ClassExceptLowerUpperInSet2 => {
|
||||||
write!(f, "when translating, the only character classes that may appear in set2 are 'upper' and 'lower'")
|
write!(f, "when translating, the only character classes that may appear in set2 are 'upper' and 'lower'")
|
||||||
}
|
}
|
||||||
|
Self::ClassInSet2NotMatchedBySet1 => {
|
||||||
|
write!(f, "when translating, every 'upper'/'lower' in set2 must be matched by a 'upper'/'lower' in the same position in set1")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -91,18 +95,22 @@ pub enum Sequence {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Sequence {
|
impl Sequence {
|
||||||
pub fn flatten(&self) -> Box<dyn Iterator<Item = u8>> {
|
pub fn flatten_non_lower_upper(&self) -> Box<dyn Iterator<Item = Self>> {
|
||||||
match self {
|
match self {
|
||||||
Self::Char(c) => Box::new(std::iter::once(*c)),
|
Self::Char(c) => Box::new(std::iter::once(*c).map(Self::Char)),
|
||||||
Self::CharRange(l, r) => Box::new(*l..=*r),
|
Self::CharRange(l, r) => Box::new((*l..=*r).map(Self::Char)),
|
||||||
Self::CharStar(c) => Box::new(std::iter::repeat(*c)),
|
Self::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n).map(Self::Char)),
|
||||||
Self::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)),
|
|
||||||
Self::Class(class) => match class {
|
Self::Class(class) => match class {
|
||||||
Class::Alnum => Box::new((b'0'..=b'9').chain(b'A'..=b'Z').chain(b'a'..=b'z')),
|
Class::Alnum => Box::new(
|
||||||
Class::Alpha => Box::new((b'A'..=b'Z').chain(b'a'..=b'z')),
|
(b'0'..=b'9')
|
||||||
Class::Blank => Box::new(unicode_table::BLANK.iter().cloned()),
|
.chain(b'A'..=b'Z')
|
||||||
Class::Control => Box::new((0..=31).chain(std::iter::once(127))),
|
.chain(b'a'..=b'z')
|
||||||
Class::Digit => Box::new(b'0'..=b'9'),
|
.map(Self::Char),
|
||||||
|
),
|
||||||
|
Class::Alpha => Box::new((b'A'..=b'Z').chain(b'a'..=b'z').map(Self::Char)),
|
||||||
|
Class::Blank => Box::new(unicode_table::BLANK.iter().cloned().map(Self::Char)),
|
||||||
|
Class::Control => Box::new((0..=31).chain(std::iter::once(127)).map(Self::Char)),
|
||||||
|
Class::Digit => Box::new((b'0'..=b'9').map(Self::Char)),
|
||||||
Class::Graph => Box::new(
|
Class::Graph => Box::new(
|
||||||
(48..=57) // digit
|
(48..=57) // digit
|
||||||
.chain(65..=90) // uppercase
|
.chain(65..=90) // uppercase
|
||||||
|
@ -112,9 +120,9 @@ impl Sequence {
|
||||||
.chain(58..=64)
|
.chain(58..=64)
|
||||||
.chain(91..=96)
|
.chain(91..=96)
|
||||||
.chain(123..=126)
|
.chain(123..=126)
|
||||||
.chain(std::iter::once(32)), // space
|
.chain(std::iter::once(32))
|
||||||
|
.map(Self::Char), // space
|
||||||
),
|
),
|
||||||
Class::Lower => Box::new(b'a'..=b'z'),
|
|
||||||
Class::Print => Box::new(
|
Class::Print => Box::new(
|
||||||
(48..=57) // digit
|
(48..=57) // digit
|
||||||
.chain(65..=90) // uppercase
|
.chain(65..=90) // uppercase
|
||||||
|
@ -123,13 +131,37 @@ impl Sequence {
|
||||||
.chain(33..=47)
|
.chain(33..=47)
|
||||||
.chain(58..=64)
|
.chain(58..=64)
|
||||||
.chain(91..=96)
|
.chain(91..=96)
|
||||||
.chain(123..=126),
|
.chain(123..=126)
|
||||||
|
.map(Self::Char),
|
||||||
),
|
),
|
||||||
Class::Punct => Box::new((33..=47).chain(58..=64).chain(91..=96).chain(123..=126)),
|
Class::Punct => Box::new(
|
||||||
Class::Space => Box::new(unicode_table::SPACES.iter().cloned()),
|
(33..=47)
|
||||||
Class::Upper => Box::new(b'A'..=b'Z'),
|
.chain(58..=64)
|
||||||
Class::Xdigit => Box::new((b'0'..=b'9').chain(b'A'..=b'F').chain(b'a'..=b'f')),
|
.chain(91..=96)
|
||||||
|
.chain(123..=126)
|
||||||
|
.map(Self::Char),
|
||||||
|
),
|
||||||
|
Class::Space => Box::new(unicode_table::SPACES.iter().cloned().map(Self::Char)),
|
||||||
|
Class::Xdigit => Box::new(
|
||||||
|
(b'0'..=b'9')
|
||||||
|
.chain(b'A'..=b'F')
|
||||||
|
.chain(b'a'..=b'f')
|
||||||
|
.map(Self::Char),
|
||||||
|
),
|
||||||
|
s => Box::new(std::iter::once(Self::Class(*s))),
|
||||||
},
|
},
|
||||||
|
s => Box::new(std::iter::once(*s)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn flatten_all(&self) -> Box<dyn Iterator<Item = Self>> {
|
||||||
|
match self {
|
||||||
|
Self::Class(class) => match class {
|
||||||
|
Class::Lower => Box::new((b'a'..=b'z').map(Self::Char)),
|
||||||
|
Class::Upper => Box::new((b'A'..=b'Z').map(Self::Char)),
|
||||||
|
s => Self::Class(*s).flatten_non_lower_upper(),
|
||||||
|
},
|
||||||
|
s => s.flatten_non_lower_upper(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -141,11 +173,23 @@ impl Sequence {
|
||||||
truncate_set1_flag: bool,
|
truncate_set1_flag: bool,
|
||||||
translating: bool,
|
translating: bool,
|
||||||
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
|
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
|
||||||
let set1 = Self::from_str(set1_str)?;
|
|
||||||
let is_char_star = |s: &&Self| -> bool { matches!(s, Self::CharStar(_)) };
|
let is_char_star = |s: &&Self| -> bool { matches!(s, Self::CharStar(_)) };
|
||||||
let set1_star_count = set1.iter().filter(is_char_star).count();
|
let to_u8 = |s: Self| -> Option<u8> {
|
||||||
if set1_star_count == 0 {
|
match s {
|
||||||
let set2 = Self::from_str(set2_str)?;
|
Self::Char(c) => Some(c),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let set1 = Self::from_str(set1_str)?;
|
||||||
|
if set1.iter().filter(is_char_star).count() != 0 {
|
||||||
|
return Err(BadSequence::CharRepeatInSet1);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut set2 = Self::from_str(set2_str)?;
|
||||||
|
if set2.iter().filter(is_char_star).count() > 1 {
|
||||||
|
return Err(BadSequence::MultipleCharRepeatInSet2);
|
||||||
|
}
|
||||||
|
|
||||||
if translating
|
if translating
|
||||||
&& set2.iter().any(|&x| {
|
&& set2.iter().any(|&x| {
|
||||||
|
@ -156,75 +200,70 @@ impl Sequence {
|
||||||
return Err(BadSequence::ClassExceptLowerUpperInSet2);
|
return Err(BadSequence::ClassExceptLowerUpperInSet2);
|
||||||
}
|
}
|
||||||
|
|
||||||
let set2_star_count = set2.iter().filter(is_char_star).count();
|
let mut set1_solved: Vec<u8> = set1
|
||||||
if set2_star_count < 2 {
|
.iter()
|
||||||
let char_star = set2.iter().find_map(|s| match s {
|
.flat_map(Self::flatten_all)
|
||||||
Self::CharStar(c) => Some(c),
|
.filter_map(to_u8)
|
||||||
_ => None,
|
.collect();
|
||||||
});
|
if complement_flag {
|
||||||
let mut partition = set2.as_slice().split(|s| matches!(s, Self::CharStar(_)));
|
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
|
||||||
let set1_len = set1.iter().flat_map(Self::flatten).count();
|
}
|
||||||
|
let set1_len = set1_solved.len();
|
||||||
|
|
||||||
let set2_len = set2
|
let set2_len = set2
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|s| match s {
|
.filter_map(|s| match s {
|
||||||
Self::CharStar(_) => None,
|
Self::CharStar(_) => None,
|
||||||
r => Some(r),
|
r => Some(r),
|
||||||
})
|
})
|
||||||
.flat_map(Self::flatten)
|
.flat_map(Self::flatten_all)
|
||||||
.count();
|
.count();
|
||||||
|
|
||||||
let star_compensate_len = set1_len.saturating_sub(set2_len);
|
let star_compensate_len = set1_len.saturating_sub(set2_len);
|
||||||
let (left, right) = (partition.next(), partition.next());
|
|
||||||
let set2_solved: Vec<_> = match (left, right) {
|
//Replace CharStar with CharRepeat
|
||||||
(None, None) => match char_star {
|
set2 = set2
|
||||||
Some(c) => std::iter::repeat(*c).take(star_compensate_len).collect(),
|
|
||||||
None => std::iter::empty().collect(),
|
|
||||||
},
|
|
||||||
(None, Some(set2_b)) => {
|
|
||||||
if let Some(c) = char_star {
|
|
||||||
std::iter::repeat(*c)
|
|
||||||
.take(star_compensate_len)
|
|
||||||
.chain(set2_b.iter().flat_map(Self::flatten))
|
|
||||||
.collect()
|
|
||||||
} else {
|
|
||||||
set2_b.iter().flat_map(Self::flatten).collect()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(Some(set2_a), None) => match char_star {
|
|
||||||
Some(c) => set2_a
|
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(Self::flatten)
|
.filter_map(|s| match s {
|
||||||
.chain(std::iter::repeat(*c).take(star_compensate_len))
|
Self::CharStar(0) => None,
|
||||||
.collect(),
|
Self::CharStar(c) => Some(Self::CharRepeat(*c, star_compensate_len)),
|
||||||
None => set2_a.iter().flat_map(Self::flatten).collect(),
|
r => Some(*r),
|
||||||
},
|
})
|
||||||
(Some(set2_a), Some(set2_b)) => match char_star {
|
.collect();
|
||||||
Some(c) => set2_a
|
|
||||||
|
//Flatten everything but upper/lower into Char
|
||||||
|
let set1_flattened: Vec<_> = set1
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(Self::flatten)
|
.flat_map(Self::flatten_non_lower_upper)
|
||||||
.chain(std::iter::repeat(*c).take(star_compensate_len))
|
.collect();
|
||||||
.chain(set2_b.iter().flat_map(Self::flatten))
|
set2 = set2
|
||||||
.collect(),
|
|
||||||
None => set2_a
|
|
||||||
.iter()
|
.iter()
|
||||||
.chain(set2_b.iter())
|
.flat_map(Self::flatten_non_lower_upper)
|
||||||
.flat_map(Self::flatten)
|
.collect();
|
||||||
.collect(),
|
|
||||||
},
|
if set2
|
||||||
};
|
.iter()
|
||||||
let mut set1_solved: Vec<_> = set1.iter().flat_map(Self::flatten).collect();
|
.zip(
|
||||||
if complement_flag {
|
set1_flattened
|
||||||
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
|
.iter()
|
||||||
|
.chain(std::iter::repeat(&Self::Char(0))),
|
||||||
|
)
|
||||||
|
.any(|x| matches!(x.0, Self::Class(_)) && !matches!(x.1, Self::Class(_)))
|
||||||
|
{
|
||||||
|
return Err(BadSequence::ClassInSet2NotMatchedBySet1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let set2_solved: Vec<_> = set2
|
||||||
|
.iter()
|
||||||
|
.flat_map(Self::flatten_all)
|
||||||
|
.filter_map(to_u8)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
//Truncation is done dead last. It has no influence on the other conversion steps
|
||||||
if truncate_set1_flag {
|
if truncate_set1_flag {
|
||||||
set1_solved.truncate(set2_solved.len());
|
set1_solved.truncate(set2_solved.len());
|
||||||
}
|
}
|
||||||
Ok((set1_solved, set2_solved))
|
Ok((set1_solved, set2_solved))
|
||||||
} else {
|
|
||||||
Err(BadSequence::MultipleCharRepeatInSet2)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Err(BadSequence::CharRepeatInSet1)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1369,3 +1369,8 @@ fn check_ignore_truncate_when_squeezing() {
|
||||||
fn check_disallow_blank_in_set2_when_translating() {
|
fn check_disallow_blank_in_set2_when_translating() {
|
||||||
new_ucmd!().args(&["-t", "1234", "[:blank:]"]).fails();
|
new_ucmd!().args(&["-t", "1234", "[:blank:]"]).fails();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn check_class_in_set2_must_be_matched_in_set1() {
|
||||||
|
new_ucmd!().args(&["-t", "1[:upper:]", "[:upper:]"]).fails();
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue