mirror of
https://github.com/uutils/coreutils
synced 2024-12-18 00:53:25 +00:00
Attempting to fix star expansion
Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
d5dbedb2e4
commit
279a7cf6b3
1 changed files with 218 additions and 162 deletions
|
@ -26,10 +26,132 @@ mod unicode_table {
|
|||
pub static BLANK: &'static [char] = &[SPACE, HT];
|
||||
}
|
||||
|
||||
/// An iterator that yields the same character forever.
///
/// Unlike a boxed `std::iter::Repeat`, the `last` and `any` overrides below
/// return immediately instead of looping without end.
struct Repeat(char);

impl Repeat {
    /// Creates an iterator that repeats `element` without end.
    fn new(element: char) -> Repeat {
        Repeat(element)
    }
}

impl Iterator for Repeat {
    type Item = char;

    /// Always yields the stored character; the iterator is never exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        Some(self.0)
    }

    /// Reports an unbounded length so that adaptors such as `take(n)` can
    /// advertise an exact size and collections can preallocate.
    fn size_hint(&self) -> (usize, Option<usize>) {
        (usize::MAX, None)
    }

    /// Every element is the same, so the "last" one is known without iterating.
    fn last(self) -> Option<Self::Item> {
        Some(self.0)
    }

    /// A single probe decides the answer because every element is identical;
    /// the default implementation would never return on a `false` predicate.
    fn any<F>(&mut self, mut f: F) -> bool
    where
        Self: Sized,
        F: FnMut(Self::Item) -> bool,
    {
        f(self.0)
    }
}
|
||||
|
||||
/// Builds a callback for `Iterator::filter_map` over `(index, item)` pairs
/// that keeps only the items whose index is strictly below `input`.
///
/// With `None` there is no limit and every item is kept.
fn truncate_iterator<T>(input: Option<usize>) -> impl Fn((usize, T)) -> Option<T> {
    move |(position, item)| match input {
        // At or past the limit: drop the item.
        Some(limit) if position >= limit => None,
        _ => Some(item),
    }
}
|
||||
|
||||
/// One parsed element of a character set.
///
/// Every variant holds only plain values, so the type is `Copy`; the
/// characters themselves are produced on demand by `Sequence::flatten`.
#[derive(Debug, Clone, Copy)]
pub enum Sequence {
    /// A single literal character.
    Char(char),
    /// An inclusive range, stored as the code points of its two endpoints.
    CharRange(u32, u32),
    /// A character repeated without bound (presumably the `[c*]` form; its
    /// parser is not part of this view).
    CharStar(char),
    /// A character repeated a fixed number of times (`[c*n]`).
    CharRepeat(char, usize),
    /// `[:alnum:]`
    Alnum,
    /// `[:alpha:]`
    Alpha,
    /// `[:blank:]`
    Blank,
    /// `[:cntrl:]`
    Control,
    /// `[:digit:]`
    Digit,
    /// `[:graph:]`
    Graph,
    /// `[:lower:]`
    Lower,
    /// `[:print:]`
    Print,
    /// `[:punct:]`
    Punct,
    /// `[:space:]`
    Space,
    /// `[:upper:]`
    Upper,
    /// `[:xdigit:]`
    Xdigit,
}
|
||||
|
||||
impl Sequence {
|
||||
pub fn flatten(&self) -> Box<dyn Iterator<Item = char>> {
|
||||
match self {
|
||||
Sequence::Char(c) => Box::new(std::iter::once(*c)),
|
||||
Sequence::CharRange(l, r) => Box::new((*l..=*r).flat_map(char::from_u32)),
|
||||
Sequence::CharStar(c) => Box::new(Repeat::new(*c)),
|
||||
Sequence::CharRepeat(c, n) => Box::new(Repeat::new(*c).take(*n)),
|
||||
Sequence::Alnum => Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z')),
|
||||
Sequence::Alpha => Box::new(('A'..='Z').chain('a'..='z')),
|
||||
Sequence::Blank => Box::new(unicode_table::BLANK.into_iter().cloned()),
|
||||
Sequence::Control => Box::new(
|
||||
(0..=31)
|
||||
.chain(std::iter::once(127))
|
||||
.flat_map(char::from_u32),
|
||||
),
|
||||
Sequence::Digit => Box::new('0'..='9'),
|
||||
Sequence::Graph => Box::new(
|
||||
(48..=57) // digit
|
||||
.chain(65..=90) // uppercase
|
||||
.chain(97..=122) // lowercase
|
||||
// punctuations
|
||||
.chain(33..=47)
|
||||
.chain(58..=64)
|
||||
.chain(91..=96)
|
||||
.chain(123..=126)
|
||||
.chain(std::iter::once(32)) // space
|
||||
.flat_map(char::from_u32),
|
||||
),
|
||||
Sequence::Lower => Box::new('a'..='z'),
|
||||
Sequence::Print => Box::new(
|
||||
(48..=57) // digit
|
||||
.chain(65..=90) // uppercase
|
||||
.chain(97..=122) // lowercase
|
||||
// punctuations
|
||||
.chain(33..=47)
|
||||
.chain(58..=64)
|
||||
.chain(91..=96)
|
||||
.chain(123..=126)
|
||||
.flat_map(char::from_u32),
|
||||
),
|
||||
Sequence::Punct => Box::new(
|
||||
(33..=47)
|
||||
.chain(58..=64)
|
||||
.chain(91..=96)
|
||||
.chain(123..=126)
|
||||
.flat_map(char::from_u32),
|
||||
),
|
||||
Sequence::Space => Box::new(unicode_table::SPACES.into_iter().cloned()),
|
||||
Sequence::Upper => Box::new('A'..='Z'),
|
||||
Sequence::Xdigit => Box::new(('0'..='9').chain('A'..='F').chain('a'..='f')),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn last(&self) -> Option<char> {
|
||||
match self {
|
||||
Sequence::CharStar(c) => Some(*c),
|
||||
// TODO: Can be optimized further...
|
||||
rest => rest.flatten().last(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn len(&self) -> Option<usize> {
|
||||
match self {
|
||||
Sequence::CharStar(_) => None,
|
||||
// TODO: Is there a fix for this?
|
||||
rest => Some(rest.flatten().count()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Sequence {
|
||||
|
@ -70,16 +192,6 @@ impl Sequence {
|
|||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn dissolve(self) -> Box<dyn Iterator<Item = char>> {
|
||||
match self {
|
||||
Sequence::Char(c) => Box::new(std::iter::once(c)),
|
||||
Sequence::CharRange(r) => r,
|
||||
Sequence::CharStar(c) => Box::new(std::iter::repeat(c)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Sequence parsers
|
||||
|
||||
fn parse_char(input: &str) -> IResult<&str, Sequence> {
|
||||
anychar(input).map(|(l, r)| (l, Sequence::Char(r)))
|
||||
}
|
||||
|
@ -115,7 +227,7 @@ impl Sequence {
|
|||
separated_pair(anychar, tag("-"), anychar)(input).map(|(l, (a, b))| {
|
||||
(l, {
|
||||
let (start, end) = (u32::from(a), u32::from(b));
|
||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||
Sequence::CharRange(start, end)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
@ -129,7 +241,7 @@ impl Sequence {
|
|||
.map(|(l, (a, b))| {
|
||||
(l, {
|
||||
let (start, end) = (u32::from(a), u32::from(b));
|
||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||
Sequence::CharRange(start, end)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
@ -143,7 +255,7 @@ impl Sequence {
|
|||
.map(|(l, (a, b))| {
|
||||
(l, {
|
||||
let (start, end) = (u32::from_str_radix(a, 8).unwrap(), u32::from(b));
|
||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||
Sequence::CharRange(start, end)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
@ -157,7 +269,7 @@ impl Sequence {
|
|||
.map(|(l, (a, b))| {
|
||||
(l, {
|
||||
let (start, end) = (u32::from(a), u32::from_str_radix(b, 8).unwrap());
|
||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||
Sequence::CharRange(start, end)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
@ -174,7 +286,7 @@ impl Sequence {
|
|||
u32::from_str_radix(a, 8).unwrap(),
|
||||
u32::from_str_radix(b, 8).unwrap(),
|
||||
);
|
||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||
Sequence::CharRange(start, end)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
@ -189,136 +301,55 @@ impl Sequence {
|
|||
separated_pair(anychar, tag("*"), digit1),
|
||||
tag("]"),
|
||||
)(input)
|
||||
.map(|(l, (c, n))| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(Box::new(std::iter::repeat(c).take(n.parse().unwrap()))),
|
||||
)
|
||||
})
|
||||
.map(|(l, (c, n))| (l, Sequence::CharRepeat(c, n.parse().unwrap())))
|
||||
}
|
||||
|
||||
fn parse_alnum(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:alnum:]")(input).map(|(l, _)| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z'))),
|
||||
)
|
||||
})
|
||||
tag("[:alnum:]")(input).map(|(l, _)| (l, Sequence::Alnum))
|
||||
}
|
||||
|
||||
fn parse_alpha(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:alpha:]")(input).map(|(l, _)| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(Box::new(('A'..='Z').chain('a'..='z'))),
|
||||
)
|
||||
})
|
||||
tag("[:alpha:]")(input).map(|(l, _)| (l, Sequence::Alpha))
|
||||
}
|
||||
|
||||
fn parse_blank(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:blank:]")(input).map(|(l, _)| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(Box::new(unicode_table::BLANK.into_iter().cloned())),
|
||||
)
|
||||
})
|
||||
tag("[:blank:]")(input).map(|(l, _)| (l, Sequence::Blank))
|
||||
}
|
||||
|
||||
fn parse_control(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:cntrl:]")(input).map(|(l, _)| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(Box::new(
|
||||
(0..=31)
|
||||
.chain(std::iter::once(127))
|
||||
.flat_map(char::from_u32),
|
||||
)),
|
||||
)
|
||||
})
|
||||
tag("[:cntrl:]")(input).map(|(l, _)| (l, Sequence::Control))
|
||||
}
|
||||
|
||||
fn parse_digit(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('0'..='9'))))
|
||||
tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::Digit))
|
||||
}
|
||||
|
||||
fn parse_graph(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:graph:]")(input).map(|(l, _)| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(Box::new(
|
||||
(48..=57) // digit
|
||||
.chain(65..=90) // uppercase
|
||||
.chain(97..=122) // lowercase
|
||||
// punctuations
|
||||
.chain(33..=47)
|
||||
.chain(58..=64)
|
||||
.chain(91..=96)
|
||||
.chain(123..=126)
|
||||
.chain(std::iter::once(32)) // space
|
||||
.flat_map(char::from_u32),
|
||||
)),
|
||||
)
|
||||
})
|
||||
tag("[:graph:]")(input).map(|(l, _)| (l, Sequence::Graph))
|
||||
}
|
||||
|
||||
fn parse_lower(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('a'..='z'))))
|
||||
tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::Lower))
|
||||
}
|
||||
|
||||
fn parse_print(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:print:]")(input).map(|(l, _)| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(Box::new(
|
||||
(48..=57) // digit
|
||||
.chain(65..=90) // uppercase
|
||||
.chain(97..=122) // lowercase
|
||||
// punctuations
|
||||
.chain(33..=47)
|
||||
.chain(58..=64)
|
||||
.chain(91..=96)
|
||||
.chain(123..=126)
|
||||
.flat_map(char::from_u32),
|
||||
)),
|
||||
)
|
||||
})
|
||||
tag("[:print:]")(input).map(|(l, _)| (l, Sequence::Print))
|
||||
}
|
||||
|
||||
fn parse_punct(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:punct:]")(input).map(|(l, _)| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(Box::new(
|
||||
(33..=47)
|
||||
.chain(58..=64)
|
||||
.chain(91..=96)
|
||||
.chain(123..=126)
|
||||
.flat_map(char::from_u32),
|
||||
)),
|
||||
)
|
||||
})
|
||||
tag("[:punct:]")(input).map(|(l, _)| (l, Sequence::Punct))
|
||||
}
|
||||
|
||||
fn parse_space(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:space:]")(input).map(|(l, _)| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(Box::new(unicode_table::SPACES.into_iter().cloned())),
|
||||
)
|
||||
})
|
||||
tag("[:space:]")(input).map(|(l, _)| (l, Sequence::Space))
|
||||
}
|
||||
|
||||
fn parse_upper(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('A'..='Z'))))
|
||||
tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::Upper))
|
||||
}
|
||||
|
||||
fn parse_xdigit(input: &str) -> IResult<&str, Sequence> {
|
||||
tag("[:xdigit:]")(input).map(|(l, _)| {
|
||||
(
|
||||
l,
|
||||
Sequence::CharRange(Box::new(('0'..='9').chain('A'..='F').chain('a'..='f'))),
|
||||
)
|
||||
})
|
||||
tag("[:xdigit:]")(input).map(|(l, _)| (l, Sequence::Xdigit))
|
||||
}
|
||||
|
||||
fn parse_char_equal(input: &str) -> IResult<&str, Sequence> {
|
||||
|
@ -339,10 +370,7 @@ pub struct DeleteOperation {
|
|||
impl DeleteOperation {
|
||||
pub fn new(set: Vec<Sequence>, complement_flag: bool) -> DeleteOperation {
|
||||
DeleteOperation {
|
||||
set: set
|
||||
.into_iter()
|
||||
.flat_map(Sequence::dissolve)
|
||||
.collect::<Vec<_>>(),
|
||||
set: set.iter().flat_map(Sequence::flatten).collect::<Vec<_>>(),
|
||||
complement_flag,
|
||||
}
|
||||
}
|
||||
|
@ -355,21 +383,30 @@ impl SymbolTranslator for DeleteOperation {
|
|||
}
|
||||
}
|
||||
|
||||
/// State for translating the characters that are NOT in `set1`.
///
/// `Debug` is not derived: `set2` is a boxed iterator, which has no `Debug`
/// implementation.
pub struct TranslateOperationComplement {
    /// Code point from which the next search for a complement character starts.
    iter: u32,
    /// Expanded first set; its members are skipped during that search.
    set1: Vec<char>,
    /// Replacement characters, pulled one at a time as they are needed.
    set2: Box<dyn Iterator<Item = char>>,
    /// Stand-in replacement character (presumably used once `set2` is
    /// exhausted — confirm in `translate`).
    fallback: char,
    /// Mappings established so far.
    translation_map: HashMap<char, char>,
}
|
||||
|
||||
impl TranslateOperationComplement {
|
||||
fn new(set1: Vec<char>, set2: Vec<char>, fallback: char) -> TranslateOperationComplement {
|
||||
fn new(
|
||||
set1: Vec<Sequence>,
|
||||
set2: Vec<Sequence>,
|
||||
set1_truncate_length: Option<usize>,
|
||||
fallback: char,
|
||||
) -> TranslateOperationComplement {
|
||||
TranslateOperationComplement {
|
||||
iter: 0,
|
||||
set1,
|
||||
set2: set2.into_iter().rev().collect(),
|
||||
set1: set1
|
||||
.iter()
|
||||
.flat_map(Sequence::flatten)
|
||||
.enumerate()
|
||||
.filter_map(truncate_iterator(set1_truncate_length))
|
||||
.collect(),
|
||||
set2: Box::new(set2.into_iter().flat_map(|c| Sequence::flatten(&c))),
|
||||
fallback,
|
||||
translation_map: HashMap::new(),
|
||||
}
|
||||
|
@ -382,61 +419,83 @@ pub struct TranslateOperationStandard {
|
|||
}
|
||||
|
||||
impl TranslateOperationStandard {
|
||||
fn new(set1: Vec<char>, set2: Vec<char>, fallback: char) -> TranslateOperationStandard {
|
||||
fn new(
|
||||
set1: Vec<Sequence>,
|
||||
set2: Vec<Sequence>,
|
||||
set1_truncate_length: Option<usize>,
|
||||
fallback: char,
|
||||
) -> TranslateOperationStandard {
|
||||
TranslateOperationStandard {
|
||||
translation_map: set1
|
||||
.into_iter()
|
||||
.zip(set2.into_iter().chain(std::iter::repeat(fallback)))
|
||||
.iter()
|
||||
.flat_map(Sequence::flatten)
|
||||
.zip(
|
||||
set2.iter()
|
||||
.flat_map(Sequence::flatten)
|
||||
.chain(Repeat(fallback)),
|
||||
)
|
||||
.enumerate()
|
||||
.filter_map(truncate_iterator(set1_truncate_length))
|
||||
.collect::<HashMap<_, _>>(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TranslateOperation {
|
||||
Standard(TranslateOperationStandard),
|
||||
Complement(TranslateOperationComplement),
|
||||
}
|
||||
|
||||
impl TranslateOperation {
|
||||
fn next_complement_char(mut iter: u32, ignore_list: &[char]) -> (u32, char) {
|
||||
while (char::from_u32(iter).is_none()
|
||||
|| ignore_list
|
||||
.iter()
|
||||
.map(|c| u32::from(*c))
|
||||
.any(|c| iter.eq(&c)))
|
||||
&& iter.ne(&u32::MAX)
|
||||
{
|
||||
iter = iter.saturating_add(1)
|
||||
}
|
||||
(iter.saturating_add(1), char::from_u32(iter).unwrap())
|
||||
fn next_complement_char(iter: u32, ignore_list: &[char]) -> (u32, char) {
|
||||
(iter..)
|
||||
.filter_map(char::from_u32)
|
||||
.filter(|c| !ignore_list.iter().any(|s| s.eq(c)))
|
||||
.map(|c| (u32::from(c) + 1, c))
|
||||
.next()
|
||||
.expect("exhausted all possible characters")
|
||||
}
|
||||
}
|
||||
|
||||
impl TranslateOperation {
|
||||
pub fn new(
|
||||
pset1: Vec<Sequence>,
|
||||
pset2: Vec<Sequence>,
|
||||
set1: Vec<Sequence>,
|
||||
set2: Vec<Sequence>,
|
||||
truncate_set1: bool,
|
||||
complement: bool,
|
||||
) -> TranslateOperation {
|
||||
// TODO: Only some translation is acceptable i.e. uppercase/lowercase transform.
|
||||
let mut set1 = pset1
|
||||
.into_iter()
|
||||
.flat_map(Sequence::dissolve)
|
||||
.collect::<Vec<_>>();
|
||||
let set2 = pset2
|
||||
.into_iter()
|
||||
.flat_map(Sequence::dissolve)
|
||||
.collect::<Vec<_>>();
|
||||
let fallback = set2.last().cloned().unwrap();
|
||||
if truncate_set1 {
|
||||
set1.truncate(set2.len());
|
||||
}
|
||||
if complement {
|
||||
TranslateOperation::Complement(TranslateOperationComplement::new(set1, set2, fallback))
|
||||
let fallback = set2
|
||||
.iter()
|
||||
.rev()
|
||||
.next()
|
||||
.map(Sequence::last)
|
||||
.flatten()
|
||||
.unwrap();
|
||||
let set1_truncate_length = if truncate_set1 {
|
||||
set2.iter()
|
||||
.map(Sequence::len)
|
||||
.reduce(|a, b| match (a, b) {
|
||||
(Some(l), Some(r)) => Some(l + r),
|
||||
_ => None,
|
||||
})
|
||||
.flatten()
|
||||
} else {
|
||||
TranslateOperation::Standard(TranslateOperationStandard::new(set1, set2, fallback))
|
||||
None
|
||||
};
|
||||
if complement {
|
||||
TranslateOperation::Complement(TranslateOperationComplement::new(
|
||||
set1,
|
||||
set2,
|
||||
set1_truncate_length,
|
||||
fallback,
|
||||
))
|
||||
} else {
|
||||
TranslateOperation::Standard(TranslateOperationStandard::new(
|
||||
set1,
|
||||
set2,
|
||||
set1_truncate_length,
|
||||
fallback,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -466,7 +525,7 @@ impl SymbolTranslator for TranslateOperation {
|
|||
Some(*c)
|
||||
} else {
|
||||
while translation_map.get(&current).is_none() {
|
||||
if let Some(p) = set2.pop() {
|
||||
if let Some(p) = set2.next() {
|
||||
let (next_index, next_value) =
|
||||
TranslateOperation::next_complement_char(*iter, &*set1);
|
||||
*iter = next_index;
|
||||
|
@ -484,18 +543,15 @@ impl SymbolTranslator for TranslateOperation {
|
|||
|
||||
/// State for squeezing runs of a repeated character.
#[derive(Debug, Clone)]
pub struct SqueezeOperation {
    /// Expanded set that each input character is tested against.
    set1: Vec<char>,
    /// Selects the complemented branch of `translate`.
    complement: bool,
    /// The most recently seen input character, if any.
    previous: Option<char>,
}
|
||||
|
||||
impl SqueezeOperation {
|
||||
pub fn new(squeeze_set: Vec<Sequence>, complement: bool) -> SqueezeOperation {
|
||||
pub fn new(set1: Vec<Sequence>, complement: bool) -> SqueezeOperation {
|
||||
SqueezeOperation {
|
||||
squeeze_set: squeeze_set
|
||||
.into_iter()
|
||||
.flat_map(Sequence::dissolve)
|
||||
.collect(),
|
||||
set1: set1.iter().flat_map(Sequence::flatten).collect(),
|
||||
complement,
|
||||
previous: None,
|
||||
}
|
||||
|
@ -505,7 +561,7 @@ impl SqueezeOperation {
|
|||
impl SymbolTranslator for SqueezeOperation {
|
||||
fn translate(&mut self, current: char) -> Option<char> {
|
||||
if self.complement {
|
||||
let next = if self.squeeze_set.iter().any(|c| c.eq(&current)) {
|
||||
let next = if self.set1.iter().any(|c| c.eq(&current)) {
|
||||
Some(current)
|
||||
} else {
|
||||
match self.previous {
|
||||
|
@ -526,7 +582,7 @@ impl SymbolTranslator for SqueezeOperation {
|
|||
self.previous = Some(current);
|
||||
next
|
||||
} else {
|
||||
let next = if self.squeeze_set.iter().any(|c| c.eq(&current)) {
|
||||
let next = if self.set1.iter().any(|c| c.eq(&current)) {
|
||||
match self.previous {
|
||||
Some(v) if v == current => None,
|
||||
_ => Some(current),
|
||||
|
@ -542,7 +598,7 @@ impl SymbolTranslator for SqueezeOperation {
|
|||
|
||||
pub fn translate_input<T, R, W>(input: &mut R, output: &mut W, mut translator: T)
|
||||
where
|
||||
T: SymbolTranslator + Debug,
|
||||
T: SymbolTranslator,
|
||||
R: BufRead,
|
||||
W: Write,
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue