mirror of
https://github.com/uutils/coreutils
synced 2024-12-18 00:53:25 +00:00
Attempting to fix star expansion
Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
d5dbedb2e4
commit
279a7cf6b3
1 changed files with 218 additions and 162 deletions
|
@ -26,10 +26,132 @@ mod unicode_table {
|
||||||
pub static BLANK: &'static [char] = &[SPACE, HT];
|
pub static BLANK: &'static [char] = &[SPACE, HT];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An endless iterator that yields the same `char` on every call to `next`.
///
/// `last` and `any` are overridden because the default implementations walk
/// the iterator to its end, which never comes for an infinite sequence.
#[derive(Debug, Clone)]
struct Repeat(char);

impl Repeat {
    /// Creates an iterator that repeats `element` without end.
    fn new(element: char) -> Repeat {
        Repeat(element)
    }
}

impl Iterator for Repeat {
    type Item = char;

    /// Always yields the stored character; never returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        Some(self.0)
    }

    /// Reports an unbounded length so that adaptors such as `take(n)` can
    /// derive an exact size instead of falling back to `(0, None)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        (usize::MAX, None)
    }

    /// Every element is identical, so the "last" one is the stored character.
    fn last(self) -> Option<Self::Item> {
        Some(self.0)
    }

    /// Evaluates the predicate once: all elements are the same character, so
    /// one call decides the answer for the whole (infinite) sequence.
    fn any<F>(&mut self, mut f: F) -> bool
    where
        Self: Sized,
        F: FnMut(Self::Item) -> bool,
    {
        f(self.0)
    }
}
|
||||||
|
|
||||||
|
/// Builds a filter for `enumerate()`d items that keeps only a leading prefix.
///
/// The returned closure receives an `(index, item)` pair and gives the item
/// back while `index` is strictly below the limit in `input`; from the limit
/// onwards it returns `None`. When `input` is `None` every item is kept.
fn truncate_iterator<T>(input: Option<usize>) -> impl Fn((usize, T)) -> Option<T> {
    move |(position, item)| {
        if let Some(limit) = input {
            // Indices are zero-based, so `limit` itself is already past the end.
            if position >= limit {
                return None;
            }
        }
        Some(item)
    }
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub enum Sequence {
|
pub enum Sequence {
|
||||||
Char(char),
|
Char(char),
|
||||||
CharRange(Box<dyn Iterator<Item = char>>),
|
CharRange(u32, u32),
|
||||||
CharStar(char),
|
CharStar(char),
|
||||||
|
CharRepeat(char, usize),
|
||||||
|
Alnum,
|
||||||
|
Alpha,
|
||||||
|
Blank,
|
||||||
|
Control,
|
||||||
|
Digit,
|
||||||
|
Graph,
|
||||||
|
Lower,
|
||||||
|
Print,
|
||||||
|
Punct,
|
||||||
|
Space,
|
||||||
|
Upper,
|
||||||
|
Xdigit,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Sequence {
|
||||||
|
pub fn flatten(&self) -> Box<dyn Iterator<Item = char>> {
|
||||||
|
match self {
|
||||||
|
Sequence::Char(c) => Box::new(std::iter::once(*c)),
|
||||||
|
Sequence::CharRange(l, r) => Box::new((*l..=*r).flat_map(char::from_u32)),
|
||||||
|
Sequence::CharStar(c) => Box::new(Repeat::new(*c)),
|
||||||
|
Sequence::CharRepeat(c, n) => Box::new(Repeat::new(*c).take(*n)),
|
||||||
|
Sequence::Alnum => Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z')),
|
||||||
|
Sequence::Alpha => Box::new(('A'..='Z').chain('a'..='z')),
|
||||||
|
Sequence::Blank => Box::new(unicode_table::BLANK.into_iter().cloned()),
|
||||||
|
Sequence::Control => Box::new(
|
||||||
|
(0..=31)
|
||||||
|
.chain(std::iter::once(127))
|
||||||
|
.flat_map(char::from_u32),
|
||||||
|
),
|
||||||
|
Sequence::Digit => Box::new('0'..='9'),
|
||||||
|
Sequence::Graph => Box::new(
|
||||||
|
(48..=57) // digit
|
||||||
|
.chain(65..=90) // uppercase
|
||||||
|
.chain(97..=122) // lowercase
|
||||||
|
// punctuations
|
||||||
|
.chain(33..=47)
|
||||||
|
.chain(58..=64)
|
||||||
|
.chain(91..=96)
|
||||||
|
.chain(123..=126)
|
||||||
|
.chain(std::iter::once(32)) // space
|
||||||
|
.flat_map(char::from_u32),
|
||||||
|
),
|
||||||
|
Sequence::Lower => Box::new('a'..='z'),
|
||||||
|
Sequence::Print => Box::new(
|
||||||
|
(48..=57) // digit
|
||||||
|
.chain(65..=90) // uppercase
|
||||||
|
.chain(97..=122) // lowercase
|
||||||
|
// punctuations
|
||||||
|
.chain(33..=47)
|
||||||
|
.chain(58..=64)
|
||||||
|
.chain(91..=96)
|
||||||
|
.chain(123..=126)
|
||||||
|
.flat_map(char::from_u32),
|
||||||
|
),
|
||||||
|
Sequence::Punct => Box::new(
|
||||||
|
(33..=47)
|
||||||
|
.chain(58..=64)
|
||||||
|
.chain(91..=96)
|
||||||
|
.chain(123..=126)
|
||||||
|
.flat_map(char::from_u32),
|
||||||
|
),
|
||||||
|
Sequence::Space => Box::new(unicode_table::SPACES.into_iter().cloned()),
|
||||||
|
Sequence::Upper => Box::new('A'..='Z'),
|
||||||
|
Sequence::Xdigit => Box::new(('0'..='9').chain('A'..='F').chain('a'..='f')),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn last(&self) -> Option<char> {
|
||||||
|
match self {
|
||||||
|
Sequence::CharStar(c) => Some(*c),
|
||||||
|
// TODO: Can be optimized further...
|
||||||
|
rest => rest.flatten().last(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> Option<usize> {
|
||||||
|
match self {
|
||||||
|
Sequence::CharStar(_) => None,
|
||||||
|
// TODO: Is there a fix for this?
|
||||||
|
rest => Some(rest.flatten().count()),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Sequence {
|
impl Sequence {
|
||||||
|
@ -70,16 +192,6 @@ impl Sequence {
|
||||||
.unwrap()
|
.unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn dissolve(self) -> Box<dyn Iterator<Item = char>> {
|
|
||||||
match self {
|
|
||||||
Sequence::Char(c) => Box::new(std::iter::once(c)),
|
|
||||||
Sequence::CharRange(r) => r,
|
|
||||||
Sequence::CharStar(c) => Box::new(std::iter::repeat(c)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Sequence parsers
|
|
||||||
|
|
||||||
fn parse_char(input: &str) -> IResult<&str, Sequence> {
|
fn parse_char(input: &str) -> IResult<&str, Sequence> {
|
||||||
anychar(input).map(|(l, r)| (l, Sequence::Char(r)))
|
anychar(input).map(|(l, r)| (l, Sequence::Char(r)))
|
||||||
}
|
}
|
||||||
|
@ -115,7 +227,7 @@ impl Sequence {
|
||||||
separated_pair(anychar, tag("-"), anychar)(input).map(|(l, (a, b))| {
|
separated_pair(anychar, tag("-"), anychar)(input).map(|(l, (a, b))| {
|
||||||
(l, {
|
(l, {
|
||||||
let (start, end) = (u32::from(a), u32::from(b));
|
let (start, end) = (u32::from(a), u32::from(b));
|
||||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
Sequence::CharRange(start, end)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -129,7 +241,7 @@ impl Sequence {
|
||||||
.map(|(l, (a, b))| {
|
.map(|(l, (a, b))| {
|
||||||
(l, {
|
(l, {
|
||||||
let (start, end) = (u32::from(a), u32::from(b));
|
let (start, end) = (u32::from(a), u32::from(b));
|
||||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
Sequence::CharRange(start, end)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -143,7 +255,7 @@ impl Sequence {
|
||||||
.map(|(l, (a, b))| {
|
.map(|(l, (a, b))| {
|
||||||
(l, {
|
(l, {
|
||||||
let (start, end) = (u32::from_str_radix(a, 8).unwrap(), u32::from(b));
|
let (start, end) = (u32::from_str_radix(a, 8).unwrap(), u32::from(b));
|
||||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
Sequence::CharRange(start, end)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -157,7 +269,7 @@ impl Sequence {
|
||||||
.map(|(l, (a, b))| {
|
.map(|(l, (a, b))| {
|
||||||
(l, {
|
(l, {
|
||||||
let (start, end) = (u32::from(a), u32::from_str_radix(b, 8).unwrap());
|
let (start, end) = (u32::from(a), u32::from_str_radix(b, 8).unwrap());
|
||||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
Sequence::CharRange(start, end)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -174,7 +286,7 @@ impl Sequence {
|
||||||
u32::from_str_radix(a, 8).unwrap(),
|
u32::from_str_radix(a, 8).unwrap(),
|
||||||
u32::from_str_radix(b, 8).unwrap(),
|
u32::from_str_radix(b, 8).unwrap(),
|
||||||
);
|
);
|
||||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
Sequence::CharRange(start, end)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -189,136 +301,55 @@ impl Sequence {
|
||||||
separated_pair(anychar, tag("*"), digit1),
|
separated_pair(anychar, tag("*"), digit1),
|
||||||
tag("]"),
|
tag("]"),
|
||||||
)(input)
|
)(input)
|
||||||
.map(|(l, (c, n))| {
|
.map(|(l, (c, n))| (l, Sequence::CharRepeat(c, n.parse().unwrap())))
|
||||||
(
|
|
||||||
l,
|
|
||||||
Sequence::CharRange(Box::new(std::iter::repeat(c).take(n.parse().unwrap()))),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_alnum(input: &str) -> IResult<&str, Sequence> {
|
fn parse_alnum(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:alnum:]")(input).map(|(l, _)| {
|
tag("[:alnum:]")(input).map(|(l, _)| (l, Sequence::Alnum))
|
||||||
(
|
|
||||||
l,
|
|
||||||
Sequence::CharRange(Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z'))),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_alpha(input: &str) -> IResult<&str, Sequence> {
|
fn parse_alpha(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:alpha:]")(input).map(|(l, _)| {
|
tag("[:alpha:]")(input).map(|(l, _)| (l, Sequence::Alpha))
|
||||||
(
|
|
||||||
l,
|
|
||||||
Sequence::CharRange(Box::new(('A'..='Z').chain('a'..='z'))),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_blank(input: &str) -> IResult<&str, Sequence> {
|
fn parse_blank(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:blank:]")(input).map(|(l, _)| {
|
tag("[:blank:]")(input).map(|(l, _)| (l, Sequence::Blank))
|
||||||
(
|
|
||||||
l,
|
|
||||||
Sequence::CharRange(Box::new(unicode_table::BLANK.into_iter().cloned())),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_control(input: &str) -> IResult<&str, Sequence> {
|
fn parse_control(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:cntrl:]")(input).map(|(l, _)| {
|
tag("[:cntrl:]")(input).map(|(l, _)| (l, Sequence::Control))
|
||||||
(
|
|
||||||
l,
|
|
||||||
Sequence::CharRange(Box::new(
|
|
||||||
(0..=31)
|
|
||||||
.chain(std::iter::once(127))
|
|
||||||
.flat_map(char::from_u32),
|
|
||||||
)),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_digit(input: &str) -> IResult<&str, Sequence> {
|
fn parse_digit(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('0'..='9'))))
|
tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::Digit))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_graph(input: &str) -> IResult<&str, Sequence> {
|
fn parse_graph(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:graph:]")(input).map(|(l, _)| {
|
tag("[:graph:]")(input).map(|(l, _)| (l, Sequence::Graph))
|
||||||
(
|
|
||||||
l,
|
|
||||||
Sequence::CharRange(Box::new(
|
|
||||||
(48..=57) // digit
|
|
||||||
.chain(65..=90) // uppercase
|
|
||||||
.chain(97..=122) // lowercase
|
|
||||||
// punctuations
|
|
||||||
.chain(33..=47)
|
|
||||||
.chain(58..=64)
|
|
||||||
.chain(91..=96)
|
|
||||||
.chain(123..=126)
|
|
||||||
.chain(std::iter::once(32)) // space
|
|
||||||
.flat_map(char::from_u32),
|
|
||||||
)),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_lower(input: &str) -> IResult<&str, Sequence> {
|
fn parse_lower(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('a'..='z'))))
|
tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::Lower))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_print(input: &str) -> IResult<&str, Sequence> {
|
fn parse_print(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:print:]")(input).map(|(l, _)| {
|
tag("[:print:]")(input).map(|(l, _)| (l, Sequence::Print))
|
||||||
(
|
|
||||||
l,
|
|
||||||
Sequence::CharRange(Box::new(
|
|
||||||
(48..=57) // digit
|
|
||||||
.chain(65..=90) // uppercase
|
|
||||||
.chain(97..=122) // lowercase
|
|
||||||
// punctuations
|
|
||||||
.chain(33..=47)
|
|
||||||
.chain(58..=64)
|
|
||||||
.chain(91..=96)
|
|
||||||
.chain(123..=126)
|
|
||||||
.flat_map(char::from_u32),
|
|
||||||
)),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_punct(input: &str) -> IResult<&str, Sequence> {
|
fn parse_punct(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:punct:]")(input).map(|(l, _)| {
|
tag("[:punct:]")(input).map(|(l, _)| (l, Sequence::Punct))
|
||||||
(
|
|
||||||
l,
|
|
||||||
Sequence::CharRange(Box::new(
|
|
||||||
(33..=47)
|
|
||||||
.chain(58..=64)
|
|
||||||
.chain(91..=96)
|
|
||||||
.chain(123..=126)
|
|
||||||
.flat_map(char::from_u32),
|
|
||||||
)),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_space(input: &str) -> IResult<&str, Sequence> {
|
fn parse_space(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:space:]")(input).map(|(l, _)| {
|
tag("[:space:]")(input).map(|(l, _)| (l, Sequence::Space))
|
||||||
(
|
|
||||||
l,
|
|
||||||
Sequence::CharRange(Box::new(unicode_table::SPACES.into_iter().cloned())),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_upper(input: &str) -> IResult<&str, Sequence> {
|
fn parse_upper(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('A'..='Z'))))
|
tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::Upper))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_xdigit(input: &str) -> IResult<&str, Sequence> {
|
fn parse_xdigit(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:xdigit:]")(input).map(|(l, _)| {
|
tag("[:xdigit:]")(input).map(|(l, _)| (l, Sequence::Xdigit))
|
||||||
(
|
|
||||||
l,
|
|
||||||
Sequence::CharRange(Box::new(('0'..='9').chain('A'..='F').chain('a'..='f'))),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_char_equal(input: &str) -> IResult<&str, Sequence> {
|
fn parse_char_equal(input: &str) -> IResult<&str, Sequence> {
|
||||||
|
@ -339,10 +370,7 @@ pub struct DeleteOperation {
|
||||||
impl DeleteOperation {
|
impl DeleteOperation {
|
||||||
pub fn new(set: Vec<Sequence>, complement_flag: bool) -> DeleteOperation {
|
pub fn new(set: Vec<Sequence>, complement_flag: bool) -> DeleteOperation {
|
||||||
DeleteOperation {
|
DeleteOperation {
|
||||||
set: set
|
set: set.iter().flat_map(Sequence::flatten).collect::<Vec<_>>(),
|
||||||
.into_iter()
|
|
||||||
.flat_map(Sequence::dissolve)
|
|
||||||
.collect::<Vec<_>>(),
|
|
||||||
complement_flag,
|
complement_flag,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -355,21 +383,30 @@ impl SymbolTranslator for DeleteOperation {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct TranslateOperationComplement {
|
pub struct TranslateOperationComplement {
|
||||||
iter: u32,
|
iter: u32,
|
||||||
set1: Vec<char>,
|
set1: Vec<char>,
|
||||||
set2: Vec<char>,
|
set2: Box<dyn Iterator<Item = char>>,
|
||||||
fallback: char,
|
fallback: char,
|
||||||
translation_map: HashMap<char, char>,
|
translation_map: HashMap<char, char>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TranslateOperationComplement {
|
impl TranslateOperationComplement {
|
||||||
fn new(set1: Vec<char>, set2: Vec<char>, fallback: char) -> TranslateOperationComplement {
|
fn new(
|
||||||
|
set1: Vec<Sequence>,
|
||||||
|
set2: Vec<Sequence>,
|
||||||
|
set1_truncate_length: Option<usize>,
|
||||||
|
fallback: char,
|
||||||
|
) -> TranslateOperationComplement {
|
||||||
TranslateOperationComplement {
|
TranslateOperationComplement {
|
||||||
iter: 0,
|
iter: 0,
|
||||||
set1,
|
set1: set1
|
||||||
set2: set2.into_iter().rev().collect(),
|
.iter()
|
||||||
|
.flat_map(Sequence::flatten)
|
||||||
|
.enumerate()
|
||||||
|
.filter_map(truncate_iterator(set1_truncate_length))
|
||||||
|
.collect(),
|
||||||
|
set2: Box::new(set2.into_iter().flat_map(|c| Sequence::flatten(&c))),
|
||||||
fallback,
|
fallback,
|
||||||
translation_map: HashMap::new(),
|
translation_map: HashMap::new(),
|
||||||
}
|
}
|
||||||
|
@ -382,61 +419,83 @@ pub struct TranslateOperationStandard {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TranslateOperationStandard {
|
impl TranslateOperationStandard {
|
||||||
fn new(set1: Vec<char>, set2: Vec<char>, fallback: char) -> TranslateOperationStandard {
|
fn new(
|
||||||
|
set1: Vec<Sequence>,
|
||||||
|
set2: Vec<Sequence>,
|
||||||
|
set1_truncate_length: Option<usize>,
|
||||||
|
fallback: char,
|
||||||
|
) -> TranslateOperationStandard {
|
||||||
TranslateOperationStandard {
|
TranslateOperationStandard {
|
||||||
translation_map: set1
|
translation_map: set1
|
||||||
.into_iter()
|
.iter()
|
||||||
.zip(set2.into_iter().chain(std::iter::repeat(fallback)))
|
.flat_map(Sequence::flatten)
|
||||||
|
.zip(
|
||||||
|
set2.iter()
|
||||||
|
.flat_map(Sequence::flatten)
|
||||||
|
.chain(Repeat(fallback)),
|
||||||
|
)
|
||||||
|
.enumerate()
|
||||||
|
.filter_map(truncate_iterator(set1_truncate_length))
|
||||||
.collect::<HashMap<_, _>>(),
|
.collect::<HashMap<_, _>>(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum TranslateOperation {
|
pub enum TranslateOperation {
|
||||||
Standard(TranslateOperationStandard),
|
Standard(TranslateOperationStandard),
|
||||||
Complement(TranslateOperationComplement),
|
Complement(TranslateOperationComplement),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TranslateOperation {
|
impl TranslateOperation {
|
||||||
fn next_complement_char(mut iter: u32, ignore_list: &[char]) -> (u32, char) {
|
fn next_complement_char(iter: u32, ignore_list: &[char]) -> (u32, char) {
|
||||||
while (char::from_u32(iter).is_none()
|
(iter..)
|
||||||
|| ignore_list
|
.filter_map(char::from_u32)
|
||||||
.iter()
|
.filter(|c| !ignore_list.iter().any(|s| s.eq(c)))
|
||||||
.map(|c| u32::from(*c))
|
.map(|c| (u32::from(c) + 1, c))
|
||||||
.any(|c| iter.eq(&c)))
|
.next()
|
||||||
&& iter.ne(&u32::MAX)
|
.expect("exhausted all possible characters")
|
||||||
{
|
|
||||||
iter = iter.saturating_add(1)
|
|
||||||
}
|
|
||||||
(iter.saturating_add(1), char::from_u32(iter).unwrap())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TranslateOperation {
|
impl TranslateOperation {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
pset1: Vec<Sequence>,
|
set1: Vec<Sequence>,
|
||||||
pset2: Vec<Sequence>,
|
set2: Vec<Sequence>,
|
||||||
truncate_set1: bool,
|
truncate_set1: bool,
|
||||||
complement: bool,
|
complement: bool,
|
||||||
) -> TranslateOperation {
|
) -> TranslateOperation {
|
||||||
// TODO: Only some translation is acceptable i.e. uppercase/lowercase transform.
|
let fallback = set2
|
||||||
let mut set1 = pset1
|
.iter()
|
||||||
.into_iter()
|
.rev()
|
||||||
.flat_map(Sequence::dissolve)
|
.next()
|
||||||
.collect::<Vec<_>>();
|
.map(Sequence::last)
|
||||||
let set2 = pset2
|
.flatten()
|
||||||
.into_iter()
|
.unwrap();
|
||||||
.flat_map(Sequence::dissolve)
|
let set1_truncate_length = if truncate_set1 {
|
||||||
.collect::<Vec<_>>();
|
set2.iter()
|
||||||
let fallback = set2.last().cloned().unwrap();
|
.map(Sequence::len)
|
||||||
if truncate_set1 {
|
.reduce(|a, b| match (a, b) {
|
||||||
set1.truncate(set2.len());
|
(Some(l), Some(r)) => Some(l + r),
|
||||||
}
|
_ => None,
|
||||||
if complement {
|
})
|
||||||
TranslateOperation::Complement(TranslateOperationComplement::new(set1, set2, fallback))
|
.flatten()
|
||||||
} else {
|
} else {
|
||||||
TranslateOperation::Standard(TranslateOperationStandard::new(set1, set2, fallback))
|
None
|
||||||
|
};
|
||||||
|
if complement {
|
||||||
|
TranslateOperation::Complement(TranslateOperationComplement::new(
|
||||||
|
set1,
|
||||||
|
set2,
|
||||||
|
set1_truncate_length,
|
||||||
|
fallback,
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
TranslateOperation::Standard(TranslateOperationStandard::new(
|
||||||
|
set1,
|
||||||
|
set2,
|
||||||
|
set1_truncate_length,
|
||||||
|
fallback,
|
||||||
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -466,7 +525,7 @@ impl SymbolTranslator for TranslateOperation {
|
||||||
Some(*c)
|
Some(*c)
|
||||||
} else {
|
} else {
|
||||||
while translation_map.get(¤t).is_none() {
|
while translation_map.get(¤t).is_none() {
|
||||||
if let Some(p) = set2.pop() {
|
if let Some(p) = set2.next() {
|
||||||
let (next_index, next_value) =
|
let (next_index, next_value) =
|
||||||
TranslateOperation::next_complement_char(*iter, &*set1);
|
TranslateOperation::next_complement_char(*iter, &*set1);
|
||||||
*iter = next_index;
|
*iter = next_index;
|
||||||
|
@ -484,18 +543,15 @@ impl SymbolTranslator for TranslateOperation {
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct SqueezeOperation {
|
pub struct SqueezeOperation {
|
||||||
squeeze_set: Vec<char>,
|
set1: Vec<char>,
|
||||||
complement: bool,
|
complement: bool,
|
||||||
previous: Option<char>,
|
previous: Option<char>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SqueezeOperation {
|
impl SqueezeOperation {
|
||||||
pub fn new(squeeze_set: Vec<Sequence>, complement: bool) -> SqueezeOperation {
|
pub fn new(set1: Vec<Sequence>, complement: bool) -> SqueezeOperation {
|
||||||
SqueezeOperation {
|
SqueezeOperation {
|
||||||
squeeze_set: squeeze_set
|
set1: set1.iter().flat_map(Sequence::flatten).collect(),
|
||||||
.into_iter()
|
|
||||||
.flat_map(Sequence::dissolve)
|
|
||||||
.collect(),
|
|
||||||
complement,
|
complement,
|
||||||
previous: None,
|
previous: None,
|
||||||
}
|
}
|
||||||
|
@ -505,7 +561,7 @@ impl SqueezeOperation {
|
||||||
impl SymbolTranslator for SqueezeOperation {
|
impl SymbolTranslator for SqueezeOperation {
|
||||||
fn translate(&mut self, current: char) -> Option<char> {
|
fn translate(&mut self, current: char) -> Option<char> {
|
||||||
if self.complement {
|
if self.complement {
|
||||||
let next = if self.squeeze_set.iter().any(|c| c.eq(¤t)) {
|
let next = if self.set1.iter().any(|c| c.eq(¤t)) {
|
||||||
Some(current)
|
Some(current)
|
||||||
} else {
|
} else {
|
||||||
match self.previous {
|
match self.previous {
|
||||||
|
@ -526,7 +582,7 @@ impl SymbolTranslator for SqueezeOperation {
|
||||||
self.previous = Some(current);
|
self.previous = Some(current);
|
||||||
next
|
next
|
||||||
} else {
|
} else {
|
||||||
let next = if self.squeeze_set.iter().any(|c| c.eq(¤t)) {
|
let next = if self.set1.iter().any(|c| c.eq(¤t)) {
|
||||||
match self.previous {
|
match self.previous {
|
||||||
Some(v) if v == current => None,
|
Some(v) if v == current => None,
|
||||||
_ => Some(current),
|
_ => Some(current),
|
||||||
|
@ -542,7 +598,7 @@ impl SymbolTranslator for SqueezeOperation {
|
||||||
|
|
||||||
pub fn translate_input<T, R, W>(input: &mut R, output: &mut W, mut translator: T)
|
pub fn translate_input<T, R, W>(input: &mut R, output: &mut W, mut translator: T)
|
||||||
where
|
where
|
||||||
T: SymbolTranslator + Debug,
|
T: SymbolTranslator,
|
||||||
R: BufRead,
|
R: BufRead,
|
||||||
W: Write,
|
W: Write,
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue