Now all tr tests passes with the new translation impl!

Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
Hanif Bin Ariffin 2021-07-18 12:59:04 +08:00
parent 572cbc6ba2
commit 50167a33a8

View file

@ -1,7 +1,7 @@
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::{tag, take, take_until}, bytes::complete::{tag, take, take_until},
character::complete::one_of, character::complete::{none_of, one_of},
multi::many0, multi::many0,
sequence::{separated_pair, tuple}, sequence::{separated_pair, tuple},
IResult, IResult,
@ -21,7 +21,10 @@ impl Sequence {
pub fn parse_set_string(input: &str) -> Vec<Sequence> { pub fn parse_set_string(input: &str) -> Vec<Sequence> {
many0(alt(( many0(alt((
alt(( alt((
Sequence::parse_octal, Sequence::parse_3_octal,
Sequence::parse_2_octal,
Sequence::parse_1_octal,
Sequence::parse_unrecognized_backslash,
Sequence::parse_backslash, Sequence::parse_backslash,
Sequence::parse_audible_bel, Sequence::parse_audible_bel,
Sequence::parse_backspace, Sequence::parse_backspace,
@ -71,7 +74,44 @@ impl Sequence {
take(1usize)(input).map(|(l, r)| (l, Sequence::Char(r.chars().next().unwrap()))) take(1usize)(input).map(|(l, r)| (l, Sequence::Char(r.chars().next().unwrap())))
} }
fn parse_octal(input: &str) -> IResult<&str, Sequence> { fn parse_unrecognized_backslash(input: &str) -> IResult<&str, Sequence> {
tuple((tag("\\"), none_of("01234567")))(input).map(|(l, (_, a))| {
let c = match a {
'a' => Sequence::Char('\u{0007}'),
'b' => Sequence::Char('\u{0008}'),
'f' => Sequence::Char('\u{000C}'),
'n' => Sequence::Char('\u{000A}'),
'r' => Sequence::Char('\u{000D}'),
't' => Sequence::Char('\u{0009}'),
'v' => Sequence::Char('\u{000B}'),
_ => Sequence::Char(a),
};
(l, c)
})
}
fn parse_1_octal(input: &str) -> IResult<&str, Sequence> {
tuple((tag("\\"), one_of("01234567")))(input).map(|(l, (_, a))| {
(
l,
Sequence::Char(std::char::from_u32(a.to_digit(8).unwrap()).unwrap()),
)
})
}
fn parse_2_octal(input: &str) -> IResult<&str, Sequence> {
tuple((tag("\\"), one_of("01234567"), one_of("01234567")))(input).map(|(l, (_, a, b))| {
(
l,
Sequence::Char(
std::char::from_u32(a.to_digit(8).unwrap() * 8 + b.to_digit(8).unwrap())
.unwrap(),
),
)
})
}
fn parse_3_octal(input: &str) -> IResult<&str, Sequence> {
tuple(( tuple((
tag("\\"), tag("\\"),
one_of("01234567"), one_of("01234567"),
@ -133,17 +173,13 @@ impl Sequence {
u32::from(a.chars().next().unwrap()), u32::from(a.chars().next().unwrap()),
u32::from(b.chars().next().unwrap()), u32::from(b.chars().next().unwrap()),
); );
if (start >= 97 && start <= 122 && end >= 97 && end <= 122 && end > start) if start >= 48 && start <= 90 && end >= 48 && end <= 90 && end > start {
|| (start >= 65 && start <= 90 && end >= 65 && end <= 90 && end > start)
|| (start >= 48 && start <= 57 && end >= 48 && end <= 57 && end > start)
{
Sequence::CharRange( Sequence::CharRange(
(start..=end) (start..=end)
.map(|c| std::char::from_u32(c).unwrap()) .map(|c| std::char::from_u32(c).unwrap())
.collect(), .collect(),
) )
} else { } else {
// This part is unchecked...not all `u32` => `char` is valid
Sequence::CharRange( Sequence::CharRange(
(start..=end) (start..=end)
.filter_map(|c| std::char::from_u32(c)) .filter_map(|c| std::char::from_u32(c))
@ -208,7 +244,7 @@ impl Sequence {
} }
fn parse_lower(input: &str) -> IResult<&str, Sequence> { fn parse_lower(input: &str) -> IResult<&str, Sequence> {
tag("[:lower:]")(input).map(|(_, _)| todo!()) tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::CharRange(('a'..='z').collect())))
} }
fn parse_print(input: &str) -> IResult<&str, Sequence> { fn parse_print(input: &str) -> IResult<&str, Sequence> {
@ -282,37 +318,36 @@ impl TranslateOperationNew {
impl TranslateOperationNew { impl TranslateOperationNew {
pub fn new( pub fn new(
set1: Vec<Sequence>, pset1: Vec<Sequence>,
mut set2: Vec<Sequence>, pset2: Vec<Sequence>,
truncate_set2: bool, truncate_set1: bool,
complement: bool, complement: bool,
) -> TranslateOperationNew { ) -> TranslateOperationNew {
let fallback = set2.last().cloned().unwrap(); let mut set1 = pset1
println!("fallback:{:#?}", fallback); .into_iter()
if truncate_set2 { .flat_map(Sequence::dissolve)
set2.truncate(set1.len()); .collect::<Vec<_>>();
let set2 = pset2
.into_iter()
.flat_map(Sequence::dissolve)
.collect::<Vec<_>>();
if truncate_set1 {
set1.truncate(set2.len());
} }
let fallback = set2.last().cloned().unwrap();
if complement { if complement {
TranslateOperationNew::Complement( TranslateOperationNew::Complement(
0, 0,
set1.into_iter().flat_map(Sequence::dissolve).collect(), set1,
set2.into_iter() set2,
.flat_map(Sequence::dissolve)
.rev()
.collect(),
// TODO: Check how `tr` actually handles this // TODO: Check how `tr` actually handles this
fallback.dissolve().first().cloned().unwrap(), fallback,
HashMap::new(), HashMap::new(),
) )
} else { } else {
TranslateOperationNew::Standard( TranslateOperationNew::Standard(
set1.into_iter() set1.into_iter()
.flat_map(Sequence::dissolve) .zip(set2.into_iter().chain(std::iter::repeat(fallback)))
.zip(
set2.into_iter()
.chain(std::iter::repeat(fallback))
.flat_map(Sequence::dissolve),
)
.collect::<HashMap<_, _>>(), .collect::<HashMap<_, _>>(),
) )
} }