Fixing implementation to pass more GNU tests

Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
Hanif Bin Ariffin 2021-08-01 10:43:10 +08:00
parent dc033ab619
commit 8c82cd660c
4 changed files with 107 additions and 120 deletions

29
src/uu/tr/src/convert.rs Normal file
View file

@ -0,0 +1,29 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{anychar, one_of},
combinator::{map_opt, recognize},
multi::{many0, many_m_n},
sequence::preceded,
IResult,
};
/// Parses a single octal escape: a backslash followed by one to three octal
/// digits (`0`-`7`), yielding the character with that code point.
///
/// Fails (recoverably, so `alt` can try the next branch) when the input does
/// not start with a backslash immediately followed by at least one octal digit.
fn parse_octal(input: &str) -> IResult<&str, char> {
    // At most three digits are consumed, so the value never exceeds 0o777.
    let octal_digits = recognize(many_m_n(1, 3, one_of("01234567")));
    let escape = preceded(tag("\\"), octal_digits);
    map_opt(escape, |digits: &str| {
        let code_point = u32::from_str_radix(digits, 8).ok()?;
        // 0..=0o777 lies entirely below the surrogate range, so this is always `Some`.
        Some(char::from_u32(code_point).unwrap())
    })(input)
}
/// Rewrites every octal escape (`\` followed by 1-3 octal digits) in `input`
/// as the character it denotes, leaving all other characters untouched.
///
/// NOTE(review): the scan has no notion of an escaped backslash, so in
/// `\\101` the first `\` is copied through and the second one is still read
/// as the start of an octal escape — confirm this is the intended behaviour.
pub fn reduce_octal_to_char(input: String) -> String {
    // Each step either decodes an octal escape or falls back to taking one
    // character verbatim; `anychar` only fails at end of input, where `many0` stops.
    let (_remaining, decoded) = many0(alt((parse_octal, anychar)))(input.as_str()).unwrap();
    decoded.into_iter().collect()
}

View file

@ -2,8 +2,8 @@ use nom::{
branch::alt, branch::alt,
bytes::complete::tag, bytes::complete::tag,
character::complete::{anychar, one_of}, character::complete::{anychar, one_of},
combinator::{map_opt, recognize}, combinator::{map, recognize},
multi::{many0, many1, many_m_n}, multi::{many0, many1},
sequence::{delimited, preceded, separated_pair}, sequence::{delimited, preceded, separated_pair},
IResult, IResult,
}; };
@ -13,18 +13,7 @@ use std::{
io::{BufRead, Write}, io::{BufRead, Write},
}; };
mod unicode_table { use crate::unicode_table;
pub static BEL: char = '\u{0007}';
pub static BS: char = '\u{0008}';
pub static HT: char = '\u{0009}';
pub static LF: char = '\u{000A}';
pub static VT: char = '\u{000B}';
pub static FF: char = '\u{000C}';
pub static CR: char = '\u{000D}';
pub static SPACE: char = '\u{0020}';
pub static SPACES: &'static [char] = &[HT, LF, VT, FF, CR, SPACE];
pub static BLANK: &'static [char] = &[SPACE, HT];
}
#[derive(Debug)] #[derive(Debug)]
pub enum BadSequence { pub enum BadSequence {
@ -32,6 +21,7 @@ pub enum BadSequence {
MissingEquivalentClassChar, MissingEquivalentClassChar,
MultipleCharRepeatInSet2, MultipleCharRepeatInSet2,
CharRepeatInSet1, CharRepeatInSet1,
InvalidRepeatCount(String),
} }
impl Display for BadSequence { impl Display for BadSequence {
@ -49,6 +39,9 @@ impl Display for BadSequence {
BadSequence::CharRepeatInSet1 => { BadSequence::CharRepeatInSet1 => {
writeln!(f, "the [c*] repeat construct may not appear in string1") writeln!(f, "the [c*] repeat construct may not appear in string1")
} }
BadSequence::InvalidRepeatCount(count) => {
writeln!(f, "invalid repeat count '{}' in [c*n] construct", count)
}
} }
} }
} }
@ -135,6 +128,7 @@ impl Sequence {
) -> Result<(Vec<char>, Vec<char>), BadSequence> { ) -> Result<(Vec<char>, Vec<char>), BadSequence> {
let set1 = Sequence::from_str(set1_str)?; let set1 = Sequence::from_str(set1_str)?;
let set2 = Sequence::from_str(set2_str)?; let set2 = Sequence::from_str(set2_str)?;
let is_char_star = |s: &&Sequence| -> bool { let is_char_star = |s: &&Sequence| -> bool {
match s { match s {
Sequence::CharStar(_) => true, Sequence::CharStar(_) => true,
@ -219,7 +213,6 @@ impl Sequence {
pub fn from_str(input: &str) -> Result<Vec<Sequence>, BadSequence> { pub fn from_str(input: &str) -> Result<Vec<Sequence>, BadSequence> {
let result = many0(alt(( let result = many0(alt((
alt(( alt((
Sequence::parse_char_range_octal_leftright,
Sequence::parse_char_range, Sequence::parse_char_range,
Sequence::parse_char_star, Sequence::parse_char_star,
Sequence::parse_char_repeat, Sequence::parse_char_repeat,
@ -241,15 +234,12 @@ impl Sequence {
)), )),
// NOTE: Specific error cases // NOTE: Specific error cases
alt(( alt((
Sequence::parse_empty_bracket, Sequence::error_parse_char_repeat,
Sequence::parse_empty_equivalant_char, Sequence::error_parse_empty_bracket,
Sequence::error_parse_empty_equivalant_char,
)), )),
// NOTE: This must be the last one // NOTE: This must be the last one
alt(( map(Sequence::parse_backslash_or_char, |s| Ok(Sequence::Char(s))),
Sequence::parse_octal,
Sequence::parse_backslash,
Sequence::parse_char,
)),
)))(input) )))(input)
.map(|(_, r)| r) .map(|(_, r)| r)
.unwrap() .unwrap()
@ -258,97 +248,31 @@ impl Sequence {
result result
} }
// TODO: We can surely do better than this :( fn parse_backslash(input: &str) -> IResult<&str, char> {
fn parse_octal_or_char(input: &str) -> IResult<&str, char> {
recognize(alt((
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
preceded(tag("\\"), recognize(anychar)),
recognize(anychar),
)))(input)
.map(|(l, a)| {
(
l,
if let Some(input) = a.strip_prefix('\\') {
if input.is_empty() {
'\\'
} else {
char::from_u32(u32::from_str_radix(&input, 8).unwrap_or_else(|_| {
let c = match input.chars().next().unwrap() {
'a' => unicode_table::BEL,
'b' => unicode_table::BS,
'f' => unicode_table::FF,
'n' => unicode_table::LF,
'r' => unicode_table::CR,
't' => unicode_table::HT,
'v' => unicode_table::VT,
x => x,
};
u32::from(c)
}))
.expect("Cannot convert octal value to character")
}
} else {
input
.chars()
.next()
.expect("We recognized a character so this should not fail")
},
)
})
}
fn parse_char(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
anychar(input).map(|(l, r)| (l, Ok(Sequence::Char(r))))
}
fn parse_backslash(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
preceded(tag("\\"), anychar)(input).map(|(l, a)| { preceded(tag("\\"), anychar)(input).map(|(l, a)| {
let c = match a { let c = match a {
'a' => Sequence::Char(unicode_table::BEL), 'a' => unicode_table::BEL,
'b' => Sequence::Char(unicode_table::BS), 'b' => unicode_table::BS,
'f' => Sequence::Char(unicode_table::FF), 'f' => unicode_table::FF,
'n' => Sequence::Char(unicode_table::LF), 'n' => unicode_table::LF,
'r' => Sequence::Char(unicode_table::CR), 'r' => unicode_table::CR,
't' => Sequence::Char(unicode_table::HT), 't' => unicode_table::HT,
'v' => Sequence::Char(unicode_table::VT), 'v' => unicode_table::VT,
x => Sequence::Char(x), x => x,
}; };
(l, Ok(c)) (l, c)
}) })
} }
fn parse_octal(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { fn parse_backslash_or_char(input: &str) -> IResult<&str, char> {
map_opt( alt((Sequence::parse_backslash, anychar))(input)
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|out: &str| {
u32::from_str_radix(out, 8)
.map(|u| Ok(Sequence::Char(char::from_u32(u).unwrap())))
.ok()
},
)(input)
} }
fn parse_char_range(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { fn parse_char_range(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
separated_pair( separated_pair(
Sequence::parse_octal_or_char, Sequence::parse_backslash_or_char,
tag("-"), tag("-"),
Sequence::parse_octal_or_char, Sequence::parse_backslash_or_char,
)(input)
.map(|(l, (a, b))| {
(l, {
let (start, end) = (u32::from(a), u32::from(b));
Ok(Sequence::CharRange(start, end))
})
})
}
fn parse_char_range_octal_leftright(
input: &str,
) -> IResult<&str, Result<Sequence, BadSequence>> {
separated_pair(
Sequence::parse_octal_or_char,
tag("-"),
Sequence::parse_octal_or_char,
)(input) )(input)
.map(|(l, (a, b))| { .map(|(l, (a, b))| {
(l, { (l, {
@ -359,7 +283,7 @@ impl Sequence {
} }
fn parse_char_star(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { fn parse_char_star(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
delimited(tag("["), Sequence::parse_octal_or_char, tag("*]"))(input) delimited(tag("["), Sequence::parse_backslash_or_char, tag("*]"))(input)
.map(|(l, a)| (l, Ok(Sequence::CharStar(a)))) .map(|(l, a)| (l, Ok(Sequence::CharStar(a))))
} }
@ -367,19 +291,21 @@ impl Sequence {
delimited( delimited(
tag("["), tag("["),
separated_pair( separated_pair(
Sequence::parse_octal_or_char, Sequence::parse_backslash_or_char,
tag("*"), tag("*"),
recognize(many1(one_of("01234567"))), recognize(many1(one_of("01234567"))),
), ),
tag("]"), tag("]"),
)(input) )(input)
.map(|(l, (c, n))| { .map(|(l, (c, str))| {
( (
l, l,
Ok(Sequence::CharRepeat( match usize::from_str_radix(str, 8)
c, .expect("This should not fail because we only parse against 0-7")
usize::from_str_radix(n, 8).expect("This should not fail "), {
)), 0 => Ok(Sequence::CharStar(c)),
count => Ok(Sequence::CharRepeat(c, count)),
},
) )
}) })
} }
@ -433,15 +359,32 @@ impl Sequence {
} }
fn parse_char_equal(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { fn parse_char_equal(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
delimited(tag("[="), Sequence::parse_octal_or_char, tag("=]"))(input) delimited(tag("[="), Sequence::parse_backslash_or_char, tag("=]"))(input)
.map(|(l, c)| (l, Ok(Sequence::Char(c)))) .map(|(l, c)| (l, Ok(Sequence::Char(c))))
} }
}
fn parse_empty_bracket(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { impl Sequence {
fn error_parse_char_repeat(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
delimited(
tag("["),
separated_pair(
Sequence::parse_backslash_or_char,
tag("*"),
recognize(many1(one_of("0123456789"))),
),
tag("]"),
)(input)
.map(|(l, (_, n))| (l, Err(BadSequence::InvalidRepeatCount(n.to_string()))))
}
fn error_parse_empty_bracket(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
tag("[::]")(input).map(|(l, _)| (l, Err(BadSequence::MissingCharClassName))) tag("[::]")(input).map(|(l, _)| (l, Err(BadSequence::MissingCharClassName)))
} }
fn parse_empty_equivalant_char(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { fn error_parse_empty_equivalant_char(
input: &str,
) -> IResult<&str, Result<Sequence, BadSequence>> {
tag("[==]")(input).map(|(l, _)| (l, Err(BadSequence::MissingEquivalentClassChar))) tag("[==]")(input).map(|(l, _)| (l, Err(BadSequence::MissingEquivalentClassChar)))
} }
} }

View file

@ -14,7 +14,9 @@
extern crate uucore; extern crate uucore;
extern crate nom; extern crate nom;
mod convert;
mod operation; mod operation;
mod unicode_table;
use clap::{crate_version, App, Arg}; use clap::{crate_version, App, Arg};
use nom::AsBytes; use nom::AsBytes;
@ -64,7 +66,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
let sets = matches let sets = matches
.values_of(options::SETS) .values_of(options::SETS)
.map(|v| v.map(ToString::to_string).collect::<Vec<_>>()) .map(|v| {
v.map(ToString::to_string)
.map(convert::reduce_octal_to_char)
.collect::<Vec<_>>()
})
.unwrap_or_default(); .unwrap_or_default();
let sets_len = sets.len(); let sets_len = sets.len();
@ -94,6 +100,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
return 1; return 1;
} }
if let Some(first) = sets.get(0) {
if first.ends_with(r"\") {
show_error!("warning: an unescaped backslash at end of string is not portable");
}
}
let stdin = stdin(); let stdin = stdin();
let mut locked_stdin = stdin.lock(); let mut locked_stdin = stdin.lock();
let stdout = stdout(); let stdout = stdout();
@ -113,13 +125,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
} }
}; };
if set2.len() == 1 && set2[0] == '\\' {
show_error!(
"{}",
"warning: an unescaped backslash at end of string is not portable"
);
}
if delete_flag { if delete_flag {
if squeeze_flag { if squeeze_flag {
let mut delete_buffer = vec![]; let mut delete_buffer = vec![];

View file

@ -0,0 +1,10 @@
/// Bell / alert, U+0007.
pub static BEL: char = '\x07';
/// Backspace, U+0008.
pub static BS: char = '\x08';
/// Horizontal tab, U+0009.
pub static HT: char = '\t';
/// Line feed, U+000A.
pub static LF: char = '\n';
/// Vertical tab, U+000B.
pub static VT: char = '\x0B';
/// Form feed, U+000C.
pub static FF: char = '\x0C';
/// Carriage return, U+000D.
pub static CR: char = '\r';
/// Space, U+0020.
pub static SPACE: char = ' ';
/// Tab, line feed, vertical tab, form feed, carriage return and space.
// NOTE(review): presumably the member list of the `[:space:]` class — verify against the user.
pub static SPACES: &[char] = &[HT, LF, VT, FF, CR, SPACE];
/// Space and horizontal tab.
// NOTE(review): presumably the member list of the `[:blank:]` class — verify against the user.
pub static BLANK: &[char] = &[SPACE, HT];