mirror of
https://github.com/uutils/coreutils
synced 2024-12-18 00:53:25 +00:00
Fixing implementation to pass more GNU tests
Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
dc033ab619
commit
8c82cd660c
4 changed files with 107 additions and 120 deletions
29
src/uu/tr/src/convert.rs
Normal file
29
src/uu/tr/src/convert.rs
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
use nom::{
|
||||||
|
branch::alt,
|
||||||
|
bytes::complete::tag,
|
||||||
|
character::complete::{anychar, one_of},
|
||||||
|
combinator::{map_opt, recognize},
|
||||||
|
multi::{many0, many_m_n},
|
||||||
|
sequence::preceded,
|
||||||
|
IResult,
|
||||||
|
};
|
||||||
|
|
||||||
|
fn parse_octal(input: &str) -> IResult<&str, char> {
|
||||||
|
map_opt(
|
||||||
|
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
||||||
|
|out: &str| {
|
||||||
|
u32::from_str_radix(out, 8)
|
||||||
|
.map(|u| char::from_u32(u).unwrap())
|
||||||
|
.ok()
|
||||||
|
},
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn reduce_octal_to_char(input: String) -> String {
|
||||||
|
let result = many0(alt((parse_octal, anychar)))(input.as_str())
|
||||||
|
.map(|(_, r)| r)
|
||||||
|
.unwrap()
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
result
|
||||||
|
}
|
|
@ -2,8 +2,8 @@ use nom::{
|
||||||
branch::alt,
|
branch::alt,
|
||||||
bytes::complete::tag,
|
bytes::complete::tag,
|
||||||
character::complete::{anychar, one_of},
|
character::complete::{anychar, one_of},
|
||||||
combinator::{map_opt, recognize},
|
combinator::{map, recognize},
|
||||||
multi::{many0, many1, many_m_n},
|
multi::{many0, many1},
|
||||||
sequence::{delimited, preceded, separated_pair},
|
sequence::{delimited, preceded, separated_pair},
|
||||||
IResult,
|
IResult,
|
||||||
};
|
};
|
||||||
|
@ -13,18 +13,7 @@ use std::{
|
||||||
io::{BufRead, Write},
|
io::{BufRead, Write},
|
||||||
};
|
};
|
||||||
|
|
||||||
mod unicode_table {
|
use crate::unicode_table;
|
||||||
pub static BEL: char = '\u{0007}';
|
|
||||||
pub static BS: char = '\u{0008}';
|
|
||||||
pub static HT: char = '\u{0009}';
|
|
||||||
pub static LF: char = '\u{000A}';
|
|
||||||
pub static VT: char = '\u{000B}';
|
|
||||||
pub static FF: char = '\u{000C}';
|
|
||||||
pub static CR: char = '\u{000D}';
|
|
||||||
pub static SPACE: char = '\u{0020}';
|
|
||||||
pub static SPACES: &'static [char] = &[HT, LF, VT, FF, CR, SPACE];
|
|
||||||
pub static BLANK: &'static [char] = &[SPACE, HT];
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum BadSequence {
|
pub enum BadSequence {
|
||||||
|
@ -32,6 +21,7 @@ pub enum BadSequence {
|
||||||
MissingEquivalentClassChar,
|
MissingEquivalentClassChar,
|
||||||
MultipleCharRepeatInSet2,
|
MultipleCharRepeatInSet2,
|
||||||
CharRepeatInSet1,
|
CharRepeatInSet1,
|
||||||
|
InvalidRepeatCount(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for BadSequence {
|
impl Display for BadSequence {
|
||||||
|
@ -49,6 +39,9 @@ impl Display for BadSequence {
|
||||||
BadSequence::CharRepeatInSet1 => {
|
BadSequence::CharRepeatInSet1 => {
|
||||||
writeln!(f, "the [c*] repeat construct may not appear in string1")
|
writeln!(f, "the [c*] repeat construct may not appear in string1")
|
||||||
}
|
}
|
||||||
|
BadSequence::InvalidRepeatCount(count) => {
|
||||||
|
writeln!(f, "invalid repeat count '{}' in [c*n] construct", count)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -135,6 +128,7 @@ impl Sequence {
|
||||||
) -> Result<(Vec<char>, Vec<char>), BadSequence> {
|
) -> Result<(Vec<char>, Vec<char>), BadSequence> {
|
||||||
let set1 = Sequence::from_str(set1_str)?;
|
let set1 = Sequence::from_str(set1_str)?;
|
||||||
let set2 = Sequence::from_str(set2_str)?;
|
let set2 = Sequence::from_str(set2_str)?;
|
||||||
|
|
||||||
let is_char_star = |s: &&Sequence| -> bool {
|
let is_char_star = |s: &&Sequence| -> bool {
|
||||||
match s {
|
match s {
|
||||||
Sequence::CharStar(_) => true,
|
Sequence::CharStar(_) => true,
|
||||||
|
@ -219,7 +213,6 @@ impl Sequence {
|
||||||
pub fn from_str(input: &str) -> Result<Vec<Sequence>, BadSequence> {
|
pub fn from_str(input: &str) -> Result<Vec<Sequence>, BadSequence> {
|
||||||
let result = many0(alt((
|
let result = many0(alt((
|
||||||
alt((
|
alt((
|
||||||
Sequence::parse_char_range_octal_leftright,
|
|
||||||
Sequence::parse_char_range,
|
Sequence::parse_char_range,
|
||||||
Sequence::parse_char_star,
|
Sequence::parse_char_star,
|
||||||
Sequence::parse_char_repeat,
|
Sequence::parse_char_repeat,
|
||||||
|
@ -241,15 +234,12 @@ impl Sequence {
|
||||||
)),
|
)),
|
||||||
// NOTE: Specific error cases
|
// NOTE: Specific error cases
|
||||||
alt((
|
alt((
|
||||||
Sequence::parse_empty_bracket,
|
Sequence::error_parse_char_repeat,
|
||||||
Sequence::parse_empty_equivalant_char,
|
Sequence::error_parse_empty_bracket,
|
||||||
|
Sequence::error_parse_empty_equivalant_char,
|
||||||
)),
|
)),
|
||||||
// NOTE: This must be the last one
|
// NOTE: This must be the last one
|
||||||
alt((
|
map(Sequence::parse_backslash_or_char, |s| Ok(Sequence::Char(s))),
|
||||||
Sequence::parse_octal,
|
|
||||||
Sequence::parse_backslash,
|
|
||||||
Sequence::parse_char,
|
|
||||||
)),
|
|
||||||
)))(input)
|
)))(input)
|
||||||
.map(|(_, r)| r)
|
.map(|(_, r)| r)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
@ -258,97 +248,31 @@ impl Sequence {
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: We can surely do better than this :(
|
fn parse_backslash(input: &str) -> IResult<&str, char> {
|
||||||
fn parse_octal_or_char(input: &str) -> IResult<&str, char> {
|
|
||||||
recognize(alt((
|
|
||||||
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
|
||||||
preceded(tag("\\"), recognize(anychar)),
|
|
||||||
recognize(anychar),
|
|
||||||
)))(input)
|
|
||||||
.map(|(l, a)| {
|
|
||||||
(
|
|
||||||
l,
|
|
||||||
if let Some(input) = a.strip_prefix('\\') {
|
|
||||||
if input.is_empty() {
|
|
||||||
'\\'
|
|
||||||
} else {
|
|
||||||
char::from_u32(u32::from_str_radix(&input, 8).unwrap_or_else(|_| {
|
|
||||||
let c = match input.chars().next().unwrap() {
|
|
||||||
'a' => unicode_table::BEL,
|
|
||||||
'b' => unicode_table::BS,
|
|
||||||
'f' => unicode_table::FF,
|
|
||||||
'n' => unicode_table::LF,
|
|
||||||
'r' => unicode_table::CR,
|
|
||||||
't' => unicode_table::HT,
|
|
||||||
'v' => unicode_table::VT,
|
|
||||||
x => x,
|
|
||||||
};
|
|
||||||
u32::from(c)
|
|
||||||
}))
|
|
||||||
.expect("Cannot convert octal value to character")
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
input
|
|
||||||
.chars()
|
|
||||||
.next()
|
|
||||||
.expect("We recognized a character so this should not fail")
|
|
||||||
},
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_char(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
|
||||||
anychar(input).map(|(l, r)| (l, Ok(Sequence::Char(r))))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_backslash(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
|
||||||
preceded(tag("\\"), anychar)(input).map(|(l, a)| {
|
preceded(tag("\\"), anychar)(input).map(|(l, a)| {
|
||||||
let c = match a {
|
let c = match a {
|
||||||
'a' => Sequence::Char(unicode_table::BEL),
|
'a' => unicode_table::BEL,
|
||||||
'b' => Sequence::Char(unicode_table::BS),
|
'b' => unicode_table::BS,
|
||||||
'f' => Sequence::Char(unicode_table::FF),
|
'f' => unicode_table::FF,
|
||||||
'n' => Sequence::Char(unicode_table::LF),
|
'n' => unicode_table::LF,
|
||||||
'r' => Sequence::Char(unicode_table::CR),
|
'r' => unicode_table::CR,
|
||||||
't' => Sequence::Char(unicode_table::HT),
|
't' => unicode_table::HT,
|
||||||
'v' => Sequence::Char(unicode_table::VT),
|
'v' => unicode_table::VT,
|
||||||
x => Sequence::Char(x),
|
x => x,
|
||||||
};
|
};
|
||||||
(l, Ok(c))
|
(l, c)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_octal(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
fn parse_backslash_or_char(input: &str) -> IResult<&str, char> {
|
||||||
map_opt(
|
alt((Sequence::parse_backslash, anychar))(input)
|
||||||
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
|
||||||
|out: &str| {
|
|
||||||
u32::from_str_radix(out, 8)
|
|
||||||
.map(|u| Ok(Sequence::Char(char::from_u32(u).unwrap())))
|
|
||||||
.ok()
|
|
||||||
},
|
|
||||||
)(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_char_range(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
fn parse_char_range(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
||||||
separated_pair(
|
separated_pair(
|
||||||
Sequence::parse_octal_or_char,
|
Sequence::parse_backslash_or_char,
|
||||||
tag("-"),
|
tag("-"),
|
||||||
Sequence::parse_octal_or_char,
|
Sequence::parse_backslash_or_char,
|
||||||
)(input)
|
|
||||||
.map(|(l, (a, b))| {
|
|
||||||
(l, {
|
|
||||||
let (start, end) = (u32::from(a), u32::from(b));
|
|
||||||
Ok(Sequence::CharRange(start, end))
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_char_range_octal_leftright(
|
|
||||||
input: &str,
|
|
||||||
) -> IResult<&str, Result<Sequence, BadSequence>> {
|
|
||||||
separated_pair(
|
|
||||||
Sequence::parse_octal_or_char,
|
|
||||||
tag("-"),
|
|
||||||
Sequence::parse_octal_or_char,
|
|
||||||
)(input)
|
)(input)
|
||||||
.map(|(l, (a, b))| {
|
.map(|(l, (a, b))| {
|
||||||
(l, {
|
(l, {
|
||||||
|
@ -359,7 +283,7 @@ impl Sequence {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_char_star(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
fn parse_char_star(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
||||||
delimited(tag("["), Sequence::parse_octal_or_char, tag("*]"))(input)
|
delimited(tag("["), Sequence::parse_backslash_or_char, tag("*]"))(input)
|
||||||
.map(|(l, a)| (l, Ok(Sequence::CharStar(a))))
|
.map(|(l, a)| (l, Ok(Sequence::CharStar(a))))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -367,19 +291,21 @@ impl Sequence {
|
||||||
delimited(
|
delimited(
|
||||||
tag("["),
|
tag("["),
|
||||||
separated_pair(
|
separated_pair(
|
||||||
Sequence::parse_octal_or_char,
|
Sequence::parse_backslash_or_char,
|
||||||
tag("*"),
|
tag("*"),
|
||||||
recognize(many1(one_of("01234567"))),
|
recognize(many1(one_of("01234567"))),
|
||||||
),
|
),
|
||||||
tag("]"),
|
tag("]"),
|
||||||
)(input)
|
)(input)
|
||||||
.map(|(l, (c, n))| {
|
.map(|(l, (c, str))| {
|
||||||
(
|
(
|
||||||
l,
|
l,
|
||||||
Ok(Sequence::CharRepeat(
|
match usize::from_str_radix(str, 8)
|
||||||
c,
|
.expect("This should not fail because we only parse against 0-7")
|
||||||
usize::from_str_radix(n, 8).expect("This should not fail "),
|
{
|
||||||
)),
|
0 => Ok(Sequence::CharStar(c)),
|
||||||
|
count => Ok(Sequence::CharRepeat(c, count)),
|
||||||
|
},
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -433,15 +359,32 @@ impl Sequence {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_char_equal(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
fn parse_char_equal(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
||||||
delimited(tag("[="), Sequence::parse_octal_or_char, tag("=]"))(input)
|
delimited(tag("[="), Sequence::parse_backslash_or_char, tag("=]"))(input)
|
||||||
.map(|(l, c)| (l, Ok(Sequence::Char(c))))
|
.map(|(l, c)| (l, Ok(Sequence::Char(c))))
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn parse_empty_bracket(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
impl Sequence {
|
||||||
|
fn error_parse_char_repeat(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
||||||
|
delimited(
|
||||||
|
tag("["),
|
||||||
|
separated_pair(
|
||||||
|
Sequence::parse_backslash_or_char,
|
||||||
|
tag("*"),
|
||||||
|
recognize(many1(one_of("0123456789"))),
|
||||||
|
),
|
||||||
|
tag("]"),
|
||||||
|
)(input)
|
||||||
|
.map(|(l, (_, n))| (l, Err(BadSequence::InvalidRepeatCount(n.to_string()))))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn error_parse_empty_bracket(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
||||||
tag("[::]")(input).map(|(l, _)| (l, Err(BadSequence::MissingCharClassName)))
|
tag("[::]")(input).map(|(l, _)| (l, Err(BadSequence::MissingCharClassName)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_empty_equivalant_char(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
|
fn error_parse_empty_equivalant_char(
|
||||||
|
input: &str,
|
||||||
|
) -> IResult<&str, Result<Sequence, BadSequence>> {
|
||||||
tag("[==]")(input).map(|(l, _)| (l, Err(BadSequence::MissingEquivalentClassChar)))
|
tag("[==]")(input).map(|(l, _)| (l, Err(BadSequence::MissingEquivalentClassChar)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,7 +14,9 @@
|
||||||
extern crate uucore;
|
extern crate uucore;
|
||||||
extern crate nom;
|
extern crate nom;
|
||||||
|
|
||||||
|
mod convert;
|
||||||
mod operation;
|
mod operation;
|
||||||
|
mod unicode_table;
|
||||||
|
|
||||||
use clap::{crate_version, App, Arg};
|
use clap::{crate_version, App, Arg};
|
||||||
use nom::AsBytes;
|
use nom::AsBytes;
|
||||||
|
@ -64,7 +66,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
|
|
||||||
let sets = matches
|
let sets = matches
|
||||||
.values_of(options::SETS)
|
.values_of(options::SETS)
|
||||||
.map(|v| v.map(ToString::to_string).collect::<Vec<_>>())
|
.map(|v| {
|
||||||
|
v.map(ToString::to_string)
|
||||||
|
.map(convert::reduce_octal_to_char)
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
})
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
let sets_len = sets.len();
|
let sets_len = sets.len();
|
||||||
|
|
||||||
|
@ -94,6 +100,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(first) = sets.get(0) {
|
||||||
|
if first.ends_with(r"\") {
|
||||||
|
show_error!("warning: an unescaped backslash at end of string is not portable");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let stdin = stdin();
|
let stdin = stdin();
|
||||||
let mut locked_stdin = stdin.lock();
|
let mut locked_stdin = stdin.lock();
|
||||||
let stdout = stdout();
|
let stdout = stdout();
|
||||||
|
@ -113,13 +125,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if set2.len() == 1 && set2[0] == '\\' {
|
|
||||||
show_error!(
|
|
||||||
"{}",
|
|
||||||
"warning: an unescaped backslash at end of string is not portable"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
if delete_flag {
|
if delete_flag {
|
||||||
if squeeze_flag {
|
if squeeze_flag {
|
||||||
let mut delete_buffer = vec![];
|
let mut delete_buffer = vec![];
|
||||||
|
|
10
src/uu/tr/src/unicode_table.rs
Normal file
10
src/uu/tr/src/unicode_table.rs
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
/// U+0007, the bell (alert) control character.
pub static BEL: char = '\u{0007}';
/// U+0008, backspace.
pub static BS: char = '\u{0008}';
/// U+0009, horizontal tab.
pub static HT: char = '\u{0009}';
/// U+000A, line feed.
pub static LF: char = '\u{000A}';
/// U+000B, vertical tab.
pub static VT: char = '\u{000B}';
/// U+000C, form feed.
pub static FF: char = '\u{000C}';
/// U+000D, carriage return.
pub static CR: char = '\u{000D}';
/// U+0020, space.
pub static SPACE: char = '\u{0020}';
/// The characters `HT`, `LF`, `VT`, `FF`, `CR` and `SPACE`, in that order.
pub static SPACES: &[char] = &[HT, LF, VT, FF, CR, SPACE];
/// The characters `SPACE` and `HT`, in that order.
pub static BLANK: &[char] = &[SPACE, HT];
|
Loading…
Reference in a new issue