mirror of
https://github.com/uutils/coreutils
synced 2025-01-07 02:39:11 +00:00
Removes some allocations
Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
c3bd727f8d
commit
0254ceb48b
1 changed files with 100 additions and 111 deletions
|
@ -1,8 +1,9 @@
|
||||||
|
use crate::unicode_table;
|
||||||
use nom::{
|
use nom::{
|
||||||
branch::alt,
|
branch::alt,
|
||||||
bytes::complete::{tag, take_while1},
|
bytes::complete::{tag, take_while1},
|
||||||
character::complete::{anychar, one_of},
|
character::complete::{anychar, one_of},
|
||||||
combinator::{map_opt, recognize, value},
|
combinator::{map_opt, recognize},
|
||||||
multi::{many0, many_m_n},
|
multi::{many0, many_m_n},
|
||||||
sequence::{preceded, separated_pair, tuple},
|
sequence::{preceded, separated_pair, tuple},
|
||||||
IResult,
|
IResult,
|
||||||
|
@ -13,12 +14,20 @@ use std::{
|
||||||
io::{BufRead, Write},
|
io::{BufRead, Write},
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::unicode_table;
|
static SPACES: &'static [char] = &[
|
||||||
|
unicode_table::HT,
|
||||||
|
unicode_table::LF,
|
||||||
|
unicode_table::VT,
|
||||||
|
unicode_table::FF,
|
||||||
|
unicode_table::CR,
|
||||||
|
unicode_table::SPACE,
|
||||||
|
];
|
||||||
|
static BLANK: &'static [char] = &[unicode_table::SPACE, unicode_table::HT];
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
|
||||||
pub enum Sequence {
|
pub enum Sequence {
|
||||||
Char(char),
|
Char(char),
|
||||||
CharRange(Vec<char>),
|
CharRange(Box<dyn Iterator<Item = char>>),
|
||||||
|
CharStar(char),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Sequence {
|
impl Sequence {
|
||||||
|
@ -53,10 +62,11 @@ impl Sequence {
|
||||||
.unwrap()
|
.unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn dissolve(self) -> Vec<char> {
|
pub fn dissolve(self) -> Box<dyn Iterator<Item = char>> {
|
||||||
match self {
|
match self {
|
||||||
Sequence::Char(c) => vec![c],
|
Sequence::Char(c) => Box::new(std::iter::once(c)),
|
||||||
Sequence::CharRange(r) => r,
|
Sequence::CharRange(r) => r,
|
||||||
|
Sequence::CharStar(c) => Box::new(std::iter::repeat(c)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,13 +107,14 @@ impl Sequence {
|
||||||
separated_pair(anychar, tag("-"), anychar)(input).map(|(l, (a, b))| {
|
separated_pair(anychar, tag("-"), anychar)(input).map(|(l, (a, b))| {
|
||||||
(l, {
|
(l, {
|
||||||
let (start, end) = (u32::from(a), u32::from(b));
|
let (start, end) = (u32::from(a), u32::from(b));
|
||||||
Sequence::CharRange((start..=end).filter_map(std::char::from_u32).collect())
|
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_char_star(input: &str) -> IResult<&str, Sequence> {
|
fn parse_char_star(input: &str) -> IResult<&str, Sequence> {
|
||||||
tuple((tag("["), anychar, tag("*]")))(input).map(|(_, (_, _, _))| todo!())
|
tuple((tag("["), anychar, tag("*]")))(input)
|
||||||
|
.map(|(l, (_, c, _))| (l, Sequence::CharStar(c)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_char_repeat(input: &str) -> IResult<&str, Sequence> {
|
fn parse_char_repeat(input: &str) -> IResult<&str, Sequence> {
|
||||||
|
@ -118,7 +129,7 @@ impl Sequence {
|
||||||
.map(|(l, (_, c, _, n, _))| {
|
.map(|(l, (_, c, _, n, _))| {
|
||||||
(
|
(
|
||||||
l,
|
l,
|
||||||
Sequence::CharRange(std::iter::repeat(c).take(n.parse().unwrap()).collect()),
|
Sequence::CharRange(Box::new(std::iter::repeat(c).take(n.parse().unwrap()))),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -127,104 +138,118 @@ impl Sequence {
|
||||||
tag("[:alnum:]")(input).map(|(l, _)| {
|
tag("[:alnum:]")(input).map(|(l, _)| {
|
||||||
(
|
(
|
||||||
l,
|
l,
|
||||||
Sequence::CharRange(('0'..='9').chain('A'..='Z').chain('a'..='z').collect()),
|
Sequence::CharRange(Box::new(('0'..='9').chain('A'..='Z').chain('a'..='z'))),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_alpha(input: &str) -> IResult<&str, Sequence> {
|
fn parse_alpha(input: &str) -> IResult<&str, Sequence> {
|
||||||
value(
|
tag("[:alpha:]")(input).map(|(l, _)| {
|
||||||
Sequence::CharRange(('A'..='Z').chain('a'..='z').collect()),
|
(
|
||||||
tag("[:alpha:]"),
|
l,
|
||||||
)(input)
|
Sequence::CharRange(Box::new(('A'..='Z').chain('a'..='z'))),
|
||||||
|
)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_blank(input: &str) -> IResult<&str, Sequence> {
|
fn parse_blank(input: &str) -> IResult<&str, Sequence> {
|
||||||
value(
|
tag("[:blank:]")(input)
|
||||||
Sequence::CharRange(vec![unicode_table::SPACE, unicode_table::HT]),
|
.map(|(l, _)| (l, Sequence::CharRange(Box::new(BLANK.into_iter().cloned()))))
|
||||||
tag("[:blank:]"),
|
|
||||||
)(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_control(input: &str) -> IResult<&str, Sequence> {
|
fn parse_control(input: &str) -> IResult<&str, Sequence> {
|
||||||
value(
|
tag("[:cntrl:]")(input).map(|(l, _)| {
|
||||||
Sequence::CharRange(
|
(
|
||||||
(0..=31)
|
l,
|
||||||
.chain(std::iter::once(127))
|
Sequence::CharRange(Box::new(
|
||||||
.flat_map(char::from_u32)
|
(0..=31)
|
||||||
.collect(),
|
.chain(std::iter::once(127))
|
||||||
),
|
.flat_map(char::from_u32),
|
||||||
tag("[:cntrl:]"),
|
)),
|
||||||
)(input)
|
)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_digit(input: &str) -> IResult<&str, Sequence> {
|
fn parse_digit(input: &str) -> IResult<&str, Sequence> {
|
||||||
value(Sequence::CharRange(('0'..='9').collect()), tag("[:digit:]"))(input)
|
tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('0'..='9'))))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_graph(input: &str) -> IResult<&str, Sequence> {
|
fn parse_graph(input: &str) -> IResult<&str, Sequence> {
|
||||||
value(
|
tag("[:graph:]")(input).map(|(l, _)| {
|
||||||
Sequence::CharRange(
|
(
|
||||||
(48..=57) // digit
|
l,
|
||||||
.chain(65..=90) // uppercase
|
Sequence::CharRange(Box::new(
|
||||||
.chain(97..=122) // lowercase
|
(48..=57) // digit
|
||||||
// punctuations
|
.chain(65..=90) // uppercase
|
||||||
.chain(33..=47)
|
.chain(97..=122) // lowercase
|
||||||
.chain(58..=64)
|
// punctuations
|
||||||
.chain(91..=96)
|
.chain(33..=47)
|
||||||
.chain(123..=126)
|
.chain(58..=64)
|
||||||
.flat_map(char::from_u32)
|
.chain(91..=96)
|
||||||
.collect(),
|
.chain(123..=126)
|
||||||
),
|
.chain(std::iter::once(32)) // space
|
||||||
tag("[:graph:]"),
|
.flat_map(char::from_u32),
|
||||||
)(input)
|
)),
|
||||||
|
)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_lower(input: &str) -> IResult<&str, Sequence> {
|
fn parse_lower(input: &str) -> IResult<&str, Sequence> {
|
||||||
value(Sequence::CharRange(('a'..='z').collect()), tag("[:lower:]"))(input)
|
tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('a'..='z'))))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_print(input: &str) -> IResult<&str, Sequence> {
|
fn parse_print(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:print:]")(input).map(|(_, _)| todo!())
|
tag("[:print:]")(input).map(|(l, _)| {
|
||||||
|
(
|
||||||
|
l,
|
||||||
|
Sequence::CharRange(Box::new(
|
||||||
|
(48..=57) // digit
|
||||||
|
.chain(65..=90) // uppercase
|
||||||
|
.chain(97..=122) // lowercase
|
||||||
|
// punctuations
|
||||||
|
.chain(33..=47)
|
||||||
|
.chain(58..=64)
|
||||||
|
.chain(91..=96)
|
||||||
|
.chain(123..=126)
|
||||||
|
.flat_map(char::from_u32),
|
||||||
|
)),
|
||||||
|
)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_punct(input: &str) -> IResult<&str, Sequence> {
|
fn parse_punct(input: &str) -> IResult<&str, Sequence> {
|
||||||
value(
|
tag("[:punct:]")(input).map(|(l, _)| {
|
||||||
Sequence::CharRange(
|
(
|
||||||
(33..=47)
|
l,
|
||||||
.chain(58..=64)
|
Sequence::CharRange(Box::new(
|
||||||
.chain(91..=96)
|
(33..=47)
|
||||||
.chain(123..=126)
|
.chain(58..=64)
|
||||||
.flat_map(char::from_u32)
|
.chain(91..=96)
|
||||||
.collect(),
|
.chain(123..=126)
|
||||||
),
|
.flat_map(char::from_u32),
|
||||||
tag("[:punct:]"),
|
)),
|
||||||
)(input)
|
)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_space(input: &str) -> IResult<&str, Sequence> {
|
fn parse_space(input: &str) -> IResult<&str, Sequence> {
|
||||||
value(
|
tag("[:space:]")(input).map(|(l, _)| {
|
||||||
Sequence::CharRange(vec![
|
(
|
||||||
unicode_table::HT,
|
l,
|
||||||
unicode_table::LF,
|
Sequence::CharRange(Box::new(SPACES.into_iter().cloned())),
|
||||||
unicode_table::VT,
|
)
|
||||||
unicode_table::FF,
|
})
|
||||||
unicode_table::CR,
|
|
||||||
unicode_table::SPACE,
|
|
||||||
]),
|
|
||||||
tag("[:space:]"),
|
|
||||||
)(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_upper(input: &str) -> IResult<&str, Sequence> {
|
fn parse_upper(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::CharRange(('A'..='Z').collect())))
|
tag("[:upper:]")(input).map(|(l, _)| (l, Sequence::CharRange(Box::new('A'..='Z'))))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_xdigit(input: &str) -> IResult<&str, Sequence> {
|
fn parse_xdigit(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:xdigit:]")(input).map(|(l, _)| {
|
tag("[:xdigit:]")(input).map(|(l, _)| {
|
||||||
(
|
(
|
||||||
l,
|
l,
|
||||||
Sequence::CharRange(('0'..='9').chain('A'..='F').chain('a'..='f').collect()),
|
Sequence::CharRange(Box::new(('0'..='9').chain('A'..='F').chain('a'..='f'))),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -238,16 +263,18 @@ pub trait SymbolTranslator {
|
||||||
fn translate(&mut self, current: char) -> Option<char>;
|
fn translate(&mut self, current: char) -> Option<char>;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct DeleteOperation {
|
pub struct DeleteOperation {
|
||||||
set: Vec<Sequence>,
|
set: Vec<char>,
|
||||||
complement_flag: bool,
|
complement_flag: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DeleteOperation {
|
impl DeleteOperation {
|
||||||
pub fn new(set: Vec<Sequence>, complement_flag: bool) -> DeleteOperation {
|
pub fn new(set: Vec<Sequence>, complement_flag: bool) -> DeleteOperation {
|
||||||
DeleteOperation {
|
DeleteOperation {
|
||||||
set,
|
set: set
|
||||||
|
.into_iter()
|
||||||
|
.flat_map(Sequence::dissolve)
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
complement_flag,
|
complement_flag,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -255,10 +282,7 @@ impl DeleteOperation {
|
||||||
|
|
||||||
impl SymbolTranslator for DeleteOperation {
|
impl SymbolTranslator for DeleteOperation {
|
||||||
fn translate(&mut self, current: char) -> Option<char> {
|
fn translate(&mut self, current: char) -> Option<char> {
|
||||||
let found = self.set.iter().any(|sequence| match sequence {
|
let found = self.set.iter().any(|sequence| sequence.eq(&current));
|
||||||
Sequence::Char(c) => c.eq(&current),
|
|
||||||
Sequence::CharRange(r) => r.iter().any(|c| c.eq(&current)),
|
|
||||||
});
|
|
||||||
(self.complement_flag == found).then(|| current)
|
(self.complement_flag == found).then(|| current)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -463,41 +487,6 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_parse_char_range() {
|
|
||||||
assert_eq!(Sequence::parse_set_string(""), vec![]);
|
|
||||||
assert_eq!(
|
|
||||||
Sequence::parse_set_string("a-z"),
|
|
||||||
vec![Sequence::CharRange(vec![
|
|
||||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q',
|
|
||||||
'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
|
|
||||||
])]
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
Sequence::parse_set_string("a-zA-Z"),
|
|
||||||
vec![
|
|
||||||
Sequence::CharRange(vec![
|
|
||||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
|
|
||||||
'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
|
|
||||||
]),
|
|
||||||
Sequence::CharRange(vec![
|
|
||||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
|
|
||||||
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
|
|
||||||
])
|
|
||||||
]
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
Sequence::parse_set_string(", ┬─┬"),
|
|
||||||
vec![
|
|
||||||
Sequence::Char(','),
|
|
||||||
Sequence::Char(' '),
|
|
||||||
Sequence::Char('┬'),
|
|
||||||
Sequence::Char('─'),
|
|
||||||
Sequence::Char('┬')
|
|
||||||
]
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_parse_octal() {
|
fn test_parse_octal() {
|
||||||
for a in '0'..='7' {
|
for a in '0'..='7' {
|
||||||
|
|
Loading…
Reference in a new issue