mirror of
https://github.com/uutils/coreutils
synced 2025-01-21 17:44:17 +00:00
Simplified and extended parsing capabilities
Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
2c8ba4ad2d
commit
5657f5af3a
1 changed files with 36 additions and 61 deletions
|
@ -8,7 +8,7 @@ use nom::{
|
||||||
IResult,
|
IResult,
|
||||||
};
|
};
|
||||||
use std::{
|
use std::{
|
||||||
collections::HashMap,
|
collections::{HashMap, HashSet},
|
||||||
fmt::Debug,
|
fmt::Debug,
|
||||||
io::{BufRead, Write},
|
io::{BufRead, Write},
|
||||||
};
|
};
|
||||||
|
@ -47,6 +47,18 @@ pub enum Sequence {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Sequence {
|
impl Sequence {
|
||||||
|
// TODO: Can we do better?
|
||||||
|
pub fn convert_octal_to_char(input: &str) -> char {
|
||||||
|
if input.starts_with("\\") && input.len() > 1 {
|
||||||
|
u32::from_str_radix(&input[1..], 8)
|
||||||
|
.map(|u| char::from_u32(u))
|
||||||
|
.unwrap()
|
||||||
|
.unwrap()
|
||||||
|
} else {
|
||||||
|
input.chars().next().unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn flatten(&self) -> Box<dyn Iterator<Item = char>> {
|
pub fn flatten(&self) -> Box<dyn Iterator<Item = char>> {
|
||||||
match self {
|
match self {
|
||||||
Sequence::Char(c) => Box::new(std::iter::once(*c)),
|
Sequence::Char(c) => Box::new(std::iter::once(*c)),
|
||||||
|
@ -196,9 +208,6 @@ impl Sequence {
|
||||||
many0(alt((
|
many0(alt((
|
||||||
alt((
|
alt((
|
||||||
Sequence::parse_char_range_octal_leftright,
|
Sequence::parse_char_range_octal_leftright,
|
||||||
Sequence::parse_char_range_octal_left,
|
|
||||||
Sequence::parse_char_range_octal_right,
|
|
||||||
Sequence::parse_char_range_backslash_collapse,
|
|
||||||
Sequence::parse_char_range,
|
Sequence::parse_char_range,
|
||||||
Sequence::parse_char_star,
|
Sequence::parse_char_star,
|
||||||
Sequence::parse_char_repeat,
|
Sequence::parse_char_repeat,
|
||||||
|
@ -229,6 +238,14 @@ impl Sequence {
|
||||||
.unwrap()
|
.unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_octal_or_char(input: &str) -> IResult<&str, char> {
|
||||||
|
recognize(alt((
|
||||||
|
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
||||||
|
recognize(anychar),
|
||||||
|
)))(input)
|
||||||
|
.map(|(l, a)| (l, Sequence::convert_octal_to_char(a)))
|
||||||
|
}
|
||||||
|
|
||||||
fn parse_char(input: &str) -> IResult<&str, Sequence> {
|
fn parse_char(input: &str) -> IResult<&str, Sequence> {
|
||||||
anychar(input).map(|(l, r)| (l, Sequence::Char(r)))
|
anychar(input).map(|(l, r)| (l, Sequence::Char(r)))
|
||||||
}
|
}
|
||||||
|
@ -261,19 +278,10 @@ impl Sequence {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_char_range(input: &str) -> IResult<&str, Sequence> {
|
fn parse_char_range(input: &str) -> IResult<&str, Sequence> {
|
||||||
separated_pair(anychar, tag("-"), anychar)(input).map(|(l, (a, b))| {
|
|
||||||
(l, {
|
|
||||||
let (start, end) = (u32::from(a), u32::from(b));
|
|
||||||
Sequence::CharRange(start, end)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_char_range_backslash_collapse(input: &str) -> IResult<&str, Sequence> {
|
|
||||||
separated_pair(
|
separated_pair(
|
||||||
preceded(tag("\\"), anychar),
|
Sequence::parse_octal_or_char,
|
||||||
tag("-"),
|
tag("-"),
|
||||||
preceded(tag("\\"), anychar),
|
Sequence::parse_octal_or_char,
|
||||||
)(input)
|
)(input)
|
||||||
.map(|(l, (a, b))| {
|
.map(|(l, (a, b))| {
|
||||||
(l, {
|
(l, {
|
||||||
|
@ -283,59 +291,29 @@ impl Sequence {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_char_range_octal_left(input: &str) -> IResult<&str, Sequence> {
|
|
||||||
separated_pair(
|
|
||||||
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
|
||||||
tag("-"),
|
|
||||||
anychar,
|
|
||||||
)(input)
|
|
||||||
.map(|(l, (a, b))| {
|
|
||||||
(l, {
|
|
||||||
let (start, end) = (u32::from_str_radix(a, 8).unwrap(), u32::from(b));
|
|
||||||
Sequence::CharRange(start, end)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_char_range_octal_right(input: &str) -> IResult<&str, Sequence> {
|
|
||||||
separated_pair(
|
|
||||||
anychar,
|
|
||||||
tag("-"),
|
|
||||||
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
|
||||||
)(input)
|
|
||||||
.map(|(l, (a, b))| {
|
|
||||||
(l, {
|
|
||||||
let (start, end) = (u32::from(a), u32::from_str_radix(b, 8).unwrap());
|
|
||||||
Sequence::CharRange(start, end)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_char_range_octal_leftright(input: &str) -> IResult<&str, Sequence> {
|
fn parse_char_range_octal_leftright(input: &str) -> IResult<&str, Sequence> {
|
||||||
separated_pair(
|
separated_pair(
|
||||||
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
Sequence::parse_octal_or_char,
|
||||||
tag("-"),
|
tag("-"),
|
||||||
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
Sequence::parse_octal_or_char,
|
||||||
)(input)
|
)(input)
|
||||||
.map(|(l, (a, b))| {
|
.map(|(l, (a, b))| {
|
||||||
(l, {
|
(l, {
|
||||||
let (start, end) = (
|
let (start, end) = (u32::from(a), u32::from(b));
|
||||||
u32::from_str_radix(a, 8).unwrap(),
|
|
||||||
u32::from_str_radix(b, 8).unwrap(),
|
|
||||||
);
|
|
||||||
Sequence::CharRange(start, end)
|
Sequence::CharRange(start, end)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_char_star(input: &str) -> IResult<&str, Sequence> {
|
fn parse_char_star(input: &str) -> IResult<&str, Sequence> {
|
||||||
delimited(tag("["), anychar, tag("*]"))(input).map(|(l, c)| (l, Sequence::CharStar(c)))
|
delimited(tag("["), Sequence::parse_octal_or_char, tag("*]"))(input)
|
||||||
|
.map(|(l, a)| (l, Sequence::CharStar(a)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_char_repeat(input: &str) -> IResult<&str, Sequence> {
|
fn parse_char_repeat(input: &str) -> IResult<&str, Sequence> {
|
||||||
delimited(
|
delimited(
|
||||||
tag("["),
|
tag("["),
|
||||||
separated_pair(anychar, tag("*"), digit1),
|
separated_pair(Sequence::parse_octal_or_char, tag("*"), digit1),
|
||||||
tag("]"),
|
tag("]"),
|
||||||
)(input)
|
)(input)
|
||||||
.map(|(l, (c, n))| (l, Sequence::CharRepeat(c, n.parse().unwrap())))
|
.map(|(l, (c, n))| (l, Sequence::CharRepeat(c, n.parse().unwrap())))
|
||||||
|
@ -390,7 +368,8 @@ impl Sequence {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_char_equal(input: &str) -> IResult<&str, Sequence> {
|
fn parse_char_equal(input: &str) -> IResult<&str, Sequence> {
|
||||||
delimited(tag("[="), anychar, tag("=]"))(input).map(|(_, _)| todo!())
|
delimited(tag("[="), Sequence::parse_octal_or_char, tag("=]"))(input)
|
||||||
|
.map(|(l, c)| (l, Sequence::Char(c)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -544,7 +523,7 @@ impl SymbolTranslator for TranslateOperation {
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct SqueezeOperation {
|
pub struct SqueezeOperation {
|
||||||
set1: Vec<char>,
|
set1: HashSet<char>,
|
||||||
complement: bool,
|
complement: bool,
|
||||||
previous: Option<char>,
|
previous: Option<char>,
|
||||||
}
|
}
|
||||||
|
@ -552,7 +531,7 @@ pub struct SqueezeOperation {
|
||||||
impl SqueezeOperation {
|
impl SqueezeOperation {
|
||||||
pub fn new(set1: Vec<char>, complement: bool) -> SqueezeOperation {
|
pub fn new(set1: Vec<char>, complement: bool) -> SqueezeOperation {
|
||||||
SqueezeOperation {
|
SqueezeOperation {
|
||||||
set1,
|
set1: set1.into_iter().collect(),
|
||||||
complement,
|
complement,
|
||||||
previous: None,
|
previous: None,
|
||||||
}
|
}
|
||||||
|
@ -562,7 +541,7 @@ impl SqueezeOperation {
|
||||||
impl SymbolTranslator for SqueezeOperation {
|
impl SymbolTranslator for SqueezeOperation {
|
||||||
fn translate(&mut self, current: char) -> Option<char> {
|
fn translate(&mut self, current: char) -> Option<char> {
|
||||||
if self.complement {
|
if self.complement {
|
||||||
let next = if self.set1.iter().any(|c| c.eq(&current)) {
|
let next = if self.set1.contains(&current) {
|
||||||
Some(current)
|
Some(current)
|
||||||
} else {
|
} else {
|
||||||
match self.previous {
|
match self.previous {
|
||||||
|
@ -570,20 +549,16 @@ impl SymbolTranslator for SqueezeOperation {
|
||||||
if v.eq(&current) {
|
if v.eq(&current) {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
self.previous = Some(current);
|
|
||||||
Some(current)
|
Some(current)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => {
|
None => Some(current),
|
||||||
self.previous = Some(current);
|
|
||||||
Some(current)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
self.previous = Some(current);
|
self.previous = Some(current);
|
||||||
next
|
next
|
||||||
} else {
|
} else {
|
||||||
let next = if self.set1.iter().any(|c| c.eq(&current)) {
|
let next = if self.set1.contains(&current) {
|
||||||
match self.previous {
|
match self.previous {
|
||||||
Some(v) if v == current => None,
|
Some(v) if v == current => None,
|
||||||
_ => Some(current),
|
_ => Some(current),
|
||||||
|
|
Loading…
Reference in a new issue