mirror of
https://github.com/uutils/coreutils
synced 2024-11-16 09:48:03 +00:00
Finally fixed parsing octal in char ranges
Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
5aeeb6cfe9
commit
0acc165720
1 changed files with 81 additions and 11 deletions
|
@ -1,12 +1,12 @@
|
|||
use crate::unicode_table;
|
||||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{tag, take_until},
|
||||
bytes::complete::tag,
|
||||
character::complete::{anychar, digit1, one_of},
|
||||
combinator::{map_opt, opt, recognize},
|
||||
combinator::{map_opt, recognize},
|
||||
multi::{many0, many_m_n},
|
||||
sequence::{delimited, preceded, separated_pair, tuple},
|
||||
take_until1, IResult,
|
||||
sequence::{delimited, preceded, separated_pair},
|
||||
IResult,
|
||||
};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
|
@ -34,6 +34,10 @@ impl Sequence {
|
|||
pub fn parse_set_string(input: &str) -> Vec<Sequence> {
|
||||
many0(alt((
|
||||
alt((
|
||||
Sequence::parse_char_range_octal_leftright,
|
||||
Sequence::parse_char_range_octal_left,
|
||||
Sequence::parse_char_range_octal_right,
|
||||
Sequence::parse_char_range_backslash_collapse,
|
||||
Sequence::parse_char_range,
|
||||
Sequence::parse_char_star,
|
||||
Sequence::parse_char_repeat,
|
||||
|
@ -114,6 +118,65 @@ impl Sequence {
|
|||
})
|
||||
}
|
||||
|
||||
fn parse_char_range_backslash_collapse(input: &str) -> IResult<&str, Sequence> {
|
||||
separated_pair(
|
||||
preceded(tag("\\"), anychar),
|
||||
tag("-"),
|
||||
preceded(tag("\\"), anychar),
|
||||
)(input)
|
||||
.map(|(l, (a, b))| {
|
||||
(l, {
|
||||
let (start, end) = (u32::from(a), u32::from(b));
|
||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_char_range_octal_left(input: &str) -> IResult<&str, Sequence> {
|
||||
separated_pair(
|
||||
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
||||
tag("-"),
|
||||
anychar,
|
||||
)(input)
|
||||
.map(|(l, (a, b))| {
|
||||
(l, {
|
||||
let (start, end) = (u32::from_str_radix(a, 8).unwrap(), u32::from(b));
|
||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_char_range_octal_right(input: &str) -> IResult<&str, Sequence> {
|
||||
separated_pair(
|
||||
anychar,
|
||||
tag("-"),
|
||||
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
||||
)(input)
|
||||
.map(|(l, (a, b))| {
|
||||
(l, {
|
||||
let (start, end) = (u32::from(a), u32::from_str_radix(b, 8).unwrap());
|
||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_char_range_octal_leftright(input: &str) -> IResult<&str, Sequence> {
|
||||
separated_pair(
|
||||
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
||||
tag("-"),
|
||||
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
||||
)(input)
|
||||
.map(|(l, (a, b))| {
|
||||
(l, {
|
||||
let (start, end) = (
|
||||
u32::from_str_radix(a, 8).unwrap(),
|
||||
u32::from_str_radix(b, 8).unwrap(),
|
||||
);
|
||||
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_char_star(input: &str) -> IResult<&str, Sequence> {
|
||||
delimited(tag("["), anychar, tag("*]"))(input).map(|(l, c)| (l, Sequence::CharStar(c)))
|
||||
}
|
||||
|
@ -261,6 +324,7 @@ pub trait SymbolTranslator {
|
|||
fn translate(&mut self, current: char) -> Option<char>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DeleteOperation {
|
||||
set: Vec<char>,
|
||||
complement_flag: bool,
|
||||
|
@ -285,7 +349,7 @@ impl SymbolTranslator for DeleteOperation {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug)]
|
||||
pub struct TranslateOperationComplement {
|
||||
iter: u32,
|
||||
set1: Vec<char>,
|
||||
|
@ -306,7 +370,7 @@ impl TranslateOperationComplement {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug)]
|
||||
pub struct TranslateOperationStandard {
|
||||
translation_map: HashMap<char, char>,
|
||||
}
|
||||
|
@ -322,15 +386,21 @@ impl TranslateOperationStandard {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug)]
|
||||
pub enum TranslateOperation {
|
||||
Standard(TranslateOperationStandard),
|
||||
Complement(TranslateOperationComplement),
|
||||
}
|
||||
|
||||
impl TranslateOperation {
|
||||
fn next_complement_char(mut iter: u32) -> (u32, char) {
|
||||
while char::from_u32(iter).is_none() {
|
||||
fn next_complement_char(mut iter: u32, ignore_list: &[char]) -> (u32, char) {
|
||||
while (char::from_u32(iter).is_none()
|
||||
|| ignore_list
|
||||
.iter()
|
||||
.map(|c| u32::from(*c))
|
||||
.any(|c| iter.eq(&c)))
|
||||
&& iter.ne(&u32::MAX)
|
||||
{
|
||||
iter = iter.saturating_add(1)
|
||||
}
|
||||
(iter.saturating_add(1), char::from_u32(iter).unwrap())
|
||||
|
@ -392,7 +462,7 @@ impl SymbolTranslator for TranslateOperation {
|
|||
while translation_map.get(¤t).is_none() {
|
||||
if let Some(p) = set2.pop() {
|
||||
let (next_index, next_value) =
|
||||
TranslateOperation::next_complement_char(*iter);
|
||||
TranslateOperation::next_complement_char(*iter, &*set1);
|
||||
*iter = next_index;
|
||||
translation_map.insert(next_value, p);
|
||||
} else {
|
||||
|
@ -466,7 +536,7 @@ impl SymbolTranslator for SqueezeOperation {
|
|||
|
||||
pub fn translate_input<T, R, W>(input: &mut R, output: &mut W, mut translator: T)
|
||||
where
|
||||
T: SymbolTranslator,
|
||||
T: SymbolTranslator + Debug,
|
||||
R: BufRead,
|
||||
W: Write,
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue