mirror of
https://github.com/uutils/coreutils
synced 2025-01-19 00:24:13 +00:00
printf: basic support for unicode escape sequences
This commit is contained in:
parent
066d8ba73d
commit
68d036c9a2
2 changed files with 44 additions and 20 deletions
|
@ -1,6 +1,7 @@
|
|||
// One unit of output produced while scanning a format string: either a plain
// byte or the result of parsing a backslash escape.
#[derive(Debug)]
|
||||
pub enum EscapedChar {
|
||||
// NOTE(review): `Char` is listed twice in this view (`Char(u8)` here and
// `Char(char)` below). This row is presumably the pre-change side of the diff
// that `Byte(u8)` replaces -- confirm against the actual file.
Char(u8),
|
||||
// A single raw byte; the `FormatChar` impl writes it out unchanged.
Byte(u8),
|
||||
// A Unicode character, produced by the `\u` / `\U` escapes; the `FormatChar`
// impl writes it with `write!(writer, "{c}")`.
Char(char),
|
||||
// An escape that is emitted literally: the `FormatChar` impl writes a
// backslash followed by this byte. Used for `\x` with no hex digits and for
// escape letters that are not recognised.
Backslash(u8),
|
||||
// Returned for the `\c` escape.
End,
|
||||
}
|
||||
|
@ -61,6 +62,24 @@ fn parse_code(input: &mut &[u8], base: Base) -> Option<u8> {
|
|||
Some(ret)
|
||||
}
|
||||
|
||||
/// Parse `\uHHHH` and `\UHHHHHHHH`
|
||||
// TODO: This should print warnings and possibly halt execution when it fails to parse
|
||||
// TODO: If the character cannot be converted to u32, the input should be printed.
|
||||
// Reads hex digits from the front of `input` and returns the `char` whose code
// point they spell. `digits` is the number of digits expected; the visible
// callers pass 4 for `\u` and 8 for `\U` and substitute '\0' on `None`.
//
// `input` is advanced past every digit that parsed successfully, even if a
// later step then returns `None`; the byte that failed to parse is left in
// place. `None` is returned when the input runs out, when
// `Base::Hex.to_digit` rejects a byte, or when `char::from_u32` rejects the
// accumulated value.
fn parse_unicode(input: &mut &[u8], digits: u8) -> Option<char> {
|
||||
// The first digit is read unconditionally, so at least one digit is consumed
// even when `digits` is 0 or 1 (the loop range `1..digits` is then empty).
let (c, rest) = input.split_first()?;
|
||||
let mut ret = Base::Hex.to_digit(*c)? as u32;
|
||||
// Only commit the consumed byte once it is known to be a valid digit.
*input = &rest[..];
|
||||
|
||||
// Remaining `digits - 1` digits; any failure bails out with `None` via `?`.
for _ in 1..digits {
|
||||
let (c, rest) = input.split_first()?;
|
||||
let n = Base::Hex.to_digit(*c)?;
|
||||
// Shift the accumulator by one digit and add the new one.
// NOTE(review): `Base::Hex as u32` is presumably the radix 16 -- confirm
// against the `Base` definition. The wrapping arithmetic means an overflow
// would silently drop high bits instead of panicking.
ret = ret.wrapping_mul(Base::Hex as u32).wrapping_add(n as u32);
|
||||
*input = &rest[..];
|
||||
}
|
||||
|
||||
// `char::from_u32` yields `None` for values that are not Unicode scalar
// values (surrogate code points or anything above 0x10FFFF).
char::from_u32(ret)
|
||||
}
|
||||
|
||||
pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
|
||||
if let [c, new_rest @ ..] = rest {
|
||||
// This is for the \NNN syntax for octal sequences.
|
||||
|
@ -68,33 +87,35 @@ pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
|
|||
// would be the \0NNN syntax.
|
||||
if let b'1'..=b'7' = c {
|
||||
if let Some(parsed) = parse_code(rest, Base::Oct) {
|
||||
return EscapedChar::Char(parsed);
|
||||
return EscapedChar::Byte(parsed);
|
||||
}
|
||||
}
|
||||
|
||||
*rest = &new_rest[..];
|
||||
match c {
|
||||
b'\\' => EscapedChar::Char(b'\\'),
|
||||
b'a' => EscapedChar::Char(b'\x07'),
|
||||
b'b' => EscapedChar::Char(b'\x08'),
|
||||
b'\\' => EscapedChar::Byte(b'\\'),
|
||||
b'a' => EscapedChar::Byte(b'\x07'),
|
||||
b'b' => EscapedChar::Byte(b'\x08'),
|
||||
b'c' => return EscapedChar::End,
|
||||
b'e' => EscapedChar::Char(b'\x1b'),
|
||||
b'f' => EscapedChar::Char(b'\x0c'),
|
||||
b'n' => EscapedChar::Char(b'\n'),
|
||||
b'r' => EscapedChar::Char(b'\r'),
|
||||
b't' => EscapedChar::Char(b'\t'),
|
||||
b'v' => EscapedChar::Char(b'\x0b'),
|
||||
b'e' => EscapedChar::Byte(b'\x1b'),
|
||||
b'f' => EscapedChar::Byte(b'\x0c'),
|
||||
b'n' => EscapedChar::Byte(b'\n'),
|
||||
b'r' => EscapedChar::Byte(b'\r'),
|
||||
b't' => EscapedChar::Byte(b'\t'),
|
||||
b'v' => EscapedChar::Byte(b'\x0b'),
|
||||
b'x' => {
|
||||
if let Some(c) = parse_code(rest, Base::Hex) {
|
||||
EscapedChar::Char(c)
|
||||
EscapedChar::Byte(c)
|
||||
} else {
|
||||
EscapedChar::Backslash(b'x')
|
||||
}
|
||||
}
|
||||
b'0' => EscapedChar::Char(parse_code(rest, Base::Oct).unwrap_or(b'\0')),
|
||||
b'0' => EscapedChar::Byte(parse_code(rest, Base::Oct).unwrap_or(b'\0')),
|
||||
b'u' => EscapedChar::Char(parse_unicode(rest, 4).unwrap_or('\0')),
|
||||
b'U' => EscapedChar::Char(parse_unicode(rest, 8).unwrap_or('\0')),
|
||||
c => EscapedChar::Backslash(*c),
|
||||
}
|
||||
} else {
|
||||
EscapedChar::Char(b'\\')
|
||||
EscapedChar::Byte(b'\\')
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,11 +19,12 @@
|
|||
|
||||
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
||||
|
||||
mod escape;
|
||||
mod argument;
|
||||
mod escape;
|
||||
pub mod num_format;
|
||||
mod spec;
|
||||
|
||||
pub use argument::*;
|
||||
use spec::Spec;
|
||||
use std::{
|
||||
error::Error,
|
||||
|
@ -31,7 +32,6 @@ use std::{
|
|||
io::{stdout, Write},
|
||||
ops::ControlFlow,
|
||||
};
|
||||
pub use argument::*;
|
||||
|
||||
use crate::error::UError;
|
||||
|
||||
|
@ -91,9 +91,12 @@ impl FormatChar for u8 {
|
|||
impl FormatChar for EscapedChar {
|
||||
fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
|
||||
match self {
|
||||
EscapedChar::Char(c) => {
|
||||
EscapedChar::Byte(c) => {
|
||||
writer.write(&[*c])?;
|
||||
}
|
||||
EscapedChar::Char(c) => {
|
||||
write!(writer, "{c}")?;
|
||||
}
|
||||
EscapedChar::Backslash(c) => {
|
||||
writer.write(&[b'\\', *c])?;
|
||||
}
|
||||
|
@ -125,7 +128,7 @@ pub fn parse_spec_and_escape(
|
|||
[] => return None,
|
||||
[b'%', b'%', rest @ ..] => {
|
||||
current = rest;
|
||||
Some(Ok(FormatItem::Char(EscapedChar::Char(b'%'))))
|
||||
Some(Ok(FormatItem::Char(EscapedChar::Byte(b'%'))))
|
||||
}
|
||||
[b'%', rest @ ..] => {
|
||||
current = rest;
|
||||
|
@ -141,7 +144,7 @@ pub fn parse_spec_and_escape(
|
|||
}
|
||||
[c, rest @ ..] => {
|
||||
current = rest;
|
||||
Some(Ok(FormatItem::Char(EscapedChar::Char(*c))))
|
||||
Some(Ok(FormatItem::Char(EscapedChar::Byte(*c))))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -179,7 +182,7 @@ fn parse_escape_only(fmt: &[u8]) -> impl Iterator<Item = EscapedChar> + '_ {
|
|||
}
|
||||
[c, rest @ ..] => {
|
||||
current = rest;
|
||||
Some(EscapedChar::Char(*c))
|
||||
Some(EscapedChar::Byte(*c))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue