printf: basic support for Unicode escape sequences

This commit is contained in:
Terts Diepraam 2023-11-20 13:45:02 +01:00
parent 066d8ba73d
commit 68d036c9a2
2 changed files with 44 additions and 20 deletions

View file

@ -1,6 +1,7 @@
#[derive(Debug)]
pub enum EscapedChar {
Char(u8),
Byte(u8),
Char(char),
Backslash(u8),
End,
}
@ -61,6 +62,24 @@ fn parse_code(input: &mut &[u8], base: Base) -> Option<u8> {
Some(ret)
}
/// Decode the hexadecimal payload of a `\uHHHH` or `\UHHHHHHHH` escape.
///
/// `input` is expected to start at the first hex digit. `digits` hex digits
/// are read (at least one is always read), and each digit is consumed from
/// `input` as soon as it has been accepted. On failure, the digits accepted
/// up to that point therefore stay consumed.
///
/// Returns `None` when the input ends early, when a byte is not a hex digit,
/// or when the accumulated value is rejected by `char::from_u32`.
// TODO: This should print warnings and possibly halt execution when it fails to parse
// TODO: If the parsed value cannot be converted to a `char`, the input should be printed.
fn parse_unicode(input: &mut &[u8], digits: u8) -> Option<char> {
    let radix = Base::Hex as u32;
    let mut code_point: u32 = 0;

    // `max(1)` keeps the "at least one digit" behaviour even for `digits == 0`.
    for _ in 0..digits.max(1) {
        let (&byte, tail) = input.split_first()?;
        let digit = Base::Hex.to_digit(byte)? as u32;
        code_point = code_point.wrapping_mul(radix).wrapping_add(digit);
        // Only advance past a byte once it has been accepted as a digit.
        *input = tail;
    }

    char::from_u32(code_point)
}
pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
if let [c, new_rest @ ..] = rest {
// This is for the \NNN syntax for octal sequences.
@ -68,33 +87,35 @@ pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
// would be the \0NNN syntax.
if let b'1'..=b'7' = c {
if let Some(parsed) = parse_code(rest, Base::Oct) {
return EscapedChar::Char(parsed);
return EscapedChar::Byte(parsed);
}
}
*rest = &new_rest[..];
match c {
b'\\' => EscapedChar::Char(b'\\'),
b'a' => EscapedChar::Char(b'\x07'),
b'b' => EscapedChar::Char(b'\x08'),
b'\\' => EscapedChar::Byte(b'\\'),
b'a' => EscapedChar::Byte(b'\x07'),
b'b' => EscapedChar::Byte(b'\x08'),
b'c' => return EscapedChar::End,
b'e' => EscapedChar::Char(b'\x1b'),
b'f' => EscapedChar::Char(b'\x0c'),
b'n' => EscapedChar::Char(b'\n'),
b'r' => EscapedChar::Char(b'\r'),
b't' => EscapedChar::Char(b'\t'),
b'v' => EscapedChar::Char(b'\x0b'),
b'e' => EscapedChar::Byte(b'\x1b'),
b'f' => EscapedChar::Byte(b'\x0c'),
b'n' => EscapedChar::Byte(b'\n'),
b'r' => EscapedChar::Byte(b'\r'),
b't' => EscapedChar::Byte(b'\t'),
b'v' => EscapedChar::Byte(b'\x0b'),
b'x' => {
if let Some(c) = parse_code(rest, Base::Hex) {
EscapedChar::Char(c)
EscapedChar::Byte(c)
} else {
EscapedChar::Backslash(b'x')
}
}
b'0' => EscapedChar::Char(parse_code(rest, Base::Oct).unwrap_or(b'\0')),
b'0' => EscapedChar::Byte(parse_code(rest, Base::Oct).unwrap_or(b'\0')),
b'u' => EscapedChar::Char(parse_unicode(rest, 4).unwrap_or('\0')),
b'U' => EscapedChar::Char(parse_unicode(rest, 8).unwrap_or('\0')),
c => EscapedChar::Backslash(*c),
}
} else {
EscapedChar::Char(b'\\')
EscapedChar::Byte(b'\\')
}
}

View file

@ -19,11 +19,12 @@
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
mod escape;
mod argument;
mod escape;
pub mod num_format;
mod spec;
pub use argument::*;
use spec::Spec;
use std::{
error::Error,
@ -31,7 +32,6 @@ use std::{
io::{stdout, Write},
ops::ControlFlow,
};
pub use argument::*;
use crate::error::UError;
@ -91,9 +91,12 @@ impl FormatChar for u8 {
impl FormatChar for EscapedChar {
fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
match self {
EscapedChar::Char(c) => {
EscapedChar::Byte(c) => {
writer.write(&[*c])?;
}
EscapedChar::Char(c) => {
write!(writer, "{c}")?;
}
EscapedChar::Backslash(c) => {
writer.write(&[b'\\', *c])?;
}
@ -125,7 +128,7 @@ pub fn parse_spec_and_escape(
[] => return None,
[b'%', b'%', rest @ ..] => {
current = rest;
Some(Ok(FormatItem::Char(EscapedChar::Char(b'%'))))
Some(Ok(FormatItem::Char(EscapedChar::Byte(b'%'))))
}
[b'%', rest @ ..] => {
current = rest;
@ -141,7 +144,7 @@ pub fn parse_spec_and_escape(
}
[c, rest @ ..] => {
current = rest;
Some(Ok(FormatItem::Char(EscapedChar::Char(*c))))
Some(Ok(FormatItem::Char(EscapedChar::Byte(*c))))
}
})
}
@ -179,7 +182,7 @@ fn parse_escape_only(fmt: &[u8]) -> impl Iterator<Item = EscapedChar> + '_ {
}
[c, rest @ ..] => {
current = rest;
Some(EscapedChar::Char(*c))
Some(EscapedChar::Byte(*c))
}
})
}